[NTOSKRNL] Drop the useless Timestamp field
[reactos.git] / dll / win32 / xmllite / reader.c
1 /*
2 * IXmlReader implementation
3 *
4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 */
20
21 #define COBJMACROS
22
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include <assert.h>
26 #include "windef.h"
27 #include "winbase.h"
28 #include "initguid.h"
29 #include "objbase.h"
30 #include "xmllite.h"
31 #include "xmllite_private.h"
32
33 #include "wine/debug.h"
34 #include "wine/list.h"
35 #include "wine/unicode.h"
36
37 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
38
39 /* not defined in public headers */
40 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
41
/* Internal parsing state of the reader, stored in xmlreader.instate. */
typedef enum
{
    XmlReadInState_Initial,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    XmlReadInState_MiscEnd, /* optional Misc at the end of a document */
    XmlReadInState_Eof
} XmlReaderInternalState;
54
/* This state denotes where parsing was interrupted by an input problem.
   The reader resumes parsing using this information
   (stored in xmlreader.resumestate). */
typedef enum
{
    XmlReadResumeState_Initial,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
68
/* Saved pointer index to resume from a particular input position.
   Values index the xmlreader.resume[] array; XmlReadResume_Last is its size. */
typedef enum
{
    XmlReadResume_Name, /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Local, /* local for QName */
    XmlReadResume_Body, /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Last
} XmlReaderResume;
77
/* Kinds of string values kept for the current node. Values index the
   xmlreader.strvalues[] array; StringValue_Last is its size. */
typedef enum
{
    StringValue_LocalName,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    StringValue_Last
} XmlReaderStringValue;
86
/* encoding names referenced by xml_encoding_map */
static const WCHAR usasciiW[] = {'U','S','-','A','S','C','I','I',0};
static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
static const WCHAR utf8W[] = {'U','T','F','-','8',0};

/* null-terminated markup tokens */
static const WCHAR dblquoteW[] = {'\"',0};
static const WCHAR quoteW[] = {'\'',0};
static const WCHAR ltW[] = {'<',0};
static const WCHAR gtW[] = {'>',0};
static const WCHAR commentW[] = {'<','!','-','-',0};
static const WCHAR piW[] = {'<','?',0};
97
98 static BOOL is_namestartchar(WCHAR ch);
99
100 static const char *debugstr_nodetype(XmlNodeType nodetype)
101 {
102 static const char * const type_names[] =
103 {
104 "None",
105 "Element",
106 "Attribute",
107 "Text",
108 "CDATA",
109 "",
110 "",
111 "ProcessingInstruction",
112 "Comment",
113 "",
114 "DocumentType",
115 "",
116 "",
117 "Whitespace",
118 "",
119 "EndElement",
120 "",
121 "XmlDeclaration"
122 };
123
124 if (nodetype > _XmlNodeType_Last)
125 return wine_dbg_sprintf("unknown type=%d", nodetype);
126
127 return type_names[nodetype];
128 }
129
130 static const char *debugstr_reader_prop(XmlReaderProperty prop)
131 {
132 static const char * const prop_names[] =
133 {
134 "MultiLanguage",
135 "ConformanceLevel",
136 "RandomAccess",
137 "XmlResolver",
138 "DtdProcessing",
139 "ReadState",
140 "MaxElementDepth",
141 "MaxEntityExpansion"
142 };
143
144 if (prop > _XmlReaderProperty_Last)
145 return wine_dbg_sprintf("unknown property=%d", prop);
146
147 return prop_names[prop];
148 }
149
/* One supported encoding: its name, enum value and Windows code page. */
struct xml_encoding_data
{
    const WCHAR *name;
    xml_encoding enc;
    UINT cp;
};

/* Table of supported encodings.
   get_encoding_name() and get_code_page() index it directly with an
   xml_encoding value, and parse_encoding_name() runs a binary search over
   the names, so entries have to stay sorted by name (case-insensitive).
   NOTE(review): direct indexing presumably relies on entry order matching
   the xml_encoding enum values - confirm against the enum declaration. */
static const struct xml_encoding_data xml_encoding_map[] = {
    { usasciiW, XmlEncoding_USASCII, 20127 },
    { utf16W, XmlEncoding_UTF16, 1200 },
    { utf8W, XmlEncoding_UTF8, CP_UTF8 },
};
162
163 const WCHAR *get_encoding_name(xml_encoding encoding)
164 {
165 return xml_encoding_map[encoding].name;
166 }
167
168 xml_encoding get_encoding_from_codepage(UINT codepage)
169 {
170 int i;
171 for (i = 0; i < ARRAY_SIZE(xml_encoding_map); i++)
172 {
173 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
174 }
175 return XmlEncoding_Unknown;
176 }
177
/* Growable data buffer, used both for raw stream data and for converted
   UTF-16 data (see struct input_buffer). */
typedef struct
{
    char *data;             /* buffer memory */
    UINT cur;               /* current position: byte offset for the raw buffer,
                               WCHAR index for the UTF-16 buffer */
    unsigned int allocated; /* allocated size in bytes */
    unsigned int written;   /* number of valid bytes in 'data' */
    BOOL prev_cr;           /* set by fixup_buffer_cr() when the last processed char was '\r' */
} encoded_buffer;
186
187 typedef struct input_buffer input_buffer;
188
/* IXmlReaderInput implementation, holds the input stream and its buffers. */
typedef struct
{
    IXmlReaderInput IXmlReaderInput_iface;
    LONG ref;
    /* reference passed on IXmlReaderInput creation, is kept when input is created */
    IUnknown *input;
    IMalloc *imalloc;  /* passed to m_alloc()/m_realloc()/m_free() for input allocations */
    xml_encoding encoding;
    BOOL hint;
    WCHAR *baseuri;
    /* stream reference set after SetInput() call from reader,
       stored as sequential stream, because optimizations possible
       with IStream aren't currently implemented */
    ISequentialStream *stream;
    input_buffer *buffer;
    unsigned int pending : 1; /* set when the last stream read returned E_PENDING */
} xmlreaderinput;
206
207 static const struct IUnknownVtbl xmlreaderinputvtbl;
208
/* Structure to hold parsed string of specific length.

   Reader stores node value as 'start' offset into the UTF-16 input buffer,
   on request a null-terminated version of it is allocated.

   To init a strval variable use reader_init_strvalue() or
   reader_init_cstrvalue(), to set strval as a reader value use
   reader_set_strvalue().
*/
typedef struct
{
    WCHAR *str; /* allocated null-terminated string */
    UINT len; /* length in WCHARs, altered after ReadValueChunk */
    UINT start; /* input position where value starts */
} strval;
223
/* Shared constant values. strval_empty.str is compared by pointer
   (see reader_strvaldup() and reader_free_strvalued()) to recognize the
   shared empty value, which is never duplicated or freed. */
static WCHAR emptyW[] = {0};
static WCHAR xmlW[] = {'x','m','l',0};
static WCHAR xmlnsW[] = {'x','m','l','n','s',0};
static const strval strval_empty = { emptyW };
static const strval strval_xml = { xmlW, 3 };
static const strval strval_xmlns = { xmlnsW, 5 };
230
/* Line/column pair, updated per character by reader_update_position(). */
struct reader_position
{
    UINT line_number;
    UINT line_position;
};
236
/* Bit flags stored in attribute.flags. */
enum attribute_flags
{
    ATTRIBUTE_NS_DEFINITION = 0x1,
    ATTRIBUTE_DEFAULT_NS_DEFINITION = 0x2,
};
242
/* Attribute of the current node, linked into xmlreader.attrs.
   Only 'localname' and 'value' are duplicated by reader_add_attr() and
   freed by reader_clear_attrs(); 'prefix' and 'qname' are plain copies
   of the caller's strval. */
struct attribute
{
    struct list entry;
    strval prefix;
    strval localname;
    strval qname;
    strval value;
    struct reader_position position;
    unsigned int flags; /* enum attribute_flags bits */
};
253
/* Open element, linked into xmlreader.elements (pushed at the list head). */
struct element
{
    struct list entry;
    strval prefix;
    strval localname;
    strval qname;
    struct reader_position position;
};
262
/* Namespace definition, linked into xmlreader.ns or xmlreader.nsdef. */
struct ns
{
    struct list entry;
    strval prefix;
    strval uri;
    struct element *element; /* element this definition was assigned to by
                                reader_mark_ns_nodes(), NULL until then */
};
270
/* IXmlReader implementation. */
typedef struct
{
    IXmlReader IXmlReader_iface;
    LONG ref;
    xmlreaderinput *input;
    IMalloc *imalloc; /* passed to m_alloc()/m_free() for reader allocations */
    XmlReadState state;
    HRESULT error; /* error set on XmlReadState_Error */
    XmlReaderInternalState instate;
    XmlReaderResumeState resumestate;
    XmlNodeType nodetype;
    DtdProcessing dtdmode;
    IXmlResolver *resolver;
    IUnknown *mlang;
    struct reader_position position;
    struct list attrs; /* attributes list for current node */
    struct attribute *attr; /* current attribute */
    UINT attr_count;
    struct list nsdef; /* namespace definitions pushed with def == TRUE */
    struct list ns; /* namespace definitions pushed with a prefix */
    struct list elements; /* open elements, most recent at the head */
    int chunk_read_off;
    strval strvalues[StringValue_Last];
    UINT depth;
    UINT max_depth; /* 0 disables the check in reader_inc_depth() */
    BOOL is_empty_element;
    struct element empty_element; /* used for empty elements without end tag <a />,
                                     and to keep <?xml reader position */
    UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
} xmlreader;
301
/* Pair of buffers used by an input: raw data as read from the stream and
   the same data converted to UTF-16. */
struct input_buffer
{
    encoded_buffer utf16;   /* converted data, this is what the parser reads */
    encoded_buffer encoded; /* raw data read from the stream */
    UINT code_page;         /* code page of the raw data, ~0 until it's known */
    xmlreaderinput *input;  /* owning input, used for allocations */
};
309
310 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
311 {
312 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
313 }
314
315 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
316 {
317 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
318 }
319
320 /* reader memory allocation functions */
321 static inline void *reader_alloc(xmlreader *reader, size_t len)
322 {
323 return m_alloc(reader->imalloc, len);
324 }
325
326 static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
327 {
328 void *ret = reader_alloc(reader, len);
329 if (ret)
330 memset(ret, 0, len);
331 return ret;
332 }
333
334 static inline void reader_free(xmlreader *reader, void *mem)
335 {
336 m_free(reader->imalloc, mem);
337 }
338
339 /* Just return pointer from offset, no attempt to read more. */
340 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
341 {
342 encoded_buffer *buffer = &reader->input->buffer->utf16;
343 return (WCHAR*)buffer->data + offset;
344 }
345
346 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
347 {
348 return v->str ? v->str : reader_get_ptr2(reader, v->start);
349 }
350
351 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
352 {
353 *dest = *src;
354
355 if (src->str != strval_empty.str)
356 {
357 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
358 if (!dest->str) return E_OUTOFMEMORY;
359 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
360 dest->str[dest->len] = 0;
361 dest->start = 0;
362 }
363
364 return S_OK;
365 }
366
367 /* reader input memory allocation functions */
368 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
369 {
370 return m_alloc(input->imalloc, len);
371 }
372
373 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
374 {
375 return m_realloc(input->imalloc, mem, len);
376 }
377
378 static inline void readerinput_free(xmlreaderinput *input, void *mem)
379 {
380 m_free(input->imalloc, mem);
381 }
382
383 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
384 {
385 LPWSTR ret = NULL;
386
387 if(str) {
388 DWORD size;
389
390 size = (strlenW(str)+1)*sizeof(WCHAR);
391 ret = readerinput_alloc(input, size);
392 if (ret) memcpy(ret, str, size);
393 }
394
395 return ret;
396 }
397
398 /* This one frees stored string value if needed */
399 static void reader_free_strvalued(xmlreader *reader, strval *v)
400 {
401 if (v->str != strval_empty.str)
402 {
403 reader_free(reader, v->str);
404 *v = strval_empty;
405 }
406 }
407
408 static void reader_clear_attrs(xmlreader *reader)
409 {
410 struct attribute *attr, *attr2;
411 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
412 {
413 reader_free_strvalued(reader, &attr->localname);
414 reader_free_strvalued(reader, &attr->value);
415 reader_free(reader, attr);
416 }
417 list_init(&reader->attrs);
418 reader->attr_count = 0;
419 reader->attr = NULL;
420 }
421
422 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
423 while we are on a node with attributes */
424 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *qname,
425 strval *value, const struct reader_position *position, unsigned int flags)
426 {
427 struct attribute *attr;
428 HRESULT hr;
429
430 attr = reader_alloc(reader, sizeof(*attr));
431 if (!attr) return E_OUTOFMEMORY;
432
433 hr = reader_strvaldup(reader, localname, &attr->localname);
434 if (hr == S_OK)
435 {
436 hr = reader_strvaldup(reader, value, &attr->value);
437 if (hr != S_OK)
438 reader_free_strvalued(reader, &attr->value);
439 }
440 if (hr != S_OK)
441 {
442 reader_free(reader, attr);
443 return hr;
444 }
445
446 if (prefix)
447 attr->prefix = *prefix;
448 else
449 memset(&attr->prefix, 0, sizeof(attr->prefix));
450 attr->qname = qname ? *qname : *localname;
451 attr->position = *position;
452 attr->flags = flags;
453 list_add_tail(&reader->attrs, &attr->entry);
454 reader->attr_count++;
455
456 return S_OK;
457 }
458
459 /* Returns current element, doesn't check if reader is actually positioned on it. */
460 static struct element *reader_get_element(xmlreader *reader)
461 {
462 if (reader->is_empty_element)
463 return &reader->empty_element;
464
465 return LIST_ENTRY(list_head(&reader->elements), struct element, entry);
466 }
467
468 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
469 {
470 v->start = start;
471 v->len = len;
472 v->str = NULL;
473 }
474
475 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
476 {
477 return debugstr_wn(reader_get_strptr(reader, v), v->len);
478 }
479
480 /* used to initialize from constant string */
481 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
482 {
483 v->start = 0;
484 v->len = len;
485 v->str = str;
486 }
487
488 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
489 {
490 reader_free_strvalued(reader, &reader->strvalues[type]);
491 }
492
493 static void reader_free_strvalues(xmlreader *reader)
494 {
495 int type;
496 for (type = 0; type < StringValue_Last; type++)
497 reader_free_strvalue(reader, type);
498 }
499
500 /* This helper should only be used to test if strings are the same,
501 it doesn't try to sort. */
502 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
503 {
504 if (str1->len != str2->len) return 0;
505 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
506 }
507
508 static void reader_clear_elements(xmlreader *reader)
509 {
510 struct element *elem, *elem2;
511 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
512 {
513 reader_free_strvalued(reader, &elem->prefix);
514 reader_free_strvalued(reader, &elem->localname);
515 reader_free_strvalued(reader, &elem->qname);
516 reader_free(reader, elem);
517 }
518 list_init(&reader->elements);
519 reader_free_strvalued(reader, &reader->empty_element.localname);
520 reader_free_strvalued(reader, &reader->empty_element.qname);
521 reader->is_empty_element = FALSE;
522 }
523
524 static struct ns *reader_lookup_ns(xmlreader *reader, const strval *prefix)
525 {
526 struct list *nslist = prefix ? &reader->ns : &reader->nsdef;
527 struct ns *ns;
528
529 LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) {
530 if (strval_eq(reader, prefix, &ns->prefix))
531 return ns;
532 }
533
534 return NULL;
535 }
536
537 static HRESULT reader_inc_depth(xmlreader *reader)
538 {
539 return (++reader->depth >= reader->max_depth && reader->max_depth) ? SC_E_MAXELEMENTDEPTH : S_OK;
540 }
541
542 static void reader_dec_depth(xmlreader *reader)
543 {
544 if (reader->depth)
545 reader->depth--;
546 }
547
548 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def)
549 {
550 struct ns *ns;
551 HRESULT hr;
552
553 ns = reader_alloc(reader, sizeof(*ns));
554 if (!ns) return E_OUTOFMEMORY;
555
556 if (def)
557 memset(&ns->prefix, 0, sizeof(ns->prefix));
558 else {
559 hr = reader_strvaldup(reader, prefix, &ns->prefix);
560 if (FAILED(hr)) {
561 reader_free(reader, ns);
562 return hr;
563 }
564 }
565
566 hr = reader_strvaldup(reader, uri, &ns->uri);
567 if (FAILED(hr)) {
568 reader_free_strvalued(reader, &ns->prefix);
569 reader_free(reader, ns);
570 return hr;
571 }
572
573 ns->element = NULL;
574 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
575 return hr;
576 }
577
578 static void reader_free_element(xmlreader *reader, struct element *element)
579 {
580 reader_free_strvalued(reader, &element->prefix);
581 reader_free_strvalued(reader, &element->localname);
582 reader_free_strvalued(reader, &element->qname);
583 reader_free(reader, element);
584 }
585
586 static void reader_mark_ns_nodes(xmlreader *reader, struct element *element)
587 {
588 struct ns *ns;
589
590 LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) {
591 if (ns->element)
592 break;
593 ns->element = element;
594 }
595
596 LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) {
597 if (ns->element)
598 break;
599 ns->element = element;
600 }
601 }
602
603 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname,
604 strval *qname, const struct reader_position *position)
605 {
606 struct element *element;
607 HRESULT hr;
608
609 element = reader_alloc_zero(reader, sizeof(*element));
610 if (!element)
611 return E_OUTOFMEMORY;
612
613 if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) == S_OK &&
614 (hr = reader_strvaldup(reader, localname, &element->localname)) == S_OK &&
615 (hr = reader_strvaldup(reader, qname, &element->qname)) == S_OK)
616 {
617 list_add_head(&reader->elements, &element->entry);
618 reader_mark_ns_nodes(reader, element);
619 reader->is_empty_element = FALSE;
620 element->position = *position;
621 }
622 else
623 reader_free_element(reader, element);
624
625 return hr;
626 }
627
628 static void reader_pop_ns_nodes(xmlreader *reader, struct element *element)
629 {
630 struct ns *ns, *ns2;
631
632 LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) {
633 if (ns->element != element)
634 break;
635
636 list_remove(&ns->entry);
637 reader_free_strvalued(reader, &ns->prefix);
638 reader_free_strvalued(reader, &ns->uri);
639 reader_free(reader, ns);
640 }
641
642 if (!list_empty(&reader->nsdef)) {
643 ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
644 if (ns->element == element) {
645 list_remove(&ns->entry);
646 reader_free_strvalued(reader, &ns->prefix);
647 reader_free_strvalued(reader, &ns->uri);
648 reader_free(reader, ns);
649 }
650 }
651 }
652
653 static void reader_pop_element(xmlreader *reader)
654 {
655 struct element *element;
656
657 if (list_empty(&reader->elements))
658 return;
659
660 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
661 list_remove(&element->entry);
662
663 reader_pop_ns_nodes(reader, element);
664 reader_free_element(reader, element);
665
666 /* It was a root element, the rest is expected as Misc */
667 if (list_empty(&reader->elements))
668 reader->instate = XmlReadInState_MiscEnd;
669 }
670
671 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
672 means node value is to be determined. */
673 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
674 {
675 strval *v = &reader->strvalues[type];
676
677 reader_free_strvalue(reader, type);
678 if (!value)
679 {
680 v->str = NULL;
681 v->start = 0;
682 v->len = 0;
683 return;
684 }
685
686 if (value->str == strval_empty.str)
687 *v = *value;
688 else
689 {
690 if (type == StringValue_Value)
691 {
692 /* defer allocation for value string */
693 v->str = NULL;
694 v->start = value->start;
695 v->len = value->len;
696 }
697 else
698 {
699 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
700 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
701 v->str[value->len] = 0;
702 v->len = value->len;
703 }
704 }
705 }
706
707 static inline int is_reader_pending(xmlreader *reader)
708 {
709 return reader->input->pending;
710 }
711
712 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
713 {
714 const int initial_len = 0x2000;
715 buffer->data = readerinput_alloc(input, initial_len);
716 if (!buffer->data) return E_OUTOFMEMORY;
717
718 memset(buffer->data, 0, 4);
719 buffer->cur = 0;
720 buffer->allocated = initial_len;
721 buffer->written = 0;
722 buffer->prev_cr = FALSE;
723
724 return S_OK;
725 }
726
727 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
728 {
729 readerinput_free(input, buffer->data);
730 }
731
732 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
733 {
734 if (encoding == XmlEncoding_Unknown)
735 {
736 FIXME("unsupported encoding %d\n", encoding);
737 return E_NOTIMPL;
738 }
739
740 *cp = xml_encoding_map[encoding].cp;
741
742 return S_OK;
743 }
744
745 xml_encoding parse_encoding_name(const WCHAR *name, int len)
746 {
747 int min, max, n, c;
748
749 if (!name) return XmlEncoding_Unknown;
750
751 min = 0;
752 max = ARRAY_SIZE(xml_encoding_map) - 1;
753
754 while (min <= max)
755 {
756 n = (min+max)/2;
757
758 if (len != -1)
759 c = strncmpiW(xml_encoding_map[n].name, name, len);
760 else
761 c = strcmpiW(xml_encoding_map[n].name, name);
762 if (!c)
763 return xml_encoding_map[n].enc;
764
765 if (c > 0)
766 max = n-1;
767 else
768 min = n+1;
769 }
770
771 return XmlEncoding_Unknown;
772 }
773
774 static HRESULT alloc_input_buffer(xmlreaderinput *input)
775 {
776 input_buffer *buffer;
777 HRESULT hr;
778
779 input->buffer = NULL;
780
781 buffer = readerinput_alloc(input, sizeof(*buffer));
782 if (!buffer) return E_OUTOFMEMORY;
783
784 buffer->input = input;
785 buffer->code_page = ~0; /* code page is unknown at this point */
786 hr = init_encoded_buffer(input, &buffer->utf16);
787 if (hr != S_OK) {
788 readerinput_free(input, buffer);
789 return hr;
790 }
791
792 hr = init_encoded_buffer(input, &buffer->encoded);
793 if (hr != S_OK) {
794 free_encoded_buffer(input, &buffer->utf16);
795 readerinput_free(input, buffer);
796 return hr;
797 }
798
799 input->buffer = buffer;
800 return S_OK;
801 }
802
803 static void free_input_buffer(input_buffer *buffer)
804 {
805 free_encoded_buffer(buffer->input, &buffer->encoded);
806 free_encoded_buffer(buffer->input, &buffer->utf16);
807 readerinput_free(buffer->input, buffer);
808 }
809
810 static void readerinput_release_stream(xmlreaderinput *readerinput)
811 {
812 if (readerinput->stream) {
813 ISequentialStream_Release(readerinput->stream);
814 readerinput->stream = NULL;
815 }
816 }
817
818 /* Queries already stored interface for IStream/ISequentialStream.
819 Interface supplied on creation will be overwritten */
820 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
821 {
822 HRESULT hr;
823
824 readerinput_release_stream(readerinput);
825 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
826 if (hr != S_OK)
827 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
828
829 return hr;
830 }
831
832 /* reads a chunk to raw buffer */
833 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
834 {
835 encoded_buffer *buffer = &readerinput->buffer->encoded;
836 /* to make sure aligned length won't exceed allocated length */
837 ULONG len = buffer->allocated - buffer->written - 4;
838 ULONG read;
839 HRESULT hr;
840
841 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
842 variable width encodings like UTF-8 */
843 len = (len + 3) & ~3;
844 /* try to use allocated space or grow */
845 if (buffer->allocated - buffer->written < len)
846 {
847 buffer->allocated *= 2;
848 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
849 len = buffer->allocated - buffer->written;
850 }
851
852 read = 0;
853 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
854 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
855 readerinput->pending = hr == E_PENDING;
856 if (FAILED(hr)) return hr;
857 buffer->written += read;
858
859 return hr;
860 }
861
862 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
863 static void readerinput_grow(xmlreaderinput *readerinput, int length)
864 {
865 encoded_buffer *buffer = &readerinput->buffer->utf16;
866
867 length *= sizeof(WCHAR);
868 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
869 if (buffer->allocated < buffer->written + length + 4)
870 {
871 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
872 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
873 buffer->allocated = grown_size;
874 }
875 }
876
877 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
878 {
879 static const char startA[] = {'<','?'};
880 static const char commentA[] = {'<','!'};
881 encoded_buffer *buffer = &readerinput->buffer->encoded;
882 unsigned char *ptr = (unsigned char*)buffer->data;
883
884 return !memcmp(buffer->data, startA, sizeof(startA)) ||
885 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
886 /* test start byte */
887 (ptr[0] == '<' &&
888 (
889 (ptr[1] && (ptr[1] <= 0x7f)) ||
890 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
891 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
892 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
893 );
894 }
895
896 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
897 {
898 encoded_buffer *buffer = &readerinput->buffer->encoded;
899 static const char utf8bom[] = {0xef,0xbb,0xbf};
900 static const char utf16lebom[] = {0xff,0xfe};
901 WCHAR *ptrW;
902
903 *enc = XmlEncoding_Unknown;
904
905 if (buffer->written <= 3)
906 {
907 HRESULT hr = readerinput_growraw(readerinput);
908 if (FAILED(hr)) return hr;
909 if (buffer->written < 3) return MX_E_INPUTEND;
910 }
911
912 ptrW = (WCHAR *)buffer->data;
913 /* try start symbols if we have enough data to do that, input buffer should contain
914 first chunk already */
915 if (readerinput_is_utf8(readerinput))
916 *enc = XmlEncoding_UTF8;
917 else if (*ptrW == '<')
918 {
919 ptrW++;
920 if (*ptrW == '?' || *ptrW == '!' || is_namestartchar(*ptrW))
921 *enc = XmlEncoding_UTF16;
922 }
923 /* try with BOM now */
924 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
925 {
926 buffer->cur += sizeof(utf8bom);
927 *enc = XmlEncoding_UTF8;
928 }
929 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
930 {
931 buffer->cur += sizeof(utf16lebom);
932 *enc = XmlEncoding_UTF16;
933 }
934
935 return S_OK;
936 }
937
938 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
939 {
940 encoded_buffer *buffer = &readerinput->buffer->encoded;
941 int len = buffer->written;
942
943 /* complete single byte char */
944 if (!(buffer->data[len-1] & 0x80)) return len;
945
946 /* find start byte of multibyte char */
947 while (--len && !(buffer->data[len] & 0xc0))
948 ;
949
950 return len;
951 }
952
953 /* Returns byte length of complete char sequence for buffer code page,
954 it's relative to current buffer position which is currently used for BOM handling
955 only. */
956 static int readerinput_get_convlen(xmlreaderinput *readerinput)
957 {
958 encoded_buffer *buffer = &readerinput->buffer->encoded;
959 int len;
960
961 if (readerinput->buffer->code_page == CP_UTF8)
962 len = readerinput_get_utf8_convlen(readerinput);
963 else
964 len = buffer->written;
965
966 TRACE("%d\n", len - buffer->cur);
967 return len - buffer->cur;
968 }
969
970 /* It's possible that raw buffer has some leftovers from last conversion - some char
971 sequence that doesn't represent a full code point. Length argument should be calculated with
972 readerinput_get_convlen(), if it's -1 it will be calculated here. */
973 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
974 {
975 encoded_buffer *buffer = &readerinput->buffer->encoded;
976
977 if (len == -1)
978 len = readerinput_get_convlen(readerinput);
979
980 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
981 /* everything below cur is lost too */
982 buffer->written -= len + buffer->cur;
983 /* after this point we don't need cur offset really,
984 it's used only to mark where actual data begins when first chunk is read */
985 buffer->cur = 0;
986 }
987
988 static void fixup_buffer_cr(encoded_buffer *buffer, int off)
989 {
990 BOOL prev_cr = buffer->prev_cr;
991 const WCHAR *src;
992 WCHAR *dest;
993
994 src = dest = (WCHAR*)buffer->data + off;
995 while ((const char*)src < buffer->data + buffer->written)
996 {
997 if (*src == '\r')
998 {
999 *dest++ = '\n';
1000 src++;
1001 prev_cr = TRUE;
1002 continue;
1003 }
1004 if(prev_cr && *src == '\n')
1005 src++;
1006 else
1007 *dest++ = *src++;
1008 prev_cr = FALSE;
1009 }
1010
1011 buffer->written = (char*)dest - buffer->data;
1012 buffer->prev_cr = prev_cr;
1013 *dest = 0;
1014 }
1015
1016 /* note that raw buffer content is kept */
1017 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
1018 {
1019 encoded_buffer *src = &readerinput->buffer->encoded;
1020 encoded_buffer *dest = &readerinput->buffer->utf16;
1021 int len, dest_len;
1022 HRESULT hr;
1023 WCHAR *ptr;
1024 UINT cp;
1025
1026 hr = get_code_page(enc, &cp);
1027 if (FAILED(hr)) return;
1028
1029 readerinput->buffer->code_page = cp;
1030 len = readerinput_get_convlen(readerinput);
1031
1032 TRACE("switching to cp %d\n", cp);
1033
1034 /* just copy in this case */
1035 if (enc == XmlEncoding_UTF16)
1036 {
1037 readerinput_grow(readerinput, len);
1038 memcpy(dest->data, src->data + src->cur, len);
1039 dest->written += len*sizeof(WCHAR);
1040 }
1041 else
1042 {
1043 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1044 readerinput_grow(readerinput, dest_len);
1045 ptr = (WCHAR*)dest->data;
1046 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1047 ptr[dest_len] = 0;
1048 dest->written += dest_len*sizeof(WCHAR);
1049 }
1050
1051 fixup_buffer_cr(dest, 0);
1052 }
1053
1054 /* shrinks parsed data a buffer begins with */
1055 static void reader_shrink(xmlreader *reader)
1056 {
1057 encoded_buffer *buffer = &reader->input->buffer->utf16;
1058
1059 /* avoid to move too often using threshold shrink length */
1060 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
1061 {
1062 buffer->written -= buffer->cur*sizeof(WCHAR);
1063 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
1064 buffer->cur = 0;
1065 *(WCHAR*)&buffer->data[buffer->written] = 0;
1066 }
1067 }
1068
1069 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
1070 It won't attempt to shrink but will grow destination buffer if needed */
1071 static HRESULT reader_more(xmlreader *reader)
1072 {
1073 xmlreaderinput *readerinput = reader->input;
1074 encoded_buffer *src = &readerinput->buffer->encoded;
1075 encoded_buffer *dest = &readerinput->buffer->utf16;
1076 UINT cp = readerinput->buffer->code_page;
1077 int len, dest_len, prev_len;
1078 HRESULT hr;
1079 WCHAR *ptr;
1080
1081 /* get some raw data from stream first */
1082 hr = readerinput_growraw(readerinput);
1083 len = readerinput_get_convlen(readerinput);
1084 prev_len = dest->written / sizeof(WCHAR);
1085
1086 /* just copy for UTF-16 case */
1087 if (cp == 1200)
1088 {
1089 readerinput_grow(readerinput, len);
1090 memcpy(dest->data + dest->written, src->data + src->cur, len);
1091 dest->written += len*sizeof(WCHAR);
1092 }
1093 else
1094 {
1095 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1096 readerinput_grow(readerinput, dest_len);
1097 ptr = (WCHAR*)(dest->data + dest->written);
1098 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1099 ptr[dest_len] = 0;
1100 dest->written += dest_len*sizeof(WCHAR);
1101 /* get rid of processed data */
1102 readerinput_shrinkraw(readerinput, len);
1103 }
1104
1105 fixup_buffer_cr(dest, prev_len);
1106 return hr;
1107 }
1108
1109 static inline UINT reader_get_cur(xmlreader *reader)
1110 {
1111 return reader->input->buffer->utf16.cur;
1112 }
1113
1114 static inline WCHAR *reader_get_ptr(xmlreader *reader)
1115 {
1116 encoded_buffer *buffer = &reader->input->buffer->utf16;
1117 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
1118 if (!*ptr) reader_more(reader);
1119 return (WCHAR*)buffer->data + buffer->cur;
1120 }
1121
1122 static int reader_cmp(xmlreader *reader, const WCHAR *str)
1123 {
1124 int i=0;
1125 const WCHAR *ptr = reader_get_ptr(reader);
1126 while (str[i])
1127 {
1128 if (!ptr[i])
1129 {
1130 reader_more(reader);
1131 ptr = reader_get_ptr(reader);
1132 }
1133 if (str[i] != ptr[i])
1134 return ptr[i] - str[i];
1135 i++;
1136 }
1137 return 0;
1138 }
1139
1140 static void reader_update_position(xmlreader *reader, WCHAR ch)
1141 {
1142 if (ch == '\r')
1143 reader->position.line_position = 1;
1144 else if (ch == '\n')
1145 {
1146 reader->position.line_number++;
1147 reader->position.line_position = 1;
1148 }
1149 else
1150 reader->position.line_position++;
1151 }
1152
1153 /* moves cursor n WCHARs forward */
1154 static void reader_skipn(xmlreader *reader, int n)
1155 {
1156 encoded_buffer *buffer = &reader->input->buffer->utf16;
1157 const WCHAR *ptr;
1158
1159 while (*(ptr = reader_get_ptr(reader)) && n--)
1160 {
1161 reader_update_position(reader, *ptr);
1162 buffer->cur++;
1163 }
1164 }
1165
1166 static inline BOOL is_wchar_space(WCHAR ch)
1167 {
1168 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
1169 }
1170
1171 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1172 static int reader_skipspaces(xmlreader *reader)
1173 {
1174 const WCHAR *ptr = reader_get_ptr(reader);
1175 UINT start = reader_get_cur(reader);
1176
1177 while (is_wchar_space(*ptr))
1178 {
1179 reader_skipn(reader, 1);
1180 ptr = reader_get_ptr(reader);
1181 }
1182
1183 return reader_get_cur(reader) - start;
1184 }
1185
1186 /* [26] VersionNum ::= '1.' [0-9]+ */
1187 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
1188 {
1189 static const WCHAR onedotW[] = {'1','.',0};
1190 WCHAR *ptr, *ptr2;
1191 UINT start;
1192
1193 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
1194
1195 start = reader_get_cur(reader);
1196 /* skip "1." */
1197 reader_skipn(reader, 2);
1198
1199 ptr2 = ptr = reader_get_ptr(reader);
1200 while (*ptr >= '0' && *ptr <= '9')
1201 {
1202 reader_skipn(reader, 1);
1203 ptr = reader_get_ptr(reader);
1204 }
1205
1206 if (ptr2 == ptr) return WC_E_DIGIT;
1207 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1208 TRACE("version=%s\n", debug_strval(reader, val));
1209 return S_OK;
1210 }
1211
1212 /* [25] Eq ::= S? '=' S? */
1213 static HRESULT reader_parse_eq(xmlreader *reader)
1214 {
1215 static const WCHAR eqW[] = {'=',0};
1216 reader_skipspaces(reader);
1217 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
1218 /* skip '=' */
1219 reader_skipn(reader, 1);
1220 reader_skipspaces(reader);
1221 return S_OK;
1222 }
1223
1224 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1225 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1226 {
1227 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1228 struct reader_position position;
1229 strval val, name;
1230 HRESULT hr;
1231
1232 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1233
1234 position = reader->position;
1235 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1236 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1237 /* skip 'version' */
1238 reader_skipn(reader, 7);
1239
1240 hr = reader_parse_eq(reader);
1241 if (FAILED(hr)) return hr;
1242
1243 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1244 return WC_E_QUOTE;
1245 /* skip "'"|'"' */
1246 reader_skipn(reader, 1);
1247
1248 hr = reader_parse_versionnum(reader, &val);
1249 if (FAILED(hr)) return hr;
1250
1251 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1252 return WC_E_QUOTE;
1253
1254 /* skip "'"|'"' */
1255 reader_skipn(reader, 1);
1256
1257 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1258 }
1259
1260 /* ([A-Za-z0-9._] | '-') */
1261 static inline BOOL is_wchar_encname(WCHAR ch)
1262 {
1263 return ((ch >= 'A' && ch <= 'Z') ||
1264 (ch >= 'a' && ch <= 'z') ||
1265 (ch >= '0' && ch <= '9') ||
1266 (ch == '.') || (ch == '_') ||
1267 (ch == '-'));
1268 }
1269
1270 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1271 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1272 {
1273 WCHAR *start = reader_get_ptr(reader), *ptr;
1274 xml_encoding enc;
1275 int len;
1276
1277 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1278 return WC_E_ENCNAME;
1279
1280 val->start = reader_get_cur(reader);
1281
1282 ptr = start;
1283 while (is_wchar_encname(*++ptr))
1284 ;
1285
1286 len = ptr - start;
1287 enc = parse_encoding_name(start, len);
1288 TRACE("encoding name %s\n", debugstr_wn(start, len));
1289 val->str = start;
1290 val->len = len;
1291
1292 if (enc == XmlEncoding_Unknown)
1293 return WC_E_ENCNAME;
1294
1295 /* skip encoding name */
1296 reader_skipn(reader, len);
1297 return S_OK;
1298 }
1299
1300 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1301 static HRESULT reader_parse_encdecl(xmlreader *reader)
1302 {
1303 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1304 struct reader_position position;
1305 strval name, val;
1306 HRESULT hr;
1307
1308 if (!reader_skipspaces(reader)) return S_FALSE;
1309
1310 position = reader->position;
1311 if (reader_cmp(reader, encodingW)) return S_FALSE;
1312 name.str = reader_get_ptr(reader);
1313 name.start = reader_get_cur(reader);
1314 name.len = 8;
1315 /* skip 'encoding' */
1316 reader_skipn(reader, 8);
1317
1318 hr = reader_parse_eq(reader);
1319 if (FAILED(hr)) return hr;
1320
1321 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1322 return WC_E_QUOTE;
1323 /* skip "'"|'"' */
1324 reader_skipn(reader, 1);
1325
1326 hr = reader_parse_encname(reader, &val);
1327 if (FAILED(hr)) return hr;
1328
1329 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1330 return WC_E_QUOTE;
1331
1332 /* skip "'"|'"' */
1333 reader_skipn(reader, 1);
1334
1335 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1336 }
1337
1338 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1339 static HRESULT reader_parse_sddecl(xmlreader *reader)
1340 {
1341 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1342 static const WCHAR yesW[] = {'y','e','s',0};
1343 static const WCHAR noW[] = {'n','o',0};
1344 struct reader_position position;
1345 strval name, val;
1346 UINT start;
1347 HRESULT hr;
1348
1349 if (!reader_skipspaces(reader)) return S_FALSE;
1350
1351 position = reader->position;
1352 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1353 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1354 /* skip 'standalone' */
1355 reader_skipn(reader, 10);
1356
1357 hr = reader_parse_eq(reader);
1358 if (FAILED(hr)) return hr;
1359
1360 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1361 return WC_E_QUOTE;
1362 /* skip "'"|'"' */
1363 reader_skipn(reader, 1);
1364
1365 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1366 return WC_E_XMLDECL;
1367
1368 start = reader_get_cur(reader);
1369 /* skip 'yes'|'no' */
1370 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1371 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1372 TRACE("standalone=%s\n", debug_strval(reader, &val));
1373
1374 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1375 return WC_E_QUOTE;
1376 /* skip "'"|'"' */
1377 reader_skipn(reader, 1);
1378
1379 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1380 }
1381
1382 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1383 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1384 {
1385 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1386 static const WCHAR declcloseW[] = {'?','>',0};
1387 struct reader_position position;
1388 HRESULT hr;
1389
1390 /* check if we have "<?xml " */
1391 if (reader_cmp(reader, xmldeclW))
1392 return S_FALSE;
1393
1394 reader_skipn(reader, 2);
1395 position = reader->position;
1396 reader_skipn(reader, 3);
1397 hr = reader_parse_versioninfo(reader);
1398 if (FAILED(hr))
1399 return hr;
1400
1401 hr = reader_parse_encdecl(reader);
1402 if (FAILED(hr))
1403 return hr;
1404
1405 hr = reader_parse_sddecl(reader);
1406 if (FAILED(hr))
1407 return hr;
1408
1409 reader_skipspaces(reader);
1410 if (reader_cmp(reader, declcloseW))
1411 return WC_E_XMLDECL;
1412
1413 /* skip '?>' */
1414 reader_skipn(reader, 2);
1415
1416 reader->nodetype = XmlNodeType_XmlDeclaration;
1417 reader->empty_element.position = position;
1418 reader_set_strvalue(reader, StringValue_LocalName, &strval_xml);
1419 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_xml);
1420
1421 return S_OK;
1422 }
1423
1424 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1425 static HRESULT reader_parse_comment(xmlreader *reader)
1426 {
1427 WCHAR *ptr;
1428 UINT start;
1429
1430 if (reader->resumestate == XmlReadResumeState_Comment)
1431 {
1432 start = reader->resume[XmlReadResume_Body];
1433 ptr = reader_get_ptr(reader);
1434 }
1435 else
1436 {
1437 /* skip '<!--' */
1438 reader_skipn(reader, 4);
1439 reader_shrink(reader);
1440 ptr = reader_get_ptr(reader);
1441 start = reader_get_cur(reader);
1442 reader->nodetype = XmlNodeType_Comment;
1443 reader->resume[XmlReadResume_Body] = start;
1444 reader->resumestate = XmlReadResumeState_Comment;
1445 reader_set_strvalue(reader, StringValue_Value, NULL);
1446 }
1447
1448 /* will exit when there's no more data, it won't attempt to
1449 read more from stream */
1450 while (*ptr)
1451 {
1452 if (ptr[0] == '-')
1453 {
1454 if (ptr[1] == '-')
1455 {
1456 if (ptr[2] == '>')
1457 {
1458 strval value;
1459
1460 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1461 TRACE("%s\n", debug_strval(reader, &value));
1462
1463 /* skip rest of markup '->' */
1464 reader_skipn(reader, 3);
1465
1466 reader_set_strvalue(reader, StringValue_Value, &value);
1467 reader->resume[XmlReadResume_Body] = 0;
1468 reader->resumestate = XmlReadResumeState_Initial;
1469 return S_OK;
1470 }
1471 else
1472 return WC_E_COMMENT;
1473 }
1474 }
1475
1476 reader_skipn(reader, 1);
1477 ptr++;
1478 }
1479
1480 return S_OK;
1481 }
1482
1483 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1484 static inline BOOL is_char(WCHAR ch)
1485 {
1486 return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
1487 (ch >= 0x20 && ch <= 0xd7ff) ||
1488 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1489 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1490 (ch >= 0xe000 && ch <= 0xfffd);
1491 }
1492
1493 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1494 static inline BOOL is_pubchar(WCHAR ch)
1495 {
1496 return (ch == ' ') ||
1497 (ch >= 'a' && ch <= 'z') ||
1498 (ch >= 'A' && ch <= 'Z') ||
1499 (ch >= '0' && ch <= '9') ||
1500 (ch >= '-' && ch <= ';') || /* '()*+,-./:; */
1501 (ch == '=') || (ch == '?') ||
1502 (ch == '@') || (ch == '!') ||
1503 (ch >= '#' && ch <= '%') || /* #$% */
1504 (ch == '_') || (ch == '\r') || (ch == '\n');
1505 }
1506
1507 static inline BOOL is_namestartchar(WCHAR ch)
1508 {
1509 return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
1510 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1511 (ch >= 0xc0 && ch <= 0xd6) ||
1512 (ch >= 0xd8 && ch <= 0xf6) ||
1513 (ch >= 0xf8 && ch <= 0x2ff) ||
1514 (ch >= 0x370 && ch <= 0x37d) ||
1515 (ch >= 0x37f && ch <= 0x1fff) ||
1516 (ch >= 0x200c && ch <= 0x200d) ||
1517 (ch >= 0x2070 && ch <= 0x218f) ||
1518 (ch >= 0x2c00 && ch <= 0x2fef) ||
1519 (ch >= 0x3001 && ch <= 0xd7ff) ||
1520 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1521 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1522 (ch >= 0xf900 && ch <= 0xfdcf) ||
1523 (ch >= 0xfdf0 && ch <= 0xfffd);
1524 }
1525
1526 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1527 static inline BOOL is_ncnamechar(WCHAR ch)
1528 {
1529 return (ch >= 'A' && ch <= 'Z') ||
1530 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1531 (ch == '-') || (ch == '.') ||
1532 (ch >= '0' && ch <= '9') ||
1533 (ch == 0xb7) ||
1534 (ch >= 0xc0 && ch <= 0xd6) ||
1535 (ch >= 0xd8 && ch <= 0xf6) ||
1536 (ch >= 0xf8 && ch <= 0x2ff) ||
1537 (ch >= 0x300 && ch <= 0x36f) ||
1538 (ch >= 0x370 && ch <= 0x37d) ||
1539 (ch >= 0x37f && ch <= 0x1fff) ||
1540 (ch >= 0x200c && ch <= 0x200d) ||
1541 (ch >= 0x203f && ch <= 0x2040) ||
1542 (ch >= 0x2070 && ch <= 0x218f) ||
1543 (ch >= 0x2c00 && ch <= 0x2fef) ||
1544 (ch >= 0x3001 && ch <= 0xd7ff) ||
1545 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1546 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1547 (ch >= 0xf900 && ch <= 0xfdcf) ||
1548 (ch >= 0xfdf0 && ch <= 0xfffd);
1549 }
1550
1551 static inline BOOL is_namechar(WCHAR ch)
1552 {
1553 return (ch == ':') || is_ncnamechar(ch);
1554 }
1555
1556 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1557 {
1558 /* When we're on attribute always return attribute type, container node type is kept.
1559 Note that container is not necessarily an element, and attribute doesn't mean it's
1560 an attribute in XML spec terms. */
1561 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1562 }
1563
1564 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1565 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1566 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1567 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1568 [5] Name ::= NameStartChar (NameChar)* */
1569 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1570 {
1571 WCHAR *ptr;
1572 UINT start;
1573
1574 if (reader->resume[XmlReadResume_Name])
1575 {
1576 start = reader->resume[XmlReadResume_Name];
1577 ptr = reader_get_ptr(reader);
1578 }
1579 else
1580 {
1581 ptr = reader_get_ptr(reader);
1582 start = reader_get_cur(reader);
1583 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1584 }
1585
1586 while (is_namechar(*ptr))
1587 {
1588 reader_skipn(reader, 1);
1589 ptr = reader_get_ptr(reader);
1590 }
1591
1592 if (is_reader_pending(reader))
1593 {
1594 reader->resume[XmlReadResume_Name] = start;
1595 return E_PENDING;
1596 }
1597 else
1598 reader->resume[XmlReadResume_Name] = 0;
1599
1600 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1601 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1602
1603 return S_OK;
1604 }
1605
1606 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1607 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1608 {
1609 static const WCHAR xmlW[] = {'x','m','l'};
1610 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1611 strval name;
1612 WCHAR *ptr;
1613 HRESULT hr;
1614 UINT i;
1615
1616 hr = reader_parse_name(reader, &name);
1617 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1618
1619 /* now that we got name check for illegal content */
1620 if (strval_eq(reader, &name, &xmlval))
1621 return WC_E_LEADINGXML;
1622
1623 /* PITarget can't be a qualified name */
1624 ptr = reader_get_strptr(reader, &name);
1625 for (i = 0; i < name.len; i++)
1626 if (ptr[i] == ':')
1627 return i ? NC_E_NAMECOLON : WC_E_PI;
1628
1629 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1630 *target = name;
1631 return S_OK;
1632 }
1633
1634 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1635 static HRESULT reader_parse_pi(xmlreader *reader)
1636 {
1637 strval target;
1638 WCHAR *ptr;
1639 UINT start;
1640 HRESULT hr;
1641
1642 switch (reader->resumestate)
1643 {
1644 case XmlReadResumeState_Initial:
1645 /* skip '<?' */
1646 reader_skipn(reader, 2);
1647 reader_shrink(reader);
1648 reader->resumestate = XmlReadResumeState_PITarget;
1649 case XmlReadResumeState_PITarget:
1650 hr = reader_parse_pitarget(reader, &target);
1651 if (FAILED(hr)) return hr;
1652 reader_set_strvalue(reader, StringValue_LocalName, &target);
1653 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1654 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1655 reader->resumestate = XmlReadResumeState_PIBody;
1656 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1657 default:
1658 ;
1659 }
1660
1661 start = reader->resume[XmlReadResume_Body];
1662 ptr = reader_get_ptr(reader);
1663 while (*ptr)
1664 {
1665 if (ptr[0] == '?')
1666 {
1667 if (ptr[1] == '>')
1668 {
1669 UINT cur = reader_get_cur(reader);
1670 strval value;
1671
1672 /* strip all leading whitespace chars */
1673 while (start < cur)
1674 {
1675 ptr = reader_get_ptr2(reader, start);
1676 if (!is_wchar_space(*ptr)) break;
1677 start++;
1678 }
1679
1680 reader_init_strvalue(start, cur-start, &value);
1681
1682 /* skip '?>' */
1683 reader_skipn(reader, 2);
1684 TRACE("%s\n", debug_strval(reader, &value));
1685 reader->nodetype = XmlNodeType_ProcessingInstruction;
1686 reader->resumestate = XmlReadResumeState_Initial;
1687 reader->resume[XmlReadResume_Body] = 0;
1688 reader_set_strvalue(reader, StringValue_Value, &value);
1689 return S_OK;
1690 }
1691 }
1692
1693 reader_skipn(reader, 1);
1694 ptr = reader_get_ptr(reader);
1695 }
1696
1697 return S_OK;
1698 }
1699
1700 /* This one is used to parse significant whitespace nodes, like in Misc production */
1701 static HRESULT reader_parse_whitespace(xmlreader *reader)
1702 {
1703 switch (reader->resumestate)
1704 {
1705 case XmlReadResumeState_Initial:
1706 reader_shrink(reader);
1707 reader->resumestate = XmlReadResumeState_Whitespace;
1708 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1709 reader->nodetype = XmlNodeType_Whitespace;
1710 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1711 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1712 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1713 /* fallthrough */
1714 case XmlReadResumeState_Whitespace:
1715 {
1716 strval value;
1717 UINT start;
1718
1719 reader_skipspaces(reader);
1720 if (is_reader_pending(reader)) return S_OK;
1721
1722 start = reader->resume[XmlReadResume_Body];
1723 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1724 reader_set_strvalue(reader, StringValue_Value, &value);
1725 TRACE("%s\n", debug_strval(reader, &value));
1726 reader->resumestate = XmlReadResumeState_Initial;
1727 }
1728 default:
1729 ;
1730 }
1731
1732 return S_OK;
1733 }
1734
1735 /* [27] Misc ::= Comment | PI | S */
1736 static HRESULT reader_parse_misc(xmlreader *reader)
1737 {
1738 HRESULT hr = S_FALSE;
1739
1740 if (reader->resumestate != XmlReadResumeState_Initial)
1741 {
1742 hr = reader_more(reader);
1743 if (FAILED(hr)) return hr;
1744
1745 /* finish current node */
1746 switch (reader->resumestate)
1747 {
1748 case XmlReadResumeState_PITarget:
1749 case XmlReadResumeState_PIBody:
1750 return reader_parse_pi(reader);
1751 case XmlReadResumeState_Comment:
1752 return reader_parse_comment(reader);
1753 case XmlReadResumeState_Whitespace:
1754 return reader_parse_whitespace(reader);
1755 default:
1756 ERR("unknown resume state %d\n", reader->resumestate);
1757 }
1758 }
1759
1760 while (1)
1761 {
1762 const WCHAR *cur = reader_get_ptr(reader);
1763
1764 if (is_wchar_space(*cur))
1765 hr = reader_parse_whitespace(reader);
1766 else if (!reader_cmp(reader, commentW))
1767 hr = reader_parse_comment(reader);
1768 else if (!reader_cmp(reader, piW))
1769 hr = reader_parse_pi(reader);
1770 else
1771 break;
1772
1773 if (hr != S_FALSE) return hr;
1774 }
1775
1776 return hr;
1777 }
1778
1779 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1780 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1781 {
1782 WCHAR *cur = reader_get_ptr(reader), quote;
1783 UINT start;
1784
1785 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1786
1787 quote = *cur;
1788 reader_skipn(reader, 1);
1789
1790 cur = reader_get_ptr(reader);
1791 start = reader_get_cur(reader);
1792 while (is_char(*cur) && *cur != quote)
1793 {
1794 reader_skipn(reader, 1);
1795 cur = reader_get_ptr(reader);
1796 }
1797 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1798 if (*cur == quote) reader_skipn(reader, 1);
1799
1800 TRACE("%s\n", debug_strval(reader, literal));
1801 return S_OK;
1802 }
1803
1804 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1805 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1806 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1807 {
1808 WCHAR *cur = reader_get_ptr(reader), quote;
1809 UINT start;
1810
1811 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1812
1813 quote = *cur;
1814 reader_skipn(reader, 1);
1815
1816 start = reader_get_cur(reader);
1817 cur = reader_get_ptr(reader);
1818 while (is_pubchar(*cur) && *cur != quote)
1819 {
1820 reader_skipn(reader, 1);
1821 cur = reader_get_ptr(reader);
1822 }
1823 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1824 if (*cur == quote) reader_skipn(reader, 1);
1825
1826 TRACE("%s\n", debug_strval(reader, literal));
1827 return S_OK;
1828 }
1829
1830 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1831 static HRESULT reader_parse_externalid(xmlreader *reader)
1832 {
1833 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1834 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1835 struct reader_position position = reader->position;
1836 strval name, sys;
1837 HRESULT hr;
1838 int cnt;
1839
1840 if (!reader_cmp(reader, publicW)) {
1841 strval pub;
1842
1843 /* public id */
1844 reader_skipn(reader, 6);
1845 cnt = reader_skipspaces(reader);
1846 if (!cnt) return WC_E_WHITESPACE;
1847
1848 hr = reader_parse_pub_literal(reader, &pub);
1849 if (FAILED(hr)) return hr;
1850
1851 reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1852 hr = reader_add_attr(reader, NULL, &name, NULL, &pub, &position, 0);
1853 if (FAILED(hr)) return hr;
1854
1855 cnt = reader_skipspaces(reader);
1856 if (!cnt) return S_OK;
1857
1858 /* optional system id */
1859 hr = reader_parse_sys_literal(reader, &sys);
1860 if (FAILED(hr)) return S_OK;
1861
1862 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1863 hr = reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1864 if (FAILED(hr)) return hr;
1865
1866 return S_OK;
1867 } else if (!reader_cmp(reader, systemW)) {
1868 /* system id */
1869 reader_skipn(reader, 6);
1870 cnt = reader_skipspaces(reader);
1871 if (!cnt) return WC_E_WHITESPACE;
1872
1873 hr = reader_parse_sys_literal(reader, &sys);
1874 if (FAILED(hr)) return hr;
1875
1876 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1877 return reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1878 }
1879
1880 return S_FALSE;
1881 }
1882
1883 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1884 static HRESULT reader_parse_dtd(xmlreader *reader)
1885 {
1886 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1887 strval name;
1888 WCHAR *cur;
1889 HRESULT hr;
1890
1891 /* check if we have "<!DOCTYPE" */
1892 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1893 reader_shrink(reader);
1894
1895 /* DTD processing is not allowed by default */
1896 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1897
1898 reader_skipn(reader, 9);
1899 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1900
1901 /* name */
1902 hr = reader_parse_name(reader, &name);
1903 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1904
1905 reader_skipspaces(reader);
1906
1907 hr = reader_parse_externalid(reader);
1908 if (FAILED(hr)) return hr;
1909
1910 reader_skipspaces(reader);
1911
1912 cur = reader_get_ptr(reader);
1913 if (*cur != '>')
1914 {
1915 FIXME("internal subset parsing not implemented\n");
1916 return E_NOTIMPL;
1917 }
1918
1919 /* skip '>' */
1920 reader_skipn(reader, 1);
1921
1922 reader->nodetype = XmlNodeType_DocumentType;
1923 reader_set_strvalue(reader, StringValue_LocalName, &name);
1924 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1925
1926 return S_OK;
1927 }
1928
1929 /* [11 NS] LocalPart ::= NCName */
1930 static HRESULT reader_parse_local(xmlreader *reader, strval *local, BOOL check_for_separator)
1931 {
1932 WCHAR *ptr;
1933 UINT start;
1934
1935 if (reader->resume[XmlReadResume_Local])
1936 {
1937 start = reader->resume[XmlReadResume_Local];
1938 ptr = reader_get_ptr(reader);
1939 }
1940 else
1941 {
1942 ptr = reader_get_ptr(reader);
1943 start = reader_get_cur(reader);
1944 }
1945
1946 while (is_ncnamechar(*ptr))
1947 {
1948 reader_skipn(reader, 1);
1949 ptr = reader_get_ptr(reader);
1950 }
1951
1952 if (check_for_separator && *ptr == ':')
1953 return NC_E_QNAMECOLON;
1954
1955 if (is_reader_pending(reader))
1956 {
1957 reader->resume[XmlReadResume_Local] = start;
1958 return E_PENDING;
1959 }
1960 else
1961 reader->resume[XmlReadResume_Local] = 0;
1962
1963 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1964
1965 return S_OK;
1966 }
1967
1968 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1969 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1970 [9 NS] UnprefixedName ::= LocalPart
1971 [10 NS] Prefix ::= NCName */
1972 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1973 {
1974 WCHAR *ptr;
1975 UINT start;
1976 HRESULT hr;
1977
1978 if (reader->resume[XmlReadResume_Name])
1979 {
1980 start = reader->resume[XmlReadResume_Name];
1981 ptr = reader_get_ptr(reader);
1982 }
1983 else
1984 {
1985 ptr = reader_get_ptr(reader);
1986 start = reader_get_cur(reader);
1987 reader->resume[XmlReadResume_Name] = start;
1988 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1989 }
1990
1991 if (reader->resume[XmlReadResume_Local])
1992 {
1993 hr = reader_parse_local(reader, local, FALSE);
1994 if (FAILED(hr)) return hr;
1995
1996 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1997 local->start - reader->resume[XmlReadResume_Name] - 1,
1998 prefix);
1999 }
2000 else
2001 {
2002 /* skip prefix part */
2003 while (is_ncnamechar(*ptr))
2004 {
2005 reader_skipn(reader, 1);
2006 ptr = reader_get_ptr(reader);
2007 }
2008
2009 if (is_reader_pending(reader)) return E_PENDING;
2010
2011 /* got a qualified name */
2012 if (*ptr == ':')
2013 {
2014 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
2015
2016 /* skip ':' */
2017 reader_skipn(reader, 1);
2018 hr = reader_parse_local(reader, local, TRUE);
2019 if (FAILED(hr)) return hr;
2020 }
2021 else
2022 {
2023 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
2024 reader_init_strvalue(0, 0, prefix);
2025 }
2026 }
2027
2028 if (prefix->len)
2029 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
2030 else
2031 TRACE("ncname %s\n", debug_strval(reader, local));
2032
2033 reader_init_strvalue(prefix->len ? prefix->start : local->start,
2034 /* count ':' too */
2035 (prefix->len ? prefix->len + 1 : 0) + local->len,
2036 qname);
2037
2038 reader->resume[XmlReadResume_Name] = 0;
2039 reader->resume[XmlReadResume_Local] = 0;
2040
2041 return S_OK;
2042 }
2043
2044 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
2045 {
2046 static const WCHAR entltW[] = {'l','t'};
2047 static const WCHAR entgtW[] = {'g','t'};
2048 static const WCHAR entampW[] = {'a','m','p'};
2049 static const WCHAR entaposW[] = {'a','p','o','s'};
2050 static const WCHAR entquotW[] = {'q','u','o','t'};
2051 static const strval lt = { (WCHAR*)entltW, 2 };
2052 static const strval gt = { (WCHAR*)entgtW, 2 };
2053 static const strval amp = { (WCHAR*)entampW, 3 };
2054 static const strval apos = { (WCHAR*)entaposW, 4 };
2055 static const strval quot = { (WCHAR*)entquotW, 4 };
2056 WCHAR *str = reader_get_strptr(reader, name);
2057
2058 switch (*str)
2059 {
2060 case 'l':
2061 if (strval_eq(reader, name, &lt)) return '<';
2062 break;
2063 case 'g':
2064 if (strval_eq(reader, name, &gt)) return '>';
2065 break;
2066 case 'a':
2067 if (strval_eq(reader, name, &amp))
2068 return '&';
2069 else if (strval_eq(reader, name, &apos))
2070 return '\'';
2071 break;
2072 case 'q':
2073 if (strval_eq(reader, name, &quot)) return '\"';
2074 break;
2075 default:
2076 ;
2077 }
2078
2079 return 0;
2080 }
2081
2082 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2083 [67] Reference ::= EntityRef | CharRef
2084 [68] EntityRef ::= '&' Name ';' */
2085 static HRESULT reader_parse_reference(xmlreader *reader)
2086 {
2087 encoded_buffer *buffer = &reader->input->buffer->utf16;
2088 WCHAR *start = reader_get_ptr(reader), *ptr;
2089 UINT cur = reader_get_cur(reader);
2090 WCHAR ch = 0;
2091 int len;
2092
2093 /* skip '&' */
2094 reader_skipn(reader, 1);
2095 ptr = reader_get_ptr(reader);
2096
2097 if (*ptr == '#')
2098 {
2099 reader_skipn(reader, 1);
2100 ptr = reader_get_ptr(reader);
2101
2102 /* hex char or decimal */
2103 if (*ptr == 'x')
2104 {
2105 reader_skipn(reader, 1);
2106 ptr = reader_get_ptr(reader);
2107
2108 while (*ptr != ';')
2109 {
2110 if ((*ptr >= '0' && *ptr <= '9'))
2111 ch = ch*16 + *ptr - '0';
2112 else if ((*ptr >= 'a' && *ptr <= 'f'))
2113 ch = ch*16 + *ptr - 'a' + 10;
2114 else if ((*ptr >= 'A' && *ptr <= 'F'))
2115 ch = ch*16 + *ptr - 'A' + 10;
2116 else
2117 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
2118 reader_skipn(reader, 1);
2119 ptr = reader_get_ptr(reader);
2120 }
2121 }
2122 else
2123 {
2124 while (*ptr != ';')
2125 {
2126 if ((*ptr >= '0' && *ptr <= '9'))
2127 {
2128 ch = ch*10 + *ptr - '0';
2129 reader_skipn(reader, 1);
2130 ptr = reader_get_ptr(reader);
2131 }
2132 else
2133 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
2134 }
2135 }
2136
2137 if (!is_char(ch)) return WC_E_XMLCHARACTER;
2138
2139 /* normalize */
2140 if (is_wchar_space(ch)) ch = ' ';
2141
2142 ptr = reader_get_ptr(reader);
2143 start = reader_get_ptr2(reader, cur);
2144 len = buffer->written - ((char *)ptr - buffer->data);
2145 memmove(start + 1, ptr + 1, len);
2146
2147 buffer->written -= (reader_get_cur(reader) - cur) * sizeof(WCHAR);
2148 buffer->cur = cur + 1;
2149
2150 *start = ch;
2151 }
2152 else
2153 {
2154 strval name;
2155 HRESULT hr;
2156
2157 hr = reader_parse_name(reader, &name);
2158 if (FAILED(hr)) return hr;
2159
2160 ptr = reader_get_ptr(reader);
2161 if (*ptr != ';') return WC_E_SEMICOLON;
2162
2163 /* predefined entities resolve to a single character */
2164 ch = get_predefined_entity(reader, &name);
2165 if (ch)
2166 {
2167 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2168 memmove(start+1, ptr+1, len);
2169 buffer->cur = cur + 1;
2170 buffer->written -= (ptr - start) * sizeof(WCHAR);
2171
2172 *start = ch;
2173 }
2174 else
2175 {
2176 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
2177 return WC_E_UNDECLAREDENTITY;
2178 }
2179
2180 }
2181
2182 return S_OK;
2183 }
2184
2185 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2186 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
2187 {
2188 WCHAR *ptr, quote;
2189 UINT start;
2190
2191 ptr = reader_get_ptr(reader);
2192
2193 /* skip opening quote */
2194 quote = *ptr;
2195 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2196 reader_skipn(reader, 1);
2197
2198 ptr = reader_get_ptr(reader);
2199 start = reader_get_cur(reader);
2200 while (*ptr)
2201 {
2202 if (*ptr == '<') return WC_E_LESSTHAN;
2203
2204 if (*ptr == quote)
2205 {
2206 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2207 /* skip closing quote */
2208 reader_skipn(reader, 1);
2209 return S_OK;
2210 }
2211
2212 if (*ptr == '&')
2213 {
2214 HRESULT hr = reader_parse_reference(reader);
2215 if (FAILED(hr)) return hr;
2216 }
2217 else
2218 {
2219 /* replace all whitespace chars with ' ' */
2220 if (is_wchar_space(*ptr)) *ptr = ' ';
2221 reader_skipn(reader, 1);
2222 }
2223 ptr = reader_get_ptr(reader);
2224 }
2225
2226 return WC_E_QUOTE;
2227 }
2228
2229 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2230 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2231 [3 NS] DefaultAttName ::= 'xmlns'
2232 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2233 static HRESULT reader_parse_attribute(xmlreader *reader)
2234 {
2235 struct reader_position position = reader->position;
2236 strval prefix, local, qname, value;
2237 enum attribute_flags flags = 0;
2238 HRESULT hr;
2239
2240 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2241 if (FAILED(hr)) return hr;
2242
2243 if (strval_eq(reader, &prefix, &strval_xmlns))
2244 flags |= ATTRIBUTE_NS_DEFINITION;
2245
2246 if (strval_eq(reader, &qname, &strval_xmlns))
2247 flags |= ATTRIBUTE_DEFAULT_NS_DEFINITION;
2248
2249 hr = reader_parse_eq(reader);
2250 if (FAILED(hr)) return hr;
2251
2252 hr = reader_parse_attvalue(reader, &value);
2253 if (FAILED(hr)) return hr;
2254
2255 if (flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION))
2256 reader_push_ns(reader, &local, &value, !!(flags & ATTRIBUTE_DEFAULT_NS_DEFINITION));
2257
2258 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2259 return reader_add_attr(reader, &prefix, &local, &qname, &value, &position, flags);
2260 }
2261
2262 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2263 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2264 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname)
2265 {
2266 struct reader_position position = reader->position;
2267 HRESULT hr;
2268
2269 hr = reader_parse_qname(reader, prefix, local, qname);
2270 if (FAILED(hr)) return hr;
2271
2272 for (;;)
2273 {
2274 static const WCHAR endW[] = {'/','>',0};
2275
2276 reader_skipspaces(reader);
2277
2278 /* empty element */
2279 if ((reader->is_empty_element = !reader_cmp(reader, endW)))
2280 {
2281 struct element *element = &reader->empty_element;
2282
2283 /* skip '/>' */
2284 reader_skipn(reader, 2);
2285
2286 reader_free_strvalued(reader, &element->qname);
2287 reader_free_strvalued(reader, &element->localname);
2288
2289 element->prefix = *prefix;
2290 reader_strvaldup(reader, qname, &element->qname);
2291 reader_strvaldup(reader, local, &element->localname);
2292 element->position = position;
2293 reader_mark_ns_nodes(reader, element);
2294 return S_OK;
2295 }
2296
2297 /* got a start tag */
2298 if (!reader_cmp(reader, gtW))
2299 {
2300 /* skip '>' */
2301 reader_skipn(reader, 1);
2302 return reader_push_element(reader, prefix, local, qname, &position);
2303 }
2304
2305 hr = reader_parse_attribute(reader);
2306 if (FAILED(hr)) return hr;
2307 }
2308
2309 return S_OK;
2310 }
2311
2312 /* [39] element ::= EmptyElemTag | STag content ETag */
2313 static HRESULT reader_parse_element(xmlreader *reader)
2314 {
2315 HRESULT hr;
2316
2317 switch (reader->resumestate)
2318 {
2319 case XmlReadResumeState_Initial:
2320 /* check if we are really on element */
2321 if (reader_cmp(reader, ltW)) return S_FALSE;
2322
2323 /* skip '<' */
2324 reader_skipn(reader, 1);
2325
2326 reader_shrink(reader);
2327 reader->resumestate = XmlReadResumeState_STag;
2328 case XmlReadResumeState_STag:
2329 {
2330 strval qname, prefix, local;
2331
2332 /* this handles empty elements too */
2333 hr = reader_parse_stag(reader, &prefix, &local, &qname);
2334 if (FAILED(hr)) return hr;
2335
2336 /* FIXME: need to check for defined namespace to reject invalid prefix */
2337
2338 /* if we got empty element and stack is empty go straight to Misc */
2339 if (reader->is_empty_element && list_empty(&reader->elements))
2340 reader->instate = XmlReadInState_MiscEnd;
2341 else
2342 reader->instate = XmlReadInState_Content;
2343
2344 reader->nodetype = XmlNodeType_Element;
2345 reader->resumestate = XmlReadResumeState_Initial;
2346 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2347 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2348 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
2349 break;
2350 }
2351 default:
2352 hr = E_FAIL;
2353 }
2354
2355 return hr;
2356 }
2357
2358 /* [13 NS] ETag ::= '</' QName S? '>' */
2359 static HRESULT reader_parse_endtag(xmlreader *reader)
2360 {
2361 struct reader_position position;
2362 strval prefix, local, qname;
2363 struct element *element;
2364 HRESULT hr;
2365
2366 /* skip '</' */
2367 reader_skipn(reader, 2);
2368
2369 position = reader->position;
2370 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2371 if (FAILED(hr)) return hr;
2372
2373 reader_skipspaces(reader);
2374
2375 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2376
2377 /* skip '>' */
2378 reader_skipn(reader, 1);
2379
2380 /* Element stack should never be empty at this point, cause we shouldn't get to
2381 content parsing if it's empty. */
2382 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2383 if (!strval_eq(reader, &element->qname, &qname)) return WC_E_ELEMENTMATCH;
2384
2385 /* update position stored for start tag, we won't be using it */
2386 element->position = position;
2387
2388 reader->nodetype = XmlNodeType_EndElement;
2389 reader->is_empty_element = FALSE;
2390 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2391
2392 return S_OK;
2393 }
2394
2395 /* [18] CDSect ::= CDStart CData CDEnd
2396 [19] CDStart ::= '<![CDATA['
2397 [20] CData ::= (Char* - (Char* ']]>' Char*))
2398 [21] CDEnd ::= ']]>' */
2399 static HRESULT reader_parse_cdata(xmlreader *reader)
2400 {
2401 WCHAR *ptr;
2402 UINT start;
2403
2404 if (reader->resumestate == XmlReadResumeState_CDATA)
2405 {
2406 start = reader->resume[XmlReadResume_Body];
2407 ptr = reader_get_ptr(reader);
2408 }
2409 else
2410 {
2411 /* skip markup '<![CDATA[' */
2412 reader_skipn(reader, 9);
2413 reader_shrink(reader);
2414 ptr = reader_get_ptr(reader);
2415 start = reader_get_cur(reader);
2416 reader->nodetype = XmlNodeType_CDATA;
2417 reader->resume[XmlReadResume_Body] = start;
2418 reader->resumestate = XmlReadResumeState_CDATA;
2419 reader_set_strvalue(reader, StringValue_Value, NULL);
2420 }
2421
2422 while (*ptr)
2423 {
2424 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2425 {
2426 strval value;
2427
2428 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2429
2430 /* skip ']]>' */
2431 reader_skipn(reader, 3);
2432 TRACE("%s\n", debug_strval(reader, &value));
2433
2434 reader_set_strvalue(reader, StringValue_Value, &value);
2435 reader->resume[XmlReadResume_Body] = 0;
2436 reader->resumestate = XmlReadResumeState_Initial;
2437 return S_OK;
2438 }
2439 else
2440 {
2441 reader_skipn(reader, 1);
2442 ptr++;
2443 }
2444 }
2445
2446 return S_OK;
2447 }
2448
2449 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2450 static HRESULT reader_parse_chardata(xmlreader *reader)
2451 {
2452 struct reader_position position;
2453 WCHAR *ptr;
2454 UINT start;
2455
2456 if (reader->resumestate == XmlReadResumeState_CharData)
2457 {
2458 start = reader->resume[XmlReadResume_Body];
2459 ptr = reader_get_ptr(reader);
2460 }
2461 else
2462 {
2463 reader_shrink(reader);
2464 ptr = reader_get_ptr(reader);
2465 start = reader_get_cur(reader);
2466 /* There's no text */
2467 if (!*ptr || *ptr == '<') return S_OK;
2468 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2469 reader->resume[XmlReadResume_Body] = start;
2470 reader->resumestate = XmlReadResumeState_CharData;
2471 reader_set_strvalue(reader, StringValue_Value, NULL);
2472 }
2473
2474 position = reader->position;
2475 while (*ptr)
2476 {
2477 static const WCHAR ampW[] = {'&',0};
2478
2479 /* CDATA closing sequence ']]>' is not allowed */
2480 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2481 return WC_E_CDSECTEND;
2482
2483 /* Found next markup part */
2484 if (ptr[0] == '<')
2485 {
2486 strval value;
2487
2488 reader->empty_element.position = position;
2489 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2490 reader_set_strvalue(reader, StringValue_Value, &value);
2491 reader->resume[XmlReadResume_Body] = 0;
2492 reader->resumestate = XmlReadResumeState_Initial;
2493 return S_OK;
2494 }
2495
2496 /* this covers a case when text has leading whitespace chars */
2497 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2498
2499 if (!reader_cmp(reader, ampW))
2500 reader_parse_reference(reader);
2501 else
2502 reader_skipn(reader, 1);
2503
2504 ptr = reader_get_ptr(reader);
2505 }
2506
2507 return S_OK;
2508 }
2509
2510 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2511 static HRESULT reader_parse_content(xmlreader *reader)
2512 {
2513 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2514 static const WCHAR etagW[] = {'<','/',0};
2515
2516 if (reader->resumestate != XmlReadResumeState_Initial)
2517 {
2518 switch (reader->resumestate)
2519 {
2520 case XmlReadResumeState_CDATA:
2521 return reader_parse_cdata(reader);
2522 case XmlReadResumeState_Comment:
2523 return reader_parse_comment(reader);
2524 case XmlReadResumeState_PIBody:
2525 case XmlReadResumeState_PITarget:
2526 return reader_parse_pi(reader);
2527 case XmlReadResumeState_CharData:
2528 return reader_parse_chardata(reader);
2529 default:
2530 ERR("unknown resume state %d\n", reader->resumestate);
2531 }
2532 }
2533
2534 reader_shrink(reader);
2535
2536 /* handle end tag here, it indicates end of content as well */
2537 if (!reader_cmp(reader, etagW))
2538 return reader_parse_endtag(reader);
2539
2540 if (!reader_cmp(reader, commentW))
2541 return reader_parse_comment(reader);
2542
2543 if (!reader_cmp(reader, piW))
2544 return reader_parse_pi(reader);
2545
2546 if (!reader_cmp(reader, cdstartW))
2547 return reader_parse_cdata(reader);
2548
2549 if (!reader_cmp(reader, ltW))
2550 return reader_parse_element(reader);
2551
2552 /* what's left must be CharData */
2553 return reader_parse_chardata(reader);
2554 }
2555
2556 static HRESULT reader_parse_nextnode(xmlreader *reader)
2557 {
2558 XmlNodeType nodetype = reader_get_nodetype(reader);
2559 HRESULT hr;
2560
2561 if (!is_reader_pending(reader))
2562 {
2563 reader->chunk_read_off = 0;
2564 reader_clear_attrs(reader);
2565 }
2566
2567 /* When moving from EndElement or empty element, pop its own namespace definitions */
2568 switch (nodetype)
2569 {
2570 case XmlNodeType_Attribute:
2571 reader_dec_depth(reader);
2572 /* fallthrough */
2573 case XmlNodeType_Element:
2574 if (reader->is_empty_element)
2575 reader_pop_ns_nodes(reader, &reader->empty_element);
2576 else if (FAILED(hr = reader_inc_depth(reader)))
2577 return hr;
2578 break;
2579 case XmlNodeType_EndElement:
2580 reader_pop_element(reader);
2581 reader_dec_depth(reader);
2582 break;
2583 default:
2584 ;
2585 }
2586
2587 for (;;)
2588 {
2589 switch (reader->instate)
2590 {
2591 /* if it's a first call for a new input we need to detect stream encoding */
2592 case XmlReadInState_Initial:
2593 {
2594 xml_encoding enc;
2595
2596 hr = readerinput_growraw(reader->input);
2597 if (FAILED(hr)) return hr;
2598
2599 reader->position.line_number = 1;
2600 reader->position.line_position = 1;
2601
2602 /* try to detect encoding by BOM or data and set input code page */
2603 hr = readerinput_detectencoding(reader->input, &enc);
2604 TRACE("detected encoding %s, 0x%08x\n", enc == XmlEncoding_Unknown ? "(unknown)" :
2605 debugstr_w(xml_encoding_map[enc].name), hr);
2606 if (FAILED(hr)) return hr;
2607
2608 /* always switch first time cause we have to put something in */
2609 readerinput_switchencoding(reader->input, enc);
2610
2611 /* parse xml declaration */
2612 hr = reader_parse_xmldecl(reader);
2613 if (FAILED(hr)) return hr;
2614
2615 readerinput_shrinkraw(reader->input, -1);
2616 reader->instate = XmlReadInState_Misc_DTD;
2617 if (hr == S_OK) return hr;
2618 }
2619 break;
2620 case XmlReadInState_Misc_DTD:
2621 hr = reader_parse_misc(reader);
2622 if (FAILED(hr)) return hr;
2623
2624 if (hr == S_FALSE)
2625 reader->instate = XmlReadInState_DTD;
2626 else
2627 return hr;
2628 break;
2629 case XmlReadInState_DTD:
2630 hr = reader_parse_dtd(reader);
2631 if (FAILED(hr)) return hr;
2632
2633 if (hr == S_OK)
2634 {
2635 reader->instate = XmlReadInState_DTD_Misc;
2636 return hr;
2637 }
2638 else
2639 reader->instate = XmlReadInState_Element;
2640 break;
2641 case XmlReadInState_DTD_Misc:
2642 hr = reader_parse_misc(reader);
2643 if (FAILED(hr)) return hr;
2644
2645 if (hr == S_FALSE)
2646 reader->instate = XmlReadInState_Element;
2647 else
2648 return hr;
2649 break;
2650 case XmlReadInState_Element:
2651 return reader_parse_element(reader);
2652 case XmlReadInState_Content:
2653 return reader_parse_content(reader);
2654 case XmlReadInState_MiscEnd:
2655 hr = reader_parse_misc(reader);
2656 if (hr != S_FALSE) return hr;
2657
2658 if (*reader_get_ptr(reader))
2659 {
2660 WARN("found garbage in the end of XML\n");
2661 return WC_E_SYNTAX;
2662 }
2663
2664 reader->instate = XmlReadInState_Eof;
2665 reader->state = XmlReadState_EndOfFile;
2666 reader->nodetype = XmlNodeType_None;
2667 return hr;
2668 case XmlReadInState_Eof:
2669 return S_FALSE;
2670 default:
2671 FIXME("internal state %d not handled\n", reader->instate);
2672 return E_NOTIMPL;
2673 }
2674 }
2675
2676 return E_NOTIMPL;
2677 }
2678
2679 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2680 {
2681 xmlreader *This = impl_from_IXmlReader(iface);
2682
2683 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2684
2685 if (IsEqualGUID(riid, &IID_IUnknown) ||
2686 IsEqualGUID(riid, &IID_IXmlReader))
2687 {
2688 *ppvObject = iface;
2689 }
2690 else
2691 {
2692 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2693 *ppvObject = NULL;
2694 return E_NOINTERFACE;
2695 }
2696
2697 IXmlReader_AddRef(iface);
2698
2699 return S_OK;
2700 }
2701
2702 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2703 {
2704 xmlreader *This = impl_from_IXmlReader(iface);
2705 ULONG ref = InterlockedIncrement(&This->ref);
2706 TRACE("(%p)->(%d)\n", This, ref);
2707 return ref;
2708 }
2709
2710 static void reader_clear_ns(xmlreader *reader)
2711 {
2712 struct ns *ns, *ns2;
2713
2714 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
2715 list_remove(&ns->entry);
2716 reader_free_strvalued(reader, &ns->prefix);
2717 reader_free_strvalued(reader, &ns->uri);
2718 reader_free(reader, ns);
2719 }
2720
2721 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
2722 list_remove(&ns->entry);
2723 reader_free_strvalued(reader, &ns->uri);
2724 reader_free(reader, ns);
2725 }
2726 }
2727
2728 static void reader_reset_parser(xmlreader *reader)
2729 {
2730 reader->position.line_number = 0;
2731 reader->position.line_position = 0;
2732
2733 reader_clear_elements(reader);
2734 reader_clear_attrs(reader);
2735 reader_clear_ns(reader);
2736 reader_free_strvalues(reader);
2737
2738 reader->depth = 0;
2739 reader->nodetype = XmlNodeType_None;
2740 reader->resumestate = XmlReadResumeState_Initial;
2741 memset(reader->resume, 0, sizeof(reader->resume));
2742 reader->is_empty_element = FALSE;
2743 }
2744
2745 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2746 {
2747 xmlreader *This = impl_from_IXmlReader(iface);
2748 LONG ref = InterlockedDecrement(&This->ref);
2749
2750 TRACE("(%p)->(%d)\n", This, ref);
2751
2752 if (ref == 0)
2753 {
2754 IMalloc *imalloc = This->imalloc;
2755 reader_reset_parser(This);
2756 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2757 if (This->resolver) IXmlResolver_Release(This->resolver);
2758 if (This->mlang) IUnknown_Release(This->mlang);
2759 reader_free(This, This);
2760 if (imalloc) IMalloc_Release(imalloc);
2761 }
2762
2763 return ref;
2764 }
2765
2766 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2767 {
2768 xmlreader *This = impl_from_IXmlReader(iface);
2769 IXmlReaderInput *readerinput;
2770 HRESULT hr;
2771
2772 TRACE("(%p)->(%p)\n", This, input);
2773
2774 if (This->input)
2775 {
2776 readerinput_release_stream(This->input);
2777 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2778 This->input = NULL;
2779 }
2780
2781 reader_reset_parser(This);
2782
2783 /* just reset current input */
2784 if (!input)
2785 {
2786 This->state = XmlReadState_Initial;
2787 return S_OK;
2788 }
2789
2790 /* now try IXmlReaderInput, ISequentialStream, IStream */
2791 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2792 if (hr == S_OK)
2793 {
2794 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2795 This->input = impl_from_IXmlReaderInput(readerinput);
2796 else
2797 {
2798 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2799 readerinput, readerinput->lpVtbl);
2800 IUnknown_Release(readerinput);
2801 return E_FAIL;
2802
2803 }
2804 }
2805
2806 if (hr != S_OK || !readerinput)
2807 {
2808 /* create IXmlReaderInput basing on supplied interface */
2809 hr = CreateXmlReaderInputWithEncodingName(input,
2810 This->imalloc, NULL, FALSE, NULL, &readerinput);
2811 if (hr != S_OK) return hr;
2812 This->input = impl_from_IXmlReaderInput(readerinput);
2813 }
2814
2815 /* set stream for supplied IXmlReaderInput */
2816 hr = readerinput_query_for_stream(This->input);
2817 if (hr == S_OK)
2818 {
2819 This->state = XmlReadState_Initial;
2820 This->instate = XmlReadInState_Initial;
2821 }
2822 return hr;
2823 }
2824
2825 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2826 {
2827 xmlreader *This = impl_from_IXmlReader(iface);
2828
2829 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2830
2831 if (!value) return E_INVALIDARG;
2832
2833 switch (property)
2834 {
2835 case XmlReaderProperty_MultiLanguage:
2836 *value = (LONG_PTR)This->mlang;
2837 if (This->mlang)
2838 IUnknown_AddRef(This->mlang);
2839 break;
2840 case XmlReaderProperty_XmlResolver:
2841 *value = (LONG_PTR)This->resolver;
2842 if (This->resolver)
2843 IXmlResolver_AddRef(This->resolver);
2844 break;
2845 case XmlReaderProperty_DtdProcessing:
2846 *value = This->dtdmode;
2847 break;
2848 case XmlReaderProperty_ReadState:
2849 *value = This->state;
2850 break;
2851 case XmlReaderProperty_MaxElementDepth:
2852 *value = This->max_depth;
2853 break;
2854 default:
2855 FIXME("Unimplemented property (%u)\n", property);
2856 return E_NOTIMPL;
2857 }
2858
2859 return S_OK;
2860 }
2861
2862 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2863 {
2864 xmlreader *This = impl_from_IXmlReader(iface);
2865
2866 TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value);
2867
2868 switch (property)
2869 {
2870 case XmlReaderProperty_MultiLanguage:
2871 if (This->mlang)
2872 IUnknown_Release(This->mlang);
2873 This->mlang = (IUnknown*)value;
2874 if (This->mlang)
2875 IUnknown_AddRef(This->mlang);
2876 if (This->mlang)
2877 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2878 break;
2879 case XmlReaderProperty_XmlResolver:
2880 if (This->resolver)
2881 IXmlResolver_Release(This->resolver);
2882 This->resolver = (IXmlResolver*)value;
2883 if (This->resolver)
2884 IXmlResolver_AddRef(This->resolver);
2885 break;
2886 case XmlReaderProperty_DtdProcessing:
2887 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2888 This->dtdmode = value;
2889 break;
2890 case XmlReaderProperty_MaxElementDepth:
2891 This->max_depth = value;
2892 break;
2893 default:
2894 FIXME("Unimplemented property (%u)\n", property);
2895 return E_NOTIMPL;
2896 }
2897
2898 return S_OK;
2899 }
2900
2901 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2902 {
2903 xmlreader *This = impl_from_IXmlReader(iface);
2904 XmlNodeType oldtype = This->nodetype;
2905 XmlNodeType type;
2906 HRESULT hr;
2907
2908 TRACE("(%p)->(%p)\n", This, nodetype);
2909
2910 if (!nodetype)
2911 nodetype = &type;
2912
2913 switch (This->state)
2914 {
2915 case XmlReadState_Closed:
2916 hr = S_FALSE;
2917 break;
2918 case XmlReadState_Error:
2919 hr = This->error;
2920 break;
2921 default:
2922 hr = reader_parse_nextnode(This);
2923 if (SUCCEEDED(hr) && oldtype == XmlNodeType_None && This->nodetype != oldtype)
2924 This->state = XmlReadState_Interactive;
2925
2926 if (FAILED(hr))
2927 {
2928 This->state = XmlReadState_Error;
2929 This->nodetype = XmlNodeType_None;
2930 This->depth = 0;
2931 This->error = hr;
2932 }
2933 }
2934
2935 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2936 *nodetype = This->nodetype;
2937
2938 return hr;
2939 }
2940
2941 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2942 {
2943 xmlreader *This = impl_from_IXmlReader(iface);
2944
2945 TRACE("(%p)->(%p)\n", This, node_type);
2946
2947 if (!node_type)
2948 return E_INVALIDARG;
2949
2950 *node_type = reader_get_nodetype(This);
2951 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2952 }
2953
2954 static void reader_set_current_attribute(xmlreader *reader, struct attribute *attr)
2955 {
2956 reader->attr = attr;
2957 reader->chunk_read_off = 0;
2958 reader_set_strvalue(reader, StringValue_Prefix, &attr->prefix);
2959 reader_set_strvalue(reader, StringValue_QualifiedName, &attr->qname);
2960 reader_set_strvalue(reader, StringValue_Value, &attr->value);
2961 }
2962
2963 static HRESULT reader_move_to_first_attribute(xmlreader *reader)
2964 {
2965 if (!reader->attr_count)
2966 return S_FALSE;
2967
2968 if (!reader->attr)
2969 reader_inc_depth(reader);
2970
2971 reader_set_current_attribute(reader, LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry));
2972
2973 return S_OK;
2974 }
2975
2976 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2977 {
2978 xmlreader *This = impl_from_IXmlReader(iface);
2979
2980 TRACE("(%p)\n", This);
2981
2982 return reader_move_to_first_attribute(This);
2983 }
2984
2985 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2986 {
2987 xmlreader *This = impl_from_IXmlReader(iface);
2988 const struct list *next;
2989
2990 TRACE("(%p)\n", This);
2991
2992 if (!This->attr_count) return S_FALSE;
2993
2994 if (!This->attr)
2995 return reader_move_to_first_attribute(This);
2996
2997 next = list_next(&This->attrs, &This->attr->entry);
2998 if (next)
2999 reader_set_current_attribute(This, LIST_ENTRY(next, struct attribute, entry));
3000
3001 return next ? S_OK : S_FALSE;
3002 }
3003
3004 static void reader_get_attribute_ns_uri(xmlreader *reader, struct attribute *attr, const WCHAR **uri, UINT *len)
3005 {
3006 static const WCHAR xmlns_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3007 '2','0','0','0','/','x','m','l','n','s','/',0};
3008 static const WCHAR xml_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3009 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
3010
3011 /* Check for reserved prefixes first */
3012 if ((strval_eq(reader, &attr->prefix, &strval_empty) && strval_eq(reader, &attr->localname, &strval_xmlns)) ||
3013 strval_eq(reader, &attr->prefix, &strval_xmlns))
3014 {
3015 *uri = xmlns_uriW;
3016 *len = ARRAY_SIZE(xmlns_uriW) - 1;
3017 }
3018 else if (strval_eq(reader, &attr->prefix, &strval_xml))
3019 {
3020 *uri = xml_uriW;
3021 *len = ARRAY_SIZE(xml_uriW) - 1;
3022 }
3023 else
3024 {
3025 *uri = NULL;
3026 *len = 0;
3027 }
3028
3029 if (!*uri)
3030 {
3031 struct ns *ns;
3032
3033 if ((ns = reader_lookup_ns(reader, &attr->prefix)))
3034 {
3035 *uri = ns->uri.str;
3036 *len = ns->uri.len;
3037 }
3038 else
3039 {
3040 *uri = emptyW;
3041 *len = 0;
3042 }
3043 }
3044 }
3045
3046 static void reader_get_attribute_local_name(xmlreader *reader, struct attribute *attr, const WCHAR **name, UINT *len)
3047 {
3048 if (attr->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)
3049 {
3050 *name = xmlnsW;
3051 *len = 5;
3052 }
3053 else if (attr->flags & ATTRIBUTE_NS_DEFINITION)
3054 {
3055 const struct ns *ns = reader_lookup_ns(reader, &attr->localname);
3056 *name = ns->prefix.str;
3057 *len = ns->prefix.len;
3058 }
3059 else
3060 {
3061 *name = attr->localname.str;
3062 *len = attr->localname.len;
3063 }
3064 }
3065
3066 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
3067 const WCHAR *local_name, const WCHAR *namespace_uri)
3068 {
3069 xmlreader *This = impl_from_IXmlReader(iface);
3070 UINT target_name_len, target_uri_len;
3071 struct attribute *attr;
3072
3073 TRACE("(%p)->(%s %s)\n", This, debugstr_w(local_name), debugstr_w(namespace_uri));
3074
3075 if (!local_name)
3076 return E_INVALIDARG;
3077
3078 if (!This->attr_count)
3079 return S_FALSE;
3080
3081 if (!namespace_uri)
3082 namespace_uri = emptyW;
3083
3084 target_name_len = strlenW(local_name);
3085 target_uri_len = strlenW(namespace_uri);
3086
3087 LIST_FOR_EACH_ENTRY(attr, &This->attrs, struct attribute, entry)
3088 {
3089 UINT name_len, uri_len;
3090 const WCHAR *name, *uri;
3091
3092 reader_get_attribute_local_name(This, attr, &name, &name_len);
3093 reader_get_attribute_ns_uri(This, attr, &uri, &uri_len);
3094
3095 if (name_len == target_name_len && uri_len == target_uri_len &&
3096 !strcmpW(name, local_name) && !strcmpW(uri, namespace_uri))
3097 {
3098 reader_set_current_attribute(This, attr);
3099 return S_OK;
3100 }
3101 }
3102
3103 return S_FALSE;
3104 }
3105
3106 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
3107 {
3108 xmlreader *This = impl_from_IXmlReader(iface);
3109
3110 TRACE("(%p)\n", This);
3111
3112 if (!This->attr_count) return S_FALSE;
3113
3114 if (This->attr)
3115 reader_dec_depth(This);
3116
3117 This->attr = NULL;
3118
3119 /* FIXME: support other node types with 'attributes' like DTD */
3120 if (This->is_empty_element) {
3121 reader_set_strvalue(This, StringValue_Prefix, &This->empty_element.prefix);
3122 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
3123 }
3124 else {
3125 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
3126 if (element) {
3127 reader_set_strvalue(This, StringValue_Prefix, &element->prefix);
3128 reader_set_strvalue(This, StringValue_QualifiedName, &element->qname);
3129 }
3130 }
3131 This->chunk_read_off = 0;
3132 reader_set_strvalue(This, StringValue_Value, &strval_empty);
3133
3134 return S_OK;
3135 }
3136
3137 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3138 {
3139 xmlreader *This = impl_from_IXmlReader(iface);
3140 struct attribute *attribute = This->attr;
3141 struct element *element;
3142 UINT length;
3143
3144 TRACE("(%p)->(%p %p)\n", This, name, len);
3145
3146 if (!len)
3147 len = &length;
3148
3149 switch (reader_get_nodetype(This))
3150 {
3151 case XmlNodeType_Text:
3152 case XmlNodeType_CDATA:
3153 case XmlNodeType_Comment:
3154 case XmlNodeType_Whitespace:
3155 *name = emptyW;
3156 *len = 0;
3157 break;
3158 case XmlNodeType_Element:
3159 case XmlNodeType_EndElement:
3160 element = reader_get_element(This);
3161 if (element->prefix.len)
3162 {
3163 *name = element->qname.str;
3164 *len = element->qname.len;
3165 }
3166 else
3167 {
3168 *name = element->localname.str;
3169 *len = element->localname.len;
3170 }
3171 break;
3172 case XmlNodeType_Attribute:
3173 if (attribute->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)
3174 {
3175 *name = xmlnsW;
3176 *len = 5;
3177 } else if (attribute->prefix.len)
3178 {
3179 *name = This->strvalues[StringValue_QualifiedName].str;
3180 *len = This->strvalues[StringValue_QualifiedName].len;
3181 }
3182 else
3183 {
3184 *name = attribute->localname.str;
3185 *len = attribute->localname.len;
3186 }
3187 break;
3188 default:
3189 *name = This->strvalues[StringValue_QualifiedName].str;
3190 *len = This->strvalues[StringValue_QualifiedName].len;
3191 break;
3192 }
3193
3194 return S_OK;
3195 }
3196
3197 static struct ns *reader_lookup_nsdef(xmlreader *reader)
3198 {
3199 if (list_empty(&reader->nsdef))
3200 return NULL;
3201
3202 return LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
3203 }
3204
3205 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface, const WCHAR **uri, UINT *len)
3206 {
3207 xmlreader *This = impl_from_IXmlReader(iface);
3208 const strval *prefix = &This->strvalues[StringValue_Prefix];
3209 XmlNodeType nodetype;
3210 struct ns *ns;
3211 UINT length;
3212
3213 TRACE("(%p %p %p)\n", iface, uri, len);
3214
3215 if (!len)
3216 len = &length;
3217
3218 switch ((nodetype = reader_get_nodetype(This)))
3219 {
3220 case XmlNodeType_Attribute:
3221 reader_get_attribute_ns_uri(This, This->attr, uri, len);
3222 break;
3223 case XmlNodeType_Element:
3224 case XmlNodeType_EndElement:
3225 {
3226 ns = reader_lookup_ns(This, prefix);
3227
3228 /* pick top default ns if any */
3229 if (!ns)
3230 ns = reader_lookup_nsdef(This);
3231
3232 if (ns) {
3233 *uri = ns->uri.str;
3234 *len = ns->uri.len;
3235 }
3236 else {
3237 *uri = emptyW;
3238 *len = 0;
3239 }
3240 }
3241 break;
3242 case XmlNodeType_Text:
3243 case XmlNodeType_CDATA:
3244 case XmlNodeType_ProcessingInstruction:
3245 case XmlNodeType_Comment:
3246 case XmlNodeType_Whitespace:
3247 case XmlNodeType_XmlDeclaration:
3248 *uri = emptyW;
3249 *len = 0;
3250 break;
3251 default:
3252 FIXME("Unhandled node type %d\n", nodetype);
3253 *uri = NULL;
3254 *len = 0;
3255 return E_NOTIMPL;
3256 }
3257
3258 return S_OK;
3259 }
3260
3261 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3262 {
3263 xmlreader *This = impl_from_IXmlReader(iface);
3264 struct element *element;
3265 UINT length;
3266
3267 TRACE("(%p)->(%p %p)\n", This, name, len);
3268
3269 if (!len)
3270 len = &length;
3271
3272 switch (reader_get_nodetype(This))
3273 {
3274 case XmlNodeType_Text:
3275 case XmlNodeType_CDATA:
3276 case XmlNodeType_Comment:
3277 case XmlNodeType_Whitespace:
3278 *name = emptyW;
3279 *len = 0;
3280 break;
3281 case XmlNodeType_Element:
3282 case XmlNodeType_EndElement:
3283 element = reader_get_element(This);
3284 *name = element->localname.str;
3285 *len = element->localname.len;
3286 break;
3287 case XmlNodeType_Attribute:
3288 reader_get_attribute_local_name(This, This->attr, name, len);
3289 break;
3290 default:
3291 *name = This->strvalues[StringValue_LocalName].str;
3292 *len = This->strvalues[StringValue_LocalName].len;
3293 break;
3294 }
3295
3296 return S_OK;
3297 }
3298
3299 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, const WCHAR **ret, UINT *len)
3300 {
3301 xmlreader *This = impl_from_IXmlReader(iface);
3302 XmlNodeType nodetype;
3303 UINT length;
3304
3305 TRACE("(%p)->(%p %p)\n", This, ret, len);
3306
3307 if (!len)
3308 len = &length;
3309
3310 *ret = emptyW;
3311 *len = 0;
3312
3313 switch ((nodetype = reader_get_nodetype(This)))
3314 {
3315 case XmlNodeType_Element:
3316 case XmlNodeType_EndElement:
3317 case XmlNodeType_Attribute:
3318 {
3319 const strval *prefix = &This->strvalues[StringValue_Prefix];
3320 struct ns *ns;
3321
3322 if (strval_eq(This, prefix, &strval_xml))
3323 {
3324 *ret = xmlW;
3325 *len = 3;
3326 }
3327 else if (strval_eq(This, prefix, &strval_xmlns))
3328 {
3329 *ret = xmlnsW;
3330 *len = 5;
3331 }
3332 else if ((ns = reader_lookup_ns(This, prefix)))
3333 {
3334 *ret = ns->prefix.str;
3335 *len = ns->prefix.len;
3336 }
3337
3338 break;
3339 }
3340 default:
3341 ;
3342 }
3343
3344 return S_OK;
3345 }
3346
3347 static const strval *reader_get_value(xmlreader *reader, BOOL ensure_allocated)
3348 {
3349 strval *val;
3350
3351 switch (reader_get_nodetype(reader))
3352 {
3353 case XmlNodeType_XmlDeclaration:
3354 case XmlNodeType_EndElement:
3355 case XmlNodeType_None:
3356 return &strval_empty;
3357 case XmlNodeType_Attribute:
3358 /* For namespace definition attributes return values from namespace list */
3359 if (reader->attr->flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION))
3360 {
3361 struct ns *ns;
3362
3363 if (!(ns = reader_lookup_ns(reader, &reader->attr->localname)))
3364 ns = reader_lookup_nsdef(reader);
3365
3366 return &ns->uri;
3367 }
3368 return &reader->attr->value;
3369 default:
3370 break;
3371 }
3372
3373 val = &reader->strvalues[StringValue_Value];
3374 if (!val->str && ensure_allocated)
3375 {
3376 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
3377 if (!ptr) return NULL;
3378 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
3379 ptr[val->len] = 0;
3380 val->str = ptr;
3381 }
3382
3383 return val;
3384 }
3385
3386 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
3387 {
3388 xmlreader *reader = impl_from_IXmlReader(iface);
3389 const strval *val = &reader->strvalues[StringValue_Value];
3390 UINT off;
3391
3392 TRACE("(%p)->(%p %p)\n", reader, value, len);
3393
3394 *value = NULL;
3395
3396 if ((reader->nodetype == XmlNodeType_Comment && !val->str && !val->len) || is_reader_pending(reader))
3397 {
3398 XmlNodeType type;
3399 HRESULT hr;
3400
3401 hr = IXmlReader_Read(iface, &type);
3402 if (FAILED(hr)) return hr;
3403
3404 /* return if still pending, partially read values are not reported */
3405 if (is_reader_pending(reader)) return E_PENDING;
3406 }
3407
3408 val = reader_get_value(reader, TRUE);
3409 if (!val)
3410 return E_OUTOFMEMORY;
3411
3412 off = abs(reader->chunk_read_off);
3413 assert(off <= val->len);
3414 *value = val->str + off;
3415 if (len) *len = val->len - off;
3416 reader->chunk_read_off = -off;
3417 return S_OK;
3418 }
3419
3420 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
3421 {
3422 xmlreader *reader = impl_from_IXmlReader(iface);
3423 const strval *val;
3424 UINT len = 0;
3425
3426 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
3427
3428 val = reader_get_value(reader, FALSE);
3429
3430 /* If value is already read by GetValue, chunk_read_off is negative and chunked reads are not possible. */
3431 if (reader->chunk_read_off >= 0)
3432 {
3433 assert(reader->chunk_read_off <= val->len);
3434 len = min(val->len - reader->chunk_read_off, chunk_size);
3435 }
3436 if (read) *read = len;
3437
3438 if (len)
3439 {
3440 memcpy(buffer, reader_get_strptr(reader, val) + reader->chunk_read_off, len*sizeof(WCHAR));
3441 reader->chunk_read_off += len;
3442 }
3443
3444 return len || !chunk_size ? S_OK : S_FALSE;
3445 }
3446
3447 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
3448 LPCWSTR *baseUri,
3449 UINT *baseUri_length)
3450 {
3451 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
3452 return E_NOTIMPL;
3453 }
3454
3455 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
3456 {
3457 FIXME("(%p): stub\n", iface);
3458 return FALSE;
3459 }
3460
3461 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
3462 {
3463 xmlreader *This = impl_from_IXmlReader(iface);
3464 TRACE("(%p)\n", This);
3465 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3466 when current node is start tag of an element */
3467 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE;
3468 }
3469
3470 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *line_number)
3471 {
3472 xmlreader *This = impl_from_IXmlReader(iface);
3473 const struct element *element;
3474
3475 TRACE("(%p %p)\n", This, line_number);
3476
3477 if (!line_number)
3478 return E_INVALIDARG;
3479
3480 switch (reader_get_nodetype(This))
3481 {
3482 case XmlNodeType_Element:
3483 case XmlNodeType_EndElement:
3484 element = reader_get_element(This);
3485 *line_number = element->position.line_number;
3486 break;
3487 case XmlNodeType_Attribute:
3488 *line_number = This->attr->position.line_number;
3489 break;
3490 case XmlNodeType_Whitespace:
3491 case XmlNodeType_XmlDeclaration:
3492 *line_number = This->empty_element.position.line_number;
3493 break;
3494 default:
3495 *line_number = This->position.line_number;
3496 break;
3497 }
3498
3499 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
3500 }
3501
3502 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *line_position)
3503 {
3504 xmlreader *This = impl_from_IXmlReader(iface);
3505 const struct element *element;
3506
3507 TRACE("(%p %p)\n", This, line_position);
3508
3509 if (!line_position)
3510 return E_INVALIDARG;
3511
3512 switch (reader_get_nodetype(This))
3513 {
3514 case XmlNodeType_Element:
3515 case XmlNodeType_EndElement:
3516 element = reader_get_element(This);
3517 *line_position = element->position.line_position;
3518 break;
3519 case XmlNodeType_Attribute:
3520 *line_position = This->attr->position.line_position;
3521 break;
3522 case XmlNodeType_Whitespace:
3523 case XmlNodeType_XmlDeclaration:
3524 *line_position = This->empty_element.position.line_position;
3525 break;
3526 default:
3527 *line_position = This->position.line_position;
3528 break;
3529 }
3530
3531 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
3532 }
3533
3534 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
3535 {
3536 xmlreader *This = impl_from_IXmlReader(iface);
3537
3538 TRACE("(%p)->(%p)\n", This, count);
3539
3540 if (!count) return E_INVALIDARG;
3541
3542 *count = This->attr_count;
3543 return S_OK;
3544 }
3545
3546 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
3547 {
3548 xmlreader *This = impl_from_IXmlReader(iface);
3549 TRACE("(%p)->(%p)\n", This, depth);
3550 *depth = This->depth;
3551 return S_OK;
3552 }
3553
3554 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
3555 {
3556 xmlreader *This = impl_from_IXmlReader(iface);
3557 TRACE("(%p)\n", iface);
3558 return This->state == XmlReadState_EndOfFile;
3559 }
3560
3561 static const struct IXmlReaderVtbl xmlreader_vtbl =
3562 {
3563 xmlreader_QueryInterface,
3564 xmlreader_AddRef,
3565 xmlreader_Release,
3566 xmlreader_SetInput,
3567 xmlreader_GetProperty,
3568 xmlreader_SetProperty,
3569 xmlreader_Read,
3570 xmlreader_GetNodeType,
3571 xmlreader_MoveToFirstAttribute,
3572 xmlreader_MoveToNextAttribute,
3573 xmlreader_MoveToAttributeByName,
3574 xmlreader_MoveToElement,
3575 xmlreader_GetQualifiedName,
3576 xmlreader_GetNamespaceUri,
3577 xmlreader_GetLocalName,
3578 xmlreader_GetPrefix,
3579 xmlreader_GetValue,
3580 xmlreader_ReadValueChunk,
3581 xmlreader_GetBaseUri,
3582 xmlreader_IsDefault,
3583 xmlreader_IsEmptyElement,
3584 xmlreader_GetLineNumber,
3585 xmlreader_GetLinePosition,
3586 xmlreader_GetAttributeCount,
3587 xmlreader_GetDepth,
3588 xmlreader_IsEOF
3589 };
3590
3591 /** IXmlReaderInput **/
3592 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
3593 {
3594 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3595
3596 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
3597
3598 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
3599 IsEqualGUID(riid, &IID_IUnknown))
3600 {
3601 *ppvObject = iface;
3602 }
3603 else
3604 {
3605 WARN("interface %s not implemented\n", debugstr_guid(riid));
3606 *ppvObject = NULL;
3607 return E_NOINTERFACE;
3608 }
3609
3610 IUnknown_AddRef(iface);
3611
3612 return S_OK;
3613 }
3614
3615 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
3616 {
3617 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3618 ULONG ref = InterlockedIncrement(&This->ref);
3619 TRACE("(%p)->(%d)\n", This, ref);
3620 return ref;
3621 }
3622
3623 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
3624 {
3625 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3626 LONG ref = InterlockedDecrement(&This->ref);
3627
3628 TRACE("(%p)->(%d)\n", This, ref);
3629
3630 if (ref == 0)
3631 {
3632 IMalloc *imalloc = This->imalloc;
3633 if (This->input) IUnknown_Release(This->input);
3634 if (This->stream) ISequentialStream_Release(This->stream);
3635 if (This->buffer) free_input_buffer(This->buffer);
3636 readerinput_free(This, This->baseuri);
3637 readerinput_free(This, This);
3638 if (imalloc) IMalloc_Release(imalloc);
3639 }
3640
3641 return ref;
3642 }
3643
3644 static const struct IUnknownVtbl xmlreaderinputvtbl =
3645 {
3646 xmlreaderinput_QueryInterface,
3647 xmlreaderinput_AddRef,
3648 xmlreaderinput_Release
3649 };
3650
3651 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
3652 {
3653 xmlreader *reader;
3654 HRESULT hr;
3655 int i;
3656
3657 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
3658
3659 if (imalloc)
3660 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
3661 else
3662 reader = heap_alloc(sizeof(*reader));
3663 if (!reader)
3664 return E_OUTOFMEMORY;
3665
3666 memset(reader, 0, sizeof(*reader));
3667 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
3668 reader->ref = 1;
3669 reader->state = XmlReadState_Closed;
3670 reader->instate = XmlReadInState_Initial;
3671 reader->resumestate = XmlReadResumeState_Initial;
3672 reader->dtdmode = DtdProcessing_Prohibit;
3673 reader->imalloc = imalloc;
3674 if (imalloc) IMalloc_AddRef(imalloc);
3675 reader->nodetype = XmlNodeType_None;
3676 list_init(&reader->attrs);
3677 list_init(&reader->nsdef);
3678 list_init(&reader->ns);
3679 list_init(&reader->elements);
3680 reader->max_depth = 256;
3681
3682 reader->chunk_read_off = 0;
3683 for (i = 0; i < StringValue_Last; i++)
3684 reader->strvalues[i] = strval_empty;
3685
3686 hr = IXmlReader_QueryInterface(&reader->IXmlReader_iface, riid, obj);
3687 IXmlReader_Release(&reader->IXmlReader_iface);
3688
3689 TRACE("returning iface %p, hr %#x\n", *obj, hr);
3690
3691 return hr;
3692 }
3693
3694 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
3695 IMalloc *imalloc,
3696 LPCWSTR encoding,
3697 BOOL hint,
3698 LPCWSTR base_uri,
3699 IXmlReaderInput **ppInput)
3700 {
3701 xmlreaderinput *readerinput;
3702 HRESULT hr;
3703
3704 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3705 hint, wine_dbgstr_w(base_uri), ppInput);
3706
3707 if (!stream || !ppInput) return E_INVALIDARG;
3708
3709 if (imalloc)
3710 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3711 else
3712 readerinput = heap_alloc(sizeof(*readerinput));
3713 if(!readerinput) return E_OUTOFMEMORY;
3714
3715 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3716 readerinput->ref = 1;
3717 readerinput->imalloc = imalloc;
3718 readerinput->stream = NULL;
3719 if (imalloc) IMalloc_AddRef(imalloc);
3720 readerinput->encoding = parse_encoding_name(encoding, -1);
3721 readerinput->hint = hint;
3722 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3723 readerinput->pending = 0;
3724
3725 hr = alloc_input_buffer(readerinput);
3726 if (hr != S_OK)
3727 {
3728 readerinput_free(readerinput, readerinput->baseuri);
3729 readerinput_free(readerinput, readerinput);
3730 if (imalloc) IMalloc_Release(imalloc);
3731 return hr;
3732 }
3733 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3734
3735 *ppInput = &readerinput->IXmlReaderInput_iface;
3736
3737 TRACE("returning iface %p\n", *ppInput);
3738
3739 return S_OK;
3740 }