* Sync up to trunk head (r65074).
[reactos.git] / dll / win32 / xmllite / reader.c
1 /*
2 * IXmlReader implementation
3 *
4 * Copyright 2010, 2012-2013 Nikolay Sivov
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 */
20
21 #include "xmllite_private.h"
22
23 #include <stdio.h>
24
25 #include <wine/list.h>
26 #include <wine/unicode.h>
27
28 /* not defined in public headers */
29 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
30
/* Internal reader state, stored in the reader's 'instate' field. */
typedef enum
{
    XmlReadInState_Initial = 0,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    /* optional Misc at the end of a document */
    XmlReadInState_MiscEnd,
    XmlReadInState_Eof
} XmlReaderInternalState;
43
/* Records where parsing was interrupted by an input problem, so that the
   reader can resume parsing from that construct. */
typedef enum
{
    XmlReadResumeState_Initial = 0,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
57
/* Index of a saved input position used to resume from a particular place. */
typedef enum
{
    /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Name = 0,
    /* local for QName */
    XmlReadResume_Local,
    /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Body,
    /* number of resume slots */
    XmlReadResume_Last
} XmlReaderResume;
66
/* Index into the reader's 'strvalues' array. */
typedef enum
{
    StringValue_LocalName = 0,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    /* number of string value slots */
    StringValue_Last
} XmlReaderStringValue;
75
76 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
77 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
78
79 static const WCHAR dblquoteW[] = {'\"',0};
80 static const WCHAR quoteW[] = {'\'',0};
81 static const WCHAR ltW[] = {'<',0};
82 static const WCHAR gtW[] = {'>',0};
83 static const WCHAR commentW[] = {'<','!','-','-',0};
84 static const WCHAR piW[] = {'<','?',0};
85
86 static const char *debugstr_nodetype(XmlNodeType nodetype)
87 {
88 static const char * const type_names[] =
89 {
90 "None",
91 "Element",
92 "Attribute",
93 "Text",
94 "CDATA",
95 "",
96 "",
97 "ProcessingInstruction",
98 "Comment",
99 "",
100 "DocumentType",
101 "",
102 "",
103 "Whitespace",
104 "",
105 "EndElement",
106 "",
107 "XmlDeclaration"
108 };
109
110 if (nodetype > _XmlNodeType_Last)
111 return wine_dbg_sprintf("unknown type=%d", nodetype);
112
113 return type_names[nodetype];
114 }
115
116 static const char *debugstr_reader_prop(XmlReaderProperty prop)
117 {
118 static const char * const prop_names[] =
119 {
120 "MultiLanguage",
121 "ConformanceLevel",
122 "RandomAccess",
123 "XmlResolver",
124 "DtdProcessing",
125 "ReadState",
126 "MaxElementDepth",
127 "MaxEntityExpansion"
128 };
129
130 if (prop > _XmlReaderProperty_Last)
131 return wine_dbg_sprintf("unknown property=%d", prop);
132
133 return prop_names[prop];
134 }
135
136 struct xml_encoding_data
137 {
138 const WCHAR *name;
139 xml_encoding enc;
140 UINT cp;
141 };
142
143 static const struct xml_encoding_data xml_encoding_map[] = {
144 { utf16W, XmlEncoding_UTF16, ~0 },
145 { utf8W, XmlEncoding_UTF8, CP_UTF8 }
146 };
147
148 const WCHAR *get_encoding_name(xml_encoding encoding)
149 {
150 return xml_encoding_map[encoding].name;
151 }
152
153 typedef struct
154 {
155 char *data;
156 UINT cur;
157 unsigned int allocated;
158 unsigned int written;
159 } encoded_buffer;
160
161 typedef struct input_buffer input_buffer;
162
163 typedef struct
164 {
165 IXmlReaderInput IXmlReaderInput_iface;
166 LONG ref;
167 /* reference passed on IXmlReaderInput creation, is kept when input is created */
168 IUnknown *input;
169 IMalloc *imalloc;
170 xml_encoding encoding;
171 BOOL hint;
172 WCHAR *baseuri;
173 /* stream reference set after SetInput() call from reader,
174 stored as sequential stream, cause currently
175 optimizations possible with IStream aren't implemented */
176 ISequentialStream *stream;
177 input_buffer *buffer;
178 unsigned int pending : 1;
179 } xmlreaderinput;
180
181 static const struct IUnknownVtbl xmlreaderinputvtbl;
182
183 /* Structure to hold parsed string of specific length.
184
185 Reader stores node value as 'start' pointer, on request
186 a null-terminated version of it is allocated.
187
188 To init a strval variable use reader_init_strval(),
189 to set strval as a reader value use reader_set_strval().
190 */
191 typedef struct
192 {
193 WCHAR *str; /* allocated null-terminated string */
194 UINT len; /* length in WCHARs, altered after ReadValueChunk */
195 UINT start; /* input position where value starts */
196 } strval;
197
198 static WCHAR emptyW[] = {0};
199 static const strval strval_empty = { emptyW };
200
201 struct attribute
202 {
203 struct list entry;
204 strval localname;
205 strval value;
206 };
207
208 struct element
209 {
210 struct list entry;
211 strval qname;
212 strval localname;
213 };
214
215 typedef struct
216 {
217 IXmlReader IXmlReader_iface;
218 LONG ref;
219 xmlreaderinput *input;
220 IMalloc *imalloc;
221 XmlReadState state;
222 XmlReaderInternalState instate;
223 XmlReaderResumeState resumestate;
224 XmlNodeType nodetype;
225 DtdProcessing dtdmode;
226 UINT line, pos; /* reader position in XML stream */
227 struct list attrs; /* attributes list for current node */
228 struct attribute *attr; /* current attribute */
229 UINT attr_count;
230 struct list elements;
231 strval strvalues[StringValue_Last];
232 UINT depth;
233 UINT max_depth;
234 BOOL empty_element;
235 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
236 } xmlreader;
237
238 struct input_buffer
239 {
240 encoded_buffer utf16;
241 encoded_buffer encoded;
242 UINT code_page;
243 xmlreaderinput *input;
244 };
245
246 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
247 {
248 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
249 }
250
251 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
252 {
253 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
254 }
255
256 /* reader memory allocation functions */
257 static inline void *reader_alloc(xmlreader *reader, size_t len)
258 {
259 return m_alloc(reader->imalloc, len);
260 }
261
262 static inline void reader_free(xmlreader *reader, void *mem)
263 {
264 m_free(reader->imalloc, mem);
265 }
266
267 /* Just return pointer from offset, no attempt to read more. */
268 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
269 {
270 encoded_buffer *buffer = &reader->input->buffer->utf16;
271 return (WCHAR*)buffer->data + offset;
272 }
273
274 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
275 {
276 return v->str ? v->str : reader_get_ptr2(reader, v->start);
277 }
278
279 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
280 {
281 *dest = *src;
282
283 if (src->str != strval_empty.str)
284 {
285 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
286 if (!dest->str) return E_OUTOFMEMORY;
287 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
288 dest->str[dest->len] = 0;
289 dest->start = 0;
290 }
291
292 return S_OK;
293 }
294
295 /* reader input memory allocation functions */
296 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
297 {
298 return m_alloc(input->imalloc, len);
299 }
300
301 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
302 {
303 return m_realloc(input->imalloc, mem, len);
304 }
305
306 static inline void readerinput_free(xmlreaderinput *input, void *mem)
307 {
308 m_free(input->imalloc, mem);
309 }
310
311 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
312 {
313 LPWSTR ret = NULL;
314
315 if(str) {
316 DWORD size;
317
318 size = (strlenW(str)+1)*sizeof(WCHAR);
319 ret = readerinput_alloc(input, size);
320 if (ret) memcpy(ret, str, size);
321 }
322
323 return ret;
324 }
325
326 static void reader_clear_attrs(xmlreader *reader)
327 {
328 struct attribute *attr, *attr2;
329 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
330 {
331 reader_free(reader, attr);
332 }
333 list_init(&reader->attrs);
334 reader->attr_count = 0;
335 reader->attr = NULL;
336 }
337
338 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
339 while we are on a node with attributes */
340 static HRESULT reader_add_attr(xmlreader *reader, strval *localname, strval *value)
341 {
342 struct attribute *attr;
343
344 attr = reader_alloc(reader, sizeof(*attr));
345 if (!attr) return E_OUTOFMEMORY;
346
347 attr->localname = *localname;
348 attr->value = *value;
349 list_add_tail(&reader->attrs, &attr->entry);
350 reader->attr_count++;
351
352 return S_OK;
353 }
354
355 /* This one frees stored string value if needed */
356 static void reader_free_strvalued(xmlreader *reader, strval *v)
357 {
358 if (v->str != strval_empty.str)
359 {
360 reader_free(reader, v->str);
361 *v = strval_empty;
362 }
363 }
364
365 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
366 {
367 v->start = start;
368 v->len = len;
369 v->str = NULL;
370 }
371
372 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
373 {
374 return debugstr_wn(reader_get_strptr(reader, v), v->len);
375 }
376
377 /* used to initialize from constant string */
378 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
379 {
380 v->start = 0;
381 v->len = len;
382 v->str = str;
383 }
384
385 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
386 {
387 reader_free_strvalued(reader, &reader->strvalues[type]);
388 }
389
390 static void reader_free_strvalues(xmlreader *reader)
391 {
392 int type;
393 for (type = 0; type < StringValue_Last; type++)
394 reader_free_strvalue(reader, type);
395 }
396
397 /* This helper should only be used to test if strings are the same,
398 it doesn't try to sort. */
399 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
400 {
401 if (str1->len != str2->len) return 0;
402 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
403 }
404
405 static void reader_clear_elements(xmlreader *reader)
406 {
407 struct element *elem, *elem2;
408 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
409 {
410 reader_free_strvalued(reader, &elem->qname);
411 reader_free(reader, elem);
412 }
413 list_init(&reader->elements);
414 reader->empty_element = FALSE;
415 }
416
417 static HRESULT reader_inc_depth(xmlreader *reader)
418 {
419 if (++reader->depth > reader->max_depth) return SC_E_MAXELEMENTDEPTH;
420 return S_OK;
421 }
422
423 static void reader_dec_depth(xmlreader *reader)
424 {
425 if (reader->depth > 1) reader->depth--;
426 }
427
428 static HRESULT reader_push_element(xmlreader *reader, strval *qname, strval *localname)
429 {
430 struct element *elem;
431 HRESULT hr;
432
433 elem = reader_alloc(reader, sizeof(*elem));
434 if (!elem) return E_OUTOFMEMORY;
435
436 hr = reader_strvaldup(reader, qname, &elem->qname);
437 if (FAILED(hr)) {
438 reader_free(reader, elem);
439 return hr;
440 }
441
442 hr = reader_strvaldup(reader, localname, &elem->localname);
443 if (FAILED(hr))
444 {
445 reader_free_strvalued(reader, &elem->qname);
446 reader_free(reader, elem);
447 return hr;
448 }
449
450 if (!list_empty(&reader->elements))
451 {
452 hr = reader_inc_depth(reader);
453 if (FAILED(hr)) {
454 reader_free(reader, elem);
455 return hr;
456 }
457 }
458
459 list_add_head(&reader->elements, &elem->entry);
460 reader->empty_element = FALSE;
461 return hr;
462 }
463
464 static void reader_pop_element(xmlreader *reader)
465 {
466 struct element *elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
467
468 if (elem)
469 {
470 list_remove(&elem->entry);
471 reader_free_strvalued(reader, &elem->qname);
472 reader_free_strvalued(reader, &elem->localname);
473 reader_free(reader, elem);
474 reader_dec_depth(reader);
475 }
476 }
477
478 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
479 means node value is to be determined. */
480 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
481 {
482 strval *v = &reader->strvalues[type];
483
484 reader_free_strvalue(reader, type);
485 if (!value)
486 {
487 v->str = NULL;
488 v->start = 0;
489 v->len = 0;
490 return;
491 }
492
493 if (value->str == strval_empty.str)
494 *v = *value;
495 else
496 {
497 if (type == StringValue_Value)
498 {
499 /* defer allocation for value string */
500 v->str = NULL;
501 v->start = value->start;
502 v->len = value->len;
503 }
504 else
505 {
506 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
507 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
508 v->str[value->len] = 0;
509 v->len = value->len;
510 }
511 }
512 }
513
514 static inline int is_reader_pending(xmlreader *reader)
515 {
516 return reader->input->pending;
517 }
518
519 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
520 {
521 const int initial_len = 0x2000;
522 buffer->data = readerinput_alloc(input, initial_len);
523 if (!buffer->data) return E_OUTOFMEMORY;
524
525 memset(buffer->data, 0, 4);
526 buffer->cur = 0;
527 buffer->allocated = initial_len;
528 buffer->written = 0;
529
530 return S_OK;
531 }
532
533 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
534 {
535 readerinput_free(input, buffer->data);
536 }
537
538 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
539 {
540 if (encoding == XmlEncoding_Unknown)
541 {
542 FIXME("unsupported encoding %d\n", encoding);
543 return E_NOTIMPL;
544 }
545
546 *cp = xml_encoding_map[encoding].cp;
547
548 return S_OK;
549 }
550
551 xml_encoding parse_encoding_name(const WCHAR *name, int len)
552 {
553 int min, max, n, c;
554
555 if (!name) return XmlEncoding_Unknown;
556
557 min = 0;
558 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
559
560 while (min <= max)
561 {
562 n = (min+max)/2;
563
564 if (len != -1)
565 c = strncmpiW(xml_encoding_map[n].name, name, len);
566 else
567 c = strcmpiW(xml_encoding_map[n].name, name);
568 if (!c)
569 return xml_encoding_map[n].enc;
570
571 if (c > 0)
572 max = n-1;
573 else
574 min = n+1;
575 }
576
577 return XmlEncoding_Unknown;
578 }
579
580 static HRESULT alloc_input_buffer(xmlreaderinput *input)
581 {
582 input_buffer *buffer;
583 HRESULT hr;
584
585 input->buffer = NULL;
586
587 buffer = readerinput_alloc(input, sizeof(*buffer));
588 if (!buffer) return E_OUTOFMEMORY;
589
590 buffer->input = input;
591 buffer->code_page = ~0; /* code page is unknown at this point */
592 hr = init_encoded_buffer(input, &buffer->utf16);
593 if (hr != S_OK) {
594 readerinput_free(input, buffer);
595 return hr;
596 }
597
598 hr = init_encoded_buffer(input, &buffer->encoded);
599 if (hr != S_OK) {
600 free_encoded_buffer(input, &buffer->utf16);
601 readerinput_free(input, buffer);
602 return hr;
603 }
604
605 input->buffer = buffer;
606 return S_OK;
607 }
608
609 static void free_input_buffer(input_buffer *buffer)
610 {
611 free_encoded_buffer(buffer->input, &buffer->encoded);
612 free_encoded_buffer(buffer->input, &buffer->utf16);
613 readerinput_free(buffer->input, buffer);
614 }
615
616 static void readerinput_release_stream(xmlreaderinput *readerinput)
617 {
618 if (readerinput->stream) {
619 ISequentialStream_Release(readerinput->stream);
620 readerinput->stream = NULL;
621 }
622 }
623
624 /* Queries already stored interface for IStream/ISequentialStream.
625 Interface supplied on creation will be overwritten */
626 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
627 {
628 HRESULT hr;
629
630 readerinput_release_stream(readerinput);
631 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
632 if (hr != S_OK)
633 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
634
635 return hr;
636 }
637
638 /* reads a chunk to raw buffer */
639 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
640 {
641 encoded_buffer *buffer = &readerinput->buffer->encoded;
642 /* to make sure aligned length won't exceed allocated length */
643 ULONG len = buffer->allocated - buffer->written - 4;
644 ULONG read;
645 HRESULT hr;
646
647 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
648 variable width encodings like UTF-8 */
649 len = (len + 3) & ~3;
650 /* try to use allocated space or grow */
651 if (buffer->allocated - buffer->written < len)
652 {
653 buffer->allocated *= 2;
654 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
655 len = buffer->allocated - buffer->written;
656 }
657
658 read = 0;
659 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
660 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
661 readerinput->pending = hr == E_PENDING;
662 if (FAILED(hr)) return hr;
663 buffer->written += read;
664
665 return hr;
666 }
667
668 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
669 static void readerinput_grow(xmlreaderinput *readerinput, int length)
670 {
671 encoded_buffer *buffer = &readerinput->buffer->utf16;
672
673 length *= sizeof(WCHAR);
674 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
675 if (buffer->allocated < buffer->written + length + 4)
676 {
677 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
678 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
679 buffer->allocated = grown_size;
680 }
681 }
682
683 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
684 {
685 static const char startA[] = {'<','?'};
686 static const char commentA[] = {'<','!'};
687 encoded_buffer *buffer = &readerinput->buffer->encoded;
688 unsigned char *ptr = (unsigned char*)buffer->data;
689
690 return !memcmp(buffer->data, startA, sizeof(startA)) ||
691 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
692 /* test start byte */
693 (ptr[0] == '<' &&
694 (
695 (ptr[1] && (ptr[1] <= 0x7f)) ||
696 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
697 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
698 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
699 );
700 }
701
702 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
703 {
704 encoded_buffer *buffer = &readerinput->buffer->encoded;
705 static const WCHAR startW[] = {'<','?'};
706 static const WCHAR commentW[] = {'<','!'};
707 static const char utf8bom[] = {0xef,0xbb,0xbf};
708 static const char utf16lebom[] = {0xff,0xfe};
709
710 *enc = XmlEncoding_Unknown;
711
712 if (buffer->written <= 3)
713 {
714 HRESULT hr = readerinput_growraw(readerinput);
715 if (FAILED(hr)) return hr;
716 if (buffer->written <= 3) return MX_E_INPUTEND;
717 }
718
719 /* try start symbols if we have enough data to do that, input buffer should contain
720 first chunk already */
721 if (readerinput_is_utf8(readerinput))
722 *enc = XmlEncoding_UTF8;
723 else if (!memcmp(buffer->data, startW, sizeof(startW)) ||
724 !memcmp(buffer->data, commentW, sizeof(commentW)))
725 *enc = XmlEncoding_UTF16;
726 /* try with BOM now */
727 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
728 {
729 buffer->cur += sizeof(utf8bom);
730 *enc = XmlEncoding_UTF8;
731 }
732 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
733 {
734 buffer->cur += sizeof(utf16lebom);
735 *enc = XmlEncoding_UTF16;
736 }
737
738 return S_OK;
739 }
740
741 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
742 {
743 encoded_buffer *buffer = &readerinput->buffer->encoded;
744 int len = buffer->written;
745
746 /* complete single byte char */
747 if (!(buffer->data[len-1] & 0x80)) return len;
748
749 /* find start byte of multibyte char */
750 while (--len && !(buffer->data[len] & 0xc0))
751 ;
752
753 return len;
754 }
755
756 /* Returns byte length of complete char sequence for buffer code page,
757 it's relative to current buffer position which is currently used for BOM handling
758 only. */
759 static int readerinput_get_convlen(xmlreaderinput *readerinput)
760 {
761 encoded_buffer *buffer = &readerinput->buffer->encoded;
762 int len;
763
764 if (readerinput->buffer->code_page == CP_UTF8)
765 len = readerinput_get_utf8_convlen(readerinput);
766 else
767 len = buffer->written;
768
769 TRACE("%d\n", len - buffer->cur);
770 return len - buffer->cur;
771 }
772
773 /* It's possible that raw buffer has some leftovers from last conversion - some char
774 sequence that doesn't represent a full code point. Length argument should be calculated with
775 readerinput_get_convlen(), if it's -1 it will be calculated here. */
776 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
777 {
778 encoded_buffer *buffer = &readerinput->buffer->encoded;
779
780 if (len == -1)
781 len = readerinput_get_convlen(readerinput);
782
783 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
784 /* everything below cur is lost too */
785 buffer->written -= len + buffer->cur;
786 /* after this point we don't need cur offset really,
787 it's used only to mark where actual data begins when first chunk is read */
788 buffer->cur = 0;
789 }
790
791 /* note that raw buffer content is kept */
792 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
793 {
794 encoded_buffer *src = &readerinput->buffer->encoded;
795 encoded_buffer *dest = &readerinput->buffer->utf16;
796 int len, dest_len;
797 HRESULT hr;
798 WCHAR *ptr;
799 UINT cp;
800
801 hr = get_code_page(enc, &cp);
802 if (FAILED(hr)) return;
803
804 readerinput->buffer->code_page = cp;
805 len = readerinput_get_convlen(readerinput);
806
807 TRACE("switching to cp %d\n", cp);
808
809 /* just copy in this case */
810 if (enc == XmlEncoding_UTF16)
811 {
812 readerinput_grow(readerinput, len);
813 memcpy(dest->data, src->data + src->cur, len);
814 dest->written += len*sizeof(WCHAR);
815 return;
816 }
817
818 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
819 readerinput_grow(readerinput, dest_len);
820 ptr = (WCHAR*)dest->data;
821 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
822 ptr[dest_len] = 0;
823 dest->written += dest_len*sizeof(WCHAR);
824 }
825
826 /* shrinks parsed data a buffer begins with */
827 static void reader_shrink(xmlreader *reader)
828 {
829 encoded_buffer *buffer = &reader->input->buffer->utf16;
830
831 /* avoid to move too often using threshold shrink length */
832 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
833 {
834 buffer->written -= buffer->cur*sizeof(WCHAR);
835 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
836 buffer->cur = 0;
837 *(WCHAR*)&buffer->data[buffer->written] = 0;
838 }
839 }
840
841 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
842 It won't attempt to shrink but will grow destination buffer if needed */
843 static HRESULT reader_more(xmlreader *reader)
844 {
845 xmlreaderinput *readerinput = reader->input;
846 encoded_buffer *src = &readerinput->buffer->encoded;
847 encoded_buffer *dest = &readerinput->buffer->utf16;
848 UINT cp = readerinput->buffer->code_page;
849 int len, dest_len;
850 HRESULT hr;
851 WCHAR *ptr;
852
853 /* get some raw data from stream first */
854 hr = readerinput_growraw(readerinput);
855 len = readerinput_get_convlen(readerinput);
856
857 /* just copy for UTF-16 case */
858 if (cp == ~0)
859 {
860 readerinput_grow(readerinput, len);
861 memcpy(dest->data + dest->written, src->data + src->cur, len);
862 dest->written += len*sizeof(WCHAR);
863 return hr;
864 }
865
866 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
867 readerinput_grow(readerinput, dest_len);
868 ptr = (WCHAR*)(dest->data + dest->written);
869 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
870 ptr[dest_len] = 0;
871 dest->written += dest_len*sizeof(WCHAR);
872 /* get rid of processed data */
873 readerinput_shrinkraw(readerinput, len);
874
875 return hr;
876 }
877
878 static inline UINT reader_get_cur(xmlreader *reader)
879 {
880 return reader->input->buffer->utf16.cur;
881 }
882
883 static inline WCHAR *reader_get_ptr(xmlreader *reader)
884 {
885 encoded_buffer *buffer = &reader->input->buffer->utf16;
886 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
887 if (!*ptr) reader_more(reader);
888 return (WCHAR*)buffer->data + buffer->cur;
889 }
890
891 static int reader_cmp(xmlreader *reader, const WCHAR *str)
892 {
893 const WCHAR *ptr = reader_get_ptr(reader);
894 return strncmpW(str, ptr, strlenW(str));
895 }
896
897 /* moves cursor n WCHARs forward */
898 static void reader_skipn(xmlreader *reader, int n)
899 {
900 encoded_buffer *buffer = &reader->input->buffer->utf16;
901 const WCHAR *ptr = reader_get_ptr(reader);
902
903 while (*ptr++ && n--)
904 {
905 buffer->cur++;
906 reader->pos++;
907 }
908 }
909
910 static inline BOOL is_wchar_space(WCHAR ch)
911 {
912 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
913 }
914
915 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
916 static int reader_skipspaces(xmlreader *reader)
917 {
918 encoded_buffer *buffer = &reader->input->buffer->utf16;
919 const WCHAR *ptr = reader_get_ptr(reader);
920 UINT start = reader_get_cur(reader);
921
922 while (is_wchar_space(*ptr))
923 {
924 if (*ptr == '\r')
925 reader->pos = 0;
926 else if (*ptr == '\n')
927 {
928 reader->line++;
929 reader->pos = 0;
930 }
931 else
932 reader->pos++;
933
934 buffer->cur++;
935 ptr = reader_get_ptr(reader);
936 }
937
938 return reader_get_cur(reader) - start;
939 }
940
941 /* [26] VersionNum ::= '1.' [0-9]+ */
942 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
943 {
944 static const WCHAR onedotW[] = {'1','.',0};
945 WCHAR *ptr, *ptr2;
946 UINT start;
947
948 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
949
950 start = reader_get_cur(reader);
951 /* skip "1." */
952 reader_skipn(reader, 2);
953
954 ptr2 = ptr = reader_get_ptr(reader);
955 while (*ptr >= '0' && *ptr <= '9')
956 {
957 reader_skipn(reader, 1);
958 ptr = reader_get_ptr(reader);
959 }
960
961 if (ptr2 == ptr) return WC_E_DIGIT;
962 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
963 TRACE("version=%s\n", debug_strval(reader, val));
964 return S_OK;
965 }
966
967 /* [25] Eq ::= S? '=' S? */
968 static HRESULT reader_parse_eq(xmlreader *reader)
969 {
970 static const WCHAR eqW[] = {'=',0};
971 reader_skipspaces(reader);
972 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
973 /* skip '=' */
974 reader_skipn(reader, 1);
975 reader_skipspaces(reader);
976 return S_OK;
977 }
978
979 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
980 static HRESULT reader_parse_versioninfo(xmlreader *reader)
981 {
982 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
983 strval val, name;
984 HRESULT hr;
985
986 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
987
988 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
989 reader_init_strvalue(reader_get_cur(reader), 7, &name);
990 /* skip 'version' */
991 reader_skipn(reader, 7);
992
993 hr = reader_parse_eq(reader);
994 if (FAILED(hr)) return hr;
995
996 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
997 return WC_E_QUOTE;
998 /* skip "'"|'"' */
999 reader_skipn(reader, 1);
1000
1001 hr = reader_parse_versionnum(reader, &val);
1002 if (FAILED(hr)) return hr;
1003
1004 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1005 return WC_E_QUOTE;
1006
1007 /* skip "'"|'"' */
1008 reader_skipn(reader, 1);
1009
1010 return reader_add_attr(reader, &name, &val);
1011 }
1012
1013 /* ([A-Za-z0-9._] | '-') */
1014 static inline BOOL is_wchar_encname(WCHAR ch)
1015 {
1016 return ((ch >= 'A' && ch <= 'Z') ||
1017 (ch >= 'a' && ch <= 'z') ||
1018 (ch >= '0' && ch <= '9') ||
1019 (ch == '.') || (ch == '_') ||
1020 (ch == '-'));
1021 }
1022
1023 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1024 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1025 {
1026 WCHAR *start = reader_get_ptr(reader), *ptr;
1027 xml_encoding enc;
1028 int len;
1029
1030 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1031 return WC_E_ENCNAME;
1032
1033 val->start = reader_get_cur(reader);
1034
1035 ptr = start;
1036 while (is_wchar_encname(*++ptr))
1037 ;
1038
1039 len = ptr - start;
1040 enc = parse_encoding_name(start, len);
1041 TRACE("encoding name %s\n", debugstr_wn(start, len));
1042 val->str = start;
1043 val->len = len;
1044
1045 if (enc == XmlEncoding_Unknown)
1046 return WC_E_ENCNAME;
1047
1048 /* skip encoding name */
1049 reader_skipn(reader, len);
1050 return S_OK;
1051 }
1052
1053 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1054 static HRESULT reader_parse_encdecl(xmlreader *reader)
1055 {
1056 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1057 strval name, val;
1058 HRESULT hr;
1059
1060 if (!reader_skipspaces(reader)) return S_FALSE;
1061
1062 if (reader_cmp(reader, encodingW)) return S_FALSE;
1063 name.str = reader_get_ptr(reader);
1064 name.start = reader_get_cur(reader);
1065 name.len = 8;
1066 /* skip 'encoding' */
1067 reader_skipn(reader, 8);
1068
1069 hr = reader_parse_eq(reader);
1070 if (FAILED(hr)) return hr;
1071
1072 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1073 return WC_E_QUOTE;
1074 /* skip "'"|'"' */
1075 reader_skipn(reader, 1);
1076
1077 hr = reader_parse_encname(reader, &val);
1078 if (FAILED(hr)) return hr;
1079
1080 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1081 return WC_E_QUOTE;
1082
1083 /* skip "'"|'"' */
1084 reader_skipn(reader, 1);
1085
1086 return reader_add_attr(reader, &name, &val);
1087 }
1088
1089 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1090 static HRESULT reader_parse_sddecl(xmlreader *reader)
1091 {
1092 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1093 static const WCHAR yesW[] = {'y','e','s',0};
1094 static const WCHAR noW[] = {'n','o',0};
1095 strval name, val;
1096 UINT start;
1097 HRESULT hr;
1098
1099 if (!reader_skipspaces(reader)) return S_FALSE;
1100
1101 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1102 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1103 /* skip 'standalone' */
1104 reader_skipn(reader, 10);
1105
1106 hr = reader_parse_eq(reader);
1107 if (FAILED(hr)) return hr;
1108
1109 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1110 return WC_E_QUOTE;
1111 /* skip "'"|'"' */
1112 reader_skipn(reader, 1);
1113
1114 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1115 return WC_E_XMLDECL;
1116
1117 start = reader_get_cur(reader);
1118 /* skip 'yes'|'no' */
1119 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1120 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1121 TRACE("standalone=%s\n", debug_strval(reader, &val));
1122
1123 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1124 return WC_E_QUOTE;
1125 /* skip "'"|'"' */
1126 reader_skipn(reader, 1);
1127
1128 return reader_add_attr(reader, &name, &val);
1129 }
1130
1131 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1132 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1133 {
1134 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1135 static const WCHAR declcloseW[] = {'?','>',0};
1136 HRESULT hr;
1137
1138 /* check if we have "<?xml " */
1139 if (reader_cmp(reader, xmldeclW)) return S_FALSE;
1140
1141 reader_skipn(reader, 5);
1142 hr = reader_parse_versioninfo(reader);
1143 if (FAILED(hr))
1144 return hr;
1145
1146 hr = reader_parse_encdecl(reader);
1147 if (FAILED(hr))
1148 return hr;
1149
1150 hr = reader_parse_sddecl(reader);
1151 if (FAILED(hr))
1152 return hr;
1153
1154 reader_skipspaces(reader);
1155 if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
1156 reader_skipn(reader, 2);
1157
1158 reader_inc_depth(reader);
1159 reader->nodetype = XmlNodeType_XmlDeclaration;
1160 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1161 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1162 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1163
1164 return S_OK;
1165 }
1166
1167 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1168 static HRESULT reader_parse_comment(xmlreader *reader)
1169 {
1170 WCHAR *ptr;
1171 UINT start;
1172
1173 if (reader->resumestate == XmlReadResumeState_Comment)
1174 {
1175 start = reader->resume[XmlReadResume_Body];
1176 ptr = reader_get_ptr(reader);
1177 }
1178 else
1179 {
1180 /* skip '<!--' */
1181 reader_skipn(reader, 4);
1182 reader_shrink(reader);
1183 ptr = reader_get_ptr(reader);
1184 start = reader_get_cur(reader);
1185 reader->nodetype = XmlNodeType_Comment;
1186 reader->resume[XmlReadResume_Body] = start;
1187 reader->resumestate = XmlReadResumeState_Comment;
1188 reader_set_strvalue(reader, StringValue_LocalName, NULL);
1189 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
1190 reader_set_strvalue(reader, StringValue_Value, NULL);
1191 }
1192
1193 /* will exit when there's no more data, it won't attempt to
1194 read more from stream */
1195 while (*ptr)
1196 {
1197 if (ptr[0] == '-')
1198 {
1199 if (ptr[1] == '-')
1200 {
1201 if (ptr[2] == '>')
1202 {
1203 strval value;
1204
1205 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1206 TRACE("%s\n", debug_strval(reader, &value));
1207
1208 /* skip rest of markup '->' */
1209 reader_skipn(reader, 3);
1210
1211 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1212 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1213 reader_set_strvalue(reader, StringValue_Value, &value);
1214 reader->resume[XmlReadResume_Body] = 0;
1215 reader->resumestate = XmlReadResumeState_Initial;
1216 return S_OK;
1217 }
1218 else
1219 return WC_E_COMMENT;
1220 }
1221 }
1222
1223 reader_skipn(reader, 1);
1224 ptr++;
1225 }
1226
1227 return S_OK;
1228 }
1229
/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]

   Works on a single UTF-16 code unit, both high (0xd800-0xdbff) and low
   (0xdc00-0xdfff) surrogate halves are accepted on their own. Together with
   the two BMP ranges that makes everything from 0x20 to 0xfffd valid. */
static inline BOOL is_char(WCHAR ch)
{
    /* only tab, line feed and carriage return are allowed below space */
    if (ch < 0x20)
        return ch == '\t' || ch == '\n' || ch == '\r';

    /* 0xfffe and 0xffff are the only invalid values left */
    return ch <= 0xfffd;
}
1239
/* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]

   Returns non-zero if character is allowed in a public identifier literal.
   Both quote kinds are handled by the caller: '"' is never a PubidChar, while
   "'" is one unless it's used as a literal delimiter. */
static inline BOOL is_pubchar(WCHAR ch)
{
    return (ch == ' ') ||
           (ch >= 'a' && ch <= 'z') ||
           (ch >= 'A' && ch <= 'Z') ||
           (ch >= '0' && ch <= '9') ||
           /* continuous 0x27-0x3b range, it has to start with apostrophe
              to get '()*+, in; digits are covered once more here */
           (ch >= '\'' && ch <= ';') || /* '()*+,-./:; */
           (ch == '=') || (ch == '?') ||
           (ch == '@') || (ch == '!') ||
           (ch >= '#' && ch <= '%') || /* #$% */
           (ch == '_') || (ch == '\r') || (ch == '\n');
}
1253
/* Tests if character can start a Name, see production [4] NameStartChar.
   Single UTF-16 code unit is tested, surrogate halves are accepted as is. */
static inline BOOL is_namestartchar(WCHAR ch)
{
    /* inclusive ranges */
    static const struct { WCHAR first, last; } ranges[] =
    {
        { ':',    ':'    },
        { 'A',    'Z'    },
        { '_',    '_'    },
        { 'a',    'z'    },
        { 0xc0,   0xd6   },
        { 0xd8,   0xf6   },
        { 0xf8,   0x2ff  },
        { 0x370,  0x37d  },
        { 0x37f,  0x1fff },
        { 0x200c, 0x200d },
        { 0x2070, 0x218f },
        { 0x2c00, 0x2fef },
        { 0x3001, 0xd7ff },
        { 0xd800, 0xdbff }, /* high surrogate */
        { 0xdc00, 0xdfff }, /* low surrogate */
        { 0xf900, 0xfdcf },
        { 0xfdf0, 0xfffd },
    };
    const unsigned int count = sizeof(ranges)/sizeof(ranges[0]);
    unsigned int i;

    for (i = 0; i < count; i++)
        if (ch >= ranges[i].first && ch <= ranges[i].last) break;

    /* loop was left early only on a match */
    return i < count;
}
1272
/* [4 NS] NCName ::= Name - (Char* ':' Char*)

   Tests if character is allowed in NCName, that is any name character
   except ':'. Single UTF-16 code unit is tested, surrogate halves are
   accepted as is. */
static inline BOOL is_ncnamechar(WCHAR ch)
{
    /* inclusive ranges */
    static const struct { WCHAR first, last; } ranges[] =
    {
        { 'A',    'Z'    },
        { '_',    '_'    },
        { 'a',    'z'    },
        { '-',    '-'    },
        { '.',    '.'    },
        { '0',    '9'    },
        { 0xb7,   0xb7   },
        { 0xc0,   0xd6   },
        { 0xd8,   0xf6   },
        { 0xf8,   0x2ff  },
        { 0x300,  0x36f  },
        { 0x370,  0x37d  },
        { 0x37f,  0x1fff },
        { 0x200c, 0x200d },
        { 0x203f, 0x2040 },
        { 0x2070, 0x218f },
        { 0x2c00, 0x2fef },
        { 0x3001, 0xd7ff },
        { 0xd800, 0xdbff }, /* high surrogate */
        { 0xdc00, 0xdfff }, /* low surrogate */
        { 0xf900, 0xfdcf },
        { 0xfdf0, 0xfffd },
    };
    const unsigned int count = sizeof(ranges)/sizeof(ranges[0]);
    unsigned int i;

    for (i = 0; i < count; i++)
        if (ch >= ranges[i].first && ch <= ranges[i].last) break;

    /* loop was left early only on a match */
    return i < count;
}
1297
1298 static inline BOOL is_namechar(WCHAR ch)
1299 {
1300 return (ch == ':') || is_ncnamechar(ch);
1301 }
1302
1303 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1304 {
1305 /* When we're on attribute always return attribute type, container node type is kept.
1306 Note that container is not necessarily an element, and attribute doesn't mean it's
1307 an attribute in XML spec terms. */
1308 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1309 }
1310
1311 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1312 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1313 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1314 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1315 [5] Name ::= NameStartChar (NameChar)* */
1316 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1317 {
1318 WCHAR *ptr;
1319 UINT start;
1320
1321 if (reader->resume[XmlReadResume_Name])
1322 {
1323 start = reader->resume[XmlReadResume_Name];
1324 ptr = reader_get_ptr(reader);
1325 }
1326 else
1327 {
1328 ptr = reader_get_ptr(reader);
1329 start = reader_get_cur(reader);
1330 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1331 }
1332
1333 while (is_namechar(*ptr))
1334 {
1335 reader_skipn(reader, 1);
1336 ptr = reader_get_ptr(reader);
1337 }
1338
1339 if (is_reader_pending(reader))
1340 {
1341 reader->resume[XmlReadResume_Name] = start;
1342 return E_PENDING;
1343 }
1344 else
1345 reader->resume[XmlReadResume_Name] = 0;
1346
1347 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1348 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1349
1350 return S_OK;
1351 }
1352
1353 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1354 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1355 {
1356 static const WCHAR xmlW[] = {'x','m','l'};
1357 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1358 strval name;
1359 WCHAR *ptr;
1360 HRESULT hr;
1361 UINT i;
1362
1363 hr = reader_parse_name(reader, &name);
1364 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1365
1366 /* now that we got name check for illegal content */
1367 if (strval_eq(reader, &name, &xmlval))
1368 return WC_E_LEADINGXML;
1369
1370 /* PITarget can't be a qualified name */
1371 ptr = reader_get_strptr(reader, &name);
1372 for (i = 0; i < name.len; i++)
1373 if (ptr[i] == ':')
1374 return i ? NC_E_NAMECOLON : WC_E_PI;
1375
1376 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1377 *target = name;
1378 return S_OK;
1379 }
1380
1381 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1382 static HRESULT reader_parse_pi(xmlreader *reader)
1383 {
1384 strval target;
1385 WCHAR *ptr;
1386 UINT start;
1387 HRESULT hr;
1388
1389 switch (reader->resumestate)
1390 {
1391 case XmlReadResumeState_Initial:
1392 /* skip '<?' */
1393 reader_skipn(reader, 2);
1394 reader_shrink(reader);
1395 reader->resumestate = XmlReadResumeState_PITarget;
1396 case XmlReadResumeState_PITarget:
1397 hr = reader_parse_pitarget(reader, &target);
1398 if (FAILED(hr)) return hr;
1399 reader_set_strvalue(reader, StringValue_LocalName, &target);
1400 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1401 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1402 reader->resumestate = XmlReadResumeState_PIBody;
1403 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1404 default:
1405 ;
1406 }
1407
1408 start = reader->resume[XmlReadResume_Body];
1409 ptr = reader_get_ptr(reader);
1410 while (*ptr)
1411 {
1412 if (ptr[0] == '?')
1413 {
1414 if (ptr[1] == '>')
1415 {
1416 UINT cur = reader_get_cur(reader);
1417 strval value;
1418
1419 /* strip all leading whitespace chars */
1420 while (start < cur)
1421 {
1422 ptr = reader_get_ptr2(reader, start);
1423 if (!is_wchar_space(*ptr)) break;
1424 start++;
1425 }
1426
1427 reader_init_strvalue(start, cur-start, &value);
1428
1429 /* skip '?>' */
1430 reader_skipn(reader, 2);
1431 TRACE("%s\n", debug_strval(reader, &value));
1432 reader->nodetype = XmlNodeType_ProcessingInstruction;
1433 reader->resumestate = XmlReadResumeState_Initial;
1434 reader->resume[XmlReadResume_Body] = 0;
1435 reader_set_strvalue(reader, StringValue_Value, &value);
1436 return S_OK;
1437 }
1438 }
1439
1440 reader_skipn(reader, 1);
1441 ptr = reader_get_ptr(reader);
1442 }
1443
1444 return S_OK;
1445 }
1446
1447 /* This one is used to parse significant whitespace nodes, like in Misc production */
1448 static HRESULT reader_parse_whitespace(xmlreader *reader)
1449 {
1450 switch (reader->resumestate)
1451 {
1452 case XmlReadResumeState_Initial:
1453 reader_shrink(reader);
1454 reader->resumestate = XmlReadResumeState_Whitespace;
1455 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1456 reader->nodetype = XmlNodeType_Whitespace;
1457 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1458 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1459 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1460 /* fallthrough */
1461 case XmlReadResumeState_Whitespace:
1462 {
1463 strval value;
1464 UINT start;
1465
1466 reader_skipspaces(reader);
1467 if (is_reader_pending(reader)) return S_OK;
1468
1469 start = reader->resume[XmlReadResume_Body];
1470 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1471 reader_set_strvalue(reader, StringValue_Value, &value);
1472 TRACE("%s\n", debug_strval(reader, &value));
1473 reader->resumestate = XmlReadResumeState_Initial;
1474 }
1475 default:
1476 ;
1477 }
1478
1479 return S_OK;
1480 }
1481
1482 /* [27] Misc ::= Comment | PI | S */
1483 static HRESULT reader_parse_misc(xmlreader *reader)
1484 {
1485 HRESULT hr = S_FALSE;
1486
1487 if (reader->resumestate != XmlReadResumeState_Initial)
1488 {
1489 hr = reader_more(reader);
1490 if (FAILED(hr)) return hr;
1491
1492 /* finish current node */
1493 switch (reader->resumestate)
1494 {
1495 case XmlReadResumeState_PITarget:
1496 case XmlReadResumeState_PIBody:
1497 return reader_parse_pi(reader);
1498 case XmlReadResumeState_Comment:
1499 return reader_parse_comment(reader);
1500 case XmlReadResumeState_Whitespace:
1501 return reader_parse_whitespace(reader);
1502 default:
1503 ERR("unknown resume state %d\n", reader->resumestate);
1504 }
1505 }
1506
1507 while (1)
1508 {
1509 const WCHAR *cur = reader_get_ptr(reader);
1510
1511 if (is_wchar_space(*cur))
1512 hr = reader_parse_whitespace(reader);
1513 else if (!reader_cmp(reader, commentW))
1514 hr = reader_parse_comment(reader);
1515 else if (!reader_cmp(reader, piW))
1516 hr = reader_parse_pi(reader);
1517 else
1518 break;
1519
1520 if (hr != S_FALSE) return hr;
1521 }
1522
1523 return hr;
1524 }
1525
1526 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1527 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1528 {
1529 WCHAR *cur = reader_get_ptr(reader), quote;
1530 UINT start;
1531
1532 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1533
1534 quote = *cur;
1535 reader_skipn(reader, 1);
1536
1537 cur = reader_get_ptr(reader);
1538 start = reader_get_cur(reader);
1539 while (is_char(*cur) && *cur != quote)
1540 {
1541 reader_skipn(reader, 1);
1542 cur = reader_get_ptr(reader);
1543 }
1544 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1545 if (*cur == quote) reader_skipn(reader, 1);
1546
1547 TRACE("%s\n", debug_strval(reader, literal));
1548 return S_OK;
1549 }
1550
1551 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1552 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1553 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1554 {
1555 WCHAR *cur = reader_get_ptr(reader), quote;
1556 UINT start;
1557
1558 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1559
1560 quote = *cur;
1561 reader_skipn(reader, 1);
1562
1563 start = reader_get_cur(reader);
1564 cur = reader_get_ptr(reader);
1565 while (is_pubchar(*cur) && *cur != quote)
1566 {
1567 reader_skipn(reader, 1);
1568 cur = reader_get_ptr(reader);
1569 }
1570
1571 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1572 TRACE("%s\n", debug_strval(reader, literal));
1573 return S_OK;
1574 }
1575
1576 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1577 static HRESULT reader_parse_externalid(xmlreader *reader)
1578 {
1579 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1580 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1581 strval name;
1582 HRESULT hr;
1583 int cnt;
1584
1585 if (reader_cmp(reader, systemW))
1586 {
1587 if (reader_cmp(reader, publicW))
1588 return S_FALSE;
1589 else
1590 {
1591 strval pub;
1592
1593 /* public id */
1594 reader_skipn(reader, 6);
1595 cnt = reader_skipspaces(reader);
1596 if (!cnt) return WC_E_WHITESPACE;
1597
1598 hr = reader_parse_pub_literal(reader, &pub);
1599 if (FAILED(hr)) return hr;
1600
1601 reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1602 return reader_add_attr(reader, &name, &pub);
1603 }
1604 }
1605 else
1606 {
1607 strval sys;
1608
1609 /* system id */
1610 reader_skipn(reader, 6);
1611 cnt = reader_skipspaces(reader);
1612 if (!cnt) return WC_E_WHITESPACE;
1613
1614 hr = reader_parse_sys_literal(reader, &sys);
1615 if (FAILED(hr)) return hr;
1616
1617 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1618 return reader_add_attr(reader, &name, &sys);
1619 }
1620
1621 return hr;
1622 }
1623
1624 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1625 static HRESULT reader_parse_dtd(xmlreader *reader)
1626 {
1627 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1628 strval name;
1629 WCHAR *cur;
1630 HRESULT hr;
1631
1632 /* check if we have "<!DOCTYPE" */
1633 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1634 reader_shrink(reader);
1635
1636 /* DTD processing is not allowed by default */
1637 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1638
1639 reader_skipn(reader, 9);
1640 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1641
1642 /* name */
1643 hr = reader_parse_name(reader, &name);
1644 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1645
1646 reader_skipspaces(reader);
1647
1648 hr = reader_parse_externalid(reader);
1649 if (FAILED(hr)) return hr;
1650
1651 reader_skipspaces(reader);
1652
1653 cur = reader_get_ptr(reader);
1654 if (*cur != '>')
1655 {
1656 FIXME("internal subset parsing not implemented\n");
1657 return E_NOTIMPL;
1658 }
1659
1660 /* skip '>' */
1661 reader_skipn(reader, 1);
1662
1663 reader->nodetype = XmlNodeType_DocumentType;
1664 reader_set_strvalue(reader, StringValue_LocalName, &name);
1665 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1666
1667 return S_OK;
1668 }
1669
1670 /* [11 NS] LocalPart ::= NCName */
1671 static HRESULT reader_parse_local(xmlreader *reader, strval *local)
1672 {
1673 WCHAR *ptr;
1674 UINT start;
1675
1676 if (reader->resume[XmlReadResume_Local])
1677 {
1678 start = reader->resume[XmlReadResume_Local];
1679 ptr = reader_get_ptr(reader);
1680 }
1681 else
1682 {
1683 ptr = reader_get_ptr(reader);
1684 start = reader_get_cur(reader);
1685 }
1686
1687 while (is_ncnamechar(*ptr))
1688 {
1689 reader_skipn(reader, 1);
1690 ptr = reader_get_ptr(reader);
1691 }
1692
1693 if (is_reader_pending(reader))
1694 {
1695 reader->resume[XmlReadResume_Local] = start;
1696 return E_PENDING;
1697 }
1698 else
1699 reader->resume[XmlReadResume_Local] = 0;
1700
1701 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1702
1703 return S_OK;
1704 }
1705
/* [7 NS] QName ::= PrefixedName | UnprefixedName
   [8 NS] PrefixedName ::= Prefix ':' LocalPart
   [9 NS] UnprefixedName ::= LocalPart
   [10 NS] Prefix ::= NCName

   Parses a qualified name and returns its parts as buffer ranges in 'prefix',
   'local' and 'qname'. Offset of the first name character is kept in
   resume[XmlReadResume_Name] while name is being parsed; together with
   resume[XmlReadResume_Local], which is maintained by reader_parse_local(),
   it's used to continue after E_PENDING. Both entries are reset on success.

   Returns NC_E_QNAMECHARACTER if input doesn't start with NCName character,
   E_PENDING when reader is pending, or failure code of reader_parse_local(). */
static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
{
    WCHAR *ptr;
    UINT start;
    HRESULT hr;

    if (reader->resume[XmlReadResume_Name])
    {
        /* name was started by previous call */
        start = reader->resume[XmlReadResume_Name];
        ptr = reader_get_ptr(reader);
    }
    else
    {
        ptr = reader_get_ptr(reader);
        start = reader_get_cur(reader);
        /* note that saved offset is not cleared if check below fails */
        reader->resume[XmlReadResume_Name] = start;
        if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
    }

    if (reader->resume[XmlReadResume_Local])
    {
        /* local part was started by previous call, so prefix and ':' were skipped already */
        hr = reader_parse_local(reader, local);
        if (FAILED(hr)) return hr;

        /* prefix is everything from name start up to ':' that precedes local part */
        reader_init_strvalue(reader->resume[XmlReadResume_Name],
                             local->start - reader->resume[XmlReadResume_Name] - 1,
                             prefix);
    }
    else
    {
        /* skip prefix part */
        while (is_ncnamechar(*ptr))
        {
            reader_skipn(reader, 1);
            ptr = reader_get_ptr(reader);
        }

        if (is_reader_pending(reader)) return E_PENDING;

        /* got a qualified name */
        if (*ptr == ':')
        {
            reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);

            /* skip ':' */
            reader_skipn(reader, 1);
            hr = reader_parse_local(reader, local);
            if (FAILED(hr)) return hr;
        }
        else
        {
            /* no prefix, what was skipped is a local name */
            reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
            reader_init_strvalue(0, 0, prefix);
        }
    }

    /* NOTE(review): this overwrites 'local' set above with the whole range from
       name start to current position. For a prefixed name that range also covers
       prefix and ':', which makes length computed for 'qname' below larger than
       the name itself - confirm this is intended. */
    reader_init_strvalue(start, reader_get_cur(reader)-start, local);

    if (prefix->len)
        TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
    else
        TRACE("ncname %s\n", debug_strval(reader, local));

    reader_init_strvalue(prefix->len ? prefix->start : local->start,
                         /* count ':' too */
                         (prefix->len ? prefix->len + 1 : 0) + local->len,
                         qname);

    reader->resume[XmlReadResume_Name] = 0;
    reader->resume[XmlReadResume_Local] = 0;

    return S_OK;
}
1783
1784 /* Applies normalization rules to a single char, used for attribute values.
1785
1786 Rules include 2 steps:
1787
1788 1) replacing \r\n with a single \n;
1789 2) replacing all whitespace chars with ' '.
1790
1791 */
1792 static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
1793 {
1794 encoded_buffer *buffer = &reader->input->buffer->utf16;
1795
1796 if (!is_wchar_space(*ptr)) return;
1797
1798 if (*ptr == '\r' && *(ptr+1) == '\n')
1799 {
1800 int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR);
1801 memmove(ptr+1, ptr+2, len);
1802 }
1803 *ptr = ' ';
1804 }
1805
1806 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
1807 {
1808 static const WCHAR entltW[] = {'l','t'};
1809 static const WCHAR entgtW[] = {'g','t'};
1810 static const WCHAR entampW[] = {'a','m','p'};
1811 static const WCHAR entaposW[] = {'a','p','o','s'};
1812 static const WCHAR entquotW[] = {'q','u','o','t'};
1813 static const strval lt = { (WCHAR*)entltW, 2 };
1814 static const strval gt = { (WCHAR*)entgtW, 2 };
1815 static const strval amp = { (WCHAR*)entampW, 3 };
1816 static const strval apos = { (WCHAR*)entaposW, 4 };
1817 static const strval quot = { (WCHAR*)entquotW, 4 };
1818 WCHAR *str = reader_get_strptr(reader, name);
1819
1820 switch (*str)
1821 {
1822 case 'l':
1823 if (strval_eq(reader, name, &lt)) return '<';
1824 break;
1825 case 'g':
1826 if (strval_eq(reader, name, &gt)) return '>';
1827 break;
1828 case 'a':
1829 if (strval_eq(reader, name, &amp))
1830 return '&';
1831 else if (strval_eq(reader, name, &apos))
1832 return '\'';
1833 break;
1834 case 'q':
1835 if (strval_eq(reader, name, &quot)) return '\"';
1836 break;
1837 default:
1838 ;
1839 }
1840
1841 return 0;
1842 }
1843
1844 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1845 [67] Reference ::= EntityRef | CharRef
1846 [68] EntityRef ::= '&' Name ';' */
1847 static HRESULT reader_parse_reference(xmlreader *reader)
1848 {
1849 encoded_buffer *buffer = &reader->input->buffer->utf16;
1850 WCHAR *start = reader_get_ptr(reader), *ptr;
1851 UINT cur = reader_get_cur(reader);
1852 WCHAR ch = 0;
1853 int len;
1854
1855 /* skip '&' */
1856 reader_skipn(reader, 1);
1857 ptr = reader_get_ptr(reader);
1858
1859 if (*ptr == '#')
1860 {
1861 reader_skipn(reader, 1);
1862 ptr = reader_get_ptr(reader);
1863
1864 /* hex char or decimal */
1865 if (*ptr == 'x')
1866 {
1867 reader_skipn(reader, 1);
1868 ptr = reader_get_ptr(reader);
1869
1870 while (*ptr != ';')
1871 {
1872 if ((*ptr >= '0' && *ptr <= '9'))
1873 ch = ch*16 + *ptr - '0';
1874 else if ((*ptr >= 'a' && *ptr <= 'f'))
1875 ch = ch*16 + *ptr - 'a' + 10;
1876 else if ((*ptr >= 'A' && *ptr <= 'F'))
1877 ch = ch*16 + *ptr - 'A' + 10;
1878 else
1879 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
1880 reader_skipn(reader, 1);
1881 ptr = reader_get_ptr(reader);
1882 }
1883 }
1884 else
1885 {
1886 while (*ptr != ';')
1887 {
1888 if ((*ptr >= '0' && *ptr <= '9'))
1889 {
1890 ch = ch*10 + *ptr - '0';
1891 reader_skipn(reader, 1);
1892 ptr = reader_get_ptr(reader);
1893 }
1894 else
1895 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
1896 }
1897 }
1898
1899 if (!is_char(ch)) return WC_E_XMLCHARACTER;
1900
1901 /* normalize */
1902 if (is_wchar_space(ch)) ch = ' ';
1903
1904 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
1905 memmove(start+1, ptr+1, len);
1906 buffer->cur = cur + 1;
1907
1908 *start = ch;
1909 }
1910 else
1911 {
1912 strval name;
1913 HRESULT hr;
1914
1915 hr = reader_parse_name(reader, &name);
1916 if (FAILED(hr)) return hr;
1917
1918 ptr = reader_get_ptr(reader);
1919 if (*ptr != ';') return WC_E_SEMICOLON;
1920
1921 /* predefined entities resolve to a single character */
1922 ch = get_predefined_entity(reader, &name);
1923 if (ch)
1924 {
1925 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
1926 memmove(start+1, ptr+1, len);
1927 buffer->cur = cur + 1;
1928
1929 *start = ch;
1930 }
1931 else
1932 {
1933 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
1934 return WC_E_UNDECLAREDENTITY;
1935 }
1936
1937 }
1938
1939 return S_OK;
1940 }
1941
1942 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
1943 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
1944 {
1945 WCHAR *ptr, quote;
1946 UINT start;
1947
1948 ptr = reader_get_ptr(reader);
1949
1950 /* skip opening quote */
1951 quote = *ptr;
1952 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
1953 reader_skipn(reader, 1);
1954
1955 ptr = reader_get_ptr(reader);
1956 start = reader_get_cur(reader);
1957 while (*ptr)
1958 {
1959 if (*ptr == '<') return WC_E_LESSTHAN;
1960
1961 if (*ptr == quote)
1962 {
1963 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
1964 /* skip closing quote */
1965 reader_skipn(reader, 1);
1966 return S_OK;
1967 }
1968
1969 if (*ptr == '&')
1970 {
1971 HRESULT hr = reader_parse_reference(reader);
1972 if (FAILED(hr)) return hr;
1973 }
1974 else
1975 {
1976 reader_normalize_space(reader, ptr);
1977 reader_skipn(reader, 1);
1978 }
1979 ptr = reader_get_ptr(reader);
1980 }
1981
1982 return WC_E_QUOTE;
1983 }
1984
1985 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
1986 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
1987 [3 NS] DefaultAttName ::= 'xmlns'
1988 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
1989 static HRESULT reader_parse_attribute(xmlreader *reader)
1990 {
1991 static const WCHAR xmlnsW[] = {'x','m','l','n','s',0};
1992 strval prefix, local, qname, xmlns, value;
1993 HRESULT hr;
1994
1995 hr = reader_parse_qname(reader, &prefix, &local, &qname);
1996 if (FAILED(hr)) return hr;
1997
1998 reader_init_cstrvalue((WCHAR*)xmlnsW, 5, &xmlns);
1999
2000 if (strval_eq(reader, &prefix, &xmlns))
2001 {
2002 FIXME("namespace definitions not supported\n");
2003 return E_NOTIMPL;
2004 }
2005
2006 if (strval_eq(reader, &qname, &xmlns))
2007 FIXME("default namespace definitions not supported\n");
2008
2009 hr = reader_parse_eq(reader);
2010 if (FAILED(hr)) return hr;
2011
2012 hr = reader_parse_attvalue(reader, &value);
2013 if (FAILED(hr)) return hr;
2014
2015 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2016 return reader_add_attr(reader, &local, &value);
2017 }
2018
2019 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2020 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2021 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname, int *empty)
2022 {
2023 HRESULT hr;
2024
2025 hr = reader_parse_qname(reader, prefix, local, qname);
2026 if (FAILED(hr)) return hr;
2027
2028 while (1)
2029 {
2030 static const WCHAR endW[] = {'/','>',0};
2031
2032 reader_skipspaces(reader);
2033
2034 /* empty element */
2035 if ((*empty = !reader_cmp(reader, endW)))
2036 {
2037 /* skip '/>' */
2038 reader_skipn(reader, 2);
2039 reader->empty_element = TRUE;
2040 return S_OK;
2041 }
2042
2043 /* got a start tag */
2044 if (!reader_cmp(reader, gtW))
2045 {
2046 /* skip '>' */
2047 reader_skipn(reader, 1);
2048 return reader_push_element(reader, qname, local);
2049 }
2050
2051 hr = reader_parse_attribute(reader);
2052 if (FAILED(hr)) return hr;
2053 }
2054
2055 return S_OK;
2056 }
2057
2058 /* [39] element ::= EmptyElemTag | STag content ETag */
2059 static HRESULT reader_parse_element(xmlreader *reader)
2060 {
2061 HRESULT hr;
2062
2063 switch (reader->resumestate)
2064 {
2065 case XmlReadResumeState_Initial:
2066 /* check if we are really on element */
2067 if (reader_cmp(reader, ltW)) return S_FALSE;
2068
2069 /* skip '<' */
2070 reader_skipn(reader, 1);
2071
2072 reader_shrink(reader);
2073 reader->resumestate = XmlReadResumeState_STag;
2074 case XmlReadResumeState_STag:
2075 {
2076 strval qname, prefix, local;
2077 int empty = 0;
2078
2079 /* this handles empty elements too */
2080 hr = reader_parse_stag(reader, &prefix, &local, &qname, &empty);
2081 if (FAILED(hr)) return hr;
2082
2083 /* FIXME: need to check for defined namespace to reject invalid prefix,
2084 currently reject all prefixes */
2085 if (prefix.len) return NC_E_UNDECLAREDPREFIX;
2086
2087 /* if we got empty element and stack is empty go straight to Misc */
2088 if (empty && list_empty(&reader->elements))
2089 reader->instate = XmlReadInState_MiscEnd;
2090 else
2091 reader->instate = XmlReadInState_Content;
2092
2093 reader->nodetype = XmlNodeType_Element;
2094 reader->resumestate = XmlReadResumeState_Initial;
2095 reader_set_strvalue(reader, StringValue_LocalName, &local);
2096 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2097 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2098 break;
2099 }
2100 default:
2101 hr = E_FAIL;
2102 }
2103
2104 return hr;
2105 }
2106
2107 /* [13 NS] ETag ::= '</' QName S? '>' */
2108 static HRESULT reader_parse_endtag(xmlreader *reader)
2109 {
2110 strval prefix, local, qname;
2111 struct element *elem;
2112 HRESULT hr;
2113
2114 /* skip '</' */
2115 reader_skipn(reader, 2);
2116
2117 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2118 if (FAILED(hr)) return hr;
2119
2120 reader_skipspaces(reader);
2121
2122 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2123
2124 /* skip '>' */
2125 reader_skipn(reader, 1);
2126
2127 /* Element stack should never be empty at this point, cause we shouldn't get to
2128 content parsing if it's empty. */
2129 elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2130 if (!strval_eq(reader, &elem->qname, &qname)) return WC_E_ELEMENTMATCH;
2131
2132 reader_pop_element(reader);
2133
2134 /* It was a root element, the rest is expected as Misc */
2135 if (list_empty(&reader->elements))
2136 reader->instate = XmlReadInState_MiscEnd;
2137
2138 reader->nodetype = XmlNodeType_EndElement;
2139 reader_set_strvalue(reader, StringValue_LocalName, &local);
2140 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2141
2142 return S_OK;
2143 }
2144
2145 /* [18] CDSect ::= CDStart CData CDEnd
2146 [19] CDStart ::= '<![CDATA['
2147 [20] CData ::= (Char* - (Char* ']]>' Char*))
2148 [21] CDEnd ::= ']]>' */
2149 static HRESULT reader_parse_cdata(xmlreader *reader)
2150 {
2151 WCHAR *ptr;
2152 UINT start;
2153
2154 if (reader->resumestate == XmlReadResumeState_CDATA)
2155 {
2156 start = reader->resume[XmlReadResume_Body];
2157 ptr = reader_get_ptr(reader);
2158 }
2159 else
2160 {
2161 /* skip markup '<![CDATA[' */
2162 reader_skipn(reader, 9);
2163 reader_shrink(reader);
2164 ptr = reader_get_ptr(reader);
2165 start = reader_get_cur(reader);
2166 reader->nodetype = XmlNodeType_CDATA;
2167 reader->resume[XmlReadResume_Body] = start;
2168 reader->resumestate = XmlReadResumeState_CDATA;
2169 reader_set_strvalue(reader, StringValue_LocalName, NULL);
2170 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
2171 reader_set_strvalue(reader, StringValue_Value, NULL);
2172 }
2173
2174 while (*ptr)
2175 {
2176 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2177 {
2178 strval value;
2179
2180 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2181
2182 /* skip ']]>' */
2183 reader_skipn(reader, 3);
2184 TRACE("%s\n", debug_strval(reader, &value));
2185
2186 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2187 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2188 reader_set_strvalue(reader, StringValue_Value, &value);
2189 reader->resume[XmlReadResume_Body] = 0;
2190 reader->resumestate = XmlReadResumeState_Initial;
2191 return S_OK;
2192 }
2193 else
2194 {
2195 /* Value normalization is not fully implemented, rules are:
2196
2197 - single '\r' -> '\n';
2198 - sequence '\r\n' -> '\n', in this case value length changes;
2199 */
2200 if (*ptr == '\r') *ptr = '\n';
2201 reader_skipn(reader, 1);
2202 ptr++;
2203 }
2204 }
2205
2206 return S_OK;
2207 }
2208
2209 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2210 static HRESULT reader_parse_chardata(xmlreader *reader)
2211 {
2212 WCHAR *ptr;
2213 UINT start;
2214
2215 if (reader->resumestate == XmlReadResumeState_CharData)
2216 {
2217 start = reader->resume[XmlReadResume_Body];
2218 ptr = reader_get_ptr(reader);
2219 }
2220 else
2221 {
2222 reader_shrink(reader);
2223 ptr = reader_get_ptr(reader);
2224 start = reader_get_cur(reader);
2225 /* There's no text */
2226 if (!*ptr || *ptr == '<') return S_OK;
2227 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2228 reader->resume[XmlReadResume_Body] = start;
2229 reader->resumestate = XmlReadResumeState_CharData;
2230 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2231 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2232 reader_set_strvalue(reader, StringValue_Value, NULL);
2233 }
2234
2235 while (*ptr)
2236 {
2237 /* CDATA closing sequence ']]>' is not allowed */
2238 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2239 return WC_E_CDSECTEND;
2240
2241 /* Found next markup part */
2242 if (ptr[0] == '<')
2243 {
2244 strval value;
2245
2246 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2247 reader_set_strvalue(reader, StringValue_Value, &value);
2248 reader->resume[XmlReadResume_Body] = 0;
2249 reader->resumestate = XmlReadResumeState_Initial;
2250 return S_OK;
2251 }
2252
2253 reader_skipn(reader, 1);
2254
2255 /* this covers a case when text has leading whitespace chars */
2256 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2257 ptr++;
2258 }
2259
2260 return S_OK;
2261 }
2262
2263 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2264 static HRESULT reader_parse_content(xmlreader *reader)
2265 {
2266 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2267 static const WCHAR etagW[] = {'<','/',0};
2268 static const WCHAR ampW[] = {'&',0};
2269
2270 if (reader->resumestate != XmlReadResumeState_Initial)
2271 {
2272 switch (reader->resumestate)
2273 {
2274 case XmlReadResumeState_CDATA:
2275 return reader_parse_cdata(reader);
2276 case XmlReadResumeState_Comment:
2277 return reader_parse_comment(reader);
2278 case XmlReadResumeState_PIBody:
2279 case XmlReadResumeState_PITarget:
2280 return reader_parse_pi(reader);
2281 case XmlReadResumeState_CharData:
2282 return reader_parse_chardata(reader);
2283 default:
2284 ERR("unknown resume state %d\n", reader->resumestate);
2285 }
2286 }
2287
2288 reader_shrink(reader);
2289
2290 /* handle end tag here, it indicates end of content as well */
2291 if (!reader_cmp(reader, etagW))
2292 return reader_parse_endtag(reader);
2293
2294 if (!reader_cmp(reader, commentW))
2295 return reader_parse_comment(reader);
2296
2297 if (!reader_cmp(reader, piW))
2298 return reader_parse_pi(reader);
2299
2300 if (!reader_cmp(reader, cdstartW))
2301 return reader_parse_cdata(reader);
2302
2303 if (!reader_cmp(reader, ampW))
2304 return reader_parse_reference(reader);
2305
2306 if (!reader_cmp(reader, ltW))
2307 return reader_parse_element(reader);
2308
2309 /* what's left must be CharData */
2310 return reader_parse_chardata(reader);
2311 }
2312
2313 static HRESULT reader_parse_nextnode(xmlreader *reader)
2314 {
2315 HRESULT hr;
2316
2317 if (!is_reader_pending(reader))
2318 reader_clear_attrs(reader);
2319
2320 while (1)
2321 {
2322 switch (reader->instate)
2323 {
2324 /* if it's a first call for a new input we need to detect stream encoding */
2325 case XmlReadInState_Initial:
2326 {
2327 xml_encoding enc;
2328
2329 hr = readerinput_growraw(reader->input);
2330 if (FAILED(hr)) return hr;
2331
2332 /* try to detect encoding by BOM or data and set input code page */
2333 hr = readerinput_detectencoding(reader->input, &enc);
2334 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
2335 if (FAILED(hr)) return hr;
2336
2337 /* always switch first time cause we have to put something in */
2338 readerinput_switchencoding(reader->input, enc);
2339
2340 /* parse xml declaration */
2341 hr = reader_parse_xmldecl(reader);
2342 if (FAILED(hr)) return hr;
2343
2344 readerinput_shrinkraw(reader->input, -1);
2345 reader->instate = XmlReadInState_Misc_DTD;
2346 if (hr == S_OK) return hr;
2347 }
2348 break;
2349 case XmlReadInState_Misc_DTD:
2350 hr = reader_parse_misc(reader);
2351 if (FAILED(hr)) return hr;
2352
2353 if (hr == S_FALSE)
2354 reader->instate = XmlReadInState_DTD;
2355 else
2356 return hr;
2357 break;
2358 case XmlReadInState_DTD:
2359 hr = reader_parse_dtd(reader);
2360 if (FAILED(hr)) return hr;
2361
2362 if (hr == S_OK)
2363 {
2364 reader->instate = XmlReadInState_DTD_Misc;
2365 return hr;
2366 }
2367 else
2368 reader->instate = XmlReadInState_Element;
2369 break;
2370 case XmlReadInState_DTD_Misc:
2371 hr = reader_parse_misc(reader);
2372 if (FAILED(hr)) return hr;
2373
2374 if (hr == S_FALSE)
2375 reader->instate = XmlReadInState_Element;
2376 else
2377 return hr;
2378 break;
2379 case XmlReadInState_Element:
2380 return reader_parse_element(reader);
2381 case XmlReadInState_Content:
2382 return reader_parse_content(reader);
2383 case XmlReadInState_MiscEnd:
2384 hr = reader_parse_misc(reader);
2385 if (FAILED(hr)) return hr;
2386
2387 if (hr == S_FALSE)
2388 reader->instate = XmlReadInState_Eof;
2389 return hr;
2390 case XmlReadInState_Eof:
2391 return S_FALSE;
2392 default:
2393 FIXME("internal state %d not handled\n", reader->instate);
2394 return E_NOTIMPL;
2395 }
2396 }
2397
2398 return E_NOTIMPL;
2399 }
2400
2401 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2402 {
2403 xmlreader *This = impl_from_IXmlReader(iface);
2404
2405 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2406
2407 if (IsEqualGUID(riid, &IID_IUnknown) ||
2408 IsEqualGUID(riid, &IID_IXmlReader))
2409 {
2410 *ppvObject = iface;
2411 }
2412 else
2413 {
2414 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2415 *ppvObject = NULL;
2416 return E_NOINTERFACE;
2417 }
2418
2419 IXmlReader_AddRef(iface);
2420
2421 return S_OK;
2422 }
2423
2424 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2425 {
2426 xmlreader *This = impl_from_IXmlReader(iface);
2427 ULONG ref = InterlockedIncrement(&This->ref);
2428 TRACE("(%p)->(%d)\n", This, ref);
2429 return ref;
2430 }
2431
2432 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2433 {
2434 xmlreader *This = impl_from_IXmlReader(iface);
2435 LONG ref = InterlockedDecrement(&This->ref);
2436
2437 TRACE("(%p)->(%d)\n", This, ref);
2438
2439 if (ref == 0)
2440 {
2441 IMalloc *imalloc = This->imalloc;
2442 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2443 reader_clear_attrs(This);
2444 reader_clear_elements(This);
2445 reader_free_strvalues(This);
2446 reader_free(This, This);
2447 if (imalloc) IMalloc_Release(imalloc);
2448 }
2449
2450 return ref;
2451 }
2452
2453 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2454 {
2455 xmlreader *This = impl_from_IXmlReader(iface);
2456 IXmlReaderInput *readerinput;
2457 HRESULT hr;
2458
2459 TRACE("(%p)->(%p)\n", This, input);
2460
2461 if (This->input)
2462 {
2463 readerinput_release_stream(This->input);
2464 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2465 This->input = NULL;
2466 }
2467
2468 This->line = This->pos = 0;
2469 reader_clear_elements(This);
2470 This->depth = 0;
2471 This->resumestate = XmlReadResumeState_Initial;
2472 memset(This->resume, 0, sizeof(This->resume));
2473
2474 /* just reset current input */
2475 if (!input)
2476 {
2477 This->state = XmlReadState_Initial;
2478 return S_OK;
2479 }
2480
2481 /* now try IXmlReaderInput, ISequentialStream, IStream */
2482 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2483 if (hr == S_OK)
2484 {
2485 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2486 This->input = impl_from_IXmlReaderInput(readerinput);
2487 else
2488 {
2489 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2490 readerinput, readerinput->lpVtbl);
2491 IUnknown_Release(readerinput);
2492 return E_FAIL;
2493
2494 }
2495 }
2496
2497 if (hr != S_OK || !readerinput)
2498 {
2499 /* create IXmlReaderInput basing on supplied interface */
2500 hr = CreateXmlReaderInputWithEncodingName(input,
2501 This->imalloc, NULL, FALSE, NULL, &readerinput);
2502 if (hr != S_OK) return hr;
2503 This->input = impl_from_IXmlReaderInput(readerinput);
2504 }
2505
2506 /* set stream for supplied IXmlReaderInput */
2507 hr = readerinput_query_for_stream(This->input);
2508 if (hr == S_OK)
2509 {
2510 This->state = XmlReadState_Initial;
2511 This->instate = XmlReadInState_Initial;
2512 }
2513
2514 return hr;
2515 }
2516
2517 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2518 {
2519 xmlreader *This = impl_from_IXmlReader(iface);
2520
2521 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2522
2523 if (!value) return E_INVALIDARG;
2524
2525 switch (property)
2526 {
2527 case XmlReaderProperty_DtdProcessing:
2528 *value = This->dtdmode;
2529 break;
2530 case XmlReaderProperty_ReadState:
2531 *value = This->state;
2532 break;
2533 default:
2534 FIXME("Unimplemented property (%u)\n", property);
2535 return E_NOTIMPL;
2536 }
2537
2538 return S_OK;
2539 }
2540
2541 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2542 {
2543 xmlreader *This = impl_from_IXmlReader(iface);
2544
2545 TRACE("(%p)->(%s %lu)\n", This, debugstr_reader_prop(property), value);
2546
2547 switch (property)
2548 {
2549 case XmlReaderProperty_DtdProcessing:
2550 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2551 This->dtdmode = value;
2552 break;
2553 default:
2554 FIXME("Unimplemented property (%u)\n", property);
2555 return E_NOTIMPL;
2556 }
2557
2558 return S_OK;
2559 }
2560
2561 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2562 {
2563 xmlreader *This = impl_from_IXmlReader(iface);
2564 XmlNodeType oldtype = This->nodetype;
2565 HRESULT hr;
2566
2567 TRACE("(%p)->(%p)\n", This, nodetype);
2568
2569 if (This->state == XmlReadState_Closed) return S_FALSE;
2570
2571 hr = reader_parse_nextnode(This);
2572 if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
2573 This->state = XmlReadState_Interactive;
2574 if (hr == S_OK)
2575 {
2576 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2577 *nodetype = This->nodetype;
2578 }
2579
2580 return hr;
2581 }
2582
2583 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2584 {
2585 xmlreader *This = impl_from_IXmlReader(iface);
2586 TRACE("(%p)->(%p)\n", This, node_type);
2587
2588 *node_type = reader_get_nodetype(This);
2589 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2590 }
2591
2592 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2593 {
2594 xmlreader *This = impl_from_IXmlReader(iface);
2595
2596 TRACE("(%p)\n", This);
2597
2598 if (!This->attr_count) return S_FALSE;
2599 This->attr = LIST_ENTRY(list_head(&This->attrs), struct attribute, entry);
2600 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2601 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2602
2603 return S_OK;
2604 }
2605
2606 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2607 {
2608 xmlreader *This = impl_from_IXmlReader(iface);
2609 const struct list *next;
2610
2611 TRACE("(%p)\n", This);
2612
2613 if (!This->attr_count) return S_FALSE;
2614
2615 if (!This->attr)
2616 return IXmlReader_MoveToFirstAttribute(iface);
2617
2618 next = list_next(&This->attrs, &This->attr->entry);
2619 if (next)
2620 {
2621 This->attr = LIST_ENTRY(next, struct attribute, entry);
2622 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2623 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2624 }
2625
2626 return next ? S_OK : S_FALSE;
2627 }
2628
2629 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
2630 LPCWSTR local_name,
2631 LPCWSTR namespaceUri)
2632 {
2633 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
2634 return E_NOTIMPL;
2635 }
2636
2637 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
2638 {
2639 xmlreader *This = impl_from_IXmlReader(iface);
2640 struct element *elem;
2641
2642 TRACE("(%p)\n", This);
2643
2644 if (!This->attr_count) return S_FALSE;
2645 This->attr = NULL;
2646
2647 /* FIXME: support other node types with 'attributes' like DTD */
2648 elem = LIST_ENTRY(list_head(&This->elements), struct element, entry);
2649 if (elem)
2650 {
2651 reader_set_strvalue(This, StringValue_QualifiedName, &elem->qname);
2652 reader_set_strvalue(This, StringValue_LocalName, &elem->localname);
2653 }
2654
2655 return S_OK;
2656 }
2657
2658 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2659 {
2660 xmlreader *This = impl_from_IXmlReader(iface);
2661
2662 TRACE("(%p)->(%p %p)\n", This, name, len);
2663 *name = This->strvalues[StringValue_QualifiedName].str;
2664 *len = This->strvalues[StringValue_QualifiedName].len;
2665 return S_OK;
2666 }
2667
2668 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface,
2669 LPCWSTR *namespaceUri,
2670 UINT *namespaceUri_length)
2671 {
2672 FIXME("(%p %p %p): stub\n", iface, namespaceUri, namespaceUri_length);
2673 return E_NOTIMPL;
2674 }
2675
2676 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2677 {
2678 xmlreader *This = impl_from_IXmlReader(iface);
2679
2680 TRACE("(%p)->(%p %p)\n", This, name, len);
2681 *name = This->strvalues[StringValue_LocalName].str;
2682 if (len) *len = This->strvalues[StringValue_LocalName].len;
2683 return S_OK;
2684 }
2685
2686 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, LPCWSTR *prefix, UINT *len)
2687 {
2688 xmlreader *This = impl_from_IXmlReader(iface);
2689
2690 TRACE("(%p)->(%p %p)\n", This, prefix, len);
2691 *prefix = This->strvalues[StringValue_Prefix].str;
2692 if (len) *len = This->strvalues[StringValue_Prefix].len;
2693 return S_OK;
2694 }
2695
2696 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
2697 {
2698 xmlreader *reader = impl_from_IXmlReader(iface);
2699 strval *val = &reader->strvalues[StringValue_Value];
2700
2701 TRACE("(%p)->(%p %p)\n", reader, value, len);
2702
2703 *value = NULL;
2704
2705 if ((reader->nodetype == XmlNodeType_Comment && !val->str) || is_reader_pending(reader))
2706 {
2707 XmlNodeType type;
2708 HRESULT hr;
2709
2710 hr = IXmlReader_Read(iface, &type);
2711 if (FAILED(hr)) return hr;
2712
2713 /* return if still pending, partially read values are not reported */
2714 if (is_reader_pending(reader)) return E_PENDING;
2715 }
2716
2717 if (!val->str)
2718 {
2719 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
2720 if (!ptr) return E_OUTOFMEMORY;
2721 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
2722 ptr[val->len] = 0;
2723 val->str = ptr;
2724 }
2725
2726 *value = val->str;
2727 if (len) *len = val->len;
2728 return S_OK;
2729 }
2730
2731 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
2732 {
2733 xmlreader *reader = impl_from_IXmlReader(iface);
2734 strval *val = &reader->strvalues[StringValue_Value];
2735 UINT len;
2736
2737 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
2738
2739 /* Value is already allocated, chunked reads are not possible. */
2740 if (val->str) return S_FALSE;
2741
2742 if (val->len)
2743 {
2744 len = min(chunk_size, val->len);
2745 memcpy(buffer, reader_get_ptr2(reader, val->start), len);
2746 val->start += len;
2747 val->len -= len;
2748 if (read) *read = len;
2749 }
2750
2751 return S_OK;
2752 }
2753
2754 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
2755 LPCWSTR *baseUri,
2756 UINT *baseUri_length)
2757 {
2758 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
2759 return E_NOTIMPL;
2760 }
2761
2762 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
2763 {
2764 FIXME("(%p): stub\n", iface);
2765 return FALSE;
2766 }
2767
2768 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
2769 {
2770 xmlreader *This = impl_from_IXmlReader(iface);
2771 TRACE("(%p)\n", This);
2772 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
2773 when current node is start tag of an element */
2774 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->empty_element : FALSE;
2775 }
2776
2777 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *lineNumber)
2778 {
2779 xmlreader *This = impl_from_IXmlReader(iface);
2780
2781 TRACE("(%p %p)\n", This, lineNumber);
2782
2783 if (!lineNumber) return E_INVALIDARG;
2784
2785 *lineNumber = This->line;
2786
2787 return S_OK;
2788 }
2789
2790 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *linePosition)
2791 {
2792 xmlreader *This = impl_from_IXmlReader(iface);
2793
2794 TRACE("(%p %p)\n", This, linePosition);
2795
2796 if (!linePosition) return E_INVALIDARG;
2797
2798 *linePosition = This->pos;
2799
2800 return S_OK;
2801 }
2802
2803 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
2804 {
2805 xmlreader *This = impl_from_IXmlReader(iface);
2806
2807 TRACE("(%p)->(%p)\n", This, count);
2808
2809 if (!count) return E_INVALIDARG;
2810
2811 *count = This->attr_count;
2812 return S_OK;
2813 }
2814
2815 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
2816 {
2817 xmlreader *This = impl_from_IXmlReader(iface);
2818 TRACE("(%p)->(%p)\n", This, depth);
2819 *depth = This->depth;
2820 return S_OK;
2821 }
2822
2823 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
2824 {
2825 FIXME("(%p): stub\n", iface);
2826 return E_NOTIMPL;
2827 }
2828
2829 static const struct IXmlReaderVtbl xmlreader_vtbl =
2830 {
2831 xmlreader_QueryInterface,
2832 xmlreader_AddRef,
2833 xmlreader_Release,
2834 xmlreader_SetInput,
2835 xmlreader_GetProperty,
2836 xmlreader_SetProperty,
2837 xmlreader_Read,
2838 xmlreader_GetNodeType,
2839 xmlreader_MoveToFirstAttribute,
2840 xmlreader_MoveToNextAttribute,
2841 xmlreader_MoveToAttributeByName,
2842 xmlreader_MoveToElement,
2843 xmlreader_GetQualifiedName,
2844 xmlreader_GetNamespaceUri,
2845 xmlreader_GetLocalName,
2846 xmlreader_GetPrefix,
2847 xmlreader_GetValue,
2848 xmlreader_ReadValueChunk,
2849 xmlreader_GetBaseUri,
2850 xmlreader_IsDefault,
2851 xmlreader_IsEmptyElement,
2852 xmlreader_GetLineNumber,
2853 xmlreader_GetLinePosition,
2854 xmlreader_GetAttributeCount,
2855 xmlreader_GetDepth,
2856 xmlreader_IsEOF
2857 };
2858
2859 /** IXmlReaderInput **/
2860 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
2861 {
2862 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2863
2864 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2865
2866 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
2867 IsEqualGUID(riid, &IID_IUnknown))
2868 {
2869 *ppvObject = iface;
2870 }
2871 else
2872 {
2873 WARN("interface %s not implemented\n", debugstr_guid(riid));
2874 *ppvObject = NULL;
2875 return E_NOINTERFACE;
2876 }
2877
2878 IUnknown_AddRef(iface);
2879
2880 return S_OK;
2881 }
2882
2883 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
2884 {
2885 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2886 ULONG ref = InterlockedIncrement(&This->ref);
2887 TRACE("(%p)->(%d)\n", This, ref);
2888 return ref;
2889 }
2890
2891 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
2892 {
2893 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2894 LONG ref = InterlockedDecrement(&This->ref);
2895
2896 TRACE("(%p)->(%d)\n", This, ref);
2897
2898 if (ref == 0)
2899 {
2900 IMalloc *imalloc = This->imalloc;
2901 if (This->input) IUnknown_Release(This->input);
2902 if (This->stream) ISequentialStream_Release(This->stream);
2903 if (This->buffer) free_input_buffer(This->buffer);
2904 readerinput_free(This, This->baseuri);
2905 readerinput_free(This, This);
2906 if (imalloc) IMalloc_Release(imalloc);
2907 }
2908
2909 return ref;
2910 }
2911
2912 static const struct IUnknownVtbl xmlreaderinputvtbl =
2913 {
2914 xmlreaderinput_QueryInterface,
2915 xmlreaderinput_AddRef,
2916 xmlreaderinput_Release
2917 };
2918
2919 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
2920 {
2921 xmlreader *reader;
2922 int i;
2923
2924 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
2925
2926 if (!IsEqualGUID(riid, &IID_IXmlReader))
2927 {
2928 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
2929 return E_FAIL;
2930 }
2931
2932 if (imalloc)
2933 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
2934 else
2935 reader = heap_alloc(sizeof(*reader));
2936 if(!reader) return E_OUTOFMEMORY;
2937
2938 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
2939 reader->ref = 1;
2940 reader->input = NULL;
2941 reader->state = XmlReadState_Closed;
2942 reader->instate = XmlReadInState_Initial;
2943 reader->resumestate = XmlReadResumeState_Initial;
2944 reader->dtdmode = DtdProcessing_Prohibit;
2945 reader->line = reader->pos = 0;
2946 reader->imalloc = imalloc;
2947 if (imalloc) IMalloc_AddRef(imalloc);
2948 reader->nodetype = XmlNodeType_None;
2949 list_init(&reader->attrs);
2950 reader->attr_count = 0;
2951 reader->attr = NULL;
2952 list_init(&reader->elements);
2953 reader->depth = 0;
2954 reader->max_depth = 256;
2955 reader->empty_element = FALSE;
2956 memset(reader->resume, 0, sizeof(reader->resume));
2957
2958 for (i = 0; i < StringValue_Last; i++)
2959 reader->strvalues[i] = strval_empty;
2960
2961 *obj = &reader->IXmlReader_iface;
2962
2963 TRACE("returning iface %p\n", *obj);
2964
2965 return S_OK;
2966 }
2967
2968 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
2969 IMalloc *imalloc,
2970 LPCWSTR encoding,
2971 BOOL hint,
2972 LPCWSTR base_uri,
2973 IXmlReaderInput **ppInput)
2974 {
2975 xmlreaderinput *readerinput;
2976 HRESULT hr;
2977
2978 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
2979 hint, wine_dbgstr_w(base_uri), ppInput);
2980
2981 if (!stream || !ppInput) return E_INVALIDARG;
2982
2983 if (imalloc)
2984 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
2985 else
2986 readerinput = heap_alloc(sizeof(*readerinput));
2987 if(!readerinput) return E_OUTOFMEMORY;
2988
2989 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
2990 readerinput->ref = 1;
2991 readerinput->imalloc = imalloc;
2992 readerinput->stream = NULL;
2993 if (imalloc) IMalloc_AddRef(imalloc);
2994 readerinput->encoding = parse_encoding_name(encoding, -1);
2995 readerinput->hint = hint;
2996 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
2997 readerinput->pending = 0;
2998
2999 hr = alloc_input_buffer(readerinput);
3000 if (hr != S_OK)
3001 {
3002 readerinput_free(readerinput, readerinput->baseuri);
3003 readerinput_free(readerinput, readerinput);
3004 if (imalloc) IMalloc_Release(imalloc);
3005 return hr;
3006 }
3007 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3008
3009 *ppInput = &readerinput->IXmlReaderInput_iface;
3010
3011 TRACE("returning iface %p\n", *ppInput);
3012
3013 return S_OK;
3014 }