Sync with trunk revision 63128.
[reactos.git] / dll / win32 / xmllite / reader.c
1 /*
2 * IXmlReader implementation
3 *
4 * Copyright 2010, 2012-2013 Nikolay Sivov
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 */
20
21 #include "xmllite_private.h"
22
23 #include <stdio.h>
24
25 #include <wine/list.h>
26 #include <wine/unicode.h>
27
28 /* not defined in public headers */
29 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
30
/* Internal parser state stored in xmlreader.instate. The enumerator names follow the
   parts of a document (XML declaration, DTD, element, content, trailing Misc, end of input).
   NOTE(review): the transitions between states are driven by code outside this section. */
31 typedef enum
32 {
33 XmlReadInState_Initial,
34 XmlReadInState_XmlDecl,
35 XmlReadInState_Misc_DTD,
36 XmlReadInState_DTD,
37 XmlReadInState_DTD_Misc,
38 XmlReadInState_Element,
39 XmlReadInState_Content,
40 XmlReadInState_MiscEnd, /* optional Misc at the end of a document */
41 XmlReadInState_Eof
42 } XmlReaderInternalState;
43
44 /* This state denotes where parsing was interrupted by input problem.
45 Reader resumes parsing using this information. */
46 typedef enum
47 {
48 XmlReadResumeState_Initial, /* presumably: nothing to resume - confirm against users of xmlreader.resumestate */
49 XmlReadResumeState_PITarget,
50 XmlReadResumeState_PIBody,
51 XmlReadResumeState_CDATA,
52 XmlReadResumeState_Comment,
53 XmlReadResumeState_STag,
54 XmlReadResumeState_CharData,
55 XmlReadResumeState_Whitespace
56 } XmlReaderResumeState; /* stored in xmlreader.resumestate */
57
58 /* saved pointer index to resume from particular input position */
59 typedef enum
60 {
61 XmlReadResume_Name, /* PITarget, name for NCName, prefix for QName */
62 XmlReadResume_Local, /* local for QName */
63 XmlReadResume_Body, /* PI body, comment text, CDATA text, CharData text */
64 XmlReadResume_Last /* number of slots; sizes xmlreader.resume[] */
65 } XmlReaderResume;
66
/* Index into xmlreader.strvalues[], the per-node string slots. */
67 typedef enum
68 {
69 StringValue_LocalName,
70 StringValue_Prefix,
71 StringValue_QualifiedName,
72 StringValue_Value, /* reader_set_strvalue() defers allocation for this slot */
73 StringValue_Last /* number of slots; sizes xmlreader.strvalues[] */
74 } XmlReaderStringValue;
75
/* Encoding names referenced by xml_encoding_map. */
76 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
77 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
78
/* Null-terminated markup tokens. quoteW and dblquoteW are matched with reader_cmp() by the
   declaration parsers; the remaining tokens are presumably used by code outside this section. */
79 static const WCHAR dblquoteW[] = {'\"',0};
80 static const WCHAR quoteW[] = {'\'',0};
81 static const WCHAR ltW[] = {'<',0};
82 static const WCHAR gtW[] = {'>',0};
83 static const WCHAR commentW[] = {'<','!','-','-',0};
84 static const WCHAR piW[] = {'<','?',0};
85
86 static const char *debugstr_nodetype(XmlNodeType nodetype)
87 {
88 static const char * const type_names[] =
89 {
90 "None",
91 "Element",
92 "Attribute",
93 "Text",
94 "CDATA",
95 "",
96 "",
97 "ProcessingInstruction",
98 "Comment",
99 "",
100 "DocumentType",
101 "",
102 "",
103 "Whitespace",
104 "",
105 "EndElement",
106 "",
107 "XmlDeclaration"
108 };
109
110 if (nodetype > _XmlNodeType_Last)
111 return wine_dbg_sprintf("unknown type=%d", nodetype);
112
113 return type_names[nodetype];
114 }
115
116 static const char *debugstr_prop(XmlReaderProperty prop)
117 {
118 static const char * const prop_names[] =
119 {
120 "MultiLanguage",
121 "ConformanceLevel",
122 "RandomAccess",
123 "XmlResolver",
124 "DtdProcessing",
125 "ReadState",
126 "MaxElementDepth",
127 "MaxEntityExpansion"
128 };
129
130 if (prop > _XmlReaderProperty_Last)
131 return wine_dbg_sprintf("unknown property=%d", prop);
132
133 return prop_names[prop];
134 }
135
/* One row of xml_encoding_map: an encoding name with its enum value and code page. */
136 struct xml_encoding_data
137 {
138 const WCHAR *name; /* compared case-insensitively by parse_encoding_name() */
139 xml_encoding enc;
140 UINT cp; /* code page returned by get_code_page() */
141 };
142
/* Supported encodings. The table is binary-searched by name in parse_encoding_name() and
   indexed directly by xml_encoding value in get_code_page(), so the rows must stay sorted
   by name and in enum order. UTF-16 uses ~0 as its code page, which reader_more() treats
   as "copy without conversion". */
143 static const struct xml_encoding_data xml_encoding_map[] = {
144 { utf16W, XmlEncoding_UTF16, ~0 },
145 { utf8W, XmlEncoding_UTF8, CP_UTF8 }
146 };
147
/* Growable byte buffer used both for raw stream data and for converted UTF-16 text. */
148 typedef struct
149 {
150 char *data; /* storage, allocated with the owning input's allocator */
151 UINT cur; /* read position: a WCHAR index for the UTF-16 buffer (see reader_get_ptr2), a byte offset for the raw buffer */
152 unsigned int allocated; /* size of 'data' in bytes */
153 unsigned int written; /* bytes of 'data' currently holding content */
154 } encoded_buffer;
155
156 typedef struct input_buffer input_buffer;
157
/* Reader input object: wraps the caller-supplied source and owns the buffers filled from it. */
158 typedef struct
159 {
160 IXmlReaderInput IXmlReaderInput_iface;
161 LONG ref;
162 /* reference passed on IXmlReaderInput creation, is kept when input is created */
163 IUnknown *input;
164 IMalloc *imalloc; /* allocator passed to m_alloc/m_realloc/m_free by the readerinput_* helpers */
165 xml_encoding encoding;
166 BOOL hint;
167 WCHAR *baseuri;
168 /* stream reference set after SetInput() call from reader,
169 stored as sequential stream, cause currently
170 optimizations possible with IStream aren't implemented */
171 ISequentialStream *stream;
172 input_buffer *buffer; /* created by alloc_input_buffer() */
173 unsigned int pending : 1; /* set when the last stream read returned E_PENDING (readerinput_growraw) */
174 } xmlreaderinput;
175
176 static const struct IUnknownVtbl xmlreaderinputvtbl;
177
178 /* Structure to hold parsed string of specific length.
179
180 Reader stores node value as 'start' pointer, on request
181 a null-terminated version of it is allocated.
182
183 To init a strval variable use reader_init_strval(),
184 to set strval as a reader value use reader_set_strval().
185 */
186 typedef struct
187 {
188 WCHAR *str; /* allocated null-terminated string; when NULL the text is read from the input buffer at 'start' (see reader_get_strptr) */
189 UINT len; /* length in WCHARs, altered after ReadValueChunk */
190 UINT start; /* input position where value starts */
191 } strval;
192
/* Shared empty value. Its 'str' pointer is compared by address (reader_free_strvalued,
   reader_strvaldup, reader_set_strvalue) to recognise values that own no allocation. */
193 static WCHAR emptyW[] = {0};
194 static const strval strval_empty = { emptyW };
195
/* One parsed attribute of the current node, linked into xmlreader.attrs by reader_add_attr(). */
196 struct attribute
197 {
198 struct list entry;
199 strval localname;
200 strval value;
201 };
202
/* Record of an open element kept on xmlreader.elements; reader_push_element() stores
   duplicated copies of both names in it. */
203 struct element
204 {
205 struct list entry;
206 strval qname;
207 strval localname;
208 };
209
/* Reader object state. */
210 typedef struct
211 {
212 IXmlReader IXmlReader_iface;
213 LONG ref;
214 xmlreaderinput *input; /* current input; its UTF-16 buffer is what the reader_get_* helpers read */
215 IMalloc *imalloc; /* allocator passed to m_alloc/m_free by reader_alloc/reader_free */
216 XmlReadState state;
217 XmlReaderInternalState instate;
218 XmlReaderResumeState resumestate;
219 XmlNodeType nodetype;
220 DtdProcessing dtdmode;
221 UINT line, pos; /* reader position in XML stream */
222 struct list attrs; /* attributes list for current node */
223 struct attribute *attr; /* current attribute */
224 UINT attr_count; /* entries in 'attrs', maintained by reader_add_attr/reader_clear_attrs */
225 struct list elements; /* open elements, most recent at the head (reader_push_element/reader_pop_element) */
226 strval strvalues[StringValue_Last]; /* indexed by XmlReaderStringValue */
227 UINT depth;
228 UINT max_depth; /* reader_inc_depth() fails once depth exceeds this */
229 BOOL empty_element;
230 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
231 } xmlreader;
232
/* Pair of buffers owned by an xmlreaderinput: raw stream bytes and their UTF-16 conversion. */
233 struct input_buffer
234 {
235 encoded_buffer utf16; /* converted text the parser reads from */
236 encoded_buffer encoded; /* raw bytes as read from the stream */
237 UINT code_page; /* set to ~0 on allocation (unknown); updated by readerinput_switchencoding() */
238 xmlreaderinput *input; /* owning input, used for allocation and freeing */
239 };
240
241 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
242 {
243 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
244 }
245
246 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
247 {
248 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
249 }
250
251 static inline void *m_realloc(IMalloc *imalloc, void *mem, size_t len)
252 {
253 if (imalloc)
254 return IMalloc_Realloc(imalloc, mem, len);
255 else
256 return heap_realloc(mem, len);
257 }
258
259 /* reader memory allocation functions */
260 static inline void *reader_alloc(xmlreader *reader, size_t len)
261 {
262 return m_alloc(reader->imalloc, len);
263 }
264
265 static inline void reader_free(xmlreader *reader, void *mem)
266 {
267 m_free(reader->imalloc, mem);
268 }
269
270 /* Just return pointer from offset, no attempt to read more. */
271 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
272 {
273 encoded_buffer *buffer = &reader->input->buffer->utf16;
274 return (WCHAR*)buffer->data + offset;
275 }
276
277 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
278 {
279 return v->str ? v->str : reader_get_ptr2(reader, v->start);
280 }
281
282 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
283 {
284 *dest = *src;
285
286 if (src->str != strval_empty.str)
287 {
288 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
289 if (!dest->str) return E_OUTOFMEMORY;
290 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
291 dest->str[dest->len] = 0;
292 dest->start = 0;
293 }
294
295 return S_OK;
296 }
297
298 /* reader input memory allocation functions */
299 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
300 {
301 return m_alloc(input->imalloc, len);
302 }
303
304 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
305 {
306 return m_realloc(input->imalloc, mem, len);
307 }
308
309 static inline void readerinput_free(xmlreaderinput *input, void *mem)
310 {
311 m_free(input->imalloc, mem);
312 }
313
314 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
315 {
316 LPWSTR ret = NULL;
317
318 if(str) {
319 DWORD size;
320
321 size = (strlenW(str)+1)*sizeof(WCHAR);
322 ret = readerinput_alloc(input, size);
323 if (ret) memcpy(ret, str, size);
324 }
325
326 return ret;
327 }
328
329 static void reader_clear_attrs(xmlreader *reader)
330 {
331 struct attribute *attr, *attr2;
332 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
333 {
334 reader_free(reader, attr);
335 }
336 list_init(&reader->attrs);
337 reader->attr_count = 0;
338 reader->attr = NULL;
339 }
340
341 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
342 while we are on a node with attributes */
343 static HRESULT reader_add_attr(xmlreader *reader, strval *localname, strval *value)
344 {
345 struct attribute *attr;
346
347 attr = reader_alloc(reader, sizeof(*attr));
348 if (!attr) return E_OUTOFMEMORY;
349
350 attr->localname = *localname;
351 attr->value = *value;
352 list_add_tail(&reader->attrs, &attr->entry);
353 reader->attr_count++;
354
355 return S_OK;
356 }
357
358 /* This one frees stored string value if needed */
359 static void reader_free_strvalued(xmlreader *reader, strval *v)
360 {
361 if (v->str != strval_empty.str)
362 {
363 reader_free(reader, v->str);
364 *v = strval_empty;
365 }
366 }
367
368 /* returns length in WCHARs from 'start' to current buffer offset */
369 static inline UINT reader_get_len(const xmlreader *reader, UINT start)
370 {
371 return reader->input->buffer->utf16.cur - start;
372 }
373
374 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
375 {
376 v->start = start;
377 v->len = len;
378 v->str = NULL;
379 }
380
381 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
382 {
383 return debugstr_wn(reader_get_strptr(reader, v), v->len);
384 }
385
386 /* used to initialize from constant string */
387 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
388 {
389 v->start = 0;
390 v->len = len;
391 v->str = str;
392 }
393
394 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
395 {
396 reader_free_strvalued(reader, &reader->strvalues[type]);
397 }
398
399 static void reader_free_strvalues(xmlreader *reader)
400 {
401 int type;
402 for (type = 0; type < StringValue_Last; type++)
403 reader_free_strvalue(reader, type);
404 }
405
406 /* This helper should only be used to test if strings are the same,
407 it doesn't try to sort. */
408 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
409 {
410 if (str1->len != str2->len) return 0;
411 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
412 }
413
414 static void reader_clear_elements(xmlreader *reader)
415 {
416 struct element *elem, *elem2;
417 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
418 {
419 reader_free_strvalued(reader, &elem->qname);
420 reader_free(reader, elem);
421 }
422 list_init(&reader->elements);
423 reader->empty_element = FALSE;
424 }
425
426 static HRESULT reader_inc_depth(xmlreader *reader)
427 {
428 if (++reader->depth > reader->max_depth) return SC_E_MAXELEMENTDEPTH;
429 return S_OK;
430 }
431
432 static void reader_dec_depth(xmlreader *reader)
433 {
434 if (reader->depth > 1) reader->depth--;
435 }
436
437 static HRESULT reader_push_element(xmlreader *reader, strval *qname, strval *localname)
438 {
439 struct element *elem;
440 HRESULT hr;
441
442 elem = reader_alloc(reader, sizeof(*elem));
443 if (!elem) return E_OUTOFMEMORY;
444
445 hr = reader_strvaldup(reader, qname, &elem->qname);
446 if (FAILED(hr)) {
447 reader_free(reader, elem);
448 return hr;
449 }
450
451 hr = reader_strvaldup(reader, localname, &elem->localname);
452 if (FAILED(hr))
453 {
454 reader_free_strvalued(reader, &elem->qname);
455 reader_free(reader, elem);
456 return hr;
457 }
458
459 if (!list_empty(&reader->elements))
460 {
461 hr = reader_inc_depth(reader);
462 if (FAILED(hr)) {
463 reader_free(reader, elem);
464 return hr;
465 }
466 }
467
468 list_add_head(&reader->elements, &elem->entry);
469 reader->empty_element = FALSE;
470 return hr;
471 }
472
473 static void reader_pop_element(xmlreader *reader)
474 {
475 struct element *elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
476
477 if (elem)
478 {
479 list_remove(&elem->entry);
480 reader_free_strvalued(reader, &elem->qname);
481 reader_free_strvalued(reader, &elem->localname);
482 reader_free(reader, elem);
483 reader_dec_depth(reader);
484 }
485 }
486
487 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
488 means node value is to be determined. */
489 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
490 {
491 strval *v = &reader->strvalues[type];
492
493 reader_free_strvalue(reader, type);
494 if (!value)
495 {
496 v->str = NULL;
497 v->start = 0;
498 v->len = 0;
499 return;
500 }
501
502 if (value->str == strval_empty.str)
503 *v = *value;
504 else
505 {
506 if (type == StringValue_Value)
507 {
508 /* defer allocation for value string */
509 v->str = NULL;
510 v->start = value->start;
511 v->len = value->len;
512 }
513 else
514 {
515 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
516 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
517 v->str[value->len] = 0;
518 v->len = value->len;
519 }
520 }
521 }
522
523 static inline int is_reader_pending(xmlreader *reader)
524 {
525 return reader->input->pending;
526 }
527
528 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
529 {
530 const int initial_len = 0x2000;
531 buffer->data = readerinput_alloc(input, initial_len);
532 if (!buffer->data) return E_OUTOFMEMORY;
533
534 memset(buffer->data, 0, 4);
535 buffer->cur = 0;
536 buffer->allocated = initial_len;
537 buffer->written = 0;
538
539 return S_OK;
540 }
541
542 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
543 {
544 readerinput_free(input, buffer->data);
545 }
546
547 static HRESULT get_code_page(xml_encoding encoding, UINT *cp)
548 {
549 if (encoding == XmlEncoding_Unknown)
550 {
551 FIXME("unsupported encoding %d\n", encoding);
552 return E_NOTIMPL;
553 }
554
555 *cp = xml_encoding_map[encoding].cp;
556
557 return S_OK;
558 }
559
560 xml_encoding parse_encoding_name(const WCHAR *name, int len)
561 {
562 int min, max, n, c;
563
564 if (!name) return XmlEncoding_Unknown;
565
566 min = 0;
567 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
568
569 while (min <= max)
570 {
571 n = (min+max)/2;
572
573 if (len != -1)
574 c = strncmpiW(xml_encoding_map[n].name, name, len);
575 else
576 c = strcmpiW(xml_encoding_map[n].name, name);
577 if (!c)
578 return xml_encoding_map[n].enc;
579
580 if (c > 0)
581 max = n-1;
582 else
583 min = n+1;
584 }
585
586 return XmlEncoding_Unknown;
587 }
588
589 static HRESULT alloc_input_buffer(xmlreaderinput *input)
590 {
591 input_buffer *buffer;
592 HRESULT hr;
593
594 input->buffer = NULL;
595
596 buffer = readerinput_alloc(input, sizeof(*buffer));
597 if (!buffer) return E_OUTOFMEMORY;
598
599 buffer->input = input;
600 buffer->code_page = ~0; /* code page is unknown at this point */
601 hr = init_encoded_buffer(input, &buffer->utf16);
602 if (hr != S_OK) {
603 readerinput_free(input, buffer);
604 return hr;
605 }
606
607 hr = init_encoded_buffer(input, &buffer->encoded);
608 if (hr != S_OK) {
609 free_encoded_buffer(input, &buffer->utf16);
610 readerinput_free(input, buffer);
611 return hr;
612 }
613
614 input->buffer = buffer;
615 return S_OK;
616 }
617
618 static void free_input_buffer(input_buffer *buffer)
619 {
620 free_encoded_buffer(buffer->input, &buffer->encoded);
621 free_encoded_buffer(buffer->input, &buffer->utf16);
622 readerinput_free(buffer->input, buffer);
623 }
624
625 static void readerinput_release_stream(xmlreaderinput *readerinput)
626 {
627 if (readerinput->stream) {
628 ISequentialStream_Release(readerinput->stream);
629 readerinput->stream = NULL;
630 }
631 }
632
633 /* Queries already stored interface for IStream/ISequentialStream.
634 Interface supplied on creation will be overwritten */
635 static HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
636 {
637 HRESULT hr;
638
639 readerinput_release_stream(readerinput);
640 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
641 if (hr != S_OK)
642 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
643
644 return hr;
645 }
646
647 /* reads a chunk to raw buffer */
648 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
649 {
650 encoded_buffer *buffer = &readerinput->buffer->encoded;
651 /* to make sure aligned length won't exceed allocated length */
652 ULONG len = buffer->allocated - buffer->written - 4;
653 ULONG read;
654 HRESULT hr;
655
656 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
657 variable width encodings like UTF-8 */
658 len = (len + 3) & ~3;
659 /* try to use allocated space or grow */
660 if (buffer->allocated - buffer->written < len)
661 {
662 buffer->allocated *= 2;
663 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
664 len = buffer->allocated - buffer->written;
665 }
666
667 read = 0;
668 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
669 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
670 readerinput->pending = hr == E_PENDING;
671 if (FAILED(hr)) return hr;
672 buffer->written += read;
673
674 return hr;
675 }
676
677 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
678 static void readerinput_grow(xmlreaderinput *readerinput, int length)
679 {
680 encoded_buffer *buffer = &readerinput->buffer->utf16;
681
682 length *= sizeof(WCHAR);
683 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
684 if (buffer->allocated < buffer->written + length + 4)
685 {
686 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
687 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
688 buffer->allocated = grown_size;
689 }
690 }
691
692 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
693 {
694 static const char startA[] = {'<','?'};
695 static const char commentA[] = {'<','!'};
696 encoded_buffer *buffer = &readerinput->buffer->encoded;
697 unsigned char *ptr = (unsigned char*)buffer->data;
698
699 return !memcmp(buffer->data, startA, sizeof(startA)) ||
700 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
701 /* test start byte */
702 (ptr[0] == '<' &&
703 (
704 (ptr[1] && (ptr[1] <= 0x7f)) ||
705 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
706 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
707 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
708 );
709 }
710
711 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
712 {
713 encoded_buffer *buffer = &readerinput->buffer->encoded;
714 static const WCHAR startW[] = {'<','?'};
715 static const WCHAR commentW[] = {'<','!'};
716 static const char utf8bom[] = {0xef,0xbb,0xbf};
717 static const char utf16lebom[] = {0xff,0xfe};
718
719 *enc = XmlEncoding_Unknown;
720
721 if (buffer->written <= 3)
722 {
723 HRESULT hr = readerinput_growraw(readerinput);
724 if (FAILED(hr)) return hr;
725 if (buffer->written <= 3) return MX_E_INPUTEND;
726 }
727
728 /* try start symbols if we have enough data to do that, input buffer should contain
729 first chunk already */
730 if (readerinput_is_utf8(readerinput))
731 *enc = XmlEncoding_UTF8;
732 else if (!memcmp(buffer->data, startW, sizeof(startW)) ||
733 !memcmp(buffer->data, commentW, sizeof(commentW)))
734 *enc = XmlEncoding_UTF16;
735 /* try with BOM now */
736 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
737 {
738 buffer->cur += sizeof(utf8bom);
739 *enc = XmlEncoding_UTF8;
740 }
741 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
742 {
743 buffer->cur += sizeof(utf16lebom);
744 *enc = XmlEncoding_UTF16;
745 }
746
747 return S_OK;
748 }
749
750 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
751 {
752 encoded_buffer *buffer = &readerinput->buffer->encoded;
753 int len = buffer->written;
754
755 /* complete single byte char */
756 if (!(buffer->data[len-1] & 0x80)) return len;
757
758 /* find start byte of multibyte char */
759 while (--len && !(buffer->data[len] & 0xc0))
760 ;
761
762 return len;
763 }
764
765 /* Returns byte length of complete char sequence for buffer code page,
766 it's relative to current buffer position which is currently used for BOM handling
767 only. */
768 static int readerinput_get_convlen(xmlreaderinput *readerinput)
769 {
770 encoded_buffer *buffer = &readerinput->buffer->encoded;
771 int len;
772
773 if (readerinput->buffer->code_page == CP_UTF8)
774 len = readerinput_get_utf8_convlen(readerinput);
775 else
776 len = buffer->written;
777
778 TRACE("%d\n", len - buffer->cur);
779 return len - buffer->cur;
780 }
781
782 /* It's possible that raw buffer has some leftovers from last conversion - some char
783 sequence that doesn't represent a full code point. Length argument should be calculated with
784 readerinput_get_convlen(), if it's -1 it will be calculated here. */
785 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
786 {
787 encoded_buffer *buffer = &readerinput->buffer->encoded;
788
789 if (len == -1)
790 len = readerinput_get_convlen(readerinput);
791
792 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
793 /* everything below cur is lost too */
794 buffer->written -= len + buffer->cur;
795 /* after this point we don't need cur offset really,
796 it's used only to mark where actual data begins when first chunk is read */
797 buffer->cur = 0;
798 }
799
800 /* note that raw buffer content is kept */
801 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
802 {
803 encoded_buffer *src = &readerinput->buffer->encoded;
804 encoded_buffer *dest = &readerinput->buffer->utf16;
805 int len, dest_len;
806 HRESULT hr;
807 WCHAR *ptr;
808 UINT cp;
809
810 hr = get_code_page(enc, &cp);
811 if (FAILED(hr)) return;
812
813 readerinput->buffer->code_page = cp;
814 len = readerinput_get_convlen(readerinput);
815
816 TRACE("switching to cp %d\n", cp);
817
818 /* just copy in this case */
819 if (enc == XmlEncoding_UTF16)
820 {
821 readerinput_grow(readerinput, len);
822 memcpy(dest->data, src->data + src->cur, len);
823 dest->written += len*sizeof(WCHAR);
824 return;
825 }
826
827 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
828 readerinput_grow(readerinput, dest_len);
829 ptr = (WCHAR*)dest->data;
830 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
831 ptr[dest_len] = 0;
832 dest->written += dest_len*sizeof(WCHAR);
833 }
834
835 /* shrinks parsed data a buffer begins with */
836 static void reader_shrink(xmlreader *reader)
837 {
838 encoded_buffer *buffer = &reader->input->buffer->utf16;
839
840 /* avoid to move too often using threshold shrink length */
841 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
842 {
843 buffer->written -= buffer->cur*sizeof(WCHAR);
844 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
845 buffer->cur = 0;
846 *(WCHAR*)&buffer->data[buffer->written] = 0;
847 }
848 }
849
850 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
851 It won't attempt to shrink but will grow destination buffer if needed */
852 static HRESULT reader_more(xmlreader *reader)
853 {
854 xmlreaderinput *readerinput = reader->input;
855 encoded_buffer *src = &readerinput->buffer->encoded;
856 encoded_buffer *dest = &readerinput->buffer->utf16;
857 UINT cp = readerinput->buffer->code_page;
858 int len, dest_len;
859 HRESULT hr;
860 WCHAR *ptr;
861
862 /* get some raw data from stream first */
863 hr = readerinput_growraw(readerinput);
864 len = readerinput_get_convlen(readerinput);
865
866 /* just copy for UTF-16 case */
867 if (cp == ~0)
868 {
869 readerinput_grow(readerinput, len);
870 memcpy(dest->data + dest->written, src->data + src->cur, len);
871 dest->written += len*sizeof(WCHAR);
872 return hr;
873 }
874
875 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
876 readerinput_grow(readerinput, dest_len);
877 ptr = (WCHAR*)(dest->data + dest->written);
878 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
879 ptr[dest_len] = 0;
880 dest->written += dest_len*sizeof(WCHAR);
881 /* get rid of processed data */
882 readerinput_shrinkraw(readerinput, len);
883
884 return hr;
885 }
886
887 static inline UINT reader_get_cur(xmlreader *reader)
888 {
889 return reader->input->buffer->utf16.cur;
890 }
891
892 static inline WCHAR *reader_get_ptr(xmlreader *reader)
893 {
894 encoded_buffer *buffer = &reader->input->buffer->utf16;
895 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
896 if (!*ptr) reader_more(reader);
897 return (WCHAR*)buffer->data + buffer->cur;
898 }
899
900 static int reader_cmp(xmlreader *reader, const WCHAR *str)
901 {
902 const WCHAR *ptr = reader_get_ptr(reader);
903 return strncmpW(str, ptr, strlenW(str));
904 }
905
906 /* moves cursor n WCHARs forward */
907 static void reader_skipn(xmlreader *reader, int n)
908 {
909 encoded_buffer *buffer = &reader->input->buffer->utf16;
910 const WCHAR *ptr = reader_get_ptr(reader);
911
912 while (*ptr++ && n--)
913 {
914 buffer->cur++;
915 reader->pos++;
916 }
917 }
918
919 static inline BOOL is_wchar_space(WCHAR ch)
920 {
921 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
922 }
923
924 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
925 static int reader_skipspaces(xmlreader *reader)
926 {
927 encoded_buffer *buffer = &reader->input->buffer->utf16;
928 const WCHAR *ptr = reader_get_ptr(reader);
929 UINT start = reader_get_cur(reader);
930
931 while (is_wchar_space(*ptr))
932 {
933 if (*ptr == '\r')
934 reader->pos = 0;
935 else if (*ptr == '\n')
936 {
937 reader->line++;
938 reader->pos = 0;
939 }
940 else
941 reader->pos++;
942
943 buffer->cur++;
944 ptr = reader_get_ptr(reader);
945 }
946
947 return reader_get_cur(reader) - start;
948 }
949
950 /* [26] VersionNum ::= '1.' [0-9]+ */
951 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
952 {
953 static const WCHAR onedotW[] = {'1','.',0};
954 WCHAR *ptr, *ptr2;
955 UINT start;
956
957 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
958
959 start = reader_get_cur(reader);
960 /* skip "1." */
961 reader_skipn(reader, 2);
962
963 ptr2 = ptr = reader_get_ptr(reader);
964 while (*ptr >= '0' && *ptr <= '9')
965 {
966 reader_skipn(reader, 1);
967 ptr = reader_get_ptr(reader);
968 }
969
970 if (ptr2 == ptr) return WC_E_DIGIT;
971 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
972 TRACE("version=%s\n", debug_strval(reader, val));
973 return S_OK;
974 }
975
976 /* [25] Eq ::= S? '=' S? */
977 static HRESULT reader_parse_eq(xmlreader *reader)
978 {
979 static const WCHAR eqW[] = {'=',0};
980 reader_skipspaces(reader);
981 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
982 /* skip '=' */
983 reader_skipn(reader, 1);
984 reader_skipspaces(reader);
985 return S_OK;
986 }
987
988 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
989 static HRESULT reader_parse_versioninfo(xmlreader *reader)
990 {
991 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
992 strval val, name;
993 HRESULT hr;
994
995 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
996
997 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
998 reader_init_strvalue(reader_get_cur(reader), 7, &name);
999 /* skip 'version' */
1000 reader_skipn(reader, 7);
1001
1002 hr = reader_parse_eq(reader);
1003 if (FAILED(hr)) return hr;
1004
1005 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1006 return WC_E_QUOTE;
1007 /* skip "'"|'"' */
1008 reader_skipn(reader, 1);
1009
1010 hr = reader_parse_versionnum(reader, &val);
1011 if (FAILED(hr)) return hr;
1012
1013 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1014 return WC_E_QUOTE;
1015
1016 /* skip "'"|'"' */
1017 reader_skipn(reader, 1);
1018
1019 return reader_add_attr(reader, &name, &val);
1020 }
1021
1022 /* ([A-Za-z0-9._] | '-') */
1023 static inline BOOL is_wchar_encname(WCHAR ch)
1024 {
1025 return ((ch >= 'A' && ch <= 'Z') ||
1026 (ch >= 'a' && ch <= 'z') ||
1027 (ch >= '0' && ch <= '9') ||
1028 (ch == '.') || (ch == '_') ||
1029 (ch == '-'));
1030 }
1031
1032 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1033 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1034 {
1035 WCHAR *start = reader_get_ptr(reader), *ptr;
1036 xml_encoding enc;
1037 int len;
1038
1039 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1040 return WC_E_ENCNAME;
1041
1042 val->start = reader_get_cur(reader);
1043
1044 ptr = start;
1045 while (is_wchar_encname(*++ptr))
1046 ;
1047
1048 len = ptr - start;
1049 enc = parse_encoding_name(start, len);
1050 TRACE("encoding name %s\n", debugstr_wn(start, len));
1051 val->str = start;
1052 val->len = len;
1053
1054 if (enc == XmlEncoding_Unknown)
1055 return WC_E_ENCNAME;
1056
1057 /* skip encoding name */
1058 reader_skipn(reader, len);
1059 return S_OK;
1060 }
1061
1062 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1063 static HRESULT reader_parse_encdecl(xmlreader *reader)
1064 {
1065 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1066 strval name, val;
1067 HRESULT hr;
1068
1069 if (!reader_skipspaces(reader)) return S_FALSE;
1070
1071 if (reader_cmp(reader, encodingW)) return S_FALSE;
1072 name.str = reader_get_ptr(reader);
1073 name.start = reader_get_cur(reader);
1074 name.len = 8;
1075 /* skip 'encoding' */
1076 reader_skipn(reader, 8);
1077
1078 hr = reader_parse_eq(reader);
1079 if (FAILED(hr)) return hr;
1080
1081 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1082 return WC_E_QUOTE;
1083 /* skip "'"|'"' */
1084 reader_skipn(reader, 1);
1085
1086 hr = reader_parse_encname(reader, &val);
1087 if (FAILED(hr)) return hr;
1088
1089 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1090 return WC_E_QUOTE;
1091
1092 /* skip "'"|'"' */
1093 reader_skipn(reader, 1);
1094
1095 return reader_add_attr(reader, &name, &val);
1096 }
1097
1098 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1099 static HRESULT reader_parse_sddecl(xmlreader *reader)
1100 {
1101 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1102 static const WCHAR yesW[] = {'y','e','s',0};
1103 static const WCHAR noW[] = {'n','o',0};
1104 strval name, val;
1105 UINT start;
1106 HRESULT hr;
1107
1108 if (!reader_skipspaces(reader)) return S_FALSE;
1109
1110 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1111 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1112 /* skip 'standalone' */
1113 reader_skipn(reader, 10);
1114
1115 hr = reader_parse_eq(reader);
1116 if (FAILED(hr)) return hr;
1117
1118 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1119 return WC_E_QUOTE;
1120 /* skip "'"|'"' */
1121 reader_skipn(reader, 1);
1122
1123 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1124 return WC_E_XMLDECL;
1125
1126 start = reader_get_cur(reader);
1127 /* skip 'yes'|'no' */
1128 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1129 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1130 TRACE("standalone=%s\n", debug_strval(reader, &val));
1131
1132 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1133 return WC_E_QUOTE;
1134 /* skip "'"|'"' */
1135 reader_skipn(reader, 1);
1136
1137 return reader_add_attr(reader, &name, &val);
1138 }
1139
1140 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1141 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1142 {
1143 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1144 static const WCHAR declcloseW[] = {'?','>',0};
1145 HRESULT hr;
1146
1147 /* check if we have "<?xml " */
1148 if (reader_cmp(reader, xmldeclW)) return S_FALSE;
1149
1150 reader_skipn(reader, 5);
1151 hr = reader_parse_versioninfo(reader);
1152 if (FAILED(hr))
1153 return hr;
1154
1155 hr = reader_parse_encdecl(reader);
1156 if (FAILED(hr))
1157 return hr;
1158
1159 hr = reader_parse_sddecl(reader);
1160 if (FAILED(hr))
1161 return hr;
1162
1163 reader_skipspaces(reader);
1164 if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
1165 reader_skipn(reader, 2);
1166
1167 reader_inc_depth(reader);
1168 reader->nodetype = XmlNodeType_XmlDeclaration;
1169 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1170 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1171 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1172
1173 return S_OK;
1174 }
1175
1176 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1177 static HRESULT reader_parse_comment(xmlreader *reader)
1178 {
1179 WCHAR *ptr;
1180 UINT start;
1181
1182 if (reader->resumestate == XmlReadResumeState_Comment)
1183 {
1184 start = reader->resume[XmlReadResume_Body];
1185 ptr = reader_get_ptr(reader);
1186 }
1187 else
1188 {
1189 /* skip '<!--' */
1190 reader_skipn(reader, 4);
1191 reader_shrink(reader);
1192 ptr = reader_get_ptr(reader);
1193 start = reader_get_cur(reader);
1194 reader->nodetype = XmlNodeType_Comment;
1195 reader->resume[XmlReadResume_Body] = start;
1196 reader->resumestate = XmlReadResumeState_Comment;
1197 reader_set_strvalue(reader, StringValue_LocalName, NULL);
1198 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
1199 reader_set_strvalue(reader, StringValue_Value, NULL);
1200 }
1201
1202 /* will exit when there's no more data, it won't attempt to
1203 read more from stream */
1204 while (*ptr)
1205 {
1206 if (ptr[0] == '-')
1207 {
1208 if (ptr[1] == '-')
1209 {
1210 if (ptr[2] == '>')
1211 {
1212 strval value;
1213
1214 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1215 TRACE("%s\n", debug_strval(reader, &value));
1216
1217 /* skip rest of markup '->' */
1218 reader_skipn(reader, 3);
1219
1220 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1221 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1222 reader_set_strvalue(reader, StringValue_Value, &value);
1223 reader->resume[XmlReadResume_Body] = 0;
1224 reader->resumestate = XmlReadResumeState_Initial;
1225 return S_OK;
1226 }
1227 else
1228 return WC_E_COMMENT;
1229 }
1230 }
1231
1232 reader_skipn(reader, 1);
1233 ptr++;
1234 }
1235
1236 return S_OK;
1237 }
1238
/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]

   Works on a single UTF-16 code unit. Both surrogate halves (0xd800-0xdfff)
   are accepted, that's how the [#x10000-#x10FFFF] part shows up here. */
static inline BOOL is_char(WCHAR ch)
{
    /* tab, line feed and carriage return are the only legal chars below space */
    if (ch < 0x20)
        return ch == '\t' || ch == '\n' || ch == '\r';

    /* with surrogates included it's a single run up to 0xfffd,
       0xfffe and 0xffff are left out */
    return ch <= 0xfffd;
}
1248
/* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]

   Tells if a character is allowed in a public identifier literal. Quote
   delimiter handling is up to the caller, both '\'' and everything else from
   the set above are reported as valid here; '"' is never valid. */
static inline BOOL is_pubchar(WCHAR ch)
{
    return (ch == ' ') ||
           (ch >= 'a' && ch <= 'z') ||
           (ch >= 'A' && ch <= 'Z') ||
           /* 0x27-0x3b is one contiguous run: ' ( ) * + , - . / 0-9 : ;
              The run has to start at '\'', starting at '-' would leave out
              the first six of them. */
           (ch >= '\'' && ch <= ';') ||
           (ch == '=') || (ch == '?') ||
           (ch == '@') || (ch == '!') ||
           (ch >= '#' && ch <= '%') || /* #$% */
           (ch == '_') || (ch == '\r') || (ch == '\n');
}
1262
/* Tells if a UTF-16 code unit can start a Name, see production [4]
   NameStartChar. Surrogate halves are accepted unconditionally. */
static inline BOOL is_namestartchar(WCHAR ch)
{
    /* inclusive ranges, ordered by code point */
    static const struct
    {
        WCHAR first;
        WCHAR last;
    } ranges[] =
    {
        { ':',    ':'    },
        { 'A',    'Z'    },
        { '_',    '_'    },
        { 'a',    'z'    },
        { 0xc0,   0xd6   },
        { 0xd8,   0xf6   },
        { 0xf8,   0x2ff  },
        { 0x370,  0x37d  },
        { 0x37f,  0x1fff },
        { 0x200c, 0x200d },
        { 0x2070, 0x218f },
        { 0x2c00, 0x2fef },
        { 0x3001, 0xd7ff },
        { 0xd800, 0xdbff }, /* high surrogate */
        { 0xdc00, 0xdfff }, /* low surrogate */
        { 0xf900, 0xfdcf },
        { 0xfdf0, 0xfffd },
    };
    const unsigned int count = sizeof(ranges)/sizeof(ranges[0]);
    unsigned int i;

    for (i = 0; i < count; i++)
        if (ch >= ranges[i].first && ch <= ranges[i].last)
            break;

    /* loop was left early only on a hit */
    return i < count;
}
1281
/* [4 NS] NCName ::= Name - (Char* ':' Char*)

   Tells if a UTF-16 code unit is allowed in a non-colonized name, which is
   the name character set without ':'. Surrogate halves are accepted
   unconditionally. */
static inline BOOL is_ncnamechar(WCHAR ch)
{
    /* inclusive ranges, ordered by code point */
    static const struct
    {
        WCHAR first;
        WCHAR last;
    } ranges[] =
    {
        { '-',    '.'    },
        { '0',    '9'    },
        { 'A',    'Z'    },
        { '_',    '_'    },
        { 'a',    'z'    },
        { 0xb7,   0xb7   },
        { 0xc0,   0xd6   },
        { 0xd8,   0xf6   },
        { 0xf8,   0x2ff  },
        { 0x300,  0x36f  },
        { 0x370,  0x37d  },
        { 0x37f,  0x1fff },
        { 0x200c, 0x200d },
        { 0x203f, 0x2040 },
        { 0x2070, 0x218f },
        { 0x2c00, 0x2fef },
        { 0x3001, 0xd7ff },
        { 0xd800, 0xdbff }, /* high surrogate */
        { 0xdc00, 0xdfff }, /* low surrogate */
        { 0xf900, 0xfdcf },
        { 0xfdf0, 0xfffd },
    };
    const unsigned int count = sizeof(ranges)/sizeof(ranges[0]);
    unsigned int i;

    for (i = 0; i < count; i++)
        if (ch >= ranges[i].first && ch <= ranges[i].last)
            break;

    /* loop was left early only on a hit */
    return i < count;
}
1306
1307 static inline BOOL is_namechar(WCHAR ch)
1308 {
1309 return (ch == ':') || is_ncnamechar(ch);
1310 }
1311
1312 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1313 {
1314 /* When we're on attribute always return attribute type, container node type is kept.
1315 Note that container is not necessarily an element, and attribute doesn't mean it's
1316 an attribute in XML spec terms. */
1317 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1318 }
1319
1320 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1321 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1322 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1323 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1324 [5] Name ::= NameStartChar (NameChar)* */
1325 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1326 {
1327 WCHAR *ptr;
1328 UINT start;
1329
1330 if (reader->resume[XmlReadResume_Name])
1331 {
1332 start = reader->resume[XmlReadResume_Name];
1333 ptr = reader_get_ptr(reader);
1334 }
1335 else
1336 {
1337 ptr = reader_get_ptr(reader);
1338 start = reader_get_cur(reader);
1339 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1340 }
1341
1342 while (is_namechar(*ptr))
1343 {
1344 reader_skipn(reader, 1);
1345 ptr = reader_get_ptr(reader);
1346 }
1347
1348 if (is_reader_pending(reader))
1349 {
1350 reader->resume[XmlReadResume_Name] = start;
1351 return E_PENDING;
1352 }
1353 else
1354 reader->resume[XmlReadResume_Name] = 0;
1355
1356 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1357 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1358
1359 return S_OK;
1360 }
1361
1362 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1363 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1364 {
1365 static const WCHAR xmlW[] = {'x','m','l'};
1366 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1367 strval name;
1368 WCHAR *ptr;
1369 HRESULT hr;
1370 UINT i;
1371
1372 hr = reader_parse_name(reader, &name);
1373 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1374
1375 /* now that we got name check for illegal content */
1376 if (strval_eq(reader, &name, &xmlval))
1377 return WC_E_LEADINGXML;
1378
1379 /* PITarget can't be a qualified name */
1380 ptr = reader_get_strptr(reader, &name);
1381 for (i = 0; i < name.len; i++)
1382 if (ptr[i] == ':')
1383 return i ? NC_E_NAMECOLON : WC_E_PI;
1384
1385 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1386 *target = name;
1387 return S_OK;
1388 }
1389
1390 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1391 static HRESULT reader_parse_pi(xmlreader *reader)
1392 {
1393 strval target;
1394 WCHAR *ptr;
1395 UINT start;
1396 HRESULT hr;
1397
1398 switch (reader->resumestate)
1399 {
1400 case XmlReadResumeState_Initial:
1401 /* skip '<?' */
1402 reader_skipn(reader, 2);
1403 reader_shrink(reader);
1404 reader->resumestate = XmlReadResumeState_PITarget;
1405 case XmlReadResumeState_PITarget:
1406 hr = reader_parse_pitarget(reader, &target);
1407 if (FAILED(hr)) return hr;
1408 reader_set_strvalue(reader, StringValue_LocalName, &target);
1409 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1410 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1411 reader->resumestate = XmlReadResumeState_PIBody;
1412 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1413 default:
1414 ;
1415 }
1416
1417 start = reader->resume[XmlReadResume_Body];
1418 ptr = reader_get_ptr(reader);
1419 while (*ptr)
1420 {
1421 if (ptr[0] == '?')
1422 {
1423 if (ptr[1] == '>')
1424 {
1425 UINT cur = reader_get_cur(reader);
1426 strval value;
1427
1428 /* strip all leading whitespace chars */
1429 while (start < cur)
1430 {
1431 ptr = reader_get_ptr2(reader, start);
1432 if (!is_wchar_space(*ptr)) break;
1433 start++;
1434 }
1435
1436 reader_init_strvalue(start, cur-start, &value);
1437
1438 /* skip '?>' */
1439 reader_skipn(reader, 2);
1440 TRACE("%s\n", debug_strval(reader, &value));
1441 reader->nodetype = XmlNodeType_ProcessingInstruction;
1442 reader->resumestate = XmlReadResumeState_Initial;
1443 reader->resume[XmlReadResume_Body] = 0;
1444 reader_set_strvalue(reader, StringValue_Value, &value);
1445 return S_OK;
1446 }
1447 }
1448
1449 reader_skipn(reader, 1);
1450 ptr = reader_get_ptr(reader);
1451 }
1452
1453 return S_OK;
1454 }
1455
1456 /* This one is used to parse significant whitespace nodes, like in Misc production */
1457 static HRESULT reader_parse_whitespace(xmlreader *reader)
1458 {
1459 switch (reader->resumestate)
1460 {
1461 case XmlReadResumeState_Initial:
1462 reader_shrink(reader);
1463 reader->resumestate = XmlReadResumeState_Whitespace;
1464 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1465 reader->nodetype = XmlNodeType_Whitespace;
1466 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1467 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1468 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1469 /* fallthrough */
1470 case XmlReadResumeState_Whitespace:
1471 {
1472 strval value;
1473 UINT start;
1474
1475 reader_skipspaces(reader);
1476 if (is_reader_pending(reader)) return S_OK;
1477
1478 start = reader->resume[XmlReadResume_Body];
1479 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1480 reader_set_strvalue(reader, StringValue_Value, &value);
1481 TRACE("%s\n", debug_strval(reader, &value));
1482 reader->resumestate = XmlReadResumeState_Initial;
1483 }
1484 default:
1485 ;
1486 }
1487
1488 return S_OK;
1489 }
1490
1491 /* [27] Misc ::= Comment | PI | S */
1492 static HRESULT reader_parse_misc(xmlreader *reader)
1493 {
1494 HRESULT hr = S_FALSE;
1495
1496 if (reader->resumestate != XmlReadResumeState_Initial)
1497 {
1498 hr = reader_more(reader);
1499 if (FAILED(hr)) return hr;
1500
1501 /* finish current node */
1502 switch (reader->resumestate)
1503 {
1504 case XmlReadResumeState_PITarget:
1505 case XmlReadResumeState_PIBody:
1506 return reader_parse_pi(reader);
1507 case XmlReadResumeState_Comment:
1508 return reader_parse_comment(reader);
1509 case XmlReadResumeState_Whitespace:
1510 return reader_parse_whitespace(reader);
1511 default:
1512 ERR("unknown resume state %d\n", reader->resumestate);
1513 }
1514 }
1515
1516 while (1)
1517 {
1518 const WCHAR *cur = reader_get_ptr(reader);
1519
1520 if (is_wchar_space(*cur))
1521 hr = reader_parse_whitespace(reader);
1522 else if (!reader_cmp(reader, commentW))
1523 hr = reader_parse_comment(reader);
1524 else if (!reader_cmp(reader, piW))
1525 hr = reader_parse_pi(reader);
1526 else
1527 break;
1528
1529 if (hr != S_FALSE) return hr;
1530 }
1531
1532 return hr;
1533 }
1534
1535 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1536 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1537 {
1538 WCHAR *cur = reader_get_ptr(reader), quote;
1539 UINT start;
1540
1541 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1542
1543 quote = *cur;
1544 reader_skipn(reader, 1);
1545
1546 cur = reader_get_ptr(reader);
1547 start = reader_get_cur(reader);
1548 while (is_char(*cur) && *cur != quote)
1549 {
1550 reader_skipn(reader, 1);
1551 cur = reader_get_ptr(reader);
1552 }
1553 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1554 if (*cur == quote) reader_skipn(reader, 1);
1555
1556 TRACE("%s\n", debug_strval(reader, literal));
1557 return S_OK;
1558 }
1559
1560 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1561 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1562 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1563 {
1564 WCHAR *cur = reader_get_ptr(reader), quote;
1565 UINT start;
1566
1567 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1568
1569 quote = *cur;
1570 reader_skipn(reader, 1);
1571
1572 start = reader_get_cur(reader);
1573 cur = reader_get_ptr(reader);
1574 while (is_pubchar(*cur) && *cur != quote)
1575 {
1576 reader_skipn(reader, 1);
1577 cur = reader_get_ptr(reader);
1578 }
1579
1580 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1581 TRACE("%s\n", debug_strval(reader, literal));
1582 return S_OK;
1583 }
1584
1585 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1586 static HRESULT reader_parse_externalid(xmlreader *reader)
1587 {
1588 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1589 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1590 strval name;
1591 HRESULT hr;
1592 int cnt;
1593
1594 if (reader_cmp(reader, systemW))
1595 {
1596 if (reader_cmp(reader, publicW))
1597 return S_FALSE;
1598 else
1599 {
1600 strval pub;
1601
1602 /* public id */
1603 reader_skipn(reader, 6);
1604 cnt = reader_skipspaces(reader);
1605 if (!cnt) return WC_E_WHITESPACE;
1606
1607 hr = reader_parse_pub_literal(reader, &pub);
1608 if (FAILED(hr)) return hr;
1609
1610 reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1611 return reader_add_attr(reader, &name, &pub);
1612 }
1613 }
1614 else
1615 {
1616 strval sys;
1617
1618 /* system id */
1619 reader_skipn(reader, 6);
1620 cnt = reader_skipspaces(reader);
1621 if (!cnt) return WC_E_WHITESPACE;
1622
1623 hr = reader_parse_sys_literal(reader, &sys);
1624 if (FAILED(hr)) return hr;
1625
1626 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1627 return reader_add_attr(reader, &name, &sys);
1628 }
1629
1630 return hr;
1631 }
1632
1633 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1634 static HRESULT reader_parse_dtd(xmlreader *reader)
1635 {
1636 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1637 strval name;
1638 WCHAR *cur;
1639 HRESULT hr;
1640
1641 /* check if we have "<!DOCTYPE" */
1642 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1643 reader_shrink(reader);
1644
1645 /* DTD processing is not allowed by default */
1646 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1647
1648 reader_skipn(reader, 9);
1649 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1650
1651 /* name */
1652 hr = reader_parse_name(reader, &name);
1653 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1654
1655 reader_skipspaces(reader);
1656
1657 hr = reader_parse_externalid(reader);
1658 if (FAILED(hr)) return hr;
1659
1660 reader_skipspaces(reader);
1661
1662 cur = reader_get_ptr(reader);
1663 if (*cur != '>')
1664 {
1665 FIXME("internal subset parsing not implemented\n");
1666 return E_NOTIMPL;
1667 }
1668
1669 /* skip '>' */
1670 reader_skipn(reader, 1);
1671
1672 reader->nodetype = XmlNodeType_DocumentType;
1673 reader_set_strvalue(reader, StringValue_LocalName, &name);
1674 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1675
1676 return S_OK;
1677 }
1678
1679 /* [11 NS] LocalPart ::= NCName */
1680 static HRESULT reader_parse_local(xmlreader *reader, strval *local)
1681 {
1682 WCHAR *ptr;
1683 UINT start;
1684
1685 if (reader->resume[XmlReadResume_Local])
1686 {
1687 start = reader->resume[XmlReadResume_Local];
1688 ptr = reader_get_ptr(reader);
1689 }
1690 else
1691 {
1692 ptr = reader_get_ptr(reader);
1693 start = reader_get_cur(reader);
1694 }
1695
1696 while (is_ncnamechar(*ptr))
1697 {
1698 reader_skipn(reader, 1);
1699 ptr = reader_get_ptr(reader);
1700 }
1701
1702 if (is_reader_pending(reader))
1703 {
1704 reader->resume[XmlReadResume_Local] = start;
1705 return E_PENDING;
1706 }
1707 else
1708 reader->resume[XmlReadResume_Local] = 0;
1709
1710 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1711
1712 return S_OK;
1713 }
1714
1715 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1716 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1717 [9 NS] UnprefixedName ::= LocalPart
1718 [10 NS] Prefix ::= NCName */
1719 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1720 {
1721 WCHAR *ptr;
1722 UINT start;
1723 HRESULT hr;
1724
1725 if (reader->resume[XmlReadResume_Name])
1726 {
1727 start = reader->resume[XmlReadResume_Name];
1728 ptr = reader_get_ptr(reader);
1729 }
1730 else
1731 {
1732 ptr = reader_get_ptr(reader);
1733 start = reader_get_cur(reader);
1734 reader->resume[XmlReadResume_Name] = start;
1735 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1736 }
1737
1738 if (reader->resume[XmlReadResume_Local])
1739 {
1740 hr = reader_parse_local(reader, local);
1741 if (FAILED(hr)) return hr;
1742
1743 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1744 local->start - reader->resume[XmlReadResume_Name] - 1,
1745 prefix);
1746 }
1747 else
1748 {
1749 /* skip prefix part */
1750 while (is_ncnamechar(*ptr))
1751 {
1752 reader_skipn(reader, 1);
1753 ptr = reader_get_ptr(reader);
1754 }
1755
1756 if (is_reader_pending(reader)) return E_PENDING;
1757
1758 /* got a qualified name */
1759 if (*ptr == ':')
1760 {
1761 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
1762
1763 /* skip ':' */
1764 reader_skipn(reader, 1);
1765 hr = reader_parse_local(reader, local);
1766 if (FAILED(hr)) return hr;
1767 }
1768 else
1769 {
1770 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
1771 reader_init_strvalue(0, 0, prefix);
1772 }
1773 }
1774
1775 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1776
1777 if (prefix->len)
1778 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
1779 else
1780 TRACE("ncname %s\n", debug_strval(reader, local));
1781
1782 reader_init_strvalue(prefix->len ? prefix->start : local->start,
1783 /* count ':' too */
1784 (prefix->len ? prefix->len + 1 : 0) + local->len,
1785 qname);
1786
1787 reader->resume[XmlReadResume_Name] = 0;
1788 reader->resume[XmlReadResume_Local] = 0;
1789
1790 return S_OK;
1791 }
1792
1793 /* Applies normalization rules to a single char, used for attribute values.
1794
1795 Rules include 2 steps:
1796
1797 1) replacing \r\n with a single \n;
1798 2) replacing all whitespace chars with ' '.
1799
1800 */
1801 static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
1802 {
1803 encoded_buffer *buffer = &reader->input->buffer->utf16;
1804
1805 if (!is_wchar_space(*ptr)) return;
1806
1807 if (*ptr == '\r' && *(ptr+1) == '\n')
1808 {
1809 int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR);
1810 memmove(ptr+1, ptr+2, len);
1811 }
1812 *ptr = ' ';
1813 }
1814
1815 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
1816 {
1817 static const WCHAR entltW[] = {'l','t'};
1818 static const WCHAR entgtW[] = {'g','t'};
1819 static const WCHAR entampW[] = {'a','m','p'};
1820 static const WCHAR entaposW[] = {'a','p','o','s'};
1821 static const WCHAR entquotW[] = {'q','u','o','t'};
1822 static const strval lt = { (WCHAR*)entltW, 2 };
1823 static const strval gt = { (WCHAR*)entgtW, 2 };
1824 static const strval amp = { (WCHAR*)entampW, 3 };
1825 static const strval apos = { (WCHAR*)entaposW, 4 };
1826 static const strval quot = { (WCHAR*)entquotW, 4 };
1827 WCHAR *str = reader_get_strptr(reader, name);
1828
1829 switch (*str)
1830 {
1831 case 'l':
1832 if (strval_eq(reader, name, &lt)) return '<';
1833 break;
1834 case 'g':
1835 if (strval_eq(reader, name, &gt)) return '>';
1836 break;
1837 case 'a':
1838 if (strval_eq(reader, name, &amp))
1839 return '&';
1840 else if (strval_eq(reader, name, &apos))
1841 return '\'';
1842 break;
1843 case 'q':
1844 if (strval_eq(reader, name, &quot)) return '\"';
1845 break;
1846 default:
1847 ;
1848 }
1849
1850 return 0;
1851 }
1852
1853 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1854 [67] Reference ::= EntityRef | CharRef
1855 [68] EntityRef ::= '&' Name ';' */
1856 static HRESULT reader_parse_reference(xmlreader *reader)
1857 {
1858 encoded_buffer *buffer = &reader->input->buffer->utf16;
1859 WCHAR *start = reader_get_ptr(reader), *ptr;
1860 UINT cur = reader_get_cur(reader);
1861 WCHAR ch = 0;
1862 int len;
1863
1864 /* skip '&' */
1865 reader_skipn(reader, 1);
1866 ptr = reader_get_ptr(reader);
1867
1868 if (*ptr == '#')
1869 {
1870 reader_skipn(reader, 1);
1871 ptr = reader_get_ptr(reader);
1872
1873 /* hex char or decimal */
1874 if (*ptr == 'x')
1875 {
1876 reader_skipn(reader, 1);
1877 ptr = reader_get_ptr(reader);
1878
1879 while (*ptr != ';')
1880 {
1881 if ((*ptr >= '0' && *ptr <= '9'))
1882 ch = ch*16 + *ptr - '0';
1883 else if ((*ptr >= 'a' && *ptr <= 'f'))
1884 ch = ch*16 + *ptr - 'a' + 10;
1885 else if ((*ptr >= 'A' && *ptr <= 'F'))
1886 ch = ch*16 + *ptr - 'A' + 10;
1887 else
1888 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
1889 reader_skipn(reader, 1);
1890 ptr = reader_get_ptr(reader);
1891 }
1892 }
1893 else
1894 {
1895 while (*ptr != ';')
1896 {
1897 if ((*ptr >= '0' && *ptr <= '9'))
1898 {
1899 ch = ch*10 + *ptr - '0';
1900 reader_skipn(reader, 1);
1901 ptr = reader_get_ptr(reader);
1902 }
1903 else
1904 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
1905 }
1906 }
1907
1908 if (!is_char(ch)) return WC_E_XMLCHARACTER;
1909
1910 /* normalize */
1911 if (is_wchar_space(ch)) ch = ' ';
1912
1913 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
1914 memmove(start+1, ptr+1, len);
1915 buffer->cur = cur + 1;
1916
1917 *start = ch;
1918 }
1919 else
1920 {
1921 strval name;
1922 HRESULT hr;
1923
1924 hr = reader_parse_name(reader, &name);
1925 if (FAILED(hr)) return hr;
1926
1927 ptr = reader_get_ptr(reader);
1928 if (*ptr != ';') return WC_E_SEMICOLON;
1929
1930 /* predefined entities resolve to a single character */
1931 ch = get_predefined_entity(reader, &name);
1932 if (ch)
1933 {
1934 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
1935 memmove(start+1, ptr+1, len);
1936 buffer->cur = cur + 1;
1937
1938 *start = ch;
1939 }
1940 else
1941 {
1942 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
1943 return WC_E_UNDECLAREDENTITY;
1944 }
1945
1946 }
1947
1948 return S_OK;
1949 }
1950
1951 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
1952 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
1953 {
1954 WCHAR *ptr, quote;
1955 UINT start;
1956
1957 ptr = reader_get_ptr(reader);
1958
1959 /* skip opening quote */
1960 quote = *ptr;
1961 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
1962 reader_skipn(reader, 1);
1963
1964 ptr = reader_get_ptr(reader);
1965 start = reader_get_cur(reader);
1966 while (*ptr)
1967 {
1968 if (*ptr == '<') return WC_E_LESSTHAN;
1969
1970 if (*ptr == quote)
1971 {
1972 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
1973 /* skip closing quote */
1974 reader_skipn(reader, 1);
1975 return S_OK;
1976 }
1977
1978 if (*ptr == '&')
1979 {
1980 HRESULT hr = reader_parse_reference(reader);
1981 if (FAILED(hr)) return hr;
1982 }
1983 else
1984 {
1985 reader_normalize_space(reader, ptr);
1986 reader_skipn(reader, 1);
1987 }
1988 ptr = reader_get_ptr(reader);
1989 }
1990
1991 return WC_E_QUOTE;
1992 }
1993
1994 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
1995 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
1996 [3 NS] DefaultAttName ::= 'xmlns'
1997 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
1998 static HRESULT reader_parse_attribute(xmlreader *reader)
1999 {
2000 static const WCHAR xmlnsW[] = {'x','m','l','n','s',0};
2001 strval prefix, local, qname, xmlns, value;
2002 HRESULT hr;
2003
2004 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2005 if (FAILED(hr)) return hr;
2006
2007 reader_init_cstrvalue((WCHAR*)xmlnsW, 5, &xmlns);
2008
2009 if (strval_eq(reader, &prefix, &xmlns))
2010 {
2011 FIXME("namespace definitions not supported\n");
2012 return E_NOTIMPL;
2013 }
2014
2015 if (strval_eq(reader, &qname, &xmlns))
2016 FIXME("default namespace definitions not supported\n");
2017
2018 hr = reader_parse_eq(reader);
2019 if (FAILED(hr)) return hr;
2020
2021 hr = reader_parse_attvalue(reader, &value);
2022 if (FAILED(hr)) return hr;
2023
2024 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2025 return reader_add_attr(reader, &local, &value);
2026 }
2027
2028 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2029 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2030 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname, int *empty)
2031 {
2032 HRESULT hr;
2033
2034 hr = reader_parse_qname(reader, prefix, local, qname);
2035 if (FAILED(hr)) return hr;
2036
2037 while (1)
2038 {
2039 static const WCHAR endW[] = {'/','>',0};
2040
2041 reader_skipspaces(reader);
2042
2043 /* empty element */
2044 if ((*empty = !reader_cmp(reader, endW)))
2045 {
2046 /* skip '/>' */
2047 reader_skipn(reader, 2);
2048 reader->empty_element = TRUE;
2049 return S_OK;
2050 }
2051
2052 /* got a start tag */
2053 if (!reader_cmp(reader, gtW))
2054 {
2055 /* skip '>' */
2056 reader_skipn(reader, 1);
2057 return reader_push_element(reader, qname, local);
2058 }
2059
2060 hr = reader_parse_attribute(reader);
2061 if (FAILED(hr)) return hr;
2062 }
2063
2064 return S_OK;
2065 }
2066
2067 /* [39] element ::= EmptyElemTag | STag content ETag */
2068 static HRESULT reader_parse_element(xmlreader *reader)
2069 {
2070 HRESULT hr;
2071
2072 switch (reader->resumestate)
2073 {
2074 case XmlReadResumeState_Initial:
2075 /* check if we are really on element */
2076 if (reader_cmp(reader, ltW)) return S_FALSE;
2077
2078 /* skip '<' */
2079 reader_skipn(reader, 1);
2080
2081 reader_shrink(reader);
2082 reader->resumestate = XmlReadResumeState_STag;
2083 case XmlReadResumeState_STag:
2084 {
2085 strval qname, prefix, local;
2086 int empty = 0;
2087
2088 /* this handles empty elements too */
2089 hr = reader_parse_stag(reader, &prefix, &local, &qname, &empty);
2090 if (FAILED(hr)) return hr;
2091
2092 /* FIXME: need to check for defined namespace to reject invalid prefix,
2093 currently reject all prefixes */
2094 if (prefix.len) return NC_E_UNDECLAREDPREFIX;
2095
2096 /* if we got empty element and stack is empty go straight to Misc */
2097 if (empty && list_empty(&reader->elements))
2098 reader->instate = XmlReadInState_MiscEnd;
2099 else
2100 reader->instate = XmlReadInState_Content;
2101
2102 reader->nodetype = XmlNodeType_Element;
2103 reader->resumestate = XmlReadResumeState_Initial;
2104 reader_set_strvalue(reader, StringValue_LocalName, &local);
2105 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2106 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2107 break;
2108 }
2109 default:
2110 hr = E_FAIL;
2111 }
2112
2113 return hr;
2114 }
2115
2116 /* [13 NS] ETag ::= '</' QName S? '>' */
2117 static HRESULT reader_parse_endtag(xmlreader *reader)
2118 {
2119 strval prefix, local, qname;
2120 struct element *elem;
2121 HRESULT hr;
2122
2123 /* skip '</' */
2124 reader_skipn(reader, 2);
2125
2126 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2127 if (FAILED(hr)) return hr;
2128
2129 reader_skipspaces(reader);
2130
2131 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2132
2133 /* skip '>' */
2134 reader_skipn(reader, 1);
2135
2136 /* Element stack should never be empty at this point, cause we shouldn't get to
2137 content parsing if it's empty. */
2138 elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2139 if (!strval_eq(reader, &elem->qname, &qname)) return WC_E_ELEMENTMATCH;
2140
2141 reader_pop_element(reader);
2142
2143 /* It was a root element, the rest is expected as Misc */
2144 if (list_empty(&reader->elements))
2145 reader->instate = XmlReadInState_MiscEnd;
2146
2147 reader->nodetype = XmlNodeType_EndElement;
2148 reader_set_strvalue(reader, StringValue_LocalName, &local);
2149 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2150
2151 return S_OK;
2152 }
2153
2154 /* [18] CDSect ::= CDStart CData CDEnd
2155 [19] CDStart ::= '<![CDATA['
2156 [20] CData ::= (Char* - (Char* ']]>' Char*))
2157 [21] CDEnd ::= ']]>' */
2158 static HRESULT reader_parse_cdata(xmlreader *reader)
2159 {
2160 WCHAR *ptr;
2161 UINT start;
2162
2163 if (reader->resumestate == XmlReadResumeState_CDATA)
2164 {
2165 start = reader->resume[XmlReadResume_Body];
2166 ptr = reader_get_ptr(reader);
2167 }
2168 else
2169 {
2170 /* skip markup '<![CDATA[' */
2171 reader_skipn(reader, 9);
2172 reader_shrink(reader);
2173 ptr = reader_get_ptr(reader);
2174 start = reader_get_cur(reader);
2175 reader->nodetype = XmlNodeType_CDATA;
2176 reader->resume[XmlReadResume_Body] = start;
2177 reader->resumestate = XmlReadResumeState_CDATA;
2178 reader_set_strvalue(reader, StringValue_LocalName, NULL);
2179 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
2180 reader_set_strvalue(reader, StringValue_Value, NULL);
2181 }
2182
2183 while (*ptr)
2184 {
2185 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2186 {
2187 strval value;
2188
2189 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2190
2191 /* skip ']]>' */
2192 reader_skipn(reader, 3);
2193 TRACE("%s\n", debug_strval(reader, &value));
2194
2195 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2196 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2197 reader_set_strvalue(reader, StringValue_Value, &value);
2198 reader->resume[XmlReadResume_Body] = 0;
2199 reader->resumestate = XmlReadResumeState_Initial;
2200 return S_OK;
2201 }
2202 else
2203 {
2204 /* Value normalization is not fully implemented, rules are:
2205
2206 - single '\r' -> '\n';
2207 - sequence '\r\n' -> '\n', in this case value length changes;
2208 */
2209 if (*ptr == '\r') *ptr = '\n';
2210 reader_skipn(reader, 1);
2211 ptr++;
2212 }
2213 }
2214
2215 return S_OK;
2216 }
2217
2218 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2219 static HRESULT reader_parse_chardata(xmlreader *reader)
2220 {
2221 WCHAR *ptr;
2222 UINT start;
2223
2224 if (reader->resumestate == XmlReadResumeState_CharData)
2225 {
2226 start = reader->resume[XmlReadResume_Body];
2227 ptr = reader_get_ptr(reader);
2228 }
2229 else
2230 {
2231 reader_shrink(reader);
2232 ptr = reader_get_ptr(reader);
2233 start = reader_get_cur(reader);
2234 /* There's no text */
2235 if (!*ptr || *ptr == '<') return S_OK;
2236 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2237 reader->resume[XmlReadResume_Body] = start;
2238 reader->resumestate = XmlReadResumeState_CharData;
2239 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2240 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2241 reader_set_strvalue(reader, StringValue_Value, NULL);
2242 }
2243
2244 while (*ptr)
2245 {
2246 /* CDATA closing sequence ']]>' is not allowed */
2247 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2248 return WC_E_CDSECTEND;
2249
2250 /* Found next markup part */
2251 if (ptr[0] == '<')
2252 {
2253 strval value;
2254
2255 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2256 reader_set_strvalue(reader, StringValue_Value, &value);
2257 reader->resume[XmlReadResume_Body] = 0;
2258 reader->resumestate = XmlReadResumeState_Initial;
2259 return S_OK;
2260 }
2261
2262 reader_skipn(reader, 1);
2263
2264 /* this covers a case when text has leading whitespace chars */
2265 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2266 ptr++;
2267 }
2268
2269 return S_OK;
2270 }
2271
2272 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2273 static HRESULT reader_parse_content(xmlreader *reader)
2274 {
2275 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2276 static const WCHAR etagW[] = {'<','/',0};
2277 static const WCHAR ampW[] = {'&',0};
2278
2279 if (reader->resumestate != XmlReadResumeState_Initial)
2280 {
2281 switch (reader->resumestate)
2282 {
2283 case XmlReadResumeState_CDATA:
2284 return reader_parse_cdata(reader);
2285 case XmlReadResumeState_Comment:
2286 return reader_parse_comment(reader);
2287 case XmlReadResumeState_PIBody:
2288 case XmlReadResumeState_PITarget:
2289 return reader_parse_pi(reader);
2290 case XmlReadResumeState_CharData:
2291 return reader_parse_chardata(reader);
2292 default:
2293 ERR("unknown resume state %d\n", reader->resumestate);
2294 }
2295 }
2296
2297 reader_shrink(reader);
2298
2299 /* handle end tag here, it indicates end of content as well */
2300 if (!reader_cmp(reader, etagW))
2301 return reader_parse_endtag(reader);
2302
2303 if (!reader_cmp(reader, commentW))
2304 return reader_parse_comment(reader);
2305
2306 if (!reader_cmp(reader, piW))
2307 return reader_parse_pi(reader);
2308
2309 if (!reader_cmp(reader, cdstartW))
2310 return reader_parse_cdata(reader);
2311
2312 if (!reader_cmp(reader, ampW))
2313 return reader_parse_reference(reader);
2314
2315 if (!reader_cmp(reader, ltW))
2316 return reader_parse_element(reader);
2317
2318 /* what's left must be CharData */
2319 return reader_parse_chardata(reader);
2320 }
2321
2322 static HRESULT reader_parse_nextnode(xmlreader *reader)
2323 {
2324 HRESULT hr;
2325
2326 if (!is_reader_pending(reader))
2327 reader_clear_attrs(reader);
2328
2329 while (1)
2330 {
2331 switch (reader->instate)
2332 {
2333 /* if it's a first call for a new input we need to detect stream encoding */
2334 case XmlReadInState_Initial:
2335 {
2336 xml_encoding enc;
2337
2338 hr = readerinput_growraw(reader->input);
2339 if (FAILED(hr)) return hr;
2340
2341 /* try to detect encoding by BOM or data and set input code page */
2342 hr = readerinput_detectencoding(reader->input, &enc);
2343 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
2344 if (FAILED(hr)) return hr;
2345
2346 /* always switch first time cause we have to put something in */
2347 readerinput_switchencoding(reader->input, enc);
2348
2349 /* parse xml declaration */
2350 hr = reader_parse_xmldecl(reader);
2351 if (FAILED(hr)) return hr;
2352
2353 readerinput_shrinkraw(reader->input, -1);
2354 reader->instate = XmlReadInState_Misc_DTD;
2355 if (hr == S_OK) return hr;
2356 }
2357 break;
2358 case XmlReadInState_Misc_DTD:
2359 hr = reader_parse_misc(reader);
2360 if (FAILED(hr)) return hr;
2361
2362 if (hr == S_FALSE)
2363 reader->instate = XmlReadInState_DTD;
2364 else
2365 return hr;
2366 break;
2367 case XmlReadInState_DTD:
2368 hr = reader_parse_dtd(reader);
2369 if (FAILED(hr)) return hr;
2370
2371 if (hr == S_OK)
2372 {
2373 reader->instate = XmlReadInState_DTD_Misc;
2374 return hr;
2375 }
2376 else
2377 reader->instate = XmlReadInState_Element;
2378 break;
2379 case XmlReadInState_DTD_Misc:
2380 hr = reader_parse_misc(reader);
2381 if (FAILED(hr)) return hr;
2382
2383 if (hr == S_FALSE)
2384 reader->instate = XmlReadInState_Element;
2385 else
2386 return hr;
2387 break;
2388 case XmlReadInState_Element:
2389 return reader_parse_element(reader);
2390 case XmlReadInState_Content:
2391 return reader_parse_content(reader);
2392 case XmlReadInState_MiscEnd:
2393 hr = reader_parse_misc(reader);
2394 if (FAILED(hr)) return hr;
2395
2396 if (hr == S_FALSE)
2397 reader->instate = XmlReadInState_Eof;
2398 return hr;
2399 case XmlReadInState_Eof:
2400 return S_FALSE;
2401 default:
2402 FIXME("internal state %d not handled\n", reader->instate);
2403 return E_NOTIMPL;
2404 }
2405 }
2406
2407 return E_NOTIMPL;
2408 }
2409
2410 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2411 {
2412 xmlreader *This = impl_from_IXmlReader(iface);
2413
2414 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2415
2416 if (IsEqualGUID(riid, &IID_IUnknown) ||
2417 IsEqualGUID(riid, &IID_IXmlReader))
2418 {
2419 *ppvObject = iface;
2420 }
2421 else
2422 {
2423 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2424 *ppvObject = NULL;
2425 return E_NOINTERFACE;
2426 }
2427
2428 IXmlReader_AddRef(iface);
2429
2430 return S_OK;
2431 }
2432
2433 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2434 {
2435 xmlreader *This = impl_from_IXmlReader(iface);
2436 ULONG ref = InterlockedIncrement(&This->ref);
2437 TRACE("(%p)->(%d)\n", This, ref);
2438 return ref;
2439 }
2440
2441 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2442 {
2443 xmlreader *This = impl_from_IXmlReader(iface);
2444 LONG ref = InterlockedDecrement(&This->ref);
2445
2446 TRACE("(%p)->(%d)\n", This, ref);
2447
2448 if (ref == 0)
2449 {
2450 IMalloc *imalloc = This->imalloc;
2451 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2452 reader_clear_attrs(This);
2453 reader_clear_elements(This);
2454 reader_free_strvalues(This);
2455 reader_free(This, This);
2456 if (imalloc) IMalloc_Release(imalloc);
2457 }
2458
2459 return ref;
2460 }
2461
2462 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2463 {
2464 xmlreader *This = impl_from_IXmlReader(iface);
2465 IXmlReaderInput *readerinput;
2466 HRESULT hr;
2467
2468 TRACE("(%p)->(%p)\n", This, input);
2469
2470 if (This->input)
2471 {
2472 readerinput_release_stream(This->input);
2473 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2474 This->input = NULL;
2475 }
2476
2477 This->line = This->pos = 0;
2478 reader_clear_elements(This);
2479 This->depth = 0;
2480 This->resumestate = XmlReadResumeState_Initial;
2481 memset(This->resume, 0, sizeof(This->resume));
2482
2483 /* just reset current input */
2484 if (!input)
2485 {
2486 This->state = XmlReadState_Initial;
2487 return S_OK;
2488 }
2489
2490 /* now try IXmlReaderInput, ISequentialStream, IStream */
2491 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2492 if (hr == S_OK)
2493 {
2494 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2495 This->input = impl_from_IXmlReaderInput(readerinput);
2496 else
2497 {
2498 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2499 readerinput, readerinput->lpVtbl);
2500 IUnknown_Release(readerinput);
2501 return E_FAIL;
2502
2503 }
2504 }
2505
2506 if (hr != S_OK || !readerinput)
2507 {
2508 /* create IXmlReaderInput basing on supplied interface */
2509 hr = CreateXmlReaderInputWithEncodingName(input,
2510 NULL, NULL, FALSE, NULL, &readerinput);
2511 if (hr != S_OK) return hr;
2512 This->input = impl_from_IXmlReaderInput(readerinput);
2513 }
2514
2515 /* set stream for supplied IXmlReaderInput */
2516 hr = readerinput_query_for_stream(This->input);
2517 if (hr == S_OK)
2518 {
2519 This->state = XmlReadState_Initial;
2520 This->instate = XmlReadInState_Initial;
2521 }
2522
2523 return hr;
2524 }
2525
2526 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2527 {
2528 xmlreader *This = impl_from_IXmlReader(iface);
2529
2530 TRACE("(%p)->(%s %p)\n", This, debugstr_prop(property), value);
2531
2532 if (!value) return E_INVALIDARG;
2533
2534 switch (property)
2535 {
2536 case XmlReaderProperty_DtdProcessing:
2537 *value = This->dtdmode;
2538 break;
2539 case XmlReaderProperty_ReadState:
2540 *value = This->state;
2541 break;
2542 default:
2543 FIXME("Unimplemented property (%u)\n", property);
2544 return E_NOTIMPL;
2545 }
2546
2547 return S_OK;
2548 }
2549
2550 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2551 {
2552 xmlreader *This = impl_from_IXmlReader(iface);
2553
2554 TRACE("(%p)->(%s %lu)\n", This, debugstr_prop(property), value);
2555
2556 switch (property)
2557 {
2558 case XmlReaderProperty_DtdProcessing:
2559 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2560 This->dtdmode = value;
2561 break;
2562 default:
2563 FIXME("Unimplemented property (%u)\n", property);
2564 return E_NOTIMPL;
2565 }
2566
2567 return S_OK;
2568 }
2569
2570 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2571 {
2572 xmlreader *This = impl_from_IXmlReader(iface);
2573 XmlNodeType oldtype = This->nodetype;
2574 HRESULT hr;
2575
2576 TRACE("(%p)->(%p)\n", This, nodetype);
2577
2578 if (This->state == XmlReadState_Closed) return S_FALSE;
2579
2580 hr = reader_parse_nextnode(This);
2581 if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
2582 This->state = XmlReadState_Interactive;
2583 if (hr == S_OK)
2584 {
2585 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2586 *nodetype = This->nodetype;
2587 }
2588
2589 return hr;
2590 }
2591
2592 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2593 {
2594 xmlreader *This = impl_from_IXmlReader(iface);
2595 TRACE("(%p)->(%p)\n", This, node_type);
2596
2597 *node_type = reader_get_nodetype(This);
2598 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2599 }
2600
2601 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2602 {
2603 xmlreader *This = impl_from_IXmlReader(iface);
2604
2605 TRACE("(%p)\n", This);
2606
2607 if (!This->attr_count) return S_FALSE;
2608 This->attr = LIST_ENTRY(list_head(&This->attrs), struct attribute, entry);
2609 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2610 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2611
2612 return S_OK;
2613 }
2614
2615 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2616 {
2617 xmlreader *This = impl_from_IXmlReader(iface);
2618 const struct list *next;
2619
2620 TRACE("(%p)\n", This);
2621
2622 if (!This->attr_count) return S_FALSE;
2623
2624 if (!This->attr)
2625 return IXmlReader_MoveToFirstAttribute(iface);
2626
2627 next = list_next(&This->attrs, &This->attr->entry);
2628 if (next)
2629 {
2630 This->attr = LIST_ENTRY(next, struct attribute, entry);
2631 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2632 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2633 }
2634
2635 return next ? S_OK : S_FALSE;
2636 }
2637
2638 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
2639 LPCWSTR local_name,
2640 LPCWSTR namespaceUri)
2641 {
2642 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
2643 return E_NOTIMPL;
2644 }
2645
2646 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
2647 {
2648 xmlreader *This = impl_from_IXmlReader(iface);
2649 struct element *elem;
2650
2651 TRACE("(%p)\n", This);
2652
2653 if (!This->attr_count) return S_FALSE;
2654 This->attr = NULL;
2655
2656 /* FIXME: support other node types with 'attributes' like DTD */
2657 elem = LIST_ENTRY(list_head(&This->elements), struct element, entry);
2658 if (elem)
2659 {
2660 reader_set_strvalue(This, StringValue_QualifiedName, &elem->qname);
2661 reader_set_strvalue(This, StringValue_LocalName, &elem->localname);
2662 }
2663
2664 return S_OK;
2665 }
2666
2667 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2668 {
2669 xmlreader *This = impl_from_IXmlReader(iface);
2670
2671 TRACE("(%p)->(%p %p)\n", This, name, len);
2672 *name = This->strvalues[StringValue_QualifiedName].str;
2673 *len = This->strvalues[StringValue_QualifiedName].len;
2674 return S_OK;
2675 }
2676
2677 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface,
2678 LPCWSTR *namespaceUri,
2679 UINT *namespaceUri_length)
2680 {
2681 FIXME("(%p %p %p): stub\n", iface, namespaceUri, namespaceUri_length);
2682 return E_NOTIMPL;
2683 }
2684
2685 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2686 {
2687 xmlreader *This = impl_from_IXmlReader(iface);
2688
2689 TRACE("(%p)->(%p %p)\n", This, name, len);
2690 *name = This->strvalues[StringValue_LocalName].str;
2691 if (len) *len = This->strvalues[StringValue_LocalName].len;
2692 return S_OK;
2693 }
2694
2695 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, LPCWSTR *prefix, UINT *len)
2696 {
2697 xmlreader *This = impl_from_IXmlReader(iface);
2698
2699 TRACE("(%p)->(%p %p)\n", This, prefix, len);
2700 *prefix = This->strvalues[StringValue_Prefix].str;
2701 if (len) *len = This->strvalues[StringValue_Prefix].len;
2702 return S_OK;
2703 }
2704
2705 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
2706 {
2707 xmlreader *reader = impl_from_IXmlReader(iface);
2708 strval *val = &reader->strvalues[StringValue_Value];
2709
2710 TRACE("(%p)->(%p %p)\n", reader, value, len);
2711
2712 *value = NULL;
2713
2714 if ((reader->nodetype == XmlNodeType_Comment && !val->str) || is_reader_pending(reader))
2715 {
2716 XmlNodeType type;
2717 HRESULT hr;
2718
2719 hr = IXmlReader_Read(iface, &type);
2720 if (FAILED(hr)) return hr;
2721
2722 /* return if still pending, partially read values are not reported */
2723 if (is_reader_pending(reader)) return E_PENDING;
2724 }
2725
2726 if (!val->str)
2727 {
2728 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
2729 if (!ptr) return E_OUTOFMEMORY;
2730 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
2731 ptr[val->len] = 0;
2732 val->str = ptr;
2733 }
2734
2735 *value = val->str;
2736 if (len) *len = val->len;
2737 return S_OK;
2738 }
2739
2740 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
2741 {
2742 xmlreader *reader = impl_from_IXmlReader(iface);
2743 strval *val = &reader->strvalues[StringValue_Value];
2744 UINT len;
2745
2746 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
2747
2748 /* Value is already allocated, chunked reads are not possible. */
2749 if (val->str) return S_FALSE;
2750
2751 if (val->len)
2752 {
2753 len = min(chunk_size, val->len);
2754 memcpy(buffer, reader_get_ptr2(reader, val->start), len);
2755 val->start += len;
2756 val->len -= len;
2757 if (read) *read = len;
2758 }
2759
2760 return S_OK;
2761 }
2762
2763 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
2764 LPCWSTR *baseUri,
2765 UINT *baseUri_length)
2766 {
2767 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
2768 return E_NOTIMPL;
2769 }
2770
2771 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
2772 {
2773 FIXME("(%p): stub\n", iface);
2774 return FALSE;
2775 }
2776
2777 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
2778 {
2779 xmlreader *This = impl_from_IXmlReader(iface);
2780 TRACE("(%p)\n", This);
2781 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
2782 when current node is start tag of an element */
2783 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->empty_element : FALSE;
2784 }
2785
2786 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *lineNumber)
2787 {
2788 xmlreader *This = impl_from_IXmlReader(iface);
2789
2790 TRACE("(%p %p)\n", This, lineNumber);
2791
2792 if (!lineNumber) return E_INVALIDARG;
2793
2794 *lineNumber = This->line;
2795
2796 return S_OK;
2797 }
2798
2799 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *linePosition)
2800 {
2801 xmlreader *This = impl_from_IXmlReader(iface);
2802
2803 TRACE("(%p %p)\n", This, linePosition);
2804
2805 if (!linePosition) return E_INVALIDARG;
2806
2807 *linePosition = This->pos;
2808
2809 return S_OK;
2810 }
2811
2812 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
2813 {
2814 xmlreader *This = impl_from_IXmlReader(iface);
2815
2816 TRACE("(%p)->(%p)\n", This, count);
2817
2818 if (!count) return E_INVALIDARG;
2819
2820 *count = This->attr_count;
2821 return S_OK;
2822 }
2823
2824 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
2825 {
2826 xmlreader *This = impl_from_IXmlReader(iface);
2827 TRACE("(%p)->(%p)\n", This, depth);
2828 *depth = This->depth;
2829 return S_OK;
2830 }
2831
2832 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
2833 {
2834 FIXME("(%p): stub\n", iface);
2835 return E_NOTIMPL;
2836 }
2837
2838 static const struct IXmlReaderVtbl xmlreader_vtbl =
2839 {
2840 xmlreader_QueryInterface,
2841 xmlreader_AddRef,
2842 xmlreader_Release,
2843 xmlreader_SetInput,
2844 xmlreader_GetProperty,
2845 xmlreader_SetProperty,
2846 xmlreader_Read,
2847 xmlreader_GetNodeType,
2848 xmlreader_MoveToFirstAttribute,
2849 xmlreader_MoveToNextAttribute,
2850 xmlreader_MoveToAttributeByName,
2851 xmlreader_MoveToElement,
2852 xmlreader_GetQualifiedName,
2853 xmlreader_GetNamespaceUri,
2854 xmlreader_GetLocalName,
2855 xmlreader_GetPrefix,
2856 xmlreader_GetValue,
2857 xmlreader_ReadValueChunk,
2858 xmlreader_GetBaseUri,
2859 xmlreader_IsDefault,
2860 xmlreader_IsEmptyElement,
2861 xmlreader_GetLineNumber,
2862 xmlreader_GetLinePosition,
2863 xmlreader_GetAttributeCount,
2864 xmlreader_GetDepth,
2865 xmlreader_IsEOF
2866 };
2867
2868 /** IXmlReaderInput **/
2869 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
2870 {
2871 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2872
2873 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2874
2875 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
2876 IsEqualGUID(riid, &IID_IUnknown))
2877 {
2878 *ppvObject = iface;
2879 }
2880 else
2881 {
2882 WARN("interface %s not implemented\n", debugstr_guid(riid));
2883 *ppvObject = NULL;
2884 return E_NOINTERFACE;
2885 }
2886
2887 IUnknown_AddRef(iface);
2888
2889 return S_OK;
2890 }
2891
2892 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
2893 {
2894 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2895 ULONG ref = InterlockedIncrement(&This->ref);
2896 TRACE("(%p)->(%d)\n", This, ref);
2897 return ref;
2898 }
2899
2900 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
2901 {
2902 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2903 LONG ref = InterlockedDecrement(&This->ref);
2904
2905 TRACE("(%p)->(%d)\n", This, ref);
2906
2907 if (ref == 0)
2908 {
2909 IMalloc *imalloc = This->imalloc;
2910 if (This->input) IUnknown_Release(This->input);
2911 if (This->stream) ISequentialStream_Release(This->stream);
2912 if (This->buffer) free_input_buffer(This->buffer);
2913 readerinput_free(This, This->baseuri);
2914 readerinput_free(This, This);
2915 if (imalloc) IMalloc_Release(imalloc);
2916 }
2917
2918 return ref;
2919 }
2920
2921 static const struct IUnknownVtbl xmlreaderinputvtbl =
2922 {
2923 xmlreaderinput_QueryInterface,
2924 xmlreaderinput_AddRef,
2925 xmlreaderinput_Release
2926 };
2927
2928 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
2929 {
2930 xmlreader *reader;
2931 int i;
2932
2933 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
2934
2935 if (!IsEqualGUID(riid, &IID_IXmlReader))
2936 {
2937 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
2938 return E_FAIL;
2939 }
2940
2941 if (imalloc)
2942 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
2943 else
2944 reader = heap_alloc(sizeof(*reader));
2945 if(!reader) return E_OUTOFMEMORY;
2946
2947 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
2948 reader->ref = 1;
2949 reader->input = NULL;
2950 reader->state = XmlReadState_Closed;
2951 reader->instate = XmlReadInState_Initial;
2952 reader->resumestate = XmlReadResumeState_Initial;
2953 reader->dtdmode = DtdProcessing_Prohibit;
2954 reader->line = reader->pos = 0;
2955 reader->imalloc = imalloc;
2956 if (imalloc) IMalloc_AddRef(imalloc);
2957 reader->nodetype = XmlNodeType_None;
2958 list_init(&reader->attrs);
2959 reader->attr_count = 0;
2960 reader->attr = NULL;
2961 list_init(&reader->elements);
2962 reader->depth = 0;
2963 reader->max_depth = 256;
2964 reader->empty_element = FALSE;
2965 memset(reader->resume, 0, sizeof(reader->resume));
2966
2967 for (i = 0; i < StringValue_Last; i++)
2968 reader->strvalues[i] = strval_empty;
2969
2970 *obj = &reader->IXmlReader_iface;
2971
2972 TRACE("returning iface %p\n", *obj);
2973
2974 return S_OK;
2975 }
2976
2977 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
2978 IMalloc *imalloc,
2979 LPCWSTR encoding,
2980 BOOL hint,
2981 LPCWSTR base_uri,
2982 IXmlReaderInput **ppInput)
2983 {
2984 xmlreaderinput *readerinput;
2985 HRESULT hr;
2986
2987 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
2988 hint, wine_dbgstr_w(base_uri), ppInput);
2989
2990 if (!stream || !ppInput) return E_INVALIDARG;
2991
2992 if (imalloc)
2993 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
2994 else
2995 readerinput = heap_alloc(sizeof(*readerinput));
2996 if(!readerinput) return E_OUTOFMEMORY;
2997
2998 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
2999 readerinput->ref = 1;
3000 readerinput->imalloc = imalloc;
3001 readerinput->stream = NULL;
3002 if (imalloc) IMalloc_AddRef(imalloc);
3003 readerinput->encoding = parse_encoding_name(encoding, -1);
3004 readerinput->hint = hint;
3005 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3006 readerinput->pending = 0;
3007
3008 hr = alloc_input_buffer(readerinput);
3009 if (hr != S_OK)
3010 {
3011 readerinput_free(readerinput, readerinput->baseuri);
3012 readerinput_free(readerinput, readerinput);
3013 if (imalloc) IMalloc_Release(imalloc);
3014 return hr;
3015 }
3016 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3017
3018 *ppInput = &readerinput->IXmlReaderInput_iface;
3019
3020 TRACE("returning iface %p\n", *ppInput);
3021
3022 return S_OK;
3023 }