[XMLLITE] Sync with Wine Staging 1.7.55. CORE-10536
[reactos.git] / reactos / dll / win32 / xmllite / reader.c
1 /*
2 * IXmlReader implementation
3 *
4 * Copyright 2010, 2012-2013 Nikolay Sivov
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 */
20
21 #include "xmllite_private.h"
22
23 #include <stdio.h>
24
25 #include <wine/list.h>
26 #include <wine/unicode.h>
27
28 /* not defined in public headers */
29 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
30
/* Internal parser state, kept in xmlreader.instate. */
typedef enum
{
    XmlReadInState_Initial = 0,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    /* optional Misc at the end of a document */
    XmlReadInState_MiscEnd,
    XmlReadInState_Eof
} XmlReaderInternalState;
43
/* Records the construct that was being parsed when the input ran dry,
   so that the reader can pick up parsing from the same place later.
   Kept in xmlreader.resumestate. */
typedef enum
{
    XmlReadResumeState_Initial = 0,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
57
/* Indices into xmlreader.resume[]: saved input offsets used to resume
   parsing from a particular input position. */
typedef enum
{
    /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Name = 0,
    /* local for QName */
    XmlReadResume_Local,
    /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Body,
    /* number of slots, not a real index */
    XmlReadResume_Last
} XmlReaderResume;
66
/* Indices into xmlreader.strvalues[], one slot per string kept for the
   current node. */
typedef enum
{
    StringValue_LocalName = 0,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    /* number of slots, not a real index */
    StringValue_Last
} XmlReaderStringValue;
75
76 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
77 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
78
79 static const WCHAR dblquoteW[] = {'\"',0};
80 static const WCHAR quoteW[] = {'\'',0};
81 static const WCHAR ltW[] = {'<',0};
82 static const WCHAR gtW[] = {'>',0};
83 static const WCHAR commentW[] = {'<','!','-','-',0};
84 static const WCHAR piW[] = {'<','?',0};
85
86 static const char *debugstr_nodetype(XmlNodeType nodetype)
87 {
88 static const char * const type_names[] =
89 {
90 "None",
91 "Element",
92 "Attribute",
93 "Text",
94 "CDATA",
95 "",
96 "",
97 "ProcessingInstruction",
98 "Comment",
99 "",
100 "DocumentType",
101 "",
102 "",
103 "Whitespace",
104 "",
105 "EndElement",
106 "",
107 "XmlDeclaration"
108 };
109
110 if (nodetype > _XmlNodeType_Last)
111 return wine_dbg_sprintf("unknown type=%d", nodetype);
112
113 return type_names[nodetype];
114 }
115
116 static const char *debugstr_reader_prop(XmlReaderProperty prop)
117 {
118 static const char * const prop_names[] =
119 {
120 "MultiLanguage",
121 "ConformanceLevel",
122 "RandomAccess",
123 "XmlResolver",
124 "DtdProcessing",
125 "ReadState",
126 "MaxElementDepth",
127 "MaxEntityExpansion"
128 };
129
130 if (prop > _XmlReaderProperty_Last)
131 return wine_dbg_sprintf("unknown property=%d", prop);
132
133 return prop_names[prop];
134 }
135
136 struct xml_encoding_data
137 {
138 const WCHAR *name;
139 xml_encoding enc;
140 UINT cp;
141 };
142
143 static const struct xml_encoding_data xml_encoding_map[] = {
144 { utf16W, XmlEncoding_UTF16, ~0 },
145 { utf8W, XmlEncoding_UTF8, CP_UTF8 }
146 };
147
148 const WCHAR *get_encoding_name(xml_encoding encoding)
149 {
150 return xml_encoding_map[encoding].name;
151 }
152
153 xml_encoding get_encoding_from_codepage(UINT codepage)
154 {
155 int i;
156 for (i = 0; i < sizeof(xml_encoding_map)/sizeof(xml_encoding_map[0]); i++)
157 {
158 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
159 }
160 return XmlEncoding_Unknown;
161 }
162
163 typedef struct
164 {
165 char *data;
166 UINT cur;
167 unsigned int allocated;
168 unsigned int written;
169 } encoded_buffer;
170
171 typedef struct input_buffer input_buffer;
172
173 typedef struct
174 {
175 IXmlReaderInput IXmlReaderInput_iface;
176 LONG ref;
177 /* reference passed on IXmlReaderInput creation, is kept when input is created */
178 IUnknown *input;
179 IMalloc *imalloc;
180 xml_encoding encoding;
181 BOOL hint;
182 WCHAR *baseuri;
183 /* stream reference set after SetInput() call from reader,
184 stored as sequential stream, cause currently
185 optimizations possible with IStream aren't implemented */
186 ISequentialStream *stream;
187 input_buffer *buffer;
188 unsigned int pending : 1;
189 } xmlreaderinput;
190
191 static const struct IUnknownVtbl xmlreaderinputvtbl;
192
193 /* Structure to hold parsed string of specific length.
194
195 Reader stores node value as 'start' pointer, on request
196 a null-terminated version of it is allocated.
197
198 To init a strval variable use reader_init_strval(),
199 to set strval as a reader value use reader_set_strval().
200 */
201 typedef struct
202 {
203 WCHAR *str; /* allocated null-terminated string */
204 UINT len; /* length in WCHARs, altered after ReadValueChunk */
205 UINT start; /* input position where value starts */
206 } strval;
207
208 static WCHAR emptyW[] = {0};
209 static const strval strval_empty = { emptyW };
210
211 struct attribute
212 {
213 struct list entry;
214 strval localname;
215 strval value;
216 };
217
218 struct element
219 {
220 struct list entry;
221 strval qname;
222 strval localname;
223 };
224
225 typedef struct
226 {
227 IXmlReader IXmlReader_iface;
228 LONG ref;
229 xmlreaderinput *input;
230 IMalloc *imalloc;
231 XmlReadState state;
232 XmlReaderInternalState instate;
233 XmlReaderResumeState resumestate;
234 XmlNodeType nodetype;
235 DtdProcessing dtdmode;
236 UINT line, pos; /* reader position in XML stream */
237 struct list attrs; /* attributes list for current node */
238 struct attribute *attr; /* current attribute */
239 UINT attr_count;
240 struct list elements;
241 strval strvalues[StringValue_Last];
242 UINT depth;
243 UINT max_depth;
244 BOOL empty_element;
245 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
246 } xmlreader;
247
248 struct input_buffer
249 {
250 encoded_buffer utf16;
251 encoded_buffer encoded;
252 UINT code_page;
253 xmlreaderinput *input;
254 };
255
256 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
257 {
258 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
259 }
260
261 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
262 {
263 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
264 }
265
266 /* reader memory allocation functions */
267 static inline void *reader_alloc(xmlreader *reader, size_t len)
268 {
269 return m_alloc(reader->imalloc, len);
270 }
271
272 static inline void reader_free(xmlreader *reader, void *mem)
273 {
274 m_free(reader->imalloc, mem);
275 }
276
277 /* Just return pointer from offset, no attempt to read more. */
278 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
279 {
280 encoded_buffer *buffer = &reader->input->buffer->utf16;
281 return (WCHAR*)buffer->data + offset;
282 }
283
284 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
285 {
286 return v->str ? v->str : reader_get_ptr2(reader, v->start);
287 }
288
289 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
290 {
291 *dest = *src;
292
293 if (src->str != strval_empty.str)
294 {
295 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
296 if (!dest->str) return E_OUTOFMEMORY;
297 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
298 dest->str[dest->len] = 0;
299 dest->start = 0;
300 }
301
302 return S_OK;
303 }
304
305 /* reader input memory allocation functions */
306 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
307 {
308 return m_alloc(input->imalloc, len);
309 }
310
311 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
312 {
313 return m_realloc(input->imalloc, mem, len);
314 }
315
316 static inline void readerinput_free(xmlreaderinput *input, void *mem)
317 {
318 m_free(input->imalloc, mem);
319 }
320
321 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
322 {
323 LPWSTR ret = NULL;
324
325 if(str) {
326 DWORD size;
327
328 size = (strlenW(str)+1)*sizeof(WCHAR);
329 ret = readerinput_alloc(input, size);
330 if (ret) memcpy(ret, str, size);
331 }
332
333 return ret;
334 }
335
336 static void reader_clear_attrs(xmlreader *reader)
337 {
338 struct attribute *attr, *attr2;
339 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
340 {
341 reader_free(reader, attr);
342 }
343 list_init(&reader->attrs);
344 reader->attr_count = 0;
345 reader->attr = NULL;
346 }
347
348 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
349 while we are on a node with attributes */
350 static HRESULT reader_add_attr(xmlreader *reader, strval *localname, strval *value)
351 {
352 struct attribute *attr;
353
354 attr = reader_alloc(reader, sizeof(*attr));
355 if (!attr) return E_OUTOFMEMORY;
356
357 attr->localname = *localname;
358 attr->value = *value;
359 list_add_tail(&reader->attrs, &attr->entry);
360 reader->attr_count++;
361
362 return S_OK;
363 }
364
365 /* This one frees stored string value if needed */
366 static void reader_free_strvalued(xmlreader *reader, strval *v)
367 {
368 if (v->str != strval_empty.str)
369 {
370 reader_free(reader, v->str);
371 *v = strval_empty;
372 }
373 }
374
375 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
376 {
377 v->start = start;
378 v->len = len;
379 v->str = NULL;
380 }
381
382 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
383 {
384 return debugstr_wn(reader_get_strptr(reader, v), v->len);
385 }
386
387 /* used to initialize from constant string */
388 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
389 {
390 v->start = 0;
391 v->len = len;
392 v->str = str;
393 }
394
395 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
396 {
397 reader_free_strvalued(reader, &reader->strvalues[type]);
398 }
399
400 static void reader_free_strvalues(xmlreader *reader)
401 {
402 int type;
403 for (type = 0; type < StringValue_Last; type++)
404 reader_free_strvalue(reader, type);
405 }
406
407 /* This helper should only be used to test if strings are the same,
408 it doesn't try to sort. */
409 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
410 {
411 if (str1->len != str2->len) return 0;
412 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
413 }
414
415 static void reader_clear_elements(xmlreader *reader)
416 {
417 struct element *elem, *elem2;
418 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
419 {
420 reader_free_strvalued(reader, &elem->qname);
421 reader_free(reader, elem);
422 }
423 list_init(&reader->elements);
424 reader->empty_element = FALSE;
425 }
426
427 static HRESULT reader_inc_depth(xmlreader *reader)
428 {
429 if (++reader->depth > reader->max_depth) return SC_E_MAXELEMENTDEPTH;
430 return S_OK;
431 }
432
433 static void reader_dec_depth(xmlreader *reader)
434 {
435 if (reader->depth > 1) reader->depth--;
436 }
437
438 static HRESULT reader_push_element(xmlreader *reader, strval *qname, strval *localname)
439 {
440 struct element *elem;
441 HRESULT hr;
442
443 elem = reader_alloc(reader, sizeof(*elem));
444 if (!elem) return E_OUTOFMEMORY;
445
446 hr = reader_strvaldup(reader, qname, &elem->qname);
447 if (FAILED(hr)) {
448 reader_free(reader, elem);
449 return hr;
450 }
451
452 hr = reader_strvaldup(reader, localname, &elem->localname);
453 if (FAILED(hr))
454 {
455 reader_free_strvalued(reader, &elem->qname);
456 reader_free(reader, elem);
457 return hr;
458 }
459
460 if (!list_empty(&reader->elements))
461 {
462 hr = reader_inc_depth(reader);
463 if (FAILED(hr)) {
464 reader_free(reader, elem);
465 return hr;
466 }
467 }
468
469 list_add_head(&reader->elements, &elem->entry);
470 reader->empty_element = FALSE;
471 return hr;
472 }
473
474 static void reader_pop_element(xmlreader *reader)
475 {
476 struct element *elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
477
478 if (elem)
479 {
480 list_remove(&elem->entry);
481 reader_free_strvalued(reader, &elem->qname);
482 reader_free_strvalued(reader, &elem->localname);
483 reader_free(reader, elem);
484 reader_dec_depth(reader);
485 }
486 }
487
488 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
489 means node value is to be determined. */
490 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
491 {
492 strval *v = &reader->strvalues[type];
493
494 reader_free_strvalue(reader, type);
495 if (!value)
496 {
497 v->str = NULL;
498 v->start = 0;
499 v->len = 0;
500 return;
501 }
502
503 if (value->str == strval_empty.str)
504 *v = *value;
505 else
506 {
507 if (type == StringValue_Value)
508 {
509 /* defer allocation for value string */
510 v->str = NULL;
511 v->start = value->start;
512 v->len = value->len;
513 }
514 else
515 {
516 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
517 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
518 v->str[value->len] = 0;
519 v->len = value->len;
520 }
521 }
522 }
523
524 static inline int is_reader_pending(xmlreader *reader)
525 {
526 return reader->input->pending;
527 }
528
529 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
530 {
531 const int initial_len = 0x2000;
532 buffer->data = readerinput_alloc(input, initial_len);
533 if (!buffer->data) return E_OUTOFMEMORY;
534
535 memset(buffer->data, 0, 4);
536 buffer->cur = 0;
537 buffer->allocated = initial_len;
538 buffer->written = 0;
539
540 return S_OK;
541 }
542
543 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
544 {
545 readerinput_free(input, buffer->data);
546 }
547
548 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
549 {
550 if (encoding == XmlEncoding_Unknown)
551 {
552 FIXME("unsupported encoding %d\n", encoding);
553 return E_NOTIMPL;
554 }
555
556 *cp = xml_encoding_map[encoding].cp;
557
558 return S_OK;
559 }
560
561 xml_encoding parse_encoding_name(const WCHAR *name, int len)
562 {
563 int min, max, n, c;
564
565 if (!name) return XmlEncoding_Unknown;
566
567 min = 0;
568 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
569
570 while (min <= max)
571 {
572 n = (min+max)/2;
573
574 if (len != -1)
575 c = strncmpiW(xml_encoding_map[n].name, name, len);
576 else
577 c = strcmpiW(xml_encoding_map[n].name, name);
578 if (!c)
579 return xml_encoding_map[n].enc;
580
581 if (c > 0)
582 max = n-1;
583 else
584 min = n+1;
585 }
586
587 return XmlEncoding_Unknown;
588 }
589
590 static HRESULT alloc_input_buffer(xmlreaderinput *input)
591 {
592 input_buffer *buffer;
593 HRESULT hr;
594
595 input->buffer = NULL;
596
597 buffer = readerinput_alloc(input, sizeof(*buffer));
598 if (!buffer) return E_OUTOFMEMORY;
599
600 buffer->input = input;
601 buffer->code_page = ~0; /* code page is unknown at this point */
602 hr = init_encoded_buffer(input, &buffer->utf16);
603 if (hr != S_OK) {
604 readerinput_free(input, buffer);
605 return hr;
606 }
607
608 hr = init_encoded_buffer(input, &buffer->encoded);
609 if (hr != S_OK) {
610 free_encoded_buffer(input, &buffer->utf16);
611 readerinput_free(input, buffer);
612 return hr;
613 }
614
615 input->buffer = buffer;
616 return S_OK;
617 }
618
619 static void free_input_buffer(input_buffer *buffer)
620 {
621 free_encoded_buffer(buffer->input, &buffer->encoded);
622 free_encoded_buffer(buffer->input, &buffer->utf16);
623 readerinput_free(buffer->input, buffer);
624 }
625
626 static void readerinput_release_stream(xmlreaderinput *readerinput)
627 {
628 if (readerinput->stream) {
629 ISequentialStream_Release(readerinput->stream);
630 readerinput->stream = NULL;
631 }
632 }
633
634 /* Queries already stored interface for IStream/ISequentialStream.
635 Interface supplied on creation will be overwritten */
636 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
637 {
638 HRESULT hr;
639
640 readerinput_release_stream(readerinput);
641 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
642 if (hr != S_OK)
643 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
644
645 return hr;
646 }
647
648 /* reads a chunk to raw buffer */
649 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
650 {
651 encoded_buffer *buffer = &readerinput->buffer->encoded;
652 /* to make sure aligned length won't exceed allocated length */
653 ULONG len = buffer->allocated - buffer->written - 4;
654 ULONG read;
655 HRESULT hr;
656
657 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
658 variable width encodings like UTF-8 */
659 len = (len + 3) & ~3;
660 /* try to use allocated space or grow */
661 if (buffer->allocated - buffer->written < len)
662 {
663 buffer->allocated *= 2;
664 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
665 len = buffer->allocated - buffer->written;
666 }
667
668 read = 0;
669 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
670 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
671 readerinput->pending = hr == E_PENDING;
672 if (FAILED(hr)) return hr;
673 buffer->written += read;
674
675 return hr;
676 }
677
678 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
679 static void readerinput_grow(xmlreaderinput *readerinput, int length)
680 {
681 encoded_buffer *buffer = &readerinput->buffer->utf16;
682
683 length *= sizeof(WCHAR);
684 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
685 if (buffer->allocated < buffer->written + length + 4)
686 {
687 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
688 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
689 buffer->allocated = grown_size;
690 }
691 }
692
693 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
694 {
695 static const char startA[] = {'<','?'};
696 static const char commentA[] = {'<','!'};
697 encoded_buffer *buffer = &readerinput->buffer->encoded;
698 unsigned char *ptr = (unsigned char*)buffer->data;
699
700 return !memcmp(buffer->data, startA, sizeof(startA)) ||
701 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
702 /* test start byte */
703 (ptr[0] == '<' &&
704 (
705 (ptr[1] && (ptr[1] <= 0x7f)) ||
706 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
707 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
708 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
709 );
710 }
711
712 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
713 {
714 encoded_buffer *buffer = &readerinput->buffer->encoded;
715 static const WCHAR startW[] = {'<','?'};
716 static const WCHAR commentW[] = {'<','!'};
717 static const char utf8bom[] = {0xef,0xbb,0xbf};
718 static const char utf16lebom[] = {0xff,0xfe};
719
720 *enc = XmlEncoding_Unknown;
721
722 if (buffer->written <= 3)
723 {
724 HRESULT hr = readerinput_growraw(readerinput);
725 if (FAILED(hr)) return hr;
726 if (buffer->written <= 3) return MX_E_INPUTEND;
727 }
728
729 /* try start symbols if we have enough data to do that, input buffer should contain
730 first chunk already */
731 if (readerinput_is_utf8(readerinput))
732 *enc = XmlEncoding_UTF8;
733 else if (!memcmp(buffer->data, startW, sizeof(startW)) ||
734 !memcmp(buffer->data, commentW, sizeof(commentW)))
735 *enc = XmlEncoding_UTF16;
736 /* try with BOM now */
737 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
738 {
739 buffer->cur += sizeof(utf8bom);
740 *enc = XmlEncoding_UTF8;
741 }
742 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
743 {
744 buffer->cur += sizeof(utf16lebom);
745 *enc = XmlEncoding_UTF16;
746 }
747
748 return S_OK;
749 }
750
751 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
752 {
753 encoded_buffer *buffer = &readerinput->buffer->encoded;
754 int len = buffer->written;
755
756 /* complete single byte char */
757 if (!(buffer->data[len-1] & 0x80)) return len;
758
759 /* find start byte of multibyte char */
760 while (--len && !(buffer->data[len] & 0xc0))
761 ;
762
763 return len;
764 }
765
766 /* Returns byte length of complete char sequence for buffer code page,
767 it's relative to current buffer position which is currently used for BOM handling
768 only. */
769 static int readerinput_get_convlen(xmlreaderinput *readerinput)
770 {
771 encoded_buffer *buffer = &readerinput->buffer->encoded;
772 int len;
773
774 if (readerinput->buffer->code_page == CP_UTF8)
775 len = readerinput_get_utf8_convlen(readerinput);
776 else
777 len = buffer->written;
778
779 TRACE("%d\n", len - buffer->cur);
780 return len - buffer->cur;
781 }
782
783 /* It's possible that raw buffer has some leftovers from last conversion - some char
784 sequence that doesn't represent a full code point. Length argument should be calculated with
785 readerinput_get_convlen(), if it's -1 it will be calculated here. */
786 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
787 {
788 encoded_buffer *buffer = &readerinput->buffer->encoded;
789
790 if (len == -1)
791 len = readerinput_get_convlen(readerinput);
792
793 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
794 /* everything below cur is lost too */
795 buffer->written -= len + buffer->cur;
796 /* after this point we don't need cur offset really,
797 it's used only to mark where actual data begins when first chunk is read */
798 buffer->cur = 0;
799 }
800
801 /* note that raw buffer content is kept */
802 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
803 {
804 encoded_buffer *src = &readerinput->buffer->encoded;
805 encoded_buffer *dest = &readerinput->buffer->utf16;
806 int len, dest_len;
807 HRESULT hr;
808 WCHAR *ptr;
809 UINT cp;
810
811 hr = get_code_page(enc, &cp);
812 if (FAILED(hr)) return;
813
814 readerinput->buffer->code_page = cp;
815 len = readerinput_get_convlen(readerinput);
816
817 TRACE("switching to cp %d\n", cp);
818
819 /* just copy in this case */
820 if (enc == XmlEncoding_UTF16)
821 {
822 readerinput_grow(readerinput, len);
823 memcpy(dest->data, src->data + src->cur, len);
824 dest->written += len*sizeof(WCHAR);
825 return;
826 }
827
828 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
829 readerinput_grow(readerinput, dest_len);
830 ptr = (WCHAR*)dest->data;
831 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
832 ptr[dest_len] = 0;
833 dest->written += dest_len*sizeof(WCHAR);
834 }
835
836 /* shrinks parsed data a buffer begins with */
837 static void reader_shrink(xmlreader *reader)
838 {
839 encoded_buffer *buffer = &reader->input->buffer->utf16;
840
841 /* avoid to move too often using threshold shrink length */
842 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
843 {
844 buffer->written -= buffer->cur*sizeof(WCHAR);
845 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
846 buffer->cur = 0;
847 *(WCHAR*)&buffer->data[buffer->written] = 0;
848 }
849 }
850
851 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
852 It won't attempt to shrink but will grow destination buffer if needed */
853 static HRESULT reader_more(xmlreader *reader)
854 {
855 xmlreaderinput *readerinput = reader->input;
856 encoded_buffer *src = &readerinput->buffer->encoded;
857 encoded_buffer *dest = &readerinput->buffer->utf16;
858 UINT cp = readerinput->buffer->code_page;
859 int len, dest_len;
860 HRESULT hr;
861 WCHAR *ptr;
862
863 /* get some raw data from stream first */
864 hr = readerinput_growraw(readerinput);
865 len = readerinput_get_convlen(readerinput);
866
867 /* just copy for UTF-16 case */
868 if (cp == ~0)
869 {
870 readerinput_grow(readerinput, len);
871 memcpy(dest->data + dest->written, src->data + src->cur, len);
872 dest->written += len*sizeof(WCHAR);
873 return hr;
874 }
875
876 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
877 readerinput_grow(readerinput, dest_len);
878 ptr = (WCHAR*)(dest->data + dest->written);
879 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
880 ptr[dest_len] = 0;
881 dest->written += dest_len*sizeof(WCHAR);
882 /* get rid of processed data */
883 readerinput_shrinkraw(readerinput, len);
884
885 return hr;
886 }
887
888 static inline UINT reader_get_cur(xmlreader *reader)
889 {
890 return reader->input->buffer->utf16.cur;
891 }
892
893 static inline WCHAR *reader_get_ptr(xmlreader *reader)
894 {
895 encoded_buffer *buffer = &reader->input->buffer->utf16;
896 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
897 if (!*ptr) reader_more(reader);
898 return (WCHAR*)buffer->data + buffer->cur;
899 }
900
901 static int reader_cmp(xmlreader *reader, const WCHAR *str)
902 {
903 int i=0;
904 const WCHAR *ptr = reader_get_ptr(reader);
905 while (str[i])
906 {
907 if (!ptr[i])
908 {
909 reader_more(reader);
910 ptr = reader_get_ptr(reader);
911 }
912 if (str[i] != ptr[i])
913 return ptr[i] - str[i];
914 i++;
915 }
916 return 0;
917 }
918
919 /* moves cursor n WCHARs forward */
920 static void reader_skipn(xmlreader *reader, int n)
921 {
922 encoded_buffer *buffer = &reader->input->buffer->utf16;
923 const WCHAR *ptr = reader_get_ptr(reader);
924
925 while (*ptr++ && n--)
926 {
927 buffer->cur++;
928 reader->pos++;
929 }
930 }
931
932 static inline BOOL is_wchar_space(WCHAR ch)
933 {
934 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
935 }
936
937 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
938 static int reader_skipspaces(xmlreader *reader)
939 {
940 encoded_buffer *buffer = &reader->input->buffer->utf16;
941 const WCHAR *ptr = reader_get_ptr(reader);
942 UINT start = reader_get_cur(reader);
943
944 while (is_wchar_space(*ptr))
945 {
946 if (*ptr == '\r')
947 reader->pos = 0;
948 else if (*ptr == '\n')
949 {
950 reader->line++;
951 reader->pos = 0;
952 }
953 else
954 reader->pos++;
955
956 buffer->cur++;
957 ptr = reader_get_ptr(reader);
958 }
959
960 return reader_get_cur(reader) - start;
961 }
962
963 /* [26] VersionNum ::= '1.' [0-9]+ */
964 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
965 {
966 static const WCHAR onedotW[] = {'1','.',0};
967 WCHAR *ptr, *ptr2;
968 UINT start;
969
970 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
971
972 start = reader_get_cur(reader);
973 /* skip "1." */
974 reader_skipn(reader, 2);
975
976 ptr2 = ptr = reader_get_ptr(reader);
977 while (*ptr >= '0' && *ptr <= '9')
978 {
979 reader_skipn(reader, 1);
980 ptr = reader_get_ptr(reader);
981 }
982
983 if (ptr2 == ptr) return WC_E_DIGIT;
984 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
985 TRACE("version=%s\n", debug_strval(reader, val));
986 return S_OK;
987 }
988
989 /* [25] Eq ::= S? '=' S? */
990 static HRESULT reader_parse_eq(xmlreader *reader)
991 {
992 static const WCHAR eqW[] = {'=',0};
993 reader_skipspaces(reader);
994 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
995 /* skip '=' */
996 reader_skipn(reader, 1);
997 reader_skipspaces(reader);
998 return S_OK;
999 }
1000
1001 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1002 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1003 {
1004 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1005 strval val, name;
1006 HRESULT hr;
1007
1008 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1009
1010 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1011 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1012 /* skip 'version' */
1013 reader_skipn(reader, 7);
1014
1015 hr = reader_parse_eq(reader);
1016 if (FAILED(hr)) return hr;
1017
1018 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1019 return WC_E_QUOTE;
1020 /* skip "'"|'"' */
1021 reader_skipn(reader, 1);
1022
1023 hr = reader_parse_versionnum(reader, &val);
1024 if (FAILED(hr)) return hr;
1025
1026 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1027 return WC_E_QUOTE;
1028
1029 /* skip "'"|'"' */
1030 reader_skipn(reader, 1);
1031
1032 return reader_add_attr(reader, &name, &val);
1033 }
1034
1035 /* ([A-Za-z0-9._] | '-') */
1036 static inline BOOL is_wchar_encname(WCHAR ch)
1037 {
1038 return ((ch >= 'A' && ch <= 'Z') ||
1039 (ch >= 'a' && ch <= 'z') ||
1040 (ch >= '0' && ch <= '9') ||
1041 (ch == '.') || (ch == '_') ||
1042 (ch == '-'));
1043 }
1044
1045 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1046 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1047 {
1048 WCHAR *start = reader_get_ptr(reader), *ptr;
1049 xml_encoding enc;
1050 int len;
1051
1052 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1053 return WC_E_ENCNAME;
1054
1055 val->start = reader_get_cur(reader);
1056
1057 ptr = start;
1058 while (is_wchar_encname(*++ptr))
1059 ;
1060
1061 len = ptr - start;
1062 enc = parse_encoding_name(start, len);
1063 TRACE("encoding name %s\n", debugstr_wn(start, len));
1064 val->str = start;
1065 val->len = len;
1066
1067 if (enc == XmlEncoding_Unknown)
1068 return WC_E_ENCNAME;
1069
1070 /* skip encoding name */
1071 reader_skipn(reader, len);
1072 return S_OK;
1073 }
1074
1075 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1076 static HRESULT reader_parse_encdecl(xmlreader *reader)
1077 {
1078 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1079 strval name, val;
1080 HRESULT hr;
1081
1082 if (!reader_skipspaces(reader)) return S_FALSE;
1083
1084 if (reader_cmp(reader, encodingW)) return S_FALSE;
1085 name.str = reader_get_ptr(reader);
1086 name.start = reader_get_cur(reader);
1087 name.len = 8;
1088 /* skip 'encoding' */
1089 reader_skipn(reader, 8);
1090
1091 hr = reader_parse_eq(reader);
1092 if (FAILED(hr)) return hr;
1093
1094 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1095 return WC_E_QUOTE;
1096 /* skip "'"|'"' */
1097 reader_skipn(reader, 1);
1098
1099 hr = reader_parse_encname(reader, &val);
1100 if (FAILED(hr)) return hr;
1101
1102 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1103 return WC_E_QUOTE;
1104
1105 /* skip "'"|'"' */
1106 reader_skipn(reader, 1);
1107
1108 return reader_add_attr(reader, &name, &val);
1109 }
1110
1111 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1112 static HRESULT reader_parse_sddecl(xmlreader *reader)
1113 {
1114 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1115 static const WCHAR yesW[] = {'y','e','s',0};
1116 static const WCHAR noW[] = {'n','o',0};
1117 strval name, val;
1118 UINT start;
1119 HRESULT hr;
1120
1121 if (!reader_skipspaces(reader)) return S_FALSE;
1122
1123 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1124 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1125 /* skip 'standalone' */
1126 reader_skipn(reader, 10);
1127
1128 hr = reader_parse_eq(reader);
1129 if (FAILED(hr)) return hr;
1130
1131 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1132 return WC_E_QUOTE;
1133 /* skip "'"|'"' */
1134 reader_skipn(reader, 1);
1135
1136 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1137 return WC_E_XMLDECL;
1138
1139 start = reader_get_cur(reader);
1140 /* skip 'yes'|'no' */
1141 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1142 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1143 TRACE("standalone=%s\n", debug_strval(reader, &val));
1144
1145 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1146 return WC_E_QUOTE;
1147 /* skip "'"|'"' */
1148 reader_skipn(reader, 1);
1149
1150 return reader_add_attr(reader, &name, &val);
1151 }
1152
1153 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1154 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1155 {
1156 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1157 static const WCHAR declcloseW[] = {'?','>',0};
1158 HRESULT hr;
1159
1160 /* check if we have "<?xml " */
1161 if (reader_cmp(reader, xmldeclW)) return S_FALSE;
1162
1163 reader_skipn(reader, 5);
1164 hr = reader_parse_versioninfo(reader);
1165 if (FAILED(hr))
1166 return hr;
1167
1168 hr = reader_parse_encdecl(reader);
1169 if (FAILED(hr))
1170 return hr;
1171
1172 hr = reader_parse_sddecl(reader);
1173 if (FAILED(hr))
1174 return hr;
1175
1176 reader_skipspaces(reader);
1177 if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
1178 reader_skipn(reader, 2);
1179
1180 reader_inc_depth(reader);
1181 reader->nodetype = XmlNodeType_XmlDeclaration;
1182 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1183 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1184 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1185
1186 return S_OK;
1187 }
1188
1189 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1190 static HRESULT reader_parse_comment(xmlreader *reader)
1191 {
1192 WCHAR *ptr;
1193 UINT start;
1194
1195 if (reader->resumestate == XmlReadResumeState_Comment)
1196 {
1197 start = reader->resume[XmlReadResume_Body];
1198 ptr = reader_get_ptr(reader);
1199 }
1200 else
1201 {
1202 /* skip '<!--' */
1203 reader_skipn(reader, 4);
1204 reader_shrink(reader);
1205 ptr = reader_get_ptr(reader);
1206 start = reader_get_cur(reader);
1207 reader->nodetype = XmlNodeType_Comment;
1208 reader->resume[XmlReadResume_Body] = start;
1209 reader->resumestate = XmlReadResumeState_Comment;
1210 reader_set_strvalue(reader, StringValue_LocalName, NULL);
1211 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
1212 reader_set_strvalue(reader, StringValue_Value, NULL);
1213 }
1214
1215 /* will exit when there's no more data, it won't attempt to
1216 read more from stream */
1217 while (*ptr)
1218 {
1219 if (ptr[0] == '-')
1220 {
1221 if (ptr[1] == '-')
1222 {
1223 if (ptr[2] == '>')
1224 {
1225 strval value;
1226
1227 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1228 TRACE("%s\n", debug_strval(reader, &value));
1229
1230 /* skip rest of markup '->' */
1231 reader_skipn(reader, 3);
1232
1233 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1234 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1235 reader_set_strvalue(reader, StringValue_Value, &value);
1236 reader->resume[XmlReadResume_Body] = 0;
1237 reader->resumestate = XmlReadResumeState_Initial;
1238 return S_OK;
1239 }
1240 else
1241 return WC_E_COMMENT;
1242 }
1243 }
1244
1245 reader_skipn(reader, 1);
1246 ptr++;
1247 }
1248
1249 return S_OK;
1250 }
1251
1252 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1253 static inline BOOL is_char(WCHAR ch)
1254 {
1255 return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
1256 (ch >= 0x20 && ch <= 0xd7ff) ||
1257 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1258 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1259 (ch >= 0xe000 && ch <= 0xfffd);
1260 }
1261
/* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]

   Returns non-zero when 'ch' is allowed inside a public identifier literal.

   The punctuation range starts at the apostrophe (0x27) so that it really
   covers '()*+,-./ followed by the digits and :; - starting it at '-' would
   reject the apostrophe, parentheses, asterisk, plus sign and comma that
   the production allows. */
static inline BOOL is_pubchar(WCHAR ch)
{
    return (ch == ' ') ||
           (ch >= 'a' && ch <= 'z') ||
           (ch >= 'A' && ch <= 'Z') ||
           (ch >= '0' && ch <= '9') ||
           (ch >= '\'' && ch <= ';') || /* '()*+,-./:; */
           (ch == '=') || (ch == '?') ||
           (ch == '@') || (ch == '!') ||
           (ch >= '#' && ch <= '%') || /* #$% */
           (ch == '_') || (ch == '\r') || (ch == '\n');
}
1275
1276 static inline BOOL is_namestartchar(WCHAR ch)
1277 {
1278 return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
1279 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1280 (ch >= 0xc0 && ch <= 0xd6) ||
1281 (ch >= 0xd8 && ch <= 0xf6) ||
1282 (ch >= 0xf8 && ch <= 0x2ff) ||
1283 (ch >= 0x370 && ch <= 0x37d) ||
1284 (ch >= 0x37f && ch <= 0x1fff) ||
1285 (ch >= 0x200c && ch <= 0x200d) ||
1286 (ch >= 0x2070 && ch <= 0x218f) ||
1287 (ch >= 0x2c00 && ch <= 0x2fef) ||
1288 (ch >= 0x3001 && ch <= 0xd7ff) ||
1289 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1290 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1291 (ch >= 0xf900 && ch <= 0xfdcf) ||
1292 (ch >= 0xfdf0 && ch <= 0xfffd);
1293 }
1294
1295 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1296 static inline BOOL is_ncnamechar(WCHAR ch)
1297 {
1298 return (ch >= 'A' && ch <= 'Z') ||
1299 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1300 (ch == '-') || (ch == '.') ||
1301 (ch >= '0' && ch <= '9') ||
1302 (ch == 0xb7) ||
1303 (ch >= 0xc0 && ch <= 0xd6) ||
1304 (ch >= 0xd8 && ch <= 0xf6) ||
1305 (ch >= 0xf8 && ch <= 0x2ff) ||
1306 (ch >= 0x300 && ch <= 0x36f) ||
1307 (ch >= 0x370 && ch <= 0x37d) ||
1308 (ch >= 0x37f && ch <= 0x1fff) ||
1309 (ch >= 0x200c && ch <= 0x200d) ||
1310 (ch >= 0x203f && ch <= 0x2040) ||
1311 (ch >= 0x2070 && ch <= 0x218f) ||
1312 (ch >= 0x2c00 && ch <= 0x2fef) ||
1313 (ch >= 0x3001 && ch <= 0xd7ff) ||
1314 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1315 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1316 (ch >= 0xf900 && ch <= 0xfdcf) ||
1317 (ch >= 0xfdf0 && ch <= 0xfffd);
1318 }
1319
1320 static inline BOOL is_namechar(WCHAR ch)
1321 {
1322 return (ch == ':') || is_ncnamechar(ch);
1323 }
1324
1325 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1326 {
1327 /* When we're on attribute always return attribute type, container node type is kept.
1328 Note that container is not necessarily an element, and attribute doesn't mean it's
1329 an attribute in XML spec terms. */
1330 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1331 }
1332
1333 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1334 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1335 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1336 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1337 [5] Name ::= NameStartChar (NameChar)* */
1338 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1339 {
1340 WCHAR *ptr;
1341 UINT start;
1342
1343 if (reader->resume[XmlReadResume_Name])
1344 {
1345 start = reader->resume[XmlReadResume_Name];
1346 ptr = reader_get_ptr(reader);
1347 }
1348 else
1349 {
1350 ptr = reader_get_ptr(reader);
1351 start = reader_get_cur(reader);
1352 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1353 }
1354
1355 while (is_namechar(*ptr))
1356 {
1357 reader_skipn(reader, 1);
1358 ptr = reader_get_ptr(reader);
1359 }
1360
1361 if (is_reader_pending(reader))
1362 {
1363 reader->resume[XmlReadResume_Name] = start;
1364 return E_PENDING;
1365 }
1366 else
1367 reader->resume[XmlReadResume_Name] = 0;
1368
1369 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1370 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1371
1372 return S_OK;
1373 }
1374
1375 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1376 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1377 {
1378 static const WCHAR xmlW[] = {'x','m','l'};
1379 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1380 strval name;
1381 WCHAR *ptr;
1382 HRESULT hr;
1383 UINT i;
1384
1385 hr = reader_parse_name(reader, &name);
1386 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1387
1388 /* now that we got name check for illegal content */
1389 if (strval_eq(reader, &name, &xmlval))
1390 return WC_E_LEADINGXML;
1391
1392 /* PITarget can't be a qualified name */
1393 ptr = reader_get_strptr(reader, &name);
1394 for (i = 0; i < name.len; i++)
1395 if (ptr[i] == ':')
1396 return i ? NC_E_NAMECOLON : WC_E_PI;
1397
1398 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1399 *target = name;
1400 return S_OK;
1401 }
1402
1403 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1404 static HRESULT reader_parse_pi(xmlreader *reader)
1405 {
1406 strval target;
1407 WCHAR *ptr;
1408 UINT start;
1409 HRESULT hr;
1410
1411 switch (reader->resumestate)
1412 {
1413 case XmlReadResumeState_Initial:
1414 /* skip '<?' */
1415 reader_skipn(reader, 2);
1416 reader_shrink(reader);
1417 reader->resumestate = XmlReadResumeState_PITarget;
1418 case XmlReadResumeState_PITarget:
1419 hr = reader_parse_pitarget(reader, &target);
1420 if (FAILED(hr)) return hr;
1421 reader_set_strvalue(reader, StringValue_LocalName, &target);
1422 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1423 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1424 reader->resumestate = XmlReadResumeState_PIBody;
1425 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1426 default:
1427 ;
1428 }
1429
1430 start = reader->resume[XmlReadResume_Body];
1431 ptr = reader_get_ptr(reader);
1432 while (*ptr)
1433 {
1434 if (ptr[0] == '?')
1435 {
1436 if (ptr[1] == '>')
1437 {
1438 UINT cur = reader_get_cur(reader);
1439 strval value;
1440
1441 /* strip all leading whitespace chars */
1442 while (start < cur)
1443 {
1444 ptr = reader_get_ptr2(reader, start);
1445 if (!is_wchar_space(*ptr)) break;
1446 start++;
1447 }
1448
1449 reader_init_strvalue(start, cur-start, &value);
1450
1451 /* skip '?>' */
1452 reader_skipn(reader, 2);
1453 TRACE("%s\n", debug_strval(reader, &value));
1454 reader->nodetype = XmlNodeType_ProcessingInstruction;
1455 reader->resumestate = XmlReadResumeState_Initial;
1456 reader->resume[XmlReadResume_Body] = 0;
1457 reader_set_strvalue(reader, StringValue_Value, &value);
1458 return S_OK;
1459 }
1460 }
1461
1462 reader_skipn(reader, 1);
1463 ptr = reader_get_ptr(reader);
1464 }
1465
1466 return S_OK;
1467 }
1468
1469 /* This one is used to parse significant whitespace nodes, like in Misc production */
1470 static HRESULT reader_parse_whitespace(xmlreader *reader)
1471 {
1472 switch (reader->resumestate)
1473 {
1474 case XmlReadResumeState_Initial:
1475 reader_shrink(reader);
1476 reader->resumestate = XmlReadResumeState_Whitespace;
1477 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1478 reader->nodetype = XmlNodeType_Whitespace;
1479 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1480 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1481 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1482 /* fallthrough */
1483 case XmlReadResumeState_Whitespace:
1484 {
1485 strval value;
1486 UINT start;
1487
1488 reader_skipspaces(reader);
1489 if (is_reader_pending(reader)) return S_OK;
1490
1491 start = reader->resume[XmlReadResume_Body];
1492 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1493 reader_set_strvalue(reader, StringValue_Value, &value);
1494 TRACE("%s\n", debug_strval(reader, &value));
1495 reader->resumestate = XmlReadResumeState_Initial;
1496 }
1497 default:
1498 ;
1499 }
1500
1501 return S_OK;
1502 }
1503
1504 /* [27] Misc ::= Comment | PI | S */
1505 static HRESULT reader_parse_misc(xmlreader *reader)
1506 {
1507 HRESULT hr = S_FALSE;
1508
1509 if (reader->resumestate != XmlReadResumeState_Initial)
1510 {
1511 hr = reader_more(reader);
1512 if (FAILED(hr)) return hr;
1513
1514 /* finish current node */
1515 switch (reader->resumestate)
1516 {
1517 case XmlReadResumeState_PITarget:
1518 case XmlReadResumeState_PIBody:
1519 return reader_parse_pi(reader);
1520 case XmlReadResumeState_Comment:
1521 return reader_parse_comment(reader);
1522 case XmlReadResumeState_Whitespace:
1523 return reader_parse_whitespace(reader);
1524 default:
1525 ERR("unknown resume state %d\n", reader->resumestate);
1526 }
1527 }
1528
1529 while (1)
1530 {
1531 const WCHAR *cur = reader_get_ptr(reader);
1532
1533 if (is_wchar_space(*cur))
1534 hr = reader_parse_whitespace(reader);
1535 else if (!reader_cmp(reader, commentW))
1536 hr = reader_parse_comment(reader);
1537 else if (!reader_cmp(reader, piW))
1538 hr = reader_parse_pi(reader);
1539 else
1540 break;
1541
1542 if (hr != S_FALSE) return hr;
1543 }
1544
1545 return hr;
1546 }
1547
1548 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1549 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1550 {
1551 WCHAR *cur = reader_get_ptr(reader), quote;
1552 UINT start;
1553
1554 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1555
1556 quote = *cur;
1557 reader_skipn(reader, 1);
1558
1559 cur = reader_get_ptr(reader);
1560 start = reader_get_cur(reader);
1561 while (is_char(*cur) && *cur != quote)
1562 {
1563 reader_skipn(reader, 1);
1564 cur = reader_get_ptr(reader);
1565 }
1566 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1567 if (*cur == quote) reader_skipn(reader, 1);
1568
1569 TRACE("%s\n", debug_strval(reader, literal));
1570 return S_OK;
1571 }
1572
1573 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1574 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1575 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1576 {
1577 WCHAR *cur = reader_get_ptr(reader), quote;
1578 UINT start;
1579
1580 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1581
1582 quote = *cur;
1583 reader_skipn(reader, 1);
1584
1585 start = reader_get_cur(reader);
1586 cur = reader_get_ptr(reader);
1587 while (is_pubchar(*cur) && *cur != quote)
1588 {
1589 reader_skipn(reader, 1);
1590 cur = reader_get_ptr(reader);
1591 }
1592
1593 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1594 TRACE("%s\n", debug_strval(reader, literal));
1595 return S_OK;
1596 }
1597
1598 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1599 static HRESULT reader_parse_externalid(xmlreader *reader)
1600 {
1601 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1602 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1603 strval name;
1604 HRESULT hr;
1605 int cnt;
1606
1607 if (reader_cmp(reader, systemW))
1608 {
1609 if (reader_cmp(reader, publicW))
1610 return S_FALSE;
1611 else
1612 {
1613 strval pub;
1614
1615 /* public id */
1616 reader_skipn(reader, 6);
1617 cnt = reader_skipspaces(reader);
1618 if (!cnt) return WC_E_WHITESPACE;
1619
1620 hr = reader_parse_pub_literal(reader, &pub);
1621 if (FAILED(hr)) return hr;
1622
1623 reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1624 return reader_add_attr(reader, &name, &pub);
1625 }
1626 }
1627 else
1628 {
1629 strval sys;
1630
1631 /* system id */
1632 reader_skipn(reader, 6);
1633 cnt = reader_skipspaces(reader);
1634 if (!cnt) return WC_E_WHITESPACE;
1635
1636 hr = reader_parse_sys_literal(reader, &sys);
1637 if (FAILED(hr)) return hr;
1638
1639 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1640 return reader_add_attr(reader, &name, &sys);
1641 }
1642
1643 return hr;
1644 }
1645
1646 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1647 static HRESULT reader_parse_dtd(xmlreader *reader)
1648 {
1649 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1650 strval name;
1651 WCHAR *cur;
1652 HRESULT hr;
1653
1654 /* check if we have "<!DOCTYPE" */
1655 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1656 reader_shrink(reader);
1657
1658 /* DTD processing is not allowed by default */
1659 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1660
1661 reader_skipn(reader, 9);
1662 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1663
1664 /* name */
1665 hr = reader_parse_name(reader, &name);
1666 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1667
1668 reader_skipspaces(reader);
1669
1670 hr = reader_parse_externalid(reader);
1671 if (FAILED(hr)) return hr;
1672
1673 reader_skipspaces(reader);
1674
1675 cur = reader_get_ptr(reader);
1676 if (*cur != '>')
1677 {
1678 FIXME("internal subset parsing not implemented\n");
1679 return E_NOTIMPL;
1680 }
1681
1682 /* skip '>' */
1683 reader_skipn(reader, 1);
1684
1685 reader->nodetype = XmlNodeType_DocumentType;
1686 reader_set_strvalue(reader, StringValue_LocalName, &name);
1687 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1688
1689 return S_OK;
1690 }
1691
1692 /* [11 NS] LocalPart ::= NCName */
1693 static HRESULT reader_parse_local(xmlreader *reader, strval *local)
1694 {
1695 WCHAR *ptr;
1696 UINT start;
1697
1698 if (reader->resume[XmlReadResume_Local])
1699 {
1700 start = reader->resume[XmlReadResume_Local];
1701 ptr = reader_get_ptr(reader);
1702 }
1703 else
1704 {
1705 ptr = reader_get_ptr(reader);
1706 start = reader_get_cur(reader);
1707 }
1708
1709 while (is_ncnamechar(*ptr))
1710 {
1711 reader_skipn(reader, 1);
1712 ptr = reader_get_ptr(reader);
1713 }
1714
1715 if (is_reader_pending(reader))
1716 {
1717 reader->resume[XmlReadResume_Local] = start;
1718 return E_PENDING;
1719 }
1720 else
1721 reader->resume[XmlReadResume_Local] = 0;
1722
1723 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1724
1725 return S_OK;
1726 }
1727
1728 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1729 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1730 [9 NS] UnprefixedName ::= LocalPart
1731 [10 NS] Prefix ::= NCName */
1732 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1733 {
1734 WCHAR *ptr;
1735 UINT start;
1736 HRESULT hr;
1737
1738 if (reader->resume[XmlReadResume_Name])
1739 {
1740 start = reader->resume[XmlReadResume_Name];
1741 ptr = reader_get_ptr(reader);
1742 }
1743 else
1744 {
1745 ptr = reader_get_ptr(reader);
1746 start = reader_get_cur(reader);
1747 reader->resume[XmlReadResume_Name] = start;
1748 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1749 }
1750
1751 if (reader->resume[XmlReadResume_Local])
1752 {
1753 hr = reader_parse_local(reader, local);
1754 if (FAILED(hr)) return hr;
1755
1756 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1757 local->start - reader->resume[XmlReadResume_Name] - 1,
1758 prefix);
1759 }
1760 else
1761 {
1762 /* skip prefix part */
1763 while (is_ncnamechar(*ptr))
1764 {
1765 reader_skipn(reader, 1);
1766 ptr = reader_get_ptr(reader);
1767 }
1768
1769 if (is_reader_pending(reader)) return E_PENDING;
1770
1771 /* got a qualified name */
1772 if (*ptr == ':')
1773 {
1774 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
1775
1776 /* skip ':' */
1777 reader_skipn(reader, 1);
1778 hr = reader_parse_local(reader, local);
1779 if (FAILED(hr)) return hr;
1780 }
1781 else
1782 {
1783 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
1784 reader_init_strvalue(0, 0, prefix);
1785 }
1786 }
1787
1788 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1789
1790 if (prefix->len)
1791 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
1792 else
1793 TRACE("ncname %s\n", debug_strval(reader, local));
1794
1795 reader_init_strvalue(prefix->len ? prefix->start : local->start,
1796 /* count ':' too */
1797 (prefix->len ? prefix->len + 1 : 0) + local->len,
1798 qname);
1799
1800 reader->resume[XmlReadResume_Name] = 0;
1801 reader->resume[XmlReadResume_Local] = 0;
1802
1803 return S_OK;
1804 }
1805
1806 /* Applies normalization rules to a single char, used for attribute values.
1807
1808 Rules include 2 steps:
1809
1810 1) replacing \r\n with a single \n;
1811 2) replacing all whitespace chars with ' '.
1812
1813 */
1814 static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
1815 {
1816 encoded_buffer *buffer = &reader->input->buffer->utf16;
1817
1818 if (!is_wchar_space(*ptr)) return;
1819
1820 if (*ptr == '\r' && *(ptr+1) == '\n')
1821 {
1822 int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR);
1823 memmove(ptr+1, ptr+2, len);
1824 }
1825 *ptr = ' ';
1826 }
1827
1828 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
1829 {
1830 static const WCHAR entltW[] = {'l','t'};
1831 static const WCHAR entgtW[] = {'g','t'};
1832 static const WCHAR entampW[] = {'a','m','p'};
1833 static const WCHAR entaposW[] = {'a','p','o','s'};
1834 static const WCHAR entquotW[] = {'q','u','o','t'};
1835 static const strval lt = { (WCHAR*)entltW, 2 };
1836 static const strval gt = { (WCHAR*)entgtW, 2 };
1837 static const strval amp = { (WCHAR*)entampW, 3 };
1838 static const strval apos = { (WCHAR*)entaposW, 4 };
1839 static const strval quot = { (WCHAR*)entquotW, 4 };
1840 WCHAR *str = reader_get_strptr(reader, name);
1841
1842 switch (*str)
1843 {
1844 case 'l':
1845 if (strval_eq(reader, name, &lt)) return '<';
1846 break;
1847 case 'g':
1848 if (strval_eq(reader, name, &gt)) return '>';
1849 break;
1850 case 'a':
1851 if (strval_eq(reader, name, &amp))
1852 return '&';
1853 else if (strval_eq(reader, name, &apos))
1854 return '\'';
1855 break;
1856 case 'q':
1857 if (strval_eq(reader, name, &quot)) return '\"';
1858 break;
1859 default:
1860 ;
1861 }
1862
1863 return 0;
1864 }
1865
1866 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1867 [67] Reference ::= EntityRef | CharRef
1868 [68] EntityRef ::= '&' Name ';' */
1869 static HRESULT reader_parse_reference(xmlreader *reader)
1870 {
1871 encoded_buffer *buffer = &reader->input->buffer->utf16;
1872 WCHAR *start = reader_get_ptr(reader), *ptr;
1873 UINT cur = reader_get_cur(reader);
1874 WCHAR ch = 0;
1875 int len;
1876
1877 /* skip '&' */
1878 reader_skipn(reader, 1);
1879 ptr = reader_get_ptr(reader);
1880
1881 if (*ptr == '#')
1882 {
1883 reader_skipn(reader, 1);
1884 ptr = reader_get_ptr(reader);
1885
1886 /* hex char or decimal */
1887 if (*ptr == 'x')
1888 {
1889 reader_skipn(reader, 1);
1890 ptr = reader_get_ptr(reader);
1891
1892 while (*ptr != ';')
1893 {
1894 if ((*ptr >= '0' && *ptr <= '9'))
1895 ch = ch*16 + *ptr - '0';
1896 else if ((*ptr >= 'a' && *ptr <= 'f'))
1897 ch = ch*16 + *ptr - 'a' + 10;
1898 else if ((*ptr >= 'A' && *ptr <= 'F'))
1899 ch = ch*16 + *ptr - 'A' + 10;
1900 else
1901 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
1902 reader_skipn(reader, 1);
1903 ptr = reader_get_ptr(reader);
1904 }
1905 }
1906 else
1907 {
1908 while (*ptr != ';')
1909 {
1910 if ((*ptr >= '0' && *ptr <= '9'))
1911 {
1912 ch = ch*10 + *ptr - '0';
1913 reader_skipn(reader, 1);
1914 ptr = reader_get_ptr(reader);
1915 }
1916 else
1917 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
1918 }
1919 }
1920
1921 if (!is_char(ch)) return WC_E_XMLCHARACTER;
1922
1923 /* normalize */
1924 if (is_wchar_space(ch)) ch = ' ';
1925
1926 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
1927 memmove(start+1, ptr+1, len);
1928 buffer->cur = cur + 1;
1929
1930 *start = ch;
1931 }
1932 else
1933 {
1934 strval name;
1935 HRESULT hr;
1936
1937 hr = reader_parse_name(reader, &name);
1938 if (FAILED(hr)) return hr;
1939
1940 ptr = reader_get_ptr(reader);
1941 if (*ptr != ';') return WC_E_SEMICOLON;
1942
1943 /* predefined entities resolve to a single character */
1944 ch = get_predefined_entity(reader, &name);
1945 if (ch)
1946 {
1947 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
1948 memmove(start+1, ptr+1, len);
1949 buffer->cur = cur + 1;
1950
1951 *start = ch;
1952 }
1953 else
1954 {
1955 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
1956 return WC_E_UNDECLAREDENTITY;
1957 }
1958
1959 }
1960
1961 return S_OK;
1962 }
1963
1964 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
1965 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
1966 {
1967 WCHAR *ptr, quote;
1968 UINT start;
1969
1970 ptr = reader_get_ptr(reader);
1971
1972 /* skip opening quote */
1973 quote = *ptr;
1974 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
1975 reader_skipn(reader, 1);
1976
1977 ptr = reader_get_ptr(reader);
1978 start = reader_get_cur(reader);
1979 while (*ptr)
1980 {
1981 if (*ptr == '<') return WC_E_LESSTHAN;
1982
1983 if (*ptr == quote)
1984 {
1985 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
1986 /* skip closing quote */
1987 reader_skipn(reader, 1);
1988 return S_OK;
1989 }
1990
1991 if (*ptr == '&')
1992 {
1993 HRESULT hr = reader_parse_reference(reader);
1994 if (FAILED(hr)) return hr;
1995 }
1996 else
1997 {
1998 reader_normalize_space(reader, ptr);
1999 reader_skipn(reader, 1);
2000 }
2001 ptr = reader_get_ptr(reader);
2002 }
2003
2004 return WC_E_QUOTE;
2005 }
2006
2007 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2008 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2009 [3 NS] DefaultAttName ::= 'xmlns'
2010 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2011 static HRESULT reader_parse_attribute(xmlreader *reader)
2012 {
2013 static const WCHAR xmlnsW[] = {'x','m','l','n','s',0};
2014 strval prefix, local, qname, xmlns, value;
2015 HRESULT hr;
2016
2017 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2018 if (FAILED(hr)) return hr;
2019
2020 reader_init_cstrvalue((WCHAR*)xmlnsW, 5, &xmlns);
2021
2022 if (strval_eq(reader, &prefix, &xmlns))
2023 {
2024 FIXME("namespace definitions not supported\n");
2025 return E_NOTIMPL;
2026 }
2027
2028 if (strval_eq(reader, &qname, &xmlns))
2029 FIXME("default namespace definitions not supported\n");
2030
2031 hr = reader_parse_eq(reader);
2032 if (FAILED(hr)) return hr;
2033
2034 hr = reader_parse_attvalue(reader, &value);
2035 if (FAILED(hr)) return hr;
2036
2037 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2038 return reader_add_attr(reader, &local, &value);
2039 }
2040
2041 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2042 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2043 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname, int *empty)
2044 {
2045 HRESULT hr;
2046
2047 hr = reader_parse_qname(reader, prefix, local, qname);
2048 if (FAILED(hr)) return hr;
2049
2050 while (1)
2051 {
2052 static const WCHAR endW[] = {'/','>',0};
2053
2054 reader_skipspaces(reader);
2055
2056 /* empty element */
2057 if ((*empty = !reader_cmp(reader, endW)))
2058 {
2059 /* skip '/>' */
2060 reader_skipn(reader, 2);
2061 reader->empty_element = TRUE;
2062 return S_OK;
2063 }
2064
2065 /* got a start tag */
2066 if (!reader_cmp(reader, gtW))
2067 {
2068 /* skip '>' */
2069 reader_skipn(reader, 1);
2070 return reader_push_element(reader, qname, local);
2071 }
2072
2073 hr = reader_parse_attribute(reader);
2074 if (FAILED(hr)) return hr;
2075 }
2076
2077 return S_OK;
2078 }
2079
2080 /* [39] element ::= EmptyElemTag | STag content ETag */
2081 static HRESULT reader_parse_element(xmlreader *reader)
2082 {
2083 HRESULT hr;
2084
2085 switch (reader->resumestate)
2086 {
2087 case XmlReadResumeState_Initial:
2088 /* check if we are really on element */
2089 if (reader_cmp(reader, ltW)) return S_FALSE;
2090
2091 /* skip '<' */
2092 reader_skipn(reader, 1);
2093
2094 reader_shrink(reader);
2095 reader->resumestate = XmlReadResumeState_STag;
2096 case XmlReadResumeState_STag:
2097 {
2098 strval qname, prefix, local;
2099 int empty = 0;
2100
2101 /* this handles empty elements too */
2102 hr = reader_parse_stag(reader, &prefix, &local, &qname, &empty);
2103 if (FAILED(hr)) return hr;
2104
2105 /* FIXME: need to check for defined namespace to reject invalid prefix,
2106 currently reject all prefixes */
2107 if (prefix.len) return NC_E_UNDECLAREDPREFIX;
2108
2109 /* if we got empty element and stack is empty go straight to Misc */
2110 if (empty && list_empty(&reader->elements))
2111 reader->instate = XmlReadInState_MiscEnd;
2112 else
2113 reader->instate = XmlReadInState_Content;
2114
2115 reader->nodetype = XmlNodeType_Element;
2116 reader->resumestate = XmlReadResumeState_Initial;
2117 reader_set_strvalue(reader, StringValue_LocalName, &local);
2118 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2119 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2120 break;
2121 }
2122 default:
2123 hr = E_FAIL;
2124 }
2125
2126 return hr;
2127 }
2128
2129 /* [13 NS] ETag ::= '</' QName S? '>' */
2130 static HRESULT reader_parse_endtag(xmlreader *reader)
2131 {
2132 strval prefix, local, qname;
2133 struct element *elem;
2134 HRESULT hr;
2135
2136 /* skip '</' */
2137 reader_skipn(reader, 2);
2138
2139 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2140 if (FAILED(hr)) return hr;
2141
2142 reader_skipspaces(reader);
2143
2144 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2145
2146 /* skip '>' */
2147 reader_skipn(reader, 1);
2148
2149 /* Element stack should never be empty at this point, cause we shouldn't get to
2150 content parsing if it's empty. */
2151 elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2152 if (!strval_eq(reader, &elem->qname, &qname)) return WC_E_ELEMENTMATCH;
2153
2154 reader_pop_element(reader);
2155
2156 /* It was a root element, the rest is expected as Misc */
2157 if (list_empty(&reader->elements))
2158 reader->instate = XmlReadInState_MiscEnd;
2159
2160 reader->nodetype = XmlNodeType_EndElement;
2161 reader_set_strvalue(reader, StringValue_LocalName, &local);
2162 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2163
2164 return S_OK;
2165 }
2166
2167 /* [18] CDSect ::= CDStart CData CDEnd
2168 [19] CDStart ::= '<![CDATA['
2169 [20] CData ::= (Char* - (Char* ']]>' Char*))
2170 [21] CDEnd ::= ']]>' */
2171 static HRESULT reader_parse_cdata(xmlreader *reader)
2172 {
2173 WCHAR *ptr;
2174 UINT start;
2175
2176 if (reader->resumestate == XmlReadResumeState_CDATA)
2177 {
2178 start = reader->resume[XmlReadResume_Body];
2179 ptr = reader_get_ptr(reader);
2180 }
2181 else
2182 {
2183 /* skip markup '<![CDATA[' */
2184 reader_skipn(reader, 9);
2185 reader_shrink(reader);
2186 ptr = reader_get_ptr(reader);
2187 start = reader_get_cur(reader);
2188 reader->nodetype = XmlNodeType_CDATA;
2189 reader->resume[XmlReadResume_Body] = start;
2190 reader->resumestate = XmlReadResumeState_CDATA;
2191 reader_set_strvalue(reader, StringValue_LocalName, NULL);
2192 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
2193 reader_set_strvalue(reader, StringValue_Value, NULL);
2194 }
2195
2196 while (*ptr)
2197 {
2198 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2199 {
2200 strval value;
2201
2202 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2203
2204 /* skip ']]>' */
2205 reader_skipn(reader, 3);
2206 TRACE("%s\n", debug_strval(reader, &value));
2207
2208 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2209 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2210 reader_set_strvalue(reader, StringValue_Value, &value);
2211 reader->resume[XmlReadResume_Body] = 0;
2212 reader->resumestate = XmlReadResumeState_Initial;
2213 return S_OK;
2214 }
2215 else
2216 {
2217 /* Value normalization is not fully implemented, rules are:
2218
2219 - single '\r' -> '\n';
2220 - sequence '\r\n' -> '\n', in this case value length changes;
2221 */
2222 if (*ptr == '\r') *ptr = '\n';
2223 reader_skipn(reader, 1);
2224 ptr++;
2225 }
2226 }
2227
2228 return S_OK;
2229 }
2230
2231 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2232 static HRESULT reader_parse_chardata(xmlreader *reader)
2233 {
2234 WCHAR *ptr;
2235 UINT start;
2236
2237 if (reader->resumestate == XmlReadResumeState_CharData)
2238 {
2239 start = reader->resume[XmlReadResume_Body];
2240 ptr = reader_get_ptr(reader);
2241 }
2242 else
2243 {
2244 reader_shrink(reader);
2245 ptr = reader_get_ptr(reader);
2246 start = reader_get_cur(reader);
2247 /* There's no text */
2248 if (!*ptr || *ptr == '<') return S_OK;
2249 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2250 reader->resume[XmlReadResume_Body] = start;
2251 reader->resumestate = XmlReadResumeState_CharData;
2252 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2253 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2254 reader_set_strvalue(reader, StringValue_Value, NULL);
2255 }
2256
2257 while (*ptr)
2258 {
2259 /* CDATA closing sequence ']]>' is not allowed */
2260 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2261 return WC_E_CDSECTEND;
2262
2263 /* Found next markup part */
2264 if (ptr[0] == '<')
2265 {
2266 strval value;
2267
2268 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2269 reader_set_strvalue(reader, StringValue_Value, &value);
2270 reader->resume[XmlReadResume_Body] = 0;
2271 reader->resumestate = XmlReadResumeState_Initial;
2272 return S_OK;
2273 }
2274
2275 reader_skipn(reader, 1);
2276
2277 /* this covers a case when text has leading whitespace chars */
2278 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2279 ptr++;
2280 }
2281
2282 return S_OK;
2283 }
2284
2285 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2286 static HRESULT reader_parse_content(xmlreader *reader)
2287 {
2288 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2289 static const WCHAR etagW[] = {'<','/',0};
2290 static const WCHAR ampW[] = {'&',0};
2291
2292 if (reader->resumestate != XmlReadResumeState_Initial)
2293 {
2294 switch (reader->resumestate)
2295 {
2296 case XmlReadResumeState_CDATA:
2297 return reader_parse_cdata(reader);
2298 case XmlReadResumeState_Comment:
2299 return reader_parse_comment(reader);
2300 case XmlReadResumeState_PIBody:
2301 case XmlReadResumeState_PITarget:
2302 return reader_parse_pi(reader);
2303 case XmlReadResumeState_CharData:
2304 return reader_parse_chardata(reader);
2305 default:
2306 ERR("unknown resume state %d\n", reader->resumestate);
2307 }
2308 }
2309
2310 reader_shrink(reader);
2311
2312 /* handle end tag here, it indicates end of content as well */
2313 if (!reader_cmp(reader, etagW))
2314 return reader_parse_endtag(reader);
2315
2316 if (!reader_cmp(reader, commentW))
2317 return reader_parse_comment(reader);
2318
2319 if (!reader_cmp(reader, piW))
2320 return reader_parse_pi(reader);
2321
2322 if (!reader_cmp(reader, cdstartW))
2323 return reader_parse_cdata(reader);
2324
2325 if (!reader_cmp(reader, ampW))
2326 return reader_parse_reference(reader);
2327
2328 if (!reader_cmp(reader, ltW))
2329 return reader_parse_element(reader);
2330
2331 /* what's left must be CharData */
2332 return reader_parse_chardata(reader);
2333 }
2334
2335 static HRESULT reader_parse_nextnode(xmlreader *reader)
2336 {
2337 HRESULT hr;
2338
2339 if (!is_reader_pending(reader))
2340 reader_clear_attrs(reader);
2341
2342 while (1)
2343 {
2344 switch (reader->instate)
2345 {
2346 /* if it's a first call for a new input we need to detect stream encoding */
2347 case XmlReadInState_Initial:
2348 {
2349 xml_encoding enc;
2350
2351 hr = readerinput_growraw(reader->input);
2352 if (FAILED(hr)) return hr;
2353
2354 /* try to detect encoding by BOM or data and set input code page */
2355 hr = readerinput_detectencoding(reader->input, &enc);
2356 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
2357 if (FAILED(hr)) return hr;
2358
2359 /* always switch first time cause we have to put something in */
2360 readerinput_switchencoding(reader->input, enc);
2361
2362 /* parse xml declaration */
2363 hr = reader_parse_xmldecl(reader);
2364 if (FAILED(hr)) return hr;
2365
2366 readerinput_shrinkraw(reader->input, -1);
2367 reader->instate = XmlReadInState_Misc_DTD;
2368 if (hr == S_OK) return hr;
2369 }
2370 break;
2371 case XmlReadInState_Misc_DTD:
2372 hr = reader_parse_misc(reader);
2373 if (FAILED(hr)) return hr;
2374
2375 if (hr == S_FALSE)
2376 reader->instate = XmlReadInState_DTD;
2377 else
2378 return hr;
2379 break;
2380 case XmlReadInState_DTD:
2381 hr = reader_parse_dtd(reader);
2382 if (FAILED(hr)) return hr;
2383
2384 if (hr == S_OK)
2385 {
2386 reader->instate = XmlReadInState_DTD_Misc;
2387 return hr;
2388 }
2389 else
2390 reader->instate = XmlReadInState_Element;
2391 break;
2392 case XmlReadInState_DTD_Misc:
2393 hr = reader_parse_misc(reader);
2394 if (FAILED(hr)) return hr;
2395
2396 if (hr == S_FALSE)
2397 reader->instate = XmlReadInState_Element;
2398 else
2399 return hr;
2400 break;
2401 case XmlReadInState_Element:
2402 return reader_parse_element(reader);
2403 case XmlReadInState_Content:
2404 return reader_parse_content(reader);
2405 case XmlReadInState_MiscEnd:
2406 hr = reader_parse_misc(reader);
2407 if (FAILED(hr)) return hr;
2408
2409 if (hr == S_FALSE)
2410 reader->instate = XmlReadInState_Eof;
2411 return hr;
2412 case XmlReadInState_Eof:
2413 return S_FALSE;
2414 default:
2415 FIXME("internal state %d not handled\n", reader->instate);
2416 return E_NOTIMPL;
2417 }
2418 }
2419
2420 return E_NOTIMPL;
2421 }
2422
2423 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2424 {
2425 xmlreader *This = impl_from_IXmlReader(iface);
2426
2427 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2428
2429 if (IsEqualGUID(riid, &IID_IUnknown) ||
2430 IsEqualGUID(riid, &IID_IXmlReader))
2431 {
2432 *ppvObject = iface;
2433 }
2434 else
2435 {
2436 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2437 *ppvObject = NULL;
2438 return E_NOINTERFACE;
2439 }
2440
2441 IXmlReader_AddRef(iface);
2442
2443 return S_OK;
2444 }
2445
2446 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2447 {
2448 xmlreader *This = impl_from_IXmlReader(iface);
2449 ULONG ref = InterlockedIncrement(&This->ref);
2450 TRACE("(%p)->(%d)\n", This, ref);
2451 return ref;
2452 }
2453
2454 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2455 {
2456 xmlreader *This = impl_from_IXmlReader(iface);
2457 LONG ref = InterlockedDecrement(&This->ref);
2458
2459 TRACE("(%p)->(%d)\n", This, ref);
2460
2461 if (ref == 0)
2462 {
2463 IMalloc *imalloc = This->imalloc;
2464 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2465 reader_clear_attrs(This);
2466 reader_clear_elements(This);
2467 reader_free_strvalues(This);
2468 reader_free(This, This);
2469 if (imalloc) IMalloc_Release(imalloc);
2470 }
2471
2472 return ref;
2473 }
2474
2475 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2476 {
2477 xmlreader *This = impl_from_IXmlReader(iface);
2478 IXmlReaderInput *readerinput;
2479 HRESULT hr;
2480
2481 TRACE("(%p)->(%p)\n", This, input);
2482
2483 if (This->input)
2484 {
2485 readerinput_release_stream(This->input);
2486 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2487 This->input = NULL;
2488 }
2489
2490 This->line = This->pos = 0;
2491 reader_clear_elements(This);
2492 This->depth = 0;
2493 This->resumestate = XmlReadResumeState_Initial;
2494 memset(This->resume, 0, sizeof(This->resume));
2495
2496 /* just reset current input */
2497 if (!input)
2498 {
2499 This->state = XmlReadState_Initial;
2500 return S_OK;
2501 }
2502
2503 /* now try IXmlReaderInput, ISequentialStream, IStream */
2504 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2505 if (hr == S_OK)
2506 {
2507 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2508 This->input = impl_from_IXmlReaderInput(readerinput);
2509 else
2510 {
2511 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2512 readerinput, readerinput->lpVtbl);
2513 IUnknown_Release(readerinput);
2514 return E_FAIL;
2515
2516 }
2517 }
2518
2519 if (hr != S_OK || !readerinput)
2520 {
2521 /* create IXmlReaderInput basing on supplied interface */
2522 hr = CreateXmlReaderInputWithEncodingName(input,
2523 This->imalloc, NULL, FALSE, NULL, &readerinput);
2524 if (hr != S_OK) return hr;
2525 This->input = impl_from_IXmlReaderInput(readerinput);
2526 }
2527
2528 /* set stream for supplied IXmlReaderInput */
2529 hr = readerinput_query_for_stream(This->input);
2530 if (hr == S_OK)
2531 {
2532 This->state = XmlReadState_Initial;
2533 This->instate = XmlReadInState_Initial;
2534 }
2535
2536 return hr;
2537 }
2538
2539 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2540 {
2541 xmlreader *This = impl_from_IXmlReader(iface);
2542
2543 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2544
2545 if (!value) return E_INVALIDARG;
2546
2547 switch (property)
2548 {
2549 case XmlReaderProperty_DtdProcessing:
2550 *value = This->dtdmode;
2551 break;
2552 case XmlReaderProperty_ReadState:
2553 *value = This->state;
2554 break;
2555 default:
2556 FIXME("Unimplemented property (%u)\n", property);
2557 return E_NOTIMPL;
2558 }
2559
2560 return S_OK;
2561 }
2562
2563 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2564 {
2565 xmlreader *This = impl_from_IXmlReader(iface);
2566
2567 TRACE("(%p)->(%s %lu)\n", This, debugstr_reader_prop(property), value);
2568
2569 switch (property)
2570 {
2571 case XmlReaderProperty_DtdProcessing:
2572 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2573 This->dtdmode = value;
2574 break;
2575 default:
2576 FIXME("Unimplemented property (%u)\n", property);
2577 return E_NOTIMPL;
2578 }
2579
2580 return S_OK;
2581 }
2582
2583 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2584 {
2585 xmlreader *This = impl_from_IXmlReader(iface);
2586 XmlNodeType oldtype = This->nodetype;
2587 HRESULT hr;
2588
2589 TRACE("(%p)->(%p)\n", This, nodetype);
2590
2591 if (This->state == XmlReadState_Closed) return S_FALSE;
2592
2593 hr = reader_parse_nextnode(This);
2594 if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
2595 This->state = XmlReadState_Interactive;
2596 if (hr == S_OK)
2597 {
2598 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2599 *nodetype = This->nodetype;
2600 }
2601
2602 return hr;
2603 }
2604
2605 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2606 {
2607 xmlreader *This = impl_from_IXmlReader(iface);
2608 TRACE("(%p)->(%p)\n", This, node_type);
2609
2610 *node_type = reader_get_nodetype(This);
2611 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2612 }
2613
2614 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2615 {
2616 xmlreader *This = impl_from_IXmlReader(iface);
2617
2618 TRACE("(%p)\n", This);
2619
2620 if (!This->attr_count) return S_FALSE;
2621 This->attr = LIST_ENTRY(list_head(&This->attrs), struct attribute, entry);
2622 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2623 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2624
2625 return S_OK;
2626 }
2627
2628 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2629 {
2630 xmlreader *This = impl_from_IXmlReader(iface);
2631 const struct list *next;
2632
2633 TRACE("(%p)\n", This);
2634
2635 if (!This->attr_count) return S_FALSE;
2636
2637 if (!This->attr)
2638 return IXmlReader_MoveToFirstAttribute(iface);
2639
2640 next = list_next(&This->attrs, &This->attr->entry);
2641 if (next)
2642 {
2643 This->attr = LIST_ENTRY(next, struct attribute, entry);
2644 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2645 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2646 }
2647
2648 return next ? S_OK : S_FALSE;
2649 }
2650
2651 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
2652 LPCWSTR local_name,
2653 LPCWSTR namespaceUri)
2654 {
2655 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
2656 return E_NOTIMPL;
2657 }
2658
2659 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
2660 {
2661 xmlreader *This = impl_from_IXmlReader(iface);
2662 struct element *elem;
2663
2664 TRACE("(%p)\n", This);
2665
2666 if (!This->attr_count) return S_FALSE;
2667 This->attr = NULL;
2668
2669 /* FIXME: support other node types with 'attributes' like DTD */
2670 elem = LIST_ENTRY(list_head(&This->elements), struct element, entry);
2671 if (elem)
2672 {
2673 reader_set_strvalue(This, StringValue_QualifiedName, &elem->qname);
2674 reader_set_strvalue(This, StringValue_LocalName, &elem->localname);
2675 }
2676
2677 return S_OK;
2678 }
2679
2680 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2681 {
2682 xmlreader *This = impl_from_IXmlReader(iface);
2683
2684 TRACE("(%p)->(%p %p)\n", This, name, len);
2685 *name = This->strvalues[StringValue_QualifiedName].str;
2686 if (len) *len = This->strvalues[StringValue_QualifiedName].len;
2687 return S_OK;
2688 }
2689
2690 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface,
2691 LPCWSTR *namespaceUri,
2692 UINT *namespaceUri_length)
2693 {
2694 FIXME("(%p %p %p): stub\n", iface, namespaceUri, namespaceUri_length);
2695 return E_NOTIMPL;
2696 }
2697
2698 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2699 {
2700 xmlreader *This = impl_from_IXmlReader(iface);
2701
2702 TRACE("(%p)->(%p %p)\n", This, name, len);
2703 *name = This->strvalues[StringValue_LocalName].str;
2704 if (len) *len = This->strvalues[StringValue_LocalName].len;
2705 return S_OK;
2706 }
2707
2708 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, LPCWSTR *prefix, UINT *len)
2709 {
2710 xmlreader *This = impl_from_IXmlReader(iface);
2711
2712 TRACE("(%p)->(%p %p)\n", This, prefix, len);
2713 *prefix = This->strvalues[StringValue_Prefix].str;
2714 if (len) *len = This->strvalues[StringValue_Prefix].len;
2715 return S_OK;
2716 }
2717
2718 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
2719 {
2720 xmlreader *reader = impl_from_IXmlReader(iface);
2721 strval *val = &reader->strvalues[StringValue_Value];
2722
2723 TRACE("(%p)->(%p %p)\n", reader, value, len);
2724
2725 *value = NULL;
2726
2727 if ((reader->nodetype == XmlNodeType_Comment && !val->str) || is_reader_pending(reader))
2728 {
2729 XmlNodeType type;
2730 HRESULT hr;
2731
2732 hr = IXmlReader_Read(iface, &type);
2733 if (FAILED(hr)) return hr;
2734
2735 /* return if still pending, partially read values are not reported */
2736 if (is_reader_pending(reader)) return E_PENDING;
2737 }
2738
2739 if (!val->str)
2740 {
2741 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
2742 if (!ptr) return E_OUTOFMEMORY;
2743 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
2744 ptr[val->len] = 0;
2745 val->str = ptr;
2746 }
2747
2748 *value = val->str;
2749 if (len) *len = val->len;
2750 return S_OK;
2751 }
2752
2753 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
2754 {
2755 xmlreader *reader = impl_from_IXmlReader(iface);
2756 strval *val = &reader->strvalues[StringValue_Value];
2757 UINT len;
2758
2759 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
2760
2761 /* Value is already allocated, chunked reads are not possible. */
2762 if (val->str) return S_FALSE;
2763
2764 if (val->len)
2765 {
2766 len = min(chunk_size, val->len);
2767 memcpy(buffer, reader_get_ptr2(reader, val->start), len);
2768 val->start += len;
2769 val->len -= len;
2770 if (read) *read = len;
2771 }
2772
2773 return S_OK;
2774 }
2775
2776 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
2777 LPCWSTR *baseUri,
2778 UINT *baseUri_length)
2779 {
2780 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
2781 return E_NOTIMPL;
2782 }
2783
2784 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
2785 {
2786 FIXME("(%p): stub\n", iface);
2787 return FALSE;
2788 }
2789
2790 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
2791 {
2792 xmlreader *This = impl_from_IXmlReader(iface);
2793 TRACE("(%p)\n", This);
2794 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
2795 when current node is start tag of an element */
2796 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->empty_element : FALSE;
2797 }
2798
2799 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *lineNumber)
2800 {
2801 xmlreader *This = impl_from_IXmlReader(iface);
2802
2803 TRACE("(%p %p)\n", This, lineNumber);
2804
2805 if (!lineNumber) return E_INVALIDARG;
2806
2807 *lineNumber = This->line;
2808
2809 return S_OK;
2810 }
2811
2812 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *linePosition)
2813 {
2814 xmlreader *This = impl_from_IXmlReader(iface);
2815
2816 TRACE("(%p %p)\n", This, linePosition);
2817
2818 if (!linePosition) return E_INVALIDARG;
2819
2820 *linePosition = This->pos;
2821
2822 return S_OK;
2823 }
2824
2825 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
2826 {
2827 xmlreader *This = impl_from_IXmlReader(iface);
2828
2829 TRACE("(%p)->(%p)\n", This, count);
2830
2831 if (!count) return E_INVALIDARG;
2832
2833 *count = This->attr_count;
2834 return S_OK;
2835 }
2836
2837 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
2838 {
2839 xmlreader *This = impl_from_IXmlReader(iface);
2840 TRACE("(%p)->(%p)\n", This, depth);
2841 *depth = This->depth;
2842 return S_OK;
2843 }
2844
2845 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
2846 {
2847 FIXME("(%p): stub\n", iface);
2848 return FALSE;
2849 }
2850
2851 static const struct IXmlReaderVtbl xmlreader_vtbl =
2852 {
2853 xmlreader_QueryInterface,
2854 xmlreader_AddRef,
2855 xmlreader_Release,
2856 xmlreader_SetInput,
2857 xmlreader_GetProperty,
2858 xmlreader_SetProperty,
2859 xmlreader_Read,
2860 xmlreader_GetNodeType,
2861 xmlreader_MoveToFirstAttribute,
2862 xmlreader_MoveToNextAttribute,
2863 xmlreader_MoveToAttributeByName,
2864 xmlreader_MoveToElement,
2865 xmlreader_GetQualifiedName,
2866 xmlreader_GetNamespaceUri,
2867 xmlreader_GetLocalName,
2868 xmlreader_GetPrefix,
2869 xmlreader_GetValue,
2870 xmlreader_ReadValueChunk,
2871 xmlreader_GetBaseUri,
2872 xmlreader_IsDefault,
2873 xmlreader_IsEmptyElement,
2874 xmlreader_GetLineNumber,
2875 xmlreader_GetLinePosition,
2876 xmlreader_GetAttributeCount,
2877 xmlreader_GetDepth,
2878 xmlreader_IsEOF
2879 };
2880
2881 /** IXmlReaderInput **/
2882 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
2883 {
2884 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2885
2886 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2887
2888 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
2889 IsEqualGUID(riid, &IID_IUnknown))
2890 {
2891 *ppvObject = iface;
2892 }
2893 else
2894 {
2895 WARN("interface %s not implemented\n", debugstr_guid(riid));
2896 *ppvObject = NULL;
2897 return E_NOINTERFACE;
2898 }
2899
2900 IUnknown_AddRef(iface);
2901
2902 return S_OK;
2903 }
2904
2905 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
2906 {
2907 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2908 ULONG ref = InterlockedIncrement(&This->ref);
2909 TRACE("(%p)->(%d)\n", This, ref);
2910 return ref;
2911 }
2912
2913 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
2914 {
2915 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2916 LONG ref = InterlockedDecrement(&This->ref);
2917
2918 TRACE("(%p)->(%d)\n", This, ref);
2919
2920 if (ref == 0)
2921 {
2922 IMalloc *imalloc = This->imalloc;
2923 if (This->input) IUnknown_Release(This->input);
2924 if (This->stream) ISequentialStream_Release(This->stream);
2925 if (This->buffer) free_input_buffer(This->buffer);
2926 readerinput_free(This, This->baseuri);
2927 readerinput_free(This, This);
2928 if (imalloc) IMalloc_Release(imalloc);
2929 }
2930
2931 return ref;
2932 }
2933
2934 static const struct IUnknownVtbl xmlreaderinputvtbl =
2935 {
2936 xmlreaderinput_QueryInterface,
2937 xmlreaderinput_AddRef,
2938 xmlreaderinput_Release
2939 };
2940
2941 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
2942 {
2943 xmlreader *reader;
2944 int i;
2945
2946 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
2947
2948 if (!IsEqualGUID(riid, &IID_IXmlReader))
2949 {
2950 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
2951 return E_FAIL;
2952 }
2953
2954 if (imalloc)
2955 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
2956 else
2957 reader = heap_alloc(sizeof(*reader));
2958 if(!reader) return E_OUTOFMEMORY;
2959
2960 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
2961 reader->ref = 1;
2962 reader->input = NULL;
2963 reader->state = XmlReadState_Closed;
2964 reader->instate = XmlReadInState_Initial;
2965 reader->resumestate = XmlReadResumeState_Initial;
2966 reader->dtdmode = DtdProcessing_Prohibit;
2967 reader->line = reader->pos = 0;
2968 reader->imalloc = imalloc;
2969 if (imalloc) IMalloc_AddRef(imalloc);
2970 reader->nodetype = XmlNodeType_None;
2971 list_init(&reader->attrs);
2972 reader->attr_count = 0;
2973 reader->attr = NULL;
2974 list_init(&reader->elements);
2975 reader->depth = 0;
2976 reader->max_depth = 256;
2977 reader->empty_element = FALSE;
2978 memset(reader->resume, 0, sizeof(reader->resume));
2979
2980 for (i = 0; i < StringValue_Last; i++)
2981 reader->strvalues[i] = strval_empty;
2982
2983 *obj = &reader->IXmlReader_iface;
2984
2985 TRACE("returning iface %p\n", *obj);
2986
2987 return S_OK;
2988 }
2989
2990 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
2991 IMalloc *imalloc,
2992 LPCWSTR encoding,
2993 BOOL hint,
2994 LPCWSTR base_uri,
2995 IXmlReaderInput **ppInput)
2996 {
2997 xmlreaderinput *readerinput;
2998 HRESULT hr;
2999
3000 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3001 hint, wine_dbgstr_w(base_uri), ppInput);
3002
3003 if (!stream || !ppInput) return E_INVALIDARG;
3004
3005 if (imalloc)
3006 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3007 else
3008 readerinput = heap_alloc(sizeof(*readerinput));
3009 if(!readerinput) return E_OUTOFMEMORY;
3010
3011 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3012 readerinput->ref = 1;
3013 readerinput->imalloc = imalloc;
3014 readerinput->stream = NULL;
3015 if (imalloc) IMalloc_AddRef(imalloc);
3016 readerinput->encoding = parse_encoding_name(encoding, -1);
3017 readerinput->hint = hint;
3018 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3019 readerinput->pending = 0;
3020
3021 hr = alloc_input_buffer(readerinput);
3022 if (hr != S_OK)
3023 {
3024 readerinput_free(readerinput, readerinput->baseuri);
3025 readerinput_free(readerinput, readerinput);
3026 if (imalloc) IMalloc_Release(imalloc);
3027 return hr;
3028 }
3029 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3030
3031 *ppInput = &readerinput->IXmlReaderInput_iface;
3032
3033 TRACE("returning iface %p\n", *ppInput);
3034
3035 return S_OK;
3036 }