Sync with trunk rev.61910 to get latest improvements and bugfixes.
[reactos.git] / dll / win32 / xmllite / reader.c
1 /*
2 * IXmlReader implementation
3 *
4 * Copyright 2010, 2012-2013 Nikolay Sivov
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 */
20
21 #include "xmllite_private.h"
22
23 #include <stdio.h>
24
25 #include <wine/list.h>
26 #include <wine/unicode.h>
27
/* Interface identifier for IXmlReaderInput. It is not defined in public headers,
   so the GUID value is spelled out here. */
DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
30
/* Encodings the reader distinguishes. get_code_page() uses the value as an
   index into xml_encoding_map, so the numbering must stay as it is. */
typedef enum
{
    XmlEncoding_UTF16   = 0,
    XmlEncoding_UTF8    = 1,
    XmlEncoding_Unknown = 2
} xml_encoding;
37
/* Internal parser state of the reader (see xmlreader.instate). */
typedef enum
{
    XmlReadInState_Initial  = 0,
    XmlReadInState_XmlDecl  = 1,
    XmlReadInState_Misc_DTD = 2,
    XmlReadInState_DTD      = 3,
    XmlReadInState_DTD_Misc = 4,
    XmlReadInState_Element  = 5,
    XmlReadInState_Content  = 6,
    XmlReadInState_MiscEnd  = 7, /* optional Misc at the end of a document */
    XmlReadInState_Eof      = 8
} XmlReaderInternalState;
50
/* Records the construct that was being parsed when parsing was interrupted
   by an input problem. The reader uses it to resume at the right place. */
typedef enum
{
    XmlReadResumeState_Initial  = 0,
    XmlReadResumeState_PITarget = 1,
    XmlReadResumeState_PIBody   = 2,
    XmlReadResumeState_CDATA    = 3,
    XmlReadResumeState_Comment  = 4,
    XmlReadResumeState_STag     = 5,
    XmlReadResumeState_CharData = 6
} XmlReaderResumeState;
63
/* Index into xmlreader.resume[]: saved input pointers used to resume
   from a particular input position. */
typedef enum
{
    XmlReadResume_Name  = 0, /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Local = 1, /* local for QName */
    XmlReadResume_Body  = 2, /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Last  = 3  /* number of slots, not a slot itself */
} XmlReaderResume;
72
/* Index into xmlreader.strvalues[], the string values kept for the current node.
   StringValue_Last is the array size, not a value slot. */
typedef enum
{
    StringValue_LocalName,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    StringValue_Last
} XmlReaderStringValue;
81
/* Encoding names referenced by xml_encoding_map */
static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
static const WCHAR utf8W[] = {'U','T','F','-','8',0};

/* Null-terminated markup delimiter strings, suitable for reader_cmp() */
static const WCHAR dblquoteW[] = {'\"',0};
static const WCHAR quoteW[] = {'\'',0};
static const WCHAR ltW[] = {'<',0};
static const WCHAR gtW[] = {'>',0};
static const WCHAR commentW[] = {'<','!','-','-',0};
static const WCHAR piW[] = {'<','?',0};

91
92 static const char *debugstr_nodetype(XmlNodeType nodetype)
93 {
94 static const char* type_names[] =
95 {
96 "None",
97 "Element",
98 "Attribute",
99 "Text",
100 "CDATA",
101 "",
102 "",
103 "ProcessingInstruction",
104 "Comment",
105 "",
106 "DocumentType",
107 "",
108 "",
109 "Whitespace",
110 "",
111 "EndElement",
112 "",
113 "XmlDeclaration"
114 };
115
116 if (nodetype > _XmlNodeType_Last)
117 {
118 static char buf[25];
119 sprintf(buf, "unknown type=%d", nodetype);
120 return buf;
121 }
122 return type_names[nodetype];
123 }
124
125 static const char *debugstr_prop(XmlReaderProperty prop)
126 {
127 static const char* prop_names[] =
128 {
129 "MultiLanguage",
130 "ConformanceLevel",
131 "RandomAccess",
132 "XmlResolver",
133 "DtdProcessing",
134 "ReadState",
135 "MaxElementDepth",
136 "MaxEntityExpansion"
137 };
138
139 if (prop > _XmlReaderProperty_Last)
140 {
141 static char buf[25];
142 sprintf(buf, "unknown property=%d", prop);
143 return buf;
144 }
145 return prop_names[prop];
146 }
147
/* One row of the encoding table: maps an encoding name to the internal
   encoding id and the code page used for conversion. */
struct xml_encoding_data
{
    const WCHAR *name;  /* encoding name, null-terminated */
    xml_encoding enc;   /* internal encoding id */
    UINT cp;            /* code page; ~0 is used for UTF-16, which is copied without conversion */
};
154
/* Table of supported encodings. Two invariants are relied upon elsewhere:
   - get_code_page() indexes it directly with an xml_encoding value, so row
     order must follow the enum;
   - parse_encoding_name() runs a binary search on 'name', so rows must stay
     sorted by name (case-insensitive). */
static const struct xml_encoding_data xml_encoding_map[] = {
    { utf16W, XmlEncoding_UTF16, ~0 },
    { utf8W, XmlEncoding_UTF8, CP_UTF8 }
};
159
/* A growable byte buffer holding input data in one particular encoding. */
typedef struct
{
    char *data;             /* start of the allocation */
    char *cur;              /* current position inside 'data' */
    unsigned int allocated; /* size of the allocation, in bytes */
    unsigned int written;   /* number of bytes of 'data' currently in use */
} encoded_buffer;

/* forward declaration, the structure is defined after xmlreader */
typedef struct input_buffer input_buffer;
169
/* Implementation object behind the IXmlReaderInput interface. */
typedef struct
{
    IXmlReaderInput IXmlReaderInput_iface;
    LONG ref;
    /* reference passed on IXmlReaderInput creation, is kept when input is created */
    IUnknown *input;
    IMalloc *imalloc;       /* optional allocator; when NULL the process heap helpers are used */
    xml_encoding encoding;
    BOOL hint;
    WCHAR *baseuri;
    /* stream reference set after SetInput() call from reader,
       stored as sequential stream, cause currently
       optimizations possible with IStream aren't implemented */
    ISequentialStream *stream;
    input_buffer *buffer;
    unsigned int pending : 1; /* set when the last stream read returned E_PENDING */
} xmlreaderinput;

/* forward declaration of the vtable for the input object */
static const struct IUnknownVtbl xmlreaderinputvtbl;
189
/* Structure to hold parsed string of specific length.

   Reader stores node value as 'start' pointer, on request
   a null-terminated version of it is allocated.

   To init a strval variable use reader_init_strvalue(),
   to set a strval as a reader value use reader_set_strvalue().
*/
typedef struct
{
    WCHAR *str; /* allocated null-terminated string */
    UINT len; /* length in WCHARs, altered after ReadValueChunk */
    WCHAR *start; /* input position where value starts */
} strval;
204
/* Shared empty value. Code in this file compares 'str' against strval_empty.str
   to recognise it, and never frees or duplicates it. */
static WCHAR emptyW[] = {0};
static const strval strval_empty = {emptyW, 0, emptyW};
207
/* One attribute of the current node, linked into xmlreader.attrs.
   The strvals are stored as given to reader_add_attr(), without copying. */
struct attribute
{
    struct list entry;
    strval localname;
    strval value;
};
214
/* One open element, linked into xmlreader.elements (newest at the head).
   Both names are duplicated by reader_push_element(). */
struct element
{
    struct list entry;   /* first member; reader_pop_element() relies on LIST_ENTRY of a NULL head staying NULL */
    strval qname;
    strval localname;
};
221
/* Implementation object behind the IXmlReader interface. */
typedef struct
{
    IXmlReader IXmlReader_iface;
    LONG ref;
    xmlreaderinput *input;
    IMalloc *imalloc;       /* optional allocator; when NULL the process heap helpers are used */
    XmlReadState state;
    XmlReaderInternalState instate;
    XmlReaderResumeState resumestate;
    XmlNodeType nodetype;
    DtdProcessing dtdmode;
    UINT line, pos; /* reader position in XML stream */
    struct list attrs; /* attributes list for current node */
    struct attribute *attr; /* current attribute */
    UINT attr_count;
    struct list elements; /* stack of open elements, newest first */
    strval strvalues[StringValue_Last];
    UINT depth;
    UINT max_depth;
    BOOL empty_element;
    WCHAR *resume[XmlReadResume_Last]; /* pointers used to resume reader */
} xmlreader;
244
/* Pair of buffers owned by a reader input: raw bytes as read from the stream
   and their UTF-16 conversion which the parser works on. */
struct input_buffer
{
    encoded_buffer utf16;    /* converted data */
    encoded_buffer encoded;  /* raw data in the stream's encoding */
    UINT code_page;          /* code page of 'encoded'; ~0 until known, and for UTF-16 */
    xmlreaderinput *input;   /* owning input, used for allocation */
};
252
253 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
254 {
255 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
256 }
257
258 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
259 {
260 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
261 }
262
263 static inline void *m_alloc(IMalloc *imalloc, size_t len)
264 {
265 if (imalloc)
266 return IMalloc_Alloc(imalloc, len);
267 else
268 return heap_alloc(len);
269 }
270
271 static inline void *m_realloc(IMalloc *imalloc, void *mem, size_t len)
272 {
273 if (imalloc)
274 return IMalloc_Realloc(imalloc, mem, len);
275 else
276 return heap_realloc(mem, len);
277 }
278
279 static inline void m_free(IMalloc *imalloc, void *mem)
280 {
281 if (imalloc)
282 IMalloc_Free(imalloc, mem);
283 else
284 heap_free(mem);
285 }
286
287 /* reader memory allocation functions */
288 static inline void *reader_alloc(xmlreader *reader, size_t len)
289 {
290 return m_alloc(reader->imalloc, len);
291 }
292
293 static inline void reader_free(xmlreader *reader, void *mem)
294 {
295 m_free(reader->imalloc, mem);
296 }
297
298 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
299 {
300 *dest = *src;
301
302 if (src->str != strval_empty.str)
303 {
304 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
305 if (!dest->str) return E_OUTOFMEMORY;
306 memcpy(dest->str, src->str, dest->len*sizeof(WCHAR));
307 dest->str[dest->len] = 0;
308 }
309
310 return S_OK;
311 }
312
313 /* reader input memory allocation functions */
314 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
315 {
316 return m_alloc(input->imalloc, len);
317 }
318
319 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
320 {
321 return m_realloc(input->imalloc, mem, len);
322 }
323
324 static inline void readerinput_free(xmlreaderinput *input, void *mem)
325 {
326 m_free(input->imalloc, mem);
327 }
328
329 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
330 {
331 LPWSTR ret = NULL;
332
333 if(str) {
334 DWORD size;
335
336 size = (strlenW(str)+1)*sizeof(WCHAR);
337 ret = readerinput_alloc(input, size);
338 if (ret) memcpy(ret, str, size);
339 }
340
341 return ret;
342 }
343
344 static void reader_clear_attrs(xmlreader *reader)
345 {
346 struct attribute *attr, *attr2;
347 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
348 {
349 reader_free(reader, attr);
350 }
351 list_init(&reader->attrs);
352 reader->attr_count = 0;
353 }
354
355 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
356 while we are on a node with attributes */
357 static HRESULT reader_add_attr(xmlreader *reader, strval *localname, strval *value)
358 {
359 struct attribute *attr;
360
361 attr = reader_alloc(reader, sizeof(*attr));
362 if (!attr) return E_OUTOFMEMORY;
363
364 attr->localname = *localname;
365 attr->value = *value;
366 list_add_tail(&reader->attrs, &attr->entry);
367 reader->attr_count++;
368
369 return S_OK;
370 }
371
372 /* This one frees stored string value if needed */
373 static void reader_free_strvalued(xmlreader *reader, strval *v)
374 {
375 if (v->str != strval_empty.str)
376 {
377 reader_free(reader, v->str);
378 *v = strval_empty;
379 }
380 }
381
382 static inline void reader_init_strvalue(WCHAR *str, UINT len, strval *v)
383 {
384 v->start = v->str = str;
385 v->len = len;
386 }
387
388 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
389 {
390 reader_free_strvalued(reader, &reader->strvalues[type]);
391 }
392
393 static void reader_free_strvalues(xmlreader *reader)
394 {
395 int type;
396 for (type = 0; type < StringValue_Last; type++)
397 reader_free_strvalue(reader, type);
398 }
399
400 /* This helper should only be used to test if strings are the same,
401 it doesn't try to sort. */
402 static inline int strval_eq(const strval *str1, const strval *str2)
403 {
404 if (str1->len != str2->len) return 0;
405 return !memcmp(str1->str, str2->str, str1->len*sizeof(WCHAR));
406 }
407
408 static void reader_clear_elements(xmlreader *reader)
409 {
410 struct element *elem, *elem2;
411 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
412 {
413 reader_free_strvalued(reader, &elem->qname);
414 reader_free(reader, elem);
415 }
416 list_init(&reader->elements);
417 reader->empty_element = FALSE;
418 }
419
420 static HRESULT reader_inc_depth(xmlreader *reader)
421 {
422 if (++reader->depth > reader->max_depth) return SC_E_MAXELEMENTDEPTH;
423 return S_OK;
424 }
425
426 static void reader_dec_depth(xmlreader *reader)
427 {
428 if (reader->depth > 1) reader->depth--;
429 }
430
431 static HRESULT reader_push_element(xmlreader *reader, strval *qname, strval *localname)
432 {
433 struct element *elem;
434 HRESULT hr;
435
436 elem = reader_alloc(reader, sizeof(*elem));
437 if (!elem) return E_OUTOFMEMORY;
438
439 hr = reader_strvaldup(reader, qname, &elem->qname);
440 if (FAILED(hr)) {
441 reader_free(reader, elem);
442 return hr;
443 }
444
445 hr = reader_strvaldup(reader, localname, &elem->localname);
446 if (FAILED(hr))
447 {
448 reader_free_strvalued(reader, &elem->qname);
449 reader_free(reader, elem);
450 return hr;
451 }
452
453 if (!list_empty(&reader->elements))
454 {
455 hr = reader_inc_depth(reader);
456 if (FAILED(hr)) {
457 reader_free(reader, elem);
458 return hr;
459 }
460 }
461
462 list_add_head(&reader->elements, &elem->entry);
463 reader->empty_element = FALSE;
464 return hr;
465 }
466
467 static void reader_pop_element(xmlreader *reader)
468 {
469 struct element *elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
470
471 if (elem)
472 {
473 list_remove(&elem->entry);
474 reader_free_strvalued(reader, &elem->qname);
475 reader_free_strvalued(reader, &elem->localname);
476 reader_free(reader, elem);
477 reader_dec_depth(reader);
478 }
479 }
480
481 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
482 means node value is to be determined. */
483 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
484 {
485 strval *v = &reader->strvalues[type];
486
487 reader_free_strvalue(reader, type);
488 if (!value)
489 {
490 v->str = NULL;
491 v->start = NULL;
492 v->len = 0;
493 return;
494 }
495
496 if (value->str == strval_empty.str)
497 *v = *value;
498 else
499 {
500 if (type == StringValue_Value)
501 {
502 /* defer allocation for value string */
503 v->str = NULL;
504 v->start = value->start;
505 v->len = value->len;
506 }
507 else
508 {
509 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
510 memcpy(v->str, value->start, value->len*sizeof(WCHAR));
511 v->str[value->len] = 0;
512 v->len = value->len;
513 }
514 }
515 }
516
517 static inline int is_reader_pending(xmlreader *reader)
518 {
519 return reader->input->pending;
520 }
521
522 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
523 {
524 const int initial_len = 0x2000;
525 buffer->data = readerinput_alloc(input, initial_len);
526 if (!buffer->data) return E_OUTOFMEMORY;
527
528 memset(buffer->data, 0, 4);
529 buffer->cur = buffer->data;
530 buffer->allocated = initial_len;
531 buffer->written = 0;
532
533 return S_OK;
534 }
535
536 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
537 {
538 readerinput_free(input, buffer->data);
539 }
540
541 static HRESULT get_code_page(xml_encoding encoding, UINT *cp)
542 {
543 if (encoding == XmlEncoding_Unknown)
544 {
545 FIXME("unsupported encoding %d\n", encoding);
546 return E_NOTIMPL;
547 }
548
549 *cp = xml_encoding_map[encoding].cp;
550
551 return S_OK;
552 }
553
554 static xml_encoding parse_encoding_name(const WCHAR *name, int len)
555 {
556 int min, max, n, c;
557
558 if (!name) return XmlEncoding_Unknown;
559
560 min = 0;
561 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
562
563 while (min <= max)
564 {
565 n = (min+max)/2;
566
567 if (len != -1)
568 c = strncmpiW(xml_encoding_map[n].name, name, len);
569 else
570 c = strcmpiW(xml_encoding_map[n].name, name);
571 if (!c)
572 return xml_encoding_map[n].enc;
573
574 if (c > 0)
575 max = n-1;
576 else
577 min = n+1;
578 }
579
580 return XmlEncoding_Unknown;
581 }
582
583 static HRESULT alloc_input_buffer(xmlreaderinput *input)
584 {
585 input_buffer *buffer;
586 HRESULT hr;
587
588 input->buffer = NULL;
589
590 buffer = readerinput_alloc(input, sizeof(*buffer));
591 if (!buffer) return E_OUTOFMEMORY;
592
593 buffer->input = input;
594 buffer->code_page = ~0; /* code page is unknown at this point */
595 hr = init_encoded_buffer(input, &buffer->utf16);
596 if (hr != S_OK) {
597 readerinput_free(input, buffer);
598 return hr;
599 }
600
601 hr = init_encoded_buffer(input, &buffer->encoded);
602 if (hr != S_OK) {
603 free_encoded_buffer(input, &buffer->utf16);
604 readerinput_free(input, buffer);
605 return hr;
606 }
607
608 input->buffer = buffer;
609 return S_OK;
610 }
611
612 static void free_input_buffer(input_buffer *buffer)
613 {
614 free_encoded_buffer(buffer->input, &buffer->encoded);
615 free_encoded_buffer(buffer->input, &buffer->utf16);
616 readerinput_free(buffer->input, buffer);
617 }
618
619 static void readerinput_release_stream(xmlreaderinput *readerinput)
620 {
621 if (readerinput->stream) {
622 ISequentialStream_Release(readerinput->stream);
623 readerinput->stream = NULL;
624 }
625 }
626
627 /* Queries already stored interface for IStream/ISequentialStream.
628 Interface supplied on creation will be overwritten */
629 static HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
630 {
631 HRESULT hr;
632
633 readerinput_release_stream(readerinput);
634 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
635 if (hr != S_OK)
636 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
637
638 return hr;
639 }
640
641 /* reads a chunk to raw buffer */
642 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
643 {
644 encoded_buffer *buffer = &readerinput->buffer->encoded;
645 /* to make sure aligned length won't exceed allocated length */
646 ULONG len = buffer->allocated - buffer->written - 4;
647 ULONG read;
648 HRESULT hr;
649
650 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
651 variable width encodings like UTF-8 */
652 len = (len + 3) & ~3;
653 /* try to use allocated space or grow */
654 if (buffer->allocated - buffer->written < len)
655 {
656 buffer->allocated *= 2;
657 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
658 len = buffer->allocated - buffer->written;
659 }
660
661 read = 0;
662 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
663 TRACE("requested %d, read %d, ret 0x%08x\n", len, read, hr);
664 readerinput->pending = hr == E_PENDING;
665 if (FAILED(hr)) return hr;
666 buffer->written += read;
667
668 return hr;
669 }
670
671 /* grows UTF-16 buffer so it has at least 'length' bytes free on return */
672 static void readerinput_grow(xmlreaderinput *readerinput, int length)
673 {
674 encoded_buffer *buffer = &readerinput->buffer->utf16;
675
676 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
677 if (buffer->allocated < buffer->written + length + 4)
678 {
679 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
680 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
681 buffer->allocated = grown_size;
682 }
683 }
684
685 static inline int readerinput_is_utf8(xmlreaderinput *readerinput)
686 {
687 static char startA[] = {'<','?'};
688 static char commentA[] = {'<','!'};
689 encoded_buffer *buffer = &readerinput->buffer->encoded;
690 unsigned char *ptr = (unsigned char*)buffer->data;
691
692 return !memcmp(buffer->data, startA, sizeof(startA)) ||
693 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
694 /* test start byte */
695 (ptr[0] == '<' &&
696 (
697 (ptr[1] && (ptr[1] <= 0x7f)) ||
698 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
699 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
700 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
701 );
702 }
703
704 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
705 {
706 encoded_buffer *buffer = &readerinput->buffer->encoded;
707 static WCHAR startW[] = {'<','?'};
708 static WCHAR commentW[] = {'<','!'};
709 static char utf8bom[] = {0xef,0xbb,0xbf};
710 static char utf16lebom[] = {0xff,0xfe};
711
712 *enc = XmlEncoding_Unknown;
713
714 if (buffer->written <= 3)
715 {
716 HRESULT hr = readerinput_growraw(readerinput);
717 if (FAILED(hr)) return hr;
718 if (buffer->written <= 3) return MX_E_INPUTEND;
719 }
720
721 /* try start symbols if we have enough data to do that, input buffer should contain
722 first chunk already */
723 if (readerinput_is_utf8(readerinput))
724 *enc = XmlEncoding_UTF8;
725 else if (!memcmp(buffer->data, startW, sizeof(startW)) ||
726 !memcmp(buffer->data, commentW, sizeof(commentW)))
727 *enc = XmlEncoding_UTF16;
728 /* try with BOM now */
729 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
730 {
731 buffer->cur += sizeof(utf8bom);
732 *enc = XmlEncoding_UTF8;
733 }
734 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
735 {
736 buffer->cur += sizeof(utf16lebom);
737 *enc = XmlEncoding_UTF16;
738 }
739
740 return S_OK;
741 }
742
743 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
744 {
745 encoded_buffer *buffer = &readerinput->buffer->encoded;
746 int len = buffer->written;
747
748 /* complete single byte char */
749 if (!(buffer->data[len-1] & 0x80)) return len;
750
751 /* find start byte of multibyte char */
752 while (--len && !(buffer->data[len] & 0xc0))
753 ;
754
755 return len;
756 }
757
758 /* Returns byte length of complete char sequence for buffer code page,
759 it's relative to current buffer position which is currently used for BOM handling
760 only. */
761 static int readerinput_get_convlen(xmlreaderinput *readerinput)
762 {
763 encoded_buffer *buffer = &readerinput->buffer->encoded;
764 int len;
765
766 if (readerinput->buffer->code_page == CP_UTF8)
767 len = readerinput_get_utf8_convlen(readerinput);
768 else
769 len = buffer->written;
770
771 TRACE("%d\n", len - (int)(buffer->cur - buffer->data));
772 return len - (buffer->cur - buffer->data);
773 }
774
775 /* It's possible that raw buffer has some leftovers from last conversion - some char
776 sequence that doesn't represent a full code point. Length argument should be calculated with
777 readerinput_get_convlen(), if it's -1 it will be calculated here. */
778 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
779 {
780 encoded_buffer *buffer = &readerinput->buffer->encoded;
781
782 if (len == -1)
783 len = readerinput_get_convlen(readerinput);
784
785 memmove(buffer->data, buffer->cur + (buffer->written - len), len);
786 /* everything below cur is lost too */
787 buffer->written -= len + (buffer->cur - buffer->data);
788 /* after this point we don't need cur pointer really,
789 it's used only to mark where actual data begins when first chunk is read */
790 buffer->cur = buffer->data;
791 }
792
793 /* note that raw buffer content is kept */
794 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
795 {
796 encoded_buffer *src = &readerinput->buffer->encoded;
797 encoded_buffer *dest = &readerinput->buffer->utf16;
798 int len, dest_len;
799 HRESULT hr;
800 WCHAR *ptr;
801 UINT cp;
802
803 hr = get_code_page(enc, &cp);
804 if (FAILED(hr)) return;
805
806 readerinput->buffer->code_page = cp;
807 len = readerinput_get_convlen(readerinput);
808
809 TRACE("switching to cp %d\n", cp);
810
811 /* just copy in this case */
812 if (enc == XmlEncoding_UTF16)
813 {
814 readerinput_grow(readerinput, len);
815 memcpy(dest->data, src->cur, len);
816 dest->written += len*sizeof(WCHAR);
817 return;
818 }
819
820 dest_len = MultiByteToWideChar(cp, 0, src->cur, len, NULL, 0);
821 readerinput_grow(readerinput, dest_len);
822 ptr = (WCHAR*)dest->data;
823 MultiByteToWideChar(cp, 0, src->cur, len, ptr, dest_len);
824 ptr[dest_len] = 0;
825 dest->written += dest_len*sizeof(WCHAR);
826 }
827
828 /* shrinks parsed data a buffer begins with */
829 static void reader_shrink(xmlreader *reader)
830 {
831 encoded_buffer *buffer = &reader->input->buffer->utf16;
832
833 /* avoid to move too often using threshold shrink length */
834 if (buffer->cur - buffer->data > buffer->written / 2)
835 {
836 buffer->written -= buffer->cur - buffer->data;
837 memmove(buffer->data, buffer->cur, buffer->written);
838 buffer->cur = buffer->data;
839 *(WCHAR*)&buffer->cur[buffer->written] = 0;
840 }
841 }
842
843 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
844 It won't attempt to shrink but will grow destination buffer if needed */
845 static HRESULT reader_more(xmlreader *reader)
846 {
847 xmlreaderinput *readerinput = reader->input;
848 encoded_buffer *src = &readerinput->buffer->encoded;
849 encoded_buffer *dest = &readerinput->buffer->utf16;
850 UINT cp = readerinput->buffer->code_page;
851 int len, dest_len;
852 HRESULT hr;
853 WCHAR *ptr;
854
855 /* get some raw data from stream first */
856 hr = readerinput_growraw(readerinput);
857 len = readerinput_get_convlen(readerinput);
858
859 /* just copy for UTF-16 case */
860 if (cp == ~0)
861 {
862 readerinput_grow(readerinput, len);
863 memcpy(dest->data, src->cur, len);
864 dest->written += len*sizeof(WCHAR);
865 return hr;
866 }
867
868 dest_len = MultiByteToWideChar(cp, 0, src->cur, len, NULL, 0);
869 readerinput_grow(readerinput, dest_len);
870 ptr = (WCHAR*)dest->data;
871 MultiByteToWideChar(cp, 0, src->cur, len, ptr, dest_len);
872 ptr[dest_len] = 0;
873 dest->written += dest_len*sizeof(WCHAR);
874 /* get rid of processed data */
875 readerinput_shrinkraw(readerinput, len);
876
877 return hr;
878 }
879
880 static inline WCHAR *reader_get_cur(xmlreader *reader)
881 {
882 WCHAR *ptr = (WCHAR*)reader->input->buffer->utf16.cur;
883 if (!*ptr) reader_more(reader);
884 return ptr;
885 }
886
887 static int reader_cmp(xmlreader *reader, const WCHAR *str)
888 {
889 const WCHAR *ptr = reader_get_cur(reader);
890 return strncmpW(str, ptr, strlenW(str));
891 }
892
893 /* moves cursor n WCHARs forward */
894 static void reader_skipn(xmlreader *reader, int n)
895 {
896 encoded_buffer *buffer = &reader->input->buffer->utf16;
897 const WCHAR *ptr = reader_get_cur(reader);
898
899 while (*ptr++ && n--)
900 {
901 buffer->cur += sizeof(WCHAR);
902 reader->pos++;
903 }
904 }
905
906 static inline int is_wchar_space(WCHAR ch)
907 {
908 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
909 }
910
911 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
912 static int reader_skipspaces(xmlreader *reader)
913 {
914 encoded_buffer *buffer = &reader->input->buffer->utf16;
915 const WCHAR *ptr = reader_get_cur(reader), *start = ptr;
916
917 while (is_wchar_space(*ptr))
918 {
919 buffer->cur += sizeof(WCHAR);
920 if (*ptr == '\r')
921 reader->pos = 0;
922 else if (*ptr == '\n')
923 {
924 reader->line++;
925 reader->pos = 0;
926 }
927 else
928 reader->pos++;
929 ptr++;
930 }
931
932 return ptr - start;
933 }
934
935 /* [26] VersionNum ::= '1.' [0-9]+ */
936 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
937 {
938 WCHAR *ptr, *ptr2, *start = reader_get_cur(reader);
939 static const WCHAR onedotW[] = {'1','.',0};
940
941 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
942 /* skip "1." */
943 reader_skipn(reader, 2);
944
945 ptr2 = ptr = reader_get_cur(reader);
946 while (*ptr >= '0' && *ptr <= '9')
947 ptr++;
948
949 if (ptr2 == ptr) return WC_E_DIGIT;
950 TRACE("version=%s\n", debugstr_wn(start, ptr-start));
951 reader_init_strvalue(start, ptr-start, val);
952 reader_skipn(reader, ptr-ptr2);
953 return S_OK;
954 }
955
956 /* [25] Eq ::= S? '=' S? */
957 static HRESULT reader_parse_eq(xmlreader *reader)
958 {
959 static const WCHAR eqW[] = {'=',0};
960 reader_skipspaces(reader);
961 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
962 /* skip '=' */
963 reader_skipn(reader, 1);
964 reader_skipspaces(reader);
965 return S_OK;
966 }
967
968 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
969 static HRESULT reader_parse_versioninfo(xmlreader *reader)
970 {
971 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
972 strval val, name;
973 HRESULT hr;
974
975 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
976
977 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
978 reader_init_strvalue(reader_get_cur(reader), 7, &name);
979 /* skip 'version' */
980 reader_skipn(reader, 7);
981
982 hr = reader_parse_eq(reader);
983 if (FAILED(hr)) return hr;
984
985 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
986 return WC_E_QUOTE;
987 /* skip "'"|'"' */
988 reader_skipn(reader, 1);
989
990 hr = reader_parse_versionnum(reader, &val);
991 if (FAILED(hr)) return hr;
992
993 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
994 return WC_E_QUOTE;
995
996 /* skip "'"|'"' */
997 reader_skipn(reader, 1);
998
999 return reader_add_attr(reader, &name, &val);
1000 }
1001
1002 /* ([A-Za-z0-9._] | '-') */
1003 static inline int is_wchar_encname(WCHAR ch)
1004 {
1005 return ((ch >= 'A' && ch <= 'Z') ||
1006 (ch >= 'a' && ch <= 'z') ||
1007 (ch >= '0' && ch <= '9') ||
1008 (ch == '.') || (ch == '_') ||
1009 (ch == '-'));
1010 }
1011
1012 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1013 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1014 {
1015 WCHAR *start = reader_get_cur(reader), *ptr;
1016 xml_encoding enc;
1017 int len;
1018
1019 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1020 return WC_E_ENCNAME;
1021
1022 ptr = start;
1023 while (is_wchar_encname(*++ptr))
1024 ;
1025
1026 len = ptr - start;
1027 enc = parse_encoding_name(start, len);
1028 TRACE("encoding name %s\n", debugstr_wn(start, len));
1029 val->str = start;
1030 val->len = len;
1031
1032 if (enc == XmlEncoding_Unknown)
1033 return WC_E_ENCNAME;
1034
1035 /* skip encoding name */
1036 reader_skipn(reader, len);
1037 return S_OK;
1038 }
1039
1040 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1041 static HRESULT reader_parse_encdecl(xmlreader *reader)
1042 {
1043 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1044 strval name, val;
1045 HRESULT hr;
1046
1047 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1048
1049 if (reader_cmp(reader, encodingW)) return S_FALSE;
1050 name.str = reader_get_cur(reader);
1051 name.len = 8;
1052 /* skip 'encoding' */
1053 reader_skipn(reader, 8);
1054
1055 hr = reader_parse_eq(reader);
1056 if (FAILED(hr)) return hr;
1057
1058 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1059 return WC_E_QUOTE;
1060 /* skip "'"|'"' */
1061 reader_skipn(reader, 1);
1062
1063 hr = reader_parse_encname(reader, &val);
1064 if (FAILED(hr)) return hr;
1065
1066 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1067 return WC_E_QUOTE;
1068
1069 /* skip "'"|'"' */
1070 reader_skipn(reader, 1);
1071
1072 return reader_add_attr(reader, &name, &val);
1073 }
1074
1075 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1076 static HRESULT reader_parse_sddecl(xmlreader *reader)
1077 {
1078 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1079 static const WCHAR yesW[] = {'y','e','s',0};
1080 static const WCHAR noW[] = {'n','o',0};
1081 WCHAR *start, *ptr;
1082 strval name, val;
1083 HRESULT hr;
1084
1085 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1086
1087 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1088 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1089 /* skip 'standalone' */
1090 reader_skipn(reader, 10);
1091
1092 hr = reader_parse_eq(reader);
1093 if (FAILED(hr)) return hr;
1094
1095 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1096 return WC_E_QUOTE;
1097 /* skip "'"|'"' */
1098 reader_skipn(reader, 1);
1099
1100 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1101 return WC_E_XMLDECL;
1102
1103 start = reader_get_cur(reader);
1104 /* skip 'yes'|'no' */
1105 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1106 ptr = reader_get_cur(reader);
1107 TRACE("standalone=%s\n", debugstr_wn(start, ptr-start));
1108 val.str = val.start = start;
1109 val.len = ptr-start;
1110
1111 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1112 return WC_E_QUOTE;
1113 /* skip "'"|'"' */
1114 reader_skipn(reader, 1);
1115
1116 return reader_add_attr(reader, &name, &val);
1117 }
1118
1119 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1120 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1121 {
1122 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1123 static const WCHAR declcloseW[] = {'?','>',0};
1124 HRESULT hr;
1125
1126 /* check if we have "<?xml " */
1127 if (reader_cmp(reader, xmldeclW)) return S_FALSE;
1128
1129 reader_skipn(reader, 5);
1130 hr = reader_parse_versioninfo(reader);
1131 if (FAILED(hr))
1132 return hr;
1133
1134 hr = reader_parse_encdecl(reader);
1135 if (FAILED(hr))
1136 return hr;
1137
1138 hr = reader_parse_sddecl(reader);
1139 if (FAILED(hr))
1140 return hr;
1141
1142 reader_skipspaces(reader);
1143 if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
1144 reader_skipn(reader, 2);
1145
1146 reader_inc_depth(reader);
1147 reader->nodetype = XmlNodeType_XmlDeclaration;
1148 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1149 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1150 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1151
1152 return S_OK;
1153 }
1154
1155 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1156 static HRESULT reader_parse_comment(xmlreader *reader)
1157 {
1158 WCHAR *start, *ptr;
1159
1160 if (reader->resume[XmlReadResume_Body])
1161 {
1162 start = reader->resume[XmlReadResume_Body];
1163 ptr = reader_get_cur(reader);
1164 }
1165 else
1166 {
1167 /* skip '<!--' */
1168 reader_skipn(reader, 4);
1169 reader_shrink(reader);
1170 ptr = start = reader_get_cur(reader);
1171 reader->nodetype = XmlNodeType_Comment;
1172 reader->resume[XmlReadResume_Body] = start;
1173 reader->resumestate = XmlReadResumeState_Comment;
1174 reader_set_strvalue(reader, StringValue_LocalName, NULL);
1175 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
1176 reader_set_strvalue(reader, StringValue_Value, NULL);
1177 }
1178
1179 /* will exit when there's no more data, it won't attempt to
1180 read more from stream */
1181 while (*ptr)
1182 {
1183 if (ptr[0] == '-')
1184 {
1185 if (ptr[1] == '-')
1186 {
1187 if (ptr[2] == '>')
1188 {
1189 strval value;
1190
1191 TRACE("%s\n", debugstr_wn(start, ptr-start));
1192 /* skip '-->' */
1193 reader_skipn(reader, 3);
1194 reader_init_strvalue(start, ptr-start, &value);
1195 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1196 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1197 reader_set_strvalue(reader, StringValue_Value, &value);
1198 reader->resume[XmlReadResume_Body] = NULL;
1199 reader->resumestate = XmlReadResumeState_Initial;
1200 return S_OK;
1201 }
1202 else
1203 return WC_E_COMMENT;
1204 }
1205 else
1206 ptr++;
1207 }
1208 else
1209 {
1210 reader_skipn(reader, 1);
1211 ptr++;
1212 }
1213 }
1214
1215 return S_OK;
1216 }
1217
1218 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1219 static inline int is_char(WCHAR ch)
1220 {
1221 return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
1222 (ch >= 0x20 && ch <= 0xd7ff) ||
1223 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1224 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1225 (ch >= 0xe000 && ch <= 0xfffd);
1226 }
1227
1228 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1229 static inline int is_pubchar(WCHAR ch)
1230 {
1231 return (ch == ' ') ||
1232 (ch >= 'a' && ch <= 'z') ||
1233 (ch >= 'A' && ch <= 'Z') ||
1234 (ch >= '0' && ch <= '9') ||
1235 (ch >= '-' && ch <= ';') || /* '()*+,-./:; */
1236 (ch == '=') || (ch == '?') ||
1237 (ch == '@') || (ch == '!') ||
1238 (ch >= '#' && ch <= '%') || /* #$% */
1239 (ch == '_') || (ch == '\r') || (ch == '\n');
1240 }
1241
1242 static inline int is_namestartchar(WCHAR ch)
1243 {
1244 return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
1245 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1246 (ch >= 0xc0 && ch <= 0xd6) ||
1247 (ch >= 0xd8 && ch <= 0xf6) ||
1248 (ch >= 0xf8 && ch <= 0x2ff) ||
1249 (ch >= 0x370 && ch <= 0x37d) ||
1250 (ch >= 0x37f && ch <= 0x1fff) ||
1251 (ch >= 0x200c && ch <= 0x200d) ||
1252 (ch >= 0x2070 && ch <= 0x218f) ||
1253 (ch >= 0x2c00 && ch <= 0x2fef) ||
1254 (ch >= 0x3001 && ch <= 0xd7ff) ||
1255 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1256 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1257 (ch >= 0xf900 && ch <= 0xfdcf) ||
1258 (ch >= 0xfdf0 && ch <= 0xfffd);
1259 }
1260
1261 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1262 static inline int is_ncnamechar(WCHAR ch)
1263 {
1264 return (ch >= 'A' && ch <= 'Z') ||
1265 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1266 (ch == '-') || (ch == '.') ||
1267 (ch >= '0' && ch <= '9') ||
1268 (ch == 0xb7) ||
1269 (ch >= 0xc0 && ch <= 0xd6) ||
1270 (ch >= 0xd8 && ch <= 0xf6) ||
1271 (ch >= 0xf8 && ch <= 0x2ff) ||
1272 (ch >= 0x300 && ch <= 0x36f) ||
1273 (ch >= 0x370 && ch <= 0x37d) ||
1274 (ch >= 0x37f && ch <= 0x1fff) ||
1275 (ch >= 0x200c && ch <= 0x200d) ||
1276 (ch >= 0x203f && ch <= 0x2040) ||
1277 (ch >= 0x2070 && ch <= 0x218f) ||
1278 (ch >= 0x2c00 && ch <= 0x2fef) ||
1279 (ch >= 0x3001 && ch <= 0xd7ff) ||
1280 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1281 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1282 (ch >= 0xf900 && ch <= 0xfdcf) ||
1283 (ch >= 0xfdf0 && ch <= 0xfffd);
1284 }
1285
1286 static inline int is_namechar(WCHAR ch)
1287 {
1288 return (ch == ':') || is_ncnamechar(ch);
1289 }
1290
1291 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1292 {
1293 /* When we're on attribute always return attribute type, container node type is kept.
1294 Note that container is not necessarily an element, and attribute doesn't mean it's
1295 an attribute in XML spec terms. */
1296 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1297 }
1298
1299 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1300 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1301 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1302 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1303 [5] Name ::= NameStartChar (NameChar)* */
1304 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1305 {
1306 WCHAR *ptr, *start;
1307
1308 if (reader->resume[XmlReadResume_Name])
1309 {
1310 start = reader->resume[XmlReadResume_Name];
1311 ptr = reader_get_cur(reader);
1312 }
1313 else
1314 {
1315 ptr = start = reader_get_cur(reader);
1316 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1317 }
1318
1319 while (is_namechar(*ptr))
1320 {
1321 reader_skipn(reader, 1);
1322 ptr = reader_get_cur(reader);
1323 }
1324
1325 if (is_reader_pending(reader))
1326 {
1327 reader->resume[XmlReadResume_Name] = start;
1328 return E_PENDING;
1329 }
1330 else
1331 reader->resume[XmlReadResume_Name] = NULL;
1332
1333 TRACE("name %s:%d\n", debugstr_wn(start, ptr-start), (int)(ptr-start));
1334 reader_init_strvalue(start, ptr-start, name);
1335
1336 return S_OK;
1337 }
1338
1339 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1340 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1341 {
1342 static const WCHAR xmlW[] = {'x','m','l'};
1343 strval name;
1344 HRESULT hr;
1345 UINT i;
1346
1347 hr = reader_parse_name(reader, &name);
1348 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1349
1350 /* now that we got name check for illegal content */
1351 if (name.len == 3 && !strncmpiW(name.str, xmlW, 3))
1352 return WC_E_LEADINGXML;
1353
1354 /* PITarget can't be a qualified name */
1355 for (i = 0; i < name.len; i++)
1356 if (name.str[i] == ':')
1357 return i ? NC_E_NAMECOLON : WC_E_PI;
1358
1359 TRACE("pitarget %s:%d\n", debugstr_wn(name.str, name.len), name.len);
1360 *target = name;
1361 return S_OK;
1362 }
1363
1364 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1365 static HRESULT reader_parse_pi(xmlreader *reader)
1366 {
1367 WCHAR *ptr, *start;
1368 strval target;
1369 HRESULT hr;
1370
1371 switch (reader->resumestate)
1372 {
1373 case XmlReadResumeState_Initial:
1374 /* skip '<?' */
1375 reader_skipn(reader, 2);
1376 reader_shrink(reader);
1377 reader->resumestate = XmlReadResumeState_PITarget;
1378 case XmlReadResumeState_PITarget:
1379 hr = reader_parse_pitarget(reader, &target);
1380 if (FAILED(hr)) return hr;
1381 reader_set_strvalue(reader, StringValue_LocalName, &target);
1382 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1383 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1384 reader->resumestate = XmlReadResumeState_PIBody;
1385 default:
1386 ;
1387 }
1388
1389 ptr = reader_get_cur(reader);
1390 /* exit earlier if there's no content */
1391 if (ptr[0] == '?' && ptr[1] == '>')
1392 {
1393 /* skip '?>' */
1394 reader_skipn(reader, 2);
1395 reader->nodetype = XmlNodeType_ProcessingInstruction;
1396 reader->resumestate = XmlReadResumeState_Initial;
1397 return S_OK;
1398 }
1399
1400 if (!reader->resume[XmlReadResume_Body])
1401 {
1402 /* now at least a single space char should be there */
1403 if (!is_wchar_space(*ptr)) return WC_E_WHITESPACE;
1404 reader_skipspaces(reader);
1405 ptr = start = reader_get_cur(reader);
1406 reader->resume[XmlReadResume_Body] = start;
1407 }
1408 else
1409 {
1410 start = reader->resume[XmlReadResume_Body];
1411 ptr = reader_get_cur(reader);
1412 }
1413
1414 while (*ptr)
1415 {
1416 if (ptr[0] == '?')
1417 {
1418 if (ptr[1] == '>')
1419 {
1420 strval value;
1421
1422 TRACE("%s\n", debugstr_wn(start, ptr-start));
1423 /* skip '?>' */
1424 reader_skipn(reader, 2);
1425 reader->nodetype = XmlNodeType_ProcessingInstruction;
1426 reader->resumestate = XmlReadResumeState_Initial;
1427 reader->resume[XmlReadResume_Body] = NULL;
1428 reader_init_strvalue(start, ptr-start, &value);
1429 reader_set_strvalue(reader, StringValue_Value, &value);
1430 return S_OK;
1431 }
1432 else
1433 {
1434 ptr++;
1435 reader_more(reader);
1436 }
1437 }
1438 else
1439 {
1440 reader_skipn(reader, 1);
1441 ptr = reader_get_cur(reader);
1442 }
1443 }
1444
1445 return S_OK;
1446 }
1447
1448 /* This one is used to parse significant whitespace nodes, like in Misc production */
1449 static HRESULT reader_parse_whitespace(xmlreader *reader)
1450 {
1451 WCHAR *start, *ptr;
1452
1453 reader_shrink(reader);
1454 start = reader_get_cur(reader);
1455
1456 reader_skipspaces(reader);
1457 ptr = reader_get_cur(reader);
1458 TRACE("%s\n", debugstr_wn(start, ptr-start));
1459
1460 reader->nodetype = XmlNodeType_Whitespace;
1461 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1462 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1463 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1464 return S_OK;
1465 }
1466
1467 /* [27] Misc ::= Comment | PI | S */
1468 static HRESULT reader_parse_misc(xmlreader *reader)
1469 {
1470 HRESULT hr = S_FALSE;
1471
1472 if (reader->resumestate != XmlReadResumeState_Initial)
1473 {
1474 hr = reader_more(reader);
1475 if (FAILED(hr)) return hr;
1476
1477 /* finish current node */
1478 switch (reader->resumestate)
1479 {
1480 case XmlReadResumeState_PITarget:
1481 case XmlReadResumeState_PIBody:
1482 return reader_parse_pi(reader);
1483 case XmlReadResumeState_Comment:
1484 return reader_parse_comment(reader);
1485 default:
1486 ERR("unknown resume state %d\n", reader->resumestate);
1487 }
1488 }
1489
1490 while (1)
1491 {
1492 const WCHAR *cur = reader_get_cur(reader);
1493
1494 if (is_wchar_space(*cur))
1495 hr = reader_parse_whitespace(reader);
1496 else if (!reader_cmp(reader, commentW))
1497 hr = reader_parse_comment(reader);
1498 else if (!reader_cmp(reader, piW))
1499 hr = reader_parse_pi(reader);
1500 else
1501 break;
1502
1503 if (hr != S_FALSE) return hr;
1504 }
1505
1506 return hr;
1507 }
1508
1509 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1510 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1511 {
1512 WCHAR *start = reader_get_cur(reader), *cur, quote;
1513
1514 if (*start != '"' && *start != '\'') return WC_E_QUOTE;
1515
1516 quote = *start;
1517 reader_skipn(reader, 1);
1518
1519 cur = start = reader_get_cur(reader);
1520 while (is_char(*cur) && *cur != quote)
1521 {
1522 reader_skipn(reader, 1);
1523 cur = reader_get_cur(reader);
1524 }
1525 if (*cur == quote) reader_skipn(reader, 1);
1526
1527 reader_init_strvalue(start, cur-start, literal);
1528 TRACE("%s\n", debugstr_wn(start, cur-start));
1529 return S_OK;
1530 }
1531
1532 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1533 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1534 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1535 {
1536 WCHAR *start = reader_get_cur(reader), *cur, quote;
1537
1538 if (*start != '"' && *start != '\'') return WC_E_QUOTE;
1539
1540 quote = *start;
1541 reader_skipn(reader, 1);
1542
1543 cur = start;
1544 while (is_pubchar(*cur) && *cur != quote)
1545 {
1546 reader_skipn(reader, 1);
1547 cur = reader_get_cur(reader);
1548 }
1549
1550 reader_init_strvalue(start, cur-start, literal);
1551 TRACE("%s\n", debugstr_wn(start, cur-start));
1552 return S_OK;
1553 }
1554
1555 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1556 static HRESULT reader_parse_externalid(xmlreader *reader)
1557 {
1558 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1559 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1560 strval name;
1561 HRESULT hr;
1562 int cnt;
1563
1564 if (reader_cmp(reader, systemW))
1565 {
1566 if (reader_cmp(reader, publicW))
1567 return S_FALSE;
1568 else
1569 {
1570 strval pub;
1571
1572 /* public id */
1573 reader_skipn(reader, 6);
1574 cnt = reader_skipspaces(reader);
1575 if (!cnt) return WC_E_WHITESPACE;
1576
1577 hr = reader_parse_pub_literal(reader, &pub);
1578 if (FAILED(hr)) return hr;
1579
1580 reader_init_strvalue(publicW, strlenW(publicW), &name);
1581 return reader_add_attr(reader, &name, &pub);
1582 }
1583 }
1584 else
1585 {
1586 strval sys;
1587
1588 /* system id */
1589 reader_skipn(reader, 6);
1590 cnt = reader_skipspaces(reader);
1591 if (!cnt) return WC_E_WHITESPACE;
1592
1593 hr = reader_parse_sys_literal(reader, &sys);
1594 if (FAILED(hr)) return hr;
1595
1596 reader_init_strvalue(systemW, strlenW(systemW), &name);
1597 return reader_add_attr(reader, &name, &sys);
1598 }
1599
1600 return hr;
1601 }
1602
1603 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1604 static HRESULT reader_parse_dtd(xmlreader *reader)
1605 {
1606 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1607 strval name;
1608 WCHAR *cur;
1609 HRESULT hr;
1610
1611 /* check if we have "<!DOCTYPE" */
1612 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1613 reader_shrink(reader);
1614
1615 /* DTD processing is not allowed by default */
1616 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1617
1618 reader_skipn(reader, 9);
1619 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1620
1621 /* name */
1622 hr = reader_parse_name(reader, &name);
1623 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1624
1625 reader_skipspaces(reader);
1626
1627 hr = reader_parse_externalid(reader);
1628 if (FAILED(hr)) return hr;
1629
1630 reader_skipspaces(reader);
1631
1632 cur = reader_get_cur(reader);
1633 if (*cur != '>')
1634 {
1635 FIXME("internal subset parsing not implemented\n");
1636 return E_NOTIMPL;
1637 }
1638
1639 /* skip '>' */
1640 reader_skipn(reader, 1);
1641
1642 reader->nodetype = XmlNodeType_DocumentType;
1643 reader_set_strvalue(reader, StringValue_LocalName, &name);
1644 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1645
1646 return S_OK;
1647 }
1648
1649 /* [11 NS] LocalPart ::= NCName */
1650 static HRESULT reader_parse_local(xmlreader *reader, strval *local)
1651 {
1652 WCHAR *ptr, *start;
1653
1654 if (reader->resume[XmlReadResume_Local])
1655 {
1656 start = reader->resume[XmlReadResume_Local];
1657 ptr = reader_get_cur(reader);
1658 }
1659 else
1660 {
1661 ptr = start = reader_get_cur(reader);
1662 }
1663
1664 while (is_ncnamechar(*ptr))
1665 {
1666 reader_skipn(reader, 1);
1667 ptr = reader_get_cur(reader);
1668 }
1669
1670 if (is_reader_pending(reader))
1671 {
1672 reader->resume[XmlReadResume_Local] = start;
1673 return E_PENDING;
1674 }
1675 else
1676 reader->resume[XmlReadResume_Local] = NULL;
1677
1678 reader_init_strvalue(start, ptr-start, local);
1679
1680 return S_OK;
1681 }
1682
1683 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1684 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1685 [9 NS] UnprefixedName ::= LocalPart
1686 [10 NS] Prefix ::= NCName */
1687 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1688 {
1689 WCHAR *ptr, *start;
1690 HRESULT hr;
1691
1692 if (reader->resume[XmlReadResume_Name])
1693 {
1694 start = reader->resume[XmlReadResume_Name];
1695 ptr = reader_get_cur(reader);
1696 }
1697 else
1698 {
1699 ptr = start = reader_get_cur(reader);
1700 reader->resume[XmlReadResume_Name] = start;
1701 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1702 }
1703
1704 if (reader->resume[XmlReadResume_Local])
1705 {
1706 hr = reader_parse_local(reader, local);
1707 if (FAILED(hr)) return hr;
1708
1709 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1710 local->start - reader->resume[XmlReadResume_Name] - 1,
1711 prefix);
1712 }
1713 else
1714 {
1715 /* skip prefix part */
1716 while (is_ncnamechar(*ptr))
1717 {
1718 reader_skipn(reader, 1);
1719 ptr = reader_get_cur(reader);
1720 }
1721
1722 if (is_reader_pending(reader)) return E_PENDING;
1723
1724 /* got a qualified name */
1725 if (*ptr == ':')
1726 {
1727 reader_init_strvalue(start, ptr-start, prefix);
1728
1729 /* skip ':' */
1730 reader_skipn(reader, 1);
1731 hr = reader_parse_local(reader, local);
1732 if (FAILED(hr)) return hr;
1733 }
1734 else
1735 {
1736 reader_init_strvalue(reader->resume[XmlReadResume_Name], ptr-reader->resume[XmlReadResume_Name], local);
1737 reader_init_strvalue(NULL, 0, prefix);
1738 }
1739 }
1740
1741 reader_init_strvalue(start, ptr-start, local);
1742
1743 if (prefix->len)
1744 TRACE("qname %s:%s\n", debugstr_wn(prefix->start, prefix->len), debugstr_wn(local->start, local->len));
1745 else
1746 TRACE("ncname %s\n", debugstr_wn(local->start, local->len));
1747
1748 reader_init_strvalue(prefix->start ? prefix->start : local->start,
1749 /* count ':' too */
1750 (prefix->len ? prefix->len + 1 : 0) + local->len,
1751 qname);
1752
1753 reader->resume[XmlReadResume_Name] = NULL;
1754 reader->resume[XmlReadResume_Local] = NULL;
1755
1756 return S_OK;
1757 }
1758
1759 /* Applies normalization rules to a single char, used for attribute values.
1760
1761 Rules include 2 steps:
1762
1763 1) replacing \r\n with a single \n;
1764 2) replacing all whitespace chars with ' '.
1765
1766 */
1767 static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
1768 {
1769 encoded_buffer *buffer = &reader->input->buffer->utf16;
1770
1771 if (!is_wchar_space(*ptr)) return;
1772
1773 if (*ptr == '\r' && *(ptr+1) == '\n')
1774 {
1775 int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR);
1776 memmove(ptr+1, ptr+2, len);
1777 }
1778 *ptr = ' ';
1779 }
1780
1781 static WCHAR get_predefined_entity(const strval *name)
1782 {
1783 static const WCHAR entltW[] = {'l','t'};
1784 static const WCHAR entgtW[] = {'g','t'};
1785 static const WCHAR entampW[] = {'a','m','p'};
1786 static const WCHAR entaposW[] = {'a','p','o','s'};
1787 static const WCHAR entquotW[] = {'q','u','o','t'};
1788
1789 static const strval lt = { (WCHAR*)entltW, 2 };
1790 static const strval gt = { (WCHAR*)entgtW, 2 };
1791 static const strval amp = { (WCHAR*)entampW, 3 };
1792 static const strval apos = { (WCHAR*)entaposW, 4 };
1793 static const strval quot = { (WCHAR*)entquotW, 4 };
1794
1795 switch (name->str[0])
1796 {
1797 case 'l':
1798 if (strval_eq(name, &lt)) return '<';
1799 break;
1800 case 'g':
1801 if (strval_eq(name, &gt)) return '>';
1802 break;
1803 case 'a':
1804 if (strval_eq(name, &amp))
1805 return '&';
1806 else if (strval_eq(name, &apos))
1807 return '\'';
1808 break;
1809 case 'q':
1810 if (strval_eq(name, &quot)) return '\"';
1811 break;
1812 default:
1813 ;
1814 }
1815
1816 return 0;
1817 }
1818
1819 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1820 [67] Reference ::= EntityRef | CharRef
1821 [68] EntityRef ::= '&' Name ';' */
1822 static HRESULT reader_parse_reference(xmlreader *reader)
1823 {
1824 encoded_buffer *buffer = &reader->input->buffer->utf16;
1825 WCHAR *start = reader_get_cur(reader), *ptr;
1826 WCHAR ch = 0;
1827 int len;
1828
1829 /* skip '&' */
1830 reader_skipn(reader, 1);
1831 ptr = reader_get_cur(reader);
1832
1833 if (*ptr == '#')
1834 {
1835 reader_skipn(reader, 1);
1836 ptr = reader_get_cur(reader);
1837
1838 /* hex char or decimal */
1839 if (*ptr == 'x')
1840 {
1841 reader_skipn(reader, 1);
1842 ptr = reader_get_cur(reader);
1843
1844 while (*ptr != ';')
1845 {
1846 if ((*ptr >= '0' && *ptr <= '9'))
1847 ch = ch*16 + *ptr - '0';
1848 else if ((*ptr >= 'a' && *ptr <= 'f'))
1849 ch = ch*16 + *ptr - 'a' + 10;
1850 else if ((*ptr >= 'A' && *ptr <= 'F'))
1851 ch = ch*16 + *ptr - 'A' + 10;
1852 else
1853 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
1854 reader_skipn(reader, 1);
1855 ptr = reader_get_cur(reader);
1856 }
1857 }
1858 else
1859 {
1860 while (*ptr != ';')
1861 {
1862 if ((*ptr >= '0' && *ptr <= '9'))
1863 {
1864 ch = ch*10 + *ptr - '0';
1865 reader_skipn(reader, 1);
1866 ptr = reader_get_cur(reader);
1867 }
1868 else
1869 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
1870 }
1871 }
1872
1873 if (!is_char(ch)) return WC_E_XMLCHARACTER;
1874
1875 /* normalize */
1876 if (is_wchar_space(ch)) ch = ' ';
1877
1878 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
1879 memmove(start+1, ptr+1, len);
1880 buffer->cur = (char*)(start+1);
1881
1882 *start = ch;
1883 }
1884 else
1885 {
1886 strval name;
1887 HRESULT hr;
1888
1889 hr = reader_parse_name(reader, &name);
1890 if (FAILED(hr)) return hr;
1891
1892 ptr = reader_get_cur(reader);
1893 if (*ptr != ';') return WC_E_SEMICOLON;
1894
1895 /* predefined entities resolve to a single character */
1896 ch = get_predefined_entity(&name);
1897 if (ch)
1898 {
1899 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
1900 memmove(start+1, ptr+1, len);
1901 buffer->cur = (char*)(start+1);
1902
1903 *start = ch;
1904 }
1905 else
1906 {
1907 FIXME("undeclared entity %s\n", debugstr_wn(name.str, name.len));
1908 return WC_E_UNDECLAREDENTITY;
1909 }
1910
1911 }
1912
1913 return S_OK;
1914 }
1915
1916 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
1917 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
1918 {
1919 WCHAR *ptr, *start;
1920 WCHAR quote;
1921
1922 ptr = reader_get_cur(reader);
1923
1924 /* skip opening quote */
1925 quote = *ptr;
1926 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
1927 reader_skipn(reader, 1);
1928
1929 start = ptr = reader_get_cur(reader);
1930 while (*ptr)
1931 {
1932 if (*ptr == '<') return WC_E_LESSTHAN;
1933
1934 if (*ptr == quote)
1935 {
1936 /* skip closing quote */
1937 reader_skipn(reader, 1);
1938 break;
1939 }
1940
1941 if (*ptr == '&')
1942 {
1943 HRESULT hr = reader_parse_reference(reader);
1944 if (FAILED(hr)) return hr;
1945 }
1946 else
1947 {
1948 reader_normalize_space(reader, ptr);
1949 reader_skipn(reader, 1);
1950 }
1951 ptr = reader_get_cur(reader);
1952 }
1953
1954 reader_init_strvalue(start, ptr-start, value);
1955
1956 return S_OK;
1957 }
1958
1959 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
1960 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
1961 [3 NS] DefaultAttName ::= 'xmlns'
1962 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
1963 static HRESULT reader_parse_attribute(xmlreader *reader)
1964 {
1965 static const WCHAR xmlnsW[] = {'x','m','l','n','s',0};
1966 strval prefix, local, qname, xmlns, value;
1967 HRESULT hr;
1968
1969 hr = reader_parse_qname(reader, &prefix, &local, &qname);
1970 if (FAILED(hr)) return hr;
1971
1972 reader_init_strvalue((WCHAR*)xmlnsW, 5, &xmlns);
1973
1974 if (strval_eq(&prefix, &xmlns))
1975 {
1976 FIXME("namespace definitions not supported\n");
1977 return E_NOTIMPL;
1978 }
1979
1980 if (strval_eq(&qname, &xmlns))
1981 {
1982 FIXME("default namespace definitions not supported\n");
1983 return E_NOTIMPL;
1984 }
1985
1986 hr = reader_parse_eq(reader);
1987 if (FAILED(hr)) return hr;
1988
1989 hr = reader_parse_attvalue(reader, &value);
1990 if (FAILED(hr)) return hr;
1991
1992 TRACE("%s=%s\n", debugstr_wn(local.str, local.len), debugstr_wn(value.str, value.len));
1993 return reader_add_attr(reader, &local, &value);
1994 }
1995
1996 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
1997 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
1998 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname, int *empty)
1999 {
2000 HRESULT hr;
2001
2002 hr = reader_parse_qname(reader, prefix, local, qname);
2003 if (FAILED(hr)) return hr;
2004
2005 while (1)
2006 {
2007 static const WCHAR endW[] = {'/','>',0};
2008
2009 reader_skipspaces(reader);
2010
2011 /* empty element */
2012 if ((*empty = !reader_cmp(reader, endW)))
2013 {
2014 /* skip '/>' */
2015 reader_skipn(reader, 2);
2016 reader->empty_element = TRUE;
2017 return S_OK;
2018 }
2019
2020 /* got a start tag */
2021 if (!reader_cmp(reader, gtW))
2022 {
2023 /* skip '>' */
2024 reader_skipn(reader, 1);
2025 return reader_push_element(reader, qname, local);
2026 }
2027
2028 hr = reader_parse_attribute(reader);
2029 if (FAILED(hr)) return hr;
2030 }
2031
2032 return S_OK;
2033 }
2034
2035 /* [39] element ::= EmptyElemTag | STag content ETag */
2036 static HRESULT reader_parse_element(xmlreader *reader)
2037 {
2038 HRESULT hr;
2039
2040 switch (reader->resumestate)
2041 {
2042 case XmlReadResumeState_Initial:
2043 /* check if we are really on element */
2044 if (reader_cmp(reader, ltW)) return S_FALSE;
2045
2046 /* skip '<' */
2047 reader_skipn(reader, 1);
2048
2049 reader_shrink(reader);
2050 reader->resumestate = XmlReadResumeState_STag;
2051 case XmlReadResumeState_STag:
2052 {
2053 strval qname, prefix, local;
2054 int empty = 0;
2055
2056 /* this handles empty elements too */
2057 hr = reader_parse_stag(reader, &prefix, &local, &qname, &empty);
2058 if (FAILED(hr)) return hr;
2059
2060 /* FIXME: need to check for defined namespace to reject invalid prefix,
2061 currently reject all prefixes */
2062 if (prefix.len) return NC_E_UNDECLAREDPREFIX;
2063
2064 /* if we got empty element and stack is empty go straight to Misc */
2065 if (empty && list_empty(&reader->elements))
2066 reader->instate = XmlReadInState_MiscEnd;
2067 else
2068 reader->instate = XmlReadInState_Content;
2069
2070 reader->nodetype = XmlNodeType_Element;
2071 reader->resumestate = XmlReadResumeState_Initial;
2072 reader_set_strvalue(reader, StringValue_LocalName, &local);
2073 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2074 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2075 break;
2076 }
2077 default:
2078 hr = E_FAIL;
2079 }
2080
2081 return hr;
2082 }
2083
2084 /* [13 NS] ETag ::= '</' QName S? '>' */
2085 static HRESULT reader_parse_endtag(xmlreader *reader)
2086 {
2087 strval prefix, local, qname;
2088 struct element *elem;
2089 HRESULT hr;
2090
2091 /* skip '</' */
2092 reader_skipn(reader, 2);
2093
2094 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2095 if (FAILED(hr)) return hr;
2096
2097 reader_skipspaces(reader);
2098
2099 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2100
2101 /* skip '>' */
2102 reader_skipn(reader, 1);
2103
2104 /* Element stack should never be empty at this point, cause we shouldn't get to
2105 content parsing if it's empty. */
2106 elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2107 if (!strval_eq(&elem->qname, &qname)) return WC_E_ELEMENTMATCH;
2108
2109 reader_pop_element(reader);
2110
2111 /* It was a root element, the rest is expected as Misc */
2112 if (list_empty(&reader->elements))
2113 reader->instate = XmlReadInState_MiscEnd;
2114
2115 reader->nodetype = XmlNodeType_EndElement;
2116 reader_set_strvalue(reader, StringValue_LocalName, &local);
2117 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2118
2119 return S_OK;
2120 }
2121
2122 /* [18] CDSect ::= CDStart CData CDEnd
2123 [19] CDStart ::= '<![CDATA['
2124 [20] CData ::= (Char* - (Char* ']]>' Char*))
2125 [21] CDEnd ::= ']]>' */
2126 static HRESULT reader_parse_cdata(xmlreader *reader)
2127 {
2128 WCHAR *start, *ptr;
2129
2130 if (reader->resume[XmlReadResume_Body])
2131 {
2132 start = reader->resume[XmlReadResume_Body];
2133 ptr = reader_get_cur(reader);
2134 }
2135 else
2136 {
2137 /* skip markup '<![CDATA[' */
2138 reader_skipn(reader, 9);
2139 reader_shrink(reader);
2140 ptr = start = reader_get_cur(reader);
2141 reader->nodetype = XmlNodeType_CDATA;
2142 reader->resume[XmlReadResume_Body] = start;
2143 reader->resumestate = XmlReadResumeState_CDATA;
2144 reader_set_strvalue(reader, StringValue_LocalName, NULL);
2145 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
2146 reader_set_strvalue(reader, StringValue_Value, NULL);
2147 }
2148
2149 while (*ptr)
2150 {
2151 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2152 {
2153 strval value;
2154
2155 TRACE("%s\n", debugstr_wn(start, ptr-start));
2156 /* skip ']]>' */
2157 reader_skipn(reader, 3);
2158 reader_init_strvalue(start, ptr-start, &value);
2159 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2160 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2161 reader_set_strvalue(reader, StringValue_Value, &value);
2162 reader->resume[XmlReadResume_Body] = NULL;
2163 reader->resumestate = XmlReadResumeState_Initial;
2164 return S_OK;
2165 }
2166 else
2167 {
2168 /* Value normalization is not fully implemented, rules are:
2169
2170 - single '\r' -> '\n';
2171 - sequence '\r\n' -> '\n', in this case value length changes;
2172 */
2173 if (*ptr == '\r') *ptr = '\n';
2174 reader_skipn(reader, 1);
2175 ptr++;
2176 }
2177 }
2178
2179 return S_OK;
2180 }
2181
2182 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2183 static HRESULT reader_parse_chardata(xmlreader *reader)
2184 {
2185 WCHAR *start, *ptr;
2186
2187 if (reader->resume[XmlReadResume_Body])
2188 {
2189 start = reader->resume[XmlReadResume_Body];
2190 ptr = reader_get_cur(reader);
2191 }
2192 else
2193 {
2194 reader_shrink(reader);
2195 ptr = start = reader_get_cur(reader);
2196 /* There's no text */
2197 if (!*ptr || *ptr == '<') return S_OK;
2198 reader->nodetype = XmlNodeType_Text;
2199 reader->resume[XmlReadResume_Body] = start;
2200 reader->resumestate = XmlReadResumeState_CharData;
2201 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2202 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2203 reader_set_strvalue(reader, StringValue_Value, NULL);
2204 }
2205
2206 while (*ptr)
2207 {
2208 /* CDATA closing sequence ']]>' is not allowed */
2209 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2210 return WC_E_CDSECTEND;
2211
2212 /* Found next markup part */
2213 if (ptr[0] == '<')
2214 {
2215 strval value;
2216
2217 reader_init_strvalue(start, ptr-start, &value);
2218 reader_set_strvalue(reader, StringValue_Value, &value);
2219 return S_OK;
2220 }
2221
2222 reader_skipn(reader, 1);
2223 ptr++;
2224 }
2225
2226 return S_OK;
2227 }
2228
2229 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2230 static HRESULT reader_parse_content(xmlreader *reader)
2231 {
2232 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2233 static const WCHAR etagW[] = {'<','/',0};
2234 static const WCHAR ampW[] = {'&',0};
2235
2236 if (reader->resumestate != XmlReadResumeState_Initial)
2237 {
2238 switch (reader->resumestate)
2239 {
2240 case XmlReadResumeState_CDATA:
2241 return reader_parse_cdata(reader);
2242 case XmlReadResumeState_Comment:
2243 return reader_parse_comment(reader);
2244 case XmlReadResumeState_PIBody:
2245 case XmlReadResumeState_PITarget:
2246 return reader_parse_pi(reader);
2247 case XmlReadResumeState_CharData:
2248 return reader_parse_chardata(reader);
2249 default:
2250 ERR("unknown resume state %d\n", reader->resumestate);
2251 }
2252 }
2253
2254 reader_shrink(reader);
2255
2256 /* handle end tag here, it indicates end of content as well */
2257 if (!reader_cmp(reader, etagW))
2258 return reader_parse_endtag(reader);
2259
2260 if (!reader_cmp(reader, commentW))
2261 return reader_parse_comment(reader);
2262
2263 if (!reader_cmp(reader, piW))
2264 return reader_parse_pi(reader);
2265
2266 if (!reader_cmp(reader, cdstartW))
2267 return reader_parse_cdata(reader);
2268
2269 if (!reader_cmp(reader, ampW))
2270 return reader_parse_reference(reader);
2271
2272 if (!reader_cmp(reader, ltW))
2273 return reader_parse_element(reader);
2274
2275 /* what's left must be CharData */
2276 return reader_parse_chardata(reader);
2277 }
2278
2279 static HRESULT reader_parse_nextnode(xmlreader *reader)
2280 {
2281 HRESULT hr;
2282
2283 if (!is_reader_pending(reader))
2284 reader_clear_attrs(reader);
2285
2286 while (1)
2287 {
2288 switch (reader->instate)
2289 {
2290 /* if it's a first call for a new input we need to detect stream encoding */
2291 case XmlReadInState_Initial:
2292 {
2293 xml_encoding enc;
2294
2295 hr = readerinput_growraw(reader->input);
2296 if (FAILED(hr)) return hr;
2297
2298 /* try to detect encoding by BOM or data and set input code page */
2299 hr = readerinput_detectencoding(reader->input, &enc);
2300 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
2301 if (FAILED(hr)) return hr;
2302
2303 /* always switch first time cause we have to put something in */
2304 readerinput_switchencoding(reader->input, enc);
2305
2306 /* parse xml declaration */
2307 hr = reader_parse_xmldecl(reader);
2308 if (FAILED(hr)) return hr;
2309
2310 readerinput_shrinkraw(reader->input, -1);
2311 reader->instate = XmlReadInState_Misc_DTD;
2312 if (hr == S_OK) return hr;
2313 }
2314 break;
2315 case XmlReadInState_Misc_DTD:
2316 hr = reader_parse_misc(reader);
2317 if (FAILED(hr)) return hr;
2318
2319 if (hr == S_FALSE)
2320 reader->instate = XmlReadInState_DTD;
2321 else
2322 return hr;
2323 break;
2324 case XmlReadInState_DTD:
2325 hr = reader_parse_dtd(reader);
2326 if (FAILED(hr)) return hr;
2327
2328 if (hr == S_OK)
2329 {
2330 reader->instate = XmlReadInState_DTD_Misc;
2331 return hr;
2332 }
2333 else
2334 reader->instate = XmlReadInState_Element;
2335 break;
2336 case XmlReadInState_DTD_Misc:
2337 hr = reader_parse_misc(reader);
2338 if (FAILED(hr)) return hr;
2339
2340 if (hr == S_FALSE)
2341 reader->instate = XmlReadInState_Element;
2342 else
2343 return hr;
2344 break;
2345 case XmlReadInState_Element:
2346 return reader_parse_element(reader);
2347 case XmlReadInState_Content:
2348 return reader_parse_content(reader);
2349 case XmlReadInState_MiscEnd:
2350 hr = reader_parse_misc(reader);
2351 if (FAILED(hr)) return hr;
2352
2353 if (hr == S_FALSE)
2354 reader->instate = XmlReadInState_Eof;
2355 return hr;
2356 case XmlReadInState_Eof:
2357 return S_FALSE;
2358 default:
2359 FIXME("internal state %d not handled\n", reader->instate);
2360 return E_NOTIMPL;
2361 }
2362 }
2363
2364 return E_NOTIMPL;
2365 }
2366
2367 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2368 {
2369 xmlreader *This = impl_from_IXmlReader(iface);
2370
2371 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2372
2373 if (IsEqualGUID(riid, &IID_IUnknown) ||
2374 IsEqualGUID(riid, &IID_IXmlReader))
2375 {
2376 *ppvObject = iface;
2377 }
2378 else
2379 {
2380 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2381 *ppvObject = NULL;
2382 return E_NOINTERFACE;
2383 }
2384
2385 IXmlReader_AddRef(iface);
2386
2387 return S_OK;
2388 }
2389
2390 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2391 {
2392 xmlreader *This = impl_from_IXmlReader(iface);
2393 ULONG ref = InterlockedIncrement(&This->ref);
2394 TRACE("(%p)->(%d)\n", This, ref);
2395 return ref;
2396 }
2397
2398 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2399 {
2400 xmlreader *This = impl_from_IXmlReader(iface);
2401 LONG ref = InterlockedDecrement(&This->ref);
2402
2403 TRACE("(%p)->(%d)\n", This, ref);
2404
2405 if (ref == 0)
2406 {
2407 IMalloc *imalloc = This->imalloc;
2408 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2409 reader_clear_attrs(This);
2410 reader_clear_elements(This);
2411 reader_free_strvalues(This);
2412 reader_free(This, This);
2413 if (imalloc) IMalloc_Release(imalloc);
2414 }
2415
2416 return ref;
2417 }
2418
2419 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2420 {
2421 xmlreader *This = impl_from_IXmlReader(iface);
2422 IXmlReaderInput *readerinput;
2423 HRESULT hr;
2424
2425 TRACE("(%p)->(%p)\n", This, input);
2426
2427 if (This->input)
2428 {
2429 readerinput_release_stream(This->input);
2430 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2431 This->input = NULL;
2432 }
2433
2434 This->line = This->pos = 0;
2435 reader_clear_elements(This);
2436 This->depth = 0;
2437 This->resumestate = XmlReadResumeState_Initial;
2438 memset(This->resume, 0, sizeof(This->resume));
2439
2440 /* just reset current input */
2441 if (!input)
2442 {
2443 This->state = XmlReadState_Initial;
2444 return S_OK;
2445 }
2446
2447 /* now try IXmlReaderInput, ISequentialStream, IStream */
2448 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2449 if (hr == S_OK)
2450 {
2451 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2452 This->input = impl_from_IXmlReaderInput(readerinput);
2453 else
2454 {
2455 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2456 readerinput, readerinput->lpVtbl);
2457 IUnknown_Release(readerinput);
2458 return E_FAIL;
2459
2460 }
2461 }
2462
2463 if (hr != S_OK || !readerinput)
2464 {
2465 /* create IXmlReaderInput basing on supplied interface */
2466 hr = CreateXmlReaderInputWithEncodingName(input,
2467 NULL, NULL, FALSE, NULL, &readerinput);
2468 if (hr != S_OK) return hr;
2469 This->input = impl_from_IXmlReaderInput(readerinput);
2470 }
2471
2472 /* set stream for supplied IXmlReaderInput */
2473 hr = readerinput_query_for_stream(This->input);
2474 if (hr == S_OK)
2475 {
2476 This->state = XmlReadState_Initial;
2477 This->instate = XmlReadInState_Initial;
2478 }
2479
2480 return hr;
2481 }
2482
2483 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2484 {
2485 xmlreader *This = impl_from_IXmlReader(iface);
2486
2487 TRACE("(%p)->(%s %p)\n", This, debugstr_prop(property), value);
2488
2489 if (!value) return E_INVALIDARG;
2490
2491 switch (property)
2492 {
2493 case XmlReaderProperty_DtdProcessing:
2494 *value = This->dtdmode;
2495 break;
2496 case XmlReaderProperty_ReadState:
2497 *value = This->state;
2498 break;
2499 default:
2500 FIXME("Unimplemented property (%u)\n", property);
2501 return E_NOTIMPL;
2502 }
2503
2504 return S_OK;
2505 }
2506
2507 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2508 {
2509 xmlreader *This = impl_from_IXmlReader(iface);
2510
2511 TRACE("(%p)->(%s %lu)\n", This, debugstr_prop(property), value);
2512
2513 switch (property)
2514 {
2515 case XmlReaderProperty_DtdProcessing:
2516 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2517 This->dtdmode = value;
2518 break;
2519 default:
2520 FIXME("Unimplemented property (%u)\n", property);
2521 return E_NOTIMPL;
2522 }
2523
2524 return S_OK;
2525 }
2526
2527 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2528 {
2529 xmlreader *This = impl_from_IXmlReader(iface);
2530 XmlNodeType oldtype = This->nodetype;
2531 HRESULT hr;
2532
2533 TRACE("(%p)->(%p)\n", This, nodetype);
2534
2535 if (This->state == XmlReadState_Closed) return S_FALSE;
2536
2537 hr = reader_parse_nextnode(This);
2538 if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
2539 This->state = XmlReadState_Interactive;
2540 if (hr == S_OK)
2541 {
2542 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2543 *nodetype = This->nodetype;
2544 }
2545
2546 return hr;
2547 }
2548
2549 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2550 {
2551 xmlreader *This = impl_from_IXmlReader(iface);
2552 TRACE("(%p)->(%p)\n", This, node_type);
2553
2554 *node_type = reader_get_nodetype(This);
2555 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2556 }
2557
2558 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2559 {
2560 xmlreader *This = impl_from_IXmlReader(iface);
2561
2562 TRACE("(%p)\n", This);
2563
2564 if (!This->attr_count) return S_FALSE;
2565 This->attr = LIST_ENTRY(list_head(&This->attrs), struct attribute, entry);
2566 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2567 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2568
2569 return S_OK;
2570 }
2571
2572 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2573 {
2574 xmlreader *This = impl_from_IXmlReader(iface);
2575 const struct list *next;
2576
2577 TRACE("(%p)\n", This);
2578
2579 if (!This->attr_count) return S_FALSE;
2580
2581 if (!This->attr)
2582 return IXmlReader_MoveToFirstAttribute(iface);
2583
2584 next = list_next(&This->attrs, &This->attr->entry);
2585 if (next)
2586 {
2587 This->attr = LIST_ENTRY(next, struct attribute, entry);
2588 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2589 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2590 }
2591
2592 return next ? S_OK : S_FALSE;
2593 }
2594
2595 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
2596 LPCWSTR local_name,
2597 LPCWSTR namespaceUri)
2598 {
2599 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
2600 return E_NOTIMPL;
2601 }
2602
2603 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
2604 {
2605 xmlreader *This = impl_from_IXmlReader(iface);
2606 struct element *elem;
2607
2608 TRACE("(%p)\n", This);
2609
2610 if (!This->attr_count) return S_FALSE;
2611 This->attr = NULL;
2612
2613 /* FIXME: support other node types with 'attributes' like DTD */
2614 elem = LIST_ENTRY(list_head(&This->elements), struct element, entry);
2615 if (elem)
2616 {
2617 reader_set_strvalue(This, StringValue_QualifiedName, &elem->qname);
2618 reader_set_strvalue(This, StringValue_LocalName, &elem->localname);
2619 }
2620
2621 return S_OK;
2622 }
2623
2624 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2625 {
2626 xmlreader *This = impl_from_IXmlReader(iface);
2627
2628 TRACE("(%p)->(%p %p)\n", This, name, len);
2629 *name = This->strvalues[StringValue_QualifiedName].str;
2630 *len = This->strvalues[StringValue_QualifiedName].len;
2631 return S_OK;
2632 }
2633
2634 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface,
2635 LPCWSTR *namespaceUri,
2636 UINT *namespaceUri_length)
2637 {
2638 FIXME("(%p %p %p): stub\n", iface, namespaceUri, namespaceUri_length);
2639 return E_NOTIMPL;
2640 }
2641
2642 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2643 {
2644 xmlreader *This = impl_from_IXmlReader(iface);
2645
2646 TRACE("(%p)->(%p %p)\n", This, name, len);
2647 *name = This->strvalues[StringValue_LocalName].str;
2648 if (len) *len = This->strvalues[StringValue_LocalName].len;
2649 return S_OK;
2650 }
2651
2652 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, LPCWSTR *prefix, UINT *len)
2653 {
2654 xmlreader *This = impl_from_IXmlReader(iface);
2655
2656 TRACE("(%p)->(%p %p)\n", This, prefix, len);
2657 *prefix = This->strvalues[StringValue_Prefix].str;
2658 if (len) *len = This->strvalues[StringValue_Prefix].len;
2659 return S_OK;
2660 }
2661
2662 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
2663 {
2664 xmlreader *reader = impl_from_IXmlReader(iface);
2665 strval *val = &reader->strvalues[StringValue_Value];
2666
2667 TRACE("(%p)->(%p %p)\n", reader, value, len);
2668
2669 *value = NULL;
2670
2671 if ((reader->nodetype == XmlNodeType_Comment && !val->str) || is_reader_pending(reader))
2672 {
2673 XmlNodeType type;
2674 HRESULT hr;
2675
2676 hr = IXmlReader_Read(iface, &type);
2677 if (FAILED(hr)) return hr;
2678
2679 /* return if still pending, partially read values are not reported */
2680 if (is_reader_pending(reader)) return E_PENDING;
2681 }
2682
2683 if (!val->str)
2684 {
2685 val->str = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
2686 if (!val->str) return E_OUTOFMEMORY;
2687 memcpy(val->str, val->start, val->len*sizeof(WCHAR));
2688 val->str[val->len] = 0;
2689 }
2690
2691 *value = val->str;
2692 if (len) *len = val->len;
2693 return S_OK;
2694 }
2695
2696 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
2697 {
2698 xmlreader *reader = impl_from_IXmlReader(iface);
2699 strval *val = &reader->strvalues[StringValue_Value];
2700 UINT len;
2701
2702 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
2703
2704 /* Value is already allocated, chunked reads are not possible. */
2705 if (val->str) return S_FALSE;
2706
2707 if (val->len)
2708 {
2709 len = min(chunk_size, val->len);
2710 memcpy(buffer, val->start, len);
2711 val->start += len;
2712 val->len -= len;
2713 if (read) *read = len;
2714 }
2715
2716 return S_OK;
2717 }
2718
2719 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
2720 LPCWSTR *baseUri,
2721 UINT *baseUri_length)
2722 {
2723 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
2724 return E_NOTIMPL;
2725 }
2726
2727 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
2728 {
2729 FIXME("(%p): stub\n", iface);
2730 return FALSE;
2731 }
2732
2733 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
2734 {
2735 xmlreader *This = impl_from_IXmlReader(iface);
2736 TRACE("(%p)\n", This);
2737 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
2738 when current node is start tag of an element */
2739 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->empty_element : FALSE;
2740 }
2741
2742 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *lineNumber)
2743 {
2744 xmlreader *This = impl_from_IXmlReader(iface);
2745
2746 TRACE("(%p %p)\n", This, lineNumber);
2747
2748 if (!lineNumber) return E_INVALIDARG;
2749
2750 *lineNumber = This->line;
2751
2752 return S_OK;
2753 }
2754
2755 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *linePosition)
2756 {
2757 xmlreader *This = impl_from_IXmlReader(iface);
2758
2759 TRACE("(%p %p)\n", This, linePosition);
2760
2761 if (!linePosition) return E_INVALIDARG;
2762
2763 *linePosition = This->pos;
2764
2765 return S_OK;
2766 }
2767
2768 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
2769 {
2770 xmlreader *This = impl_from_IXmlReader(iface);
2771
2772 TRACE("(%p)->(%p)\n", This, count);
2773
2774 if (!count) return E_INVALIDARG;
2775
2776 *count = This->attr_count;
2777 return S_OK;
2778 }
2779
2780 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
2781 {
2782 xmlreader *This = impl_from_IXmlReader(iface);
2783 TRACE("(%p)->(%p)\n", This, depth);
2784 *depth = This->depth;
2785 return S_OK;
2786 }
2787
2788 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
2789 {
2790 FIXME("(%p): stub\n", iface);
2791 return E_NOTIMPL;
2792 }
2793
2794 static const struct IXmlReaderVtbl xmlreader_vtbl =
2795 {
2796 xmlreader_QueryInterface,
2797 xmlreader_AddRef,
2798 xmlreader_Release,
2799 xmlreader_SetInput,
2800 xmlreader_GetProperty,
2801 xmlreader_SetProperty,
2802 xmlreader_Read,
2803 xmlreader_GetNodeType,
2804 xmlreader_MoveToFirstAttribute,
2805 xmlreader_MoveToNextAttribute,
2806 xmlreader_MoveToAttributeByName,
2807 xmlreader_MoveToElement,
2808 xmlreader_GetQualifiedName,
2809 xmlreader_GetNamespaceUri,
2810 xmlreader_GetLocalName,
2811 xmlreader_GetPrefix,
2812 xmlreader_GetValue,
2813 xmlreader_ReadValueChunk,
2814 xmlreader_GetBaseUri,
2815 xmlreader_IsDefault,
2816 xmlreader_IsEmptyElement,
2817 xmlreader_GetLineNumber,
2818 xmlreader_GetLinePosition,
2819 xmlreader_GetAttributeCount,
2820 xmlreader_GetDepth,
2821 xmlreader_IsEOF
2822 };
2823
2824 /** IXmlReaderInput **/
2825 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
2826 {
2827 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2828
2829 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2830
2831 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
2832 IsEqualGUID(riid, &IID_IUnknown))
2833 {
2834 *ppvObject = iface;
2835 }
2836 else
2837 {
2838 WARN("interface %s not implemented\n", debugstr_guid(riid));
2839 *ppvObject = NULL;
2840 return E_NOINTERFACE;
2841 }
2842
2843 IUnknown_AddRef(iface);
2844
2845 return S_OK;
2846 }
2847
2848 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
2849 {
2850 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2851 ULONG ref = InterlockedIncrement(&This->ref);
2852 TRACE("(%p)->(%d)\n", This, ref);
2853 return ref;
2854 }
2855
2856 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
2857 {
2858 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2859 LONG ref = InterlockedDecrement(&This->ref);
2860
2861 TRACE("(%p)->(%d)\n", This, ref);
2862
2863 if (ref == 0)
2864 {
2865 IMalloc *imalloc = This->imalloc;
2866 if (This->input) IUnknown_Release(This->input);
2867 if (This->stream) ISequentialStream_Release(This->stream);
2868 if (This->buffer) free_input_buffer(This->buffer);
2869 readerinput_free(This, This->baseuri);
2870 readerinput_free(This, This);
2871 if (imalloc) IMalloc_Release(imalloc);
2872 }
2873
2874 return ref;
2875 }
2876
2877 static const struct IUnknownVtbl xmlreaderinputvtbl =
2878 {
2879 xmlreaderinput_QueryInterface,
2880 xmlreaderinput_AddRef,
2881 xmlreaderinput_Release
2882 };
2883
2884 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
2885 {
2886 xmlreader *reader;
2887 int i;
2888
2889 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
2890
2891 if (!IsEqualGUID(riid, &IID_IXmlReader))
2892 {
2893 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
2894 return E_FAIL;
2895 }
2896
2897 if (imalloc)
2898 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
2899 else
2900 reader = heap_alloc(sizeof(*reader));
2901 if(!reader) return E_OUTOFMEMORY;
2902
2903 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
2904 reader->ref = 1;
2905 reader->input = NULL;
2906 reader->state = XmlReadState_Closed;
2907 reader->instate = XmlReadInState_Initial;
2908 reader->resumestate = XmlReadResumeState_Initial;
2909 reader->dtdmode = DtdProcessing_Prohibit;
2910 reader->line = reader->pos = 0;
2911 reader->imalloc = imalloc;
2912 if (imalloc) IMalloc_AddRef(imalloc);
2913 reader->nodetype = XmlNodeType_None;
2914 list_init(&reader->attrs);
2915 reader->attr_count = 0;
2916 reader->attr = NULL;
2917 list_init(&reader->elements);
2918 reader->depth = 0;
2919 reader->max_depth = 256;
2920 reader->empty_element = FALSE;
2921 memset(reader->resume, 0, sizeof(reader->resume));
2922
2923 for (i = 0; i < StringValue_Last; i++)
2924 reader->strvalues[i] = strval_empty;
2925
2926 *obj = &reader->IXmlReader_iface;
2927
2928 TRACE("returning iface %p\n", *obj);
2929
2930 return S_OK;
2931 }
2932
2933 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
2934 IMalloc *imalloc,
2935 LPCWSTR encoding,
2936 BOOL hint,
2937 LPCWSTR base_uri,
2938 IXmlReaderInput **ppInput)
2939 {
2940 xmlreaderinput *readerinput;
2941 HRESULT hr;
2942
2943 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
2944 hint, wine_dbgstr_w(base_uri), ppInput);
2945
2946 if (!stream || !ppInput) return E_INVALIDARG;
2947
2948 if (imalloc)
2949 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
2950 else
2951 readerinput = heap_alloc(sizeof(*readerinput));
2952 if(!readerinput) return E_OUTOFMEMORY;
2953
2954 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
2955 readerinput->ref = 1;
2956 readerinput->imalloc = imalloc;
2957 readerinput->stream = NULL;
2958 if (imalloc) IMalloc_AddRef(imalloc);
2959 readerinput->encoding = parse_encoding_name(encoding, -1);
2960 readerinput->hint = hint;
2961 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
2962 readerinput->pending = 0;
2963
2964 hr = alloc_input_buffer(readerinput);
2965 if (hr != S_OK)
2966 {
2967 readerinput_free(readerinput, readerinput->baseuri);
2968 readerinput_free(readerinput, readerinput);
2969 if (imalloc) IMalloc_Release(imalloc);
2970 return hr;
2971 }
2972 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
2973
2974 *ppInput = &readerinput->IXmlReaderInput_iface;
2975
2976 TRACE("returning iface %p\n", *ppInput);
2977
2978 return S_OK;
2979 }