2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #include "xmllite_private.h"
25 #include <wine/list.h>
26 #include <wine/unicode.h>
28 /* not defined in public headers */
29 DEFINE_GUID(IID_IXmlReaderInput
, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
40 XmlReadInState_Initial
,
41 XmlReadInState_XmlDecl
,
42 XmlReadInState_Misc_DTD
,
44 XmlReadInState_DTD_Misc
,
45 XmlReadInState_Element
,
46 XmlReadInState_Content
,
47 XmlReadInState_MiscEnd
, /* optional Misc at the end of a document */
49 } XmlReaderInternalState
;
51 /* This state denotes where parsing was interrupted by an input problem.
52 The reader resumes parsing using this information. */
55 XmlReadResumeState_Initial
,
56 XmlReadResumeState_PITarget
,
57 XmlReadResumeState_PIBody
,
58 XmlReadResumeState_CDATA
,
59 XmlReadResumeState_Comment
,
60 XmlReadResumeState_STag
,
61 XmlReadResumeState_CharData
62 } XmlReaderResumeState
;
64 /* saved pointer index to resume from particular input position */
67 XmlReadResume_Name
, /* PITarget, name for NCName, prefix for QName */
68 XmlReadResume_Local
, /* local for QName */
69 XmlReadResume_Body
, /* PI body, comment text, CDATA text, CharData text */
75 StringValue_LocalName
,
77 StringValue_QualifiedName
,
80 } XmlReaderStringValue
;
82 static const WCHAR utf16W
[] = {'U','T','F','-','1','6',0};
83 static const WCHAR utf8W
[] = {'U','T','F','-','8',0};
85 static const WCHAR dblquoteW
[] = {'\"',0};
86 static const WCHAR quoteW
[] = {'\'',0};
87 static const WCHAR ltW
[] = {'<',0};
88 static const WCHAR gtW
[] = {'>',0};
89 static const WCHAR commentW
[] = {'<','!','-','-',0};
90 static const WCHAR piW
[] = {'<','?',0};
92 static const char *debugstr_nodetype(XmlNodeType nodetype
)
94 static const char* type_names
[] =
103 "ProcessingInstruction",
116 if (nodetype
> _XmlNodeType_Last
)
119 sprintf(buf
, "unknown type=%d", nodetype
);
122 return type_names
[nodetype
];
125 static const char *debugstr_prop(XmlReaderProperty prop
)
127 static const char* prop_names
[] =
139 if (prop
> _XmlReaderProperty_Last
)
142 sprintf(buf
, "unknown property=%d", prop
);
145 return prop_names
[prop
];
148 struct xml_encoding_data
155 static const struct xml_encoding_data xml_encoding_map
[] = {
156 { utf16W
, XmlEncoding_UTF16
, ~0 },
157 { utf8W
, XmlEncoding_UTF8
, CP_UTF8
}
164 unsigned int allocated
;
165 unsigned int written
;
168 typedef struct input_buffer input_buffer
;
172 IXmlReaderInput IXmlReaderInput_iface
;
174 /* reference passed on IXmlReaderInput creation, is kept when input is created */
177 xml_encoding encoding
;
180 /* Stream reference set after the SetInput() call from the reader;
181 it is stored as a sequential stream because the
182 optimizations possible with IStream are not implemented yet. */
183 ISequentialStream
*stream
;
184 input_buffer
*buffer
;
185 unsigned int pending
: 1;
188 static const struct IUnknownVtbl xmlreaderinputvtbl
;
190 /* Structure to hold parsed string of specific length.
192 Reader stores node value as 'start' pointer, on request
193 a null-terminated version of it is allocated.
195 To init a strval variable use reader_init_strval(),
196 to set strval as a reader value use reader_set_strval().
200 WCHAR
*str
; /* allocated null-terminated string */
201 UINT len
; /* length in WCHARs, altered after ReadValueChunk */
202 WCHAR
*start
; /* input position where value starts */
205 static WCHAR emptyW
[] = {0};
206 static const strval strval_empty
= {emptyW
, 0, emptyW
};
224 IXmlReader IXmlReader_iface
;
226 xmlreaderinput
*input
;
229 XmlReaderInternalState instate
;
230 XmlReaderResumeState resumestate
;
231 XmlNodeType nodetype
;
232 DtdProcessing dtdmode
;
233 UINT line
, pos
; /* reader position in XML stream */
234 struct list attrs
; /* attributes list for current node */
235 struct attribute
*attr
; /* current attribute */
237 struct list elements
;
238 strval strvalues
[StringValue_Last
];
242 WCHAR
*resume
[XmlReadResume_Last
]; /* pointers used to resume reader */
247 encoded_buffer utf16
;
248 encoded_buffer encoded
;
250 xmlreaderinput
*input
;
253 static inline xmlreader
*impl_from_IXmlReader(IXmlReader
*iface
)
255 return CONTAINING_RECORD(iface
, xmlreader
, IXmlReader_iface
);
258 static inline xmlreaderinput
*impl_from_IXmlReaderInput(IXmlReaderInput
*iface
)
260 return CONTAINING_RECORD(iface
, xmlreaderinput
, IXmlReaderInput_iface
);
263 static inline void *m_alloc(IMalloc
*imalloc
, size_t len
)
266 return IMalloc_Alloc(imalloc
, len
);
268 return heap_alloc(len
);
271 static inline void *m_realloc(IMalloc
*imalloc
, void *mem
, size_t len
)
274 return IMalloc_Realloc(imalloc
, mem
, len
);
276 return heap_realloc(mem
, len
);
279 static inline void m_free(IMalloc
*imalloc
, void *mem
)
282 IMalloc_Free(imalloc
, mem
);
287 /* reader memory allocation functions */
288 static inline void *reader_alloc(xmlreader
*reader
, size_t len
)
290 return m_alloc(reader
->imalloc
, len
);
293 static inline void reader_free(xmlreader
*reader
, void *mem
)
295 m_free(reader
->imalloc
, mem
);
298 static HRESULT
reader_strvaldup(xmlreader
*reader
, const strval
*src
, strval
*dest
)
302 if (src
->str
!= strval_empty
.str
)
304 dest
->str
= reader_alloc(reader
, (dest
->len
+1)*sizeof(WCHAR
));
305 if (!dest
->str
) return E_OUTOFMEMORY
;
306 memcpy(dest
->str
, src
->str
, dest
->len
*sizeof(WCHAR
));
307 dest
->str
[dest
->len
] = 0;
313 /* reader input memory allocation functions */
314 static inline void *readerinput_alloc(xmlreaderinput
*input
, size_t len
)
316 return m_alloc(input
->imalloc
, len
);
319 static inline void *readerinput_realloc(xmlreaderinput
*input
, void *mem
, size_t len
)
321 return m_realloc(input
->imalloc
, mem
, len
);
324 static inline void readerinput_free(xmlreaderinput
*input
, void *mem
)
326 m_free(input
->imalloc
, mem
);
329 static inline WCHAR
*readerinput_strdupW(xmlreaderinput
*input
, const WCHAR
*str
)
336 size
= (strlenW(str
)+1)*sizeof(WCHAR
);
337 ret
= readerinput_alloc(input
, size
);
338 if (ret
) memcpy(ret
, str
, size
);
344 static void reader_clear_attrs(xmlreader
*reader
)
346 struct attribute
*attr
, *attr2
;
347 LIST_FOR_EACH_ENTRY_SAFE(attr
, attr2
, &reader
->attrs
, struct attribute
, entry
)
349 reader_free(reader
, attr
);
351 list_init(&reader
->attrs
);
352 reader
->attr_count
= 0;
355 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
356 while we are on a node with attributes */
357 static HRESULT
reader_add_attr(xmlreader
*reader
, strval
*localname
, strval
*value
)
359 struct attribute
*attr
;
361 attr
= reader_alloc(reader
, sizeof(*attr
));
362 if (!attr
) return E_OUTOFMEMORY
;
364 attr
->localname
= *localname
;
365 attr
->value
= *value
;
366 list_add_tail(&reader
->attrs
, &attr
->entry
);
367 reader
->attr_count
++;
372 /* This one frees stored string value if needed */
373 static void reader_free_strvalued(xmlreader
*reader
, strval
*v
)
375 if (v
->str
!= strval_empty
.str
)
377 reader_free(reader
, v
->str
);
382 static inline void reader_init_strvalue(WCHAR
*str
, UINT len
, strval
*v
)
384 v
->start
= v
->str
= str
;
388 static void reader_free_strvalue(xmlreader
*reader
, XmlReaderStringValue type
)
390 reader_free_strvalued(reader
, &reader
->strvalues
[type
]);
393 static void reader_free_strvalues(xmlreader
*reader
)
396 for (type
= 0; type
< StringValue_Last
; type
++)
397 reader_free_strvalue(reader
, type
);
400 /* This helper should only be used to test if strings are the same,
401 it doesn't try to sort. */
402 static inline int strval_eq(const strval
*str1
, const strval
*str2
)
404 if (str1
->len
!= str2
->len
) return 0;
405 return !memcmp(str1
->str
, str2
->str
, str1
->len
*sizeof(WCHAR
));
408 static void reader_clear_elements(xmlreader
*reader
)
410 struct element
*elem
, *elem2
;
411 LIST_FOR_EACH_ENTRY_SAFE(elem
, elem2
, &reader
->elements
, struct element
, entry
)
413 reader_free_strvalued(reader
, &elem
->qname
);
414 reader_free(reader
, elem
);
416 list_init(&reader
->elements
);
417 reader
->empty_element
= FALSE
;
420 static HRESULT
reader_inc_depth(xmlreader
*reader
)
422 if (++reader
->depth
> reader
->max_depth
) return SC_E_MAXELEMENTDEPTH
;
426 static void reader_dec_depth(xmlreader
*reader
)
428 if (reader
->depth
> 1) reader
->depth
--;
431 static HRESULT
reader_push_element(xmlreader
*reader
, strval
*qname
, strval
*localname
)
433 struct element
*elem
;
436 elem
= reader_alloc(reader
, sizeof(*elem
));
437 if (!elem
) return E_OUTOFMEMORY
;
439 hr
= reader_strvaldup(reader
, qname
, &elem
->qname
);
441 reader_free(reader
, elem
);
445 hr
= reader_strvaldup(reader
, localname
, &elem
->localname
);
448 reader_free_strvalued(reader
, &elem
->qname
);
449 reader_free(reader
, elem
);
453 if (!list_empty(&reader
->elements
))
455 hr
= reader_inc_depth(reader
);
457 reader_free(reader
, elem
);
462 list_add_head(&reader
->elements
, &elem
->entry
);
463 reader
->empty_element
= FALSE
;
467 static void reader_pop_element(xmlreader
*reader
)
469 struct element
*elem
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
473 list_remove(&elem
->entry
);
474 reader_free_strvalued(reader
, &elem
->qname
);
475 reader_free_strvalued(reader
, &elem
->localname
);
476 reader_free(reader
, elem
);
477 reader_dec_depth(reader
);
481 /* Always make a copy, because strings are supposed to be null-terminated. A null pointer for 'value'
482 means the node value is yet to be determined. */
483 static void reader_set_strvalue(xmlreader
*reader
, XmlReaderStringValue type
, const strval
*value
)
485 strval
*v
= &reader
->strvalues
[type
];
487 reader_free_strvalue(reader
, type
);
496 if (value
->str
== strval_empty
.str
)
500 if (type
== StringValue_Value
)
502 /* defer allocation for value string */
504 v
->start
= value
->start
;
509 v
->str
= reader_alloc(reader
, (value
->len
+ 1)*sizeof(WCHAR
));
510 memcpy(v
->str
, value
->start
, value
->len
*sizeof(WCHAR
));
511 v
->str
[value
->len
] = 0;
517 static inline int is_reader_pending(xmlreader
*reader
)
519 return reader
->input
->pending
;
522 static HRESULT
init_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
524 const int initial_len
= 0x2000;
525 buffer
->data
= readerinput_alloc(input
, initial_len
);
526 if (!buffer
->data
) return E_OUTOFMEMORY
;
528 memset(buffer
->data
, 0, 4);
529 buffer
->cur
= buffer
->data
;
530 buffer
->allocated
= initial_len
;
536 static void free_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
538 readerinput_free(input
, buffer
->data
);
541 static HRESULT
get_code_page(xml_encoding encoding
, UINT
*cp
)
543 if (encoding
== XmlEncoding_Unknown
)
545 FIXME("unsupported encoding %d\n", encoding
);
549 *cp
= xml_encoding_map
[encoding
].cp
;
554 static xml_encoding
parse_encoding_name(const WCHAR
*name
, int len
)
558 if (!name
) return XmlEncoding_Unknown
;
561 max
= sizeof(xml_encoding_map
)/sizeof(struct xml_encoding_data
) - 1;
568 c
= strncmpiW(xml_encoding_map
[n
].name
, name
, len
);
570 c
= strcmpiW(xml_encoding_map
[n
].name
, name
);
572 return xml_encoding_map
[n
].enc
;
580 return XmlEncoding_Unknown
;
583 static HRESULT
alloc_input_buffer(xmlreaderinput
*input
)
585 input_buffer
*buffer
;
588 input
->buffer
= NULL
;
590 buffer
= readerinput_alloc(input
, sizeof(*buffer
));
591 if (!buffer
) return E_OUTOFMEMORY
;
593 buffer
->input
= input
;
594 buffer
->code_page
= ~0; /* code page is unknown at this point */
595 hr
= init_encoded_buffer(input
, &buffer
->utf16
);
597 readerinput_free(input
, buffer
);
601 hr
= init_encoded_buffer(input
, &buffer
->encoded
);
603 free_encoded_buffer(input
, &buffer
->utf16
);
604 readerinput_free(input
, buffer
);
608 input
->buffer
= buffer
;
612 static void free_input_buffer(input_buffer
*buffer
)
614 free_encoded_buffer(buffer
->input
, &buffer
->encoded
);
615 free_encoded_buffer(buffer
->input
, &buffer
->utf16
);
616 readerinput_free(buffer
->input
, buffer
);
619 static void readerinput_release_stream(xmlreaderinput
*readerinput
)
621 if (readerinput
->stream
) {
622 ISequentialStream_Release(readerinput
->stream
);
623 readerinput
->stream
= NULL
;
627 /* Queries already stored interface for IStream/ISequentialStream.
628 Interface supplied on creation will be overwritten */
629 static HRESULT
readerinput_query_for_stream(xmlreaderinput
*readerinput
)
633 readerinput_release_stream(readerinput
);
634 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_IStream
, (void**)&readerinput
->stream
);
636 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_ISequentialStream
, (void**)&readerinput
->stream
);
641 /* reads a chunk to raw buffer */
642 static HRESULT
readerinput_growraw(xmlreaderinput
*readerinput
)
644 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
645 /* to make sure aligned length won't exceed allocated length */
646 ULONG len
= buffer
->allocated
- buffer
->written
- 4;
650 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
651 variable width encodings like UTF-8 */
652 len
= (len
+ 3) & ~3;
653 /* try to use allocated space or grow */
654 if (buffer
->allocated
- buffer
->written
< len
)
656 buffer
->allocated
*= 2;
657 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, buffer
->allocated
);
658 len
= buffer
->allocated
- buffer
->written
;
662 hr
= ISequentialStream_Read(readerinput
->stream
, buffer
->data
+ buffer
->written
, len
, &read
);
663 TRACE("requested %d, read %d, ret 0x%08x\n", len
, read
, hr
);
664 readerinput
->pending
= hr
== E_PENDING
;
665 if (FAILED(hr
)) return hr
;
666 buffer
->written
+= read
;
671 /* grows UTF-16 buffer so it has at least 'length' bytes free on return */
672 static void readerinput_grow(xmlreaderinput
*readerinput
, int length
)
674 encoded_buffer
*buffer
= &readerinput
->buffer
->utf16
;
676 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
677 if (buffer
->allocated
< buffer
->written
+ length
+ 4)
679 int grown_size
= max(2*buffer
->allocated
, buffer
->allocated
+ length
);
680 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, grown_size
);
681 buffer
->allocated
= grown_size
;
685 static inline int readerinput_is_utf8(xmlreaderinput
*readerinput
)
687 static char startA
[] = {'<','?'};
688 static char commentA
[] = {'<','!'};
689 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
690 unsigned char *ptr
= (unsigned char*)buffer
->data
;
692 return !memcmp(buffer
->data
, startA
, sizeof(startA
)) ||
693 !memcmp(buffer
->data
, commentA
, sizeof(commentA
)) ||
694 /* test start byte */
697 (ptr
[1] && (ptr
[1] <= 0x7f)) ||
698 (buffer
->data
[1] >> 5) == 0x6 || /* 2 bytes */
699 (buffer
->data
[1] >> 4) == 0xe || /* 3 bytes */
700 (buffer
->data
[1] >> 3) == 0x1e) /* 4 bytes */
704 static HRESULT
readerinput_detectencoding(xmlreaderinput
*readerinput
, xml_encoding
*enc
)
706 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
707 static WCHAR startW
[] = {'<','?'};
708 static WCHAR commentW
[] = {'<','!'};
709 static char utf8bom
[] = {0xef,0xbb,0xbf};
710 static char utf16lebom
[] = {0xff,0xfe};
712 *enc
= XmlEncoding_Unknown
;
714 if (buffer
->written
<= 3)
716 HRESULT hr
= readerinput_growraw(readerinput
);
717 if (FAILED(hr
)) return hr
;
718 if (buffer
->written
<= 3) return MX_E_INPUTEND
;
721 /* try start symbols if we have enough data to do that, input buffer should contain
722 first chunk already */
723 if (readerinput_is_utf8(readerinput
))
724 *enc
= XmlEncoding_UTF8
;
725 else if (!memcmp(buffer
->data
, startW
, sizeof(startW
)) ||
726 !memcmp(buffer
->data
, commentW
, sizeof(commentW
)))
727 *enc
= XmlEncoding_UTF16
;
728 /* try with BOM now */
729 else if (!memcmp(buffer
->data
, utf8bom
, sizeof(utf8bom
)))
731 buffer
->cur
+= sizeof(utf8bom
);
732 *enc
= XmlEncoding_UTF8
;
734 else if (!memcmp(buffer
->data
, utf16lebom
, sizeof(utf16lebom
)))
736 buffer
->cur
+= sizeof(utf16lebom
);
737 *enc
= XmlEncoding_UTF16
;
743 static int readerinput_get_utf8_convlen(xmlreaderinput
*readerinput
)
745 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
746 int len
= buffer
->written
;
748 /* complete single byte char */
749 if (!(buffer
->data
[len
-1] & 0x80)) return len
;
751 /* find start byte of multibyte char */
752 while (--len
&& !(buffer
->data
[len
] & 0xc0))
758 /* Returns byte length of complete char sequence for buffer code page,
759 it's relative to current buffer position which is currently used for BOM handling
761 static int readerinput_get_convlen(xmlreaderinput
*readerinput
)
763 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
766 if (readerinput
->buffer
->code_page
== CP_UTF8
)
767 len
= readerinput_get_utf8_convlen(readerinput
);
769 len
= buffer
->written
;
771 TRACE("%d\n", len
- (int)(buffer
->cur
- buffer
->data
));
772 return len
- (buffer
->cur
- buffer
->data
);
775 /* It's possible that the raw buffer has some leftovers from the last conversion - a char
776 sequence that doesn't represent a full code point. The length argument should be calculated with
777 readerinput_get_convlen(); if it's -1, it will be calculated here. */
778 static void readerinput_shrinkraw(xmlreaderinput
*readerinput
, int len
)
780 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
783 len
= readerinput_get_convlen(readerinput
);
785 memmove(buffer
->data
, buffer
->cur
+ (buffer
->written
- len
), len
);
786 /* everything below cur is lost too */
787 buffer
->written
-= len
+ (buffer
->cur
- buffer
->data
);
788 /* after this point we don't need cur pointer really,
789 it's used only to mark where actual data begins when first chunk is read */
790 buffer
->cur
= buffer
->data
;
793 /* note that raw buffer content is kept */
794 static void readerinput_switchencoding(xmlreaderinput
*readerinput
, xml_encoding enc
)
796 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
797 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
803 hr
= get_code_page(enc
, &cp
);
804 if (FAILED(hr
)) return;
806 readerinput
->buffer
->code_page
= cp
;
807 len
= readerinput_get_convlen(readerinput
);
809 TRACE("switching to cp %d\n", cp
);
811 /* just copy in this case */
812 if (enc
== XmlEncoding_UTF16
)
814 readerinput_grow(readerinput
, len
);
815 memcpy(dest
->data
, src
->cur
, len
);
816 dest
->written
+= len
*sizeof(WCHAR
);
820 dest_len
= MultiByteToWideChar(cp
, 0, src
->cur
, len
, NULL
, 0);
821 readerinput_grow(readerinput
, dest_len
);
822 ptr
= (WCHAR
*)dest
->data
;
823 MultiByteToWideChar(cp
, 0, src
->cur
, len
, ptr
, dest_len
);
825 dest
->written
+= dest_len
*sizeof(WCHAR
);
828 /* shrinks parsed data a buffer begins with */
829 static void reader_shrink(xmlreader
*reader
)
831 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
833 /* avoid to move too often using threshold shrink length */
834 if (buffer
->cur
- buffer
->data
> buffer
->written
/ 2)
836 buffer
->written
-= buffer
->cur
- buffer
->data
;
837 memmove(buffer
->data
, buffer
->cur
, buffer
->written
);
838 buffer
->cur
= buffer
->data
;
839 *(WCHAR
*)&buffer
->cur
[buffer
->written
] = 0;
843 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
844 It won't attempt to shrink but will grow destination buffer if needed */
845 static HRESULT
reader_more(xmlreader
*reader
)
847 xmlreaderinput
*readerinput
= reader
->input
;
848 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
849 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
850 UINT cp
= readerinput
->buffer
->code_page
;
855 /* get some raw data from stream first */
856 hr
= readerinput_growraw(readerinput
);
857 len
= readerinput_get_convlen(readerinput
);
859 /* just copy for UTF-16 case */
862 readerinput_grow(readerinput
, len
);
863 memcpy(dest
->data
, src
->cur
, len
);
864 dest
->written
+= len
*sizeof(WCHAR
);
868 dest_len
= MultiByteToWideChar(cp
, 0, src
->cur
, len
, NULL
, 0);
869 readerinput_grow(readerinput
, dest_len
);
870 ptr
= (WCHAR
*)dest
->data
;
871 MultiByteToWideChar(cp
, 0, src
->cur
, len
, ptr
, dest_len
);
873 dest
->written
+= dest_len
*sizeof(WCHAR
);
874 /* get rid of processed data */
875 readerinput_shrinkraw(readerinput
, len
);
880 static inline WCHAR
*reader_get_cur(xmlreader
*reader
)
882 WCHAR
*ptr
= (WCHAR
*)reader
->input
->buffer
->utf16
.cur
;
883 if (!*ptr
) reader_more(reader
);
887 static int reader_cmp(xmlreader
*reader
, const WCHAR
*str
)
889 const WCHAR
*ptr
= reader_get_cur(reader
);
890 return strncmpW(str
, ptr
, strlenW(str
));
893 /* moves cursor n WCHARs forward */
894 static void reader_skipn(xmlreader
*reader
, int n
)
896 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
897 const WCHAR
*ptr
= reader_get_cur(reader
);
899 while (*ptr
++ && n
--)
901 buffer
->cur
+= sizeof(WCHAR
);
/* Returns 1 for the four XML whitespace characters (space, tab, CR, LF), 0 otherwise. */
static inline int is_wchar_space(WCHAR ch)
{
    switch (ch)
    {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
911 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
912 static int reader_skipspaces(xmlreader
*reader
)
914 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
915 const WCHAR
*ptr
= reader_get_cur(reader
), *start
= ptr
;
917 while (is_wchar_space(*ptr
))
919 buffer
->cur
+= sizeof(WCHAR
);
922 else if (*ptr
== '\n')
935 /* [26] VersionNum ::= '1.' [0-9]+ */
936 static HRESULT
reader_parse_versionnum(xmlreader
*reader
, strval
*val
)
938 WCHAR
*ptr
, *ptr2
, *start
= reader_get_cur(reader
);
939 static const WCHAR onedotW
[] = {'1','.',0};
941 if (reader_cmp(reader
, onedotW
)) return WC_E_XMLDECL
;
943 reader_skipn(reader
, 2);
945 ptr2
= ptr
= reader_get_cur(reader
);
946 while (*ptr
>= '0' && *ptr
<= '9')
949 if (ptr2
== ptr
) return WC_E_DIGIT
;
950 TRACE("version=%s\n", debugstr_wn(start
, ptr
-start
));
951 reader_init_strvalue(start
, ptr
-start
, val
);
952 reader_skipn(reader
, ptr
-ptr2
);
956 /* [25] Eq ::= S? '=' S? */
957 static HRESULT
reader_parse_eq(xmlreader
*reader
)
959 static const WCHAR eqW
[] = {'=',0};
960 reader_skipspaces(reader
);
961 if (reader_cmp(reader
, eqW
)) return WC_E_EQUAL
;
963 reader_skipn(reader
, 1);
964 reader_skipspaces(reader
);
968 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
969 static HRESULT
reader_parse_versioninfo(xmlreader
*reader
)
971 static const WCHAR versionW
[] = {'v','e','r','s','i','o','n',0};
975 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
977 if (reader_cmp(reader
, versionW
)) return WC_E_XMLDECL
;
978 reader_init_strvalue(reader_get_cur(reader
), 7, &name
);
980 reader_skipn(reader
, 7);
982 hr
= reader_parse_eq(reader
);
983 if (FAILED(hr
)) return hr
;
985 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
988 reader_skipn(reader
, 1);
990 hr
= reader_parse_versionnum(reader
, &val
);
991 if (FAILED(hr
)) return hr
;
993 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
997 reader_skipn(reader
, 1);
999 return reader_add_attr(reader
, &name
, &val
);
/* ([A-Za-z0-9._] | '-')
   Returns 1 when 'ch' may appear in an encoding name after its first
   character, 0 otherwise. */
static inline int is_wchar_encname(WCHAR ch)
{
    if (ch >= 'A' && ch <= 'Z') return 1;
    if (ch >= 'a' && ch <= 'z') return 1;
    if (ch >= '0' && ch <= '9') return 1;

    return ch == '.' || ch == '_' || ch == '-';
}
1012 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1013 static HRESULT
reader_parse_encname(xmlreader
*reader
, strval
*val
)
1015 WCHAR
*start
= reader_get_cur(reader
), *ptr
;
1019 if ((*start
< 'A' || *start
> 'Z') && (*start
< 'a' || *start
> 'z'))
1020 return WC_E_ENCNAME
;
1023 while (is_wchar_encname(*++ptr
))
1027 enc
= parse_encoding_name(start
, len
);
1028 TRACE("encoding name %s\n", debugstr_wn(start
, len
));
1032 if (enc
== XmlEncoding_Unknown
)
1033 return WC_E_ENCNAME
;
1035 /* skip encoding name */
1036 reader_skipn(reader
, len
);
1040 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1041 static HRESULT
reader_parse_encdecl(xmlreader
*reader
)
1043 static const WCHAR encodingW
[] = {'e','n','c','o','d','i','n','g',0};
1047 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1049 if (reader_cmp(reader
, encodingW
)) return S_FALSE
;
1050 name
.str
= reader_get_cur(reader
);
1052 /* skip 'encoding' */
1053 reader_skipn(reader
, 8);
1055 hr
= reader_parse_eq(reader
);
1056 if (FAILED(hr
)) return hr
;
1058 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1061 reader_skipn(reader
, 1);
1063 hr
= reader_parse_encname(reader
, &val
);
1064 if (FAILED(hr
)) return hr
;
1066 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1070 reader_skipn(reader
, 1);
1072 return reader_add_attr(reader
, &name
, &val
);
1075 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1076 static HRESULT
reader_parse_sddecl(xmlreader
*reader
)
1078 static const WCHAR standaloneW
[] = {'s','t','a','n','d','a','l','o','n','e',0};
1079 static const WCHAR yesW
[] = {'y','e','s',0};
1080 static const WCHAR noW
[] = {'n','o',0};
1085 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1087 if (reader_cmp(reader
, standaloneW
)) return S_FALSE
;
1088 reader_init_strvalue(reader_get_cur(reader
), 10, &name
);
1089 /* skip 'standalone' */
1090 reader_skipn(reader
, 10);
1092 hr
= reader_parse_eq(reader
);
1093 if (FAILED(hr
)) return hr
;
1095 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1098 reader_skipn(reader
, 1);
1100 if (reader_cmp(reader
, yesW
) && reader_cmp(reader
, noW
))
1101 return WC_E_XMLDECL
;
1103 start
= reader_get_cur(reader
);
1104 /* skip 'yes'|'no' */
1105 reader_skipn(reader
, reader_cmp(reader
, yesW
) ? 2 : 3);
1106 ptr
= reader_get_cur(reader
);
1107 TRACE("standalone=%s\n", debugstr_wn(start
, ptr
-start
));
1108 val
.str
= val
.start
= start
;
1109 val
.len
= ptr
-start
;
1111 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1114 reader_skipn(reader
, 1);
1116 return reader_add_attr(reader
, &name
, &val
);
1119 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1120 static HRESULT
reader_parse_xmldecl(xmlreader
*reader
)
1122 static const WCHAR xmldeclW
[] = {'<','?','x','m','l',' ',0};
1123 static const WCHAR declcloseW
[] = {'?','>',0};
1126 /* check if we have "<?xml " */
1127 if (reader_cmp(reader
, xmldeclW
)) return S_FALSE
;
1129 reader_skipn(reader
, 5);
1130 hr
= reader_parse_versioninfo(reader
);
1134 hr
= reader_parse_encdecl(reader
);
1138 hr
= reader_parse_sddecl(reader
);
1142 reader_skipspaces(reader
);
1143 if (reader_cmp(reader
, declcloseW
)) return WC_E_XMLDECL
;
1144 reader_skipn(reader
, 2);
1146 reader_inc_depth(reader
);
1147 reader
->nodetype
= XmlNodeType_XmlDeclaration
;
1148 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1149 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1150 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1155 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1156 static HRESULT
reader_parse_comment(xmlreader
*reader
)
1160 if (reader
->resume
[XmlReadResume_Body
])
1162 start
= reader
->resume
[XmlReadResume_Body
];
1163 ptr
= reader_get_cur(reader
);
1168 reader_skipn(reader
, 4);
1169 reader_shrink(reader
);
1170 ptr
= start
= reader_get_cur(reader
);
1171 reader
->nodetype
= XmlNodeType_Comment
;
1172 reader
->resume
[XmlReadResume_Body
] = start
;
1173 reader
->resumestate
= XmlReadResumeState_Comment
;
1174 reader_set_strvalue(reader
, StringValue_LocalName
, NULL
);
1175 reader_set_strvalue(reader
, StringValue_QualifiedName
, NULL
);
1176 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
1179 /* will exit when there's no more data, it won't attempt to
1180 read more from stream */
1191 TRACE("%s\n", debugstr_wn(start
, ptr
-start
));
1193 reader_skipn(reader
, 3);
1194 reader_init_strvalue(start
, ptr
-start
, &value
);
1195 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1196 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1197 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1198 reader
->resume
[XmlReadResume_Body
] = NULL
;
1199 reader
->resumestate
= XmlReadResumeState_Initial
;
1203 return WC_E_COMMENT
;
1210 reader_skipn(reader
, 1);
/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
   Works on single UTF-16 code units: high (0xd800-0xdbff) and low
   (0xdc00-0xdfff) surrogate halves are accepted, which is how characters
   above 0xffff get through. */
static inline int is_char(WCHAR ch)
{
    if (ch == '\t' || ch == '\n' || ch == '\r')
        return 1;

    /* 0x20-0xd7ff, both surrogate ranges and 0xe000-0xfffd are adjacent,
       so together they form one contiguous range */
    return ch >= 0x20 && ch <= 0xfffd;
}
/* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
   Returns 1 when 'ch' is allowed in a public identifier literal, 0 otherwise.
   The punctuation range starts at the apostrophe, so that ' ( ) * + , are
   accepted together with - . / : ; as the production requires. */
static inline int is_pubchar(WCHAR ch)
{
    return (ch == ' ') ||
           (ch >= 'a' && ch <= 'z') ||
           (ch >= 'A' && ch <= 'Z') ||
           (ch >= '0' && ch <= '9') ||
           (ch >= '\'' && ch <= ';') || /* '()*+,-./:; (digits fall in this range too) */
           (ch == '=') || (ch == '?') ||
           (ch == '@') || (ch == '!') ||
           (ch >= '#' && ch <= '%') || /* #$% */
           (ch == '_') || (ch == '\r') || (ch == '\n');
}
/* Returns 1 when 'ch' is accepted as the first character of a name, 0 otherwise.
   Works on single UTF-16 code units, so both surrogate ranges are accepted. */
static inline int is_namestartchar(WCHAR ch)
{
    /* inclusive ranges of accepted code units */
    static const struct { WCHAR lo, hi; } spans[] =
    {
        { ':',    ':'    },
        { 'A',    'Z'    },
        { '_',    '_'    },
        { 'a',    'z'    },
        { 0xc0,   0xd6   },
        { 0xd8,   0xf6   },
        { 0xf8,   0x2ff  },
        { 0x370,  0x37d  },
        { 0x37f,  0x1fff },
        { 0x200c, 0x200d },
        { 0x2070, 0x218f },
        { 0x2c00, 0x2fef },
        { 0x3001, 0xd7ff },
        { 0xd800, 0xdbff }, /* high surrogate */
        { 0xdc00, 0xdfff }, /* low surrogate */
        { 0xf900, 0xfdcf },
        { 0xfdf0, 0xfffd }
    };
    unsigned int i;

    for (i = 0; i < sizeof(spans)/sizeof(spans[0]); i++)
    {
        if (ch >= spans[i].lo && ch <= spans[i].hi)
            return 1;
    }

    return 0;
}
/* [4 NS] NCName ::= Name - (Char* ':' Char*)
   Returns 1 when 'ch' is accepted inside a name that must not contain a
   colon, 0 otherwise. Works on single UTF-16 code units, so both surrogate
   ranges are accepted. */
static inline int is_ncnamechar(WCHAR ch)
{
    /* inclusive ranges of accepted code units */
    static const struct { WCHAR lo, hi; } spans[] =
    {
        { 'A',    'Z'    },
        { '_',    '_'    },
        { 'a',    'z'    },
        { '-',    '-'    },
        { '.',    '.'    },
        { '0',    '9'    },
        { 0xb7,   0xb7   },
        { 0xc0,   0xd6   },
        { 0xd8,   0xf6   },
        { 0xf8,   0x2ff  },
        { 0x300,  0x36f  },
        { 0x370,  0x37d  },
        { 0x37f,  0x1fff },
        { 0x200c, 0x200d },
        { 0x203f, 0x2040 },
        { 0x2070, 0x218f },
        { 0x2c00, 0x2fef },
        { 0x3001, 0xd7ff },
        { 0xd800, 0xdbff }, /* high surrogate */
        { 0xdc00, 0xdfff }, /* low surrogate */
        { 0xf900, 0xfdcf },
        { 0xfdf0, 0xfffd }
    };
    unsigned int i;

    for (i = 0; i < sizeof(spans)/sizeof(spans[0]); i++)
    {
        if (ch >= spans[i].lo && ch <= spans[i].hi)
            return 1;
    }

    return 0;
}
/* Returns 1 when 'ch' is a colon or is accepted by is_ncnamechar(), 0 otherwise. */
static inline int is_namechar(WCHAR ch)
{
    if (ch == ':')
        return 1;

    return is_ncnamechar(ch) != 0;
}
1291 static XmlNodeType
reader_get_nodetype(const xmlreader
*reader
)
1293 /* When we're on attribute always return attribute type, container node type is kept.
1294 Note that container is not necessarily an element, and attribute doesn't mean it's
1295 an attribute in XML spec terms. */
1296 return reader
->attr
? XmlNodeType_Attribute
: reader
->nodetype
;
1299 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1300 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1301 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1302 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1303 [5] Name ::= NameStartChar (NameChar)* */
1304 static HRESULT
reader_parse_name(xmlreader
*reader
, strval
*name
)
1308 if (reader
->resume
[XmlReadResume_Name
])
1310 start
= reader
->resume
[XmlReadResume_Name
];
1311 ptr
= reader_get_cur(reader
);
1315 ptr
= start
= reader_get_cur(reader
);
1316 if (!is_namestartchar(*ptr
)) return WC_E_NAMECHARACTER
;
1319 while (is_namechar(*ptr
))
1321 reader_skipn(reader
, 1);
1322 ptr
= reader_get_cur(reader
);
1325 if (is_reader_pending(reader
))
1327 reader
->resume
[XmlReadResume_Name
] = start
;
1331 reader
->resume
[XmlReadResume_Name
] = NULL
;
1333 TRACE("name %s:%d\n", debugstr_wn(start
, ptr
-start
), (int)(ptr
-start
));
1334 reader_init_strvalue(start
, ptr
-start
, name
);
1339 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1340 static HRESULT
reader_parse_pitarget(xmlreader
*reader
, strval
*target
)
1342 static const WCHAR xmlW
[] = {'x','m','l'};
1347 hr
= reader_parse_name(reader
, &name
);
1348 if (FAILED(hr
)) return is_reader_pending(reader
) ? E_PENDING
: WC_E_PI
;
1350 /* now that we got name check for illegal content */
1351 if (name
.len
== 3 && !strncmpiW(name
.str
, xmlW
, 3))
1352 return WC_E_LEADINGXML
;
1354 /* PITarget can't be a qualified name */
1355 for (i
= 0; i
< name
.len
; i
++)
1356 if (name
.str
[i
] == ':')
1357 return i
? NC_E_NAMECOLON
: WC_E_PI
;
1359 TRACE("pitarget %s:%d\n", debugstr_wn(name
.str
, name
.len
), name
.len
);
1364 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
/* Parses a processing instruction; written so it can be re-entered after the
   input ran dry. reader->resumestate selects where to start: the Initial
   case skips two characters, shrinks the input and moves to the PITarget
   state; the PITarget case parses the target with reader_parse_pitarget(),
   stores it as both local and qualified name, sets the value to the empty
   string and moves to the PIBody state.

   A '?>' directly after the target ends a PI without a body. Otherwise at
   least one whitespace character is required (WC_E_WHITESPACE), and the
   position after the whitespace is saved in resume[XmlReadResume_Body] so
   that a later call continues with the same body start.

   When the node is completed two characters are skipped, the node type is set
   to XmlNodeType_ProcessingInstruction, the resume state and the saved body
   start are reset, and the range [start, ptr) becomes the node value. */
1365 static HRESULT
reader_parse_pi(xmlreader
*reader
)
1371 switch (reader
->resumestate
)
1373 case XmlReadResumeState_Initial
:
1375 reader_skipn(reader
, 2);
1376 reader_shrink(reader
);
1377 reader
->resumestate
= XmlReadResumeState_PITarget
;
1378 case XmlReadResumeState_PITarget
:
1379 hr
= reader_parse_pitarget(reader
, &target
);
1380 if (FAILED(hr
)) return hr
;
1381 reader_set_strvalue(reader
, StringValue_LocalName
, &target
);
1382 reader_set_strvalue(reader
, StringValue_QualifiedName
, &target
);
1383 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1384 reader
->resumestate
= XmlReadResumeState_PIBody
;
1389 ptr
= reader_get_cur(reader
);
1390 /* exit earlier if there's no content */
1391 if (ptr
[0] == '?' && ptr
[1] == '>')
1394 reader_skipn(reader
, 2);
1395 reader
->nodetype
= XmlNodeType_ProcessingInstruction
;
1396 reader
->resumestate
= XmlReadResumeState_Initial
;
/* no body start saved yet: this is the first attempt at the body */
1400 if (!reader
->resume
[XmlReadResume_Body
])
1402 /* now at least a single space char should be there */
1403 if (!is_wchar_space(*ptr
)) return WC_E_WHITESPACE
;
1404 reader_skipspaces(reader
);
1405 ptr
= start
= reader_get_cur(reader
);
1406 reader
->resume
[XmlReadResume_Body
] = start
;
/* resumed call: keep the saved body start, continue from the current position */
1410 start
= reader
->resume
[XmlReadResume_Body
];
1411 ptr
= reader_get_cur(reader
);
1422 TRACE("%s\n", debugstr_wn(start
, ptr
-start
));
1424 reader_skipn(reader
, 2);
1425 reader
->nodetype
= XmlNodeType_ProcessingInstruction
;
1426 reader
->resumestate
= XmlReadResumeState_Initial
;
1427 reader
->resume
[XmlReadResume_Body
] = NULL
;
1428 reader_init_strvalue(start
, ptr
-start
, &value
);
1429 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1435 reader_more(reader
);
1440 reader_skipn(reader
, 1);
1441 ptr
= reader_get_cur(reader
);
1448 /* This one is used to parse significant whitespace nodes, like in Misc production */
1449 static HRESULT
reader_parse_whitespace(xmlreader
*reader
)
1453 reader_shrink(reader
);
1454 start
= reader_get_cur(reader
);
1456 reader_skipspaces(reader
);
1457 ptr
= reader_get_cur(reader
);
1458 TRACE("%s\n", debugstr_wn(start
, ptr
-start
));
1460 reader
->nodetype
= XmlNodeType_Whitespace
;
1461 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1462 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1463 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1467 /* [27] Misc ::= Comment | PI | S */
1468 static HRESULT
reader_parse_misc(xmlreader
*reader
)
1470 HRESULT hr
= S_FALSE
;
1472 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
1474 hr
= reader_more(reader
);
1475 if (FAILED(hr
)) return hr
;
1477 /* finish current node */
1478 switch (reader
->resumestate
)
1480 case XmlReadResumeState_PITarget
:
1481 case XmlReadResumeState_PIBody
:
1482 return reader_parse_pi(reader
);
1483 case XmlReadResumeState_Comment
:
1484 return reader_parse_comment(reader
);
1486 ERR("unknown resume state %d\n", reader
->resumestate
);
1492 const WCHAR
*cur
= reader_get_cur(reader
);
1494 if (is_wchar_space(*cur
))
1495 hr
= reader_parse_whitespace(reader
);
1496 else if (!reader_cmp(reader
, commentW
))
1497 hr
= reader_parse_comment(reader
);
1498 else if (!reader_cmp(reader
, piW
))
1499 hr
= reader_parse_pi(reader
);
1503 if (hr
!= S_FALSE
) return hr
;
1509 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1510 static HRESULT
reader_parse_sys_literal(xmlreader
*reader
, strval
*literal
)
1512 WCHAR
*start
= reader_get_cur(reader
), *cur
, quote
;
1514 if (*start
!= '"' && *start
!= '\'') return WC_E_QUOTE
;
1517 reader_skipn(reader
, 1);
1519 cur
= start
= reader_get_cur(reader
);
1520 while (is_char(*cur
) && *cur
!= quote
)
1522 reader_skipn(reader
, 1);
1523 cur
= reader_get_cur(reader
);
1525 if (*cur
== quote
) reader_skipn(reader
, 1);
1527 reader_init_strvalue(start
, cur
-start
, literal
);
1528 TRACE("%s\n", debugstr_wn(start
, cur
-start
));
1532 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1533 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1534 static HRESULT
reader_parse_pub_literal(xmlreader
*reader
, strval
*literal
)
1536 WCHAR
*start
= reader_get_cur(reader
), *cur
, quote
;
1538 if (*start
!= '"' && *start
!= '\'') return WC_E_QUOTE
;
1541 reader_skipn(reader
, 1);
1544 while (is_pubchar(*cur
) && *cur
!= quote
)
1546 reader_skipn(reader
, 1);
1547 cur
= reader_get_cur(reader
);
1550 reader_init_strvalue(start
, cur
-start
, literal
);
1551 TRACE("%s\n", debugstr_wn(start
, cur
-start
));
1555 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
/* Handles the ExternalID part of a doctype declaration by comparing the
   input against the SYSTEM and PUBLIC keywords. For either keyword six
   characters are skipped, at least one following whitespace character is
   required (WC_E_WHITESPACE), the literal is parsed and the result is stored
   with reader_add_attr() using the keyword itself as the name. A failure of
   the literal parser is returned unchanged.

   NOTE(review): the PUBLIC path only parses the public id literal; no call
   that parses the SystemLiteral required by the grammar is visible here. */
1556 static HRESULT
reader_parse_externalid(xmlreader
*reader
)
1558 static WCHAR systemW
[] = {'S','Y','S','T','E','M',0};
1559 static WCHAR publicW
[] = {'P','U','B','L','I','C',0};
1564 if (reader_cmp(reader
, systemW
))
1566 if (reader_cmp(reader
, publicW
))
1573 reader_skipn(reader
, 6);
1574 cnt
= reader_skipspaces(reader
);
1575 if (!cnt
) return WC_E_WHITESPACE
;
1577 hr
= reader_parse_pub_literal(reader
, &pub
);
1578 if (FAILED(hr
)) return hr
;
1580 reader_init_strvalue(publicW
, strlenW(publicW
), &name
);
1581 return reader_add_attr(reader
, &name
, &pub
);
1589 reader_skipn(reader
, 6);
1590 cnt
= reader_skipspaces(reader
);
1591 if (!cnt
) return WC_E_WHITESPACE
;
1593 hr
= reader_parse_sys_literal(reader
, &sys
);
1594 if (FAILED(hr
)) return hr
;
1596 reader_init_strvalue(systemW
, strlenW(systemW
), &name
);
1597 return reader_add_attr(reader
, &name
, &sys
);
1603 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
/* Parses a document type declaration.

   Returns S_FALSE when the input does not start with "<!DOCTYPE",
   WC_E_DTDPROHIBITED when reader->dtdmode is DtdProcessing_Prohibit,
   WC_E_WHITESPACE when the keyword is not followed by whitespace and
   WC_E_DECLDOCTYPE when the doctype name cannot be parsed. The external id
   is delegated to reader_parse_externalid() and its failure is returned
   unchanged. The internal subset is not implemented, see the FIXME.

   On success the node type is XmlNodeType_DocumentType and the doctype name
   is stored as both local and qualified name. */
1604 static HRESULT
reader_parse_dtd(xmlreader
*reader
)
1606 static const WCHAR doctypeW
[] = {'<','!','D','O','C','T','Y','P','E',0};
1611 /* check if we have "<!DOCTYPE" */
1612 if (reader_cmp(reader
, doctypeW
)) return S_FALSE
;
1613 reader_shrink(reader
);
1615 /* DTD processing is not allowed by default */
1616 if (reader
->dtdmode
== DtdProcessing_Prohibit
) return WC_E_DTDPROHIBITED
;
/* 9 is the length of "<!DOCTYPE" */
1618 reader_skipn(reader
, 9);
1619 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1622 hr
= reader_parse_name(reader
, &name
);
1623 if (FAILED(hr
)) return WC_E_DECLDOCTYPE
;
1625 reader_skipspaces(reader
);
1627 hr
= reader_parse_externalid(reader
);
1628 if (FAILED(hr
)) return hr
;
1630 reader_skipspaces(reader
);
1632 cur
= reader_get_cur(reader
);
1635 FIXME("internal subset parsing not implemented\n");
1640 reader_skipn(reader
, 1);
1642 reader
->nodetype
= XmlNodeType_DocumentType
;
1643 reader_set_strvalue(reader
, StringValue_LocalName
, &name
);
1644 reader_set_strvalue(reader
, StringValue_QualifiedName
, &name
);
1649 /* [11 NS] LocalPart ::= NCName */
1650 static HRESULT
reader_parse_local(xmlreader
*reader
, strval
*local
)
1654 if (reader
->resume
[XmlReadResume_Local
])
1656 start
= reader
->resume
[XmlReadResume_Local
];
1657 ptr
= reader_get_cur(reader
);
1661 ptr
= start
= reader_get_cur(reader
);
1664 while (is_ncnamechar(*ptr
))
1666 reader_skipn(reader
, 1);
1667 ptr
= reader_get_cur(reader
);
1670 if (is_reader_pending(reader
))
1672 reader
->resume
[XmlReadResume_Local
] = start
;
1676 reader
->resume
[XmlReadResume_Local
] = NULL
;
1678 reader_init_strvalue(start
, ptr
-start
, local
);
1683 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1684 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1685 [9 NS] UnprefixedName ::= LocalPart
1686 [10 NS] Prefix ::= NCName */
/* Parses a possibly prefixed name and fills 'prefix', 'local' and 'qname'.

   The start of the whole name is kept in resume[XmlReadResume_Name]. A fresh
   parse returns NC_E_QNAMECHARACTER if the first character is not an NCName
   character. When resume[XmlReadResume_Local] is already set, parsing
   continues inside the local part through reader_parse_local() and the
   prefix is rebuilt from the saved name start up to one character before
   local->start. E_PENDING is returned if the reader is pending after the
   leading NCName characters have been consumed.

   'qname' starts at the prefix when there is one, otherwise at the local
   part; its length is the local length plus the prefix length and one more
   character when the prefix is not empty. Both resume slots are cleared at
   the end. */
1687 static HRESULT
reader_parse_qname(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
1692 if (reader
->resume
[XmlReadResume_Name
])
1694 start
= reader
->resume
[XmlReadResume_Name
];
1695 ptr
= reader_get_cur(reader
);
1699 ptr
= start
= reader_get_cur(reader
);
1700 reader
->resume
[XmlReadResume_Name
] = start
;
1701 if (!is_ncnamechar(*ptr
)) return NC_E_QNAMECHARACTER
;
1704 if (reader
->resume
[XmlReadResume_Local
])
1706 hr
= reader_parse_local(reader
, local
);
1707 if (FAILED(hr
)) return hr
;
1709 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
],
1710 local
->start
- reader
->resume
[XmlReadResume_Name
] - 1,
1715 /* skip prefix part */
1716 while (is_ncnamechar(*ptr
))
1718 reader_skipn(reader
, 1);
1719 ptr
= reader_get_cur(reader
);
1722 if (is_reader_pending(reader
)) return E_PENDING
;
1724 /* got a qualified name */
1727 reader_init_strvalue(start
, ptr
-start
, prefix
);
1730 reader_skipn(reader
, 1);
1731 hr
= reader_parse_local(reader
, local
);
1732 if (FAILED(hr
)) return hr
;
1736 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
], ptr
-reader
->resume
[XmlReadResume_Name
], local
);
1737 reader_init_strvalue(NULL
, 0, prefix
);
1741 reader_init_strvalue(start
, ptr
-start
, local
);
1744 TRACE("qname %s:%s\n", debugstr_wn(prefix
->start
, prefix
->len
), debugstr_wn(local
->start
, local
->len
));
1746 TRACE("ncname %s\n", debugstr_wn(local
->start
, local
->len
));
1748 reader_init_strvalue(prefix
->start
? prefix
->start
: local
->start
,
1750 (prefix
->len
? prefix
->len
+ 1 : 0) + local
->len
,
1753 reader
->resume
[XmlReadResume_Name
] = NULL
;
1754 reader
->resume
[XmlReadResume_Local
] = NULL
;
1759 /* Applies normalization rules to a single char, used for attribute values.
1761 Rules include 2 steps:
1763 1) replacing \r\n with a single \n;
1764 2) replacing all whitespace chars with ' '.
1767 static void reader_normalize_space(xmlreader
*reader
, WCHAR
*ptr
)
1769 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1771 if (!is_wchar_space(*ptr
)) return;
1773 if (*ptr
== '\r' && *(ptr
+1) == '\n')
1775 int len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - 2*sizeof(WCHAR
);
1776 memmove(ptr
+1, ptr
+2, len
);
1781 static WCHAR
get_predefined_entity(const strval
*name
)
1783 static const WCHAR entltW
[] = {'l','t'};
1784 static const WCHAR entgtW
[] = {'g','t'};
1785 static const WCHAR entampW
[] = {'a','m','p'};
1786 static const WCHAR entaposW
[] = {'a','p','o','s'};
1787 static const WCHAR entquotW
[] = {'q','u','o','t'};
1789 static const strval lt
= { (WCHAR
*)entltW
, 2 };
1790 static const strval gt
= { (WCHAR
*)entgtW
, 2 };
1791 static const strval amp
= { (WCHAR
*)entampW
, 3 };
1792 static const strval apos
= { (WCHAR
*)entaposW
, 4 };
1793 static const strval quot
= { (WCHAR
*)entquotW
, 4 };
1795 switch (name
->str
[0])
1798 if (strval_eq(name
, <
)) return '<';
1801 if (strval_eq(name
, >
)) return '>';
1804 if (strval_eq(name
, &
))
1806 else if (strval_eq(name
, &apos
))
1810 if (strval_eq(name
, "
)) return '\"';
1819 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1820 [67] Reference ::= EntityRef | CharRef
1821 [68] EntityRef ::= '&' Name ';' */
/* Resolves a character or entity reference in place inside the reader's
   utf16 buffer.

   Numeric form: hexadecimal or decimal digits are accumulated into ch. A
   character that is not a valid digit gives WC_E_SEMICOLON when some value
   was already accumulated, otherwise WC_E_HEXDIGIT or WC_E_DIGIT. The result
   has to pass is_char() (WC_E_XMLCHARACTER), and a whitespace result is
   replaced with ' '.

   Named form: the name is parsed with reader_parse_name() and must be
   followed by ';' (WC_E_SEMICOLON). Only names known to
   get_predefined_entity() are resolved; anything else is logged and rejected
   with WC_E_UNDECLAREDENTITY.

   In both forms the buffer content following ptr is moved down to start+1
   and buffer->cur is set to start+1. */
1822 static HRESULT
reader_parse_reference(xmlreader
*reader
)
1824 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1825 WCHAR
*start
= reader_get_cur(reader
), *ptr
;
1830 reader_skipn(reader
, 1);
1831 ptr
= reader_get_cur(reader
);
1835 reader_skipn(reader
, 1);
1836 ptr
= reader_get_cur(reader
);
1838 /* hex char or decimal */
1841 reader_skipn(reader
, 1);
1842 ptr
= reader_get_cur(reader
);
1846 if ((*ptr
>= '0' && *ptr
<= '9'))
1847 ch
= ch
*16 + *ptr
- '0';
1848 else if ((*ptr
>= 'a' && *ptr
<= 'f'))
1849 ch
= ch
*16 + *ptr
- 'a' + 10;
1850 else if ((*ptr
>= 'A' && *ptr
<= 'F'))
1851 ch
= ch
*16 + *ptr
- 'A' + 10;
1853 return ch
? WC_E_SEMICOLON
: WC_E_HEXDIGIT
;
1854 reader_skipn(reader
, 1);
1855 ptr
= reader_get_cur(reader
);
1862 if ((*ptr
>= '0' && *ptr
<= '9'))
1864 ch
= ch
*10 + *ptr
- '0';
1865 reader_skipn(reader
, 1);
1866 ptr
= reader_get_cur(reader
);
1869 return ch
? WC_E_SEMICOLON
: WC_E_DIGIT
;
1873 if (!is_char(ch
)) return WC_E_XMLCHARACTER
;
1876 if (is_wchar_space(ch
)) ch
= ' ';
/* len is the byte count of the buffer content that follows ptr */
1878 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
1879 memmove(start
+1, ptr
+1, len
);
1880 buffer
->cur
= (char*)(start
+1);
1889 hr
= reader_parse_name(reader
, &name
);
1890 if (FAILED(hr
)) return hr
;
1892 ptr
= reader_get_cur(reader
);
1893 if (*ptr
!= ';') return WC_E_SEMICOLON
;
1895 /* predefined entities resolve to a single character */
1896 ch
= get_predefined_entity(&name
);
1899 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
1900 memmove(start
+1, ptr
+1, len
);
1901 buffer
->cur
= (char*)(start
+1);
1907 FIXME("undeclared entity %s\n", debugstr_wn(name
.str
, name
.len
));
1908 return WC_E_UNDECLAREDENTITY
;
1916 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
/* Parses a quoted attribute value and describes it in 'value'.

   Returns WC_E_QUOTE when the value does not start with a single or double
   quote and WC_E_LESSTHAN when a '<' is found inside the value. References
   are resolved by reader_parse_reference(), whose failure is returned
   unchanged. Other characters go through reader_normalize_space() before
   they are skipped. The reported range is [start, ptr), where start is the
   position right after the opening quote. */
1917 static HRESULT
reader_parse_attvalue(xmlreader
*reader
, strval
*value
)
1922 ptr
= reader_get_cur(reader
);
1924 /* skip opening quote */
1926 if (quote
!= '\"' && quote
!= '\'') return WC_E_QUOTE
;
1927 reader_skipn(reader
, 1);
1929 start
= ptr
= reader_get_cur(reader
);
1932 if (*ptr
== '<') return WC_E_LESSTHAN
;
1936 /* skip closing quote */
1937 reader_skipn(reader
, 1);
1943 HRESULT hr
= reader_parse_reference(reader
);
1944 if (FAILED(hr
)) return hr
;
1948 reader_normalize_space(reader
, ptr
);
1949 reader_skipn(reader
, 1);
1951 ptr
= reader_get_cur(reader
);
1954 reader_init_strvalue(start
, ptr
-start
, value
);
1959 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
1960 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
1961 [3 NS] DefaultAttName ::= 'xmlns'
1962 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
/* Parses a single attribute: qualified name, '=' (reader_parse_eq) and the
   quoted value, then stores it with reader_add_attr() under its local name.
   A failure of any of those steps is returned unchanged.

   Namespace declarations are not supported: a prefix equal to "xmlns" and a
   qualified name equal to "xmlns" are each reported with a FIXME. */
1963 static HRESULT
reader_parse_attribute(xmlreader
*reader
)
1965 static const WCHAR xmlnsW
[] = {'x','m','l','n','s',0};
1966 strval prefix
, local
, qname
, xmlns
, value
;
1969 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
1970 if (FAILED(hr
)) return hr
;
/* 5 is the length of "xmlns" without the terminator */
1972 reader_init_strvalue((WCHAR
*)xmlnsW
, 5, &xmlns
);
1974 if (strval_eq(&prefix
, &xmlns
))
1976 FIXME("namespace definitions not supported\n");
1980 if (strval_eq(&qname
, &xmlns
))
1982 FIXME("default namespace definitions not supported\n");
1986 hr
= reader_parse_eq(reader
);
1987 if (FAILED(hr
)) return hr
;
1989 hr
= reader_parse_attvalue(reader
, &value
);
1990 if (FAILED(hr
)) return hr
;
1992 TRACE("%s=%s\n", debugstr_wn(local
.str
, local
.len
), debugstr_wn(value
.str
, value
.len
));
1993 return reader_add_attr(reader
, &local
, &value
);
1996 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
1997 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
1998 static HRESULT
reader_parse_stag(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
, int *empty
)
2002 hr
= reader_parse_qname(reader
, prefix
, local
, qname
);
2003 if (FAILED(hr
)) return hr
;
2007 static const WCHAR endW
[] = {'/','>',0};
2009 reader_skipspaces(reader
);
2012 if ((*empty
= !reader_cmp(reader
, endW
)))
2015 reader_skipn(reader
, 2);
2016 reader
->empty_element
= TRUE
;
2020 /* got a start tag */
2021 if (!reader_cmp(reader
, gtW
))
2024 reader_skipn(reader
, 1);
2025 return reader_push_element(reader
, qname
, local
);
2028 hr
= reader_parse_attribute(reader
);
2029 if (FAILED(hr
)) return hr
;
2035 /* [39] element ::= EmptyElemTag | STag content ETag */
/* Parses an element start tag (or empty element tag); entry point depends on
   reader->resumestate.

   In the Initial state S_FALSE is returned when the input does not match
   ltW; otherwise one character is skipped, the input is shrunk and the state
   becomes XmlReadResumeState_STag. In the STag state the tag is parsed with
   reader_parse_stag(). Any non-empty prefix is rejected with
   NC_E_UNDECLAREDPREFIX because namespaces are not tracked yet.

   Afterwards the input state becomes MiscEnd for an empty element with an
   empty element stack, or Content otherwise; the node type is
   XmlNodeType_Element, the resume state returns to Initial and local name,
   prefix and qualified name are stored. */
2036 static HRESULT
reader_parse_element(xmlreader
*reader
)
2040 switch (reader
->resumestate
)
2042 case XmlReadResumeState_Initial
:
2043 /* check if we are really on element */
2044 if (reader_cmp(reader
, ltW
)) return S_FALSE
;
2047 reader_skipn(reader
, 1);
2049 reader_shrink(reader
);
2050 reader
->resumestate
= XmlReadResumeState_STag
;
2051 case XmlReadResumeState_STag
:
2053 strval qname
, prefix
, local
;
2056 /* this handles empty elements too */
2057 hr
= reader_parse_stag(reader
, &prefix
, &local
, &qname
, &empty
);
2058 if (FAILED(hr
)) return hr
;
2060 /* FIXME: need to check for defined namespace to reject invalid prefix,
2061 currently reject all prefixes */
2062 if (prefix
.len
) return NC_E_UNDECLAREDPREFIX
;
2064 /* if we got empty element and stack is empty go straight to Misc */
2065 if (empty
&& list_empty(&reader
->elements
))
2066 reader
->instate
= XmlReadInState_MiscEnd
;
2068 reader
->instate
= XmlReadInState_Content
;
2070 reader
->nodetype
= XmlNodeType_Element
;
2071 reader
->resumestate
= XmlReadResumeState_Initial
;
2072 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2073 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2074 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2084 /* [13 NS] ETag ::= '</' QName S? '>' */
2085 static HRESULT
reader_parse_endtag(xmlreader
*reader
)
2087 strval prefix
, local
, qname
;
2088 struct element
*elem
;
2092 reader_skipn(reader
, 2);
2094 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2095 if (FAILED(hr
)) return hr
;
2097 reader_skipspaces(reader
);
2099 if (reader_cmp(reader
, gtW
)) return WC_E_GREATERTHAN
;
2102 reader_skipn(reader
, 1);
2104 /* Element stack should never be empty at this point, cause we shouldn't get to
2105 content parsing if it's empty. */
2106 elem
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
2107 if (!strval_eq(&elem
->qname
, &qname
)) return WC_E_ELEMENTMATCH
;
2109 reader_pop_element(reader
);
2111 /* It was a root element, the rest is expected as Misc */
2112 if (list_empty(&reader
->elements
))
2113 reader
->instate
= XmlReadInState_MiscEnd
;
2115 reader
->nodetype
= XmlNodeType_EndElement
;
2116 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2117 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2122 /* [18] CDSect ::= CDStart CData CDEnd
2123 [19] CDStart ::= '<![CDATA['
2124 [20] CData ::= (Char* - (Char* ']]>' Char*))
2125 [21] CDEnd ::= ']]>' */
2126 static HRESULT
reader_parse_cdata(xmlreader
*reader
)
2130 if (reader
->resume
[XmlReadResume_Body
])
2132 start
= reader
->resume
[XmlReadResume_Body
];
2133 ptr
= reader_get_cur(reader
);
2137 /* skip markup '<![CDATA[' */
2138 reader_skipn(reader
, 9);
2139 reader_shrink(reader
);
2140 ptr
= start
= reader_get_cur(reader
);
2141 reader
->nodetype
= XmlNodeType_CDATA
;
2142 reader
->resume
[XmlReadResume_Body
] = start
;
2143 reader
->resumestate
= XmlReadResumeState_CDATA
;
2144 reader_set_strvalue(reader
, StringValue_LocalName
, NULL
);
2145 reader_set_strvalue(reader
, StringValue_QualifiedName
, NULL
);
2146 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2151 if (*ptr
== ']' && *(ptr
+1) == ']' && *(ptr
+2) == '>')
2155 TRACE("%s\n", debugstr_wn(start
, ptr
-start
));
2157 reader_skipn(reader
, 3);
2158 reader_init_strvalue(start
, ptr
-start
, &value
);
2159 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
2160 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
2161 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2162 reader
->resume
[XmlReadResume_Body
] = NULL
;
2163 reader
->resumestate
= XmlReadResumeState_Initial
;
2168 /* Value normalization is not fully implemented, rules are:
2170 - single '\r' -> '\n';
2171 - sequence '\r\n' -> '\n', in this case value length changes;
2173 if (*ptr
== '\r') *ptr
= '\n';
2174 reader_skipn(reader
, 1);
2182 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2183 static HRESULT
reader_parse_chardata(xmlreader
*reader
)
2187 if (reader
->resume
[XmlReadResume_Body
])
2189 start
= reader
->resume
[XmlReadResume_Body
];
2190 ptr
= reader_get_cur(reader
);
2194 reader_shrink(reader
);
2195 ptr
= start
= reader_get_cur(reader
);
2196 /* There's no text */
2197 if (!*ptr
|| *ptr
== '<') return S_OK
;
2198 reader
->nodetype
= XmlNodeType_Text
;
2199 reader
->resume
[XmlReadResume_Body
] = start
;
2200 reader
->resumestate
= XmlReadResumeState_CharData
;
2201 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
2202 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
2203 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2208 /* CDATA closing sequence ']]>' is not allowed */
2209 if (ptr
[0] == ']' && ptr
[1] == ']' && ptr
[2] == '>')
2210 return WC_E_CDSECTEND
;
2212 /* Found next markup part */
2217 reader_init_strvalue(start
, ptr
-start
, &value
);
2218 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2222 reader_skipn(reader
, 1);
2229 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2230 static HRESULT
reader_parse_content(xmlreader
*reader
)
2232 static const WCHAR cdstartW
[] = {'<','!','[','C','D','A','T','A','[',0};
2233 static const WCHAR etagW
[] = {'<','/',0};
2234 static const WCHAR ampW
[] = {'&',0};
2236 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
2238 switch (reader
->resumestate
)
2240 case XmlReadResumeState_CDATA
:
2241 return reader_parse_cdata(reader
);
2242 case XmlReadResumeState_Comment
:
2243 return reader_parse_comment(reader
);
2244 case XmlReadResumeState_PIBody
:
2245 case XmlReadResumeState_PITarget
:
2246 return reader_parse_pi(reader
);
2247 case XmlReadResumeState_CharData
:
2248 return reader_parse_chardata(reader
);
2250 ERR("unknown resume state %d\n", reader
->resumestate
);
2254 reader_shrink(reader
);
2256 /* handle end tag here, it indicates end of content as well */
2257 if (!reader_cmp(reader
, etagW
))
2258 return reader_parse_endtag(reader
);
2260 if (!reader_cmp(reader
, commentW
))
2261 return reader_parse_comment(reader
);
2263 if (!reader_cmp(reader
, piW
))
2264 return reader_parse_pi(reader
);
2266 if (!reader_cmp(reader
, cdstartW
))
2267 return reader_parse_cdata(reader
);
2269 if (!reader_cmp(reader
, ampW
))
2270 return reader_parse_reference(reader
);
2272 if (!reader_cmp(reader
, ltW
))
2273 return reader_parse_element(reader
);
2275 /* what's left must be CharData */
2276 return reader_parse_chardata(reader
);
/* Advances the reader to the next node, driven by reader->instate.

   Attributes of the previous node are cleared unless the reader is pending.
   The Initial state grows the raw input, detects the encoding, switches to
   it, parses the XML declaration, shrinks the raw buffer and moves on to
   Misc_DTD. The Misc_DTD, DTD and DTD_Misc states call the Misc or DTD
   parser and move on towards the Element state. The Element and Content
   states return the result of the element or content parser directly. The
   MiscEnd state parses trailing Misc items and leads to Eof. A failure from
   any parser is returned unchanged. Unknown states are reported with a
   FIXME. */
2279 static HRESULT
reader_parse_nextnode(xmlreader
*reader
)
2283 if (!is_reader_pending(reader
))
2284 reader_clear_attrs(reader
);
2288 switch (reader
->instate
)
2290 /* if it's a first call for a new input we need to detect stream encoding */
2291 case XmlReadInState_Initial
:
2295 hr
= readerinput_growraw(reader
->input
);
2296 if (FAILED(hr
)) return hr
;
2298 /* try to detect encoding by BOM or data and set input code page */
2299 hr
= readerinput_detectencoding(reader
->input
, &enc
);
2300 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map
[enc
].name
), hr
);
2301 if (FAILED(hr
)) return hr
;
2303 /* always switch first time cause we have to put something in */
2304 readerinput_switchencoding(reader
->input
, enc
);
2306 /* parse xml declaration */
2307 hr
= reader_parse_xmldecl(reader
);
2308 if (FAILED(hr
)) return hr
;
2310 readerinput_shrinkraw(reader
->input
, -1);
2311 reader
->instate
= XmlReadInState_Misc_DTD
;
2312 if (hr
== S_OK
) return hr
;
2315 case XmlReadInState_Misc_DTD
:
2316 hr
= reader_parse_misc(reader
);
2317 if (FAILED(hr
)) return hr
;
2320 reader
->instate
= XmlReadInState_DTD
;
2324 case XmlReadInState_DTD
:
2325 hr
= reader_parse_dtd(reader
);
2326 if (FAILED(hr
)) return hr
;
2330 reader
->instate
= XmlReadInState_DTD_Misc
;
2334 reader
->instate
= XmlReadInState_Element
;
2336 case XmlReadInState_DTD_Misc
:
2337 hr
= reader_parse_misc(reader
);
2338 if (FAILED(hr
)) return hr
;
2341 reader
->instate
= XmlReadInState_Element
;
2345 case XmlReadInState_Element
:
2346 return reader_parse_element(reader
);
2347 case XmlReadInState_Content
:
2348 return reader_parse_content(reader
);
2349 case XmlReadInState_MiscEnd
:
2350 hr
= reader_parse_misc(reader
);
2351 if (FAILED(hr
)) return hr
;
2354 reader
->instate
= XmlReadInState_Eof
;
2356 case XmlReadInState_Eof
:
2359 FIXME("internal state %d not handled\n", reader
->instate
);
2367 static HRESULT WINAPI
xmlreader_QueryInterface(IXmlReader
*iface
, REFIID riid
, void** ppvObject
)
2369 xmlreader
*This
= impl_from_IXmlReader(iface
);
2371 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2373 if (IsEqualGUID(riid
, &IID_IUnknown
) ||
2374 IsEqualGUID(riid
, &IID_IXmlReader
))
2380 FIXME("interface %s not implemented\n", debugstr_guid(riid
));
2382 return E_NOINTERFACE
;
2385 IXmlReader_AddRef(iface
);
2390 static ULONG WINAPI
xmlreader_AddRef(IXmlReader
*iface
)
2392 xmlreader
*This
= impl_from_IXmlReader(iface
);
2393 ULONG ref
= InterlockedIncrement(&This
->ref
);
2394 TRACE("(%p)->(%d)\n", This
, ref
);
2398 static ULONG WINAPI
xmlreader_Release(IXmlReader
*iface
)
2400 xmlreader
*This
= impl_from_IXmlReader(iface
);
2401 LONG ref
= InterlockedDecrement(&This
->ref
);
2403 TRACE("(%p)->(%d)\n", This
, ref
);
2407 IMalloc
*imalloc
= This
->imalloc
;
2408 if (This
->input
) IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2409 reader_clear_attrs(This
);
2410 reader_clear_elements(This
);
2411 reader_free_strvalues(This
);
2412 reader_free(This
, This
);
2413 if (imalloc
) IMalloc_Release(imalloc
);
/* IXmlReader::SetInput: attaches a new input to the reader.

   The stream of the current input is released together with the input
   object, position counters and the element stack are cleared and all resume
   bookkeeping is reset. The supplied object is queried for IXmlReaderInput
   first. An implementation whose vtable is xmlreaderinputvtbl is used
   directly; a foreign one is logged with ERR and released. When no usable
   IXmlReaderInput was obtained, one is created around the supplied interface
   with CreateXmlReaderInputWithEncodingName(), and a failure of that call is
   returned. Finally the stream is queried from the input, and the reader and
   input states are set to Initial. */
2419 static HRESULT WINAPI
xmlreader_SetInput(IXmlReader
* iface
, IUnknown
*input
)
2421 xmlreader
*This
= impl_from_IXmlReader(iface
);
2422 IXmlReaderInput
*readerinput
;
2425 TRACE("(%p)->(%p)\n", This
, input
);
2429 readerinput_release_stream(This
->input
);
2430 IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2434 This
->line
= This
->pos
= 0;
2435 reader_clear_elements(This
);
2437 This
->resumestate
= XmlReadResumeState_Initial
;
2438 memset(This
->resume
, 0, sizeof(This
->resume
));
2440 /* just reset current input */
2443 This
->state
= XmlReadState_Initial
;
2447 /* now try IXmlReaderInput, ISequentialStream, IStream */
2448 hr
= IUnknown_QueryInterface(input
, &IID_IXmlReaderInput
, (void**)&readerinput
);
/* only the built-in implementation can be converted with impl_from_IXmlReaderInput() */
2451 if (readerinput
->lpVtbl
== &xmlreaderinputvtbl
)
2452 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2455 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2456 readerinput
, readerinput
->lpVtbl
);
2457 IUnknown_Release(readerinput
);
2463 if (hr
!= S_OK
|| !readerinput
)
2465 /* create IXmlReaderInput basing on supplied interface */
2466 hr
= CreateXmlReaderInputWithEncodingName(input
,
2467 NULL
, NULL
, FALSE
, NULL
, &readerinput
);
2468 if (hr
!= S_OK
) return hr
;
2469 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2472 /* set stream for supplied IXmlReaderInput */
2473 hr
= readerinput_query_for_stream(This
->input
);
2476 This
->state
= XmlReadState_Initial
;
2477 This
->instate
= XmlReadInState_Initial
;
/* IXmlReader::GetProperty: reads a reader property into *value.
   Returns E_INVALIDARG for a NULL value pointer. XmlReaderProperty_DtdProcessing
   reports This->dtdmode and XmlReaderProperty_ReadState reports This->state;
   other properties are logged as unimplemented. */
2483 static HRESULT WINAPI
xmlreader_GetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR
*value
)
2485 xmlreader
*This
= impl_from_IXmlReader(iface
);
2487 TRACE("(%p)->(%s %p)\n", This
, debugstr_prop(property
), value
);
2489 if (!value
) return E_INVALIDARG
;
2493 case XmlReaderProperty_DtdProcessing
:
2494 *value
= This
->dtdmode
;
2496 case XmlReaderProperty_ReadState
:
2497 *value
= This
->state
;
2500 FIXME("Unimplemented property (%u)\n", property
);
/* IXmlReader::SetProperty: changes a reader property.
   XmlReaderProperty_DtdProcessing accepts values from 0 up to
   _DtdProcessing_Last and stores them in This->dtdmode; anything outside
   that range gives E_INVALIDARG. Other properties are logged as
   unimplemented. */
2507 static HRESULT WINAPI
xmlreader_SetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR value
)
2509 xmlreader
*This
= impl_from_IXmlReader(iface
);
2511 TRACE("(%p)->(%s %lu)\n", This
, debugstr_prop(property
), value
);
2515 case XmlReaderProperty_DtdProcessing
:
2516 if (value
< 0 || value
> _DtdProcessing_Last
) return E_INVALIDARG
;
2517 This
->dtdmode
= value
;
2520 FIXME("Unimplemented property (%u)\n", property
);
/* IXmlReader::Read: moves to the next node and reports its type.
   Returns S_FALSE right away when the reader state is XmlReadState_Closed.
   The node type seen before parsing is remembered so that the first
   transition away from XmlNodeType_None switches the reader state to
   XmlReadState_Interactive. The resulting node type is written to *nodetype.
   NOTE(review): no NULL check on nodetype is visible before that store;
   confirm whether callers may pass NULL. */
2527 static HRESULT WINAPI
xmlreader_Read(IXmlReader
* iface
, XmlNodeType
*nodetype
)
2529 xmlreader
*This
= impl_from_IXmlReader(iface
);
2530 XmlNodeType oldtype
= This
->nodetype
;
2533 TRACE("(%p)->(%p)\n", This
, nodetype
);
2535 if (This
->state
== XmlReadState_Closed
) return S_FALSE
;
2537 hr
= reader_parse_nextnode(This
);
2538 if (oldtype
== XmlNodeType_None
&& This
->nodetype
!= oldtype
)
2539 This
->state
= XmlReadState_Interactive
;
2542 TRACE("node type %s\n", debugstr_nodetype(This
->nodetype
));
2543 *nodetype
= This
->nodetype
;
2549 static HRESULT WINAPI
xmlreader_GetNodeType(IXmlReader
* iface
, XmlNodeType
*node_type
)
2551 xmlreader
*This
= impl_from_IXmlReader(iface
);
2552 TRACE("(%p)->(%p)\n", This
, node_type
);
2554 *node_type
= reader_get_nodetype(This
);
2555 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
2558 static HRESULT WINAPI
xmlreader_MoveToFirstAttribute(IXmlReader
* iface
)
2560 xmlreader
*This
= impl_from_IXmlReader(iface
);
2562 TRACE("(%p)\n", This
);
2564 if (!This
->attr_count
) return S_FALSE
;
2565 This
->attr
= LIST_ENTRY(list_head(&This
->attrs
), struct attribute
, entry
);
2566 reader_set_strvalue(This
, StringValue_LocalName
, &This
->attr
->localname
);
2567 reader_set_strvalue(This
, StringValue_Value
, &This
->attr
->value
);
2572 static HRESULT WINAPI
xmlreader_MoveToNextAttribute(IXmlReader
* iface
)
2574 xmlreader
*This
= impl_from_IXmlReader(iface
);
2575 const struct list
*next
;
2577 TRACE("(%p)\n", This
);
2579 if (!This
->attr_count
) return S_FALSE
;
2582 return IXmlReader_MoveToFirstAttribute(iface
);
2584 next
= list_next(&This
->attrs
, &This
->attr
->entry
);
2587 This
->attr
= LIST_ENTRY(next
, struct attribute
, entry
);
2588 reader_set_strvalue(This
, StringValue_LocalName
, &This
->attr
->localname
);
2589 reader_set_strvalue(This
, StringValue_Value
, &This
->attr
->value
);
2592 return next
? S_OK
: S_FALSE
;
2595 static HRESULT WINAPI
xmlreader_MoveToAttributeByName(IXmlReader
* iface
,
2597 LPCWSTR namespaceUri
)
2599 FIXME("(%p %p %p): stub\n", iface
, local_name
, namespaceUri
);
2603 static HRESULT WINAPI
xmlreader_MoveToElement(IXmlReader
* iface
)
2605 xmlreader
*This
= impl_from_IXmlReader(iface
);
2606 struct element
*elem
;
2608 TRACE("(%p)\n", This
);
2610 if (!This
->attr_count
) return S_FALSE
;
2613 /* FIXME: support other node types with 'attributes' like DTD */
2614 elem
= LIST_ENTRY(list_head(&This
->elements
), struct element
, entry
);
2617 reader_set_strvalue(This
, StringValue_QualifiedName
, &elem
->qname
);
2618 reader_set_strvalue(This
, StringValue_LocalName
, &elem
->localname
);
2624 static HRESULT WINAPI
xmlreader_GetQualifiedName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
2626 xmlreader
*This
= impl_from_IXmlReader(iface
);
2628 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
2629 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
2630 *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
2634 static HRESULT WINAPI
xmlreader_GetNamespaceUri(IXmlReader
* iface
,
2635 LPCWSTR
*namespaceUri
,
2636 UINT
*namespaceUri_length
)
2638 FIXME("(%p %p %p): stub\n", iface
, namespaceUri
, namespaceUri_length
);
2642 static HRESULT WINAPI
xmlreader_GetLocalName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
2644 xmlreader
*This
= impl_from_IXmlReader(iface
);
2646 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
2647 *name
= This
->strvalues
[StringValue_LocalName
].str
;
2648 if (len
) *len
= This
->strvalues
[StringValue_LocalName
].len
;
2652 static HRESULT WINAPI
xmlreader_GetPrefix(IXmlReader
* iface
, LPCWSTR
*prefix
, UINT
*len
)
2654 xmlreader
*This
= impl_from_IXmlReader(iface
);
2656 TRACE("(%p)->(%p %p)\n", This
, prefix
, len
);
2657 *prefix
= This
->strvalues
[StringValue_Prefix
].str
;
2658 if (len
) *len
= This
->strvalues
[StringValue_Prefix
].len
;
2662 static HRESULT WINAPI
xmlreader_GetValue(IXmlReader
* iface
, const WCHAR
**value
, UINT
*len
)
2664 xmlreader
*reader
= impl_from_IXmlReader(iface
);
2665 strval
*val
= &reader
->strvalues
[StringValue_Value
];
2667 TRACE("(%p)->(%p %p)\n", reader
, value
, len
);
2671 if ((reader
->nodetype
== XmlNodeType_Comment
&& !val
->str
) || is_reader_pending(reader
))
2676 hr
= IXmlReader_Read(iface
, &type
);
2677 if (FAILED(hr
)) return hr
;
2679 /* return if still pending, partially read values are not reported */
2680 if (is_reader_pending(reader
)) return E_PENDING
;
2685 val
->str
= reader_alloc(reader
, (val
->len
+1)*sizeof(WCHAR
));
2686 if (!val
->str
) return E_OUTOFMEMORY
;
2687 memcpy(val
->str
, val
->start
, val
->len
*sizeof(WCHAR
));
2688 val
->str
[val
->len
] = 0;
2692 if (len
) *len
= val
->len
;
2696 static HRESULT WINAPI
xmlreader_ReadValueChunk(IXmlReader
* iface
, WCHAR
*buffer
, UINT chunk_size
, UINT
*read
)
2698 xmlreader
*reader
= impl_from_IXmlReader(iface
);
2699 strval
*val
= &reader
->strvalues
[StringValue_Value
];
2702 TRACE("(%p)->(%p %u %p)\n", reader
, buffer
, chunk_size
, read
);
2704 /* Value is already allocated, chunked reads are not possible. */
2705 if (val
->str
) return S_FALSE
;
2709 len
= min(chunk_size
, val
->len
);
2710 memcpy(buffer
, val
->start
, len
);
2713 if (read
) *read
= len
;
2719 static HRESULT WINAPI
xmlreader_GetBaseUri(IXmlReader
* iface
,
2721 UINT
*baseUri_length
)
2723 FIXME("(%p %p %p): stub\n", iface
, baseUri
, baseUri_length
);
2727 static BOOL WINAPI
xmlreader_IsDefault(IXmlReader
* iface
)
2729 FIXME("(%p): stub\n", iface
);
2733 static BOOL WINAPI
xmlreader_IsEmptyElement(IXmlReader
* iface
)
2735 xmlreader
*This
= impl_from_IXmlReader(iface
);
2736 TRACE("(%p)\n", This
);
2737 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
2738 when current node is start tag of an element */
2739 return (reader_get_nodetype(This
) == XmlNodeType_Element
) ? This
->empty_element
: FALSE
;
2742 static HRESULT WINAPI
xmlreader_GetLineNumber(IXmlReader
* iface
, UINT
*lineNumber
)
2744 xmlreader
*This
= impl_from_IXmlReader(iface
);
2746 TRACE("(%p %p)\n", This
, lineNumber
);
2748 if (!lineNumber
) return E_INVALIDARG
;
2750 *lineNumber
= This
->line
;
2755 static HRESULT WINAPI
xmlreader_GetLinePosition(IXmlReader
* iface
, UINT
*linePosition
)
2757 xmlreader
*This
= impl_from_IXmlReader(iface
);
2759 TRACE("(%p %p)\n", This
, linePosition
);
2761 if (!linePosition
) return E_INVALIDARG
;
2763 *linePosition
= This
->pos
;
2768 static HRESULT WINAPI
xmlreader_GetAttributeCount(IXmlReader
* iface
, UINT
*count
)
2770 xmlreader
*This
= impl_from_IXmlReader(iface
);
2772 TRACE("(%p)->(%p)\n", This
, count
);
2774 if (!count
) return E_INVALIDARG
;
2776 *count
= This
->attr_count
;
2780 static HRESULT WINAPI
xmlreader_GetDepth(IXmlReader
* iface
, UINT
*depth
)
2782 xmlreader
*This
= impl_from_IXmlReader(iface
);
2783 TRACE("(%p)->(%p)\n", This
, depth
);
2784 *depth
= This
->depth
;
2788 static BOOL WINAPI
xmlreader_IsEOF(IXmlReader
* iface
)
2790 FIXME("(%p): stub\n", iface
);
2794 static const struct IXmlReaderVtbl xmlreader_vtbl
=
2796 xmlreader_QueryInterface
,
2800 xmlreader_GetProperty
,
2801 xmlreader_SetProperty
,
2803 xmlreader_GetNodeType
,
2804 xmlreader_MoveToFirstAttribute
,
2805 xmlreader_MoveToNextAttribute
,
2806 xmlreader_MoveToAttributeByName
,
2807 xmlreader_MoveToElement
,
2808 xmlreader_GetQualifiedName
,
2809 xmlreader_GetNamespaceUri
,
2810 xmlreader_GetLocalName
,
2811 xmlreader_GetPrefix
,
2813 xmlreader_ReadValueChunk
,
2814 xmlreader_GetBaseUri
,
2815 xmlreader_IsDefault
,
2816 xmlreader_IsEmptyElement
,
2817 xmlreader_GetLineNumber
,
2818 xmlreader_GetLinePosition
,
2819 xmlreader_GetAttributeCount
,
2824 /** IXmlReaderInput **/
2825 static HRESULT WINAPI
xmlreaderinput_QueryInterface(IXmlReaderInput
*iface
, REFIID riid
, void** ppvObject
)
2827 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2829 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2831 if (IsEqualGUID(riid
, &IID_IXmlReaderInput
) ||
2832 IsEqualGUID(riid
, &IID_IUnknown
))
2838 WARN("interface %s not implemented\n", debugstr_guid(riid
));
2840 return E_NOINTERFACE
;
2843 IUnknown_AddRef(iface
);
2848 static ULONG WINAPI
xmlreaderinput_AddRef(IXmlReaderInput
*iface
)
2850 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2851 ULONG ref
= InterlockedIncrement(&This
->ref
);
2852 TRACE("(%p)->(%d)\n", This
, ref
);
2856 static ULONG WINAPI
xmlreaderinput_Release(IXmlReaderInput
*iface
)
2858 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2859 LONG ref
= InterlockedDecrement(&This
->ref
);
2861 TRACE("(%p)->(%d)\n", This
, ref
);
2865 IMalloc
*imalloc
= This
->imalloc
;
2866 if (This
->input
) IUnknown_Release(This
->input
);
2867 if (This
->stream
) ISequentialStream_Release(This
->stream
);
2868 if (This
->buffer
) free_input_buffer(This
->buffer
);
2869 readerinput_free(This
, This
->baseuri
);
2870 readerinput_free(This
, This
);
2871 if (imalloc
) IMalloc_Release(imalloc
);
2877 static const struct IUnknownVtbl xmlreaderinputvtbl
=
2879 xmlreaderinput_QueryInterface
,
2880 xmlreaderinput_AddRef
,
2881 xmlreaderinput_Release
2884 HRESULT WINAPI
CreateXmlReader(REFIID riid
, void **obj
, IMalloc
*imalloc
)
2889 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid
), obj
, imalloc
);
2891 if (!IsEqualGUID(riid
, &IID_IXmlReader
))
2893 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid
));
2898 reader
= IMalloc_Alloc(imalloc
, sizeof(*reader
));
2900 reader
= heap_alloc(sizeof(*reader
));
2901 if(!reader
) return E_OUTOFMEMORY
;
2903 reader
->IXmlReader_iface
.lpVtbl
= &xmlreader_vtbl
;
2905 reader
->input
= NULL
;
2906 reader
->state
= XmlReadState_Closed
;
2907 reader
->instate
= XmlReadInState_Initial
;
2908 reader
->resumestate
= XmlReadResumeState_Initial
;
2909 reader
->dtdmode
= DtdProcessing_Prohibit
;
2910 reader
->line
= reader
->pos
= 0;
2911 reader
->imalloc
= imalloc
;
2912 if (imalloc
) IMalloc_AddRef(imalloc
);
2913 reader
->nodetype
= XmlNodeType_None
;
2914 list_init(&reader
->attrs
);
2915 reader
->attr_count
= 0;
2916 reader
->attr
= NULL
;
2917 list_init(&reader
->elements
);
2919 reader
->max_depth
= 256;
2920 reader
->empty_element
= FALSE
;
2921 memset(reader
->resume
, 0, sizeof(reader
->resume
));
2923 for (i
= 0; i
< StringValue_Last
; i
++)
2924 reader
->strvalues
[i
] = strval_empty
;
2926 *obj
= &reader
->IXmlReader_iface
;
2928 TRACE("returning iface %p\n", *obj
);
2933 HRESULT WINAPI
CreateXmlReaderInputWithEncodingName(IUnknown
*stream
,
2938 IXmlReaderInput
**ppInput
)
2940 xmlreaderinput
*readerinput
;
2943 TRACE("%p %p %s %d %s %p\n", stream
, imalloc
, wine_dbgstr_w(encoding
),
2944 hint
, wine_dbgstr_w(base_uri
), ppInput
);
2946 if (!stream
|| !ppInput
) return E_INVALIDARG
;
2949 readerinput
= IMalloc_Alloc(imalloc
, sizeof(*readerinput
));
2951 readerinput
= heap_alloc(sizeof(*readerinput
));
2952 if(!readerinput
) return E_OUTOFMEMORY
;
2954 readerinput
->IXmlReaderInput_iface
.lpVtbl
= &xmlreaderinputvtbl
;
2955 readerinput
->ref
= 1;
2956 readerinput
->imalloc
= imalloc
;
2957 readerinput
->stream
= NULL
;
2958 if (imalloc
) IMalloc_AddRef(imalloc
);
2959 readerinput
->encoding
= parse_encoding_name(encoding
, -1);
2960 readerinput
->hint
= hint
;
2961 readerinput
->baseuri
= readerinput_strdupW(readerinput
, base_uri
);
2962 readerinput
->pending
= 0;
2964 hr
= alloc_input_buffer(readerinput
);
2967 readerinput_free(readerinput
, readerinput
->baseuri
);
2968 readerinput_free(readerinput
, readerinput
);
2969 if (imalloc
) IMalloc_Release(imalloc
);
2972 IUnknown_QueryInterface(stream
, &IID_IUnknown
, (void**)&readerinput
->input
);
2974 *ppInput
= &readerinput
->IXmlReaderInput_iface
;
2976 TRACE("returning iface %p\n", *ppInput
);