2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
28 * See Copyright for the status of this software.
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
39 #define XML_DIR_SEP '/'
45 #include <libxml/xmlmemory.h>
46 #include <libxml/threads.h>
47 #include <libxml/globals.h>
48 #include <libxml/tree.h>
49 #include <libxml/parser.h>
50 #include <libxml/parserInternals.h>
51 #include <libxml/valid.h>
52 #include <libxml/entities.h>
53 #include <libxml/xmlerror.h>
54 #include <libxml/encoding.h>
55 #include <libxml/xmlIO.h>
56 #include <libxml/uri.h>
57 #ifdef LIBXML_CATALOG_ENABLED
58 #include <libxml/catalog.h>
60 #ifdef LIBXML_SCHEMAS_ENABLED
61 #include <libxml/xmlschemastypes.h>
62 #include <libxml/relaxng.h>
70 #ifdef HAVE_SYS_STAT_H
84 xmlFatalErr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
, const char *info
);
86 static xmlParserCtxtPtr
87 xmlCreateEntityParserCtxtInternal(const xmlChar
*URL
, const xmlChar
*ID
,
88 const xmlChar
*base
, xmlParserCtxtPtr pctx
);
90 /************************************************************************
92 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
94 ************************************************************************/
96 #define XML_PARSER_BIG_ENTITY 1000
97 #define XML_PARSER_LOT_ENTITY 5000
100 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
101 * replacement over the size in byte of the input indicates that you have
102 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
103 * replacement per byte of input.
105 #define XML_PARSER_NON_LINEAR 10
108 * xmlParserEntityCheck
110 * Function to check non-linear entity expansion behaviour
111 * This is here to detect and stop exponential entity expansion
112 * This is not a limitation of the parser but a safety
113 * boundary feature. It can be disabled with the XML_PARSE_HUGE
117 xmlParserEntityCheck(xmlParserCtxtPtr ctxt
, unsigned long size
,
120 unsigned long consumed
= 0;
122 if ((ctxt
== NULL
) || (ctxt
->options
& XML_PARSE_HUGE
))
124 if (ctxt
->lastError
.code
== XML_ERR_ENTITY_LOOP
)
128 * Do the check based on the replacement size of the entity
130 if (size
< XML_PARSER_BIG_ENTITY
)
134 * A limit on the amount of text data reasonably used
136 if (ctxt
->input
!= NULL
) {
137 consumed
= ctxt
->input
->consumed
+
138 (ctxt
->input
->cur
- ctxt
->input
->base
);
140 consumed
+= ctxt
->sizeentities
;
142 if ((size
< XML_PARSER_NON_LINEAR
* consumed
) &&
143 (ctxt
->nbentities
* 3 < XML_PARSER_NON_LINEAR
* consumed
))
145 } else if (ent
!= NULL
) {
147 * use the number of parsed entities in the replacement
152 * The amount of data parsed counting entities size only once
154 if (ctxt
->input
!= NULL
) {
155 consumed
= ctxt
->input
->consumed
+
156 (ctxt
->input
->cur
- ctxt
->input
->base
);
158 consumed
+= ctxt
->sizeentities
;
161 * Check the density of entities for the amount of data
162 * knowing an entity reference will take at least 3 bytes
164 if (size
* 3 < consumed
* XML_PARSER_NON_LINEAR
)
168 * strange we got no data for checking just return
173 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
180 * arbitrary depth limit for the XML documents that we allow to
181 * process. This is not a limitation of the parser but a safety
182 * boundary feature. It can be disabled with the XML_PARSE_HUGE
185 unsigned int xmlParserMaxDepth
= 256;
190 #define XML_PARSER_BIG_BUFFER_SIZE 300
191 #define XML_PARSER_BUFFER_SIZE 100
192 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
195 * List of XML prefixed PI allowed by W3C specs
198 static const char *xmlW3CPIs
[] = {
204 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
205 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt
,
206 const xmlChar
**str
);
208 static xmlParserErrors
209 xmlParseExternalEntityPrivate(xmlDocPtr doc
, xmlParserCtxtPtr oldctxt
,
210 xmlSAXHandlerPtr sax
,
211 void *user_data
, int depth
, const xmlChar
*URL
,
212 const xmlChar
*ID
, xmlNodePtr
*list
);
215 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt
, int options
,
216 const char *encoding
);
217 #ifdef LIBXML_LEGACY_ENABLED
219 xmlAddEntityReference(xmlEntityPtr ent
, xmlNodePtr firstNode
,
220 xmlNodePtr lastNode
);
221 #endif /* LIBXML_LEGACY_ENABLED */
223 static xmlParserErrors
224 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt
,
225 const xmlChar
*string
, void *user_data
, xmlNodePtr
*lst
);
228 xmlLoadEntityContent(xmlParserCtxtPtr ctxt
, xmlEntityPtr entity
);
230 /************************************************************************
232 * Some factorized error routines *
234 ************************************************************************/
237 * xmlErrAttributeDup:
238 * @ctxt: an XML parser context
239 * @prefix: the attribute prefix
240 * @localname: the attribute localname
242 * Handle a redefinition of attribute error
245 xmlErrAttributeDup(xmlParserCtxtPtr ctxt
, const xmlChar
* prefix
,
246 const xmlChar
* localname
)
248 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
249 (ctxt
->instate
== XML_PARSER_EOF
))
252 ctxt
->errNo
= XML_ERR_ATTRIBUTE_REDEFINED
;
255 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
,
256 XML_ERR_ATTRIBUTE_REDEFINED
, XML_ERR_FATAL
, NULL
, 0,
257 (const char *) localname
, NULL
, NULL
, 0, 0,
258 "Attribute %s redefined\n", localname
);
260 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
,
261 XML_ERR_ATTRIBUTE_REDEFINED
, XML_ERR_FATAL
, NULL
, 0,
262 (const char *) prefix
, (const char *) localname
,
263 NULL
, 0, 0, "Attribute %s:%s redefined\n", prefix
,
266 ctxt
->wellFormed
= 0;
267 if (ctxt
->recovery
== 0)
268 ctxt
->disableSAX
= 1;
274 * @ctxt: an XML parser context
275 * @error: the error number
276 * @extra: extra information string
278 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
281 xmlFatalErr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
, const char *info
)
285 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
286 (ctxt
->instate
== XML_PARSER_EOF
))
289 case XML_ERR_INVALID_HEX_CHARREF
:
290 errmsg
= "CharRef: invalid hexadecimal value\n";
292 case XML_ERR_INVALID_DEC_CHARREF
:
293 errmsg
= "CharRef: invalid decimal value\n";
295 case XML_ERR_INVALID_CHARREF
:
296 errmsg
= "CharRef: invalid value\n";
298 case XML_ERR_INTERNAL_ERROR
:
299 errmsg
= "internal error";
301 case XML_ERR_PEREF_AT_EOF
:
302 errmsg
= "PEReference at end of document\n";
304 case XML_ERR_PEREF_IN_PROLOG
:
305 errmsg
= "PEReference in prolog\n";
307 case XML_ERR_PEREF_IN_EPILOG
:
308 errmsg
= "PEReference in epilog\n";
310 case XML_ERR_PEREF_NO_NAME
:
311 errmsg
= "PEReference: no name\n";
313 case XML_ERR_PEREF_SEMICOL_MISSING
:
314 errmsg
= "PEReference: expecting ';'\n";
316 case XML_ERR_ENTITY_LOOP
:
317 errmsg
= "Detected an entity reference loop\n";
319 case XML_ERR_ENTITY_NOT_STARTED
:
320 errmsg
= "EntityValue: \" or ' expected\n";
322 case XML_ERR_ENTITY_PE_INTERNAL
:
323 errmsg
= "PEReferences forbidden in internal subset\n";
325 case XML_ERR_ENTITY_NOT_FINISHED
:
326 errmsg
= "EntityValue: \" or ' expected\n";
328 case XML_ERR_ATTRIBUTE_NOT_STARTED
:
329 errmsg
= "AttValue: \" or ' expected\n";
331 case XML_ERR_LT_IN_ATTRIBUTE
:
332 errmsg
= "Unescaped '<' not allowed in attributes values\n";
334 case XML_ERR_LITERAL_NOT_STARTED
:
335 errmsg
= "SystemLiteral \" or ' expected\n";
337 case XML_ERR_LITERAL_NOT_FINISHED
:
338 errmsg
= "Unfinished System or Public ID \" or ' expected\n";
340 case XML_ERR_MISPLACED_CDATA_END
:
341 errmsg
= "Sequence ']]>' not allowed in content\n";
343 case XML_ERR_URI_REQUIRED
:
344 errmsg
= "SYSTEM or PUBLIC, the URI is missing\n";
346 case XML_ERR_PUBID_REQUIRED
:
347 errmsg
= "PUBLIC, the Public Identifier is missing\n";
349 case XML_ERR_HYPHEN_IN_COMMENT
:
350 errmsg
= "Comment must not contain '--' (double-hyphen)\n";
352 case XML_ERR_PI_NOT_STARTED
:
353 errmsg
= "xmlParsePI : no target name\n";
355 case XML_ERR_RESERVED_XML_NAME
:
356 errmsg
= "Invalid PI name\n";
358 case XML_ERR_NOTATION_NOT_STARTED
:
359 errmsg
= "NOTATION: Name expected here\n";
361 case XML_ERR_NOTATION_NOT_FINISHED
:
362 errmsg
= "'>' required to close NOTATION declaration\n";
364 case XML_ERR_VALUE_REQUIRED
:
365 errmsg
= "Entity value required\n";
367 case XML_ERR_URI_FRAGMENT
:
368 errmsg
= "Fragment not allowed";
370 case XML_ERR_ATTLIST_NOT_STARTED
:
371 errmsg
= "'(' required to start ATTLIST enumeration\n";
373 case XML_ERR_NMTOKEN_REQUIRED
:
374 errmsg
= "NmToken expected in ATTLIST enumeration\n";
376 case XML_ERR_ATTLIST_NOT_FINISHED
:
377 errmsg
= "')' required to finish ATTLIST enumeration\n";
379 case XML_ERR_MIXED_NOT_STARTED
:
380 errmsg
= "MixedContentDecl : '|' or ')*' expected\n";
382 case XML_ERR_PCDATA_REQUIRED
:
383 errmsg
= "MixedContentDecl : '#PCDATA' expected\n";
385 case XML_ERR_ELEMCONTENT_NOT_STARTED
:
386 errmsg
= "ContentDecl : Name or '(' expected\n";
388 case XML_ERR_ELEMCONTENT_NOT_FINISHED
:
389 errmsg
= "ContentDecl : ',' '|' or ')' expected\n";
391 case XML_ERR_PEREF_IN_INT_SUBSET
:
393 "PEReference: forbidden within markup decl in internal subset\n";
395 case XML_ERR_GT_REQUIRED
:
396 errmsg
= "expected '>'\n";
398 case XML_ERR_CONDSEC_INVALID
:
399 errmsg
= "XML conditional section '[' expected\n";
401 case XML_ERR_EXT_SUBSET_NOT_FINISHED
:
402 errmsg
= "Content error in the external subset\n";
404 case XML_ERR_CONDSEC_INVALID_KEYWORD
:
406 "conditional section INCLUDE or IGNORE keyword expected\n";
408 case XML_ERR_CONDSEC_NOT_FINISHED
:
409 errmsg
= "XML conditional section not closed\n";
411 case XML_ERR_XMLDECL_NOT_STARTED
:
412 errmsg
= "Text declaration '<?xml' required\n";
414 case XML_ERR_XMLDECL_NOT_FINISHED
:
415 errmsg
= "parsing XML declaration: '?>' expected\n";
417 case XML_ERR_EXT_ENTITY_STANDALONE
:
418 errmsg
= "external parsed entities cannot be standalone\n";
420 case XML_ERR_ENTITYREF_SEMICOL_MISSING
:
421 errmsg
= "EntityRef: expecting ';'\n";
423 case XML_ERR_DOCTYPE_NOT_FINISHED
:
424 errmsg
= "DOCTYPE improperly terminated\n";
426 case XML_ERR_LTSLASH_REQUIRED
:
427 errmsg
= "EndTag: '</' not found\n";
429 case XML_ERR_EQUAL_REQUIRED
:
430 errmsg
= "expected '='\n";
432 case XML_ERR_STRING_NOT_CLOSED
:
433 errmsg
= "String not closed expecting \" or '\n";
435 case XML_ERR_STRING_NOT_STARTED
:
436 errmsg
= "String not started expecting ' or \"\n";
438 case XML_ERR_ENCODING_NAME
:
439 errmsg
= "Invalid XML encoding name\n";
441 case XML_ERR_STANDALONE_VALUE
:
442 errmsg
= "standalone accepts only 'yes' or 'no'\n";
444 case XML_ERR_DOCUMENT_EMPTY
:
445 errmsg
= "Document is empty\n";
447 case XML_ERR_DOCUMENT_END
:
448 errmsg
= "Extra content at the end of the document\n";
450 case XML_ERR_NOT_WELL_BALANCED
:
451 errmsg
= "chunk is not well balanced\n";
453 case XML_ERR_EXTRA_CONTENT
:
454 errmsg
= "extra content at the end of well balanced chunk\n";
456 case XML_ERR_VERSION_MISSING
:
457 errmsg
= "Malformed declaration expecting version\n";
465 errmsg
= "Unregistered error message\n";
469 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
, error
,
470 XML_ERR_FATAL
, NULL
, 0, info
, NULL
, NULL
, 0, 0, errmsg
,
473 ctxt
->wellFormed
= 0;
474 if (ctxt
->recovery
== 0)
475 ctxt
->disableSAX
= 1;
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
485 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
488 xmlFatalErrMsg(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
491 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
492 (ctxt
->instate
== XML_PARSER_EOF
))
496 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_PARSER
, error
,
497 XML_ERR_FATAL
, NULL
, 0, NULL
, NULL
, NULL
, 0, 0, "%s", msg
);
499 ctxt
->wellFormed
= 0;
500 if (ctxt
->recovery
== 0)
501 ctxt
->disableSAX
= 1;
507 * @ctxt: an XML parser context
508 * @error: the error number
509 * @msg: the error message
516 xmlWarningMsg(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
517 const char *msg
, const xmlChar
*str1
, const xmlChar
*str2
)
519 xmlStructuredErrorFunc schannel
= NULL
;
521 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
522 (ctxt
->instate
== XML_PARSER_EOF
))
524 if ((ctxt
!= NULL
) && (ctxt
->sax
!= NULL
) &&
525 (ctxt
->sax
->initialized
== XML_SAX2_MAGIC
))
526 schannel
= ctxt
->sax
->serror
;
528 __xmlRaiseError(schannel
,
529 (ctxt
->sax
) ? ctxt
->sax
->warning
: NULL
,
531 ctxt
, NULL
, XML_FROM_PARSER
, error
,
532 XML_ERR_WARNING
, NULL
, 0,
533 (const char *) str1
, (const char *) str2
, NULL
, 0, 0,
534 msg
, (const char *) str1
, (const char *) str2
);
536 __xmlRaiseError(schannel
, NULL
, NULL
,
537 ctxt
, NULL
, XML_FROM_PARSER
, error
,
538 XML_ERR_WARNING
, NULL
, 0,
539 (const char *) str1
, (const char *) str2
, NULL
, 0, 0,
540 msg
, (const char *) str1
, (const char *) str2
);
546 * @ctxt: an XML parser context
547 * @error: the error number
548 * @msg: the error message
551 * Handle a validity error.
554 xmlValidityError(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
555 const char *msg
, const xmlChar
*str1
, const xmlChar
*str2
)
557 xmlStructuredErrorFunc schannel
= NULL
;
559 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
560 (ctxt
->instate
== XML_PARSER_EOF
))
564 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->initialized
== XML_SAX2_MAGIC
))
565 schannel
= ctxt
->sax
->serror
;
568 __xmlRaiseError(schannel
,
569 ctxt
->vctxt
.error
, ctxt
->vctxt
.userData
,
570 ctxt
, NULL
, XML_FROM_DTD
, error
,
571 XML_ERR_ERROR
, NULL
, 0, (const char *) str1
,
572 (const char *) str2
, NULL
, 0, 0,
573 msg
, (const char *) str1
, (const char *) str2
);
576 __xmlRaiseError(schannel
, NULL
, NULL
,
577 ctxt
, NULL
, XML_FROM_DTD
, error
,
578 XML_ERR_ERROR
, NULL
, 0, (const char *) str1
,
579 (const char *) str2
, NULL
, 0, 0,
580 msg
, (const char *) str1
, (const char *) str2
);
586 * @ctxt: an XML parser context
587 * @error: the error number
588 * @msg: the error message
589 * @val: an integer value
591 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
594 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
595 const char *msg
, int val
)
597 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
598 (ctxt
->instate
== XML_PARSER_EOF
))
602 __xmlRaiseError(NULL
, NULL
, NULL
,
603 ctxt
, NULL
, XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
604 NULL
, 0, NULL
, NULL
, NULL
, val
, 0, msg
, val
);
606 ctxt
->wellFormed
= 0;
607 if (ctxt
->recovery
== 0)
608 ctxt
->disableSAX
= 1;
613 * xmlFatalErrMsgStrIntStr:
614 * @ctxt: an XML parser context
615 * @error: the error number
616 * @msg: the error message
617 * @str1: a string info
618 * @val: an integer value
619 * @str2: a string info
621 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
624 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
625 const char *msg
, const xmlChar
*str1
, int val
,
628 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
629 (ctxt
->instate
== XML_PARSER_EOF
))
633 __xmlRaiseError(NULL
, NULL
, NULL
,
634 ctxt
, NULL
, XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
635 NULL
, 0, (const char *) str1
, (const char *) str2
,
636 NULL
, val
, 0, msg
, str1
, val
, str2
);
638 ctxt
->wellFormed
= 0;
639 if (ctxt
->recovery
== 0)
640 ctxt
->disableSAX
= 1;
646 * @ctxt: an XML parser context
647 * @error: the error number
648 * @msg: the error message
649 * @val: a string value
651 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
654 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
655 const char *msg
, const xmlChar
* val
)
657 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
658 (ctxt
->instate
== XML_PARSER_EOF
))
662 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
,
663 XML_FROM_PARSER
, error
, XML_ERR_FATAL
,
664 NULL
, 0, (const char *) val
, NULL
, NULL
, 0, 0, msg
,
667 ctxt
->wellFormed
= 0;
668 if (ctxt
->recovery
== 0)
669 ctxt
->disableSAX
= 1;
675 * @ctxt: an XML parser context
676 * @error: the error number
677 * @msg: the error message
678 * @val: a string value
680 * Handle a non fatal parser error
683 xmlErrMsgStr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
684 const char *msg
, const xmlChar
* val
)
686 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
687 (ctxt
->instate
== XML_PARSER_EOF
))
691 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
,
692 XML_FROM_PARSER
, error
, XML_ERR_ERROR
,
693 NULL
, 0, (const char *) val
, NULL
, NULL
, 0, 0, msg
,
699 * @ctxt: an XML parser context
700 * @error: the error number
702 * @info1: extra information string
703 * @info2: extra information string
705 * Handle a namespace error; the document is flagged as not namespace well-formed
708 xmlNsErr(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
710 const xmlChar
* info1
, const xmlChar
* info2
,
711 const xmlChar
* info3
)
713 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
714 (ctxt
->instate
== XML_PARSER_EOF
))
718 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_NAMESPACE
, error
,
719 XML_ERR_ERROR
, NULL
, 0, (const char *) info1
,
720 (const char *) info2
, (const char *) info3
, 0, 0, msg
,
721 info1
, info2
, info3
);
723 ctxt
->nsWellFormed
= 0;
728 * @ctxt: an XML parser context
729 * @error: the error number
731 * @info1: extra information string
732 * @info2: extra information string
734 * Handle a namespace warning
737 xmlNsWarn(xmlParserCtxtPtr ctxt
, xmlParserErrors error
,
739 const xmlChar
* info1
, const xmlChar
* info2
,
740 const xmlChar
* info3
)
742 if ((ctxt
!= NULL
) && (ctxt
->disableSAX
!= 0) &&
743 (ctxt
->instate
== XML_PARSER_EOF
))
745 __xmlRaiseError(NULL
, NULL
, NULL
, ctxt
, NULL
, XML_FROM_NAMESPACE
, error
,
746 XML_ERR_WARNING
, NULL
, 0, (const char *) info1
,
747 (const char *) info2
, (const char *) info3
, 0, 0, msg
,
748 info1
, info2
, info3
);
751 /************************************************************************
753 * Library wide options *
755 ************************************************************************/
759 * @feature: the feature to be examined
761 * Examines if the library has been compiled with a given feature.
763 * Returns a non-zero value if the feature exists, otherwise zero.
764 * Returns zero (0) if the feature does not exist or an
765 * unknown feature is requested, non-zero otherwise.
768 xmlHasFeature(xmlFeature feature
)
771 case XML_WITH_THREAD
:
772 #ifdef LIBXML_THREAD_ENABLED
778 #ifdef LIBXML_TREE_ENABLED
783 case XML_WITH_OUTPUT
:
784 #ifdef LIBXML_OUTPUT_ENABLED
790 #ifdef LIBXML_PUSH_ENABLED
795 case XML_WITH_READER
:
796 #ifdef LIBXML_READER_ENABLED
801 case XML_WITH_PATTERN
:
802 #ifdef LIBXML_PATTERN_ENABLED
807 case XML_WITH_WRITER
:
808 #ifdef LIBXML_WRITER_ENABLED
814 #ifdef LIBXML_SAX1_ENABLED
820 #ifdef LIBXML_FTP_ENABLED
826 #ifdef LIBXML_HTTP_ENABLED
832 #ifdef LIBXML_VALID_ENABLED
838 #ifdef LIBXML_HTML_ENABLED
843 case XML_WITH_LEGACY
:
844 #ifdef LIBXML_LEGACY_ENABLED
850 #ifdef LIBXML_C14N_ENABLED
855 case XML_WITH_CATALOG
:
856 #ifdef LIBXML_CATALOG_ENABLED
862 #ifdef LIBXML_XPATH_ENABLED
868 #ifdef LIBXML_XPTR_ENABLED
873 case XML_WITH_XINCLUDE
:
874 #ifdef LIBXML_XINCLUDE_ENABLED
880 #ifdef LIBXML_ICONV_ENABLED
885 case XML_WITH_ISO8859X
:
886 #ifdef LIBXML_ISO8859X_ENABLED
891 case XML_WITH_UNICODE
:
892 #ifdef LIBXML_UNICODE_ENABLED
897 case XML_WITH_REGEXP
:
898 #ifdef LIBXML_REGEXP_ENABLED
903 case XML_WITH_AUTOMATA
:
904 #ifdef LIBXML_AUTOMATA_ENABLED
910 #ifdef LIBXML_EXPR_ENABLED
915 case XML_WITH_SCHEMAS
:
916 #ifdef LIBXML_SCHEMAS_ENABLED
921 case XML_WITH_SCHEMATRON
:
922 #ifdef LIBXML_SCHEMATRON_ENABLED
927 case XML_WITH_MODULES
:
928 #ifdef LIBXML_MODULES_ENABLED
934 #ifdef LIBXML_DEBUG_ENABLED
939 case XML_WITH_DEBUG_MEM
:
940 #ifdef DEBUG_MEMORY_LOCATION
945 case XML_WITH_DEBUG_RUN
:
946 #ifdef LIBXML_DEBUG_RUNTIME
952 #ifdef LIBXML_ZLIB_ENABLED
963 /************************************************************************
965 * SAX2 defaulted attributes handling *
967 ************************************************************************/
971 * @ctxt: an XML parser context
973 * Do the SAX2 detection and specific initialization
976 xmlDetectSAX2(xmlParserCtxtPtr ctxt
) {
977 if (ctxt
== NULL
) return;
978 #ifdef LIBXML_SAX1_ENABLED
979 if ((ctxt
->sax
) && (ctxt
->sax
->initialized
== XML_SAX2_MAGIC
) &&
980 ((ctxt
->sax
->startElementNs
!= NULL
) ||
981 (ctxt
->sax
->endElementNs
!= NULL
))) ctxt
->sax2
= 1;
984 #endif /* LIBXML_SAX1_ENABLED */
986 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
987 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
988 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
989 if ((ctxt
->str_xml
==NULL
) || (ctxt
->str_xmlns
==NULL
) ||
990 (ctxt
->str_xml_ns
== NULL
)) {
991 xmlErrMemory(ctxt
, NULL
);
995 typedef struct _xmlDefAttrs xmlDefAttrs
;
996 typedef xmlDefAttrs
*xmlDefAttrsPtr
;
997 struct _xmlDefAttrs
{
998 int nbAttrs
; /* number of defaulted attributes on that element */
999 int maxAttrs
; /* the size of the array */
1000 const xmlChar
*values
[5]; /* array of localname/prefix/values/external */
1004 * xmlAttrNormalizeSpace:
1005 * @src: the source string
1006 * @dst: the target string
1008 * Normalize the space in non CDATA attribute values:
1009 * If the attribute type is not CDATA, then the XML processor MUST further
1010 * process the normalized attribute value by discarding any leading and
1011 * trailing space (#x20) characters, and by replacing sequences of space
1012 * (#x20) characters by a single space (#x20) character.
1013 * Note that the size of dst needs to be at least that of src, and if one doesn't need
1014 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1015 * passing src as dst is just fine.
1017 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1021 xmlAttrNormalizeSpace(const xmlChar
*src
, xmlChar
*dst
)
1023 if ((src
== NULL
) || (dst
== NULL
))
1026 while (*src
== 0x20) src
++;
1029 while (*src
== 0x20) src
++;
1043 * xmlAttrNormalizeSpace2:
1044 * @src: the source string
1046 * Normalize the space in non CDATA attribute values, a slightly more complex
1047 * front end to avoid allocation problems when running on attribute values
1048 * coming from the input.
1050 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1053 static const xmlChar
*
1054 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt
, xmlChar
*src
, int *len
)
1057 int remove_head
= 0;
1058 int need_realloc
= 0;
1061 if ((ctxt
== NULL
) || (src
== NULL
) || (len
== NULL
))
1068 while (*cur
== 0x20) {
1075 if ((*cur
== 0x20) || (*cur
== 0)) {
1085 ret
= xmlStrndup(src
+ remove_head
, i
- remove_head
+ 1);
1087 xmlErrMemory(ctxt
, NULL
);
1090 xmlAttrNormalizeSpace(ret
, ret
);
1091 *len
= (int) strlen((const char *)ret
);
1093 } else if (remove_head
) {
1094 *len
-= remove_head
;
1095 memmove(src
, src
+ remove_head
, 1 + *len
);
1103 * @ctxt: an XML parser context
1104 * @fullname: the element fullname
1105 * @fullattr: the attribute fullname
1106 * @value: the attribute value
1108 * Add a defaulted attribute for an element
1111 xmlAddDefAttrs(xmlParserCtxtPtr ctxt
,
1112 const xmlChar
*fullname
,
1113 const xmlChar
*fullattr
,
1114 const xmlChar
*value
) {
1115 xmlDefAttrsPtr defaults
;
1117 const xmlChar
*name
;
1118 const xmlChar
*prefix
;
1121 * Allows to detect attribute redefinitions
1123 if (ctxt
->attsSpecial
!= NULL
) {
1124 if (xmlHashLookup2(ctxt
->attsSpecial
, fullname
, fullattr
) != NULL
)
1128 if (ctxt
->attsDefault
== NULL
) {
1129 ctxt
->attsDefault
= xmlHashCreateDict(10, ctxt
->dict
);
1130 if (ctxt
->attsDefault
== NULL
)
1135 * split the element name into prefix:localname , the string found
1136 * are within the DTD and then not associated to namespace names.
1138 name
= xmlSplitQName3(fullname
, &len
);
1140 name
= xmlDictLookup(ctxt
->dict
, fullname
, -1);
1143 name
= xmlDictLookup(ctxt
->dict
, name
, -1);
1144 prefix
= xmlDictLookup(ctxt
->dict
, fullname
, len
);
1148 * make sure there is some storage
1150 defaults
= xmlHashLookup2(ctxt
->attsDefault
, name
, prefix
);
1151 if (defaults
== NULL
) {
1152 defaults
= (xmlDefAttrsPtr
) xmlMalloc(sizeof(xmlDefAttrs
) +
1153 (4 * 5) * sizeof(const xmlChar
*));
1154 if (defaults
== NULL
)
1156 defaults
->nbAttrs
= 0;
1157 defaults
->maxAttrs
= 4;
1158 if (xmlHashUpdateEntry2(ctxt
->attsDefault
, name
, prefix
,
1159 defaults
, NULL
) < 0) {
1163 } else if (defaults
->nbAttrs
>= defaults
->maxAttrs
) {
1164 xmlDefAttrsPtr temp
;
1166 temp
= (xmlDefAttrsPtr
) xmlRealloc(defaults
, sizeof(xmlDefAttrs
) +
1167 (2 * defaults
->maxAttrs
* 5) * sizeof(const xmlChar
*));
1171 defaults
->maxAttrs
*= 2;
1172 if (xmlHashUpdateEntry2(ctxt
->attsDefault
, name
, prefix
,
1173 defaults
, NULL
) < 0) {
1180 * Split the element name into prefix:localname , the string found
1181 * are within the DTD and then not associated to namespace names.
1183 name
= xmlSplitQName3(fullattr
, &len
);
1185 name
= xmlDictLookup(ctxt
->dict
, fullattr
, -1);
1188 name
= xmlDictLookup(ctxt
->dict
, name
, -1);
1189 prefix
= xmlDictLookup(ctxt
->dict
, fullattr
, len
);
1192 defaults
->values
[5 * defaults
->nbAttrs
] = name
;
1193 defaults
->values
[5 * defaults
->nbAttrs
+ 1] = prefix
;
1194 /* intern the string and precompute the end */
1195 len
= xmlStrlen(value
);
1196 value
= xmlDictLookup(ctxt
->dict
, value
, len
);
1197 defaults
->values
[5 * defaults
->nbAttrs
+ 2] = value
;
1198 defaults
->values
[5 * defaults
->nbAttrs
+ 3] = value
+ len
;
1200 defaults
->values
[5 * defaults
->nbAttrs
+ 4] = BAD_CAST
"external";
1202 defaults
->values
[5 * defaults
->nbAttrs
+ 4] = NULL
;
1203 defaults
->nbAttrs
++;
1208 xmlErrMemory(ctxt
, NULL
);
1213 * xmlAddSpecialAttr:
1214 * @ctxt: an XML parser context
1215 * @fullname: the element fullname
1216 * @fullattr: the attribute fullname
1217 * @type: the attribute type
1219 * Register this attribute type
1222 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt
,
1223 const xmlChar
*fullname
,
1224 const xmlChar
*fullattr
,
1227 if (ctxt
->attsSpecial
== NULL
) {
1228 ctxt
->attsSpecial
= xmlHashCreateDict(10, ctxt
->dict
);
1229 if (ctxt
->attsSpecial
== NULL
)
1233 if (xmlHashLookup2(ctxt
->attsSpecial
, fullname
, fullattr
) != NULL
)
1236 xmlHashAddEntry2(ctxt
->attsSpecial
, fullname
, fullattr
,
1237 (void *) (long) type
);
1241 xmlErrMemory(ctxt
, NULL
);
1246 * xmlCleanSpecialAttrCallback:
1248 * Removes CDATA attributes from the special attribute table
1251 xmlCleanSpecialAttrCallback(void *payload
, void *data
,
1252 const xmlChar
*fullname
, const xmlChar
*fullattr
,
1253 const xmlChar
*unused ATTRIBUTE_UNUSED
) {
1254 xmlParserCtxtPtr ctxt
= (xmlParserCtxtPtr
) data
;
1256 if (((long) payload
) == XML_ATTRIBUTE_CDATA
) {
1257 xmlHashRemoveEntry2(ctxt
->attsSpecial
, fullname
, fullattr
, NULL
);
1262 * xmlCleanSpecialAttr:
1263 * @ctxt: an XML parser context
1265 * Trim the list of attributes defined to remove all those of type
1266 * CDATA as they are not special. This call should be done when finishing
1267 * to parse the DTD and before starting to parse the document root.
1270 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt
)
1272 if (ctxt
->attsSpecial
== NULL
)
1275 xmlHashScanFull(ctxt
->attsSpecial
, xmlCleanSpecialAttrCallback
, ctxt
);
1277 if (xmlHashSize(ctxt
->attsSpecial
) == 0) {
1278 xmlHashFree(ctxt
->attsSpecial
, NULL
);
1279 ctxt
->attsSpecial
= NULL
;
1285 * xmlCheckLanguageID:
1286 * @lang: pointer to the string value
1288 * Checks that the value conforms to the LanguageID production:
1290 * NOTE: this is somewhat deprecated, those productions were removed from
1291 * the XML Second edition.
1293 * [33] LanguageID ::= Langcode ('-' Subcode)*
1294 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1295 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1296 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1297 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1298 * [38] Subcode ::= ([a-z] | [A-Z])+
1300 * Returns 1 if correct 0 otherwise
1303 xmlCheckLanguageID(const xmlChar
* lang
)
1305 const xmlChar
*cur
= lang
;
1309 if (((cur
[0] == 'i') && (cur
[1] == '-')) ||
1310 ((cur
[0] == 'I') && (cur
[1] == '-'))) {
1315 while (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) || /* non input consuming */
1316 ((cur
[0] >= 'a') && (cur
[0] <= 'z')))
1318 } else if (((cur
[0] == 'x') && (cur
[1] == '-')) ||
1319 ((cur
[0] == 'X') && (cur
[1] == '-'))) {
1324 while (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) || /* non input consuming */
1325 ((cur
[0] >= 'a') && (cur
[0] <= 'z')))
1327 } else if (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) ||
1328 ((cur
[0] >= 'a') && (cur
[0] <= 'z'))) {
1333 if (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) ||
1334 ((cur
[0] >= 'a') && (cur
[0] <= 'z')))
1340 while (cur
[0] != 0) { /* non input consuming */
1344 if (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) ||
1345 ((cur
[0] >= 'a') && (cur
[0] <= 'z')))
1349 while (((cur
[0] >= 'A') && (cur
[0] <= 'Z')) || /* non input consuming */
1350 ((cur
[0] >= 'a') && (cur
[0] <= 'z')))
1356 /************************************************************************
1358 * Parser stacks related functions and macros *
1360 ************************************************************************/
1362 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt
,
1363 const xmlChar
** str
);
1368 * @ctxt: an XML parser context
1369 * @prefix: the namespace prefix or NULL
1370 * @URL: the namespace name
1372 * Pushes a new parser namespace on top of the ns stack
1374 * Returns -1 in case of error, -2 if the namespace should be discarded
1375 * and the index in the stack otherwise.
1378 nsPush(xmlParserCtxtPtr ctxt
, const xmlChar
*prefix
, const xmlChar
*URL
)
1380 if (ctxt
->options
& XML_PARSE_NSCLEAN
) {
1382 for (i
= 0;i
< ctxt
->nsNr
;i
+= 2) {
1383 if (ctxt
->nsTab
[i
] == prefix
) {
1385 if (ctxt
->nsTab
[i
+ 1] == URL
)
1387 /* out of scope keep it */
1392 if ((ctxt
->nsMax
== 0) || (ctxt
->nsTab
== NULL
)) {
1395 ctxt
->nsTab
= (const xmlChar
**)
1396 xmlMalloc(ctxt
->nsMax
* sizeof(xmlChar
*));
1397 if (ctxt
->nsTab
== NULL
) {
1398 xmlErrMemory(ctxt
, NULL
);
1402 } else if (ctxt
->nsNr
>= ctxt
->nsMax
) {
1403 const xmlChar
** tmp
;
1405 tmp
= (const xmlChar
**) xmlRealloc((char *) ctxt
->nsTab
,
1406 ctxt
->nsMax
* sizeof(ctxt
->nsTab
[0]));
1408 xmlErrMemory(ctxt
, NULL
);
1414 ctxt
->nsTab
[ctxt
->nsNr
++] = prefix
;
1415 ctxt
->nsTab
[ctxt
->nsNr
++] = URL
;
1416 return (ctxt
->nsNr
);
1420 * @ctxt: an XML parser context
1421 * @nr: the number to pop
1423 * Pops the top @nr parser prefix/namespace from the ns stack
1425 * Returns the number of namespaces removed
1428 nsPop(xmlParserCtxtPtr ctxt
, int nr
)
1432 if (ctxt
->nsTab
== NULL
) return(0);
1433 if (ctxt
->nsNr
< nr
) {
1434 xmlGenericError(xmlGenericErrorContext
, "Pbm popping %d NS\n", nr
);
1437 if (ctxt
->nsNr
<= 0)
1440 for (i
= 0;i
< nr
;i
++) {
1442 ctxt
->nsTab
[ctxt
->nsNr
] = NULL
;
1449 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt
, int nr
) {
1450 const xmlChar
**atts
;
1454 if (ctxt
->atts
== NULL
) {
1455 maxatts
= 55; /* allow for 10 attrs by default */
1456 atts
= (const xmlChar
**)
1457 xmlMalloc(maxatts
* sizeof(xmlChar
*));
1458 if (atts
== NULL
) goto mem_error
;
1460 attallocs
= (int *) xmlMalloc((maxatts
/ 5) * sizeof(int));
1461 if (attallocs
== NULL
) goto mem_error
;
1462 ctxt
->attallocs
= attallocs
;
1463 ctxt
->maxatts
= maxatts
;
1464 } else if (nr
+ 5 > ctxt
->maxatts
) {
1465 maxatts
= (nr
+ 5) * 2;
1466 atts
= (const xmlChar
**) xmlRealloc((void *) ctxt
->atts
,
1467 maxatts
* sizeof(const xmlChar
*));
1468 if (atts
== NULL
) goto mem_error
;
1470 attallocs
= (int *) xmlRealloc((void *) ctxt
->attallocs
,
1471 (maxatts
/ 5) * sizeof(int));
1472 if (attallocs
== NULL
) goto mem_error
;
1473 ctxt
->attallocs
= attallocs
;
1474 ctxt
->maxatts
= maxatts
;
1476 return(ctxt
->maxatts
);
1478 xmlErrMemory(ctxt
, NULL
);
1484 * @ctxt: an XML parser context
1485 * @value: the parser input
1487 * Pushes a new parser input on top of the input stack
1489 * Returns -1 in case of error, the index in the stack otherwise
1492 inputPush(xmlParserCtxtPtr ctxt
, xmlParserInputPtr value
)
1494 if ((ctxt
== NULL
) || (value
== NULL
))
1496 if (ctxt
->inputNr
>= ctxt
->inputMax
) {
1497 ctxt
->inputMax
*= 2;
1499 (xmlParserInputPtr
*) xmlRealloc(ctxt
->inputTab
,
1501 sizeof(ctxt
->inputTab
[0]));
1502 if (ctxt
->inputTab
== NULL
) {
1503 xmlErrMemory(ctxt
, NULL
);
1504 xmlFreeInputStream(value
);
1505 ctxt
->inputMax
/= 2;
1510 ctxt
->inputTab
[ctxt
->inputNr
] = value
;
1511 ctxt
->input
= value
;
1512 return (ctxt
->inputNr
++);
1516 * @ctxt: an XML parser context
1518 * Pops the top parser input from the input stack
1520 * Returns the input just removed
1523 inputPop(xmlParserCtxtPtr ctxt
)
1525 xmlParserInputPtr ret
;
1529 if (ctxt
->inputNr
<= 0)
1532 if (ctxt
->inputNr
> 0)
1533 ctxt
->input
= ctxt
->inputTab
[ctxt
->inputNr
- 1];
1536 ret
= ctxt
->inputTab
[ctxt
->inputNr
];
1537 ctxt
->inputTab
[ctxt
->inputNr
] = NULL
;
1542 * @ctxt: an XML parser context
1543 * @value: the element node
1545 * Pushes a new element node on top of the node stack
1547 * Returns 0 if @ctxt is NULL, -1 in case of error, the index in the stack otherwise
1550 nodePush(xmlParserCtxtPtr ctxt
, xmlNodePtr value
)
1552 if (ctxt
== NULL
) return(0);
1553 if (ctxt
->nodeNr
>= ctxt
->nodeMax
) {
1556 tmp
= (xmlNodePtr
*) xmlRealloc(ctxt
->nodeTab
,
1558 sizeof(ctxt
->nodeTab
[0]));
1560 xmlErrMemory(ctxt
, NULL
);
1563 ctxt
->nodeTab
= tmp
;
1566 if ((((unsigned int) ctxt
->nodeNr
) > xmlParserMaxDepth
) &&
1567 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
1568 xmlFatalErrMsgInt(ctxt
, XML_ERR_INTERNAL_ERROR
,
1569 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1571 ctxt
->instate
= XML_PARSER_EOF
;
1574 ctxt
->nodeTab
[ctxt
->nodeNr
] = value
;
1576 return (ctxt
->nodeNr
++);
1581 * @ctxt: an XML parser context
1583 * Pops the top element node from the node stack
1585 * Returns the node just removed
1588 nodePop(xmlParserCtxtPtr ctxt
)
1592 if (ctxt
== NULL
) return(NULL
);
1593 if (ctxt
->nodeNr
<= 0)
1596 if (ctxt
->nodeNr
> 0)
1597 ctxt
->node
= ctxt
->nodeTab
[ctxt
->nodeNr
- 1];
1600 ret
= ctxt
->nodeTab
[ctxt
->nodeNr
];
1601 ctxt
->nodeTab
[ctxt
->nodeNr
] = NULL
;
1605 #ifdef LIBXML_PUSH_ENABLED
1608 * @ctxt: an XML parser context
1609 * @value: the element name
1610 * @prefix: the element prefix
1611 * @URI: the element namespace name
1613 * Pushes a new element name/prefix/URL on top of the name stack
1615 * Returns -1 in case of error, the index in the stack otherwise
1618 nameNsPush(xmlParserCtxtPtr ctxt
, const xmlChar
* value
,
1619 const xmlChar
*prefix
, const xmlChar
*URI
, int nsNr
)
1621 if (ctxt
->nameNr
>= ctxt
->nameMax
) {
1622 const xmlChar
* *tmp
;
1625 tmp
= (const xmlChar
* *) xmlRealloc((xmlChar
* *)ctxt
->nameTab
,
1627 sizeof(ctxt
->nameTab
[0]));
1632 ctxt
->nameTab
= tmp
;
1633 tmp2
= (void **) xmlRealloc((void * *)ctxt
->pushTab
,
1635 sizeof(ctxt
->pushTab
[0]));
1640 ctxt
->pushTab
= tmp2
;
1642 ctxt
->nameTab
[ctxt
->nameNr
] = value
;
1644 ctxt
->pushTab
[ctxt
->nameNr
* 3] = (void *) prefix
;
1645 ctxt
->pushTab
[ctxt
->nameNr
* 3 + 1] = (void *) URI
;
1646 ctxt
->pushTab
[ctxt
->nameNr
* 3 + 2] = (void *) (long) nsNr
;
1647 return (ctxt
->nameNr
++);
1649 xmlErrMemory(ctxt
, NULL
);
1654 * @ctxt: an XML parser context
1656 * Pops the top element/prefix/URI name from the name stack
1658 * Returns the name just removed
1660 static const xmlChar
*
1661 nameNsPop(xmlParserCtxtPtr ctxt
)
1665 if (ctxt
->nameNr
<= 0)
1668 if (ctxt
->nameNr
> 0)
1669 ctxt
->name
= ctxt
->nameTab
[ctxt
->nameNr
- 1];
1672 ret
= ctxt
->nameTab
[ctxt
->nameNr
];
1673 ctxt
->nameTab
[ctxt
->nameNr
] = NULL
;
1676 #endif /* LIBXML_PUSH_ENABLED */
1680 * @ctxt: an XML parser context
1681 * @value: the element name
1683 * Pushes a new element name on top of the name stack
1685 * Returns -1 in case of error, the index in the stack otherwise
1688 namePush(xmlParserCtxtPtr ctxt
, const xmlChar
* value
)
1690 if (ctxt
== NULL
) return (-1);
1692 if (ctxt
->nameNr
>= ctxt
->nameMax
) {
1693 const xmlChar
* *tmp
;
1695 tmp
= (const xmlChar
* *) xmlRealloc((xmlChar
* *)ctxt
->nameTab
,
1697 sizeof(ctxt
->nameTab
[0]));
1702 ctxt
->nameTab
= tmp
;
1704 ctxt
->nameTab
[ctxt
->nameNr
] = value
;
1706 return (ctxt
->nameNr
++);
1708 xmlErrMemory(ctxt
, NULL
);
1713 * @ctxt: an XML parser context
1715 * Pops the top element name from the name stack
1717 * Returns the name just removed
1720 namePop(xmlParserCtxtPtr ctxt
)
1724 if ((ctxt
== NULL
) || (ctxt
->nameNr
<= 0))
1727 if (ctxt
->nameNr
> 0)
1728 ctxt
->name
= ctxt
->nameTab
[ctxt
->nameNr
- 1];
1731 ret
= ctxt
->nameTab
[ctxt
->nameNr
];
1732 ctxt
->nameTab
[ctxt
->nameNr
] = NULL
;
1736 static int spacePush(xmlParserCtxtPtr ctxt
, int val
) {
1737 if (ctxt
->spaceNr
>= ctxt
->spaceMax
) {
1740 ctxt
->spaceMax
*= 2;
1741 tmp
= (int *) xmlRealloc(ctxt
->spaceTab
,
1742 ctxt
->spaceMax
* sizeof(ctxt
->spaceTab
[0]));
1744 xmlErrMemory(ctxt
, NULL
);
1748 ctxt
->spaceTab
= tmp
;
1750 ctxt
->spaceTab
[ctxt
->spaceNr
] = val
;
1751 ctxt
->space
= &ctxt
->spaceTab
[ctxt
->spaceNr
];
1752 return(ctxt
->spaceNr
++);
1755 static int spacePop(xmlParserCtxtPtr ctxt
) {
1757 if (ctxt
->spaceNr
<= 0) return(0);
1759 if (ctxt
->spaceNr
> 0)
1760 ctxt
->space
= &ctxt
->spaceTab
[ctxt
->spaceNr
- 1];
1762 ctxt
->space
= &ctxt
->spaceTab
[0];
1763 ret
= ctxt
->spaceTab
[ctxt
->spaceNr
];
1764 ctxt
->spaceTab
[ctxt
->spaceNr
] = -1;
1769 * Macros for accessing the content. Those should be used only by the parser,
1772 * Dirty macros, i.e. one often needs to make assumptions about the context to
1775 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1776 * To be used with extreme caution since operations consuming
1777 * characters may move the input buffer to a different location !
1778 * CUR returns the current xmlChar value, i.e. an 8 bit value if compiled
1779 * This should be used internally by the parser
1780 * only to compare to ASCII values otherwise it would break when
1781 * running with UTF-8 encoding.
1782 * RAW same as CUR but in the input buffer, bypass any token
1783 * extraction that may have been done
1784 * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
1785 * to compare on ASCII based substring.
1786 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1787 * strings without newlines within the parser.
1788 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1789 * defined char within the parser.
1790 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
1792 * NEXT Skip to the next character, this does the proper decoding
1793 * in UTF-8 mode. It also pops up unfinished entities on the fly.
1794 * NEXTL(l) Skip the current unicode character of l xmlChars long.
1795 * CUR_CHAR(l) returns the current unicode character (int), set l
1796 * to the number of xmlChars used for the encoding [0-5].
1797 * CUR_SCHAR same but operate on a string instead of the context
1798 * COPY_BUF copy the current unicode char to the target buffer, increment
1800 * GROW, SHRINK handling of input buffers
/*
 * Raw accessors on the current input of the `ctxt` variable that must be in
 * scope where the macro is used.
 * RAW and CUR expand to the same expression here: the xmlChar stored at the
 * current input position.
 */
1803 #define RAW (*ctxt->input->cur)
1804 #define CUR (*ctxt->input->cur)
/* xmlChar located val positions after the current one; the macro itself
 * performs no bounds check. */
1805 #define NXT(val) ctxt->input->cur[(val)]
/* The current position pointer itself (ctxt->input->cur). */
1806 #define CUR_PTR ctxt->input->cur
/*
 * CMPn(s, c1, ..., cn): non-zero when the first n bytes at s are equal to
 * c1..cn, in that order.  Bytes are compared as unsigned values, and the
 * && chain short-circuits, so no byte after the first mismatch is read.
 *
 * Every macro argument is parenthesised so that expressions such as
 * `p + 1`, `cond ? a : b` or `'a' | flag` are taken as a whole instead of
 * being torn apart by the cast or by ==.  The cast keeps the const
 * qualifier so read-only buffers are not silently de-qualified.
 *
 * NOTE: s is expanded once per compared byte; do not pass an expression
 * with side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1826 #define SKIP(val) do { \
1827 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
1828 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1829 if ((*ctxt->input->cur == 0) && \
1830 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1831 xmlPopInput(ctxt); \
1834 #define SKIPL(val) do { \
1836 for(skipl=0; skipl<val; skipl++) { \
1837 if (*(ctxt->input->cur) == '\n') { \
1838 ctxt->input->line++; ctxt->input->col = 1; \
1839 } else ctxt->input->col++; \
1841 ctxt->input->cur++; \
1843 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1844 if ((*ctxt->input->cur == 0) && \
1845 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1846 xmlPopInput(ctxt); \
1849 #define SHRINK if ((ctxt->progressive == 0) && \
1850 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1851 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1854 static void xmlSHRINK (xmlParserCtxtPtr ctxt
) {
1855 xmlParserInputShrink(ctxt
->input
);
1856 if ((*ctxt
->input
->cur
== 0) &&
1857 (xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
) <= 0))
1861 #define GROW if ((ctxt->progressive == 0) && \
1862 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
1865 static void xmlGROW (xmlParserCtxtPtr ctxt
) {
1866 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
1867 if ((ctxt
->input
->cur
!= NULL
) && (*ctxt
->input
->cur
== 0) &&
1868 (xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
) <= 0))
/* Shorthand for xmlSkipBlankChars() applied to the `ctxt` in scope. */
1872 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
/* Shorthand for xmlNextChar() applied to the `ctxt` in scope. */
1874 #define NEXT xmlNextChar(ctxt)
1877 ctxt->input->col++; \
1878 ctxt->input->cur++; \
1880 if (*ctxt->input->cur == 0) \
1881 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
1884 #define NEXTL(l) do { \
1885 if (*(ctxt->input->cur) == '\n') { \
1886 ctxt->input->line++; ctxt->input->col = 1; \
1887 } else ctxt->input->col++; \
1888 ctxt->input->cur += l; \
1889 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
/*
 * CUR_CHAR(l):     xmlCurrentChar() on the `ctxt` in scope; l is passed by
 *                  address so the callee can store a length into it.
 * CUR_SCHAR(s, l): same, through xmlStringCurrentChar(), for the string s.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l, b, i, v): append character v to buffer b at index i.
 * When l is 1 the value is stored as a single xmlChar and i is incremented;
 * for any other l, xmlCopyCharMultiByte() writes at &b[i] and i is advanced
 * by its return value.
 *
 * Arguments are parenthesised so that an expression such as `a | b` for l
 * is compared to 1 as a whole.  The body is wrapped in do { } while (0) so
 * that the macro behaves as exactly one statement; it must be followed by
 * a semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    do {								\
	if ((l) == 1) (b)[(i)++] = (xmlChar) (v);			\
	else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v));		\
    } while (0)
1900 * xmlSkipBlankChars:
1901 * @ctxt: the XML parser context
1903 * skip all blank characters found at that point in the input stream.
1904 * It pops up finished entities in the process if allowable at that point.
1906 * Returns the number of space chars skipped
1910 xmlSkipBlankChars(xmlParserCtxtPtr ctxt
) {
1914 * It's Okay to use CUR/NEXT here since all the blanks are on
1917 if ((ctxt
->inputNr
== 1) && (ctxt
->instate
!= XML_PARSER_DTD
)) {
1920 * if we are in the document content, go really fast
1922 cur
= ctxt
->input
->cur
;
1923 while (IS_BLANK_CH(*cur
)) {
1925 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
1930 ctxt
->input
->cur
= cur
;
1931 xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
);
1932 cur
= ctxt
->input
->cur
;
1935 ctxt
->input
->cur
= cur
;
1940 while (IS_BLANK_CH(cur
)) { /* CHECKED tstblanks.xml */
1945 while ((cur
== 0) && (ctxt
->inputNr
> 1) &&
1946 (ctxt
->instate
!= XML_PARSER_COMMENT
)) {
1951 * Need to handle support of entities branching here
1953 if (*ctxt
->input
->cur
== '%') xmlParserHandlePEReference(ctxt
);
1954 } while (IS_BLANK(cur
)); /* CHECKED tstblanks.xml */
1959 /************************************************************************
1961 * Commodity functions to handle entities *
1963 ************************************************************************/
1967 * @ctxt: an XML parser context
1969 * xmlPopInput: the current input pointed by ctxt->input came to an end
1970 * pop it and return the next char.
1972 * Returns the current xmlChar in the parser context
1975 xmlPopInput(xmlParserCtxtPtr ctxt
) {
1976 if ((ctxt
== NULL
) || (ctxt
->inputNr
<= 1)) return(0);
1977 if (xmlParserDebugEntities
)
1978 xmlGenericError(xmlGenericErrorContext
,
1979 "Popping input %d\n", ctxt
->inputNr
);
1980 xmlFreeInputStream(inputPop(ctxt
));
1981 if ((*ctxt
->input
->cur
== 0) &&
1982 (xmlParserInputGrow(ctxt
->input
, INPUT_CHUNK
) <= 0))
1983 return(xmlPopInput(ctxt
));
1989 * @ctxt: an XML parser context
1990 * @input: an XML parser input fragment (entity, XML fragment ...).
1992 * xmlPushInput: switch to a new input stream which is stacked on top
1993 * of the previous one(s).
1994 * Returns -1 in case of error or the index in the input stack
1997 xmlPushInput(xmlParserCtxtPtr ctxt
, xmlParserInputPtr input
) {
1999 if (input
== NULL
) return(-1);
2001 if (xmlParserDebugEntities
) {
2002 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
2003 xmlGenericError(xmlGenericErrorContext
,
2004 "%s(%d): ", ctxt
->input
->filename
,
2006 xmlGenericError(xmlGenericErrorContext
,
2007 "Pushing input %d : %.30s\n", ctxt
->inputNr
+1, input
->cur
);
2009 ret
= inputPush(ctxt
, input
);
2016 * @ctxt: an XML parser context
2018 * parse Reference declarations
2020 * [66] CharRef ::= '&#' [0-9]+ ';' |
2021 * '&#x' [0-9a-fA-F]+ ';'
2023 * [ WFC: Legal Character ]
2024 * Characters referred to using character references must match the
2025 * production for Char.
2027 * Returns the value parsed (as an int), 0 in case of error
2030 xmlParseCharRef(xmlParserCtxtPtr ctxt
) {
2031 unsigned int val
= 0;
2033 unsigned int outofrange
= 0;
2036 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2038 if ((RAW
== '&') && (NXT(1) == '#') &&
2042 while (RAW
!= ';') { /* loop blocked by count */
2047 if ((RAW
>= '0') && (RAW
<= '9'))
2048 val
= val
* 16 + (CUR
- '0');
2049 else if ((RAW
>= 'a') && (RAW
<= 'f') && (count
< 20))
2050 val
= val
* 16 + (CUR
- 'a') + 10;
2051 else if ((RAW
>= 'A') && (RAW
<= 'F') && (count
< 20))
2052 val
= val
* 16 + (CUR
- 'A') + 10;
2054 xmlFatalErr(ctxt
, XML_ERR_INVALID_HEX_CHARREF
, NULL
);
2065 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2070 } else if ((RAW
== '&') && (NXT(1) == '#')) {
2073 while (RAW
!= ';') { /* loop blocked by count */
2078 if ((RAW
>= '0') && (RAW
<= '9'))
2079 val
= val
* 10 + (CUR
- '0');
2081 xmlFatalErr(ctxt
, XML_ERR_INVALID_DEC_CHARREF
, NULL
);
2092 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2098 xmlFatalErr(ctxt
, XML_ERR_INVALID_CHARREF
, NULL
);
2102 * [ WFC: Legal Character ]
2103 * Characters referred to using character references must match the
2104 * production for Char.
2106 if ((IS_CHAR(val
) && (outofrange
== 0))) {
2109 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
2110 "xmlParseCharRef: invalid xmlChar value %d\n",
2117 * xmlParseStringCharRef:
2118 * @ctxt: an XML parser context
2119 * @str: a pointer to an index in the string
2121 * parse Reference declarations, variant parsing from a string rather
2122 * than an input flow.
2124 * [66] CharRef ::= '&#' [0-9]+ ';' |
2125 * '&#x' [0-9a-fA-F]+ ';'
2127 * [ WFC: Legal Character ]
2128 * Characters referred to using character references must match the
2129 * production for Char.
2131 * Returns the value parsed (as an int), 0 in case of error, str will be
2132 * updated to the current value of the index
2135 xmlParseStringCharRef(xmlParserCtxtPtr ctxt
, const xmlChar
**str
) {
2138 unsigned int val
= 0;
2139 unsigned int outofrange
= 0;
2141 if ((str
== NULL
) || (*str
== NULL
)) return(0);
2144 if ((cur
== '&') && (ptr
[1] == '#') && (ptr
[2] == 'x')) {
2147 while (cur
!= ';') { /* Non input consuming loop */
2148 if ((cur
>= '0') && (cur
<= '9'))
2149 val
= val
* 16 + (cur
- '0');
2150 else if ((cur
>= 'a') && (cur
<= 'f'))
2151 val
= val
* 16 + (cur
- 'a') + 10;
2152 else if ((cur
>= 'A') && (cur
<= 'F'))
2153 val
= val
* 16 + (cur
- 'A') + 10;
2155 xmlFatalErr(ctxt
, XML_ERR_INVALID_HEX_CHARREF
, NULL
);
2167 } else if ((cur
== '&') && (ptr
[1] == '#')){
2170 while (cur
!= ';') { /* Non input consuming loops */
2171 if ((cur
>= '0') && (cur
<= '9'))
2172 val
= val
* 10 + (cur
- '0');
2174 xmlFatalErr(ctxt
, XML_ERR_INVALID_DEC_CHARREF
, NULL
);
2187 xmlFatalErr(ctxt
, XML_ERR_INVALID_CHARREF
, NULL
);
2193 * [ WFC: Legal Character ]
2194 * Characters referred to using character references must match the
2195 * production for Char.
2197 if ((IS_CHAR(val
) && (outofrange
== 0))) {
2200 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
2201 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2208 * xmlNewBlanksWrapperInputStream:
2209 * @ctxt: an XML parser context
2210 * @entity: an Entity pointer
2212 * Create a new input stream for wrapping
2213 * blanks around a PEReference
2215 * Returns the new input stream or NULL
/*
 * Release callback for the buffer built by xmlNewBlanksWrapperInputStream,
 * which installs it as input->free; it simply hands str to xmlFree().
 * xmlParserHandlePEReference also compares ctxt->input->free against this
 * function's address, so it doubles as the marker that identifies a
 * blanks-wrapper input.
 */
2218 static void deallocblankswrapper (xmlChar
*str
) {xmlFree(str
);}
2220 static xmlParserInputPtr
2221 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt
, xmlEntityPtr entity
) {
2222 xmlParserInputPtr input
;
2225 if (entity
== NULL
) {
2226 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
2227 "xmlNewBlanksWrapperInputStream entity\n");
2230 if (xmlParserDebugEntities
)
2231 xmlGenericError(xmlGenericErrorContext
,
2232 "new blanks wrapper for entity: %s\n", entity
->name
);
2233 input
= xmlNewInputStream(ctxt
);
2234 if (input
== NULL
) {
2237 length
= xmlStrlen(entity
->name
) + 5;
2238 buffer
= xmlMallocAtomic(length
);
2239 if (buffer
== NULL
) {
2240 xmlErrMemory(ctxt
, NULL
);
2246 buffer
[length
-3] = ';';
2247 buffer
[length
-2] = ' ';
2248 buffer
[length
-1] = 0;
2249 memcpy(buffer
+ 2, entity
->name
, length
- 5);
2250 input
->free
= deallocblankswrapper
;
2251 input
->base
= buffer
;
2252 input
->cur
= buffer
;
2253 input
->length
= length
;
2254 input
->end
= &buffer
[length
];
2259 * xmlParserHandlePEReference:
2260 * @ctxt: the parser context
2262 * [69] PEReference ::= '%' Name ';'
2264 * [ WFC: No Recursion ]
2265 * A parsed entity must not contain a recursive
2266 * reference to itself, either directly or indirectly.
2268 * [ WFC: Entity Declared ]
2269 * In a document without any DTD, a document with only an internal DTD
2270 * subset which contains no parameter entity references, or a document
2271 * with "standalone='yes'", ... ... The declaration of a parameter
2272 * entity must precede any reference to it...
2274 * [ VC: Entity Declared ]
2275 * In a document with an external subset or external parameter entities
2276 * with "standalone='no'", ... ... The declaration of a parameter entity
2277 * must precede any reference to it...
2280 * Parameter-entity references may only appear in the DTD.
2281 * NOTE: misleading but this is handled.
2283 * A PEReference may have been detected in the current input stream
2284 * the handling is done according to
2285 * http://www.w3.org/TR/REC-xml#entproc
2287 * - Included in literal in entity values
2288 * - Included as Parameter Entity reference within DTDs
2291 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt
) {
2292 const xmlChar
*name
;
2293 xmlEntityPtr entity
= NULL
;
2294 xmlParserInputPtr input
;
2296 if (RAW
!= '%') return;
2297 switch(ctxt
->instate
) {
2298 case XML_PARSER_CDATA_SECTION
:
2300 case XML_PARSER_COMMENT
:
2302 case XML_PARSER_START_TAG
:
2304 case XML_PARSER_END_TAG
:
2306 case XML_PARSER_EOF
:
2307 xmlFatalErr(ctxt
, XML_ERR_PEREF_AT_EOF
, NULL
);
2309 case XML_PARSER_PROLOG
:
2310 case XML_PARSER_START
:
2311 case XML_PARSER_MISC
:
2312 xmlFatalErr(ctxt
, XML_ERR_PEREF_IN_PROLOG
, NULL
);
2314 case XML_PARSER_ENTITY_DECL
:
2315 case XML_PARSER_CONTENT
:
2316 case XML_PARSER_ATTRIBUTE_VALUE
:
2318 case XML_PARSER_SYSTEM_LITERAL
:
2319 case XML_PARSER_PUBLIC_LITERAL
:
2320 /* we just ignore it there */
2322 case XML_PARSER_EPILOG
:
2323 xmlFatalErr(ctxt
, XML_ERR_PEREF_IN_EPILOG
, NULL
);
2325 case XML_PARSER_ENTITY_VALUE
:
2327 * NOTE: in the case of entity values, we don't do the
2328 * substitution here since we need the literal
2329 * entity value to be able to save the internal
2330 * subset of the document.
2331 * This will be handled by xmlStringDecodeEntities
2334 case XML_PARSER_DTD
:
2336 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2337 * In the internal DTD subset, parameter-entity references
2338 * can occur only where markup declarations can occur, not
2339 * within markup declarations.
2340 * In that case this is handled in xmlParseMarkupDecl
2342 if ((ctxt
->external
== 0) && (ctxt
->inputNr
== 1))
2344 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2347 case XML_PARSER_IGNORE
:
2352 name
= xmlParseName(ctxt
);
2353 if (xmlParserDebugEntities
)
2354 xmlGenericError(xmlGenericErrorContext
,
2355 "PEReference: %s\n", name
);
2357 xmlFatalErr(ctxt
, XML_ERR_PEREF_NO_NAME
, NULL
);
2361 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->getParameterEntity
!= NULL
))
2362 entity
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
2363 if (entity
== NULL
) {
2366 * [ WFC: Entity Declared ]
2367 * In a document without any DTD, a document with only an
2368 * internal DTD subset which contains no parameter entity
2369 * references, or a document with "standalone='yes'", ...
2370 * ... The declaration of a parameter entity must precede
2371 * any reference to it...
2373 if ((ctxt
->standalone
== 1) ||
2374 ((ctxt
->hasExternalSubset
== 0) &&
2375 (ctxt
->hasPErefs
== 0))) {
2376 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
2377 "PEReference: %%%s; not found\n", name
);
2380 * [ VC: Entity Declared ]
2381 * In a document with an external subset or external
2382 * parameter entities with "standalone='no'", ...
2383 * ... The declaration of a parameter entity must precede
2384 * any reference to it...
2386 if ((ctxt
->validate
) && (ctxt
->vctxt
.error
!= NULL
)) {
2387 xmlValidityError(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
2388 "PEReference: %%%s; not found\n",
2391 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
2392 "PEReference: %%%s; not found\n",
2396 } else if (ctxt
->input
->free
!= deallocblankswrapper
) {
2397 input
= xmlNewBlanksWrapperInputStream(ctxt
, entity
);
2398 if (xmlPushInput(ctxt
, input
) < 0)
2401 if ((entity
->etype
== XML_INTERNAL_PARAMETER_ENTITY
) ||
2402 (entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
)) {
2404 xmlCharEncoding enc
;
2407 * handle the extra spaces added before and after
2408 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2409 * this is done independently.
2411 input
= xmlNewEntityInputStream(ctxt
, entity
);
2412 if (xmlPushInput(ctxt
, input
) < 0)
2416 * Get the 4 first bytes and decode the charset
2417 * if enc != XML_CHAR_ENCODING_NONE
2418 * plug some encoding conversion routines.
2419 * Note that, since we may have some non-UTF8
2420 * encoding (like UTF16, bug 135229), the 'length'
2421 * is not known, but we can calculate based upon
2422 * the amount of data in the buffer.
2425 if ((ctxt
->input
->end
- ctxt
->input
->cur
)>=4) {
2430 enc
= xmlDetectCharEncoding(start
, 4);
2431 if (enc
!= XML_CHAR_ENCODING_NONE
) {
2432 xmlSwitchEncoding(ctxt
, enc
);
2436 if ((entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) &&
2437 (CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l' )) &&
2438 (IS_BLANK_CH(NXT(5)))) {
2439 xmlParseTextDecl(ctxt
);
2442 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_PARAMETER
,
2443 "PEReference: %s is not a parameter entity\n",
2448 xmlFatalErr(ctxt
, XML_ERR_PEREF_SEMICOL_MISSING
, NULL
);
2454 * Macro used to grow the current buffer.
2456 #define growBuffer(buffer, n) { \
2458 buffer##_size *= 2; \
2459 buffer##_size += n; \
2461 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
2462 if (tmp == NULL) goto mem_error; \
2467 * xmlStringLenDecodeEntities:
2468 * @ctxt: the parser context
2469 * @str: the input string
2470 * @len: the string length
2471 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2472 * @end: an end marker xmlChar, 0 if none
2473 * @end2: an end marker xmlChar, 0 if none
2474 * @end3: an end marker xmlChar, 0 if none
2476 * Takes an entity string's content and performs the appropriate substitutions.
2478 * [67] Reference ::= EntityRef | CharRef
2480 * [69] PEReference ::= '%' Name ';'
2482 * Returns A newly allocated string with the substitution done. The caller
2483 * must deallocate it !
2486 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int len
,
2487 int what
, xmlChar end
, xmlChar end2
, xmlChar end3
) {
2488 xmlChar
*buffer
= NULL
;
2489 int buffer_size
= 0;
2491 xmlChar
*current
= NULL
;
2492 xmlChar
*rep
= NULL
;
2493 const xmlChar
*last
;
2498 if ((ctxt
== NULL
) || (str
== NULL
) || (len
< 0))
2502 if (((ctxt
->depth
> 40) &&
2503 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
2504 (ctxt
->depth
> 1024)) {
2505 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
2510 * allocate a translation buffer.
2512 buffer_size
= XML_PARSER_BIG_BUFFER_SIZE
;
2513 buffer
= (xmlChar
*) xmlMallocAtomic(buffer_size
* sizeof(xmlChar
));
2514 if (buffer
== NULL
) goto mem_error
;
2517 * OK loop until we reach one of the ending char or a size limit.
2518 * we are operating on already parsed values.
2521 c
= CUR_SCHAR(str
, l
);
2524 while ((c
!= 0) && (c
!= end
) && /* non input consuming loop */
2525 (c
!= end2
) && (c
!= end3
)) {
2528 if ((c
== '&') && (str
[1] == '#')) {
2529 int val
= xmlParseStringCharRef(ctxt
, &str
);
2531 COPY_BUF(0,buffer
,nbchars
,val
);
2533 if (nbchars
> buffer_size
- XML_PARSER_BUFFER_SIZE
) {
2534 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2536 } else if ((c
== '&') && (what
& XML_SUBSTITUTE_REF
)) {
2537 if (xmlParserDebugEntities
)
2538 xmlGenericError(xmlGenericErrorContext
,
2539 "String decoding Entity Reference: %.30s\n",
2541 ent
= xmlParseStringEntityRef(ctxt
, &str
);
2542 if ((ctxt
->lastError
.code
== XML_ERR_ENTITY_LOOP
) ||
2543 (ctxt
->lastError
.code
== XML_ERR_INTERNAL_ERROR
))
2546 ctxt
->nbentities
+= ent
->checked
;
2547 if ((ent
!= NULL
) &&
2548 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
2549 if (ent
->content
!= NULL
) {
2550 COPY_BUF(0,buffer
,nbchars
,ent
->content
[0]);
2551 if (nbchars
> buffer_size
- XML_PARSER_BUFFER_SIZE
) {
2552 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2555 xmlFatalErrMsg(ctxt
, XML_ERR_INTERNAL_ERROR
,
2556 "predefined entity has no content\n");
2558 } else if ((ent
!= NULL
) && (ent
->content
!= NULL
)) {
2560 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
, what
,
2566 while (*current
!= 0) { /* non input consuming loop */
2567 buffer
[nbchars
++] = *current
++;
2569 buffer_size
- XML_PARSER_BUFFER_SIZE
) {
2570 if (xmlParserEntityCheck(ctxt
, nbchars
, ent
))
2572 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2578 } else if (ent
!= NULL
) {
2579 int i
= xmlStrlen(ent
->name
);
2580 const xmlChar
*cur
= ent
->name
;
2582 buffer
[nbchars
++] = '&';
2583 if (nbchars
> buffer_size
- i
- XML_PARSER_BUFFER_SIZE
) {
2584 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2587 buffer
[nbchars
++] = *cur
++;
2588 buffer
[nbchars
++] = ';';
2590 } else if (c
== '%' && (what
& XML_SUBSTITUTE_PEREF
)) {
2591 if (xmlParserDebugEntities
)
2592 xmlGenericError(xmlGenericErrorContext
,
2593 "String decoding PE Reference: %.30s\n", str
);
2594 ent
= xmlParseStringPEReference(ctxt
, &str
);
2595 if (ctxt
->lastError
.code
== XML_ERR_ENTITY_LOOP
)
2598 ctxt
->nbentities
+= ent
->checked
;
2600 if (ent
->content
== NULL
) {
2601 xmlLoadEntityContent(ctxt
, ent
);
2604 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
, what
,
2609 while (*current
!= 0) { /* non input consuming loop */
2610 buffer
[nbchars
++] = *current
++;
2612 buffer_size
- XML_PARSER_BUFFER_SIZE
) {
2613 if (xmlParserEntityCheck(ctxt
, nbchars
, ent
))
2615 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2623 COPY_BUF(l
,buffer
,nbchars
,c
);
2625 if (nbchars
> buffer_size
- XML_PARSER_BUFFER_SIZE
) {
2626 growBuffer(buffer
, XML_PARSER_BUFFER_SIZE
);
2630 c
= CUR_SCHAR(str
, l
);
2634 buffer
[nbchars
] = 0;
2638 xmlErrMemory(ctxt
, NULL
);
2648 * xmlStringDecodeEntities:
2649 * @ctxt: the parser context
2650 * @str: the input string
2651 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2652 * @end: an end marker xmlChar, 0 if none
2653 * @end2: an end marker xmlChar, 0 if none
2654 * @end3: an end marker xmlChar, 0 if none
2656 * Takes an entity string's content and performs the appropriate substitutions.
2658 * [67] Reference ::= EntityRef | CharRef
2660 * [69] PEReference ::= '%' Name ';'
2662 * Returns A newly allocated string with the substitution done. The caller
2663 * must deallocate it !
2666 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int what
,
2667 xmlChar end
, xmlChar end2
, xmlChar end3
) {
2668 if ((ctxt
== NULL
) || (str
== NULL
)) return(NULL
);
2669 return(xmlStringLenDecodeEntities(ctxt
, str
, xmlStrlen(str
), what
,
2673 /************************************************************************
2675 * Commodity functions, cleanup needed ? *
2677 ************************************************************************/
2681 * @ctxt: an XML parser context
2683 * @len: the size of @str
2684 * @blank_chars: we know the chars are blanks
2686 * Is this a sequence of blank chars that one can ignore ?
2688 * Returns 1 if ignorable 0 otherwise.
2691 static int areBlanks(xmlParserCtxtPtr ctxt
, const xmlChar
*str
, int len
,
2694 xmlNodePtr lastChild
;
2697 * Don't spend time trying to differentiate them, the same callback is
2700 if (ctxt
->sax
->ignorableWhitespace
== ctxt
->sax
->characters
)
2704 * Check for xml:space value.
2706 if ((ctxt
->space
== NULL
) || (*(ctxt
->space
) == 1) ||
2707 (*(ctxt
->space
) == -2))
2711 * Check that the string is made of blanks
2713 if (blank_chars
== 0) {
2714 for (i
= 0;i
< len
;i
++)
2715 if (!(IS_BLANK_CH(str
[i
]))) return(0);
2719 * Look if the element is mixed content in the DTD if available
2721 if (ctxt
->node
== NULL
) return(0);
2722 if (ctxt
->myDoc
!= NULL
) {
2723 ret
= xmlIsMixedElement(ctxt
->myDoc
, ctxt
->node
->name
);
2724 if (ret
== 0) return(1);
2725 if (ret
== 1) return(0);
2729 * Otherwise, heuristic :-\
2731 if ((RAW
!= '<') && (RAW
!= 0xD)) return(0);
2732 if ((ctxt
->node
->children
== NULL
) &&
2733 (RAW
== '<') && (NXT(1) == '/')) return(0);
2735 lastChild
= xmlGetLastChild(ctxt
->node
);
2736 if (lastChild
== NULL
) {
2737 if ((ctxt
->node
->type
!= XML_ELEMENT_NODE
) &&
2738 (ctxt
->node
->content
!= NULL
)) return(0);
2739 } else if (xmlNodeIsText(lastChild
))
2741 else if ((ctxt
->node
->children
!= NULL
) &&
2742 (xmlNodeIsText(ctxt
->node
->children
)))
2747 /************************************************************************
2749 * Extra stuff for namespace support *
2750 * Relates to http://www.w3.org/TR/WD-xml-names *
2752 ************************************************************************/
2756 * @ctxt: an XML parser context
2757 * @name: the XML qualified name string to split
2758 * @prefix: a xmlChar **
2760 * parse an UTF8 encoded XML qualified name string
2762 * [NS 5] QName ::= (Prefix ':')? LocalPart
2764 * [NS 6] Prefix ::= NCName
2766 * [NS 7] LocalPart ::= NCName
2768 * Returns the local part, and prefix is updated
2769 * to get the Prefix if any.
2773 xmlSplitQName(xmlParserCtxtPtr ctxt
, const xmlChar
*name
, xmlChar
**prefix
) {
2774 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
2775 xmlChar
*buffer
= NULL
;
2777 int max
= XML_MAX_NAMELEN
;
2778 xmlChar
*ret
= NULL
;
2779 const xmlChar
*cur
= name
;
2782 if (prefix
== NULL
) return(NULL
);
2785 if (cur
== NULL
) return(NULL
);
2787 #ifndef XML_XML_NAMESPACE
2788 /* xml: prefix is not really a namespace */
2789 if ((cur
[0] == 'x') && (cur
[1] == 'm') &&
2790 (cur
[2] == 'l') && (cur
[3] == ':'))
2791 return(xmlStrdup(name
));
2794 /* nasty but well-formed */
2796 return(xmlStrdup(name
));
2799 while ((c
!= 0) && (c
!= ':') && (len
< max
)) { /* tested bigname.xml */
2805 * Okay someone managed to make a huge name, so he's ready to pay
2806 * for the processing speed.
2810 buffer
= (xmlChar
*) xmlMallocAtomic(max
* sizeof(xmlChar
));
2811 if (buffer
== NULL
) {
2812 xmlErrMemory(ctxt
, NULL
);
2815 memcpy(buffer
, buf
, len
);
2816 while ((c
!= 0) && (c
!= ':')) { /* tested bigname.xml */
2817 if (len
+ 10 > max
) {
2821 tmp
= (xmlChar
*) xmlRealloc(buffer
,
2822 max
* sizeof(xmlChar
));
2825 xmlErrMemory(ctxt
, NULL
);
2836 if ((c
== ':') && (*cur
== 0)) {
2840 return(xmlStrdup(name
));
2844 ret
= xmlStrndup(buf
, len
);
2848 max
= XML_MAX_NAMELEN
;
2856 return(xmlStrndup(BAD_CAST
"", 0));
2861 * Check that the first character is proper to start
2864 if (!(((c
>= 0x61) && (c
<= 0x7A)) ||
2865 ((c
>= 0x41) && (c
<= 0x5A)) ||
2866 (c
== '_') || (c
== ':'))) {
2868 int first
= CUR_SCHAR(cur
, l
);
2870 if (!IS_LETTER(first
) && (first
!= '_')) {
2871 xmlFatalErrMsgStr(ctxt
, XML_NS_ERR_QNAME
,
2872 "Name %s is not XML Namespace compliant\n",
2878 while ((c
!= 0) && (len
< max
)) { /* tested bigname2.xml */
2884 * Okay someone managed to make a huge name, so he's ready to pay
2885 * for the processing speed.
2889 buffer
= (xmlChar
*) xmlMallocAtomic(max
* sizeof(xmlChar
));
2890 if (buffer
== NULL
) {
2891 xmlErrMemory(ctxt
, NULL
);
2894 memcpy(buffer
, buf
, len
);
2895 while (c
!= 0) { /* tested bigname2.xml */
2896 if (len
+ 10 > max
) {
2900 tmp
= (xmlChar
*) xmlRealloc(buffer
,
2901 max
* sizeof(xmlChar
));
2903 xmlErrMemory(ctxt
, NULL
);
2916 ret
= xmlStrndup(buf
, len
);
2925 /************************************************************************
2927 * The parser itself *
2928 * Relates to http://www.w3.org/TR/REC-xml *
2930 ************************************************************************/
2932 /************************************************************************
2934 * Routines to parse Name, NCName and NmToken *
2936 ************************************************************************/
/*
 * Counters for the Name / NCName / Nmtoken parsing routines, all starting
 * at zero.  The routines bump them themselves, for instance
 * xmlParseNameComplex() increments nbParseNameComplex.
 */
static unsigned long
    nbParseName = 0,
    nbParseNmToken = 0,
    nbParseNCName = 0,
    nbParseNCNameComplex = 0,
    nbParseNameComplex = 0,
    nbParseStringName = 0;
2947 * The two following functions are related to the change of accepted
2948 * characters for Name and NmToken in the Revision 5 of XML-1.0
2949 * They correspond to the modified production [4] and the new production [4a]
2950 * changes in that revision. Also note that the macros used for the
2951 * productions Letter, Digit, CombiningChar and Extender are not needed
2953 * We still keep compatibility to pre-revision5 parsing semantic if the
2954 * new XML_PARSE_OLD10 option is given to the parser.
2957 xmlIsNameStartChar(xmlParserCtxtPtr ctxt
, int c
) {
2958 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
2960 * Use the new checks of production [4] [4a] amd [5] of the
2961 * Update 5 of XML-1.0
2963 if ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
2964 (((c
>= 'a') && (c
<= 'z')) ||
2965 ((c
>= 'A') && (c
<= 'Z')) ||
2966 (c
== '_') || (c
== ':') ||
2967 ((c
>= 0xC0) && (c
<= 0xD6)) ||
2968 ((c
>= 0xD8) && (c
<= 0xF6)) ||
2969 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
2970 ((c
>= 0x370) && (c
<= 0x37D)) ||
2971 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
2972 ((c
>= 0x200C) && (c
<= 0x200D)) ||
2973 ((c
>= 0x2070) && (c
<= 0x218F)) ||
2974 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
2975 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
2976 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
2977 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
2978 ((c
>= 0x10000) && (c
<= 0xEFFFF))))
2981 if (IS_LETTER(c
) || (c
== '_') || (c
== ':'))
2988 xmlIsNameChar(xmlParserCtxtPtr ctxt
, int c
) {
2989 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
2991 * Use the new checks of production [4] [4a] amd [5] of the
2992 * Update 5 of XML-1.0
2994 if ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
2995 (((c
>= 'a') && (c
<= 'z')) ||
2996 ((c
>= 'A') && (c
<= 'Z')) ||
2997 ((c
>= '0') && (c
<= '9')) || /* !start */
2998 (c
== '_') || (c
== ':') ||
2999 (c
== '-') || (c
== '.') || (c
== 0xB7) || /* !start */
3000 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3001 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3002 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3003 ((c
>= 0x300) && (c
<= 0x36F)) || /* !start */
3004 ((c
>= 0x370) && (c
<= 0x37D)) ||
3005 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3006 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3007 ((c
>= 0x203F) && (c
<= 0x2040)) || /* !start */
3008 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3009 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3010 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3011 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3012 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3013 ((c
>= 0x10000) && (c
<= 0xEFFFF))))
3016 if ((IS_LETTER(c
)) || (IS_DIGIT(c
)) ||
3017 (c
== '.') || (c
== '-') ||
3018 (c
== '_') || (c
== ':') ||
3019 (IS_COMBINING(c
)) ||
3026 static xmlChar
* xmlParseAttValueInternal(xmlParserCtxtPtr ctxt
,
3027 int *len
, int *alloc
, int normalize
);
3029 static const xmlChar
*
3030 xmlParseNameComplex(xmlParserCtxtPtr ctxt
) {
3036 nbParseNameComplex
++;
3040 * Handler for more complex cases
3044 if ((ctxt
->options
& XML_PARSE_OLD10
) == 0) {
3046 * Use the new checks of production [4] [4a] and [5] of the
3047 * Update 5 of XML-1.0
3049 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3050 (!(((c
>= 'a') && (c
<= 'z')) ||
3051 ((c
>= 'A') && (c
<= 'Z')) ||
3052 (c
== '_') || (c
== ':') ||
3053 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3054 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3055 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3056 ((c
>= 0x370) && (c
<= 0x37D)) ||
3057 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3058 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3059 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3060 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3061 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3062 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3063 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3064 ((c
>= 0x10000) && (c
<= 0xEFFFF))))) {
3070 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* accelerators */
3071 (((c
>= 'a') && (c
<= 'z')) ||
3072 ((c
>= 'A') && (c
<= 'Z')) ||
3073 ((c
>= '0') && (c
<= '9')) || /* !start */
3074 (c
== '_') || (c
== ':') ||
3075 (c
== '-') || (c
== '.') || (c
== 0xB7) || /* !start */
3076 ((c
>= 0xC0) && (c
<= 0xD6)) ||
3077 ((c
>= 0xD8) && (c
<= 0xF6)) ||
3078 ((c
>= 0xF8) && (c
<= 0x2FF)) ||
3079 ((c
>= 0x300) && (c
<= 0x36F)) || /* !start */
3080 ((c
>= 0x370) && (c
<= 0x37D)) ||
3081 ((c
>= 0x37F) && (c
<= 0x1FFF)) ||
3082 ((c
>= 0x200C) && (c
<= 0x200D)) ||
3083 ((c
>= 0x203F) && (c
<= 0x2040)) || /* !start */
3084 ((c
>= 0x2070) && (c
<= 0x218F)) ||
3085 ((c
>= 0x2C00) && (c
<= 0x2FEF)) ||
3086 ((c
>= 0x3001) && (c
<= 0xD7FF)) ||
3087 ((c
>= 0xF900) && (c
<= 0xFDCF)) ||
3088 ((c
>= 0xFDF0) && (c
<= 0xFFFD)) ||
3089 ((c
>= 0x10000) && (c
<= 0xEFFFF))
3091 if (count
++ > 100) {
3100 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3101 (!IS_LETTER(c
) && (c
!= '_') &&
3109 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* test bigname.xml */
3110 ((IS_LETTER(c
)) || (IS_DIGIT(c
)) ||
3111 (c
== '.') || (c
== '-') ||
3112 (c
== '_') || (c
== ':') ||
3113 (IS_COMBINING(c
)) ||
3114 (IS_EXTENDER(c
)))) {
3115 if (count
++ > 100) {
3124 if ((*ctxt
->input
->cur
== '\n') && (ctxt
->input
->cur
[-1] == '\r'))
3125 return(xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
- (len
+ 1), len
));
3126 return(xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
- len
, len
));
3131 * @ctxt: an XML parser context
3133 * parse an XML name.
3135 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3136 * CombiningChar | Extender
3138 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3140 * [6] Names ::= Name (#x20 Name)*
3142 * Returns the Name parsed or NULL
3146 xmlParseName(xmlParserCtxtPtr ctxt
) {
3158 * Accelerator for simple ASCII names
3160 in
= ctxt
->input
->cur
;
3161 if (((*in
>= 0x61) && (*in
<= 0x7A)) ||
3162 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3163 (*in
== '_') || (*in
== ':')) {
3165 while (((*in
>= 0x61) && (*in
<= 0x7A)) ||
3166 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3167 ((*in
>= 0x30) && (*in
<= 0x39)) ||
3168 (*in
== '_') || (*in
== '-') ||
3169 (*in
== ':') || (*in
== '.'))
3171 if ((*in
> 0) && (*in
< 0x80)) {
3172 count
= in
- ctxt
->input
->cur
;
3173 ret
= xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
, count
);
3174 ctxt
->input
->cur
= in
;
3175 ctxt
->nbChars
+= count
;
3176 ctxt
->input
->col
+= count
;
3178 xmlErrMemory(ctxt
, NULL
);
3182 /* accelerator for special cases */
3183 return(xmlParseNameComplex(ctxt
));
3186 static const xmlChar
*
3187 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt
) {
3193 nbParseNCNameComplex
++;
3197 * Handler for more complex cases
3201 if ((c
== ' ') || (c
== '>') || (c
== '/') || /* accelerators */
3202 (!xmlIsNameStartChar(ctxt
, c
) || (c
== ':'))) {
3206 while ((c
!= ' ') && (c
!= '>') && (c
!= '/') && /* test bigname.xml */
3207 (xmlIsNameChar(ctxt
, c
) && (c
!= ':'))) {
3208 if (count
++ > 100) {
3216 return(xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
- len
, len
));
3221 * @ctxt: an XML parser context
3222 * @len: length of the string parsed
3224 * parse an XML name.
3226 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3227 * CombiningChar | Extender
3229 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3231 * Returns the Name parsed or NULL
3234 static const xmlChar
*
3235 xmlParseNCName(xmlParserCtxtPtr ctxt
) {
3245 * Accelerator for simple ASCII names
3247 in
= ctxt
->input
->cur
;
3248 if (((*in
>= 0x61) && (*in
<= 0x7A)) ||
3249 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3252 while (((*in
>= 0x61) && (*in
<= 0x7A)) ||
3253 ((*in
>= 0x41) && (*in
<= 0x5A)) ||
3254 ((*in
>= 0x30) && (*in
<= 0x39)) ||
3255 (*in
== '_') || (*in
== '-') ||
3258 if ((*in
> 0) && (*in
< 0x80)) {
3259 count
= in
- ctxt
->input
->cur
;
3260 ret
= xmlDictLookup(ctxt
->dict
, ctxt
->input
->cur
, count
);
3261 ctxt
->input
->cur
= in
;
3262 ctxt
->nbChars
+= count
;
3263 ctxt
->input
->col
+= count
;
3265 xmlErrMemory(ctxt
, NULL
);
3270 return(xmlParseNCNameComplex(ctxt
));
3274 * xmlParseNameAndCompare:
3275 * @ctxt: an XML parser context
3277 * parse an XML name and compares for match
3278 * (specialized for endtag parsing)
3280 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3281 * and the name for mismatch
3284 static const xmlChar
*
3285 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt
, xmlChar
const *other
) {
3286 register const xmlChar
*cmp
= other
;
3287 register const xmlChar
*in
;
3292 in
= ctxt
->input
->cur
;
3293 while (*in
!= 0 && *in
== *cmp
) {
3298 if (*cmp
== 0 && (*in
== '>' || IS_BLANK_CH (*in
))) {
3300 ctxt
->input
->cur
= in
;
3301 return (const xmlChar
*) 1;
3303 /* failure (or end of input buffer), check with full function */
3304 ret
= xmlParseName (ctxt
);
3305 /* strings coming from the dictionary, direct compare possible */
3307 return (const xmlChar
*) 1;
3313 * xmlParseStringName:
3314 * @ctxt: an XML parser context
3315 * @str: a pointer to the string pointer (IN/OUT)
3317 * parse an XML name.
3319 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3320 * CombiningChar | Extender
3322 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3324 * [6] Names ::= Name (#x20 Name)*
3326 * Returns the Name parsed or NULL. The @str pointer
3327 * is updated to the current location in the string.
3331 xmlParseStringName(xmlParserCtxtPtr ctxt
, const xmlChar
** str
) {
3332 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
3333 const xmlChar
*cur
= *str
;
3338 nbParseStringName
++;
3341 c
= CUR_SCHAR(cur
, l
);
3342 if (!xmlIsNameStartChar(ctxt
, c
)) {
3346 COPY_BUF(l
,buf
,len
,c
);
3348 c
= CUR_SCHAR(cur
, l
);
3349 while (xmlIsNameChar(ctxt
, c
)) {
3350 COPY_BUF(l
,buf
,len
,c
);
3352 c
= CUR_SCHAR(cur
, l
);
3353 if (len
>= XML_MAX_NAMELEN
) { /* test bigentname.xml */
3355 * Okay someone managed to make a huge name, so he's ready to pay
3356 * for the processing speed.
3361 buffer
= (xmlChar
*) xmlMallocAtomic(max
* sizeof(xmlChar
));
3362 if (buffer
== NULL
) {
3363 xmlErrMemory(ctxt
, NULL
);
3366 memcpy(buffer
, buf
, len
);
3367 while (xmlIsNameChar(ctxt
, c
)) {
3368 if (len
+ 10 > max
) {
3371 tmp
= (xmlChar
*) xmlRealloc(buffer
,
3372 max
* sizeof(xmlChar
));
3374 xmlErrMemory(ctxt
, NULL
);
3380 COPY_BUF(l
,buffer
,len
,c
);
3382 c
= CUR_SCHAR(cur
, l
);
3390 return(xmlStrndup(buf
, len
));
3395 * @ctxt: an XML parser context
3397 * parse an XML Nmtoken.
3399 * [7] Nmtoken ::= (NameChar)+
3401 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3403 * Returns the Nmtoken parsed or NULL
3407 xmlParseNmtoken(xmlParserCtxtPtr ctxt
) {
3408 xmlChar buf
[XML_MAX_NAMELEN
+ 5];
3420 while (xmlIsNameChar(ctxt
, c
)) {
3421 if (count
++ > 100) {
3425 COPY_BUF(l
,buf
,len
,c
);
3428 if (len
>= XML_MAX_NAMELEN
) {
3430 * Okay someone managed to make a huge token, so he's ready to pay
3431 * for the processing speed.
3436 buffer
= (xmlChar
*) xmlMallocAtomic(max
* sizeof(xmlChar
));
3437 if (buffer
== NULL
) {
3438 xmlErrMemory(ctxt
, NULL
);
3441 memcpy(buffer
, buf
, len
);
3442 while (xmlIsNameChar(ctxt
, c
)) {
3443 if (count
++ > 100) {
3447 if (len
+ 10 > max
) {
3451 tmp
= (xmlChar
*) xmlRealloc(buffer
,
3452 max
* sizeof(xmlChar
));
3454 xmlErrMemory(ctxt
, NULL
);
3460 COPY_BUF(l
,buffer
,len
,c
);
3470 return(xmlStrndup(buf
, len
));
3474 * xmlParseEntityValue:
3475 * @ctxt: an XML parser context
3476 * @orig: if non-NULL store a copy of the original entity value
3478 * parse a value for ENTITY declarations
3480 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3481 * "'" ([^%&'] | PEReference | Reference)* "'"
3483 * Returns the EntityValue parsed with reference substituted or NULL
3487 xmlParseEntityValue(xmlParserCtxtPtr ctxt
, xmlChar
**orig
) {
3488 xmlChar
*buf
= NULL
;
3490 int size
= XML_PARSER_BUFFER_SIZE
;
3493 xmlChar
*ret
= NULL
;
3494 const xmlChar
*cur
= NULL
;
3495 xmlParserInputPtr input
;
3497 if (RAW
== '"') stop
= '"';
3498 else if (RAW
== '\'') stop
= '\'';
3500 xmlFatalErr(ctxt
, XML_ERR_ENTITY_NOT_STARTED
, NULL
);
3503 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
3505 xmlErrMemory(ctxt
, NULL
);
3510 * The content of the entity definition is copied in a buffer.
3513 ctxt
->instate
= XML_PARSER_ENTITY_VALUE
;
3514 input
= ctxt
->input
;
3519 * NOTE: 4.4.5 Included in Literal
3520 * When a parameter entity reference appears in a literal entity
3521 * value, ... a single or double quote character in the replacement
3522 * text is always treated as a normal data character and will not
3523 * terminate the literal.
3524 * In practice it means we stop the loop only when back at parsing
3525 * the initial entity and the quote is found
3527 while ((IS_CHAR(c
)) && ((c
!= stop
) || /* checked */
3528 (ctxt
->input
!= input
))) {
3529 if (len
+ 5 >= size
) {
3533 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
3535 xmlErrMemory(ctxt
, NULL
);
3541 COPY_BUF(l
,buf
,len
,c
);
3544 * Pop-up of finished entities.
3546 while ((RAW
== 0) && (ctxt
->inputNr
> 1)) /* non input consuming */
3559 * Raise problem w.r.t. '&' and '%' being used in non-entities
3560 * reference constructs. Note Charref will be handled in
3561 * xmlStringDecodeEntities()
3564 while (*cur
!= 0) { /* non input consuming */
3565 if ((*cur
== '%') || ((*cur
== '&') && (cur
[1] != '#'))) {
3570 name
= xmlParseStringName(ctxt
, &cur
);
3571 if ((name
== NULL
) || (*cur
!= ';')) {
3572 xmlFatalErrMsgInt(ctxt
, XML_ERR_ENTITY_CHAR_ERROR
,
3573 "EntityValue: '%c' forbidden except for entities references\n",
3576 if ((tmp
== '%') && (ctxt
->inSubset
== 1) &&
3577 (ctxt
->inputNr
== 1)) {
3578 xmlFatalErr(ctxt
, XML_ERR_ENTITY_PE_INTERNAL
, NULL
);
3589 * Then PEReference entities are substituted.
3592 xmlFatalErr(ctxt
, XML_ERR_ENTITY_NOT_FINISHED
, NULL
);
3597 * NOTE: 4.4.7 Bypassed
3598 * When a general entity reference appears in the EntityValue in
3599 * an entity declaration, it is bypassed and left as is.
3600 * so XML_SUBSTITUTE_REF is not set here.
3602 ret
= xmlStringDecodeEntities(ctxt
, buf
, XML_SUBSTITUTE_PEREF
,
3614 * xmlParseAttValueComplex:
3615 * @ctxt: an XML parser context
3616 * @len: the resulting attribute len
3617 * @normalize: whether to apply the inner normalization
3619 * parse a value for an attribute, this is the fallback function
3620 * of xmlParseAttValue() when the attribute parsing requires handling
3621 * of non-ASCII characters, or normalization compaction.
3623 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3626 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt
, int *attlen
, int normalize
) {
3628 xmlChar
*buf
= NULL
;
3629 xmlChar
*rep
= NULL
;
3632 int c
, l
, in_space
= 0;
3633 xmlChar
*current
= NULL
;
3636 if (NXT(0) == '"') {
3637 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
3640 } else if (NXT(0) == '\'') {
3642 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
3645 xmlFatalErr(ctxt
, XML_ERR_ATTRIBUTE_NOT_STARTED
, NULL
);
3650 * allocate a translation buffer.
3652 buf_size
= XML_PARSER_BUFFER_SIZE
;
3653 buf
= (xmlChar
*) xmlMallocAtomic(buf_size
* sizeof(xmlChar
));
3654 if (buf
== NULL
) goto mem_error
;
3657 * OK loop until we reach one of the ending char or a size limit.
3660 while ((NXT(0) != limit
) && /* checked */
3661 (IS_CHAR(c
)) && (c
!= '<')) {
3665 if (NXT(1) == '#') {
3666 int val
= xmlParseCharRef(ctxt
);
3669 if (ctxt
->replaceEntities
) {
3670 if (len
> buf_size
- 10) {
3671 growBuffer(buf
, 10);
3676 * The reparsing will be done in xmlStringGetNodeList()
3677 * called by the attribute() function in SAX.c
3679 if (len
> buf_size
- 10) {
3680 growBuffer(buf
, 10);
3688 } else if (val
!= 0) {
3689 if (len
> buf_size
- 10) {
3690 growBuffer(buf
, 10);
3692 len
+= xmlCopyChar(0, &buf
[len
], val
);
3695 ent
= xmlParseEntityRef(ctxt
);
3698 ctxt
->nbentities
+= ent
->owner
;
3699 if ((ent
!= NULL
) &&
3700 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
3701 if (len
> buf_size
- 10) {
3702 growBuffer(buf
, 10);
3704 if ((ctxt
->replaceEntities
== 0) &&
3705 (ent
->content
[0] == '&')) {
3712 buf
[len
++] = ent
->content
[0];
3714 } else if ((ent
!= NULL
) &&
3715 (ctxt
->replaceEntities
!= 0)) {
3716 if (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) {
3717 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
,
3722 while (*current
!= 0) { /* non input consuming */
3723 if ((*current
== 0xD) || (*current
== 0xA) ||
3724 (*current
== 0x9)) {
3728 buf
[len
++] = *current
++;
3729 if (len
> buf_size
- 10) {
3730 growBuffer(buf
, 10);
3737 if (len
> buf_size
- 10) {
3738 growBuffer(buf
, 10);
3740 if (ent
->content
!= NULL
)
3741 buf
[len
++] = ent
->content
[0];
3743 } else if (ent
!= NULL
) {
3744 int i
= xmlStrlen(ent
->name
);
3745 const xmlChar
*cur
= ent
->name
;
3748 * This may look absurd but is needed to detect
3751 if ((ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) &&
3752 (ent
->content
!= NULL
)) {
3753 rep
= xmlStringDecodeEntities(ctxt
, ent
->content
,
3754 XML_SUBSTITUTE_REF
, 0, 0, 0);
3762 * Just output the reference
3765 while (len
> buf_size
- i
- 10) {
3766 growBuffer(buf
, i
+ 10);
3769 buf
[len
++] = *cur
++;
3774 if ((c
== 0x20) || (c
== 0xD) || (c
== 0xA) || (c
== 0x9)) {
3775 if ((len
!= 0) || (!normalize
)) {
3776 if ((!normalize
) || (!in_space
)) {
3777 COPY_BUF(l
,buf
,len
,0x20);
3778 while (len
> buf_size
- 10) {
3779 growBuffer(buf
, 10);
3786 COPY_BUF(l
,buf
,len
,c
);
3787 if (len
> buf_size
- 10) {
3788 growBuffer(buf
, 10);
3796 if ((in_space
) && (normalize
)) {
3797 while (buf
[len
- 1] == 0x20) len
--;
3801 xmlFatalErr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
, NULL
);
3802 } else if (RAW
!= limit
) {
3803 if ((c
!= 0) && (!IS_CHAR(c
))) {
3804 xmlFatalErrMsg(ctxt
, XML_ERR_INVALID_CHAR
,
3805 "invalid character in attribute value\n");
3807 xmlFatalErrMsg(ctxt
, XML_ERR_ATTRIBUTE_NOT_FINISHED
,
3808 "AttValue: ' expected\n");
3812 if (attlen
!= NULL
) *attlen
= len
;
3816 xmlErrMemory(ctxt
, NULL
);
3826 * @ctxt: an XML parser context
3828 * parse a value for an attribute
3829 * Note: the parser won't do substitution of entities here, this
3830 * will be handled later in xmlStringGetNodeList
3832 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3833 * "'" ([^<&'] | Reference)* "'"
3835 * 3.3.3 Attribute-Value Normalization:
3836 * Before the value of an attribute is passed to the application or
3837 * checked for validity, the XML processor must normalize it as follows:
3838 * - a character reference is processed by appending the referenced
3839 * character to the attribute value
3840 * - an entity reference is processed by recursively processing the
3841 * replacement text of the entity
3842 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3843 * appending #x20 to the normalized value, except that only a single
3844 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3845 * parsed entity or the literal entity value of an internal parsed entity
3846 * - other characters are processed by appending them to the normalized value
3847 * If the declared value is not CDATA, then the XML processor must further
3848 * process the normalized attribute value by discarding any leading and
3849 * trailing space (#x20) characters, and by replacing sequences of space
3850 * (#x20) characters by a single space (#x20) character.
3851 * All attributes for which no declaration has been read should be treated
3852 * by a non-validating parser as if declared CDATA.
3854 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3859 xmlParseAttValue(xmlParserCtxtPtr ctxt
) {
3860 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
)) return(NULL
);
3861 return(xmlParseAttValueInternal(ctxt
, NULL
, NULL
, 0));
3865 * xmlParseSystemLiteral:
3866 * @ctxt: an XML parser context
3868 * parse an XML Literal
3870 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3872 * Returns the SystemLiteral parsed or NULL
3876 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt
) {
3877 xmlChar
*buf
= NULL
;
3879 int size
= XML_PARSER_BUFFER_SIZE
;
3882 int state
= ctxt
->instate
;
3889 } else if (RAW
== '\'') {
3893 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_STARTED
, NULL
);
3897 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
3899 xmlErrMemory(ctxt
, NULL
);
3902 ctxt
->instate
= XML_PARSER_SYSTEM_LITERAL
;
3904 while ((IS_CHAR(cur
)) && (cur
!= stop
)) { /* checked */
3905 if (len
+ 5 >= size
) {
3909 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
3912 xmlErrMemory(ctxt
, NULL
);
3913 ctxt
->instate
= (xmlParserInputState
) state
;
3923 COPY_BUF(l
,buf
,len
,cur
);
3933 ctxt
->instate
= (xmlParserInputState
) state
;
3934 if (!IS_CHAR(cur
)) {
3935 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_FINISHED
, NULL
);
3943 * xmlParsePubidLiteral:
3944 * @ctxt: an XML parser context
3946 * parse an XML public literal
3948 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3950 * Returns the PubidLiteral parsed or NULL.
3954 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt
) {
3955 xmlChar
*buf
= NULL
;
3957 int size
= XML_PARSER_BUFFER_SIZE
;
3961 xmlParserInputState oldstate
= ctxt
->instate
;
3967 } else if (RAW
== '\'') {
3971 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_STARTED
, NULL
);
3974 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
3976 xmlErrMemory(ctxt
, NULL
);
3979 ctxt
->instate
= XML_PARSER_PUBLIC_LITERAL
;
3981 while ((IS_PUBIDCHAR_CH(cur
)) && (cur
!= stop
)) { /* checked */
3982 if (len
+ 1 >= size
) {
3986 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
3988 xmlErrMemory(ctxt
, NULL
);
4010 xmlFatalErr(ctxt
, XML_ERR_LITERAL_NOT_FINISHED
, NULL
);
4014 ctxt
->instate
= oldstate
;
4018 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt
, int cdata
);
4021 * used for the test in the inner loop of the char data testing
/*
 * test_char_data:
 *
 * Lookup table indexed by an input byte.  The scanning loop of
 * xmlParseCharData() keeps going while the entry for the current byte is
 * non-zero.  Entries are either the byte value itself or 0x00; the zero
 * entries are the control characters other than 0x09 (so CR and LF are
 * handled separately), '&', '<', ']' and every non-ASCII byte.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9 only */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no & */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no < */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ] */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, left to the implicit zero initialization */
};
4060 * @ctxt: an XML parser context
4061 * @cdata: int indicating whether we are within a CDATA section
4063 * parse a CharData section.
4064 * if we are within a CDATA section ']]>' marks an end of section.
4066 * The right angle bracket (>) may be represented using the string "&gt;",
4067 * and must, for compatibility, be escaped using "&gt;" or a character
4068 * reference when it appears in the string "]]>" in content, when that
4069 * string is not marking the end of a CDATA section.
4071 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4075 xmlParseCharData(xmlParserCtxtPtr ctxt
, int cdata
) {
4078 int line
= ctxt
->input
->line
;
4079 int col
= ctxt
->input
->col
;
4085 * Accelerated common case where input don't need to be
4086 * modified before passing it to the handler.
4089 in
= ctxt
->input
->cur
;
4092 while (*in
== 0x20) { in
++; ctxt
->input
->col
++; }
4095 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4097 } while (*in
== 0xA);
4098 goto get_more_space
;
4101 nbchar
= in
- ctxt
->input
->cur
;
4103 const xmlChar
*tmp
= ctxt
->input
->cur
;
4104 ctxt
->input
->cur
= in
;
4106 if ((ctxt
->sax
!= NULL
) &&
4107 (ctxt
->sax
->ignorableWhitespace
!=
4108 ctxt
->sax
->characters
)) {
4109 if (areBlanks(ctxt
, tmp
, nbchar
, 1)) {
4110 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4111 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4114 if (ctxt
->sax
->characters
!= NULL
)
4115 ctxt
->sax
->characters(ctxt
->userData
,
4117 if (*ctxt
->space
== -1)
4120 } else if ((ctxt
->sax
!= NULL
) &&
4121 (ctxt
->sax
->characters
!= NULL
)) {
4122 ctxt
->sax
->characters(ctxt
->userData
,
4130 ccol
= ctxt
->input
->col
;
4131 while (test_char_data
[*in
]) {
4135 ctxt
->input
->col
= ccol
;
4138 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4140 } while (*in
== 0xA);
4144 if ((in
[1] == ']') && (in
[2] == '>')) {
4145 xmlFatalErr(ctxt
, XML_ERR_MISPLACED_CDATA_END
, NULL
);
4146 ctxt
->input
->cur
= in
;
4153 nbchar
= in
- ctxt
->input
->cur
;
4155 if ((ctxt
->sax
!= NULL
) &&
4156 (ctxt
->sax
->ignorableWhitespace
!=
4157 ctxt
->sax
->characters
) &&
4158 (IS_BLANK_CH(*ctxt
->input
->cur
))) {
4159 const xmlChar
*tmp
= ctxt
->input
->cur
;
4160 ctxt
->input
->cur
= in
;
4162 if (areBlanks(ctxt
, tmp
, nbchar
, 0)) {
4163 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4164 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4167 if (ctxt
->sax
->characters
!= NULL
)
4168 ctxt
->sax
->characters(ctxt
->userData
,
4170 if (*ctxt
->space
== -1)
4173 line
= ctxt
->input
->line
;
4174 col
= ctxt
->input
->col
;
4175 } else if (ctxt
->sax
!= NULL
) {
4176 if (ctxt
->sax
->characters
!= NULL
)
4177 ctxt
->sax
->characters(ctxt
->userData
,
4178 ctxt
->input
->cur
, nbchar
);
4179 line
= ctxt
->input
->line
;
4180 col
= ctxt
->input
->col
;
4182 /* something really bad happened in the SAX callback */
4183 if (ctxt
->instate
!= XML_PARSER_CONTENT
)
4186 ctxt
->input
->cur
= in
;
4190 ctxt
->input
->cur
= in
;
4192 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4193 continue; /* while */
4205 in
= ctxt
->input
->cur
;
4206 } while (((*in
>= 0x20) && (*in
<= 0x7F)) || (*in
== 0x09));
4209 ctxt
->input
->line
= line
;
4210 ctxt
->input
->col
= col
;
4211 xmlParseCharDataComplex(ctxt
, cdata
);
4215 * xmlParseCharDataComplex:
4216 * @ctxt: an XML parser context
4217 * @cdata: int indicating whether we are within a CDATA section
4219 * parse a CharData section. This is the fallback function
4220 * of xmlParseCharData() when the parsing requires handling
4221 * of non-ASCII characters.
4224 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt
, int cdata
) {
4225 xmlChar buf
[XML_PARSER_BIG_BUFFER_SIZE
+ 5];
4233 while ((cur
!= '<') && /* checked */
4235 (IS_CHAR(cur
))) /* test also done in xmlCurrentChar() */ {
4236 if ((cur
== ']') && (NXT(1) == ']') &&
4240 xmlFatalErr(ctxt
, XML_ERR_MISPLACED_CDATA_END
, NULL
);
4243 COPY_BUF(l
,buf
,nbchar
,cur
);
4244 if (nbchar
>= XML_PARSER_BIG_BUFFER_SIZE
) {
4248 * OK the segment is to be consumed as chars.
4250 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
4251 if (areBlanks(ctxt
, buf
, nbchar
, 0)) {
4252 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4253 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
,
4256 if (ctxt
->sax
->characters
!= NULL
)
4257 ctxt
->sax
->characters(ctxt
->userData
, buf
, nbchar
);
4258 if ((ctxt
->sax
->characters
!=
4259 ctxt
->sax
->ignorableWhitespace
) &&
4260 (*ctxt
->space
== -1))
4265 /* something really bad happened in the SAX callback */
4266 if (ctxt
->instate
!= XML_PARSER_CONTENT
)
4280 * OK the segment is to be consumed as chars.
4282 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
4283 if (areBlanks(ctxt
, buf
, nbchar
, 0)) {
4284 if (ctxt
->sax
->ignorableWhitespace
!= NULL
)
4285 ctxt
->sax
->ignorableWhitespace(ctxt
->userData
, buf
, nbchar
);
4287 if (ctxt
->sax
->characters
!= NULL
)
4288 ctxt
->sax
->characters(ctxt
->userData
, buf
, nbchar
);
4289 if ((ctxt
->sax
->characters
!= ctxt
->sax
->ignorableWhitespace
) &&
4290 (*ctxt
->space
== -1))
4295 if ((cur
!= 0) && (!IS_CHAR(cur
))) {
4296 /* Generate the error and skip the offending character */
4297 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4298 "PCDATA invalid Char value %d\n",
4305 * xmlParseExternalID:
4306 * @ctxt: an XML parser context
4307 * @publicID: a xmlChar** receiving PubidLiteral
4308 * @strict: indicate whether we should restrict parsing to only
4309 * production [75], see NOTE below
4311 * Parse an External ID or a Public ID
4313 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4314 * 'PUBLIC' S PubidLiteral S SystemLiteral
4316 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4317 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4319 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4321 * Returns the SystemLiteral. In the 'PUBLIC' case
4322 * publicID also receives the PubidLiteral; if strict is off
4323 * it is possible to return NULL and have publicID set.
4327 xmlParseExternalID(xmlParserCtxtPtr ctxt
, xmlChar
**publicID
, int strict
) {
4328 xmlChar
*URI
= NULL
;
4333 if (CMP6(CUR_PTR
, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4335 if (!IS_BLANK_CH(CUR
)) {
4336 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4337 "Space required after 'SYSTEM'\n");
4340 URI
= xmlParseSystemLiteral(ctxt
);
4342 xmlFatalErr(ctxt
, XML_ERR_URI_REQUIRED
, NULL
);
4344 } else if (CMP6(CUR_PTR
, 'P', 'U', 'B', 'L', 'I', 'C')) {
4346 if (!IS_BLANK_CH(CUR
)) {
4347 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4348 "Space required after 'PUBLIC'\n");
4351 *publicID
= xmlParsePubidLiteral(ctxt
);
4352 if (*publicID
== NULL
) {
4353 xmlFatalErr(ctxt
, XML_ERR_PUBID_REQUIRED
, NULL
);
4357 * We don't handle [83] so "S SystemLiteral" is required.
4359 if (!IS_BLANK_CH(CUR
)) {
4360 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4361 "Space required after the Public Identifier\n");
4365 * We handle [83] so we return immediately, if
4366 * "S SystemLiteral" is not detected. From a purely parsing
4367 * point of view that's a nice mess.
4373 if (!IS_BLANK_CH(*ptr
)) return(NULL
);
4375 while (IS_BLANK_CH(*ptr
)) ptr
++; /* TODO: dangerous, fix ! */
4376 if ((*ptr
!= '\'') && (*ptr
!= '"')) return(NULL
);
4379 URI
= xmlParseSystemLiteral(ctxt
);
4381 xmlFatalErr(ctxt
, XML_ERR_URI_REQUIRED
, NULL
);
4388 * xmlParseCommentComplex:
4389 * @ctxt: an XML parser context
4390 * @buf: the already parsed part of the buffer
4391 * @len: number of bytes filled in the buffer
4392 * @size: allocated size of the buffer
4394 * Parse an XML (SGML) comment <!-- .... -->
4395 * The spec says that "For compatibility, the string "--" (double-hyphen)
4396 * must not occur within comments. "
4397 * This is the slow routine in case the accelerator for ascii didn't work
4399 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4402 xmlParseCommentComplex(xmlParserCtxtPtr ctxt
, xmlChar
*buf
, int len
, int size
) {
4409 inputid
= ctxt
->input
->id
;
4413 size
= XML_PARSER_BUFFER_SIZE
;
4414 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
4416 xmlErrMemory(ctxt
, NULL
);
4420 GROW
; /* Assure there's enough input data */
4423 goto not_terminated
;
4425 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4426 "xmlParseComment: invalid xmlChar value %d\n",
4434 goto not_terminated
;
4436 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4437 "xmlParseComment: invalid xmlChar value %d\n",
4445 goto not_terminated
;
4446 while (IS_CHAR(cur
) && /* checked */
4448 (r
!= '-') || (q
!= '-'))) {
4449 if ((r
== '-') && (q
== '-')) {
4450 xmlFatalErr(ctxt
, XML_ERR_HYPHEN_IN_COMMENT
, NULL
);
4452 if (len
+ 5 >= size
) {
4455 new_buf
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
4456 if (new_buf
== NULL
) {
4458 xmlErrMemory(ctxt
, NULL
);
4463 COPY_BUF(ql
,buf
,len
,q
);
4484 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4485 "Comment not terminated \n<!--%.50s\n", buf
);
4486 } else if (!IS_CHAR(cur
)) {
4487 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
4488 "xmlParseComment: invalid xmlChar value %d\n",
4491 if (inputid
!= ctxt
->input
->id
) {
4492 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
4493 "Comment doesn't start and stop in the same entity\n");
4496 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->comment
!= NULL
) &&
4497 (!ctxt
->disableSAX
))
4498 ctxt
->sax
->comment(ctxt
->userData
, buf
);
4503 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4504 "Comment not terminated\n", NULL
);
4511 * @ctxt: an XML parser context
4513 * Parse an XML (SGML) comment <!-- .... -->
4514 * The spec says that "For compatibility, the string "--" (double-hyphen)
4515 * must not occur within comments. "
4517 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4520 xmlParseComment(xmlParserCtxtPtr ctxt
) {
4521 xmlChar
*buf
= NULL
;
4522 int size
= XML_PARSER_BUFFER_SIZE
;
4524 xmlParserInputState state
;
4526 int nbchar
= 0, ccol
;
4530 * Check that there is a comment right here.
4532 if ((RAW
!= '<') || (NXT(1) != '!') ||
4533 (NXT(2) != '-') || (NXT(3) != '-')) return;
4534 state
= ctxt
->instate
;
4535 ctxt
->instate
= XML_PARSER_COMMENT
;
4536 inputid
= ctxt
->input
->id
;
4542 * Accelerated common case where input doesn't need to be
4543 * modified before passing it to the handler.
4545 in
= ctxt
->input
->cur
;
4549 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4551 } while (*in
== 0xA);
4554 ccol
= ctxt
->input
->col
;
4555 while (((*in
> '-') && (*in
<= 0x7F)) ||
4556 ((*in
>= 0x20) && (*in
< '-')) ||
4561 ctxt
->input
->col
= ccol
;
4564 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4566 } while (*in
== 0xA);
4569 nbchar
= in
- ctxt
->input
->cur
;
4571 * save current set of data
4574 if ((ctxt
->sax
!= NULL
) &&
4575 (ctxt
->sax
->comment
!= NULL
)) {
4577 if ((*in
== '-') && (in
[1] == '-'))
4580 size
= XML_PARSER_BUFFER_SIZE
+ nbchar
;
4581 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
4583 xmlErrMemory(ctxt
, NULL
);
4584 ctxt
->instate
= state
;
4588 } else if (len
+ nbchar
+ 1 >= size
) {
4590 size
+= len
+ nbchar
+ XML_PARSER_BUFFER_SIZE
;
4591 new_buf
= (xmlChar
*) xmlRealloc(buf
,
4592 size
* sizeof(xmlChar
));
4593 if (new_buf
== NULL
) {
4595 xmlErrMemory(ctxt
, NULL
);
4596 ctxt
->instate
= state
;
4601 memcpy(&buf
[len
], ctxt
->input
->cur
, nbchar
);
4606 ctxt
->input
->cur
= in
;
4609 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4614 ctxt
->input
->cur
= in
;
4616 ctxt
->input
->line
++; ctxt
->input
->col
= 1;
4617 continue; /* while */
4623 in
= ctxt
->input
->cur
;
4627 if (ctxt
->input
->id
!= inputid
) {
4628 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
4629 "comment doesn't start and stop in the same entity\n");
4632 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->comment
!= NULL
) &&
4633 (!ctxt
->disableSAX
)) {
4635 ctxt
->sax
->comment(ctxt
->userData
, buf
);
4637 ctxt
->sax
->comment(ctxt
->userData
, BAD_CAST
"");
4641 ctxt
->instate
= state
;
4645 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4646 "Comment not terminated \n<!--%.50s\n",
4649 xmlFatalErrMsgStr(ctxt
, XML_ERR_COMMENT_NOT_FINISHED
,
4650 "Comment not terminated \n", NULL
);
4658 } while (((*in
>= 0x20) && (*in
<= 0x7F)) || (*in
== 0x09));
4659 xmlParseCommentComplex(ctxt
, buf
, len
, size
);
4660 ctxt
->instate
= state
;
4667 * @ctxt: an XML parser context
4669 * parse the name of a PI
4671 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4673 * Returns the PITarget name or NULL
4677 xmlParsePITarget(xmlParserCtxtPtr ctxt
) {
4678 const xmlChar
*name
;
4680 name
= xmlParseName(ctxt
);
4681 if ((name
!= NULL
) &&
4682 ((name
[0] == 'x') || (name
[0] == 'X')) &&
4683 ((name
[1] == 'm') || (name
[1] == 'M')) &&
4684 ((name
[2] == 'l') || (name
[2] == 'L'))) {
4686 if ((name
[0] == 'x') && (name
[1] == 'm') &&
4687 (name
[2] == 'l') && (name
[3] == 0)) {
4688 xmlFatalErrMsg(ctxt
, XML_ERR_RESERVED_XML_NAME
,
4689 "XML declaration allowed only at the start of the document\n");
4691 } else if (name
[3] == 0) {
4692 xmlFatalErr(ctxt
, XML_ERR_RESERVED_XML_NAME
, NULL
);
4696 if (xmlW3CPIs
[i
] == NULL
) break;
4697 if (xmlStrEqual(name
, (const xmlChar
*)xmlW3CPIs
[i
]))
4700 xmlWarningMsg(ctxt
, XML_ERR_RESERVED_XML_NAME
,
4701 "xmlParsePITarget: invalid name prefix 'xml'\n",
4704 if ((name
!= NULL
) && (xmlStrchr(name
, ':') != NULL
)) {
4705 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
4706 "colon are forbidden from PI names '%s'\n", name
, NULL
, NULL
);
4711 #ifdef LIBXML_CATALOG_ENABLED
4713 * xmlParseCatalogPI:
4714 * @ctxt: an XML parser context
4715 * @catalog: the PI value string
4717 * parse an XML Catalog Processing Instruction.
4719 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4721 * Occurs only if allowed by the user and if happening in the Misc
4722 * part of the document before any doctype information
4723 * This will add the given catalog to the parsing context in order
4724 * to be used if there is a resolution need further down in the document
4728 xmlParseCatalogPI(xmlParserCtxtPtr ctxt
, const xmlChar
*catalog
) {
4729 xmlChar
*URL
= NULL
;
4730 const xmlChar
*tmp
, *base
;
4734 while (IS_BLANK_CH(*tmp
)) tmp
++;
4735 if (xmlStrncmp(tmp
, BAD_CAST
"catalog", 7))
4738 while (IS_BLANK_CH(*tmp
)) tmp
++;
4743 while (IS_BLANK_CH(*tmp
)) tmp
++;
4745 if ((marker
!= '\'') && (marker
!= '"'))
4749 while ((*tmp
!= 0) && (*tmp
!= marker
)) tmp
++;
4752 URL
= xmlStrndup(base
, tmp
- base
);
4754 while (IS_BLANK_CH(*tmp
)) tmp
++;
4759 ctxt
->catalogs
= xmlCatalogAddLocal(ctxt
->catalogs
, URL
);
4765 xmlWarningMsg(ctxt
, XML_WAR_CATALOG_PI
,
4766 "Catalog PI syntax error: %s\n",
4775 * @ctxt: an XML parser context
4777 * parse an XML Processing Instruction.
4779 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4781 * The processing is transferred to SAX once parsed.
4785 xmlParsePI(xmlParserCtxtPtr ctxt
) {
4786 xmlChar
*buf
= NULL
;
4788 int size
= XML_PARSER_BUFFER_SIZE
;
4790 const xmlChar
*target
;
4791 xmlParserInputState state
;
4794 if ((RAW
== '<') && (NXT(1) == '?')) {
4795 xmlParserInputPtr input
= ctxt
->input
;
4796 state
= ctxt
->instate
;
4797 ctxt
->instate
= XML_PARSER_PI
;
4799 * this is a Processing Instruction.
4805 * Parse the target name and check for special support like
4808 target
= xmlParsePITarget(ctxt
);
4809 if (target
!= NULL
) {
4810 if ((RAW
== '?') && (NXT(1) == '>')) {
4811 if (input
!= ctxt
->input
) {
4812 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
4813 "PI declaration doesn't start and stop in the same entity\n");
4820 if ((ctxt
->sax
) && (!ctxt
->disableSAX
) &&
4821 (ctxt
->sax
->processingInstruction
!= NULL
))
4822 ctxt
->sax
->processingInstruction(ctxt
->userData
,
4824 ctxt
->instate
= state
;
4827 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
4829 xmlErrMemory(ctxt
, NULL
);
4830 ctxt
->instate
= state
;
4834 if (!IS_BLANK(cur
)) {
4835 xmlFatalErrMsgStr(ctxt
, XML_ERR_SPACE_REQUIRED
,
4836 "ParsePI: PI %s space expected\n", target
);
4840 while (IS_CHAR(cur
) && /* checked */
4841 ((cur
!= '?') || (NXT(1) != '>'))) {
4842 if (len
+ 5 >= size
) {
4846 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
4848 xmlErrMemory(ctxt
, NULL
);
4850 ctxt
->instate
= state
;
4860 COPY_BUF(l
,buf
,len
,cur
);
4871 xmlFatalErrMsgStr(ctxt
, XML_ERR_PI_NOT_FINISHED
,
4872 "ParsePI: PI %s never end ...\n", target
);
4874 if (input
!= ctxt
->input
) {
4875 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4876 "PI declaration doesn't start and stop in the same entity\n");
4880 #ifdef LIBXML_CATALOG_ENABLED
4881 if (((state
== XML_PARSER_MISC
) ||
4882 (state
== XML_PARSER_START
)) &&
4883 (xmlStrEqual(target
, XML_CATALOG_PI
))) {
4884 xmlCatalogAllow allow
= xmlCatalogGetDefaults();
4885 if ((allow
== XML_CATA_ALLOW_DOCUMENT
) ||
4886 (allow
== XML_CATA_ALLOW_ALL
))
4887 xmlParseCatalogPI(ctxt
, buf
);
4895 if ((ctxt
->sax
) && (!ctxt
->disableSAX
) &&
4896 (ctxt
->sax
->processingInstruction
!= NULL
))
4897 ctxt
->sax
->processingInstruction(ctxt
->userData
,
4902 xmlFatalErr(ctxt
, XML_ERR_PI_NOT_STARTED
, NULL
);
4904 ctxt
->instate
= state
;
4909 * xmlParseNotationDecl:
4910 * @ctxt: an XML parser context
4912 * parse a notation declaration
4914 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4916 * Hence there are actually 3 choices:
4917 * 'PUBLIC' S PubidLiteral
4918 * 'PUBLIC' S PubidLiteral S SystemLiteral
4919 * and 'SYSTEM' S SystemLiteral
4921 * See the NOTE on xmlParseExternalID().
4925 xmlParseNotationDecl(xmlParserCtxtPtr ctxt
) {
4926 const xmlChar
*name
;
4930 if (CMP10(CUR_PTR
, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4931 xmlParserInputPtr input
= ctxt
->input
;
4934 if (!IS_BLANK_CH(CUR
)) {
4935 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4936 "Space required after '<!NOTATION'\n");
4941 name
= xmlParseName(ctxt
);
4943 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_STARTED
, NULL
);
4946 if (!IS_BLANK_CH(CUR
)) {
4947 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4948 "Space required after the NOTATION name'\n");
4951 if (xmlStrchr(name
, ':') != NULL
) {
4952 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
4953 "colon are forbidden from notation names '%s'\n",
4961 Systemid
= xmlParseExternalID(ctxt
, &Pubid
, 0);
4965 if (input
!= ctxt
->input
) {
4966 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
4967 "Notation declaration doesn't start and stop in the same entity\n");
4970 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
4971 (ctxt
->sax
->notationDecl
!= NULL
))
4972 ctxt
->sax
->notationDecl(ctxt
->userData
, name
, Pubid
, Systemid
);
4974 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_FINISHED
, NULL
);
4976 if (Systemid
!= NULL
) xmlFree(Systemid
);
4977 if (Pubid
!= NULL
) xmlFree(Pubid
);
4982 * xmlParseEntityDecl:
4983 * @ctxt: an XML parser context
4985 * parse <!ENTITY declarations
4987 * [70] EntityDecl ::= GEDecl | PEDecl
4989 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4991 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4993 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4995 * [74] PEDef ::= EntityValue | ExternalID
4997 * [76] NDataDecl ::= S 'NDATA' S Name
4999 * [ VC: Notation Declared ]
5000 * The Name must match the declared name of a notation.
5004 xmlParseEntityDecl(xmlParserCtxtPtr ctxt
) {
5005 const xmlChar
*name
= NULL
;
5006 xmlChar
*value
= NULL
;
5007 xmlChar
*URI
= NULL
, *literal
= NULL
;
5008 const xmlChar
*ndata
= NULL
;
5009 int isParameter
= 0;
5010 xmlChar
*orig
= NULL
;
5013 /* GROW; done in the caller */
5014 if (CMP8(CUR_PTR
, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5015 xmlParserInputPtr input
= ctxt
->input
;
5018 skipped
= SKIP_BLANKS
;
5020 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5021 "Space required after '<!ENTITY'\n");
5026 skipped
= SKIP_BLANKS
;
5028 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5029 "Space required after '%'\n");
5034 name
= xmlParseName(ctxt
);
5036 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5037 "xmlParseEntityDecl: no name\n");
5040 if (xmlStrchr(name
, ':') != NULL
) {
5041 xmlNsErr(ctxt
, XML_NS_ERR_COLON
,
5042 "colon are forbidden from entities names '%s'\n",
5045 skipped
= SKIP_BLANKS
;
5047 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5048 "Space required after the entity name\n");
5051 ctxt
->instate
= XML_PARSER_ENTITY_DECL
;
5053 * handle the various cases of definitions...
5056 if ((RAW
== '"') || (RAW
== '\'')) {
5057 value
= xmlParseEntityValue(ctxt
, &orig
);
5059 if ((ctxt
->sax
!= NULL
) &&
5060 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5061 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5062 XML_INTERNAL_PARAMETER_ENTITY
,
5066 URI
= xmlParseExternalID(ctxt
, &literal
, 1);
5067 if ((URI
== NULL
) && (literal
== NULL
)) {
5068 xmlFatalErr(ctxt
, XML_ERR_VALUE_REQUIRED
, NULL
);
5073 uri
= xmlParseURI((const char *) URI
);
5075 xmlErrMsgStr(ctxt
, XML_ERR_INVALID_URI
,
5076 "Invalid URI: %s\n", URI
);
5078 * This really ought to be a well formedness error
5079 * but the XML Core WG decided otherwise c.f. issue
5080 * E26 of the XML errata.
5083 if (uri
->fragment
!= NULL
) {
5085 * Okay this is foolish to block those but not
5088 xmlFatalErr(ctxt
, XML_ERR_URI_FRAGMENT
, NULL
);
5090 if ((ctxt
->sax
!= NULL
) &&
5091 (!ctxt
->disableSAX
) &&
5092 (ctxt
->sax
->entityDecl
!= NULL
))
5093 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5094 XML_EXTERNAL_PARAMETER_ENTITY
,
5095 literal
, URI
, NULL
);
5102 if ((RAW
== '"') || (RAW
== '\'')) {
5103 value
= xmlParseEntityValue(ctxt
, &orig
);
5104 if ((ctxt
->sax
!= NULL
) &&
5105 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5106 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5107 XML_INTERNAL_GENERAL_ENTITY
,
5110 * For expat compatibility in SAX mode.
5112 if ((ctxt
->myDoc
== NULL
) ||
5113 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
))) {
5114 if (ctxt
->myDoc
== NULL
) {
5115 ctxt
->myDoc
= xmlNewDoc(SAX_COMPAT_MODE
);
5116 if (ctxt
->myDoc
== NULL
) {
5117 xmlErrMemory(ctxt
, "New Doc failed");
5120 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
5122 if (ctxt
->myDoc
->intSubset
== NULL
)
5123 ctxt
->myDoc
->intSubset
= xmlNewDtd(ctxt
->myDoc
,
5124 BAD_CAST
"fake", NULL
, NULL
);
5126 xmlSAX2EntityDecl(ctxt
, name
, XML_INTERNAL_GENERAL_ENTITY
,
5130 URI
= xmlParseExternalID(ctxt
, &literal
, 1);
5131 if ((URI
== NULL
) && (literal
== NULL
)) {
5132 xmlFatalErr(ctxt
, XML_ERR_VALUE_REQUIRED
, NULL
);
5137 uri
= xmlParseURI((const char *)URI
);
5139 xmlErrMsgStr(ctxt
, XML_ERR_INVALID_URI
,
5140 "Invalid URI: %s\n", URI
);
5142 * This really ought to be a well formedness error
5143 * but the XML Core WG decided otherwise c.f. issue
5144 * E26 of the XML errata.
5147 if (uri
->fragment
!= NULL
) {
5149 * Okay this is foolish to block those but not
5152 xmlFatalErr(ctxt
, XML_ERR_URI_FRAGMENT
, NULL
);
5157 if ((RAW
!= '>') && (!IS_BLANK_CH(CUR
))) {
5158 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5159 "Space required before 'NDATA'\n");
5162 if (CMP5(CUR_PTR
, 'N', 'D', 'A', 'T', 'A')) {
5164 if (!IS_BLANK_CH(CUR
)) {
5165 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5166 "Space required after 'NDATA'\n");
5169 ndata
= xmlParseName(ctxt
);
5170 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
5171 (ctxt
->sax
->unparsedEntityDecl
!= NULL
))
5172 ctxt
->sax
->unparsedEntityDecl(ctxt
->userData
, name
,
5173 literal
, URI
, ndata
);
5175 if ((ctxt
->sax
!= NULL
) &&
5176 (!ctxt
->disableSAX
) && (ctxt
->sax
->entityDecl
!= NULL
))
5177 ctxt
->sax
->entityDecl(ctxt
->userData
, name
,
5178 XML_EXTERNAL_GENERAL_PARSED_ENTITY
,
5179 literal
, URI
, NULL
);
5181 * For expat compatibility in SAX mode.
5182 * assuming the entity replacement was asked for
5184 if ((ctxt
->replaceEntities
!= 0) &&
5185 ((ctxt
->myDoc
== NULL
) ||
5186 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
)))) {
5187 if (ctxt
->myDoc
== NULL
) {
5188 ctxt
->myDoc
= xmlNewDoc(SAX_COMPAT_MODE
);
5189 if (ctxt
->myDoc
== NULL
) {
5190 xmlErrMemory(ctxt
, "New Doc failed");
5193 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
5196 if (ctxt
->myDoc
->intSubset
== NULL
)
5197 ctxt
->myDoc
->intSubset
= xmlNewDtd(ctxt
->myDoc
,
5198 BAD_CAST
"fake", NULL
, NULL
);
5199 xmlSAX2EntityDecl(ctxt
, name
,
5200 XML_EXTERNAL_GENERAL_PARSED_ENTITY
,
5201 literal
, URI
, NULL
);
5208 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_NOT_FINISHED
,
5209 "xmlParseEntityDecl: entity %s not terminated\n", name
);
5211 if (input
!= ctxt
->input
) {
5212 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5213 "Entity declaration doesn't start and stop in the same entity\n");
5219 * Ugly mechanism to save the raw entity value.
5221 xmlEntityPtr cur
= NULL
;
5224 if ((ctxt
->sax
!= NULL
) &&
5225 (ctxt
->sax
->getParameterEntity
!= NULL
))
5226 cur
= ctxt
->sax
->getParameterEntity(ctxt
->userData
, name
);
5228 if ((ctxt
->sax
!= NULL
) &&
5229 (ctxt
->sax
->getEntity
!= NULL
))
5230 cur
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
5231 if ((cur
== NULL
) && (ctxt
->userData
==ctxt
)) {
5232 cur
= xmlSAX2GetEntity(ctxt
, name
);
5236 if (cur
->orig
!= NULL
)
5243 if (value
!= NULL
) xmlFree(value
);
5244 if (URI
!= NULL
) xmlFree(URI
);
5245 if (literal
!= NULL
) xmlFree(literal
);
5250 * xmlParseDefaultDecl:
5251 * @ctxt: an XML parser context
5252 * @value: Receive a possible fixed default value for the attribute
5254 * Parse an attribute default declaration
5256 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5258 * [ VC: Required Attribute ]
5259 * if the default declaration is the keyword #REQUIRED, then the
5260 * attribute must be specified for all elements of the type in the
5261 * attribute-list declaration.
5263 * [ VC: Attribute Default Legal ]
5264 * The declared default value must meet the lexical constraints of
5265 * the declared attribute type c.f. xmlValidateAttributeDecl()
5267 * [ VC: Fixed Attribute Default ]
5268 * if an attribute has a default value declared with the #FIXED
5269 * keyword, instances of that attribute must match the default value.
5271 * [ WFC: No < in Attribute Values ]
5272 * handled in xmlParseAttValue()
5274 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5275 * or XML_ATTRIBUTE_FIXED.
5279 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt
, xmlChar
**value
) {
5284 if (CMP9(CUR_PTR
, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5286 return(XML_ATTRIBUTE_REQUIRED
);
5288 if (CMP8(CUR_PTR
, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5290 return(XML_ATTRIBUTE_IMPLIED
);
5292 val
= XML_ATTRIBUTE_NONE
;
5293 if (CMP6(CUR_PTR
, '#', 'F', 'I', 'X', 'E', 'D')) {
5295 val
= XML_ATTRIBUTE_FIXED
;
5296 if (!IS_BLANK_CH(CUR
)) {
5297 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5298 "Space required after '#FIXED'\n");
5302 ret
= xmlParseAttValue(ctxt
);
5303 ctxt
->instate
= XML_PARSER_DTD
;
5305 xmlFatalErrMsg(ctxt
, (xmlParserErrors
)ctxt
->errNo
,
5306 "Attribute default value declaration error\n");
5313 * xmlParseNotationType:
5314 * @ctxt: an XML parser context
5316 * parse a Notation attribute type.
5318 * Note: the leading 'NOTATION' S part has already been parsed...
5320 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5322 * [ VC: Notation Attributes ]
5323 * Values of this type must match one of the notation names included
5324 * in the declaration; all notation names in the declaration must be declared.
5326 * Returns: the notation attribute tree built while parsing
5330 xmlParseNotationType(xmlParserCtxtPtr ctxt
) {
5331 const xmlChar
*name
;
5332 xmlEnumerationPtr ret
= NULL
, last
= NULL
, cur
, tmp
;
5335 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_STARTED
, NULL
);
5342 name
= xmlParseName(ctxt
);
5344 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5345 "Name expected in NOTATION declaration\n");
5346 xmlFreeEnumeration(ret
);
5350 while (tmp
!= NULL
) {
5351 if (xmlStrEqual(name
, tmp
->name
)) {
5352 xmlValidityError(ctxt
, XML_DTD_DUP_TOKEN
,
5353 "standalone: attribute notation value token %s duplicated\n",
5355 if (!xmlDictOwns(ctxt
->dict
, name
))
5356 xmlFree((xmlChar
*) name
);
5362 cur
= xmlCreateEnumeration(name
);
5364 xmlFreeEnumeration(ret
);
5367 if (last
== NULL
) ret
= last
= cur
;
5374 } while (RAW
== '|');
5376 xmlFatalErr(ctxt
, XML_ERR_NOTATION_NOT_FINISHED
, NULL
);
5377 xmlFreeEnumeration(ret
);
5385 * xmlParseEnumerationType:
5386 * @ctxt: an XML parser context
5388 * parse an Enumeration attribute type.
5390 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5392 * [ VC: Enumeration ]
5393 * Values of this type must match one of the Nmtoken tokens in
5396 * Returns: the enumeration attribute tree built while parsing
5400 xmlParseEnumerationType(xmlParserCtxtPtr ctxt
) {
5402 xmlEnumerationPtr ret
= NULL
, last
= NULL
, cur
, tmp
;
5405 xmlFatalErr(ctxt
, XML_ERR_ATTLIST_NOT_STARTED
, NULL
);
5412 name
= xmlParseNmtoken(ctxt
);
5414 xmlFatalErr(ctxt
, XML_ERR_NMTOKEN_REQUIRED
, NULL
);
5418 while (tmp
!= NULL
) {
5419 if (xmlStrEqual(name
, tmp
->name
)) {
5420 xmlValidityError(ctxt
, XML_DTD_DUP_TOKEN
,
5421 "standalone: attribute enumeration value token %s duplicated\n",
5423 if (!xmlDictOwns(ctxt
->dict
, name
))
5430 cur
= xmlCreateEnumeration(name
);
5431 if (!xmlDictOwns(ctxt
->dict
, name
))
5434 xmlFreeEnumeration(ret
);
5437 if (last
== NULL
) ret
= last
= cur
;
5444 } while (RAW
== '|');
5446 xmlFatalErr(ctxt
, XML_ERR_ATTLIST_NOT_FINISHED
, NULL
);
5454 * xmlParseEnumeratedType:
5455 * @ctxt: an XML parser context
5456 * @tree: the enumeration tree built while parsing
5458 * parse an Enumerated attribute type.
5460 * [57] EnumeratedType ::= NotationType | Enumeration
5462 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5465 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5469 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt
, xmlEnumerationPtr
*tree
) {
5470 if (CMP8(CUR_PTR
, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5472 if (!IS_BLANK_CH(CUR
)) {
5473 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5474 "Space required after 'NOTATION'\n");
5478 *tree
= xmlParseNotationType(ctxt
);
5479 if (*tree
== NULL
) return(0);
5480 return(XML_ATTRIBUTE_NOTATION
);
5482 *tree
= xmlParseEnumerationType(ctxt
);
5483 if (*tree
== NULL
) return(0);
5484 return(XML_ATTRIBUTE_ENUMERATION
);
5488 * xmlParseAttributeType:
5489 * @ctxt: an XML parser context
5490 * @tree: the enumeration tree built while parsing
5492 * parse the type of an attribute in an attribute list declaration
5494 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5496 * [55] StringType ::= 'CDATA'
5498 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5499 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5501 * Validity constraints for attribute values syntax are checked in
5502 * xmlValidateAttributeValue()
5505 * Values of type ID must match the Name production. A name must not
5506 * appear more than once in an XML document as a value of this type;
5507 * i.e., ID values must uniquely identify the elements which bear them.
5509 * [ VC: One ID per Element Type ]
5510 * No element type may have more than one ID attribute specified.
5512 * [ VC: ID Attribute Default ]
5513 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5516 * Values of type IDREF must match the Name production, and values
5517 * of type IDREFS must match Names; each IDREF Name must match the value
5518 * of an ID attribute on some element in the XML document; i.e. IDREF
5519 * values must match the value of some ID attribute.
5521 * [ VC: Entity Name ]
5522 * Values of type ENTITY must match the Name production, values
5523 * of type ENTITIES must match Names; each Entity Name must match the
5524 * name of an unparsed entity declared in the DTD.
5526 * [ VC: Name Token ]
5527 * Values of type NMTOKEN must match the Nmtoken production; values
5528 * of type NMTOKENS must match Nmtokens.
5530 * Returns the attribute type
5533 xmlParseAttributeType(xmlParserCtxtPtr ctxt
, xmlEnumerationPtr
*tree
) {
5535 if (CMP5(CUR_PTR
, 'C', 'D', 'A', 'T', 'A')) {
5537 return(XML_ATTRIBUTE_CDATA
);
5538 } else if (CMP6(CUR_PTR
, 'I', 'D', 'R', 'E', 'F', 'S')) {
5540 return(XML_ATTRIBUTE_IDREFS
);
5541 } else if (CMP5(CUR_PTR
, 'I', 'D', 'R', 'E', 'F')) {
5543 return(XML_ATTRIBUTE_IDREF
);
5544 } else if ((RAW
== 'I') && (NXT(1) == 'D')) {
5546 return(XML_ATTRIBUTE_ID
);
5547 } else if (CMP6(CUR_PTR
, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5549 return(XML_ATTRIBUTE_ENTITY
);
5550 } else if (CMP8(CUR_PTR
, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5552 return(XML_ATTRIBUTE_ENTITIES
);
5553 } else if (CMP8(CUR_PTR
, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5555 return(XML_ATTRIBUTE_NMTOKENS
);
5556 } else if (CMP7(CUR_PTR
, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5558 return(XML_ATTRIBUTE_NMTOKEN
);
5560 return(xmlParseEnumeratedType(ctxt
, tree
));
5564 * xmlParseAttributeListDecl:
5565 * @ctxt: an XML parser context
5567 * parse the Attribute list def for an element
5569 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5571 * [53] AttDef ::= S Name S AttType S DefaultDecl
5575 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt
) {
5576 const xmlChar
*elemName
;
5577 const xmlChar
*attrName
;
5578 xmlEnumerationPtr tree
;
5580 if (CMP9(CUR_PTR
, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5581 xmlParserInputPtr input
= ctxt
->input
;
5584 if (!IS_BLANK_CH(CUR
)) {
5585 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5586 "Space required after '<!ATTLIST'\n");
5589 elemName
= xmlParseName(ctxt
);
5590 if (elemName
== NULL
) {
5591 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5592 "ATTLIST: no name for Element\n");
5597 while (RAW
!= '>') {
5598 const xmlChar
*check
= CUR_PTR
;
5601 xmlChar
*defaultValue
= NULL
;
5605 attrName
= xmlParseName(ctxt
);
5606 if (attrName
== NULL
) {
5607 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5608 "ATTLIST: no name for Attribute\n");
5612 if (!IS_BLANK_CH(CUR
)) {
5613 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5614 "Space required after the attribute name\n");
5619 type
= xmlParseAttributeType(ctxt
, &tree
);
5625 if (!IS_BLANK_CH(CUR
)) {
5626 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5627 "Space required after the attribute type\n");
5629 xmlFreeEnumeration(tree
);
5634 def
= xmlParseDefaultDecl(ctxt
, &defaultValue
);
5636 if (defaultValue
!= NULL
)
5637 xmlFree(defaultValue
);
5639 xmlFreeEnumeration(tree
);
5642 if ((type
!= XML_ATTRIBUTE_CDATA
) && (defaultValue
!= NULL
))
5643 xmlAttrNormalizeSpace(defaultValue
, defaultValue
);
5647 if (!IS_BLANK_CH(CUR
)) {
5648 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
5649 "Space required after the attribute default value\n");
5650 if (defaultValue
!= NULL
)
5651 xmlFree(defaultValue
);
5653 xmlFreeEnumeration(tree
);
5658 if (check
== CUR_PTR
) {
5659 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
5660 "in xmlParseAttributeListDecl\n");
5661 if (defaultValue
!= NULL
)
5662 xmlFree(defaultValue
);
5664 xmlFreeEnumeration(tree
);
5667 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
5668 (ctxt
->sax
->attributeDecl
!= NULL
))
5669 ctxt
->sax
->attributeDecl(ctxt
->userData
, elemName
, attrName
,
5670 type
, def
, defaultValue
, tree
);
5671 else if (tree
!= NULL
)
5672 xmlFreeEnumeration(tree
);
5674 if ((ctxt
->sax2
) && (defaultValue
!= NULL
) &&
5675 (def
!= XML_ATTRIBUTE_IMPLIED
) &&
5676 (def
!= XML_ATTRIBUTE_REQUIRED
)) {
5677 xmlAddDefAttrs(ctxt
, elemName
, attrName
, defaultValue
);
5680 xmlAddSpecialAttr(ctxt
, elemName
, attrName
, type
);
5682 if (defaultValue
!= NULL
)
5683 xmlFree(defaultValue
);
5687 if (input
!= ctxt
->input
) {
5688 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5689 "Attribute list declaration doesn't start and stop in the same entity\n",
5698 * xmlParseElementMixedContentDecl:
5699 * @ctxt: an XML parser context
5700 * @inputchk: the input used for the current entity, needed for boundary checks
5702 * parse the declaration for a Mixed Element content
5703 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5705 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5706 * '(' S? '#PCDATA' S? ')'
5708 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5710 * [ VC: No Duplicate Types ]
5711 * The same name must not appear more than once in a single
5712 * mixed-content declaration.
5714 * returns: the list of the xmlElementContentPtr describing the element choices
5716 xmlElementContentPtr
5717 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt
, int inputchk
) {
5718 xmlElementContentPtr ret
= NULL
, cur
= NULL
, n
;
5719 const xmlChar
*elem
= NULL
;
5722 if (CMP7(CUR_PTR
, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5727 if ((ctxt
->validate
) && (ctxt
->input
->id
!= inputchk
)) {
5728 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5729 "Element content declaration doesn't start and stop in the same entity\n",
5733 ret
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_PCDATA
);
5737 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
5742 if ((RAW
== '(') || (RAW
== '|')) {
5743 ret
= cur
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_PCDATA
);
5744 if (ret
== NULL
) return(NULL
);
5746 while (RAW
== '|') {
5749 ret
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
5750 if (ret
== NULL
) return(NULL
);
5756 n
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
5757 if (n
== NULL
) return(NULL
);
5758 n
->c1
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
5767 elem
= xmlParseName(ctxt
);
5769 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
5770 "xmlParseElementMixedContentDecl : Name expected\n");
5771 xmlFreeDocElementContent(ctxt
->myDoc
, cur
);
5777 if ((RAW
== ')') && (NXT(1) == '*')) {
5779 cur
->c2
= xmlNewDocElementContent(ctxt
->myDoc
, elem
,
5780 XML_ELEMENT_CONTENT_ELEMENT
);
5781 if (cur
->c2
!= NULL
)
5782 cur
->c2
->parent
= cur
;
5785 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
5786 if ((ctxt
->validate
) && (ctxt
->input
->id
!= inputchk
)) {
5787 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
5788 "Element content declaration doesn't start and stop in the same entity\n",
5793 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
5794 xmlFatalErr(ctxt
, XML_ERR_MIXED_NOT_STARTED
, NULL
);
5799 xmlFatalErr(ctxt
, XML_ERR_PCDATA_REQUIRED
, NULL
);
5805 * xmlParseElementChildrenContentDeclPriv:
5806 * @ctxt: an XML parser context
5807 * @inputchk: the input used for the current entity, needed for boundary checks
5808 * @depth: the level of recursion
5810 * parse the declaration for a Mixed Element content
5811 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5814 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5816 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5818 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5820 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5822 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5823 * TODO Parameter-entity replacement text must be properly nested
5824 * with parenthesized groups. That is to say, if either of the
5825 * opening or closing parentheses in a choice, seq, or Mixed
5826 * construct is contained in the replacement text for a parameter
5827 * entity, both must be contained in the same replacement text. For
5828 * interoperability, if a parameter-entity reference appears in a
5829 * choice, seq, or Mixed construct, its replacement text should not
5830 * be empty, and neither the first nor last non-blank character of
5831 * the replacement text should be a connector (| or ,).
5833 * Returns the tree of xmlElementContentPtr describing the element
5836 static xmlElementContentPtr
5837 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt
, int inputchk
,
5839 xmlElementContentPtr ret
= NULL
, cur
= NULL
, last
= NULL
, op
= NULL
;
5840 const xmlChar
*elem
;
5843 if (((depth
> 128) && ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
5845 xmlFatalErrMsgInt(ctxt
, XML_ERR_ELEMCONTENT_NOT_FINISHED
,
5846 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5853 int inputid
= ctxt
->input
->id
;
5855 /* Recurse on first child */
5858 cur
= ret
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
,
5863 elem
= xmlParseName(ctxt
);
5865 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
, NULL
);
5868 cur
= ret
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
5870 xmlErrMemory(ctxt
, NULL
);
5875 cur
->ocur
= XML_ELEMENT_CONTENT_OPT
;
5877 } else if (RAW
== '*') {
5878 cur
->ocur
= XML_ELEMENT_CONTENT_MULT
;
5880 } else if (RAW
== '+') {
5881 cur
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
5884 cur
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
5890 while (RAW
!= ')') {
5892 * Each loop we parse one separator and one element.
5895 if (type
== 0) type
= CUR
;
5898 * Detect "Name | Name , Name" error
5900 else if (type
!= CUR
) {
5901 xmlFatalErrMsgInt(ctxt
, XML_ERR_SEPARATOR_REQUIRED
,
5902 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5904 if ((last
!= NULL
) && (last
!= ret
))
5905 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
5907 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
5912 op
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_SEQ
);
5914 if ((last
!= NULL
) && (last
!= ret
))
5915 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
5916 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
5934 } else if (RAW
== '|') {
5935 if (type
== 0) type
= CUR
;
5938 * Detect "Name , Name | Name" error
5940 else if (type
!= CUR
) {
5941 xmlFatalErrMsgInt(ctxt
, XML_ERR_SEPARATOR_REQUIRED
,
5942 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5944 if ((last
!= NULL
) && (last
!= ret
))
5945 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
5947 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
5952 op
= xmlNewDocElementContent(ctxt
->myDoc
, NULL
, XML_ELEMENT_CONTENT_OR
);
5954 if ((last
!= NULL
) && (last
!= ret
))
5955 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
5957 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
5976 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_FINISHED
, NULL
);
5977 if ((last
!= NULL
) && (last
!= ret
))
5978 xmlFreeDocElementContent(ctxt
->myDoc
, last
);
5980 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
5987 int inputid
= ctxt
->input
->id
;
5988 /* Recurse on second child */
5991 last
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
,
5995 elem
= xmlParseName(ctxt
);
5997 xmlFatalErr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
, NULL
);
5999 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6002 last
= xmlNewDocElementContent(ctxt
->myDoc
, elem
, XML_ELEMENT_CONTENT_ELEMENT
);
6005 xmlFreeDocElementContent(ctxt
->myDoc
, ret
);
6009 last
->ocur
= XML_ELEMENT_CONTENT_OPT
;
6011 } else if (RAW
== '*') {
6012 last
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6014 } else if (RAW
== '+') {
6015 last
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
6018 last
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6024 if ((cur
!= NULL
) && (last
!= NULL
)) {
6029 if ((ctxt
->validate
) && (ctxt
->input
->id
!= inputchk
)) {
6030 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6031 "Element content declaration doesn't start and stop in the same entity\n",
6037 if ((ret
->ocur
== XML_ELEMENT_CONTENT_PLUS
) ||
6038 (ret
->ocur
== XML_ELEMENT_CONTENT_MULT
))
6039 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6041 ret
->ocur
= XML_ELEMENT_CONTENT_OPT
;
6044 } else if (RAW
== '*') {
6046 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6049 * Some normalization:
6050 * (a | b* | c?)* == (a | b | c)*
6052 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_CONTENT_OR
)) {
6053 if ((cur
->c1
!= NULL
) &&
6054 ((cur
->c1
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6055 (cur
->c1
->ocur
== XML_ELEMENT_CONTENT_MULT
)))
6056 cur
->c1
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6057 if ((cur
->c2
!= NULL
) &&
6058 ((cur
->c2
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6059 (cur
->c2
->ocur
== XML_ELEMENT_CONTENT_MULT
)))
6060 cur
->c2
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6065 } else if (RAW
== '+') {
6069 if ((ret
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6070 (ret
->ocur
== XML_ELEMENT_CONTENT_MULT
))
6071 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6073 ret
->ocur
= XML_ELEMENT_CONTENT_PLUS
;
6075 * Some normalization:
6076 * (a | b*)+ == (a | b)*
6077 * (a | b?)+ == (a | b)*
6079 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_CONTENT_OR
)) {
6080 if ((cur
->c1
!= NULL
) &&
6081 ((cur
->c1
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6082 (cur
->c1
->ocur
== XML_ELEMENT_CONTENT_MULT
))) {
6083 cur
->c1
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6086 if ((cur
->c2
!= NULL
) &&
6087 ((cur
->c2
->ocur
== XML_ELEMENT_CONTENT_OPT
) ||
6088 (cur
->c2
->ocur
== XML_ELEMENT_CONTENT_MULT
))) {
6089 cur
->c2
->ocur
= XML_ELEMENT_CONTENT_ONCE
;
6095 ret
->ocur
= XML_ELEMENT_CONTENT_MULT
;
6103 * xmlParseElementChildrenContentDecl:
6104 * @ctxt: an XML parser context
6105 * @inputchk: the input used for the current entity, needed for boundary checks
6107 * parse the declaration for a Mixed Element content
6108 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6110 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6112 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6114 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6116 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6118 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6119 * TODO Parameter-entity replacement text must be properly nested
6120 * with parenthesized groups. That is to say, if either of the
6121 * opening or closing parentheses in a choice, seq, or Mixed
6122 * construct is contained in the replacement text for a parameter
6123 * entity, both must be contained in the same replacement text. For
6124 * interoperability, if a parameter-entity reference appears in a
6125 * choice, seq, or Mixed construct, its replacement text should not
6126 * be empty, and neither the first nor last non-blank character of
6127 * the replacement text should be a connector (| or ,).
6129 * Returns the tree of xmlElementContentPtr describing the element
6132 xmlElementContentPtr
6133 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt
, int inputchk
) {
6134 /* stub left for API/ABI compat */
6135 return(xmlParseElementChildrenContentDeclPriv(ctxt
, inputchk
, 1));
6139 * xmlParseElementContentDecl:
6140 * @ctxt: an XML parser context
6141 * @name: the name of the element being defined.
6142 * @result: the Element Content pointer will be stored here if any
6144 * parse the declaration for an Element content either Mixed or Children,
6145 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6147 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6149 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6153 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt
, const xmlChar
*name
,
6154 xmlElementContentPtr
*result
) {
6156 xmlElementContentPtr tree
= NULL
;
6157 int inputid
= ctxt
->input
->id
;
6163 xmlFatalErrMsgStr(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
,
6164 "xmlParseElementContentDecl : %s '(' expected\n", name
);
6170 if (CMP7(CUR_PTR
, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6171 tree
= xmlParseElementMixedContentDecl(ctxt
, inputid
);
6172 res
= XML_ELEMENT_TYPE_MIXED
;
6174 tree
= xmlParseElementChildrenContentDeclPriv(ctxt
, inputid
, 1);
6175 res
= XML_ELEMENT_TYPE_ELEMENT
;
6183 * xmlParseElementDecl:
6184 * @ctxt: an XML parser context
6186 * parse an Element declaration.
6188 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6190 * [ VC: Unique Element Type Declaration ]
6191 * No element type may be declared more than once
6193 * Returns the type of the element, or -1 in case of error
6196 xmlParseElementDecl(xmlParserCtxtPtr ctxt
) {
6197 const xmlChar
*name
;
6199 xmlElementContentPtr content
= NULL
;
6201 /* GROW; done in the caller */
6202 if (CMP9(CUR_PTR
, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6203 xmlParserInputPtr input
= ctxt
->input
;
6206 if (!IS_BLANK_CH(CUR
)) {
6207 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6208 "Space required after 'ELEMENT'\n");
6211 name
= xmlParseName(ctxt
);
6213 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
6214 "xmlParseElementDecl: no name for Element\n");
6217 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
6219 if (!IS_BLANK_CH(CUR
)) {
6220 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6221 "Space required after the element name\n");
6224 if (CMP5(CUR_PTR
, 'E', 'M', 'P', 'T', 'Y')) {
6227 * Element must always be empty.
6229 ret
= XML_ELEMENT_TYPE_EMPTY
;
6230 } else if ((RAW
== 'A') && (NXT(1) == 'N') &&
6234 * Element is a generic container.
6236 ret
= XML_ELEMENT_TYPE_ANY
;
6237 } else if (RAW
== '(') {
6238 ret
= xmlParseElementContentDecl(ctxt
, name
, &content
);
6241 * [ WFC: PEs in Internal Subset ] error handling.
6243 if ((RAW
== '%') && (ctxt
->external
== 0) &&
6244 (ctxt
->inputNr
== 1)) {
6245 xmlFatalErrMsg(ctxt
, XML_ERR_PEREF_IN_INT_SUBSET
,
6246 "PEReference: forbidden within markup decl in internal subset\n");
6248 xmlFatalErrMsg(ctxt
, XML_ERR_ELEMCONTENT_NOT_STARTED
,
6249 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6256 * Pop-up of finished entities.
6258 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
6263 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
6264 if (content
!= NULL
) {
6265 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6268 if (input
!= ctxt
->input
) {
6269 xmlFatalErrMsg(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6270 "Element declaration doesn't start and stop in the same entity\n");
6274 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
6275 (ctxt
->sax
->elementDecl
!= NULL
)) {
6276 if (content
!= NULL
)
6277 content
->parent
= NULL
;
6278 ctxt
->sax
->elementDecl(ctxt
->userData
, name
, ret
,
6280 if ((content
!= NULL
) && (content
->parent
== NULL
)) {
6282 * this is a trick: if xmlAddElementDecl is called,
6283 * instead of copying the full tree it is plugged directly
6284 * if called from the parser. Avoid duplicating the
6285 * interfaces or change the API/ABI
6287 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6289 } else if (content
!= NULL
) {
6290 xmlFreeDocElementContent(ctxt
->myDoc
, content
);
6298 * xmlParseConditionalSections
6299 * @ctxt: an XML parser context
6301 * [61] conditionalSect ::= includeSect | ignoreSect
6302 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6303 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6304 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6305 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6309 xmlParseConditionalSections(xmlParserCtxtPtr ctxt
) {
6310 int id
= ctxt
->input
->id
;
6314 if (CMP7(CUR_PTR
, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6318 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID
, NULL
);
6320 if (ctxt
->input
->id
!= id
) {
6321 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6322 "All markup of the conditional section is not in the same entity\n",
6327 if (xmlParserDebugEntities
) {
6328 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
6329 xmlGenericError(xmlGenericErrorContext
,
6330 "%s(%d): ", ctxt
->input
->filename
,
6332 xmlGenericError(xmlGenericErrorContext
,
6333 "Entering INCLUDE Conditional Section\n");
6336 while ((RAW
!= 0) && ((RAW
!= ']') || (NXT(1) != ']') ||
6338 const xmlChar
*check
= CUR_PTR
;
6339 unsigned int cons
= ctxt
->input
->consumed
;
6341 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6342 xmlParseConditionalSections(ctxt
);
6343 } else if (IS_BLANK_CH(CUR
)) {
6345 } else if (RAW
== '%') {
6346 xmlParsePEReference(ctxt
);
6348 xmlParseMarkupDecl(ctxt
);
6351 * Pop-up of finished entities.
6353 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
6356 if ((CUR_PTR
== check
) && (cons
== ctxt
->input
->consumed
)) {
6357 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
6361 if (xmlParserDebugEntities
) {
6362 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
6363 xmlGenericError(xmlGenericErrorContext
,
6364 "%s(%d): ", ctxt
->input
->filename
,
6366 xmlGenericError(xmlGenericErrorContext
,
6367 "Leaving INCLUDE Conditional Section\n");
6370 } else if (CMP6(CUR_PTR
, 'I', 'G', 'N', 'O', 'R', 'E')) {
6372 xmlParserInputState instate
;
6378 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID
, NULL
);
6380 if (ctxt
->input
->id
!= id
) {
6381 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6382 "All markup of the conditional section is not in the same entity\n",
6387 if (xmlParserDebugEntities
) {
6388 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
6389 xmlGenericError(xmlGenericErrorContext
,
6390 "%s(%d): ", ctxt
->input
->filename
,
6392 xmlGenericError(xmlGenericErrorContext
,
6393 "Entering IGNORE Conditional Section\n");
6397 * Parse up to the end of the conditional section
6398 * But disable SAX event generating DTD building in the meantime
6400 state
= ctxt
->disableSAX
;
6401 instate
= ctxt
->instate
;
6402 if (ctxt
->recovery
== 0) ctxt
->disableSAX
= 1;
6403 ctxt
->instate
= XML_PARSER_IGNORE
;
6405 while ((depth
>= 0) && (RAW
!= 0)) {
6406 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6411 if ((RAW
== ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6412 if (--depth
>= 0) SKIP(3);
6419 ctxt
->disableSAX
= state
;
6420 ctxt
->instate
= instate
;
6422 if (xmlParserDebugEntities
) {
6423 if ((ctxt
->input
!= NULL
) && (ctxt
->input
->filename
))
6424 xmlGenericError(xmlGenericErrorContext
,
6425 "%s(%d): ", ctxt
->input
->filename
,
6427 xmlGenericError(xmlGenericErrorContext
,
6428 "Leaving IGNORE Conditional Section\n");
6432 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_INVALID_KEYWORD
, NULL
);
6439 xmlFatalErr(ctxt
, XML_ERR_CONDSEC_NOT_FINISHED
, NULL
);
6441 if (ctxt
->input
->id
!= id
) {
6442 xmlValidityError(ctxt
, XML_ERR_ENTITY_BOUNDARY
,
6443 "All markup of the conditional section is not in the same entity\n",
6451 * xmlParseMarkupDecl:
6452 * @ctxt: an XML parser context
6454 * parse Markup declarations
6456 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6457 * NotationDecl | PI | Comment
6459 * [ VC: Proper Declaration/PE Nesting ]
6460 * Parameter-entity replacement text must be properly nested with
6461 * markup declarations. That is to say, if either the first character
6462 * or the last character of a markup declaration (markupdecl above) is
6463 * contained in the replacement text for a parameter-entity reference,
6464 * both must be contained in the same replacement text.
6466 * [ WFC: PEs in Internal Subset ]
6467 * In the internal DTD subset, parameter-entity references can occur
6468 * only where markup declarations can occur, not within markup declarations.
6469 * (This does not apply to references that occur in external parameter
6470 * entities or to the external subset.)
6473 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt
) {
6476 if (NXT(1) == '!') {
6480 xmlParseElementDecl(ctxt
);
6481 else if (NXT(3) == 'N')
6482 xmlParseEntityDecl(ctxt
);
6485 xmlParseAttributeListDecl(ctxt
);
6488 xmlParseNotationDecl(ctxt
);
6491 xmlParseComment(ctxt
);
6494 /* there is an error but it will be detected later */
6497 } else if (NXT(1) == '?') {
6502 * This is only for internal subset. On external entities,
6503 * the replacement is done before parsing stage
6505 if ((ctxt
->external
== 0) && (ctxt
->inputNr
== 1))
6506 xmlParsePEReference(ctxt
);
6509 * Conditional sections are allowed from entities included
6510 * by PE References in the internal subset.
6512 if ((ctxt
->external
== 0) && (ctxt
->inputNr
> 1)) {
6513 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6514 xmlParseConditionalSections(ctxt
);
6518 ctxt
->instate
= XML_PARSER_DTD
;
6523 * @ctxt: an XML parser context
6525 * parse an XML declaration header for external entities
6527 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6531 xmlParseTextDecl(xmlParserCtxtPtr ctxt
) {
6533 const xmlChar
*encoding
;
6536 * We know that '<?xml' is here.
6538 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6541 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_STARTED
, NULL
);
6545 if (!IS_BLANK_CH(CUR
)) {
6546 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6547 "Space needed after '<?xml'\n");
6552 * We may have the VersionInfo here.
6554 version
= xmlParseVersionInfo(ctxt
);
6555 if (version
== NULL
)
6556 version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
6558 if (!IS_BLANK_CH(CUR
)) {
6559 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
6560 "Space needed here\n");
6563 ctxt
->input
->version
= version
;
6566 * We must have the encoding declaration
6568 encoding
= xmlParseEncodingDecl(ctxt
);
6569 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
6571 * The XML REC instructs us to stop parsing right here
6575 if ((encoding
== NULL
) && (ctxt
->errNo
== XML_ERR_OK
)) {
6576 xmlFatalErrMsg(ctxt
, XML_ERR_MISSING_ENCODING
,
6577 "Missing encoding in text declaration\n");
6581 if ((RAW
== '?') && (NXT(1) == '>')) {
6583 } else if (RAW
== '>') {
6584 /* Deprecated old WD ... */
6585 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
6588 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
6589 MOVETO_ENDTAG(CUR_PTR
);
6595 * xmlParseExternalSubset:
6596 * @ctxt: an XML parser context
6597 * @ExternalID: the external identifier
6598 * @SystemID: the system identifier (or URL)
6600 * parse Markup declarations from an external subset
6602 * [30] extSubset ::= textDecl? extSubsetDecl
6604 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6607 xmlParseExternalSubset(xmlParserCtxtPtr ctxt
, const xmlChar
*ExternalID
,
6608 const xmlChar
*SystemID
) {
6609 xmlDetectSAX2(ctxt
);
6612 if ((ctxt
->encoding
== (const xmlChar
*)XML_CHAR_ENCODING_NONE
) &&
6613 (ctxt
->input
->end
- ctxt
->input
->cur
>= 4)) {
6615 xmlCharEncoding enc
;
6621 enc
= xmlDetectCharEncoding(start
, 4);
6622 if (enc
!= XML_CHAR_ENCODING_NONE
)
6623 xmlSwitchEncoding(ctxt
, enc
);
6626 if (CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) {
6627 xmlParseTextDecl(ctxt
);
6628 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
6630 * The XML REC instructs us to stop parsing right here
6632 ctxt
->instate
= XML_PARSER_EOF
;
6636 if (ctxt
->myDoc
== NULL
) {
6637 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
6638 if (ctxt
->myDoc
== NULL
) {
6639 xmlErrMemory(ctxt
, "New Doc failed");
6642 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
6644 if ((ctxt
->myDoc
!= NULL
) && (ctxt
->myDoc
->intSubset
== NULL
))
6645 xmlCreateIntSubset(ctxt
->myDoc
, NULL
, ExternalID
, SystemID
);
6647 ctxt
->instate
= XML_PARSER_DTD
;
6649 while (((RAW
== '<') && (NXT(1) == '?')) ||
6650 ((RAW
== '<') && (NXT(1) == '!')) ||
6651 (RAW
== '%') || IS_BLANK_CH(CUR
)) {
6652 const xmlChar
*check
= CUR_PTR
;
6653 unsigned int cons
= ctxt
->input
->consumed
;
6656 if ((RAW
== '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6657 xmlParseConditionalSections(ctxt
);
6658 } else if (IS_BLANK_CH(CUR
)) {
6660 } else if (RAW
== '%') {
6661 xmlParsePEReference(ctxt
);
6663 xmlParseMarkupDecl(ctxt
);
6666 * Pop-up of finished entities.
6668 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
6671 if ((CUR_PTR
== check
) && (cons
== ctxt
->input
->consumed
)) {
6672 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
6678 xmlFatalErr(ctxt
, XML_ERR_EXT_SUBSET_NOT_FINISHED
, NULL
);
6684 * xmlParseReference:
6685 * @ctxt: an XML parser context
6687 * parse and handle entity references in content, depending on the SAX
6688 * interface, this may end-up in a call to character() if this is a
6689 * CharRef, a predefined entity, if there is no reference() callback.
6690 * or if the parser was asked to switch to that mode.
6692 * [67] Reference ::= EntityRef | CharRef
6695 xmlParseReference(xmlParserCtxtPtr ctxt
) {
6699 xmlNodePtr list
= NULL
;
6700 xmlParserErrors ret
= XML_ERR_OK
;
6707 * Simple case of a CharRef
6709 if (NXT(1) == '#') {
6713 int value
= xmlParseCharRef(ctxt
);
6717 if (ctxt
->charset
!= XML_CHAR_ENCODING_UTF8
) {
6719 * So we are using non-UTF-8 buffers
6720 * Check that the char fit on 8bits, if not
6721 * generate a CharRef.
6723 if (value
<= 0xFF) {
6726 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
6727 (!ctxt
->disableSAX
))
6728 ctxt
->sax
->characters(ctxt
->userData
, out
, 1);
6730 if ((hex
== 'x') || (hex
== 'X'))
6731 snprintf((char *)out
, sizeof(out
), "#x%X", value
);
6733 snprintf((char *)out
, sizeof(out
), "#%d", value
);
6734 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
6735 (!ctxt
->disableSAX
))
6736 ctxt
->sax
->reference(ctxt
->userData
, out
);
6740 * Just encode the value in UTF-8
6742 COPY_BUF(0 ,out
, i
, value
);
6744 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
6745 (!ctxt
->disableSAX
))
6746 ctxt
->sax
->characters(ctxt
->userData
, out
, i
);
6752 * We are seeing an entity reference
6754 ent
= xmlParseEntityRef(ctxt
);
6755 if (ent
== NULL
) return;
6756 if (!ctxt
->wellFormed
)
6758 was_checked
= ent
->checked
;
6760 /* special case of predefined entities */
6761 if ((ent
->name
== NULL
) ||
6762 (ent
->etype
== XML_INTERNAL_PREDEFINED_ENTITY
)) {
6764 if (val
== NULL
) return;
6766 * inline the entity.
6768 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->characters
!= NULL
) &&
6769 (!ctxt
->disableSAX
))
6770 ctxt
->sax
->characters(ctxt
->userData
, val
, xmlStrlen(val
));
6775 * The first reference to the entity trigger a parsing phase
6776 * where the ent->children is filled with the result from
6779 if (ent
->checked
== 0) {
6780 unsigned long oldnbent
= ctxt
->nbentities
;
6783 * This is a bit hackish but this seems the best
6784 * way to make sure both SAX and DOM entity support
6788 if (ctxt
->userData
== ctxt
)
6791 user_data
= ctxt
->userData
;
6794 * Check that this entity is well formed
6795 * 4.3.2: An internal general parsed entity is well-formed
6796 * if its replacement text matches the production labeled
6799 if (ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) {
6801 ret
= xmlParseBalancedChunkMemoryInternal(ctxt
, ent
->content
,
6805 } else if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
) {
6807 ret
= xmlParseExternalEntityPrivate(ctxt
->myDoc
, ctxt
, ctxt
->sax
,
6808 user_data
, ctxt
->depth
, ent
->URI
,
6809 ent
->ExternalID
, &list
);
6812 ret
= XML_ERR_ENTITY_PE_INTERNAL
;
6813 xmlErrMsgStr(ctxt
, XML_ERR_INTERNAL_ERROR
,
6814 "invalid entity type found\n", NULL
);
6818 * Store the number of entities needing parsing for this entity
6819 * content and do checkings
6821 ent
->checked
= ctxt
->nbentities
- oldnbent
;
6822 if (ret
== XML_ERR_ENTITY_LOOP
) {
6823 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
6824 xmlFreeNodeList(list
);
6827 if (xmlParserEntityCheck(ctxt
, 0, ent
)) {
6828 xmlFreeNodeList(list
);
6832 if ((ret
== XML_ERR_OK
) && (list
!= NULL
)) {
6833 if (((ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) ||
6834 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
))&&
6835 (ent
->children
== NULL
)) {
6836 ent
->children
= list
;
6837 if (ctxt
->replaceEntities
) {
6839 * Prune it directly in the generated document
6840 * except for single text nodes.
6842 if (((list
->type
== XML_TEXT_NODE
) &&
6843 (list
->next
== NULL
)) ||
6844 (ctxt
->parseMode
== XML_PARSE_READER
)) {
6845 list
->parent
= (xmlNodePtr
) ent
;
6850 while (list
!= NULL
) {
6851 list
->parent
= (xmlNodePtr
) ctxt
->node
;
6852 list
->doc
= ctxt
->myDoc
;
6853 if (list
->next
== NULL
)
6857 list
= ent
->children
;
6858 #ifdef LIBXML_LEGACY_ENABLED
6859 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
6860 xmlAddEntityReference(ent
, list
, NULL
);
6861 #endif /* LIBXML_LEGACY_ENABLED */
6865 while (list
!= NULL
) {
6866 list
->parent
= (xmlNodePtr
) ent
;
6867 if (list
->next
== NULL
)
6873 xmlFreeNodeList(list
);
6876 } else if ((ret
!= XML_ERR_OK
) &&
6877 (ret
!= XML_WAR_UNDECLARED_ENTITY
)) {
6878 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
6879 "Entity '%s' failed to parse\n", ent
->name
);
6880 } else if (list
!= NULL
) {
6881 xmlFreeNodeList(list
);
6884 if (ent
->checked
== 0)
6886 } else if (ent
->checked
!= 1) {
6887 ctxt
->nbentities
+= ent
->checked
;
6891 * Now that the entity content has been gathered
6892 * provide it to the application, this can take different forms based
6893 * on the parsing modes.
6895 if (ent
->children
== NULL
) {
6897 * Probably running in SAX mode and the callbacks don't
6898 * build the entity content. So unless we already went
6899 * though parsing for first checking go though the entity
6900 * content to generate callbacks associated to the entity
6902 if (was_checked
!= 0) {
6905 * This is a bit hackish but this seems the best
6906 * way to make sure both SAX and DOM entity support
6909 if (ctxt
->userData
== ctxt
)
6912 user_data
= ctxt
->userData
;
6914 if (ent
->etype
== XML_INTERNAL_GENERAL_ENTITY
) {
6916 ret
= xmlParseBalancedChunkMemoryInternal(ctxt
,
6917 ent
->content
, user_data
, NULL
);
6919 } else if (ent
->etype
==
6920 XML_EXTERNAL_GENERAL_PARSED_ENTITY
) {
6922 ret
= xmlParseExternalEntityPrivate(ctxt
->myDoc
, ctxt
,
6923 ctxt
->sax
, user_data
, ctxt
->depth
,
6924 ent
->URI
, ent
->ExternalID
, NULL
);
6927 ret
= XML_ERR_ENTITY_PE_INTERNAL
;
6928 xmlErrMsgStr(ctxt
, XML_ERR_INTERNAL_ERROR
,
6929 "invalid entity type found\n", NULL
);
6931 if (ret
== XML_ERR_ENTITY_LOOP
) {
6932 xmlFatalErr(ctxt
, XML_ERR_ENTITY_LOOP
, NULL
);
6936 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
6937 (ctxt
->replaceEntities
== 0) && (!ctxt
->disableSAX
)) {
6939 * Entity reference callback comes second, it's somewhat
6940 * superfluous but a compatibility to historical behaviour
6942 ctxt
->sax
->reference(ctxt
->userData
, ent
->name
);
6948 * If we didn't get any children for the entity being built
6950 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->reference
!= NULL
) &&
6951 (ctxt
->replaceEntities
== 0) && (!ctxt
->disableSAX
)) {
6955 ctxt
->sax
->reference(ctxt
->userData
, ent
->name
);
6959 if ((ctxt
->replaceEntities
) || (ent
->children
== NULL
)) {
6961 * There is a problem on the handling of _private for entities
6962 * (bug 155816): Should we copy the content of the field from
6963 * the entity (possibly overwriting some value set by the user
6964 * when a copy is created), should we leave it alone, or should
6965 * we try to take care of different situations? The problem
6966 * is exacerbated by the usage of this field by the xmlReader.
6967 * To fix this bug, we look at _private on the created node
6968 * and, if it's NULL, we copy in whatever was in the entity.
6969 * If it's not NULL we leave it alone. This is somewhat of a
6970 * hack - maybe we should have further tests to determine
6973 if ((ctxt
->node
!= NULL
) && (ent
->children
!= NULL
)) {
6975 * Seems we are generating the DOM content, do
6976 * a simple tree copy for all references except the first
6977 * In the first occurrence list contains the replacement.
6978 * progressive == 2 means we are operating on the Reader
6979 * and since nodes are discarded we must copy all the time.
6981 if (((list
== NULL
) && (ent
->owner
== 0)) ||
6982 (ctxt
->parseMode
== XML_PARSE_READER
)) {
6983 xmlNodePtr nw
= NULL
, cur
, firstChild
= NULL
;
6986 * when operating on a reader, the entities definitions
6987 * are always owning the entities subtree.
6988 if (ctxt->parseMode == XML_PARSE_READER)
6992 cur
= ent
->children
;
6993 while (cur
!= NULL
) {
6994 nw
= xmlDocCopyNode(cur
, ctxt
->myDoc
, 1);
6996 if (nw
->_private
== NULL
)
6997 nw
->_private
= cur
->_private
;
6998 if (firstChild
== NULL
){
7001 nw
= xmlAddChild(ctxt
->node
, nw
);
7003 if (cur
== ent
->last
) {
7005 * needed to detect some strange empty
7006 * node cases in the reader tests
7008 if ((ctxt
->parseMode
== XML_PARSE_READER
) &&
7010 (nw
->type
== XML_ELEMENT_NODE
) &&
7011 (nw
->children
== NULL
))
7018 #ifdef LIBXML_LEGACY_ENABLED
7019 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
7020 xmlAddEntityReference(ent
, firstChild
, nw
);
7021 #endif /* LIBXML_LEGACY_ENABLED */
7022 } else if (list
== NULL
) {
7023 xmlNodePtr nw
= NULL
, cur
, next
, last
,
7026 * Copy the entity child list and make it the new
7027 * entity child list. The goal is to make sure any
7028 * ID or REF referenced will be the one from the
7029 * document content and not the entity copy.
7031 cur
= ent
->children
;
7032 ent
->children
= NULL
;
7035 while (cur
!= NULL
) {
7039 nw
= xmlDocCopyNode(cur
, ctxt
->myDoc
, 1);
7041 if (nw
->_private
== NULL
)
7042 nw
->_private
= cur
->_private
;
7043 if (firstChild
== NULL
){
7046 xmlAddChild((xmlNodePtr
) ent
, nw
);
7047 xmlAddChild(ctxt
->node
, cur
);
7053 if (ent
->owner
== 0)
7055 #ifdef LIBXML_LEGACY_ENABLED
7056 if (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)
7057 xmlAddEntityReference(ent
, firstChild
, nw
);
7058 #endif /* LIBXML_LEGACY_ENABLED */
7060 const xmlChar
*nbktext
;
7063 * the name change is to avoid coalescing of the
7064 * node with a possible previous text one which
7065 * would make ent->children a dangling pointer
7067 nbktext
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"nbktext",
7069 if (ent
->children
->type
== XML_TEXT_NODE
)
7070 ent
->children
->name
= nbktext
;
7071 if ((ent
->last
!= ent
->children
) &&
7072 (ent
->last
->type
== XML_TEXT_NODE
))
7073 ent
->last
->name
= nbktext
;
7074 xmlAddChildList(ctxt
->node
, ent
->children
);
7078 * This is to avoid a nasty side effect, see
7079 * characters() in SAX.c
7089 * xmlParseEntityRef:
7090 * @ctxt: an XML parser context
7092 * parse ENTITY references declarations
7094 * [68] EntityRef ::= '&' Name ';'
7096 * [ WFC: Entity Declared ]
7097 * In a document without any DTD, a document with only an internal DTD
7098 * subset which contains no parameter entity references, or a document
7099 * with "standalone='yes'", the Name given in the entity reference
7100 * must match that in an entity declaration, except that well-formed
7101 * documents need not declare any of the following entities: amp, lt,
7102 * gt, apos, quot. The declaration of a parameter entity must precede
7103 * any reference to it. Similarly, the declaration of a general entity
7104 * must precede any reference to it which appears in a default value in an
7105 * attribute-list declaration. Note that if entities are declared in the
7106 * external subset or in external parameter entities, a non-validating
7107 * processor is not obligated to read and process their declarations;
7108 * for such documents, the rule that an entity must be declared is a
7109 * well-formedness constraint only if standalone='yes'.
7111 * [ WFC: Parsed Entity ]
7112 * An entity reference must not contain the name of an unparsed entity
7114 * Returns the xmlEntityPtr if found, or NULL otherwise.
7117 xmlParseEntityRef(xmlParserCtxtPtr ctxt
) {
7118 const xmlChar
*name
;
7119 xmlEntityPtr ent
= NULL
;
7126 name
= xmlParseName(ctxt
);
7128 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7129 "xmlParseEntityRef: no name\n");
7133 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
7139 * Predefined entities override any extra definition
7141 if ((ctxt
->options
& XML_PARSE_OLDSAX
) == 0) {
7142 ent
= xmlGetPredefinedEntity(name
);
7148 * Increase the number of entity references parsed
7153 * Ask first SAX for entity resolution, otherwise try the
7154 * entities which may have been stored in the parser context.
7156 if (ctxt
->sax
!= NULL
) {
7157 if (ctxt
->sax
->getEntity
!= NULL
)
7158 ent
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
7159 if ((ctxt
->wellFormed
== 1 ) && (ent
== NULL
) &&
7160 (ctxt
->options
& XML_PARSE_OLDSAX
))
7161 ent
= xmlGetPredefinedEntity(name
);
7162 if ((ctxt
->wellFormed
== 1 ) && (ent
== NULL
) &&
7163 (ctxt
->userData
==ctxt
)) {
7164 ent
= xmlSAX2GetEntity(ctxt
, name
);
7168 * [ WFC: Entity Declared ]
7169 * In a document without any DTD, a document with only an
7170 * internal DTD subset which contains no parameter entity
7171 * references, or a document with "standalone='yes'", the
7172 * Name given in the entity reference must match that in an
7173 * entity declaration, except that well-formed documents
7174 * need not declare any of the following entities: amp, lt,
7176 * The declaration of a parameter entity must precede any
7178 * Similarly, the declaration of a general entity must
7179 * precede any reference to it which appears in a default
7180 * value in an attribute-list declaration. Note that if
7181 * entities are declared in the external subset or in
7182 * external parameter entities, a non-validating processor
7183 * is not obligated to read and process their declarations;
7184 * for such documents, the rule that an entity must be
7185 * declared is a well-formedness constraint only if
7189 if ((ctxt
->standalone
== 1) ||
7190 ((ctxt
->hasExternalSubset
== 0) &&
7191 (ctxt
->hasPErefs
== 0))) {
7192 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7193 "Entity '%s' not defined\n", name
);
7195 xmlErrMsgStr(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7196 "Entity '%s' not defined\n", name
);
7197 if ((ctxt
->inSubset
== 0) &&
7198 (ctxt
->sax
!= NULL
) &&
7199 (ctxt
->sax
->reference
!= NULL
)) {
7200 ctxt
->sax
->reference(ctxt
->userData
, name
);
7207 * [ WFC: Parsed Entity ]
7208 * An entity reference must not contain the name of an
7211 else if (ent
->etype
== XML_EXTERNAL_GENERAL_UNPARSED_ENTITY
) {
7212 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNPARSED_ENTITY
,
7213 "Entity reference to unparsed entity %s\n", name
);
7217 * [ WFC: No External Entity References ]
7218 * Attribute values cannot contain direct or indirect
7219 * entity references to external entities.
7221 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7222 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) {
7223 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_EXTERNAL
,
7224 "Attribute references external entity '%s'\n", name
);
7227 * [ WFC: No < in Attribute Values ]
7228 * The replacement text of any entity referred to directly or
7229 * indirectly in an attribute value (other than "<") must
7232 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7233 (ent
!= NULL
) && (ent
->content
!= NULL
) &&
7234 (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) &&
7235 (xmlStrchr(ent
->content
, '<'))) {
7236 xmlFatalErrMsgStr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
,
7237 "'<' in entity '%s' is not allowed in attributes values\n", name
);
7241 * Internal check, no parameter entities here ...
7244 switch (ent
->etype
) {
7245 case XML_INTERNAL_PARAMETER_ENTITY
:
7246 case XML_EXTERNAL_PARAMETER_ENTITY
:
7247 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_PARAMETER
,
7248 "Attempt to reference the parameter entity '%s'\n",
7257 * [ WFC: No Recursion ]
7258 * A parsed entity must not contain a recursive reference
7259 * to itself, either directly or indirectly.
7260 * Done somewhere else
7266 * xmlParseStringEntityRef:
7267 * @ctxt: an XML parser context
7268 * @str: a pointer to an index in the string
7270 * parse ENTITY references declarations, but this version parses it from
7273 * [68] EntityRef ::= '&' Name ';'
7275 * [ WFC: Entity Declared ]
7276 * In a document without any DTD, a document with only an internal DTD
7277 * subset which contains no parameter entity references, or a document
7278 * with "standalone='yes'", the Name given in the entity reference
7279 * must match that in an entity declaration, except that well-formed
7280 * documents need not declare any of the following entities: amp, lt,
7281 * gt, apos, quot. The declaration of a parameter entity must precede
7282 * any reference to it. Similarly, the declaration of a general entity
7283 * must precede any reference to it which appears in a default value in an
7284 * attribute-list declaration. Note that if entities are declared in the
7285 * external subset or in external parameter entities, a non-validating
7286 * processor is not obligated to read and process their declarations;
7287 * for such documents, the rule that an entity must be declared is a
7288 * well-formedness constraint only if standalone='yes'.
7290 * [ WFC: Parsed Entity ]
7291 * An entity reference must not contain the name of an unparsed entity
7293 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7294 * is updated to the current location in the string.
7297 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt
, const xmlChar
** str
) {
7301 xmlEntityPtr ent
= NULL
;
7303 if ((str
== NULL
) || (*str
== NULL
))
7311 name
= xmlParseStringName(ctxt
, &ptr
);
7313 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7314 "xmlParseStringEntityRef: no name\n");
7319 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
7328 * Predefined entities override any extra definition
7330 if ((ctxt
->options
& XML_PARSE_OLDSAX
) == 0) {
7331 ent
= xmlGetPredefinedEntity(name
);
7340 * Increase the number of entity references parsed
7345 * Ask first SAX for entity resolution, otherwise try the
7346 * entities which may have been stored in the parser context.
7348 if (ctxt
->sax
!= NULL
) {
7349 if (ctxt
->sax
->getEntity
!= NULL
)
7350 ent
= ctxt
->sax
->getEntity(ctxt
->userData
, name
);
7351 if ((ent
== NULL
) && (ctxt
->options
& XML_PARSE_OLDSAX
))
7352 ent
= xmlGetPredefinedEntity(name
);
7353 if ((ent
== NULL
) && (ctxt
->userData
==ctxt
)) {
7354 ent
= xmlSAX2GetEntity(ctxt
, name
);
7359 * [ WFC: Entity Declared ]
7360 * In a document without any DTD, a document with only an
7361 * internal DTD subset which contains no parameter entity
7362 * references, or a document with "standalone='yes'", the
7363 * Name given in the entity reference must match that in an
7364 * entity declaration, except that well-formed documents
7365 * need not declare any of the following entities: amp, lt,
7367 * The declaration of a parameter entity must precede any
7369 * Similarly, the declaration of a general entity must
7370 * precede any reference to it which appears in a default
7371 * value in an attribute-list declaration. Note that if
7372 * entities are declared in the external subset or in
7373 * external parameter entities, a non-validating processor
7374 * is not obligated to read and process their declarations;
7375 * for such documents, the rule that an entity must be
7376 * declared is a well-formedness constraint only if
7380 if ((ctxt
->standalone
== 1) ||
7381 ((ctxt
->hasExternalSubset
== 0) &&
7382 (ctxt
->hasPErefs
== 0))) {
7383 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7384 "Entity '%s' not defined\n", name
);
7386 xmlErrMsgStr(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7387 "Entity '%s' not defined\n",
7390 /* TODO ? check regressions ctxt->valid = 0; */
7394 * [ WFC: Parsed Entity ]
7395 * An entity reference must not contain the name of an
7398 else if (ent
->etype
== XML_EXTERNAL_GENERAL_UNPARSED_ENTITY
) {
7399 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNPARSED_ENTITY
,
7400 "Entity reference to unparsed entity %s\n", name
);
7404 * [ WFC: No External Entity References ]
7405 * Attribute values cannot contain direct or indirect
7406 * entity references to external entities.
7408 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7409 (ent
->etype
== XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) {
7410 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_EXTERNAL
,
7411 "Attribute references external entity '%s'\n", name
);
7414 * [ WFC: No < in Attribute Values ]
7415 * The replacement text of any entity referred to directly or
7416 * indirectly in an attribute value (other than "<") must
7419 else if ((ctxt
->instate
== XML_PARSER_ATTRIBUTE_VALUE
) &&
7420 (ent
!= NULL
) && (ent
->content
!= NULL
) &&
7421 (ent
->etype
!= XML_INTERNAL_PREDEFINED_ENTITY
) &&
7422 (xmlStrchr(ent
->content
, '<'))) {
7423 xmlFatalErrMsgStr(ctxt
, XML_ERR_LT_IN_ATTRIBUTE
,
7424 "'<' in entity '%s' is not allowed in attributes values\n",
7429 * Internal check, no parameter entities here ...
7432 switch (ent
->etype
) {
7433 case XML_INTERNAL_PARAMETER_ENTITY
:
7434 case XML_EXTERNAL_PARAMETER_ENTITY
:
7435 xmlFatalErrMsgStr(ctxt
, XML_ERR_ENTITY_IS_PARAMETER
,
7436 "Attempt to reference the parameter entity '%s'\n",
7445 * [ WFC: No Recursion ]
7446 * A parsed entity must not contain a recursive reference
7447 * to itself, either directly or indirectly.
7448 * Done somewhere else
7457 * xmlParsePEReference:
7458 * @ctxt: an XML parser context
7460 * parse PEReference declarations
7461 * The entity content is handled directly by pushing its content as
7462 * a new input stream.
7464 * [69] PEReference ::= '%' Name ';'
7466 * [ WFC: No Recursion ]
7467 * A parsed entity must not contain a recursive
7468 * reference to itself, either directly or indirectly.
7470 * [ WFC: Entity Declared ]
7471 * In a document without any DTD, a document with only an internal DTD
7472 * subset which contains no parameter entity references, or a document
7473 * with "standalone='yes'", ... ... The declaration of a parameter
7474 * entity must precede any reference to it...
7476 * [ VC: Entity Declared ]
7477 * In a document with an external subset or external parameter entities
7478 * with "standalone='no'", ... ... The declaration of a parameter entity
7479 * must precede any reference to it...
7482 * Parameter-entity references may only appear in the DTD.
7483 * NOTE: misleading but this is handled.
7486 xmlParsePEReference(xmlParserCtxtPtr ctxt
)
7488 const xmlChar
*name
;
7489 xmlEntityPtr entity
= NULL
;
7490 xmlParserInputPtr input
;
7495 name
= xmlParseName(ctxt
);
7497 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7498 "xmlParsePEReference: no name\n");
7502 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
7509 * Increase the number of entity references parsed
7514 * Request the entity from SAX
7516 if ((ctxt
->sax
!= NULL
) &&
7517 (ctxt
->sax
->getParameterEntity
!= NULL
))
7518 entity
= ctxt
->sax
->getParameterEntity(ctxt
->userData
,
7520 if (entity
== NULL
) {
7522 * [ WFC: Entity Declared ]
7523 * In a document without any DTD, a document with only an
7524 * internal DTD subset which contains no parameter entity
7525 * references, or a document with "standalone='yes'", ...
7526 * ... The declaration of a parameter entity must precede
7527 * any reference to it...
7529 if ((ctxt
->standalone
== 1) ||
7530 ((ctxt
->hasExternalSubset
== 0) &&
7531 (ctxt
->hasPErefs
== 0))) {
7532 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7533 "PEReference: %%%s; not found\n",
7537 * [ VC: Entity Declared ]
7538 * In a document with an external subset or external
7539 * parameter entities with "standalone='no'", ...
7540 * ... The declaration of a parameter entity must
7541 * precede any reference to it...
7543 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7544 "PEReference: %%%s; not found\n",
7550 * Internal checking in case the entity quest barfed
7552 if ((entity
->etype
!= XML_INTERNAL_PARAMETER_ENTITY
) &&
7553 (entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
)) {
7554 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7555 "Internal: %%%s; is not a parameter entity\n",
7557 } else if (ctxt
->input
->free
!= deallocblankswrapper
) {
7558 input
= xmlNewBlanksWrapperInputStream(ctxt
, entity
);
7559 if (xmlPushInput(ctxt
, input
) < 0)
7564 * handle the extra spaces added before and after
7565 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7567 input
= xmlNewEntityInputStream(ctxt
, entity
);
7568 if (xmlPushInput(ctxt
, input
) < 0)
7570 if ((entity
->etype
== XML_EXTERNAL_PARAMETER_ENTITY
) &&
7571 (CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) &&
7572 (IS_BLANK_CH(NXT(5)))) {
7573 xmlParseTextDecl(ctxt
);
7575 XML_ERR_UNSUPPORTED_ENCODING
) {
7577 * The XML REC instructs us to stop parsing
7580 ctxt
->instate
= XML_PARSER_EOF
;
7586 ctxt
->hasPErefs
= 1;
7590 * xmlLoadEntityContent:
7591 * @ctxt: an XML parser context
7592 * @entity: an unloaded system entity
7594 * Load the original content of the given system entity from the
7595 * ExternalID/SystemID given. This is to be used for Included in Literal
7596 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7598 * Returns 0 in case of success and -1 in case of failure
7601 xmlLoadEntityContent(xmlParserCtxtPtr ctxt
, xmlEntityPtr entity
) {
7602 xmlParserInputPtr input
;
7607 if ((ctxt
== NULL
) || (entity
== NULL
) ||
7608 ((entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
) &&
7609 (entity
->etype
!= XML_EXTERNAL_GENERAL_PARSED_ENTITY
)) ||
7610 (entity
->content
!= NULL
)) {
7611 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
7612 "xmlLoadEntityContent parameter error");
7616 if (xmlParserDebugEntities
)
7617 xmlGenericError(xmlGenericErrorContext
,
7618 "Reading %s entity content input\n", entity
->name
);
7620 buf
= xmlBufferCreate();
7622 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
7623 "xmlLoadEntityContent parameter error");
7627 input
= xmlNewEntityInputStream(ctxt
, entity
);
7628 if (input
== NULL
) {
7629 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
7630 "xmlLoadEntityContent input error");
7636 * Push the entity as the current input, read char by char
7637 * saving to the buffer until the end of the entity or an error
7639 if (xmlPushInput(ctxt
, input
) < 0) {
7646 while ((ctxt
->input
== input
) && (ctxt
->input
->cur
< ctxt
->input
->end
) &&
7648 xmlBufferAdd(buf
, ctxt
->input
->cur
, l
);
7649 if (count
++ > 100) {
7657 if ((ctxt
->input
== input
) && (ctxt
->input
->cur
>= ctxt
->input
->end
)) {
7659 } else if (!IS_CHAR(c
)) {
7660 xmlFatalErrMsgInt(ctxt
, XML_ERR_INVALID_CHAR
,
7661 "xmlLoadEntityContent: invalid char value %d\n",
7666 entity
->content
= buf
->content
;
7667 buf
->content
= NULL
;
7674 * xmlParseStringPEReference:
7675 * @ctxt: an XML parser context
7676 * @str: a pointer to an index in the string
7678 * parse PEReference declarations
7680 * [69] PEReference ::= '%' Name ';'
7682 * [ WFC: No Recursion ]
7683 * A parsed entity must not contain a recursive
7684 * reference to itself, either directly or indirectly.
7686 * [ WFC: Entity Declared ]
7687 * In a document without any DTD, a document with only an internal DTD
7688 * subset which contains no parameter entity references, or a document
7689 * with "standalone='yes'", ... ... The declaration of a parameter
7690 * entity must precede any reference to it...
7692 * [ VC: Entity Declared ]
7693 * In a document with an external subset or external parameter entities
7694 * with "standalone='no'", ... ... The declaration of a parameter entity
7695 * must precede any reference to it...
7698 * Parameter-entity references may only appear in the DTD.
7699 * NOTE: misleading but this is handled.
7701 * Returns the string of the entity content.
7702 * str is updated to the current value of the index
7705 xmlParseStringPEReference(xmlParserCtxtPtr ctxt
, const xmlChar
**str
) {
7709 xmlEntityPtr entity
= NULL
;
7711 if ((str
== NULL
) || (*str
== NULL
)) return(NULL
);
7717 name
= xmlParseStringName(ctxt
, &ptr
);
7719 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7720 "xmlParseStringPEReference: no name\n");
7726 xmlFatalErr(ctxt
, XML_ERR_ENTITYREF_SEMICOL_MISSING
, NULL
);
7734 * Increase the number of entity references parsed
7739 * Request the entity from SAX
7741 if ((ctxt
->sax
!= NULL
) &&
7742 (ctxt
->sax
->getParameterEntity
!= NULL
))
7743 entity
= ctxt
->sax
->getParameterEntity(ctxt
->userData
,
7745 if (entity
== NULL
) {
7747 * [ WFC: Entity Declared ]
7748 * In a document without any DTD, a document with only an
7749 * internal DTD subset which contains no parameter entity
7750 * references, or a document with "standalone='yes'", ...
7751 * ... The declaration of a parameter entity must precede
7752 * any reference to it...
7754 if ((ctxt
->standalone
== 1) ||
7755 ((ctxt
->hasExternalSubset
== 0) && (ctxt
->hasPErefs
== 0))) {
7756 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNDECLARED_ENTITY
,
7757 "PEReference: %%%s; not found\n", name
);
7760 * [ VC: Entity Declared ]
7761 * In a document with an external subset or external
7762 * parameter entities with "standalone='no'", ...
7763 * ... The declaration of a parameter entity must
7764 * precede any reference to it...
7766 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7767 "PEReference: %%%s; not found\n",
7773 * Internal checking in case the entity quest barfed
7775 if ((entity
->etype
!= XML_INTERNAL_PARAMETER_ENTITY
) &&
7776 (entity
->etype
!= XML_EXTERNAL_PARAMETER_ENTITY
)) {
7777 xmlWarningMsg(ctxt
, XML_WAR_UNDECLARED_ENTITY
,
7778 "%%%s; is not a parameter entity\n",
7782 ctxt
->hasPErefs
= 1;
7789 * xmlParseDocTypeDecl:
7790 * @ctxt: an XML parser context
7792 * parse a DOCTYPE declaration
7794 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7795 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7797 * [ VC: Root Element Type ]
7798 * The Name in the document type declaration must match the element
7799 * type of the root element.
7803 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt
) {
7804 const xmlChar
*name
= NULL
;
7805 xmlChar
*ExternalID
= NULL
;
7806 xmlChar
*URI
= NULL
;
7809 * We know that '<!DOCTYPE' has been detected.
7816 * Parse the DOCTYPE name.
7818 name
= xmlParseName(ctxt
);
7820 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7821 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7823 ctxt
->intSubName
= name
;
7828 * Check for SystemID and ExternalID
7830 URI
= xmlParseExternalID(ctxt
, &ExternalID
, 1);
7832 if ((URI
!= NULL
) || (ExternalID
!= NULL
)) {
7833 ctxt
->hasExternalSubset
= 1;
7835 ctxt
->extSubURI
= URI
;
7836 ctxt
->extSubSystem
= ExternalID
;
7841 * Create and update the internal subset.
7843 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->internalSubset
!= NULL
) &&
7844 (!ctxt
->disableSAX
))
7845 ctxt
->sax
->internalSubset(ctxt
->userData
, name
, ExternalID
, URI
);
7848 * Is there any internal subset declarations ?
7849 * they are handled separately in xmlParseInternalSubset()
7855 * We should be at the end of the DOCTYPE declaration.
7858 xmlFatalErr(ctxt
, XML_ERR_DOCTYPE_NOT_FINISHED
, NULL
);
7864 * xmlParseInternalSubset:
7865 * @ctxt: an XML parser context
7867 * parse the internal subset declaration
7869 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7873 xmlParseInternalSubset(xmlParserCtxtPtr ctxt
) {
7875 * Is there any DTD definition ?
7878 ctxt
->instate
= XML_PARSER_DTD
;
7881 * Parse the succession of Markup declarations and
7883 * Subsequence (markupdecl | PEReference | S)*
7885 while (RAW
!= ']') {
7886 const xmlChar
*check
= CUR_PTR
;
7887 unsigned int cons
= ctxt
->input
->consumed
;
7890 xmlParseMarkupDecl(ctxt
);
7891 xmlParsePEReference(ctxt
);
7894 * Pop-up of finished entities.
7896 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
7899 if ((CUR_PTR
== check
) && (cons
== ctxt
->input
->consumed
)) {
7900 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
7901 "xmlParseInternalSubset: error detected in Markup declaration\n");
7912 * We should be at the end of the DOCTYPE declaration.
7915 xmlFatalErr(ctxt
, XML_ERR_DOCTYPE_NOT_FINISHED
, NULL
);
7920 #ifdef LIBXML_SAX1_ENABLED
7922 * xmlParseAttribute:
7923 * @ctxt: an XML parser context
7924 * @value: a xmlChar ** used to store the value of the attribute
7926 * parse an attribute
7928 * [41] Attribute ::= Name Eq AttValue
7930 * [ WFC: No External Entity References ]
7931 * Attribute values cannot contain direct or indirect entity references
7932 * to external entities.
7934 * [ WFC: No < in Attribute Values ]
7935 * The replacement text of any entity referred to directly or indirectly in
7936 * an attribute value (other than "<") must not contain a <.
7938 * [ VC: Attribute Value Type ]
7939 * The attribute must have been declared; the value must be of the type
7942 * [25] Eq ::= S? '=' S?
7946 * [NS 11] Attribute ::= QName Eq AttValue
7948 * Also the case QName == xmlns:??? is handled independently as a namespace
7951 * Returns the attribute name, and the value in *value.
7955 xmlParseAttribute(xmlParserCtxtPtr ctxt
, xmlChar
**value
) {
7956 const xmlChar
*name
;
7961 name
= xmlParseName(ctxt
);
7963 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
7964 "error parsing attribute name\n");
7975 val
= xmlParseAttValue(ctxt
);
7976 ctxt
->instate
= XML_PARSER_CONTENT
;
7978 xmlFatalErrMsgStr(ctxt
, XML_ERR_ATTRIBUTE_WITHOUT_VALUE
,
7979 "Specification mandate value for attribute %s\n", name
);
7984 * Check that xml:lang conforms to the specification
7985 * No more registered as an error, just generate a warning now
7986 * since this was deprecated in XML second edition
7988 if ((ctxt
->pedantic
) && (xmlStrEqual(name
, BAD_CAST
"xml:lang"))) {
7989 if (!xmlCheckLanguageID(val
)) {
7990 xmlWarningMsg(ctxt
, XML_WAR_LANG_VALUE
,
7991 "Malformed value for xml:lang : %s\n",
7997 * Check that xml:space conforms to the specification
7999 if (xmlStrEqual(name
, BAD_CAST
"xml:space")) {
8000 if (xmlStrEqual(val
, BAD_CAST
"default"))
8002 else if (xmlStrEqual(val
, BAD_CAST
"preserve"))
8005 xmlWarningMsg(ctxt
, XML_WAR_SPACE_VALUE
,
8006 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8017 * @ctxt: an XML parser context
8019 * parse a start of tag either for rule element or
8020 * EmptyElement. In both case we don't parse the tag closing chars.
8022 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8024 * [ WFC: Unique Att Spec ]
8025 * No attribute name may appear more than once in the same start-tag or
8026 * empty-element tag.
8028 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8030 * [ WFC: Unique Att Spec ]
8031 * No attribute name may appear more than once in the same start-tag or
8032 * empty-element tag.
8036 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8038 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8040 * Returns the element name parsed
8044 xmlParseStartTag(xmlParserCtxtPtr ctxt
) {
8045 const xmlChar
*name
;
8046 const xmlChar
*attname
;
8048 const xmlChar
**atts
= ctxt
->atts
;
8050 int maxatts
= ctxt
->maxatts
;
8053 if (RAW
!= '<') return(NULL
);
8056 name
= xmlParseName(ctxt
);
8058 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8059 "xmlParseStartTag: invalid element name\n");
8064 * Now parse the attributes, it ends up with the ending
8071 while ((RAW
!= '>') &&
8072 ((RAW
!= '/') || (NXT(1) != '>')) &&
8073 (IS_BYTE_CHAR(RAW
))) {
8074 const xmlChar
*q
= CUR_PTR
;
8075 unsigned int cons
= ctxt
->input
->consumed
;
8077 attname
= xmlParseAttribute(ctxt
, &attvalue
);
8078 if ((attname
!= NULL
) && (attvalue
!= NULL
)) {
8080 * [ WFC: Unique Att Spec ]
8081 * No attribute name may appear more than once in the same
8082 * start-tag or empty-element tag.
8084 for (i
= 0; i
< nbatts
;i
+= 2) {
8085 if (xmlStrEqual(atts
[i
], attname
)) {
8086 xmlErrAttributeDup(ctxt
, NULL
, attname
);
8092 * Add the pair to atts
8095 maxatts
= 22; /* allow for 10 attrs by default */
8096 atts
= (const xmlChar
**)
8097 xmlMalloc(maxatts
* sizeof(xmlChar
*));
8099 xmlErrMemory(ctxt
, NULL
);
8100 if (attvalue
!= NULL
)
8105 ctxt
->maxatts
= maxatts
;
8106 } else if (nbatts
+ 4 > maxatts
) {
8110 n
= (const xmlChar
**) xmlRealloc((void *) atts
,
8111 maxatts
* sizeof(const xmlChar
*));
8113 xmlErrMemory(ctxt
, NULL
);
8114 if (attvalue
!= NULL
)
8120 ctxt
->maxatts
= maxatts
;
8122 atts
[nbatts
++] = attname
;
8123 atts
[nbatts
++] = attvalue
;
8124 atts
[nbatts
] = NULL
;
8125 atts
[nbatts
+ 1] = NULL
;
8127 if (attvalue
!= NULL
)
8134 if ((RAW
== '>') || (((RAW
== '/') && (NXT(1) == '>'))))
8136 if (!IS_BLANK_CH(RAW
)) {
8137 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
8138 "attributes construct error\n");
8141 if ((cons
== ctxt
->input
->consumed
) && (q
== CUR_PTR
) &&
8142 (attname
== NULL
) && (attvalue
== NULL
)) {
8143 xmlFatalErrMsg(ctxt
, XML_ERR_INTERNAL_ERROR
,
8144 "xmlParseStartTag: problem parsing attributes\n");
8152 * SAX: Start of Element !
8154 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->startElement
!= NULL
) &&
8155 (!ctxt
->disableSAX
)) {
8157 ctxt
->sax
->startElement(ctxt
->userData
, name
, atts
);
8159 ctxt
->sax
->startElement(ctxt
->userData
, name
, NULL
);
8163 /* Free only the content strings */
8164 for (i
= 1;i
< nbatts
;i
+=2)
8165 if (atts
[i
] != NULL
)
8166 xmlFree((xmlChar
*) atts
[i
]);
8173 * @ctxt: an XML parser context
8174 * @line: line of the start tag
8175 * @nsNr: number of namespaces on the start tag
8177 * parse an end of tag
8179 * [42] ETag ::= '</' Name S? '>'
8183 * [NS 9] ETag ::= '</' QName S? '>'
8187 xmlParseEndTag1(xmlParserCtxtPtr ctxt
, int line
) {
8188 const xmlChar
*name
;
8191 if ((RAW
!= '<') || (NXT(1) != '/')) {
8192 xmlFatalErrMsg(ctxt
, XML_ERR_LTSLASH_REQUIRED
,
8193 "xmlParseEndTag: '</' not found\n");
8198 name
= xmlParseNameAndCompare(ctxt
,ctxt
->name
);
8201 * We should definitely be at the ending "S? '>'" part
8205 if ((!IS_BYTE_CHAR(RAW
)) || (RAW
!= '>')) {
8206 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
8211 * [ WFC: Element Type Match ]
8212 * The Name in an element's end-tag must match the element type in the
8216 if (name
!= (xmlChar
*)1) {
8217 if (name
== NULL
) name
= BAD_CAST
"unparseable";
8218 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NAME_MISMATCH
,
8219 "Opening and ending tag mismatch: %s line %d and %s\n",
8220 ctxt
->name
, line
, name
);
8226 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElement
!= NULL
) &&
8227 (!ctxt
->disableSAX
))
8228 ctxt
->sax
->endElement(ctxt
->userData
, ctxt
->name
);
8237 * @ctxt: an XML parser context
8239 * parse an end of tag
8241 * [42] ETag ::= '</' Name S? '>'
8245 * [NS 9] ETag ::= '</' QName S? '>'
8249 xmlParseEndTag(xmlParserCtxtPtr ctxt
) {
8250 xmlParseEndTag1(ctxt
, 0);
8252 #endif /* LIBXML_SAX1_ENABLED */
8254 /************************************************************************
8256 * SAX 2 specific operations *
8258 ************************************************************************/
8262 * @ctxt: an XML parser context
8263 * @prefix: the prefix to lookup
8265 * Lookup the namespace name for the @prefix (which can be NULL)
8266 * The prefix must come from the @ctxt->dict dictionary
8268 * Returns the namespace name or NULL if not bound
8270 static const xmlChar
*
8271 xmlGetNamespace(xmlParserCtxtPtr ctxt
, const xmlChar
*prefix
) {
8274 if (prefix
== ctxt
->str_xml
) return(ctxt
->str_xml_ns
);
8275 for (i
= ctxt
->nsNr
- 2;i
>= 0;i
-=2)
8276 if (ctxt
->nsTab
[i
] == prefix
) {
8277 if ((prefix
== NULL
) && (*ctxt
->nsTab
[i
+ 1] == 0))
8279 return(ctxt
->nsTab
[i
+ 1]);
8286 * @ctxt: an XML parser context
8287 * @prefix: pointer to store the prefix part
8289 * parse an XML Namespace QName
8291 * [6] QName ::= (Prefix ':')? LocalPart
8292 * [7] Prefix ::= NCName
8293 * [8] LocalPart ::= NCName
8295 * Returns the Name parsed or NULL
8298 static const xmlChar
*
8299 xmlParseQName(xmlParserCtxtPtr ctxt
, const xmlChar
**prefix
) {
8300 const xmlChar
*l
, *p
;
8304 l
= xmlParseNCName(ctxt
);
8307 l
= xmlParseName(ctxt
);
8309 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8310 "Failed to parse QName '%s'\n", l
, NULL
, NULL
);
8320 l
= xmlParseNCName(ctxt
);
8324 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8325 "Failed to parse QName '%s:'\n", p
, NULL
, NULL
);
8326 l
= xmlParseNmtoken(ctxt
);
8328 tmp
= xmlBuildQName(BAD_CAST
"", p
, NULL
, 0);
8330 tmp
= xmlBuildQName(l
, p
, NULL
, 0);
8333 p
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8334 if (tmp
!= NULL
) xmlFree(tmp
);
8341 xmlNsErr(ctxt
, XML_NS_ERR_QNAME
,
8342 "Failed to parse QName '%s:%s:'\n", p
, l
, NULL
);
8344 tmp
= (xmlChar
*) xmlParseName(ctxt
);
8346 tmp
= xmlBuildQName(tmp
, l
, NULL
, 0);
8347 l
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8348 if (tmp
!= NULL
) xmlFree(tmp
);
8352 tmp
= xmlBuildQName(BAD_CAST
"", l
, NULL
, 0);
8353 l
= xmlDictLookup(ctxt
->dict
, tmp
, -1);
8354 if (tmp
!= NULL
) xmlFree(tmp
);
8365 * xmlParseQNameAndCompare:
8366 * @ctxt: an XML parser context
8367 * @name: the localname
8368 * @prefix: the prefix, if any.
8370 * parse an XML name and compares for match
8371 * (specialized for endtag parsing)
8373 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8374 * and the name for mismatch
8377 static const xmlChar
*
8378 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt
, xmlChar
const *name
,
8379 xmlChar
const *prefix
) {
8383 const xmlChar
*prefix2
;
8385 if (prefix
== NULL
) return(xmlParseNameAndCompare(ctxt
, name
));
8388 in
= ctxt
->input
->cur
;
8391 while (*in
!= 0 && *in
== *cmp
) {
8395 if ((*cmp
== 0) && (*in
== ':')) {
8398 while (*in
!= 0 && *in
== *cmp
) {
8402 if (*cmp
== 0 && (*in
== '>' || IS_BLANK_CH (*in
))) {
8404 ctxt
->input
->cur
= in
;
8405 return((const xmlChar
*) 1);
8409 * all strings come from the dictionary, equality can be done directly
8411 ret
= xmlParseQName (ctxt
, &prefix2
);
8412 if ((ret
== name
) && (prefix
== prefix2
))
8413 return((const xmlChar
*) 1);
8418 * xmlParseAttValueInternal:
8419 * @ctxt: an XML parser context
8420 * @len: attribute len result
8421 * @alloc: whether the attribute was reallocated as a new string
8422 * @normalize: if 1 then further non-CDATA normalization must be done
8424 * parse a value for an attribute.
8425 * NOTE: if no normalization is needed, the routine will return pointers
8426 * directly from the data buffer.
8428 * 3.3.3 Attribute-Value Normalization:
8429 * Before the value of an attribute is passed to the application or
8430 * checked for validity, the XML processor must normalize it as follows:
8431 * - a character reference is processed by appending the referenced
8432 * character to the attribute value
8433 * - an entity reference is processed by recursively processing the
8434 * replacement text of the entity
8435 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8436 * appending #x20 to the normalized value, except that only a single
8437 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8438 * parsed entity or the literal entity value of an internal parsed entity
8439 * - other characters are processed by appending them to the normalized value
8440 * If the declared value is not CDATA, then the XML processor must further
8441 * process the normalized attribute value by discarding any leading and
8442 * trailing space (#x20) characters, and by replacing sequences of space
8443 * (#x20) characters by a single space (#x20) character.
8444 * All attributes for which no declaration has been read should be treated
8445 * by a non-validating parser as if declared CDATA.
8447 * Returns the AttValue parsed or NULL. The value has to be freed by the
8448 * caller if it was copied, this can be detected by val[*len] == 0.
8452 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt
, int *len
, int *alloc
,
8456 const xmlChar
*in
= NULL
, *start
, *end
, *last
;
8457 xmlChar
*ret
= NULL
;
8460 in
= (xmlChar
*) CUR_PTR
;
8461 if (*in
!= '"' && *in
!= '\'') {
8462 xmlFatalErr(ctxt
, XML_ERR_ATTRIBUTE_NOT_STARTED
, NULL
);
8465 ctxt
->instate
= XML_PARSER_ATTRIBUTE_VALUE
;
8468 * try to handle in this routine the most common case where no
8469 * allocation of a new string is required and where content is
8473 end
= ctxt
->input
->end
;
8476 const xmlChar
*oldbase
= ctxt
->input
->base
;
8478 if (oldbase
!= ctxt
->input
->base
) {
8479 long delta
= ctxt
->input
->base
- oldbase
;
8480 start
= start
+ delta
;
8483 end
= ctxt
->input
->end
;
8487 * Skip any leading spaces
8489 while ((in
< end
) && (*in
!= limit
) &&
8490 ((*in
== 0x20) || (*in
== 0x9) ||
8491 (*in
== 0xA) || (*in
== 0xD))) {
8495 const xmlChar
*oldbase
= ctxt
->input
->base
;
8497 if (oldbase
!= ctxt
->input
->base
) {
8498 long delta
= ctxt
->input
->base
- oldbase
;
8499 start
= start
+ delta
;
8502 end
= ctxt
->input
->end
;
8505 while ((in
< end
) && (*in
!= limit
) && (*in
>= 0x20) &&
8506 (*in
<= 0x7f) && (*in
!= '&') && (*in
!= '<')) {
8507 if ((*in
++ == 0x20) && (*in
== 0x20)) break;
8509 const xmlChar
*oldbase
= ctxt
->input
->base
;
8511 if (oldbase
!= ctxt
->input
->base
) {
8512 long delta
= ctxt
->input
->base
- oldbase
;
8513 start
= start
+ delta
;
8516 end
= ctxt
->input
->end
;
8521 * skip the trailing blanks
8523 while ((last
[-1] == 0x20) && (last
> start
)) last
--;
8524 while ((in
< end
) && (*in
!= limit
) &&
8525 ((*in
== 0x20) || (*in
== 0x9) ||
8526 (*in
== 0xA) || (*in
== 0xD))) {
8529 const xmlChar
*oldbase
= ctxt
->input
->base
;
8531 if (oldbase
!= ctxt
->input
->base
) {
8532 long delta
= ctxt
->input
->base
- oldbase
;
8533 start
= start
+ delta
;
8535 last
= last
+ delta
;
8537 end
= ctxt
->input
->end
;
8540 if (*in
!= limit
) goto need_complex
;
8542 while ((in
< end
) && (*in
!= limit
) && (*in
>= 0x20) &&
8543 (*in
<= 0x7f) && (*in
!= '&') && (*in
!= '<')) {
8546 const xmlChar
*oldbase
= ctxt
->input
->base
;
8548 if (oldbase
!= ctxt
->input
->base
) {
8549 long delta
= ctxt
->input
->base
- oldbase
;
8550 start
= start
+ delta
;
8553 end
= ctxt
->input
->end
;
8557 if (*in
!= limit
) goto need_complex
;
8561 *len
= last
- start
;
8562 ret
= (xmlChar
*) start
;
8564 if (alloc
) *alloc
= 1;
8565 ret
= xmlStrndup(start
, last
- start
);
8568 if (alloc
) *alloc
= 0;
8571 if (alloc
) *alloc
= 1;
8572 return xmlParseAttValueComplex(ctxt
, len
, normalize
);
8576 * xmlParseAttribute2:
8577 * @ctxt: an XML parser context
8578 * @pref: the element prefix
8579 * @elem: the element name
8580 * @prefix: a xmlChar ** used to store the value of the attribute prefix
8581 * @value: a xmlChar ** used to store the value of the attribute
8582 * @len: an int * to save the length of the attribute
8583 * @alloc: an int * to indicate if the attribute was allocated
8585 * parse an attribute in the new SAX2 framework.
8587 * Returns the attribute name, and the value in *value.
8590 static const xmlChar
*
8591 xmlParseAttribute2(xmlParserCtxtPtr ctxt
,
8592 const xmlChar
* pref
, const xmlChar
* elem
,
8593 const xmlChar
** prefix
, xmlChar
** value
,
8594 int *len
, int *alloc
)
8596 const xmlChar
*name
;
8597 xmlChar
*val
, *internal_val
= NULL
;
8602 name
= xmlParseQName(ctxt
, prefix
);
8604 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8605 "error parsing attribute name\n");
8610 * get the type if needed
8612 if (ctxt
->attsSpecial
!= NULL
) {
8615 type
= (int) (long) xmlHashQLookup2(ctxt
->attsSpecial
,
8616 pref
, elem
, *prefix
, name
);
8628 val
= xmlParseAttValueInternal(ctxt
, len
, alloc
, normalize
);
8631 * Sometimes a second normalisation pass for spaces is needed
8632 * but that only happens if charrefs or entity references
8633 * have been used in the attribute value, i.e. the attribute
8634 * value has been extracted in an allocated string already.
8637 const xmlChar
*val2
;
8639 val2
= xmlAttrNormalizeSpace2(ctxt
, val
, len
);
8640 if ((val2
!= NULL
) && (val2
!= val
)) {
8642 val
= (xmlChar
*) val2
;
8646 ctxt
->instate
= XML_PARSER_CONTENT
;
8648 xmlFatalErrMsgStr(ctxt
, XML_ERR_ATTRIBUTE_WITHOUT_VALUE
,
8649 "Specification mandate value for attribute %s\n",
8654 if (*prefix
== ctxt
->str_xml
) {
8656 * Check that xml:lang conforms to the specification
8657 * No more registered as an error, just generate a warning now
8658 * since this was deprecated in XML second edition
8660 if ((ctxt
->pedantic
) && (xmlStrEqual(name
, BAD_CAST
"lang"))) {
8661 internal_val
= xmlStrndup(val
, *len
);
8662 if (!xmlCheckLanguageID(internal_val
)) {
8663 xmlWarningMsg(ctxt
, XML_WAR_LANG_VALUE
,
8664 "Malformed value for xml:lang : %s\n",
8665 internal_val
, NULL
);
8670 * Check that xml:space conforms to the specification
8672 if (xmlStrEqual(name
, BAD_CAST
"space")) {
8673 internal_val
= xmlStrndup(val
, *len
);
8674 if (xmlStrEqual(internal_val
, BAD_CAST
"default"))
8676 else if (xmlStrEqual(internal_val
, BAD_CAST
"preserve"))
8679 xmlWarningMsg(ctxt
, XML_WAR_SPACE_VALUE
,
8680 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8681 internal_val
, NULL
);
8685 xmlFree(internal_val
);
8693 * xmlParseStartTag2:
8694 * @ctxt: an XML parser context
8696 * parse a start of tag either for rule element or
8697 * EmptyElement. In both cases we don't parse the tag closing chars.
8698 * This routine is called when running SAX2 parsing
8700 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8702 * [ WFC: Unique Att Spec ]
8703 * No attribute name may appear more than once in the same start-tag or
8704 * empty-element tag.
8706 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8708 * [ WFC: Unique Att Spec ]
8709 * No attribute name may appear more than once in the same start-tag or
8710 * empty-element tag.
8714 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8716 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8718 * Returns the element name parsed
8721 static const xmlChar
*
8722 xmlParseStartTag2(xmlParserCtxtPtr ctxt
, const xmlChar
**pref
,
8723 const xmlChar
**URI
, int *tlen
) {
8724 const xmlChar
*localname
;
8725 const xmlChar
*prefix
;
8726 const xmlChar
*attname
;
8727 const xmlChar
*aprefix
;
8728 const xmlChar
*nsname
;
8730 const xmlChar
**atts
= ctxt
->atts
;
8731 int maxatts
= ctxt
->maxatts
;
8732 int nratts
, nbatts
, nbdef
;
8733 int i
, j
, nbNs
, attval
, oldline
, oldcol
;
8734 const xmlChar
*base
;
8736 int nsNr
= ctxt
->nsNr
;
8738 if (RAW
!= '<') return(NULL
);
8742 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8743 * point since the attribute values may be stored as pointers to
8744 * the buffer and calling SHRINK would destroy them !
8745 * The Shrinking is only possible once the full set of attribute
8746 * callbacks have been done.
8750 base
= ctxt
->input
->base
;
8751 cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
8752 oldline
= ctxt
->input
->line
;
8753 oldcol
= ctxt
->input
->col
;
8759 /* Forget any namespaces added during an earlier parse of this element. */
8762 localname
= xmlParseQName(ctxt
, &prefix
);
8763 if (localname
== NULL
) {
8764 xmlFatalErrMsg(ctxt
, XML_ERR_NAME_REQUIRED
,
8765 "StartTag: invalid element name\n");
8768 *tlen
= ctxt
->input
->cur
- ctxt
->input
->base
- cur
;
8771 * Now parse the attributes, it ends up with the ending
8777 if (ctxt
->input
->base
!= base
) goto base_changed
;
8779 while ((RAW
!= '>') &&
8780 ((RAW
!= '/') || (NXT(1) != '>')) &&
8781 (IS_BYTE_CHAR(RAW
))) {
8782 const xmlChar
*q
= CUR_PTR
;
8783 unsigned int cons
= ctxt
->input
->consumed
;
8784 int len
= -1, alloc
= 0;
8786 attname
= xmlParseAttribute2(ctxt
, prefix
, localname
,
8787 &aprefix
, &attvalue
, &len
, &alloc
);
8788 if (ctxt
->input
->base
!= base
) {
8789 if ((attvalue
!= NULL
) && (alloc
!= 0))
8794 if ((attname
!= NULL
) && (attvalue
!= NULL
)) {
8795 if (len
< 0) len
= xmlStrlen(attvalue
);
8796 if ((attname
== ctxt
->str_xmlns
) && (aprefix
== NULL
)) {
8797 const xmlChar
*URL
= xmlDictLookup(ctxt
->dict
, attvalue
, len
);
8801 uri
= xmlParseURI((const char *) URL
);
8803 xmlNsErr(ctxt
, XML_WAR_NS_URI
,
8804 "xmlns: '%s' is not a valid URI\n",
8807 if (uri
->scheme
== NULL
) {
8808 xmlNsWarn(ctxt
, XML_WAR_NS_URI_RELATIVE
,
8809 "xmlns: URI %s is not absolute\n",
8814 if (URL
== ctxt
->str_xml_ns
) {
8815 if (attname
!= ctxt
->str_xml
) {
8816 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
8817 "xml namespace URI cannot be the default namespace\n",
8820 goto skip_default_ns
;
8824 BAD_CAST
"http://www.w3.org/2000/xmlns/"))) {
8825 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
8826 "reuse of the xmlns namespace name is forbidden\n",
8828 goto skip_default_ns
;
8832 * check that it's not a defined namespace
8834 for (j
= 1;j
<= nbNs
;j
++)
8835 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == NULL
)
8838 xmlErrAttributeDup(ctxt
, NULL
, attname
);
8840 if (nsPush(ctxt
, NULL
, URL
) > 0) nbNs
++;
8842 if (alloc
!= 0) xmlFree(attvalue
);
8846 if (aprefix
== ctxt
->str_xmlns
) {
8847 const xmlChar
*URL
= xmlDictLookup(ctxt
->dict
, attvalue
, len
);
8850 if (attname
== ctxt
->str_xml
) {
8851 if (URL
!= ctxt
->str_xml_ns
) {
8852 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
8853 "xml namespace prefix mapped to wrong URI\n",
8857 * Do not keep a namespace definition node
8861 if (URL
== ctxt
->str_xml_ns
) {
8862 if (attname
!= ctxt
->str_xml
) {
8863 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
8864 "xml namespace URI mapped to wrong prefix\n",
8869 if (attname
== ctxt
->str_xmlns
) {
8870 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
8871 "redefinition of the xmlns prefix is forbidden\n",
8877 BAD_CAST
"http://www.w3.org/2000/xmlns/"))) {
8878 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
8879 "reuse of the xmlns namespace name is forbidden\n",
8883 if ((URL
== NULL
) || (URL
[0] == 0)) {
8884 xmlNsErr(ctxt
, XML_NS_ERR_XML_NAMESPACE
,
8885 "xmlns:%s: Empty XML namespace is not allowed\n",
8886 attname
, NULL
, NULL
);
8889 uri
= xmlParseURI((const char *) URL
);
8891 xmlNsErr(ctxt
, XML_WAR_NS_URI
,
8892 "xmlns:%s: '%s' is not a valid URI\n",
8893 attname
, URL
, NULL
);
8895 if ((ctxt
->pedantic
) && (uri
->scheme
== NULL
)) {
8896 xmlNsWarn(ctxt
, XML_WAR_NS_URI_RELATIVE
,
8897 "xmlns:%s: URI %s is not absolute\n",
8898 attname
, URL
, NULL
);
8905 * check that it's not a defined namespace
8907 for (j
= 1;j
<= nbNs
;j
++)
8908 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == attname
)
8911 xmlErrAttributeDup(ctxt
, aprefix
, attname
);
8913 if (nsPush(ctxt
, attname
, URL
) > 0) nbNs
++;
8915 if (alloc
!= 0) xmlFree(attvalue
);
8917 if (ctxt
->input
->base
!= base
) goto base_changed
;
8922 * Add the pair to atts
8924 if ((atts
== NULL
) || (nbatts
+ 5 > maxatts
)) {
8925 if (xmlCtxtGrowAttrs(ctxt
, nbatts
+ 5) < 0) {
8926 if (attvalue
[len
] == 0)
8930 maxatts
= ctxt
->maxatts
;
8933 ctxt
->attallocs
[nratts
++] = alloc
;
8934 atts
[nbatts
++] = attname
;
8935 atts
[nbatts
++] = aprefix
;
8936 atts
[nbatts
++] = NULL
; /* the URI will be fetched later */
8937 atts
[nbatts
++] = attvalue
;
8939 atts
[nbatts
++] = attvalue
;
8941 * tag if some deallocation is needed
8943 if (alloc
!= 0) attval
= 1;
8945 if ((attvalue
!= NULL
) && (attvalue
[len
] == 0))
8952 if (ctxt
->input
->base
!= base
) goto base_changed
;
8953 if ((RAW
== '>') || (((RAW
== '/') && (NXT(1) == '>'))))
8955 if (!IS_BLANK_CH(RAW
)) {
8956 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
8957 "attributes construct error\n");
8961 if ((cons
== ctxt
->input
->consumed
) && (q
== CUR_PTR
) &&
8962 (attname
== NULL
) && (attvalue
== NULL
)) {
8963 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
8964 "xmlParseStartTag: problem parsing attributes\n");
8968 if (ctxt
->input
->base
!= base
) goto base_changed
;
8972 * The attributes defaulting
8974 if (ctxt
->attsDefault
!= NULL
) {
8975 xmlDefAttrsPtr defaults
;
8977 defaults
= xmlHashLookup2(ctxt
->attsDefault
, localname
, prefix
);
8978 if (defaults
!= NULL
) {
8979 for (i
= 0;i
< defaults
->nbAttrs
;i
++) {
8980 attname
= defaults
->values
[5 * i
];
8981 aprefix
= defaults
->values
[5 * i
+ 1];
8984 * special work for namespaces defaulted defs
8986 if ((attname
== ctxt
->str_xmlns
) && (aprefix
== NULL
)) {
8988 * check that it's not a defined namespace
8990 for (j
= 1;j
<= nbNs
;j
++)
8991 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == NULL
)
8993 if (j
<= nbNs
) continue;
8995 nsname
= xmlGetNamespace(ctxt
, NULL
);
8996 if (nsname
!= defaults
->values
[5 * i
+ 2]) {
8997 if (nsPush(ctxt
, NULL
,
8998 defaults
->values
[5 * i
+ 2]) > 0)
9001 } else if (aprefix
== ctxt
->str_xmlns
) {
9003 * check that it's not a defined namespace
9005 for (j
= 1;j
<= nbNs
;j
++)
9006 if (ctxt
->nsTab
[ctxt
->nsNr
- 2 * j
] == attname
)
9008 if (j
<= nbNs
) continue;
9010 nsname
= xmlGetNamespace(ctxt
, attname
);
/* Compare against this default's own value (slot 5 * i + 2), the same one pushed just below. */
9011 if (nsname
!= defaults
->values
[5 * i
+ 2]) {
9012 if (nsPush(ctxt
, attname
,
9013 defaults
->values
[5 * i
+ 2]) > 0)
9018 * check that it's not a defined attribute
9020 for (j
= 0;j
< nbatts
;j
+=5) {
9021 if ((attname
== atts
[j
]) && (aprefix
== atts
[j
+1]))
9024 if (j
< nbatts
) continue;
9026 if ((atts
== NULL
) || (nbatts
+ 5 > maxatts
)) {
9027 if (xmlCtxtGrowAttrs(ctxt
, nbatts
+ 5) < 0) {
9030 maxatts
= ctxt
->maxatts
;
9033 atts
[nbatts
++] = attname
;
9034 atts
[nbatts
++] = aprefix
;
9035 if (aprefix
== NULL
)
9036 atts
[nbatts
++] = NULL
;
9038 atts
[nbatts
++] = xmlGetNamespace(ctxt
, aprefix
);
9039 atts
[nbatts
++] = defaults
->values
[5 * i
+ 2];
9040 atts
[nbatts
++] = defaults
->values
[5 * i
+ 3];
9041 if ((ctxt
->standalone
== 1) &&
9042 (defaults
->values
[5 * i
+ 4] != NULL
)) {
9043 xmlValidityError(ctxt
, XML_DTD_STANDALONE_DEFAULTED
,
9044 "standalone: attribute %s on %s defaulted from external subset\n",
9045 attname
, localname
);
9054 * The attributes checkings
9056 for (i
= 0; i
< nbatts
;i
+= 5) {
9058 * The default namespace does not apply to attribute names.
9060 if (atts
[i
+ 1] != NULL
) {
9061 nsname
= xmlGetNamespace(ctxt
, atts
[i
+ 1]);
9062 if (nsname
== NULL
) {
9063 xmlNsErr(ctxt
, XML_NS_ERR_UNDEFINED_NAMESPACE
,
9064 "Namespace prefix %s for %s on %s is not defined\n",
9065 atts
[i
+ 1], atts
[i
], localname
);
9067 atts
[i
+ 2] = nsname
;
9071 * [ WFC: Unique Att Spec ]
9072 * No attribute name may appear more than once in the same
9073 * start-tag or empty-element tag.
9074 * As extended by the Namespace in XML REC.
9076 for (j
= 0; j
< i
;j
+= 5) {
9077 if (atts
[i
] == atts
[j
]) {
9078 if (atts
[i
+1] == atts
[j
+1]) {
9079 xmlErrAttributeDup(ctxt
, atts
[i
+1], atts
[i
]);
9082 if ((nsname
!= NULL
) && (atts
[j
+ 2] == nsname
)) {
9083 xmlNsErr(ctxt
, XML_NS_ERR_ATTRIBUTE_REDEFINED
,
9084 "Namespaced Attribute %s in '%s' redefined\n",
9085 atts
[i
], nsname
, NULL
);
9092 nsname
= xmlGetNamespace(ctxt
, prefix
);
9093 if ((prefix
!= NULL
) && (nsname
== NULL
)) {
9094 xmlNsErr(ctxt
, XML_NS_ERR_UNDEFINED_NAMESPACE
,
9095 "Namespace prefix %s on %s is not defined\n",
9096 prefix
, localname
, NULL
);
9102 * SAX: Start of Element !
9104 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->startElementNs
!= NULL
) &&
9105 (!ctxt
->disableSAX
)) {
9107 ctxt
->sax
->startElementNs(ctxt
->userData
, localname
, prefix
,
9108 nsname
, nbNs
, &ctxt
->nsTab
[ctxt
->nsNr
- 2 * nbNs
],
9109 nbatts
/ 5, nbdef
, atts
);
9111 ctxt
->sax
->startElementNs(ctxt
->userData
, localname
, prefix
,
9112 nsname
, 0, NULL
, nbatts
/ 5, nbdef
, atts
);
9116 * Free up attribute allocated strings if needed
9119 for (i
= 3,j
= 0; j
< nratts
;i
+= 5,j
++)
9120 if ((ctxt
->attallocs
[j
] != 0) && (atts
[i
] != NULL
))
9121 xmlFree((xmlChar
*) atts
[i
]);
9128 * the attribute strings are valid iff the base didn't change
9131 for (i
= 3,j
= 0; j
< nratts
;i
+= 5,j
++)
9132 if ((ctxt
->attallocs
[j
] != 0) && (atts
[i
] != NULL
))
9133 xmlFree((xmlChar
*) atts
[i
]);
9135 ctxt
->input
->cur
= ctxt
->input
->base
+ cur
;
9136 ctxt
->input
->line
= oldline
;
9137 ctxt
->input
->col
= oldcol
;
9138 if (ctxt
->wellFormed
== 1) {
9146 * @ctxt: an XML parser context
9147 * @line: line of the start tag
9148 * @nsNr: number of namespaces on the start tag
9150 * parse an end of tag
9152 * [42] ETag ::= '</' Name S? '>'
9156 * [NS 9] ETag ::= '</' QName S? '>'
9160 xmlParseEndTag2(xmlParserCtxtPtr ctxt
, const xmlChar
*prefix
,
9161 const xmlChar
*URI
, int line
, int nsNr
, int tlen
) {
9162 const xmlChar
*name
;
9165 if ((RAW
!= '<') || (NXT(1) != '/')) {
9166 xmlFatalErr(ctxt
, XML_ERR_LTSLASH_REQUIRED
, NULL
);
9171 if ((tlen
> 0) && (xmlStrncmp(ctxt
->input
->cur
, ctxt
->name
, tlen
) == 0)) {
9172 if (ctxt
->input
->cur
[tlen
] == '>') {
9173 ctxt
->input
->cur
+= tlen
+ 1;
9176 ctxt
->input
->cur
+= tlen
;
9180 name
= xmlParseNameAndCompare(ctxt
, ctxt
->name
);
9182 name
= xmlParseQNameAndCompare(ctxt
, ctxt
->name
, prefix
);
9186 * We should definitely be at the ending "S? '>'" part
9190 if ((!IS_BYTE_CHAR(RAW
)) || (RAW
!= '>')) {
9191 xmlFatalErr(ctxt
, XML_ERR_GT_REQUIRED
, NULL
);
9196 * [ WFC: Element Type Match ]
9197 * The Name in an element's end-tag must match the element type in the
9201 if (name
!= (xmlChar
*)1) {
9202 if (name
== NULL
) name
= BAD_CAST
"unparseable";
9203 if ((line
== 0) && (ctxt
->node
!= NULL
))
9204 line
= ctxt
->node
->line
;
9205 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NAME_MISMATCH
,
9206 "Opening and ending tag mismatch: %s line %d and %s\n",
9207 ctxt
->name
, line
, name
);
9214 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElementNs
!= NULL
) &&
9215 (!ctxt
->disableSAX
))
9216 ctxt
->sax
->endElementNs(ctxt
->userData
, ctxt
->name
, prefix
, URI
);
9226 * @ctxt: an XML parser context
9228 * Parse escaped pure raw content.
9230 * [18] CDSect ::= CDStart CData CDEnd
9232 * [19] CDStart ::= '<![CDATA['
9234 * [20] CData ::= (Char* - (Char* ']]>' Char*))
9236 * [21] CDEnd ::= ']]>'
9239 xmlParseCDSect(xmlParserCtxtPtr ctxt
) {
9240 xmlChar
*buf
= NULL
;
9242 int size
= XML_PARSER_BUFFER_SIZE
;
9248 /* Check 2.6.0 was NXT(0) not RAW */
9249 if (CMP9(CUR_PTR
, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9254 ctxt
->instate
= XML_PARSER_CDATA_SECTION
;
9257 xmlFatalErr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
, NULL
);
9258 ctxt
->instate
= XML_PARSER_CONTENT
;
9264 xmlFatalErr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
, NULL
);
9265 ctxt
->instate
= XML_PARSER_CONTENT
;
9270 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
9272 xmlErrMemory(ctxt
, NULL
);
9275 while (IS_CHAR(cur
) &&
9276 ((r
!= ']') || (s
!= ']') || (cur
!= '>'))) {
9277 if (len
+ 5 >= size
) {
9281 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
9284 xmlErrMemory(ctxt
, NULL
);
9289 COPY_BUF(rl
,buf
,len
,r
);
9303 ctxt
->instate
= XML_PARSER_CONTENT
;
9305 xmlFatalErrMsgStr(ctxt
, XML_ERR_CDATA_NOT_FINISHED
,
9306 "CData section not finished\n%.50s\n", buf
);
9313 * OK the buffer is to be consumed as cdata.
9315 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
9316 if (ctxt
->sax
->cdataBlock
!= NULL
)
9317 ctxt
->sax
->cdataBlock(ctxt
->userData
, buf
, len
);
9318 else if (ctxt
->sax
->characters
!= NULL
)
9319 ctxt
->sax
->characters(ctxt
->userData
, buf
, len
);
9326 * @ctxt: an XML parser context
9330 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9334 xmlParseContent(xmlParserCtxtPtr ctxt
) {
9336 while ((RAW
!= 0) &&
9337 ((RAW
!= '<') || (NXT(1) != '/')) &&
9338 (ctxt
->instate
!= XML_PARSER_EOF
)) {
9339 const xmlChar
*test
= CUR_PTR
;
9340 unsigned int cons
= ctxt
->input
->consumed
;
9341 const xmlChar
*cur
= ctxt
->input
->cur
;
9344 * First case : a Processing Instruction.
9346 if ((*cur
== '<') && (cur
[1] == '?')) {
9351 * Second case : a CDSection
9353 /* 2.6.0 test was *cur not RAW */
9354 else if (CMP9(CUR_PTR
, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9355 xmlParseCDSect(ctxt
);
9359 * Third case : a comment
9361 else if ((*cur
== '<') && (NXT(1) == '!') &&
9362 (NXT(2) == '-') && (NXT(3) == '-')) {
9363 xmlParseComment(ctxt
);
9364 ctxt
->instate
= XML_PARSER_CONTENT
;
9368 * Fourth case : a sub-element.
9370 else if (*cur
== '<') {
9371 xmlParseElement(ctxt
);
9375 * Fifth case : a reference. If it has not been resolved,
9376 * parsing returns its Name, create the node
9379 else if (*cur
== '&') {
9380 xmlParseReference(ctxt
);
9384 * Last case, text. Note that References are handled directly.
9387 xmlParseCharData(ctxt
, 0);
9392 * Pop-up of finished entities.
9394 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
9398 if ((cons
== ctxt
->input
->consumed
) && (test
== CUR_PTR
)) {
9399 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
9400 "detected an error in element content\n");
9401 ctxt
->instate
= XML_PARSER_EOF
;
9409 * @ctxt: an XML parser context
9411 * parse an XML element, this is highly recursive
9413 * [39] element ::= EmptyElemTag | STag content ETag
9415 * [ WFC: Element Type Match ]
9416 * The Name in an element's end-tag must match the element type in the
9422 xmlParseElement(xmlParserCtxtPtr ctxt
) {
9423 const xmlChar
*name
;
9424 const xmlChar
*prefix
= NULL
;
9425 const xmlChar
*URI
= NULL
;
9426 xmlParserNodeInfo node_info
;
9429 int nsNr
= ctxt
->nsNr
;
9431 if (((unsigned int) ctxt
->nameNr
> xmlParserMaxDepth
) &&
9432 ((ctxt
->options
& XML_PARSE_HUGE
) == 0)) {
9433 xmlFatalErrMsgInt(ctxt
, XML_ERR_INTERNAL_ERROR
,
9434 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9436 ctxt
->instate
= XML_PARSER_EOF
;
9440 /* Capture start position */
9441 if (ctxt
->record_info
) {
9442 node_info
.begin_pos
= ctxt
->input
->consumed
+
9443 (CUR_PTR
- ctxt
->input
->base
);
9444 node_info
.begin_line
= ctxt
->input
->line
;
9447 if (ctxt
->spaceNr
== 0)
9448 spacePush(ctxt
, -1);
9449 else if (*ctxt
->space
== -2)
9450 spacePush(ctxt
, -1);
9452 spacePush(ctxt
, *ctxt
->space
);
9454 line
= ctxt
->input
->line
;
9455 #ifdef LIBXML_SAX1_ENABLED
9457 #endif /* LIBXML_SAX1_ENABLED */
9458 name
= xmlParseStartTag2(ctxt
, &prefix
, &URI
, &tlen
);
9459 #ifdef LIBXML_SAX1_ENABLED
9461 name
= xmlParseStartTag(ctxt
);
9462 #endif /* LIBXML_SAX1_ENABLED */
9467 namePush(ctxt
, name
);
9470 #ifdef LIBXML_VALID_ENABLED
9472 * [ VC: Root Element Type ]
9473 * The Name in the document type declaration must match the element
9474 * type of the root element.
9476 if (ctxt
->validate
&& ctxt
->wellFormed
&& ctxt
->myDoc
&&
9477 ctxt
->node
&& (ctxt
->node
== ctxt
->myDoc
->children
))
9478 ctxt
->valid
&= xmlValidateRoot(&ctxt
->vctxt
, ctxt
->myDoc
);
9479 #endif /* LIBXML_VALID_ENABLED */
9482 * Check for an Empty Element.
9484 if ((RAW
== '/') && (NXT(1) == '>')) {
9487 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElementNs
!= NULL
) &&
9488 (!ctxt
->disableSAX
))
9489 ctxt
->sax
->endElementNs(ctxt
->userData
, name
, prefix
, URI
);
9490 #ifdef LIBXML_SAX1_ENABLED
9492 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->endElement
!= NULL
) &&
9493 (!ctxt
->disableSAX
))
9494 ctxt
->sax
->endElement(ctxt
->userData
, name
);
9495 #endif /* LIBXML_SAX1_ENABLED */
9499 if (nsNr
!= ctxt
->nsNr
)
9500 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
9501 if ( ret
!= NULL
&& ctxt
->record_info
) {
9502 node_info
.end_pos
= ctxt
->input
->consumed
+
9503 (CUR_PTR
- ctxt
->input
->base
);
9504 node_info
.end_line
= ctxt
->input
->line
;
9505 node_info
.node
= ret
;
9506 xmlParserAddNodeInfo(ctxt
, &node_info
);
9513 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_GT_REQUIRED
,
9514 "Couldn't find end of Start Tag %s line %d\n",
9518 * end of parsing of this node.
9523 if (nsNr
!= ctxt
->nsNr
)
9524 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
9527 * Capture end position and add node
9529 if ( ret
!= NULL
&& ctxt
->record_info
) {
9530 node_info
.end_pos
= ctxt
->input
->consumed
+
9531 (CUR_PTR
- ctxt
->input
->base
);
9532 node_info
.end_line
= ctxt
->input
->line
;
9533 node_info
.node
= ret
;
9534 xmlParserAddNodeInfo(ctxt
, &node_info
);
9540 * Parse the content of the element:
9542 xmlParseContent(ctxt
);
9543 if (!IS_BYTE_CHAR(RAW
)) {
9544 xmlFatalErrMsgStrIntStr(ctxt
, XML_ERR_TAG_NOT_FINISHED
,
9545 "Premature end of data in tag %s line %d\n",
9549 * end of parsing of this node.
9554 if (nsNr
!= ctxt
->nsNr
)
9555 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
9560 * parse the end of tag: '</' should be here.
9563 xmlParseEndTag2(ctxt
, prefix
, URI
, line
, ctxt
->nsNr
- nsNr
, tlen
);
9566 #ifdef LIBXML_SAX1_ENABLED
9568 xmlParseEndTag1(ctxt
, line
);
9569 #endif /* LIBXML_SAX1_ENABLED */
9572 * Capture end position and add node
9574 if ( ret
!= NULL
&& ctxt
->record_info
) {
9575 node_info
.end_pos
= ctxt
->input
->consumed
+
9576 (CUR_PTR
- ctxt
->input
->base
);
9577 node_info
.end_line
= ctxt
->input
->line
;
9578 node_info
.node
= ret
;
9579 xmlParserAddNodeInfo(ctxt
, &node_info
);
9584 * xmlParseVersionNum:
9585 * @ctxt: an XML parser context
9587 * parse the XML version value.
9589 * [26] VersionNum ::= '1.' [0-9]+
9591 * In practice allow [0-9].[0-9]+ at that level
9593 * Returns the string giving the XML version number, or NULL
9596 xmlParseVersionNum(xmlParserCtxtPtr ctxt
) {
9597 xmlChar
*buf
= NULL
;
9602 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
9604 xmlErrMemory(ctxt
, NULL
);
9608 if (!((cur
>= '0') && (cur
<= '9'))) {
9622 while ((cur
>= '0') && (cur
<= '9')) {
9623 if (len
+ 1 >= size
) {
9627 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
9630 xmlErrMemory(ctxt
, NULL
);
9644 * xmlParseVersionInfo:
9645 * @ctxt: an XML parser context
9647 * parse the XML version.
9649 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9651 * [25] Eq ::= S? '=' S?
9653 * Returns the version string, e.g. "1.0"
9657 xmlParseVersionInfo(xmlParserCtxtPtr ctxt
) {
9658 xmlChar
*version
= NULL
;
9660 if (CMP7(CUR_PTR
, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9664 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
9671 version
= xmlParseVersionNum(ctxt
);
9673 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
9676 } else if (RAW
== '\''){
9678 version
= xmlParseVersionNum(ctxt
);
9680 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
9684 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
9692 * @ctxt: an XML parser context
9694 * parse the XML encoding name
9696 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9698 * Returns the encoding name value or NULL
9701 xmlParseEncName(xmlParserCtxtPtr ctxt
) {
9702 xmlChar
*buf
= NULL
;
9708 if (((cur
>= 'a') && (cur
<= 'z')) ||
9709 ((cur
>= 'A') && (cur
<= 'Z'))) {
9710 buf
= (xmlChar
*) xmlMallocAtomic(size
* sizeof(xmlChar
));
9712 xmlErrMemory(ctxt
, NULL
);
9719 while (((cur
>= 'a') && (cur
<= 'z')) ||
9720 ((cur
>= 'A') && (cur
<= 'Z')) ||
9721 ((cur
>= '0') && (cur
<= '9')) ||
9722 (cur
== '.') || (cur
== '_') ||
9724 if (len
+ 1 >= size
) {
9728 tmp
= (xmlChar
*) xmlRealloc(buf
, size
* sizeof(xmlChar
));
9730 xmlErrMemory(ctxt
, NULL
);
9747 xmlFatalErr(ctxt
, XML_ERR_ENCODING_NAME
, NULL
);
9753 * xmlParseEncodingDecl:
9754 * @ctxt: an XML parser context
9756 * parse the XML encoding declaration
9758 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9760 * this sets up the conversion filters.
9762 * Returns the encoding value or NULL
9766 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt
) {
9767 xmlChar
*encoding
= NULL
;
9770 if (CMP8(CUR_PTR
, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9774 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
9781 encoding
= xmlParseEncName(ctxt
);
9783 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
9786 } else if (RAW
== '\''){
9788 encoding
= xmlParseEncName(ctxt
);
9790 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
9794 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
9797 * UTF-16 encoding switch has already taken place at this stage,
9798 * moreover the little-endian/big-endian selection is already done
9800 if ((encoding
!= NULL
) &&
9801 ((!xmlStrcasecmp(encoding
, BAD_CAST
"UTF-16")) ||
9802 (!xmlStrcasecmp(encoding
, BAD_CAST
"UTF16")))) {
9804 * If no encoding was passed to the parser, that we are
9805 * using UTF-16 and no decoder is present i.e. the
9806 * document is apparently UTF-8 compatible, then raise an
9807 * encoding mismatch fatal error
9809 if ((ctxt
->encoding
== NULL
) &&
9810 (ctxt
->input
->buf
!= NULL
) &&
9811 (ctxt
->input
->buf
->encoder
== NULL
)) {
9812 xmlFatalErrMsg(ctxt
, XML_ERR_INVALID_ENCODING
,
9813 "Document labelled UTF-16 but has UTF-8 content\n");
9815 if (ctxt
->encoding
!= NULL
)
9816 xmlFree((xmlChar
*) ctxt
->encoding
);
9817 ctxt
->encoding
= encoding
;
9820 * UTF-8 encoding is handled natively
9822 else if ((encoding
!= NULL
) &&
9823 ((!xmlStrcasecmp(encoding
, BAD_CAST
"UTF-8")) ||
9824 (!xmlStrcasecmp(encoding
, BAD_CAST
"UTF8")))) {
9825 if (ctxt
->encoding
!= NULL
)
9826 xmlFree((xmlChar
*) ctxt
->encoding
);
9827 ctxt
->encoding
= encoding
;
9829 else if (encoding
!= NULL
) {
9830 xmlCharEncodingHandlerPtr handler
;
9832 if (ctxt
->input
->encoding
!= NULL
)
9833 xmlFree((xmlChar
*) ctxt
->input
->encoding
);
9834 ctxt
->input
->encoding
= encoding
;
9836 handler
= xmlFindCharEncodingHandler((const char *) encoding
);
9837 if (handler
!= NULL
) {
9838 xmlSwitchToEncoding(ctxt
, handler
);
9840 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNSUPPORTED_ENCODING
,
9841 "Unsupported encoding %s\n", encoding
);
9851 * @ctxt: an XML parser context
9853 * parse the XML standalone declaration
9855 * [32] SDDecl ::= S 'standalone' Eq
9856 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9858 * [ VC: Standalone Document Declaration ]
9859 * TODO The standalone document declaration must have the value "no"
9860 * if any external markup declarations contain declarations of:
9861 * - attributes with default values, if elements to which these
9862 * attributes apply appear in the document without specifications
9863 * of values for these attributes, or
9864 * - entities (other than amp, lt, gt, apos, quot), if references
9865 * to those entities appear in the document, or
9866 * - attributes with values subject to normalization, where the
9867 * attribute appears in the document with a value which will change
9868 * as a result of normalization, or
9869 * - element types with element content, if white space occurs directly
9870 * within any instance of those types.
9873 * 1 if standalone="yes"
9874 * 0 if standalone="no"
9875 * -2 if standalone attribute is missing or invalid
9876 * (A standalone value of -2 means that the XML declaration was found,
9877 * but no value was specified for the standalone attribute).
/*
 * xmlParseSDDecl: parse the optional standalone pseudo-attribute of the
 * XML declaration (production [32] SDDecl).
 * The local result starts at -2, which the header documents as the
 * value for a missing or invalid standalone attribute.
 * NOTE(review): several original lines of this function (skips, else
 * branches, the return) are not visible in this excerpt; the comments
 * below describe only what is shown.
 */
9881 xmlParseSDDecl(xmlParserCtxtPtr ctxt
) {
9882 int standalone
= -2;
/* Nothing below applies unless the keyword standalone is next in the input. */
9885 if (CMP10(CUR_PTR
, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
/* Presumably raised when the equal sign after the keyword is absent; the guarding test is not visible here. */
9889 xmlFatalErr(ctxt
, XML_ERR_EQUAL_REQUIRED
, NULL
);
/* First quoting style: the value is recognised by its leading characters, n o or y e ... */
9896 if ((RAW
== 'n') && (NXT(1) == 'o')) {
9899 } else if ((RAW
== 'y') && (NXT(1) == 'e') &&
/* Reported when the value is neither of the accepted spellings (assumed from the error code). */
9904 xmlFatalErr(ctxt
, XML_ERR_STANDALONE_VALUE
, NULL
);
/* Reported when the closing quote is not found (assumed from the error code). */
9907 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
/* Second quoting style: same value checks for a double-quoted value. */
9910 } else if (RAW
== '"'){
9912 if ((RAW
== 'n') && (NXT(1) == 'o')) {
9915 } else if ((RAW
== 'y') && (NXT(1) == 'e') &&
9920 xmlFatalErr(ctxt
, XML_ERR_STANDALONE_VALUE
, NULL
);
9923 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_CLOSED
, NULL
);
/* Fallback when the value does not begin with either quote character (assumed from the error code). */
9927 xmlFatalErr(ctxt
, XML_ERR_STRING_NOT_STARTED
, NULL
);
9935 * @ctxt: an XML parser context
9937 * parse an XML declaration header
9939 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
/*
 * xmlParseXMLDecl: parse the XML declaration at the current position
 * (production [23] XMLDecl).
 * Visible steps, in order: reset ctxt->input->standalone to -2, require
 * a blank after the opening marker, parse the mandatory version, parse
 * the optional encoding declaration, parse the optional standalone
 * declaration, then check for the closing marker.
 * NOTE(review): several original lines of this function are not visible
 * in this excerpt; the comments below describe only what is shown.
 */
9943 xmlParseXMLDecl(xmlParserCtxtPtr ctxt
) {
9947 * This value for standalone indicates that the document has an
9948 * XML declaration but it does not have a standalone attribute.
9949 * It will be overwritten later if a standalone attribute is found.
9951 ctxt
->input
->standalone
= -2;
9954 * We know that '<?xml' is here.
9958 if (!IS_BLANK_CH(RAW
)) {
9959 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
,
9960 "Blank needed after '<?xml'\n");
9965 * We must have the VersionInfo here.
/* A NULL result from xmlParseVersionInfo is reported as XML_ERR_VERSION_MISSING. */
9967 version
= xmlParseVersionInfo(ctxt
);
9968 if (version
== NULL
) {
9969 xmlFatalErr(ctxt
, XML_ERR_VERSION_MISSING
, NULL
);
/*
 * Version differs from XML_DEFAULT_VERSION. With XML_PARSE_OLD10 set this
 * is reported through xmlFatalErrMsgStr. Otherwise (apparently, the else
 * lines are not visible) a version whose first two characters are 1 and
 * a dot only produces a warning, and anything else is fatal.
 */
9971 if (!xmlStrEqual(version
, (const xmlChar
*) XML_DEFAULT_VERSION
)) {
9973 * Changed here for XML-1.0 5th edition
9975 if (ctxt
->options
& XML_PARSE_OLD10
) {
9976 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNKNOWN_VERSION
,
9977 "Unsupported version '%s'\n",
9980 if ((version
[0] == '1') && ((version
[1] == '.'))) {
9981 xmlWarningMsg(ctxt
, XML_WAR_UNKNOWN_VERSION
,
9982 "Unsupported version '%s'\n",
9985 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNKNOWN_VERSION
,
9986 "Unsupported version '%s'\n",
/* Release any previously stored version string, then store the new one in the context. */
9991 if (ctxt
->version
!= NULL
)
9992 xmlFree((void *) ctxt
->version
);
9993 ctxt
->version
= version
;
9997 * We may have the encoding declaration
9999 if (!IS_BLANK_CH(RAW
)) {
10000 if ((RAW
== '?') && (NXT(1) == '>')) {
10004 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
, "Blank needed here\n");
/* The outcome of the encoding declaration is checked through ctxt->errNo rather than a return value. */
10006 xmlParseEncodingDecl(ctxt
);
10007 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
10009 * The XML REC instructs us to stop parsing right here
10015 * We may have the standalone status.
/*
 * NOTE(review): the blank-required check below only runs when
 * ctxt->input->encoding is non-NULL. Confirm that this is intended for
 * declarations whose encoding ends up stored only in ctxt->encoding.
 */
10017 if ((ctxt
->input
->encoding
!= NULL
) && (!IS_BLANK_CH(RAW
))) {
10018 if ((RAW
== '?') && (NXT(1) == '>')) {
10022 xmlFatalErrMsg(ctxt
, XML_ERR_SPACE_REQUIRED
, "Blank needed here\n");
10026 * We can grow the input buffer freely at that point
/* The standalone result (1, 0 or -2 per the xmlParseSDDecl header) is stored on the input, not on the context. */
10031 ctxt
->input
->standalone
= xmlParseSDDecl(ctxt
);
/*
 * Closing marker: the two-character end marker is the normal case. A lone
 * greater-than sign, or anything else, is reported as
 * XML_ERR_XMLDECL_NOT_FINISHED; in the last case MOVETO_ENDTAG is applied
 * to CUR_PTR as well.
 */
10034 if ((RAW
== '?') && (NXT(1) == '>')) {
10036 } else if (RAW
== '>') {
10037 /* Deprecated old WD ... */
10038 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
10041 xmlFatalErr(ctxt
, XML_ERR_XMLDECL_NOT_FINISHED
, NULL
);
10042 MOVETO_ENDTAG(CUR_PTR
);
10049 * @ctxt: an XML parser context
10051 * parse an XML Misc* optional field.
10053 * [27] Misc ::= Comment | PI | S
/*
 * xmlParseMisc: consume a run of Misc items (production [27]).
 * The loop continues while the current position starts a processing
 * instruction (less-than followed by a question mark), starts a comment
 * (less-than, exclamation mark and two dashes) or holds a blank.
 * Comments are handed to xmlParseComment; the statements for the other
 * two cases are not visible in this excerpt.
 */
10057 xmlParseMisc(xmlParserCtxtPtr ctxt
) {
10058 while (((RAW
== '<') && (NXT(1) == '?')) ||
10059 (CMP4(CUR_PTR
, '<', '!', '-', '-')) ||
10060 IS_BLANK_CH(CUR
)) {
/* Dispatch on the same three tests as the loop condition, checked in the order PI, blank, comment. */
10061 if ((RAW
== '<') && (NXT(1) == '?')) {
10063 } else if (IS_BLANK_CH(CUR
)) {
10066 xmlParseComment(ctxt
);
10071 * xmlParseDocument:
10072 * @ctxt: an XML parser context
10074 * parse an XML document (and build a tree if using the standard SAX
10077 * [1] document ::= prolog element Misc*
10079 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10081 * Returns 0, or -1 in case of error. The parser context is augmented
10082 * as a result of the parsing.
/*
 * xmlParseDocument: drive a complete pull parse of one document.
 * Phases visible in this excerpt, in order:
 *   1. Guard against a NULL ctxt or NULL ctxt->input.
 *   2. xmlDetectSAX2, then the setDocumentLocator callback when set.
 *   3. Encoding detection from the first 4 bytes (xmlDetectCharEncoding,
 *      then xmlSwitchEncoding when something was detected).
 *   4. XML declaration: xmlParseXMLDecl when the input starts with the
 *      declaration marker followed by a blank; ctxt->standalone is then
 *      copied from ctxt->input->standalone. Otherwise ctxt->version is
 *      set to a copy of XML_DEFAULT_VERSION.
 *   5. startDocument callback, skipped when ctxt->disableSAX is set.
 *   6. Prolog: xmlParseMisc, then an optional DOCTYPE handled with
 *      inSubset set to 1 (xmlParseDocTypeDecl, xmlParseInternalSubset),
 *      then 2 (externalSubset callback), then back to 0, followed by
 *      xmlCleanSpecialAttr and another xmlParseMisc.
 *   7. Root element: XML_ERR_DOCUMENT_EMPTY when no start tag is found,
 *      otherwise xmlParseElement, then xmlParseMisc for the epilog and
 *      XML_ERR_DOCUMENT_END for trailing content; instate ends as
 *      XML_PARSER_EOF.
 *   8. endDocument callback; unlike startDocument, no disableSAX test is
 *      visible on it.
 *   9. ctxt->myDoc is freed and cleared when its version equals
 *      SAX_COMPAT_MODE.
 *  10. For a well-formed document with a tree, property flags are OR-ed
 *      into ctxt->myDoc->properties.
 * NOTE(review): many original lines (returns, else branches, braces) are
 * not visible in this excerpt; the list above covers only what is shown.
 * NOTE(review): no NULL check on the xmlCharStrdup result of phase 4 is
 * visible here - confirm whether one is needed.
 */
10086 xmlParseDocument(xmlParserCtxtPtr ctxt
) {
10088 xmlCharEncoding enc
;
10092 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
))
10098 * SAX: detecting the level.
10100 xmlDetectSAX2(ctxt
);
10103 * SAX: beginning of the document processing.
10105 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
10106 ctxt
->sax
->setDocumentLocator(ctxt
->userData
, &xmlDefaultSAXLocator
);
/*
 * NOTE(review): ctxt->encoding is a string pointer, yet it is compared
 * with the enum constant XML_CHAR_ENCODING_NONE cast to a pointer.
 * Presumably this relies on that constant being 0, making the test a
 * NULL check - confirm, and consider spelling it as a NULL comparison.
 */
10108 if ((ctxt
->encoding
== (const xmlChar
*)XML_CHAR_ENCODING_NONE
) &&
10109 ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4)) {
10111 * Get the 4 first bytes and decode the charset
10112 * if enc != XML_CHAR_ENCODING_NONE
10113 * plug some encoding conversion routines.
10119 enc
= xmlDetectCharEncoding(&start
[0], 4);
10120 if (enc
!= XML_CHAR_ENCODING_NONE
) {
10121 xmlSwitchEncoding(ctxt
, enc
);
10127 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
10131 * Check for the XMLDecl in the Prolog.
10132 * do not GROW here to avoid the detected encoder to decode more
10133 * than just the first line, unless the amount of data is really
10134 * too small to hold "<?xml version="1.0" encoding="foo"
10136 if ((ctxt
->input
->end
- ctxt
->input
->cur
) < 35) {
10139 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10142 * Note that we will switch encoding on the fly.
10144 xmlParseXMLDecl(ctxt
);
10145 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
10147 * The XML REC instructs us to stop parsing right here
10151 ctxt
->standalone
= ctxt
->input
->standalone
;
10154 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
10156 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) && (!ctxt
->disableSAX
))
10157 ctxt
->sax
->startDocument(ctxt
->userData
);
10160 * The Misc part of the Prolog
10163 xmlParseMisc(ctxt
);
10166 * Then possibly doc type declaration(s) and more Misc
10167 * (doctypedecl Misc*)?
10170 if (CMP9(CUR_PTR
, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10172 ctxt
->inSubset
= 1;
10173 xmlParseDocTypeDecl(ctxt
);
10175 ctxt
->instate
= XML_PARSER_DTD
;
10176 xmlParseInternalSubset(ctxt
);
10180 * Create and update the external subset.
10182 ctxt
->inSubset
= 2;
10183 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->externalSubset
!= NULL
) &&
10184 (!ctxt
->disableSAX
))
10185 ctxt
->sax
->externalSubset(ctxt
->userData
, ctxt
->intSubName
,
10186 ctxt
->extSubSystem
, ctxt
->extSubURI
);
10187 ctxt
->inSubset
= 0;
10189 xmlCleanSpecialAttr(ctxt
);
10191 ctxt
->instate
= XML_PARSER_PROLOG
;
10192 xmlParseMisc(ctxt
);
10196 * Time to start parsing the tree itself
10200 xmlFatalErrMsg(ctxt
, XML_ERR_DOCUMENT_EMPTY
,
10201 "Start tag expected, '<' not found\n");
10203 ctxt
->instate
= XML_PARSER_CONTENT
;
10204 xmlParseElement(ctxt
);
10205 ctxt
->instate
= XML_PARSER_EPILOG
;
10209 * The Misc part at the end
10211 xmlParseMisc(ctxt
);
10214 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
10216 ctxt
->instate
= XML_PARSER_EOF
;
10220 * SAX: end of the document processing.
10222 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
10223 ctxt
->sax
->endDocument(ctxt
->userData
);
10226 * Remove locally kept entity definitions if the tree was not built
10228 if ((ctxt
->myDoc
!= NULL
) &&
10229 (xmlStrEqual(ctxt
->myDoc
->version
, SAX_COMPAT_MODE
))) {
10230 xmlFreeDoc(ctxt
->myDoc
);
10231 ctxt
->myDoc
= NULL
;
10234 if ((ctxt
->wellFormed
) && (ctxt
->myDoc
!= NULL
)) {
10235 ctxt
->myDoc
->properties
|= XML_DOC_WELLFORMED
;
10237 ctxt
->myDoc
->properties
|= XML_DOC_DTDVALID
;
10238 if (ctxt
->nsWellFormed
)
10239 ctxt
->myDoc
->properties
|= XML_DOC_NSVALID
;
10240 if (ctxt
->options
& XML_PARSE_OLD10
)
10241 ctxt
->myDoc
->properties
|= XML_DOC_OLD10
;
10243 if (! ctxt
->wellFormed
) {
10251 * xmlParseExtParsedEnt:
10252 * @ctxt: an XML parser context
10254 * parse a general parsed entity
10255 * An external general parsed entity is well-formed if it matches the
10256 * production labeled extParsedEnt.
10258 * [78] extParsedEnt ::= TextDecl? content
10260 * Returns 0, or -1 in case of error. The parser context is augmented
10261 * as a result of the parsing.
/*
 * xmlParseExtParsedEnt: parse an external general parsed entity
 * (production [78] extParsedEnt).
 * Phases visible in this excerpt, in order:
 *   1. Guard against a NULL ctxt or NULL ctxt->input.
 *   2. xmlDefaultSAXHandlerInit and xmlDetectSAX2, then the
 *      setDocumentLocator callback when set.
 *   3. Encoding detection from the first 4 bytes whenever at least 4
 *      bytes are available; no test on ctxt->encoding is visible here.
 *   4. Optional declaration parsed with xmlParseXMLDecl, otherwise
 *      ctxt->version is set to a copy of XML_DEFAULT_VERSION.
 *   5. startDocument callback, skipped when ctxt->disableSAX is set.
 *   6. Validation and subset loading are switched off (validate and
 *      loadsubset set to 0), instate becomes XML_PARSER_CONTENT and
 *      xmlParseContent runs.
 *   7. Leftover input is reported: XML_ERR_NOT_WELL_BALANCED when an end
 *      tag follows, XML_ERR_EXTRA_CONTENT for any other non-zero byte.
 *   8. endDocument callback, then -1 is returned when the context is not
 *      well formed.
 * NOTE(review): many original lines (returns, else branches, braces) are
 * not visible in this excerpt; the list above covers only what is shown.
 */
10265 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt
) {
10267 xmlCharEncoding enc
;
10269 if ((ctxt
== NULL
) || (ctxt
->input
== NULL
))
10272 xmlDefaultSAXHandlerInit();
10274 xmlDetectSAX2(ctxt
);
10279 * SAX: beginning of the document processing.
10281 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
10282 ctxt
->sax
->setDocumentLocator(ctxt
->userData
, &xmlDefaultSAXLocator
);
10285 * Get the 4 first bytes and decode the charset
10286 * if enc != XML_CHAR_ENCODING_NONE
10287 * plug some encoding conversion routines.
10289 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
10294 enc
= xmlDetectCharEncoding(start
, 4);
10295 if (enc
!= XML_CHAR_ENCODING_NONE
) {
10296 xmlSwitchEncoding(ctxt
, enc
);
10302 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
10306 * Check for the XMLDecl in the Prolog.
10309 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10312 * Note that we will switch encoding on the fly.
10314 xmlParseXMLDecl(ctxt
);
10315 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
10317 * The XML REC instructs us to stop parsing right here
10323 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
10325 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) && (!ctxt
->disableSAX
))
10326 ctxt
->sax
->startDocument(ctxt
->userData
);
10329 * Doing validity checking on chunk doesn't make sense
10331 ctxt
->instate
= XML_PARSER_CONTENT
;
10332 ctxt
->validate
= 0;
10333 ctxt
->loadsubset
= 0;
10336 xmlParseContent(ctxt
);
10338 if ((RAW
== '<') && (NXT(1) == '/')) {
10339 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
10340 } else if (RAW
!= 0) {
10341 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
10345 * SAX: end of the document processing.
10347 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
10348 ctxt
->sax
->endDocument(ctxt
->userData
);
10350 if (! ctxt
->wellFormed
) return(-1);
10354 #ifdef LIBXML_PUSH_ENABLED
10355 /************************************************************************
10357 * Progressive parsing interfaces *
10359 ************************************************************************/
10362 * xmlParseLookupSequence:
10363 * @ctxt: an XML parser context
10364 * @first: the first char to lookup
10365 * @next: the next char to lookup or zero
10366 * @third: the next char to lookup or zero
10368 * Try to find if a sequence (first, next, third) or just (first next) or
10369 * (first) is available in the input stream.
10370 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10371 * to avoid rescanning sequences of bytes, it DOES change the state of the
10372 * parser, do not use liberally.
10374 * Returns the index to the current parsing point if the full sequence
10375 * is available, -1 otherwise.
/*
 * xmlParseLookupSequence: search the buffered input for the byte
 * sequence (first), (first next) or (first next third); a zero in next
 * or third shortens the sequence.
 * On a match ctxt->checkIndex is reset to 0 and the offset of the match
 * relative to in->cur is returned. When nothing is found
 * ctxt->checkIndex is set to the position reached by the scan; the
 * header documents -1 as the result in that case (the final return is
 * not visible in this excerpt).
 */
10378 xmlParseLookupSequence(xmlParserCtxtPtr ctxt
, xmlChar first
,
10379 xmlChar next
, xmlChar third
) {
10381 xmlParserInputPtr in
;
10382 const xmlChar
*buf
;
10385 if (in
== NULL
) return(-1);
/* Start at the current position, or at ctxt->checkIndex when that is further along. */
10386 base
= in
->cur
- in
->base
;
10387 if (base
< 0) return(-1);
10388 if (ctxt
->checkIndex
> base
)
10389 base
= ctxt
->checkIndex
;
/*
 * Choose the bytes to scan. The branch taken when in->buf is NULL is not
 * visible here; otherwise the content and use fields of in->buf->buffer
 * supply the data and its length.
 */
10390 if (in
->buf
== NULL
) {
10394 buf
= in
->buf
->buffer
->content
;
10395 len
= in
->buf
->buffer
->use
;
10397 /* take into account the sequence length */
10398 if (third
) len
-= 2;
10399 else if (next
) len
--;
/* len was shortened above, so the reads at base + 1 and base + 2 stay inside the original length. */
10400 for (;base
< len
;base
++) {
10401 if (buf
[base
] == first
) {
10403 if ((buf
[base
+ 1] != next
) ||
10404 (buf
[base
+ 2] != third
)) continue;
10405 } else if (next
!= 0) {
10406 if (buf
[base
+ 1] != next
) continue;
/* Match found: reset checkIndex so the next lookup starts from the current position. */
10408 ctxt
->checkIndex
= 0;
10411 xmlGenericError(xmlGenericErrorContext
,
10412 "PP: lookup '%c' found at %d\n",
10414 else if (third
== 0)
10415 xmlGenericError(xmlGenericErrorContext
,
10416 "PP: lookup '%c%c' found at %d\n",
10417 first
, next
, base
);
10419 xmlGenericError(xmlGenericErrorContext
,
10420 "PP: lookup '%c%c%c' found at %d\n",
10421 first
, next
, third
, base
);
/* base is an index from in->base; convert it to an offset from the current position. */
10423 return(base
- (in
->cur
- in
->base
));
/* No match: remember how far the scan got so the next call does not rescan these bytes. */
10426 ctxt
->checkIndex
= base
;
10429 xmlGenericError(xmlGenericErrorContext
,
10430 "PP: lookup '%c' failed\n", first
);
10431 else if (third
== 0)
10432 xmlGenericError(xmlGenericErrorContext
,
10433 "PP: lookup '%c%c' failed\n", first
, next
);
10435 xmlGenericError(xmlGenericErrorContext
,
10436 "PP: lookup '%c%c%c' failed\n", first
, next
, third
);
10442 * xmlParseGetLasts:
10443 * @ctxt: an XML parser context
10444 * @lastlt: pointer to store the last '<' from the input
10445 * @lastgt: pointer to store the last '>' from the input
10447 * Lookup the last < and > in the current chunk
/*
 * xmlParseGetLasts: locate the last less-than and the last greater-than
 * sign in the current input chunk, for the lastlt and lastgt output
 * parameters.
 * NOTE(review): the stores into lastlt and lastgt, and the else branch
 * taken when the scan is not attempted, are not visible in this excerpt.
 */
10450 xmlParseGetLasts(xmlParserCtxtPtr ctxt
, const xmlChar
**lastlt
,
10451 const xmlChar
**lastgt
) {
10452 const xmlChar
*tmp
;
/* All three pointers are required; a NULL one is reported as an internal error. */
10454 if ((ctxt
== NULL
) || (lastlt
== NULL
) || (lastgt
== NULL
)) {
10455 xmlGenericError(xmlGenericErrorContext
,
10456 "Internal error: xmlParseGetLasts\n");
/* The scan is only attempted when ctxt->progressive is nonzero and exactly one input is on the stack. */
10459 if ((ctxt
->progressive
!= 0) && (ctxt
->inputNr
== 1)) {
10460 tmp
= ctxt
->input
->end
;
/* Walk backwards towards the start of the input looking for a less-than sign. */
10462 while ((tmp
>= ctxt
->input
->base
) && (*tmp
!= '<')) tmp
--;
10463 if (tmp
< ctxt
->input
->base
) {
/*
 * Walk forward looking for a greater-than sign, stepping over
 * single-quoted and double-quoted spans so that a greater-than sign
 * inside a quoted value is not taken as the end of the tag.
 */
10469 while ((tmp
< ctxt
->input
->end
) && (*tmp
!= '>')) {
10470 if (*tmp
== '\'') {
10472 while ((tmp
< ctxt
->input
->end
) && (*tmp
!= '\'')) tmp
++;
10473 if (tmp
< ctxt
->input
->end
) tmp
++;
10474 } else if (*tmp
== '"') {
10476 while ((tmp
< ctxt
->input
->end
) && (*tmp
!= '"')) tmp
++;
10477 if (tmp
< ctxt
->input
->end
) tmp
++;
10481 if (tmp
< ctxt
->input
->end
)
/* Walk backwards looking for a greater-than sign. */
10486 while ((tmp
>= ctxt
->input
->base
) && (*tmp
!= '>')) tmp
--;
10487 if (tmp
>= ctxt
->input
->base
)
10499 * xmlCheckCdataPush:
10500 * @utf: pointer to the block of characters
10501 * @len: length of the block in bytes
10503 * Check that the block of characters is okay as CData content [20]
10505 * Returns the number of bytes to pass if okay, a negative index where a
10506 * UTF-8 error occurred otherwise
/*
 * xmlCheckCdataPush: walk at most len bytes of utf as UTF-8 and check
 * each decoded code point with xmlIsCharQ.
 * The lead byte selects a 1, 2, 3 or 4 byte form. For multi-byte forms
 * every continuation byte must match the bit pattern 10xxxxxx. When a
 * multi-byte sequence would extend past len, the index reached so far
 * is returned, so the caller can tell how many bytes were validated.
 * NOTE(review): the statements that advance ix and the error returns
 * are not visible in this excerpt; the header documents a negative
 * index as the error result.
 */
10509 xmlCheckCdataPush(const xmlChar
*utf
, int len
) {
/* NULL input or a non-positive length is rejected before any byte is read. */
10514 if ((utf
== NULL
) || (len
<= 0))
10517 for (ix
= 0; ix
< len
;) { /* string is 0-terminated */
10519 if ((c
& 0x80) == 0x00) { /* 1-byte code, high bit clear (starts with 0) */
/* Among the low control characters only LF, CR and TAB are tested for explicitly. */
10522 else if ((c
== 0xA) || (c
== 0xD) || (c
== 0x9))
10526 } else if ((c
& 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
/* Sequence truncated by the end of the block: report how far validation got. */
10527 if (ix
+ 2 > len
) return(ix
);
10528 if ((utf
[ix
+1] & 0xc0 ) != 0x80)
/* 5 payload bits from the lead byte, 6 from the continuation byte. */
10530 codepoint
= (utf
[ix
] & 0x1f) << 6;
10531 codepoint
|= utf
[ix
+1] & 0x3f;
10532 if (!xmlIsCharQ(codepoint
))
10535 } else if ((c
& 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10536 if (ix
+ 3 > len
) return(ix
);
10537 if (((utf
[ix
+1] & 0xc0) != 0x80) ||
10538 ((utf
[ix
+2] & 0xc0) != 0x80))
/* 4 payload bits from the lead byte, 6 from each continuation byte. */
10540 codepoint
= (utf
[ix
] & 0xf) << 12;
10541 codepoint
|= (utf
[ix
+1] & 0x3f) << 6;
10542 codepoint
|= utf
[ix
+2] & 0x3f;
10543 if (!xmlIsCharQ(codepoint
))
10546 } else if ((c
& 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10547 if (ix
+ 4 > len
) return(ix
);
10548 if (((utf
[ix
+1] & 0xc0) != 0x80) ||
10549 ((utf
[ix
+2] & 0xc0) != 0x80) ||
10550 ((utf
[ix
+3] & 0xc0) != 0x80))
/* 3 payload bits from the lead byte, 6 from each continuation byte. */
10552 codepoint
= (utf
[ix
] & 0x7) << 18;
10553 codepoint
|= (utf
[ix
+1] & 0x3f) << 12;
10554 codepoint
|= (utf
[ix
+2] & 0x3f) << 6;
10555 codepoint
|= utf
[ix
+3] & 0x3f;
10556 if (!xmlIsCharQ(codepoint
))
10559 } else /* unknown encoding */
10566 * xmlParseTryOrFinish:
10567 * @ctxt: an XML parser context
10568 * @terminate: last chunk indicator
10570 * Try to progress on parsing
10572 * Returns zero if no parsing was possible
10575 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt
, int terminate
) {
10579 const xmlChar
*lastlt
, *lastgt
;
10581 if (ctxt
->input
== NULL
)
10585 switch (ctxt
->instate
) {
10586 case XML_PARSER_EOF
:
10587 xmlGenericError(xmlGenericErrorContext
,
10588 "PP: try EOF\n"); break;
10589 case XML_PARSER_START
:
10590 xmlGenericError(xmlGenericErrorContext
,
10591 "PP: try START\n"); break;
10592 case XML_PARSER_MISC
:
10593 xmlGenericError(xmlGenericErrorContext
,
10594 "PP: try MISC\n");break;
10595 case XML_PARSER_COMMENT
:
10596 xmlGenericError(xmlGenericErrorContext
,
10597 "PP: try COMMENT\n");break;
10598 case XML_PARSER_PROLOG
:
10599 xmlGenericError(xmlGenericErrorContext
,
10600 "PP: try PROLOG\n");break;
10601 case XML_PARSER_START_TAG
:
10602 xmlGenericError(xmlGenericErrorContext
,
10603 "PP: try START_TAG\n");break;
10604 case XML_PARSER_CONTENT
:
10605 xmlGenericError(xmlGenericErrorContext
,
10606 "PP: try CONTENT\n");break;
10607 case XML_PARSER_CDATA_SECTION
:
10608 xmlGenericError(xmlGenericErrorContext
,
10609 "PP: try CDATA_SECTION\n");break;
10610 case XML_PARSER_END_TAG
:
10611 xmlGenericError(xmlGenericErrorContext
,
10612 "PP: try END_TAG\n");break;
10613 case XML_PARSER_ENTITY_DECL
:
10614 xmlGenericError(xmlGenericErrorContext
,
10615 "PP: try ENTITY_DECL\n");break;
10616 case XML_PARSER_ENTITY_VALUE
:
10617 xmlGenericError(xmlGenericErrorContext
,
10618 "PP: try ENTITY_VALUE\n");break;
10619 case XML_PARSER_ATTRIBUTE_VALUE
:
10620 xmlGenericError(xmlGenericErrorContext
,
10621 "PP: try ATTRIBUTE_VALUE\n");break;
10622 case XML_PARSER_DTD
:
10623 xmlGenericError(xmlGenericErrorContext
,
10624 "PP: try DTD\n");break;
10625 case XML_PARSER_EPILOG
:
10626 xmlGenericError(xmlGenericErrorContext
,
10627 "PP: try EPILOG\n");break;
10628 case XML_PARSER_PI
:
10629 xmlGenericError(xmlGenericErrorContext
,
10630 "PP: try PI\n");break;
10631 case XML_PARSER_IGNORE
:
10632 xmlGenericError(xmlGenericErrorContext
,
10633 "PP: try IGNORE\n");break;
10637 if ((ctxt
->input
!= NULL
) &&
10638 (ctxt
->input
->cur
- ctxt
->input
->base
> 4096)) {
10640 ctxt
->checkIndex
= 0;
10642 xmlParseGetLasts(ctxt
, &lastlt
, &lastgt
);
10645 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
10650 * Pop-up of finished entities.
10652 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
10655 if (ctxt
->input
== NULL
) break;
10656 if (ctxt
->input
->buf
== NULL
)
10657 avail
= ctxt
->input
->length
-
10658 (ctxt
->input
->cur
- ctxt
->input
->base
);
10661 * If we are operating on converted input, try to flush
10662 * remaining chars to avoid them stalling in the non-converted
10665 if ((ctxt
->input
->buf
->raw
!= NULL
) &&
10666 (ctxt
->input
->buf
->raw
->use
> 0)) {
10667 int base
= ctxt
->input
->base
-
10668 ctxt
->input
->buf
->buffer
->content
;
10669 int current
= ctxt
->input
->cur
- ctxt
->input
->base
;
10671 xmlParserInputBufferPush(ctxt
->input
->buf
, 0, "");
10672 ctxt
->input
->base
= ctxt
->input
->buf
->buffer
->content
+ base
;
10673 ctxt
->input
->cur
= ctxt
->input
->base
+ current
;
10675 &ctxt
->input
->buf
->buffer
->content
[
10676 ctxt
->input
->buf
->buffer
->use
];
10678 avail
= ctxt
->input
->buf
->buffer
->use
-
10679 (ctxt
->input
->cur
- ctxt
->input
->base
);
10683 switch (ctxt
->instate
) {
10684 case XML_PARSER_EOF
:
10686 * Document parsing is done !
10689 case XML_PARSER_START
:
10690 if (ctxt
->charset
== XML_CHAR_ENCODING_NONE
) {
10692 xmlCharEncoding enc
;
10695 * Very first chars read from the document flow.
10701 * Get the 4 first bytes and decode the charset
10702 * if enc != XML_CHAR_ENCODING_NONE
10703 * plug some encoding conversion routines,
10704 * else xmlSwitchEncoding will set to (default)
10711 enc
= xmlDetectCharEncoding(start
, 4);
10712 xmlSwitchEncoding(ctxt
, enc
);
10718 cur
= ctxt
->input
->cur
[0];
10719 next
= ctxt
->input
->cur
[1];
10721 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
10722 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
10723 &xmlDefaultSAXLocator
);
10724 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
10725 ctxt
->instate
= XML_PARSER_EOF
;
10727 xmlGenericError(xmlGenericErrorContext
,
10728 "PP: entering EOF\n");
10730 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
10731 ctxt
->sax
->endDocument(ctxt
->userData
);
10734 if ((cur
== '<') && (next
== '?')) {
10735 /* PI or XML decl */
10736 if (avail
< 5) return(ret
);
10737 if ((!terminate
) &&
10738 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0))
10740 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
10741 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
10742 &xmlDefaultSAXLocator
);
10743 if ((ctxt
->input
->cur
[2] == 'x') &&
10744 (ctxt
->input
->cur
[3] == 'm') &&
10745 (ctxt
->input
->cur
[4] == 'l') &&
10746 (IS_BLANK_CH(ctxt
->input
->cur
[5]))) {
10749 xmlGenericError(xmlGenericErrorContext
,
10750 "PP: Parsing XML Decl\n");
10752 xmlParseXMLDecl(ctxt
);
10753 if (ctxt
->errNo
== XML_ERR_UNSUPPORTED_ENCODING
) {
10755 * The XML REC instructs us to stop parsing right
10758 ctxt
->instate
= XML_PARSER_EOF
;
10761 ctxt
->standalone
= ctxt
->input
->standalone
;
10762 if ((ctxt
->encoding
== NULL
) &&
10763 (ctxt
->input
->encoding
!= NULL
))
10764 ctxt
->encoding
= xmlStrdup(ctxt
->input
->encoding
);
10765 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
10766 (!ctxt
->disableSAX
))
10767 ctxt
->sax
->startDocument(ctxt
->userData
);
10768 ctxt
->instate
= XML_PARSER_MISC
;
10770 xmlGenericError(xmlGenericErrorContext
,
10771 "PP: entering MISC\n");
10774 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
10775 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
10776 (!ctxt
->disableSAX
))
10777 ctxt
->sax
->startDocument(ctxt
->userData
);
10778 ctxt
->instate
= XML_PARSER_MISC
;
10780 xmlGenericError(xmlGenericErrorContext
,
10781 "PP: entering MISC\n");
10785 if ((ctxt
->sax
) && (ctxt
->sax
->setDocumentLocator
))
10786 ctxt
->sax
->setDocumentLocator(ctxt
->userData
,
10787 &xmlDefaultSAXLocator
);
10788 ctxt
->version
= xmlCharStrdup(XML_DEFAULT_VERSION
);
10789 if (ctxt
->version
== NULL
) {
10790 xmlErrMemory(ctxt
, NULL
);
10793 if ((ctxt
->sax
) && (ctxt
->sax
->startDocument
) &&
10794 (!ctxt
->disableSAX
))
10795 ctxt
->sax
->startDocument(ctxt
->userData
);
10796 ctxt
->instate
= XML_PARSER_MISC
;
10798 xmlGenericError(xmlGenericErrorContext
,
10799 "PP: entering MISC\n");
10803 case XML_PARSER_START_TAG
: {
10804 const xmlChar
*name
;
10805 const xmlChar
*prefix
= NULL
;
10806 const xmlChar
*URI
= NULL
;
10807 int nsNr
= ctxt
->nsNr
;
10809 if ((avail
< 2) && (ctxt
->inputNr
== 1))
10811 cur
= ctxt
->input
->cur
[0];
10813 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_EMPTY
, NULL
);
10814 ctxt
->instate
= XML_PARSER_EOF
;
10815 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
10816 ctxt
->sax
->endDocument(ctxt
->userData
);
10820 if (ctxt
->progressive
) {
10821 /* > can be found unescaped in attribute values */
10822 if ((lastgt
== NULL
) || (ctxt
->input
->cur
>= lastgt
))
10824 } else if (xmlParseLookupSequence(ctxt
, '>', 0, 0) < 0) {
10828 if (ctxt
->spaceNr
== 0)
10829 spacePush(ctxt
, -1);
10830 else if (*ctxt
->space
== -2)
10831 spacePush(ctxt
, -1);
10833 spacePush(ctxt
, *ctxt
->space
);
10834 #ifdef LIBXML_SAX1_ENABLED
10836 #endif /* LIBXML_SAX1_ENABLED */
10837 name
= xmlParseStartTag2(ctxt
, &prefix
, &URI
, &tlen
);
10838 #ifdef LIBXML_SAX1_ENABLED
10840 name
= xmlParseStartTag(ctxt
);
10841 #endif /* LIBXML_SAX1_ENABLED */
10842 if (name
== NULL
) {
10844 ctxt
->instate
= XML_PARSER_EOF
;
10845 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
10846 ctxt
->sax
->endDocument(ctxt
->userData
);
10849 #ifdef LIBXML_VALID_ENABLED
10851 * [ VC: Root Element Type ]
10852 * The Name in the document type declaration must match
10853 * the element type of the root element.
10855 if (ctxt
->validate
&& ctxt
->wellFormed
&& ctxt
->myDoc
&&
10856 ctxt
->node
&& (ctxt
->node
== ctxt
->myDoc
->children
))
10857 ctxt
->valid
&= xmlValidateRoot(&ctxt
->vctxt
, ctxt
->myDoc
);
10858 #endif /* LIBXML_VALID_ENABLED */
10861 * Check for an Empty Element.
10863 if ((RAW
== '/') && (NXT(1) == '>')) {
10867 if ((ctxt
->sax
!= NULL
) &&
10868 (ctxt
->sax
->endElementNs
!= NULL
) &&
10869 (!ctxt
->disableSAX
))
10870 ctxt
->sax
->endElementNs(ctxt
->userData
, name
,
10872 if (ctxt
->nsNr
- nsNr
> 0)
10873 nsPop(ctxt
, ctxt
->nsNr
- nsNr
);
10874 #ifdef LIBXML_SAX1_ENABLED
10876 if ((ctxt
->sax
!= NULL
) &&
10877 (ctxt
->sax
->endElement
!= NULL
) &&
10878 (!ctxt
->disableSAX
))
10879 ctxt
->sax
->endElement(ctxt
->userData
, name
);
10880 #endif /* LIBXML_SAX1_ENABLED */
10883 if (ctxt
->nameNr
== 0) {
10884 ctxt
->instate
= XML_PARSER_EPILOG
;
10886 ctxt
->instate
= XML_PARSER_CONTENT
;
10893 xmlFatalErrMsgStr(ctxt
, XML_ERR_GT_REQUIRED
,
10894 "Couldn't find end of Start Tag %s\n",
10900 nameNsPush(ctxt
, name
, prefix
, URI
, ctxt
->nsNr
- nsNr
);
10901 #ifdef LIBXML_SAX1_ENABLED
10903 namePush(ctxt
, name
);
10904 #endif /* LIBXML_SAX1_ENABLED */
10906 ctxt
->instate
= XML_PARSER_CONTENT
;
10909 case XML_PARSER_CONTENT
: {
10910 const xmlChar
*test
;
10912 if ((avail
< 2) && (ctxt
->inputNr
== 1))
10914 cur
= ctxt
->input
->cur
[0];
10915 next
= ctxt
->input
->cur
[1];
10918 cons
= ctxt
->input
->consumed
;
10919 if ((cur
== '<') && (next
== '/')) {
10920 ctxt
->instate
= XML_PARSER_END_TAG
;
10922 } else if ((cur
== '<') && (next
== '?')) {
10923 if ((!terminate
) &&
10924 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0))
10927 } else if ((cur
== '<') && (next
!= '!')) {
10928 ctxt
->instate
= XML_PARSER_START_TAG
;
10930 } else if ((cur
== '<') && (next
== '!') &&
10931 (ctxt
->input
->cur
[2] == '-') &&
10932 (ctxt
->input
->cur
[3] == '-')) {
10937 ctxt
->input
->cur
+= 4;
10938 term
= xmlParseLookupSequence(ctxt
, '-', '-', '>');
10939 ctxt
->input
->cur
-= 4;
10940 if ((!terminate
) && (term
< 0))
10942 xmlParseComment(ctxt
);
10943 ctxt
->instate
= XML_PARSER_CONTENT
;
10944 } else if ((cur
== '<') && (ctxt
->input
->cur
[1] == '!') &&
10945 (ctxt
->input
->cur
[2] == '[') &&
10946 (ctxt
->input
->cur
[3] == 'C') &&
10947 (ctxt
->input
->cur
[4] == 'D') &&
10948 (ctxt
->input
->cur
[5] == 'A') &&
10949 (ctxt
->input
->cur
[6] == 'T') &&
10950 (ctxt
->input
->cur
[7] == 'A') &&
10951 (ctxt
->input
->cur
[8] == '[')) {
10953 ctxt
->instate
= XML_PARSER_CDATA_SECTION
;
10955 } else if ((cur
== '<') && (next
== '!') &&
10958 } else if (cur
== '&') {
10959 if ((!terminate
) &&
10960 (xmlParseLookupSequence(ctxt
, ';', 0, 0) < 0))
10962 xmlParseReference(ctxt
);
10964 /* TODO Avoid the extra copy, handle directly !!! */
10966 * Goal of the following test is:
10967 * - minimize calls to the SAX 'character' callback
10968 * when they are mergeable
10969 * - handle a problem for isBlank when we only parse
10970 * a sequence of blank chars and the next one is
10971 * not available to check against '<' presence.
10972 * - tries to homogenize the differences in SAX
10973 * callbacks between the push and pull versions
10976 if ((ctxt
->inputNr
== 1) &&
10977 (avail
< XML_PARSER_BIG_BUFFER_SIZE
)) {
10979 if (ctxt
->progressive
) {
10980 if ((lastlt
== NULL
) ||
10981 (ctxt
->input
->cur
> lastlt
))
10983 } else if (xmlParseLookupSequence(ctxt
,
10989 ctxt
->checkIndex
= 0;
10990 xmlParseCharData(ctxt
, 0);
10993 * Pop-up of finished entities.
10995 while ((RAW
== 0) && (ctxt
->inputNr
> 1))
10997 if ((cons
== ctxt
->input
->consumed
) && (test
== CUR_PTR
)) {
10998 xmlFatalErr(ctxt
, XML_ERR_INTERNAL_ERROR
,
10999 "detected an error in element content\n");
11000 ctxt
->instate
= XML_PARSER_EOF
;
11005 case XML_PARSER_END_TAG
:
11009 if (ctxt
->progressive
) {
11010 /* > can be found unescaped in attribute values */
11011 if ((lastgt
== NULL
) || (ctxt
->input
->cur
>= lastgt
))
11013 } else if (xmlParseLookupSequence(ctxt
, '>', 0, 0) < 0) {
11018 xmlParseEndTag2(ctxt
,
11019 (void *) ctxt
->pushTab
[ctxt
->nameNr
* 3 - 3],
11020 (void *) ctxt
->pushTab
[ctxt
->nameNr
* 3 - 2], 0,
11021 (int) (long) ctxt
->pushTab
[ctxt
->nameNr
* 3 - 1], 0);
11024 #ifdef LIBXML_SAX1_ENABLED
11026 xmlParseEndTag1(ctxt
, 0);
11027 #endif /* LIBXML_SAX1_ENABLED */
11028 if (ctxt
->nameNr
== 0) {
11029 ctxt
->instate
= XML_PARSER_EPILOG
;
11031 ctxt
->instate
= XML_PARSER_CONTENT
;
11034 case XML_PARSER_CDATA_SECTION
: {
11036 * The Push mode need to have the SAX callback for
11037 * cdataBlock merge back contiguous callbacks.
11041 base
= xmlParseLookupSequence(ctxt
, ']', ']', '>');
11043 if (avail
>= XML_PARSER_BIG_BUFFER_SIZE
+ 2) {
11046 tmp
= xmlCheckCdataPush(ctxt
->input
->cur
,
11047 XML_PARSER_BIG_BUFFER_SIZE
);
11050 ctxt
->input
->cur
+= tmp
;
11051 goto encoding_error
;
11053 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
)) {
11054 if (ctxt
->sax
->cdataBlock
!= NULL
)
11055 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11056 ctxt
->input
->cur
, tmp
);
11057 else if (ctxt
->sax
->characters
!= NULL
)
11058 ctxt
->sax
->characters(ctxt
->userData
,
11059 ctxt
->input
->cur
, tmp
);
11062 ctxt
->checkIndex
= 0;
11068 tmp
= xmlCheckCdataPush(ctxt
->input
->cur
, base
);
11069 if ((tmp
< 0) || (tmp
!= base
)) {
11071 ctxt
->input
->cur
+= tmp
;
11072 goto encoding_error
;
11074 if ((ctxt
->sax
!= NULL
) && (base
== 0) &&
11075 (ctxt
->sax
->cdataBlock
!= NULL
) &&
11076 (!ctxt
->disableSAX
)) {
11078 * Special case to provide identical behaviour
11079 * between pull and push parsers on empty CDATA
11082 if ((ctxt
->input
->cur
- ctxt
->input
->base
>= 9) &&
11083 (!strncmp((const char *)&ctxt
->input
->cur
[-9],
11085 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11087 } else if ((ctxt
->sax
!= NULL
) && (base
> 0) &&
11088 (!ctxt
->disableSAX
)) {
11089 if (ctxt
->sax
->cdataBlock
!= NULL
)
11090 ctxt
->sax
->cdataBlock(ctxt
->userData
,
11091 ctxt
->input
->cur
, base
);
11092 else if (ctxt
->sax
->characters
!= NULL
)
11093 ctxt
->sax
->characters(ctxt
->userData
,
11094 ctxt
->input
->cur
, base
);
11097 ctxt
->checkIndex
= 0;
11098 ctxt
->instate
= XML_PARSER_CONTENT
;
11100 xmlGenericError(xmlGenericErrorContext
,
11101 "PP: entering CONTENT\n");
11106 case XML_PARSER_MISC
:
11108 if (ctxt
->input
->buf
== NULL
)
11109 avail
= ctxt
->input
->length
-
11110 (ctxt
->input
->cur
- ctxt
->input
->base
);
11112 avail
= ctxt
->input
->buf
->buffer
->use
-
11113 (ctxt
->input
->cur
- ctxt
->input
->base
);
11116 cur
= ctxt
->input
->cur
[0];
11117 next
= ctxt
->input
->cur
[1];
11118 if ((cur
== '<') && (next
== '?')) {
11119 if ((!terminate
) &&
11120 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0))
11123 xmlGenericError(xmlGenericErrorContext
,
11124 "PP: Parsing PI\n");
11127 ctxt
->checkIndex
= 0;
11128 } else if ((cur
== '<') && (next
== '!') &&
11129 (ctxt
->input
->cur
[2] == '-') &&
11130 (ctxt
->input
->cur
[3] == '-')) {
11131 if ((!terminate
) &&
11132 (xmlParseLookupSequence(ctxt
, '-', '-', '>') < 0))
11135 xmlGenericError(xmlGenericErrorContext
,
11136 "PP: Parsing Comment\n");
11138 xmlParseComment(ctxt
);
11139 ctxt
->instate
= XML_PARSER_MISC
;
11140 ctxt
->checkIndex
= 0;
11141 } else if ((cur
== '<') && (next
== '!') &&
11142 (ctxt
->input
->cur
[2] == 'D') &&
11143 (ctxt
->input
->cur
[3] == 'O') &&
11144 (ctxt
->input
->cur
[4] == 'C') &&
11145 (ctxt
->input
->cur
[5] == 'T') &&
11146 (ctxt
->input
->cur
[6] == 'Y') &&
11147 (ctxt
->input
->cur
[7] == 'P') &&
11148 (ctxt
->input
->cur
[8] == 'E')) {
11149 if ((!terminate
) &&
11150 (xmlParseLookupSequence(ctxt
, '>', 0, 0) < 0))
11153 xmlGenericError(xmlGenericErrorContext
,
11154 "PP: Parsing internal subset\n");
11156 ctxt
->inSubset
= 1;
11157 xmlParseDocTypeDecl(ctxt
);
11159 ctxt
->instate
= XML_PARSER_DTD
;
11161 xmlGenericError(xmlGenericErrorContext
,
11162 "PP: entering DTD\n");
11166 * Create and update the external subset.
11168 ctxt
->inSubset
= 2;
11169 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
11170 (ctxt
->sax
->externalSubset
!= NULL
))
11171 ctxt
->sax
->externalSubset(ctxt
->userData
,
11172 ctxt
->intSubName
, ctxt
->extSubSystem
,
11174 ctxt
->inSubset
= 0;
11175 xmlCleanSpecialAttr(ctxt
);
11176 ctxt
->instate
= XML_PARSER_PROLOG
;
11178 xmlGenericError(xmlGenericErrorContext
,
11179 "PP: entering PROLOG\n");
11182 } else if ((cur
== '<') && (next
== '!') &&
11186 ctxt
->instate
= XML_PARSER_START_TAG
;
11187 ctxt
->progressive
= 1;
11188 xmlParseGetLasts(ctxt
, &lastlt
, &lastgt
);
11190 xmlGenericError(xmlGenericErrorContext
,
11191 "PP: entering START_TAG\n");
11195 case XML_PARSER_PROLOG
:
11197 if (ctxt
->input
->buf
== NULL
)
11198 avail
= ctxt
->input
->length
- (ctxt
->input
->cur
- ctxt
->input
->base
);
11200 avail
= ctxt
->input
->buf
->buffer
->use
- (ctxt
->input
->cur
- ctxt
->input
->base
);
11203 cur
= ctxt
->input
->cur
[0];
11204 next
= ctxt
->input
->cur
[1];
11205 if ((cur
== '<') && (next
== '?')) {
11206 if ((!terminate
) &&
11207 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0))
11210 xmlGenericError(xmlGenericErrorContext
,
11211 "PP: Parsing PI\n");
11214 } else if ((cur
== '<') && (next
== '!') &&
11215 (ctxt
->input
->cur
[2] == '-') && (ctxt
->input
->cur
[3] == '-')) {
11216 if ((!terminate
) &&
11217 (xmlParseLookupSequence(ctxt
, '-', '-', '>') < 0))
11220 xmlGenericError(xmlGenericErrorContext
,
11221 "PP: Parsing Comment\n");
11223 xmlParseComment(ctxt
);
11224 ctxt
->instate
= XML_PARSER_PROLOG
;
11225 } else if ((cur
== '<') && (next
== '!') &&
11229 ctxt
->instate
= XML_PARSER_START_TAG
;
11230 if (ctxt
->progressive
== 0)
11231 ctxt
->progressive
= 1;
11232 xmlParseGetLasts(ctxt
, &lastlt
, &lastgt
);
11234 xmlGenericError(xmlGenericErrorContext
,
11235 "PP: entering START_TAG\n");
11239 case XML_PARSER_EPILOG
:
11241 if (ctxt
->input
->buf
== NULL
)
11242 avail
= ctxt
->input
->length
- (ctxt
->input
->cur
- ctxt
->input
->base
);
11244 avail
= ctxt
->input
->buf
->buffer
->use
- (ctxt
->input
->cur
- ctxt
->input
->base
);
11247 cur
= ctxt
->input
->cur
[0];
11248 next
= ctxt
->input
->cur
[1];
11249 if ((cur
== '<') && (next
== '?')) {
11250 if ((!terminate
) &&
11251 (xmlParseLookupSequence(ctxt
, '?', '>', 0) < 0))
11254 xmlGenericError(xmlGenericErrorContext
,
11255 "PP: Parsing PI\n");
11258 ctxt
->instate
= XML_PARSER_EPILOG
;
11259 } else if ((cur
== '<') && (next
== '!') &&
11260 (ctxt
->input
->cur
[2] == '-') && (ctxt
->input
->cur
[3] == '-')) {
11261 if ((!terminate
) &&
11262 (xmlParseLookupSequence(ctxt
, '-', '-', '>') < 0))
11265 xmlGenericError(xmlGenericErrorContext
,
11266 "PP: Parsing Comment\n");
11268 xmlParseComment(ctxt
);
11269 ctxt
->instate
= XML_PARSER_EPILOG
;
11270 } else if ((cur
== '<') && (next
== '!') &&
11274 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
11275 ctxt
->instate
= XML_PARSER_EOF
;
11277 xmlGenericError(xmlGenericErrorContext
,
11278 "PP: entering EOF\n");
11280 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11281 ctxt
->sax
->endDocument(ctxt
->userData
);
11285 case XML_PARSER_DTD
: {
11287 * Sorry but progressive parsing of the internal subset
11288 * is not expected to be supported. We first check that
11289 * the full content of the internal subset is available and
11290 * the parsing is launched only at that point.
11291 * Internal subset ends up with "']' S? '>'" in an unescaped
11292 * section and not in a ']]>' sequence which are conditional
11293 * sections (whoever argued to keep that crap in XML deserve
11294 * a place in hell !).
11300 base
= ctxt
->input
->cur
- ctxt
->input
->base
;
11301 if (base
< 0) return(0);
11302 if (ctxt
->checkIndex
> base
)
11303 base
= ctxt
->checkIndex
;
11304 buf
= ctxt
->input
->buf
->buffer
->content
;
11305 for (;(unsigned int) base
< ctxt
->input
->buf
->buffer
->use
;
11308 if (buf
[base
] == quote
)
11312 if ((quote
== 0) && (buf
[base
] == '<')) {
11314 /* special handling of comments */
11315 if (((unsigned int) base
+ 4 <
11316 ctxt
->input
->buf
->buffer
->use
) &&
11317 (buf
[base
+ 1] == '!') &&
11318 (buf
[base
+ 2] == '-') &&
11319 (buf
[base
+ 3] == '-')) {
11320 for (;(unsigned int) base
+ 3 <
11321 ctxt
->input
->buf
->buffer
->use
; base
++) {
11322 if ((buf
[base
] == '-') &&
11323 (buf
[base
+ 1] == '-') &&
11324 (buf
[base
+ 2] == '>')) {
11332 fprintf(stderr
, "unfinished comment\n");
11339 if (buf
[base
] == '"') {
11343 if (buf
[base
] == '\'') {
11347 if (buf
[base
] == ']') {
11349 fprintf(stderr
, "%c%c%c%c: ", buf
[base
],
11350 buf
[base
+ 1], buf
[base
+ 2], buf
[base
+ 3]);
11352 if ((unsigned int) base
+1 >=
11353 ctxt
->input
->buf
->buffer
->use
)
11355 if (buf
[base
+ 1] == ']') {
11356 /* conditional crap, skip both ']' ! */
11361 (unsigned int) base
+ i
< ctxt
->input
->buf
->buffer
->use
;
11363 if (buf
[base
+ i
] == '>') {
11365 fprintf(stderr
, "found\n");
11367 goto found_end_int_subset
;
11369 if (!IS_BLANK_CH(buf
[base
+ i
])) {
11371 fprintf(stderr
, "not found\n");
11373 goto not_end_of_int_subset
;
11377 fprintf(stderr
, "end of stream\n");
11382 not_end_of_int_subset
:
11383 continue; /* for */
11386 * We didn't find the end of the Internal subset
11390 xmlGenericError(xmlGenericErrorContext
,
11391 "PP: lookup of int subset end filed\n");
11395 found_end_int_subset
:
11396 xmlParseInternalSubset(ctxt
);
11397 ctxt
->inSubset
= 2;
11398 if ((ctxt
->sax
!= NULL
) && (!ctxt
->disableSAX
) &&
11399 (ctxt
->sax
->externalSubset
!= NULL
))
11400 ctxt
->sax
->externalSubset(ctxt
->userData
, ctxt
->intSubName
,
11401 ctxt
->extSubSystem
, ctxt
->extSubURI
);
11402 ctxt
->inSubset
= 0;
11403 xmlCleanSpecialAttr(ctxt
);
11404 ctxt
->instate
= XML_PARSER_PROLOG
;
11405 ctxt
->checkIndex
= 0;
11407 xmlGenericError(xmlGenericErrorContext
,
11408 "PP: entering PROLOG\n");
11412 case XML_PARSER_COMMENT
:
11413 xmlGenericError(xmlGenericErrorContext
,
11414 "PP: internal error, state == COMMENT\n");
11415 ctxt
->instate
= XML_PARSER_CONTENT
;
11417 xmlGenericError(xmlGenericErrorContext
,
11418 "PP: entering CONTENT\n");
11421 case XML_PARSER_IGNORE
:
11422 xmlGenericError(xmlGenericErrorContext
,
11423 "PP: internal error, state == IGNORE");
11424 ctxt
->instate
= XML_PARSER_DTD
;
11426 xmlGenericError(xmlGenericErrorContext
,
11427 "PP: entering DTD\n");
11430 case XML_PARSER_PI
:
11431 xmlGenericError(xmlGenericErrorContext
,
11432 "PP: internal error, state == PI\n");
11433 ctxt
->instate
= XML_PARSER_CONTENT
;
11435 xmlGenericError(xmlGenericErrorContext
,
11436 "PP: entering CONTENT\n");
11439 case XML_PARSER_ENTITY_DECL
:
11440 xmlGenericError(xmlGenericErrorContext
,
11441 "PP: internal error, state == ENTITY_DECL\n");
11442 ctxt
->instate
= XML_PARSER_DTD
;
11444 xmlGenericError(xmlGenericErrorContext
,
11445 "PP: entering DTD\n");
11448 case XML_PARSER_ENTITY_VALUE
:
11449 xmlGenericError(xmlGenericErrorContext
,
11450 "PP: internal error, state == ENTITY_VALUE\n");
11451 ctxt
->instate
= XML_PARSER_CONTENT
;
11453 xmlGenericError(xmlGenericErrorContext
,
11454 "PP: entering DTD\n");
11457 case XML_PARSER_ATTRIBUTE_VALUE
:
11458 xmlGenericError(xmlGenericErrorContext
,
11459 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11460 ctxt
->instate
= XML_PARSER_START_TAG
;
11462 xmlGenericError(xmlGenericErrorContext
,
11463 "PP: entering START_TAG\n");
11466 case XML_PARSER_SYSTEM_LITERAL
:
11467 xmlGenericError(xmlGenericErrorContext
,
11468 "PP: internal error, state == SYSTEM_LITERAL\n");
11469 ctxt
->instate
= XML_PARSER_START_TAG
;
11471 xmlGenericError(xmlGenericErrorContext
,
11472 "PP: entering START_TAG\n");
11475 case XML_PARSER_PUBLIC_LITERAL
:
11476 xmlGenericError(xmlGenericErrorContext
,
11477 "PP: internal error, state == PUBLIC_LITERAL\n");
11478 ctxt
->instate
= XML_PARSER_START_TAG
;
11480 xmlGenericError(xmlGenericErrorContext
,
11481 "PP: entering START_TAG\n");
11488 xmlGenericError(xmlGenericErrorContext
, "PP: done %d\n", ret
);
11495 snprintf(buffer
, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11496 ctxt
->input
->cur
[0], ctxt
->input
->cur
[1],
11497 ctxt
->input
->cur
[2], ctxt
->input
->cur
[3]);
11498 __xmlErrEncoding(ctxt
, XML_ERR_INVALID_CHAR
,
11499 "Input is not proper UTF-8, indicate encoding !\n%s",
11500 BAD_CAST buffer
, NULL
);
11507 * @ctxt: an XML parser context
11508 * @chunk: a char array
11509 * @size: the size in byte of the chunk
11510 * @terminate: last chunk indicator
11512 * Parse a Chunk of memory
11514 * Returns zero if no error, the xmlParserErrors otherwise.
11517 xmlParseChunk(xmlParserCtxtPtr ctxt
, const char *chunk
, int size
,
11523 return(XML_ERR_INTERNAL_ERROR
);
11524 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
11525 return(ctxt
->errNo
);
11526 if (ctxt
->instate
== XML_PARSER_START
)
11527 xmlDetectSAX2(ctxt
);
11528 if ((size
> 0) && (chunk
!= NULL
) && (!terminate
) &&
11529 (chunk
[size
- 1] == '\r')) {
11536 if ((size
> 0) && (chunk
!= NULL
) && (ctxt
->input
!= NULL
) &&
11537 (ctxt
->input
->buf
!= NULL
) && (ctxt
->instate
!= XML_PARSER_EOF
)) {
11538 int base
= ctxt
->input
->base
- ctxt
->input
->buf
->buffer
->content
;
11539 int cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
11543 * Specific handling if we autodetected an encoding, we should not
11544 * push more than the first line ... which depends on the encoding
11545 * And only push the rest once the final encoding was detected
11547 if ((ctxt
->instate
== XML_PARSER_START
) && (ctxt
->input
!= NULL
) &&
11548 (ctxt
->input
->buf
!= NULL
) && (ctxt
->input
->buf
->encoder
!= NULL
)) {
11549 unsigned int len
= 45;
11551 if ((xmlStrcasestr(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
11552 BAD_CAST
"UTF-16")) ||
11553 (xmlStrcasestr(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
11554 BAD_CAST
"UTF16")))
11556 else if ((xmlStrcasestr(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
11557 BAD_CAST
"UCS-4")) ||
11558 (xmlStrcasestr(BAD_CAST ctxt
->input
->buf
->encoder
->name
,
11562 if (ctxt
->input
->buf
->rawconsumed
< len
)
11563 len
-= ctxt
->input
->buf
->rawconsumed
;
11566 * Change size for reading the initial declaration only
11567 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11568 * will blindly copy extra bytes from memory.
11571 remain
= size
- len
;
11577 res
=xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
11579 ctxt
->errNo
= XML_PARSER_EOF
;
11580 ctxt
->disableSAX
= 1;
11581 return (XML_PARSER_EOF
);
11583 ctxt
->input
->base
= ctxt
->input
->buf
->buffer
->content
+ base
;
11584 ctxt
->input
->cur
= ctxt
->input
->base
+ cur
;
11586 &ctxt
->input
->buf
->buffer
->content
[ctxt
->input
->buf
->buffer
->use
];
11588 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
11591 } else if (ctxt
->instate
!= XML_PARSER_EOF
) {
11592 if ((ctxt
->input
!= NULL
) && ctxt
->input
->buf
!= NULL
) {
11593 xmlParserInputBufferPtr in
= ctxt
->input
->buf
;
11594 if ((in
->encoder
!= NULL
) && (in
->buffer
!= NULL
) &&
11595 (in
->raw
!= NULL
)) {
11598 nbchars
= xmlCharEncInFunc(in
->encoder
, in
->buffer
, in
->raw
);
11601 xmlGenericError(xmlGenericErrorContext
,
11602 "xmlParseChunk: encoder error\n");
11603 return(XML_ERR_INVALID_ENCODING
);
11609 xmlParseTryOrFinish(ctxt
, 0);
11611 xmlParseTryOrFinish(ctxt
, terminate
);
11612 if ((ctxt
->errNo
!= XML_ERR_OK
) && (ctxt
->disableSAX
== 1))
11613 return(ctxt
->errNo
);
11621 if ((end_in_lf
== 1) && (ctxt
->input
!= NULL
) &&
11622 (ctxt
->input
->buf
!= NULL
)) {
11623 xmlParserInputBufferPush(ctxt
->input
->buf
, 1, "\r");
11627 * Check for termination
11631 if (ctxt
->input
!= NULL
) {
11632 if (ctxt
->input
->buf
== NULL
)
11633 avail
= ctxt
->input
->length
-
11634 (ctxt
->input
->cur
- ctxt
->input
->base
);
11636 avail
= ctxt
->input
->buf
->buffer
->use
-
11637 (ctxt
->input
->cur
- ctxt
->input
->base
);
11640 if ((ctxt
->instate
!= XML_PARSER_EOF
) &&
11641 (ctxt
->instate
!= XML_PARSER_EPILOG
)) {
11642 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
11644 if ((ctxt
->instate
== XML_PARSER_EPILOG
) && (avail
> 0)) {
11645 xmlFatalErr(ctxt
, XML_ERR_DOCUMENT_END
, NULL
);
11647 if (ctxt
->instate
!= XML_PARSER_EOF
) {
11648 if ((ctxt
->sax
) && (ctxt
->sax
->endDocument
!= NULL
))
11649 ctxt
->sax
->endDocument(ctxt
->userData
);
11651 ctxt
->instate
= XML_PARSER_EOF
;
11653 return((xmlParserErrors
) ctxt
->errNo
);
11656 /************************************************************************
11658 * I/O front end functions to the parser *
11660 ************************************************************************/
11663 * xmlCreatePushParserCtxt:
11664 * @sax: a SAX handler
11665 * @user_data: The user data returned on SAX callbacks
11666 * @chunk: a pointer to an array of chars
11667 * @size: number of chars in the array
11668 * @filename: an optional file name or URI
11670 * Create a parser context for using the XML parser in push mode.
11671 * If @chunk is non-NULL and @size is non-zero, the data is used to detect
11672 * the encoding. The remaining characters will be parsed so they
11673 * don't need to be fed in again through xmlParseChunk.
11674 * To allow content encoding detection, @size should be >= 4
11675 * The value of @filename is used for fetching external entities
11676 * and error/warning reports.
11678 * Returns the new parser context or NULL
11682 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax
, void *user_data
,
11683 const char *chunk
, int size
, const char *filename
) {
11684 xmlParserCtxtPtr ctxt
;
11685 xmlParserInputPtr inputStream
;
11686 xmlParserInputBufferPtr buf
;
11687 xmlCharEncoding enc
= XML_CHAR_ENCODING_NONE
;
11690 * plug some encoding conversion routines
11692 if ((chunk
!= NULL
) && (size
>= 4))
11693 enc
= xmlDetectCharEncoding((const xmlChar
*) chunk
, size
);
11695 buf
= xmlAllocParserInputBuffer(enc
);
11696 if (buf
== NULL
) return(NULL
);
11698 ctxt
= xmlNewParserCtxt();
11699 if (ctxt
== NULL
) {
11700 xmlErrMemory(NULL
, "creating parser: out of memory\n");
11701 xmlFreeParserInputBuffer(buf
);
11704 ctxt
->dictNames
= 1;
11705 ctxt
->pushTab
= (void **) xmlMalloc(ctxt
->nameMax
* 3 * sizeof(xmlChar
*));
11706 if (ctxt
->pushTab
== NULL
) {
11707 xmlErrMemory(ctxt
, NULL
);
11708 xmlFreeParserInputBuffer(buf
);
11709 xmlFreeParserCtxt(ctxt
);
11713 #ifdef LIBXML_SAX1_ENABLED
11714 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
11715 #endif /* LIBXML_SAX1_ENABLED */
11716 xmlFree(ctxt
->sax
);
11717 ctxt
->sax
= (xmlSAXHandlerPtr
) xmlMalloc(sizeof(xmlSAXHandler
));
11718 if (ctxt
->sax
== NULL
) {
11719 xmlErrMemory(ctxt
, NULL
);
11720 xmlFreeParserInputBuffer(buf
);
11721 xmlFreeParserCtxt(ctxt
);
11724 memset(ctxt
->sax
, 0, sizeof(xmlSAXHandler
));
11725 if (sax
->initialized
== XML_SAX2_MAGIC
)
11726 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandler
));
11728 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandlerV1
));
11729 if (user_data
!= NULL
)
11730 ctxt
->userData
= user_data
;
11732 if (filename
== NULL
) {
11733 ctxt
->directory
= NULL
;
11735 ctxt
->directory
= xmlParserGetDirectory(filename
);
11738 inputStream
= xmlNewInputStream(ctxt
);
11739 if (inputStream
== NULL
) {
11740 xmlFreeParserCtxt(ctxt
);
11741 xmlFreeParserInputBuffer(buf
);
11745 if (filename
== NULL
)
11746 inputStream
->filename
= NULL
;
11748 inputStream
->filename
= (char *)
11749 xmlCanonicPath((const xmlChar
*) filename
);
11750 if (inputStream
->filename
== NULL
) {
11751 xmlFreeParserCtxt(ctxt
);
11752 xmlFreeParserInputBuffer(buf
);
11756 inputStream
->buf
= buf
;
11757 inputStream
->base
= inputStream
->buf
->buffer
->content
;
11758 inputStream
->cur
= inputStream
->buf
->buffer
->content
;
11760 &inputStream
->buf
->buffer
->content
[inputStream
->buf
->buffer
->use
];
11762 inputPush(ctxt
, inputStream
);
11765 * If the caller didn't provide an initial 'chunk' for determining
11766 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11767 * that it can be automatically determined later
11769 if ((size
== 0) || (chunk
== NULL
)) {
11770 ctxt
->charset
= XML_CHAR_ENCODING_NONE
;
11771 } else if ((ctxt
->input
!= NULL
) && (ctxt
->input
->buf
!= NULL
)) {
11772 int base
= ctxt
->input
->base
- ctxt
->input
->buf
->buffer
->content
;
11773 int cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
11775 xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
11777 ctxt
->input
->base
= ctxt
->input
->buf
->buffer
->content
+ base
;
11778 ctxt
->input
->cur
= ctxt
->input
->base
+ cur
;
11780 &ctxt
->input
->buf
->buffer
->content
[ctxt
->input
->buf
->buffer
->use
];
11782 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
11786 if (enc
!= XML_CHAR_ENCODING_NONE
) {
11787 xmlSwitchEncoding(ctxt
, enc
);
11792 #endif /* LIBXML_PUSH_ENABLED */
11796 * @ctxt: an XML parser context
11798 * Blocks further parser processing
11801 xmlStopParser(xmlParserCtxtPtr ctxt
) {
11804 ctxt
->instate
= XML_PARSER_EOF
;
11805 ctxt
->disableSAX
= 1;
11806 if (ctxt
->input
!= NULL
) {
11807 ctxt
->input
->cur
= BAD_CAST
"";
11808 ctxt
->input
->base
= ctxt
->input
->cur
;
11813 * xmlCreateIOParserCtxt:
11814 * @sax: a SAX handler
11815 * @user_data: The user data returned on SAX callbacks
11816 * @ioread: an I/O read function
11817 * @ioclose: an I/O close function
11818 * @ioctx: an I/O handler
11819 * @enc: the charset encoding if known
11821 * Create a parser context for using the XML parser with an existing
11824 * Returns the new parser context or NULL
11827 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax
, void *user_data
,
11828 xmlInputReadCallback ioread
, xmlInputCloseCallback ioclose
,
11829 void *ioctx
, xmlCharEncoding enc
) {
11830 xmlParserCtxtPtr ctxt
;
11831 xmlParserInputPtr inputStream
;
11832 xmlParserInputBufferPtr buf
;
11834 if (ioread
== NULL
) return(NULL
);
11836 buf
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
, enc
);
11837 if (buf
== NULL
) return(NULL
);
11839 ctxt
= xmlNewParserCtxt();
11840 if (ctxt
== NULL
) {
11841 xmlFreeParserInputBuffer(buf
);
11845 #ifdef LIBXML_SAX1_ENABLED
11846 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
11847 #endif /* LIBXML_SAX1_ENABLED */
11848 xmlFree(ctxt
->sax
);
11849 ctxt
->sax
= (xmlSAXHandlerPtr
) xmlMalloc(sizeof(xmlSAXHandler
));
11850 if (ctxt
->sax
== NULL
) {
11851 xmlErrMemory(ctxt
, NULL
);
11852 xmlFreeParserCtxt(ctxt
);
11855 memset(ctxt
->sax
, 0, sizeof(xmlSAXHandler
));
11856 if (sax
->initialized
== XML_SAX2_MAGIC
)
11857 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandler
));
11859 memcpy(ctxt
->sax
, sax
, sizeof(xmlSAXHandlerV1
));
11860 if (user_data
!= NULL
)
11861 ctxt
->userData
= user_data
;
11864 inputStream
= xmlNewIOInputStream(ctxt
, buf
, enc
);
11865 if (inputStream
== NULL
) {
11866 xmlFreeParserCtxt(ctxt
);
11869 inputPush(ctxt
, inputStream
);
11874 #ifdef LIBXML_VALID_ENABLED
11875 /************************************************************************
11877 * Front ends when parsing a DTD *
11879 ************************************************************************/
11883 * @sax: the SAX handler block or NULL
11884 * @input: an Input Buffer
11885 * @enc: the charset encoding if known
11887 * Load and parse a DTD
11889 * Returns the resulting xmlDtdPtr or NULL in case of error.
11890 * @input will be freed by the function in any case.
11894 xmlIOParseDTD(xmlSAXHandlerPtr sax
, xmlParserInputBufferPtr input
,
11895 xmlCharEncoding enc
) {
11896 xmlDtdPtr ret
= NULL
;
11897 xmlParserCtxtPtr ctxt
;
11898 xmlParserInputPtr pinput
= NULL
;
11904 ctxt
= xmlNewParserCtxt();
11905 if (ctxt
== NULL
) {
11906 xmlFreeParserInputBuffer(input
);
11911 * Set-up the SAX context
11914 if (ctxt
->sax
!= NULL
)
11915 xmlFree(ctxt
->sax
);
11917 ctxt
->userData
= ctxt
;
11919 xmlDetectSAX2(ctxt
);
11922 * generate a parser input from the I/O handler
11925 pinput
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
11926 if (pinput
== NULL
) {
11927 if (sax
!= NULL
) ctxt
->sax
= NULL
;
11928 xmlFreeParserInputBuffer(input
);
11929 xmlFreeParserCtxt(ctxt
);
11934 * plug some encoding conversion routines here.
11936 if (xmlPushInput(ctxt
, pinput
) < 0) {
11937 if (sax
!= NULL
) ctxt
->sax
= NULL
;
11938 xmlFreeParserCtxt(ctxt
);
11941 if (enc
!= XML_CHAR_ENCODING_NONE
) {
11942 xmlSwitchEncoding(ctxt
, enc
);
11945 pinput
->filename
= NULL
;
11948 pinput
->base
= ctxt
->input
->cur
;
11949 pinput
->cur
= ctxt
->input
->cur
;
11950 pinput
->free
= NULL
;
11953 * let's parse that entity knowing it's an external subset.
11955 ctxt
->inSubset
= 2;
11956 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
11957 if (ctxt
->myDoc
== NULL
) {
11958 xmlErrMemory(ctxt
, "New Doc failed");
11961 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
11962 ctxt
->myDoc
->extSubset
= xmlNewDtd(ctxt
->myDoc
, BAD_CAST
"none",
11963 BAD_CAST
"none", BAD_CAST
"none");
11965 if ((enc
== XML_CHAR_ENCODING_NONE
) &&
11966 ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4)) {
11968 * Get the 4 first bytes and decode the charset
11969 * if enc != XML_CHAR_ENCODING_NONE
11970 * plug some encoding conversion routines.
11976 enc
= xmlDetectCharEncoding(start
, 4);
11977 if (enc
!= XML_CHAR_ENCODING_NONE
) {
11978 xmlSwitchEncoding(ctxt
, enc
);
11982 xmlParseExternalSubset(ctxt
, BAD_CAST
"none", BAD_CAST
"none");
11984 if (ctxt
->myDoc
!= NULL
) {
11985 if (ctxt
->wellFormed
) {
11986 ret
= ctxt
->myDoc
->extSubset
;
11987 ctxt
->myDoc
->extSubset
= NULL
;
11992 tmp
= ret
->children
;
11993 while (tmp
!= NULL
) {
12001 xmlFreeDoc(ctxt
->myDoc
);
12002 ctxt
->myDoc
= NULL
;
12004 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12005 xmlFreeParserCtxt(ctxt
);
12012 * @sax: the SAX handler block
12013 * @ExternalID: a NAME* containing the External ID of the DTD
12014 * @SystemID: a NAME* containing the URL to the DTD
12016 * Load and parse an external subset.
12018 * Returns the resulting xmlDtdPtr or NULL in case of error.
12022 xmlSAXParseDTD(xmlSAXHandlerPtr sax
, const xmlChar
*ExternalID
,
12023 const xmlChar
*SystemID
) {
12024 xmlDtdPtr ret
= NULL
;
12025 xmlParserCtxtPtr ctxt
;
12026 xmlParserInputPtr input
= NULL
;
12027 xmlCharEncoding enc
;
12028 xmlChar
* systemIdCanonic
;
12030 if ((ExternalID
== NULL
) && (SystemID
== NULL
)) return(NULL
);
12032 ctxt
= xmlNewParserCtxt();
12033 if (ctxt
== NULL
) {
12038 * Set-up the SAX context
12041 if (ctxt
->sax
!= NULL
)
12042 xmlFree(ctxt
->sax
);
12044 ctxt
->userData
= ctxt
;
12048 * Canonicalise the system ID
12050 systemIdCanonic
= xmlCanonicPath(SystemID
);
12051 if ((SystemID
!= NULL
) && (systemIdCanonic
== NULL
)) {
12052 xmlFreeParserCtxt(ctxt
);
12057 * Ask the Entity resolver to load the damn thing
12060 if ((ctxt
->sax
!= NULL
) && (ctxt
->sax
->resolveEntity
!= NULL
))
12061 input
= ctxt
->sax
->resolveEntity(ctxt
->userData
, ExternalID
,
12063 if (input
== NULL
) {
12064 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12065 xmlFreeParserCtxt(ctxt
);
12066 if (systemIdCanonic
!= NULL
)
12067 xmlFree(systemIdCanonic
);
12072 * plug some encoding conversion routines here.
12074 if (xmlPushInput(ctxt
, input
) < 0) {
12075 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12076 xmlFreeParserCtxt(ctxt
);
12077 if (systemIdCanonic
!= NULL
)
12078 xmlFree(systemIdCanonic
);
12081 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
12082 enc
= xmlDetectCharEncoding(ctxt
->input
->cur
, 4);
12083 xmlSwitchEncoding(ctxt
, enc
);
12086 if (input
->filename
== NULL
)
12087 input
->filename
= (char *) systemIdCanonic
;
12089 xmlFree(systemIdCanonic
);
12092 input
->base
= ctxt
->input
->cur
;
12093 input
->cur
= ctxt
->input
->cur
;
12094 input
->free
= NULL
;
12097 * let's parse that entity knowing it's an external subset.
12099 ctxt
->inSubset
= 2;
12100 ctxt
->myDoc
= xmlNewDoc(BAD_CAST
"1.0");
12101 if (ctxt
->myDoc
== NULL
) {
12102 xmlErrMemory(ctxt
, "New Doc failed");
12103 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12104 xmlFreeParserCtxt(ctxt
);
12107 ctxt
->myDoc
->properties
= XML_DOC_INTERNAL
;
12108 ctxt
->myDoc
->extSubset
= xmlNewDtd(ctxt
->myDoc
, BAD_CAST
"none",
12109 ExternalID
, SystemID
);
12110 xmlParseExternalSubset(ctxt
, ExternalID
, SystemID
);
12112 if (ctxt
->myDoc
!= NULL
) {
12113 if (ctxt
->wellFormed
) {
12114 ret
= ctxt
->myDoc
->extSubset
;
12115 ctxt
->myDoc
->extSubset
= NULL
;
12120 tmp
= ret
->children
;
12121 while (tmp
!= NULL
) {
12129 xmlFreeDoc(ctxt
->myDoc
);
12130 ctxt
->myDoc
= NULL
;
12132 if (sax
!= NULL
) ctxt
->sax
= NULL
;
12133 xmlFreeParserCtxt(ctxt
);
12141 * @ExternalID: a NAME* containing the External ID of the DTD
12142 * @SystemID: a NAME* containing the URL to the DTD
12144 * Load and parse an external subset.
12146 * Returns the resulting xmlDtdPtr or NULL in case of error.
12150 xmlParseDTD(const xmlChar
*ExternalID
, const xmlChar
*SystemID
) {
12151 return(xmlSAXParseDTD(NULL
, ExternalID
, SystemID
));
12153 #endif /* LIBXML_VALID_ENABLED */
12155 /************************************************************************
12157 * Front ends when parsing an Entity *
12159 ************************************************************************/
12162 * xmlParseCtxtExternalEntity:
12163 * @ctx: the existing parsing context
12164 * @URL: the URL for the entity to load
12165 * @ID: the System ID for the entity to load
12166 * @lst: the return value for the set of parsed nodes
12168 * Parse an external general entity within an existing parsing context
12169 * An external general parsed entity is well-formed if it matches the
12170 * production labeled extParsedEnt.
12172 * [78] extParsedEnt ::= TextDecl? content
12174 * Returns 0 if the entity is well formed, -1 in case of args problem and
12175 * the parser error code otherwise
12179 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx
, const xmlChar
*URL
,
12180 const xmlChar
*ID
, xmlNodePtr
*lst
) {
12181 xmlParserCtxtPtr ctxt
;
12183 xmlNodePtr newRoot
;
12184 xmlSAXHandlerPtr oldsax
= NULL
;
12187 xmlCharEncoding enc
;
12189 if (ctx
== NULL
) return(-1);
12191 if (((ctx
->depth
> 40) && ((ctx
->options
& XML_PARSE_HUGE
) == 0)) ||
12192 (ctx
->depth
> 1024)) {
12193 return(XML_ERR_ENTITY_LOOP
);
12198 if ((URL
== NULL
) && (ID
== NULL
))
12200 if (ctx
->myDoc
== NULL
) /* @@ relax but check for dereferences */
12203 ctxt
= xmlCreateEntityParserCtxtInternal(URL
, ID
, NULL
, ctx
);
12204 if (ctxt
== NULL
) {
12208 oldsax
= ctxt
->sax
;
12209 ctxt
->sax
= ctx
->sax
;
12210 xmlDetectSAX2(ctxt
);
12211 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
12212 if (newDoc
== NULL
) {
12213 xmlFreeParserCtxt(ctxt
);
12216 newDoc
->properties
= XML_DOC_INTERNAL
;
12217 if (ctx
->myDoc
->dict
) {
12218 newDoc
->dict
= ctx
->myDoc
->dict
;
12219 xmlDictReference(newDoc
->dict
);
12221 if (ctx
->myDoc
!= NULL
) {
12222 newDoc
->intSubset
= ctx
->myDoc
->intSubset
;
12223 newDoc
->extSubset
= ctx
->myDoc
->extSubset
;
12225 if (ctx
->myDoc
->URL
!= NULL
) {
12226 newDoc
->URL
= xmlStrdup(ctx
->myDoc
->URL
);
12228 newRoot
= xmlNewDocNode(newDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
12229 if (newRoot
== NULL
) {
12230 ctxt
->sax
= oldsax
;
12231 xmlFreeParserCtxt(ctxt
);
12232 newDoc
->intSubset
= NULL
;
12233 newDoc
->extSubset
= NULL
;
12234 xmlFreeDoc(newDoc
);
12237 xmlAddChild((xmlNodePtr
) newDoc
, newRoot
);
12238 nodePush(ctxt
, newDoc
->children
);
12239 if (ctx
->myDoc
== NULL
) {
12240 ctxt
->myDoc
= newDoc
;
12242 ctxt
->myDoc
= ctx
->myDoc
;
12243 newDoc
->children
->doc
= ctx
->myDoc
;
12247 * Get the 4 first bytes and decode the charset
12248 * if enc != XML_CHAR_ENCODING_NONE
12249 * plug some encoding conversion routines.
12252 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
12257 enc
= xmlDetectCharEncoding(start
, 4);
12258 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12259 xmlSwitchEncoding(ctxt
, enc
);
12264 * Parse a possible text declaration first
12266 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12267 xmlParseTextDecl(ctxt
);
12269 * An XML-1.0 document can't reference an entity not XML-1.0
12271 if ((xmlStrEqual(ctx
->version
, BAD_CAST
"1.0")) &&
12272 (!xmlStrEqual(ctxt
->input
->version
, BAD_CAST
"1.0"))) {
12273 xmlFatalErrMsg(ctxt
, XML_ERR_VERSION_MISMATCH
,
12274 "Version mismatch between document and entity\n");
12279 * Doing validity checking on chunk doesn't make sense
12281 ctxt
->instate
= XML_PARSER_CONTENT
;
12282 ctxt
->validate
= ctx
->validate
;
12283 ctxt
->valid
= ctx
->valid
;
12284 ctxt
->loadsubset
= ctx
->loadsubset
;
12285 ctxt
->depth
= ctx
->depth
+ 1;
12286 ctxt
->replaceEntities
= ctx
->replaceEntities
;
12287 if (ctxt
->validate
) {
12288 ctxt
->vctxt
.error
= ctx
->vctxt
.error
;
12289 ctxt
->vctxt
.warning
= ctx
->vctxt
.warning
;
12291 ctxt
->vctxt
.error
= NULL
;
12292 ctxt
->vctxt
.warning
= NULL
;
12294 ctxt
->vctxt
.nodeTab
= NULL
;
12295 ctxt
->vctxt
.nodeNr
= 0;
12296 ctxt
->vctxt
.nodeMax
= 0;
12297 ctxt
->vctxt
.node
= NULL
;
12298 if (ctxt
->dict
!= NULL
) xmlDictFree(ctxt
->dict
);
12299 ctxt
->dict
= ctx
->dict
;
12300 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
12301 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
12302 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
12303 ctxt
->dictNames
= ctx
->dictNames
;
12304 ctxt
->attsDefault
= ctx
->attsDefault
;
12305 ctxt
->attsSpecial
= ctx
->attsSpecial
;
12306 ctxt
->linenumbers
= ctx
->linenumbers
;
12308 xmlParseContent(ctxt
);
12310 ctx
->validate
= ctxt
->validate
;
12311 ctx
->valid
= ctxt
->valid
;
12312 if ((RAW
== '<') && (NXT(1) == '/')) {
12313 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
12314 } else if (RAW
!= 0) {
12315 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
12317 if (ctxt
->node
!= newDoc
->children
) {
12318 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
12321 if (!ctxt
->wellFormed
) {
12322 if (ctxt
->errNo
== 0)
12331 * Return the newly created nodeset after unlinking it from
12332 * their pseudo parent.
12334 cur
= newDoc
->children
->children
;
12336 while (cur
!= NULL
) {
12337 cur
->parent
= NULL
;
12340 newDoc
->children
->children
= NULL
;
12344 ctxt
->sax
= oldsax
;
12346 ctxt
->attsDefault
= NULL
;
12347 ctxt
->attsSpecial
= NULL
;
12348 xmlFreeParserCtxt(ctxt
);
12349 newDoc
->intSubset
= NULL
;
12350 newDoc
->extSubset
= NULL
;
12351 xmlFreeDoc(newDoc
);
12357 * xmlParseExternalEntityPrivate:
12358 * @doc: the document the chunk pertains to
12359 * @oldctxt: the previous parser context if available
12360 * @sax: the SAX handler block (possibly NULL)
12361 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12362 * @depth: Used for loop detection, use 0
12363 * @URL: the URL for the entity to load
12364 * @ID: the System ID for the entity to load
12365 * @list: the return value for the set of parsed nodes
12367 * Private version of xmlParseExternalEntity()
12369 * Returns 0 if the entity is well formed, -1 in case of args problem and
12370 * the parser error code otherwise
12373 static xmlParserErrors
12374 xmlParseExternalEntityPrivate(xmlDocPtr doc
, xmlParserCtxtPtr oldctxt
,
12375 xmlSAXHandlerPtr sax
,
12376 void *user_data
, int depth
, const xmlChar
*URL
,
12377 const xmlChar
*ID
, xmlNodePtr
*list
) {
12378 xmlParserCtxtPtr ctxt
;
12380 xmlNodePtr newRoot
;
12381 xmlSAXHandlerPtr oldsax
= NULL
;
12382 xmlParserErrors ret
= XML_ERR_OK
;
12384 xmlCharEncoding enc
;
12386 if (((depth
> 40) &&
12387 ((oldctxt
== NULL
) || (oldctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
12389 return(XML_ERR_ENTITY_LOOP
);
12394 if ((URL
== NULL
) && (ID
== NULL
))
12395 return(XML_ERR_INTERNAL_ERROR
);
12397 return(XML_ERR_INTERNAL_ERROR
);
12400 ctxt
= xmlCreateEntityParserCtxtInternal(URL
, ID
, NULL
, oldctxt
);
12401 if (ctxt
== NULL
) return(XML_WAR_UNDECLARED_ENTITY
);
12402 ctxt
->userData
= ctxt
;
12403 if (oldctxt
!= NULL
) {
12404 ctxt
->_private
= oldctxt
->_private
;
12405 ctxt
->loadsubset
= oldctxt
->loadsubset
;
12406 ctxt
->validate
= oldctxt
->validate
;
12407 ctxt
->external
= oldctxt
->external
;
12408 ctxt
->record_info
= oldctxt
->record_info
;
12409 ctxt
->node_seq
.maximum
= oldctxt
->node_seq
.maximum
;
12410 ctxt
->node_seq
.length
= oldctxt
->node_seq
.length
;
12411 ctxt
->node_seq
.buffer
= oldctxt
->node_seq
.buffer
;
12414 * Doing validity checking on chunk without context
12415 * doesn't make sense
12417 ctxt
->_private
= NULL
;
12418 ctxt
->validate
= 0;
12419 ctxt
->external
= 2;
12420 ctxt
->loadsubset
= 0;
12423 oldsax
= ctxt
->sax
;
12425 if (user_data
!= NULL
)
12426 ctxt
->userData
= user_data
;
12428 xmlDetectSAX2(ctxt
);
12429 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
12430 if (newDoc
== NULL
) {
12431 ctxt
->node_seq
.maximum
= 0;
12432 ctxt
->node_seq
.length
= 0;
12433 ctxt
->node_seq
.buffer
= NULL
;
12434 xmlFreeParserCtxt(ctxt
);
12435 return(XML_ERR_INTERNAL_ERROR
);
12437 newDoc
->properties
= XML_DOC_INTERNAL
;
12438 newDoc
->intSubset
= doc
->intSubset
;
12439 newDoc
->extSubset
= doc
->extSubset
;
12440 newDoc
->dict
= doc
->dict
;
12441 xmlDictReference(newDoc
->dict
);
12443 if (doc
->URL
!= NULL
) {
12444 newDoc
->URL
= xmlStrdup(doc
->URL
);
12446 newRoot
= xmlNewDocNode(newDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
12447 if (newRoot
== NULL
) {
12449 ctxt
->sax
= oldsax
;
12450 ctxt
->node_seq
.maximum
= 0;
12451 ctxt
->node_seq
.length
= 0;
12452 ctxt
->node_seq
.buffer
= NULL
;
12453 xmlFreeParserCtxt(ctxt
);
12454 newDoc
->intSubset
= NULL
;
12455 newDoc
->extSubset
= NULL
;
12456 xmlFreeDoc(newDoc
);
12457 return(XML_ERR_INTERNAL_ERROR
);
12459 xmlAddChild((xmlNodePtr
) newDoc
, newRoot
);
12460 nodePush(ctxt
, newDoc
->children
);
12462 newRoot
->doc
= doc
;
12465 * Get the 4 first bytes and decode the charset
12466 * if enc != XML_CHAR_ENCODING_NONE
12467 * plug some encoding conversion routines.
12470 if ((ctxt
->input
->end
- ctxt
->input
->cur
) >= 4) {
12475 enc
= xmlDetectCharEncoding(start
, 4);
12476 if (enc
!= XML_CHAR_ENCODING_NONE
) {
12477 xmlSwitchEncoding(ctxt
, enc
);
12482 * Parse a possible text declaration first
12484 if ((CMP5(CUR_PTR
, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12485 xmlParseTextDecl(ctxt
);
12488 ctxt
->instate
= XML_PARSER_CONTENT
;
12489 ctxt
->depth
= depth
;
12491 xmlParseContent(ctxt
);
12493 if ((RAW
== '<') && (NXT(1) == '/')) {
12494 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
12495 } else if (RAW
!= 0) {
12496 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
12498 if (ctxt
->node
!= newDoc
->children
) {
12499 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
12502 if (!ctxt
->wellFormed
) {
12503 if (ctxt
->errNo
== 0)
12504 ret
= XML_ERR_INTERNAL_ERROR
;
12506 ret
= (xmlParserErrors
)ctxt
->errNo
;
12508 if (list
!= NULL
) {
12512 * Return the newly created nodeset after unlinking it from
12513 * they pseudo parent.
12515 cur
= newDoc
->children
->children
;
12517 while (cur
!= NULL
) {
12518 cur
->parent
= NULL
;
12521 newDoc
->children
->children
= NULL
;
12527 * Record in the parent context the number of entities replacement
12528 * done when parsing that reference.
12530 if (oldctxt
!= NULL
)
12531 oldctxt
->nbentities
+= ctxt
->nbentities
;
12534 * Also record the size of the entity parsed
12536 if (ctxt
->input
!= NULL
) {
12537 oldctxt
->sizeentities
+= ctxt
->input
->consumed
;
12538 oldctxt
->sizeentities
+= (ctxt
->input
->cur
- ctxt
->input
->base
);
12541 * And record the last error if any
12543 if (ctxt
->lastError
.code
!= XML_ERR_OK
)
12544 xmlCopyError(&ctxt
->lastError
, &oldctxt
->lastError
);
12547 ctxt
->sax
= oldsax
;
12548 oldctxt
->node_seq
.maximum
= ctxt
->node_seq
.maximum
;
12549 oldctxt
->node_seq
.length
= ctxt
->node_seq
.length
;
12550 oldctxt
->node_seq
.buffer
= ctxt
->node_seq
.buffer
;
12551 ctxt
->node_seq
.maximum
= 0;
12552 ctxt
->node_seq
.length
= 0;
12553 ctxt
->node_seq
.buffer
= NULL
;
12554 xmlFreeParserCtxt(ctxt
);
12555 newDoc
->intSubset
= NULL
;
12556 newDoc
->extSubset
= NULL
;
12557 xmlFreeDoc(newDoc
);
12562 #ifdef LIBXML_SAX1_ENABLED
12564 * xmlParseExternalEntity:
12565 * @doc: the document the chunk pertains to
12566 * @sax: the SAX handler bloc (possibly NULL)
12567 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12568 * @depth: Used for loop detection, use 0
12569 * @URL: the URL for the entity to load
12570 * @ID: the System ID for the entity to load
12571 * @lst: the return value for the set of parsed nodes
12573 * Parse an external general entity
12574 * An external general parsed entity is well-formed if it matches the
12575 * production labeled extParsedEnt.
12577 * [78] extParsedEnt ::= TextDecl? content
12579 * Returns 0 if the entity is well formed, -1 in case of args problem and
12580 * the parser error code otherwise
12584 xmlParseExternalEntity(xmlDocPtr doc
, xmlSAXHandlerPtr sax
, void *user_data
,
12585 int depth
, const xmlChar
*URL
, const xmlChar
*ID
, xmlNodePtr
*lst
) {
12586 return(xmlParseExternalEntityPrivate(doc
, NULL
, sax
, user_data
, depth
, URL
,
12591 * xmlParseBalancedChunkMemory:
12592 * @doc: the document the chunk pertains to
12593 * @sax: the SAX handler bloc (possibly NULL)
12594 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12595 * @depth: Used for loop detection, use 0
12596 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12597 * @lst: the return value for the set of parsed nodes
12599 * Parse a well-balanced chunk of an XML document
12600 * called by the parser
12601 * The allowed sequence for the Well Balanced Chunk is the one defined by
12602 * the content production in the XML grammar:
12604 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12606 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12607 * the parser error code otherwise
12611 xmlParseBalancedChunkMemory(xmlDocPtr doc
, xmlSAXHandlerPtr sax
,
12612 void *user_data
, int depth
, const xmlChar
*string
, xmlNodePtr
*lst
) {
12613 return xmlParseBalancedChunkMemoryRecover( doc
, sax
, user_data
,
12614 depth
, string
, lst
, 0 );
12616 #endif /* LIBXML_SAX1_ENABLED */
12619 * xmlParseBalancedChunkMemoryInternal:
12620 * @oldctxt: the existing parsing context
12621 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12622 * @user_data: the user data field for the parser context
12623 * @lst: the return value for the set of parsed nodes
12626 * Parse a well-balanced chunk of an XML document
12627 * called by the parser
12628 * The allowed sequence for the Well Balanced Chunk is the one defined by
12629 * the content production in the XML grammar:
12631 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12633 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12634 * error code otherwise
12636 * In case recover is set to 1, the nodelist will not be empty even if
12637 * the parsed chunk is not well balanced.
12639 static xmlParserErrors
12640 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt
,
12641 const xmlChar
*string
, void *user_data
, xmlNodePtr
*lst
) {
12642 xmlParserCtxtPtr ctxt
;
12643 xmlDocPtr newDoc
= NULL
;
12644 xmlNodePtr newRoot
;
12645 xmlSAXHandlerPtr oldsax
= NULL
;
12646 xmlNodePtr content
= NULL
;
12647 xmlNodePtr last
= NULL
;
12649 xmlParserErrors ret
= XML_ERR_OK
;
12654 if (((oldctxt
->depth
> 40) && ((oldctxt
->options
& XML_PARSE_HUGE
) == 0)) ||
12655 (oldctxt
->depth
> 1024)) {
12656 return(XML_ERR_ENTITY_LOOP
);
12662 if (string
== NULL
)
12663 return(XML_ERR_INTERNAL_ERROR
);
12665 size
= xmlStrlen(string
);
12667 ctxt
= xmlCreateMemoryParserCtxt((char *) string
, size
);
12668 if (ctxt
== NULL
) return(XML_WAR_UNDECLARED_ENTITY
);
12669 if (user_data
!= NULL
)
12670 ctxt
->userData
= user_data
;
12672 ctxt
->userData
= ctxt
;
12673 if (ctxt
->dict
!= NULL
) xmlDictFree(ctxt
->dict
);
12674 ctxt
->dict
= oldctxt
->dict
;
12675 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
12676 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
12677 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
12680 /* propagate namespaces down the entity */
12681 for (i
= 0;i
< oldctxt
->nsNr
;i
+= 2) {
12682 nsPush(ctxt
, oldctxt
->nsTab
[i
], oldctxt
->nsTab
[i
+1]);
12686 oldsax
= ctxt
->sax
;
12687 ctxt
->sax
= oldctxt
->sax
;
12688 xmlDetectSAX2(ctxt
);
12689 ctxt
->replaceEntities
= oldctxt
->replaceEntities
;
12690 ctxt
->options
= oldctxt
->options
;
12692 ctxt
->_private
= oldctxt
->_private
;
12693 if (oldctxt
->myDoc
== NULL
) {
12694 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
12695 if (newDoc
== NULL
) {
12696 ctxt
->sax
= oldsax
;
12698 xmlFreeParserCtxt(ctxt
);
12699 return(XML_ERR_INTERNAL_ERROR
);
12701 newDoc
->properties
= XML_DOC_INTERNAL
;
12702 newDoc
->dict
= ctxt
->dict
;
12703 xmlDictReference(newDoc
->dict
);
12704 ctxt
->myDoc
= newDoc
;
12706 ctxt
->myDoc
= oldctxt
->myDoc
;
12707 content
= ctxt
->myDoc
->children
;
12708 last
= ctxt
->myDoc
->last
;
12710 newRoot
= xmlNewDocNode(ctxt
->myDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
12711 if (newRoot
== NULL
) {
12712 ctxt
->sax
= oldsax
;
12714 xmlFreeParserCtxt(ctxt
);
12715 if (newDoc
!= NULL
) {
12716 xmlFreeDoc(newDoc
);
12718 return(XML_ERR_INTERNAL_ERROR
);
12720 ctxt
->myDoc
->children
= NULL
;
12721 ctxt
->myDoc
->last
= NULL
;
12722 xmlAddChild((xmlNodePtr
) ctxt
->myDoc
, newRoot
);
12723 nodePush(ctxt
, ctxt
->myDoc
->children
);
12724 ctxt
->instate
= XML_PARSER_CONTENT
;
12725 ctxt
->depth
= oldctxt
->depth
+ 1;
12727 ctxt
->validate
= 0;
12728 ctxt
->loadsubset
= oldctxt
->loadsubset
;
12729 if ((oldctxt
->validate
) || (oldctxt
->replaceEntities
!= 0)) {
12731 * ID/IDREF registration will be done in xmlValidateElement below
12733 ctxt
->loadsubset
|= XML_SKIP_IDS
;
12735 ctxt
->dictNames
= oldctxt
->dictNames
;
12736 ctxt
->attsDefault
= oldctxt
->attsDefault
;
12737 ctxt
->attsSpecial
= oldctxt
->attsSpecial
;
12739 xmlParseContent(ctxt
);
12740 if ((RAW
== '<') && (NXT(1) == '/')) {
12741 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
12742 } else if (RAW
!= 0) {
12743 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
12745 if (ctxt
->node
!= ctxt
->myDoc
->children
) {
12746 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
12749 if (!ctxt
->wellFormed
) {
12750 if (ctxt
->errNo
== 0)
12751 ret
= XML_ERR_INTERNAL_ERROR
;
12753 ret
= (xmlParserErrors
)ctxt
->errNo
;
12758 if ((lst
!= NULL
) && (ret
== XML_ERR_OK
)) {
12762 * Return the newly created nodeset after unlinking it from
12763 * they pseudo parent.
12765 cur
= ctxt
->myDoc
->children
->children
;
12767 while (cur
!= NULL
) {
12768 #ifdef LIBXML_VALID_ENABLED
12769 if ((oldctxt
->validate
) && (oldctxt
->wellFormed
) &&
12770 (oldctxt
->myDoc
) && (oldctxt
->myDoc
->intSubset
) &&
12771 (cur
->type
== XML_ELEMENT_NODE
)) {
12772 oldctxt
->valid
&= xmlValidateElement(&oldctxt
->vctxt
,
12773 oldctxt
->myDoc
, cur
);
12775 #endif /* LIBXML_VALID_ENABLED */
12776 cur
->parent
= NULL
;
12779 ctxt
->myDoc
->children
->children
= NULL
;
12781 if (ctxt
->myDoc
!= NULL
) {
12782 xmlFreeNode(ctxt
->myDoc
->children
);
12783 ctxt
->myDoc
->children
= content
;
12784 ctxt
->myDoc
->last
= last
;
12788 * Record in the parent context the number of entities replacement
12789 * done when parsing that reference.
12791 if (oldctxt
!= NULL
)
12792 oldctxt
->nbentities
+= ctxt
->nbentities
;
12795 * Also record the last error if any
12797 if (ctxt
->lastError
.code
!= XML_ERR_OK
)
12798 xmlCopyError(&ctxt
->lastError
, &oldctxt
->lastError
);
12800 ctxt
->sax
= oldsax
;
12802 ctxt
->attsDefault
= NULL
;
12803 ctxt
->attsSpecial
= NULL
;
12804 xmlFreeParserCtxt(ctxt
);
12805 if (newDoc
!= NULL
) {
12806 xmlFreeDoc(newDoc
);
12813 * xmlParseInNodeContext:
12814 * @node: the context node
12815 * @data: the input string
12816 * @datalen: the input string length in bytes
12817 * @options: a combination of xmlParserOption
12818 * @lst: the return value for the set of parsed nodes
12820 * Parse a well-balanced chunk of an XML document
12821 * within the context (DTD, namespaces, etc ...) of the given node.
12823 * The allowed sequence for the data is a Well Balanced Chunk defined by
12824 * the content production in the XML grammar:
12826 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12828 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12829 * error code otherwise
12832 xmlParseInNodeContext(xmlNodePtr node
, const char *data
, int datalen
,
12833 int options
, xmlNodePtr
*lst
) {
12835 xmlParserCtxtPtr ctxt
;
12836 xmlDocPtr doc
= NULL
;
12837 xmlNodePtr fake
, cur
;
12840 xmlParserErrors ret
= XML_ERR_OK
;
12843 * check all input parameters, grab the document
12845 if ((lst
== NULL
) || (node
== NULL
) || (data
== NULL
) || (datalen
< 0))
12846 return(XML_ERR_INTERNAL_ERROR
);
12847 switch (node
->type
) {
12848 case XML_ELEMENT_NODE
:
12849 case XML_ATTRIBUTE_NODE
:
12850 case XML_TEXT_NODE
:
12851 case XML_CDATA_SECTION_NODE
:
12852 case XML_ENTITY_REF_NODE
:
12854 case XML_COMMENT_NODE
:
12855 case XML_DOCUMENT_NODE
:
12856 case XML_HTML_DOCUMENT_NODE
:
12859 return(XML_ERR_INTERNAL_ERROR
);
12862 while ((node
!= NULL
) && (node
->type
!= XML_ELEMENT_NODE
) &&
12863 (node
->type
!= XML_DOCUMENT_NODE
) &&
12864 (node
->type
!= XML_HTML_DOCUMENT_NODE
))
12865 node
= node
->parent
;
12867 return(XML_ERR_INTERNAL_ERROR
);
12868 if (node
->type
== XML_ELEMENT_NODE
)
12871 doc
= (xmlDocPtr
) node
;
12873 return(XML_ERR_INTERNAL_ERROR
);
12876 * allocate a context and set-up everything not related to the
12877 * node position in the tree
12879 if (doc
->type
== XML_DOCUMENT_NODE
)
12880 ctxt
= xmlCreateMemoryParserCtxt((char *) data
, datalen
);
12881 #ifdef LIBXML_HTML_ENABLED
12882 else if (doc
->type
== XML_HTML_DOCUMENT_NODE
) {
12883 ctxt
= htmlCreateMemoryParserCtxt((char *) data
, datalen
);
12885 * When parsing in context, it makes no sense to add implied
12886 * elements like html/body/etc...
12888 options
|= HTML_PARSE_NOIMPLIED
;
12892 return(XML_ERR_INTERNAL_ERROR
);
12895 return(XML_ERR_NO_MEMORY
);
12898 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12899 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12900 * we must wait until the last moment to free the original one.
12902 if (doc
->dict
!= NULL
) {
12903 if (ctxt
->dict
!= NULL
)
12904 xmlDictFree(ctxt
->dict
);
12905 ctxt
->dict
= doc
->dict
;
12907 options
|= XML_PARSE_NODICT
;
12909 if (doc
->encoding
!= NULL
) {
12910 xmlCharEncodingHandlerPtr hdlr
;
12912 if (ctxt
->encoding
!= NULL
)
12913 xmlFree((xmlChar
*) ctxt
->encoding
);
12914 ctxt
->encoding
= xmlStrdup((const xmlChar
*) doc
->encoding
);
12916 hdlr
= xmlFindCharEncodingHandler(doc
->encoding
);
12917 if (hdlr
!= NULL
) {
12918 xmlSwitchToEncoding(ctxt
, hdlr
);
12920 return(XML_ERR_UNSUPPORTED_ENCODING
);
12924 xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
);
12925 xmlDetectSAX2(ctxt
);
12928 fake
= xmlNewComment(NULL
);
12929 if (fake
== NULL
) {
12930 xmlFreeParserCtxt(ctxt
);
12931 return(XML_ERR_NO_MEMORY
);
12933 xmlAddChild(node
, fake
);
12935 if (node
->type
== XML_ELEMENT_NODE
) {
12936 nodePush(ctxt
, node
);
12938 * initialize the SAX2 namespaces stack
12941 while ((cur
!= NULL
) && (cur
->type
== XML_ELEMENT_NODE
)) {
12942 xmlNsPtr ns
= cur
->nsDef
;
12943 const xmlChar
*iprefix
, *ihref
;
12945 while (ns
!= NULL
) {
12947 iprefix
= xmlDictLookup(ctxt
->dict
, ns
->prefix
, -1);
12948 ihref
= xmlDictLookup(ctxt
->dict
, ns
->href
, -1);
12950 iprefix
= ns
->prefix
;
12954 if (xmlGetNamespace(ctxt
, iprefix
) == NULL
) {
12955 nsPush(ctxt
, iprefix
, ihref
);
12962 ctxt
->instate
= XML_PARSER_CONTENT
;
12965 if ((ctxt
->validate
) || (ctxt
->replaceEntities
!= 0)) {
12967 * ID/IDREF registration will be done in xmlValidateElement below
12969 ctxt
->loadsubset
|= XML_SKIP_IDS
;
12972 #ifdef LIBXML_HTML_ENABLED
12973 if (doc
->type
== XML_HTML_DOCUMENT_NODE
)
12974 __htmlParseContent(ctxt
);
12977 xmlParseContent(ctxt
);
12980 if ((RAW
== '<') && (NXT(1) == '/')) {
12981 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
12982 } else if (RAW
!= 0) {
12983 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
12985 if ((ctxt
->node
!= NULL
) && (ctxt
->node
!= node
)) {
12986 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
12987 ctxt
->wellFormed
= 0;
12990 if (!ctxt
->wellFormed
) {
12991 if (ctxt
->errNo
== 0)
12992 ret
= XML_ERR_INTERNAL_ERROR
;
12994 ret
= (xmlParserErrors
)ctxt
->errNo
;
13000 * Return the newly created nodeset after unlinking it from
13001 * the pseudo sibling.
13014 while (cur
!= NULL
) {
13015 cur
->parent
= NULL
;
13019 xmlUnlinkNode(fake
);
13023 if (ret
!= XML_ERR_OK
) {
13024 xmlFreeNodeList(*lst
);
13028 if (doc
->dict
!= NULL
)
13030 xmlFreeParserCtxt(ctxt
);
13034 return(XML_ERR_INTERNAL_ERROR
);
13038 #ifdef LIBXML_SAX1_ENABLED
13040 * xmlParseBalancedChunkMemoryRecover:
13041 * @doc: the document the chunk pertains to
13042 * @sax: the SAX handler bloc (possibly NULL)
13043 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13044 * @depth: Used for loop detection, use 0
13045 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13046 * @lst: the return value for the set of parsed nodes
13047 * @recover: return nodes even if the data is broken (use 0)
13050 * Parse a well-balanced chunk of an XML document
13051 * called by the parser
13052 * The allowed sequence for the Well Balanced Chunk is the one defined by
13053 * the content production in the XML grammar:
13055 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13057 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13058 * the parser error code otherwise
13060 * In case recover is set to 1, the nodelist will not be empty even if
13061 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13065 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc
, xmlSAXHandlerPtr sax
,
13066 void *user_data
, int depth
, const xmlChar
*string
, xmlNodePtr
*lst
,
13068 xmlParserCtxtPtr ctxt
;
13070 xmlSAXHandlerPtr oldsax
= NULL
;
13071 xmlNodePtr content
, newRoot
;
13076 return(XML_ERR_ENTITY_LOOP
);
13082 if (string
== NULL
)
13085 size
= xmlStrlen(string
);
13087 ctxt
= xmlCreateMemoryParserCtxt((char *) string
, size
);
13088 if (ctxt
== NULL
) return(-1);
13089 ctxt
->userData
= ctxt
;
13091 oldsax
= ctxt
->sax
;
13093 if (user_data
!= NULL
)
13094 ctxt
->userData
= user_data
;
13096 newDoc
= xmlNewDoc(BAD_CAST
"1.0");
13097 if (newDoc
== NULL
) {
13098 xmlFreeParserCtxt(ctxt
);
13101 newDoc
->properties
= XML_DOC_INTERNAL
;
13102 if ((doc
!= NULL
) && (doc
->dict
!= NULL
)) {
13103 xmlDictFree(ctxt
->dict
);
13104 ctxt
->dict
= doc
->dict
;
13105 xmlDictReference(ctxt
->dict
);
13106 ctxt
->str_xml
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xml", 3);
13107 ctxt
->str_xmlns
= xmlDictLookup(ctxt
->dict
, BAD_CAST
"xmlns", 5);
13108 ctxt
->str_xml_ns
= xmlDictLookup(ctxt
->dict
, XML_XML_NAMESPACE
, 36);
13109 ctxt
->dictNames
= 1;
13111 xmlCtxtUseOptionsInternal(ctxt
, XML_PARSE_NODICT
, NULL
);
13114 newDoc
->intSubset
= doc
->intSubset
;
13115 newDoc
->extSubset
= doc
->extSubset
;
13117 newRoot
= xmlNewDocNode(newDoc
, NULL
, BAD_CAST
"pseudoroot", NULL
);
13118 if (newRoot
== NULL
) {
13120 ctxt
->sax
= oldsax
;
13121 xmlFreeParserCtxt(ctxt
);
13122 newDoc
->intSubset
= NULL
;
13123 newDoc
->extSubset
= NULL
;
13124 xmlFreeDoc(newDoc
);
13127 xmlAddChild((xmlNodePtr
) newDoc
, newRoot
);
13128 nodePush(ctxt
, newRoot
);
13130 ctxt
->myDoc
= newDoc
;
13132 ctxt
->myDoc
= newDoc
;
13133 newDoc
->children
->doc
= doc
;
13134 /* Ensure that doc has XML spec namespace */
13135 xmlSearchNsByHref(doc
, (xmlNodePtr
)doc
, XML_XML_NAMESPACE
);
13136 newDoc
->oldNs
= doc
->oldNs
;
13138 ctxt
->instate
= XML_PARSER_CONTENT
;
13139 ctxt
->depth
= depth
;
13142 * Doing validity checking on chunk doesn't make sense
13144 ctxt
->validate
= 0;
13145 ctxt
->loadsubset
= 0;
13146 xmlDetectSAX2(ctxt
);
13148 if ( doc
!= NULL
){
13149 content
= doc
->children
;
13150 doc
->children
= NULL
;
13151 xmlParseContent(ctxt
);
13152 doc
->children
= content
;
13155 xmlParseContent(ctxt
);
13157 if ((RAW
== '<') && (NXT(1) == '/')) {
13158 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13159 } else if (RAW
!= 0) {
13160 xmlFatalErr(ctxt
, XML_ERR_EXTRA_CONTENT
, NULL
);
13162 if (ctxt
->node
!= newDoc
->children
) {
13163 xmlFatalErr(ctxt
, XML_ERR_NOT_WELL_BALANCED
, NULL
);
13166 if (!ctxt
->wellFormed
) {
13167 if (ctxt
->errNo
== 0)
13175 if ((lst
!= NULL
) && ((ret
== 0) || (recover
== 1))) {
13179 * Return the newly created nodeset after unlinking it from
13180 * they pseudo parent.
13182 cur
= newDoc
->children
->children
;
13184 while (cur
!= NULL
) {
13185 xmlSetTreeDoc(cur
, doc
);
13186 cur
->parent
= NULL
;
13189 newDoc
->children
->children
= NULL
;
13193 ctxt
->sax
= oldsax
;
13194 xmlFreeParserCtxt(ctxt
);
13195 newDoc
->intSubset
= NULL
;
13196 newDoc
->extSubset
= NULL
;
13197 newDoc
->oldNs
= NULL
;
13198 xmlFreeDoc(newDoc
);
13204 * xmlSAXParseEntity:
13205 * @sax: the SAX handler block
13206 * @filename: the filename
13208 * parse an XML external entity out of context and build a tree.
13209 * It use the given SAX function block to handle the parsing callback.
13210 * If sax is NULL, fallback to the default DOM tree building routines.
13212 * [78] extParsedEnt ::= TextDecl? content
13214 * This correspond to a "Well Balanced" chunk
13216 * Returns the resulting document tree
13220 xmlSAXParseEntity(xmlSAXHandlerPtr sax
, const char *filename
) {
13222 xmlParserCtxtPtr ctxt
;
13224 ctxt
= xmlCreateFileParserCtxt(filename
);
13225 if (ctxt
== NULL
) {
13229 if (ctxt
->sax
!= NULL
)
13230 xmlFree(ctxt
->sax
);
13232 ctxt
->userData
= NULL
;
13235 xmlParseExtParsedEnt(ctxt
);
13237 if (ctxt
->wellFormed
)
13241 xmlFreeDoc(ctxt
->myDoc
);
13242 ctxt
->myDoc
= NULL
;
13246 xmlFreeParserCtxt(ctxt
);
13253 * @filename: the filename
13255 * parse an XML external entity out of context and build a tree.
13257 * [78] extParsedEnt ::= TextDecl? content
13259 * This correspond to a "Well Balanced" chunk
13261 * Returns the resulting document tree
13265 xmlParseEntity(const char *filename
) {
13266 return(xmlSAXParseEntity(NULL
, filename
));
13268 #endif /* LIBXML_SAX1_ENABLED */
13271 * xmlCreateEntityParserCtxtInternal:
13272 * @URL: the entity URL
13273 * @ID: the entity PUBLIC ID
13274 * @base: a possible base for the target URI
13275 * @pctx: parser context used to set options on new context
13277 * Create a parser context for an external entity
13278 * Automatic support for ZLIB/Compress compressed document is provided
13279 * by default if found at compile-time.
13281 * Returns the new parser context or NULL
13283 static xmlParserCtxtPtr
13284 xmlCreateEntityParserCtxtInternal(const xmlChar
*URL
, const xmlChar
*ID
,
13285 const xmlChar
*base
, xmlParserCtxtPtr pctx
) {
13286 xmlParserCtxtPtr ctxt
;
13287 xmlParserInputPtr inputStream
;
13288 char *directory
= NULL
;
13291 ctxt
= xmlNewParserCtxt();
13292 if (ctxt
== NULL
) {
13296 if (pctx
!= NULL
) {
13297 ctxt
->options
= pctx
->options
;
13298 ctxt
->_private
= pctx
->_private
;
13301 uri
= xmlBuildURI(URL
, base
);
13304 inputStream
= xmlLoadExternalEntity((char *)URL
, (char *)ID
, ctxt
);
13305 if (inputStream
== NULL
) {
13306 xmlFreeParserCtxt(ctxt
);
13310 inputPush(ctxt
, inputStream
);
13312 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
13313 directory
= xmlParserGetDirectory((char *)URL
);
13314 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
13315 ctxt
->directory
= directory
;
13317 inputStream
= xmlLoadExternalEntity((char *)uri
, (char *)ID
, ctxt
);
13318 if (inputStream
== NULL
) {
13320 xmlFreeParserCtxt(ctxt
);
13324 inputPush(ctxt
, inputStream
);
13326 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
13327 directory
= xmlParserGetDirectory((char *)uri
);
13328 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
13329 ctxt
->directory
= directory
;
13336 * xmlCreateEntityParserCtxt:
13337 * @URL: the entity URL
13338 * @ID: the entity PUBLIC ID
13339 * @base: a possible base for the target URI
13341 * Create a parser context for an external entity
13342 * Automatic support for ZLIB/Compress compressed document is provided
13343 * by default if found at compile-time.
13345 * Returns the new parser context or NULL
13348 xmlCreateEntityParserCtxt(const xmlChar
*URL
, const xmlChar
*ID
,
13349 const xmlChar
*base
) {
13350 return xmlCreateEntityParserCtxtInternal(URL
, ID
, base
, NULL
);
13354 /************************************************************************
13356 * Front ends when parsing from a file *
13358 ************************************************************************/
13361 * xmlCreateURLParserCtxt:
13362 * @filename: the filename or URL
13363 * @options: a combination of xmlParserOption
13365 * Create a parser context for a file or URL content.
13366 * Automatic support for ZLIB/Compress compressed document is provided
13367 * by default if found at compile-time and for file accesses
13369 * Returns the new parser context or NULL
13372 xmlCreateURLParserCtxt(const char *filename
, int options
)
13374 xmlParserCtxtPtr ctxt
;
13375 xmlParserInputPtr inputStream
;
13376 char *directory
= NULL
;
13378 ctxt
= xmlNewParserCtxt();
13379 if (ctxt
== NULL
) {
13380 xmlErrMemory(NULL
, "cannot allocate parser context");
13385 xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
);
13386 ctxt
->linenumbers
= 1;
13388 inputStream
= xmlLoadExternalEntity(filename
, NULL
, ctxt
);
13389 if (inputStream
== NULL
) {
13390 xmlFreeParserCtxt(ctxt
);
13394 inputPush(ctxt
, inputStream
);
13395 if ((ctxt
->directory
== NULL
) && (directory
== NULL
))
13396 directory
= xmlParserGetDirectory(filename
);
13397 if ((ctxt
->directory
== NULL
) && (directory
!= NULL
))
13398 ctxt
->directory
= directory
;
13404 * xmlCreateFileParserCtxt:
13405 * @filename: the filename
13407 * Create a parser context for a file content.
13408 * Automatic support for ZLIB/Compress compressed document is provided
13409 * by default if found at compile-time.
13411 * Returns the new parser context or NULL
13414 xmlCreateFileParserCtxt(const char *filename
)
13416 return(xmlCreateURLParserCtxt(filename
, 0));
13419 #ifdef LIBXML_SAX1_ENABLED
13421 * xmlSAXParseFileWithData:
13422 * @sax: the SAX handler block
13423 * @filename: the filename
13424 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13426 * @data: the userdata
13428 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13429 * compressed document is provided by default if found at compile-time.
13430 * It use the given SAX function block to handle the parsing callback.
13431 * If sax is NULL, fallback to the default DOM tree building routines.
13433 * User data (void *) is stored within the parser context in the
13434 * context's _private member, so it is available nearly everywhere in libxml
13436 * Returns the resulting document tree
13440 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax
, const char *filename
,
13441 int recovery
, void *data
) {
13443 xmlParserCtxtPtr ctxt
;
13447 ctxt
= xmlCreateFileParserCtxt(filename
);
13448 if (ctxt
== NULL
) {
13452 if (ctxt
->sax
!= NULL
)
13453 xmlFree(ctxt
->sax
);
13456 xmlDetectSAX2(ctxt
);
13458 ctxt
->_private
= data
;
13461 if (ctxt
->directory
== NULL
)
13462 ctxt
->directory
= xmlParserGetDirectory(filename
);
13464 ctxt
->recovery
= recovery
;
13466 xmlParseDocument(ctxt
);
13468 if ((ctxt
->wellFormed
) || recovery
) {
13471 if (ctxt
->input
->buf
->compressed
> 0)
13472 ret
->compression
= 9;
13474 ret
->compression
= ctxt
->input
->buf
->compressed
;
13479 xmlFreeDoc(ctxt
->myDoc
);
13480 ctxt
->myDoc
= NULL
;
13484 xmlFreeParserCtxt(ctxt
);
13491 * @sax: the SAX handler block
13492 * @filename: the filename
13493 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13496 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13497 * compressed document is provided by default if found at compile-time.
13498 * It use the given SAX function block to handle the parsing callback.
13499 * If sax is NULL, fallback to the default DOM tree building routines.
13501 * Returns the resulting document tree
13505 xmlSAXParseFile(xmlSAXHandlerPtr sax
, const char *filename
,
13507 return(xmlSAXParseFileWithData(sax
,filename
,recovery
,NULL
));
13512 * @cur: a pointer to an array of xmlChar
13514 * parse an XML in-memory document and build a tree.
13515 * In the case the document is not Well Formed, a attempt to build a
13516 * tree is tried anyway
13518 * Returns the resulting document tree or NULL in case of failure
13522 xmlRecoverDoc(const xmlChar
*cur
) {
13523 return(xmlSAXParseDoc(NULL
, cur
, 1));
13528 * @filename: the filename
13530 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13531 * compressed document is provided by default if found at compile-time.
13533 * Returns the resulting document tree if the file was wellformed,
13538 xmlParseFile(const char *filename
) {
13539 return(xmlSAXParseFile(NULL
, filename
, 0));
13544 * @filename: the filename
13546 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13547 * compressed document is provided by default if found at compile-time.
13548 * In the case the document is not Well Formed, it attempts to build
13551 * Returns the resulting document tree or NULL in case of failure
13555 xmlRecoverFile(const char *filename
) {
13556 return(xmlSAXParseFile(NULL
, filename
, 1));
13561 * xmlSetupParserForBuffer:
13562 * @ctxt: an XML parser context
13563 * @buffer: a xmlChar * buffer
13564 * @filename: a file name
13566 * Setup the parser context to parse a new buffer; Clears any prior
13567 * contents from the parser context. The buffer parameter must not be
13568 * NULL, but the filename parameter can be
13571 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt
, const xmlChar
* buffer
,
13572 const char* filename
)
13574 xmlParserInputPtr input
;
13576 if ((ctxt
== NULL
) || (buffer
== NULL
))
13579 input
= xmlNewInputStream(ctxt
);
13580 if (input
== NULL
) {
13581 xmlErrMemory(NULL
, "parsing new buffer: out of memory\n");
13582 xmlClearParserCtxt(ctxt
);
13586 xmlClearParserCtxt(ctxt
);
13587 if (filename
!= NULL
)
13588 input
->filename
= (char *) xmlCanonicPath((const xmlChar
*)filename
);
13589 input
->base
= buffer
;
13590 input
->cur
= buffer
;
13591 input
->end
= &buffer
[xmlStrlen(buffer
)];
13592 inputPush(ctxt
, input
);
13596 * xmlSAXUserParseFile:
13597 * @sax: a SAX handler
13598 * @user_data: The user data returned on SAX callbacks
13599 * @filename: a file name
13601 * parse an XML file and call the given SAX handler routines.
13602 * Automatic support for ZLIB/Compress compressed document is provided
13604 * Returns 0 in case of success or a error number otherwise
13607 xmlSAXUserParseFile(xmlSAXHandlerPtr sax
, void *user_data
,
13608 const char *filename
) {
13610 xmlParserCtxtPtr ctxt
;
13612 ctxt
= xmlCreateFileParserCtxt(filename
);
13613 if (ctxt
== NULL
) return -1;
13614 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
13615 xmlFree(ctxt
->sax
);
13617 xmlDetectSAX2(ctxt
);
13619 if (user_data
!= NULL
)
13620 ctxt
->userData
= user_data
;
13622 xmlParseDocument(ctxt
);
13624 if (ctxt
->wellFormed
)
13627 if (ctxt
->errNo
!= 0)
13634 if (ctxt
->myDoc
!= NULL
) {
13635 xmlFreeDoc(ctxt
->myDoc
);
13636 ctxt
->myDoc
= NULL
;
13638 xmlFreeParserCtxt(ctxt
);
13642 #endif /* LIBXML_SAX1_ENABLED */
13644 /************************************************************************
13646 * Front ends when parsing from memory *
13648 ************************************************************************/
13651 * xmlCreateMemoryParserCtxt:
13652 * @buffer: a pointer to a char array
13653 * @size: the size of the array
13655 * Create a parser context for an XML in-memory document.
13657 * Returns the new parser context or NULL
13660 xmlCreateMemoryParserCtxt(const char *buffer
, int size
) {
13661 xmlParserCtxtPtr ctxt
;
13662 xmlParserInputPtr input
;
13663 xmlParserInputBufferPtr buf
;
13665 if (buffer
== NULL
)
13670 ctxt
= xmlNewParserCtxt();
13674 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
13675 buf
= xmlParserInputBufferCreateMem(buffer
, size
, XML_CHAR_ENCODING_NONE
);
13677 xmlFreeParserCtxt(ctxt
);
13681 input
= xmlNewInputStream(ctxt
);
13682 if (input
== NULL
) {
13683 xmlFreeParserInputBuffer(buf
);
13684 xmlFreeParserCtxt(ctxt
);
13688 input
->filename
= NULL
;
13690 input
->base
= input
->buf
->buffer
->content
;
13691 input
->cur
= input
->buf
->buffer
->content
;
13692 input
->end
= &input
->buf
->buffer
->content
[input
->buf
->buffer
->use
];
13694 inputPush(ctxt
, input
);
13698 #ifdef LIBXML_SAX1_ENABLED
13700 * xmlSAXParseMemoryWithData:
13701 * @sax: the SAX handler block
13702 * @buffer: an pointer to a char array
13703 * @size: the size of the array
13704 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13706 * @data: the userdata
13708 * parse an XML in-memory block and use the given SAX function block
13709 * to handle the parsing callback. If sax is NULL, fallback to the default
13710 * DOM tree building routines.
13712 * User data (void *) is stored within the parser context in the
13713 * context's _private member, so it is available nearly everywhere in libxml
13715 * Returns the resulting document tree
13719 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax
, const char *buffer
,
13720 int size
, int recovery
, void *data
) {
13722 xmlParserCtxtPtr ctxt
;
13726 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
13727 if (ctxt
== NULL
) return(NULL
);
13729 if (ctxt
->sax
!= NULL
)
13730 xmlFree(ctxt
->sax
);
13733 xmlDetectSAX2(ctxt
);
13735 ctxt
->_private
=data
;
13738 ctxt
->recovery
= recovery
;
13740 xmlParseDocument(ctxt
);
13742 if ((ctxt
->wellFormed
) || recovery
) ret
= ctxt
->myDoc
;
13745 xmlFreeDoc(ctxt
->myDoc
);
13746 ctxt
->myDoc
= NULL
;
13750 xmlFreeParserCtxt(ctxt
);
13756 * xmlSAXParseMemory:
13757 * @sax: the SAX handler block
13758 * @buffer: an pointer to a char array
13759 * @size: the size of the array
13760 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13763 * parse an XML in-memory block and use the given SAX function block
13764 * to handle the parsing callback. If sax is NULL, fallback to the default
13765 * DOM tree building routines.
13767 * Returns the resulting document tree
13770 xmlSAXParseMemory(xmlSAXHandlerPtr sax
, const char *buffer
,
13771 int size
, int recovery
) {
13772 return xmlSAXParseMemoryWithData(sax
, buffer
, size
, recovery
, NULL
);
13777 * @buffer: an pointer to a char array
13778 * @size: the size of the array
13780 * parse an XML in-memory block and build a tree.
13782 * Returns the resulting document tree
13785 xmlDocPtr
xmlParseMemory(const char *buffer
, int size
) {
13786 return(xmlSAXParseMemory(NULL
, buffer
, size
, 0));
13790 * xmlRecoverMemory:
13791 * @buffer: an pointer to a char array
13792 * @size: the size of the array
13794 * parse an XML in-memory block and build a tree.
13795 * In the case the document is not Well Formed, an attempt to
13796 * build a tree is tried anyway
13798 * Returns the resulting document tree or NULL in case of error
13801 xmlDocPtr
xmlRecoverMemory(const char *buffer
, int size
) {
13802 return(xmlSAXParseMemory(NULL
, buffer
, size
, 1));
13806 * xmlSAXUserParseMemory:
13807 * @sax: a SAX handler
13808 * @user_data: The user data returned on SAX callbacks
13809 * @buffer: an in-memory XML document input
13810 * @size: the length of the XML document in bytes
13812 * A better SAX parsing routine.
13813 * parse an XML in-memory buffer and call the given SAX handler routines.
13815 * Returns 0 in case of success or a error number otherwise
13817 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax
, void *user_data
,
13818 const char *buffer
, int size
) {
13820 xmlParserCtxtPtr ctxt
;
13824 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
13825 if (ctxt
== NULL
) return -1;
13826 if (ctxt
->sax
!= (xmlSAXHandlerPtr
) &xmlDefaultSAXHandler
)
13827 xmlFree(ctxt
->sax
);
13829 xmlDetectSAX2(ctxt
);
13831 if (user_data
!= NULL
)
13832 ctxt
->userData
= user_data
;
13834 xmlParseDocument(ctxt
);
13836 if (ctxt
->wellFormed
)
13839 if (ctxt
->errNo
!= 0)
13846 if (ctxt
->myDoc
!= NULL
) {
13847 xmlFreeDoc(ctxt
->myDoc
);
13848 ctxt
->myDoc
= NULL
;
13850 xmlFreeParserCtxt(ctxt
);
13854 #endif /* LIBXML_SAX1_ENABLED */
13857 * xmlCreateDocParserCtxt:
13858 * @cur: a pointer to an array of xmlChar
13860 * Creates a parser context for an XML in-memory document.
13862 * Returns the new parser context or NULL
13865 xmlCreateDocParserCtxt(const xmlChar
*cur
) {
13870 len
= xmlStrlen(cur
);
13871 return(xmlCreateMemoryParserCtxt((const char *)cur
, len
));
13874 #ifdef LIBXML_SAX1_ENABLED
13877 * @sax: the SAX handler block
13878 * @cur: a pointer to an array of xmlChar
13879 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13882 * parse an XML in-memory document and build a tree.
13883 * It use the given SAX function block to handle the parsing callback.
13884 * If sax is NULL, fallback to the default DOM tree building routines.
13886 * Returns the resulting document tree
13890 xmlSAXParseDoc(xmlSAXHandlerPtr sax
, const xmlChar
*cur
, int recovery
) {
13892 xmlParserCtxtPtr ctxt
;
13893 xmlSAXHandlerPtr oldsax
= NULL
;
13895 if (cur
== NULL
) return(NULL
);
13898 ctxt
= xmlCreateDocParserCtxt(cur
);
13899 if (ctxt
== NULL
) return(NULL
);
13901 oldsax
= ctxt
->sax
;
13903 ctxt
->userData
= NULL
;
13905 xmlDetectSAX2(ctxt
);
13907 xmlParseDocument(ctxt
);
13908 if ((ctxt
->wellFormed
) || recovery
) ret
= ctxt
->myDoc
;
13911 xmlFreeDoc(ctxt
->myDoc
);
13912 ctxt
->myDoc
= NULL
;
13915 ctxt
->sax
= oldsax
;
13916 xmlFreeParserCtxt(ctxt
);
13923 * @cur: a pointer to an array of xmlChar
13925 * parse an XML in-memory document and build a tree.
13927 * Returns the resulting document tree
13931 xmlParseDoc(const xmlChar
*cur
) {
13932 return(xmlSAXParseDoc(NULL
, cur
, 0));
13934 #endif /* LIBXML_SAX1_ENABLED */
13936 #ifdef LIBXML_LEGACY_ENABLED
13937 /************************************************************************
13939 * Specific function to keep track of entities references *
13940 * and used by the XSLT debugger *
13942 ************************************************************************/
13944 static xmlEntityReferenceFunc xmlEntityRefFunc
= NULL
;
13947 * xmlAddEntityReference:
13948 * @ent : A valid entity
13949 * @firstNode : A valid first node for children of entity
13950 * @lastNode : A valid last node of children entity
13952 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13955 xmlAddEntityReference(xmlEntityPtr ent
, xmlNodePtr firstNode
,
13956 xmlNodePtr lastNode
)
13958 if (xmlEntityRefFunc
!= NULL
) {
13959 (*xmlEntityRefFunc
) (ent
, firstNode
, lastNode
);
13965 * xmlSetEntityReferenceFunc:
13966 * @func: A valid function
13968 * Set the function to call call back when a xml reference has been made
13971 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func
)
13973 xmlEntityRefFunc
= func
;
13975 #endif /* LIBXML_LEGACY_ENABLED */
13977 /************************************************************************
13981 ************************************************************************/
13983 #ifdef LIBXML_XPATH_ENABLED
13984 #include <libxml/xpath.h>
13987 extern void XMLCDECL
xmlGenericErrorDefaultFunc(void *ctx
, const char *msg
, ...);
13988 static int xmlParserInitialized
= 0;
13993 * Initialization function for the XML parser.
13994 * This is not reentrant. Call once before processing in case of
13995 * use in multithreaded programs.
13999 xmlInitParser(void) {
14000 if (xmlParserInitialized
!= 0)
14003 #ifdef LIBXML_THREAD_ENABLED
14004 __xmlGlobalInitMutexLock();
14005 if (xmlParserInitialized
== 0) {
14009 if ((xmlGenericError
== xmlGenericErrorDefaultFunc
) ||
14010 (xmlGenericError
== NULL
))
14011 initGenericErrorDefaultFunc(NULL
);
14013 xmlInitCharEncodingHandlers();
14014 xmlDefaultSAXHandlerInit();
14015 xmlRegisterDefaultInputCallbacks();
14016 #ifdef LIBXML_OUTPUT_ENABLED
14017 xmlRegisterDefaultOutputCallbacks();
14018 #endif /* LIBXML_OUTPUT_ENABLED */
14019 #ifdef LIBXML_HTML_ENABLED
14020 htmlInitAutoClose();
14021 htmlDefaultSAXHandlerInit();
14023 #ifdef LIBXML_XPATH_ENABLED
14026 xmlParserInitialized
= 1;
14027 #ifdef LIBXML_THREAD_ENABLED
14029 __xmlGlobalInitMutexUnlock();
14034 * xmlCleanupParser:
14036 * This function name is somewhat misleading. It does not clean up
14037 * parser state, it cleans up memory allocated by the library itself.
14038 * It is a cleanup function for the XML library. It tries to reclaim all
14039 * related global memory allocated for the library processing.
14040 * It doesn't deallocate any document related memory. One should
14041 * call xmlCleanupParser() only when the process has finished using
14042 * the library and all XML/HTML documents built with it.
14043 * See also xmlInitParser() which has the opposite function of preparing
14044 * the library for operations.
14046 * WARNING: if your application is multithreaded or has plugin support
14047 * calling this may crash the application if another thread or
14048 * a plugin is still using libxml2. It's sometimes very hard to
14049 * guess if libxml2 is in use in the application, some libraries
14050 * or plugins may use it without notice. In case of doubt abstain
14051 * from calling this function or do it just before calling exit()
14052 * to avoid leak reports from valgrind !
14056 xmlCleanupParser(void) {
14057 if (!xmlParserInitialized
)
14060 xmlCleanupCharEncodingHandlers();
14061 #ifdef LIBXML_CATALOG_ENABLED
14062 xmlCatalogCleanup();
14065 xmlCleanupInputCallbacks();
14066 #ifdef LIBXML_OUTPUT_ENABLED
14067 xmlCleanupOutputCallbacks();
14069 #ifdef LIBXML_SCHEMAS_ENABLED
14070 xmlSchemaCleanupTypes();
14071 xmlRelaxNGCleanupTypes();
14073 xmlCleanupGlobals();
14074 xmlResetLastError();
14075 xmlCleanupThreads(); /* must be last if called not from the main thread */
14076 xmlCleanupMemory();
14077 xmlParserInitialized
= 0;
14080 /************************************************************************
14082 * New set (2.6.0) of simpler and more flexible APIs *
14084 ************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is expanded; when it is NULL the string is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement: "DICT_FREE(x);" is safe inside an unbraced if/else
 * and no longer expands to a stray empty statement.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14100 * @ctxt: an XML parser context
14102 * Reset a parser context
14105 xmlCtxtReset(xmlParserCtxtPtr ctxt
)
14107 xmlParserInputPtr input
;
14115 while ((input
= inputPop(ctxt
)) != NULL
) { /* Non consuming */
14116 xmlFreeInputStream(input
);
14119 ctxt
->input
= NULL
;
14122 if (ctxt
->spaceTab
!= NULL
) {
14123 ctxt
->spaceTab
[0] = -1;
14124 ctxt
->space
= &ctxt
->spaceTab
[0];
14126 ctxt
->space
= NULL
;
14136 DICT_FREE(ctxt
->version
);
14137 ctxt
->version
= NULL
;
14138 DICT_FREE(ctxt
->encoding
);
14139 ctxt
->encoding
= NULL
;
14140 DICT_FREE(ctxt
->directory
);
14141 ctxt
->directory
= NULL
;
14142 DICT_FREE(ctxt
->extSubURI
);
14143 ctxt
->extSubURI
= NULL
;
14144 DICT_FREE(ctxt
->extSubSystem
);
14145 ctxt
->extSubSystem
= NULL
;
14146 if (ctxt
->myDoc
!= NULL
)
14147 xmlFreeDoc(ctxt
->myDoc
);
14148 ctxt
->myDoc
= NULL
;
14150 ctxt
->standalone
= -1;
14151 ctxt
->hasExternalSubset
= 0;
14152 ctxt
->hasPErefs
= 0;
14154 ctxt
->external
= 0;
14155 ctxt
->instate
= XML_PARSER_START
;
14158 ctxt
->wellFormed
= 1;
14159 ctxt
->nsWellFormed
= 1;
14160 ctxt
->disableSAX
= 0;
14163 ctxt
->vctxt
.userData
= ctxt
;
14164 ctxt
->vctxt
.error
= xmlParserValidityError
;
14165 ctxt
->vctxt
.warning
= xmlParserValidityWarning
;
14167 ctxt
->record_info
= 0;
14169 ctxt
->checkIndex
= 0;
14170 ctxt
->inSubset
= 0;
14171 ctxt
->errNo
= XML_ERR_OK
;
14173 ctxt
->charset
= XML_CHAR_ENCODING_UTF8
;
14174 ctxt
->catalogs
= NULL
;
14175 ctxt
->nbentities
= 0;
14176 ctxt
->sizeentities
= 0;
14177 xmlInitNodeInfoSeq(&ctxt
->node_seq
);
14179 if (ctxt
->attsDefault
!= NULL
) {
14180 xmlHashFree(ctxt
->attsDefault
, (xmlHashDeallocator
) xmlFree
);
14181 ctxt
->attsDefault
= NULL
;
14183 if (ctxt
->attsSpecial
!= NULL
) {
14184 xmlHashFree(ctxt
->attsSpecial
, NULL
);
14185 ctxt
->attsSpecial
= NULL
;
14188 #ifdef LIBXML_CATALOG_ENABLED
14189 if (ctxt
->catalogs
!= NULL
)
14190 xmlCatalogFreeLocal(ctxt
->catalogs
);
14192 if (ctxt
->lastError
.code
!= XML_ERR_OK
)
14193 xmlResetError(&ctxt
->lastError
);
14197 * xmlCtxtResetPush:
14198 * @ctxt: an XML parser context
14199 * @chunk: a pointer to an array of chars
14200 * @size: number of chars in the array
14201 * @filename: an optional file name or URI
14202 * @encoding: the document encoding, or NULL
14204 * Reset a push parser context
14206 * Returns 0 in case of success and 1 in case of error
14209 xmlCtxtResetPush(xmlParserCtxtPtr ctxt
, const char *chunk
,
14210 int size
, const char *filename
, const char *encoding
)
14212 xmlParserInputPtr inputStream
;
14213 xmlParserInputBufferPtr buf
;
14214 xmlCharEncoding enc
= XML_CHAR_ENCODING_NONE
;
14219 if ((encoding
== NULL
) && (chunk
!= NULL
) && (size
>= 4))
14220 enc
= xmlDetectCharEncoding((const xmlChar
*) chunk
, size
);
14222 buf
= xmlAllocParserInputBuffer(enc
);
14226 if (ctxt
== NULL
) {
14227 xmlFreeParserInputBuffer(buf
);
14231 xmlCtxtReset(ctxt
);
14233 if (ctxt
->pushTab
== NULL
) {
14234 ctxt
->pushTab
= (void **) xmlMalloc(ctxt
->nameMax
* 3 *
14235 sizeof(xmlChar
*));
14236 if (ctxt
->pushTab
== NULL
) {
14237 xmlErrMemory(ctxt
, NULL
);
14238 xmlFreeParserInputBuffer(buf
);
14243 if (filename
== NULL
) {
14244 ctxt
->directory
= NULL
;
14246 ctxt
->directory
= xmlParserGetDirectory(filename
);
14249 inputStream
= xmlNewInputStream(ctxt
);
14250 if (inputStream
== NULL
) {
14251 xmlFreeParserInputBuffer(buf
);
14255 if (filename
== NULL
)
14256 inputStream
->filename
= NULL
;
14258 inputStream
->filename
= (char *)
14259 xmlCanonicPath((const xmlChar
*) filename
);
14260 inputStream
->buf
= buf
;
14261 inputStream
->base
= inputStream
->buf
->buffer
->content
;
14262 inputStream
->cur
= inputStream
->buf
->buffer
->content
;
14264 &inputStream
->buf
->buffer
->content
[inputStream
->buf
->buffer
->use
];
14266 inputPush(ctxt
, inputStream
);
14268 if ((size
> 0) && (chunk
!= NULL
) && (ctxt
->input
!= NULL
) &&
14269 (ctxt
->input
->buf
!= NULL
)) {
14270 int base
= ctxt
->input
->base
- ctxt
->input
->buf
->buffer
->content
;
14271 int cur
= ctxt
->input
->cur
- ctxt
->input
->base
;
14273 xmlParserInputBufferPush(ctxt
->input
->buf
, size
, chunk
);
14275 ctxt
->input
->base
= ctxt
->input
->buf
->buffer
->content
+ base
;
14276 ctxt
->input
->cur
= ctxt
->input
->base
+ cur
;
14278 &ctxt
->input
->buf
->buffer
->content
[ctxt
->input
->buf
->buffer
->
14281 xmlGenericError(xmlGenericErrorContext
, "PP: pushed %d\n", size
);
14285 if (encoding
!= NULL
) {
14286 xmlCharEncodingHandlerPtr hdlr
;
14288 if (ctxt
->encoding
!= NULL
)
14289 xmlFree((xmlChar
*) ctxt
->encoding
);
14290 ctxt
->encoding
= xmlStrdup((const xmlChar
*) encoding
);
14292 hdlr
= xmlFindCharEncodingHandler(encoding
);
14293 if (hdlr
!= NULL
) {
14294 xmlSwitchToEncoding(ctxt
, hdlr
);
14296 xmlFatalErrMsgStr(ctxt
, XML_ERR_UNSUPPORTED_ENCODING
,
14297 "Unsupported encoding %s\n", BAD_CAST encoding
);
14299 } else if (enc
!= XML_CHAR_ENCODING_NONE
) {
14300 xmlSwitchEncoding(ctxt
, enc
);
14308 * xmlCtxtUseOptionsInternal:
14309 * @ctxt: an XML parser context
14310 * @options: a combination of xmlParserOption
14311 * @encoding: the user provided encoding to use
14313 * Applies the options to the parser context
14315 * Returns 0 in case of success, the set of unknown or unimplemented options
14316 * in case of error.
14319 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt
, int options
, const char *encoding
)
14323 if (encoding
!= NULL
) {
14324 if (ctxt
->encoding
!= NULL
)
14325 xmlFree((xmlChar
*) ctxt
->encoding
);
14326 ctxt
->encoding
= xmlStrdup((const xmlChar
*) encoding
);
14328 if (options
& XML_PARSE_RECOVER
) {
14329 ctxt
->recovery
= 1;
14330 options
-= XML_PARSE_RECOVER
;
14331 ctxt
->options
|= XML_PARSE_RECOVER
;
14333 ctxt
->recovery
= 0;
14334 if (options
& XML_PARSE_DTDLOAD
) {
14335 ctxt
->loadsubset
= XML_DETECT_IDS
;
14336 options
-= XML_PARSE_DTDLOAD
;
14337 ctxt
->options
|= XML_PARSE_DTDLOAD
;
14339 ctxt
->loadsubset
= 0;
14340 if (options
& XML_PARSE_DTDATTR
) {
14341 ctxt
->loadsubset
|= XML_COMPLETE_ATTRS
;
14342 options
-= XML_PARSE_DTDATTR
;
14343 ctxt
->options
|= XML_PARSE_DTDATTR
;
14345 if (options
& XML_PARSE_NOENT
) {
14346 ctxt
->replaceEntities
= 1;
14347 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14348 options
-= XML_PARSE_NOENT
;
14349 ctxt
->options
|= XML_PARSE_NOENT
;
14351 ctxt
->replaceEntities
= 0;
14352 if (options
& XML_PARSE_PEDANTIC
) {
14353 ctxt
->pedantic
= 1;
14354 options
-= XML_PARSE_PEDANTIC
;
14355 ctxt
->options
|= XML_PARSE_PEDANTIC
;
14357 ctxt
->pedantic
= 0;
14358 if (options
& XML_PARSE_NOBLANKS
) {
14359 ctxt
->keepBlanks
= 0;
14360 ctxt
->sax
->ignorableWhitespace
= xmlSAX2IgnorableWhitespace
;
14361 options
-= XML_PARSE_NOBLANKS
;
14362 ctxt
->options
|= XML_PARSE_NOBLANKS
;
14364 ctxt
->keepBlanks
= 1;
14365 if (options
& XML_PARSE_DTDVALID
) {
14366 ctxt
->validate
= 1;
14367 if (options
& XML_PARSE_NOWARNING
)
14368 ctxt
->vctxt
.warning
= NULL
;
14369 if (options
& XML_PARSE_NOERROR
)
14370 ctxt
->vctxt
.error
= NULL
;
14371 options
-= XML_PARSE_DTDVALID
;
14372 ctxt
->options
|= XML_PARSE_DTDVALID
;
14374 ctxt
->validate
= 0;
14375 if (options
& XML_PARSE_NOWARNING
) {
14376 ctxt
->sax
->warning
= NULL
;
14377 options
-= XML_PARSE_NOWARNING
;
14379 if (options
& XML_PARSE_NOERROR
) {
14380 ctxt
->sax
->error
= NULL
;
14381 ctxt
->sax
->fatalError
= NULL
;
14382 options
-= XML_PARSE_NOERROR
;
14384 #ifdef LIBXML_SAX1_ENABLED
14385 if (options
& XML_PARSE_SAX1
) {
14386 ctxt
->sax
->startElement
= xmlSAX2StartElement
;
14387 ctxt
->sax
->endElement
= xmlSAX2EndElement
;
14388 ctxt
->sax
->startElementNs
= NULL
;
14389 ctxt
->sax
->endElementNs
= NULL
;
14390 ctxt
->sax
->initialized
= 1;
14391 options
-= XML_PARSE_SAX1
;
14392 ctxt
->options
|= XML_PARSE_SAX1
;
14394 #endif /* LIBXML_SAX1_ENABLED */
14395 if (options
& XML_PARSE_NODICT
) {
14396 ctxt
->dictNames
= 0;
14397 options
-= XML_PARSE_NODICT
;
14398 ctxt
->options
|= XML_PARSE_NODICT
;
14400 ctxt
->dictNames
= 1;
14402 if (options
& XML_PARSE_NOCDATA
) {
14403 ctxt
->sax
->cdataBlock
= NULL
;
14404 options
-= XML_PARSE_NOCDATA
;
14405 ctxt
->options
|= XML_PARSE_NOCDATA
;
14407 if (options
& XML_PARSE_NSCLEAN
) {
14408 ctxt
->options
|= XML_PARSE_NSCLEAN
;
14409 options
-= XML_PARSE_NSCLEAN
;
14411 if (options
& XML_PARSE_NONET
) {
14412 ctxt
->options
|= XML_PARSE_NONET
;
14413 options
-= XML_PARSE_NONET
;
14415 if (options
& XML_PARSE_COMPACT
) {
14416 ctxt
->options
|= XML_PARSE_COMPACT
;
14417 options
-= XML_PARSE_COMPACT
;
14419 if (options
& XML_PARSE_OLD10
) {
14420 ctxt
->options
|= XML_PARSE_OLD10
;
14421 options
-= XML_PARSE_OLD10
;
14423 if (options
& XML_PARSE_NOBASEFIX
) {
14424 ctxt
->options
|= XML_PARSE_NOBASEFIX
;
14425 options
-= XML_PARSE_NOBASEFIX
;
14427 if (options
& XML_PARSE_HUGE
) {
14428 ctxt
->options
|= XML_PARSE_HUGE
;
14429 options
-= XML_PARSE_HUGE
;
14431 if (options
& XML_PARSE_OLDSAX
) {
14432 ctxt
->options
|= XML_PARSE_OLDSAX
;
14433 options
-= XML_PARSE_OLDSAX
;
14435 ctxt
->linenumbers
= 1;
14440 * xmlCtxtUseOptions:
14441 * @ctxt: an XML parser context
14442 * @options: a combination of xmlParserOption
14444 * Applies the options to the parser context
14446 * Returns 0 in case of success, the set of unknown or unimplemented options
14447 * in case of error.
14450 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt
, int options
)
14452 return(xmlCtxtUseOptionsInternal(ctxt
, options
, NULL
));
14457 * @ctxt: an XML parser context
14458 * @URL: the base URL to use for the document
14459 * @encoding: the document encoding, or NULL
14460 * @options: a combination of xmlParserOption
14461 * @reuse: keep the context for reuse
14463 * Common front-end for the xmlRead functions
14465 * Returns the resulting document tree or NULL
14468 xmlDoRead(xmlParserCtxtPtr ctxt
, const char *URL
, const char *encoding
,
14469 int options
, int reuse
)
14473 xmlCtxtUseOptionsInternal(ctxt
, options
, encoding
);
14474 if (encoding
!= NULL
) {
14475 xmlCharEncodingHandlerPtr hdlr
;
14477 hdlr
= xmlFindCharEncodingHandler(encoding
);
14479 xmlSwitchToEncoding(ctxt
, hdlr
);
14481 if ((URL
!= NULL
) && (ctxt
->input
!= NULL
) &&
14482 (ctxt
->input
->filename
== NULL
))
14483 ctxt
->input
->filename
= (char *) xmlStrdup((const xmlChar
*) URL
);
14484 xmlParseDocument(ctxt
);
14485 if ((ctxt
->wellFormed
) || ctxt
->recovery
)
14489 if (ctxt
->myDoc
!= NULL
) {
14490 xmlFreeDoc(ctxt
->myDoc
);
14493 ctxt
->myDoc
= NULL
;
14495 xmlFreeParserCtxt(ctxt
);
14503 * @cur: a pointer to a zero terminated string
14504 * @URL: the base URL to use for the document
14505 * @encoding: the document encoding, or NULL
14506 * @options: a combination of xmlParserOption
14508 * parse an XML in-memory document and build a tree.
14510 * Returns the resulting document tree
14513 xmlReadDoc(const xmlChar
* cur
, const char *URL
, const char *encoding
, int options
)
14515 xmlParserCtxtPtr ctxt
;
14520 ctxt
= xmlCreateDocParserCtxt(cur
);
14523 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
14528 * @filename: a file or URL
14529 * @encoding: the document encoding, or NULL
14530 * @options: a combination of xmlParserOption
14532 * parse an XML file from the filesystem or the network.
14534 * Returns the resulting document tree
14537 xmlReadFile(const char *filename
, const char *encoding
, int options
)
14539 xmlParserCtxtPtr ctxt
;
14541 ctxt
= xmlCreateURLParserCtxt(filename
, options
);
14544 return (xmlDoRead(ctxt
, NULL
, encoding
, options
, 0));
14549 * @buffer: a pointer to a char array
14550 * @size: the size of the array
14551 * @URL: the base URL to use for the document
14552 * @encoding: the document encoding, or NULL
14553 * @options: a combination of xmlParserOption
14555 * parse an XML in-memory document and build a tree.
14557 * Returns the resulting document tree
14560 xmlReadMemory(const char *buffer
, int size
, const char *URL
, const char *encoding
, int options
)
14562 xmlParserCtxtPtr ctxt
;
14564 ctxt
= xmlCreateMemoryParserCtxt(buffer
, size
);
14567 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
14572 * @fd: an open file descriptor
14573 * @URL: the base URL to use for the document
14574 * @encoding: the document encoding, or NULL
14575 * @options: a combination of xmlParserOption
14577 * parse an XML from a file descriptor and build a tree.
14578 * NOTE that the file descriptor will not be closed when the
14579 * reader is closed or reset.
14581 * Returns the resulting document tree
14584 xmlReadFd(int fd
, const char *URL
, const char *encoding
, int options
)
14586 xmlParserCtxtPtr ctxt
;
14587 xmlParserInputBufferPtr input
;
14588 xmlParserInputPtr stream
;
14593 input
= xmlParserInputBufferCreateFd(fd
, XML_CHAR_ENCODING_NONE
);
14596 input
->closecallback
= NULL
;
14597 ctxt
= xmlNewParserCtxt();
14598 if (ctxt
== NULL
) {
14599 xmlFreeParserInputBuffer(input
);
14602 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
14603 if (stream
== NULL
) {
14604 xmlFreeParserInputBuffer(input
);
14605 xmlFreeParserCtxt(ctxt
);
14608 inputPush(ctxt
, stream
);
14609 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
14614 * @ioread: an I/O read function
14615 * @ioclose: an I/O close function
14616 * @ioctx: an I/O handler
14617 * @URL: the base URL to use for the document
14618 * @encoding: the document encoding, or NULL
14619 * @options: a combination of xmlParserOption
14621 * parse an XML document from I/O functions and source and build a tree.
14623 * Returns the resulting document tree
14626 xmlReadIO(xmlInputReadCallback ioread
, xmlInputCloseCallback ioclose
,
14627 void *ioctx
, const char *URL
, const char *encoding
, int options
)
14629 xmlParserCtxtPtr ctxt
;
14630 xmlParserInputBufferPtr input
;
14631 xmlParserInputPtr stream
;
14633 if (ioread
== NULL
)
14636 input
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
,
14637 XML_CHAR_ENCODING_NONE
);
14640 ctxt
= xmlNewParserCtxt();
14641 if (ctxt
== NULL
) {
14642 xmlFreeParserInputBuffer(input
);
14645 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
14646 if (stream
== NULL
) {
14647 xmlFreeParserInputBuffer(input
);
14648 xmlFreeParserCtxt(ctxt
);
14651 inputPush(ctxt
, stream
);
14652 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 0));
14657 * @ctxt: an XML parser context
14658 * @cur: a pointer to a zero terminated string
14659 * @URL: the base URL to use for the document
14660 * @encoding: the document encoding, or NULL
14661 * @options: a combination of xmlParserOption
14663 * parse an XML in-memory document and build a tree.
14664 * This reuses the existing @ctxt parser context
14666 * Returns the resulting document tree
14669 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt
, const xmlChar
* cur
,
14670 const char *URL
, const char *encoding
, int options
)
14672 xmlParserInputPtr stream
;
14679 xmlCtxtReset(ctxt
);
14681 stream
= xmlNewStringInputStream(ctxt
, cur
);
14682 if (stream
== NULL
) {
14685 inputPush(ctxt
, stream
);
14686 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
14691 * @ctxt: an XML parser context
14692 * @filename: a file or URL
14693 * @encoding: the document encoding, or NULL
14694 * @options: a combination of xmlParserOption
14696 * parse an XML file from the filesystem or the network.
14697 * This reuses the existing @ctxt parser context
14699 * Returns the resulting document tree
14702 xmlCtxtReadFile(xmlParserCtxtPtr ctxt
, const char *filename
,
14703 const char *encoding
, int options
)
14705 xmlParserInputPtr stream
;
14707 if (filename
== NULL
)
14712 xmlCtxtReset(ctxt
);
14714 stream
= xmlLoadExternalEntity(filename
, NULL
, ctxt
);
14715 if (stream
== NULL
) {
14718 inputPush(ctxt
, stream
);
14719 return (xmlDoRead(ctxt
, NULL
, encoding
, options
, 1));
14723 * xmlCtxtReadMemory:
14724 * @ctxt: an XML parser context
14725 * @buffer: a pointer to a char array
14726 * @size: the size of the array
14727 * @URL: the base URL to use for the document
14728 * @encoding: the document encoding, or NULL
14729 * @options: a combination of xmlParserOption
14731 * parse an XML in-memory document and build a tree.
14732 * This reuses the existing @ctxt parser context
14734 * Returns the resulting document tree
14737 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt
, const char *buffer
, int size
,
14738 const char *URL
, const char *encoding
, int options
)
14740 xmlParserInputBufferPtr input
;
14741 xmlParserInputPtr stream
;
14745 if (buffer
== NULL
)
14748 xmlCtxtReset(ctxt
);
14750 input
= xmlParserInputBufferCreateMem(buffer
, size
, XML_CHAR_ENCODING_NONE
);
14751 if (input
== NULL
) {
14755 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
14756 if (stream
== NULL
) {
14757 xmlFreeParserInputBuffer(input
);
14761 inputPush(ctxt
, stream
);
14762 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
14767 * @ctxt: an XML parser context
14768 * @fd: an open file descriptor
14769 * @URL: the base URL to use for the document
14770 * @encoding: the document encoding, or NULL
14771 * @options: a combination of xmlParserOption
14773 * parse an XML from a file descriptor and build a tree.
14774 * This reuses the existing @ctxt parser context
14775 * NOTE that the file descriptor will not be closed when the
14776 * reader is closed or reset.
14778 * Returns the resulting document tree
14781 xmlCtxtReadFd(xmlParserCtxtPtr ctxt
, int fd
,
14782 const char *URL
, const char *encoding
, int options
)
14784 xmlParserInputBufferPtr input
;
14785 xmlParserInputPtr stream
;
14792 xmlCtxtReset(ctxt
);
14795 input
= xmlParserInputBufferCreateFd(fd
, XML_CHAR_ENCODING_NONE
);
14798 input
->closecallback
= NULL
;
14799 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
14800 if (stream
== NULL
) {
14801 xmlFreeParserInputBuffer(input
);
14804 inputPush(ctxt
, stream
);
14805 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
14810 * @ctxt: an XML parser context
14811 * @ioread: an I/O read function
14812 * @ioclose: an I/O close function
14813 * @ioctx: an I/O handler
14814 * @URL: the base URL to use for the document
14815 * @encoding: the document encoding, or NULL
14816 * @options: a combination of xmlParserOption
14818 * parse an XML document from I/O functions and source and build a tree.
14819 * This reuses the existing @ctxt parser context
14821 * Returns the resulting document tree
14824 xmlCtxtReadIO(xmlParserCtxtPtr ctxt
, xmlInputReadCallback ioread
,
14825 xmlInputCloseCallback ioclose
, void *ioctx
,
14827 const char *encoding
, int options
)
14829 xmlParserInputBufferPtr input
;
14830 xmlParserInputPtr stream
;
14832 if (ioread
== NULL
)
14837 xmlCtxtReset(ctxt
);
14839 input
= xmlParserInputBufferCreateIO(ioread
, ioclose
, ioctx
,
14840 XML_CHAR_ENCODING_NONE
);
14843 stream
= xmlNewIOInputStream(ctxt
, input
, XML_CHAR_ENCODING_NONE
);
14844 if (stream
== NULL
) {
14845 xmlFreeParserInputBuffer(input
);
14848 inputPush(ctxt
, stream
);
14849 return (xmlDoRead(ctxt
, URL
, encoding
, options
, 1));
14852 #define bottom_parser
14853 #include "elfgcchack.h"