[LIBXML2] Update to version 2.9.8. CORE-15280
[reactos.git] / sdk / lib / 3rdparty / libxml2 / parser.c
1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37
38 #define IN_LIBXML
39 #include "libxml.h"
40
41 #if defined(_WIN32) && !defined (__CYGWIN__)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <libxml/xmlmemory.h>
53 #include <libxml/threads.h>
54 #include <libxml/globals.h>
55 #include <libxml/tree.h>
56 #include <libxml/parser.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #ifdef LIBXML_CATALOG_ENABLED
65 #include <libxml/catalog.h>
66 #endif
67 #ifdef LIBXML_SCHEMAS_ENABLED
68 #include <libxml/xmlschemastypes.h>
69 #include <libxml/relaxng.h>
70 #endif
71 #ifdef HAVE_CTYPE_H
72 #include <ctype.h>
73 #endif
74 #ifdef HAVE_STDLIB_H
75 #include <stdlib.h>
76 #endif
77 #ifdef HAVE_SYS_STAT_H
78 #include <sys/stat.h>
79 #endif
80 #ifdef HAVE_FCNTL_H
81 #include <fcntl.h>
82 #endif
83 #ifdef HAVE_UNISTD_H
84 #include <unistd.h>
85 #endif
86
87 #include "buf.h"
88 #include "enc.h"
89
90 static void
91 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
93 static xmlParserCtxtPtr
94 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
97 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
98
99 /************************************************************************
100 * *
101 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
102 * *
103 ************************************************************************/
104
105 #define XML_PARSER_BIG_ENTITY 1000
106 #define XML_PARSER_LOT_ENTITY 5000
107
108 /*
109 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
110 * replacement over the size in bytes of the input indicates that you have
111 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
112 * replacements per byte of input.
113 */
114 #define XML_PARSER_NON_LINEAR 10
115
116 /*
117 * xmlParserEntityCheck
118 *
119 * Function to check non-linear entity expansion behaviour
120 * This is here to detect and stop exponential linear entity expansion
121 * This is not a limitation of the parser but a safety
122 * boundary feature. It can be disabled with the XML_PARSE_HUGE
123 * parser option.
124 */
125 static int
126 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
127 xmlEntityPtr ent, size_t replacement)
128 {
129 size_t consumed = 0;
130
131 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
132 return (0);
133 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
134 return (1);
135
136 /*
137 * This may look absurd but is needed to detect
138 * entities problems
139 */
140 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
141 (ent->content != NULL) && (ent->checked == 0) &&
142 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
143 unsigned long oldnbent = ctxt->nbentities;
144 xmlChar *rep;
145
146 ent->checked = 1;
147
148 ++ctxt->depth;
149 rep = xmlStringDecodeEntities(ctxt, ent->content,
150 XML_SUBSTITUTE_REF, 0, 0, 0);
151 --ctxt->depth;
152 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
153 ent->content[0] = 0;
154 }
155
156 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
157 if (rep != NULL) {
158 if (xmlStrchr(rep, '<'))
159 ent->checked |= 1;
160 xmlFree(rep);
161 rep = NULL;
162 }
163 }
164 if (replacement != 0) {
165 if (replacement < XML_MAX_TEXT_LENGTH)
166 return(0);
167
168 /*
169 * If the volume of entity copy reaches 10 times the
170 * amount of parsed data and over the large text threshold
171 * then that's very likely to be an abuse.
172 */
173 if (ctxt->input != NULL) {
174 consumed = ctxt->input->consumed +
175 (ctxt->input->cur - ctxt->input->base);
176 }
177 consumed += ctxt->sizeentities;
178
179 if (replacement < XML_PARSER_NON_LINEAR * consumed)
180 return(0);
181 } else if (size != 0) {
182 /*
183 * Do the check based on the replacement size of the entity
184 */
185 if (size < XML_PARSER_BIG_ENTITY)
186 return(0);
187
188 /*
189 * A limit on the amount of text data reasonably used
190 */
191 if (ctxt->input != NULL) {
192 consumed = ctxt->input->consumed +
193 (ctxt->input->cur - ctxt->input->base);
194 }
195 consumed += ctxt->sizeentities;
196
197 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
198 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
199 return (0);
200 } else if (ent != NULL) {
201 /*
202 * use the number of parsed entities in the replacement
203 */
204 size = ent->checked / 2;
205
206 /*
207 * The amount of data parsed counting entities size only once
208 */
209 if (ctxt->input != NULL) {
210 consumed = ctxt->input->consumed +
211 (ctxt->input->cur - ctxt->input->base);
212 }
213 consumed += ctxt->sizeentities;
214
215 /*
216 * Check the density of entities for the amount of data
217 * knowing an entity reference will take at least 3 bytes
218 */
219 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
220 return (0);
221 } else {
222 /*
223 * strange we got no data for checking
224 */
225 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
226 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
227 (ctxt->nbentities <= 10000))
228 return (0);
229 }
230 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
231 return (1);
232 }
233
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process; the default value is 256. This is not a limitation of the
 * parser but a safety boundary feature. It can be disabled with the
 * XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
243
244
245
246 #define SAX2 1
247 #define XML_PARSER_BIG_BUFFER_SIZE 300
248 #define XML_PARSER_BUFFER_SIZE 100
249 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
250
251 /**
252 * XML_PARSER_CHUNK_SIZE
253 *
254 * When calling GROW, this is the minimal amount of data
255 * the parser expects to have received. It is not a hard
256 * limit but an optimization when reading strings like Names.
257 * It is not strictly needed as long as the input's available characters
258 * are followed by 0, which should be provided by the I/O level.
259 */
260 #define XML_PARSER_CHUNK_SIZE 100
261
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet", "xml-model",
    NULL    /* end-of-list marker */
};
271
272
273 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
274 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
275 const xmlChar **str);
276
277 static xmlParserErrors
278 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
279 xmlSAXHandlerPtr sax,
280 void *user_data, int depth, const xmlChar *URL,
281 const xmlChar *ID, xmlNodePtr *list);
282
283 static int
284 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
285 const char *encoding);
286 #ifdef LIBXML_LEGACY_ENABLED
287 static void
288 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
289 xmlNodePtr lastNode);
290 #endif /* LIBXML_LEGACY_ENABLED */
291
292 static xmlParserErrors
293 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
294 const xmlChar *string, void *user_data, xmlNodePtr *lst);
295
296 static int
297 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
298
299 /************************************************************************
300 * *
301 * Some factorized error routines *
302 * *
303 ************************************************************************/
304
305 /**
306 * xmlErrAttributeDup:
307 * @ctxt: an XML parser context
308 * @prefix: the attribute prefix
309 * @localname: the attribute localname
310 *
311 * Handle a redefinition of attribute error
312 */
313 static void
314 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
315 const xmlChar * localname)
316 {
317 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
318 (ctxt->instate == XML_PARSER_EOF))
319 return;
320 if (ctxt != NULL)
321 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
322
323 if (prefix == NULL)
324 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
325 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
326 (const char *) localname, NULL, NULL, 0, 0,
327 "Attribute %s redefined\n", localname);
328 else
329 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
330 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
331 (const char *) prefix, (const char *) localname,
332 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
333 localname);
334 if (ctxt != NULL) {
335 ctxt->wellFormed = 0;
336 if (ctxt->recovery == 0)
337 ctxt->disableSAX = 1;
338 }
339 }
340
341 /**
342 * xmlFatalErr:
343 * @ctxt: an XML parser context
344 * @error: the error number
345 * @extra: extra information string
346 *
347 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
348 */
349 static void
350 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
351 {
352 const char *errmsg;
353
354 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
355 (ctxt->instate == XML_PARSER_EOF))
356 return;
357 switch (error) {
358 case XML_ERR_INVALID_HEX_CHARREF:
359 errmsg = "CharRef: invalid hexadecimal value";
360 break;
361 case XML_ERR_INVALID_DEC_CHARREF:
362 errmsg = "CharRef: invalid decimal value";
363 break;
364 case XML_ERR_INVALID_CHARREF:
365 errmsg = "CharRef: invalid value";
366 break;
367 case XML_ERR_INTERNAL_ERROR:
368 errmsg = "internal error";
369 break;
370 case XML_ERR_PEREF_AT_EOF:
371 errmsg = "PEReference at end of document";
372 break;
373 case XML_ERR_PEREF_IN_PROLOG:
374 errmsg = "PEReference in prolog";
375 break;
376 case XML_ERR_PEREF_IN_EPILOG:
377 errmsg = "PEReference in epilog";
378 break;
379 case XML_ERR_PEREF_NO_NAME:
380 errmsg = "PEReference: no name";
381 break;
382 case XML_ERR_PEREF_SEMICOL_MISSING:
383 errmsg = "PEReference: expecting ';'";
384 break;
385 case XML_ERR_ENTITY_LOOP:
386 errmsg = "Detected an entity reference loop";
387 break;
388 case XML_ERR_ENTITY_NOT_STARTED:
389 errmsg = "EntityValue: \" or ' expected";
390 break;
391 case XML_ERR_ENTITY_PE_INTERNAL:
392 errmsg = "PEReferences forbidden in internal subset";
393 break;
394 case XML_ERR_ENTITY_NOT_FINISHED:
395 errmsg = "EntityValue: \" or ' expected";
396 break;
397 case XML_ERR_ATTRIBUTE_NOT_STARTED:
398 errmsg = "AttValue: \" or ' expected";
399 break;
400 case XML_ERR_LT_IN_ATTRIBUTE:
401 errmsg = "Unescaped '<' not allowed in attributes values";
402 break;
403 case XML_ERR_LITERAL_NOT_STARTED:
404 errmsg = "SystemLiteral \" or ' expected";
405 break;
406 case XML_ERR_LITERAL_NOT_FINISHED:
407 errmsg = "Unfinished System or Public ID \" or ' expected";
408 break;
409 case XML_ERR_MISPLACED_CDATA_END:
410 errmsg = "Sequence ']]>' not allowed in content";
411 break;
412 case XML_ERR_URI_REQUIRED:
413 errmsg = "SYSTEM or PUBLIC, the URI is missing";
414 break;
415 case XML_ERR_PUBID_REQUIRED:
416 errmsg = "PUBLIC, the Public Identifier is missing";
417 break;
418 case XML_ERR_HYPHEN_IN_COMMENT:
419 errmsg = "Comment must not contain '--' (double-hyphen)";
420 break;
421 case XML_ERR_PI_NOT_STARTED:
422 errmsg = "xmlParsePI : no target name";
423 break;
424 case XML_ERR_RESERVED_XML_NAME:
425 errmsg = "Invalid PI name";
426 break;
427 case XML_ERR_NOTATION_NOT_STARTED:
428 errmsg = "NOTATION: Name expected here";
429 break;
430 case XML_ERR_NOTATION_NOT_FINISHED:
431 errmsg = "'>' required to close NOTATION declaration";
432 break;
433 case XML_ERR_VALUE_REQUIRED:
434 errmsg = "Entity value required";
435 break;
436 case XML_ERR_URI_FRAGMENT:
437 errmsg = "Fragment not allowed";
438 break;
439 case XML_ERR_ATTLIST_NOT_STARTED:
440 errmsg = "'(' required to start ATTLIST enumeration";
441 break;
442 case XML_ERR_NMTOKEN_REQUIRED:
443 errmsg = "NmToken expected in ATTLIST enumeration";
444 break;
445 case XML_ERR_ATTLIST_NOT_FINISHED:
446 errmsg = "')' required to finish ATTLIST enumeration";
447 break;
448 case XML_ERR_MIXED_NOT_STARTED:
449 errmsg = "MixedContentDecl : '|' or ')*' expected";
450 break;
451 case XML_ERR_PCDATA_REQUIRED:
452 errmsg = "MixedContentDecl : '#PCDATA' expected";
453 break;
454 case XML_ERR_ELEMCONTENT_NOT_STARTED:
455 errmsg = "ContentDecl : Name or '(' expected";
456 break;
457 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
458 errmsg = "ContentDecl : ',' '|' or ')' expected";
459 break;
460 case XML_ERR_PEREF_IN_INT_SUBSET:
461 errmsg =
462 "PEReference: forbidden within markup decl in internal subset";
463 break;
464 case XML_ERR_GT_REQUIRED:
465 errmsg = "expected '>'";
466 break;
467 case XML_ERR_CONDSEC_INVALID:
468 errmsg = "XML conditional section '[' expected";
469 break;
470 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
471 errmsg = "Content error in the external subset";
472 break;
473 case XML_ERR_CONDSEC_INVALID_KEYWORD:
474 errmsg =
475 "conditional section INCLUDE or IGNORE keyword expected";
476 break;
477 case XML_ERR_CONDSEC_NOT_FINISHED:
478 errmsg = "XML conditional section not closed";
479 break;
480 case XML_ERR_XMLDECL_NOT_STARTED:
481 errmsg = "Text declaration '<?xml' required";
482 break;
483 case XML_ERR_XMLDECL_NOT_FINISHED:
484 errmsg = "parsing XML declaration: '?>' expected";
485 break;
486 case XML_ERR_EXT_ENTITY_STANDALONE:
487 errmsg = "external parsed entities cannot be standalone";
488 break;
489 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
490 errmsg = "EntityRef: expecting ';'";
491 break;
492 case XML_ERR_DOCTYPE_NOT_FINISHED:
493 errmsg = "DOCTYPE improperly terminated";
494 break;
495 case XML_ERR_LTSLASH_REQUIRED:
496 errmsg = "EndTag: '</' not found";
497 break;
498 case XML_ERR_EQUAL_REQUIRED:
499 errmsg = "expected '='";
500 break;
501 case XML_ERR_STRING_NOT_CLOSED:
502 errmsg = "String not closed expecting \" or '";
503 break;
504 case XML_ERR_STRING_NOT_STARTED:
505 errmsg = "String not started expecting ' or \"";
506 break;
507 case XML_ERR_ENCODING_NAME:
508 errmsg = "Invalid XML encoding name";
509 break;
510 case XML_ERR_STANDALONE_VALUE:
511 errmsg = "standalone accepts only 'yes' or 'no'";
512 break;
513 case XML_ERR_DOCUMENT_EMPTY:
514 errmsg = "Document is empty";
515 break;
516 case XML_ERR_DOCUMENT_END:
517 errmsg = "Extra content at the end of the document";
518 break;
519 case XML_ERR_NOT_WELL_BALANCED:
520 errmsg = "chunk is not well balanced";
521 break;
522 case XML_ERR_EXTRA_CONTENT:
523 errmsg = "extra content at the end of well balanced chunk";
524 break;
525 case XML_ERR_VERSION_MISSING:
526 errmsg = "Malformed declaration expecting version";
527 break;
528 case XML_ERR_NAME_TOO_LONG:
529 errmsg = "Name too long use XML_PARSE_HUGE option";
530 break;
531 #if 0
532 case:
533 errmsg = "";
534 break;
535 #endif
536 default:
537 errmsg = "Unregistered error message";
538 }
539 if (ctxt != NULL)
540 ctxt->errNo = error;
541 if (info == NULL) {
542 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
543 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
544 errmsg);
545 } else {
546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
547 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
548 errmsg, info);
549 }
550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
555 }
556
557 /**
558 * xmlFatalErrMsg:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
565 static void LIBXML_ATTR_FORMAT(3,0)
566 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg)
568 {
569 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
570 (ctxt->instate == XML_PARSER_EOF))
571 return;
572 if (ctxt != NULL)
573 ctxt->errNo = error;
574 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
575 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
576 if (ctxt != NULL) {
577 ctxt->wellFormed = 0;
578 if (ctxt->recovery == 0)
579 ctxt->disableSAX = 1;
580 }
581 }
582
583 /**
584 * xmlWarningMsg:
585 * @ctxt: an XML parser context
586 * @error: the error number
587 * @msg: the error message
588 * @str1: extra data
589 * @str2: extra data
590 *
591 * Handle a warning.
592 */
593 static void LIBXML_ATTR_FORMAT(3,0)
594 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595 const char *msg, const xmlChar *str1, const xmlChar *str2)
596 {
597 xmlStructuredErrorFunc schannel = NULL;
598
599 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
600 (ctxt->instate == XML_PARSER_EOF))
601 return;
602 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
603 (ctxt->sax->initialized == XML_SAX2_MAGIC))
604 schannel = ctxt->sax->serror;
605 if (ctxt != NULL) {
606 __xmlRaiseError(schannel,
607 (ctxt->sax) ? ctxt->sax->warning : NULL,
608 ctxt->userData,
609 ctxt, NULL, XML_FROM_PARSER, error,
610 XML_ERR_WARNING, NULL, 0,
611 (const char *) str1, (const char *) str2, NULL, 0, 0,
612 msg, (const char *) str1, (const char *) str2);
613 } else {
614 __xmlRaiseError(schannel, NULL, NULL,
615 ctxt, NULL, XML_FROM_PARSER, error,
616 XML_ERR_WARNING, NULL, 0,
617 (const char *) str1, (const char *) str2, NULL, 0, 0,
618 msg, (const char *) str1, (const char *) str2);
619 }
620 }
621
622 /**
623 * xmlValidityError:
624 * @ctxt: an XML parser context
625 * @error: the error number
626 * @msg: the error message
627 * @str1: extra data
628 *
629 * Handle a validity error.
630 */
631 static void LIBXML_ATTR_FORMAT(3,0)
632 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
633 const char *msg, const xmlChar *str1, const xmlChar *str2)
634 {
635 xmlStructuredErrorFunc schannel = NULL;
636
637 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638 (ctxt->instate == XML_PARSER_EOF))
639 return;
640 if (ctxt != NULL) {
641 ctxt->errNo = error;
642 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
643 schannel = ctxt->sax->serror;
644 }
645 if (ctxt != NULL) {
646 __xmlRaiseError(schannel,
647 ctxt->vctxt.error, ctxt->vctxt.userData,
648 ctxt, NULL, XML_FROM_DTD, error,
649 XML_ERR_ERROR, NULL, 0, (const char *) str1,
650 (const char *) str2, NULL, 0, 0,
651 msg, (const char *) str1, (const char *) str2);
652 ctxt->valid = 0;
653 } else {
654 __xmlRaiseError(schannel, NULL, NULL,
655 ctxt, NULL, XML_FROM_DTD, error,
656 XML_ERR_ERROR, NULL, 0, (const char *) str1,
657 (const char *) str2, NULL, 0, 0,
658 msg, (const char *) str1, (const char *) str2);
659 }
660 }
661
662 /**
663 * xmlFatalErrMsgInt:
664 * @ctxt: an XML parser context
665 * @error: the error number
666 * @msg: the error message
667 * @val: an integer value
668 *
669 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
670 */
671 static void LIBXML_ATTR_FORMAT(3,0)
672 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
673 const char *msg, int val)
674 {
675 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676 (ctxt->instate == XML_PARSER_EOF))
677 return;
678 if (ctxt != NULL)
679 ctxt->errNo = error;
680 __xmlRaiseError(NULL, NULL, NULL,
681 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
682 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
683 if (ctxt != NULL) {
684 ctxt->wellFormed = 0;
685 if (ctxt->recovery == 0)
686 ctxt->disableSAX = 1;
687 }
688 }
689
690 /**
691 * xmlFatalErrMsgStrIntStr:
692 * @ctxt: an XML parser context
693 * @error: the error number
694 * @msg: the error message
695 * @str1: an string info
696 * @val: an integer value
697 * @str2: an string info
698 *
699 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
700 */
701 static void LIBXML_ATTR_FORMAT(3,0)
702 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
703 const char *msg, const xmlChar *str1, int val,
704 const xmlChar *str2)
705 {
706 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
707 (ctxt->instate == XML_PARSER_EOF))
708 return;
709 if (ctxt != NULL)
710 ctxt->errNo = error;
711 __xmlRaiseError(NULL, NULL, NULL,
712 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
713 NULL, 0, (const char *) str1, (const char *) str2,
714 NULL, val, 0, msg, str1, val, str2);
715 if (ctxt != NULL) {
716 ctxt->wellFormed = 0;
717 if (ctxt->recovery == 0)
718 ctxt->disableSAX = 1;
719 }
720 }
721
722 /**
723 * xmlFatalErrMsgStr:
724 * @ctxt: an XML parser context
725 * @error: the error number
726 * @msg: the error message
727 * @val: a string value
728 *
729 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
730 */
731 static void LIBXML_ATTR_FORMAT(3,0)
732 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
733 const char *msg, const xmlChar * val)
734 {
735 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
736 (ctxt->instate == XML_PARSER_EOF))
737 return;
738 if (ctxt != NULL)
739 ctxt->errNo = error;
740 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
741 XML_FROM_PARSER, error, XML_ERR_FATAL,
742 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
743 val);
744 if (ctxt != NULL) {
745 ctxt->wellFormed = 0;
746 if (ctxt->recovery == 0)
747 ctxt->disableSAX = 1;
748 }
749 }
750
751 /**
752 * xmlErrMsgStr:
753 * @ctxt: an XML parser context
754 * @error: the error number
755 * @msg: the error message
756 * @val: a string value
757 *
758 * Handle a non fatal parser error
759 */
760 static void LIBXML_ATTR_FORMAT(3,0)
761 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
762 const char *msg, const xmlChar * val)
763 {
764 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
765 (ctxt->instate == XML_PARSER_EOF))
766 return;
767 if (ctxt != NULL)
768 ctxt->errNo = error;
769 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
770 XML_FROM_PARSER, error, XML_ERR_ERROR,
771 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
772 val);
773 }
774
775 /**
776 * xmlNsErr:
777 * @ctxt: an XML parser context
778 * @error: the error number
779 * @msg: the message
780 * @info1: extra information string
781 * @info2: extra information string
782 *
783 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
784 */
785 static void LIBXML_ATTR_FORMAT(3,0)
786 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
787 const char *msg,
788 const xmlChar * info1, const xmlChar * info2,
789 const xmlChar * info3)
790 {
791 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
792 (ctxt->instate == XML_PARSER_EOF))
793 return;
794 if (ctxt != NULL)
795 ctxt->errNo = error;
796 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
797 XML_ERR_ERROR, NULL, 0, (const char *) info1,
798 (const char *) info2, (const char *) info3, 0, 0, msg,
799 info1, info2, info3);
800 if (ctxt != NULL)
801 ctxt->nsWellFormed = 0;
802 }
803
804 /**
805 * xmlNsWarn
806 * @ctxt: an XML parser context
807 * @error: the error number
808 * @msg: the message
809 * @info1: extra information string
810 * @info2: extra information string
811 *
812 * Handle a namespace warning error
813 */
814 static void LIBXML_ATTR_FORMAT(3,0)
815 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
816 const char *msg,
817 const xmlChar * info1, const xmlChar * info2,
818 const xmlChar * info3)
819 {
820 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
821 (ctxt->instate == XML_PARSER_EOF))
822 return;
823 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
824 XML_ERR_WARNING, NULL, 0, (const char *) info1,
825 (const char *) info2, (const char *) info3, 0, 0, msg,
826 info1, info2, info3);
827 }
828
829 /************************************************************************
830 * *
831 * Library wide options *
832 * *
833 ************************************************************************/
834
835 /**
836 * xmlHasFeature:
837 * @feature: the feature to be examined
838 *
839 * Examines if the library has been compiled with a given feature.
840 *
841 * Returns a non-zero value if the feature exist, otherwise zero.
842 * Returns zero (0) if the feature does not exist or an unknown
843 * unknown feature is requested, non-zero otherwise.
844 */
845 int
846 xmlHasFeature(xmlFeature feature)
847 {
848 switch (feature) {
849 case XML_WITH_THREAD:
850 #ifdef LIBXML_THREAD_ENABLED
851 return(1);
852 #else
853 return(0);
854 #endif
855 case XML_WITH_TREE:
856 #ifdef LIBXML_TREE_ENABLED
857 return(1);
858 #else
859 return(0);
860 #endif
861 case XML_WITH_OUTPUT:
862 #ifdef LIBXML_OUTPUT_ENABLED
863 return(1);
864 #else
865 return(0);
866 #endif
867 case XML_WITH_PUSH:
868 #ifdef LIBXML_PUSH_ENABLED
869 return(1);
870 #else
871 return(0);
872 #endif
873 case XML_WITH_READER:
874 #ifdef LIBXML_READER_ENABLED
875 return(1);
876 #else
877 return(0);
878 #endif
879 case XML_WITH_PATTERN:
880 #ifdef LIBXML_PATTERN_ENABLED
881 return(1);
882 #else
883 return(0);
884 #endif
885 case XML_WITH_WRITER:
886 #ifdef LIBXML_WRITER_ENABLED
887 return(1);
888 #else
889 return(0);
890 #endif
891 case XML_WITH_SAX1:
892 #ifdef LIBXML_SAX1_ENABLED
893 return(1);
894 #else
895 return(0);
896 #endif
897 case XML_WITH_FTP:
898 #ifdef LIBXML_FTP_ENABLED
899 return(1);
900 #else
901 return(0);
902 #endif
903 case XML_WITH_HTTP:
904 #ifdef LIBXML_HTTP_ENABLED
905 return(1);
906 #else
907 return(0);
908 #endif
909 case XML_WITH_VALID:
910 #ifdef LIBXML_VALID_ENABLED
911 return(1);
912 #else
913 return(0);
914 #endif
915 case XML_WITH_HTML:
916 #ifdef LIBXML_HTML_ENABLED
917 return(1);
918 #else
919 return(0);
920 #endif
921 case XML_WITH_LEGACY:
922 #ifdef LIBXML_LEGACY_ENABLED
923 return(1);
924 #else
925 return(0);
926 #endif
927 case XML_WITH_C14N:
928 #ifdef LIBXML_C14N_ENABLED
929 return(1);
930 #else
931 return(0);
932 #endif
933 case XML_WITH_CATALOG:
934 #ifdef LIBXML_CATALOG_ENABLED
935 return(1);
936 #else
937 return(0);
938 #endif
939 case XML_WITH_XPATH:
940 #ifdef LIBXML_XPATH_ENABLED
941 return(1);
942 #else
943 return(0);
944 #endif
945 case XML_WITH_XPTR:
946 #ifdef LIBXML_XPTR_ENABLED
947 return(1);
948 #else
949 return(0);
950 #endif
951 case XML_WITH_XINCLUDE:
952 #ifdef LIBXML_XINCLUDE_ENABLED
953 return(1);
954 #else
955 return(0);
956 #endif
957 case XML_WITH_ICONV:
958 #ifdef LIBXML_ICONV_ENABLED
959 return(1);
960 #else
961 return(0);
962 #endif
963 case XML_WITH_ISO8859X:
964 #ifdef LIBXML_ISO8859X_ENABLED
965 return(1);
966 #else
967 return(0);
968 #endif
969 case XML_WITH_UNICODE:
970 #ifdef LIBXML_UNICODE_ENABLED
971 return(1);
972 #else
973 return(0);
974 #endif
975 case XML_WITH_REGEXP:
976 #ifdef LIBXML_REGEXP_ENABLED
977 return(1);
978 #else
979 return(0);
980 #endif
981 case XML_WITH_AUTOMATA:
982 #ifdef LIBXML_AUTOMATA_ENABLED
983 return(1);
984 #else
985 return(0);
986 #endif
987 case XML_WITH_EXPR:
988 #ifdef LIBXML_EXPR_ENABLED
989 return(1);
990 #else
991 return(0);
992 #endif
993 case XML_WITH_SCHEMAS:
994 #ifdef LIBXML_SCHEMAS_ENABLED
995 return(1);
996 #else
997 return(0);
998 #endif
999 case XML_WITH_SCHEMATRON:
1000 #ifdef LIBXML_SCHEMATRON_ENABLED
1001 return(1);
1002 #else
1003 return(0);
1004 #endif
1005 case XML_WITH_MODULES:
1006 #ifdef LIBXML_MODULES_ENABLED
1007 return(1);
1008 #else
1009 return(0);
1010 #endif
1011 case XML_WITH_DEBUG:
1012 #ifdef LIBXML_DEBUG_ENABLED
1013 return(1);
1014 #else
1015 return(0);
1016 #endif
1017 case XML_WITH_DEBUG_MEM:
1018 #ifdef DEBUG_MEMORY_LOCATION
1019 return(1);
1020 #else
1021 return(0);
1022 #endif
1023 case XML_WITH_DEBUG_RUN:
1024 #ifdef LIBXML_DEBUG_RUNTIME
1025 return(1);
1026 #else
1027 return(0);
1028 #endif
1029 case XML_WITH_ZLIB:
1030 #ifdef LIBXML_ZLIB_ENABLED
1031 return(1);
1032 #else
1033 return(0);
1034 #endif
1035 case XML_WITH_LZMA:
1036 #ifdef LIBXML_LZMA_ENABLED
1037 return(1);
1038 #else
1039 return(0);
1040 #endif
1041 case XML_WITH_ICU:
1042 #ifdef LIBXML_ICU_ENABLED
1043 return(1);
1044 #else
1045 return(0);
1046 #endif
1047 default:
1048 break;
1049 }
1050 return(0);
1051 }
1052
1053 /************************************************************************
1054 * *
1055 * SAX2 defaulted attributes handling *
1056 * *
1057 ************************************************************************/
1058
1059 /**
1060 * xmlDetectSAX2:
1061 * @ctxt: an XML parser context
1062 *
1063 * Do the SAX2 detection and specific intialization
1064 */
1065 static void
1066 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1067 if (ctxt == NULL) return;
1068 #ifdef LIBXML_SAX1_ENABLED
1069 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1070 ((ctxt->sax->startElementNs != NULL) ||
1071 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1072 #else
1073 ctxt->sax2 = 1;
1074 #endif /* LIBXML_SAX1_ENABLED */
1075
1076 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1077 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1078 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1079 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1080 (ctxt->str_xml_ns == NULL)) {
1081 xmlErrMemory(ctxt, NULL);
1082 }
1083 }
1084
/*
 * xmlDefAttrs:
 *
 * Set of defaulted attributes attached to one element: a counted array
 * of string pointers that is allocated together with the header.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;       /* the size of the array */
    /*
     * __STDC_VERSION__ is tested with defined() first: pre-C99 compilers
     * (and some others) do not define it at all, and evaluating an
     * undefined macro in #if triggers -Wundef style diagnostics.
     */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
    /* Using a C99 flexible array member avoids UBSan errors. */
    const xmlChar *values[]; /* array of localname/prefix/values/external */
#else
    const xmlChar *values[5];
#endif
};
1097
/**
 * xmlAttrNormalizeSpace:
 * @src:  the source string
 * @dst:  the target string
 *
 * Normalize the space in non CDATA attribute values:
 * If the attribute type is not CDATA, then the XML processor MUST further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * Note that the size of dst need to be at least src, and if one doesn't need
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
 * passing src as dst is just fine.
 *
 * Returns a pointer to the start of the normalized value (@dst), or NULL
 * if an argument is NULL or if, when normalizing in place (@src == @dst),
 * no conversion was needed. With a separate @dst buffer the result is
 * always written and @dst is returned.
 */
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
    /*
     * Remember where the output begins: dst is advanced while writing and
     * ends up on the terminating 0, which is not what callers expect back.
     */
    xmlChar *start = dst;

    if ((src == NULL) || (dst == NULL))
        return(NULL);

    /* drop leading spaces */
    while (*src == 0x20) src++;
    while (*src != 0) {
        if (*src == 0x20) {
            /* collapse a run of spaces, dropping it if it is trailing */
            while (*src == 0x20) src++;
            if (*src != 0)
                *dst++ = 0x20;
        } else {
            *dst++ = *src++;
        }
    }
    *dst = 0;
    /* both cursors only stay equal when nothing was skipped in place */
    if (dst == src)
        return(NULL);
    return(start);
}
1136
1137 /**
1138 * xmlAttrNormalizeSpace2:
1139 * @src: the source string
1140 *
1141 * Normalize the space in non CDATA attribute values, a slightly more complex
1142 * front end to avoid allocation problems when running on attribute values
1143 * coming from the input.
1144 *
1145 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1146 * is needed.
1147 */
1148 static const xmlChar *
1149 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1150 {
1151 int i;
1152 int remove_head = 0;
1153 int need_realloc = 0;
1154 const xmlChar *cur;
1155
1156 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1157 return(NULL);
1158 i = *len;
1159 if (i <= 0)
1160 return(NULL);
1161
1162 cur = src;
1163 while (*cur == 0x20) {
1164 cur++;
1165 remove_head++;
1166 }
1167 while (*cur != 0) {
1168 if (*cur == 0x20) {
1169 cur++;
1170 if ((*cur == 0x20) || (*cur == 0)) {
1171 need_realloc = 1;
1172 break;
1173 }
1174 } else
1175 cur++;
1176 }
1177 if (need_realloc) {
1178 xmlChar *ret;
1179
1180 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1181 if (ret == NULL) {
1182 xmlErrMemory(ctxt, NULL);
1183 return(NULL);
1184 }
1185 xmlAttrNormalizeSpace(ret, ret);
1186 *len = (int) strlen((const char *)ret);
1187 return(ret);
1188 } else if (remove_head) {
1189 *len -= remove_head;
1190 memmove(src, src + remove_head, 1 + *len);
1191 return(src);
1192 }
1193 return(NULL);
1194 }
1195
1196 /**
1197 * xmlAddDefAttrs:
1198 * @ctxt: an XML parser context
1199 * @fullname: the element fullname
1200 * @fullattr: the attribute fullname
1201 * @value: the attribute value
1202 *
1203 * Add a defaulted attribute for an element
1204 */
1205 static void
1206 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1207 const xmlChar *fullname,
1208 const xmlChar *fullattr,
1209 const xmlChar *value) {
1210 xmlDefAttrsPtr defaults;
1211 int len;
1212 const xmlChar *name;
1213 const xmlChar *prefix;
1214
1215 /*
1216 * Allows to detect attribute redefinitions
1217 */
1218 if (ctxt->attsSpecial != NULL) {
1219 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1220 return;
1221 }
1222
1223 if (ctxt->attsDefault == NULL) {
1224 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1225 if (ctxt->attsDefault == NULL)
1226 goto mem_error;
1227 }
1228
1229 /*
1230 * split the element name into prefix:localname , the string found
1231 * are within the DTD and then not associated to namespace names.
1232 */
1233 name = xmlSplitQName3(fullname, &len);
1234 if (name == NULL) {
1235 name = xmlDictLookup(ctxt->dict, fullname, -1);
1236 prefix = NULL;
1237 } else {
1238 name = xmlDictLookup(ctxt->dict, name, -1);
1239 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1240 }
1241
1242 /*
1243 * make sure there is some storage
1244 */
1245 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1246 if (defaults == NULL) {
1247 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1248 (4 * 5) * sizeof(const xmlChar *));
1249 if (defaults == NULL)
1250 goto mem_error;
1251 defaults->nbAttrs = 0;
1252 defaults->maxAttrs = 4;
1253 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1254 defaults, NULL) < 0) {
1255 xmlFree(defaults);
1256 goto mem_error;
1257 }
1258 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1259 xmlDefAttrsPtr temp;
1260
1261 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1262 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1263 if (temp == NULL)
1264 goto mem_error;
1265 defaults = temp;
1266 defaults->maxAttrs *= 2;
1267 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1268 defaults, NULL) < 0) {
1269 xmlFree(defaults);
1270 goto mem_error;
1271 }
1272 }
1273
1274 /*
1275 * Split the element name into prefix:localname , the string found
1276 * are within the DTD and hen not associated to namespace names.
1277 */
1278 name = xmlSplitQName3(fullattr, &len);
1279 if (name == NULL) {
1280 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1281 prefix = NULL;
1282 } else {
1283 name = xmlDictLookup(ctxt->dict, name, -1);
1284 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1285 }
1286
1287 defaults->values[5 * defaults->nbAttrs] = name;
1288 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1289 /* intern the string and precompute the end */
1290 len = xmlStrlen(value);
1291 value = xmlDictLookup(ctxt->dict, value, len);
1292 defaults->values[5 * defaults->nbAttrs + 2] = value;
1293 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1294 if (ctxt->external)
1295 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1296 else
1297 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1298 defaults->nbAttrs++;
1299
1300 return;
1301
1302 mem_error:
1303 xmlErrMemory(ctxt, NULL);
1304 return;
1305 }
1306
1307 /**
1308 * xmlAddSpecialAttr:
1309 * @ctxt: an XML parser context
1310 * @fullname: the element fullname
1311 * @fullattr: the attribute fullname
1312 * @type: the attribute type
1313 *
1314 * Register this attribute type
1315 */
1316 static void
1317 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1318 const xmlChar *fullname,
1319 const xmlChar *fullattr,
1320 int type)
1321 {
1322 if (ctxt->attsSpecial == NULL) {
1323 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1324 if (ctxt->attsSpecial == NULL)
1325 goto mem_error;
1326 }
1327
1328 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1329 return;
1330
1331 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1332 (void *) (ptrdiff_t) type);
1333 return;
1334
1335 mem_error:
1336 xmlErrMemory(ctxt, NULL);
1337 return;
1338 }
1339
1340 /**
1341 * xmlCleanSpecialAttrCallback:
1342 *
1343 * Removes CDATA attributes from the special attribute table
1344 */
1345 static void
1346 xmlCleanSpecialAttrCallback(void *payload, void *data,
1347 const xmlChar *fullname, const xmlChar *fullattr,
1348 const xmlChar *unused ATTRIBUTE_UNUSED) {
1349 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1350
1351 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1352 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1353 }
1354 }
1355
1356 /**
1357 * xmlCleanSpecialAttr:
1358 * @ctxt: an XML parser context
1359 *
1360 * Trim the list of attributes defined to remove all those of type
1361 * CDATA as they are not special. This call should be done when finishing
1362 * to parse the DTD and before starting to parse the document root.
1363 */
1364 static void
1365 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1366 {
1367 if (ctxt->attsSpecial == NULL)
1368 return;
1369
1370 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1371
1372 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1373 xmlHashFree(ctxt->attsSpecial, NULL);
1374 ctxt->attsSpecial = NULL;
1375 }
1376 return;
1377 }
1378
1379 /**
1380 * xmlCheckLanguageID:
1381 * @lang: pointer to the string value
1382 *
1383 * Checks that the value conforms to the LanguageID production:
1384 *
1385 * NOTE: this is somewhat deprecated, those productions were removed from
1386 * the XML Second edition.
1387 *
1388 * [33] LanguageID ::= Langcode ('-' Subcode)*
1389 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1390 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1391 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1392 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1393 * [38] Subcode ::= ([a-z] | [A-Z])+
1394 *
1395 * The current REC reference the sucessors of RFC 1766, currently 5646
1396 *
1397 * http://www.rfc-editor.org/rfc/rfc5646.txt
1398 * langtag = language
1399 * ["-" script]
1400 * ["-" region]
1401 * *("-" variant)
1402 * *("-" extension)
1403 * ["-" privateuse]
1404 * language = 2*3ALPHA ; shortest ISO 639 code
1405 * ["-" extlang] ; sometimes followed by
1406 * ; extended language subtags
1407 * / 4ALPHA ; or reserved for future use
1408 * / 5*8ALPHA ; or registered language subtag
1409 *
1410 * extlang = 3ALPHA ; selected ISO 639 codes
1411 * *2("-" 3ALPHA) ; permanently reserved
1412 *
1413 * script = 4ALPHA ; ISO 15924 code
1414 *
1415 * region = 2ALPHA ; ISO 3166-1 code
1416 * / 3DIGIT ; UN M.49 code
1417 *
1418 * variant = 5*8alphanum ; registered variants
1419 * / (DIGIT 3alphanum)
1420 *
1421 * extension = singleton 1*("-" (2*8alphanum))
1422 *
1423 * ; Single alphanumerics
1424 * ; "x" reserved for private use
1425 * singleton = DIGIT ; 0 - 9
1426 * / %x41-57 ; A - W
1427 * / %x59-5A ; Y - Z
1428 * / %x61-77 ; a - w
1429 * / %x79-7A ; y - z
1430 *
1431 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1432 * The parser below doesn't try to cope with extension or privateuse
1433 * that could be added but that's not interoperable anyway
1434 *
1435 * Returns 1 if correct 0 otherwise
1436 **/
1437 int
1438 xmlCheckLanguageID(const xmlChar * lang)
1439 {
1440 const xmlChar *cur = lang, *nxt;
1441
1442 if (cur == NULL)
1443 return (0);
1444 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1445 ((cur[0] == 'I') && (cur[1] == '-')) ||
1446 ((cur[0] == 'x') && (cur[1] == '-')) ||
1447 ((cur[0] == 'X') && (cur[1] == '-'))) {
1448 /*
1449 * Still allow IANA code and user code which were coming
1450 * from the previous version of the XML-1.0 specification
1451 * it's deprecated but we should not fail
1452 */
1453 cur += 2;
1454 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1455 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1456 cur++;
1457 return(cur[0] == 0);
1458 }
1459 nxt = cur;
1460 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1461 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1462 nxt++;
1463 if (nxt - cur >= 4) {
1464 /*
1465 * Reserved
1466 */
1467 if ((nxt - cur > 8) || (nxt[0] != 0))
1468 return(0);
1469 return(1);
1470 }
1471 if (nxt - cur < 2)
1472 return(0);
1473 /* we got an ISO 639 code */
1474 if (nxt[0] == 0)
1475 return(1);
1476 if (nxt[0] != '-')
1477 return(0);
1478
1479 nxt++;
1480 cur = nxt;
1481 /* now we can have extlang or script or region or variant */
1482 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1483 goto region_m49;
1484
1485 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1486 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1487 nxt++;
1488 if (nxt - cur == 4)
1489 goto script;
1490 if (nxt - cur == 2)
1491 goto region;
1492 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1493 goto variant;
1494 if (nxt - cur != 3)
1495 return(0);
1496 /* we parsed an extlang */
1497 if (nxt[0] == 0)
1498 return(1);
1499 if (nxt[0] != '-')
1500 return(0);
1501
1502 nxt++;
1503 cur = nxt;
1504 /* now we can have script or region or variant */
1505 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1506 goto region_m49;
1507
1508 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1510 nxt++;
1511 if (nxt - cur == 2)
1512 goto region;
1513 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1514 goto variant;
1515 if (nxt - cur != 4)
1516 return(0);
1517 /* we parsed a script */
1518 script:
1519 if (nxt[0] == 0)
1520 return(1);
1521 if (nxt[0] != '-')
1522 return(0);
1523
1524 nxt++;
1525 cur = nxt;
1526 /* now we can have region or variant */
1527 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1528 goto region_m49;
1529
1530 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1531 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1532 nxt++;
1533
1534 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1535 goto variant;
1536 if (nxt - cur != 2)
1537 return(0);
1538 /* we parsed a region */
1539 region:
1540 if (nxt[0] == 0)
1541 return(1);
1542 if (nxt[0] != '-')
1543 return(0);
1544
1545 nxt++;
1546 cur = nxt;
1547 /* now we can just have a variant */
1548 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1549 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1550 nxt++;
1551
1552 if ((nxt - cur < 5) || (nxt - cur > 8))
1553 return(0);
1554
1555 /* we parsed a variant */
1556 variant:
1557 if (nxt[0] == 0)
1558 return(1);
1559 if (nxt[0] != '-')
1560 return(0);
1561 /* extensions and private use subtags not checked */
1562 return (1);
1563
1564 region_m49:
1565 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1566 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1567 nxt += 3;
1568 goto region;
1569 }
1570 return(0);
1571 }
1572
1573 /************************************************************************
1574 * *
1575 * Parser stacks related functions and macros *
1576 * *
1577 ************************************************************************/
1578
1579 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1580 const xmlChar ** str);
1581
1582 #ifdef SAX2
1583 /**
1584 * nsPush:
1585 * @ctxt: an XML parser context
1586 * @prefix: the namespace prefix or NULL
1587 * @URL: the namespace name
1588 *
1589 * Pushes a new parser namespace on top of the ns stack
1590 *
1591 * Returns -1 in case of error, -2 if the namespace should be discarded
1592 * and the index in the stack otherwise.
1593 */
1594 static int
1595 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1596 {
1597 if (ctxt->options & XML_PARSE_NSCLEAN) {
1598 int i;
1599 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1600 if (ctxt->nsTab[i] == prefix) {
1601 /* in scope */
1602 if (ctxt->nsTab[i + 1] == URL)
1603 return(-2);
1604 /* out of scope keep it */
1605 break;
1606 }
1607 }
1608 }
1609 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1610 ctxt->nsMax = 10;
1611 ctxt->nsNr = 0;
1612 ctxt->nsTab = (const xmlChar **)
1613 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1614 if (ctxt->nsTab == NULL) {
1615 xmlErrMemory(ctxt, NULL);
1616 ctxt->nsMax = 0;
1617 return (-1);
1618 }
1619 } else if (ctxt->nsNr >= ctxt->nsMax) {
1620 const xmlChar ** tmp;
1621 ctxt->nsMax *= 2;
1622 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1623 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1624 if (tmp == NULL) {
1625 xmlErrMemory(ctxt, NULL);
1626 ctxt->nsMax /= 2;
1627 return (-1);
1628 }
1629 ctxt->nsTab = tmp;
1630 }
1631 ctxt->nsTab[ctxt->nsNr++] = prefix;
1632 ctxt->nsTab[ctxt->nsNr++] = URL;
1633 return (ctxt->nsNr);
1634 }
1635 /**
1636 * nsPop:
1637 * @ctxt: an XML parser context
1638 * @nr: the number to pop
1639 *
1640 * Pops the top @nr parser prefix/namespace from the ns stack
1641 *
1642 * Returns the number of namespaces removed
1643 */
1644 static int
1645 nsPop(xmlParserCtxtPtr ctxt, int nr)
1646 {
1647 int i;
1648
1649 if (ctxt->nsTab == NULL) return(0);
1650 if (ctxt->nsNr < nr) {
1651 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1652 nr = ctxt->nsNr;
1653 }
1654 if (ctxt->nsNr <= 0)
1655 return (0);
1656
1657 for (i = 0;i < nr;i++) {
1658 ctxt->nsNr--;
1659 ctxt->nsTab[ctxt->nsNr] = NULL;
1660 }
1661 return(nr);
1662 }
1663 #endif
1664
1665 static int
1666 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1667 const xmlChar **atts;
1668 int *attallocs;
1669 int maxatts;
1670
1671 if (ctxt->atts == NULL) {
1672 maxatts = 55; /* allow for 10 attrs by default */
1673 atts = (const xmlChar **)
1674 xmlMalloc(maxatts * sizeof(xmlChar *));
1675 if (atts == NULL) goto mem_error;
1676 ctxt->atts = atts;
1677 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1678 if (attallocs == NULL) goto mem_error;
1679 ctxt->attallocs = attallocs;
1680 ctxt->maxatts = maxatts;
1681 } else if (nr + 5 > ctxt->maxatts) {
1682 maxatts = (nr + 5) * 2;
1683 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1684 maxatts * sizeof(const xmlChar *));
1685 if (atts == NULL) goto mem_error;
1686 ctxt->atts = atts;
1687 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1688 (maxatts / 5) * sizeof(int));
1689 if (attallocs == NULL) goto mem_error;
1690 ctxt->attallocs = attallocs;
1691 ctxt->maxatts = maxatts;
1692 }
1693 return(ctxt->maxatts);
1694 mem_error:
1695 xmlErrMemory(ctxt, NULL);
1696 return(-1);
1697 }
1698
1699 /**
1700 * inputPush:
1701 * @ctxt: an XML parser context
1702 * @value: the parser input
1703 *
1704 * Pushes a new parser input on top of the input stack
1705 *
1706 * Returns -1 in case of error, the index in the stack otherwise
1707 */
1708 int
1709 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1710 {
1711 if ((ctxt == NULL) || (value == NULL))
1712 return(-1);
1713 if (ctxt->inputNr >= ctxt->inputMax) {
1714 ctxt->inputMax *= 2;
1715 ctxt->inputTab =
1716 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1717 ctxt->inputMax *
1718 sizeof(ctxt->inputTab[0]));
1719 if (ctxt->inputTab == NULL) {
1720 xmlErrMemory(ctxt, NULL);
1721 xmlFreeInputStream(value);
1722 ctxt->inputMax /= 2;
1723 value = NULL;
1724 return (-1);
1725 }
1726 }
1727 ctxt->inputTab[ctxt->inputNr] = value;
1728 ctxt->input = value;
1729 return (ctxt->inputNr++);
1730 }
1731 /**
1732 * inputPop:
1733 * @ctxt: an XML parser context
1734 *
1735 * Pops the top parser input from the input stack
1736 *
1737 * Returns the input just removed
1738 */
1739 xmlParserInputPtr
1740 inputPop(xmlParserCtxtPtr ctxt)
1741 {
1742 xmlParserInputPtr ret;
1743
1744 if (ctxt == NULL)
1745 return(NULL);
1746 if (ctxt->inputNr <= 0)
1747 return (NULL);
1748 ctxt->inputNr--;
1749 if (ctxt->inputNr > 0)
1750 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1751 else
1752 ctxt->input = NULL;
1753 ret = ctxt->inputTab[ctxt->inputNr];
1754 ctxt->inputTab[ctxt->inputNr] = NULL;
1755 return (ret);
1756 }
1757 /**
1758 * nodePush:
1759 * @ctxt: an XML parser context
1760 * @value: the element node
1761 *
1762 * Pushes a new element node on top of the node stack
1763 *
1764 * Returns -1 in case of error, the index in the stack otherwise
1765 */
1766 int
1767 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1768 {
1769 if (ctxt == NULL) return(0);
1770 if (ctxt->nodeNr >= ctxt->nodeMax) {
1771 xmlNodePtr *tmp;
1772
1773 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1774 ctxt->nodeMax * 2 *
1775 sizeof(ctxt->nodeTab[0]));
1776 if (tmp == NULL) {
1777 xmlErrMemory(ctxt, NULL);
1778 return (-1);
1779 }
1780 ctxt->nodeTab = tmp;
1781 ctxt->nodeMax *= 2;
1782 }
1783 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1784 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1785 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1786 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1787 xmlParserMaxDepth);
1788 xmlHaltParser(ctxt);
1789 return(-1);
1790 }
1791 ctxt->nodeTab[ctxt->nodeNr] = value;
1792 ctxt->node = value;
1793 return (ctxt->nodeNr++);
1794 }
1795
1796 /**
1797 * nodePop:
1798 * @ctxt: an XML parser context
1799 *
1800 * Pops the top element node from the node stack
1801 *
1802 * Returns the node just removed
1803 */
1804 xmlNodePtr
1805 nodePop(xmlParserCtxtPtr ctxt)
1806 {
1807 xmlNodePtr ret;
1808
1809 if (ctxt == NULL) return(NULL);
1810 if (ctxt->nodeNr <= 0)
1811 return (NULL);
1812 ctxt->nodeNr--;
1813 if (ctxt->nodeNr > 0)
1814 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1815 else
1816 ctxt->node = NULL;
1817 ret = ctxt->nodeTab[ctxt->nodeNr];
1818 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1819 return (ret);
1820 }
1821
1822 #ifdef LIBXML_PUSH_ENABLED
1823 /**
1824 * nameNsPush:
1825 * @ctxt: an XML parser context
1826 * @value: the element name
1827 * @prefix: the element prefix
1828 * @URI: the element namespace name
1829 *
1830 * Pushes a new element name/prefix/URL on top of the name stack
1831 *
1832 * Returns -1 in case of error, the index in the stack otherwise
1833 */
1834 static int
1835 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1836 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1837 {
1838 if (ctxt->nameNr >= ctxt->nameMax) {
1839 const xmlChar * *tmp;
1840 void **tmp2;
1841 ctxt->nameMax *= 2;
1842 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1843 ctxt->nameMax *
1844 sizeof(ctxt->nameTab[0]));
1845 if (tmp == NULL) {
1846 ctxt->nameMax /= 2;
1847 goto mem_error;
1848 }
1849 ctxt->nameTab = tmp;
1850 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1851 ctxt->nameMax * 3 *
1852 sizeof(ctxt->pushTab[0]));
1853 if (tmp2 == NULL) {
1854 ctxt->nameMax /= 2;
1855 goto mem_error;
1856 }
1857 ctxt->pushTab = tmp2;
1858 }
1859 ctxt->nameTab[ctxt->nameNr] = value;
1860 ctxt->name = value;
1861 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1862 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1863 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (ptrdiff_t) nsNr;
1864 return (ctxt->nameNr++);
1865 mem_error:
1866 xmlErrMemory(ctxt, NULL);
1867 return (-1);
1868 }
1869 /**
1870 * nameNsPop:
1871 * @ctxt: an XML parser context
1872 *
1873 * Pops the top element/prefix/URI name from the name stack
1874 *
1875 * Returns the name just removed
1876 */
1877 static const xmlChar *
1878 nameNsPop(xmlParserCtxtPtr ctxt)
1879 {
1880 const xmlChar *ret;
1881
1882 if (ctxt->nameNr <= 0)
1883 return (NULL);
1884 ctxt->nameNr--;
1885 if (ctxt->nameNr > 0)
1886 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1887 else
1888 ctxt->name = NULL;
1889 ret = ctxt->nameTab[ctxt->nameNr];
1890 ctxt->nameTab[ctxt->nameNr] = NULL;
1891 return (ret);
1892 }
1893 #endif /* LIBXML_PUSH_ENABLED */
1894
1895 /**
1896 * namePush:
1897 * @ctxt: an XML parser context
1898 * @value: the element name
1899 *
1900 * Pushes a new element name on top of the name stack
1901 *
1902 * Returns -1 in case of error, the index in the stack otherwise
1903 */
1904 int
1905 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1906 {
1907 if (ctxt == NULL) return (-1);
1908
1909 if (ctxt->nameNr >= ctxt->nameMax) {
1910 const xmlChar * *tmp;
1911 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1912 ctxt->nameMax * 2 *
1913 sizeof(ctxt->nameTab[0]));
1914 if (tmp == NULL) {
1915 goto mem_error;
1916 }
1917 ctxt->nameTab = tmp;
1918 ctxt->nameMax *= 2;
1919 }
1920 ctxt->nameTab[ctxt->nameNr] = value;
1921 ctxt->name = value;
1922 return (ctxt->nameNr++);
1923 mem_error:
1924 xmlErrMemory(ctxt, NULL);
1925 return (-1);
1926 }
1927 /**
1928 * namePop:
1929 * @ctxt: an XML parser context
1930 *
1931 * Pops the top element name from the name stack
1932 *
1933 * Returns the name just removed
1934 */
1935 const xmlChar *
1936 namePop(xmlParserCtxtPtr ctxt)
1937 {
1938 const xmlChar *ret;
1939
1940 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1941 return (NULL);
1942 ctxt->nameNr--;
1943 if (ctxt->nameNr > 0)
1944 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1945 else
1946 ctxt->name = NULL;
1947 ret = ctxt->nameTab[ctxt->nameNr];
1948 ctxt->nameTab[ctxt->nameNr] = NULL;
1949 return (ret);
1950 }
1951
1952 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1953 if (ctxt->spaceNr >= ctxt->spaceMax) {
1954 int *tmp;
1955
1956 ctxt->spaceMax *= 2;
1957 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1958 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1959 if (tmp == NULL) {
1960 xmlErrMemory(ctxt, NULL);
1961 ctxt->spaceMax /=2;
1962 return(-1);
1963 }
1964 ctxt->spaceTab = tmp;
1965 }
1966 ctxt->spaceTab[ctxt->spaceNr] = val;
1967 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1968 return(ctxt->spaceNr++);
1969 }
1970
1971 static int spacePop(xmlParserCtxtPtr ctxt) {
1972 int ret;
1973 if (ctxt->spaceNr <= 0) return(0);
1974 ctxt->spaceNr--;
1975 if (ctxt->spaceNr > 0)
1976 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1977 else
1978 ctxt->space = &ctxt->spaceTab[0];
1979 ret = ctxt->spaceTab[ctxt->spaceNr];
1980 ctxt->spaceTab[ctxt->spaceNr] = -1;
1981 return(ret);
1982 }
1983
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
2018
/*
 * Direct accessors on the current input of the context. They all expect a
 * variable named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are c1 ... cn.
 * The bytes are compared in order and the comparison stops on the first
 * mismatch, so nothing is read beyond a terminating 0 which differs from
 * the expected byte. Every argument is parenthesized in the expansion so
 * that expressions (pointer arithmetic, conditionals) can be passed.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2042
/*
 * SKIP(val): advance the current input by val xmlChars, updating the
 * character and column counters (the line counter is left alone), then
 * ask for more input when the new current char is 0.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);	\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance the current input by val xmlChars one at a time so
 * that line and column stay accurate across newlines, then ask for more
 * input when the new current char is 0.
 * val is parenthesized in the loop test so that an expression argument is
 * compared as a whole; it is evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2061
2062 #define SHRINK if ((ctxt->progressive == 0) && \
2063 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2064 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2065 xmlSHRINK (ctxt);
2066
2067 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2068 xmlParserInputShrink(ctxt->input);
2069 if (*ctxt->input->cur == 0)
2070 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2071 }
2072
2073 #define GROW if ((ctxt->progressive == 0) && \
2074 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2075 xmlGROW (ctxt);
2076
2077 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2078 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2079 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2080
2081 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2082 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2083 ((ctxt->input->buf) &&
2084 (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
2085 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2086 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2087 xmlHaltParser(ctxt);
2088 return;
2089 }
2090 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2091 if ((ctxt->input->cur > ctxt->input->end) ||
2092 (ctxt->input->cur < ctxt->input->base)) {
2093 xmlHaltParser(ctxt);
2094 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2095 return;
2096 }
2097 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2098 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2099 }
2100
/* Skip the blanks at the current position, see xmlSkipBlankChars. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar. */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, counting it as one column and one
 * character, then ask for more input when the new current char is 0.
 * The expansion is a brace block, not a do/while statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * A newline starts a new line at column 1, anything else counts as one
 * column.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
  } while (0)

/* Decode the current character of the context, resp. of the string s. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and advance
 * i. A character of length 1 is stored directly, anything else goes
 * through xmlCopyCharMultiByte. The expansion is a bare if/else without a
 * trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
2126
2127 /**
2128 * xmlSkipBlankChars:
2129 * @ctxt: the XML parser context
2130 *
2131 * skip all blanks character found at that point in the input streams.
2132 * It pops up finished entities in the process if allowable at that point.
2133 *
2134 * Returns the number of space chars skipped
2135 */
2136
2137 int
2138 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2139 int res = 0;
2140
2141 /*
2142 * It's Okay to use CUR/NEXT here since all the blanks are on
2143 * the ASCII range.
2144 */
2145 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2146 const xmlChar *cur;
2147 /*
2148 * if we are in the document content, go really fast
2149 */
2150 cur = ctxt->input->cur;
2151 while (IS_BLANK_CH(*cur)) {
2152 if (*cur == '\n') {
2153 ctxt->input->line++; ctxt->input->col = 1;
2154 } else {
2155 ctxt->input->col++;
2156 }
2157 cur++;
2158 res++;
2159 if (*cur == 0) {
2160 ctxt->input->cur = cur;
2161 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2162 cur = ctxt->input->cur;
2163 }
2164 }
2165 ctxt->input->cur = cur;
2166 } else {
2167 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2168
2169 while (1) {
2170 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2171 NEXT;
2172 } else if (CUR == '%') {
2173 /*
2174 * Need to handle support of entities branching here
2175 */
2176 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2177 break;
2178 xmlParsePEReference(ctxt);
2179 } else if (CUR == 0) {
2180 if (ctxt->inputNr <= 1)
2181 break;
2182 xmlPopInput(ctxt);
2183 } else {
2184 break;
2185 }
2186
2187 /*
2188 * Also increase the counter when entering or exiting a PERef.
2189 * The spec says: "When a parameter-entity reference is recognized
2190 * in the DTD and included, its replacement text MUST be enlarged
2191 * by the attachment of one leading and one following space (#x20)
2192 * character."
2193 */
2194 res++;
2195 }
2196 }
2197 return(res);
2198 }
2199
2200 /************************************************************************
2201 * *
2202 * Commodity functions to handle entities *
2203 * *
2204 ************************************************************************/
2205
2206 /**
2207 * xmlPopInput:
2208 * @ctxt: an XML parser context
2209 *
2210 * xmlPopInput: the current input pointed by ctxt->input came to an end
2211 * pop it and return the next char.
2212 *
2213 * Returns the current xmlChar in the parser context
2214 */
2215 xmlChar
2216 xmlPopInput(xmlParserCtxtPtr ctxt) {
2217 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2218 if (xmlParserDebugEntities)
2219 xmlGenericError(xmlGenericErrorContext,
2220 "Popping input %d\n", ctxt->inputNr);
2221 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2222 (ctxt->instate != XML_PARSER_EOF))
2223 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2224 "Unfinished entity outside the DTD");
2225 xmlFreeInputStream(inputPop(ctxt));
2226 if (*ctxt->input->cur == 0)
2227 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2228 return(CUR);
2229 }
2230
2231 /**
2232 * xmlPushInput:
2233 * @ctxt: an XML parser context
2234 * @input: an XML parser input fragment (entity, XML fragment ...).
2235 *
2236 * xmlPushInput: switch to a new input stream which is stacked on top
2237 * of the previous one(s).
2238 * Returns -1 in case of error or the index in the input stack
2239 */
2240 int
2241 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2242 int ret;
2243 if (input == NULL) return(-1);
2244
2245 if (xmlParserDebugEntities) {
2246 if ((ctxt->input != NULL) && (ctxt->input->filename))
2247 xmlGenericError(xmlGenericErrorContext,
2248 "%s(%d): ", ctxt->input->filename,
2249 ctxt->input->line);
2250 xmlGenericError(xmlGenericErrorContext,
2251 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2252 }
2253 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2254 (ctxt->inputNr > 1024)) {
2255 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2256 while (ctxt->inputNr > 1)
2257 xmlFreeInputStream(inputPop(ctxt));
2258 return(-1);
2259 }
2260 ret = inputPush(ctxt, input);
2261 if (ctxt->instate == XML_PARSER_EOF)
2262 return(-1);
2263 GROW;
2264 return(ret);
2265 }
2266
2267 /**
2268 * xmlParseCharRef:
2269 * @ctxt: an XML parser context
2270 *
2271 * parse Reference declarations
2272 *
2273 * [66] CharRef ::= '&#' [0-9]+ ';' |
2274 * '&#x' [0-9a-fA-F]+ ';'
2275 *
2276 * [ WFC: Legal Character ]
2277 * Characters referred to using character references must match the
2278 * production for Char.
2279 *
2280 * Returns the value parsed (as an int), 0 in case of error
2281 */
2282 int
2283 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2284 unsigned int val = 0;
2285 int count = 0;
2286 unsigned int outofrange = 0;
2287
2288 /*
2289 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2290 */
2291 if ((RAW == '&') && (NXT(1) == '#') &&
2292 (NXT(2) == 'x')) {
2293 SKIP(3);
2294 GROW;
2295 while (RAW != ';') { /* loop blocked by count */
2296 if (count++ > 20) {
2297 count = 0;
2298 GROW;
2299 if (ctxt->instate == XML_PARSER_EOF)
2300 return(0);
2301 }
2302 if ((RAW >= '0') && (RAW <= '9'))
2303 val = val * 16 + (CUR - '0');
2304 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2305 val = val * 16 + (CUR - 'a') + 10;
2306 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2307 val = val * 16 + (CUR - 'A') + 10;
2308 else {
2309 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2310 val = 0;
2311 break;
2312 }
2313 if (val > 0x10FFFF)
2314 outofrange = val;
2315
2316 NEXT;
2317 count++;
2318 }
2319 if (RAW == ';') {
2320 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2321 ctxt->input->col++;
2322 ctxt->nbChars ++;
2323 ctxt->input->cur++;
2324 }
2325 } else if ((RAW == '&') && (NXT(1) == '#')) {
2326 SKIP(2);
2327 GROW;
2328 while (RAW != ';') { /* loop blocked by count */
2329 if (count++ > 20) {
2330 count = 0;
2331 GROW;
2332 if (ctxt->instate == XML_PARSER_EOF)
2333 return(0);
2334 }
2335 if ((RAW >= '0') && (RAW <= '9'))
2336 val = val * 10 + (CUR - '0');
2337 else {
2338 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2339 val = 0;
2340 break;
2341 }
2342 if (val > 0x10FFFF)
2343 outofrange = val;
2344
2345 NEXT;
2346 count++;
2347 }
2348 if (RAW == ';') {
2349 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2350 ctxt->input->col++;
2351 ctxt->nbChars ++;
2352 ctxt->input->cur++;
2353 }
2354 } else {
2355 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2356 }
2357
2358 /*
2359 * [ WFC: Legal Character ]
2360 * Characters referred to using character references must match the
2361 * production for Char.
2362 */
2363 if ((IS_CHAR(val) && (outofrange == 0))) {
2364 return(val);
2365 } else {
2366 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2367 "xmlParseCharRef: invalid xmlChar value %d\n",
2368 val);
2369 }
2370 return(0);
2371 }
2372
2373 /**
2374 * xmlParseStringCharRef:
2375 * @ctxt: an XML parser context
2376 * @str: a pointer to an index in the string
2377 *
2378 * parse Reference declarations, variant parsing from a string rather
2379 * than an an input flow.
2380 *
2381 * [66] CharRef ::= '&#' [0-9]+ ';' |
2382 * '&#x' [0-9a-fA-F]+ ';'
2383 *
2384 * [ WFC: Legal Character ]
2385 * Characters referred to using character references must match the
2386 * production for Char.
2387 *
2388 * Returns the value parsed (as an int), 0 in case of error, str will be
2389 * updated to the current value of the index
2390 */
2391 static int
2392 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2393 const xmlChar *ptr;
2394 xmlChar cur;
2395 unsigned int val = 0;
2396 unsigned int outofrange = 0;
2397
2398 if ((str == NULL) || (*str == NULL)) return(0);
2399 ptr = *str;
2400 cur = *ptr;
2401 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2402 ptr += 3;
2403 cur = *ptr;
2404 while (cur != ';') { /* Non input consuming loop */
2405 if ((cur >= '0') && (cur <= '9'))
2406 val = val * 16 + (cur - '0');
2407 else if ((cur >= 'a') && (cur <= 'f'))
2408 val = val * 16 + (cur - 'a') + 10;
2409 else if ((cur >= 'A') && (cur <= 'F'))
2410 val = val * 16 + (cur - 'A') + 10;
2411 else {
2412 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2413 val = 0;
2414 break;
2415 }
2416 if (val > 0x10FFFF)
2417 outofrange = val;
2418
2419 ptr++;
2420 cur = *ptr;
2421 }
2422 if (cur == ';')
2423 ptr++;
2424 } else if ((cur == '&') && (ptr[1] == '#')){
2425 ptr += 2;
2426 cur = *ptr;
2427 while (cur != ';') { /* Non input consuming loops */
2428 if ((cur >= '0') && (cur <= '9'))
2429 val = val * 10 + (cur - '0');
2430 else {
2431 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2432 val = 0;
2433 break;
2434 }
2435 if (val > 0x10FFFF)
2436 outofrange = val;
2437
2438 ptr++;
2439 cur = *ptr;
2440 }
2441 if (cur == ';')
2442 ptr++;
2443 } else {
2444 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2445 return(0);
2446 }
2447 *str = ptr;
2448
2449 /*
2450 * [ WFC: Legal Character ]
2451 * Characters referred to using character references must match the
2452 * production for Char.
2453 */
2454 if ((IS_CHAR(val) && (outofrange == 0))) {
2455 return(val);
2456 } else {
2457 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2458 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2459 val);
2460 }
2461 return(0);
2462 }
2463
2464 /**
2465 * xmlParserHandlePEReference:
2466 * @ctxt: the parser context
2467 *
2468 * [69] PEReference ::= '%' Name ';'
2469 *
2470 * [ WFC: No Recursion ]
2471 * A parsed entity must not contain a recursive
2472 * reference to itself, either directly or indirectly.
2473 *
2474 * [ WFC: Entity Declared ]
2475 * In a document without any DTD, a document with only an internal DTD
2476 * subset which contains no parameter entity references, or a document
2477 * with "standalone='yes'", ... ... The declaration of a parameter
2478 * entity must precede any reference to it...
2479 *
2480 * [ VC: Entity Declared ]
2481 * In a document with an external subset or external parameter entities
2482 * with "standalone='no'", ... ... The declaration of a parameter entity
2483 * must precede any reference to it...
2484 *
2485 * [ WFC: In DTD ]
2486 * Parameter-entity references may only appear in the DTD.
2487 * NOTE: misleading but this is handled.
2488 *
2489 * A PEReference may have been detected in the current input stream
2490 * the handling is done accordingly to
2491 * http://www.w3.org/TR/REC-xml#entproc
2492 * i.e.
2493 * - Included in literal in entity values
2494 * - Included as Parameter Entity reference within DTDs
2495 */
2496 void
2497 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2498 switch(ctxt->instate) {
2499 case XML_PARSER_CDATA_SECTION:
2500 return;
2501 case XML_PARSER_COMMENT:
2502 return;
2503 case XML_PARSER_START_TAG:
2504 return;
2505 case XML_PARSER_END_TAG:
2506 return;
2507 case XML_PARSER_EOF:
2508 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2509 return;
2510 case XML_PARSER_PROLOG:
2511 case XML_PARSER_START:
2512 case XML_PARSER_MISC:
2513 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2514 return;
2515 case XML_PARSER_ENTITY_DECL:
2516 case XML_PARSER_CONTENT:
2517 case XML_PARSER_ATTRIBUTE_VALUE:
2518 case XML_PARSER_PI:
2519 case XML_PARSER_SYSTEM_LITERAL:
2520 case XML_PARSER_PUBLIC_LITERAL:
2521 /* we just ignore it there */
2522 return;
2523 case XML_PARSER_EPILOG:
2524 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2525 return;
2526 case XML_PARSER_ENTITY_VALUE:
2527 /*
2528 * NOTE: in the case of entity values, we don't do the
2529 * substitution here since we need the literal
2530 * entity value to be able to save the internal
2531 * subset of the document.
2532 * This will be handled by xmlStringDecodeEntities
2533 */
2534 return;
2535 case XML_PARSER_DTD:
2536 /*
2537 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2538 * In the internal DTD subset, parameter-entity references
2539 * can occur only where markup declarations can occur, not
2540 * within markup declarations.
2541 * In that case this is handled in xmlParseMarkupDecl
2542 */
2543 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2544 return;
2545 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2546 return;
2547 break;
2548 case XML_PARSER_IGNORE:
2549 return;
2550 }
2551
2552 xmlParsePEReference(ctxt);
2553 }
2554
/*
 * Macro used to grow the current buffer.
 *
 * buffer:          name of the xmlChar * variable to reallocate
 * buffer##_size:   companion variable holding its size, expected to be a
 *                  size_t; it is updated on success
 * n:               extra room requested on top of doubling the size; it is
 *                  parenthesized in the expansion so that any expression
 *                  can be passed safely
 * mem_error:       label the expansion jumps to when the new size
 *                  overflows or the reallocation fails; the buffer is left
 *                  untouched in that case
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2569
2570 /**
2571 * xmlStringLenDecodeEntities:
2572 * @ctxt: the parser context
2573 * @str: the input string
2574 * @len: the string length
2575 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2576 * @end: an end marker xmlChar, 0 if none
2577 * @end2: an end marker xmlChar, 0 if none
2578 * @end3: an end marker xmlChar, 0 if none
2579 *
2580 * Takes a entity string content and process to do the adequate substitutions.
2581 *
2582 * [67] Reference ::= EntityRef | CharRef
2583 *
2584 * [69] PEReference ::= '%' Name ';'
2585 *
2586 * Returns A newly allocated string with the substitution done. The caller
2587 * must deallocate it !
2588 */
2589 xmlChar *
2590 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2591 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2592 xmlChar *buffer = NULL;
2593 size_t buffer_size = 0;
2594 size_t nbchars = 0;
2595
2596 xmlChar *current = NULL;
2597 xmlChar *rep = NULL;
2598 const xmlChar *last;
2599 xmlEntityPtr ent;
2600 int c,l;
2601
2602 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2603 return(NULL);
2604 last = str + len;
2605
2606 if (((ctxt->depth > 40) &&
2607 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2608 (ctxt->depth > 1024)) {
2609 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2610 return(NULL);
2611 }
2612
2613 /*
2614 * allocate a translation buffer.
2615 */
2616 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2617 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2618 if (buffer == NULL) goto mem_error;
2619
2620 /*
2621 * OK loop until we reach one of the ending char or a size limit.
2622 * we are operating on already parsed values.
2623 */
2624 if (str < last)
2625 c = CUR_SCHAR(str, l);
2626 else
2627 c = 0;
2628 while ((c != 0) && (c != end) && /* non input consuming loop */
2629 (c != end2) && (c != end3)) {
2630
2631 if (c == 0) break;
2632 if ((c == '&') && (str[1] == '#')) {
2633 int val = xmlParseStringCharRef(ctxt, &str);
2634 if (val == 0)
2635 goto int_error;
2636 COPY_BUF(0,buffer,nbchars,val);
2637 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2638 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2639 }
2640 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2641 if (xmlParserDebugEntities)
2642 xmlGenericError(xmlGenericErrorContext,
2643 "String decoding Entity Reference: %.30s\n",
2644 str);
2645 ent = xmlParseStringEntityRef(ctxt, &str);
2646 xmlParserEntityCheck(ctxt, 0, ent, 0);
2647 if (ent != NULL)
2648 ctxt->nbentities += ent->checked / 2;
2649 if ((ent != NULL) &&
2650 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2651 if (ent->content != NULL) {
2652 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2653 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2654 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2655 }
2656 } else {
2657 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2658 "predefined entity has no content\n");
2659 goto int_error;
2660 }
2661 } else if ((ent != NULL) && (ent->content != NULL)) {
2662 ctxt->depth++;
2663 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2664 0, 0, 0);
2665 ctxt->depth--;
2666 if (rep == NULL)
2667 goto int_error;
2668
2669 current = rep;
2670 while (*current != 0) { /* non input consuming loop */
2671 buffer[nbchars++] = *current++;
2672 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2673 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2674 goto int_error;
2675 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2676 }
2677 }
2678 xmlFree(rep);
2679 rep = NULL;
2680 } else if (ent != NULL) {
2681 int i = xmlStrlen(ent->name);
2682 const xmlChar *cur = ent->name;
2683
2684 buffer[nbchars++] = '&';
2685 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2686 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2687 }
2688 for (;i > 0;i--)
2689 buffer[nbchars++] = *cur++;
2690 buffer[nbchars++] = ';';
2691 }
2692 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2693 if (xmlParserDebugEntities)
2694 xmlGenericError(xmlGenericErrorContext,
2695 "String decoding PE Reference: %.30s\n", str);
2696 ent = xmlParseStringPEReference(ctxt, &str);
2697 xmlParserEntityCheck(ctxt, 0, ent, 0);
2698 if (ent != NULL)
2699 ctxt->nbentities += ent->checked / 2;
2700 if (ent != NULL) {
2701 if (ent->content == NULL) {
2702 /*
2703 * Note: external parsed entities will not be loaded,
2704 * it is not required for a non-validating parser to
2705 * complete external PEreferences coming from the
2706 * internal subset
2707 */
2708 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2709 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2710 (ctxt->validate != 0)) {
2711 xmlLoadEntityContent(ctxt, ent);
2712 } else {
2713 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2714 "not validating will not read content for PE entity %s\n",
2715 ent->name, NULL);
2716 }
2717 }
2718 ctxt->depth++;
2719 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2720 0, 0, 0);
2721 ctxt->depth--;
2722 if (rep == NULL)
2723 goto int_error;
2724 current = rep;
2725 while (*current != 0) { /* non input consuming loop */
2726 buffer[nbchars++] = *current++;
2727 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2728 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2729 goto int_error;
2730 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2731 }
2732 }
2733 xmlFree(rep);
2734 rep = NULL;
2735 }
2736 } else {
2737 COPY_BUF(l,buffer,nbchars,c);
2738 str += l;
2739 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2740 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2741 }
2742 }
2743 if (str < last)
2744 c = CUR_SCHAR(str, l);
2745 else
2746 c = 0;
2747 }
2748 buffer[nbchars] = 0;
2749 return(buffer);
2750
2751 mem_error:
2752 xmlErrMemory(ctxt, NULL);
2753 int_error:
2754 if (rep != NULL)
2755 xmlFree(rep);
2756 if (buffer != NULL)
2757 xmlFree(buffer);
2758 return(NULL);
2759 }
2760
2761 /**
2762 * xmlStringDecodeEntities:
2763 * @ctxt: the parser context
2764 * @str: the input string
2765 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2766 * @end: an end marker xmlChar, 0 if none
2767 * @end2: an end marker xmlChar, 0 if none
2768 * @end3: an end marker xmlChar, 0 if none
2769 *
2770 * Takes a entity string content and process to do the adequate substitutions.
2771 *
2772 * [67] Reference ::= EntityRef | CharRef
2773 *
2774 * [69] PEReference ::= '%' Name ';'
2775 *
2776 * Returns A newly allocated string with the substitution done. The caller
2777 * must deallocate it !
2778 */
2779 xmlChar *
2780 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2781 xmlChar end, xmlChar end2, xmlChar end3) {
2782 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2783 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2784 end, end2, end3));
2785 }
2786
2787 /************************************************************************
2788 * *
2789 * Commodity functions, cleanup needed ? *
2790 * *
2791 ************************************************************************/
2792
2793 /**
2794 * areBlanks:
2795 * @ctxt: an XML parser context
2796 * @str: a xmlChar *
2797 * @len: the size of @str
2798 * @blank_chars: we know the chars are blanks
2799 *
2800 * Is this a sequence of blank chars that one can ignore ?
2801 *
2802 * Returns 1 if ignorable 0 otherwise.
2803 */
2804
2805 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2806 int blank_chars) {
2807 int i, ret;
2808 xmlNodePtr lastChild;
2809
2810 /*
2811 * Don't spend time trying to differentiate them, the same callback is
2812 * used !
2813 */
2814 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2815 return(0);
2816
2817 /*
2818 * Check for xml:space value.
2819 */
2820 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2821 (*(ctxt->space) == -2))
2822 return(0);
2823
2824 /*
2825 * Check that the string is made of blanks
2826 */
2827 if (blank_chars == 0) {
2828 for (i = 0;i < len;i++)
2829 if (!(IS_BLANK_CH(str[i]))) return(0);
2830 }
2831
2832 /*
2833 * Look if the element is mixed content in the DTD if available
2834 */
2835 if (ctxt->node == NULL) return(0);
2836 if (ctxt->myDoc != NULL) {
2837 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2838 if (ret == 0) return(1);
2839 if (ret == 1) return(0);
2840 }
2841
2842 /*
2843 * Otherwise, heuristic :-\
2844 */
2845 if ((RAW != '<') && (RAW != 0xD)) return(0);
2846 if ((ctxt->node->children == NULL) &&
2847 (RAW == '<') && (NXT(1) == '/')) return(0);
2848
2849 lastChild = xmlGetLastChild(ctxt->node);
2850 if (lastChild == NULL) {
2851 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2852 (ctxt->node->content != NULL)) return(0);
2853 } else if (xmlNodeIsText(lastChild))
2854 return(0);
2855 else if ((ctxt->node->children != NULL) &&
2856 (xmlNodeIsText(ctxt->node->children)))
2857 return(0);
2858 return(1);
2859 }
2860
2861 /************************************************************************
2862 * *
2863 * Extra stuff for namespace support *
2864 * Relates to http://www.w3.org/TR/WD-xml-names *
2865 * *
2866 ************************************************************************/
2867
2868 /**
2869 * xmlSplitQName:
2870 * @ctxt: an XML parser context
2871 * @name: an XML parser context
2872 * @prefix: a xmlChar **
2873 *
2874 * parse an UTF8 encoded XML qualified name string
2875 *
2876 * [NS 5] QName ::= (Prefix ':')? LocalPart
2877 *
2878 * [NS 6] Prefix ::= NCName
2879 *
2880 * [NS 7] LocalPart ::= NCName
2881 *
2882 * Returns the local part, and prefix is updated
2883 * to get the Prefix if any.
2884 */
2885
2886 xmlChar *
2887 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2888 xmlChar buf[XML_MAX_NAMELEN + 5];
2889 xmlChar *buffer = NULL;
2890 int len = 0;
2891 int max = XML_MAX_NAMELEN;
2892 xmlChar *ret = NULL;
2893 const xmlChar *cur = name;
2894 int c;
2895
2896 if (prefix == NULL) return(NULL);
2897 *prefix = NULL;
2898
2899 if (cur == NULL) return(NULL);
2900
2901 #ifndef XML_XML_NAMESPACE
2902 /* xml: prefix is not really a namespace */
2903 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2904 (cur[2] == 'l') && (cur[3] == ':'))
2905 return(xmlStrdup(name));
2906 #endif
2907
2908 /* nasty but well=formed */
2909 if (cur[0] == ':')
2910 return(xmlStrdup(name));
2911
2912 c = *cur++;
2913 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2914 buf[len++] = c;
2915 c = *cur++;
2916 }
2917 if (len >= max) {
2918 /*
2919 * Okay someone managed to make a huge name, so he's ready to pay
2920 * for the processing speed.
2921 */
2922 max = len * 2;
2923
2924 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2925 if (buffer == NULL) {
2926 xmlErrMemory(ctxt, NULL);
2927 return(NULL);
2928 }
2929 memcpy(buffer, buf, len);
2930 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2931 if (len + 10 > max) {
2932 xmlChar *tmp;
2933
2934 max *= 2;
2935 tmp = (xmlChar *) xmlRealloc(buffer,
2936 max * sizeof(xmlChar));
2937 if (tmp == NULL) {
2938 xmlFree(buffer);
2939 xmlErrMemory(ctxt, NULL);
2940 return(NULL);
2941 }
2942 buffer = tmp;
2943 }
2944 buffer[len++] = c;
2945 c = *cur++;
2946 }
2947 buffer[len] = 0;
2948 }
2949
2950 if ((c == ':') && (*cur == 0)) {
2951 if (buffer != NULL)
2952 xmlFree(buffer);
2953 *prefix = NULL;
2954 return(xmlStrdup(name));
2955 }
2956
2957 if (buffer == NULL)
2958 ret = xmlStrndup(buf, len);
2959 else {
2960 ret = buffer;
2961 buffer = NULL;
2962 max = XML_MAX_NAMELEN;
2963 }
2964
2965
2966 if (c == ':') {
2967 c = *cur;
2968 *prefix = ret;
2969 if (c == 0) {
2970 return(xmlStrndup(BAD_CAST "", 0));
2971 }
2972 len = 0;
2973
2974 /*
2975 * Check that the first character is proper to start
2976 * a new name
2977 */
2978 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2979 ((c >= 0x41) && (c <= 0x5A)) ||
2980 (c == '_') || (c == ':'))) {
2981 int l;
2982 int first = CUR_SCHAR(cur, l);
2983
2984 if (!IS_LETTER(first) && (first != '_')) {
2985 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2986 "Name %s is not XML Namespace compliant\n",
2987 name);
2988 }
2989 }
2990 cur++;
2991
2992 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2993 buf[len++] = c;
2994 c = *cur++;
2995 }
2996 if (len >= max) {
2997 /*
2998 * Okay someone managed to make a huge name, so he's ready to pay
2999 * for the processing speed.
3000 */
3001 max = len * 2;
3002
3003 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3004 if (buffer == NULL) {
3005 xmlErrMemory(ctxt, NULL);
3006 return(NULL);
3007 }
3008 memcpy(buffer, buf, len);
3009 while (c != 0) { /* tested bigname2.xml */
3010 if (len + 10 > max) {
3011 xmlChar *tmp;
3012
3013 max *= 2;
3014 tmp = (xmlChar *) xmlRealloc(buffer,
3015 max * sizeof(xmlChar));
3016 if (tmp == NULL) {
3017 xmlErrMemory(ctxt, NULL);
3018 xmlFree(buffer);
3019 return(NULL);
3020 }
3021 buffer = tmp;
3022 }
3023 buffer[len++] = c;
3024 c = *cur++;
3025 }
3026 buffer[len] = 0;
3027 }
3028
3029 if (buffer == NULL)
3030 ret = xmlStrndup(buf, len);
3031 else {
3032 ret = buffer;
3033 }
3034 }
3035
3036 return(ret);
3037 }
3038
3039 /************************************************************************
3040 * *
3041 * The parser itself *
3042 * Relates to http://www.w3.org/TR/REC-xml *
3043 * *
3044 ************************************************************************/
3045
3046 /************************************************************************
3047 * *
3048 * Routines to parse Name, NCName and NmToken *
3049 * *
3050 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, compiled in only when DEBUG is defined.
 * NOTE(review): presumably each one is incremented by the parsing routine
 * it is named after -- confirm against the routines themselves.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3059
/*
 * The two following functions are related to the change of accepted
 * characters for Name and NmToken in Revision 5 of XML-1.0.
 * They correspond to the modified production [4] and the new production
 * [4a] introduced by that revision. Also note that the macros used for the
 * productions Letter, Digit, CombiningChar and Extender are not needed
 * anymore.
 * We still keep compatibility with the pre-revision-5 parsing semantics if
 * the new XML_PARSE_OLD10 option is given to the parser.
 */
3070 static int
3071 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3072 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3073 /*
3074 * Use the new checks of production [4] [4a] amd [5] of the
3075 * Update 5 of XML-1.0
3076 */
3077 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3078 (((c >= 'a') && (c <= 'z')) ||
3079 ((c >= 'A') && (c <= 'Z')) ||
3080 (c == '_') || (c == ':') ||
3081 ((c >= 0xC0) && (c <= 0xD6)) ||
3082 ((c >= 0xD8) && (c <= 0xF6)) ||
3083 ((c >= 0xF8) && (c <= 0x2FF)) ||
3084 ((c >= 0x370) && (c <= 0x37D)) ||
3085 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3086 ((c >= 0x200C) && (c <= 0x200D)) ||
3087 ((c >= 0x2070) && (c <= 0x218F)) ||
3088 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3089 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3090 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3091 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3092 ((c >= 0x10000) && (c <= 0xEFFFF))))
3093 return(1);
3094 } else {
3095 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3096 return(1);
3097 }
3098 return(0);
3099 }
3100
3101 static int
3102 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3103 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3104 /*
3105 * Use the new checks of production [4] [4a] amd [5] of the
3106 * Update 5 of XML-1.0
3107 */
3108 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3109 (((c >= 'a') && (c <= 'z')) ||
3110 ((c >= 'A') && (c <= 'Z')) ||
3111 ((c >= '0') && (c <= '9')) || /* !start */
3112 (c == '_') || (c == ':') ||
3113 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3114 ((c >= 0xC0) && (c <= 0xD6)) ||
3115 ((c >= 0xD8) && (c <= 0xF6)) ||
3116 ((c >= 0xF8) && (c <= 0x2FF)) ||
3117 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3118 ((c >= 0x370) && (c <= 0x37D)) ||
3119 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3120 ((c >= 0x200C) && (c <= 0x200D)) ||
3121 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3122 ((c >= 0x2070) && (c <= 0x218F)) ||
3123 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3124 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3125 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3126 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3127 ((c >= 0x10000) && (c <= 0xEFFFF))))
3128 return(1);
3129 } else {
3130 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3131 (c == '.') || (c == '-') ||
3132 (c == '_') || (c == ':') ||
3133 (IS_COMBINING(c)) ||
3134 (IS_EXTENDER(c)))
3135 return(1);
3136 }
3137 return(0);
3138 }
3139
3140 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3141 int *len, int *alloc, int normalize);
3142
3143 static const xmlChar *
3144 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3145 int len = 0, l;
3146 int c;
3147 int count = 0;
3148
3149 #ifdef DEBUG
3150 nbParseNameComplex++;
3151 #endif
3152
3153 /*
3154 * Handler for more complex cases
3155 */
3156 GROW;
3157 if (ctxt->instate == XML_PARSER_EOF)
3158 return(NULL);
3159 c = CUR_CHAR(l);
3160 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3161 /*
3162 * Use the new checks of production [4] [4a] amd [5] of the
3163 * Update 5 of XML-1.0
3164 */
3165 if ((c ==