[LIBXML2] Update to v2.9.3.
[reactos.git] / reactos / lib / 3rdparty / libxml2 / parser.c
1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33 #define IN_LIBXML
34 #include "libxml.h"
35
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
38 #else
39 #define XML_DIR_SEP '/'
40 #endif
41
42 #include <stdlib.h>
43 #include <limits.h>
44 #include <string.h>
45 #include <stdarg.h>
46 #include <libxml/xmlmemory.h>
47 #include <libxml/threads.h>
48 #include <libxml/globals.h>
49 #include <libxml/tree.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/valid.h>
53 #include <libxml/entities.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/encoding.h>
56 #include <libxml/xmlIO.h>
57 #include <libxml/uri.h>
58 #ifdef LIBXML_CATALOG_ENABLED
59 #include <libxml/catalog.h>
60 #endif
61 #ifdef LIBXML_SCHEMAS_ENABLED
62 #include <libxml/xmlschemastypes.h>
63 #include <libxml/relaxng.h>
64 #endif
65 #ifdef HAVE_CTYPE_H
66 #include <ctype.h>
67 #endif
68 #ifdef HAVE_STDLIB_H
69 #include <stdlib.h>
70 #endif
71 #ifdef HAVE_SYS_STAT_H
72 #include <sys/stat.h>
73 #endif
74 #ifdef HAVE_FCNTL_H
75 #include <fcntl.h>
76 #endif
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80 #ifdef HAVE_ZLIB_H
81 #include <zlib.h>
82 #endif
83 #ifdef HAVE_LZMA_H
84 #include <lzma.h>
85 #endif
86
87 #include "buf.h"
88 #include "enc.h"
89
/*
 * Forward declarations of file-local (static) routines.
 * xmlFatalErr is called by xmlParserEntityCheck before its definition
 * further down in this file.
 */
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);

static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
                                  const xmlChar *base, xmlParserCtxtPtr pctx);

static void xmlHaltParser(xmlParserCtxtPtr ctxt);
98
99 /************************************************************************
100 * *
101 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
102 * *
103 ************************************************************************/
104
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
115
116 /*
117 * xmlParserEntityCheck
118 *
119 * Function to check non-linear entity expansion behaviour
120 * This is here to detect and stop exponential linear entity expansion
121 * This is not a limitation of the parser but a safety
122 * boundary feature. It can be disabled with the XML_PARSE_HUGE
123 * parser option.
124 */
125 static int
126 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
127 xmlEntityPtr ent, size_t replacement)
128 {
129 size_t consumed = 0;
130
131 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
132 return (0);
133 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
134 return (1);
135
136 /*
137 * This may look absurd but is needed to detect
138 * entities problems
139 */
140 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
141 (ent->content != NULL) && (ent->checked == 0)) {
142 unsigned long oldnbent = ctxt->nbentities;
143 xmlChar *rep;
144
145 ent->checked = 1;
146
147 rep = xmlStringDecodeEntities(ctxt, ent->content,
148 XML_SUBSTITUTE_REF, 0, 0, 0);
149
150 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
151 if (rep != NULL) {
152 if (xmlStrchr(rep, '<'))
153 ent->checked |= 1;
154 xmlFree(rep);
155 rep = NULL;
156 }
157 }
158 if (replacement != 0) {
159 if (replacement < XML_MAX_TEXT_LENGTH)
160 return(0);
161
162 /*
163 * If the volume of entity copy reaches 10 times the
164 * amount of parsed data and over the large text threshold
165 * then that's very likely to be an abuse.
166 */
167 if (ctxt->input != NULL) {
168 consumed = ctxt->input->consumed +
169 (ctxt->input->cur - ctxt->input->base);
170 }
171 consumed += ctxt->sizeentities;
172
173 if (replacement < XML_PARSER_NON_LINEAR * consumed)
174 return(0);
175 } else if (size != 0) {
176 /*
177 * Do the check based on the replacement size of the entity
178 */
179 if (size < XML_PARSER_BIG_ENTITY)
180 return(0);
181
182 /*
183 * A limit on the amount of text data reasonably used
184 */
185 if (ctxt->input != NULL) {
186 consumed = ctxt->input->consumed +
187 (ctxt->input->cur - ctxt->input->base);
188 }
189 consumed += ctxt->sizeentities;
190
191 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
192 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
193 return (0);
194 } else if (ent != NULL) {
195 /*
196 * use the number of parsed entities in the replacement
197 */
198 size = ent->checked / 2;
199
200 /*
201 * The amount of data parsed counting entities size only once
202 */
203 if (ctxt->input != NULL) {
204 consumed = ctxt->input->consumed +
205 (ctxt->input->cur - ctxt->input->base);
206 }
207 consumed += ctxt->sizeentities;
208
209 /*
210 * Check the density of entities for the amount of data
211 * knowing an entity reference will take at least 3 bytes
212 */
213 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
214 return (0);
215 } else {
216 /*
217 * strange we got no data for checking
218 */
219 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
220 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
221 (ctxt->nbentities <= 10000))
222 return (0);
223 }
224 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
225 return (1);
226 }
227
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option. The default value is 256.
 */
unsigned int xmlParserMaxDepth = 256;
237
238
239
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * Minimal amount of data the parser expects to have received when
 * calling GROW. It is not a hard limit but an optimization when reading
 * strings like Names. It is not strictly needed as long as the available
 * input characters are followed by a 0, which should be provided by the
 * I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
255
/*
 * List of "xml"-prefixed processing instruction targets allowed by the
 * W3C specs. The array is terminated by a NULL entry.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
265
266
/*
 * Forward declarations of file-local (static) routines that are
 * referenced before their definitions.
 */
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                              const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                              xmlSAXHandlerPtr sax,
                              void *user_data, int depth, const xmlChar *URL,
                              const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
#ifdef LIBXML_LEGACY_ENABLED
/* only declared when legacy support is compiled in */
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
                                    const xmlChar *string, void *user_data, xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
292
293 /************************************************************************
294 * *
295 * Some factorized error routines *
296 * *
297 ************************************************************************/
298
299 /**
300 * xmlErrAttributeDup:
301 * @ctxt: an XML parser context
302 * @prefix: the attribute prefix
303 * @localname: the attribute localname
304 *
305 * Handle a redefinition of attribute error
306 */
307 static void
308 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
309 const xmlChar * localname)
310 {
311 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
312 (ctxt->instate == XML_PARSER_EOF))
313 return;
314 if (ctxt != NULL)
315 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
316
317 if (prefix == NULL)
318 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
319 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
320 (const char *) localname, NULL, NULL, 0, 0,
321 "Attribute %s redefined\n", localname);
322 else
323 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
324 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
325 (const char *) prefix, (const char *) localname,
326 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
327 localname);
328 if (ctxt != NULL) {
329 ctxt->wellFormed = 0;
330 if (ctxt->recovery == 0)
331 ctxt->disableSAX = 1;
332 }
333 }
334
335 /**
336 * xmlFatalErr:
337 * @ctxt: an XML parser context
338 * @error: the error number
339 * @extra: extra information string
340 *
341 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
342 */
343 static void
344 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
345 {
346 const char *errmsg;
347 char errstr[129] = "";
348
349 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
350 (ctxt->instate == XML_PARSER_EOF))
351 return;
352 switch (error) {
353 case XML_ERR_INVALID_HEX_CHARREF:
354 errmsg = "CharRef: invalid hexadecimal value";
355 break;
356 case XML_ERR_INVALID_DEC_CHARREF:
357 errmsg = "CharRef: invalid decimal value";
358 break;
359 case XML_ERR_INVALID_CHARREF:
360 errmsg = "CharRef: invalid value";
361 break;
362 case XML_ERR_INTERNAL_ERROR:
363 errmsg = "internal error";
364 break;
365 case XML_ERR_PEREF_AT_EOF:
366 errmsg = "PEReference at end of document";
367 break;
368 case XML_ERR_PEREF_IN_PROLOG:
369 errmsg = "PEReference in prolog";
370 break;
371 case XML_ERR_PEREF_IN_EPILOG:
372 errmsg = "PEReference in epilog";
373 break;
374 case XML_ERR_PEREF_NO_NAME:
375 errmsg = "PEReference: no name";
376 break;
377 case XML_ERR_PEREF_SEMICOL_MISSING:
378 errmsg = "PEReference: expecting ';'";
379 break;
380 case XML_ERR_ENTITY_LOOP:
381 errmsg = "Detected an entity reference loop";
382 break;
383 case XML_ERR_ENTITY_NOT_STARTED:
384 errmsg = "EntityValue: \" or ' expected";
385 break;
386 case XML_ERR_ENTITY_PE_INTERNAL:
387 errmsg = "PEReferences forbidden in internal subset";
388 break;
389 case XML_ERR_ENTITY_NOT_FINISHED:
390 errmsg = "EntityValue: \" or ' expected";
391 break;
392 case XML_ERR_ATTRIBUTE_NOT_STARTED:
393 errmsg = "AttValue: \" or ' expected";
394 break;
395 case XML_ERR_LT_IN_ATTRIBUTE:
396 errmsg = "Unescaped '<' not allowed in attributes values";
397 break;
398 case XML_ERR_LITERAL_NOT_STARTED:
399 errmsg = "SystemLiteral \" or ' expected";
400 break;
401 case XML_ERR_LITERAL_NOT_FINISHED:
402 errmsg = "Unfinished System or Public ID \" or ' expected";
403 break;
404 case XML_ERR_MISPLACED_CDATA_END:
405 errmsg = "Sequence ']]>' not allowed in content";
406 break;
407 case XML_ERR_URI_REQUIRED:
408 errmsg = "SYSTEM or PUBLIC, the URI is missing";
409 break;
410 case XML_ERR_PUBID_REQUIRED:
411 errmsg = "PUBLIC, the Public Identifier is missing";
412 break;
413 case XML_ERR_HYPHEN_IN_COMMENT:
414 errmsg = "Comment must not contain '--' (double-hyphen)";
415 break;
416 case XML_ERR_PI_NOT_STARTED:
417 errmsg = "xmlParsePI : no target name";
418 break;
419 case XML_ERR_RESERVED_XML_NAME:
420 errmsg = "Invalid PI name";
421 break;
422 case XML_ERR_NOTATION_NOT_STARTED:
423 errmsg = "NOTATION: Name expected here";
424 break;
425 case XML_ERR_NOTATION_NOT_FINISHED:
426 errmsg = "'>' required to close NOTATION declaration";
427 break;
428 case XML_ERR_VALUE_REQUIRED:
429 errmsg = "Entity value required";
430 break;
431 case XML_ERR_URI_FRAGMENT:
432 errmsg = "Fragment not allowed";
433 break;
434 case XML_ERR_ATTLIST_NOT_STARTED:
435 errmsg = "'(' required to start ATTLIST enumeration";
436 break;
437 case XML_ERR_NMTOKEN_REQUIRED:
438 errmsg = "NmToken expected in ATTLIST enumeration";
439 break;
440 case XML_ERR_ATTLIST_NOT_FINISHED:
441 errmsg = "')' required to finish ATTLIST enumeration";
442 break;
443 case XML_ERR_MIXED_NOT_STARTED:
444 errmsg = "MixedContentDecl : '|' or ')*' expected";
445 break;
446 case XML_ERR_PCDATA_REQUIRED:
447 errmsg = "MixedContentDecl : '#PCDATA' expected";
448 break;
449 case XML_ERR_ELEMCONTENT_NOT_STARTED:
450 errmsg = "ContentDecl : Name or '(' expected";
451 break;
452 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
453 errmsg = "ContentDecl : ',' '|' or ')' expected";
454 break;
455 case XML_ERR_PEREF_IN_INT_SUBSET:
456 errmsg =
457 "PEReference: forbidden within markup decl in internal subset";
458 break;
459 case XML_ERR_GT_REQUIRED:
460 errmsg = "expected '>'";
461 break;
462 case XML_ERR_CONDSEC_INVALID:
463 errmsg = "XML conditional section '[' expected";
464 break;
465 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
466 errmsg = "Content error in the external subset";
467 break;
468 case XML_ERR_CONDSEC_INVALID_KEYWORD:
469 errmsg =
470 "conditional section INCLUDE or IGNORE keyword expected";
471 break;
472 case XML_ERR_CONDSEC_NOT_FINISHED:
473 errmsg = "XML conditional section not closed";
474 break;
475 case XML_ERR_XMLDECL_NOT_STARTED:
476 errmsg = "Text declaration '<?xml' required";
477 break;
478 case XML_ERR_XMLDECL_NOT_FINISHED:
479 errmsg = "parsing XML declaration: '?>' expected";
480 break;
481 case XML_ERR_EXT_ENTITY_STANDALONE:
482 errmsg = "external parsed entities cannot be standalone";
483 break;
484 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
485 errmsg = "EntityRef: expecting ';'";
486 break;
487 case XML_ERR_DOCTYPE_NOT_FINISHED:
488 errmsg = "DOCTYPE improperly terminated";
489 break;
490 case XML_ERR_LTSLASH_REQUIRED:
491 errmsg = "EndTag: '</' not found";
492 break;
493 case XML_ERR_EQUAL_REQUIRED:
494 errmsg = "expected '='";
495 break;
496 case XML_ERR_STRING_NOT_CLOSED:
497 errmsg = "String not closed expecting \" or '";
498 break;
499 case XML_ERR_STRING_NOT_STARTED:
500 errmsg = "String not started expecting ' or \"";
501 break;
502 case XML_ERR_ENCODING_NAME:
503 errmsg = "Invalid XML encoding name";
504 break;
505 case XML_ERR_STANDALONE_VALUE:
506 errmsg = "standalone accepts only 'yes' or 'no'";
507 break;
508 case XML_ERR_DOCUMENT_EMPTY:
509 errmsg = "Document is empty";
510 break;
511 case XML_ERR_DOCUMENT_END:
512 errmsg = "Extra content at the end of the document";
513 break;
514 case XML_ERR_NOT_WELL_BALANCED:
515 errmsg = "chunk is not well balanced";
516 break;
517 case XML_ERR_EXTRA_CONTENT:
518 errmsg = "extra content at the end of well balanced chunk";
519 break;
520 case XML_ERR_VERSION_MISSING:
521 errmsg = "Malformed declaration expecting version";
522 break;
523 case XML_ERR_NAME_TOO_LONG:
524 errmsg = "Name too long use XML_PARSE_HUGE option";
525 break;
526 #if 0
527 case:
528 errmsg = "";
529 break;
530 #endif
531 default:
532 errmsg = "Unregistered error message";
533 }
534 if (info == NULL)
535 snprintf(errstr, 128, "%s\n", errmsg);
536 else
537 snprintf(errstr, 128, "%s: %%s\n", errmsg);
538 if (ctxt != NULL)
539 ctxt->errNo = error;
540 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
541 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
542 info);
543 if (ctxt != NULL) {
544 ctxt->wellFormed = 0;
545 if (ctxt->recovery == 0)
546 ctxt->disableSAX = 1;
547 }
548 }
549
550 /**
551 * xmlFatalErrMsg:
552 * @ctxt: an XML parser context
553 * @error: the error number
554 * @msg: the error message
555 *
556 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
557 */
558 static void
559 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560 const char *msg)
561 {
562 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
563 (ctxt->instate == XML_PARSER_EOF))
564 return;
565 if (ctxt != NULL)
566 ctxt->errNo = error;
567 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
568 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
569 if (ctxt != NULL) {
570 ctxt->wellFormed = 0;
571 if (ctxt->recovery == 0)
572 ctxt->disableSAX = 1;
573 }
574 }
575
576 /**
577 * xmlWarningMsg:
578 * @ctxt: an XML parser context
579 * @error: the error number
580 * @msg: the error message
581 * @str1: extra data
582 * @str2: extra data
583 *
584 * Handle a warning.
585 */
586 static void
587 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
588 const char *msg, const xmlChar *str1, const xmlChar *str2)
589 {
590 xmlStructuredErrorFunc schannel = NULL;
591
592 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
593 (ctxt->instate == XML_PARSER_EOF))
594 return;
595 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
596 (ctxt->sax->initialized == XML_SAX2_MAGIC))
597 schannel = ctxt->sax->serror;
598 if (ctxt != NULL) {
599 __xmlRaiseError(schannel,
600 (ctxt->sax) ? ctxt->sax->warning : NULL,
601 ctxt->userData,
602 ctxt, NULL, XML_FROM_PARSER, error,
603 XML_ERR_WARNING, NULL, 0,
604 (const char *) str1, (const char *) str2, NULL, 0, 0,
605 msg, (const char *) str1, (const char *) str2);
606 } else {
607 __xmlRaiseError(schannel, NULL, NULL,
608 ctxt, NULL, XML_FROM_PARSER, error,
609 XML_ERR_WARNING, NULL, 0,
610 (const char *) str1, (const char *) str2, NULL, 0, 0,
611 msg, (const char *) str1, (const char *) str2);
612 }
613 }
614
615 /**
616 * xmlValidityError:
617 * @ctxt: an XML parser context
618 * @error: the error number
619 * @msg: the error message
620 * @str1: extra data
621 *
622 * Handle a validity error.
623 */
624 static void
625 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
626 const char *msg, const xmlChar *str1, const xmlChar *str2)
627 {
628 xmlStructuredErrorFunc schannel = NULL;
629
630 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
631 (ctxt->instate == XML_PARSER_EOF))
632 return;
633 if (ctxt != NULL) {
634 ctxt->errNo = error;
635 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
636 schannel = ctxt->sax->serror;
637 }
638 if (ctxt != NULL) {
639 __xmlRaiseError(schannel,
640 ctxt->vctxt.error, ctxt->vctxt.userData,
641 ctxt, NULL, XML_FROM_DTD, error,
642 XML_ERR_ERROR, NULL, 0, (const char *) str1,
643 (const char *) str2, NULL, 0, 0,
644 msg, (const char *) str1, (const char *) str2);
645 ctxt->valid = 0;
646 } else {
647 __xmlRaiseError(schannel, NULL, NULL,
648 ctxt, NULL, XML_FROM_DTD, error,
649 XML_ERR_ERROR, NULL, 0, (const char *) str1,
650 (const char *) str2, NULL, 0, 0,
651 msg, (const char *) str1, (const char *) str2);
652 }
653 }
654
655 /**
656 * xmlFatalErrMsgInt:
657 * @ctxt: an XML parser context
658 * @error: the error number
659 * @msg: the error message
660 * @val: an integer value
661 *
662 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
663 */
664 static void
665 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
666 const char *msg, int val)
667 {
668 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
669 (ctxt->instate == XML_PARSER_EOF))
670 return;
671 if (ctxt != NULL)
672 ctxt->errNo = error;
673 __xmlRaiseError(NULL, NULL, NULL,
674 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
675 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
676 if (ctxt != NULL) {
677 ctxt->wellFormed = 0;
678 if (ctxt->recovery == 0)
679 ctxt->disableSAX = 1;
680 }
681 }
682
683 /**
684 * xmlFatalErrMsgStrIntStr:
685 * @ctxt: an XML parser context
686 * @error: the error number
687 * @msg: the error message
688 * @str1: an string info
689 * @val: an integer value
690 * @str2: an string info
691 *
692 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
693 */
694 static void
695 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
696 const char *msg, const xmlChar *str1, int val,
697 const xmlChar *str2)
698 {
699 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
700 (ctxt->instate == XML_PARSER_EOF))
701 return;
702 if (ctxt != NULL)
703 ctxt->errNo = error;
704 __xmlRaiseError(NULL, NULL, NULL,
705 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
706 NULL, 0, (const char *) str1, (const char *) str2,
707 NULL, val, 0, msg, str1, val, str2);
708 if (ctxt != NULL) {
709 ctxt->wellFormed = 0;
710 if (ctxt->recovery == 0)
711 ctxt->disableSAX = 1;
712 }
713 }
714
715 /**
716 * xmlFatalErrMsgStr:
717 * @ctxt: an XML parser context
718 * @error: the error number
719 * @msg: the error message
720 * @val: a string value
721 *
722 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
723 */
724 static void
725 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
726 const char *msg, const xmlChar * val)
727 {
728 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
729 (ctxt->instate == XML_PARSER_EOF))
730 return;
731 if (ctxt != NULL)
732 ctxt->errNo = error;
733 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
734 XML_FROM_PARSER, error, XML_ERR_FATAL,
735 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
736 val);
737 if (ctxt != NULL) {
738 ctxt->wellFormed = 0;
739 if (ctxt->recovery == 0)
740 ctxt->disableSAX = 1;
741 }
742 }
743
744 /**
745 * xmlErrMsgStr:
746 * @ctxt: an XML parser context
747 * @error: the error number
748 * @msg: the error message
749 * @val: a string value
750 *
751 * Handle a non fatal parser error
752 */
753 static void
754 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
755 const char *msg, const xmlChar * val)
756 {
757 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
758 (ctxt->instate == XML_PARSER_EOF))
759 return;
760 if (ctxt != NULL)
761 ctxt->errNo = error;
762 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
763 XML_FROM_PARSER, error, XML_ERR_ERROR,
764 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
765 val);
766 }
767
768 /**
769 * xmlNsErr:
770 * @ctxt: an XML parser context
771 * @error: the error number
772 * @msg: the message
773 * @info1: extra information string
774 * @info2: extra information string
775 *
776 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
777 */
778 static void
779 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
780 const char *msg,
781 const xmlChar * info1, const xmlChar * info2,
782 const xmlChar * info3)
783 {
784 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
785 (ctxt->instate == XML_PARSER_EOF))
786 return;
787 if (ctxt != NULL)
788 ctxt->errNo = error;
789 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
790 XML_ERR_ERROR, NULL, 0, (const char *) info1,
791 (const char *) info2, (const char *) info3, 0, 0, msg,
792 info1, info2, info3);
793 if (ctxt != NULL)
794 ctxt->nsWellFormed = 0;
795 }
796
797 /**
798 * xmlNsWarn
799 * @ctxt: an XML parser context
800 * @error: the error number
801 * @msg: the message
802 * @info1: extra information string
803 * @info2: extra information string
804 *
805 * Handle a namespace warning error
806 */
807 static void
808 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
809 const char *msg,
810 const xmlChar * info1, const xmlChar * info2,
811 const xmlChar * info3)
812 {
813 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
814 (ctxt->instate == XML_PARSER_EOF))
815 return;
816 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
817 XML_ERR_WARNING, NULL, 0, (const char *) info1,
818 (const char *) info2, (const char *) info3, 0, 0, msg,
819 info1, info2, info3);
820 }
821
822 /************************************************************************
823 * *
824 * Library wide options *
825 * *
826 ************************************************************************/
827
828 /**
829 * xmlHasFeature:
830 * @feature: the feature to be examined
831 *
832 * Examines if the library has been compiled with a given feature.
833 *
834 * Returns a non-zero value if the feature exist, otherwise zero.
835 * Returns zero (0) if the feature does not exist or an unknown
836 * unknown feature is requested, non-zero otherwise.
837 */
838 int
839 xmlHasFeature(xmlFeature feature)
840 {
841 switch (feature) {
842 case XML_WITH_THREAD:
843 #ifdef LIBXML_THREAD_ENABLED
844 return(1);
845 #else
846 return(0);
847 #endif
848 case XML_WITH_TREE:
849 #ifdef LIBXML_TREE_ENABLED
850 return(1);
851 #else
852 return(0);
853 #endif
854 case XML_WITH_OUTPUT:
855 #ifdef LIBXML_OUTPUT_ENABLED
856 return(1);
857 #else
858 return(0);
859 #endif
860 case XML_WITH_PUSH:
861 #ifdef LIBXML_PUSH_ENABLED
862 return(1);
863 #else
864 return(0);
865 #endif
866 case XML_WITH_READER:
867 #ifdef LIBXML_READER_ENABLED
868 return(1);
869 #else
870 return(0);
871 #endif
872 case XML_WITH_PATTERN:
873 #ifdef LIBXML_PATTERN_ENABLED
874 return(1);
875 #else
876 return(0);
877 #endif
878 case XML_WITH_WRITER:
879 #ifdef LIBXML_WRITER_ENABLED
880 return(1);
881 #else
882 return(0);
883 #endif
884 case XML_WITH_SAX1:
885 #ifdef LIBXML_SAX1_ENABLED
886 return(1);
887 #else
888 return(0);
889 #endif
890 case XML_WITH_FTP:
891 #ifdef LIBXML_FTP_ENABLED
892 return(1);
893 #else
894 return(0);
895 #endif
896 case XML_WITH_HTTP:
897 #ifdef LIBXML_HTTP_ENABLED
898 return(1);
899 #else
900 return(0);
901 #endif
902 case XML_WITH_VALID:
903 #ifdef LIBXML_VALID_ENABLED
904 return(1);
905 #else
906 return(0);
907 #endif
908 case XML_WITH_HTML:
909 #ifdef LIBXML_HTML_ENABLED
910 return(1);
911 #else
912 return(0);
913 #endif
914 case XML_WITH_LEGACY:
915 #ifdef LIBXML_LEGACY_ENABLED
916 return(1);
917 #else
918 return(0);
919 #endif
920 case XML_WITH_C14N:
921 #ifdef LIBXML_C14N_ENABLED
922 return(1);
923 #else
924 return(0);
925 #endif
926 case XML_WITH_CATALOG:
927 #ifdef LIBXML_CATALOG_ENABLED
928 return(1);
929 #else
930 return(0);
931 #endif
932 case XML_WITH_XPATH:
933 #ifdef LIBXML_XPATH_ENABLED
934 return(1);
935 #else
936 return(0);
937 #endif
938 case XML_WITH_XPTR:
939 #ifdef LIBXML_XPTR_ENABLED
940 return(1);
941 #else
942 return(0);
943 #endif
944 case XML_WITH_XINCLUDE:
945 #ifdef LIBXML_XINCLUDE_ENABLED
946 return(1);
947 #else
948 return(0);
949 #endif
950 case XML_WITH_ICONV:
951 #ifdef LIBXML_ICONV_ENABLED
952 return(1);
953 #else
954 return(0);
955 #endif
956 case XML_WITH_ISO8859X:
957 #ifdef LIBXML_ISO8859X_ENABLED
958 return(1);
959 #else
960 return(0);
961 #endif
962 case XML_WITH_UNICODE:
963 #ifdef LIBXML_UNICODE_ENABLED
964 return(1);
965 #else
966 return(0);
967 #endif
968 case XML_WITH_REGEXP:
969 #ifdef LIBXML_REGEXP_ENABLED
970 return(1);
971 #else
972 return(0);
973 #endif
974 case XML_WITH_AUTOMATA:
975 #ifdef LIBXML_AUTOMATA_ENABLED
976 return(1);
977 #else
978 return(0);
979 #endif
980 case XML_WITH_EXPR:
981 #ifdef LIBXML_EXPR_ENABLED
982 return(1);
983 #else
984 return(0);
985 #endif
986 case XML_WITH_SCHEMAS:
987 #ifdef LIBXML_SCHEMAS_ENABLED
988 return(1);
989 #else
990 return(0);
991 #endif
992 case XML_WITH_SCHEMATRON:
993 #ifdef LIBXML_SCHEMATRON_ENABLED
994 return(1);
995 #else
996 return(0);
997 #endif
998 case XML_WITH_MODULES:
999 #ifdef LIBXML_MODULES_ENABLED
1000 return(1);
1001 #else
1002 return(0);
1003 #endif
1004 case XML_WITH_DEBUG:
1005 #ifdef LIBXML_DEBUG_ENABLED
1006 return(1);
1007 #else
1008 return(0);
1009 #endif
1010 case XML_WITH_DEBUG_MEM:
1011 #ifdef DEBUG_MEMORY_LOCATION
1012 return(1);
1013 #else
1014 return(0);
1015 #endif
1016 case XML_WITH_DEBUG_RUN:
1017 #ifdef LIBXML_DEBUG_RUNTIME
1018 return(1);
1019 #else
1020 return(0);
1021 #endif
1022 case XML_WITH_ZLIB:
1023 #ifdef LIBXML_ZLIB_ENABLED
1024 return(1);
1025 #else
1026 return(0);
1027 #endif
1028 case XML_WITH_LZMA:
1029 #ifdef LIBXML_LZMA_ENABLED
1030 return(1);
1031 #else
1032 return(0);
1033 #endif
1034 case XML_WITH_ICU:
1035 #ifdef LIBXML_ICU_ENABLED
1036 return(1);
1037 #else
1038 return(0);
1039 #endif
1040 default:
1041 break;
1042 }
1043 return(0);
1044 }
1045
1046 /************************************************************************
1047 * *
1048 * SAX2 defaulted attributes handling *
1049 * *
1050 ************************************************************************/
1051
1052 /**
1053 * xmlDetectSAX2:
1054 * @ctxt: an XML parser context
1055 *
1056 * Do the SAX2 detection and specific intialization
1057 */
1058 static void
1059 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1060 if (ctxt == NULL) return;
1061 #ifdef LIBXML_SAX1_ENABLED
1062 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1063 ((ctxt->sax->startElementNs != NULL) ||
1064 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1065 #else
1066 ctxt->sax2 = 1;
1067 #endif /* LIBXML_SAX1_ENABLED */
1068
1069 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1070 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1071 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1072 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1073 (ctxt->str_xml_ns == NULL)) {
1074 xmlErrMemory(ctxt, NULL);
1075 }
1076 }
1077
/*
 * Per-element record of defaulted attributes.
 * NOTE(review): the values comment lists four items (localname, prefix,
 * values, external) while the array is declared with five slots --
 * confirm the exact per-attribute layout against xmlAddDefAttrs.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;        /* number of defaulted attributes on that element */
    int maxAttrs;       /* the size of the array */
    const xmlChar *values[5]; /* array of localname/prefix/values/external */
};
1085
1086 /**
1087 * xmlAttrNormalizeSpace:
1088 * @src: the source string
1089 * @dst: the target string
1090 *
1091 * Normalize the space in non CDATA attribute values:
1092 * If the attribute type is not CDATA, then the XML processor MUST further
1093 * process the normalized attribute value by discarding any leading and
1094 * trailing space (#x20) characters, and by replacing sequences of space
1095 * (#x20) characters by a single space (#x20) character.
1096 * Note that the size of dst need to be at least src, and if one doesn't need
1097 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1098 * passing src as dst is just fine.
1099 *
1100 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1101 * is needed.
1102 */
1103 static xmlChar *
1104 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1105 {
1106 if ((src == NULL) || (dst == NULL))
1107 return(NULL);
1108
1109 while (*src == 0x20) src++;
1110 while (*src != 0) {
1111 if (*src == 0x20) {
1112 while (*src == 0x20) src++;
1113 if (*src != 0)
1114 *dst++ = 0x20;
1115 } else {
1116 *dst++ = *src++;
1117 }
1118 }
1119 *dst = 0;
1120 if (dst == src)
1121 return(NULL);
1122 return(dst);
1123 }
1124
1125 /**
1126 * xmlAttrNormalizeSpace2:
1127 * @src: the source string
1128 *
1129 * Normalize the space in non CDATA attribute values, a slightly more complex
1130 * front end to avoid allocation problems when running on attribute values
1131 * coming from the input.
1132 *
1133 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1134 * is needed.
1135 */
1136 static const xmlChar *
1137 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1138 {
1139 int i;
1140 int remove_head = 0;
1141 int need_realloc = 0;
1142 const xmlChar *cur;
1143
1144 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1145 return(NULL);
1146 i = *len;
1147 if (i <= 0)
1148 return(NULL);
1149
1150 cur = src;
1151 while (*cur == 0x20) {
1152 cur++;
1153 remove_head++;
1154 }
1155 while (*cur != 0) {
1156 if (*cur == 0x20) {
1157 cur++;
1158 if ((*cur == 0x20) || (*cur == 0)) {
1159 need_realloc = 1;
1160 break;
1161 }
1162 } else
1163 cur++;
1164 }
1165 if (need_realloc) {
1166 xmlChar *ret;
1167
1168 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1169 if (ret == NULL) {
1170 xmlErrMemory(ctxt, NULL);
1171 return(NULL);
1172 }
1173 xmlAttrNormalizeSpace(ret, ret);
1174 *len = (int) strlen((const char *)ret);
1175 return(ret);
1176 } else if (remove_head) {
1177 *len -= remove_head;
1178 memmove(src, src + remove_head, 1 + *len);
1179 return(src);
1180 }
1181 return(NULL);
1182 }
1183
1184 /**
1185 * xmlAddDefAttrs:
1186 * @ctxt: an XML parser context
1187 * @fullname: the element fullname
1188 * @fullattr: the attribute fullname
1189 * @value: the attribute value
1190 *
1191 * Add a defaulted attribute for an element
1192 */
1193 static void
1194 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1195 const xmlChar *fullname,
1196 const xmlChar *fullattr,
1197 const xmlChar *value) {
1198 xmlDefAttrsPtr defaults;
1199 int len;
1200 const xmlChar *name;
1201 const xmlChar *prefix;
1202
1203 /*
1204 * Allows to detect attribute redefinitions
1205 */
1206 if (ctxt->attsSpecial != NULL) {
1207 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1208 return;
1209 }
1210
1211 if (ctxt->attsDefault == NULL) {
1212 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1213 if (ctxt->attsDefault == NULL)
1214 goto mem_error;
1215 }
1216
1217 /*
1218 * split the element name into prefix:localname , the string found
1219 * are within the DTD and then not associated to namespace names.
1220 */
1221 name = xmlSplitQName3(fullname, &len);
1222 if (name == NULL) {
1223 name = xmlDictLookup(ctxt->dict, fullname, -1);
1224 prefix = NULL;
1225 } else {
1226 name = xmlDictLookup(ctxt->dict, name, -1);
1227 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1228 }
1229
1230 /*
1231 * make sure there is some storage
1232 */
1233 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1234 if (defaults == NULL) {
1235 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1236 (4 * 5) * sizeof(const xmlChar *));
1237 if (defaults == NULL)
1238 goto mem_error;
1239 defaults->nbAttrs = 0;
1240 defaults->maxAttrs = 4;
1241 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1242 defaults, NULL) < 0) {
1243 xmlFree(defaults);
1244 goto mem_error;
1245 }
1246 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1247 xmlDefAttrsPtr temp;
1248
1249 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1250 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1251 if (temp == NULL)
1252 goto mem_error;
1253 defaults = temp;
1254 defaults->maxAttrs *= 2;
1255 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1256 defaults, NULL) < 0) {
1257 xmlFree(defaults);
1258 goto mem_error;
1259 }
1260 }
1261
1262 /*
1263 * Split the element name into prefix:localname , the string found
1264 * are within the DTD and hen not associated to namespace names.
1265 */
1266 name = xmlSplitQName3(fullattr, &len);
1267 if (name == NULL) {
1268 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1269 prefix = NULL;
1270 } else {
1271 name = xmlDictLookup(ctxt->dict, name, -1);
1272 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1273 }
1274
1275 defaults->values[5 * defaults->nbAttrs] = name;
1276 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1277 /* intern the string and precompute the end */
1278 len = xmlStrlen(value);
1279 value = xmlDictLookup(ctxt->dict, value, len);
1280 defaults->values[5 * defaults->nbAttrs + 2] = value;
1281 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1282 if (ctxt->external)
1283 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1284 else
1285 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1286 defaults->nbAttrs++;
1287
1288 return;
1289
1290 mem_error:
1291 xmlErrMemory(ctxt, NULL);
1292 return;
1293 }
1294
1295 /**
1296 * xmlAddSpecialAttr:
1297 * @ctxt: an XML parser context
1298 * @fullname: the element fullname
1299 * @fullattr: the attribute fullname
1300 * @type: the attribute type
1301 *
1302 * Register this attribute type
1303 */
1304 static void
1305 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1306 const xmlChar *fullname,
1307 const xmlChar *fullattr,
1308 int type)
1309 {
1310 if (ctxt->attsSpecial == NULL) {
1311 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1312 if (ctxt->attsSpecial == NULL)
1313 goto mem_error;
1314 }
1315
1316 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1317 return;
1318
1319 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1320 (void *) (long) type);
1321 return;
1322
1323 mem_error:
1324 xmlErrMemory(ctxt, NULL);
1325 return;
1326 }
1327
1328 /**
1329 * xmlCleanSpecialAttrCallback:
1330 *
1331 * Removes CDATA attributes from the special attribute table
1332 */
1333 static void
1334 xmlCleanSpecialAttrCallback(void *payload, void *data,
1335 const xmlChar *fullname, const xmlChar *fullattr,
1336 const xmlChar *unused ATTRIBUTE_UNUSED) {
1337 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1338
1339 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1340 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1341 }
1342 }
1343
1344 /**
1345 * xmlCleanSpecialAttr:
1346 * @ctxt: an XML parser context
1347 *
1348 * Trim the list of attributes defined to remove all those of type
1349 * CDATA as they are not special. This call should be done when finishing
1350 * to parse the DTD and before starting to parse the document root.
1351 */
1352 static void
1353 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1354 {
1355 if (ctxt->attsSpecial == NULL)
1356 return;
1357
1358 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1359
1360 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1361 xmlHashFree(ctxt->attsSpecial, NULL);
1362 ctxt->attsSpecial = NULL;
1363 }
1364 return;
1365 }
1366
1367 /**
1368 * xmlCheckLanguageID:
1369 * @lang: pointer to the string value
1370 *
1371 * Checks that the value conforms to the LanguageID production:
1372 *
1373 * NOTE: this is somewhat deprecated, those productions were removed from
1374 * the XML Second edition.
1375 *
1376 * [33] LanguageID ::= Langcode ('-' Subcode)*
1377 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1378 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1379 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1380 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1381 * [38] Subcode ::= ([a-z] | [A-Z])+
1382 *
1383 * The current REC reference the sucessors of RFC 1766, currently 5646
1384 *
1385 * http://www.rfc-editor.org/rfc/rfc5646.txt
1386 * langtag = language
1387 * ["-" script]
1388 * ["-" region]
1389 * *("-" variant)
1390 * *("-" extension)
1391 * ["-" privateuse]
1392 * language = 2*3ALPHA ; shortest ISO 639 code
1393 * ["-" extlang] ; sometimes followed by
1394 * ; extended language subtags
1395 * / 4ALPHA ; or reserved for future use
1396 * / 5*8ALPHA ; or registered language subtag
1397 *
1398 * extlang = 3ALPHA ; selected ISO 639 codes
1399 * *2("-" 3ALPHA) ; permanently reserved
1400 *
1401 * script = 4ALPHA ; ISO 15924 code
1402 *
1403 * region = 2ALPHA ; ISO 3166-1 code
1404 * / 3DIGIT ; UN M.49 code
1405 *
1406 * variant = 5*8alphanum ; registered variants
1407 * / (DIGIT 3alphanum)
1408 *
1409 * extension = singleton 1*("-" (2*8alphanum))
1410 *
1411 * ; Single alphanumerics
1412 * ; "x" reserved for private use
1413 * singleton = DIGIT ; 0 - 9
1414 * / %x41-57 ; A - W
1415 * / %x59-5A ; Y - Z
1416 * / %x61-77 ; a - w
1417 * / %x79-7A ; y - z
1418 *
1419 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1420 * The parser below doesn't try to cope with extension or privateuse
1421 * that could be added but that's not interoperable anyway
1422 *
1423 * Returns 1 if correct 0 otherwise
1424 **/
1425 int
1426 xmlCheckLanguageID(const xmlChar * lang)
1427 {
1428 const xmlChar *cur = lang, *nxt;
1429
1430 if (cur == NULL)
1431 return (0);
1432 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1433 ((cur[0] == 'I') && (cur[1] == '-')) ||
1434 ((cur[0] == 'x') && (cur[1] == '-')) ||
1435 ((cur[0] == 'X') && (cur[1] == '-'))) {
1436 /*
1437 * Still allow IANA code and user code which were coming
1438 * from the previous version of the XML-1.0 specification
1439 * it's deprecated but we should not fail
1440 */
1441 cur += 2;
1442 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1443 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1444 cur++;
1445 return(cur[0] == 0);
1446 }
1447 nxt = cur;
1448 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1449 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1450 nxt++;
1451 if (nxt - cur >= 4) {
1452 /*
1453 * Reserved
1454 */
1455 if ((nxt - cur > 8) || (nxt[0] != 0))
1456 return(0);
1457 return(1);
1458 }
1459 if (nxt - cur < 2)
1460 return(0);
1461 /* we got an ISO 639 code */
1462 if (nxt[0] == 0)
1463 return(1);
1464 if (nxt[0] != '-')
1465 return(0);
1466
1467 nxt++;
1468 cur = nxt;
1469 /* now we can have extlang or script or region or variant */
1470 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1471 goto region_m49;
1472
1473 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1474 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1475 nxt++;
1476 if (nxt - cur == 4)
1477 goto script;
1478 if (nxt - cur == 2)
1479 goto region;
1480 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1481 goto variant;
1482 if (nxt - cur != 3)
1483 return(0);
1484 /* we parsed an extlang */
1485 if (nxt[0] == 0)
1486 return(1);
1487 if (nxt[0] != '-')
1488 return(0);
1489
1490 nxt++;
1491 cur = nxt;
1492 /* now we can have script or region or variant */
1493 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1494 goto region_m49;
1495
1496 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1497 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1498 nxt++;
1499 if (nxt - cur == 2)
1500 goto region;
1501 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1502 goto variant;
1503 if (nxt - cur != 4)
1504 return(0);
1505 /* we parsed a script */
1506 script:
1507 if (nxt[0] == 0)
1508 return(1);
1509 if (nxt[0] != '-')
1510 return(0);
1511
1512 nxt++;
1513 cur = nxt;
1514 /* now we can have region or variant */
1515 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1516 goto region_m49;
1517
1518 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1519 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1520 nxt++;
1521
1522 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1523 goto variant;
1524 if (nxt - cur != 2)
1525 return(0);
1526 /* we parsed a region */
1527 region:
1528 if (nxt[0] == 0)
1529 return(1);
1530 if (nxt[0] != '-')
1531 return(0);
1532
1533 nxt++;
1534 cur = nxt;
1535 /* now we can just have a variant */
1536 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1537 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1538 nxt++;
1539
1540 if ((nxt - cur < 5) || (nxt - cur > 8))
1541 return(0);
1542
1543 /* we parsed a variant */
1544 variant:
1545 if (nxt[0] == 0)
1546 return(1);
1547 if (nxt[0] != '-')
1548 return(0);
1549 /* extensions and private use subtags not checked */
1550 return (1);
1551
1552 region_m49:
1553 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1554 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1555 nxt += 3;
1556 goto region;
1557 }
1558 return(0);
1559 }
1560
1561 /************************************************************************
1562 * *
1563 * Parser stacks related functions and macros *
1564 * *
1565 ************************************************************************/
1566
1567 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1568 const xmlChar ** str);
1569
1570 #ifdef SAX2
1571 /**
1572 * nsPush:
1573 * @ctxt: an XML parser context
1574 * @prefix: the namespace prefix or NULL
1575 * @URL: the namespace name
1576 *
1577 * Pushes a new parser namespace on top of the ns stack
1578 *
1579 * Returns -1 in case of error, -2 if the namespace should be discarded
1580 * and the index in the stack otherwise.
1581 */
1582 static int
1583 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1584 {
1585 if (ctxt->options & XML_PARSE_NSCLEAN) {
1586 int i;
1587 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1588 if (ctxt->nsTab[i] == prefix) {
1589 /* in scope */
1590 if (ctxt->nsTab[i + 1] == URL)
1591 return(-2);
1592 /* out of scope keep it */
1593 break;
1594 }
1595 }
1596 }
1597 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1598 ctxt->nsMax = 10;
1599 ctxt->nsNr = 0;
1600 ctxt->nsTab = (const xmlChar **)
1601 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1602 if (ctxt->nsTab == NULL) {
1603 xmlErrMemory(ctxt, NULL);
1604 ctxt->nsMax = 0;
1605 return (-1);
1606 }
1607 } else if (ctxt->nsNr >= ctxt->nsMax) {
1608 const xmlChar ** tmp;
1609 ctxt->nsMax *= 2;
1610 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1611 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1612 if (tmp == NULL) {
1613 xmlErrMemory(ctxt, NULL);
1614 ctxt->nsMax /= 2;
1615 return (-1);
1616 }
1617 ctxt->nsTab = tmp;
1618 }
1619 ctxt->nsTab[ctxt->nsNr++] = prefix;
1620 ctxt->nsTab[ctxt->nsNr++] = URL;
1621 return (ctxt->nsNr);
1622 }
1623 /**
1624 * nsPop:
1625 * @ctxt: an XML parser context
1626 * @nr: the number to pop
1627 *
1628 * Pops the top @nr parser prefix/namespace from the ns stack
1629 *
1630 * Returns the number of namespaces removed
1631 */
1632 static int
1633 nsPop(xmlParserCtxtPtr ctxt, int nr)
1634 {
1635 int i;
1636
1637 if (ctxt->nsTab == NULL) return(0);
1638 if (ctxt->nsNr < nr) {
1639 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1640 nr = ctxt->nsNr;
1641 }
1642 if (ctxt->nsNr <= 0)
1643 return (0);
1644
1645 for (i = 0;i < nr;i++) {
1646 ctxt->nsNr--;
1647 ctxt->nsTab[ctxt->nsNr] = NULL;
1648 }
1649 return(nr);
1650 }
1651 #endif
1652
1653 static int
1654 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1655 const xmlChar **atts;
1656 int *attallocs;
1657 int maxatts;
1658
1659 if (ctxt->atts == NULL) {
1660 maxatts = 55; /* allow for 10 attrs by default */
1661 atts = (const xmlChar **)
1662 xmlMalloc(maxatts * sizeof(xmlChar *));
1663 if (atts == NULL) goto mem_error;
1664 ctxt->atts = atts;
1665 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1666 if (attallocs == NULL) goto mem_error;
1667 ctxt->attallocs = attallocs;
1668 ctxt->maxatts = maxatts;
1669 } else if (nr + 5 > ctxt->maxatts) {
1670 maxatts = (nr + 5) * 2;
1671 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1672 maxatts * sizeof(const xmlChar *));
1673 if (atts == NULL) goto mem_error;
1674 ctxt->atts = atts;
1675 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1676 (maxatts / 5) * sizeof(int));
1677 if (attallocs == NULL) goto mem_error;
1678 ctxt->attallocs = attallocs;
1679 ctxt->maxatts = maxatts;
1680 }
1681 return(ctxt->maxatts);
1682 mem_error:
1683 xmlErrMemory(ctxt, NULL);
1684 return(-1);
1685 }
1686
1687 /**
1688 * inputPush:
1689 * @ctxt: an XML parser context
1690 * @value: the parser input
1691 *
1692 * Pushes a new parser input on top of the input stack
1693 *
1694 * Returns -1 in case of error, the index in the stack otherwise
1695 */
1696 int
1697 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1698 {
1699 if ((ctxt == NULL) || (value == NULL))
1700 return(-1);
1701 if (ctxt->inputNr >= ctxt->inputMax) {
1702 ctxt->inputMax *= 2;
1703 ctxt->inputTab =
1704 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1705 ctxt->inputMax *
1706 sizeof(ctxt->inputTab[0]));
1707 if (ctxt->inputTab == NULL) {
1708 xmlErrMemory(ctxt, NULL);
1709 xmlFreeInputStream(value);
1710 ctxt->inputMax /= 2;
1711 value = NULL;
1712 return (-1);
1713 }
1714 }
1715 ctxt->inputTab[ctxt->inputNr] = value;
1716 ctxt->input = value;
1717 return (ctxt->inputNr++);
1718 }
1719 /**
1720 * inputPop:
1721 * @ctxt: an XML parser context
1722 *
1723 * Pops the top parser input from the input stack
1724 *
1725 * Returns the input just removed
1726 */
1727 xmlParserInputPtr
1728 inputPop(xmlParserCtxtPtr ctxt)
1729 {
1730 xmlParserInputPtr ret;
1731
1732 if (ctxt == NULL)
1733 return(NULL);
1734 if (ctxt->inputNr <= 0)
1735 return (NULL);
1736 ctxt->inputNr--;
1737 if (ctxt->inputNr > 0)
1738 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1739 else
1740 ctxt->input = NULL;
1741 ret = ctxt->inputTab[ctxt->inputNr];
1742 ctxt->inputTab[ctxt->inputNr] = NULL;
1743 return (ret);
1744 }
1745 /**
1746 * nodePush:
1747 * @ctxt: an XML parser context
1748 * @value: the element node
1749 *
1750 * Pushes a new element node on top of the node stack
1751 *
1752 * Returns -1 in case of error, the index in the stack otherwise
1753 */
1754 int
1755 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1756 {
1757 if (ctxt == NULL) return(0);
1758 if (ctxt->nodeNr >= ctxt->nodeMax) {
1759 xmlNodePtr *tmp;
1760
1761 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1762 ctxt->nodeMax * 2 *
1763 sizeof(ctxt->nodeTab[0]));
1764 if (tmp == NULL) {
1765 xmlErrMemory(ctxt, NULL);
1766 return (-1);
1767 }
1768 ctxt->nodeTab = tmp;
1769 ctxt->nodeMax *= 2;
1770 }
1771 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1772 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1773 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1774 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1775 xmlParserMaxDepth);
1776 xmlHaltParser(ctxt);
1777 return(-1);
1778 }
1779 ctxt->nodeTab[ctxt->nodeNr] = value;
1780 ctxt->node = value;
1781 return (ctxt->nodeNr++);
1782 }
1783
1784 /**
1785 * nodePop:
1786 * @ctxt: an XML parser context
1787 *
1788 * Pops the top element node from the node stack
1789 *
1790 * Returns the node just removed
1791 */
1792 xmlNodePtr
1793 nodePop(xmlParserCtxtPtr ctxt)
1794 {
1795 xmlNodePtr ret;
1796
1797 if (ctxt == NULL) return(NULL);
1798 if (ctxt->nodeNr <= 0)
1799 return (NULL);
1800 ctxt->nodeNr--;
1801 if (ctxt->nodeNr > 0)
1802 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1803 else
1804 ctxt->node = NULL;
1805 ret = ctxt->nodeTab[ctxt->nodeNr];
1806 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1807 return (ret);
1808 }
1809
1810 #ifdef LIBXML_PUSH_ENABLED
1811 /**
1812 * nameNsPush:
1813 * @ctxt: an XML parser context
1814 * @value: the element name
1815 * @prefix: the element prefix
1816 * @URI: the element namespace name
1817 *
1818 * Pushes a new element name/prefix/URL on top of the name stack
1819 *
1820 * Returns -1 in case of error, the index in the stack otherwise
1821 */
1822 static int
1823 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1824 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1825 {
1826 if (ctxt->nameNr >= ctxt->nameMax) {
1827 const xmlChar * *tmp;
1828 void **tmp2;
1829 ctxt->nameMax *= 2;
1830 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1831 ctxt->nameMax *
1832 sizeof(ctxt->nameTab[0]));
1833 if (tmp == NULL) {
1834 ctxt->nameMax /= 2;
1835 goto mem_error;
1836 }
1837 ctxt->nameTab = tmp;
1838 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1839 ctxt->nameMax * 3 *
1840 sizeof(ctxt->pushTab[0]));
1841 if (tmp2 == NULL) {
1842 ctxt->nameMax /= 2;
1843 goto mem_error;
1844 }
1845 ctxt->pushTab = tmp2;
1846 }
1847 ctxt->nameTab[ctxt->nameNr] = value;
1848 ctxt->name = value;
1849 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1850 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1851 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1852 return (ctxt->nameNr++);
1853 mem_error:
1854 xmlErrMemory(ctxt, NULL);
1855 return (-1);
1856 }
1857 /**
1858 * nameNsPop:
1859 * @ctxt: an XML parser context
1860 *
1861 * Pops the top element/prefix/URI name from the name stack
1862 *
1863 * Returns the name just removed
1864 */
1865 static const xmlChar *
1866 nameNsPop(xmlParserCtxtPtr ctxt)
1867 {
1868 const xmlChar *ret;
1869
1870 if (ctxt->nameNr <= 0)
1871 return (NULL);
1872 ctxt->nameNr--;
1873 if (ctxt->nameNr > 0)
1874 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1875 else
1876 ctxt->name = NULL;
1877 ret = ctxt->nameTab[ctxt->nameNr];
1878 ctxt->nameTab[ctxt->nameNr] = NULL;
1879 return (ret);
1880 }
1881 #endif /* LIBXML_PUSH_ENABLED */
1882
1883 /**
1884 * namePush:
1885 * @ctxt: an XML parser context
1886 * @value: the element name
1887 *
1888 * Pushes a new element name on top of the name stack
1889 *
1890 * Returns -1 in case of error, the index in the stack otherwise
1891 */
1892 int
1893 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1894 {
1895 if (ctxt == NULL) return (-1);
1896
1897 if (ctxt->nameNr >= ctxt->nameMax) {
1898 const xmlChar * *tmp;
1899 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1900 ctxt->nameMax * 2 *
1901 sizeof(ctxt->nameTab[0]));
1902 if (tmp == NULL) {
1903 goto mem_error;
1904 }
1905 ctxt->nameTab = tmp;
1906 ctxt->nameMax *= 2;
1907 }
1908 ctxt->nameTab[ctxt->nameNr] = value;
1909 ctxt->name = value;
1910 return (ctxt->nameNr++);
1911 mem_error:
1912 xmlErrMemory(ctxt, NULL);
1913 return (-1);
1914 }
1915 /**
1916 * namePop:
1917 * @ctxt: an XML parser context
1918 *
1919 * Pops the top element name from the name stack
1920 *
1921 * Returns the name just removed
1922 */
1923 const xmlChar *
1924 namePop(xmlParserCtxtPtr ctxt)
1925 {
1926 const xmlChar *ret;
1927
1928 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1929 return (NULL);
1930 ctxt->nameNr--;
1931 if (ctxt->nameNr > 0)
1932 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1933 else
1934 ctxt->name = NULL;
1935 ret = ctxt->nameTab[ctxt->nameNr];
1936 ctxt->nameTab[ctxt->nameNr] = NULL;
1937 return (ret);
1938 }
1939
1940 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1941 if (ctxt->spaceNr >= ctxt->spaceMax) {
1942 int *tmp;
1943
1944 ctxt->spaceMax *= 2;
1945 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1946 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1947 if (tmp == NULL) {
1948 xmlErrMemory(ctxt, NULL);
1949 ctxt->spaceMax /=2;
1950 return(-1);
1951 }
1952 ctxt->spaceTab = tmp;
1953 }
1954 ctxt->spaceTab[ctxt->spaceNr] = val;
1955 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1956 return(ctxt->spaceNr++);
1957 }
1958
1959 static int spacePop(xmlParserCtxtPtr ctxt) {
1960 int ret;
1961 if (ctxt->spaceNr <= 0) return(0);
1962 ctxt->spaceNr--;
1963 if (ctxt->spaceNr > 0)
1964 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1965 else
1966 ctxt->space = &ctxt->spaceTab[0];
1967 ret = ctxt->spaceTab[ctxt->spaceNr];
1968 ctxt->spaceTab[ctxt->spaceNr] = -1;
1969 return(ret);
1970 }
1971
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them expect a variable named ctxt (the parser context) to be in
 * scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done (both are currently defined
 *           identically)
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

/* current byte of the current input */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* byte located val positions after the current one */
#define NXT(val) ctxt->input->cur[(val)]
/* the read pointer itself; expands to an lvalue */
#define CUR_PTR ctxt->input->cur
2011
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are exactly
 * c1 ... cn.  The bytes are compared as unsigned char, left to right, and
 * the comparison stops at the first mismatch.
 * Every argument is parenthesized in the expansion so that any pointer
 * expression can be passed as s (the cast applies to the whole expression),
 * and the cast keeps the pointed-to data const.
 * s is evaluated once per compared byte: do not pass an expression with
 * side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2029
/*
 * SKIP(val): advance the current input by val bytes, adding val to
 * ctxt->nbChars and to the column counter; line numbers are not touched.
 * Afterwards a '%' at the new position is handed to
 * xmlParserHandlePEReference(), and when the new position holds a 0 and
 * xmlParserInputGrow() does not bring more data the input is popped.
 * val is evaluated three times.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

/*
 * SKIPL(val): same as SKIP but the bytes are stepped over one at a time so
 * that a '\n' bumps the line counter and resets the column to 1.
 * NOTE(review): val is expanded unparenthesized in the loop condition and
 * re-evaluated on each iteration, so only a plain value should be passed.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<val; skipl++) {                                  \
        if (*(ctxt->input->cur) == '\n') {                              \
        ctxt->input->line++; ctxt->input->col = 1;                      \
        } else ctxt->input->col++;                                      \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)
2052
/*
 * SHRINK: call xmlSHRINK() when the parser is not progressive, more than
 * 2 * INPUT_CHUNK bytes lie between the buffer base and the current
 * position, and fewer than 2 * INPUT_CHUNK bytes remain before the end.
 * The expansion is a complete if statement including its own ';'.
 */
#define SHRINK if ((ctxt->progressive == 0) &&                          \
                   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
                   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
        xmlSHRINK (ctxt);
2057
2058 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2059 xmlParserInputShrink(ctxt->input);
2060 if ((*ctxt->input->cur == 0) &&
2061 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2062 xmlPopInput(ctxt);
2063 }
2064
/*
 * GROW: call xmlGROW() when the parser is not progressive and fewer than
 * INPUT_CHUNK bytes remain between the current position and the end of
 * the input.  The expansion is a complete if statement including its
 * own ';'.
 */
#define GROW if ((ctxt->progressive == 0) &&                            \
                 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))   \
        xmlGROW (ctxt);
2068
2069 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2070 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2071 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2072
2073 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2074 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2075 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2076 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2077 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2078 xmlHaltParser(ctxt);
2079 return;
2080 }
2081 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2082 if ((ctxt->input->cur > ctxt->input->end) ||
2083 (ctxt->input->cur < ctxt->input->base)) {
2084 xmlHaltParser(ctxt);
2085 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2086 return;
2087 }
2088 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2089 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2090 xmlPopInput(ctxt);
2091 }
2092
/* SKIP_BLANKS: skip blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over one byte, bumping the column and ctxt->nbChars, then try
 * to grow the input if the new position holds a 0.  Neither newlines nor
 * '%' are looked at and the input is never popped.
 * The expansion is a plain brace block (not do/while), so "NEXT1;" yields
 * a block followed by an empty statement.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): step over the current character, which is l bytes long.
 * A '\n' bumps the line counter and resets the column to 1, anything else
 * bumps the column by one.  A '%' at the new position is handed to
 * xmlParserHandlePEReference().  ctxt->nbChars is not updated here.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += l;                              \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

/*
 * CUR_CHAR(l) / CUR_SCHAR(s, l): current character of the context input,
 * respectively of the string s; l must be a variable since its address is
 * passed to receive the length.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and advance
 * i; a character of length l == 1 is stored as a single byte, any other
 * goes through xmlCopyCharMultiByte().
 * The expansion is an unbraced if/else without a trailing ';' and its
 * arguments are not parenthesized: use it as a statement of its own, with
 * plain variables as arguments.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if (l == 1) b[i++] = (xmlChar) v;                                   \
    else i += xmlCopyCharMultiByte(&b[i],v)
2119
2120 /**
2121 * xmlSkipBlankChars:
2122 * @ctxt: the XML parser context
2123 *
2124 * skip all blanks character found at that point in the input streams.
2125 * It pops up finished entities in the process if allowable at that point.
2126 *
2127 * Returns the number of space chars skipped
2128 */
2129
2130 int
2131 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2132 int res = 0;
2133
2134 /*
2135 * It's Okay to use CUR/NEXT here since all the blanks are on
2136 * the ASCII range.
2137 */
2138 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2139 const xmlChar *cur;
2140 /*
2141 * if we are in the document content, go really fast
2142 */
2143 cur = ctxt->input->cur;
2144 while (IS_BLANK_CH(*cur)) {
2145 if (*cur == '\n') {
2146 ctxt->input->line++; ctxt->input->col = 1;
2147 } else {
2148 ctxt->input->col++;
2149 }
2150 cur++;
2151 res++;
2152 if (*cur == 0) {
2153 ctxt->input->cur = cur;
2154 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2155 cur = ctxt->input->cur;
2156 }
2157 }
2158 ctxt->input->cur = cur;
2159 } else {
2160 int cur;
2161 do {
2162 cur = CUR;
2163 while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */
2164 (ctxt->instate != XML_PARSER_EOF))) {
2165 NEXT;
2166 cur = CUR;
2167 res++;
2168 }
2169 while ((cur == 0) && (ctxt->inputNr > 1) &&
2170 (ctxt->instate != XML_PARSER_COMMENT)) {
2171 xmlPopInput(ctxt);
2172 cur = CUR;
2173 }
2174 /*
2175 * Need to handle support of entities branching here
2176 */
2177 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2178 } while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */
2179 (ctxt->instate != XML_PARSER_EOF));
2180 }
2181 return(res);
2182 }
2183
2184 /************************************************************************
2185 * *
2186 * Commodity functions to handle entities *
2187 * *
2188 ************************************************************************/
2189
2190 /**
2191 * xmlPopInput:
2192 * @ctxt: an XML parser context
2193 *
2194 * xmlPopInput: the current input pointed by ctxt->input came to an end
2195 * pop it and return the next char.
2196 *
2197 * Returns the current xmlChar in the parser context
2198 */
2199 xmlChar
2200 xmlPopInput(xmlParserCtxtPtr ctxt) {
2201 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2202 if (xmlParserDebugEntities)
2203 xmlGenericError(xmlGenericErrorContext,
2204 "Popping input %d\n", ctxt->inputNr);
2205 xmlFreeInputStream(inputPop(ctxt));
2206 if ((*ctxt->input->cur == 0) &&
2207 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2208 return(xmlPopInput(ctxt));
2209 return(CUR);
2210 }
2211
2212 /**
2213 * xmlPushInput:
2214 * @ctxt: an XML parser context
2215 * @input: an XML parser input fragment (entity, XML fragment ...).
2216 *
2217 * xmlPushInput: switch to a new input stream which is stacked on top
2218 * of the previous one(s).
2219 * Returns -1 in case of error or the index in the input stack
2220 */
2221 int
2222 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2223 int ret;
2224 if (input == NULL) return(-1);
2225
2226 if (xmlParserDebugEntities) {
2227 if ((ctxt->input != NULL) && (ctxt->input->filename))
2228 xmlGenericError(xmlGenericErrorContext,
2229 "%s(%d): ", ctxt->input->filename,
2230 ctxt->input->line);
2231 xmlGenericError(xmlGenericErrorContext,
2232 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2233 }
2234 ret = inputPush(ctxt, input);
2235 if (ctxt->instate == XML_PARSER_EOF)
2236 return(-1);
2237 GROW;
2238 return(ret);
2239 }
2240
2241 /**
2242 * xmlParseCharRef:
2243 * @ctxt: an XML parser context
2244 *
2245 * parse Reference declarations
2246 *
2247 * [66] CharRef ::= '&#' [0-9]+ ';' |
2248 * '&#x' [0-9a-fA-F]+ ';'
2249 *
2250 * [ WFC: Legal Character ]
2251 * Characters referred to using character references must match the
2252 * production for Char.
2253 *
2254 * Returns the value parsed (as an int), 0 in case of error
2255 */
2256 int
2257 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2258 unsigned int val = 0;
2259 int count = 0;
2260 unsigned int outofrange = 0;
2261
2262 /*
2263 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2264 */
2265 if ((RAW == '&') && (NXT(1) == '#') &&
2266 (NXT(2) == 'x')) {
2267 SKIP(3);
2268 GROW;
2269 while (RAW != ';') { /* loop blocked by count */
2270 if (count++ > 20) {
2271 count = 0;
2272 GROW;
2273 if (ctxt->instate == XML_PARSER_EOF)
2274 return(0);
2275 }
2276 if ((RAW >= '0') && (RAW <= '9'))
2277 val = val * 16 + (CUR - '0');
2278 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2279 val = val * 16 + (CUR - 'a') + 10;
2280 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2281 val = val * 16 + (CUR - 'A') + 10;
2282 else {
2283 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2284 val = 0;
2285 break;
2286 }
2287 if (val > 0x10FFFF)
2288 outofrange = val;
2289
2290 NEXT;
2291 count++;
2292 }
2293 if (RAW == ';') {
2294 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2295 ctxt->input->col++;
2296 ctxt->nbChars ++;
2297 ctxt->input->cur++;
2298 }
2299 } else if ((RAW == '&') && (NXT(1) == '#')) {
2300 SKIP(2);
2301 GROW;
2302 while (RAW != ';') { /* loop blocked by count */
2303 if (count++ > 20) {
2304 count = 0;
2305 GROW;
2306 if (ctxt->instate == XML_PARSER_EOF)
2307 return(0);
2308 }
2309 if ((RAW >= '0') && (RAW <= '9'))
2310 val = val * 10 + (CUR - '0');
2311 else {
2312 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2313 val = 0;
2314 break;
2315 }
2316 if (val > 0x10FFFF)
2317 outofrange = val;
2318
2319 NEXT;
2320 count++;
2321 }
2322 if (RAW == ';') {
2323 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2324 ctxt->input->col++;
2325 ctxt->nbChars ++;
2326 ctxt->input->cur++;
2327 }
2328 } else {
2329 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2330 }
2331
2332 /*
2333 * [ WFC: Legal Character ]
2334 * Characters referred to using character references must match the
2335 * production for Char.
2336 */
2337 if ((IS_CHAR(val) && (outofrange == 0))) {
2338 return(val);
2339 } else {
2340 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2341 "xmlParseCharRef: invalid xmlChar value %d\n",
2342 val);
2343 }
2344 return(0);
2345 }
2346
2347 /**
2348 * xmlParseStringCharRef:
2349 * @ctxt: an XML parser context
2350 * @str: a pointer to an index in the string
2351 *
2352 * parse Reference declarations, variant parsing from a string rather
2353 * than an an input flow.
2354 *
2355 * [66] CharRef ::= '&#' [0-9]+ ';' |
2356 * '&#x' [0-9a-fA-F]+ ';'
2357 *
2358 * [ WFC: Legal Character ]
2359 * Characters referred to using character references must match the
2360 * production for Char.
2361 *
2362 * Returns the value parsed (as an int), 0 in case of error, str will be
2363 * updated to the current value of the index
2364 */
2365 static int
2366 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2367 const xmlChar *ptr;
2368 xmlChar cur;
2369 unsigned int val = 0;
2370 unsigned int outofrange = 0;
2371
2372 if ((str == NULL) || (*str == NULL)) return(0);
2373 ptr = *str;
2374 cur = *ptr;
2375 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2376 ptr += 3;
2377 cur = *ptr;
2378 while (cur != ';') { /* Non input consuming loop */
2379 if ((cur >= '0') && (cur <= '9'))
2380 val = val * 16 + (cur - '0');
2381 else if ((cur >= 'a') && (cur <= 'f'))
2382 val = val * 16 + (cur - 'a') + 10;
2383 else if ((cur >= 'A') && (cur <= 'F'))
2384 val = val * 16 + (cur - 'A') + 10;
2385 else {
2386 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2387 val = 0;
2388 break;
2389 }
2390 if (val > 0x10FFFF)
2391 outofrange = val;
2392
2393 ptr++;
2394 cur = *ptr;
2395 }
2396 if (cur == ';')
2397 ptr++;
2398 } else if ((cur == '&') && (ptr[1] == '#')){
2399 ptr += 2;
2400 cur = *ptr;
2401 while (cur != ';') { /* Non input consuming loops */
2402 if ((cur >= '0') && (cur <= '9'))
2403 val = val * 10 + (cur - '0');
2404 else {
2405 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2406 val = 0;
2407 break;
2408 }
2409 if (val > 0x10FFFF)
2410 outofrange = val;
2411
2412 ptr++;
2413 cur = *ptr;
2414 }
2415 if (cur == ';')
2416 ptr++;
2417 } else {
2418 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2419 return(0);
2420 }
2421 *str = ptr;
2422
2423 /*
2424 * [ WFC: Legal Character ]
2425 * Characters referred to using character references must match the
2426 * production for Char.
2427 */
2428 if ((IS_CHAR(val) && (outofrange == 0))) {
2429 return(val);
2430 } else {
2431 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2432 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2433 val);
2434 }
2435 return(0);
2436 }
2437
2438 /**
2439 * xmlNewBlanksWrapperInputStream:
2440 * @ctxt: an XML parser context
2441 * @entity: an Entity pointer
2442 *
2443 * Create a new input stream for wrapping
2444 * blanks around a PEReference
2445 *
2446 * Returns the new input stream or NULL
2447 */
2448
2449 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2450
2451 static xmlParserInputPtr
2452 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2453 xmlParserInputPtr input;
2454 xmlChar *buffer;
2455 size_t length;
2456 if (entity == NULL) {
2457 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2458 "xmlNewBlanksWrapperInputStream entity\n");
2459 return(NULL);
2460 }
2461 if (xmlParserDebugEntities)
2462 xmlGenericError(xmlGenericErrorContext,
2463 "new blanks wrapper for entity: %s\n", entity->name);
2464 input = xmlNewInputStream(ctxt);
2465 if (input == NULL) {
2466 return(NULL);
2467 }
2468 length = xmlStrlen(entity->name) + 5;
2469 buffer = xmlMallocAtomic(length);
2470 if (buffer == NULL) {
2471 xmlErrMemory(ctxt, NULL);
2472 xmlFree(input);
2473 return(NULL);
2474 }
2475 buffer [0] = ' ';
2476 buffer [1] = '%';
2477 buffer [length-3] = ';';
2478 buffer [length-2] = ' ';
2479 buffer [length-1] = 0;
2480 memcpy(buffer + 2, entity->name, length - 5);
2481 input->free = deallocblankswrapper;
2482 input->base = buffer;
2483 input->cur = buffer;
2484 input->length = length;
2485 input->end = &buffer[length];
2486 return(input);
2487 }
2488
2489 /**
2490 * xmlParserHandlePEReference:
2491 * @ctxt: the parser context
2492 *
2493 * [69] PEReference ::= '%' Name ';'
2494 *
2495 * [ WFC: No Recursion ]
2496 * A parsed entity must not contain a recursive
2497 * reference to itself, either directly or indirectly.
2498 *
2499 * [ WFC: Entity Declared ]
2500 * In a document without any DTD, a document with only an internal DTD
2501 * subset which contains no parameter entity references, or a document
2502 * with "standalone='yes'", ... ... The declaration of a parameter
2503 * entity must precede any reference to it...
2504 *
2505 * [ VC: Entity Declared ]
2506 * In a document with an external subset or external parameter entities
2507 * with "standalone='no'", ... ... The declaration of a parameter entity
2508 * must precede any reference to it...
2509 *
2510 * [ WFC: In DTD ]
2511 * Parameter-entity references may only appear in the DTD.
2512 * NOTE: misleading but this is handled.
2513 *
2514 * A PEReference may have been detected in the current input stream
2515 * the handling is done accordingly to
2516 * http://www.w3.org/TR/REC-xml#entproc
2517 * i.e.
2518 * - Included in literal in entity values
2519 * - Included as Parameter Entity reference within DTDs
2520 */
2521 void
2522 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2523 const xmlChar *name;
2524 xmlEntityPtr entity = NULL;
2525 xmlParserInputPtr input;
2526
2527 if (RAW != '%') return;
2528 switch(ctxt->instate) {
2529 case XML_PARSER_CDATA_SECTION:
2530 return;
2531 case XML_PARSER_COMMENT:
2532 return;
2533 case XML_PARSER_START_TAG:
2534 return;
2535 case XML_PARSER_END_TAG:
2536 return;
2537 case XML_PARSER_EOF:
2538 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2539 return;
2540 case XML_PARSER_PROLOG:
2541 case XML_PARSER_START:
2542 case XML_PARSER_MISC:
2543 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2544 return;
2545 case XML_PARSER_ENTITY_DECL:
2546 case XML_PARSER_CONTENT:
2547 case XML_PARSER_ATTRIBUTE_VALUE:
2548 case XML_PARSER_PI:
2549 case XML_PARSER_SYSTEM_LITERAL:
2550 case XML_PARSER_PUBLIC_LITERAL:
2551 /* we just ignore it there */
2552 return;
2553 case XML_PARSER_EPILOG:
2554 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2555 return;
2556 case XML_PARSER_ENTITY_VALUE:
2557 /*
2558 * NOTE: in the case of entity values, we don't do the
2559 * substitution here since we need the literal
2560 * entity value to be able to save the internal
2561 * subset of the document.
2562 * This will be handled by xmlStringDecodeEntities
2563 */
2564 return;
2565 case XML_PARSER_DTD:
2566 /*
2567 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2568 * In the internal DTD subset, parameter-entity references
2569 * can occur only where markup declarations can occur, not
2570 * within markup declarations.
2571 * In that case this is handled in xmlParseMarkupDecl
2572 */
2573 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2574 return;
2575 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2576 return;
2577 break;
2578 case XML_PARSER_IGNORE:
2579 return;
2580 }
2581
2582 NEXT;
2583 name = xmlParseName(ctxt);
2584 if (xmlParserDebugEntities)
2585 xmlGenericError(xmlGenericErrorContext,
2586 "PEReference: %s\n", name);
2587 if (name == NULL) {
2588 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2589 } else {
2590 if (RAW == ';') {
2591 NEXT;
2592 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2593 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2594 if (ctxt->instate == XML_PARSER_EOF)
2595 return;
2596 if (entity == NULL) {
2597
2598 /*
2599 * [ WFC: Entity Declared ]
2600 * In a document without any DTD, a document with only an
2601 * internal DTD subset which contains no parameter entity
2602 * references, or a document with "standalone='yes'", ...
2603 * ... The declaration of a parameter entity must precede
2604 * any reference to it...
2605 */
2606 if ((ctxt->standalone == 1) ||
2607 ((ctxt->hasExternalSubset == 0) &&
2608 (ctxt->hasPErefs == 0))) {
2609 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2610 "PEReference: %%%s; not found\n", name);
2611 } else {
2612 /*
2613 * [ VC: Entity Declared ]
2614 * In a document with an external subset or external
2615 * parameter entities with "standalone='no'", ...
2616 * ... The declaration of a parameter entity must precede
2617 * any reference to it...
2618 */
2619 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2620 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2621 "PEReference: %%%s; not found\n",
2622 name, NULL);
2623 } else
2624 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2625 "PEReference: %%%s; not found\n",
2626 name, NULL);
2627 ctxt->valid = 0;
2628 }
2629 xmlParserEntityCheck(ctxt, 0, NULL, 0);
2630 } else if (ctxt->input->free != deallocblankswrapper) {
2631 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2632 if (xmlPushInput(ctxt, input) < 0)
2633 return;
2634 } else {
2635 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2636 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2637 xmlChar start[4];
2638 xmlCharEncoding enc;
2639
2640 /*
2641 * Note: external parameter entities will not be loaded, it
2642 * is not required for a non-validating parser, unless the
2643 * option of validating, or substituting entities were
2644 * given. Doing so is far more secure as the parser will
2645 * only process data coming from the document entity by
2646 * default.
2647 */
2648 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2649 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2650 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
2651 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2652 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2653 (ctxt->replaceEntities == 0) &&
2654 (ctxt->validate == 0))
2655 return;
2656
2657 /*
2658 * handle the extra spaces added before and after
2659 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2660 * this is done independently.
2661 */
2662 input = xmlNewEntityInputStream(ctxt, entity);
2663 if (xmlPushInput(ctxt, input) < 0)
2664 return;
2665
2666 /*
2667 * Get the 4 first bytes and decode the charset
2668 * if enc != XML_CHAR_ENCODING_NONE
2669 * plug some encoding conversion routines.
2670 * Note that, since we may have some non-UTF8
2671 * encoding (like UTF16, bug 135229), the 'length'
2672 * is not known, but we can calculate based upon
2673 * the amount of data in the buffer.
2674 */
2675 GROW
2676 if (ctxt->instate == XML_PARSER_EOF)
2677 return;
2678 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2679 start[0] = RAW;
2680 start[1] = NXT(1);
2681 start[2] = NXT(2);
2682 start[3] = NXT(3);
2683 enc = xmlDetectCharEncoding(start, 4);
2684 if (enc != XML_CHAR_ENCODING_NONE) {
2685 xmlSwitchEncoding(ctxt, enc);
2686 }
2687 }
2688
2689 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2690 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2691 (IS_BLANK_CH(NXT(5)))) {
2692 xmlParseTextDecl(ctxt);
2693 }
2694 } else {
2695 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2696 "PEReference: %s is not a parameter entity\n",
2697 name);
2698 }
2699 }
2700 } else {
2701 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2702 }
2703 }
2704 }
2705
/*
 * Macro used to grow the current buffer.
 * buffer: name of the xmlChar * variable holding the buffer
 * n: extra number of bytes requested on top of doubling the size; it
 *    may be any expression, it is evaluated once
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures, it is
 *    also reached if the new size overflows. The buffer and its size
 *    are left untouched in that case.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2720
2721 /**
2722 * xmlStringLenDecodeEntities:
2723 * @ctxt: the parser context
2724 * @str: the input string
2725 * @len: the string length
2726 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2727 * @end: an end marker xmlChar, 0 if none
2728 * @end2: an end marker xmlChar, 0 if none
2729 * @end3: an end marker xmlChar, 0 if none
2730 *
2731 * Takes a entity string content and process to do the adequate substitutions.
2732 *
2733 * [67] Reference ::= EntityRef | CharRef
2734 *
2735 * [69] PEReference ::= '%' Name ';'
2736 *
2737 * Returns A newly allocated string with the substitution done. The caller
2738 * must deallocate it !
2739 */
2740 xmlChar *
2741 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2742 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2743 xmlChar *buffer = NULL;
2744 size_t buffer_size = 0;
2745 size_t nbchars = 0;
2746
2747 xmlChar *current = NULL;
2748 xmlChar *rep = NULL;
2749 const xmlChar *last;
2750 xmlEntityPtr ent;
2751 int c,l;
2752
2753 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2754 return(NULL);
2755 last = str + len;
2756
2757 if (((ctxt->depth > 40) &&
2758 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2759 (ctxt->depth > 1024)) {
2760 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2761 return(NULL);
2762 }
2763
2764 /*
2765 * allocate a translation buffer.
2766 */
2767 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2768 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2769 if (buffer == NULL) goto mem_error;
2770
2771 /*
2772 * OK loop until we reach one of the ending char or a size limit.
2773 * we are operating on already parsed values.
2774 */
2775 if (str < last)
2776 c = CUR_SCHAR(str, l);
2777 else
2778 c = 0;
2779 while ((c != 0) && (c != end) && /* non input consuming loop */
2780 (c != end2) && (c != end3)) {
2781
2782 if (c == 0) break;
2783 if ((c == '&') && (str[1] == '#')) {
2784 int val = xmlParseStringCharRef(ctxt, &str);
2785 if (val != 0) {
2786 COPY_BUF(0,buffer,nbchars,val);
2787 }
2788 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2789 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2790 }
2791 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2792 if (xmlParserDebugEntities)
2793 xmlGenericError(xmlGenericErrorContext,
2794 "String decoding Entity Reference: %.30s\n",
2795 str);
2796 ent = xmlParseStringEntityRef(ctxt, &str);
2797 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2798 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2799 goto int_error;
2800 xmlParserEntityCheck(ctxt, 0, ent, 0);
2801 if (ent != NULL)
2802 ctxt->nbentities += ent->checked / 2;
2803 if ((ent != NULL) &&
2804 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2805 if (ent->content != NULL) {
2806 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2807 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2808 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2809 }
2810 } else {
2811 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2812 "predefined entity has no content\n");
2813 }
2814 } else if ((ent != NULL) && (ent->content != NULL)) {
2815 ctxt->depth++;
2816 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2817 0, 0, 0);
2818 ctxt->depth--;
2819
2820 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2821 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2822 goto int_error;
2823
2824 if (rep != NULL) {
2825 current = rep;
2826 while (*current != 0) { /* non input consuming loop */
2827 buffer[nbchars++] = *current++;
2828 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2829 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2830 goto int_error;
2831 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2832 }
2833 }
2834 xmlFree(rep);
2835 rep = NULL;
2836 }
2837 } else if (ent != NULL) {
2838 int i = xmlStrlen(ent->name);
2839 const xmlChar *cur = ent->name;
2840
2841 buffer[nbchars++] = '&';
2842 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2843 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2844 }
2845 for (;i > 0;i--)
2846 buffer[nbchars++] = *cur++;
2847 buffer[nbchars++] = ';';
2848 }
2849 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2850 if (xmlParserDebugEntities)
2851 xmlGenericError(xmlGenericErrorContext,
2852 "String decoding PE Reference: %.30s\n", str);
2853 ent = xmlParseStringPEReference(ctxt, &str);
2854 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2855 goto int_error;
2856 xmlParserEntityCheck(ctxt, 0, ent, 0);
2857 if (ent != NULL)
2858 ctxt->nbentities += ent->checked / 2;
2859 if (ent != NULL) {
2860 if (ent->content == NULL) {
2861 xmlLoadEntityContent(ctxt, ent);
2862 }
2863 ctxt->depth++;
2864 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2865 0, 0, 0);
2866 ctxt->depth--;
2867 if (rep != NULL) {
2868 current = rep;
2869 while (*current != 0) { /* non input consuming loop */
2870 buffer[nbchars++] = *current++;
2871 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2872 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2873 goto int_error;
2874 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2875 }
2876 }
2877 xmlFree(rep);
2878 rep = NULL;
2879 }
2880 }
2881 } else {
2882 COPY_BUF(l,buffer,nbchars,c);
2883 str += l;
2884 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2885 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2886 }
2887 }
2888 if (str < last)
2889 c = CUR_SCHAR(str, l);
2890 else
2891 c = 0;
2892 }
2893 buffer[nbchars] = 0;
2894 return(buffer);
2895
2896 mem_error:
2897 xmlErrMemory(ctxt, NULL);
2898 int_error:
2899 if (rep != NULL)
2900 xmlFree(rep);
2901 if (buffer != NULL)
2902 xmlFree(buffer);
2903 return(NULL);
2904 }
2905
2906 /**
2907 * xmlStringDecodeEntities:
2908 * @ctxt: the parser context
2909 * @str: the input string
2910 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2911 * @end: an end marker xmlChar, 0 if none
2912 * @end2: an end marker xmlChar, 0 if none
2913 * @end3: an end marker xmlChar, 0 if none
2914 *
2915 * Takes a entity string content and process to do the adequate substitutions.
2916 *
2917 * [67] Reference ::= EntityRef | CharRef
2918 *
2919 * [69] PEReference ::= '%' Name ';'
2920 *
2921 * Returns A newly allocated string with the substitution done. The caller
2922 * must deallocate it !
2923 */
2924 xmlChar *
2925 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2926 xmlChar end, xmlChar end2, xmlChar end3) {
2927 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2928 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2929 end, end2, end3));
2930 }
2931
2932 /************************************************************************
2933 * *
2934 * Commodity functions, cleanup needed ? *
2935 * *
2936 ************************************************************************/
2937
2938 /**
2939 * areBlanks:
2940 * @ctxt: an XML parser context
2941 * @str: a xmlChar *
2942 * @len: the size of @str
2943 * @blank_chars: we know the chars are blanks
2944 *
2945 * Is this a sequence of blank chars that one can ignore ?
2946 *
2947 * Returns 1 if ignorable 0 otherwise.
2948 */
2949
2950 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2951 int blank_chars) {
2952 int i, ret;
2953 xmlNodePtr lastChild;
2954
2955 /*
2956 * Don't spend time trying to differentiate them, the same callback is
2957 * used !
2958 */
2959 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2960 return(0);
2961
2962 /*
2963 * Check for xml:space value.
2964 */
2965 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2966 (*(ctxt->space) == -2))
2967 return(0);
2968
2969 /*
2970 * Check that the string is made of blanks
2971 */
2972 if (blank_chars == 0) {
2973 for (i = 0;i < len;i++)
2974 if (!(IS_BLANK_CH(str[i]))) return(0);
2975 }
2976
2977 /*
2978 * Look if the element is mixed content in the DTD if available
2979 */
2980 if (ctxt->node == NULL) return(0);
2981 if (ctxt->myDoc != NULL) {
2982 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2983 if (ret == 0) return(1);
2984 if (ret == 1) return(0);
2985 }
2986
2987 /*
2988 * Otherwise, heuristic :-\
2989 */
2990 if ((RAW != '<') && (RAW != 0xD)) return(0);
2991 if ((ctxt->node->children == NULL) &&
2992 (RAW == '<') && (NXT(1) == '/')) return(0);
2993
2994 lastChild = xmlGetLastChild(ctxt->node);
2995 if (lastChild == NULL) {
2996 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2997 (ctxt->node->content != NULL)) return(0);
2998 } else if (xmlNodeIsText(lastChild))
2999 return(0);
3000 else if ((ctxt->node->children != NULL) &&
3001 (xmlNodeIsText(ctxt->node->children)))
3002 return(0);
3003 return(1);
3004 }
3005
3006 /************************************************************************
3007 * *
3008 * Extra stuff for namespace support *
3009 * Relates to http://www.w3.org/TR/WD-xml-names *
3010 * *
3011 ************************************************************************/
3012
3013 /**
3014 * xmlSplitQName:
3015 * @ctxt: an XML parser context
3016 * @name: the UTF8 encoded qualified name to split
3017 * @prefix: a xmlChar **
3018 *
3019 * parse an UTF8 encoded XML qualified name string
3020 *
3021 * [NS 5] QName ::= (Prefix ':')? LocalPart
3022 *
3023 * [NS 6] Prefix ::= NCName
3024 *
3025 * [NS 7] LocalPart ::= NCName
3026 *
3027 * Returns the local part, and prefix is updated
3028 * to get the Prefix if any.
3029 */
3030
3031 xmlChar *
3032 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3033 xmlChar buf[XML_MAX_NAMELEN + 5];
3034 xmlChar *buffer = NULL;
3035 int len = 0;
3036 int max = XML_MAX_NAMELEN;
3037 xmlChar *ret = NULL;
3038 const xmlChar *cur = name;
3039 int c;
3040
3041 if (prefix == NULL) return(NULL);
3042 *prefix = NULL;
3043
3044 if (cur == NULL) return(NULL);
3045
3046 #ifndef XML_XML_NAMESPACE
3047 /* xml: prefix is not really a namespace */
3048 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3049 (cur[2] == 'l') && (cur[3] == ':'))
3050 return(xmlStrdup(name));
3051 #endif
3052
3053 /* nasty but well=formed */
3054 if (cur[0] == ':')
3055 return(xmlStrdup(name));
3056
3057 c = *cur++;
3058 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3059 buf[len++] = c;
3060 c = *cur++;
3061 }
3062 if (len >= max) {
3063 /*
3064 * Okay someone managed to make a huge name, so he's ready to pay
3065 * for the processing speed.
3066 */
3067 max = len * 2;
3068
3069 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3070 if (buffer == NULL) {
3071 xmlErrMemory(ctxt, NULL);
3072 return(NULL);
3073 }
3074 memcpy(buffer, buf, len);
3075 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3076 if (len + 10 > max) {
3077 xmlChar *tmp;
3078
3079 max *= 2;
3080 tmp = (xmlChar *) xmlRealloc(buffer,
3081 max * sizeof(xmlChar));
3082 if (tmp == NULL) {
3083 xmlFree(buffer);
3084 xmlErrMemory(ctxt, NULL);
3085 return(NULL);
3086 }
3087 buffer = tmp;
3088 }
3089 buffer[len++] = c;
3090 c = *cur++;
3091 }
3092 buffer[len] = 0;
3093 }
3094
3095 if ((c == ':') && (*cur == 0)) {
3096 if (buffer != NULL)
3097 xmlFree(buffer);
3098 *prefix = NULL;
3099 return(xmlStrdup(name));
3100 }
3101
3102 if (buffer == NULL)
3103 ret = xmlStrndup(buf, len);
3104 else {
3105 ret = buffer;
3106 buffer = NULL;
3107 max = XML_MAX_NAMELEN;
3108 }
3109
3110
3111 if (c == ':') {
3112 c = *cur;
3113 *prefix = ret;
3114 if (c == 0) {
3115 return(xmlStrndup(BAD_CAST "", 0));
3116 }
3117 len = 0;
3118
3119 /*
3120 * Check that the first character is proper to start
3121 * a new name
3122 */
3123 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3124 ((c >= 0x41) && (c <= 0x5A)) ||
3125 (c == '_') || (c == ':'))) {
3126 int l;
3127 int first = CUR_SCHAR(cur, l);
3128
3129 if (!IS_LETTER(first) && (first != '_')) {
3130 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3131 "Name %s is not XML Namespace compliant\n",
3132 name);
3133 }
3134 }
3135 cur++;
3136
3137 while ((c != 0) && (len < max)) { /* tested bigname2.xml */