1c5e036ea265c843a1d67fbb195e729b704e1352
[reactos.git] / sdk / lib / 3rdparty / libxml2 / parser.c
1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37
38 #define IN_LIBXML
39 #include "libxml.h"
40
41 #if defined(_WIN32) && !defined (__CYGWIN__)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <libxml/xmlmemory.h>
53 #include <libxml/threads.h>
54 #include <libxml/globals.h>
55 #include <libxml/tree.h>
56 #include <libxml/parser.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #ifdef LIBXML_CATALOG_ENABLED
65 #include <libxml/catalog.h>
66 #endif
67 #ifdef LIBXML_SCHEMAS_ENABLED
68 #include <libxml/xmlschemastypes.h>
69 #include <libxml/relaxng.h>
70 #endif
71 #ifdef HAVE_CTYPE_H
72 #include <ctype.h>
73 #endif
74 #ifdef HAVE_STDLIB_H
75 #include <stdlib.h>
76 #endif
77 #ifdef HAVE_SYS_STAT_H
78 #include <sys/stat.h>
79 #endif
80 #ifdef HAVE_FCNTL_H
81 #include <fcntl.h>
82 #endif
83 #ifdef HAVE_UNISTD_H
84 #include <unistd.h>
85 #endif
86 #ifdef HAVE_ZLIB_H
87 #include <zlib.h>
88 #endif
89 #ifdef HAVE_LZMA_H
90 #include <lzma.h>
91 #endif
92
93 #include "buf.h"
94 #include "enc.h"
95
96 static void
97 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
98
99 static xmlParserCtxtPtr
100 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
101 const xmlChar *base, xmlParserCtxtPtr pctx);
102
103 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105 /************************************************************************
106 * *
107 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
108 * *
109 ************************************************************************/
110
111 #define XML_PARSER_BIG_ENTITY 1000
112 #define XML_PARSER_LOT_ENTITY 5000
113
114 /*
115 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
116 * replacement over the size in bytes of the input indicates that you have
117 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
118 * replacements per byte of input.
119 */
120 #define XML_PARSER_NON_LINEAR 10
121
122 /*
123 * xmlParserEntityCheck
124 *
125 * Function to check non-linear entity expansion behaviour
126 * This is here to detect and stop exponential linear entity expansion
127 * This is not a limitation of the parser but a safety
128 * boundary feature. It can be disabled with the XML_PARSE_HUGE
129 * parser option.
130 */
131 static int
132 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
133 xmlEntityPtr ent, size_t replacement)
134 {
135 size_t consumed = 0;
136
137 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
138 return (0);
139 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
140 return (1);
141
142 /*
143 * This may look absurd but is needed to detect
144 * entities problems
145 */
146 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
147 (ent->content != NULL) && (ent->checked == 0) &&
148 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
149 unsigned long oldnbent = ctxt->nbentities;
150 xmlChar *rep;
151
152 ent->checked = 1;
153
154 ++ctxt->depth;
155 rep = xmlStringDecodeEntities(ctxt, ent->content,
156 XML_SUBSTITUTE_REF, 0, 0, 0);
157 --ctxt->depth;
158 if (ctxt->errNo == XML_ERR_ENTITY_LOOP) {
159 ent->content[0] = 0;
160 }
161
162 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
163 if (rep != NULL) {
164 if (xmlStrchr(rep, '<'))
165 ent->checked |= 1;
166 xmlFree(rep);
167 rep = NULL;
168 }
169 }
170 if (replacement != 0) {
171 if (replacement < XML_MAX_TEXT_LENGTH)
172 return(0);
173
174 /*
175 * If the volume of entity copy reaches 10 times the
176 * amount of parsed data and over the large text threshold
177 * then that's very likely to be an abuse.
178 */
179 if (ctxt->input != NULL) {
180 consumed = ctxt->input->consumed +
181 (ctxt->input->cur - ctxt->input->base);
182 }
183 consumed += ctxt->sizeentities;
184
185 if (replacement < XML_PARSER_NON_LINEAR * consumed)
186 return(0);
187 } else if (size != 0) {
188 /*
189 * Do the check based on the replacement size of the entity
190 */
191 if (size < XML_PARSER_BIG_ENTITY)
192 return(0);
193
194 /*
195 * A limit on the amount of text data reasonably used
196 */
197 if (ctxt->input != NULL) {
198 consumed = ctxt->input->consumed +
199 (ctxt->input->cur - ctxt->input->base);
200 }
201 consumed += ctxt->sizeentities;
202
203 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
204 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
205 return (0);
206 } else if (ent != NULL) {
207 /*
208 * use the number of parsed entities in the replacement
209 */
210 size = ent->checked / 2;
211
212 /*
213 * The amount of data parsed counting entities size only once
214 */
215 if (ctxt->input != NULL) {
216 consumed = ctxt->input->consumed +
217 (ctxt->input->cur - ctxt->input->base);
218 }
219 consumed += ctxt->sizeentities;
220
221 /*
222 * Check the density of entities for the amount of data
223 * knowing an entity reference will take at least 3 bytes
224 */
225 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
226 return (0);
227 } else {
228 /*
229 * strange we got no data for checking
230 */
231 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
232 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
233 (ctxt->nbentities <= 10000))
234 return (0);
235 }
236 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
237 return (1);
238 }
239
240 /**
241 * xmlParserMaxDepth:
242 *
243 * arbitrary depth limit for the XML documents that we allow to
244 * process. This is not a limitation of the parser but a safety
245 * boundary feature. It can be disabled with the XML_PARSE_HUGE
246 * parser option.
247 */
248 unsigned int xmlParserMaxDepth = 256;
249
250
251
252 #define SAX2 1
253 #define XML_PARSER_BIG_BUFFER_SIZE 300
254 #define XML_PARSER_BUFFER_SIZE 100
255 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
256
257 /**
258 * XML_PARSER_CHUNK_SIZE
259 *
260 * When calling GROW that's the minimal amount of data
261 * the parser expects to have received. It is not a hard
262 * limit but an optimization when reading strings like Names.
263 * It is not strictly needed as long as the input's available characters
264 * are followed by 0, which should be provided by the I/O level
265 */
266 #define XML_PARSER_CHUNK_SIZE 100
267
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is NULL-terminated and read-only: both the strings and the
 * array of pointers are const.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
277
278
279 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
280 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
281 const xmlChar **str);
282
283 static xmlParserErrors
284 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
285 xmlSAXHandlerPtr sax,
286 void *user_data, int depth, const xmlChar *URL,
287 const xmlChar *ID, xmlNodePtr *list);
288
289 static int
290 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
291 const char *encoding);
292 #ifdef LIBXML_LEGACY_ENABLED
293 static void
294 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
295 xmlNodePtr lastNode);
296 #endif /* LIBXML_LEGACY_ENABLED */
297
298 static xmlParserErrors
299 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
300 const xmlChar *string, void *user_data, xmlNodePtr *lst);
301
302 static int
303 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
304
305 /************************************************************************
306 * *
307 * Some factorized error routines *
308 * *
309 ************************************************************************/
310
311 /**
312 * xmlErrAttributeDup:
313 * @ctxt: an XML parser context
314 * @prefix: the attribute prefix
315 * @localname: the attribute localname
316 *
317 * Handle a redefinition of attribute error
318 */
319 static void
320 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
321 const xmlChar * localname)
322 {
323 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
324 (ctxt->instate == XML_PARSER_EOF))
325 return;
326 if (ctxt != NULL)
327 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
328
329 if (prefix == NULL)
330 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
331 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
332 (const char *) localname, NULL, NULL, 0, 0,
333 "Attribute %s redefined\n", localname);
334 else
335 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
336 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
337 (const char *) prefix, (const char *) localname,
338 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
339 localname);
340 if (ctxt != NULL) {
341 ctxt->wellFormed = 0;
342 if (ctxt->recovery == 0)
343 ctxt->disableSAX = 1;
344 }
345 }
346
347 /**
348 * xmlFatalErr:
349 * @ctxt: an XML parser context
350 * @error: the error number
351 * @extra: extra information string
352 *
353 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
354 */
355 static void
356 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
357 {
358 const char *errmsg;
359
360 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
361 (ctxt->instate == XML_PARSER_EOF))
362 return;
363 switch (error) {
364 case XML_ERR_INVALID_HEX_CHARREF:
365 errmsg = "CharRef: invalid hexadecimal value";
366 break;
367 case XML_ERR_INVALID_DEC_CHARREF:
368 errmsg = "CharRef: invalid decimal value";
369 break;
370 case XML_ERR_INVALID_CHARREF:
371 errmsg = "CharRef: invalid value";
372 break;
373 case XML_ERR_INTERNAL_ERROR:
374 errmsg = "internal error";
375 break;
376 case XML_ERR_PEREF_AT_EOF:
377 errmsg = "PEReference at end of document";
378 break;
379 case XML_ERR_PEREF_IN_PROLOG:
380 errmsg = "PEReference in prolog";
381 break;
382 case XML_ERR_PEREF_IN_EPILOG:
383 errmsg = "PEReference in epilog";
384 break;
385 case XML_ERR_PEREF_NO_NAME:
386 errmsg = "PEReference: no name";
387 break;
388 case XML_ERR_PEREF_SEMICOL_MISSING:
389 errmsg = "PEReference: expecting ';'";
390 break;
391 case XML_ERR_ENTITY_LOOP:
392 errmsg = "Detected an entity reference loop";
393 break;
394 case XML_ERR_ENTITY_NOT_STARTED:
395 errmsg = "EntityValue: \" or ' expected";
396 break;
397 case XML_ERR_ENTITY_PE_INTERNAL:
398 errmsg = "PEReferences forbidden in internal subset";
399 break;
400 case XML_ERR_ENTITY_NOT_FINISHED:
401 errmsg = "EntityValue: \" or ' expected";
402 break;
403 case XML_ERR_ATTRIBUTE_NOT_STARTED:
404 errmsg = "AttValue: \" or ' expected";
405 break;
406 case XML_ERR_LT_IN_ATTRIBUTE:
407 errmsg = "Unescaped '<' not allowed in attributes values";
408 break;
409 case XML_ERR_LITERAL_NOT_STARTED:
410 errmsg = "SystemLiteral \" or ' expected";
411 break;
412 case XML_ERR_LITERAL_NOT_FINISHED:
413 errmsg = "Unfinished System or Public ID \" or ' expected";
414 break;
415 case XML_ERR_MISPLACED_CDATA_END:
416 errmsg = "Sequence ']]>' not allowed in content";
417 break;
418 case XML_ERR_URI_REQUIRED:
419 errmsg = "SYSTEM or PUBLIC, the URI is missing";
420 break;
421 case XML_ERR_PUBID_REQUIRED:
422 errmsg = "PUBLIC, the Public Identifier is missing";
423 break;
424 case XML_ERR_HYPHEN_IN_COMMENT:
425 errmsg = "Comment must not contain '--' (double-hyphen)";
426 break;
427 case XML_ERR_PI_NOT_STARTED:
428 errmsg = "xmlParsePI : no target name";
429 break;
430 case XML_ERR_RESERVED_XML_NAME:
431 errmsg = "Invalid PI name";
432 break;
433 case XML_ERR_NOTATION_NOT_STARTED:
434 errmsg = "NOTATION: Name expected here";
435 break;
436 case XML_ERR_NOTATION_NOT_FINISHED:
437 errmsg = "'>' required to close NOTATION declaration";
438 break;
439 case XML_ERR_VALUE_REQUIRED:
440 errmsg = "Entity value required";
441 break;
442 case XML_ERR_URI_FRAGMENT:
443 errmsg = "Fragment not allowed";
444 break;
445 case XML_ERR_ATTLIST_NOT_STARTED:
446 errmsg = "'(' required to start ATTLIST enumeration";
447 break;
448 case XML_ERR_NMTOKEN_REQUIRED:
449 errmsg = "NmToken expected in ATTLIST enumeration";
450 break;
451 case XML_ERR_ATTLIST_NOT_FINISHED:
452 errmsg = "')' required to finish ATTLIST enumeration";
453 break;
454 case XML_ERR_MIXED_NOT_STARTED:
455 errmsg = "MixedContentDecl : '|' or ')*' expected";
456 break;
457 case XML_ERR_PCDATA_REQUIRED:
458 errmsg = "MixedContentDecl : '#PCDATA' expected";
459 break;
460 case XML_ERR_ELEMCONTENT_NOT_STARTED:
461 errmsg = "ContentDecl : Name or '(' expected";
462 break;
463 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
464 errmsg = "ContentDecl : ',' '|' or ')' expected";
465 break;
466 case XML_ERR_PEREF_IN_INT_SUBSET:
467 errmsg =
468 "PEReference: forbidden within markup decl in internal subset";
469 break;
470 case XML_ERR_GT_REQUIRED:
471 errmsg = "expected '>'";
472 break;
473 case XML_ERR_CONDSEC_INVALID:
474 errmsg = "XML conditional section '[' expected";
475 break;
476 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
477 errmsg = "Content error in the external subset";
478 break;
479 case XML_ERR_CONDSEC_INVALID_KEYWORD:
480 errmsg =
481 "conditional section INCLUDE or IGNORE keyword expected";
482 break;
483 case XML_ERR_CONDSEC_NOT_FINISHED:
484 errmsg = "XML conditional section not closed";
485 break;
486 case XML_ERR_XMLDECL_NOT_STARTED:
487 errmsg = "Text declaration '<?xml' required";
488 break;
489 case XML_ERR_XMLDECL_NOT_FINISHED:
490 errmsg = "parsing XML declaration: '?>' expected";
491 break;
492 case XML_ERR_EXT_ENTITY_STANDALONE:
493 errmsg = "external parsed entities cannot be standalone";
494 break;
495 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
496 errmsg = "EntityRef: expecting ';'";
497 break;
498 case XML_ERR_DOCTYPE_NOT_FINISHED:
499 errmsg = "DOCTYPE improperly terminated";
500 break;
501 case XML_ERR_LTSLASH_REQUIRED:
502 errmsg = "EndTag: '</' not found";
503 break;
504 case XML_ERR_EQUAL_REQUIRED:
505 errmsg = "expected '='";
506 break;
507 case XML_ERR_STRING_NOT_CLOSED:
508 errmsg = "String not closed expecting \" or '";
509 break;
510 case XML_ERR_STRING_NOT_STARTED:
511 errmsg = "String not started expecting ' or \"";
512 break;
513 case XML_ERR_ENCODING_NAME:
514 errmsg = "Invalid XML encoding name";
515 break;
516 case XML_ERR_STANDALONE_VALUE:
517 errmsg = "standalone accepts only 'yes' or 'no'";
518 break;
519 case XML_ERR_DOCUMENT_EMPTY:
520 errmsg = "Document is empty";
521 break;
522 case XML_ERR_DOCUMENT_END:
523 errmsg = "Extra content at the end of the document";
524 break;
525 case XML_ERR_NOT_WELL_BALANCED:
526 errmsg = "chunk is not well balanced";
527 break;
528 case XML_ERR_EXTRA_CONTENT:
529 errmsg = "extra content at the end of well balanced chunk";
530 break;
531 case XML_ERR_VERSION_MISSING:
532 errmsg = "Malformed declaration expecting version";
533 break;
534 case XML_ERR_NAME_TOO_LONG:
535 errmsg = "Name too long use XML_PARSE_HUGE option";
536 break;
537 #if 0
538 case:
539 errmsg = "";
540 break;
541 #endif
542 default:
543 errmsg = "Unregistered error message";
544 }
545 if (ctxt != NULL)
546 ctxt->errNo = error;
547 if (info == NULL) {
548 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
549 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
550 errmsg);
551 } else {
552 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
553 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
554 errmsg, info);
555 }
556 if (ctxt != NULL) {
557 ctxt->wellFormed = 0;
558 if (ctxt->recovery == 0)
559 ctxt->disableSAX = 1;
560 }
561 }
562
563 /**
564 * xmlFatalErrMsg:
565 * @ctxt: an XML parser context
566 * @error: the error number
567 * @msg: the error message
568 *
569 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
570 */
571 static void LIBXML_ATTR_FORMAT(3,0)
572 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
573 const char *msg)
574 {
575 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
576 (ctxt->instate == XML_PARSER_EOF))
577 return;
578 if (ctxt != NULL)
579 ctxt->errNo = error;
580 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
581 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
582 if (ctxt != NULL) {
583 ctxt->wellFormed = 0;
584 if (ctxt->recovery == 0)
585 ctxt->disableSAX = 1;
586 }
587 }
588
589 /**
590 * xmlWarningMsg:
591 * @ctxt: an XML parser context
592 * @error: the error number
593 * @msg: the error message
594 * @str1: extra data
595 * @str2: extra data
596 *
597 * Handle a warning.
598 */
599 static void LIBXML_ATTR_FORMAT(3,0)
600 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601 const char *msg, const xmlChar *str1, const xmlChar *str2)
602 {
603 xmlStructuredErrorFunc schannel = NULL;
604
605 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
606 (ctxt->instate == XML_PARSER_EOF))
607 return;
608 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
609 (ctxt->sax->initialized == XML_SAX2_MAGIC))
610 schannel = ctxt->sax->serror;
611 if (ctxt != NULL) {
612 __xmlRaiseError(schannel,
613 (ctxt->sax) ? ctxt->sax->warning : NULL,
614 ctxt->userData,
615 ctxt, NULL, XML_FROM_PARSER, error,
616 XML_ERR_WARNING, NULL, 0,
617 (const char *) str1, (const char *) str2, NULL, 0, 0,
618 msg, (const char *) str1, (const char *) str2);
619 } else {
620 __xmlRaiseError(schannel, NULL, NULL,
621 ctxt, NULL, XML_FROM_PARSER, error,
622 XML_ERR_WARNING, NULL, 0,
623 (const char *) str1, (const char *) str2, NULL, 0, 0,
624 msg, (const char *) str1, (const char *) str2);
625 }
626 }
627
628 /**
629 * xmlValidityError:
630 * @ctxt: an XML parser context
631 * @error: the error number
632 * @msg: the error message
633 * @str1: extra data
634 *
635 * Handle a validity error.
636 */
637 static void LIBXML_ATTR_FORMAT(3,0)
638 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
639 const char *msg, const xmlChar *str1, const xmlChar *str2)
640 {
641 xmlStructuredErrorFunc schannel = NULL;
642
643 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
644 (ctxt->instate == XML_PARSER_EOF))
645 return;
646 if (ctxt != NULL) {
647 ctxt->errNo = error;
648 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
649 schannel = ctxt->sax->serror;
650 }
651 if (ctxt != NULL) {
652 __xmlRaiseError(schannel,
653 ctxt->vctxt.error, ctxt->vctxt.userData,
654 ctxt, NULL, XML_FROM_DTD, error,
655 XML_ERR_ERROR, NULL, 0, (const char *) str1,
656 (const char *) str2, NULL, 0, 0,
657 msg, (const char *) str1, (const char *) str2);
658 ctxt->valid = 0;
659 } else {
660 __xmlRaiseError(schannel, NULL, NULL,
661 ctxt, NULL, XML_FROM_DTD, error,
662 XML_ERR_ERROR, NULL, 0, (const char *) str1,
663 (const char *) str2, NULL, 0, 0,
664 msg, (const char *) str1, (const char *) str2);
665 }
666 }
667
668 /**
669 * xmlFatalErrMsgInt:
670 * @ctxt: an XML parser context
671 * @error: the error number
672 * @msg: the error message
673 * @val: an integer value
674 *
675 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
676 */
677 static void LIBXML_ATTR_FORMAT(3,0)
678 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
679 const char *msg, int val)
680 {
681 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
682 (ctxt->instate == XML_PARSER_EOF))
683 return;
684 if (ctxt != NULL)
685 ctxt->errNo = error;
686 __xmlRaiseError(NULL, NULL, NULL,
687 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
688 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
689 if (ctxt != NULL) {
690 ctxt->wellFormed = 0;
691 if (ctxt->recovery == 0)
692 ctxt->disableSAX = 1;
693 }
694 }
695
696 /**
697 * xmlFatalErrMsgStrIntStr:
698 * @ctxt: an XML parser context
699 * @error: the error number
700 * @msg: the error message
701 * @str1: an string info
702 * @val: an integer value
703 * @str2: an string info
704 *
705 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
706 */
707 static void LIBXML_ATTR_FORMAT(3,0)
708 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
709 const char *msg, const xmlChar *str1, int val,
710 const xmlChar *str2)
711 {
712 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
713 (ctxt->instate == XML_PARSER_EOF))
714 return;
715 if (ctxt != NULL)
716 ctxt->errNo = error;
717 __xmlRaiseError(NULL, NULL, NULL,
718 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
719 NULL, 0, (const char *) str1, (const char *) str2,
720 NULL, val, 0, msg, str1, val, str2);
721 if (ctxt != NULL) {
722 ctxt->wellFormed = 0;
723 if (ctxt->recovery == 0)
724 ctxt->disableSAX = 1;
725 }
726 }
727
728 /**
729 * xmlFatalErrMsgStr:
730 * @ctxt: an XML parser context
731 * @error: the error number
732 * @msg: the error message
733 * @val: a string value
734 *
735 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
736 */
737 static void LIBXML_ATTR_FORMAT(3,0)
738 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
739 const char *msg, const xmlChar * val)
740 {
741 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
742 (ctxt->instate == XML_PARSER_EOF))
743 return;
744 if (ctxt != NULL)
745 ctxt->errNo = error;
746 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
747 XML_FROM_PARSER, error, XML_ERR_FATAL,
748 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
749 val);
750 if (ctxt != NULL) {
751 ctxt->wellFormed = 0;
752 if (ctxt->recovery == 0)
753 ctxt->disableSAX = 1;
754 }
755 }
756
757 /**
758 * xmlErrMsgStr:
759 * @ctxt: an XML parser context
760 * @error: the error number
761 * @msg: the error message
762 * @val: a string value
763 *
764 * Handle a non fatal parser error
765 */
766 static void LIBXML_ATTR_FORMAT(3,0)
767 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
768 const char *msg, const xmlChar * val)
769 {
770 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
771 (ctxt->instate == XML_PARSER_EOF))
772 return;
773 if (ctxt != NULL)
774 ctxt->errNo = error;
775 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
776 XML_FROM_PARSER, error, XML_ERR_ERROR,
777 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
778 val);
779 }
780
781 /**
782 * xmlNsErr:
783 * @ctxt: an XML parser context
784 * @error: the error number
785 * @msg: the message
786 * @info1: extra information string
787 * @info2: extra information string
788 *
789 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
790 */
791 static void LIBXML_ATTR_FORMAT(3,0)
792 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
793 const char *msg,
794 const xmlChar * info1, const xmlChar * info2,
795 const xmlChar * info3)
796 {
797 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
798 (ctxt->instate == XML_PARSER_EOF))
799 return;
800 if (ctxt != NULL)
801 ctxt->errNo = error;
802 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
803 XML_ERR_ERROR, NULL, 0, (const char *) info1,
804 (const char *) info2, (const char *) info3, 0, 0, msg,
805 info1, info2, info3);
806 if (ctxt != NULL)
807 ctxt->nsWellFormed = 0;
808 }
809
810 /**
811 * xmlNsWarn
812 * @ctxt: an XML parser context
813 * @error: the error number
814 * @msg: the message
815 * @info1: extra information string
816 * @info2: extra information string
817 *
818 * Handle a namespace warning error
819 */
820 static void LIBXML_ATTR_FORMAT(3,0)
821 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
822 const char *msg,
823 const xmlChar * info1, const xmlChar * info2,
824 const xmlChar * info3)
825 {
826 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
827 (ctxt->instate == XML_PARSER_EOF))
828 return;
829 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
830 XML_ERR_WARNING, NULL, 0, (const char *) info1,
831 (const char *) info2, (const char *) info3, 0, 0, msg,
832 info1, info2, info3);
833 }
834
835 /************************************************************************
836 * *
837 * Library wide options *
838 * *
839 ************************************************************************/
840
841 /**
842 * xmlHasFeature:
843 * @feature: the feature to be examined
844 *
845 * Examines if the library has been compiled with a given feature.
846 *
847 * Returns a non-zero value if the feature exist, otherwise zero.
848 * Returns zero (0) if the feature does not exist or an unknown
849 * unknown feature is requested, non-zero otherwise.
850 */
851 int
852 xmlHasFeature(xmlFeature feature)
853 {
854 switch (feature) {
855 case XML_WITH_THREAD:
856 #ifdef LIBXML_THREAD_ENABLED
857 return(1);
858 #else
859 return(0);
860 #endif
861 case XML_WITH_TREE:
862 #ifdef LIBXML_TREE_ENABLED
863 return(1);
864 #else
865 return(0);
866 #endif
867 case XML_WITH_OUTPUT:
868 #ifdef LIBXML_OUTPUT_ENABLED
869 return(1);
870 #else
871 return(0);
872 #endif
873 case XML_WITH_PUSH:
874 #ifdef LIBXML_PUSH_ENABLED
875 return(1);
876 #else
877 return(0);
878 #endif
879 case XML_WITH_READER:
880 #ifdef LIBXML_READER_ENABLED
881 return(1);
882 #else
883 return(0);
884 #endif
885 case XML_WITH_PATTERN:
886 #ifdef LIBXML_PATTERN_ENABLED
887 return(1);
888 #else
889 return(0);
890 #endif
891 case XML_WITH_WRITER:
892 #ifdef LIBXML_WRITER_ENABLED
893 return(1);
894 #else
895 return(0);
896 #endif
897 case XML_WITH_SAX1:
898 #ifdef LIBXML_SAX1_ENABLED
899 return(1);
900 #else
901 return(0);
902 #endif
903 case XML_WITH_FTP:
904 #ifdef LIBXML_FTP_ENABLED
905 return(1);
906 #else
907 return(0);
908 #endif
909 case XML_WITH_HTTP:
910 #ifdef LIBXML_HTTP_ENABLED
911 return(1);
912 #else
913 return(0);
914 #endif
915 case XML_WITH_VALID:
916 #ifdef LIBXML_VALID_ENABLED
917 return(1);
918 #else
919 return(0);
920 #endif
921 case XML_WITH_HTML:
922 #ifdef LIBXML_HTML_ENABLED
923 return(1);
924 #else
925 return(0);
926 #endif
927 case XML_WITH_LEGACY:
928 #ifdef LIBXML_LEGACY_ENABLED
929 return(1);
930 #else
931 return(0);
932 #endif
933 case XML_WITH_C14N:
934 #ifdef LIBXML_C14N_ENABLED
935 return(1);
936 #else
937 return(0);
938 #endif
939 case XML_WITH_CATALOG:
940 #ifdef LIBXML_CATALOG_ENABLED
941 return(1);
942 #else
943 return(0);
944 #endif
945 case XML_WITH_XPATH:
946 #ifdef LIBXML_XPATH_ENABLED
947 return(1);
948 #else
949 return(0);
950 #endif
951 case XML_WITH_XPTR:
952 #ifdef LIBXML_XPTR_ENABLED
953 return(1);
954 #else
955 return(0);
956 #endif
957 case XML_WITH_XINCLUDE:
958 #ifdef LIBXML_XINCLUDE_ENABLED
959 return(1);
960 #else
961 return(0);
962 #endif
963 case XML_WITH_ICONV:
964 #ifdef LIBXML_ICONV_ENABLED
965 return(1);
966 #else
967 return(0);
968 #endif
969 case XML_WITH_ISO8859X:
970 #ifdef LIBXML_ISO8859X_ENABLED
971 return(1);
972 #else
973 return(0);
974 #endif
975 case XML_WITH_UNICODE:
976 #ifdef LIBXML_UNICODE_ENABLED
977 return(1);
978 #else
979 return(0);
980 #endif
981 case XML_WITH_REGEXP:
982 #ifdef LIBXML_REGEXP_ENABLED
983 return(1);
984 #else
985 return(0);
986 #endif
987 case XML_WITH_AUTOMATA:
988 #ifdef LIBXML_AUTOMATA_ENABLED
989 return(1);
990 #else
991 return(0);
992 #endif
993 case XML_WITH_EXPR:
994 #ifdef LIBXML_EXPR_ENABLED
995 return(1);
996 #else
997 return(0);
998 #endif
999 case XML_WITH_SCHEMAS:
1000 #ifdef LIBXML_SCHEMAS_ENABLED
1001 return(1);
1002 #else
1003 return(0);
1004 #endif
1005 case XML_WITH_SCHEMATRON:
1006 #ifdef LIBXML_SCHEMATRON_ENABLED
1007 return(1);
1008 #else
1009 return(0);
1010 #endif
1011 case XML_WITH_MODULES:
1012 #ifdef LIBXML_MODULES_ENABLED
1013 return(1);
1014 #else
1015 return(0);
1016 #endif
1017 case XML_WITH_DEBUG:
1018 #ifdef LIBXML_DEBUG_ENABLED
1019 return(1);
1020 #else
1021 return(0);
1022 #endif
1023 case XML_WITH_DEBUG_MEM:
1024 #ifdef DEBUG_MEMORY_LOCATION
1025 return(1);
1026 #else
1027 return(0);
1028 #endif
1029 case XML_WITH_DEBUG_RUN:
1030 #ifdef LIBXML_DEBUG_RUNTIME
1031 return(1);
1032 #else
1033 return(0);
1034 #endif
1035 case XML_WITH_ZLIB:
1036 #ifdef LIBXML_ZLIB_ENABLED
1037 return(1);
1038 #else
1039 return(0);
1040 #endif
1041 case XML_WITH_LZMA:
1042 #ifdef LIBXML_LZMA_ENABLED
1043 return(1);
1044 #else
1045 return(0);
1046 #endif
1047 case XML_WITH_ICU:
1048 #ifdef LIBXML_ICU_ENABLED
1049 return(1);
1050 #else
1051 return(0);
1052 #endif
1053 default:
1054 break;
1055 }
1056 return(0);
1057 }
1058
1059 /************************************************************************
1060 * *
1061 * SAX2 defaulted attributes handling *
1062 * *
1063 ************************************************************************/
1064
1065 /**
1066 * xmlDetectSAX2:
1067 * @ctxt: an XML parser context
1068 *
1069 * Do the SAX2 detection and specific intialization
1070 */
1071 static void
1072 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1073 if (ctxt == NULL) return;
1074 #ifdef LIBXML_SAX1_ENABLED
1075 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1076 ((ctxt->sax->startElementNs != NULL) ||
1077 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1078 #else
1079 ctxt->sax2 = 1;
1080 #endif /* LIBXML_SAX1_ENABLED */
1081
1082 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1083 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1084 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1085 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1086 (ctxt->str_xml_ns == NULL)) {
1087 xmlErrMemory(ctxt, NULL);
1088 }
1089 }
1090
/*
 * Set of defaulted attributes attached to one element.
 * The values array is the variable-length tail of the structure.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;       /* the size of the array */
    /*
     * __STDC_VERSION__ is not defined by pre-C99 compilers; test that it
     * is defined before comparing so the check does not rely on an
     * undefined macro evaluating to 0.
     */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
    /* Using a C99 flexible array member avoids UBSan errors. */
    const xmlChar *values[]; /* array of localname/prefix/values/external */
#else
    const xmlChar *values[5];
#endif
};
1103
/**
 * xmlAttrNormalizeSpace:
 * @src: the source string
 * @dst: the target string
 *
 * Normalize the space in non CDATA attribute values:
 * If the attribute type is not CDATA, then the XML processor MUST further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * Note that the size of dst needs to be at least that of src, and if one
 * doesn't need to preserve src (and it doesn't come from a dictionary or
 * read-only memory) then passing src as dst is just fine: the write
 * position never gets ahead of the read position.
 *
 * Returns a pointer to the start of the normalized value (@dst), or NULL
 * if an argument is NULL or if the normalization was done in place
 * (@src == @dst) and left the string unchanged.
 */
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
    xmlChar *out;

    if ((src == NULL) || (dst == NULL))
        return(NULL);

    /* keep dst pointing at the start so it can be returned */
    out = dst;

    /* drop leading spaces */
    while (*src == 0x20) src++;
    while (*src != 0) {
        if (*src == 0x20) {
            /* collapse a run of spaces; drop it if it is trailing */
            while (*src == 0x20) src++;
            if (*src != 0)
                *out++ = 0x20;
        } else {
            *out++ = *src++;
        }
    }
    *out = 0;

    /*
     * In place, the read and write cursors only end up equal when no
     * character was dropped, i.e. no conversion was needed.
     */
    if (out == src)
        return(NULL);
    return(dst);
}
1142
1143 /**
1144 * xmlAttrNormalizeSpace2:
1145 * @src: the source string
1146 *
1147 * Normalize the space in non CDATA attribute values, a slightly more complex
1148 * front end to avoid allocation problems when running on attribute values
1149 * coming from the input.
1150 *
1151 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1152 * is needed.
1153 */
1154 static const xmlChar *
1155 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1156 {
1157 int i;
1158 int remove_head = 0;
1159 int need_realloc = 0;
1160 const xmlChar *cur;
1161
1162 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1163 return(NULL);
1164 i = *len;
1165 if (i <= 0)
1166 return(NULL);
1167
1168 cur = src;
1169 while (*cur == 0x20) {
1170 cur++;
1171 remove_head++;
1172 }
1173 while (*cur != 0) {
1174 if (*cur == 0x20) {
1175 cur++;
1176 if ((*cur == 0x20) || (*cur == 0)) {
1177 need_realloc = 1;
1178 break;
1179 }
1180 } else
1181 cur++;
1182 }
1183 if (need_realloc) {
1184 xmlChar *ret;
1185
1186 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1187 if (ret == NULL) {
1188 xmlErrMemory(ctxt, NULL);
1189 return(NULL);
1190 }
1191 xmlAttrNormalizeSpace(ret, ret);
1192 *len = (int) strlen((const char *)ret);
1193 return(ret);
1194 } else if (remove_head) {
1195 *len -= remove_head;
1196 memmove(src, src + remove_head, 1 + *len);
1197 return(src);
1198 }
1199 return(NULL);
1200 }
1201
1202 /**
1203 * xmlAddDefAttrs:
1204 * @ctxt: an XML parser context
1205 * @fullname: the element fullname
1206 * @fullattr: the attribute fullname
1207 * @value: the attribute value
1208 *
1209 * Add a defaulted attribute for an element
1210 */
1211 static void
1212 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1213 const xmlChar *fullname,
1214 const xmlChar *fullattr,
1215 const xmlChar *value) {
1216 xmlDefAttrsPtr defaults;
1217 int len;
1218 const xmlChar *name;
1219 const xmlChar *prefix;
1220
1221 /*
1222 * Allows to detect attribute redefinitions
1223 */
1224 if (ctxt->attsSpecial != NULL) {
1225 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1226 return;
1227 }
1228
1229 if (ctxt->attsDefault == NULL) {
1230 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1231 if (ctxt->attsDefault == NULL)
1232 goto mem_error;
1233 }
1234
1235 /*
1236 * split the element name into prefix:localname , the string found
1237 * are within the DTD and then not associated to namespace names.
1238 */
1239 name = xmlSplitQName3(fullname, &len);
1240 if (name == NULL) {
1241 name = xmlDictLookup(ctxt->dict, fullname, -1);
1242 prefix = NULL;
1243 } else {
1244 name = xmlDictLookup(ctxt->dict, name, -1);
1245 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1246 }
1247
1248 /*
1249 * make sure there is some storage
1250 */
1251 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1252 if (defaults == NULL) {
1253 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1254 (4 * 5) * sizeof(const xmlChar *));
1255 if (defaults == NULL)
1256 goto mem_error;
1257 defaults->nbAttrs = 0;
1258 defaults->maxAttrs = 4;
1259 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1260 defaults, NULL) < 0) {
1261 xmlFree(defaults);
1262 goto mem_error;
1263 }
1264 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1265 xmlDefAttrsPtr temp;
1266
1267 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1268 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1269 if (temp == NULL)
1270 goto mem_error;
1271 defaults = temp;
1272 defaults->maxAttrs *= 2;
1273 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1274 defaults, NULL) < 0) {
1275 xmlFree(defaults);
1276 goto mem_error;
1277 }
1278 }
1279
1280 /*
1281 * Split the element name into prefix:localname , the string found
1282 * are within the DTD and hen not associated to namespace names.
1283 */
1284 name = xmlSplitQName3(fullattr, &len);
1285 if (name == NULL) {
1286 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1287 prefix = NULL;
1288 } else {
1289 name = xmlDictLookup(ctxt->dict, name, -1);
1290 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1291 }
1292
1293 defaults->values[5 * defaults->nbAttrs] = name;
1294 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1295 /* intern the string and precompute the end */
1296 len = xmlStrlen(value);
1297 value = xmlDictLookup(ctxt->dict, value, len);
1298 defaults->values[5 * defaults->nbAttrs + 2] = value;
1299 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1300 if (ctxt->external)
1301 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1302 else
1303 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1304 defaults->nbAttrs++;
1305
1306 return;
1307
1308 mem_error:
1309 xmlErrMemory(ctxt, NULL);
1310 return;
1311 }
1312
1313 /**
1314 * xmlAddSpecialAttr:
1315 * @ctxt: an XML parser context
1316 * @fullname: the element fullname
1317 * @fullattr: the attribute fullname
1318 * @type: the attribute type
1319 *
1320 * Register this attribute type
1321 */
1322 static void
1323 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1324 const xmlChar *fullname,
1325 const xmlChar *fullattr,
1326 int type)
1327 {
1328 if (ctxt->attsSpecial == NULL) {
1329 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1330 if (ctxt->attsSpecial == NULL)
1331 goto mem_error;
1332 }
1333
1334 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1335 return;
1336
1337 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1338 (void *) (ptrdiff_t) type);
1339 return;
1340
1341 mem_error:
1342 xmlErrMemory(ctxt, NULL);
1343 return;
1344 }
1345
1346 /**
1347 * xmlCleanSpecialAttrCallback:
1348 *
1349 * Removes CDATA attributes from the special attribute table
1350 */
1351 static void
1352 xmlCleanSpecialAttrCallback(void *payload, void *data,
1353 const xmlChar *fullname, const xmlChar *fullattr,
1354 const xmlChar *unused ATTRIBUTE_UNUSED) {
1355 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1356
1357 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1358 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1359 }
1360 }
1361
1362 /**
1363 * xmlCleanSpecialAttr:
1364 * @ctxt: an XML parser context
1365 *
1366 * Trim the list of attributes defined to remove all those of type
1367 * CDATA as they are not special. This call should be done when finishing
1368 * to parse the DTD and before starting to parse the document root.
1369 */
1370 static void
1371 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1372 {
1373 if (ctxt->attsSpecial == NULL)
1374 return;
1375
1376 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1377
1378 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1379 xmlHashFree(ctxt->attsSpecial, NULL);
1380 ctxt->attsSpecial = NULL;
1381 }
1382 return;
1383 }
1384
/*
 * Returns a pointer just past the run of ASCII letters starting at @p.
 */
static const xmlChar *
xmlLangSkipAlpha(const xmlChar *p)
{
    while (((p[0] >= 'A') && (p[0] <= 'Z')) ||
           ((p[0] >= 'a') && (p[0] <= 'z')))
        p++;
    return(p);
}

/*
 * Returns 1 if @c is an ASCII letter or digit, 0 otherwise.
 */
static int
xmlLangIsAlnum(xmlChar c)
{
    return(((c >= '0') && (c <= '9')) ||
           ((c >= 'A') && (c <= 'Z')) ||
           ((c >= 'a') && (c <= 'z')));
}

/*
 * Recognizes at @p a variant of the form "DIGIT 3alphanum" which must be
 * followed by the end of the string or by '-'.
 * Returns a pointer past the subtag, or NULL if there is no such variant.
 */
static const xmlChar *
xmlLangDigitVariant(const xmlChar *p)
{
    if ((p[0] < '0') || (p[0] > '9'))
        return(NULL);
    /* a zero byte is not alphanumeric, so the reads stop at the end */
    if (!xmlLangIsAlnum(p[1]) || !xmlLangIsAlnum(p[2]) ||
        !xmlLangIsAlnum(p[3]))
        return(NULL);
    if ((p[4] != 0) && (p[4] != '-'))
        return(NULL);
    return(p + 4);
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC references the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
 * The parser below doesn't try to cope with extension or privateuse
 * that could be added but that's not interoperable anyway.
 * Anything following the first variant is accepted without being checked.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang, *nxt;

    if (cur == NULL)
        return (0);
    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * Still allow IANA code and user code which were coming
         * from the previous version of the XML-1.0 specification
         * it's deprecated but we should not fail.
         * Productions [36] and [37] require at least one letter.
         */
        cur += 2;
        nxt = xmlLangSkipAlpha(cur);
        return((nxt != cur) && (nxt[0] == 0));
    }
    nxt = xmlLangSkipAlpha(cur);
    if (nxt - cur >= 4) {
        /*
         * Reserved
         */
        if ((nxt - cur > 8) || (nxt[0] != 0))
            return(0);
        return(1);
    }
    if (nxt - cur < 2)
        return(0);
    /* we got an ISO 639 code */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have extlang or script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto digit_subtag;

    nxt = xmlLangSkipAlpha(nxt);
    if (nxt - cur == 4)
        goto script;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 3)
        return(0);
    /* we parsed an extlang */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto digit_subtag;

    nxt = xmlLangSkipAlpha(nxt);
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 4)
        return(0);
    /* we parsed a script */
script:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto digit_subtag;

    nxt = xmlLangSkipAlpha(nxt);

    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 2)
        return(0);
    /* we parsed a region */
region:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can just have a variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9')) {
        nxt = xmlLangDigitVariant(nxt);
        if (nxt == NULL)
            return(0);
        goto variant;
    }
    nxt = xmlLangSkipAlpha(nxt);

    if ((nxt - cur < 5) || (nxt - cur > 8))
        return(0);

    /* we parsed a variant */
variant:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);
    /* extensions and private use subtags not checked */
    return (1);

digit_subtag:
    /* exactly three digits form an UN M.49 region */
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
        ((nxt[2] >= '0') && (nxt[2] <= '9')) &&
        ((nxt[3] == 0) || (nxt[3] == '-'))) {
        nxt += 3;
        goto region;
    }
    /* otherwise only a "DIGIT 3alphanum" variant can start with a digit */
    nxt = xmlLangDigitVariant(nxt);
    if (nxt == NULL)
        return(0);
    goto variant;
}
1578
1579 /************************************************************************
1580 * *
1581 * Parser stacks related functions and macros *
1582 * *
1583 ************************************************************************/
1584
1585 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1586 const xmlChar ** str);
1587
1588 #ifdef SAX2
1589 /**
1590 * nsPush:
1591 * @ctxt: an XML parser context
1592 * @prefix: the namespace prefix or NULL
1593 * @URL: the namespace name
1594 *
1595 * Pushes a new parser namespace on top of the ns stack
1596 *
1597 * Returns -1 in case of error, -2 if the namespace should be discarded
1598 * and the index in the stack otherwise.
1599 */
1600 static int
1601 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1602 {
1603 if (ctxt->options & XML_PARSE_NSCLEAN) {
1604 int i;
1605 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1606 if (ctxt->nsTab[i] == prefix) {
1607 /* in scope */
1608 if (ctxt->nsTab[i + 1] == URL)
1609 return(-2);
1610 /* out of scope keep it */
1611 break;
1612 }
1613 }
1614 }
1615 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1616 ctxt->nsMax = 10;
1617 ctxt->nsNr = 0;
1618 ctxt->nsTab = (const xmlChar **)
1619 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1620 if (ctxt->nsTab == NULL) {
1621 xmlErrMemory(ctxt, NULL);
1622 ctxt->nsMax = 0;
1623 return (-1);
1624 }
1625 } else if (ctxt->nsNr >= ctxt->nsMax) {
1626 const xmlChar ** tmp;
1627 ctxt->nsMax *= 2;
1628 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1629 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1630 if (tmp == NULL) {
1631 xmlErrMemory(ctxt, NULL);
1632 ctxt->nsMax /= 2;
1633 return (-1);
1634 }
1635 ctxt->nsTab = tmp;
1636 }
1637 ctxt->nsTab[ctxt->nsNr++] = prefix;
1638 ctxt->nsTab[ctxt->nsNr++] = URL;
1639 return (ctxt->nsNr);
1640 }
1641 /**
1642 * nsPop:
1643 * @ctxt: an XML parser context
1644 * @nr: the number to pop
1645 *
1646 * Pops the top @nr parser prefix/namespace from the ns stack
1647 *
1648 * Returns the number of namespaces removed
1649 */
1650 static int
1651 nsPop(xmlParserCtxtPtr ctxt, int nr)
1652 {
1653 int i;
1654
1655 if (ctxt->nsTab == NULL) return(0);
1656 if (ctxt->nsNr < nr) {
1657 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1658 nr = ctxt->nsNr;
1659 }
1660 if (ctxt->nsNr <= 0)
1661 return (0);
1662
1663 for (i = 0;i < nr;i++) {
1664 ctxt->nsNr--;
1665 ctxt->nsTab[ctxt->nsNr] = NULL;
1666 }
1667 return(nr);
1668 }
1669 #endif
1670
1671 static int
1672 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1673 const xmlChar **atts;
1674 int *attallocs;
1675 int maxatts;
1676
1677 if (ctxt->atts == NULL) {
1678 maxatts = 55; /* allow for 10 attrs by default */
1679 atts = (const xmlChar **)
1680 xmlMalloc(maxatts * sizeof(xmlChar *));
1681 if (atts == NULL) goto mem_error;
1682 ctxt->atts = atts;
1683 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1684 if (attallocs == NULL) goto mem_error;
1685 ctxt->attallocs = attallocs;
1686 ctxt->maxatts = maxatts;
1687 } else if (nr + 5 > ctxt->maxatts) {
1688 maxatts = (nr + 5) * 2;
1689 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1690 maxatts * sizeof(const xmlChar *));
1691 if (atts == NULL) goto mem_error;
1692 ctxt->atts = atts;
1693 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1694 (maxatts / 5) * sizeof(int));
1695 if (attallocs == NULL) goto mem_error;
1696 ctxt->attallocs = attallocs;
1697 ctxt->maxatts = maxatts;
1698 }
1699 return(ctxt->maxatts);
1700 mem_error:
1701 xmlErrMemory(ctxt, NULL);
1702 return(-1);
1703 }
1704
1705 /**
1706 * inputPush:
1707 * @ctxt: an XML parser context
1708 * @value: the parser input
1709 *
1710 * Pushes a new parser input on top of the input stack
1711 *
1712 * Returns -1 in case of error, the index in the stack otherwise
1713 */
1714 int
1715 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1716 {
1717 if ((ctxt == NULL) || (value == NULL))
1718 return(-1);
1719 if (ctxt->inputNr >= ctxt->inputMax) {
1720 ctxt->inputMax *= 2;
1721 ctxt->inputTab =
1722 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1723 ctxt->inputMax *
1724 sizeof(ctxt->inputTab[0]));
1725 if (ctxt->inputTab == NULL) {
1726 xmlErrMemory(ctxt, NULL);
1727 xmlFreeInputStream(value);
1728 ctxt->inputMax /= 2;
1729 value = NULL;
1730 return (-1);
1731 }
1732 }
1733 ctxt->inputTab[ctxt->inputNr] = value;
1734 ctxt->input = value;
1735 return (ctxt->inputNr++);
1736 }
1737 /**
1738 * inputPop:
1739 * @ctxt: an XML parser context
1740 *
1741 * Pops the top parser input from the input stack
1742 *
1743 * Returns the input just removed
1744 */
1745 xmlParserInputPtr
1746 inputPop(xmlParserCtxtPtr ctxt)
1747 {
1748 xmlParserInputPtr ret;
1749
1750 if (ctxt == NULL)
1751 return(NULL);
1752 if (ctxt->inputNr <= 0)
1753 return (NULL);
1754 ctxt->inputNr--;
1755 if (ctxt->inputNr > 0)
1756 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1757 else
1758 ctxt->input = NULL;
1759 ret = ctxt->inputTab[ctxt->inputNr];
1760 ctxt->inputTab[ctxt->inputNr] = NULL;
1761 return (ret);
1762 }
1763 /**
1764 * nodePush:
1765 * @ctxt: an XML parser context
1766 * @value: the element node
1767 *
1768 * Pushes a new element node on top of the node stack
1769 *
1770 * Returns -1 in case of error, the index in the stack otherwise
1771 */
1772 int
1773 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1774 {
1775 if (ctxt == NULL) return(0);
1776 if (ctxt->nodeNr >= ctxt->nodeMax) {
1777 xmlNodePtr *tmp;
1778
1779 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1780 ctxt->nodeMax * 2 *
1781 sizeof(ctxt->nodeTab[0]));
1782 if (tmp == NULL) {
1783 xmlErrMemory(ctxt, NULL);
1784 return (-1);
1785 }
1786 ctxt->nodeTab = tmp;
1787 ctxt->nodeMax *= 2;
1788 }
1789 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1790 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1791 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1792 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1793 xmlParserMaxDepth);
1794 xmlHaltParser(ctxt);
1795 return(-1);
1796 }
1797 ctxt->nodeTab[ctxt->nodeNr] = value;
1798 ctxt->node = value;
1799 return (ctxt->nodeNr++);
1800 }
1801
1802 /**
1803 * nodePop:
1804 * @ctxt: an XML parser context
1805 *
1806 * Pops the top element node from the node stack
1807 *
1808 * Returns the node just removed
1809 */
1810 xmlNodePtr
1811 nodePop(xmlParserCtxtPtr ctxt)
1812 {
1813 xmlNodePtr ret;
1814
1815 if (ctxt == NULL) return(NULL);
1816 if (ctxt->nodeNr <= 0)
1817 return (NULL);
1818 ctxt->nodeNr--;
1819 if (ctxt->nodeNr > 0)
1820 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1821 else
1822 ctxt->node = NULL;
1823 ret = ctxt->nodeTab[ctxt->nodeNr];
1824 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1825 return (ret);
1826 }
1827
1828 #ifdef LIBXML_PUSH_ENABLED
1829 /**
1830 * nameNsPush:
1831 * @ctxt: an XML parser context
1832 * @value: the element name
1833 * @prefix: the element prefix
1834 * @URI: the element namespace name
1835 *
1836 * Pushes a new element name/prefix/URL on top of the name stack
1837 *
1838 * Returns -1 in case of error, the index in the stack otherwise
1839 */
1840 static int
1841 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1842 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1843 {
1844 if (ctxt->nameNr >= ctxt->nameMax) {
1845 const xmlChar * *tmp;
1846 void **tmp2;
1847 ctxt->nameMax *= 2;
1848 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1849 ctxt->nameMax *
1850 sizeof(ctxt->nameTab[0]));
1851 if (tmp == NULL) {
1852 ctxt->nameMax /= 2;
1853 goto mem_error;
1854 }
1855 ctxt->nameTab = tmp;
1856 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1857 ctxt->nameMax * 3 *
1858 sizeof(ctxt->pushTab[0]));
1859 if (tmp2 == NULL) {
1860 ctxt->nameMax /= 2;
1861 goto mem_error;
1862 }
1863 ctxt->pushTab = tmp2;
1864 }
1865 ctxt->nameTab[ctxt->nameNr] = value;
1866 ctxt->name = value;
1867 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1868 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1869 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (ptrdiff_t) nsNr;
1870 return (ctxt->nameNr++);
1871 mem_error:
1872 xmlErrMemory(ctxt, NULL);
1873 return (-1);
1874 }
1875 /**
1876 * nameNsPop:
1877 * @ctxt: an XML parser context
1878 *
1879 * Pops the top element/prefix/URI name from the name stack
1880 *
1881 * Returns the name just removed
1882 */
1883 static const xmlChar *
1884 nameNsPop(xmlParserCtxtPtr ctxt)
1885 {
1886 const xmlChar *ret;
1887
1888 if (ctxt->nameNr <= 0)
1889 return (NULL);
1890 ctxt->nameNr--;
1891 if (ctxt->nameNr > 0)
1892 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1893 else
1894 ctxt->name = NULL;
1895 ret = ctxt->nameTab[ctxt->nameNr];
1896 ctxt->nameTab[ctxt->nameNr] = NULL;
1897 return (ret);
1898 }
1899 #endif /* LIBXML_PUSH_ENABLED */
1900
1901 /**
1902 * namePush:
1903 * @ctxt: an XML parser context
1904 * @value: the element name
1905 *
1906 * Pushes a new element name on top of the name stack
1907 *
1908 * Returns -1 in case of error, the index in the stack otherwise
1909 */
1910 int
1911 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1912 {
1913 if (ctxt == NULL) return (-1);
1914
1915 if (ctxt->nameNr >= ctxt->nameMax) {
1916 const xmlChar * *tmp;
1917 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1918 ctxt->nameMax * 2 *
1919 sizeof(ctxt->nameTab[0]));
1920 if (tmp == NULL) {
1921 goto mem_error;
1922 }
1923 ctxt->nameTab = tmp;
1924 ctxt->nameMax *= 2;
1925 }
1926 ctxt->nameTab[ctxt->nameNr] = value;
1927 ctxt->name = value;
1928 return (ctxt->nameNr++);
1929 mem_error:
1930 xmlErrMemory(ctxt, NULL);
1931 return (-1);
1932 }
1933 /**
1934 * namePop:
1935 * @ctxt: an XML parser context
1936 *
1937 * Pops the top element name from the name stack
1938 *
1939 * Returns the name just removed
1940 */
1941 const xmlChar *
1942 namePop(xmlParserCtxtPtr ctxt)
1943 {
1944 const xmlChar *ret;
1945
1946 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1947 return (NULL);
1948 ctxt->nameNr--;
1949 if (ctxt->nameNr > 0)
1950 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1951 else
1952 ctxt->name = NULL;
1953 ret = ctxt->nameTab[ctxt->nameNr];
1954 ctxt->nameTab[ctxt->nameNr] = NULL;
1955 return (ret);
1956 }
1957
1958 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1959 if (ctxt->spaceNr >= ctxt->spaceMax) {
1960 int *tmp;
1961
1962 ctxt->spaceMax *= 2;
1963 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1964 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1965 if (tmp == NULL) {
1966 xmlErrMemory(ctxt, NULL);
1967 ctxt->spaceMax /=2;
1968 return(-1);
1969 }
1970 ctxt->spaceTab = tmp;
1971 }
1972 ctxt->spaceTab[ctxt->spaceNr] = val;
1973 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1974 return(ctxt->spaceNr++);
1975 }
1976
1977 static int spacePop(xmlParserCtxtPtr ctxt) {
1978 int ret;
1979 if (ctxt->spaceNr <= 0) return(0);
1980 ctxt->spaceNr--;
1981 if (ctxt->spaceNr > 0)
1982 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1983 else
1984 ctxt->space = &ctxt->spaceTab[0];
1985 ret = ctxt->spaceTab[ctxt->spaceNr];
1986 ctxt->spaceTab[ctxt->spaceNr] = -1;
1987 return(ret);
1988 }
1989
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them expect a variable named ctxt holding the parser context to
 * be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1 ... cn) is true when the n first bytes at s are c1 ... cn.
 * Every argument is parenthesized so that any expression can be passed.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/* Advance by val bytes on the current line, then refill an empty buffer. */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);	\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/* Same as SKIP but keeps line and col right when crossing newlines. */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2067
2068 #define SHRINK if ((ctxt->progressive == 0) && \
2069 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2070 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2071 xmlSHRINK (ctxt);
2072
2073 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2074 xmlParserInputShrink(ctxt->input);
2075 if (*ctxt->input->cur == 0)
2076 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2077 }
2078
2079 #define GROW if ((ctxt->progressive == 0) && \
2080 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2081 xmlGROW (ctxt);
2082
2083 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2084 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2085 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2086
2087 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2088 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2089 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2090 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2091 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2092 xmlHaltParser(ctxt);
2093 return;
2094 }
2095 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2096 if ((ctxt->input->cur > ctxt->input->end) ||
2097 (ctxt->input->cur < ctxt->input->base)) {
2098 xmlHaltParser(ctxt);
2099 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2100 return;
2101 }
2102 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2103 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2104 }
2105
/* Skip blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/* Skip one byte which is not a newline, refill the buffer if it is empty. */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/* Skip the current character made of l bytes, tracking line and column. */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

/*
 * The arguments of the macros below are parenthesized so that any
 * expression can be passed; the shape of the expansions is unchanged.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/* Append the character v, l bytes long, to b at index i and advance i. */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2131
2132 /**
2133 * xmlSkipBlankChars:
2134 * @ctxt: the XML parser context
2135 *
2136 * skip all blanks character found at that point in the input streams.
2137 * It pops up finished entities in the process if allowable at that point.
2138 *
2139 * Returns the number of space chars skipped
2140 */
2141
2142 int
2143 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2144 int res = 0;
2145
2146 /*
2147 * It's Okay to use CUR/NEXT here since all the blanks are on
2148 * the ASCII range.
2149 */
2150 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2151 const xmlChar *cur;
2152 /*
2153 * if we are in the document content, go really fast
2154 */
2155 cur = ctxt->input->cur;
2156 while (IS_BLANK_CH(*cur)) {
2157 if (*cur == '\n') {
2158 ctxt->input->line++; ctxt->input->col = 1;
2159 } else {
2160 ctxt->input->col++;
2161 }
2162 cur++;
2163 res++;
2164 if (*cur == 0) {
2165 ctxt->input->cur = cur;
2166 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2167 cur = ctxt->input->cur;
2168 }
2169 }
2170 ctxt->input->cur = cur;
2171 } else {
2172 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2173
2174 while (1) {
2175 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2176 NEXT;
2177 } else if (CUR == '%') {
2178 /*
2179 * Need to handle support of entities branching here
2180 */
2181 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2182 break;
2183 xmlParsePEReference(ctxt);
2184 } else if (CUR == 0) {
2185 if (ctxt->inputNr <= 1)
2186 break;
2187 xmlPopInput(ctxt);
2188 } else {
2189 break;
2190 }
2191
2192 /*
2193 * Also increase the counter when entering or exiting a PERef.
2194 * The spec says: "When a parameter-entity reference is recognized
2195 * in the DTD and included, its replacement text MUST be enlarged
2196 * by the attachment of one leading and one following space (#x20)
2197 * character."
2198 */
2199 res++;
2200 }
2201 }
2202 return(res);
2203 }
2204
2205 /************************************************************************
2206 * *
2207 * Commodity functions to handle entities *
2208 * *
2209 ************************************************************************/
2210
2211 /**
2212 * xmlPopInput:
2213 * @ctxt: an XML parser context
2214 *
2215 * xmlPopInput: the current input pointed by ctxt->input came to an end
2216 * pop it and return the next char.
2217 *
2218 * Returns the current xmlChar in the parser context
2219 */
2220 xmlChar
2221 xmlPopInput(xmlParserCtxtPtr ctxt) {
2222 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2223 if (xmlParserDebugEntities)
2224 xmlGenericError(xmlGenericErrorContext,
2225 "Popping input %d\n", ctxt->inputNr);
2226 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2227 (ctxt->instate != XML_PARSER_EOF))
2228 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2229 "Unfinished entity outside the DTD");
2230 xmlFreeInputStream(inputPop(ctxt));
2231 if (*ctxt->input->cur == 0)
2232 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2233 return(CUR);
2234 }
2235
2236 /**
2237 * xmlPushInput:
2238 * @ctxt: an XML parser context
2239 * @input: an XML parser input fragment (entity, XML fragment ...).
2240 *
2241 * xmlPushInput: switch to a new input stream which is stacked on top
2242 * of the previous one(s).
2243 * Returns -1 in case of error or the index in the input stack
2244 */
2245 int
2246 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2247 int ret;
2248 if (input == NULL) return(-1);
2249
2250 if (xmlParserDebugEntities) {
2251 if ((ctxt->input != NULL) && (ctxt->input->filename))
2252 xmlGenericError(xmlGenericErrorContext,
2253 "%s(%d): ", ctxt->input->filename,
2254 ctxt->input->line);
2255 xmlGenericError(xmlGenericErrorContext,
2256 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2257 }
2258 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2259 (ctxt->inputNr > 1024)) {
2260 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2261 while (ctxt->inputNr > 1)
2262 xmlFreeInputStream(inputPop(ctxt));
2263 return(-1);
2264 }
2265 ret = inputPush(ctxt, input);
2266 if (ctxt->instate == XML_PARSER_EOF)
2267 return(-1);
2268 GROW;
2269 return(ret);
2270 }
2271
2272 /**
2273 * xmlParseCharRef:
2274 * @ctxt: an XML parser context
2275 *
2276 * parse Reference declarations
2277 *
2278 * [66] CharRef ::= '&#' [0-9]+ ';' |
2279 * '&#x' [0-9a-fA-F]+ ';'
2280 *
2281 * [ WFC: Legal Character ]
2282 * Characters referred to using character references must match the
2283 * production for Char.
2284 *
2285 * Returns the value parsed (as an int), 0 in case of error
2286 */
2287 int
2288 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2289 unsigned int val = 0;
2290 int count = 0;
2291 unsigned int outofrange = 0;
2292
2293 /*
2294 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2295 */
2296 if ((RAW == '&') && (NXT(1) == '#') &&
2297 (NXT(2) == 'x')) {
2298 SKIP(3);
2299 GROW;
2300 while (RAW != ';') { /* loop blocked by count */
2301 if (count++ > 20) {
2302 count = 0;
2303 GROW;
2304 if (ctxt->instate == XML_PARSER_EOF)
2305 return(0);
2306 }
2307 if ((RAW >= '0') && (RAW <= '9'))
2308 val = val * 16 + (CUR - '0');
2309 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2310 val = val * 16 + (CUR - 'a') + 10;
2311 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2312 val = val * 16 + (CUR - 'A') + 10;
2313 else {
2314 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2315 val = 0;
2316 break;
2317 }
2318 if (val > 0x10FFFF)
2319 outofrange = val;
2320
2321 NEXT;
2322 count++;
2323 }
2324 if (RAW == ';') {
2325 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2326 ctxt->input->col++;
2327 ctxt->nbChars ++;
2328 ctxt->input->cur++;
2329 }
2330 } else if ((RAW == '&') && (NXT(1) == '#')) {
2331 SKIP(2);
2332 GROW;
2333 while (RAW != ';') { /* loop blocked by count */
2334 if (count++ > 20) {
2335 count = 0;
2336 GROW;
2337 if (ctxt->instate == XML_PARSER_EOF)
2338 return(0);
2339 }
2340 if ((RAW >= '0') && (RAW <= '9'))
2341 val = val * 10 + (CUR - '0');
2342 else {
2343 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2344 val = 0;
2345 break;
2346 }
2347 if (val > 0x10FFFF)
2348 outofrange = val;
2349
2350 NEXT;
2351 count++;
2352 }
2353 if (RAW == ';') {
2354 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2355 ctxt->input->col++;
2356 ctxt->nbChars ++;
2357 ctxt->input->cur++;
2358 }
2359 } else {
2360 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2361 }
2362
2363 /*
2364 * [ WFC: Legal Character ]
2365 * Characters referred to using character references must match the
2366 * production for Char.
2367 */
2368 if ((IS_CHAR(val) && (outofrange == 0))) {
2369 return(val);
2370 } else {
2371 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2372 "xmlParseCharRef: invalid xmlChar value %d\n",
2373 val);
2374 }
2375 return(0);
2376 }
2377
2378 /**
2379 * xmlParseStringCharRef:
2380 * @ctxt: an XML parser context
2381 * @str: a pointer to an index in the string
2382 *
2383 * parse Reference declarations, variant parsing from a string rather
2384 * than an an input flow.
2385 *
2386 * [66] CharRef ::= '&#' [0-9]+ ';' |
2387 * '&#x' [0-9a-fA-F]+ ';'
2388 *
2389 * [ WFC: Legal Character ]
2390 * Characters referred to using character references must match the
2391 * production for Char.
2392 *
2393 * Returns the value parsed (as an int), 0 in case of error, str will be
2394 * updated to the current value of the index
2395 */
2396 static int
2397 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2398 const xmlChar *ptr;
2399 xmlChar cur;
2400 unsigned int val = 0;
2401 unsigned int outofrange = 0;
2402
2403 if ((str == NULL) || (*str == NULL)) return(0);
2404 ptr = *str;
2405 cur = *ptr;
2406 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2407 ptr += 3;
2408 cur = *ptr;
2409 while (cur != ';') { /* Non input consuming loop */
2410 if ((cur >= '0') && (cur <= '9'))
2411 val = val * 16 + (cur - '0');
2412 else if ((cur >= 'a') && (cur <= 'f'))
2413 val = val * 16 + (cur - 'a') + 10;
2414 else if ((cur >= 'A') && (cur <= 'F'))
2415 val = val * 16 + (cur - 'A') + 10;
2416 else {
2417 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2418 val = 0;
2419 break;
2420 }
2421 if (val > 0x10FFFF)
2422 outofrange = val;
2423
2424 ptr++;
2425 cur = *ptr;
2426 }
2427 if (cur == ';')
2428 ptr++;
2429 } else if ((cur == '&') && (ptr[1] == '#')){
2430 ptr += 2;
2431 cur = *ptr;
2432 while (cur != ';') { /* Non input consuming loops */
2433 if ((cur >= '0') && (cur <= '9'))
2434 val = val * 10 + (cur - '0');
2435 else {
2436 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2437 val = 0;
2438 break;
2439 }
2440 if (val > 0x10FFFF)
2441 outofrange = val;
2442
2443 ptr++;
2444 cur = *ptr;
2445 }
2446 if (cur == ';')
2447 ptr++;
2448 } else {
2449 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2450 return(0);
2451 }
2452 *str = ptr;
2453
2454 /*
2455 * [ WFC: Legal Character ]
2456 * Characters referred to using character references must match the
2457 * production for Char.
2458 */
2459 if ((IS_CHAR(val) && (outofrange == 0))) {
2460 return(val);
2461 } else {
2462 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2463 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2464 val);
2465 }
2466 return(0);
2467 }
2468
2469 /**
2470 * xmlParserHandlePEReference:
2471 * @ctxt: the parser context
2472 *
2473 * [69] PEReference ::= '%' Name ';'
2474 *
2475 * [ WFC: No Recursion ]
2476 * A parsed entity must not contain a recursive
2477 * reference to itself, either directly or indirectly.
2478 *
2479 * [ WFC: Entity Declared ]
2480 * In a document without any DTD, a document with only an internal DTD
2481 * subset which contains no parameter entity references, or a document
2482 * with "standalone='yes'", ... ... The declaration of a parameter
2483 * entity must precede any reference to it...
2484 *
2485 * [ VC: Entity Declared ]
2486 * In a document with an external subset or external parameter entities
2487 * with "standalone='no'", ... ... The declaration of a parameter entity
2488 * must precede any reference to it...
2489 *
2490 * [ WFC: In DTD ]
2491 * Parameter-entity references may only appear in the DTD.
2492 * NOTE: misleading but this is handled.
2493 *
2494 * A PEReference may have been detected in the current input stream
2495 * the handling is done accordingly to
2496 * http://www.w3.org/TR/REC-xml#entproc
2497 * i.e.
2498 * - Included in literal in entity values
2499 * - Included as Parameter Entity reference within DTDs
2500 */
2501 void
2502 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2503 switch(ctxt->instate) {
2504 case XML_PARSER_CDATA_SECTION:
2505 return;
2506 case XML_PARSER_COMMENT:
2507 return;
2508 case XML_PARSER_START_TAG:
2509 return;
2510 case XML_PARSER_END_TAG:
2511 return;
2512 case XML_PARSER_EOF:
2513 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2514 return;
2515 case XML_PARSER_PROLOG:
2516 case XML_PARSER_START:
2517 case XML_PARSER_MISC:
2518 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2519 return;
2520 case XML_PARSER_ENTITY_DECL:
2521 case XML_PARSER_CONTENT:
2522 case XML_PARSER_ATTRIBUTE_VALUE:
2523 case XML_PARSER_PI:
2524 case XML_PARSER_SYSTEM_LITERAL:
2525 case XML_PARSER_PUBLIC_LITERAL:
2526 /* we just ignore it there */
2527 return;
2528 case XML_PARSER_EPILOG:
2529 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2530 return;
2531 case XML_PARSER_ENTITY_VALUE:
2532 /*
2533 * NOTE: in the case of entity values, we don't do the
2534 * substitution here since we need the literal
2535 * entity value to be able to save the internal
2536 * subset of the document.
2537 * This will be handled by xmlStringDecodeEntities
2538 */
2539 return;
2540 case XML_PARSER_DTD:
2541 /*
2542 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2543 * In the internal DTD subset, parameter-entity references
2544 * can occur only where markup declarations can occur, not
2545 * within markup declarations.
2546 * In that case this is handled in xmlParseMarkupDecl
2547 */
2548 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2549 return;
2550 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2551 return;
2552 break;
2553 case XML_PARSER_IGNORE:
2554 return;
2555 }
2556
2557 xmlParsePEReference(ctxt);
2558 }
2559
/*
 * growBuffer:
 * Macro used to grow the current buffer to twice its size plus @n bytes.
 *
 * @buffer: name of the xmlChar * variable holding the buffer; a companion
 *          variable named <buffer>_size, expected to be a size_t, holds its
 *          allocated size and is updated on success.
 * @n:      extra room requested on top of the doubling. Any expression is
 *          accepted; it is evaluated once.
 *
 * The enclosing function must provide a mem_error label: control jumps
 * there when the new size overflows or when the reallocation fails, in
 * which case the buffer and its recorded size are left unchanged.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2574
2575 /**
2576 * xmlStringLenDecodeEntities:
2577 * @ctxt: the parser context
2578 * @str: the input string
2579 * @len: the string length
2580 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2581 * @end: an end marker xmlChar, 0 if none
2582 * @end2: an end marker xmlChar, 0 if none
2583 * @end3: an end marker xmlChar, 0 if none
2584 *
2585 * Takes a entity string content and process to do the adequate substitutions.
2586 *
2587 * [67] Reference ::= EntityRef | CharRef
2588 *
2589 * [69] PEReference ::= '%' Name ';'
2590 *
2591 * Returns A newly allocated string with the substitution done. The caller
2592 * must deallocate it !
2593 */
2594 xmlChar *
2595 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2596 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2597 xmlChar *buffer = NULL;
2598 size_t buffer_size = 0;
2599 size_t nbchars = 0;
2600
2601 xmlChar *current = NULL;
2602 xmlChar *rep = NULL;
2603 const xmlChar *last;
2604 xmlEntityPtr ent;
2605 int c,l;
2606
2607 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2608 return(NULL);
2609 last = str + len;
2610
2611 if (((ctxt->depth > 40) &&
2612 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2613 (ctxt->depth > 1024)) {
2614 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2615 return(NULL);
2616 }
2617
2618 /*
2619 * allocate a translation buffer.
2620 */
2621 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2622 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2623 if (buffer == NULL) goto mem_error;
2624
2625 /*
2626 * OK loop until we reach one of the ending char or a size limit.
2627 * we are operating on already parsed values.
2628 */
2629 if (str < last)
2630 c = CUR_SCHAR(str, l);
2631 else
2632 c = 0;
2633 while ((c != 0) && (c != end) && /* non input consuming loop */
2634 (c != end2) && (c != end3)) {
2635
2636 if (c == 0) break;
2637 if ((c == '&') && (str[1] == '#')) {
2638 int val = xmlParseStringCharRef(ctxt, &str);
2639 if (val == 0)
2640 goto int_error;
2641 COPY_BUF(0,buffer,nbchars,val);
2642 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2643 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2644 }
2645 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2646 if (xmlParserDebugEntities)
2647 xmlGenericError(xmlGenericErrorContext,
2648 "String decoding Entity Reference: %.30s\n",
2649 str);
2650 ent = xmlParseStringEntityRef(ctxt, &str);
2651 xmlParserEntityCheck(ctxt, 0, ent, 0);
2652 if (ent != NULL)
2653 ctxt->nbentities += ent->checked / 2;
2654 if ((ent != NULL) &&
2655 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2656 if (ent->content != NULL) {
2657 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2658 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2659 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2660 }
2661 } else {
2662 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2663 "predefined entity has no content\n");
2664 goto int_error;
2665 }
2666 } else if ((ent != NULL) && (ent->content != NULL)) {
2667 ctxt->depth++;
2668 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2669 0, 0, 0);
2670 ctxt->depth--;
2671 if (rep == NULL)
2672 goto int_error;
2673
2674 current = rep;
2675 while (*current != 0) { /* non input consuming loop */
2676 buffer[nbchars++] = *current++;
2677 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2679 goto int_error;
2680 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2681 }
2682 }
2683 xmlFree(rep);
2684 rep = NULL;
2685 } else if (ent != NULL) {
2686 int i = xmlStrlen(ent->name);
2687 const xmlChar *cur = ent->name;
2688
2689 buffer[nbchars++] = '&';
2690 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2691 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2692 }
2693 for (;i > 0;i--)
2694 buffer[nbchars++] = *cur++;
2695 buffer[nbchars++] = ';';
2696 }
2697 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2698 if (xmlParserDebugEntities)
2699 xmlGenericError(xmlGenericErrorContext,
2700 "String decoding PE Reference: %.30s\n", str);
2701 ent = xmlParseStringPEReference(ctxt, &str);
2702 xmlParserEntityCheck(ctxt, 0, ent, 0);
2703 if (ent != NULL)
2704 ctxt->nbentities += ent->checked / 2;
2705 if (ent != NULL) {
2706 if (ent->content == NULL) {
2707 /*
2708 * Note: external parsed entities will not be loaded,
2709 * it is not required for a non-validating parser to
2710 * complete external PEreferences coming from the
2711 * internal subset
2712 */
2713 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2714 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2715 (ctxt->validate != 0)) {
2716 xmlLoadEntityContent(ctxt, ent);
2717 } else {
2718 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2719 "not validating will not read content for PE entity %s\n",
2720 ent->name, NULL);
2721 }
2722 }
2723 ctxt->depth++;
2724 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2725 0, 0, 0);
2726 ctxt->depth--;
2727 if (rep == NULL)
2728 goto int_error;
2729 current = rep;
2730 while (*current != 0) { /* non input consuming loop */
2731 buffer[nbchars++] = *current++;
2732 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2733 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2734 goto int_error;
2735 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2736 }
2737 }
2738 xmlFree(rep);
2739 rep = NULL;
2740 }
2741 } else {
2742 COPY_BUF(l,buffer,nbchars,c);
2743 str += l;
2744 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2745 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2746 }
2747 }
2748 if (str < last)
2749 c = CUR_SCHAR(str, l);
2750 else
2751 c = 0;
2752 }
2753 buffer[nbchars] = 0;
2754 return(buffer);
2755
2756 mem_error:
2757 xmlErrMemory(ctxt, NULL);
2758 int_error:
2759 if (rep != NULL)
2760 xmlFree(rep);
2761 if (buffer != NULL)
2762 xmlFree(buffer);
2763 return(NULL);
2764 }
2765
2766 /**
2767 * xmlStringDecodeEntities:
2768 * @ctxt: the parser context
2769 * @str: the input string
2770 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2771 * @end: an end marker xmlChar, 0 if none
2772 * @end2: an end marker xmlChar, 0 if none
2773 * @end3: an end marker xmlChar, 0 if none
2774 *
2775 * Takes a entity string content and process to do the adequate substitutions.
2776 *
2777 * [67] Reference ::= EntityRef | CharRef
2778 *
2779 * [69] PEReference ::= '%' Name ';'
2780 *
2781 * Returns A newly allocated string with the substitution done. The caller
2782 * must deallocate it !
2783 */
2784 xmlChar *
2785 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2786 xmlChar end, xmlChar end2, xmlChar end3) {
2787 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2788 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2789 end, end2, end3));
2790 }
2791
2792 /************************************************************************
2793 * *
2794 * Commodity functions, cleanup needed ? *
2795 * *
2796 ************************************************************************/
2797
2798 /**
2799 * areBlanks:
2800 * @ctxt: an XML parser context
2801 * @str: a xmlChar *
2802 * @len: the size of @str
2803 * @blank_chars: we know the chars are blanks
2804 *
2805 * Is this a sequence of blank chars that one can ignore ?
2806 *
2807 * Returns 1 if ignorable 0 otherwise.
2808 */
2809
2810 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2811 int blank_chars) {
2812 int i, ret;
2813 xmlNodePtr lastChild;
2814
2815 /*
2816 * Don't spend time trying to differentiate them, the same callback is
2817 * used !
2818 */
2819 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2820 return(0);
2821
2822 /*
2823 * Check for xml:space value.
2824 */
2825 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2826 (*(ctxt->space) == -2))
2827 return(0);
2828
2829 /*
2830 * Check that the string is made of blanks
2831 */
2832 if (blank_chars == 0) {
2833 for (i = 0;i < len;i++)
2834 if (!(IS_BLANK_CH(str[i]))) return(0);
2835 }
2836
2837 /*
2838 * Look if the element is mixed content in the DTD if available
2839 */
2840 if (ctxt->node == NULL) return(0);
2841 if (ctxt->myDoc != NULL) {
2842 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2843 if (ret == 0) return(1);
2844 if (ret == 1) return(0);
2845 }
2846
2847 /*
2848 * Otherwise, heuristic :-\
2849 */
2850 if ((RAW != '<') && (RAW != 0xD)) return(0);
2851 if ((ctxt->node->children == NULL) &&
2852 (RAW == '<') && (NXT(1) == '/')) return(0);
2853
2854 lastChild = xmlGetLastChild(ctxt->node);
2855 if (lastChild == NULL) {
2856 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2857 (ctxt->node->content != NULL)) return(0);
2858 } else if (xmlNodeIsText(lastChild))
2859 return(0);
2860 else if ((ctxt->node->children != NULL) &&
2861 (xmlNodeIsText(ctxt->node->children)))
2862 return(0);
2863 return(1);
2864 }
2865
2866 /************************************************************************
2867 * *
2868 * Extra stuff for namespace support *
2869 * Relates to http://www.w3.org/TR/WD-xml-names *
2870 * *
2871 ************************************************************************/
2872
2873 /**
2874 * xmlSplitQName:
2875 * @ctxt: an XML parser context
2876 * @name: an XML parser context
2877 * @prefix: a xmlChar **
2878 *
2879 * parse an UTF8 encoded XML qualified name string
2880 *
2881 * [NS 5] QName ::= (Prefix ':')? LocalPart
2882 *
2883 * [NS 6] Prefix ::= NCName
2884 *
2885 * [NS 7] LocalPart ::= NCName
2886 *
2887 * Returns the local part, and prefix is updated
2888 * to get the Prefix if any.
2889 */
2890
2891 xmlChar *
2892 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2893 xmlChar buf[XML_MAX_NAMELEN + 5];
2894 xmlChar *buffer = NULL;
2895 int len = 0;
2896 int max = XML_MAX_NAMELEN;
2897 xmlChar *ret = NULL;
2898 const xmlChar *cur = name;
2899 int c;
2900
2901 if (prefix == NULL) return(NULL);
2902 *prefix = NULL;
2903
2904 if (cur == NULL) return(NULL);
2905
2906 #ifndef XML_XML_NAMESPACE
2907 /* xml: prefix is not really a namespace */
2908 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2909 (cur[2] == 'l') && (cur[3] == ':'))
2910 return(xmlStrdup(name));
2911 #endif
2912
2913 /* nasty but well=formed */
2914 if (cur[0] == ':')
2915 return(xmlStrdup(name));
2916
2917 c = *cur++;
2918 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2919 buf[len++] = c;
2920 c = *cur++;
2921 }
2922 if (len >= max) {
2923 /*
2924 * Okay someone managed to make a huge name, so he's ready to pay
2925 * for the processing speed.
2926 */
2927 max = len * 2;
2928
2929 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2930 if (buffer == NULL) {
2931 xmlErrMemory(ctxt, NULL);
2932 return(NULL);
2933 }
2934 memcpy(buffer, buf, len);
2935 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2936 if (len + 10 > max) {
2937 xmlChar *tmp;
2938
2939 max *= 2;
2940 tmp = (xmlChar *) xmlRealloc(buffer,
2941 max * sizeof(xmlChar));
2942 if (tmp == NULL) {
2943 xmlFree(buffer);
2944 xmlErrMemory(ctxt, NULL);
2945 return(NULL);
2946 }
2947 buffer = tmp;
2948 }
2949 buffer[len++] = c;
2950 c = *cur++;
2951 }
2952 buffer[len] = 0;
2953 }
2954
2955 if ((c == ':') && (*cur == 0)) {
2956 if (buffer != NULL)
2957 xmlFree(buffer);
2958 *prefix = NULL;
2959 return(xmlStrdup(name));
2960 }
2961
2962 if (buffer == NULL)
2963 ret = xmlStrndup(buf, len);
2964 else {
2965 ret = buffer;
2966 buffer = NULL;
2967 max = XML_MAX_NAMELEN;
2968 }
2969
2970
2971 if (c == ':') {
2972 c = *cur;
2973 *prefix = ret;
2974 if (c == 0) {
2975 return(xmlStrndup(BAD_CAST "", 0));
2976 }
2977 len = 0;
2978
2979 /*
2980 * Check that the first character is proper to start
2981 * a new name
2982 */
2983 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2984 ((c >= 0x41) && (c <= 0x5A)) ||
2985 (c == '_') || (c == ':'))) {
2986 int l;
2987 int first = CUR_SCHAR(cur, l);
2988
2989 if (!IS_LETTER(first) && (first != '_')) {
2990 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2991 "Name %s is not XML Namespace compliant\n",
2992 name);
2993 }
2994 }
2995 cur++;
2996
2997 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2998 buf[len++] = c;
2999 c = *cur++;
3000 }
3001 if (len >= max) {
3002 /*
3003 * Okay someone managed to make a huge name, so he's ready to pay
3004 * for the processing speed.
3005 */
3006 max = len * 2;
3007
3008 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3009 if (buffer == NULL) {
3010 xmlErrMemory(ctxt, NULL);
3011 return(NULL);
3012 }
3013 memcpy(buffer, buf, len);
3014 while (c != 0) { /* tested bigname2.xml */
3015 if (len + 10 > max) {
3016 xmlChar *tmp;
3017
3018 max *= 2;
3019 tmp = (xmlChar *) xmlRealloc(buffer,
3020 max * sizeof(xmlChar));
3021 if (tmp == NULL) {
3022 xmlErrMemory(ctxt, NULL);
3023 xmlFree(buffer);
3024 return(NULL);
3025 }
3026 buffer = tmp;
3027 }
3028 buffer[len++] = c;
3029 c = *cur++;
3030 }
3031 buffer[len] = 0;
3032 }
3033
3034 if (buffer == NULL)
3035 ret = xmlStrndup(buf, len);
3036 else {
3037 ret = buffer;
3038 }
3039 }
3040
3041 return(ret);
3042 }
3043
3044 /************************************************************************
3045 * *
3046 * The parser itself *
3047 * Relates to http://www.w3.org/TR/REC-xml *
3048 * *
3049 ************************************************************************/
3050
3051 /************************************************************************
3052 * *
3053 * Routines to parse Name, NCName and NmToken *
3054 * *
3055 ************************************************************************/
#ifdef DEBUG
/*
 * Counters kept for the name parsing routines; they only exist in DEBUG
 * builds.
 */
static unsigned long nbParseName = 0,
                     nbParseNmToken = 0,
                     nbParseNCName = 0,
                     nbParseNCNameComplex = 0,
                     nbParseNameComplex = 0,
                     nbParseStringName = 0;
#endif
3064
3065 /*
3066 * The two following functions are related to the change of accepted
3067 * characters for Name and NmToken in the Revision 5 of XML-1.0
3068 * They correspond to the modified production [4] and the new production [4a]
3069 * changes in that revision. Also note that the macros used for the
3070 * productions Letter, Digit, CombiningChar and Extender are not needed
3071 * anymore.
3072 * We still keep compatibility to pre-revision5 parsing semantic if the
3073 * new XML_PARSE_OLD10 option is given to the parser.
3074 */
3075 static int
3076 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3077 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3078 /*
3079 * Use the new checks of production [4] [4a] amd [5] of the
3080 * Update 5 of XML-1.0
3081 */
3082 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3083 (((c >= 'a') && (c <= 'z')) ||
3084 ((c >= 'A') && (c <= 'Z')) ||
3085 (c == '_') || (c == ':') ||
3086 ((c >= 0xC0) && (c <= 0xD6)) ||
3087 ((c >= 0xD8) && (c <= 0xF6)) ||
3088 ((c >= 0xF8) && (c <= 0x2FF)) ||
3089 ((c >= 0x370) && (c <= 0x37D)) ||
3090 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3091 ((c >= 0x200C) && (c <= 0x200D)) ||
3092 ((c >= 0x2070) && (c <= 0x218F)) ||
3093 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3094 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3095 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3096 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3097 ((c >= 0x10000) && (c <= 0xEFFFF))))
3098 return(1);
3099 } else {
3100 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3101 return(1);
3102 }
3103 return(0);
3104 }
3105
3106 static int
3107 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3108 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3109 /*
3110 * Use the new checks of production [4] [4a] amd [5] of the
3111 * Update 5 of XML-1.0
3112 */
3113 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3114 (((c >= 'a') && (c <= 'z')) ||
3115 ((c >= 'A') && (c <= 'Z')) ||
3116 ((c >= '0') && (c <= '9')) || /* !start */
3117 (c == '_') || (c == ':') ||
3118 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3119 ((c >= 0xC0) && (c <= 0xD6)) ||
3120 ((c >= 0xD8) && (c <= 0xF6)) ||
3121 ((c >= 0xF8) && (c <= 0x2FF)) ||
3122 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3123 ((c >= 0x370) && (c <= 0x37D)) ||
3124 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3125 ((c >= 0x200C) && (c <= 0x200D)) ||
3126 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3127 ((c >= 0x2070) && (c <= 0x218F)) ||
3128 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3129 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3130 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3131 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3132 ((c >= 0x10000) && (c <= 0xEFFFF))))
3133 return(1);
3134 } else {
3135 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3136 (c == '.') || (c == '-') ||
3137 (c == '_') || (c == ':') ||
3138 (IS_COMBINING(c)) ||
3139 (IS_EXTENDER(c)))
3140 return(1);
3141 }
3142 return(0);
3143 }
3144
3145 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3146 int *len, int *alloc, int normalize);
3147
3148 static const xmlChar *
3149 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3150 int len = 0, l;
3151 int c;
3152 int count = 0;
3153
3154 #ifdef DEBUG
3155 nbParseNameComplex++;
3156 #endif
3157
3158 /*
3159 * Handler for more complex cases
3160 */
3161 GROW;
3162 if (ctxt->instate == XML_PARSER_EOF)
3163 return(NULL);
3164 c = CUR_CHAR(l);
3165 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3166 /*
3167 * Use the new checks of production [4] [4a] amd [5] of the
3168 * Update 5 of XML-1.0
3169 */
3170 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3171 (!(((c >= 'a') && (c <= 'z')) ||
3172 ((c >= 'A') && (c <= 'Z')) ||
3173 (c == '_') || (c == ':') ||
3174 ((c >= 0xC0) && (c <= 0xD6)) ||
3175 ((c >= 0xD8) && (c <= 0xF6)) ||
3176 ((c >= 0xF8) && (c <= 0x2FF)) ||
3177 ((c >= 0x370) && (c <= 0x37D)) ||
3178 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3179 ((c >= 0x200C) && (c <= 0x200D)) ||
3180 ((c >= 0x2070) && (c <= 0x218F)) ||
3181 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3182 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3183 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3184 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3185 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3186 return(NULL);
3187 }
3188 len += l;
3189 NEXTL(l);
3190 c = CUR_CHAR(l);
3191 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3192 (((c >= 'a') && (c <= 'z')) ||
3193 ((c >= 'A') && (c <= 'Z')) ||
3194 ((c >= '0') && (c <= '9')) || /* !start */
3195 (c == '_') || (c == ':') ||
3196 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3197 ((c >= 0xC0) && (c <= 0xD6)) ||
3198 ((c >= 0xD8) && (c <= 0xF6)) ||
3199 ((c >= 0xF8) && (c <= 0x2FF)) ||
3200 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3201 ((c >= 0x370) && (c <= 0x37D)) ||
3202 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3203 ((c >= 0x200C) && (c <= 0x200D)) ||
3204 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3205 ((c >= 0x2070) && (c <= 0x218F)) ||
3206 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3207 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3208 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3209 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3210 ((c >= 0x10000) && (c <= 0xEFFFF))
3211 )) {
3212 if (count++ > XML_PARSER_CHUNK_SIZE) {
3213 count = 0;
3214 GROW;
3215 if (ctxt->instate == XML_PARSER_EOF)
3216 return(NULL);
3217 }
3218 len += l;
3219 NEXTL(l);
3220 c = CUR_CHAR(l);
3221 }
3222 } else {
3223 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3224 (!IS_LETTER(c) && (c != '_') &&
3225 (c != ':'))) {
3226 return(NULL);
3227 }
3228 len += l;
3229 NEXTL(l);
3230 c = CUR_CHAR(l);
3231
3232 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3233 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3234 (c == '.') || (c == '-') ||
3235 (c == '_') || (c == ':') ||
3236 (IS_COMBINING(c)) ||
3237 (IS_EXTENDER(c)))) {
3238 if (count++ > XML_PARSER_CHUNK_SIZE) {
3239 count = 0;
3240 GROW;
3241 if (ctxt->instate == XML_PARSER_EOF)
3242 return(NULL);
3243 }
3244 len += l;
3245 NEXTL(l);
3246 c = CUR_CHAR(l);
3247 }
3248 }
3249 if ((len > XML_MAX_NAME_LENGTH) &&
3250 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3251 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3252 return(NULL);
3253 }
3254 if (ctxt->input->cur - ctxt->input->base < len) {
3255 /*
3256 * There were a couple of bugs where PERefs lead to to a change
3257 * of the buffer. Check the buffer size to avoid passing an invalid
3258 * pointer to xmlDictLookup.
3259 */
3260 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3261 "unexpected change of input buffer");
3262 return (NULL);
3263 }
3264 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3265 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3266 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3267 }
3268
3269 /**
3270 * xmlParseName:
3271 * @ctxt: an XML parser context
3272 *
3273 * parse an XML name.
3274 *
3275 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3276 * CombiningChar | Extender
3277 *
3278 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3279 *
3280 * [6] Names ::= Name (#x20 Name)*
3281 *
3282 * Returns the Name parsed or NULL
3283 */
3284
3285 const xmlChar *
3286 xmlParseName(xmlParserCtxtPtr ctxt) {
3287 const xmlChar *in;
3288 const xmlChar *ret;
3289 int count = 0;
3290
3291 GROW;
3292
3293 #ifdef DEBUG
3294 nbParseName++;
3295 #endif
3296
3297 /*
3298 * Accelerator for simple ASCII names
3299 */
3300 in = ctxt->input->cur;
3301 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3302 ((*in >= 0x41) && (*in <= 0x5A)) ||
3303 (*in == '_') || (*in == ':')) {
3304 in++;
3305 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3306 ((*in >= 0x41) && (*in <= 0x5A)) ||
3307 ((*in >= 0x30) && (*in <= 0x39)) ||
3308 (*in == '_') || (*in == '-') ||
3309 (*in == ':') || (*in == '.'))
3310 in++;
3311 if ((*in > 0) && (*in < 0x80)) {
3312 count = in - ctxt->input->cur;
3313 if ((count > XML_MAX_NAME_LENGTH) &&
3314 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3315 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3316 return(NULL);
3317 }
3318 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3319 ctxt->input->cur = in;
3320 ctxt->nbChars += count;
3321 ctxt->input->col += count;
3322 if (ret == NULL)
3323 xmlErrMemory(ctxt, NULL);
3324 return(ret);
3325 }
3326 }
3327 /* accelerator for special cases */
3328 return(xmlParseNameComplex(ctxt));
3329 }
3330
3331 static const xmlChar *
3332 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3333 int len = 0, l;
3334 int c;
3335 int count = 0;
3336 size_t startPosition = 0;
3337
3338 #ifdef DEBUG
3339 nbParseNCNameComplex++;
3340 #endif
3341
3342 /*
3343 * Handler for more complex cases
3344 */
3345 GROW;
3346 startPosition = CUR_PTR - BASE_PTR;
3347 c = CUR_CHAR(l);
3348 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3349 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3350 return(NULL);
3351 }
3352
3353 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3354 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3355 if (count++ > XML_PARSER_CHUNK_SIZE) {
3356 if ((len > XML_MAX_NAME_LENGTH) &&
3357 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3358 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3359 return(NULL);
3360 }
3361 count = 0;
3362 GROW;
3363 if (ctxt->instate == XML_PARSER_EOF)
3364 return(NULL);
3365 }
3366 len += l;
3367 NEXTL(l);
3368 c = CUR_CHAR(l);
3369 if (c == 0) {
3370 count = 0;
3371 /*
3372 * when shrinking to extend the buffer we really need to preserve
3373 * the part of the name we already parsed. Hence rolling back
3374 * by current lenght.
3375 */
3376 ctxt->input->cur -= l;
3377 GROW;
3378 ctxt->input->cur += l;
3379 if (ctxt->instate == XML_PARSER_EOF)
3380 return(NULL);
3381 c = CUR_CHAR(l);
3382 }
3383 }
3384 if ((len > XML_MAX_NAME_LENGTH) &&
3385 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3386 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3387 return(NULL);
3388 }
3389 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3390 }
3391
3392 /**
3393 * xmlParseNCName:
3394 * @ctxt: an XML parser context
3395 * @len: length of the string parsed
3396 *
3397 * parse an XML name.
3398 *
3399 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3400 * CombiningChar | Extender
3401 *
3402 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3403 *
3404 * Returns the Name parsed or NULL
3405 */
3406
3407 static const xmlChar *
3408 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3409 const xmlChar *in, *e;
3410 const xmlChar *ret;
3411 int count = 0;
3412
3413 #ifdef DEBUG
3414 nbParseNCName++;
3415 #endif
3416
3417 /*
3418 * Accelerator for simple ASCII names
3419 */
3420 in = ctxt->input->cur;
3421 e = ctxt->input->end;
3422 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3423 ((*in >= 0x41) && (*in <= 0x5A)) ||
3424 (*in == '_')) && (in < e)) {
3425 in++;
3426 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3427 ((*in >= 0x41) && (*in <= 0x5A)) ||
3428 ((*in >= 0x30) && (*in <= 0x39)) ||
3429 (*in == '_') || (*in == '-') ||
3430 (*in == '.')) && (in < e))
3431 in++;
3432 if (in >= e)
3433 goto complex;
3434 if ((*in > 0) && (*in < 0x80)) {
3435 count = in - ctxt->input->cur;
3436 if ((count > XML_MAX_NAME_LENGTH) &&
3437 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3438 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3439 return(NULL);
3440 }
3441 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3442 ctxt->input->cur = in;
3443 ctxt->nbChars += count;
3444 ctxt->input->col += count;
3445 if (ret == NULL) {
3446 xmlErrMemory(ctxt, NULL);
3447 }
3448 return(ret);
3449 }
3450 }
3451 complex:
3452 return(xmlParseNCNameComplex(ctxt));
3453 }
3454
3455 /**
3456 * xmlParseNameAndCompare:
3457 * @ctxt: an XML parser context
3458 *
3459 * parse an XML name and compares for match
3460 * (specialized for endtag parsing)
3461 *
3462 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3463 * and the name for mismatch
3464 */
3465
3466 static const xmlChar *
3467 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3468 register const xmlChar *cmp = other;
3469 register const xmlChar *in;
3470 const xmlChar *ret;
3471
3472 GROW;
3473 if (ctxt->instate == XML_PARSER_EOF)
3474 return(NULL);
3475
3476 in = ctxt->input->cur;
3477 while (*in != 0 && *in == *cmp) {
3478 ++in;
3479 ++cmp;
3480 ctxt->input->col++;
3481 }
3482 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3483 /* success */
3484 ctxt->input->cur = in;
3485 return (const xmlChar*) 1;
3486 }
3487 /* failure (or end of input buffer), check with full function */
3488 ret = xmlParseName (ctxt);
3489 /* strings coming from the dictionary direct compare possible */
3490 if (ret == other) {
3491 return (const xmlChar*) 1;
3492 }
3493 return ret;
3494 }
3495
3496 /**
3497 * xmlParseStringName:
3498 * @ctxt: an XML parser context
3499 * @str: a pointer to the string pointer (IN/OUT)
3500 *
3501 * parse an XML name.
3502 *
3503 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3504 * CombiningChar | Extender
3505 *
3506 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3507 *
3508 * [6] Names ::= Name (#x20 Name)*
3509 *
3510 * Returns the Name parsed or NULL. The @str pointer
3511 * is updated to the current location in the string.
3512 */
3513
3514 static xmlChar *
3515 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3516 xmlChar buf[XML_MAX_NAMELEN + 5];
3517 const xmlChar *cur = *str;
3518 int len = 0, l;
3519 int c;
3520
3521 #ifdef DEBUG
3522 nbParseStringName++;
3523 #endif
3524
3525 c = CUR_SCHAR(cur, l);
3526 if (!xmlIsNameStartChar(ctxt, c)) {
3527 return(NULL);
3528 }
3529
3530 COPY_BUF(l,buf,len,c);
3531 cur += l;
3532 c = CUR_SCHAR(cur, l);
3533 while (xmlIsNameChar(ctxt, c)) {
3534 COPY_BUF(l,buf,len,c);
3535 cur += l;
3536 c = CUR_SCHAR(cur, l);
3537 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3538 /*
3539 * Okay someone managed to make a huge name, so he's ready to pay
3540 * for the processing speed.
3541 */
3542 xmlChar *buffer;
3543 int max = len * 2;
3544
3545 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3546 if (buffer == NULL) {
3547 xmlErrMemory(ctxt, NULL);
3548 return(NULL);
3549 }
3550 memcpy(buffer, buf, len);
3551 while (xmlIsNameChar(ctxt, c)) {
3552 if (len + 10 > max) {
3553 xmlChar *tmp;
3554
3555 if ((len > XML_MAX_NAME_LENGTH) &&
3556 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3557 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3558 xmlFree(buffer);
3559 return(NULL);
3560 }
3561 max *= 2;
3562 tmp = (xmlChar *) xmlRealloc(buffer,
3563 max * sizeof(xmlChar));
3564 if (tmp == NULL) {
3565 xmlErrMemory(ctxt, NULL);
3566 xmlFree(buffer);
3567 return(NULL);
3568 }
3569 buffer = tmp;
3570 }
3571 COPY_BUF(l,buffer,len,c);
3572 cur += l;
3573 c = CUR_SCHAR(cur, l);
3574 }
3575 buffer[len] = 0;
3576 *str = cur;
3577 return(buffer);
3578 }
3579 }
3580 if ((len > XML_MAX_NAME_LENGTH) &&
3581 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3582 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3583 return(NULL);
3584 }
3585 *str = cur;
3586 return(xmlStrndup(buf, len));
3587 }
3588
3589 /**
3590 * xmlParseNmtoken:
3591 * @ctxt: an XML parser context
3592 *
3593 * parse an XML Nmtoken.
3594 *
3595 * [7] Nmtoken ::= (NameChar)+
3596 *
3597 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3598 *
3599 * Returns the Nmtoken parsed or NULL
3600 */
3601
3602 xmlChar *
3603 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3604 xmlChar buf[XML_MAX_NAMELEN + 5];
3605 int len = 0, l;
3606 int c;
3607 int count = 0;
3608
3609 #ifdef DEBUG
3610 nbParseNmToken++;
3611 #endif
3612
3613 GROW;
3614 if (ctxt->instate == XML_PARSER_EOF)
3615 return(NULL);
3616 c = CUR_CHAR(l);
3617
3618 while (xmlIsNameChar(ctxt, c)) {
3619 if (count++ > XML_PARSER_CHUNK_SIZE) {
3620 count = 0;
3621 GROW;
3622 }
3623 COPY_BUF(l,buf,len,c);
3624 NEXTL(l);
3625 c = CUR_CHAR(l);
3626 if (c == 0) {
3627 count = 0;
3628 GROW;
3629 if (ctxt->instate == XML_PARSER_EOF)
3630 return(NULL);
3631 c = CUR_CHAR(l);
3632 }
3633 if (len >= XML_MAX_NAMELEN) {