1d9396786ba7eca5d8e985d18e2679398243acd4
[reactos.git] / reactos / lib / 3rdparty / libxml2 / parser.c
1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33 #define IN_LIBXML
34 #include "libxml.h"
35
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
38 #else
39 #define XML_DIR_SEP '/'
40 #endif
41
42 #include <stdlib.h>
43 #include <limits.h>
44 #include <string.h>
45 #include <stdarg.h>
46 #include <libxml/xmlmemory.h>
47 #include <libxml/threads.h>
48 #include <libxml/globals.h>
49 #include <libxml/tree.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/valid.h>
53 #include <libxml/entities.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/encoding.h>
56 #include <libxml/xmlIO.h>
57 #include <libxml/uri.h>
58 #ifdef LIBXML_CATALOG_ENABLED
59 #include <libxml/catalog.h>
60 #endif
61 #ifdef LIBXML_SCHEMAS_ENABLED
62 #include <libxml/xmlschemastypes.h>
63 #include <libxml/relaxng.h>
64 #endif
65 #ifdef HAVE_CTYPE_H
66 #include <ctype.h>
67 #endif
68 #ifdef HAVE_STDLIB_H
69 #include <stdlib.h>
70 #endif
71 #ifdef HAVE_SYS_STAT_H
72 #include <sys/stat.h>
73 #endif
74 #ifdef HAVE_FCNTL_H
75 #include <fcntl.h>
76 #endif
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80 #ifdef HAVE_ZLIB_H
81 #include <zlib.h>
82 #endif
83 #ifdef HAVE_LZMA_H
84 #include <lzma.h>
85 #endif
86
87 #include "buf.h"
88 #include "enc.h"
89
/*
 * Forward declarations of file-local (static) functions.
 * xmlFatalErr is called by xmlParserEntityCheck before its definition
 * further down in this file.
 */
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);

static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
                                  const xmlChar *base, xmlParserCtxtPtr pctx);
96
/************************************************************************
 *                                                                      *
 *      Arbitrary limits set in the parser. See XML_PARSE_HUGE          *
 *                                                                      *
 ************************************************************************/

/*
 * XML_PARSER_BIG_ENTITY: entity replacement sizes below this value are
 * accepted by xmlParserEntityCheck without any further ratio check.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
113
114 /*
115 * xmlParserEntityCheck
116 *
117 * Function to check non-linear entity expansion behaviour
118 * This is here to detect and stop exponential linear entity expansion
119 * This is not a limitation of the parser but a safety
120 * boundary feature. It can be disabled with the XML_PARSE_HUGE
121 * parser option.
122 */
123 static int
124 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
125 xmlEntityPtr ent, size_t replacement)
126 {
127 size_t consumed = 0;
128
129 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
130 return (0);
131 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132 return (1);
133
134 /*
135 * This may look absurd but is needed to detect
136 * entities problems
137 */
138 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
139 (ent->content != NULL) && (ent->checked == 0)) {
140 unsigned long oldnbent = ctxt->nbentities;
141 xmlChar *rep;
142
143 ent->checked = 1;
144
145 rep = xmlStringDecodeEntities(ctxt, ent->content,
146 XML_SUBSTITUTE_REF, 0, 0, 0);
147
148 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
149 if (rep != NULL) {
150 if (xmlStrchr(rep, '<'))
151 ent->checked |= 1;
152 xmlFree(rep);
153 rep = NULL;
154 }
155 }
156 if (replacement != 0) {
157 if (replacement < XML_MAX_TEXT_LENGTH)
158 return(0);
159
160 /*
161 * If the volume of entity copy reaches 10 times the
162 * amount of parsed data and over the large text threshold
163 * then that's very likely to be an abuse.
164 */
165 if (ctxt->input != NULL) {
166 consumed = ctxt->input->consumed +
167 (ctxt->input->cur - ctxt->input->base);
168 }
169 consumed += ctxt->sizeentities;
170
171 if (replacement < XML_PARSER_NON_LINEAR * consumed)
172 return(0);
173 } else if (size != 0) {
174 /*
175 * Do the check based on the replacement size of the entity
176 */
177 if (size < XML_PARSER_BIG_ENTITY)
178 return(0);
179
180 /*
181 * A limit on the amount of text data reasonably used
182 */
183 if (ctxt->input != NULL) {
184 consumed = ctxt->input->consumed +
185 (ctxt->input->cur - ctxt->input->base);
186 }
187 consumed += ctxt->sizeentities;
188
189 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
190 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
191 return (0);
192 } else if (ent != NULL) {
193 /*
194 * use the number of parsed entities in the replacement
195 */
196 size = ent->checked / 2;
197
198 /*
199 * The amount of data parsed counting entities size only once
200 */
201 if (ctxt->input != NULL) {
202 consumed = ctxt->input->consumed +
203 (ctxt->input->cur - ctxt->input->base);
204 }
205 consumed += ctxt->sizeentities;
206
207 /*
208 * Check the density of entities for the amount of data
209 * knowing an entity reference will take at least 3 bytes
210 */
211 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
212 return (0);
213 } else {
214 /*
215 * strange we got no data for checking
216 */
217 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
218 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
219 (ctxt->nbentities <= 10000))
220 return (0);
221 }
222 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
223 return (1);
224 }
225
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 *
 * The variable is not static, so it is visible outside this file.
 */
unsigned int xmlParserMaxDepth = 256;
235
236
237
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data the parser
 * expects to have received. It is not a hard limit but an optimization
 * when reading strings like Names. It is not strictly needed as long as
 * the available input characters are followed by 0, which should be
 * provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100

/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
263
264
/*
 * Forward declarations of file-local (static) functions; their
 * definitions are expected further down in this file.
 */

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                              const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                              xmlSAXHandlerPtr sax,
                              void *user_data, int depth, const xmlChar *URL,
                              const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
#ifdef LIBXML_LEGACY_ENABLED
/* only declared when legacy support is compiled in */
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
                      const xmlChar *string, void *user_data, xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
290
291 /************************************************************************
292 * *
293 * Some factorized error routines *
294 * *
295 ************************************************************************/
296
297 /**
298 * xmlErrAttributeDup:
299 * @ctxt: an XML parser context
300 * @prefix: the attribute prefix
301 * @localname: the attribute localname
302 *
303 * Handle a redefinition of attribute error
304 */
305 static void
306 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
307 const xmlChar * localname)
308 {
309 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
310 (ctxt->instate == XML_PARSER_EOF))
311 return;
312 if (ctxt != NULL)
313 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
314
315 if (prefix == NULL)
316 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
317 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
318 (const char *) localname, NULL, NULL, 0, 0,
319 "Attribute %s redefined\n", localname);
320 else
321 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
322 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
323 (const char *) prefix, (const char *) localname,
324 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
325 localname);
326 if (ctxt != NULL) {
327 ctxt->wellFormed = 0;
328 if (ctxt->recovery == 0)
329 ctxt->disableSAX = 1;
330 }
331 }
332
333 /**
334 * xmlFatalErr:
335 * @ctxt: an XML parser context
336 * @error: the error number
337 * @extra: extra information string
338 *
339 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
340 */
341 static void
342 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
343 {
344 const char *errmsg;
345 char errstr[129] = "";
346
347 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
348 (ctxt->instate == XML_PARSER_EOF))
349 return;
350 switch (error) {
351 case XML_ERR_INVALID_HEX_CHARREF:
352 errmsg = "CharRef: invalid hexadecimal value";
353 break;
354 case XML_ERR_INVALID_DEC_CHARREF:
355 errmsg = "CharRef: invalid decimal value";
356 break;
357 case XML_ERR_INVALID_CHARREF:
358 errmsg = "CharRef: invalid value";
359 break;
360 case XML_ERR_INTERNAL_ERROR:
361 errmsg = "internal error";
362 break;
363 case XML_ERR_PEREF_AT_EOF:
364 errmsg = "PEReference at end of document";
365 break;
366 case XML_ERR_PEREF_IN_PROLOG:
367 errmsg = "PEReference in prolog";
368 break;
369 case XML_ERR_PEREF_IN_EPILOG:
370 errmsg = "PEReference in epilog";
371 break;
372 case XML_ERR_PEREF_NO_NAME:
373 errmsg = "PEReference: no name";
374 break;
375 case XML_ERR_PEREF_SEMICOL_MISSING:
376 errmsg = "PEReference: expecting ';'";
377 break;
378 case XML_ERR_ENTITY_LOOP:
379 errmsg = "Detected an entity reference loop";
380 break;
381 case XML_ERR_ENTITY_NOT_STARTED:
382 errmsg = "EntityValue: \" or ' expected";
383 break;
384 case XML_ERR_ENTITY_PE_INTERNAL:
385 errmsg = "PEReferences forbidden in internal subset";
386 break;
387 case XML_ERR_ENTITY_NOT_FINISHED:
388 errmsg = "EntityValue: \" or ' expected";
389 break;
390 case XML_ERR_ATTRIBUTE_NOT_STARTED:
391 errmsg = "AttValue: \" or ' expected";
392 break;
393 case XML_ERR_LT_IN_ATTRIBUTE:
394 errmsg = "Unescaped '<' not allowed in attributes values";
395 break;
396 case XML_ERR_LITERAL_NOT_STARTED:
397 errmsg = "SystemLiteral \" or ' expected";
398 break;
399 case XML_ERR_LITERAL_NOT_FINISHED:
400 errmsg = "Unfinished System or Public ID \" or ' expected";
401 break;
402 case XML_ERR_MISPLACED_CDATA_END:
403 errmsg = "Sequence ']]>' not allowed in content";
404 break;
405 case XML_ERR_URI_REQUIRED:
406 errmsg = "SYSTEM or PUBLIC, the URI is missing";
407 break;
408 case XML_ERR_PUBID_REQUIRED:
409 errmsg = "PUBLIC, the Public Identifier is missing";
410 break;
411 case XML_ERR_HYPHEN_IN_COMMENT:
412 errmsg = "Comment must not contain '--' (double-hyphen)";
413 break;
414 case XML_ERR_PI_NOT_STARTED:
415 errmsg = "xmlParsePI : no target name";
416 break;
417 case XML_ERR_RESERVED_XML_NAME:
418 errmsg = "Invalid PI name";
419 break;
420 case XML_ERR_NOTATION_NOT_STARTED:
421 errmsg = "NOTATION: Name expected here";
422 break;
423 case XML_ERR_NOTATION_NOT_FINISHED:
424 errmsg = "'>' required to close NOTATION declaration";
425 break;
426 case XML_ERR_VALUE_REQUIRED:
427 errmsg = "Entity value required";
428 break;
429 case XML_ERR_URI_FRAGMENT:
430 errmsg = "Fragment not allowed";
431 break;
432 case XML_ERR_ATTLIST_NOT_STARTED:
433 errmsg = "'(' required to start ATTLIST enumeration";
434 break;
435 case XML_ERR_NMTOKEN_REQUIRED:
436 errmsg = "NmToken expected in ATTLIST enumeration";
437 break;
438 case XML_ERR_ATTLIST_NOT_FINISHED:
439 errmsg = "')' required to finish ATTLIST enumeration";
440 break;
441 case XML_ERR_MIXED_NOT_STARTED:
442 errmsg = "MixedContentDecl : '|' or ')*' expected";
443 break;
444 case XML_ERR_PCDATA_REQUIRED:
445 errmsg = "MixedContentDecl : '#PCDATA' expected";
446 break;
447 case XML_ERR_ELEMCONTENT_NOT_STARTED:
448 errmsg = "ContentDecl : Name or '(' expected";
449 break;
450 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
451 errmsg = "ContentDecl : ',' '|' or ')' expected";
452 break;
453 case XML_ERR_PEREF_IN_INT_SUBSET:
454 errmsg =
455 "PEReference: forbidden within markup decl in internal subset";
456 break;
457 case XML_ERR_GT_REQUIRED:
458 errmsg = "expected '>'";
459 break;
460 case XML_ERR_CONDSEC_INVALID:
461 errmsg = "XML conditional section '[' expected";
462 break;
463 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
464 errmsg = "Content error in the external subset";
465 break;
466 case XML_ERR_CONDSEC_INVALID_KEYWORD:
467 errmsg =
468 "conditional section INCLUDE or IGNORE keyword expected";
469 break;
470 case XML_ERR_CONDSEC_NOT_FINISHED:
471 errmsg = "XML conditional section not closed";
472 break;
473 case XML_ERR_XMLDECL_NOT_STARTED:
474 errmsg = "Text declaration '<?xml' required";
475 break;
476 case XML_ERR_XMLDECL_NOT_FINISHED:
477 errmsg = "parsing XML declaration: '?>' expected";
478 break;
479 case XML_ERR_EXT_ENTITY_STANDALONE:
480 errmsg = "external parsed entities cannot be standalone";
481 break;
482 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
483 errmsg = "EntityRef: expecting ';'";
484 break;
485 case XML_ERR_DOCTYPE_NOT_FINISHED:
486 errmsg = "DOCTYPE improperly terminated";
487 break;
488 case XML_ERR_LTSLASH_REQUIRED:
489 errmsg = "EndTag: '</' not found";
490 break;
491 case XML_ERR_EQUAL_REQUIRED:
492 errmsg = "expected '='";
493 break;
494 case XML_ERR_STRING_NOT_CLOSED:
495 errmsg = "String not closed expecting \" or '";
496 break;
497 case XML_ERR_STRING_NOT_STARTED:
498 errmsg = "String not started expecting ' or \"";
499 break;
500 case XML_ERR_ENCODING_NAME:
501 errmsg = "Invalid XML encoding name";
502 break;
503 case XML_ERR_STANDALONE_VALUE:
504 errmsg = "standalone accepts only 'yes' or 'no'";
505 break;
506 case XML_ERR_DOCUMENT_EMPTY:
507 errmsg = "Document is empty";
508 break;
509 case XML_ERR_DOCUMENT_END:
510 errmsg = "Extra content at the end of the document";
511 break;
512 case XML_ERR_NOT_WELL_BALANCED:
513 errmsg = "chunk is not well balanced";
514 break;
515 case XML_ERR_EXTRA_CONTENT:
516 errmsg = "extra content at the end of well balanced chunk";
517 break;
518 case XML_ERR_VERSION_MISSING:
519 errmsg = "Malformed declaration expecting version";
520 break;
521 case XML_ERR_NAME_TOO_LONG:
522 errmsg = "Name too long use XML_PARSE_HUGE option";
523 break;
524 #if 0
525 case:
526 errmsg = "";
527 break;
528 #endif
529 default:
530 errmsg = "Unregistered error message";
531 }
532 if (info == NULL)
533 snprintf(errstr, 128, "%s\n", errmsg);
534 else
535 snprintf(errstr, 128, "%s: %%s\n", errmsg);
536 if (ctxt != NULL)
537 ctxt->errNo = error;
538 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
539 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
540 info);
541 if (ctxt != NULL) {
542 ctxt->wellFormed = 0;
543 if (ctxt->recovery == 0)
544 ctxt->disableSAX = 1;
545 }
546 }
547
548 /**
549 * xmlFatalErrMsg:
550 * @ctxt: an XML parser context
551 * @error: the error number
552 * @msg: the error message
553 *
554 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
555 */
556 static void
557 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
558 const char *msg)
559 {
560 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
561 (ctxt->instate == XML_PARSER_EOF))
562 return;
563 if (ctxt != NULL)
564 ctxt->errNo = error;
565 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
566 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
567 if (ctxt != NULL) {
568 ctxt->wellFormed = 0;
569 if (ctxt->recovery == 0)
570 ctxt->disableSAX = 1;
571 }
572 }
573
574 /**
575 * xmlWarningMsg:
576 * @ctxt: an XML parser context
577 * @error: the error number
578 * @msg: the error message
579 * @str1: extra data
580 * @str2: extra data
581 *
582 * Handle a warning.
583 */
584 static void
585 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
586 const char *msg, const xmlChar *str1, const xmlChar *str2)
587 {
588 xmlStructuredErrorFunc schannel = NULL;
589
590 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
591 (ctxt->instate == XML_PARSER_EOF))
592 return;
593 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
594 (ctxt->sax->initialized == XML_SAX2_MAGIC))
595 schannel = ctxt->sax->serror;
596 if (ctxt != NULL) {
597 __xmlRaiseError(schannel,
598 (ctxt->sax) ? ctxt->sax->warning : NULL,
599 ctxt->userData,
600 ctxt, NULL, XML_FROM_PARSER, error,
601 XML_ERR_WARNING, NULL, 0,
602 (const char *) str1, (const char *) str2, NULL, 0, 0,
603 msg, (const char *) str1, (const char *) str2);
604 } else {
605 __xmlRaiseError(schannel, NULL, NULL,
606 ctxt, NULL, XML_FROM_PARSER, error,
607 XML_ERR_WARNING, NULL, 0,
608 (const char *) str1, (const char *) str2, NULL, 0, 0,
609 msg, (const char *) str1, (const char *) str2);
610 }
611 }
612
613 /**
614 * xmlValidityError:
615 * @ctxt: an XML parser context
616 * @error: the error number
617 * @msg: the error message
618 * @str1: extra data
619 *
620 * Handle a validity error.
621 */
622 static void
623 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
624 const char *msg, const xmlChar *str1, const xmlChar *str2)
625 {
626 xmlStructuredErrorFunc schannel = NULL;
627
628 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
629 (ctxt->instate == XML_PARSER_EOF))
630 return;
631 if (ctxt != NULL) {
632 ctxt->errNo = error;
633 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
634 schannel = ctxt->sax->serror;
635 }
636 if (ctxt != NULL) {
637 __xmlRaiseError(schannel,
638 ctxt->vctxt.error, ctxt->vctxt.userData,
639 ctxt, NULL, XML_FROM_DTD, error,
640 XML_ERR_ERROR, NULL, 0, (const char *) str1,
641 (const char *) str2, NULL, 0, 0,
642 msg, (const char *) str1, (const char *) str2);
643 ctxt->valid = 0;
644 } else {
645 __xmlRaiseError(schannel, NULL, NULL,
646 ctxt, NULL, XML_FROM_DTD, error,
647 XML_ERR_ERROR, NULL, 0, (const char *) str1,
648 (const char *) str2, NULL, 0, 0,
649 msg, (const char *) str1, (const char *) str2);
650 }
651 }
652
653 /**
654 * xmlFatalErrMsgInt:
655 * @ctxt: an XML parser context
656 * @error: the error number
657 * @msg: the error message
658 * @val: an integer value
659 *
660 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
661 */
662 static void
663 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
664 const char *msg, int val)
665 {
666 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
667 (ctxt->instate == XML_PARSER_EOF))
668 return;
669 if (ctxt != NULL)
670 ctxt->errNo = error;
671 __xmlRaiseError(NULL, NULL, NULL,
672 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
673 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
674 if (ctxt != NULL) {
675 ctxt->wellFormed = 0;
676 if (ctxt->recovery == 0)
677 ctxt->disableSAX = 1;
678 }
679 }
680
681 /**
682 * xmlFatalErrMsgStrIntStr:
683 * @ctxt: an XML parser context
684 * @error: the error number
685 * @msg: the error message
686 * @str1: an string info
687 * @val: an integer value
688 * @str2: an string info
689 *
690 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
691 */
692 static void
693 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
694 const char *msg, const xmlChar *str1, int val,
695 const xmlChar *str2)
696 {
697 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
698 (ctxt->instate == XML_PARSER_EOF))
699 return;
700 if (ctxt != NULL)
701 ctxt->errNo = error;
702 __xmlRaiseError(NULL, NULL, NULL,
703 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
704 NULL, 0, (const char *) str1, (const char *) str2,
705 NULL, val, 0, msg, str1, val, str2);
706 if (ctxt != NULL) {
707 ctxt->wellFormed = 0;
708 if (ctxt->recovery == 0)
709 ctxt->disableSAX = 1;
710 }
711 }
712
713 /**
714 * xmlFatalErrMsgStr:
715 * @ctxt: an XML parser context
716 * @error: the error number
717 * @msg: the error message
718 * @val: a string value
719 *
720 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
721 */
722 static void
723 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
724 const char *msg, const xmlChar * val)
725 {
726 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
727 (ctxt->instate == XML_PARSER_EOF))
728 return;
729 if (ctxt != NULL)
730 ctxt->errNo = error;
731 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
732 XML_FROM_PARSER, error, XML_ERR_FATAL,
733 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
734 val);
735 if (ctxt != NULL) {
736 ctxt->wellFormed = 0;
737 if (ctxt->recovery == 0)
738 ctxt->disableSAX = 1;
739 }
740 }
741
742 /**
743 * xmlErrMsgStr:
744 * @ctxt: an XML parser context
745 * @error: the error number
746 * @msg: the error message
747 * @val: a string value
748 *
749 * Handle a non fatal parser error
750 */
751 static void
752 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
753 const char *msg, const xmlChar * val)
754 {
755 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
756 (ctxt->instate == XML_PARSER_EOF))
757 return;
758 if (ctxt != NULL)
759 ctxt->errNo = error;
760 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
761 XML_FROM_PARSER, error, XML_ERR_ERROR,
762 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
763 val);
764 }
765
766 /**
767 * xmlNsErr:
768 * @ctxt: an XML parser context
769 * @error: the error number
770 * @msg: the message
771 * @info1: extra information string
772 * @info2: extra information string
773 *
774 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
775 */
776 static void
777 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
778 const char *msg,
779 const xmlChar * info1, const xmlChar * info2,
780 const xmlChar * info3)
781 {
782 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
783 (ctxt->instate == XML_PARSER_EOF))
784 return;
785 if (ctxt != NULL)
786 ctxt->errNo = error;
787 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
788 XML_ERR_ERROR, NULL, 0, (const char *) info1,
789 (const char *) info2, (const char *) info3, 0, 0, msg,
790 info1, info2, info3);
791 if (ctxt != NULL)
792 ctxt->nsWellFormed = 0;
793 }
794
795 /**
796 * xmlNsWarn
797 * @ctxt: an XML parser context
798 * @error: the error number
799 * @msg: the message
800 * @info1: extra information string
801 * @info2: extra information string
802 *
803 * Handle a namespace warning error
804 */
805 static void
806 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
807 const char *msg,
808 const xmlChar * info1, const xmlChar * info2,
809 const xmlChar * info3)
810 {
811 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
812 (ctxt->instate == XML_PARSER_EOF))
813 return;
814 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
815 XML_ERR_WARNING, NULL, 0, (const char *) info1,
816 (const char *) info2, (const char *) info3, 0, 0, msg,
817 info1, info2, info3);
818 }
819
820 /************************************************************************
821 * *
822 * Library wide options *
823 * *
824 ************************************************************************/
825
826 /**
827 * xmlHasFeature:
828 * @feature: the feature to be examined
829 *
830 * Examines if the library has been compiled with a given feature.
831 *
832 * Returns a non-zero value if the feature exist, otherwise zero.
833 * Returns zero (0) if the feature does not exist or an unknown
834 * unknown feature is requested, non-zero otherwise.
835 */
836 int
837 xmlHasFeature(xmlFeature feature)
838 {
839 switch (feature) {
840 case XML_WITH_THREAD:
841 #ifdef LIBXML_THREAD_ENABLED
842 return(1);
843 #else
844 return(0);
845 #endif
846 case XML_WITH_TREE:
847 #ifdef LIBXML_TREE_ENABLED
848 return(1);
849 #else
850 return(0);
851 #endif
852 case XML_WITH_OUTPUT:
853 #ifdef LIBXML_OUTPUT_ENABLED
854 return(1);
855 #else
856 return(0);
857 #endif
858 case XML_WITH_PUSH:
859 #ifdef LIBXML_PUSH_ENABLED
860 return(1);
861 #else
862 return(0);
863 #endif
864 case XML_WITH_READER:
865 #ifdef LIBXML_READER_ENABLED
866 return(1);
867 #else
868 return(0);
869 #endif
870 case XML_WITH_PATTERN:
871 #ifdef LIBXML_PATTERN_ENABLED
872 return(1);
873 #else
874 return(0);
875 #endif
876 case XML_WITH_WRITER:
877 #ifdef LIBXML_WRITER_ENABLED
878 return(1);
879 #else
880 return(0);
881 #endif
882 case XML_WITH_SAX1:
883 #ifdef LIBXML_SAX1_ENABLED
884 return(1);
885 #else
886 return(0);
887 #endif
888 case XML_WITH_FTP:
889 #ifdef LIBXML_FTP_ENABLED
890 return(1);
891 #else
892 return(0);
893 #endif
894 case XML_WITH_HTTP:
895 #ifdef LIBXML_HTTP_ENABLED
896 return(1);
897 #else
898 return(0);
899 #endif
900 case XML_WITH_VALID:
901 #ifdef LIBXML_VALID_ENABLED
902 return(1);
903 #else
904 return(0);
905 #endif
906 case XML_WITH_HTML:
907 #ifdef LIBXML_HTML_ENABLED
908 return(1);
909 #else
910 return(0);
911 #endif
912 case XML_WITH_LEGACY:
913 #ifdef LIBXML_LEGACY_ENABLED
914 return(1);
915 #else
916 return(0);
917 #endif
918 case XML_WITH_C14N:
919 #ifdef LIBXML_C14N_ENABLED
920 return(1);
921 #else
922 return(0);
923 #endif
924 case XML_WITH_CATALOG:
925 #ifdef LIBXML_CATALOG_ENABLED
926 return(1);
927 #else
928 return(0);
929 #endif
930 case XML_WITH_XPATH:
931 #ifdef LIBXML_XPATH_ENABLED
932 return(1);
933 #else
934 return(0);
935 #endif
936 case XML_WITH_XPTR:
937 #ifdef LIBXML_XPTR_ENABLED
938 return(1);
939 #else
940 return(0);
941 #endif
942 case XML_WITH_XINCLUDE:
943 #ifdef LIBXML_XINCLUDE_ENABLED
944 return(1);
945 #else
946 return(0);
947 #endif
948 case XML_WITH_ICONV:
949 #ifdef LIBXML_ICONV_ENABLED
950 return(1);
951 #else
952 return(0);
953 #endif
954 case XML_WITH_ISO8859X:
955 #ifdef LIBXML_ISO8859X_ENABLED
956 return(1);
957 #else
958 return(0);
959 #endif
960 case XML_WITH_UNICODE:
961 #ifdef LIBXML_UNICODE_ENABLED
962 return(1);
963 #else
964 return(0);
965 #endif
966 case XML_WITH_REGEXP:
967 #ifdef LIBXML_REGEXP_ENABLED
968 return(1);
969 #else
970 return(0);
971 #endif
972 case XML_WITH_AUTOMATA:
973 #ifdef LIBXML_AUTOMATA_ENABLED
974 return(1);
975 #else
976 return(0);
977 #endif
978 case XML_WITH_EXPR:
979 #ifdef LIBXML_EXPR_ENABLED
980 return(1);
981 #else
982 return(0);
983 #endif
984 case XML_WITH_SCHEMAS:
985 #ifdef LIBXML_SCHEMAS_ENABLED
986 return(1);
987 #else
988 return(0);
989 #endif
990 case XML_WITH_SCHEMATRON:
991 #ifdef LIBXML_SCHEMATRON_ENABLED
992 return(1);
993 #else
994 return(0);
995 #endif
996 case XML_WITH_MODULES:
997 #ifdef LIBXML_MODULES_ENABLED
998 return(1);
999 #else
1000 return(0);
1001 #endif
1002 case XML_WITH_DEBUG:
1003 #ifdef LIBXML_DEBUG_ENABLED
1004 return(1);
1005 #else
1006 return(0);
1007 #endif
1008 case XML_WITH_DEBUG_MEM:
1009 #ifdef DEBUG_MEMORY_LOCATION
1010 return(1);
1011 #else
1012 return(0);
1013 #endif
1014 case XML_WITH_DEBUG_RUN:
1015 #ifdef LIBXML_DEBUG_RUNTIME
1016 return(1);
1017 #else
1018 return(0);
1019 #endif
1020 case XML_WITH_ZLIB:
1021 #ifdef LIBXML_ZLIB_ENABLED
1022 return(1);
1023 #else
1024 return(0);
1025 #endif
1026 case XML_WITH_LZMA:
1027 #ifdef LIBXML_LZMA_ENABLED
1028 return(1);
1029 #else
1030 return(0);
1031 #endif
1032 case XML_WITH_ICU:
1033 #ifdef LIBXML_ICU_ENABLED
1034 return(1);
1035 #else
1036 return(0);
1037 #endif
1038 default:
1039 break;
1040 }
1041 return(0);
1042 }
1043
1044 /************************************************************************
1045 * *
1046 * SAX2 defaulted attributes handling *
1047 * *
1048 ************************************************************************/
1049
1050 /**
1051 * xmlDetectSAX2:
1052 * @ctxt: an XML parser context
1053 *
1054 * Do the SAX2 detection and specific intialization
1055 */
1056 static void
1057 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1058 if (ctxt == NULL) return;
1059 #ifdef LIBXML_SAX1_ENABLED
1060 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1061 ((ctxt->sax->startElementNs != NULL) ||
1062 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1063 #else
1064 ctxt->sax2 = 1;
1065 #endif /* LIBXML_SAX1_ENABLED */
1066
1067 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1068 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1069 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1070 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1071 (ctxt->str_xml_ns == NULL)) {
1072 xmlErrMemory(ctxt, NULL);
1073 }
1074 }
1075
1076 typedef struct _xmlDefAttrs xmlDefAttrs;
1077 typedef xmlDefAttrs *xmlDefAttrsPtr;
1078 struct _xmlDefAttrs {
1079 int nbAttrs; /* number of defaulted attributes on that element */
1080 int maxAttrs; /* the size of the array */
1081 const xmlChar *values[5]; /* array of localname/prefix/values/external */
1082 };
1083
1084 /**
1085 * xmlAttrNormalizeSpace:
1086 * @src: the source string
1087 * @dst: the target string
1088 *
1089 * Normalize the space in non CDATA attribute values:
1090 * If the attribute type is not CDATA, then the XML processor MUST further
1091 * process the normalized attribute value by discarding any leading and
1092 * trailing space (#x20) characters, and by replacing sequences of space
1093 * (#x20) characters by a single space (#x20) character.
1094 * Note that the size of dst need to be at least src, and if one doesn't need
1095 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1096 * passing src as dst is just fine.
1097 *
1098 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1099 * is needed.
1100 */
1101 static xmlChar *
1102 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1103 {
1104 if ((src == NULL) || (dst == NULL))
1105 return(NULL);
1106
1107 while (*src == 0x20) src++;
1108 while (*src != 0) {
1109 if (*src == 0x20) {
1110 while (*src == 0x20) src++;
1111 if (*src != 0)
1112 *dst++ = 0x20;
1113 } else {
1114 *dst++ = *src++;
1115 }
1116 }
1117 *dst = 0;
1118 if (dst == src)
1119 return(NULL);
1120 return(dst);
1121 }
1122
1123 /**
1124 * xmlAttrNormalizeSpace2:
1125 * @src: the source string
1126 *
1127 * Normalize the space in non CDATA attribute values, a slightly more complex
1128 * front end to avoid allocation problems when running on attribute values
1129 * coming from the input.
1130 *
1131 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1132 * is needed.
1133 */
1134 static const xmlChar *
1135 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1136 {
1137 int i;
1138 int remove_head = 0;
1139 int need_realloc = 0;
1140 const xmlChar *cur;
1141
1142 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1143 return(NULL);
1144 i = *len;
1145 if (i <= 0)
1146 return(NULL);
1147
1148 cur = src;
1149 while (*cur == 0x20) {
1150 cur++;
1151 remove_head++;
1152 }
1153 while (*cur != 0) {
1154 if (*cur == 0x20) {
1155 cur++;
1156 if ((*cur == 0x20) || (*cur == 0)) {
1157 need_realloc = 1;
1158 break;
1159 }
1160 } else
1161 cur++;
1162 }
1163 if (need_realloc) {
1164 xmlChar *ret;
1165
1166 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1167 if (ret == NULL) {
1168 xmlErrMemory(ctxt, NULL);
1169 return(NULL);
1170 }
1171 xmlAttrNormalizeSpace(ret, ret);
1172 *len = (int) strlen((const char *)ret);
1173 return(ret);
1174 } else if (remove_head) {
1175 *len -= remove_head;
1176 memmove(src, src + remove_head, 1 + *len);
1177 return(src);
1178 }
1179 return(NULL);
1180 }
1181
1182 /**
1183 * xmlAddDefAttrs:
1184 * @ctxt: an XML parser context
1185 * @fullname: the element fullname
1186 * @fullattr: the attribute fullname
1187 * @value: the attribute value
1188 *
1189 * Add a defaulted attribute for an element
1190 */
1191 static void
1192 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1193 const xmlChar *fullname,
1194 const xmlChar *fullattr,
1195 const xmlChar *value) {
1196 xmlDefAttrsPtr defaults;
1197 int len;
1198 const xmlChar *name;
1199 const xmlChar *prefix;
1200
1201 /*
1202 * Allows to detect attribute redefinitions
1203 */
1204 if (ctxt->attsSpecial != NULL) {
1205 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1206 return;
1207 }
1208
1209 if (ctxt->attsDefault == NULL) {
1210 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1211 if (ctxt->attsDefault == NULL)
1212 goto mem_error;
1213 }
1214
1215 /*
1216 * split the element name into prefix:localname , the string found
1217 * are within the DTD and then not associated to namespace names.
1218 */
1219 name = xmlSplitQName3(fullname, &len);
1220 if (name == NULL) {
1221 name = xmlDictLookup(ctxt->dict, fullname, -1);
1222 prefix = NULL;
1223 } else {
1224 name = xmlDictLookup(ctxt->dict, name, -1);
1225 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1226 }
1227
1228 /*
1229 * make sure there is some storage
1230 */
1231 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1232 if (defaults == NULL) {
1233 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1234 (4 * 5) * sizeof(const xmlChar *));
1235 if (defaults == NULL)
1236 goto mem_error;
1237 defaults->nbAttrs = 0;
1238 defaults->maxAttrs = 4;
1239 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1240 defaults, NULL) < 0) {
1241 xmlFree(defaults);
1242 goto mem_error;
1243 }
1244 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1245 xmlDefAttrsPtr temp;
1246
1247 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1248 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1249 if (temp == NULL)
1250 goto mem_error;
1251 defaults = temp;
1252 defaults->maxAttrs *= 2;
1253 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1254 defaults, NULL) < 0) {
1255 xmlFree(defaults);
1256 goto mem_error;
1257 }
1258 }
1259
1260 /*
1261 * Split the element name into prefix:localname , the string found
1262 * are within the DTD and hen not associated to namespace names.
1263 */
1264 name = xmlSplitQName3(fullattr, &len);
1265 if (name == NULL) {
1266 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1267 prefix = NULL;
1268 } else {
1269 name = xmlDictLookup(ctxt->dict, name, -1);
1270 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1271 }
1272
1273 defaults->values[5 * defaults->nbAttrs] = name;
1274 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1275 /* intern the string and precompute the end */
1276 len = xmlStrlen(value);
1277 value = xmlDictLookup(ctxt->dict, value, len);
1278 defaults->values[5 * defaults->nbAttrs + 2] = value;
1279 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1280 if (ctxt->external)
1281 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1282 else
1283 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1284 defaults->nbAttrs++;
1285
1286 return;
1287
1288 mem_error:
1289 xmlErrMemory(ctxt, NULL);
1290 return;
1291 }
1292
1293 /**
1294 * xmlAddSpecialAttr:
1295 * @ctxt: an XML parser context
1296 * @fullname: the element fullname
1297 * @fullattr: the attribute fullname
1298 * @type: the attribute type
1299 *
1300 * Register this attribute type
1301 */
1302 static void
1303 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1304 const xmlChar *fullname,
1305 const xmlChar *fullattr,
1306 int type)
1307 {
1308 if (ctxt->attsSpecial == NULL) {
1309 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1310 if (ctxt->attsSpecial == NULL)
1311 goto mem_error;
1312 }
1313
1314 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1315 return;
1316
1317 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1318 (void *) (long) type);
1319 return;
1320
1321 mem_error:
1322 xmlErrMemory(ctxt, NULL);
1323 return;
1324 }
1325
1326 /**
1327 * xmlCleanSpecialAttrCallback:
1328 *
1329 * Removes CDATA attributes from the special attribute table
1330 */
1331 static void
1332 xmlCleanSpecialAttrCallback(void *payload, void *data,
1333 const xmlChar *fullname, const xmlChar *fullattr,
1334 const xmlChar *unused ATTRIBUTE_UNUSED) {
1335 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1336
1337 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1338 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1339 }
1340 }
1341
1342 /**
1343 * xmlCleanSpecialAttr:
1344 * @ctxt: an XML parser context
1345 *
1346 * Trim the list of attributes defined to remove all those of type
1347 * CDATA as they are not special. This call should be done when finishing
1348 * to parse the DTD and before starting to parse the document root.
1349 */
1350 static void
1351 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1352 {
1353 if (ctxt->attsSpecial == NULL)
1354 return;
1355
1356 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1357
1358 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1359 xmlHashFree(ctxt->attsSpecial, NULL);
1360 ctxt->attsSpecial = NULL;
1361 }
1362 return;
1363 }
1364
1365 /**
1366 * xmlCheckLanguageID:
1367 * @lang: pointer to the string value
1368 *
1369 * Checks that the value conforms to the LanguageID production:
1370 *
1371 * NOTE: this is somewhat deprecated, those productions were removed from
1372 * the XML Second edition.
1373 *
1374 * [33] LanguageID ::= Langcode ('-' Subcode)*
1375 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1376 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1377 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1378 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1379 * [38] Subcode ::= ([a-z] | [A-Z])+
1380 *
1381 * The current REC reference the sucessors of RFC 1766, currently 5646
1382 *
1383 * http://www.rfc-editor.org/rfc/rfc5646.txt
1384 * langtag = language
1385 * ["-" script]
1386 * ["-" region]
1387 * *("-" variant)
1388 * *("-" extension)
1389 * ["-" privateuse]
1390 * language = 2*3ALPHA ; shortest ISO 639 code
1391 * ["-" extlang] ; sometimes followed by
1392 * ; extended language subtags
1393 * / 4ALPHA ; or reserved for future use
1394 * / 5*8ALPHA ; or registered language subtag
1395 *
1396 * extlang = 3ALPHA ; selected ISO 639 codes
1397 * *2("-" 3ALPHA) ; permanently reserved
1398 *
1399 * script = 4ALPHA ; ISO 15924 code
1400 *
1401 * region = 2ALPHA ; ISO 3166-1 code
1402 * / 3DIGIT ; UN M.49 code
1403 *
1404 * variant = 5*8alphanum ; registered variants
1405 * / (DIGIT 3alphanum)
1406 *
1407 * extension = singleton 1*("-" (2*8alphanum))
1408 *
1409 * ; Single alphanumerics
1410 * ; "x" reserved for private use
1411 * singleton = DIGIT ; 0 - 9
1412 * / %x41-57 ; A - W
1413 * / %x59-5A ; Y - Z
1414 * / %x61-77 ; a - w
1415 * / %x79-7A ; y - z
1416 *
1417 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1418 * The parser below doesn't try to cope with extension or privateuse
1419 * that could be added but that's not interoperable anyway
1420 *
1421 * Returns 1 if correct 0 otherwise
1422 **/
1423 int
1424 xmlCheckLanguageID(const xmlChar * lang)
1425 {
1426 const xmlChar *cur = lang, *nxt;
1427
1428 if (cur == NULL)
1429 return (0);
1430 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1431 ((cur[0] == 'I') && (cur[1] == '-')) ||
1432 ((cur[0] == 'x') && (cur[1] == '-')) ||
1433 ((cur[0] == 'X') && (cur[1] == '-'))) {
1434 /*
1435 * Still allow IANA code and user code which were coming
1436 * from the previous version of the XML-1.0 specification
1437 * it's deprecated but we should not fail
1438 */
1439 cur += 2;
1440 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1441 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1442 cur++;
1443 return(cur[0] == 0);
1444 }
1445 nxt = cur;
1446 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1447 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1448 nxt++;
1449 if (nxt - cur >= 4) {
1450 /*
1451 * Reserved
1452 */
1453 if ((nxt - cur > 8) || (nxt[0] != 0))
1454 return(0);
1455 return(1);
1456 }
1457 if (nxt - cur < 2)
1458 return(0);
1459 /* we got an ISO 639 code */
1460 if (nxt[0] == 0)
1461 return(1);
1462 if (nxt[0] != '-')
1463 return(0);
1464
1465 nxt++;
1466 cur = nxt;
1467 /* now we can have extlang or script or region or variant */
1468 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1469 goto region_m49;
1470
1471 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1472 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1473 nxt++;
1474 if (nxt - cur == 4)
1475 goto script;
1476 if (nxt - cur == 2)
1477 goto region;
1478 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1479 goto variant;
1480 if (nxt - cur != 3)
1481 return(0);
1482 /* we parsed an extlang */
1483 if (nxt[0] == 0)
1484 return(1);
1485 if (nxt[0] != '-')
1486 return(0);
1487
1488 nxt++;
1489 cur = nxt;
1490 /* now we can have script or region or variant */
1491 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1492 goto region_m49;
1493
1494 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1495 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1496 nxt++;
1497 if (nxt - cur == 2)
1498 goto region;
1499 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1500 goto variant;
1501 if (nxt - cur != 4)
1502 return(0);
1503 /* we parsed a script */
1504 script:
1505 if (nxt[0] == 0)
1506 return(1);
1507 if (nxt[0] != '-')
1508 return(0);
1509
1510 nxt++;
1511 cur = nxt;
1512 /* now we can have region or variant */
1513 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1514 goto region_m49;
1515
1516 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1517 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1518 nxt++;
1519
1520 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1521 goto variant;
1522 if (nxt - cur != 2)
1523 return(0);
1524 /* we parsed a region */
1525 region:
1526 if (nxt[0] == 0)
1527 return(1);
1528 if (nxt[0] != '-')
1529 return(0);
1530
1531 nxt++;
1532 cur = nxt;
1533 /* now we can just have a variant */
1534 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1535 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1536 nxt++;
1537
1538 if ((nxt - cur < 5) || (nxt - cur > 8))
1539 return(0);
1540
1541 /* we parsed a variant */
1542 variant:
1543 if (nxt[0] == 0)
1544 return(1);
1545 if (nxt[0] != '-')
1546 return(0);
1547 /* extensions and private use subtags not checked */
1548 return (1);
1549
1550 region_m49:
1551 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1552 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1553 nxt += 3;
1554 goto region;
1555 }
1556 return(0);
1557 }
1558
1559 /************************************************************************
1560 * *
1561 * Parser stacks related functions and macros *
1562 * *
1563 ************************************************************************/
1564
1565 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1566 const xmlChar ** str);
1567
1568 #ifdef SAX2
1569 /**
1570 * nsPush:
1571 * @ctxt: an XML parser context
1572 * @prefix: the namespace prefix or NULL
1573 * @URL: the namespace name
1574 *
1575 * Pushes a new parser namespace on top of the ns stack
1576 *
1577 * Returns -1 in case of error, -2 if the namespace should be discarded
1578 * and the index in the stack otherwise.
1579 */
1580 static int
1581 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1582 {
1583 if (ctxt->options & XML_PARSE_NSCLEAN) {
1584 int i;
1585 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1586 if (ctxt->nsTab[i] == prefix) {
1587 /* in scope */
1588 if (ctxt->nsTab[i + 1] == URL)
1589 return(-2);
1590 /* out of scope keep it */
1591 break;
1592 }
1593 }
1594 }
1595 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1596 ctxt->nsMax = 10;
1597 ctxt->nsNr = 0;
1598 ctxt->nsTab = (const xmlChar **)
1599 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1600 if (ctxt->nsTab == NULL) {
1601 xmlErrMemory(ctxt, NULL);
1602 ctxt->nsMax = 0;
1603 return (-1);
1604 }
1605 } else if (ctxt->nsNr >= ctxt->nsMax) {
1606 const xmlChar ** tmp;
1607 ctxt->nsMax *= 2;
1608 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1609 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1610 if (tmp == NULL) {
1611 xmlErrMemory(ctxt, NULL);
1612 ctxt->nsMax /= 2;
1613 return (-1);
1614 }
1615 ctxt->nsTab = tmp;
1616 }
1617 ctxt->nsTab[ctxt->nsNr++] = prefix;
1618 ctxt->nsTab[ctxt->nsNr++] = URL;
1619 return (ctxt->nsNr);
1620 }
1621 /**
1622 * nsPop:
1623 * @ctxt: an XML parser context
1624 * @nr: the number to pop
1625 *
1626 * Pops the top @nr parser prefix/namespace from the ns stack
1627 *
1628 * Returns the number of namespaces removed
1629 */
1630 static int
1631 nsPop(xmlParserCtxtPtr ctxt, int nr)
1632 {
1633 int i;
1634
1635 if (ctxt->nsTab == NULL) return(0);
1636 if (ctxt->nsNr < nr) {
1637 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1638 nr = ctxt->nsNr;
1639 }
1640 if (ctxt->nsNr <= 0)
1641 return (0);
1642
1643 for (i = 0;i < nr;i++) {
1644 ctxt->nsNr--;
1645 ctxt->nsTab[ctxt->nsNr] = NULL;
1646 }
1647 return(nr);
1648 }
1649 #endif
1650
1651 static int
1652 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1653 const xmlChar **atts;
1654 int *attallocs;
1655 int maxatts;
1656
1657 if (ctxt->atts == NULL) {
1658 maxatts = 55; /* allow for 10 attrs by default */
1659 atts = (const xmlChar **)
1660 xmlMalloc(maxatts * sizeof(xmlChar *));
1661 if (atts == NULL) goto mem_error;
1662 ctxt->atts = atts;
1663 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1664 if (attallocs == NULL) goto mem_error;
1665 ctxt->attallocs = attallocs;
1666 ctxt->maxatts = maxatts;
1667 } else if (nr + 5 > ctxt->maxatts) {
1668 maxatts = (nr + 5) * 2;
1669 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1670 maxatts * sizeof(const xmlChar *));
1671 if (atts == NULL) goto mem_error;
1672 ctxt->atts = atts;
1673 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1674 (maxatts / 5) * sizeof(int));
1675 if (attallocs == NULL) goto mem_error;
1676 ctxt->attallocs = attallocs;
1677 ctxt->maxatts = maxatts;
1678 }
1679 return(ctxt->maxatts);
1680 mem_error:
1681 xmlErrMemory(ctxt, NULL);
1682 return(-1);
1683 }
1684
1685 /**
1686 * inputPush:
1687 * @ctxt: an XML parser context
1688 * @value: the parser input
1689 *
1690 * Pushes a new parser input on top of the input stack
1691 *
1692 * Returns -1 in case of error, the index in the stack otherwise
1693 */
1694 int
1695 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1696 {
1697 if ((ctxt == NULL) || (value == NULL))
1698 return(-1);
1699 if (ctxt->inputNr >= ctxt->inputMax) {
1700 ctxt->inputMax *= 2;
1701 ctxt->inputTab =
1702 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1703 ctxt->inputMax *
1704 sizeof(ctxt->inputTab[0]));
1705 if (ctxt->inputTab == NULL) {
1706 xmlErrMemory(ctxt, NULL);
1707 xmlFreeInputStream(value);
1708 ctxt->inputMax /= 2;
1709 value = NULL;
1710 return (-1);
1711 }
1712 }
1713 ctxt->inputTab[ctxt->inputNr] = value;
1714 ctxt->input = value;
1715 return (ctxt->inputNr++);
1716 }
1717 /**
1718 * inputPop:
1719 * @ctxt: an XML parser context
1720 *
1721 * Pops the top parser input from the input stack
1722 *
1723 * Returns the input just removed
1724 */
1725 xmlParserInputPtr
1726 inputPop(xmlParserCtxtPtr ctxt)
1727 {
1728 xmlParserInputPtr ret;
1729
1730 if (ctxt == NULL)
1731 return(NULL);
1732 if (ctxt->inputNr <= 0)
1733 return (NULL);
1734 ctxt->inputNr--;
1735 if (ctxt->inputNr > 0)
1736 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1737 else
1738 ctxt->input = NULL;
1739 ret = ctxt->inputTab[ctxt->inputNr];
1740 ctxt->inputTab[ctxt->inputNr] = NULL;
1741 return (ret);
1742 }
1743 /**
1744 * nodePush:
1745 * @ctxt: an XML parser context
1746 * @value: the element node
1747 *
1748 * Pushes a new element node on top of the node stack
1749 *
1750 * Returns -1 in case of error, the index in the stack otherwise
1751 */
1752 int
1753 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1754 {
1755 if (ctxt == NULL) return(0);
1756 if (ctxt->nodeNr >= ctxt->nodeMax) {
1757 xmlNodePtr *tmp;
1758
1759 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1760 ctxt->nodeMax * 2 *
1761 sizeof(ctxt->nodeTab[0]));
1762 if (tmp == NULL) {
1763 xmlErrMemory(ctxt, NULL);
1764 return (-1);
1765 }
1766 ctxt->nodeTab = tmp;
1767 ctxt->nodeMax *= 2;
1768 }
1769 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1770 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1771 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1772 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1773 xmlParserMaxDepth);
1774 ctxt->instate = XML_PARSER_EOF;
1775 return(-1);
1776 }
1777 ctxt->nodeTab[ctxt->nodeNr] = value;
1778 ctxt->node = value;
1779 return (ctxt->nodeNr++);
1780 }
1781
1782 /**
1783 * nodePop:
1784 * @ctxt: an XML parser context
1785 *
1786 * Pops the top element node from the node stack
1787 *
1788 * Returns the node just removed
1789 */
1790 xmlNodePtr
1791 nodePop(xmlParserCtxtPtr ctxt)
1792 {
1793 xmlNodePtr ret;
1794
1795 if (ctxt == NULL) return(NULL);
1796 if (ctxt->nodeNr <= 0)
1797 return (NULL);
1798 ctxt->nodeNr--;
1799 if (ctxt->nodeNr > 0)
1800 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1801 else
1802 ctxt->node = NULL;
1803 ret = ctxt->nodeTab[ctxt->nodeNr];
1804 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1805 return (ret);
1806 }
1807
1808 #ifdef LIBXML_PUSH_ENABLED
1809 /**
1810 * nameNsPush:
1811 * @ctxt: an XML parser context
1812 * @value: the element name
1813 * @prefix: the element prefix
1814 * @URI: the element namespace name
1815 *
1816 * Pushes a new element name/prefix/URL on top of the name stack
1817 *
1818 * Returns -1 in case of error, the index in the stack otherwise
1819 */
1820 static int
1821 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1822 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1823 {
1824 if (ctxt->nameNr >= ctxt->nameMax) {
1825 const xmlChar * *tmp;
1826 void **tmp2;
1827 ctxt->nameMax *= 2;
1828 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1829 ctxt->nameMax *
1830 sizeof(ctxt->nameTab[0]));
1831 if (tmp == NULL) {
1832 ctxt->nameMax /= 2;
1833 goto mem_error;
1834 }
1835 ctxt->nameTab = tmp;
1836 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1837 ctxt->nameMax * 3 *
1838 sizeof(ctxt->pushTab[0]));
1839 if (tmp2 == NULL) {
1840 ctxt->nameMax /= 2;
1841 goto mem_error;
1842 }
1843 ctxt->pushTab = tmp2;
1844 }
1845 ctxt->nameTab[ctxt->nameNr] = value;
1846 ctxt->name = value;
1847 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1848 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1849 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1850 return (ctxt->nameNr++);
1851 mem_error:
1852 xmlErrMemory(ctxt, NULL);
1853 return (-1);
1854 }
1855 /**
1856 * nameNsPop:
1857 * @ctxt: an XML parser context
1858 *
1859 * Pops the top element/prefix/URI name from the name stack
1860 *
1861 * Returns the name just removed
1862 */
1863 static const xmlChar *
1864 nameNsPop(xmlParserCtxtPtr ctxt)
1865 {
1866 const xmlChar *ret;
1867
1868 if (ctxt->nameNr <= 0)
1869 return (NULL);
1870 ctxt->nameNr--;
1871 if (ctxt->nameNr > 0)
1872 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1873 else
1874 ctxt->name = NULL;
1875 ret = ctxt->nameTab[ctxt->nameNr];
1876 ctxt->nameTab[ctxt->nameNr] = NULL;
1877 return (ret);
1878 }
1879 #endif /* LIBXML_PUSH_ENABLED */
1880
1881 /**
1882 * namePush:
1883 * @ctxt: an XML parser context
1884 * @value: the element name
1885 *
1886 * Pushes a new element name on top of the name stack
1887 *
1888 * Returns -1 in case of error, the index in the stack otherwise
1889 */
1890 int
1891 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1892 {
1893 if (ctxt == NULL) return (-1);
1894
1895 if (ctxt->nameNr >= ctxt->nameMax) {
1896 const xmlChar * *tmp;
1897 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1898 ctxt->nameMax * 2 *
1899 sizeof(ctxt->nameTab[0]));
1900 if (tmp == NULL) {
1901 goto mem_error;
1902 }
1903 ctxt->nameTab = tmp;
1904 ctxt->nameMax *= 2;
1905 }
1906 ctxt->nameTab[ctxt->nameNr] = value;
1907 ctxt->name = value;
1908 return (ctxt->nameNr++);
1909 mem_error:
1910 xmlErrMemory(ctxt, NULL);
1911 return (-1);
1912 }
1913 /**
1914 * namePop:
1915 * @ctxt: an XML parser context
1916 *
1917 * Pops the top element name from the name stack
1918 *
1919 * Returns the name just removed
1920 */
1921 const xmlChar *
1922 namePop(xmlParserCtxtPtr ctxt)
1923 {
1924 const xmlChar *ret;
1925
1926 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1927 return (NULL);
1928 ctxt->nameNr--;
1929 if (ctxt->nameNr > 0)
1930 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1931 else
1932 ctxt->name = NULL;
1933 ret = ctxt->nameTab[ctxt->nameNr];
1934 ctxt->nameTab[ctxt->nameNr] = NULL;
1935 return (ret);
1936 }
1937
1938 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1939 if (ctxt->spaceNr >= ctxt->spaceMax) {
1940 int *tmp;
1941
1942 ctxt->spaceMax *= 2;
1943 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1944 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1945 if (tmp == NULL) {
1946 xmlErrMemory(ctxt, NULL);
1947 ctxt->spaceMax /=2;
1948 return(-1);
1949 }
1950 ctxt->spaceTab = tmp;
1951 }
1952 ctxt->spaceTab[ctxt->spaceNr] = val;
1953 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1954 return(ctxt->spaceNr++);
1955 }
1956
1957 static int spacePop(xmlParserCtxtPtr ctxt) {
1958 int ret;
1959 if (ctxt->spaceNr <= 0) return(0);
1960 ctxt->spaceNr--;
1961 if (ctxt->spaceNr > 0)
1962 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1963 else
1964 ctxt->space = &ctxt->spaceTab[0];
1965 ret = ctxt->spaceTab[ctxt->spaceNr];
1966 ctxt->spaceTab[ctxt->spaceNr] = -1;
1967 return(ret);
1968 }
1969
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
2004
/* Accessors on the current input; all of them expect a local 'ctxt'. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are c1 ... cn.
 * The comparison stops at the first mismatch. The s argument is expanded
 * several times and must be free of side effects; it is parenthesized so
 * that the cast applies to the whole expression given by the caller.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2027
/*
 * SKIP(val): advance the current input by val xmlChars, updating the
 * character and column counters, then handle a '%' found at the new
 * position and pop the input if it is exhausted and cannot grow.
 * val is expanded three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if (*ctxt->input->cur == 0) {					\
        if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)		\
            xmlPopInput(ctxt);						\
    }									\
  } while (0)
2035
/*
 * SKIPL(val): same as SKIP(val) but the xmlChars are skipped one at a
 * time so that line and column stay correct across newlines.
 * val is parenthesized in the loop bound so that a compound expression
 * is compared as a whole; it is evaluated at each iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
2050
2051 #define SHRINK if ((ctxt->progressive == 0) && \
2052 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2053 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2054 xmlSHRINK (ctxt);
2055
2056 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2057 xmlParserInputShrink(ctxt->input);
2058 if ((*ctxt->input->cur == 0) &&
2059 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2060 xmlPopInput(ctxt);
2061 }
2062
2063 #define GROW if ((ctxt->progressive == 0) && \
2064 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2065 xmlGROW (ctxt);
2066
2067 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2068 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2069 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2070
2071 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2072 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2073 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2074 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2075 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2076 ctxt->instate = XML_PARSER_EOF;
2077 }
2078 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2079 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2080 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2081 xmlPopInput(ctxt);
2082 }
2083
/* Skip the blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Skip to the next character with xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: skip exactly one xmlChar without newline handling, growing the
 * input when a 0 byte is reached. Expands to a brace block, not to a
 * do/while statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character which is l xmlChars long, keeping
 * line and column up to date, then handle a '%' found at the new position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the context or of a string; l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v, encoded on l xmlChars, to buffer
 * b at index i and advance i. Expands to a bare if/else statement without
 * a trailing ';'. Arguments are parenthesized; i must be an lvalue.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v))
2110
2111 /**
2112 * xmlSkipBlankChars:
2113 * @ctxt: the XML parser context
2114 *
2115 * skip all blanks character found at that point in the input streams.
2116 * It pops up finished entities in the process if allowable at that point.
2117 *
2118 * Returns the number of space chars skipped
2119 */
2120
2121 int
2122 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2123 int res = 0;
2124
2125 /*
2126 * It's Okay to use CUR/NEXT here since all the blanks are on
2127 * the ASCII range.
2128 */
2129 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2130 const xmlChar *cur;
2131 /*
2132 * if we are in the document content, go really fast
2133 */
2134 cur = ctxt->input->cur;
2135 while (IS_BLANK_CH(*cur)) {
2136 if (*cur == '\n') {
2137 ctxt->input->line++; ctxt->input->col = 1;
2138 } else {
2139 ctxt->input->col++;
2140 }
2141 cur++;
2142 res++;
2143 if (*cur == 0) {
2144 ctxt->input->cur = cur;
2145 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2146 cur = ctxt->input->cur;
2147 }
2148 }
2149 ctxt->input->cur = cur;
2150 } else {
2151 int cur;
2152 do {
2153 cur = CUR;
2154 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2155 NEXT;
2156 cur = CUR;
2157 res++;
2158 }
2159 while ((cur == 0) && (ctxt->inputNr > 1) &&
2160 (ctxt->instate != XML_PARSER_COMMENT)) {
2161 xmlPopInput(ctxt);
2162 cur = CUR;
2163 }
2164 /*
2165 * Need to handle support of entities branching here
2166 */
2167 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2168 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2169 }
2170 return(res);
2171 }
2172
2173 /************************************************************************
2174 * *
2175 * Commodity functions to handle entities *
2176 * *
2177 ************************************************************************/
2178
2179 /**
2180 * xmlPopInput:
2181 * @ctxt: an XML parser context
2182 *
2183 * xmlPopInput: the current input pointed by ctxt->input came to an end
2184 * pop it and return the next char.
2185 *
2186 * Returns the current xmlChar in the parser context
2187 */
2188 xmlChar
2189 xmlPopInput(xmlParserCtxtPtr ctxt) {
2190 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2191 if (xmlParserDebugEntities)
2192 xmlGenericError(xmlGenericErrorContext,
2193 "Popping input %d\n", ctxt->inputNr);
2194 xmlFreeInputStream(inputPop(ctxt));
2195 if ((*ctxt->input->cur == 0) &&
2196 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2197 return(xmlPopInput(ctxt));
2198 return(CUR);
2199 }
2200
2201 /**
2202 * xmlPushInput:
2203 * @ctxt: an XML parser context
2204 * @input: an XML parser input fragment (entity, XML fragment ...).
2205 *
2206 * xmlPushInput: switch to a new input stream which is stacked on top
2207 * of the previous one(s).
2208 * Returns -1 in case of error or the index in the input stack
2209 */
2210 int
2211 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2212 int ret;
2213 if (input == NULL) return(-1);
2214
2215 if (xmlParserDebugEntities) {
2216 if ((ctxt->input != NULL) && (ctxt->input->filename))
2217 xmlGenericError(xmlGenericErrorContext,
2218 "%s(%d): ", ctxt->input->filename,
2219 ctxt->input->line);
2220 xmlGenericError(xmlGenericErrorContext,
2221 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2222 }
2223 ret = inputPush(ctxt, input);
2224 if (ctxt->instate == XML_PARSER_EOF)
2225 return(-1);
2226 GROW;
2227 return(ret);
2228 }
2229
2230 /**
2231 * xmlParseCharRef:
2232 * @ctxt: an XML parser context
2233 *
2234 * parse Reference declarations
2235 *
2236 * [66] CharRef ::= '&#' [0-9]+ ';' |
2237 * '&#x' [0-9a-fA-F]+ ';'
2238 *
2239 * [ WFC: Legal Character ]
2240 * Characters referred to using character references must match the
2241 * production for Char.
2242 *
2243 * Returns the value parsed (as an int), 0 in case of error
2244 */
2245 int
2246 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2247 unsigned int val = 0;
2248 int count = 0;
2249 unsigned int outofrange = 0;
2250
2251 /*
2252 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2253 */
2254 if ((RAW == '&') && (NXT(1) == '#') &&
2255 (NXT(2) == 'x')) {
2256 SKIP(3);
2257 GROW;
2258 while (RAW != ';') { /* loop blocked by count */
2259 if (count++ > 20) {
2260 count = 0;
2261 GROW;
2262 if (ctxt->instate == XML_PARSER_EOF)
2263 return(0);
2264 }
2265 if ((RAW >= '0') && (RAW <= '9'))
2266 val = val * 16 + (CUR - '0');
2267 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2268 val = val * 16 + (CUR - 'a') + 10;
2269 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2270 val = val * 16 + (CUR - 'A') + 10;
2271 else {
2272 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2273 val = 0;
2274 break;
2275 }
2276 if (val > 0x10FFFF)
2277 outofrange = val;
2278
2279 NEXT;
2280 count++;
2281 }
2282 if (RAW == ';') {
2283 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2284 ctxt->input->col++;
2285 ctxt->nbChars ++;
2286 ctxt->input->cur++;
2287 }
2288 } else if ((RAW == '&') && (NXT(1) == '#')) {
2289 SKIP(2);
2290 GROW;
2291 while (RAW != ';') { /* loop blocked by count */
2292 if (count++ > 20) {
2293 count = 0;
2294 GROW;
2295 if (ctxt->instate == XML_PARSER_EOF)
2296 return(0);
2297 }
2298 if ((RAW >= '0') && (RAW <= '9'))
2299 val = val * 10 + (CUR - '0');
2300 else {
2301 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2302 val = 0;
2303 break;
2304 }
2305 if (val > 0x10FFFF)
2306 outofrange = val;
2307
2308 NEXT;
2309 count++;
2310 }
2311 if (RAW == ';') {
2312 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2313 ctxt->input->col++;
2314 ctxt->nbChars ++;
2315 ctxt->input->cur++;
2316 }
2317 } else {
2318 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2319 }
2320
2321 /*
2322 * [ WFC: Legal Character ]
2323 * Characters referred to using character references must match the
2324 * production for Char.
2325 */
2326 if ((IS_CHAR(val) && (outofrange == 0))) {
2327 return(val);
2328 } else {
2329 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2330 "xmlParseCharRef: invalid xmlChar value %d\n",
2331 val);
2332 }
2333 return(0);
2334 }
2335
2336 /**
2337 * xmlParseStringCharRef:
2338 * @ctxt: an XML parser context
2339 * @str: a pointer to an index in the string
2340 *
2341 * parse Reference declarations, variant parsing from a string rather
2342 * than an an input flow.
2343 *
2344 * [66] CharRef ::= '&#' [0-9]+ ';' |
2345 * '&#x' [0-9a-fA-F]+ ';'
2346 *
2347 * [ WFC: Legal Character ]
2348 * Characters referred to using character references must match the
2349 * production for Char.
2350 *
2351 * Returns the value parsed (as an int), 0 in case of error, str will be
2352 * updated to the current value of the index
2353 */
2354 static int
2355 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2356 const xmlChar *ptr;
2357 xmlChar cur;
2358 unsigned int val = 0;
2359 unsigned int outofrange = 0;
2360
2361 if ((str == NULL) || (*str == NULL)) return(0);
2362 ptr = *str;
2363 cur = *ptr;
2364 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2365 ptr += 3;
2366 cur = *ptr;
2367 while (cur != ';') { /* Non input consuming loop */
2368 if ((cur >= '0') && (cur <= '9'))
2369 val = val * 16 + (cur - '0');
2370 else if ((cur >= 'a') && (cur <= 'f'))
2371 val = val * 16 + (cur - 'a') + 10;
2372 else if ((cur >= 'A') && (cur <= 'F'))
2373 val = val * 16 + (cur - 'A') + 10;
2374 else {
2375 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2376 val = 0;
2377 break;
2378 }
2379 if (val > 0x10FFFF)
2380 outofrange = val;
2381
2382 ptr++;
2383 cur = *ptr;
2384 }
2385 if (cur == ';')
2386 ptr++;
2387 } else if ((cur == '&') && (ptr[1] == '#')){
2388 ptr += 2;
2389 cur = *ptr;
2390 while (cur != ';') { /* Non input consuming loops */
2391 if ((cur >= '0') && (cur <= '9'))
2392 val = val * 10 + (cur - '0');
2393 else {
2394 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2395 val = 0;
2396 break;
2397 }
2398 if (val > 0x10FFFF)
2399 outofrange = val;
2400
2401 ptr++;
2402 cur = *ptr;
2403 }
2404 if (cur == ';')
2405 ptr++;
2406 } else {
2407 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2408 return(0);
2409 }
2410 *str = ptr;
2411
2412 /*
2413 * [ WFC: Legal Character ]
2414 * Characters referred to using character references must match the
2415 * production for Char.
2416 */
2417 if ((IS_CHAR(val) && (outofrange == 0))) {
2418 return(val);
2419 } else {
2420 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2421 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2422 val);
2423 }
2424 return(0);
2425 }
2426
2427 /**
2428 * xmlNewBlanksWrapperInputStream:
2429 * @ctxt: an XML parser context
2430 * @entity: an Entity pointer
2431 *
2432 * Create a new input stream for wrapping
2433 * blanks around a PEReference
2434 *
2435 * Returns the new input stream or NULL
2436 */
2437
2438 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2439
2440 static xmlParserInputPtr
2441 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2442 xmlParserInputPtr input;
2443 xmlChar *buffer;
2444 size_t length;
2445 if (entity == NULL) {
2446 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2447 "xmlNewBlanksWrapperInputStream entity\n");
2448 return(NULL);
2449 }
2450 if (xmlParserDebugEntities)
2451 xmlGenericError(xmlGenericErrorContext,
2452 "new blanks wrapper for entity: %s\n", entity->name);
2453 input = xmlNewInputStream(ctxt);
2454 if (input == NULL) {
2455 return(NULL);
2456 }
2457 length = xmlStrlen(entity->name) + 5;
2458 buffer = xmlMallocAtomic(length);
2459 if (buffer == NULL) {
2460 xmlErrMemory(ctxt, NULL);
2461 xmlFree(input);
2462 return(NULL);
2463 }
2464 buffer [0] = ' ';
2465 buffer [1] = '%';
2466 buffer [length-3] = ';';
2467 buffer [length-2] = ' ';
2468 buffer [length-1] = 0;
2469 memcpy(buffer + 2, entity->name, length - 5);
2470 input->free = deallocblankswrapper;
2471 input->base = buffer;
2472 input->cur = buffer;
2473 input->length = length;
2474 input->end = &buffer[length];
2475 return(input);
2476 }
2477
2478 /**
2479 * xmlParserHandlePEReference:
2480 * @ctxt: the parser context
2481 *
2482 * [69] PEReference ::= '%' Name ';'
2483 *
2484 * [ WFC: No Recursion ]
2485 * A parsed entity must not contain a recursive
2486 * reference to itself, either directly or indirectly.
2487 *
2488 * [ WFC: Entity Declared ]
2489 * In a document without any DTD, a document with only an internal DTD
2490 * subset which contains no parameter entity references, or a document
2491 * with "standalone='yes'", ... ... The declaration of a parameter
2492 * entity must precede any reference to it...
2493 *
2494 * [ VC: Entity Declared ]
2495 * In a document with an external subset or external parameter entities
2496 * with "standalone='no'", ... ... The declaration of a parameter entity
2497 * must precede any reference to it...
2498 *
2499 * [ WFC: In DTD ]
2500 * Parameter-entity references may only appear in the DTD.
2501 * NOTE: misleading but this is handled.
2502 *
2503 * A PEReference may have been detected in the current input stream
2504 * the handling is done accordingly to
2505 * http://www.w3.org/TR/REC-xml#entproc
2506 * i.e.
2507 * - Included in literal in entity values
2508 * - Included as Parameter Entity reference within DTDs
2509 */
2510 void
2511 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2512 const xmlChar *name;
2513 xmlEntityPtr entity = NULL;
2514 xmlParserInputPtr input;
2515
2516 if (RAW != '%') return;
2517 switch(ctxt->instate) {
2518 case XML_PARSER_CDATA_SECTION:
2519 return;
2520 case XML_PARSER_COMMENT:
2521 return;
2522 case XML_PARSER_START_TAG:
2523 return;
2524 case XML_PARSER_END_TAG:
2525 return;
2526 case XML_PARSER_EOF:
2527 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2528 return;
2529 case XML_PARSER_PROLOG:
2530 case XML_PARSER_START:
2531 case XML_PARSER_MISC:
2532 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2533 return;
2534 case XML_PARSER_ENTITY_DECL:
2535 case XML_PARSER_CONTENT:
2536 case XML_PARSER_ATTRIBUTE_VALUE:
2537 case XML_PARSER_PI:
2538 case XML_PARSER_SYSTEM_LITERAL:
2539 case XML_PARSER_PUBLIC_LITERAL:
2540 /* we just ignore it there */
2541 return;
2542 case XML_PARSER_EPILOG:
2543 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2544 return;
2545 case XML_PARSER_ENTITY_VALUE:
2546 /*
2547 * NOTE: in the case of entity values, we don't do the
2548 * substitution here since we need the literal
2549 * entity value to be able to save the internal
2550 * subset of the document.
2551 * This will be handled by xmlStringDecodeEntities
2552 */
2553 return;
2554 case XML_PARSER_DTD:
2555 /*
2556 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2557 * In the internal DTD subset, parameter-entity references
2558 * can occur only where markup declarations can occur, not
2559 * within markup declarations.
2560 * In that case this is handled in xmlParseMarkupDecl
2561 */
2562 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2563 return;
2564 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2565 return;
2566 break;
2567 case XML_PARSER_IGNORE:
2568 return;
2569 }
2570
2571 NEXT;
2572 name = xmlParseName(ctxt);
2573 if (xmlParserDebugEntities)
2574 xmlGenericError(xmlGenericErrorContext,
2575 "PEReference: %s\n", name);
2576 if (name == NULL) {
2577 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2578 } else {
2579 if (RAW == ';') {
2580 NEXT;
2581 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2582 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2583 if (ctxt->instate == XML_PARSER_EOF)
2584 return;
2585 if (entity == NULL) {
2586
2587 /*
2588 * [ WFC: Entity Declared ]
2589 * In a document without any DTD, a document with only an
2590 * internal DTD subset which contains no parameter entity
2591 * references, or a document with "standalone='yes'", ...
2592 * ... The declaration of a parameter entity must precede
2593 * any reference to it...
2594 */
2595 if ((ctxt->standalone == 1) ||
2596 ((ctxt->hasExternalSubset == 0) &&
2597 (ctxt->hasPErefs == 0))) {
2598 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2599 "PEReference: %%%s; not found\n", name);
2600 } else {
2601 /*
2602 * [ VC: Entity Declared ]
2603 * In a document with an external subset or external
2604 * parameter entities with "standalone='no'", ...
2605 * ... The declaration of a parameter entity must precede
2606 * any reference to it...
2607 */
2608 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2609 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2610 "PEReference: %%%s; not found\n",
2611 name, NULL);
2612 } else
2613 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2614 "PEReference: %%%s; not found\n",
2615 name, NULL);
2616 ctxt->valid = 0;
2617 }
2618 xmlParserEntityCheck(ctxt, 0, NULL, 0);
2619 } else if (ctxt->input->free != deallocblankswrapper) {
2620 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2621 if (xmlPushInput(ctxt, input) < 0)
2622 return;
2623 } else {
2624 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2625 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2626 xmlChar start[4];
2627 xmlCharEncoding enc;
2628
2629 /*
2630 * Note: external parameter entities will not be loaded, it
2631 * is not required for a non-validating parser, unless the
2632 * option of validating, or substituting entities were
2633 * given. Doing so is far more secure as the parser will
2634 * only process data coming from the document entity by
2635 * default.
2636 */
2637 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2638 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2639 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
2640 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2641 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2642 (ctxt->replaceEntities == 0) &&
2643 (ctxt->validate == 0))
2644 return;
2645
2646 /*
2647 * handle the extra spaces added before and after
2648 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2649 * this is done independently.
2650 */
2651 input = xmlNewEntityInputStream(ctxt, entity);
2652 if (xmlPushInput(ctxt, input) < 0)
2653 return;
2654
2655 /*
2656 * Get the 4 first bytes and decode the charset
2657 * if enc != XML_CHAR_ENCODING_NONE
2658 * plug some encoding conversion routines.
2659 * Note that, since we may have some non-UTF8
2660 * encoding (like UTF16, bug 135229), the 'length'
2661 * is not known, but we can calculate based upon
2662 * the amount of data in the buffer.
2663 */
2664 GROW
2665 if (ctxt->instate == XML_PARSER_EOF)
2666 return;
2667 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2668 start[0] = RAW;
2669 start[1] = NXT(1);
2670 start[2] = NXT(2);
2671 start[3] = NXT(3);
2672 enc = xmlDetectCharEncoding(start, 4);
2673 if (enc != XML_CHAR_ENCODING_NONE) {
2674 xmlSwitchEncoding(ctxt, enc);
2675 }
2676 }
2677
2678 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2679 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2680 (IS_BLANK_CH(NXT(5)))) {
2681 xmlParseTextDecl(ctxt);
2682 }
2683 } else {
2684 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2685 "PEReference: %s is not a parameter entity\n",
2686 name);
2687 }
2688 }
2689 } else {
2690 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2691 }
2692 }
2693 }
2694
/*
 * Macro used to grow the current buffer.
 * buffer:  name of an xmlChar * variable; a companion size_t variable
 *          named <buffer>_size must be in scope and is updated as well
 * n:  number of extra bytes wanted on top of doubling the current size
 *     (any expression: it is parenthesized in the expansion)
 * mem_error: label the enclosing function must provide; it is jumped to
 *            when the new size overflows or the reallocation fails, in
 *            which case buffer and <buffer>_size are left unchanged
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2709
2710 /**
2711 * xmlStringLenDecodeEntities:
2712 * @ctxt: the parser context
2713 * @str: the input string
2714 * @len: the string length
2715 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2716 * @end: an end marker xmlChar, 0 if none
2717 * @end2: an end marker xmlChar, 0 if none
2718 * @end3: an end marker xmlChar, 0 if none
2719 *
2720 * Takes a entity string content and process to do the adequate substitutions.
2721 *
2722 * [67] Reference ::= EntityRef | CharRef
2723 *
2724 * [69] PEReference ::= '%' Name ';'
2725 *
2726 * Returns A newly allocated string with the substitution done. The caller
2727 * must deallocate it !
2728 */
2729 xmlChar *
2730 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2731 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2732 xmlChar *buffer = NULL;
2733 size_t buffer_size = 0;
2734 size_t nbchars = 0;
2735
2736 xmlChar *current = NULL;
2737 xmlChar *rep = NULL;
2738 const xmlChar *last;
2739 xmlEntityPtr ent;
2740 int c,l;
2741
2742 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2743 return(NULL);
2744 last = str + len;
2745
2746 if (((ctxt->depth > 40) &&
2747 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2748 (ctxt->depth > 1024)) {
2749 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2750 return(NULL);
2751 }
2752
2753 /*
2754 * allocate a translation buffer.
2755 */
2756 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2757 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2758 if (buffer == NULL) goto mem_error;
2759
2760 /*
2761 * OK loop until we reach one of the ending char or a size limit.
2762 * we are operating on already parsed values.
2763 */
2764 if (str < last)
2765 c = CUR_SCHAR(str, l);
2766 else
2767 c = 0;
2768 while ((c != 0) && (c != end) && /* non input consuming loop */
2769 (c != end2) && (c != end3)) {
2770
2771 if (c == 0) break;
2772 if ((c == '&') && (str[1] == '#')) {
2773 int val = xmlParseStringCharRef(ctxt, &str);
2774 if (val != 0) {
2775 COPY_BUF(0,buffer,nbchars,val);
2776 }
2777 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2778 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2779 }
2780 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2781 if (xmlParserDebugEntities)
2782 xmlGenericError(xmlGenericErrorContext,
2783 "String decoding Entity Reference: %.30s\n",
2784 str);
2785 ent = xmlParseStringEntityRef(ctxt, &str);
2786 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2787 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2788 goto int_error;
2789 xmlParserEntityCheck(ctxt, 0, ent, 0);
2790 if (ent != NULL)
2791 ctxt->nbentities += ent->checked / 2;
2792 if ((ent != NULL) &&
2793 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2794 if (ent->content != NULL) {
2795 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2796 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2797 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2798 }
2799 } else {
2800 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2801 "predefined entity has no content\n");
2802 }
2803 } else if ((ent != NULL) && (ent->content != NULL)) {
2804 ctxt->depth++;
2805 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2806 0, 0, 0);
2807 ctxt->depth--;
2808
2809 if (rep != NULL) {
2810 current = rep;
2811 while (*current != 0) { /* non input consuming loop */
2812 buffer[nbchars++] = *current++;
2813 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2814 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2815 goto int_error;
2816 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2817 }
2818 }
2819 xmlFree(rep);
2820 rep = NULL;
2821 }
2822 } else if (ent != NULL) {
2823 int i = xmlStrlen(ent->name);
2824 const xmlChar *cur = ent->name;
2825
2826 buffer[nbchars++] = '&';
2827 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2828 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2829 }
2830 for (;i > 0;i--)
2831 buffer[nbchars++] = *cur++;
2832 buffer[nbchars++] = ';';
2833 }
2834 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2835 if (xmlParserDebugEntities)
2836 xmlGenericError(xmlGenericErrorContext,
2837 "String decoding PE Reference: %.30s\n", str);
2838 ent = xmlParseStringPEReference(ctxt, &str);
2839 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2840 goto int_error;
2841 xmlParserEntityCheck(ctxt, 0, ent, 0);
2842 if (ent != NULL)
2843 ctxt->nbentities += ent->checked / 2;
2844 if (ent != NULL) {
2845 if (ent->content == NULL) {
2846 xmlLoadEntityContent(ctxt, ent);
2847 }
2848 ctxt->depth++;
2849 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2850 0, 0, 0);
2851 ctxt->depth--;
2852 if (rep != NULL) {
2853 current = rep;
2854 while (*current != 0) { /* non input consuming loop */
2855 buffer[nbchars++] = *current++;
2856 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2857 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2858 goto int_error;
2859 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2860 }
2861 }
2862 xmlFree(rep);
2863 rep = NULL;
2864 }
2865 }
2866 } else {
2867 COPY_BUF(l,buffer,nbchars,c);
2868 str += l;
2869 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2870 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2871 }
2872 }
2873 if (str < last)
2874 c = CUR_SCHAR(str, l);
2875 else
2876 c = 0;
2877 }
2878 buffer[nbchars] = 0;
2879 return(buffer);
2880
2881 mem_error:
2882 xmlErrMemory(ctxt, NULL);
2883 int_error:
2884 if (rep != NULL)
2885 xmlFree(rep);
2886 if (buffer != NULL)
2887 xmlFree(buffer);
2888 return(NULL);
2889 }
2890
2891 /**
2892 * xmlStringDecodeEntities:
2893 * @ctxt: the parser context
2894 * @str: the input string
2895 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2896 * @end: an end marker xmlChar, 0 if none
2897 * @end2: an end marker xmlChar, 0 if none
2898 * @end3: an end marker xmlChar, 0 if none
2899 *
2900 * Takes a entity string content and process to do the adequate substitutions.
2901 *
2902 * [67] Reference ::= EntityRef | CharRef
2903 *
2904 * [69] PEReference ::= '%' Name ';'
2905 *
2906 * Returns A newly allocated string with the substitution done. The caller
2907 * must deallocate it !
2908 */
2909 xmlChar *
2910 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2911 xmlChar end, xmlChar end2, xmlChar end3) {
2912 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2913 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2914 end, end2, end3));
2915 }
2916
2917 /************************************************************************
2918 * *
2919 * Commodity functions, cleanup needed ? *
2920 * *
2921 ************************************************************************/
2922
2923 /**
2924 * areBlanks:
2925 * @ctxt: an XML parser context
2926 * @str: a xmlChar *
2927 * @len: the size of @str
2928 * @blank_chars: we know the chars are blanks
2929 *
2930 * Is this a sequence of blank chars that one can ignore ?
2931 *
2932 * Returns 1 if ignorable 0 otherwise.
2933 */
2934
2935 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2936 int blank_chars) {
2937 int i, ret;
2938 xmlNodePtr lastChild;
2939
2940 /*
2941 * Don't spend time trying to differentiate them, the same callback is
2942 * used !
2943 */
2944 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2945 return(0);
2946
2947 /*
2948 * Check for xml:space value.
2949 */
2950 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2951 (*(ctxt->space) == -2))
2952 return(0);
2953
2954 /*
2955 * Check that the string is made of blanks
2956 */
2957 if (blank_chars == 0) {
2958 for (i = 0;i < len;i++)
2959 if (!(IS_BLANK_CH(str[i]))) return(0);
2960 }
2961
2962 /*
2963 * Look if the element is mixed content in the DTD if available
2964 */
2965 if (ctxt->node == NULL) return(0);
2966 if (ctxt->myDoc != NULL) {
2967 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2968 if (ret == 0) return(1);
2969 if (ret == 1) return(0);
2970 }
2971
2972 /*
2973 * Otherwise, heuristic :-\
2974 */
2975 if ((RAW != '<') && (RAW != 0xD)) return(0);
2976 if ((ctxt->node->children == NULL) &&
2977 (RAW == '<') && (NXT(1) == '/')) return(0);
2978
2979 lastChild = xmlGetLastChild(ctxt->node);
2980 if (lastChild == NULL) {
2981 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2982 (ctxt->node->content != NULL)) return(0);
2983 } else if (xmlNodeIsText(lastChild))
2984 return(0);
2985 else if ((ctxt->node->children != NULL) &&
2986 (xmlNodeIsText(ctxt->node->children)))
2987 return(0);
2988 return(1);
2989 }
2990
2991 /************************************************************************
2992 * *
2993 * Extra stuff for namespace support *
2994 * Relates to http://www.w3.org/TR/WD-xml-names *
2995 * *
2996 ************************************************************************/
2997
2998 /**
2999 * xmlSplitQName:
3000 * @ctxt: an XML parser context
3001 * @name: an XML parser context
3002 * @prefix: a xmlChar **
3003 *
3004 * parse an UTF8 encoded XML qualified name string
3005 *
3006 * [NS 5] QName ::= (Prefix ':')? LocalPart
3007 *
3008 * [NS 6] Prefix ::= NCName
3009 *
3010 * [NS 7] LocalPart ::= NCName
3011 *
3012 * Returns the local part, and prefix is updated
3013 * to get the Prefix if any.
3014 */
3015
3016 xmlChar *
3017 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3018 xmlChar buf[XML_MAX_NAMELEN + 5];
3019 xmlChar *buffer = NULL;
3020 int len = 0;
3021 int max = XML_MAX_NAMELEN;
3022 xmlChar *ret = NULL;
3023 const xmlChar *cur = name;
3024 int c;
3025
3026 if (prefix == NULL) return(NULL);
3027 *prefix = NULL;
3028
3029 if (cur == NULL) return(NULL);
3030
3031 #ifndef XML_XML_NAMESPACE
3032 /* xml: prefix is not really a namespace */
3033 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3034 (cur[2] == 'l') && (cur[3] == ':'))
3035 return(xmlStrdup(name));
3036 #endif
3037
3038 /* nasty but well=formed */
3039 if (cur[0] == ':')
3040 return(xmlStrdup(name));
3041
3042 c = *cur++;
3043 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3044 buf[len++] = c;
3045 c = *cur++;
3046 }
3047 if (len >= max) {
3048 /*
3049 * Okay someone managed to make a huge name, so he's ready to pay
3050 * for the processing speed.
3051 */
3052 max = len * 2;
3053
3054 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3055 if (buffer == NULL) {
3056 xmlErrMemory(ctxt, NULL);
3057 return(NULL);
3058 }
3059 memcpy(buffer, buf, len);
3060 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3061 if (len + 10 > max) {
3062 xmlChar *tmp;
3063
3064 max *= 2;
3065 tmp = (xmlChar *) xmlRealloc(buffer,
3066 max * sizeof(xmlChar));
3067 if (tmp == NULL) {
3068 xmlFree(buffer);
3069 xmlErrMemory(ctxt, NULL);
3070 return(NULL);
3071 }
3072 buffer = tmp;
3073 }
3074 buffer[len++] = c;
3075 c = *cur++;
3076 }
3077 buffer[len] = 0;
3078 }
3079
3080 if ((c == ':') && (*cur == 0)) {
3081 if (buffer != NULL)
3082 xmlFree(buffer);
3083 *prefix = NULL;
3084 return(xmlStrdup(name));
3085 }
3086
3087 if (buffer == NULL)
3088 ret = xmlStrndup(buf, len);
3089 else {
3090 ret = buffer;
3091 buffer = NULL;
3092 max = XML_MAX_NAMELEN;
3093 }
3094
3095
3096 if (c == ':') {
3097 c = *cur;
3098 *prefix = ret;
3099 if (c == 0) {
3100 return(xmlStrndup(BAD_CAST "", 0));
3101 }
3102 len = 0;
3103
3104 /*
3105 * Check that the first character is proper to start
3106 * a new name
3107 */
3108 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3109 ((c >= 0x41) && (c <= 0x5A)) ||
3110 (c == '_') || (c == ':'))) {
3111 int l;
3112 int first = CUR_SCHAR(cur, l);
3113
3114 if (!IS_LETTER(first) && (first != '_')) {
3115 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3116 "Name %s is not XML Namespace compliant\n",
3117 name);
3118 }
3119 }
3120 cur++;
3121
3122 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3123 buf[len++] = c;
3124 c = *cur++;
3125 }
3126 if (len >= max) {
3127 /*
3128 * Okay someone managed to make a huge name, so he's ready to pay
3129 * for the processing speed.
3130 */
3131 max = len * 2;
3132
3133 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3134 if (buffer == NULL) {
3135 xmlErrMemory(ctxt, NULL);
3136 return(NULL);
3137 }
3138 memcpy(buffer, buf, len);
3139 while (c != 0) { /* tested bigname2.xml */
3140 if (len + 10 > max) {
3141 xmlChar *tmp;
3142
3143 max *= 2;
3144 tmp = (xmlChar *) xmlRealloc(buffer,
3145 max * sizeof(xmlChar));
3146 if (tmp == NULL) {
3147 xmlErrMemory(ctxt, NULL);
3148 xmlFree(buffer);
3149 return(NULL);
3150 }
3151 buffer = tmp;
3152 }
3153 buffer[len++] = c;
3154 c = *cur++;
3155 }
3156 buffer[len] = 0;
3157 }
3158
3159 if (buffer == NULL)
3160 ret = xmlStrndup(buf, len);
3161 else {
3162 ret = buffer;
3163 }
3164 }
3165
3166 return(ret);
3167 }
3168
3169 /************************************************************************
3170 * *
3171 * The parser itself *
3172 * Relates to http://www.w3.org/TR/REC-xml *
3173 * *
3174 ************************************************************************/
3175
3176 /************************************************************************
3177 * *
3178 * Routines to parse Name, NCName and NmToken *
3179 * *
3180 ************************************************************************/
#ifdef DEBUG
/*
 * Statistics only compiled in DEBUG builds; as file-scope statics they
 * all start at zero.  nbParseNameComplex is bumped on each entry into
 * xmlParseNameComplex(); the others are presumably maintained the same
 * way by the routine each one is named after.
 */
static unsigned long nbParseName;
static unsigned long nbParseNmToken;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseNameComplex;
static unsigned long nbParseStringName;
#endif
3189
3190 /*
3191 * The two following functions are related to the change of accepted
3192 * characters for Name and NmToken in the Revision 5 of XML-1.0
3193 * They correspond to the modified production [4] and the new production [4a]
3194 * changes in that revision. Also note that the macros used for the
3195 * productions Letter, Digit, CombiningChar and Extender are not needed
3196 * anymore.
3197 * We still keep compatibility to pre-revision5 parsing semantic if the
3198 * new XML_PARSE_OLD10 option is given to the parser.
3199 */
3200 static int
3201 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3202 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3203 /*
3204 * Use the new checks of production [4] [4a] amd [5] of the
3205 * Update 5 of XML-1.0
3206 */
3207 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3208 (((c >= 'a') && (c <= 'z')) ||
3209 ((c >= 'A') && (c <= 'Z')) ||
3210 (c == '_') || (c == ':') ||
3211 ((c >= 0xC0) && (c <= 0xD6)) ||
3212 ((c >= 0xD8) && (c <= 0xF6)) ||
3213 ((c >= 0xF8) && (c <= 0x2FF)) ||
3214 ((c >= 0x370) && (c <= 0x37D)) ||
3215 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3216 ((c >= 0x200C) && (c <= 0x200D)) ||
3217 ((c >= 0x2070) && (c <= 0x218F)) ||
3218 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3219 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3220 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3221 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3222 ((c >= 0x10000) && (c <= 0xEFFFF))))
3223 return(1);
3224 } else {
3225 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3226 return(1);
3227 }
3228 return(0);
3229 }
3230
3231 static int
3232 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3233 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3234 /*
3235 * Use the new checks of production [4] [4a] amd [5] of the
3236 * Update 5 of XML-1.0
3237 */
3238 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3239 (((c >= 'a') && (c <= 'z')) ||
3240 ((c >= 'A') && (c <= 'Z')) ||
3241 ((c >= '0') && (c <= '9')) || /* !start */
3242 (c == '_') || (c == ':') ||
3243 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3244 ((c >= 0xC0) && (c <= 0xD6)) ||
3245 ((c >= 0xD8) && (c <= 0xF6)) ||
3246 ((c >= 0xF8) && (c <= 0x2FF)) ||
3247 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3248 ((c >= 0x370) && (c <= 0x37D)) ||
3249 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3250 ((c >= 0x200C) && (c <= 0x200D)) ||
3251 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3252 ((c >= 0x2070) && (c <= 0x218F)) ||
3253 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3254 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3255 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3256 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3257 ((c >= 0x10000) && (c <= 0xEFFFF))))
3258 return(1);
3259 } else {
3260 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3261 (c == '.') || (c == '-') ||
3262 (c == '_') || (c == ':') ||
3263 (IS_COMBINING(c)) ||
3264 (IS_EXTENDER(c)))
3265 return(1);
3266 }
3267 return(0);
3268 }
3269
3270 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3271 int *len, int *alloc, int normalize);
3272
3273 static const xmlChar *
3274 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3275 int len = 0, l;
3276 int c;
3277 int count = 0;
3278
3279 #ifdef DEBUG
3280 nbParseNameComplex++;
3281 #endif
3282
3283 /*
3284 * Handler for more complex cases
3285 */
3286 GROW;
3287 if (ctxt->instate == XML_PARSER_EOF)
3288 return(NULL);
3289 c = CUR_CHAR(l);
3290 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3291 /*
3292 * Use the new checks of production [4] [4a] amd [5] of the
3293 * Update 5 of XML-1.0
3294 */
3295 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3296 (!(((c >= 'a') && (c <= 'z')) ||
3297 ((c >= 'A') && (c <= 'Z')) ||
3298 (c == '_') || (c == ':') ||
3299 ((c >= 0xC0) && (c <= 0xD6)) ||
3300 ((c >= 0xD8) && (c <= 0xF6)) ||
3301 ((c >= 0xF8) && (c <= 0x2FF)) ||
3302 ((c >= 0x370) && (c <= 0x37D)) ||
3303 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3304 ((c >= 0x200C) && (c <= 0x200D)) ||
3305 ((c >= 0x2070) && (c <= 0x218F)) ||
3306 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3307 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3308 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3309 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3310 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3311 return(NULL);
3312 }
3313 len += l;
3314 NEXTL(l);
3315 c = CUR_CHAR(l);
3316 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3317 (((c >= 'a') && (c <= 'z')) ||
3318 ((c >= 'A') && (c <= 'Z')) ||
3319 ((c >= '0') && (c <= '9')) || /* !start */
3320 (c == '_') || (c == ':') ||
3321 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3322 ((c >= 0xC0) && (c <= 0xD6)) ||
3323 ((c >= 0xD8) && (c <= 0xF6)) ||
3324 ((c >= 0xF8) && (c <= 0x2FF)) ||
3325 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3326 ((c >= 0x370) && (c <= 0x37D)) ||
3327 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3328 ((c >= 0x200C) && (c <= 0x200D)) ||
3329 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3330 ((c >= 0x2070) && (c <= 0x218F)) ||
3331 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3332 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3333 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3334 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3335 ((c >= 0x10000) && (c <= 0xEFFFF))
3336 )) {
3337 if (count++ > XML_PARSER_CHUNK_SIZE) {
3338 count = 0;
3339 GROW;
3340 if (ctxt->instate == XML_PARSER_EOF)
3341 return(NULL);
3342 }
3343 len += l;
3344 NEXTL(l);
3345 c = CUR_CHAR(l);
3346 }
3347 } else {
3348 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3349 (!IS_LETTER(c) && (c != '_') &&
3350 (c != ':'))) {
3351 return(NULL);
3352 }
3353 len += l;
3354 NEXTL(l);
3355 c = CUR_CHAR(l);
3356
3357 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3358 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3359 (c == '.') || (c == '-') ||
3360 (c == '_') || (c == ':') ||
3361 (IS_COMBINING(c)) ||
3362 (IS_EXTENDER(c)))) {
3363 if (count++ > XML_PARSER_CHUNK_SIZE) {
3364 count = 0;
3365 GROW;
3366 if (ctxt->instate == XML_PARSER_EOF)
3367 return(NULL);
3368 }
3369 len += l;
3370 NEXTL(l);
3371 c = CUR_CHAR(l);
3372 if (c == 0) {
3373 count = 0;
3374 GROW;
3375 if (ctxt->instate == XML_PARSER_EOF)
3376 return(NULL);
3377 c = CUR_CHAR(l);
3378 }
3379 }
3380 }
3381 if ((len > XML_MAX_NAME_LENGTH) &&
3382 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3383 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3384 return(NULL);
3385 }
3386 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3387 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3388 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3389 }
3390
3391 /**
3392 * xmlParseName:
3393 * @ctxt: an XML parser context
3394 *
3395 * parse an XML name.
3396 *
3397 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3398 * CombiningChar | Extender
3399 *
3400 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3401 *
3402 * [6] Names ::= Name (#x20 Name)*
3403 *
3404 * Returns the Name parsed or NULL
3405 */
3406
3407 const xmlChar *
3408 xmlParseName(xmlParserCtxtPtr ctxt) {
3409 const xmlChar *in;
3410 const xmlChar *ret;
3411 int count = 0;
3412
3413 GROW;
3414
3415 #ifdef DEBUG
3416 nbParseName++;
3417 #endif
3418
3419 /*
3420 * Accelerator for simple ASCII names
3421 */
3422 in = ctxt->input->cur;
3423 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3424 ((*in >= 0x41) && (*in <= 0x5A)) ||
3425 (*in == '_') || (*in == ':')) {
3426 in++;
3427 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3428 ((*in >= 0x41) && (*in <= 0x5A)) ||
3429 ((*in >= 0x30) && (*in <= 0x39)) ||
3430 (*in == '_') || (*in == '-') ||
3431 (*in == ':') || (*in == '.'))
3432 in++;
3433 if ((*in > 0) && (*in < 0x80)) {
3434 count = in - ctxt->input->cur;
3435 if ((count > XML_MAX_NAME_LENGTH) &&
3436 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3437 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3438 return(NULL);
3439 }
3440 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3441 ctxt->input->cur = in;
3442 ctxt->nbChars += count;
3443 ctxt->input->col += count;
3444 if (ret == NULL)
3445 xmlErrMemory(ctxt, NULL);
3446 return(ret);
3447 }
3448 }
3449 /* accelerator for special cases */
3450 return(xmlParseNameComplex(ctxt));
3451 }
3452
3453 static const xmlChar *
3454 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3455 int len = 0, l;
3456 int c;
3457 int count = 0;
3458 const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
3459
3460 #ifdef DEBUG
3461 nbParseNCNameComplex++;
3462 #endif
3463
3464 /*
3465 * Handler for more complex cases
3466 */
3467 GROW;
3468 end = ctxt->input->cur;
3469 c = CUR_CHAR(l);
3470 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3471 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3472 return(NULL);
3473 }
3474
3475 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3476 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3477 if (count++ > XML_PARSER_CHUNK_SIZE) {
3478 if ((len > XML_MAX_NAME_LENGTH) &&
3479 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3480 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3481 return(NULL);
3482 }
3483 count = 0;
3484 GROW;
3485 if (ctxt->instate == XML_PARSER_EOF)
3486 return(NULL);
3487 }
3488 len += l;
3489 NEXTL(l);
3490 end = ctxt->input->cur;
3491 c = CUR_CHAR(l);
3492 if (c == 0) {
3493 count = 0;
3494 GROW;
3495 if (ctxt->instate == XML_PARSER_EOF)
3496 return(NULL);
3497 end = ctxt->input->cur;
3498 c = CUR_CHAR(l);
3499 }
3500 }
3501 if ((len > XML_MAX_NAME_LENGTH) &&
3502 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3503 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3504 return(NULL);
3505 }
3506 return(xmlDictLookup(ctxt->dict, end - len, len));
3507 }
3508
3509 /**
3510 * xmlParseNCName:
3511 * @ctxt: an XML parser context
3512 * @len: length of the string parsed
3513 *
3514 * parse an XML name.
3515 *
3516 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3517 * CombiningChar | Extender
3518 *
3519 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3520 *
3521 * Returns the Name parsed or NULL
3522 */
3523
3524 static const xmlChar *
3525 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3526 const xmlChar *in;
3527 const xmlChar *ret;
3528 int count = 0;
3529
3530 #ifdef DEBUG
3531 nbParseNCName++;
3532 #endif
3533
3534 /*
3535 * Accelerator for simple ASCII names
3536 */
3537 in = ctxt->input->cur;
3538 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3539 ((*in >= 0x41) && (*in <= 0x5A)) ||
3540 (*in == '_')) {
3541 in++;
3542 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3543 ((*in >= 0x41) && (*in <= 0x5A)) ||
3544 ((*in >= 0x30) && (*in <= 0x39)) ||
3545 (*in == '_') || (*in == '-') ||
3546 (*in == '.'))
3547 in++;
3548 if ((*in > 0) && (*in < 0x80)) {
3549 count = in - ctxt->input->cur;
3550 if ((count > XML_MAX_NAME_LENGTH) &&
3551 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3552 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3553 return(NULL);
3554 }
3555 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3556 ctxt->input->cur = in;
3557 ctxt->nbChars += count;
3558 ctxt->input->col += count;
3559 if (ret == NULL) {
3560 xmlErrMemory(ctxt, NULL);
3561 }
3562 return(ret);
3563 }
3564 }
3565 return(xmlParseNCNameComplex(ctxt));
3566 }
3567
3568 /**
3569 * xmlParseNameAndCompare:
3570 * @ctxt: an XML parser context
3571 *
3572 * parse an XML name and compares for match