Update libxml to 2.7.7
[reactos.git] / reactos / lib / 3rdparty / libxml2 / parser.c
1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33 #define IN_LIBXML
34 #include "libxml.h"
35
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
38 #else
39 #define XML_DIR_SEP '/'
40 #endif
41
42 #include <stdlib.h>
43 #include <string.h>
44 #include <stdarg.h>
45 #include <libxml/xmlmemory.h>
46 #include <libxml/threads.h>
47 #include <libxml/globals.h>
48 #include <libxml/tree.h>
49 #include <libxml/parser.h>
50 #include <libxml/parserInternals.h>
51 #include <libxml/valid.h>
52 #include <libxml/entities.h>
53 #include <libxml/xmlerror.h>
54 #include <libxml/encoding.h>
55 #include <libxml/xmlIO.h>
56 #include <libxml/uri.h>
57 #ifdef LIBXML_CATALOG_ENABLED
58 #include <libxml/catalog.h>
59 #endif
60 #ifdef LIBXML_SCHEMAS_ENABLED
61 #include <libxml/xmlschemastypes.h>
62 #include <libxml/relaxng.h>
63 #endif
64 #ifdef HAVE_CTYPE_H
65 #include <ctype.h>
66 #endif
67 #ifdef HAVE_STDLIB_H
68 #include <stdlib.h>
69 #endif
70 #ifdef HAVE_SYS_STAT_H
71 #include <sys/stat.h>
72 #endif
73 #ifdef HAVE_FCNTL_H
74 #include <fcntl.h>
75 #endif
76 #ifdef HAVE_UNISTD_H
77 #include <unistd.h>
78 #endif
79 #ifdef HAVE_ZLIB_H
80 #include <zlib.h>
81 #endif
82
83 static void
84 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85
86 static xmlParserCtxtPtr
87 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
88 const xmlChar *base, xmlParserCtxtPtr pctx);
89
90 /************************************************************************
91 * *
92 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
93 * *
94 ************************************************************************/
95
96 #define XML_PARSER_BIG_ENTITY 1000
97 #define XML_PARSER_LOT_ENTITY 5000
98
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
105 #define XML_PARSER_NON_LINEAR 10
106
107 /*
108 * xmlParserEntityCheck
109 *
110 * Function to check non-linear entity expansion behaviour
111 * This is here to detect and stop exponential linear entity expansion
112 * This is not a limitation of the parser but a safety
113 * boundary feature. It can be disabled with the XML_PARSE_HUGE
114 * parser option.
115 */
116 static int
117 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
118 xmlEntityPtr ent)
119 {
120 unsigned long consumed = 0;
121
122 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
123 return (0);
124 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
125 return (1);
126 if (size != 0) {
127 /*
128 * Do the check based on the replacement size of the entity
129 */
130 if (size < XML_PARSER_BIG_ENTITY)
131 return(0);
132
133 /*
134 * A limit on the amount of text data reasonably used
135 */
136 if (ctxt->input != NULL) {
137 consumed = ctxt->input->consumed +
138 (ctxt->input->cur - ctxt->input->base);
139 }
140 consumed += ctxt->sizeentities;
141
142 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
143 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
144 return (0);
145 } else if (ent != NULL) {
146 /*
147 * use the number of parsed entities in the replacement
148 */
149 size = ent->checked;
150
151 /*
152 * The amount of data parsed counting entities size only once
153 */
154 if (ctxt->input != NULL) {
155 consumed = ctxt->input->consumed +
156 (ctxt->input->cur - ctxt->input->base);
157 }
158 consumed += ctxt->sizeentities;
159
160 /*
161 * Check the density of entities for the amount of data
162 * knowing an entity reference will take at least 3 bytes
163 */
164 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
165 return (0);
166 } else {
167 /*
168 * strange we got no data for checking just return
169 */
170 return (0);
171 }
172
173 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
174 return (1);
175 }
176
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary nesting depth limit for the XML documents accepted for
 * processing. It is a safety boundary rather than a parser limitation,
 * and the XML_PARSE_HUGE parser option disables it.
 */
unsigned int xmlParserMaxDepth = 256;
186
187
188
189 #define SAX2 1
190 #define XML_PARSER_BIG_BUFFER_SIZE 300
191 #define XML_PARSER_BUFFER_SIZE 100
192 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
193
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
202
203
204 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
205 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206 const xmlChar **str);
207
208 static xmlParserErrors
209 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
210 xmlSAXHandlerPtr sax,
211 void *user_data, int depth, const xmlChar *URL,
212 const xmlChar *ID, xmlNodePtr *list);
213
214 static int
215 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
216 const char *encoding);
217 #ifdef LIBXML_LEGACY_ENABLED
218 static void
219 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
220 xmlNodePtr lastNode);
221 #endif /* LIBXML_LEGACY_ENABLED */
222
223 static xmlParserErrors
224 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
225 const xmlChar *string, void *user_data, xmlNodePtr *lst);
226
227 static int
228 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
229
230 /************************************************************************
231 * *
232 * Some factorized error routines *
233 * *
234 ************************************************************************/
235
236 /**
237 * xmlErrAttributeDup:
238 * @ctxt: an XML parser context
239 * @prefix: the attribute prefix
240 * @localname: the attribute localname
241 *
242 * Handle a redefinition of attribute error
243 */
244 static void
245 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
246 const xmlChar * localname)
247 {
248 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
249 (ctxt->instate == XML_PARSER_EOF))
250 return;
251 if (ctxt != NULL)
252 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
253
254 if (prefix == NULL)
255 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
256 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
257 (const char *) localname, NULL, NULL, 0, 0,
258 "Attribute %s redefined\n", localname);
259 else
260 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
261 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
262 (const char *) prefix, (const char *) localname,
263 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
264 localname);
265 if (ctxt != NULL) {
266 ctxt->wellFormed = 0;
267 if (ctxt->recovery == 0)
268 ctxt->disableSAX = 1;
269 }
270 }
271
272 /**
273 * xmlFatalErr:
274 * @ctxt: an XML parser context
275 * @error: the error number
276 * @extra: extra information string
277 *
278 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
279 */
280 static void
281 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
282 {
283 const char *errmsg;
284
285 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
286 (ctxt->instate == XML_PARSER_EOF))
287 return;
288 switch (error) {
289 case XML_ERR_INVALID_HEX_CHARREF:
290 errmsg = "CharRef: invalid hexadecimal value\n";
291 break;
292 case XML_ERR_INVALID_DEC_CHARREF:
293 errmsg = "CharRef: invalid decimal value\n";
294 break;
295 case XML_ERR_INVALID_CHARREF:
296 errmsg = "CharRef: invalid value\n";
297 break;
298 case XML_ERR_INTERNAL_ERROR:
299 errmsg = "internal error";
300 break;
301 case XML_ERR_PEREF_AT_EOF:
302 errmsg = "PEReference at end of document\n";
303 break;
304 case XML_ERR_PEREF_IN_PROLOG:
305 errmsg = "PEReference in prolog\n";
306 break;
307 case XML_ERR_PEREF_IN_EPILOG:
308 errmsg = "PEReference in epilog\n";
309 break;
310 case XML_ERR_PEREF_NO_NAME:
311 errmsg = "PEReference: no name\n";
312 break;
313 case XML_ERR_PEREF_SEMICOL_MISSING:
314 errmsg = "PEReference: expecting ';'\n";
315 break;
316 case XML_ERR_ENTITY_LOOP:
317 errmsg = "Detected an entity reference loop\n";
318 break;
319 case XML_ERR_ENTITY_NOT_STARTED:
320 errmsg = "EntityValue: \" or ' expected\n";
321 break;
322 case XML_ERR_ENTITY_PE_INTERNAL:
323 errmsg = "PEReferences forbidden in internal subset\n";
324 break;
325 case XML_ERR_ENTITY_NOT_FINISHED:
326 errmsg = "EntityValue: \" or ' expected\n";
327 break;
328 case XML_ERR_ATTRIBUTE_NOT_STARTED:
329 errmsg = "AttValue: \" or ' expected\n";
330 break;
331 case XML_ERR_LT_IN_ATTRIBUTE:
332 errmsg = "Unescaped '<' not allowed in attributes values\n";
333 break;
334 case XML_ERR_LITERAL_NOT_STARTED:
335 errmsg = "SystemLiteral \" or ' expected\n";
336 break;
337 case XML_ERR_LITERAL_NOT_FINISHED:
338 errmsg = "Unfinished System or Public ID \" or ' expected\n";
339 break;
340 case XML_ERR_MISPLACED_CDATA_END:
341 errmsg = "Sequence ']]>' not allowed in content\n";
342 break;
343 case XML_ERR_URI_REQUIRED:
344 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
345 break;
346 case XML_ERR_PUBID_REQUIRED:
347 errmsg = "PUBLIC, the Public Identifier is missing\n";
348 break;
349 case XML_ERR_HYPHEN_IN_COMMENT:
350 errmsg = "Comment must not contain '--' (double-hyphen)\n";
351 break;
352 case XML_ERR_PI_NOT_STARTED:
353 errmsg = "xmlParsePI : no target name\n";
354 break;
355 case XML_ERR_RESERVED_XML_NAME:
356 errmsg = "Invalid PI name\n";
357 break;
358 case XML_ERR_NOTATION_NOT_STARTED:
359 errmsg = "NOTATION: Name expected here\n";
360 break;
361 case XML_ERR_NOTATION_NOT_FINISHED:
362 errmsg = "'>' required to close NOTATION declaration\n";
363 break;
364 case XML_ERR_VALUE_REQUIRED:
365 errmsg = "Entity value required\n";
366 break;
367 case XML_ERR_URI_FRAGMENT:
368 errmsg = "Fragment not allowed";
369 break;
370 case XML_ERR_ATTLIST_NOT_STARTED:
371 errmsg = "'(' required to start ATTLIST enumeration\n";
372 break;
373 case XML_ERR_NMTOKEN_REQUIRED:
374 errmsg = "NmToken expected in ATTLIST enumeration\n";
375 break;
376 case XML_ERR_ATTLIST_NOT_FINISHED:
377 errmsg = "')' required to finish ATTLIST enumeration\n";
378 break;
379 case XML_ERR_MIXED_NOT_STARTED:
380 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
381 break;
382 case XML_ERR_PCDATA_REQUIRED:
383 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
384 break;
385 case XML_ERR_ELEMCONTENT_NOT_STARTED:
386 errmsg = "ContentDecl : Name or '(' expected\n";
387 break;
388 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
389 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
390 break;
391 case XML_ERR_PEREF_IN_INT_SUBSET:
392 errmsg =
393 "PEReference: forbidden within markup decl in internal subset\n";
394 break;
395 case XML_ERR_GT_REQUIRED:
396 errmsg = "expected '>'\n";
397 break;
398 case XML_ERR_CONDSEC_INVALID:
399 errmsg = "XML conditional section '[' expected\n";
400 break;
401 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
402 errmsg = "Content error in the external subset\n";
403 break;
404 case XML_ERR_CONDSEC_INVALID_KEYWORD:
405 errmsg =
406 "conditional section INCLUDE or IGNORE keyword expected\n";
407 break;
408 case XML_ERR_CONDSEC_NOT_FINISHED:
409 errmsg = "XML conditional section not closed\n";
410 break;
411 case XML_ERR_XMLDECL_NOT_STARTED:
412 errmsg = "Text declaration '<?xml' required\n";
413 break;
414 case XML_ERR_XMLDECL_NOT_FINISHED:
415 errmsg = "parsing XML declaration: '?>' expected\n";
416 break;
417 case XML_ERR_EXT_ENTITY_STANDALONE:
418 errmsg = "external parsed entities cannot be standalone\n";
419 break;
420 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
421 errmsg = "EntityRef: expecting ';'\n";
422 break;
423 case XML_ERR_DOCTYPE_NOT_FINISHED:
424 errmsg = "DOCTYPE improperly terminated\n";
425 break;
426 case XML_ERR_LTSLASH_REQUIRED:
427 errmsg = "EndTag: '</' not found\n";
428 break;
429 case XML_ERR_EQUAL_REQUIRED:
430 errmsg = "expected '='\n";
431 break;
432 case XML_ERR_STRING_NOT_CLOSED:
433 errmsg = "String not closed expecting \" or '\n";
434 break;
435 case XML_ERR_STRING_NOT_STARTED:
436 errmsg = "String not started expecting ' or \"\n";
437 break;
438 case XML_ERR_ENCODING_NAME:
439 errmsg = "Invalid XML encoding name\n";
440 break;
441 case XML_ERR_STANDALONE_VALUE:
442 errmsg = "standalone accepts only 'yes' or 'no'\n";
443 break;
444 case XML_ERR_DOCUMENT_EMPTY:
445 errmsg = "Document is empty\n";
446 break;
447 case XML_ERR_DOCUMENT_END:
448 errmsg = "Extra content at the end of the document\n";
449 break;
450 case XML_ERR_NOT_WELL_BALANCED:
451 errmsg = "chunk is not well balanced\n";
452 break;
453 case XML_ERR_EXTRA_CONTENT:
454 errmsg = "extra content at the end of well balanced chunk\n";
455 break;
456 case XML_ERR_VERSION_MISSING:
457 errmsg = "Malformed declaration expecting version\n";
458 break;
459 #if 0
460 case:
461 errmsg = "\n";
462 break;
463 #endif
464 default:
465 errmsg = "Unregistered error message\n";
466 }
467 if (ctxt != NULL)
468 ctxt->errNo = error;
469 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
470 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
471 info);
472 if (ctxt != NULL) {
473 ctxt->wellFormed = 0;
474 if (ctxt->recovery == 0)
475 ctxt->disableSAX = 1;
476 }
477 }
478
479 /**
480 * xmlFatalErrMsg:
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
484 *
485 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
486 */
487 static void
488 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
489 const char *msg)
490 {
491 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
492 (ctxt->instate == XML_PARSER_EOF))
493 return;
494 if (ctxt != NULL)
495 ctxt->errNo = error;
496 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
497 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
498 if (ctxt != NULL) {
499 ctxt->wellFormed = 0;
500 if (ctxt->recovery == 0)
501 ctxt->disableSAX = 1;
502 }
503 }
504
505 /**
506 * xmlWarningMsg:
507 * @ctxt: an XML parser context
508 * @error: the error number
509 * @msg: the error message
510 * @str1: extra data
511 * @str2: extra data
512 *
513 * Handle a warning.
514 */
515 static void
516 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
517 const char *msg, const xmlChar *str1, const xmlChar *str2)
518 {
519 xmlStructuredErrorFunc schannel = NULL;
520
521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
523 return;
524 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
525 (ctxt->sax->initialized == XML_SAX2_MAGIC))
526 schannel = ctxt->sax->serror;
527 if (ctxt != NULL) {
528 __xmlRaiseError(schannel,
529 (ctxt->sax) ? ctxt->sax->warning : NULL,
530 ctxt->userData,
531 ctxt, NULL, XML_FROM_PARSER, error,
532 XML_ERR_WARNING, NULL, 0,
533 (const char *) str1, (const char *) str2, NULL, 0, 0,
534 msg, (const char *) str1, (const char *) str2);
535 } else {
536 __xmlRaiseError(schannel, NULL, NULL,
537 ctxt, NULL, XML_FROM_PARSER, error,
538 XML_ERR_WARNING, NULL, 0,
539 (const char *) str1, (const char *) str2, NULL, 0, 0,
540 msg, (const char *) str1, (const char *) str2);
541 }
542 }
543
544 /**
545 * xmlValidityError:
546 * @ctxt: an XML parser context
547 * @error: the error number
548 * @msg: the error message
549 * @str1: extra data
550 *
551 * Handle a validity error.
552 */
553 static void
554 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
555 const char *msg, const xmlChar *str1, const xmlChar *str2)
556 {
557 xmlStructuredErrorFunc schannel = NULL;
558
559 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
560 (ctxt->instate == XML_PARSER_EOF))
561 return;
562 if (ctxt != NULL) {
563 ctxt->errNo = error;
564 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
565 schannel = ctxt->sax->serror;
566 }
567 if (ctxt != NULL) {
568 __xmlRaiseError(schannel,
569 ctxt->vctxt.error, ctxt->vctxt.userData,
570 ctxt, NULL, XML_FROM_DTD, error,
571 XML_ERR_ERROR, NULL, 0, (const char *) str1,
572 (const char *) str2, NULL, 0, 0,
573 msg, (const char *) str1, (const char *) str2);
574 ctxt->valid = 0;
575 } else {
576 __xmlRaiseError(schannel, NULL, NULL,
577 ctxt, NULL, XML_FROM_DTD, error,
578 XML_ERR_ERROR, NULL, 0, (const char *) str1,
579 (const char *) str2, NULL, 0, 0,
580 msg, (const char *) str1, (const char *) str2);
581 }
582 }
583
584 /**
585 * xmlFatalErrMsgInt:
586 * @ctxt: an XML parser context
587 * @error: the error number
588 * @msg: the error message
589 * @val: an integer value
590 *
591 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
592 */
593 static void
594 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595 const char *msg, int val)
596 {
597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
600 if (ctxt != NULL)
601 ctxt->errNo = error;
602 __xmlRaiseError(NULL, NULL, NULL,
603 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
604 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
605 if (ctxt != NULL) {
606 ctxt->wellFormed = 0;
607 if (ctxt->recovery == 0)
608 ctxt->disableSAX = 1;
609 }
610 }
611
612 /**
613 * xmlFatalErrMsgStrIntStr:
614 * @ctxt: an XML parser context
615 * @error: the error number
616 * @msg: the error message
617 * @str1: an string info
618 * @val: an integer value
619 * @str2: an string info
620 *
621 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
622 */
623 static void
624 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
625 const char *msg, const xmlChar *str1, int val,
626 const xmlChar *str2)
627 {
628 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
629 (ctxt->instate == XML_PARSER_EOF))
630 return;
631 if (ctxt != NULL)
632 ctxt->errNo = error;
633 __xmlRaiseError(NULL, NULL, NULL,
634 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
635 NULL, 0, (const char *) str1, (const char *) str2,
636 NULL, val, 0, msg, str1, val, str2);
637 if (ctxt != NULL) {
638 ctxt->wellFormed = 0;
639 if (ctxt->recovery == 0)
640 ctxt->disableSAX = 1;
641 }
642 }
643
644 /**
645 * xmlFatalErrMsgStr:
646 * @ctxt: an XML parser context
647 * @error: the error number
648 * @msg: the error message
649 * @val: a string value
650 *
651 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
652 */
653 static void
654 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
655 const char *msg, const xmlChar * val)
656 {
657 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
658 (ctxt->instate == XML_PARSER_EOF))
659 return;
660 if (ctxt != NULL)
661 ctxt->errNo = error;
662 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
663 XML_FROM_PARSER, error, XML_ERR_FATAL,
664 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
665 val);
666 if (ctxt != NULL) {
667 ctxt->wellFormed = 0;
668 if (ctxt->recovery == 0)
669 ctxt->disableSAX = 1;
670 }
671 }
672
673 /**
674 * xmlErrMsgStr:
675 * @ctxt: an XML parser context
676 * @error: the error number
677 * @msg: the error message
678 * @val: a string value
679 *
680 * Handle a non fatal parser error
681 */
682 static void
683 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
684 const char *msg, const xmlChar * val)
685 {
686 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
687 (ctxt->instate == XML_PARSER_EOF))
688 return;
689 if (ctxt != NULL)
690 ctxt->errNo = error;
691 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
692 XML_FROM_PARSER, error, XML_ERR_ERROR,
693 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
694 val);
695 }
696
697 /**
698 * xmlNsErr:
699 * @ctxt: an XML parser context
700 * @error: the error number
701 * @msg: the message
702 * @info1: extra information string
703 * @info2: extra information string
704 *
705 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
706 */
707 static void
708 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
709 const char *msg,
710 const xmlChar * info1, const xmlChar * info2,
711 const xmlChar * info3)
712 {
713 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714 (ctxt->instate == XML_PARSER_EOF))
715 return;
716 if (ctxt != NULL)
717 ctxt->errNo = error;
718 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
719 XML_ERR_ERROR, NULL, 0, (const char *) info1,
720 (const char *) info2, (const char *) info3, 0, 0, msg,
721 info1, info2, info3);
722 if (ctxt != NULL)
723 ctxt->nsWellFormed = 0;
724 }
725
726 /**
727 * xmlNsWarn
728 * @ctxt: an XML parser context
729 * @error: the error number
730 * @msg: the message
731 * @info1: extra information string
732 * @info2: extra information string
733 *
734 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
735 */
736 static void
737 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
738 const char *msg,
739 const xmlChar * info1, const xmlChar * info2,
740 const xmlChar * info3)
741 {
742 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
743 (ctxt->instate == XML_PARSER_EOF))
744 return;
745 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
746 XML_ERR_WARNING, NULL, 0, (const char *) info1,
747 (const char *) info2, (const char *) info3, 0, 0, msg,
748 info1, info2, info3);
749 }
750
751 /************************************************************************
752 * *
753 * Library wide options *
754 * *
755 ************************************************************************/
756
757 /**
758 * xmlHasFeature:
759 * @feature: the feature to be examined
760 *
761 * Examines if the library has been compiled with a given feature.
762 *
763 * Returns a non-zero value if the feature exist, otherwise zero.
764 * Returns zero (0) if the feature does not exist or an unknown
765 * unknown feature is requested, non-zero otherwise.
766 */
767 int
768 xmlHasFeature(xmlFeature feature)
769 {
770 switch (feature) {
771 case XML_WITH_THREAD:
772 #ifdef LIBXML_THREAD_ENABLED
773 return(1);
774 #else
775 return(0);
776 #endif
777 case XML_WITH_TREE:
778 #ifdef LIBXML_TREE_ENABLED
779 return(1);
780 #else
781 return(0);
782 #endif
783 case XML_WITH_OUTPUT:
784 #ifdef LIBXML_OUTPUT_ENABLED
785 return(1);
786 #else
787 return(0);
788 #endif
789 case XML_WITH_PUSH:
790 #ifdef LIBXML_PUSH_ENABLED
791 return(1);
792 #else
793 return(0);
794 #endif
795 case XML_WITH_READER:
796 #ifdef LIBXML_READER_ENABLED
797 return(1);
798 #else
799 return(0);
800 #endif
801 case XML_WITH_PATTERN:
802 #ifdef LIBXML_PATTERN_ENABLED
803 return(1);
804 #else
805 return(0);
806 #endif
807 case XML_WITH_WRITER:
808 #ifdef LIBXML_WRITER_ENABLED
809 return(1);
810 #else
811 return(0);
812 #endif
813 case XML_WITH_SAX1:
814 #ifdef LIBXML_SAX1_ENABLED
815 return(1);
816 #else
817 return(0);
818 #endif
819 case XML_WITH_FTP:
820 #ifdef LIBXML_FTP_ENABLED
821 return(1);
822 #else
823 return(0);
824 #endif
825 case XML_WITH_HTTP:
826 #ifdef LIBXML_HTTP_ENABLED
827 return(1);
828 #else
829 return(0);
830 #endif
831 case XML_WITH_VALID:
832 #ifdef LIBXML_VALID_ENABLED
833 return(1);
834 #else
835 return(0);
836 #endif
837 case XML_WITH_HTML:
838 #ifdef LIBXML_HTML_ENABLED
839 return(1);
840 #else
841 return(0);
842 #endif
843 case XML_WITH_LEGACY:
844 #ifdef LIBXML_LEGACY_ENABLED
845 return(1);
846 #else
847 return(0);
848 #endif
849 case XML_WITH_C14N:
850 #ifdef LIBXML_C14N_ENABLED
851 return(1);
852 #else
853 return(0);
854 #endif
855 case XML_WITH_CATALOG:
856 #ifdef LIBXML_CATALOG_ENABLED
857 return(1);
858 #else
859 return(0);
860 #endif
861 case XML_WITH_XPATH:
862 #ifdef LIBXML_XPATH_ENABLED
863 return(1);
864 #else
865 return(0);
866 #endif
867 case XML_WITH_XPTR:
868 #ifdef LIBXML_XPTR_ENABLED
869 return(1);
870 #else
871 return(0);
872 #endif
873 case XML_WITH_XINCLUDE:
874 #ifdef LIBXML_XINCLUDE_ENABLED
875 return(1);
876 #else
877 return(0);
878 #endif
879 case XML_WITH_ICONV:
880 #ifdef LIBXML_ICONV_ENABLED
881 return(1);
882 #else
883 return(0);
884 #endif
885 case XML_WITH_ISO8859X:
886 #ifdef LIBXML_ISO8859X_ENABLED
887 return(1);
888 #else
889 return(0);
890 #endif
891 case XML_WITH_UNICODE:
892 #ifdef LIBXML_UNICODE_ENABLED
893 return(1);
894 #else
895 return(0);
896 #endif
897 case XML_WITH_REGEXP:
898 #ifdef LIBXML_REGEXP_ENABLED
899 return(1);
900 #else
901 return(0);
902 #endif
903 case XML_WITH_AUTOMATA:
904 #ifdef LIBXML_AUTOMATA_ENABLED
905 return(1);
906 #else
907 return(0);
908 #endif
909 case XML_WITH_EXPR:
910 #ifdef LIBXML_EXPR_ENABLED
911 return(1);
912 #else
913 return(0);
914 #endif
915 case XML_WITH_SCHEMAS:
916 #ifdef LIBXML_SCHEMAS_ENABLED
917 return(1);
918 #else
919 return(0);
920 #endif
921 case XML_WITH_SCHEMATRON:
922 #ifdef LIBXML_SCHEMATRON_ENABLED
923 return(1);
924 #else
925 return(0);
926 #endif
927 case XML_WITH_MODULES:
928 #ifdef LIBXML_MODULES_ENABLED
929 return(1);
930 #else
931 return(0);
932 #endif
933 case XML_WITH_DEBUG:
934 #ifdef LIBXML_DEBUG_ENABLED
935 return(1);
936 #else
937 return(0);
938 #endif
939 case XML_WITH_DEBUG_MEM:
940 #ifdef DEBUG_MEMORY_LOCATION
941 return(1);
942 #else
943 return(0);
944 #endif
945 case XML_WITH_DEBUG_RUN:
946 #ifdef LIBXML_DEBUG_RUNTIME
947 return(1);
948 #else
949 return(0);
950 #endif
951 case XML_WITH_ZLIB:
952 #ifdef LIBXML_ZLIB_ENABLED
953 return(1);
954 #else
955 return(0);
956 #endif
957 default:
958 break;
959 }
960 return(0);
961 }
962
963 /************************************************************************
964 * *
965 * SAX2 defaulted attributes handling *
966 * *
967 ************************************************************************/
968
969 /**
970 * xmlDetectSAX2:
971 * @ctxt: an XML parser context
972 *
973 * Do the SAX2 detection and specific intialization
974 */
975 static void
976 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
977 if (ctxt == NULL) return;
978 #ifdef LIBXML_SAX1_ENABLED
979 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
980 ((ctxt->sax->startElementNs != NULL) ||
981 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
982 #else
983 ctxt->sax2 = 1;
984 #endif /* LIBXML_SAX1_ENABLED */
985
986 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
987 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
988 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
989 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
990 (ctxt->str_xml_ns == NULL)) {
991 xmlErrMemory(ctxt, NULL);
992 }
993 }
994
995 typedef struct _xmlDefAttrs xmlDefAttrs;
996 typedef xmlDefAttrs *xmlDefAttrsPtr;
997 struct _xmlDefAttrs {
998 int nbAttrs; /* number of defaulted attributes on that element */
999 int maxAttrs; /* the size of the array */
1000 const xmlChar *values[5]; /* array of localname/prefix/values/external */
1001 };
1002
1003 /**
1004 * xmlAttrNormalizeSpace:
1005 * @src: the source string
1006 * @dst: the target string
1007 *
1008 * Normalize the space in non CDATA attribute values:
1009 * If the attribute type is not CDATA, then the XML processor MUST further
1010 * process the normalized attribute value by discarding any leading and
1011 * trailing space (#x20) characters, and by replacing sequences of space
1012 * (#x20) characters by a single space (#x20) character.
1013 * Note that the size of dst need to be at least src, and if one doesn't need
1014 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1015 * passing src as dst is just fine.
1016 *
1017 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1018 * is needed.
1019 */
1020 static xmlChar *
1021 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1022 {
1023 if ((src == NULL) || (dst == NULL))
1024 return(NULL);
1025
1026 while (*src == 0x20) src++;
1027 while (*src != 0) {
1028 if (*src == 0x20) {
1029 while (*src == 0x20) src++;
1030 if (*src != 0)
1031 *dst++ = 0x20;
1032 } else {
1033 *dst++ = *src++;
1034 }
1035 }
1036 *dst = 0;
1037 if (dst == src)
1038 return(NULL);
1039 return(dst);
1040 }
1041
1042 /**
1043 * xmlAttrNormalizeSpace2:
1044 * @src: the source string
1045 *
1046 * Normalize the space in non CDATA attribute values, a slightly more complex
1047 * front end to avoid allocation problems when running on attribute values
1048 * coming from the input.
1049 *
1050 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1051 * is needed.
1052 */
1053 static const xmlChar *
1054 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1055 {
1056 int i;
1057 int remove_head = 0;
1058 int need_realloc = 0;
1059 const xmlChar *cur;
1060
1061 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1062 return(NULL);
1063 i = *len;
1064 if (i <= 0)
1065 return(NULL);
1066
1067 cur = src;
1068 while (*cur == 0x20) {
1069 cur++;
1070 remove_head++;
1071 }
1072 while (*cur != 0) {
1073 if (*cur == 0x20) {
1074 cur++;
1075 if ((*cur == 0x20) || (*cur == 0)) {
1076 need_realloc = 1;
1077 break;
1078 }
1079 } else
1080 cur++;
1081 }
1082 if (need_realloc) {
1083 xmlChar *ret;
1084
1085 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1086 if (ret == NULL) {
1087 xmlErrMemory(ctxt, NULL);
1088 return(NULL);
1089 }
1090 xmlAttrNormalizeSpace(ret, ret);
1091 *len = (int) strlen((const char *)ret);
1092 return(ret);
1093 } else if (remove_head) {
1094 *len -= remove_head;
1095 memmove(src, src + remove_head, 1 + *len);
1096 return(src);
1097 }
1098 return(NULL);
1099 }
1100
1101 /**
1102 * xmlAddDefAttrs:
1103 * @ctxt: an XML parser context
1104 * @fullname: the element fullname
1105 * @fullattr: the attribute fullname
1106 * @value: the attribute value
1107 *
1108 * Add a defaulted attribute for an element
1109 */
1110 static void
1111 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1112 const xmlChar *fullname,
1113 const xmlChar *fullattr,
1114 const xmlChar *value) {
1115 xmlDefAttrsPtr defaults;
1116 int len;
1117 const xmlChar *name;
1118 const xmlChar *prefix;
1119
1120 /*
1121 * Allows to detect attribute redefinitions
1122 */
1123 if (ctxt->attsSpecial != NULL) {
1124 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1125 return;
1126 }
1127
1128 if (ctxt->attsDefault == NULL) {
1129 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1130 if (ctxt->attsDefault == NULL)
1131 goto mem_error;
1132 }
1133
1134 /*
1135 * split the element name into prefix:localname , the string found
1136 * are within the DTD and then not associated to namespace names.
1137 */
1138 name = xmlSplitQName3(fullname, &len);
1139 if (name == NULL) {
1140 name = xmlDictLookup(ctxt->dict, fullname, -1);
1141 prefix = NULL;
1142 } else {
1143 name = xmlDictLookup(ctxt->dict, name, -1);
1144 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1145 }
1146
1147 /*
1148 * make sure there is some storage
1149 */
1150 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1151 if (defaults == NULL) {
1152 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1153 (4 * 5) * sizeof(const xmlChar *));
1154 if (defaults == NULL)
1155 goto mem_error;
1156 defaults->nbAttrs = 0;
1157 defaults->maxAttrs = 4;
1158 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1159 defaults, NULL) < 0) {
1160 xmlFree(defaults);
1161 goto mem_error;
1162 }
1163 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1164 xmlDefAttrsPtr temp;
1165
1166 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1167 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1168 if (temp == NULL)
1169 goto mem_error;
1170 defaults = temp;
1171 defaults->maxAttrs *= 2;
1172 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1173 defaults, NULL) < 0) {
1174 xmlFree(defaults);
1175 goto mem_error;
1176 }
1177 }
1178
1179 /*
1180 * Split the element name into prefix:localname , the string found
1181 * are within the DTD and hen not associated to namespace names.
1182 */
1183 name = xmlSplitQName3(fullattr, &len);
1184 if (name == NULL) {
1185 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1186 prefix = NULL;
1187 } else {
1188 name = xmlDictLookup(ctxt->dict, name, -1);
1189 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1190 }
1191
1192 defaults->values[5 * defaults->nbAttrs] = name;
1193 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1194 /* intern the string and precompute the end */
1195 len = xmlStrlen(value);
1196 value = xmlDictLookup(ctxt->dict, value, len);
1197 defaults->values[5 * defaults->nbAttrs + 2] = value;
1198 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1199 if (ctxt->external)
1200 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1201 else
1202 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1203 defaults->nbAttrs++;
1204
1205 return;
1206
1207 mem_error:
1208 xmlErrMemory(ctxt, NULL);
1209 return;
1210 }
1211
1212 /**
1213 * xmlAddSpecialAttr:
1214 * @ctxt: an XML parser context
1215 * @fullname: the element fullname
1216 * @fullattr: the attribute fullname
1217 * @type: the attribute type
1218 *
1219 * Register this attribute type
1220 */
1221 static void
1222 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1223 const xmlChar *fullname,
1224 const xmlChar *fullattr,
1225 int type)
1226 {
1227 if (ctxt->attsSpecial == NULL) {
1228 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1229 if (ctxt->attsSpecial == NULL)
1230 goto mem_error;
1231 }
1232
1233 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1234 return;
1235
1236 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1237 (void *) (long) type);
1238 return;
1239
1240 mem_error:
1241 xmlErrMemory(ctxt, NULL);
1242 return;
1243 }
1244
1245 /**
1246 * xmlCleanSpecialAttrCallback:
1247 *
1248 * Removes CDATA attributes from the special attribute table
1249 */
1250 static void
1251 xmlCleanSpecialAttrCallback(void *payload, void *data,
1252 const xmlChar *fullname, const xmlChar *fullattr,
1253 const xmlChar *unused ATTRIBUTE_UNUSED) {
1254 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1255
1256 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1257 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1258 }
1259 }
1260
1261 /**
1262 * xmlCleanSpecialAttr:
1263 * @ctxt: an XML parser context
1264 *
1265 * Trim the list of attributes defined to remove all those of type
1266 * CDATA as they are not special. This call should be done when finishing
1267 * to parse the DTD and before starting to parse the document root.
1268 */
1269 static void
1270 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1271 {
1272 if (ctxt->attsSpecial == NULL)
1273 return;
1274
1275 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1276
1277 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1278 xmlHashFree(ctxt->attsSpecial, NULL);
1279 ctxt->attsSpecial = NULL;
1280 }
1281 return;
1282 }
1283
1284 /**
1285 * xmlCheckLanguageID:
1286 * @lang: pointer to the string value
1287 *
1288 * Checks that the value conforms to the LanguageID production:
1289 *
1290 * NOTE: this is somewhat deprecated, those productions were removed from
1291 * the XML Second edition.
1292 *
1293 * [33] LanguageID ::= Langcode ('-' Subcode)*
1294 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1295 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1296 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1297 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1298 * [38] Subcode ::= ([a-z] | [A-Z])+
1299 *
1300 * Returns 1 if correct 0 otherwise
1301 **/
1302 int
1303 xmlCheckLanguageID(const xmlChar * lang)
1304 {
1305 const xmlChar *cur = lang;
1306
1307 if (cur == NULL)
1308 return (0);
1309 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1310 ((cur[0] == 'I') && (cur[1] == '-'))) {
1311 /*
1312 * IANA code
1313 */
1314 cur += 2;
1315 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1316 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1317 cur++;
1318 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1319 ((cur[0] == 'X') && (cur[1] == '-'))) {
1320 /*
1321 * User code
1322 */
1323 cur += 2;
1324 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1325 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1326 cur++;
1327 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1328 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1329 /*
1330 * ISO639
1331 */
1332 cur++;
1333 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1334 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1335 cur++;
1336 else
1337 return (0);
1338 } else
1339 return (0);
1340 while (cur[0] != 0) { /* non input consuming */
1341 if (cur[0] != '-')
1342 return (0);
1343 cur++;
1344 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1345 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1346 cur++;
1347 else
1348 return (0);
1349 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1350 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1351 cur++;
1352 }
1353 return (1);
1354 }
1355
1356 /************************************************************************
1357 * *
1358 * Parser stacks related functions and macros *
1359 * *
1360 ************************************************************************/
1361
1362 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1363 const xmlChar ** str);
1364
1365 #ifdef SAX2
1366 /**
1367 * nsPush:
1368 * @ctxt: an XML parser context
1369 * @prefix: the namespace prefix or NULL
1370 * @URL: the namespace name
1371 *
1372 * Pushes a new parser namespace on top of the ns stack
1373 *
1374 * Returns -1 in case of error, -2 if the namespace should be discarded
1375 * and the index in the stack otherwise.
1376 */
1377 static int
1378 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1379 {
1380 if (ctxt->options & XML_PARSE_NSCLEAN) {
1381 int i;
1382 for (i = 0;i < ctxt->nsNr;i += 2) {
1383 if (ctxt->nsTab[i] == prefix) {
1384 /* in scope */
1385 if (ctxt->nsTab[i + 1] == URL)
1386 return(-2);
1387 /* out of scope keep it */
1388 break;
1389 }
1390 }
1391 }
1392 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1393 ctxt->nsMax = 10;
1394 ctxt->nsNr = 0;
1395 ctxt->nsTab = (const xmlChar **)
1396 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1397 if (ctxt->nsTab == NULL) {
1398 xmlErrMemory(ctxt, NULL);
1399 ctxt->nsMax = 0;
1400 return (-1);
1401 }
1402 } else if (ctxt->nsNr >= ctxt->nsMax) {
1403 const xmlChar ** tmp;
1404 ctxt->nsMax *= 2;
1405 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1406 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1407 if (tmp == NULL) {
1408 xmlErrMemory(ctxt, NULL);
1409 ctxt->nsMax /= 2;
1410 return (-1);
1411 }
1412 ctxt->nsTab = tmp;
1413 }
1414 ctxt->nsTab[ctxt->nsNr++] = prefix;
1415 ctxt->nsTab[ctxt->nsNr++] = URL;
1416 return (ctxt->nsNr);
1417 }
1418 /**
1419 * nsPop:
1420 * @ctxt: an XML parser context
1421 * @nr: the number to pop
1422 *
1423 * Pops the top @nr parser prefix/namespace from the ns stack
1424 *
1425 * Returns the number of namespaces removed
1426 */
1427 static int
1428 nsPop(xmlParserCtxtPtr ctxt, int nr)
1429 {
1430 int i;
1431
1432 if (ctxt->nsTab == NULL) return(0);
1433 if (ctxt->nsNr < nr) {
1434 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1435 nr = ctxt->nsNr;
1436 }
1437 if (ctxt->nsNr <= 0)
1438 return (0);
1439
1440 for (i = 0;i < nr;i++) {
1441 ctxt->nsNr--;
1442 ctxt->nsTab[ctxt->nsNr] = NULL;
1443 }
1444 return(nr);
1445 }
1446 #endif
1447
1448 static int
1449 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1450 const xmlChar **atts;
1451 int *attallocs;
1452 int maxatts;
1453
1454 if (ctxt->atts == NULL) {
1455 maxatts = 55; /* allow for 10 attrs by default */
1456 atts = (const xmlChar **)
1457 xmlMalloc(maxatts * sizeof(xmlChar *));
1458 if (atts == NULL) goto mem_error;
1459 ctxt->atts = atts;
1460 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1461 if (attallocs == NULL) goto mem_error;
1462 ctxt->attallocs = attallocs;
1463 ctxt->maxatts = maxatts;
1464 } else if (nr + 5 > ctxt->maxatts) {
1465 maxatts = (nr + 5) * 2;
1466 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1467 maxatts * sizeof(const xmlChar *));
1468 if (atts == NULL) goto mem_error;
1469 ctxt->atts = atts;
1470 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1471 (maxatts / 5) * sizeof(int));
1472 if (attallocs == NULL) goto mem_error;
1473 ctxt->attallocs = attallocs;
1474 ctxt->maxatts = maxatts;
1475 }
1476 return(ctxt->maxatts);
1477 mem_error:
1478 xmlErrMemory(ctxt, NULL);
1479 return(-1);
1480 }
1481
1482 /**
1483 * inputPush:
1484 * @ctxt: an XML parser context
1485 * @value: the parser input
1486 *
1487 * Pushes a new parser input on top of the input stack
1488 *
1489 * Returns -1 in case of error, the index in the stack otherwise
1490 */
1491 int
1492 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1493 {
1494 if ((ctxt == NULL) || (value == NULL))
1495 return(-1);
1496 if (ctxt->inputNr >= ctxt->inputMax) {
1497 ctxt->inputMax *= 2;
1498 ctxt->inputTab =
1499 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1500 ctxt->inputMax *
1501 sizeof(ctxt->inputTab[0]));
1502 if (ctxt->inputTab == NULL) {
1503 xmlErrMemory(ctxt, NULL);
1504 xmlFreeInputStream(value);
1505 ctxt->inputMax /= 2;
1506 value = NULL;
1507 return (-1);
1508 }
1509 }
1510 ctxt->inputTab[ctxt->inputNr] = value;
1511 ctxt->input = value;
1512 return (ctxt->inputNr++);
1513 }
1514 /**
1515 * inputPop:
1516 * @ctxt: an XML parser context
1517 *
1518 * Pops the top parser input from the input stack
1519 *
1520 * Returns the input just removed
1521 */
1522 xmlParserInputPtr
1523 inputPop(xmlParserCtxtPtr ctxt)
1524 {
1525 xmlParserInputPtr ret;
1526
1527 if (ctxt == NULL)
1528 return(NULL);
1529 if (ctxt->inputNr <= 0)
1530 return (NULL);
1531 ctxt->inputNr--;
1532 if (ctxt->inputNr > 0)
1533 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1534 else
1535 ctxt->input = NULL;
1536 ret = ctxt->inputTab[ctxt->inputNr];
1537 ctxt->inputTab[ctxt->inputNr] = NULL;
1538 return (ret);
1539 }
1540 /**
1541 * nodePush:
1542 * @ctxt: an XML parser context
1543 * @value: the element node
1544 *
1545 * Pushes a new element node on top of the node stack
1546 *
1547 * Returns -1 in case of error, the index in the stack otherwise
1548 */
1549 int
1550 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1551 {
1552 if (ctxt == NULL) return(0);
1553 if (ctxt->nodeNr >= ctxt->nodeMax) {
1554 xmlNodePtr *tmp;
1555
1556 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1557 ctxt->nodeMax * 2 *
1558 sizeof(ctxt->nodeTab[0]));
1559 if (tmp == NULL) {
1560 xmlErrMemory(ctxt, NULL);
1561 return (-1);
1562 }
1563 ctxt->nodeTab = tmp;
1564 ctxt->nodeMax *= 2;
1565 }
1566 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1567 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1568 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1569 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1570 xmlParserMaxDepth);
1571 ctxt->instate = XML_PARSER_EOF;
1572 return(-1);
1573 }
1574 ctxt->nodeTab[ctxt->nodeNr] = value;
1575 ctxt->node = value;
1576 return (ctxt->nodeNr++);
1577 }
1578
1579 /**
1580 * nodePop:
1581 * @ctxt: an XML parser context
1582 *
1583 * Pops the top element node from the node stack
1584 *
1585 * Returns the node just removed
1586 */
1587 xmlNodePtr
1588 nodePop(xmlParserCtxtPtr ctxt)
1589 {
1590 xmlNodePtr ret;
1591
1592 if (ctxt == NULL) return(NULL);
1593 if (ctxt->nodeNr <= 0)
1594 return (NULL);
1595 ctxt->nodeNr--;
1596 if (ctxt->nodeNr > 0)
1597 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1598 else
1599 ctxt->node = NULL;
1600 ret = ctxt->nodeTab[ctxt->nodeNr];
1601 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1602 return (ret);
1603 }
1604
1605 #ifdef LIBXML_PUSH_ENABLED
1606 /**
1607 * nameNsPush:
1608 * @ctxt: an XML parser context
1609 * @value: the element name
1610 * @prefix: the element prefix
1611 * @URI: the element namespace name
1612 *
1613 * Pushes a new element name/prefix/URL on top of the name stack
1614 *
1615 * Returns -1 in case of error, the index in the stack otherwise
1616 */
1617 static int
1618 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1619 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1620 {
1621 if (ctxt->nameNr >= ctxt->nameMax) {
1622 const xmlChar * *tmp;
1623 void **tmp2;
1624 ctxt->nameMax *= 2;
1625 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1626 ctxt->nameMax *
1627 sizeof(ctxt->nameTab[0]));
1628 if (tmp == NULL) {
1629 ctxt->nameMax /= 2;
1630 goto mem_error;
1631 }
1632 ctxt->nameTab = tmp;
1633 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1634 ctxt->nameMax * 3 *
1635 sizeof(ctxt->pushTab[0]));
1636 if (tmp2 == NULL) {
1637 ctxt->nameMax /= 2;
1638 goto mem_error;
1639 }
1640 ctxt->pushTab = tmp2;
1641 }
1642 ctxt->nameTab[ctxt->nameNr] = value;
1643 ctxt->name = value;
1644 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1645 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1646 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1647 return (ctxt->nameNr++);
1648 mem_error:
1649 xmlErrMemory(ctxt, NULL);
1650 return (-1);
1651 }
1652 /**
1653 * nameNsPop:
1654 * @ctxt: an XML parser context
1655 *
1656 * Pops the top element/prefix/URI name from the name stack
1657 *
1658 * Returns the name just removed
1659 */
1660 static const xmlChar *
1661 nameNsPop(xmlParserCtxtPtr ctxt)
1662 {
1663 const xmlChar *ret;
1664
1665 if (ctxt->nameNr <= 0)
1666 return (NULL);
1667 ctxt->nameNr--;
1668 if (ctxt->nameNr > 0)
1669 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1670 else
1671 ctxt->name = NULL;
1672 ret = ctxt->nameTab[ctxt->nameNr];
1673 ctxt->nameTab[ctxt->nameNr] = NULL;
1674 return (ret);
1675 }
1676 #endif /* LIBXML_PUSH_ENABLED */
1677
1678 /**
1679 * namePush:
1680 * @ctxt: an XML parser context
1681 * @value: the element name
1682 *
1683 * Pushes a new element name on top of the name stack
1684 *
1685 * Returns -1 in case of error, the index in the stack otherwise
1686 */
1687 int
1688 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1689 {
1690 if (ctxt == NULL) return (-1);
1691
1692 if (ctxt->nameNr >= ctxt->nameMax) {
1693 const xmlChar * *tmp;
1694 ctxt->nameMax *= 2;
1695 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1696 ctxt->nameMax *
1697 sizeof(ctxt->nameTab[0]));
1698 if (tmp == NULL) {
1699 ctxt->nameMax /= 2;
1700 goto mem_error;
1701 }
1702 ctxt->nameTab = tmp;
1703 }
1704 ctxt->nameTab[ctxt->nameNr] = value;
1705 ctxt->name = value;
1706 return (ctxt->nameNr++);
1707 mem_error:
1708 xmlErrMemory(ctxt, NULL);
1709 return (-1);
1710 }
1711 /**
1712 * namePop:
1713 * @ctxt: an XML parser context
1714 *
1715 * Pops the top element name from the name stack
1716 *
1717 * Returns the name just removed
1718 */
1719 const xmlChar *
1720 namePop(xmlParserCtxtPtr ctxt)
1721 {
1722 const xmlChar *ret;
1723
1724 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1725 return (NULL);
1726 ctxt->nameNr--;
1727 if (ctxt->nameNr > 0)
1728 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1729 else
1730 ctxt->name = NULL;
1731 ret = ctxt->nameTab[ctxt->nameNr];
1732 ctxt->nameTab[ctxt->nameNr] = NULL;
1733 return (ret);
1734 }
1735
1736 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1737 if (ctxt->spaceNr >= ctxt->spaceMax) {
1738 int *tmp;
1739
1740 ctxt->spaceMax *= 2;
1741 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1742 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1743 if (tmp == NULL) {
1744 xmlErrMemory(ctxt, NULL);
1745 ctxt->spaceMax /=2;
1746 return(-1);
1747 }
1748 ctxt->spaceTab = tmp;
1749 }
1750 ctxt->spaceTab[ctxt->spaceNr] = val;
1751 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1752 return(ctxt->spaceNr++);
1753 }
1754
1755 static int spacePop(xmlParserCtxtPtr ctxt) {
1756 int ret;
1757 if (ctxt->spaceNr <= 0) return(0);
1758 ctxt->spaceNr--;
1759 if (ctxt->spaceNr > 0)
1760 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1761 else
1762 ctxt->space = &ctxt->spaceTab[0];
1763 ret = ctxt->spaceTab[ctxt->spaceNr];
1764 ctxt->spaceTab[ctxt->spaceNr] = -1;
1765 return(ret);
1766 }
1767
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1802
/*
 * Direct accessors on the current input of the `ctxt' variable that must
 * be in scope at the point of use. RAW and CUR expand to the same thing.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as
 * unsigned chars, are c1 ... cn. Every parameter is parenthesized so
 * that arbitrary expressions can be passed safely; s may be evaluated
 * up to n times.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )

/*
 * SKIP(val): advance by val xmlChars, updating nbChars and the column
 * only (no line accounting), then handle a '%' or the end of the
 * current input at the new position.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);	\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks one xmlChar at a time so that
 * newlines update the line/column counters.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1848
1849 #define SHRINK if ((ctxt->progressive == 0) && \
1850 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1851 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1852 xmlSHRINK (ctxt);
1853
1854 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1855 xmlParserInputShrink(ctxt->input);
1856 if ((*ctxt->input->cur == 0) &&
1857 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1858 xmlPopInput(ctxt);
1859 }
1860
1861 #define GROW if ((ctxt->progressive == 0) && \
1862 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
1863 xmlGROW (ctxt);
1864
1865 static void xmlGROW (xmlParserCtxtPtr ctxt) {
1866 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1867 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
1868 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1869 xmlPopInput(ctxt);
1870 }
1871
/* Skip blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, counting it as one column, and
 * try to read more data when a 0 byte is reached.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, l xmlChars long, with
 * line/column accounting, then handle a '%' at the new position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an lvalue: it receives the length of the character read */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a length l of 1 takes the single byte shortcut. Kept as a
 * bare if/else (no trailing semicolon) so existing call sites still parse.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
1898
1899 /**
1900 * xmlSkipBlankChars:
1901 * @ctxt: the XML parser context
1902 *
1903 * skip all blanks character found at that point in the input streams.
1904 * It pops up finished entities in the process if allowable at that point.
1905 *
1906 * Returns the number of space chars skipped
1907 */
1908
1909 int
1910 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1911 int res = 0;
1912
1913 /*
1914 * It's Okay to use CUR/NEXT here since all the blanks are on
1915 * the ASCII range.
1916 */
1917 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1918 const xmlChar *cur;
1919 /*
1920 * if we are in the document content, go really fast
1921 */
1922 cur = ctxt->input->cur;
1923 while (IS_BLANK_CH(*cur)) {
1924 if (*cur == '\n') {
1925 ctxt->input->line++; ctxt->input->col = 1;
1926 }
1927 cur++;
1928 res++;
1929 if (*cur == 0) {
1930 ctxt->input->cur = cur;
1931 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1932 cur = ctxt->input->cur;
1933 }
1934 }
1935 ctxt->input->cur = cur;
1936 } else {
1937 int cur;
1938 do {
1939 cur = CUR;
1940 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
1941 NEXT;
1942 cur = CUR;
1943 res++;
1944 }
1945 while ((cur == 0) && (ctxt->inputNr > 1) &&
1946 (ctxt->instate != XML_PARSER_COMMENT)) {
1947 xmlPopInput(ctxt);
1948 cur = CUR;
1949 }
1950 /*
1951 * Need to handle support of entities branching here
1952 */
1953 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1954 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1955 }
1956 return(res);
1957 }
1958
1959 /************************************************************************
1960 * *
1961 * Commodity functions to handle entities *
1962 * *
1963 ************************************************************************/
1964
1965 /**
1966 * xmlPopInput:
1967 * @ctxt: an XML parser context
1968 *
1969 * xmlPopInput: the current input pointed by ctxt->input came to an end
1970 * pop it and return the next char.
1971 *
1972 * Returns the current xmlChar in the parser context
1973 */
1974 xmlChar
1975 xmlPopInput(xmlParserCtxtPtr ctxt) {
1976 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
1977 if (xmlParserDebugEntities)
1978 xmlGenericError(xmlGenericErrorContext,
1979 "Popping input %d\n", ctxt->inputNr);
1980 xmlFreeInputStream(inputPop(ctxt));
1981 if ((*ctxt->input->cur == 0) &&
1982 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1983 return(xmlPopInput(ctxt));
1984 return(CUR);
1985 }
1986
1987 /**
1988 * xmlPushInput:
1989 * @ctxt: an XML parser context
1990 * @input: an XML parser input fragment (entity, XML fragment ...).
1991 *
1992 * xmlPushInput: switch to a new input stream which is stacked on top
1993 * of the previous one(s).
1994 * Returns -1 in case of error or the index in the input stack
1995 */
1996 int
1997 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1998 int ret;
1999 if (input == NULL) return(-1);
2000
2001 if (xmlParserDebugEntities) {
2002 if ((ctxt->input != NULL) && (ctxt->input->filename))
2003 xmlGenericError(xmlGenericErrorContext,
2004 "%s(%d): ", ctxt->input->filename,
2005 ctxt->input->line);
2006 xmlGenericError(xmlGenericErrorContext,
2007 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2008 }
2009 ret = inputPush(ctxt, input);
2010 GROW;
2011 return(ret);
2012 }
2013
2014 /**
2015 * xmlParseCharRef:
2016 * @ctxt: an XML parser context
2017 *
2018 * parse Reference declarations
2019 *
2020 * [66] CharRef ::= '&#' [0-9]+ ';' |
2021 * '&#x' [0-9a-fA-F]+ ';'
2022 *
2023 * [ WFC: Legal Character ]
2024 * Characters referred to using character references must match the
2025 * production for Char.
2026 *
2027 * Returns the value parsed (as an int), 0 in case of error
2028 */
2029 int
2030 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2031 unsigned int val = 0;
2032 int count = 0;
2033 unsigned int outofrange = 0;
2034
2035 /*
2036 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2037 */
2038 if ((RAW == '&') && (NXT(1) == '#') &&
2039 (NXT(2) == 'x')) {
2040 SKIP(3);
2041 GROW;
2042 while (RAW != ';') { /* loop blocked by count */
2043 if (count++ > 20) {
2044 count = 0;
2045 GROW;
2046 }
2047 if ((RAW >= '0') && (RAW <= '9'))
2048 val = val * 16 + (CUR - '0');
2049 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2050 val = val * 16 + (CUR - 'a') + 10;
2051 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2052 val = val * 16 + (CUR - 'A') + 10;
2053 else {
2054 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2055 val = 0;
2056 break;
2057 }
2058 if (val > 0x10FFFF)
2059 outofrange = val;
2060
2061 NEXT;
2062 count++;
2063 }
2064 if (RAW == ';') {
2065 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2066 ctxt->input->col++;
2067 ctxt->nbChars ++;
2068 ctxt->input->cur++;
2069 }
2070 } else if ((RAW == '&') && (NXT(1) == '#')) {
2071 SKIP(2);
2072 GROW;
2073 while (RAW != ';') { /* loop blocked by count */
2074 if (count++ > 20) {
2075 count = 0;
2076 GROW;
2077 }
2078 if ((RAW >= '0') && (RAW <= '9'))
2079 val = val * 10 + (CUR - '0');
2080 else {
2081 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2082 val = 0;
2083 break;
2084 }
2085 if (val > 0x10FFFF)
2086 outofrange = val;
2087
2088 NEXT;
2089 count++;
2090 }
2091 if (RAW == ';') {
2092 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2093 ctxt->input->col++;
2094 ctxt->nbChars ++;
2095 ctxt->input->cur++;
2096 }
2097 } else {
2098 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2099 }
2100
2101 /*
2102 * [ WFC: Legal Character ]
2103 * Characters referred to using character references must match the
2104 * production for Char.
2105 */
2106 if ((IS_CHAR(val) && (outofrange == 0))) {
2107 return(val);
2108 } else {
2109 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2110 "xmlParseCharRef: invalid xmlChar value %d\n",
2111 val);
2112 }
2113 return(0);
2114 }
2115
2116 /**
2117 * xmlParseStringCharRef:
2118 * @ctxt: an XML parser context
2119 * @str: a pointer to an index in the string
2120 *
2121 * parse Reference declarations, variant parsing from a string rather
2122 * than an an input flow.
2123 *
2124 * [66] CharRef ::= '&#' [0-9]+ ';' |
2125 * '&#x' [0-9a-fA-F]+ ';'
2126 *
2127 * [ WFC: Legal Character ]
2128 * Characters referred to using character references must match the
2129 * production for Char.
2130 *
2131 * Returns the value parsed (as an int), 0 in case of error, str will be
2132 * updated to the current value of the index
2133 */
2134 static int
2135 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2136 const xmlChar *ptr;
2137 xmlChar cur;
2138 unsigned int val = 0;
2139 unsigned int outofrange = 0;
2140
2141 if ((str == NULL) || (*str == NULL)) return(0);
2142 ptr = *str;
2143 cur = *ptr;
2144 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2145 ptr += 3;
2146 cur = *ptr;
2147 while (cur != ';') { /* Non input consuming loop */
2148 if ((cur >= '0') && (cur <= '9'))
2149 val = val * 16 + (cur - '0');
2150 else if ((cur >= 'a') && (cur <= 'f'))
2151 val = val * 16 + (cur - 'a') + 10;
2152 else if ((cur >= 'A') && (cur <= 'F'))
2153 val = val * 16 + (cur - 'A') + 10;
2154 else {
2155 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2156 val = 0;
2157 break;
2158 }
2159 if (val > 0x10FFFF)
2160 outofrange = val;
2161
2162 ptr++;
2163 cur = *ptr;
2164 }
2165 if (cur == ';')
2166 ptr++;
2167 } else if ((cur == '&') && (ptr[1] == '#')){
2168 ptr += 2;
2169 cur = *ptr;
2170 while (cur != ';') { /* Non input consuming loops */
2171 if ((cur >= '0') && (cur <= '9'))
2172 val = val * 10 + (cur - '0');
2173 else {
2174 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2175 val = 0;
2176 break;
2177 }
2178 if (val > 0x10FFFF)
2179 outofrange = val;
2180
2181 ptr++;
2182 cur = *ptr;
2183 }
2184 if (cur == ';')
2185 ptr++;
2186 } else {
2187 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2188 return(0);
2189 }
2190 *str = ptr;
2191
2192 /*
2193 * [ WFC: Legal Character ]
2194 * Characters referred to using character references must match the
2195 * production for Char.
2196 */
2197 if ((IS_CHAR(val) && (outofrange == 0))) {
2198 return(val);
2199 } else {
2200 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2201 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2202 val);
2203 }
2204 return(0);
2205 }
2206
2207 /**
2208 * xmlNewBlanksWrapperInputStream:
2209 * @ctxt: an XML parser context
2210 * @entity: an Entity pointer
2211 *
2212 * Create a new input stream for wrapping
2213 * blanks around a PEReference
2214 *
2215 * Returns the new input stream or NULL
2216 */
2217
2218 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2219
2220 static xmlParserInputPtr
2221 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2222 xmlParserInputPtr input;
2223 xmlChar *buffer;
2224 size_t length;
2225 if (entity == NULL) {
2226 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2227 "xmlNewBlanksWrapperInputStream entity\n");
2228 return(NULL);
2229 }
2230 if (xmlParserDebugEntities)
2231 xmlGenericError(xmlGenericErrorContext,
2232 "new blanks wrapper for entity: %s\n", entity->name);
2233 input = xmlNewInputStream(ctxt);
2234 if (input == NULL) {
2235 return(NULL);
2236 }
2237 length = xmlStrlen(entity->name) + 5;
2238 buffer = xmlMallocAtomic(length);
2239 if (buffer == NULL) {
2240 xmlErrMemory(ctxt, NULL);
2241 xmlFree(input);
2242 return(NULL);
2243 }
2244 buffer [0] = ' ';
2245 buffer [1] = '%';
2246 buffer [length-3] = ';';
2247 buffer [length-2] = ' ';
2248 buffer [length-1] = 0;
2249 memcpy(buffer + 2, entity->name, length - 5);
2250 input->free = deallocblankswrapper;
2251 input->base = buffer;
2252 input->cur = buffer;
2253 input->length = length;
2254 input->end = &buffer[length];
2255 return(input);
2256 }
2257
2258 /**
2259 * xmlParserHandlePEReference:
2260 * @ctxt: the parser context
2261 *
2262 * [69] PEReference ::= '%' Name ';'
2263 *
2264 * [ WFC: No Recursion ]
2265 * A parsed entity must not contain a recursive
2266 * reference to itself, either directly or indirectly.
2267 *
2268 * [ WFC: Entity Declared ]
2269 * In a document without any DTD, a document with only an internal DTD
2270 * subset which contains no parameter entity references, or a document
2271 * with "standalone='yes'", ... ... The declaration of a parameter
2272 * entity must precede any reference to it...
2273 *
2274 * [ VC: Entity Declared ]
2275 * In a document with an external subset or external parameter entities
2276 * with "standalone='no'", ... ... The declaration of a parameter entity
2277 * must precede any reference to it...
2278 *
2279 * [ WFC: In DTD ]
2280 * Parameter-entity references may only appear in the DTD.
2281 * NOTE: misleading but this is handled.
2282 *
2283 * A PEReference may have been detected in the current input stream
2284 * the handling is done accordingly to
2285 * http://www.w3.org/TR/REC-xml#entproc
2286 * i.e.
2287 * - Included in literal in entity values
2288 * - Included as Parameter Entity reference within DTDs
2289 */
2290 void
2291 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2292 const xmlChar *name;
2293 xmlEntityPtr entity = NULL;
2294 xmlParserInputPtr input;
2295
2296 if (RAW != '%') return;
2297 switch(ctxt->instate) {
2298 case XML_PARSER_CDATA_SECTION:
2299 return;
2300 case XML_PARSER_COMMENT:
2301 return;
2302 case XML_PARSER_START_TAG:
2303 return;
2304 case XML_PARSER_END_TAG:
2305 return;
2306 case XML_PARSER_EOF:
2307 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2308 return;
2309 case XML_PARSER_PROLOG:
2310 case XML_PARSER_START:
2311 case XML_PARSER_MISC:
2312 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2313 return;
2314 case XML_PARSER_ENTITY_DECL:
2315 case XML_PARSER_CONTENT:
2316 case XML_PARSER_ATTRIBUTE_VALUE:
2317 case XML_PARSER_PI:
2318 case XML_PARSER_SYSTEM_LITERAL:
2319 case XML_PARSER_PUBLIC_LITERAL:
2320 /* we just ignore it there */
2321 return;
2322 case XML_PARSER_EPILOG:
2323 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2324 return;
2325 case XML_PARSER_ENTITY_VALUE:
2326 /*
2327 * NOTE: in the case of entity values, we don't do the
2328 * substitution here since we need the literal
2329 * entity value to be able to save the internal
2330 * subset of the document.
2331 * This will be handled by xmlStringDecodeEntities
2332 */
2333 return;
2334 case XML_PARSER_DTD:
2335 /*
2336 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2337 * In the internal DTD subset, parameter-entity references
2338 * can occur only where markup declarations can occur, not
2339 * within markup declarations.
2340 * In that case this is handled in xmlParseMarkupDecl
2341 */
2342 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2343 return;
2344 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2345 return;
2346 break;
2347 case XML_PARSER_IGNORE:
2348 return;
2349 }
2350
2351 NEXT;
2352 name = xmlParseName(ctxt);
2353 if (xmlParserDebugEntities)
2354 xmlGenericError(xmlGenericErrorContext,
2355 "PEReference: %s\n", name);
2356 if (name == NULL) {
2357 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2358 } else {
2359 if (RAW == ';') {
2360 NEXT;
2361 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2362 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2363 if (entity == NULL) {
2364
2365 /*
2366 * [ WFC: Entity Declared ]
2367 * In a document without any DTD, a document with only an
2368 * internal DTD subset which contains no parameter entity
2369 * references, or a document with "standalone='yes'", ...
2370 * ... The declaration of a parameter entity must precede
2371 * any reference to it...
2372 */
2373 if ((ctxt->standalone == 1) ||
2374 ((ctxt->hasExternalSubset == 0) &&
2375 (ctxt->hasPErefs == 0))) {
2376 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2377 "PEReference: %%%s; not found\n", name);
2378 } else {
2379 /*
2380 * [ VC: Entity Declared ]
2381 * In a document with an external subset or external
2382 * parameter entities with "standalone='no'", ...
2383 * ... The declaration of a parameter entity must precede
2384 * any reference to it...
2385 */
2386 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2387 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2388 "PEReference: %%%s; not found\n",
2389 name, NULL);
2390 } else
2391 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2392 "PEReference: %%%s; not found\n",
2393 name, NULL);
2394 ctxt->valid = 0;
2395 }
2396 } else if (ctxt->input->free != deallocblankswrapper) {
2397 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2398 if (xmlPushInput(ctxt, input) < 0)
2399 return;
2400 } else {
2401 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2402 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2403 xmlChar start[4];
2404 xmlCharEncoding enc;
2405
2406 /*
2407 * handle the extra spaces added before and after
2408 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2409 * this is done independently.
2410 */
2411 input = xmlNewEntityInputStream(ctxt, entity);
2412 if (xmlPushInput(ctxt, input) < 0)
2413 return;
2414
2415 /*
2416 * Get the 4 first bytes and decode the charset
2417 * if enc != XML_CHAR_ENCODING_NONE
2418 * plug some encoding conversion routines.
2419 * Note that, since we may have some non-UTF8
2420 * encoding (like UTF16, bug 135229), the 'length'
2421 * is not known, but we can calculate based upon
2422 * the amount of data in the buffer.
2423 */
2424 GROW
2425 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2426 start[0] = RAW;
2427 start[1] = NXT(1);
2428 start[2] = NXT(2);
2429 start[3] = NXT(3);
2430 enc = xmlDetectCharEncoding(start, 4);
2431 if (enc != XML_CHAR_ENCODING_NONE) {
2432 xmlSwitchEncoding(ctxt, enc);
2433 }
2434 }
2435
2436 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2437 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2438 (IS_BLANK_CH(NXT(5)))) {
2439 xmlParseTextDecl(ctxt);
2440 }
2441 } else {
2442 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2443 "PEReference: %s is not a parameter entity\n",
2444 name);
2445 }
2446 }
2447 } else {
2448 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2449 }
2450 }
2451 }
2452
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer, n) doubles <buffer>_size, adds n to it and
 * reallocates buffer to that many xmlChar. The expansion site must
 * provide an int-like variable named <buffer>_size and a mem_error
 * label, which is jumped to when the reallocation fails (buffer is left
 * pointing at the old block in that case).
 */
#define growBuffer(buffer, n) {						\
    xmlChar *grown;							\
    buffer##_size += buffer##_size;					\
    buffer##_size += (n);						\
    grown = (xmlChar *)							\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (grown == NULL) goto mem_error;					\
    buffer = grown;							\
}
2465
2466 /**
2467 * xmlStringLenDecodeEntities:
2468 * @ctxt: the parser context
2469 * @str: the input string
2470 * @len: the string length
2471 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2472 * @end: an end marker xmlChar, 0 if none
2473 * @end2: an end marker xmlChar, 0 if none
2474 * @end3: an end marker xmlChar, 0 if none
2475 *
2476 * Takes a entity string content and process to do the adequate substitutions.
2477 *
2478 * [67] Reference ::= EntityRef | CharRef
2479 *
2480 * [69] PEReference ::= '%' Name ';'
2481 *
2482 * Returns A newly allocated string with the substitution done. The caller
2483 * must deallocate it !
2484 */
2485 xmlChar *
2486 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2487 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2488 xmlChar *buffer = NULL;
2489 int buffer_size = 0;
2490
2491 xmlChar *current = NULL;
2492 xmlChar *rep = NULL;
2493 const xmlChar *last;
2494 xmlEntityPtr ent;
2495 int c,l;
2496 int nbchars = 0;
2497
2498 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2499 return(NULL);
2500 last = str + len;
2501
2502 if (((ctxt->depth > 40) &&
2503 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2504 (ctxt->depth > 1024)) {
2505 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2506 return(NULL);
2507 }
2508
2509 /*
2510 * allocate a translation buffer.
2511 */
2512 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2513 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2514 if (buffer == NULL) goto mem_error;
2515
2516 /*
2517 * OK loop until we reach one of the ending char or a size limit.
2518 * we are operating on already parsed values.
2519 */
2520 if (str < last)
2521 c = CUR_SCHAR(str, l);
2522 else
2523 c = 0;
2524 while ((c != 0) && (c != end) && /* non input consuming loop */
2525 (c != end2) && (c != end3)) {
2526
2527 if (c == 0) break;
2528 if ((c == '&') && (str[1] == '#')) {
2529 int val = xmlParseStringCharRef(ctxt, &str);
2530 if (val != 0) {
2531 COPY_BUF(0,buffer,nbchars,val);
2532 }
2533 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2534 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2535 }
2536 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2537 if (xmlParserDebugEntities)
2538 xmlGenericError(xmlGenericErrorContext,
2539 "String decoding Entity Reference: %.30s\n",
2540 str);
2541 ent = xmlParseStringEntityRef(ctxt, &str);
2542 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2543 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2544 goto int_error;
2545 if (ent != NULL)
2546 ctxt->nbentities += ent->checked;
2547 if ((ent != NULL) &&
2548 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2549 if (ent->content != NULL) {
2550 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2551 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2552 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2553 }
2554 } else {
2555 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2556 "predefined entity has no content\n");
2557 }
2558 } else if ((ent != NULL) && (ent->content != NULL)) {
2559 ctxt->depth++;
2560 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2561 0, 0, 0);
2562 ctxt->depth--;
2563
2564 if (rep != NULL) {
2565 current = rep;
2566 while (*current != 0) { /* non input consuming loop */
2567 buffer[nbchars++] = *current++;
2568 if (nbchars >
2569 buffer_size - XML_PARSER_BUFFER_SIZE) {
2570 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2571 goto int_error;
2572 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2573 }
2574 }
2575 xmlFree(rep);
2576 rep = NULL;
2577 }
2578 } else if (ent != NULL) {
2579 int i = xmlStrlen(ent->name);
2580 const xmlChar *cur = ent->name;
2581
2582 buffer[nbchars++] = '&';
2583 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2584 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2585 }
2586 for (;i > 0;i--)
2587 buffer[nbchars++] = *cur++;
2588 buffer[nbchars++] = ';';
2589 }
2590 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2591 if (xmlParserDebugEntities)
2592 xmlGenericError(xmlGenericErrorContext,
2593 "String decoding PE Reference: %.30s\n", str);
2594 ent = xmlParseStringPEReference(ctxt, &str);
2595 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2596 goto int_error;
2597 if (ent != NULL)
2598 ctxt->nbentities += ent->checked;
2599 if (ent != NULL) {
2600 if (ent->content == NULL) {
2601 xmlLoadEntityContent(ctxt, ent);
2602 }
2603 ctxt->depth++;
2604 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2605 0, 0, 0);
2606 ctxt->depth--;
2607 if (rep != NULL) {
2608 current = rep;
2609 while (*current != 0) { /* non input consuming loop */
2610 buffer[nbchars++] = *current++;
2611 if (nbchars >
2612 buffer_size - XML_PARSER_BUFFER_SIZE) {
2613 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2614 goto int_error;
2615 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2616 }
2617 }
2618 xmlFree(rep);
2619 rep = NULL;
2620 }
2621 }
2622 } else {
2623 COPY_BUF(l,buffer,nbchars,c);
2624 str += l;
2625 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2626 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2627 }
2628 }
2629 if (str < last)
2630 c = CUR_SCHAR(str, l);
2631 else
2632 c = 0;
2633 }
2634 buffer[nbchars] = 0;
2635 return(buffer);
2636
2637 mem_error:
2638 xmlErrMemory(ctxt, NULL);
2639 int_error:
2640 if (rep != NULL)
2641 xmlFree(rep);
2642 if (buffer != NULL)
2643 xmlFree(buffer);
2644 return(NULL);
2645 }
2646
2647 /**
2648 * xmlStringDecodeEntities:
2649 * @ctxt: the parser context
2650 * @str: the input string
2651 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2652 * @end: an end marker xmlChar, 0 if none
2653 * @end2: an end marker xmlChar, 0 if none
2654 * @end3: an end marker xmlChar, 0 if none
2655 *
2656 * Takes a entity string content and process to do the adequate substitutions.
2657 *
2658 * [67] Reference ::= EntityRef | CharRef
2659 *
2660 * [69] PEReference ::= '%' Name ';'
2661 *
2662 * Returns A newly allocated string with the substitution done. The caller
2663 * must deallocate it !
2664 */
2665 xmlChar *
2666 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2667 xmlChar end, xmlChar end2, xmlChar end3) {
2668 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2669 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2670 end, end2, end3));
2671 }
2672
2673 /************************************************************************
2674 * *
2675 * Commodity functions, cleanup needed ? *
2676 * *
2677 ************************************************************************/
2678
2679 /**
2680 * areBlanks:
2681 * @ctxt: an XML parser context
2682 * @str: a xmlChar *
2683 * @len: the size of @str
2684 * @blank_chars: we know the chars are blanks
2685 *
2686 * Is this a sequence of blank chars that one can ignore ?
2687 *
2688 * Returns 1 if ignorable 0 otherwise.
2689 */
2690
2691 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2692 int blank_chars) {
2693 int i, ret;
2694 xmlNodePtr lastChild;
2695
2696 /*
2697 * Don't spend time trying to differentiate them, the same callback is
2698 * used !
2699 */
2700 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2701 return(0);
2702
2703 /*
2704 * Check for xml:space value.
2705 */
2706 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2707 (*(ctxt->space) == -2))
2708 return(0);
2709
2710 /*
2711 * Check that the string is made of blanks
2712 */
2713 if (blank_chars == 0) {
2714 for (i = 0;i < len;i++)
2715 if (!(IS_BLANK_CH(str[i]))) return(0);
2716 }
2717
2718 /*
2719 * Look if the element is mixed content in the DTD if available
2720 */
2721 if (ctxt->node == NULL) return(0);
2722 if (ctxt->myDoc != NULL) {
2723 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2724 if (ret == 0) return(1);
2725 if (ret == 1) return(0);
2726 }
2727
2728 /*
2729 * Otherwise, heuristic :-\
2730 */
2731 if ((RAW != '<') && (RAW != 0xD)) return(0);
2732 if ((ctxt->node->children == NULL) &&
2733 (RAW == '<') && (NXT(1) == '/')) return(0);
2734
2735 lastChild = xmlGetLastChild(ctxt->node);
2736 if (lastChild == NULL) {
2737 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2738 (ctxt->node->content != NULL)) return(0);
2739 } else if (xmlNodeIsText(lastChild))
2740 return(0);
2741 else if ((ctxt->node->children != NULL) &&
2742 (xmlNodeIsText(ctxt->node->children)))
2743 return(0);
2744 return(1);
2745 }
2746
2747 /************************************************************************
2748 * *
2749 * Extra stuff for namespace support *
2750 * Relates to http://www.w3.org/TR/WD-xml-names *
2751 * *
2752 ************************************************************************/
2753
2754 /**
2755 * xmlSplitQName:
2756 * @ctxt: an XML parser context
2757 * @name: an XML parser context
2758 * @prefix: a xmlChar **
2759 *
2760 * parse an UTF8 encoded XML qualified name string
2761 *
2762 * [NS 5] QName ::= (Prefix ':')? LocalPart
2763 *
2764 * [NS 6] Prefix ::= NCName
2765 *
2766 * [NS 7] LocalPart ::= NCName
2767 *
2768 * Returns the local part, and prefix is updated
2769 * to get the Prefix if any.
2770 */
2771
2772 xmlChar *
2773 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2774 xmlChar buf[XML_MAX_NAMELEN + 5];
2775 xmlChar *buffer = NULL;
2776 int len = 0;
2777 int max = XML_MAX_NAMELEN;
2778 xmlChar *ret = NULL;
2779 const xmlChar *cur = name;
2780 int c;
2781
2782 if (prefix == NULL) return(NULL);
2783 *prefix = NULL;
2784
2785 if (cur == NULL) return(NULL);
2786
2787 #ifndef XML_XML_NAMESPACE
2788 /* xml: prefix is not really a namespace */
2789 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2790 (cur[2] == 'l') && (cur[3] == ':'))
2791 return(xmlStrdup(name));
2792 #endif
2793
2794 /* nasty but well=formed */
2795 if (cur[0] == ':')
2796 return(xmlStrdup(name));
2797
2798 c = *cur++;
2799 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2800 buf[len++] = c;
2801 c = *cur++;
2802 }
2803 if (len >= max) {
2804 /*
2805 * Okay someone managed to make a huge name, so he's ready to pay
2806 * for the processing speed.
2807 */
2808 max = len * 2;
2809
2810 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2811 if (buffer == NULL) {
2812 xmlErrMemory(ctxt, NULL);
2813 return(NULL);
2814 }
2815 memcpy(buffer, buf, len);
2816 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2817 if (len + 10 > max) {
2818 xmlChar *tmp;
2819
2820 max *= 2;
2821 tmp = (xmlChar *) xmlRealloc(buffer,
2822 max * sizeof(xmlChar));
2823 if (tmp == NULL) {
2824 xmlFree(buffer);
2825 xmlErrMemory(ctxt, NULL);
2826 return(NULL);
2827 }
2828 buffer = tmp;
2829 }
2830 buffer[len++] = c;
2831 c = *cur++;
2832 }
2833 buffer[len] = 0;
2834 }
2835
2836 if ((c == ':') && (*cur == 0)) {
2837 if (buffer != NULL)
2838 xmlFree(buffer);
2839 *prefix = NULL;
2840 return(xmlStrdup(name));
2841 }
2842
2843 if (buffer == NULL)
2844 ret = xmlStrndup(buf, len);
2845 else {
2846 ret = buffer;
2847 buffer = NULL;
2848 max = XML_MAX_NAMELEN;
2849 }
2850
2851
2852 if (c == ':') {
2853 c = *cur;
2854 *prefix = ret;
2855 if (c == 0) {
2856 return(xmlStrndup(BAD_CAST "", 0));
2857 }
2858 len = 0;
2859
2860 /*
2861 * Check that the first character is proper to start
2862 * a new name
2863 */
2864 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2865 ((c >= 0x41) && (c <= 0x5A)) ||
2866 (c == '_') || (c == ':'))) {
2867 int l;
2868 int first = CUR_SCHAR(cur, l);
2869
2870 if (!IS_LETTER(first) && (first != '_')) {
2871 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2872 "Name %s is not XML Namespace compliant\n",
2873 name);
2874 }
2875 }
2876 cur++;
2877
2878 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2879 buf[len++] = c;
2880 c = *cur++;
2881 }
2882 if (len >= max) {
2883 /*
2884 * Okay someone managed to make a huge name, so he's ready to pay
2885 * for the processing speed.
2886 */
2887 max = len * 2;
2888
2889 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2890 if (buffer == NULL) {
2891 xmlErrMemory(ctxt, NULL);
2892 return(NULL);
2893 }
2894 memcpy(buffer, buf, len);
2895 while (c != 0) { /* tested bigname2.xml */
2896 if (len + 10 > max) {
2897 xmlChar *tmp;
2898
2899 max *= 2;
2900 tmp = (xmlChar *) xmlRealloc(buffer,
2901 max * sizeof(xmlChar));
2902 if (tmp == NULL) {
2903 xmlErrMemory(ctxt, NULL);
2904 xmlFree(buffer);
2905 return(NULL);
2906 }
2907 buffer = tmp;
2908 }
2909 buffer[len++] = c;
2910 c = *cur++;
2911 }
2912 buffer[len] = 0;
2913 }
2914
2915 if (buffer == NULL)
2916 ret = xmlStrndup(buf, len);
2917 else {
2918 ret = buffer;
2919 }
2920 }
2921
2922 return(ret);
2923 }
2924
2925 /************************************************************************
2926 * *
2927 * The parser itself *
2928 * Relates to http://www.w3.org/TR/REC-xml *
2929 * *
2930 ************************************************************************/
2931
2932 /************************************************************************
2933 * *
2934 * Routines to parse Name, NCName and NmToken *
2935 * *
2936 ************************************************************************/
#ifdef DEBUG
/* Per-routine call counters, only compiled into DEBUG builds. */
static unsigned long nbParseName = 0,
                     nbParseNmToken = 0,
                     nbParseNCName = 0,
                     nbParseNCNameComplex = 0,
                     nbParseNameComplex = 0,
                     nbParseStringName = 0;
#endif
2945
2946 /*
2947 * The two following functions are related to the change of accepted
2948 * characters for Name and NmToken in the Revision 5 of XML-1.0
2949 * They correspond to the modified production [4] and the new production [4a]
2950 * changes in that revision. Also note that the macros used for the
2951 * productions Letter, Digit, CombiningChar and Extender are not needed
2952 * anymore.
2953 * We still keep compatibility to pre-revision5 parsing semantic if the
2954 * new XML_PARSE_OLD10 option is given to the parser.
2955 */
2956 static int
2957 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2958 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2959 /*
2960 * Use the new checks of production [4] [4a] amd [5] of the
2961 * Update 5 of XML-1.0
2962 */
2963 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2964 (((c >= 'a') && (c <= 'z')) ||
2965 ((c >= 'A') && (c <= 'Z')) ||
2966 (c == '_') || (c == ':') ||
2967 ((c >= 0xC0) && (c <= 0xD6)) ||
2968 ((c >= 0xD8) && (c <= 0xF6)) ||
2969 ((c >= 0xF8) && (c <= 0x2FF)) ||
2970 ((c >= 0x370) && (c <= 0x37D)) ||
2971 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2972 ((c >= 0x200C) && (c <= 0x200D)) ||
2973 ((c >= 0x2070) && (c <= 0x218F)) ||
2974 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2975 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2976 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2977 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2978 ((c >= 0x10000) && (c <= 0xEFFFF))))
2979 return(1);
2980 } else {
2981 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2982 return(1);
2983 }
2984 return(0);
2985 }
2986
2987 static int
2988 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2989 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2990 /*
2991 * Use the new checks of production [4] [4a] amd [5] of the
2992 * Update 5 of XML-1.0
2993 */
2994 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2995 (((c >= 'a') && (c <= 'z')) ||
2996 ((c >= 'A') && (c <= 'Z')) ||
2997 ((c >= '0') && (c <= '9')) || /* !start */
2998 (c == '_') || (c == ':') ||
2999 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3000 ((c >= 0xC0) && (c <= 0xD6)) ||
3001 ((c >= 0xD8) && (c <= 0xF6)) ||
3002 ((c >= 0xF8) && (c <= 0x2FF)) ||
3003 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3004 ((c >= 0x370) && (c <= 0x37D)) ||
3005 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3006 ((c >= 0x200C) && (c <= 0x200D)) ||
3007 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3008 ((c >= 0x2070) && (c <= 0x218F)) ||
3009 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3010 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3011 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3012 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3013 ((c >= 0x10000) && (c <= 0xEFFFF))))
3014 return(1);
3015 } else {
3016 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3017 (c == '.') || (c == '-') ||
3018 (c == '_') || (c == ':') ||
3019 (IS_COMBINING(c)) ||
3020 (IS_EXTENDER(c)))
3021 return(1);
3022 }
3023 return(0);
3024 }
3025
3026 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3027 int *len, int *alloc, int normalize);
3028
3029 static const xmlChar *
3030 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3031 int len = 0, l;
3032 int c;
3033 int count = 0;
3034
3035 #ifdef DEBUG
3036 nbParseNameComplex++;
3037 #endif
3038
3039 /*
3040 * Handler for more complex cases
3041 */
3042 GROW;
3043 c = CUR_CHAR(l);
3044 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3045 /*
3046 * Use the new checks of production [4] [4a] amd [5] of the
3047 * Update 5 of XML-1.0
3048 */
3049 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3050 (!(((c >= 'a') && (c <= 'z')) ||
3051 ((c >= 'A') && (c <= 'Z')) ||
3052 (c == '_') || (c == ':') ||
3053 ((c >= 0xC0) && (c <= 0xD6)) ||
3054 ((c >= 0xD8) && (c <= 0xF6)) ||
3055 ((c >= 0xF8) && (c <= 0x2FF)) ||
3056 ((c >= 0x370) && (c <= 0x37D)) ||
3057 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3058 ((c >= 0x200C) && (c <= 0x200D)) ||
3059 ((c >= 0x2070) && (c <= 0x218F)) ||
3060 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3061 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3062 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3063 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3064 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3065 return(NULL);
3066 }
3067 len += l;
3068 NEXTL(l);
3069 c = CUR_CHAR(l);
3070 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3071 (((c >= 'a') && (c <= 'z')) ||
3072 ((c >= 'A') && (c <= 'Z')) ||
3073 ((c >= '0') && (c <= '9')) || /* !start */
3074 (c == '_') || (c == ':') ||
3075 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3076 ((c >= 0xC0) && (c <= 0xD6)) ||
3077 ((c >= 0xD8) && (c <= 0xF6)) ||
3078 ((c >= 0xF8) && (c <= 0x2FF)) ||
3079 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3080 ((c >= 0x370) && (c <= 0x37D)) ||
3081 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3082 ((c >= 0x200C) && (c <= 0x200D)) ||
3083 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3084 ((c >= 0x2070) && (c <= 0x218F)) ||
3085 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3086 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3087 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3088 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3089 ((c >= 0x10000) && (c <= 0xEFFFF))
3090 )) {
3091 if (count++ > 100) {
3092 count = 0;
3093 GROW;
3094 }
3095 len += l;
3096 NEXTL(l);
3097 c = CUR_CHAR(l);
3098 }
3099 } else {
3100 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3101 (!IS_LETTER(c) && (c != '_') &&
3102 (c != ':'))) {
3103 return(NULL);
3104 }
3105 len += l;
3106 NEXTL(l);
3107 c = CUR_CHAR(l);
3108
3109 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3110 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3111 (c == '.') || (c == '-') ||
3112 (c == '_') || (c == ':') ||
3113 (IS_COMBINING(c)) ||
3114 (IS_EXTENDER(c)))) {
3115 if (count++ > 100) {
3116 count = 0;
3117 GROW;
3118 }
3119 len += l;
3120 NEXTL(l);
3121 c = CUR_CHAR(l);
3122 }
3123 }
3124 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3125 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3126 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3127 }
3128
3129 /**
3130 * xmlParseName:
3131 * @ctxt: an XML parser context
3132 *
3133 * parse an XML name.
3134 *
3135 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3136 * CombiningChar | Extender
3137 *
3138 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3139 *
3140 * [6] Names ::= Name (#x20 Name)*
3141 *
3142 * Returns the Name parsed or NULL
3143 */
3144
3145 const xmlChar *
3146 xmlParseName(xmlParserCtxtPtr ctxt) {
3147 const xmlChar *in;
3148 const xmlChar *ret;
3149 int count = 0;
3150
3151 GROW;
3152
3153 #ifdef DEBUG
3154 nbParseName++;
3155 #endif
3156
3157 /*
3158 * Accelerator for simple ASCII names
3159 */
3160 in = ctxt->input->cur;
3161 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3162 ((*in >= 0x41) && (*in <= 0x5A)) ||
3163 (*in == '_') || (*in == ':')) {
3164 in++;
3165 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3166 ((*in >= 0x41) && (*in <= 0x5A)) ||
3167 ((*in >= 0x30) && (*in <= 0x39)) ||
3168 (*in == '_') || (*in == '-') ||
3169 (*in == ':') || (*in == '.'))
3170 in++;
3171 if ((*in > 0) && (*in < 0x80)) {
3172 count = in - ctxt->input->cur;
3173 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3174 ctxt->input->cur = in;
3175 ctxt->nbChars += count;
3176 ctxt->input->col += count;
3177 if (ret == NULL)
3178 xmlErrMemory(ctxt, NULL);
3179 return(ret);
3180 }
3181 }
3182 /* accelerator for special cases */
3183 return(xmlParseNameComplex(ctxt));
3184 }
3185
3186 static const xmlChar *
3187 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3188 int len = 0, l;
3189 int c;
3190 int count = 0;
3191
3192 #ifdef DEBUG
3193 nbParseNCNameComplex++;
3194 #endif
3195
3196 /*
3197 * Handler for more complex cases
3198 */
3199 GROW;
3200 c = CUR_CHAR(l);
3201 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3202 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3203 return(NULL);
3204 }
3205
3206 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3207 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3208 if (count++ > 100) {
3209 count = 0;
3210 GROW;
3211 }
3212 len += l;
3213 NEXTL(l);
3214 c = CUR_CHAR(l);
3215 }
3216 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3217 }
3218
3219 /**
3220 * xmlParseNCName:
3221 * @ctxt: an XML parser context
3222 * @len: lenght of the string parsed
3223 *
3224 * parse an XML name.
3225 *
3226 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3227 * CombiningChar | Extender
3228 *
3229 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3230 *
3231 * Returns the Name parsed or NULL
3232 */
3233
3234 static const xmlChar *
3235 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3236 const xmlChar *in;
3237 const xmlChar *ret;
3238 int count = 0;
3239
3240 #ifdef DEBUG
3241 nbParseNCName++;
3242 #endif
3243
3244 /*
3245 * Accelerator for simple ASCII names
3246 */
3247 in = ctxt->input->cur;
3248 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3249 ((*in >= 0x41) && (*in <= 0x5A)) ||
3250 (*in == '_')) {
3251 in++;
3252 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3253 ((*in >= 0x41) && (*in <= 0x5A)) ||
3254 ((*in >= 0x30) && (*in <= 0x39)) ||
3255 (*in == '_') || (*in == '-') ||
3256 (*in == '.'))
3257 in++;
3258 if ((*in > 0) && (*in < 0x80)) {
3259 count = in - ctxt->input->cur;
3260 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3261 ctxt->input->cur = in;
3262 ctxt->nbChars += count;
3263 ctxt->input->col += count;
3264 if (ret == NULL) {
3265 xmlErrMemory(ctxt, NULL);
3266 }
3267 return(ret);
3268 }
3269 }
3270 return(xmlParseNCNameComplex(ctxt));
3271 }
3272
3273 /**
3274 * xmlParseNameAndCompare:
3275 * @ctxt: an XML parser context
3276 *
3277 * parse an XML name and compares for match
3278 * (specialized for endtag parsing)
3279 *
3280 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3281 * and the name for mismatch
3282 */
3283
3284 static const xmlChar *
3285 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3286 register const xmlChar *cmp = other;
3287 register const xmlChar *in;
3288 const xmlChar *ret;
3289
3290 GROW;
3291
3292 in = ctxt->input->cur;
3293 while (*in != 0 && *in == *cmp) {
3294 ++in;
3295 ++cmp;
3296 ctxt->input->col++;
3297 }
3298 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3299 /* success */
3300 ctxt->input->cur = in;
3301 return (const xmlChar*) 1;
3302 }
3303 /* failure (or end of input buffer), check with full function */
3304 ret = xmlParseName (ctxt);
3305 /* strings coming from the dictionnary direct compare possible */
3306 if (ret == other) {
3307 return (const xmlChar*) 1;
3308 }
3309 return ret;
3310 }
3311
3312 /**
3313 * xmlParseStringName:
3314 * @ctxt: an XML parser context
3315 * @str: a pointer to the string pointer (IN/OUT)
3316 *
3317 * parse an XML name.
3318 *
3319 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3320 * CombiningChar | Extender
3321 *
3322 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3323 *
3324 * [6] Names ::= Name (#x20 Name)*
3325 *
3326 * Returns the Name parsed or NULL. The @str pointer
3327 * is updated to the current location in the string.
3328 */
3329
3330 static xmlChar *
3331 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3332 xmlChar buf[XML_MAX_NAMELEN + 5];
3333 const xmlChar *cur = *str;
3334 int len = 0, l;
3335 int c;
3336
3337 #ifdef DEBUG
3338 nbParseStringName++;
3339 #endif
3340
3341 c = CUR_SCHAR(cur, l);
3342 if (!xmlIsNameStartChar(ctxt, c)) {
3343 return(NULL);
3344 }
3345
3346 COPY_BUF(l,buf,len,c);
3347 cur += l;
3348 c = CUR_SCHAR(cur, l);
3349 while (xmlIsNameChar(ctxt, c)) {
3350 COPY_BUF(l,buf,len,c);
3351 cur += l;
3352 c = CUR_SCHAR(cur, l);
3353 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3354 /*
3355 * Okay someone managed to make a huge name, so he's ready to pay
3356 * for the processing speed.
3357 */
3358 xmlChar *buffer;
3359 int max = len * 2;
3360
3361 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3362 if (buffer == NULL) {
3363 xmlErrMemory(ctxt, NULL);
3364 return(NULL);
3365 }
3366 memcpy(buffer, buf, len);
3367 while (xmlIsNameChar(ctxt, c)) {
3368 if (len + 10 > max) {
3369 xmlChar *tmp;
3370 max *= 2;
3371 tmp = (xmlChar *) xmlRealloc(buffer,
3372 max * sizeof(xmlChar));
3373 if (tmp == NULL) {
3374 xmlErrMemory(ctxt, NULL);
3375 xmlFree(buffer);
3376 return(NULL);
3377 }
3378 buffer = tmp;
3379 }
3380 COPY_BUF(l,buffer,len,c);
3381 cur += l;
3382 c = CUR_SCHAR(cur, l);
3383 }
3384 buffer[len] = 0;
3385 *str = cur;
3386 return(buffer);
3387 }
3388 }
3389 *str = cur;
3390 return(xmlStrndup(buf, len));
3391 }
3392
3393 /**
3394 * xmlParseNmtoken:
3395 * @ctxt: an XML parser context
3396 *
3397 * parse an XML Nmtoken.
3398 *
3399 * [7] Nmtoken ::= (NameChar)+
3400 *
3401 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3402 *
3403 * Returns the Nmtoken parsed or NULL
3404 */
3405
3406 xmlChar *
3407 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3408 xmlChar buf[XML_MAX_NAMELEN + 5];
3409 int len = 0, l;
3410 int c;
3411 int count = 0;
3412
3413 #ifdef DEBUG
3414 nbParseNmToken++;
3415 #endif
3416
3417 GROW;
3418 c = CUR_CHAR(l);
3419
3420 while (xmlIsNameChar(ctxt, c)) {
3421 if (count++ > 100) {
3422 count = 0;
3423 GROW;
3424 }
3425 COPY_BUF(l,buf,len,c);
3426 NEXTL(l);
3427 c = CUR_CHAR(l);
3428 if (len >= XML_MAX_NAMELEN) {
3429 /*
3430 * Okay someone managed to make a huge token, so he's ready to pay
3431 * for the processing speed.
3432 */
3433 xmlChar *buffer;
3434 int max = len * 2;
3435
3436 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3437 if (buffer == NULL) {
3438 xmlErrMemory(ctxt, NULL);
3439 return(NULL);
3440 }
3441 memcpy(buffer, buf, len);
3442 while (xmlIsNameChar(ctxt, c)) {
3443 if (count++ > 100) {
3444 count = 0;
3445 GROW;
3446 }
3447 if (len + 10 > max) {
3448 xmlChar *tmp;
3449
3450 max *= 2;
3451 tmp = (xmlChar *) xmlRealloc(buffer,
3452 max * sizeof(xmlChar));
3453 if (tmp == NULL) {
3454 xmlErrMemory(ctxt, NULL);
3455 xmlFree(buffer);
3456 return(NULL);
3457 }
3458 buffer = tmp;
3459 }
3460 COPY_BUF(l,buffer,len,c);
3461 NEXTL(l);
3462 c = CUR_CHAR(l);
3463 }
3464 buffer[len] = 0;
3465 return(buffer);
3466 }
3467 }
3468 if (len == 0)
3469 return(NULL);
3470 return(xmlStrndup(buf, len));
3471 }
3472
/**
 * xmlParseEntityValue:
 * @ctxt:  an XML parser context
 * @orig:  if non-NULL, receives the raw (unsubstituted) value buffer when
 *         the literal was correctly terminated; the buffer is then not
 *         freed by this function
 *
 * parse a value for ENTITY declarations
 *
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
 *	               "'" ([^%&'] | PEReference | Reference)* "'"
 *
 * The parser state is switched to XML_PARSER_ENTITY_VALUE and is not
 * restored here.
 *
 * Returns the EntityValue parsed with reference substituted or NULL
 * (no opening quote, allocation failure, or missing closing quote).
 */

xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    int c, l;
    xmlChar stop;
    xmlChar *ret = NULL;
    const xmlChar *cur = NULL;
    xmlParserInputPtr input;

    /* The literal must open with a single or a double quote. */
    if (RAW == '"') stop = '"';
    else if (RAW == '\'') stop = '\'';
    else {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
        return(NULL);
    }
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }

    /*
     * The content of the entity definition is copied in a buffer.
     */

    ctxt->instate = XML_PARSER_ENTITY_VALUE;
    /* Remember the input we started in, see the NOTE below. */
    input = ctxt->input;
    GROW;
    NEXT;
    c = CUR_CHAR(l);
    /*
     * NOTE: 4.4.5 Included in Literal
     * When a parameter entity reference appears in a literal entity
     * value, ... a single or double quote character in the replacement
     * text is always treated as a normal data character and will not
     * terminate the literal.
     * In practice it means we stop the loop only when back at parsing
     * the initial entity and the quote is found
     */
    while ((IS_CHAR(c)) && ((c != stop) || /* checked */
           (ctxt->input != input))) {
        /* Keep room for one multi-byte character plus the terminator. */
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlErrMemory(ctxt, NULL);
                xmlFree(buf);
                return(NULL);
            }
            buf = tmp;
        }
        COPY_BUF(l,buf,len,c);
        NEXTL(l);
        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
            xmlPopInput(ctxt);

        GROW;
        c = CUR_CHAR(l);
        /* A zero may only mean the buffer ran dry: refill and retry once. */
        if (c == 0) {
            GROW;
            c = CUR_CHAR(l);
        }
    }
    buf[len] = 0;

    /*
     * Raise problem w.r.t. '&' and '%' being used in non-entities
     * reference constructs. Note Charref will be handled in
     * xmlStringDecodeEntities()
     */
    cur = buf;
    while (*cur != 0) { /* non input consuming */
        if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
            xmlChar *name;
            xmlChar tmp = *cur;

            /* '%' or '&' must be followed by a Name and a ';'. */
            cur++;
            name = xmlParseStringName(ctxt, &cur);
            if ((name == NULL) || (*cur != ';')) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
            "EntityValue: '%c' forbidden except for entities references\n",
                                  tmp);
            }
            /*
             * A '%' is reported when in the internal subset
             * (inSubset == 1) and only one input is on the stack.
             */
            if ((tmp == '%') && (ctxt->inSubset == 1) &&
                (ctxt->inputNr == 1)) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
            }
            if (name != NULL)
                xmlFree(name);
            /* Do not step past the terminating zero below. */
            if (*cur == 0)
                break;
        }
        cur++;
    }

    /*
     * Then PEReference entities are substituted, provided the literal
     * was closed by the expected quote.
     */
    if (c != stop) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
        xmlFree(buf);
    } else {
        NEXT;
        /*
         * NOTE: 4.4.7 Bypassed
         * When a general entity reference appears in the EntityValue in
         * an entity declaration, it is bypassed and left as is.
         * so XML_SUBSTITUTE_REF is not set here.
         */
        ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
                                      0, 0, 0);
        /* Either hand the raw buffer over through @orig or release it. */
        if (orig != NULL)
            *orig = buf;
        else
            xmlFree(buf);
    }

    return(ret);
}
3612
3613 /**
3614 * xmlParseAttValueComplex:
3615 * @ctxt: an XML parser context
3616 * @len: the resulting attribute len
3617 * @normalize: wether to apply the inner normalization
3618 *
3619 * parse a value for an attribute, this is the fallback function
3620 * of xmlParseAttValue() when the attribute parsing requires handling
3621 * of non-ASCII characters, or normalization compaction.
3622 *
3623 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3624 */
3625 static xmlChar *
3626 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3627 xmlChar limit = 0;
3628 xmlChar *buf = NULL;
3629 xmlChar *rep = NULL;
3630 int len = 0;
3631 int buf_size = 0;
3632 int c, l, in_space = 0;
3633 xmlChar *current = NULL;
3634 xmlEntityPtr ent;
3635
3636 if (NXT(0) == '"') {
3637 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3638 limit = '"';
3639 NEXT;
3640 } else if (NXT(0) == '\'') {
3641 limit = '\'';
3642 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3643 NEXT;
3644 } else {
3645 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3646 return(NULL);
3647 }
3648
3649 /*
3650 * allocate a translation buffer.
3651 */
3652 buf_size = XML_PARSER_BUFFER_SIZE;
3653 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3654 if (buf == NULL) goto mem_error;
3655
3656 /*
3657 * OK loop until we reach one of the ending char or a size limit.
3658 */
3659 c = CUR_CHAR(l);
3660 while ((NXT(0) != limit) && /* checked */
3661 (IS_CHAR(c)) && (c != '<')) {
3662 if (c == 0) break;
3663 if (c == '&') {
3664 in_space = 0;
3665 if (NXT(1) == '#') {
3666 int val = xmlParseCharRef(ctxt);
3667
3668 if (val == '&') {
3669 if (ctxt->replaceEntities) {
3670 if (len > buf_size - 10) {
3671 growBuffer(buf, 10);
3672 }
3673 buf[len++] = '&';
3674 } else {
3675 /*
3676 * The reparsing will be done in xmlStringGetNodeList()
3677 * called by the attribute() function in SAX.c
3678 */
3679 if (len > buf_size - 10) {
3680 growBuffer(buf, 10);
3681 }
3682 buf[len++] = '&';
3683 buf[len++] = '#';
3684 buf[len++] = '3';
3685 buf[len++] = '8';
3686 buf[len++] = ';';
3687 }
3688 } else if (val != 0) {
3689 if (len > buf_size - 10) {
3690 growBuffer(buf, 10);
3691 }
3692 len += xmlCopyChar(0, &buf[len], val);
3693 }
3694 } else {
3695 ent = xmlParseEntityRef(ctxt);
3696 ctxt->nbentities++;
3697 if (ent != NULL)
3698 ctxt->nbentities += ent->owner;
3699 if ((ent != NULL) &&
3700 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3701 if (len > buf_size - 10) {
3702 growBuffer(buf, 10);
3703 }
3704 if ((ctxt->replaceEntities == 0) &&
3705 (ent->content[0] == '&')) {
3706 buf[len++] = '&';
3707 buf[len++] = '#';
3708 buf[len++] = '3';
3709 buf[len++] = '8';
3710 buf[len++] = ';';
3711 } else {
3712 buf[len++] = ent->content[0];
3713 }
3714 } else if ((ent != NULL) &&
3715 (ctxt->replaceEntities != 0)) {
3716 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3717 rep = xmlStringDecodeEntities(ctxt, ent->content,
3718 XML_SUBSTITUTE_REF,
3719 0, 0, 0);
3720 if (rep != NULL) {
3721 current = rep;
3722 while (*current != 0) { /* non input consuming */
3723 if ((*current == 0xD) || (*current == 0xA) ||
3724 (*current == 0x9)) {
3725 buf[len++] = 0x20;
3726 current++;
3727 } else
3728 buf[len++] = *current++;
3729 if (len > buf_size - 10) {
3730 growBuffer(buf, 10);
3731 }
3732 }
3733 xmlFree(rep);
3734 rep = NULL;
3735 }
3736 } else {
3737 if (len > buf_size - 10) {
3738 growBuffer(buf, 10);
3739 }
3740 if (ent->content != NULL)
3741 buf[len++] = ent->content[0];
3742 }
3743 } else if (ent != NULL) {
3744 int i = xmlStrlen(ent->name);
3745 const xmlChar *cur = ent->name;
3746
3747 /*
3748 * This may look absurd but is needed to detect
3749 * entities problems
3750 */
3751 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3752 (ent->content != NULL)) {
3753 rep = xmlStringDecodeEntities(ctxt, ent->content,
3754 XML_SUBSTITUTE_REF, 0, 0, 0);
3755 if (rep != NULL) {
3756 xmlFree(rep);
3757 rep = NULL;
3758 }
3759 }
3760
3761 /*
3762 * Just output the reference
3763 */
3764 buf[len++] = '&';
3765 while (len > buf_size - i - 10) {
3766 growBuffer(buf, i + 10);
3767 }
3768 for (;i > 0;i--)
3769 buf[len++] = *cur++;
3770 buf[len++] = ';';
3771 }
3772 }
3773 } else {
3774 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3775 if ((len != 0) || (!normalize)) {
3776 if ((!normalize) || (!in_space)) {
3777 COPY_BUF(l,buf,len,0x20);
3778 while (len > buf_size - 10) {
3779 growBuffer(buf, 10);
3780 }
3781 }
3782 in_space = 1;
3783 }
3784 } else {
3785 in_space = 0;
3786 COPY_BUF(l,buf,len,c);
3787 if (len > buf_size - 10) {
3788 growBuffer(buf, 10);
3789 }
3790 }
3791 NEXTL(l);
3792 }
3793 GROW;
3794 c = CUR_CHAR(l);
3795 }
3796 if ((in_space) && (normalize)) {
3797 while (buf[len - 1] == 0x20) len--;
3798 }
3799 buf[len] = 0;
3800 if (RAW == '<') {
3801 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3802 } else if (RAW != limit) {
3803 if ((c != 0) && (!IS_CHAR(c))) {
3804 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3805 "invalid character in attribute value\n");
3806 } else {
3807 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3808 "AttValue: ' expected\n");
3809 }
3810 } else
3811 NEXT;
3812 if (attlen != NULL) *attlen = len;
3813 return(buf);
3814
3815 mem_error:
3816 xmlErrMemory(ctxt, NULL);
3817 if (buf != NULL)
3818 xmlFree(buf);
3819 if (rep != NULL)
3820 xmlFree(rep);
3821 return(NULL);
3822 }
3823
3824 /**
3825 * xmlParseAttValue:
3826 * @ctxt: an XML parser context
3827 *
3828 * parse a value for an attribute
3829 * Note: the parser won't do substitution of entities here, this
3830 * will be handled later in xmlStringGetNodeList
3831 *
3832 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3833 * "'" ([^<&'] | Reference)* "'"
3834 *
3835 * 3.3.3 Attribute-Value Normalization:
3836 * Before the value of an attribute is passed to the application or
3837 * checked for validity, the XML processor must normalize it as follows:
3838 * - a character reference is processed by appending the referenced
3839 * character to the attribute value
3840 * - an entity reference is processed by recursively processing the
3841 * replacement text of the entity
3842 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3843 * appending #x20 to the normalized value, except that only a single
3844 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3845 * parsed entity or the literal entity value of an internal parsed entity
3846 * - other characters are processed by appending them to the normalized value
3847 * If the declared value is not CDATA, then the XML processor must further
3848 * process the normalized attribute value by discarding any leading and
3849 * trailing space (#x20) characters, and by replacing sequences of space
3850 * (#x20) characters by a single space (#x20) character.
3851 * All attributes for which no declaration has been read should be treated
3852 * by a non-validating parser as if declared CDATA.
3853 *
3854 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3855 */
3856
3857
3858 xmlChar *
3859 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3860 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3861 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3862 }
3863
3864 /**
3865 * xmlParseSystemLiteral:
3866 * @ctxt: an XML parser context
3867 *
3868 * parse an XML Literal
3869 *
3870 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3871 *
3872 * Returns the SystemLiteral parsed or NULL
3873 */
3874
3875 xmlChar *
3876 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3877 xmlChar *buf = NULL;
3878 int len = 0;
3879 int size = XML_PARSER_BUFFER_SIZE;
3880 int cur, l;
3881 xmlChar stop;
3882 int state = ctxt->instate;
3883 int count = 0;
3884
3885 SHRINK;
3886 if (RAW == '"') {
3887 NEXT;
3888 stop = '"';
3889 } else if (RAW == '\'') {
3890 NEXT;
3891 stop = '\'';
3892 } else {
3893 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3894 return(NULL);
3895 }
3896
3897 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3898 if (buf == NULL) {
3899 xmlErrMemory(ctxt, NULL);
3900 return(NULL);
3901 }
3902 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3903 cur = CUR_CHAR(l);
3904 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
3905 if (len + 5 >= size) {
3906 xmlChar *tmp;
3907
3908 size *= 2;
3909 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3910 if (tmp == NULL) {
3911 xmlFree(buf);
3912 xmlErrMemory(ctxt, NULL);
3913 ctxt->instate = (xmlParserInputState) state;
3914 return(NULL);
3915 }
3916 buf = tmp;
3917 }
3918 count++;
3919 if (count > 50) {
3920 GROW;
3921 count = 0;
3922 }
3923 COPY_BUF(l,buf,len,cur);
3924 NEXTL(l);
3925 cur = CUR_CHAR(l);
3926 if (cur == 0) {
3927 GROW;
3928 SHRINK;
3929 cur = CUR_CHAR(l);
3930 }
3931 }
3932 buf[len] = 0;
3933 ctxt->instate = (xmlParserInputState) state;
3934 if (!IS_CHAR(cur)) {
3935 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3936 } else {
3937 NEXT;
3938 }
3939 return(buf);
3940 }
3941
3942 /**
3943 * xmlParsePubidLiteral:
3944 * @ctxt: an XML parser context
3945 *
3946 * parse an XML public literal
3947 *
3948 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3949 *
3950 * Returns the PubidLiteral parsed or NULL.
3951 */
3952
3953 xmlChar *
3954 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3955 xmlChar *buf = NULL;
3956 int len = 0;
3957 int size = XML_PARSER_BUFFER_SIZE;
3958 xmlChar cur;
3959 xmlChar stop;
3960 int count = 0;
3961 xmlParserInputState oldstate = ctxt->instate;
3962
3963 SHRINK;
3964 if (RAW == '"') {
3965 NEXT;
3966 stop = '"';
3967 } else if (RAW == '\'') {
3968 NEXT;
3969 stop = '\'';
3970 } else {
3971 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3972 return(NULL);
3973 }
3974 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3975 if (buf == NULL) {
3976 xmlErrMemory(ctxt, NULL);
3977 return(NULL);
3978 }
3979 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
3980 cur = CUR;
3981 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
3982 if (len + 1 >= size) {
3983 xmlChar *tmp;
3984
3985 size *= 2;
3986 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3987 if (tmp == NULL) {
3988 xmlErrMemory(ctxt, NULL);
3989 xmlFree(buf);
3990 return(NULL);
3991 }
3992 buf = tmp;
3993 }
3994 buf[len++] = cur;
3995 count++;
3996 if (count > 50) {
3997 GROW;
3998 count = 0;
3999 }
4000 NEXT;
4001 cur = CUR;
4002 if (cur == 0) {
4003 GROW;
4004 SHRINK;
4005 cur = CUR;
4006 }
4007 }
4008 buf[len] = 0;
4009 if (cur != stop) {
4010 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4011 } else {
4012 NEXT;
4013 }
4014 ctxt->instate = oldstate;
4015 return(buf);
4016 }
4017
4018 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4019
/*
 * used for the test in the inner loop of the char data testing
 *
 * Indexed by byte value. A non-zero entry (the byte itself) means the byte
 * can be skipped by the fast scanning loop; a zero entry stops that loop.
 * Zero entries are: all control characters except 0x9 (so CR and LF stop
 * the scan), '&', '<', ']' and every byte >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4057
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero an accelerated path first scans plain ASCII text
 * directly in the input buffer and passes it to the SAX callbacks without
 * copying. Anything that path does not handle (and all CDATA content) is
 * left to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /* Position matching the last point where data was handed over. */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* Skip a leading run of spaces and line feeds. */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * Only spaces and line feeds before the next markup:
                 * deliver them (if any) and stop.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* Run over every byte the lookup table marks as skippable. */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content outside of CDATA. */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* Deliver the text scanned so far, straight from the input. */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF pair: restart the current segment at the LF so
                     * the CR is left out, then keep scanning after the LF.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* re-evaluate the do/while condition */
                }
                /* Lone CR: step back so the loop test below sees it. */
                in--;
            }
            /* Markup or a reference start: the caller takes over. */
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /*
     * Fall back to the generic routine, with the line/column values
     * recorded above put back in place first.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4213
4214 /**
4215 * xmlParseCharDataComplex:
4216 * @ctxt: an XML parser context
4217 * @cdata: int indicating whether we are within a CDATA section
4218 *
4219 * parse a CharData section.this is the fallback function
4220 * of xmlParseCharData() when the parsing requires handling
4221 * of non-ASCII characters.
4222 */
4223 static void
4224 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4225 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4226 int nbchar = 0;
4227 int cur, l;
4228 int count = 0;
4229
4230 SHRINK;
4231 GROW;
4232 cur = CUR_CHAR(l);
4233 while ((cur != '<') && /* checked */
4234 (cur != '&') &&
4235 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4236 if ((cur == ']') && (NXT(1) == ']') &&
4237 (NXT(2) == '>')) {
4238 if (cdata) break;
4239 else {
4240 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4241 }
4242 }
4243 COPY_BUF(l,buf,nbchar,cur);
4244 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4245 buf[nbchar] = 0;
4246
4247 /*
4248 * OK the segment is to be consumed as chars.
4249 */
4250 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4251 if (areBlanks(ctxt, buf, nbchar, 0)) {
4252 if (ctxt->sax->ignorableWhitespace != NULL)
4253 ctxt->sax->ignorableWhitespace(ctxt->userData,
4254 buf, nbchar);
4255 } else {
4256 if (ctxt->sax->characters != NULL)
4257 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4258 if ((ctxt->sax->characters !=
4259 ctxt->sax->ignorableWhitespace) &&
4260 (*ctxt->space == -1))
4261 *ctxt->space = -2;
4262 }
4263 }
4264 nbchar = 0;
4265 /* something really bad happened in the SAX callback */
4266 if (ctxt->instate != XML_PARSER_CONTENT)
4267 return;
4268 }
4269 count++;
4270 if (count > 50) {
4271 GROW;
4272 count = 0;
4273 }
4274 NEXTL(l);
4275 cur = CUR_CHAR(l);
4276 }
4277 if (nbchar != 0) {
4278 buf[nbchar] = 0;
4279 /*
4280 * OK the segment is to be consumed as chars.
4281 */
4282 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4283 if (areBlanks(ctxt, buf, nbchar, 0)) {
4284 if (ctxt->sax->ignorableWhitespace != NULL)
4285 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4286 } else {
4287 if (ctxt->sax->characters != NULL)
4288 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4289 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4290 (*ctxt->space == -1))
4291 *ctxt->space = -2;
4292 }
4293 }
4294 }
4295 if ((cur != 0) && (!IS_CHAR(cur))) {
4296 /* Generate the error and skip the offending character */
4297 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4298 "PCDATA invalid Char value %d\n",
4299 cur);
4300 NEXTL(l);
4301 }
4302 }
4303
4304 /**
4305 * xmlParseExternalID:
4306 * @ctxt: an XML parser context
4307 * @publicID: a xmlChar** receiving PubidLiteral
4308 * @strict: indicate whether we should restrict parsing to only
4309 * production [75], see NOTE below
4310 *
4311 * Parse an External ID or a Public ID
4312 *
4313 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4314 * 'PUBLIC' S PubidLiteral S SystemLiteral
4315 *
4316 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4317 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4318 *
4319 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4320 *
4321 * Returns the function returns SystemLiteral and in the second
4322 * case publicID receives PubidLiteral, is strict is off
4323 * it is possible to return NULL and have publicID set.
4324 */
4325
4326 xmlChar *
4327 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4328 xmlChar *URI = NULL;
4329
4330 SHRINK;
4331
4332 *publicID = NULL;
4333 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4334 SKIP(6);
4335 if (!IS_BLANK_CH(CUR)) {
4336 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4337 "Space required after 'SYSTEM'\n");
4338 }
4339 SKIP_BLANKS;
4340 URI = xmlParseSystemLiteral(ctxt);
4341 if (URI == NULL) {
4342 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4343 }
4344 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4345 SKIP(6);
4346 if (!IS_BLANK_CH(CUR)) {
4347 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4348 "Space required after 'PUBLIC'\n");
4349 }
4350 SKIP_BLANKS;
4351 *publicID = xmlParsePubidLiteral(ctxt);
4352 if (*publicID == NULL) {
4353 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4354 }
4355 if (strict) {
4356 /*
4357 * We don't handle [83] so "S SystemLiteral" is required.
4358 */
4359 if (!IS_BLANK_CH(CUR)) {
4360 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4361 "Space required after the Public Identifier\n");
4362 }
4363 } else {
4364 /*
4365 * We handle [83] so we return immediately, if
4366 * "S SystemLiteral" is not detected. From a purely parsing
4367 * point of view that's a nice mess.
4368 */
4369 const xmlChar *ptr;
4370 GROW;
4371
4372 ptr = CUR_PTR;
4373 if (!IS_BLANK_CH(*ptr)) return(NULL);
4374
4375 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4376 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4377 }
4378 SKIP_BLANKS;
4379 URI = xmlParseSystemLiteral(ctxt);
4380 if (URI == NULL) {
4381 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4382 }
4383 }
4384 return(URI);
4385 }
4386
4387 /**
4388 * xmlParseCommentComplex:
4389 * @ctxt: an XML parser context
4390 * @buf: the already parsed part of the buffer
4391 * @len: number of bytes filles in the buffer
4392 * @size: allocated size of the buffer
4393 *
4394 * Skip an XML (SGML) comment <!-- .... -->
4395 * The spec says that "For compatibility, the string "--" (double-hyphen)
4396 * must not occur within comments. "
4397 * This is the slow routine in case the accelerator for ascii didn't work
4398 *
4399 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4400 */
4401 static void
4402 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
4403 int q, ql;
4404 int r, rl;
4405 int cur, l;
4406 int count = 0;
4407 int inputid;
4408
4409 inputid = ctxt->input->id;
4410
4411 if (buf == NULL) {
4412 len = 0;
4413 size = XML_PARSER_BUFFER_SIZE;
4414 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4415 if (buf == NULL) {
4416 xmlErrMemory(ctxt, NULL);
4417 return;
4418 }
4419 }
4420 GROW; /* Assure there's enough input data */
4421 q = CUR_CHAR(ql);
4422 if (q == 0)
4423 goto not_terminated;
4424 if (!IS_CHAR(q)) {
4425 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4426 "xmlParseComment: invalid xmlChar value %d\n",
4427 q);
4428 xmlFree (buf);
4429 return;
4430 }
4431 NEXTL(ql);
4432 r = CUR_CHAR(rl);
4433 if (r == 0)
4434 goto not_terminated;
4435 if (!IS_CHAR(r)) {
4436 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4437 "xmlParseComment: invalid xmlChar value %d\n",
4438 q);
4439 xmlFree (buf);
4440 return;
4441 }
4442 NEXTL(rl);
4443 cur = CUR_CHAR(l);
4444 if (cur == 0)
4445 goto not_terminated;
4446 while (IS_CHAR(cur) && /* checked */
4447 ((cur != '>') ||
4448 (r != '-') || (q != '-'))) {
4449 if ((r == '-') && (q == '-')) {
4450 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4451 }
4452 if (len + 5 >= size) {
4453 xmlChar *new_buf;
4454 size *= 2;
4455 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4456 if (new_buf == NULL) {
4457 xmlFree (buf);
4458 xmlErrMemory(ctxt, NULL);
4459 return;
4460 }
4461 buf = new_buf;
4462 }
4463 COPY_BUF(ql,buf,len,q);
4464 q = r;
4465 ql = rl;
4466 r = cur;
4467 rl = l;
4468
4469 count++;
4470 if (count > 50) {
4471 GROW;
4472 count = 0;
4473 }
4474 NEXTL(l);
4475 cur = CUR_CHAR(l);
4476 if (cur == 0) {
4477 SHRINK;
4478 GROW;
4479 cur = CUR_CHAR(l);
4480 }
4481 }
4482 buf[len] = 0;
4483 if (cur == 0) {
4484 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4485 "Comment not terminated \n<!--%.50s\n", buf);
4486 } else if (!IS_CHAR(cur)) {
4487 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4488 "xmlParseComment: invalid xmlChar value %d\n",
4489 cur);
4490 } else {
4491 if (inputid != ctxt->input->id) {
4492 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4493 "Comment doesn't start and stop in the same entity\n");
4494 }
4495 NEXT;
4496 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4497 (!ctxt->disableSAX))
4498 ctxt->sax->comment(ctxt->userData, buf);
4499 }
4500 xmlFree(buf);
4501 return;
4502 not_terminated:
4503 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4504 "Comment not terminated\n", NULL);
4505 xmlFree(buf);
4506 return;
4507 }
4508
4509 /**
4510 * xmlParseComment:
4511 * @ctxt: an XML parser context
4512 *
4513 * Skip an XML (SGML) comment <!-- .... -->
4514 * The spec says that "For compatibility, the string "--" (double-hyphen)
4515 * must not occur within comments. "
4516 *
4517 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4518 */
4519 void
4520 xmlParseComment(xmlParserCtxtPtr ctxt) {
4521 xmlChar *buf = NULL;
4522 int size = XML_PARSER_BUFFER_SIZE;
4523 int len = 0;
4524 xmlParserInputState state;
4525 const xmlChar *in;
4526 int nbchar = 0, ccol;
4527 int inputid;
4528
4529 /*
4530 * Check that there is a comment right here.
4531 */
4532 if ((RAW != '<') || (NXT(1) != '!') ||
4533 (NXT(2) != '-') || (NXT(3) != '-')) return;
4534 state = ctxt->instate;
4535 ctxt->instate = XML_PARSER_COMMENT;
4536 inputid = ctxt->input->id;
4537 SKIP(4);
4538 SHRINK;
4539 GROW;
4540
4541 /*
4542 * Accelerated common case where input don't need to be
4543 * modified before passing it to the handler.
4544 */
4545 in = ctxt->input->cur;
4546 do {
4547 if (*in == 0xA) {
4548 do {
4549 ctxt->input->line++; ctxt->input->col = 1;
4550 in++;
4551 } while (*in == 0xA);
4552 }
4553 get_more:
4554 ccol = ctxt->input->col;
4555 while (((*in > '-') && (*in <= 0x7F)) ||
4556 ((*in >= 0x20) && (*in < '-')) ||
4557 (*in == 0x09)) {
4558 in++;
4559 ccol++;
4560 }
4561 ctxt->input->col = ccol;
4562 if (*in == 0xA) {
4563 do {
4564 ctxt->input->line++; ctxt->input->col = 1;
4565 in++;
4566 } while (*in == 0xA);
4567 goto get_more;
4568 }
4569 nbchar = in - ctxt->input->cur;
4570 /*
4571 * save current set of data
4572 */
4573 if (nbchar > 0) {
4574 if ((ctxt->sax != NULL) &&
4575 (ctxt->sax->comment != NULL)) {
4576 if (buf == NULL) {
4577 if ((*in == '-') && (in[1] == '-'))
4578 size = nbchar + 1;
4579 else
4580 size = XML_PARSER_BUFFER_SIZE + nbchar;
4581 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4582 if (buf == NULL) {
4583 xmlErrMemory(ctxt, NULL);
4584 ctxt->instate = state;
4585 return;
4586 }
4587 len = 0;
4588 } else if (len + nbchar + 1 >= size) {
4589 xmlChar *new_buf;
4590 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4591 new_buf = (xmlChar *) xmlRealloc(buf,
4592 size * sizeof(xmlChar));
4593 if (new_buf == NULL) {
4594 xmlFree (buf);
4595 xmlErrMemory(ctxt, NULL);
4596 ctxt->instate = state;
4597 return;
4598 }
4599 buf = new_buf;
4600 }
4601 memcpy(&buf[len], ctxt->input->cur, nbchar);
4602 len += nbchar;
4603 buf[len] = 0;
4604 }
4605 }
4606 ctxt->input->cur = in;
4607 if (*in == 0xA) {
4608 in++;
4609 ctxt->input->line++; ctxt->input->col = 1;
4610 }
4611 if (*in == 0xD) {
4612 in++;
4613 if (*in == 0xA) {
4614 ctxt->input->cur = in;
4615 in++;
4616 ctxt->input->line++; ctxt->input->col = 1;
4617 continue; /* while */
4618 }
4619 in--;
4620 }
4621 SHRINK;
4622 GROW;
4623 in = ctxt->input->cur;
4624 if (*in == '-') {
4625 if (in[1] == '-') {
4626 if (in[2] == '>') {
4627 if (ctxt->input->id != inputid) {
4628 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4629 "comment doesn't start and stop in the same entity\n");
4630 }
4631 SKIP(3);
4632 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4633 (!ctxt->disableSAX)) {
4634 if (buf != NULL)
4635 ctxt->sax->comment(ctxt->userData, buf);
4636 else
4637 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4638 }
4639 if (buf != NULL)
4640 xmlFree(buf);
4641 ctxt->instate = state;
4642 return;
4643 }
4644 if (buf != NULL)
4645 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4646 "Comment not terminated \n<!--%.50s\n",
4647 buf);
4648 else
4649 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4650 "Comment not terminated \n", NULL);
4651 in++;
4652 ctxt->input->col++;
4653 }
4654 in++;
4655 ctxt->input->col++;
4656 goto get_more;
4657 }
4658 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4659 xmlParseCommentComplex(ctxt, buf, len, size);
4660 ctxt->instate = state;
4661 return;
4662 }
4663
4664
4665 /**
4666 * xmlParsePITarget:
4667 * @ctxt: an XML parser context
4668 *
4669 * parse the name of a PI
4670 *
4671 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4672 *
4673 * Returns the PITarget name or NULL
4674 */
4675
4676 const xmlChar *
4677 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4678 const xmlChar *name;
4679
4680 name = xmlParseName(ctxt);
4681 if ((name != NULL) &&
4682 ((name[0] == 'x') || (name[0] == 'X')) &&
4683 ((name[1] == 'm') || (name[1] == 'M')) &&
4684 ((name[2] == 'l') || (name[2] == 'L'))) {
4685 int i;
4686 if ((name[0] == 'x') && (name[1] == 'm') &&
4687 (name[2] == 'l') && (name[3] == 0)) {
4688 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4689 "XML declaration allowed only at the start of the document\n");
4690 return(name);
4691 } else if (name[3] == 0) {
4692 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4693 return(name);
4694 }
4695 for (i = 0;;i++) {
4696 if (xmlW3CPIs[i] == NULL) break;
4697 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4698 return(name);
4699 }
4700 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4701 "xmlParsePITarget: invalid name prefix 'xml'\n",
4702 NULL, NULL);
4703 }
4704 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4705 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4706 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4707 }
4708 return(name);
4709 }
4710
4711 #ifdef LIBXML_CATALOG_ENABLED
4712 /**
4713 * xmlParseCatalogPI:
4714 * @ctxt: an XML parser context
4715 * @catalog: the PI value string
4716 *
4717 * parse an XML Catalog Processing Instruction.
4718 *
4719 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4720 *
4721 * Occurs only if allowed by the user and if happening in the Misc
4722 * part of the document before any doctype informations
4723 * This will add the given catalog to the parsing context in order
4724 * to be used if there is a resolution need further down in the document
4725 */
4726
4727 static void
4728 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4729 xmlChar *URL = NULL;
4730 const xmlChar *tmp, *base;
4731 xmlChar marker;
4732
4733 tmp = catalog;
4734 while (IS_BLANK_CH(*tmp)) tmp++;
4735 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4736 goto error;
4737 tmp += 7;
4738 while (IS_BLANK_CH(*tmp)) tmp++;
4739 if (*tmp != '=') {
4740 return;
4741 }
4742 tmp++;
4743 while (IS_BLANK_CH(*tmp)) tmp++;
4744 marker = *tmp;
4745 if ((marker != '\'') && (marker != '"'))
4746 goto error;
4747 tmp++;
4748 base = tmp;
4749 while ((*tmp != 0) && (*tmp != marker)) tmp++;
4750 if (*tmp == 0)
4751 goto error;
4752 URL = xmlStrndup(base, tmp - base);
4753 tmp++;
4754 while (IS_BLANK_CH(*tmp)) tmp++;
4755 if (*tmp != 0)
4756 goto error;
4757
4758 if (URL != NULL) {
4759 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4760 xmlFree(URL);
4761 }
4762 return;
4763
4764 error:
4765 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4766 "Catalog PI syntax error: %s\n",
4767 catalog, NULL);
4768 if (URL != NULL)
4769 xmlFree(URL);
4770 }
4771 #endif
4772
4773 /**
4774 * xmlParsePI:
4775 * @ctxt: an XML parser context
4776 *
4777 * parse an XML Processing Instruction.
4778 *
4779 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4780 *
4781 * The processing is transfered to SAX once parsed.
4782 */
4783
4784 void
4785 xmlParsePI(xmlParserCtxtPtr ctxt) {
4786 xmlChar *buf = NULL;
4787 int len = 0;
4788 int size = XML_PARSER_BUFFER_SIZE;
4789 int cur, l;
4790 const xmlChar *target;
4791 xmlParserInputState state;
4792 int count = 0;
4793
4794 if ((RAW == '<') && (NXT(1) == '?')) {
4795 xmlParserInputPtr input = ctxt->input;
4796 state = ctxt->instate;
4797 ctxt->instate = XML_PARSER_PI;
4798 /*
4799 * this is a Processing Instruction.
4800 */
4801 SKIP(2);
4802 SHRINK;
4803
4804 /*
4805 * Parse the target name and check for special support like
4806 * namespace.
4807 */
4808 target = xmlParsePITarget(ctxt);
4809 if (target != NULL) {
4810 if ((RAW == '?') && (NXT(1) == '>')) {
4811 if (input != ctxt->input) {
4812 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4813 "PI declaration doesn't start and stop in the same entity\n");
4814 }
4815 SKIP(2);
4816
4817 /*
4818 * SAX: PI detected.
4819 */
4820 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4821 (ctxt->sax->processingInstruction != NULL))
4822 ctxt->sax->processingInstruction(ctxt->userData,
4823 target, NULL);
4824 ctxt->instate = state;
4825 return;
4826 }
4827 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4828 if (buf == NULL) {
4829 xmlErrMemory(ctxt, NULL);
4830 ctxt->instate = state;
4831 return;
4832 }
4833 cur = CUR;
4834 if (!IS_BLANK(cur)) {
4835 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4836 "ParsePI: PI %s space expected\n", target);
4837 }
4838 SKIP_BLANKS;
4839 cur = CUR_CHAR(l);
4840 while (IS_CHAR(cur) && /* checked */
4841 ((cur != '?') || (NXT(1) != '>'))) {
4842 if (len + 5 >= size) {
4843 xmlChar *tmp;
4844
4845 size *= 2;
4846 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4847 if (tmp == NULL) {
4848 xmlErrMemory(ctxt, NULL);
4849 xmlFree(buf);
4850 ctxt->instate = state;
4851 return;
4852 }
4853 buf = tmp;
4854 }
4855 count++;
4856 if (count > 50) {
4857 GROW;
4858 count = 0;
4859 }
4860 COPY_BUF(l,buf,len,cur);
4861 NEXTL(l);
4862 cur = CUR_CHAR(l);
4863 if (cur == 0) {
4864 SHRINK;
4865 GROW;
4866 cur = CUR_CHAR(l);
4867 }
4868 }
4869 buf[len] = 0;
4870 if (cur != '?') {
4871 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4872 "ParsePI: PI %s never end ...\n", target);
4873 } else {
4874 if (input != ctxt->input) {
4875 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4876 "PI declaration doesn't start and stop in the same entity\n");
4877 }
4878 SKIP(2);
4879
4880 #ifdef LIBXML_CATALOG_ENABLED
4881 if (((state == XML_PARSER_MISC) ||
4882 (state == XML_PARSER_START)) &&
4883 (xmlStrEqual(target, XML_CATALOG_PI))) {
4884 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4885 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4886 (allow == XML_CATA_ALLOW_ALL))
4887 xmlParseCatalogPI(ctxt, buf);
4888 }
4889 #endif
4890
4891
4892 /*
4893 * SAX: PI detected.
4894 */
4895 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4896 (ctxt->sax->processingInstruction != NULL))
4897 ctxt->sax->processingInstruction(ctxt->userData,
4898 target, buf);
4899 }
4900 xmlFree(buf);
4901 } else {
4902 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
4903 }
4904 ctxt->instate = state;
4905 }
4906 }
4907
4908 /**
4909 * xmlParseNotationDecl:
4910 * @ctxt: an XML parser context
4911 *
4912 * parse a notation declaration
4913 *
4914 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4915 *
4916 * Hence there is actually 3 choices:
4917 * 'PUBLIC' S PubidLiteral
4918 * 'PUBLIC' S PubidLiteral S SystemLiteral
4919 * and 'SYSTEM' S SystemLiteral
4920 *
4921 * See the NOTE on xmlParseExternalID().
4922 */
4923
4924 void
4925 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
4926 const xmlChar *name;
4927 xmlChar *Pubid;
4928 xmlChar *Systemid;
4929
4930 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4931 xmlParserInputPtr input = ctxt->input;
4932 SHRINK;
4933 SKIP(10);
4934 if (!IS_BLANK_CH(CUR)) {
4935 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4936 "Space required after '<!NOTATION'\n");
4937 return;
4938 }
4939 SKIP_BLANKS;
4940
4941 name = xmlParseName(ctxt);
4942 if (name == NULL) {
4943 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4944 return;
4945 }
4946 if (!IS_BLANK_CH(CUR)) {
4947 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4948 "Space required after the NOTATION name'\n");
4949 return;
4950 }
4951 if (xmlStrchr(name, ':') != NULL) {
4952 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4953 "colon are forbidden from notation names '%s'\n",
4954 name, NULL, NULL);
4955 }
4956 SKIP_BLANKS;
4957
4958 /*
4959 * Parse the IDs.
4960 */
4961 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4962 SKIP_BLANKS;
4963
4964 if (RAW == '>') {
4965 if (input != ctxt->input) {
4966 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4967 "Notation declaration doesn't start and stop in the same entity\n");
4968 }
4969 NEXT;
4970 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4971 (ctxt->sax->notationDecl != NULL))
4972 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4973 } else {
4974 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4975 }
4976 if (Systemid != NULL) xmlFree(Systemid);
4977 if (Pubid != NULL) xmlFree(Pubid);
4978 }
4979 }
4980
4981 /**
4982 * xmlParseEntityDecl:
4983 * @ctxt: an XML parser context
4984 *
4985 * parse <!ENTITY declarations
4986 *
4987 * [70] EntityDecl ::= GEDecl | PEDecl
4988 *
4989 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4990 *
4991 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4992 *
4993 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4994 *
4995 * [74] PEDef ::= EntityValue | ExternalID
4996 *
4997 * [76] NDataDecl ::= S 'NDATA' S Name
4998 *
4999 * [ VC: Notation Declared ]
5000 * The Name must match the declared name of a notation.
5001 */
5002
5003 void
5004 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5005 const xmlChar *name = NULL;
5006 xmlChar *value = NULL;
5007 xmlChar *URI = NULL, *literal = NULL;
5008 const xmlChar *ndata = NULL;
5009 int isParameter = 0;
5010 xmlChar *orig = NULL;
5011 int skipped;
5012
5013 /* GROW; done in the caller */
5014 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5015 xmlParserInputPtr input = ctxt->input;
5016 SHRINK;
5017 SKIP(8);
5018 skipped = SKIP_BLANKS;
5019 if (skipped == 0) {
5020 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5021 "Space required after '<!ENTITY'\n");
5022 }
5023
5024 if (RAW == '%') {
5025 NEXT;
5026 skipped = SKIP_BLANKS;
5027 if (skipped == 0) {
5028 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5029 "Space required after '%'\n");
5030 }
5031 isParameter = 1;
5032 }
5033
5034 name = xmlParseName(ctxt);
5035 if (name == NULL) {
5036 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5037 "xmlParseEntityDecl: no name\n");
5038 return;
5039 }
5040 if (xmlStrchr(name, ':') != NULL) {
5041 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5042 "colon are forbidden from entities names '%s'\n",
5043 name, NULL, NULL);
5044 }
5045 skipped = SKIP_BLANKS;
5046 if (skipped == 0) {
5047 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5048 "Space required after the entity name\n");
5049 }
5050
5051 ctxt->instate = XML_PARSER_ENTITY_DECL;
5052 /*
5053 * handle the various case of definitions...
5054 */
5055 if (isParameter) {
5056 if ((RAW == '"') || (RAW == '\'')) {
5057 value = xmlParseEntityValue(ctxt, &orig);
5058 if (value) {
5059 if ((ctxt->sax != NULL) &&
5060 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5061 ctxt->sax->entityDecl(ctxt->userData, name,
5062 XML_INTERNAL_PARAMETER_ENTITY,
5063 NULL, NULL, value);
5064 }
5065 } else {
5066 URI = xmlParseExternalID(ctxt, &literal, 1);
5067 if ((URI == NULL) && (literal == NULL)) {
5068 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5069 }
5070 if (URI) {
5071 xmlURIPtr uri;
5072
5073 uri = xmlParseURI((const char *) URI);
5074 if (uri == NULL) {
5075 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5076 "Invalid URI: %s\n", URI);
5077 /*
5078 * This really ought to be a well formedness error
5079 * but the XML Core WG decided otherwise c.f. issue
5080 * E26 of the XML erratas.
5081 */
5082 } else {
5083 if (uri->fragment != NULL) {
5084 /*
5085 * Okay this is foolish to block those but not
5086 * invalid URIs.
5087 */
5088 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5089 } else {
5090 if ((ctxt->sax != NULL) &&
5091 (!ctxt->disableSAX) &&
5092 (ctxt->sax->entityDecl != NULL))
5093 ctxt->sax->entityDecl(ctxt->userData, name,
5094 XML_EXTERNAL_PARAMETER_ENTITY,
5095 literal, URI, NULL);
5096 }
5097 xmlFreeURI(uri);
5098 }
5099 }
5100 }
5101 } else {
5102 if ((RAW == '"') || (RAW == '\'')) {
5103 value = xmlParseEntityValue(ctxt, &orig);
5104 if ((ctxt->sax != NULL) &&
5105 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5106 ctxt->sax->entityDecl(ctxt->userData, name,
5107 XML_INTERNAL_GENERAL_ENTITY,
5108 NULL, NULL, value);
5109 /*
5110 * For expat compatibility in SAX mode.
5111 */
5112 if ((ctxt->myDoc == NULL) ||
5113 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5114 if (ctxt->myDoc == NULL) {
5115 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5116 if (ctxt->myDoc == NULL) {
5117 xmlErrMemory(ctxt, "New Doc failed");
5118 return;
5119 }
5120 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5121 }
5122 if (ctxt->myDoc->intSubset == NULL)
5123 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5124 BAD_CAST "fake", NULL, NULL);
5125
5126 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5127 NULL, NULL, value);
5128 }
5129 } else {
5130 URI = xmlParseExternalID(ctxt, &literal, 1);
5131 if ((URI == NULL) && (literal == NULL)) {
5132 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5133 }
5134 if (URI) {
5135 xmlURIPtr uri;
5136
5137 uri = xmlParseURI((const char *)URI);
5138 if (uri == NULL) {
5139 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5140 "Invalid URI: %s\n", URI);
5141 /*
5142 * This really ought to be a well formedness error
5143 * but the XML Core WG decided otherwise c.f. issue
5144 * E26 of the XML erratas.
5145 */
5146 } else {
5147 if (uri->fragment != NULL) {
5148 /*
5149 * Okay this is foolish to block those but not
5150 * invalid URIs.
5151 */
5152 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5153 }
5154 xmlFreeURI(uri);
5155 }
5156 }
5157 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5158 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5159 "Space required before 'NDATA'\n");
5160 }
5161 SKIP_BLANKS;
5162 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5163 SKIP(5);
5164 if (!IS_BLANK_CH(CUR)) {
5165 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5166 "Space required after 'NDATA'\n");
5167 }
5168 SKIP_BLANKS;
5169 ndata = xmlParseName(ctxt);
5170 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5171 (ctxt->sax->unparsedEntityDecl != NULL))
5172 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5173 literal, URI, ndata);
5174 } else {
5175 if ((ctxt->sax != NULL) &&
5176 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5177 ctxt->sax->entityDecl(ctxt->userData, name,
5178 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5179 literal, URI, NULL);
5180 /*
5181 * For expat compatibility in SAX mode.
5182 * assuming the entity repalcement was asked for
5183 */
5184 if ((ctxt->replaceEntities != 0) &&
5185 ((ctxt->myDoc == NULL) ||
5186 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5187 if (ctxt->myDoc == NULL) {
5188 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5189 if (ctxt->myDoc == NULL) {
5190 xmlErrMemory(ctxt, "New Doc failed");
5191 return;
5192 }
5193 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5194 }
5195
5196 if (ctxt->myDoc->intSubset == NULL)
5197 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5198 BAD_CAST "fake", NULL, NULL);
5199 xmlSAX2EntityDecl(ctxt, name,
5200 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5201 literal, URI, NULL);
5202 }
5203 }
5204 }
5205 }
5206 SKIP_BLANKS;
5207 if (RAW != '>') {
5208 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5209 "xmlParseEntityDecl: entity %s not terminated\n", name);
5210 } else {
5211 if (input != ctxt->input) {
5212 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5213 "Entity declaration doesn't start and stop in the same entity\n");
5214 }
5215 NEXT;
5216 }
5217 if (orig != NULL) {
5218 /*
5219 * Ugly mechanism to save the raw entity value.
5220 */
5221 xmlEntityPtr cur = NULL;
5222
5223 if (isParameter) {
5224 if ((ctxt->sax != NULL) &&
5225 (ctxt->sax->getParameterEntity != NULL))
5226 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5227 } else {
5228 if ((ctxt->sax != NULL) &&
5229 (ctxt->sax->getEntity != NULL))
5230 cur = ctxt->sax->getEntity(ctxt->userData, name);
5231 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5232 cur = xmlSAX2GetEntity(ctxt, name);
5233 }
5234 }
5235 if (cur != NULL) {
5236 if (cur->orig != NULL)
5237 xmlFree(orig);
5238 else
5239 cur->orig = orig;
5240 } else
5241 xmlFree(orig);
5242 }
5243 if (value != NULL) xmlFree(value);
5244 if (URI != NULL) xmlFree(URI);
5245 if (literal != NULL) xmlFree(literal);
5246 }
5247 }
5248
5249 /**
5250 * xmlParseDefaultDecl:
5251 * @ctxt: an XML parser context
5252 * @value: Receive a possible fixed default value for the attribute
5253 *
5254 * Parse an attribute default declaration
5255 *
5256 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5257 *
5258 * [ VC: Required Attribute ]
5259 * if the default declaration is the keyword #REQUIRED, then the
5260 * attribute must be specified for all elements of the type in the
5261 * attribute-list declaration.
5262 *
5263 * [ VC: Attribute Default Legal ]
5264 * The declared default value must meet the lexical constraints of
5265 * the declared attribute type c.f. xmlValidateAttributeDecl()
5266 *
5267 * [ VC: Fixed Attribute Default ]
5268 * if an attribute has a default value declared with the #FIXED
5269 * keyword, instances of that attribute must match the default value.
5270 *
5271 * [ WFC: No < in Attribute Values ]
5272 * handled in xmlParseAttValue()
5273 *
5274 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5275 * or XML_ATTRIBUTE_FIXED.
5276 */
5277
5278 int
5279 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5280 int val;
5281 xmlChar *ret;
5282
5283 *value = NULL;
5284 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5285 SKIP(9);
5286 return(XML_ATTRIBUTE_REQUIRED);
5287 }
5288 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5289 SKIP(8);
5290 return(XML_ATTRIBUTE_IMPLIED);
5291 }
5292 val = XML_ATTRIBUTE_NONE;
5293 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5294 SKIP(6);
5295 val = XML_ATTRIBUTE_FIXED;
5296 if (!IS_BLANK_CH(CUR)) {
5297 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5298 "Space required after '#FIXED'\n");
5299 }
5300 SKIP_BLANKS;
5301 }
5302 ret = xmlParseAttValue(ctxt);
5303 ctxt->instate = XML_PARSER_DTD;
5304 if (ret == NULL) {
5305 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5306 "Attribute default value declaration error\n");
5307 } else
5308 *value = ret;
5309 return(val);
5310 }
5311
5312 /**
5313 * xmlParseNotationType:
5314 * @ctxt: an XML parser context
5315 *
5316 * parse an Notation attribute type.
5317 *
5318 * Note: the leading 'NOTATION' S part has already being parsed...
5319 *
5320 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5321 *
5322 * [ VC: Notation Attributes ]
5323 * Values of this type must match one of the notation names included
5324 * in the declaration; all notation names in the declaration must be declared.
5325 *
5326 * Returns: the notation attribute tree built while parsing
5327 */
5328
5329 xmlEnumerationPtr
5330 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5331 const xmlChar *name;
5332 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5333
5334 if (RAW != '(') {
5335 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5336 return(NULL);
5337 }
5338 SHRINK;
5339 do {
5340 NEXT;
5341 SKIP_BLANKS;
5342 name = xmlParseName(ctxt);
5343 if (name == NULL) {
5344 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5345 "Name expected in NOTATION declaration\n");
5346 xmlFreeEnumeration(ret);
5347 return(NULL);
5348 }
5349 tmp = ret;
5350 while (tmp != NULL) {
5351 if (xmlStrEqual(name, tmp->name)) {
5352 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5353 "standalone: attribute notation value token %s duplicated\n",
5354 name, NULL);
5355 if (!xmlDictOwns(ctxt->dict, name))
5356 xmlFree((xmlChar *) name);
5357 break;
5358 }
5359 tmp = tmp->next;
5360 }
5361 if (tmp == NULL) {
5362 cur = xmlCreateEnumeration(name);
5363 if (cur == NULL) {
5364 xmlFreeEnumeration(ret);
5365 return(NULL);
5366 }
5367 if (last == NULL) ret = last = cur;
5368 else {
5369 last->next = cur;
5370 last = cur;
5371 }
5372 }
5373 SKIP_BLANKS;
5374 } while (RAW == '|');
5375 if (RAW != ')') {
5376 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5377 xmlFreeEnumeration(ret);
5378 return(NULL);
5379 }
5380 NEXT;
5381 return(ret);
5382 }
5383
5384 /**
5385 * xmlParseEnumerationType:
5386 * @ctxt: an XML parser context
5387 *
5388 * parse an Enumeration attribute type.
5389 *
5390 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5391 *
5392 * [ VC: Enumeration ]
5393 * Values of this type must match one of the Nmtoken tokens in
5394 * the declaration
5395 *
5396 * Returns: the enumeration attribute tree built while parsing
5397 */
5398
5399 xmlEnumerationPtr
5400 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5401 xmlChar *name;
5402 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5403
5404 if (RAW != '(') {
5405 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5406 return(NULL);
5407 }
5408 SHRINK;
5409 do {
5410 NEXT;
5411 SKIP_BLANKS;
5412 name = xmlParseNmtoken(ctxt);
5413 if (name == NULL) {
5414 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5415 return(ret);
5416 }
5417 tmp = ret;
5418 while (tmp != NULL) {
5419 if (xmlStrEqual(name, tmp->name)) {
5420 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5421 "standalone: attribute enumeration value token %s duplicated\n",
5422 name, NULL);
5423 if (!xmlDictOwns(ctxt->dict, name))
5424 xmlFree(name);
5425 break;
5426 }
5427 tmp = tmp->next;
5428 }
5429 if (tmp == NULL) {
5430 cur = xmlCreateEnumeration(name);
5431 if (!xmlDictOwns(ctxt->dict, name))
5432 xmlFree(name);
5433 if (cur == NULL) {
5434 xmlFreeEnumeration(ret);
5435 return(NULL);
5436 }
5437 if (last == NULL) ret = last = cur;
5438 else {
5439 last->next = cur;
5440 last = cur;
5441 }
5442 }
5443 SKIP_BLANKS;
5444 } while (RAW == '|');
5445 if (RAW != ')') {
5446 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5447 return(ret);
5448 }
5449 NEXT;
5450 return(ret);
5451 }
5452
5453 /**
5454 * xmlParseEnumeratedType:
5455 * @ctxt: an XML parser context
5456 * @tree: the enumeration tree built while parsing
5457 *
5458 * parse an Enumerated attribute type.
5459 *
5460 * [57] EnumeratedType ::= NotationType | Enumeration
5461 *
5462 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5463 *
5464 *
5465 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5466 */
5467
5468 int
5469 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5470 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5471 SKIP(8);
5472 if (!IS_BLANK_CH(CUR)) {
5473 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5474 "Space required after 'NOTATION'\n");
5475 return(0);
5476 }
5477 SKIP_BLANKS;
5478 *tree = xmlParseNotationType(ctxt);
5479 if (*tree == NULL) return(0);
5480 return(XML_ATTRIBUTE_NOTATION);
5481 }
5482 *tree = xmlParseEnumerationType(ctxt);
5483 if (*tree == NULL) return(0);
5484 return(XML_ATTRIBUTE_ENUMERATION);
5485 }
5486
5487 /**
5488 * xmlParseAttributeType:
5489 * @ctxt: an XML parser context
5490 * @tree: the enumeration tree built while parsing
5491 *
5492 * parse the Attribute list def for an element
5493 *
5494 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5495 *
5496 * [55] StringType ::= 'CDATA'
5497 *
5498 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5499 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5500 *
5501 * Validity constraints for attribute values syntax are checked in
5502 * xmlValidateAttributeValue()
5503 *
5504 * [ VC: ID ]
5505 * Values of type ID must match the Name production. A name must not
5506 * appear more than once in an XML document as a value of this type;
5507 * i.e., ID values must uniquely identify the elements which bear them.
5508 *
5509 * [ VC: One ID per Element Type ]
5510 * No element type may have more than one ID attribute specified.
5511 *
5512 * [ VC: ID Attribute Default ]
5513 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5514 *
5515 * [ VC: IDREF ]
5516 * Values of type IDREF must match the Name production, and values
5517 * of type IDREFS must match Names; each IDREF Name must match the value
5518 * of an ID attribute on some element in the XML document; i.e. IDREF
5519 * values must match the value of some ID attribute.
5520 *
5521 * [ VC: Entity Name ]
5522 * Values of type ENTITY must match the Name production, values
5523 * of type ENTITIES must match Names; each Entity Name must match the
5524 * name of an unparsed entity declared in the DTD.
5525 *
5526 * [ VC: Name Token ]
5527 * Values of type NMTOKEN must match the Nmtoken production; values
5528 * of type NMTOKENS must match Nmtokens.
5529 *
5530 * Returns the attribute type
5531 */
5532 int
5533 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5534 SHRINK;
5535 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5536 SKIP(5);
5537 return(XML_ATTRIBUTE_CDATA);
5538 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5539 SKIP(6);
5540 return(XML_ATTRIBUTE_IDREFS);
5541 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5542 SKIP(5);
5543 return(XML_ATTRIBUTE_IDREF);
5544 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5545 SKIP(2);
5546 return(XML_ATTRIBUTE_ID);
5547 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5548 SKIP(6);
5549 return(XML_ATTRIBUTE_ENTITY);
5550 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5551 SKIP(8);
5552 return(XML_ATTRIBUTE_ENTITIES);
5553 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5554 SKIP(8);
5555 return(XML_ATTRIBUTE_NMTOKENS);
5556 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5557 SKIP(7);
5558 return(XML_ATTRIBUTE_NMTOKEN);
5559 }
5560 return(xmlParseEnumeratedType(ctxt, tree));
5561 }
5562
5563 /**
5564 * xmlParseAttributeListDecl:
5565 * @ctxt: an XML parser context
5566 *
5567 * : parse the Attribute list def for an element
5568 *
5569 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5570 *
5571 * [53] AttDef ::= S Name S AttType S DefaultDecl
5572 *
5573 */
5574 void
5575 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5576 const xmlChar *elemName;
5577 const xmlChar *attrName;
5578 xmlEnumerationPtr tree;
5579
5580 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5581 xmlParserInputPtr input = ctxt->input;
5582
5583 SKIP(9);
5584 if (!IS_BLANK_CH(CUR)) {
5585 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5586 "Space required after '<!ATTLIST'\n");
5587 }
5588 SKIP_BLANKS;
5589 elemName = xmlParseName(ctxt);
5590 if (elemName == NULL) {
5591 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5592 "ATTLIST: no name for Element\n");
5593 return;
5594 }
5595 SKIP_BLANKS;
5596 GROW;
5597 while (RAW != '>') {
5598 const xmlChar *check = CUR_PTR;
5599 int type;
5600 int def;
5601 xmlChar *defaultValue = NULL;
5602
5603 GROW;
5604 tree = NULL;
5605 attrName = xmlParseName(ctxt);
5606 if (attrName == NULL) {
5607 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5608 "ATTLIST: no name for Attribute\n");
5609 break;
5610 }
5611 GROW;
5612 if (!IS_BLANK_CH(CUR)) {
5613 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5614 "Space required after the attribute name\n");
5615 break;
5616 }
5617 SKIP_BLANKS;
5618
5619 type = xmlParseAttributeType(ctxt, &tree);
5620 if (type <= 0) {
5621 break;
5622 }
5623
5624 GROW;
5625 if (!IS_BLANK_CH(CUR)) {
5626 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5627 "Space required after the attribute type\n");
5628 if (tree != NULL)
5629 xmlFreeEnumeration(tree);
5630 break;
5631 }
5632 SKIP_BLANKS;
5633
5634 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5635 if (def <= 0) {
5636 if (defaultValue != NULL)
5637 xmlFree(defaultValue);
5638 if (tree != NULL)
5639 xmlFreeEnumeration(tree);
5640 break;
5641 }
5642 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5643 xmlAttrNormalizeSpace(defaultValue, defaultValue);
5644
5645 GROW;
5646 if (RAW != '>') {
5647 if (!IS_BLANK_CH(CUR)) {
5648 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5649 "Space required after the attribute default value\n");
5650 if (defaultValue != NULL)
5651 xmlFree(defaultValue);
5652 if (tree != NULL)
5653 xmlFreeEnumeration(tree);
5654 break;
5655 }
5656 SKIP_BLANKS;
5657 }
5658 if (check == CUR_PTR) {
5659 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5660 "in xmlParseAttributeListDecl\n");
5661 if (defaultValue != NULL)
5662 xmlFree(defaultValue);
5663 if (tree != NULL)
5664 xmlFreeEnumeration(tree);
5665 break;
5666 }
5667 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5668 (ctxt->sax->attributeDecl != NULL))
5669 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5670 type, def, defaultValue, tree);
5671 else if (tree != NULL)
5672 xmlFreeEnumeration(tree);
5673
5674 if ((ctxt->sax2) && (defaultValue != NULL) &&
5675 (def != XML_ATTRIBUTE_IMPLIED) &&
5676 (def != XML_ATTRIBUTE_REQUIRED)) {
5677 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5678 }
5679 if (ctxt->sax2) {
5680 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5681 }
5682 if (defaultValue != NULL)
5683 xmlFree(defaultValue);
5684 GROW;
5685 }
5686 if (RAW == '>') {
5687 if (input != ctxt->input) {
5688 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5689 "Attribute list declaration doesn't start and stop in the same entity\n",
5690 NULL, NULL);
5691 }
5692 NEXT;
5693 }
5694 }
5695 }
5696
5697 /**
5698 * xmlParseElementMixedContentDecl:
5699 * @ctxt: an XML parser context
5700 * @inputchk: the input used for the current entity, needed for boundary checks
5701 *
5702 * parse the declaration for a Mixed Element content
5703 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5704 *
5705 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5706 * '(' S? '#PCDATA' S? ')'
5707 *
5708 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5709 *
5710 * [ VC: No Duplicate Types ]
5711 * The same name must not appear more than once in a single
5712 * mixed-content declaration.
5713 *
5714 * returns: the list of the xmlElementContentPtr describing the element choices
5715 */
5716 xmlElementContentPtr
5717 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5718 xmlElementContentPtr ret = NULL, cur = NULL, n;
5719 const xmlChar *elem = NULL;
5720
5721 GROW;
5722 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5723 SKIP(7);
5724 SKIP_BLANKS;
5725 SHRINK;
5726 if (RAW == ')') {
5727 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5728 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5729 "Element content declaration doesn't start and stop in the same entity\n",
5730 NULL, NULL);
5731 }
5732 NEXT;
5733 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5734 if (ret == NULL)
5735 return(NULL);
5736 if (RAW == '*') {
5737 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5738 NEXT;
5739 }
5740 return(ret);
5741 }
5742 if ((RAW == '(') || (RAW == '|')) {
5743 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5744 if (ret == NULL) return(NULL);
5745 }
5746 while (RAW == '|') {
5747 NEXT;
5748 if (elem == NULL) {
5749 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5750 if (ret == NULL) return(NULL);
5751 ret->c1 = cur;
5752 if (cur != NULL)
5753 cur->parent = ret;
5754 cur = ret;
5755 } else {
5756 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5757 if (n == NULL) return(NULL);
5758 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5759 if (n->c1 != NULL)
5760 n->c1->parent = n;
5761 cur->c2 = n;
5762 if (n != NULL)
5763 n->parent = cur;
5764 cur = n;
5765 }
5766 SKIP_BLANKS;
5767 elem = xmlParseName(ctxt);
5768 if (elem == NULL) {
5769 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5770 "xmlParseElementMixedContentDecl : Name expected\n");
5771 xmlFreeDocElementContent(ctxt->myDoc, cur);
5772 return(NULL);
5773 }
5774 SKIP_BLANKS;
5775 GROW;
5776 }
5777 if ((RAW == ')') && (NXT(1) == '*')) {
5778 if (elem != NULL) {
5779 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5780 XML_ELEMENT_CONTENT_ELEMENT);
5781 if (cur->c2 != NULL)
5782 cur->c2->parent = cur;
5783 }
5784 if (ret != NULL)
5785 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5786 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5787 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5788 "Element content declaration doesn't start and stop in the same entity\n",
5789 NULL, NULL);
5790 }
5791 SKIP(2);
5792 } else {
5793 xmlFreeDocElementContent(ctxt->myDoc, ret);
5794 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5795 return(NULL);
5796 }
5797
5798 } else {
5799 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5800 }
5801 return(ret);
5802 }
5803
5804 /**
5805 * xmlParseElementChildrenContentDeclPriv:
5806 * @ctxt: an XML parser context
5807 * @inputchk: the input used for the current entity, needed for boundary checks
5808 * @depth: the level of recursion
5809 *
5810 * parse the declaration for a Mixed Element content
5811 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5812 *
5813 *
5814 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5815 *
5816 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5817 *
5818 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5819 *
5820 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5821 *
5822 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5823 * TODO Parameter-entity replacement text must be properly nested
5824 * with parenthesized groups. That is to say, if either of the
5825 * opening or closing parentheses in a choice, seq, or Mixed
5826 * construct is contained in the replacement text for a parameter
5827 * entity, both must be contained in the same replacement text. For
5828 * interoperability, if a parameter-entity reference appears in a
5829 * choice, seq, or Mixed construct, its replacement text should not
5830 * be empty, and neither the first nor last non-blank character of
5831 * the replacement text should be a connector (| or ,).
5832 *
5833 * Returns the tree of xmlElementContentPtr describing the element
5834 * hierarchy.
5835 */
5836 static xmlElementContentPtr
5837 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5838 int depth) {
5839 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5840 const xmlChar *elem;
5841 xmlChar type = 0;
5842
5843 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5844 (depth > 2048)) {
5845 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5846 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5847 depth);
5848 return(NULL);
5849 }
5850 SKIP_BLANKS;
5851 GROW;
5852 if (RAW == '(') {
5853 int inputid = ctxt->input->id;
5854
5855 /* Recurse on first child */
5856 NEXT;
5857 SKIP_BLANKS;
5858 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5859 depth + 1);
5860 SKIP_BLANKS;
5861 GROW;
5862 } else {
5863 elem = xmlParseName(ctxt);
5864 if (elem == NULL) {
5865 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5866 return(NULL);
5867 }
5868 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5869 if (cur == NULL) {
5870 xmlErrMemory(ctxt, NULL);
5871 return(NULL);
5872 }
5873 GROW;
5874 if (RAW == '?') {
5875 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5876 NEXT;
5877 } else if (RAW == '*') {
5878 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5879 NEXT;
5880 } else if (RAW == '+') {
5881 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5882 NEXT;
5883 } else {
5884 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5885 }
5886 GROW;
5887 }
5888 SKIP_BLANKS;
5889 SHRINK;
5890 while (RAW != ')') {
5891 /*
5892 * Each loop we parse one separator and one element.
5893 */
5894 if (RAW == ',') {
5895 if (type == 0) type = CUR;
5896
5897 /*
5898 * Detect "Name | Name , Name" error
5899 */
5900 else if (type != CUR) {
5901 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5902 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5903 type);
5904 if ((last != NULL) && (last != ret))
5905 xmlFreeDocElementContent(ctxt->myDoc, last);
5906 if (ret != NULL)
5907 xmlFreeDocElementContent(ctxt->myDoc, ret);
5908 return(NULL);
5909 }
5910 NEXT;
5911
5912 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
5913 if (op == NULL) {
5914 if ((last != NULL) && (last != ret))
5915 xmlFreeDocElementContent(ctxt->myDoc, last);
5916 xmlFreeDocElementContent(ctxt->myDoc, ret);
5917 return(NULL);
5918 }
5919 if (last == NULL) {
5920 op->c1 = ret;
5921 if (ret != NULL)
5922 ret->parent = op;
5923 ret = cur = op;
5924 } else {
5925 cur->c2 = op;
5926 if (op != NULL)
5927 op->parent = cur;
5928 op->c1 = last;
5929 if (last != NULL)
5930 last->parent = op;
5931 cur =op;
5932 last = NULL;
5933 }
5934 } else if (RAW == '|') {
5935 if (type == 0) type = CUR;
5936
5937 /*
5938 * Detect "Name , Name | Name" error
5939 */
5940 else if (type != CUR) {
5941 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5942 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5943 type);
5944 if ((last != NULL) && (last != ret))
5945 xmlFreeDocElementContent(ctxt->myDoc, last);
5946 if (ret != NULL)
5947 xmlFreeDocElementContent(ctxt->myDoc, ret);
5948 return(NULL);
5949 }
5950 NEXT;
5951
5952 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5953 if (op == NULL) {
5954 if ((last != NULL) && (last != ret))
5955 xmlFreeDocElementContent(ctxt->myDoc, last);
5956 if (ret != NULL)
5957 xmlFreeDocElementContent(ctxt->myDoc, ret);
5958 return(NULL);
5959 }
5960 if (last == NULL) {
5961 op->c1 = ret;
5962 if (ret != NULL)
5963 ret->parent = op;
5964 ret = cur = op;
5965 } else {
5966 cur->c2 = op;
5967 if (op != NULL)
5968 op->parent = cur;
5969 op->c1 = last;
5970 if (last != NULL)
5971 last->parent = op;
5972 cur =op;
5973 last = NULL;
5974 }
5975 } else {
5976 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
5977 if ((last != NULL) && (last != ret))
5978 xmlFreeDocElementContent(ctxt->myDoc, last);
5979 if (ret != NULL)
5980 xmlFreeDocElementContent(ctxt->myDoc, ret);
5981 return(NULL);
5982 }
5983 GROW;
5984 SKIP_BLANKS;
5985 GROW;
5986 if (RAW == '(') {
5987 int inputid = ctxt->input->id;
5988 /* Recurse on second child */
5989 NEXT;
5990 SKIP_BLANKS;
5991 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5992 depth + 1);
5993 SKIP_BLANKS;
5994 } else {
5995 elem = xmlParseName(ctxt);
5996 if (elem == NULL) {
5997 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5998 if (ret != NULL)
5999 xmlFreeDocElementContent(ctxt->myDoc, ret);
6000 return(NULL);
6001 }
6002 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6003 if (last == NULL) {
6004 if (ret != NULL)
6005 xmlFreeDocElementContent(ctxt->myDoc, ret);
6006 return(NULL);
6007 }
6008 if (RAW == '?') {
6009 last->ocur = XML_ELEMENT_CONTENT_OPT;
6010 NEXT;
6011 } else if (RAW == '*') {
6012 last->ocur = XML_ELEMENT_CONTENT_MULT;
6013 NEXT;
6014 } else if (RAW == '+') {
6015 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6016 NEXT;
6017 } else {
6018 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6019 }
6020 }
6021 SKIP_BLANKS;
6022 GROW;
6023 }
6024 if ((cur != NULL) && (last != NULL)) {
6025 cur->c2 = last;
6026 if (last != NULL)
6027 last->parent = cur;
6028 }
6029 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6030 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6031 "Element content declaration doesn't start and stop in the same entity\n",
6032 NULL, NULL);
6033 }
6034 NEXT;
6035 if (RAW == '?') {
6036 if (ret != NULL) {
6037 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6038 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6039 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6040 else
6041 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6042 }
6043 NEXT;
6044 } else if (RAW == '*') {
6045 if (ret != NULL) {
6046 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6047 cur = ret;
6048 /*
6049 * Some normalization:
6050 * (a | b* | c?)* == (a | b | c)*
6051 */
6052 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6053 if ((cur->c1 != NULL) &&
6054 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6055 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6056 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6057 if ((cur->c2 != NULL) &&
6058 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6059 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6060 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6061 cur = cur->c2;
6062 }
6063 }
6064 NEXT;
6065 } else if (RAW == '+') {
6066 if (ret != NULL) {
6067 int found = 0;
6068
6069 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6070 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6071 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6072 else
6073 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6074 /*
6075 * Some normalization:
6076 * (a | b*)+ == (a | b)*
6077 * (a | b?)+ == (a | b)*
6078 */
6079 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6080 if ((cur->c1 != NULL) &&
6081 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6082 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6083 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6084 found = 1;
6085 }
6086 if ((cur->c2 != NULL) &&
6087 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6088 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6089 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6090 found = 1;
6091 }
6092 cur = cur->c2;
6093 }
6094 if (found)
6095 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6096 }
6097 NEXT;
6098 }
6099 return(ret);
6100 }
6101
6102 /**
6103 * xmlParseElementChildrenContentDecl:
6104 * @ctxt: an XML parser context
6105 * @inputchk: the input used for the current entity, needed for boundary checks
6106 *
6107 * parse the declaration for a Mixed Element content
6108 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6109 *
6110 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6111 *
6112 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6113 *
6114 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6115 *
6116 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6117 *
6118 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6119 * TODO Parameter-entity replacement text must be properly nested
6120 * with parenthesized groups. That is to say, if either of the
6121 * opening or closing parentheses in a choice, seq, or Mixed
6122 * construct is contained in the replacement text for a parameter
6123 * entity, both must be contained in the same replacement text. For
6124 * interoperability, if a parameter-entity reference appears in a
6125 * choice, seq, or Mixed construct, its replacement text should not
6126 * be empty, and neither the first nor last non-blank character of
6127 * the replacement text should be a connector (| or ,).
6128 *
6129 * Returns the tree of xmlElementContentPtr describing the element
6130 * hierarchy.
6131 */
6132 xmlElementContentPtr
6133 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6134 /* stub left for API/ABI compat */
6135 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6136 }
6137
6138 /**
6139 * xmlParseElementContentDecl:
6140 * @ctxt: an XML parser context
6141 * @name: the name of the element being defined.
6142 * @result: the Element Content pointer will be stored here if any
6143 *
6144 * parse the declaration for an Element content either Mixed or Children,
6145 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6146 *
6147 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6148 *
6149 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6150 */
6151
6152 int
6153 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6154 xmlElementContentPtr *result) {
6155
6156 xmlElementContentPtr tree = NULL;
6157 int inputid = ctxt->input->id;
6158 int res;
6159
6160 *result = NULL;
6161
6162 if (RAW != '(') {
6163 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6164 "xmlParseElementContentDecl : %s '(' expected\n", name);
6165 return(-1);
6166 }
6167 NEXT;
6168 GROW;
6169 SKIP_BLANKS;
6170 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6171 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6172 res = XML_ELEMENT_TYPE_MIXED;
6173 } else {
6174 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6175 res = XML_ELEMENT_TYPE_ELEMENT;
6176 }
6177 SKIP_BLANKS;
6178 *result = tree;
6179 return(res);
6180 }
6181
6182 /**
6183 * xmlParseElementDecl:
6184 * @ctxt: an XML parser context
6185 *
6186 * parse an Element declaration.
6187 *
6188 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6189 *
6190 * [ VC: Unique Element Type Declaration ]
6191 * No element type may be declared more than once
6192 *
6193 * Returns the type of the element, or -1 in case of error
6194 */
6195 int
6196 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6197 const xmlChar *name;
6198 int ret = -1;
6199 xmlElementContentPtr content = NULL;
6200
6201 /* GROW; done in the caller */
6202 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6203 xmlParserInputPtr input = ctxt->input;
6204
6205 SKIP(9);
6206 if (!IS_BLANK_CH(CUR)) {
6207 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6208 "Space required after 'ELEMENT'\n");
6209 }
6210 SKIP_BLANKS;
6211 name = xmlParseName(ctxt);
6212 if (name == NULL) {
6213 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6214 "xmlParseElementDecl: no name for Element\n");
6215 return(-1);
6216 }
6217 while ((RAW == 0) && (ctxt->inputNr > 1))
6218 xmlPopInput(ctxt);
6219 if (!IS_BLANK_CH(CUR)) {
6220 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6221 "Space required after the element name\n");
6222 }
6223 SKIP_BLANKS;
6224 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6225 SKIP(5);
6226 /*
6227 * Element must always be empty.
6228 */
6229 ret = XML_ELEMENT_TYPE_EMPTY;
6230 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6231 (NXT(2) == 'Y')) {
6232 SKIP(3);
6233 /*
6234 * Element is a generic container.
6235 */
6236 ret = XML_ELEMENT_TYPE_ANY;
6237 } else if (RAW == '(') {
6238 ret = xmlParseElementContentDecl(ctxt, name, &content);
6239 } else {
6240 /*
6241 * [ WFC: PEs in Internal Subset ] error handling.
6242 */
6243 if ((RAW == '%') && (ctxt->external == 0) &&
6244 (ctxt->inputNr == 1)) {
6245 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6246 "PEReference: forbidden within markup decl in internal subset\n");
6247 } else {
6248 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6249 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6250 }
6251 return(-1);
6252 }
6253
6254 SKIP_BLANKS;
6255 /*
6256 * Pop-up of finished entities.
6257 */
6258 while ((RAW == 0) && (ctxt->inputNr > 1))
6259 xmlPopInput(ctxt);
6260 SKIP_BLANKS;
6261
6262 if (RAW != '>') {
6263 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6264 if (content != NULL) {
6265 xmlFreeDocElementContent(ctxt->myDoc, content);
6266 }
6267 } else {
6268 if (input != ctxt->input) {
6269 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270 "Element declaration doesn't start and stop in the same entity\n");
6271 }
6272
6273 NEXT;
6274 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6275 (ctxt->sax->elementDecl != NULL)) {
6276 if (content != NULL)
6277 content->parent = NULL;
6278 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6279 content);
6280 if ((content != NULL) && (content->parent == NULL)) {
6281 /*
6282 * this is a trick: if xmlAddElementDecl is called,
6283 * instead of copying the full tree it is plugged directly
6284 * if called from the parser. Avoid duplicating the
6285 * interfaces or change the API/ABI
6286 */
6287 xmlFreeDocElementContent(ctxt->myDoc, content);
6288 }
6289 } else if (content != NULL) {
6290 xmlFreeDocElementContent(ctxt->myDoc, content);
6291 }
6292 }
6293 }
6294 return(ret);
6295 }
6296
6297 /**
6298 * xmlParseConditionalSections
6299 * @ctxt: an XML parser context
6300 *
6301 * [61] conditionalSect ::= includeSect | ignoreSect
6302 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6303 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6304 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6305 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6306 */
6307
6308 static void
6309 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6310 int id = ctxt->input->id;
6311
6312 SKIP(3);
6313 SKIP_BLANKS;
6314 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6315 SKIP(7);
6316 SKIP_BLANKS;
6317 if (RAW != '[') {
6318 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6319 } else {
6320 if (ctxt->input->id != id) {
6321 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6322 "All markup of the conditional section is not in the same entity\n",
6323 NULL, NULL);
6324 }
6325 NEXT;
6326 }
6327 if (xmlParserDebugEntities) {
6328 if ((ctxt->input != NULL) && (ctxt->input->filename))
6329 xmlGenericError(xmlGenericErrorContext,
6330 "%s(%d): ", ctxt->input->filename,
6331 ctxt->input->line);
6332 xmlGenericError(xmlGenericErrorContext,
6333 "Entering INCLUDE Conditional Section\n");
6334 }
6335
6336 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6337 (NXT(2) != '>'))) {
6338 const xmlChar *check = CUR_PTR;
6339 unsigned int cons = ctxt->input->consumed;
6340
6341 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6342 xmlParseConditionalSections(ctxt);
6343 } else if (IS_BLANK_CH(CUR)) {
6344 NEXT;
6345 } else if (RAW == '%') {
6346 xmlParsePEReference(ctxt);
6347 } else
6348 xmlParseMarkupDecl(ctxt);
6349
6350 /*
6351 * Pop-up of finished entities.
6352 */
6353 while ((RAW == 0) && (ctxt->inputNr > 1))
6354 xmlPopInput(ctxt);
6355
6356 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6357 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6358 break;
6359 }
6360 }
6361 if (xmlParserDebugEntities) {
6362 if ((ctxt->input != NULL) && (ctxt->input->filename))
6363 xmlGenericError(xmlGenericErrorContext,
6364 "%s(%d): ", ctxt->input->filename,
6365 ctxt->input->line);
6366 xmlGenericError(xmlGenericErrorContext,
6367 "Leaving INCLUDE Conditional Section\n");
6368 }
6369
6370 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6371 int state;
6372 xmlParserInputState instate;
6373 int depth = 0;
6374
6375 SKIP(6);
6376 SKIP_BLANKS;
6377 if (RAW != '[') {
6378 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6379 } else {
6380 if (ctxt->input->id != id) {
6381 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6382 "All markup of the conditional section is not in the same entity\n",
6383 NULL, NULL);
6384 }
6385 NEXT;
6386 }
6387 if (xmlParserDebugEntities) {
6388 if ((ctxt->input != NULL) && (ctxt->input->filename))
6389 xmlGenericError(xmlGenericErrorContext,
6390 "%s(%d): ", ctxt->input->filename,
6391 ctxt->input->line);
6392 xmlGenericError(xmlGenericErrorContext,
6393 "Entering IGNORE Conditional Section\n");
6394 }
6395
6396 /*
6397 * Parse up to the end of the conditional section
6398 * But disable SAX event generating DTD building in the meantime
6399 */
6400 state = ctxt->disableSAX;
6401 instate = ctxt->instate;
6402 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6403 ctxt->instate = XML_PARSER_IGNORE;
6404
6405 while ((depth >= 0) && (RAW != 0)) {
6406 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6407 depth++;
6408 SKIP(3);
6409 continue;
6410 }
6411 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6412 if (--depth >= 0) SKIP(3);
6413 continue;
6414 }
6415 NEXT;
6416 continue;
6417 }
6418
6419 ctxt->disableSAX = state;
6420 ctxt->instate = instate;
6421
6422 if (xmlParserDebugEntities) {
6423 if ((ctxt->input != NULL) && (ctxt->input->filename))
6424 xmlGenericError(xmlGenericErrorContext,
6425 "%s(%d): ", ctxt->input->filename,
6426 ctxt->input->line);
6427 xmlGenericError(xmlGenericErrorContext,
6428 "Leaving IGNORE Conditional Section\n");
6429 }
6430
6431 } else {
6432 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6433 }
6434
6435 if (RAW == 0)
6436 SHRINK;
6437
6438 if (RAW == 0) {
6439 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6440 } else {
6441 if (ctxt->input->id != id) {
6442 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6443 "All markup of the conditional section is not in the same entity\n",
6444 NULL, NULL);
6445 }
6446 SKIP(3);
6447 }
6448 }
6449
6450 /**
6451 * xmlParseMarkupDecl:
6452 * @ctxt: an XML parser context
6453 *
6454 * parse Markup declarations
6455 *
6456 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6457 * NotationDecl | PI | Comment
6458 *
6459 * [ VC: Proper Declaration/PE Nesting ]
6460 * Parameter-entity replacement text must be properly nested with
6461 * markup declarations. That is to say, if either the first character
6462 * or the last character of a markup declaration (markupdecl above) is
6463 * contained in the replacement text for a parameter-entity reference,
6464 * both must be contained in the same replacement text.
6465 *
6466 * [ WFC: PEs in Internal Subset ]
6467 * In the internal DTD subset, parameter-entity references can occur
6468 * only where markup declarations can occur, not within markup declarations.
6469 * (This does not apply to references that occur in external parameter
6470 * entities or to the external subset.)
6471 */
6472 void
6473 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6474 GROW;
6475 if (CUR == '<') {
6476 if (NXT(1) == '!') {
6477 switch (NXT(2)) {
6478 case 'E':
6479 if (NXT(3) == 'L')
6480 xmlParseElementDecl(ctxt);
6481 else if (NXT(3) == 'N')
6482 xmlParseEntityDecl(ctxt);
6483 break;
6484 case 'A':
6485 xmlParseAttributeListDecl(ctxt);
6486 break;
6487 case 'N':
6488 xmlParseNotationDecl(ctxt);
6489 break;
6490 case '-':
6491 xmlParseComment(ctxt);
6492 break;
6493 default:
6494 /* there is an error but it will be detected later */
6495 break;
6496 }
6497 } else if (NXT(1) == '?') {
6498 xmlParsePI(ctxt);
6499 }
6500 }
6501 /*
6502 * This is only for internal subset. On external entities,
6503 * the replacement is done before parsing stage
6504 */
6505 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6506 xmlParsePEReference(ctxt);
6507
6508 /*
6509 * Conditional sections are allowed from entities included
6510 * by PE References in the internal subset.
6511 */
6512 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6513 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6514 xmlParseConditionalSections(ctxt);
6515 }
6516 }
6517
6518 ctxt->instate = XML_PARSER_DTD;
6519 }
6520
6521 /**
6522 * xmlParseTextDecl:
6523 * @ctxt: an XML parser context
6524 *
6525 * parse an XML declaration header for external entities
6526 *
6527 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6528 */
6529
6530 void
6531 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6532 xmlChar *version;
6533 const xmlChar *encoding;
6534
6535 /*
6536 * We know that '<?xml' is here.
6537 */
6538 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6539 SKIP(5);
6540 } else {
6541 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6542 return;
6543 }
6544
6545 if (!IS_BLANK_CH(CUR)) {
6546 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6547 "Space needed after '<?xml'\n");
6548 }
6549 SKIP_BLANKS;
6550
6551 /*
6552 * We may have the VersionInfo here.
6553 */
6554 version = xmlParseVersionInfo(ctxt);
6555 if (version == NULL)
6556 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6557 else {
6558 if (!IS_BLANK_CH(CUR)) {
6559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6560 "Space needed here\n");
6561 }
6562 }
6563 ctxt->input->version = version;
6564
6565 /*
6566 * We must have the encoding declaration
6567 */
6568 encoding = xmlParseEncodingDecl(ctxt);
6569 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6570 /*
6571 * The XML REC instructs us to stop parsing right here
6572 */
6573 return;
6574 }
6575 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6576 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6577 "Missing encoding in text declaration\n");
6578 }
6579
6580 SKIP_BLANKS;
6581 if ((RAW == '?') && (NXT(1) == '>')) {
6582 SKIP(2);
6583 } else if (RAW == '>') {
6584 /* Deprecated old WD ... */
6585 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6586 NEXT;
6587 } else {
6588 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6589 MOVETO_ENDTAG(CUR_PTR);
6590 NEXT;
6591 }
6592 }
6593
6594 /**
6595 * xmlParseExternalSubset:
6596 * @ctxt: an XML parser context
6597 * @ExternalID: the external identifier
6598 * @SystemID: the system identifier (or URL)
6599 *
6600 * parse Markup declarations from an external subset
6601 *
6602 * [30] extSubset ::= textDecl? extSubsetDecl
6603 *
6604 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6605 */
6606 void
6607 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6608 const xmlChar *SystemID) {
6609 xmlDetectSAX2(ctxt);
6610 GROW;
6611
6612 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6613 (ctxt->input->end - ctxt->input->cur >= 4)) {
6614 xmlChar start[4];
6615 xmlCharEncoding enc;
6616
6617 start[0] = RAW;
6618 start[1] = NXT(1);
6619 start[2] = NXT(2);
6620 start[3] = NXT(3);
6621 enc = xmlDetectCharEncoding(start, 4);
6622 if (enc != XML_CHAR_ENCODING_NONE)
6623 xmlSwitchEncoding(ctxt, enc);
6624 }
6625
6626 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6627 xmlParseTextDecl(ctxt);
6628 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6629 /*
6630 * The XML REC instructs us to stop parsing right here
6631 */
6632 ctxt->instate = XML_PARSER_EOF;
6633 return;
6634 }
6635 }
6636 if (ctxt->myDoc == NULL) {
6637 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6638 if (ctxt->myDoc == NULL) {
6639 xmlErrMemory(ctxt, "New Doc failed");
6640 return;
6641 }
6642 ctxt->myDoc->properties = XML_DOC_INTERNAL;
6643 }
6644 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6645 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6646
6647 ctxt->instate = XML_PARSER_DTD;
6648 ctxt->external = 1;
6649 while (((RAW == '<') && (NXT(1) == '?')) ||
6650 ((RAW == '<') && (NXT(1) == '!')) ||
6651 (RAW == '%') || IS_BLANK_CH(CUR)) {
6652 const xmlChar *check = CUR_PTR;
6653 unsigned int cons = ctxt->input->consumed;
6654
6655 GROW;
6656 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6657 xmlParseConditionalSections(ctxt);
6658 } else if (IS_BLANK_CH(CUR)) {
6659 NEXT;
6660 } else if (RAW == '%') {
6661 xmlParsePEReference(ctxt);
6662 } else
6663 xmlParseMarkupDecl(ctxt);
6664
6665 /*
6666 * Pop-up of finished entities.
6667 */
6668 while ((RAW == 0) && (ctxt->inputNr > 1))
6669 xmlPopInput(ctxt);
6670
6671 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6672 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6673 break;
6674 }
6675 }
6676
6677 if (RAW != 0) {
6678 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6679 }
6680
6681 }
6682
6683 /**
6684 * xmlParseReference:
6685 * @ctxt: an XML parser context
6686 *
6687 * parse and handle entity references in content, depending on the SAX
6688 * interface, this may end-up in a call to character() if this is a
6689 * CharRef, a predefined entity, if there is no reference() callback.
6690 * or if the parser was asked to switch to that mode.
6691 *
6692 * [67] Reference ::= EntityRef | CharRef
6693 */
6694 void
6695 xmlParseReference(xmlParserCtxtPtr ctxt) {
6696 xmlEntityPtr ent;
6697 xmlChar *val;
6698 int was_checked;
6699 xmlNodePtr list = NULL;
6700 xmlParserErrors ret = XML_ERR_OK;
6701
6702
6703 if (RAW != '&')
6704 return;
6705
6706 /*
6707 * Simple case of a CharRef
6708 */
6709 if (NXT(1) == '#') {
6710 int i = 0;
6711 xmlChar out[10];
6712 int hex = NXT(2);
6713 int value = xmlParseCharRef(ctxt);
6714
6715 if (value == 0)
6716 return;
6717 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6718 /*
6719 * So we are using non-UTF-8 buffers
6720 * Check that the char fit on 8bits, if not
6721 * generate a CharRef.
6722 */
6723 if (value <= 0xFF) {
6724 out[0] = value;
6725 out[1] = 0;
6726 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6727 (!ctxt->disableSAX))
6728 ctxt->sax->characters(ctxt->userData, out, 1);
6729 } else {
6730 if ((hex == 'x') || (hex == 'X'))
6731 snprintf((char *)out, sizeof(out), "#x%X", value);
6732 else
6733 snprintf((char *)out, sizeof(out), "#%d", value);
6734 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6735 (!ctxt->disableSAX))
6736 ctxt->sax->reference(ctxt->userData, out);
6737 }
6738 } else {
6739 /*
6740 * Just encode the value in UTF-8
6741 */
6742 COPY_BUF(0 ,out, i, value);
6743 out[i] = 0;
6744 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6745 (!ctxt->disableSAX))
6746 ctxt->sax->characters(ctxt->userData, out, i);
6747 }
6748 return;
6749 }
6750
6751 /*
6752 * We are seeing an entity reference
6753 */
6754 ent = xmlParseEntityRef(ctxt);
6755 if (ent == NULL) return;
6756 if (!ctxt->wellFormed)
6757 return;
6758 was_checked = ent->checked;
6759
6760 /* special case of predefined entities */
6761 if ((ent->name == NULL) ||
6762 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6763 val = ent->content;
6764 if (val == NULL) return;
6765 /*
6766 * inline the entity.
6767 */
6768 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6769 (!ctxt->disableSAX))
6770 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6771 return;
6772 }
6773
6774 /*
6775 * The first reference to the entity trigger a parsing phase
6776 * where the ent->children is filled with the result from
6777 * the parsing.
6778 */
6779 if (ent->checked == 0) {
6780 unsigned long oldnbent = ctxt->nbentities;
6781
6782 /*
6783 * This is a bit hackish but this seems the best
6784 * way to make sure both SAX and DOM entity support
6785 * behaves okay.
6786 */
6787 void *user_data;
6788 if (ctxt->userData == ctxt)
6789 user_data = NULL;
6790 else
6791 user_data = ctxt->userData;
6792
6793 /*
6794 * Check that this entity is well formed
6795 * 4.3.2: An internal general parsed entity is well-formed
6796 * if its replacement text matches the production labeled
6797 * content.
6798 */
6799 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6800 ctxt->depth++;
6801 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6802 user_data, &list);
6803 ctxt->depth--;
6804
6805 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6806 ctxt->depth++;
6807 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6808 user_data, ctxt->depth, ent->URI,
6809 ent->ExternalID, &list);
6810 ctxt->depth--;
6811 } else {
6812 ret = XML_ERR_ENTITY_PE_INTERNAL;
6813 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6814 "invalid entity type found\n", NULL);
6815 }
6816
6817 /*
6818 * Store the number of entities needing parsing for this entity
6819 * content and do checkings
6820 */
6821 ent->checked = ctxt->nbentities - oldnbent;
6822 if (ret == XML_ERR_ENTITY_LOOP) {
6823 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6824 xmlFreeNodeList(list);
6825 return;
6826 }
6827 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6828 xmlFreeNodeList(list);
6829 return;
6830 }
6831
6832 if ((ret == XML_ERR_OK) && (list != NULL)) {
6833 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6834 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6835 (ent->children == NULL)) {
6836 ent->children = list;
6837 if (ctxt->replaceEntities) {
6838 /*
6839 * Prune it directly in the generated document
6840 * except for single text nodes.
6841 */
6842 if (((list->type == XML_TEXT_NODE) &&
6843 (list->next == NULL)) ||
6844 (ctxt->parseMode == XML_PARSE_READER)) {
6845 list->parent = (xmlNodePtr) ent;
6846 list = NULL;
6847 ent->owner = 1;
6848 } else {
6849 ent->owner = 0;
6850 while (list != NULL) {
6851 list->parent = (xmlNodePtr) ctxt->node;
6852 list->doc = ctxt->myDoc;
6853 if (list->next == NULL)
6854 ent->last = list;
6855 list = list->next;
6856 }
6857 list = ent->children;
6858 #ifdef LIBXML_LEGACY_ENABLED
6859 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6860 xmlAddEntityReference(ent, list, NULL);
6861 #endif /* LIBXML_LEGACY_ENABLED */
6862 }
6863 } else {
6864 ent->owner = 1;
6865 while (list != NULL) {
6866 list->parent = (xmlNodePtr) ent;
6867 if (list->next == NULL)
6868 ent->last = list;
6869 list = list->next;
6870 }
6871 }
6872 } else {
6873 xmlFreeNodeList(list);
6874 list = NULL;
6875 }
6876 } else if ((ret != XML_ERR_OK) &&
6877 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6878 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6879 "Entity '%s' failed to parse\n", ent->name);
6880 } else if (list != NULL) {
6881 xmlFreeNodeList(list);
6882 list = NULL;
6883 }
6884 if (ent->checked == 0)
6885 ent->checked = 1;
6886 } else if (ent->checked != 1) {
6887 ctxt->nbentities += ent->checked;
6888 }
6889
6890 /*
6891 * Now that the entity content has been gathered
6892 * provide it to the application, this can take different forms based
6893 * on the parsing modes.
6894 */
6895 if (ent->children == NULL) {
6896 /*
6897 * Probably running in SAX mode and the callbacks don't
6898 * build the entity content. So unless we already went
6899 * though parsing for first checking go though the entity
6900 * content to generate callbacks associated to the entity
6901 */
6902 if (was_checked != 0) {
6903 void *user_data;
6904 /*
6905 * This is a bit hackish but this seems the best
6906 * way to make sure both SAX and DOM entity support
6907 * behaves okay.
6908 */
6909 if (ctxt->userData == ctxt)
6910 user_data = NULL;
6911 else
6912 user_data = ctxt->userData;
6913
6914 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6915 ctxt->depth++;
6916 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6917 ent->content, user_data, NULL);
6918 ctxt->depth--;
6919 } else if (ent->etype ==
6920 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6921 ctxt->depth++;
6922 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6923 ctxt->sax, user_data, ctxt->depth,
6924 ent->URI, ent->ExternalID, NULL);
6925 ctxt->depth--;
6926 } else {
6927 ret = XML_ERR_ENTITY_PE_INTERNAL;
6928 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6929 "invalid entity type found\n", NULL);
6930 }
6931 if (ret == XML_ERR_ENTITY_LOOP) {
6932 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6933 return;
6934 }
6935 }
6936 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6937 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6938 /*
6939 * Entity reference callback comes second, it's somewhat
6940 * superfluous but a compatibility to historical behaviour
6941 */
6942 ctxt->sax->reference(ctxt->userData, ent->name);
6943 }
6944 return;
6945 }
6946
6947 /*
6948 * If we didn't get any children for the entity being built
6949 */
6950 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6951 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6952 /*
6953 * Create a node.
6954 */
6955 ctxt->sax->reference(ctxt->userData, ent->name);
6956 return;
6957 }
6958
6959 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6960 /*
6961 * There is a problem on the handling of _private for entities
6962 * (bug 155816): Should we copy the content of the field from
6963 * the entity (possibly overwriting some value set by the user
6964 * when a copy is created), should we leave it alone, or should
6965 * we try to take care of different situations? The problem
6966 * is exacerbated by the usage of this field by the xmlReader.
6967 * To fix this bug, we look at _private on the created node
6968 * and, if it's NULL, we copy in whatever was in the entity.
6969 * If it's not NULL we leave it alone. This is somewhat of a
6970 * hack - maybe we should have further tests to determine
6971 * what to do.
6972 */
6973 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6974 /*
6975 * Seems we are generating the DOM content, do
6976 * a simple tree copy for all references except the first
6977 * In the first occurrence list contains the replacement.
6978 * progressive == 2 means we are operating on the Reader
6979 * and since nodes are discarded we must copy all the time.
6980 */
6981 if (((list == NULL) && (ent->owner == 0)) ||
6982 (ctxt->parseMode == XML_PARSE_READER)) {
6983 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6984
6985 /*
6986 * when operating on a reader, the entities definitions
6987 * are always owning the entities subtree.
6988 if (ctxt->parseMode == XML_PARSE_READER)
6989 ent->owner = 1;
6990 */
6991
6992 cur = ent->children;
6993 while (cur != NULL) {
6994 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6995 if (nw != NULL) {
6996 if (nw->_private == NULL)
6997 nw->_private = cur->_private;
6998 if (firstChild == NULL){
6999 firstChild = nw;
7000 }
7001 nw = xmlAddChild(ctxt->node, nw);
7002 }
7003 if (cur == ent->last) {
7004 /*
7005 * needed to detect some strange empty
7006 * node cases in the reader tests
7007 */
7008 if ((ctxt->parseMode == XML_PARSE_READER) &&
7009 (nw != NULL) &&
7010 (nw->type == XML_ELEMENT_NODE) &&
7011 (nw->children == NULL))
7012 nw->extra = 1;
7013
7014 break;
7015 }
7016 cur = cur->next;
7017 }
7018 #ifdef LIBXML_LEGACY_ENABLED
7019 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7020 xmlAddEntityReference(ent, firstChild, nw);
7021 #endif /* LIBXML_LEGACY_ENABLED */
7022 } else if (list == NULL) {
7023 xmlNodePtr nw = NULL, cur, next, last,
7024 firstChild = NULL;
7025 /*
7026 * Copy the entity child list and make it the new
7027 * entity child list. The goal is to make sure any
7028 * ID or REF referenced will be the one from the
7029 * document content and not the entity copy.
7030 */
7031 cur = ent->children;
7032 ent->children = NULL;
7033 last = ent->last;
7034 ent->last = NULL;
7035 while (cur != NULL) {
7036 next = cur->next;
7037 cur->next = NULL;
7038 cur->parent = NULL;
7039 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7040 if (nw != NULL) {
7041 if (nw->_private == NULL)
7042 nw->_private = cur->_private;
7043 if (firstChild == NULL){
7044 firstChild = cur;
7045 }
7046 xmlAddChild((xmlNodePtr) ent, nw);
7047 xmlAddChild(ctxt->node, cur);
7048 }
7049 if (cur == last)
7050 break;
7051 cur = next;
7052 }
7053 if (ent->owner == 0)
7054 ent->owner = 1;
7055 #ifdef LIBXML_LEGACY_ENABLED
7056 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7057 xmlAddEntityReference(ent, firstChild, nw);
7058 #endif /* LIBXML_LEGACY_ENABLED */
7059 } else {
7060 const xmlChar *nbktext;
7061
7062 /*
7063 * the name change is to avoid coalescing of the
7064 * node with a possible previous text one which
7065 * would make ent->children a dangling pointer
7066 */
7067 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7068 -1);
7069 if (ent->children->type == XML_TEXT_NODE)
7070 ent->children->name = nbktext;
7071 if ((ent->last != ent->children) &&
7072 (ent->last->type == XML_TEXT_NODE))
7073 ent->last->name = nbktext;
7074 xmlAddChildList(ctxt->node, ent->children);
7075 }
7076
7077 /*
7078 * This is to avoid a nasty side effect, see
7079 * characters() in SAX.c
7080 */
7081 ctxt->nodemem = 0;
7082 ctxt->nodelen = 0;
7083 return;
7084 }
7085 }
7086 }
7087
7088 /**
7089 * xmlParseEntityRef:
7090 * @ctxt: an XML parser context
7091 *
7092 * parse ENTITY references declarations
7093 *
7094 * [68] EntityRef ::= '&' Name ';'
7095 *
7096 * [ WFC: Entity Declared ]
7097 * In a document without any DTD, a document with only an internal DTD
7098 * subset which contains no parameter entity references, or a document
7099 * with "standalone='yes'", the Name given in the entity reference
7100 * must match that in an entity declaration, except that well-formed
7101 * documents need not declare any of the following entities: amp, lt,
7102 * gt, apos, quot. The declaration of a parameter entity must precede
7103 * any reference to it. Similarly, the declaration of a general entity
7104 * must precede any reference to it which appears in a default value in an
7105 * attribute-list declaration. Note that if entities are declared in the
7106 * external subset or in external parameter entities, a non-validating
7107 * processor is not obligated to read and process their declarations;
7108 * for such documents, the rule that an entity must be declared is a
7109 * well-formedness constraint only if standalone='yes'.
7110 *
7111 * [ WFC: Parsed Entity ]
7112 * An entity reference must not contain the name of an unparsed entity
7113 *
7114 * Returns the xmlEntityPtr if found, or NULL otherwise.
7115 */
7116 xmlEntityPtr
7117 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7118 const xmlChar *name;
7119 xmlEntityPtr ent = NULL;
7120
7121 GROW;
7122
7123 if (RAW != '&')
7124 return(NULL);
7125 NEXT;
7126 name = xmlParseName(ctxt);
7127 if (name == NULL) {
7128 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7129 "xmlParseEntityRef: no name\n");
7130 return(NULL);
7131 }
7132 if (RAW != ';') {
7133 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7134 return(NULL);
7135 }
7136 NEXT;
7137
7138 /*
7139 * Predefined entites override any extra definition
7140 */
7141 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7142 ent = xmlGetPredefinedEntity(name);
7143 if (ent != NULL)
7144 return(ent);
7145 }
7146
7147 /*
7148 * Increate the number of entity references parsed
7149 */
7150 ctxt->nbentities++;
7151
7152 /*
7153 * Ask first SAX for entity resolution, otherwise try the
7154 * entities which may have stored in the parser context.
7155 */
7156 if (ctxt->sax != NULL) {
7157 if (ctxt->sax->getEntity != NULL)
7158 ent = ctxt->sax->getEntity(ctxt->userData, name);
7159 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7160 (ctxt->options & XML_PARSE_OLDSAX))
7161 ent = xmlGetPredefinedEntity(name);
7162 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7163 (ctxt->userData==ctxt)) {
7164 ent = xmlSAX2GetEntity(ctxt, name);
7165 }
7166 }
7167 /*
7168 * [ WFC: Entity Declared ]
7169 * In a document without any DTD, a document with only an
7170 * internal DTD subset which contains no parameter entity
7171 * references, or a document with "standalone='yes'", the
7172 * Name given in the entity reference must match that in an
7173 * entity declaration, except that well-formed documents
7174 * need not declare any of the following entities: amp, lt,
7175 * gt, apos, quot.
7176 * The declaration of a parameter entity must precede any
7177 * reference to it.
7178 * Similarly, the declaration of a general entity must
7179 * precede any reference to it which appears in a default
7180 * value in an attribute-list declaration. Note that if
7181 * entities are declared in the external subset or in
7182 * external parameter entities, a non-validating processor
7183 * is not obligated to read and process their declarations;
7184 * for such documents, the rule that an entity must be
7185 * declared is a well-formedness constraint only if
7186 * standalone='yes'.
7187 */
7188 if (ent == NULL) {
7189 if ((ctxt->standalone == 1) ||
7190 ((ctxt->hasExternalSubset == 0) &&
7191 (ctxt->hasPErefs == 0))) {
7192 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7193 "Entity '%s' not defined\n", name);
7194 } else {
7195 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7196 "Entity '%s' not defined\n", name);
7197 if ((ctxt->inSubset == 0) &&
7198 (ctxt->sax != NULL) &&
7199 (ctxt->sax->reference != NULL)) {
7200 ctxt->sax->reference(ctxt->userData, name);
7201 }
7202 }
7203 ctxt->valid = 0;
7204 }
7205
7206 /*
7207 * [ WFC: Parsed Entity ]
7208 * An entity reference must not contain the name of an
7209 * unparsed entity
7210 */
7211 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7212 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7213 "Entity reference to unparsed entity %s\n", name);
7214 }
7215
7216 /*
7217 * [ WFC: No External Entity References ]
7218 * Attribute values cannot contain direct or indirect
7219 * entity references to external entities.
7220 */
7221 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7222 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7223 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7224 "Attribute references external entity '%s'\n", name);
7225 }
7226 /*
7227 * [ WFC: No < in Attribute Values ]
7228 * The replacement text of any entity referred to directly or
7229 * indirectly in an attribute value (other than "&lt;") must
7230 * not contain a <.
7231 */
7232 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7233 (ent != NULL) && (ent->content != NULL) &&
7234 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7235 (xmlStrchr(ent->content, '<'))) {
7236 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7237 "'<' in entity '%s' is not allowed in attributes values\n", name);
7238 }
7239
7240 /*
7241 * Internal check, no parameter entities here ...
7242 */
7243 else {
7244 switch (ent->etype) {
7245 case XML_INTERNAL_PARAMETER_ENTITY:
7246 case XML_EXTERNAL_PARAMETER_ENTITY:
7247 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7248 "Attempt to reference the parameter entity '%s'\n",
7249 name);
7250 break;
7251 default:
7252 break;
7253 }
7254 }
7255
7256 /*
7257 * [ WFC: No Recursion ]
7258 * A parsed entity must not contain a recursive reference
7259 * to itself, either directly or indirectly.
7260 * Done somewhere else
7261 */
7262 return(ent);
7263 }
7264
7265 /**
7266 * xmlParseStringEntityRef:
7267 * @ctxt: an XML parser context
7268 * @str: a pointer to an index in the string
7269 *
7270 * parse ENTITY references declarations, but this version parses it from
7271 * a string value.
7272 *
7273 * [68] EntityRef ::= '&' Name ';'
7274 *
7275 * [ WFC: Entity Declared ]
7276 * In a document without any DTD, a document with only an internal DTD
7277 * subset which contains no parameter entity references, or a document
7278 * with "standalone='yes'", the Name given in the entity reference
7279 * must match that in an entity declaration, except that well-formed
7280 * documents need not declare any of the following entities: amp, lt,
7281 * gt, apos, quot. The declaration of a parameter entity must precede
7282 * any reference to it. Similarly, the declaration of a general entity
7283 * must precede any reference to it which appears in a default value in an
7284 * attribute-list declaration. Note that if entities are declared in the
7285 * external subset or in external parameter entities, a non-validating
7286 * processor is not obligated to read and process their declarations;
7287 * for such documents, the rule that an entity must be declared is a
7288 * well-formedness constraint only if standalone='yes'.
7289 *
7290 * [ WFC: Parsed Entity ]
7291 * An entity reference must not contain the name of an unparsed entity
7292 *
7293 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7294 * is updated to the current location in the string.
7295 */
7296 static xmlEntityPtr
7297 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7298 xmlChar *name;
7299 const xmlChar *ptr;
7300 xmlChar cur;
7301 xmlEntityPtr ent = NULL;
7302
7303 if ((str == NULL) || (*str == NULL))
7304 return(NULL);
7305 ptr = *str;
7306 cur = *ptr;
7307 if (cur != '&')
7308 return(NULL);
7309
7310 ptr++;
7311 name = xmlParseStringName(ctxt, &ptr);
7312 if (name == NULL) {
7313 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7314 "xmlParseStringEntityRef: no name\n");
7315 *str = ptr;
7316 return(NULL);
7317 }
7318 if (*ptr != ';') {
7319 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7320 xmlFree(name);
7321 *str = ptr;
7322 return(NULL);
7323 }
7324 ptr++;
7325
7326
7327 /*
7328 * Predefined entites override any extra definition
7329 */
7330 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7331 ent = xmlGetPredefinedEntity(name);
7332 if (ent != NULL) {
7333 xmlFree(name);
7334 *str = ptr;
7335 return(ent);
7336 }
7337 }
7338
7339 /*
7340 * Increate the number of entity references parsed
7341 */
7342 ctxt->nbentities++;
7343
7344 /*
7345 * Ask first SAX for entity resolution, otherwise try the
7346 * entities which may have stored in the parser context.
7347 */
7348 if (ctxt->sax != NULL) {
7349 if (ctxt->sax->getEntity != NULL)
7350 ent = ctxt->sax->getEntity(ctxt->userData, name);
7351 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7352 ent = xmlGetPredefinedEntity(name);
7353 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7354 ent = xmlSAX2GetEntity(ctxt, name);
7355 }
7356 }
7357
7358 /*
7359 * [ WFC: Entity Declared ]
7360 * In a document without any DTD, a document with only an
7361 * internal DTD subset which contains no parameter entity
7362 * references, or a document with "standalone='yes'", the
7363 * Name given in the entity reference must match that in an
7364 * entity declaration, except that well-formed documents
7365 * need not declare any of the following entities: amp, lt,
7366 * gt, apos, quot.
7367 * The declaration of a parameter entity must precede any
7368 * reference to it.
7369 * Similarly, the declaration of a general entity must
7370 * precede any reference to it which appears in a default
7371 * value in an attribute-list declaration. Note that if
7372 * entities are declared in the external subset or in
7373 * external parameter entities, a non-validating processor
7374 * is not obligated to read and process their declarations;
7375 * for such documents, the rule that an entity must be
7376 * declared is a well-formedness constraint only if
7377 * standalone='yes'.
7378 */
7379 if (ent == NULL) {
7380 if ((ctxt->standalone == 1) ||
7381 ((ctxt->hasExternalSubset == 0) &&
7382 (ctxt->hasPErefs == 0))) {
7383 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7384 "Entity '%s' not defined\n", name);
7385 } else {
7386 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7387 "Entity '%s' not defined\n",
7388 name);
7389 }
7390 /* TODO ? check regressions ctxt->valid = 0; */
7391 }
7392
7393 /*
7394 * [ WFC: Parsed Entity ]
7395 * An entity reference must not contain the name of an
7396 * unparsed entity
7397 */
7398 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7399 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7400 "Entity reference to unparsed entity %s\n", name);
7401 }
7402
7403 /*
7404 * [ WFC: No External Entity References ]
7405 * Attribute values cannot contain direct or indirect
7406 * entity references to external entities.
7407 */
7408 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7409 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7410 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7411 "Attribute references external entity '%s'\n", name);
7412 }
7413 /*
7414 * [ WFC: No < in Attribute Values ]
7415 * The replacement text of any entity referred to directly or
7416 * indirectly in an attribute value (other than "&lt;") must
7417 * not contain a <.
7418 */
7419 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7420 (ent != NULL) && (ent->content != NULL) &&
7421 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7422 (xmlStrchr(ent->content, '<'))) {
7423 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7424 "'<' in entity '%s' is not allowed in attributes values\n",
7425 name);
7426 }
7427
7428 /*
7429 * Internal check, no parameter entities here ...
7430 */
7431 else {
7432 switch (ent->etype) {
7433 case XML_INTERNAL_PARAMETER_ENTITY:
7434 case XML_EXTERNAL_PARAMETER_ENTITY:
7435 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7436 "Attempt to reference the parameter entity '%s'\n",
7437 name);
7438 break;
7439 default:
7440 break;
7441 }
7442 }
7443
7444 /*
7445 * [ WFC: No Recursion ]
7446 * A parsed entity must not contain a recursive reference
7447 * to itself, either directly or indirectly.
7448 * Done somewhere else
7449 */
7450
7451 xmlFree(name);
7452 *str = ptr;
7453 return(ent);
7454 }
7455
7456 /**
7457 * xmlParsePEReference:
7458 * @ctxt: an XML parser context
7459 *
7460 * parse PEReference declarations
7461 * The entity content is handled directly by pushing it's content as
7462 * a new input stream.
7463 *
7464 * [69] PEReference ::= '%' Name ';'
7465 *
7466 * [ WFC: No Recursion ]
7467 * A parsed entity must not contain a recursive
7468 * reference to itself, either directly or indirectly.
7469 *
7470 * [ WFC: Entity Declared ]
7471 * In a document without any DTD, a document with only an internal DTD
7472 * subset which contains no parameter entity references, or a document
7473 * with "standalone='yes'", ... ... The declaration of a parameter
7474 * entity must precede any reference to it...
7475 *
7476 * [ VC: Entity Declared ]
7477 * In a document with an external subset or external parameter entities
7478 * with "standalone='no'", ... ... The declaration of a parameter entity
7479 * must precede any reference to it...
7480 *
7481 * [ WFC: In DTD ]
7482 * Parameter-entity references may only appear in the DTD.
7483 * NOTE: misleading but this is handled.
7484 */
7485 void
7486 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7487 {
7488 const xmlChar *name;
7489 xmlEntityPtr entity = NULL;
7490 xmlParserInputPtr input;
7491
7492 if (RAW != '%')
7493 return;
7494 NEXT;
7495 name = xmlParseName(ctxt);
7496 if (name == NULL) {
7497 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7498 "xmlParsePEReference: no name\n");
7499 return;
7500 }
7501 if (RAW != ';') {
7502 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7503 return;
7504 }
7505
7506 NEXT;
7507
7508 /*
7509 * Increate the number of entity references parsed
7510 */
7511 ctxt->nbentities++;
7512
7513 /*
7514 * Request the entity from SAX
7515 */
7516 if ((ctxt->sax != NULL) &&
7517 (ctxt->sax->getParameterEntity != NULL))
7518 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7519 name);
7520 if (entity == NULL) {
7521 /*
7522 * [ WFC: Entity Declared ]
7523 * In a document without any DTD, a document with only an
7524 * internal DTD subset which contains no parameter entity
7525 * references, or a document with "standalone='yes'", ...
7526 * ... The declaration of a parameter entity must precede
7527 * any reference to it...
7528 */
7529 if ((ctxt->standalone == 1) ||
7530 ((ctxt->hasExternalSubset == 0) &&
7531 (ctxt->hasPErefs == 0))) {
7532 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7533 "PEReference: %%%s; not found\n",
7534 name);
7535 } else {
7536 /*
7537 * [ VC: Entity Declared ]
7538 * In a document with an external subset or external
7539 * parameter entities with "standalone='no'", ...
7540 * ... The declaration of a parameter entity must
7541 * precede any reference to it...
7542 */
7543 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7544 "PEReference: %%%s; not found\n",
7545 name, NULL);
7546 ctxt->valid = 0;
7547 }
7548 } else {
7549 /*
7550 * Internal checking in case the entity quest barfed
7551 */
7552 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7553 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7554 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7555 "Internal: %%%s; is not a parameter entity\n",
7556 name, NULL);
7557 } else if (ctxt->input->free != deallocblankswrapper) {
7558 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7559 if (xmlPushInput(ctxt, input) < 0)
7560 return;
7561 } else {
7562 /*
7563 * TODO !!!
7564 * handle the extra spaces added before and after
7565 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7566 */
7567 input = xmlNewEntityInputStream(ctxt, entity);
7568 if (xmlPushInput(ctxt, input) < 0)
7569 return;
7570 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7571 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7572 (IS_BLANK_CH(NXT(5)))) {
7573 xmlParseTextDecl(ctxt);
7574 if (ctxt->errNo ==
7575 XML_ERR_UNSUPPORTED_ENCODING) {
7576 /*
7577 * The XML REC instructs us to stop parsing
7578 * right here
7579 */
7580 ctxt->instate = XML_PARSER_EOF;
7581 return;
7582 }
7583 }
7584 }
7585 }
7586 ctxt->hasPErefs = 1;
7587 }
7588
7589 /**
7590 * xmlLoadEntityContent:
7591 * @ctxt: an XML parser context
7592 * @entity: an unloaded system entity
7593 *
7594 * Load the original content of the given system entity from the
7595 * ExternalID/SystemID given. This is to be used for Included in Literal
7596 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7597 *
7598 * Returns 0 in case of success and -1 in case of failure
7599 */
7600 static int
7601 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7602 xmlParserInputPtr input;
7603 xmlBufferPtr buf;
7604 int l, c;
7605 int count = 0;
7606
7607 if ((ctxt == NULL) || (entity == NULL) ||
7608 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7609 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7610 (entity->content != NULL)) {
7611 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7612 "xmlLoadEntityContent parameter error");
7613 return(-1);
7614 }
7615
7616 if (xmlParserDebugEntities)
7617 xmlGenericError(xmlGenericErrorContext,
7618 "Reading %s entity content input\n", entity->name);
7619
7620 buf = xmlBufferCreate();
7621 if (buf == NULL) {
7622 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7623 "xmlLoadEntityContent parameter error");
7624 return(-1);
7625 }
7626
7627 input = xmlNewEntityInputStream(ctxt, entity);
7628 if (input == NULL) {
7629 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7630 "xmlLoadEntityContent input error");
7631 xmlBufferFree(buf);
7632 return(-1);
7633 }
7634
7635 /*
7636 * Push the entity as the current input, read char by char
7637 * saving to the buffer until the end of the entity or an error
7638 */
7639 if (xmlPushInput(ctxt, input) < 0) {
7640 xmlBufferFree(buf);
7641 return(-1);
7642 }
7643
7644 GROW;
7645 c = CUR_CHAR(l);
7646 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7647 (IS_CHAR(c))) {
7648 xmlBufferAdd(buf, ctxt->input->cur, l);
7649 if (count++ > 100) {
7650 count = 0;
7651 GROW;
7652 }
7653 NEXTL(l);
7654 c = CUR_CHAR(l);
7655 }
7656
7657 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7658 xmlPopInput(ctxt);
7659 } else if (!IS_CHAR(c)) {
7660 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7661 "xmlLoadEntityContent: invalid char value %d\n",
7662 c);
7663 xmlBufferFree(buf);
7664 return(-1);
7665 }
7666 entity->content = buf->content;
7667 buf->content = NULL;
7668 xmlBufferFree(buf);
7669
7670 return(0);
7671 }
7672
7673 /**
7674 * xmlParseStringPEReference:
7675 * @ctxt: an XML parser context
7676 * @str: a pointer to an index in the string
7677 *
7678 * parse PEReference declarations
7679 *
7680 * [69] PEReference ::= '%' Name ';'
7681 *
7682 * [ WFC: No Recursion ]
7683 * A parsed entity must not contain a recursive
7684 * reference to itself, either directly or indirectly.
7685 *
7686 * [ WFC: Entity Declared ]
7687 * In a document without any DTD, a document with only an internal DTD
7688 * subset which contains no parameter entity references, or a document
7689 * with "standalone='yes'", ... ... The declaration of a parameter
7690 * entity must precede any reference to it...
7691 *
7692 * [ VC: Entity Declared ]
7693 * In a document with an external subset or external parameter entities
7694 * with "standalone='no'", ... ... The declaration of a parameter entity
7695 * must precede any reference to it...
7696 *
7697 * [ WFC: In DTD ]
7698 * Parameter-entity references may only appear in the DTD.
7699 * NOTE: misleading but this is handled.
7700 *
7701 * Returns the string of the entity content.
7702 * str is updated to the current value of the index
7703 */
7704 static xmlEntityPtr
7705 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7706 const xmlChar *ptr;
7707 xmlChar cur;
7708 xmlChar *name;
7709 xmlEntityPtr entity = NULL;
7710
7711 if ((str == NULL) || (*str == NULL)) return(NULL);
7712 ptr = *str;
7713 cur = *ptr;
7714 if (cur != '%')
7715 return(NULL);
7716 ptr++;
7717 name = xmlParseStringName(ctxt, &ptr);
7718 if (name == NULL) {
7719 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7720 "xmlParseStringPEReference: no name\n");
7721 *str = ptr;
7722 return(NULL);
7723 }
7724 cur = *ptr;
7725 if (cur != ';') {
7726 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7727 xmlFree(name);
7728 *str = ptr;
7729 return(NULL);
7730 }
7731 ptr++;
7732
7733 /*
7734 * Increate the number of entity references parsed
7735 */
7736 ctxt->nbentities++;
7737
7738 /*
7739 * Request the entity from SAX
7740 */
7741 if ((ctxt->sax != NULL) &&
7742 (ctxt->sax->getParameterEntity != NULL))
7743 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7744 name);
7745 if (entity == NULL) {
7746 /*
7747 * [ WFC: Entity Declared ]
7748 * In a document without any DTD, a document with only an
7749 * internal DTD subset which contains no parameter entity
7750 * references, or a document with "standalone='yes'", ...
7751 * ... The declaration of a parameter entity must precede
7752 * any reference to it...
7753 */
7754 if ((ctxt->standalone == 1) ||
7755 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7756 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7757 "PEReference: %%%s; not found\n", name);
7758 } else {
7759 /*
7760 * [ VC: Entity Declared ]
7761 * In a document with an external subset or external
7762 * parameter entities with "standalone='no'", ...
7763 * ... The declaration of a parameter entity must
7764 * precede any reference to it...
7765 */
7766 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7767 "PEReference: %%%s; not found\n",
7768 name, NULL);
7769 ctxt->valid = 0;
7770 }
7771 } else {
7772 /*
7773 * Internal checking in case the entity quest barfed
7774 */
7775 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7776 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7777 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7778 "%%%s; is not a parameter entity\n",
7779 name, NULL);
7780 }
7781 }
7782 ctxt->hasPErefs = 1;
7783 xmlFree(name);
7784 *str = ptr;
7785 return(entity);
7786 }
7787
7788 /**
7789 * xmlParseDocTypeDecl:
7790 * @ctxt: an XML parser context
7791 *
7792 * parse a DOCTYPE declaration
7793 *
7794 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7795 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7796 *
7797 * [ VC: Root Element Type ]
7798 * The Name in the document type declaration must match the element
7799 * type of the root element.
7800 */
7801
7802 void
7803 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7804 const xmlChar *name = NULL;
7805 xmlChar *ExternalID = NULL;
7806 xmlChar *URI = NULL;
7807
7808 /*
7809 * We know that '<!DOCTYPE' has been detected.
7810 */
7811 SKIP(9);
7812
7813 SKIP_BLANKS;
7814
7815 /*
7816 * Parse the DOCTYPE name.
7817 */
7818 name = xmlParseName(ctxt);
7819 if (name == NULL) {
7820 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7821 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7822 }
7823 ctxt->intSubName = name;
7824
7825 SKIP_BLANKS;
7826
7827 /*
7828 * Check for SystemID and ExternalID
7829 */
7830 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7831
7832 if ((URI != NULL) || (ExternalID != NULL)) {
7833 ctxt->hasExternalSubset = 1;
7834 }
7835 ctxt->extSubURI = URI;
7836 ctxt->extSubSystem = ExternalID;
7837
7838 SKIP_BLANKS;
7839
7840 /*
7841 * Create and update the internal subset.
7842 */
7843 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7844 (!ctxt->disableSAX))
7845 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7846
7847 /*
7848 * Is there any internal subset declarations ?
7849 * they are handled separately in xmlParseInternalSubset()
7850 */
7851 if (RAW == '[')
7852 return;
7853
7854 /*
7855 * We should be at the end of the DOCTYPE declaration.
7856 */
7857 if (RAW != '>') {
7858 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7859 }
7860 NEXT;
7861 }
7862
7863 /**
7864 * xmlParseInternalSubset:
7865 * @ctxt: an XML parser context
7866 *
7867 * parse the internal subset declaration
7868 *
7869 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7870 */
7871
7872 static void
7873 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7874 /*
7875 * Is there any DTD definition ?
7876 */
7877 if (RAW == '[') {
7878 ctxt->instate = XML_PARSER_DTD;
7879 NEXT;
7880 /*
7881 * Parse the succession of Markup declarations and
7882 * PEReferences.
7883 * Subsequence (markupdecl | PEReference | S)*
7884 */
7885 while (RAW != ']') {
7886 const xmlChar *check = CUR_PTR;
7887 unsigned int cons = ctxt->input->consumed;
7888
7889 SKIP_BLANKS;
7890 xmlParseMarkupDecl(ctxt);
7891 xmlParsePEReference(ctxt);
7892
7893 /*
7894 * Pop-up of finished entities.
7895 */
7896 while ((RAW == 0) && (ctxt->inputNr > 1))
7897 xmlPopInput(ctxt);
7898
7899 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7900 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7901 "xmlParseInternalSubset: error detected in Markup declaration\n");
7902 break;
7903 }
7904 }
7905 if (RAW == ']') {
7906 NEXT;
7907 SKIP_BLANKS;
7908 }
7909 }
7910
7911 /*
7912 * We should be at the end of the DOCTYPE declaration.
7913 */
7914 if (RAW != '>') {
7915 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7916 }
7917 NEXT;
7918 }
7919
7920 #ifdef LIBXML_SAX1_ENABLED
7921 /**
7922 * xmlParseAttribute:
7923 * @ctxt: an XML parser context
7924 * @value: a xmlChar ** used to store the value of the attribute
7925 *
7926 * parse an attribute
7927 *
7928 * [41] Attribute ::= Name Eq AttValue
7929 *
7930 * [ WFC: No External Entity References ]
7931 * Attribute values cannot contain direct or indirect entity references
7932 * to external entities.
7933 *
7934 * [ WFC: No < in Attribute Values ]
7935 * The replacement text of any entity referred to directly or indirectly in
7936 * an attribute value (other than "&lt;") must not contain a <.
7937 *
7938 * [ VC: Attribute Value Type ]
7939 * The attribute must have been declared; the value must be of the type
7940 * declared for it.
7941 *
7942 * [25] Eq ::= S? '=' S?
7943 *
7944 * With namespace:
7945 *
7946 * [NS 11] Attribute ::= QName Eq AttValue
7947 *
7948 * Also the case QName == xmlns:??? is handled independently as a namespace
7949 * definition.
7950 *
7951 * Returns the attribute name, and the value in *value.
7952 */
7953
7954 const xmlChar *
7955 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7956 const xmlChar *name;
7957 xmlChar *val;
7958
7959 *value = NULL;
7960 GROW;
7961 name = xmlParseName(ctxt);
7962 if (name == NULL) {
7963 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7964 "error parsing attribute name\n");
7965 return(NULL);
7966 }
7967
7968 /*
7969 * read the value
7970 */
7971 SKIP_BLANKS;
7972 if (RAW == '=') {
7973 NEXT;
7974 SKIP_BLANKS;
7975 val = xmlParseAttValue(ctxt);
7976 ctxt->instate = XML_PARSER_CONTENT;
7977 } else {
7978 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
7979 "Specification mandate value for attribute %s\n", name);
7980 return(NULL);
7981 }
7982
7983 /*
7984 * Check that xml:lang conforms to the specification
7985 * No more registered as an error, just generate a warning now
7986 * since this was deprecated in XML second edition
7987 */
7988 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7989 if (!xmlCheckLanguageID(val)) {
7990 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7991 "Malformed value for xml:lang : %s\n",
7992 val, NULL);
7993 }
7994 }
7995
7996 /*
7997 * Check that xml:space conforms to the specification
7998 */
7999 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8000 if (xmlStrEqual(val, BAD_CAST "default"))
8001 *(ctxt->space) = 0;
8002 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8003 *(ctxt->space) = 1;
8004 else {
8005 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8006 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8007 val, NULL);
8008 }
8009 }
8010
8011 *value = val;
8012 return(name);
8013 }
8014
8015 /**
8016 * xmlParseStartTag:
8017 * @ctxt: an XML parser context
8018 *
8019 * parse a start tag, either for a regular element or for an
8020 * EmptyElement. In both cases we don't parse the tag closing chars.
8021 *
8022 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8023 *
8024 * [ WFC: Unique Att Spec ]
8025 * No attribute name may appear more than once in the same start-tag or
8026 * empty-element tag.
8027 *
8028 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8029 *
8030 * [ WFC: Unique Att Spec ]
8031 * No attribute name may appear more than once in the same start-tag or
8032 * empty-element tag.
8033 *
8034 * With namespace:
8035 *
8036 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8037 *
8038 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8039 *
8040 * Returns the element name parsed
8041 */
8042
8043 const xmlChar *
8044 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8045 const xmlChar *name;
8046 const xmlChar *attname;
8047 xmlChar *attvalue;
8048 const xmlChar **atts = ctxt->atts;
8049 int nbatts = 0;
8050 int maxatts = ctxt->maxatts;
8051 int i;
8052
8053 if (RAW != '<') return(NULL);
8054 NEXT1;
8055
8056 name = xmlParseName(ctxt);
8057 if (name == NULL) {
8058 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8059 "xmlParseStartTag: invalid element name\n");
8060 return(NULL);
8061 }
8062
8063 /*
8064 * Now parse the attributes, it ends up with the ending
8065 *
8066 * (S Attribute)* S?
8067 */
8068 SKIP_BLANKS;
8069 GROW;
8070
8071 while ((RAW != '>') &&
8072 ((RAW != '/') || (NXT(1) != '>')) &&
8073 (IS_BYTE_CHAR(RAW))) {
8074 const xmlChar *q = CUR_PTR;
8075 unsigned int cons = ctxt->input->consumed;
8076
8077 attname = xmlParseAttribute(ctxt, &attvalue);
8078 if ((attname != NULL) && (attvalue != NULL)) {
8079 /*
8080 * [ WFC: Unique Att Spec ]
8081 * No attribute name may appear more than once in the same
8082 * start-tag or empty-element tag.
8083 */
8084 for (i = 0; i < nbatts;i += 2) {
8085 if (xmlStrEqual(atts[i], attname)) {
8086 xmlErrAttributeDup(ctxt, NULL, attname);
8087 xmlFree(attvalue);
8088 goto failed;
8089 }
8090 }
8091 /*
8092 * Add the pair to atts
8093 */
8094 if (atts == NULL) {
8095 maxatts = 22; /* allow for 10 attrs by default */
8096 atts = (const xmlChar **)
8097 xmlMalloc(maxatts * sizeof(xmlChar *));
8098 if (atts == NULL) {
8099 xmlErrMemory(ctxt, NULL);
8100 if (attvalue != NULL)
8101 xmlFree(attvalue);
8102 goto failed;
8103 }
8104 ctxt->atts = atts;
8105 ctxt->maxatts = maxatts;
8106 } else if (nbatts + 4 > maxatts) {
8107 const xmlChar **n;
8108
8109 maxatts *= 2;
8110 n = (const xmlChar **) xmlRealloc((void *) atts,
8111 maxatts * sizeof(const xmlChar *));
8112 if (n == NULL) {
8113 xmlErrMemory(ctxt, NULL);
8114 if (attvalue != NULL)
8115 xmlFree(attvalue);
8116 goto failed;
8117 }
8118 atts = n;
8119 ctxt->atts = atts;
8120 ctxt->maxatts = maxatts;
8121 }
8122 atts[nbatts++] = attname;
8123 atts[nbatts++] = attvalue;
8124 atts[nbatts] = NULL;
8125 atts[nbatts + 1] = NULL;
8126 } else {
8127 if (attvalue != NULL)
8128 xmlFree(attvalue);
8129 }
8130
8131 failed:
8132
8133 GROW
8134 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8135 break;
8136 if (!IS_BLANK_CH(RAW)) {
8137 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8138 "attributes construct error\n");
8139 }
8140 SKIP_BLANKS;
8141 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8142 (attname == NULL) && (attvalue == NULL)) {
8143 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8144 "xmlParseStartTag: problem parsing attributes\n");
8145 break;
8146 }
8147 SHRINK;
8148 GROW;
8149 }
8150
8151 /*
8152 * SAX: Start of Element !
8153 */
8154 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8155 (!ctxt->disableSAX)) {
8156 if (nbatts > 0)
8157 ctxt->sax->startElement(ctxt->userData, name, atts);
8158 else
8159 ctxt->sax->startElement(ctxt->userData, name, NULL);
8160 }
8161
8162 if (atts != NULL) {
8163 /* Free only the content strings */
8164 for (i = 1;i < nbatts;i+=2)
8165 if (atts[i] != NULL)
8166 xmlFree((xmlChar *) atts[i]);
8167 }
8168 return(name);
8169 }
8170
8171 /**
8172 * xmlParseEndTag1:
8173 * @ctxt: an XML parser context
8174 * @line: line of the start tag
8175 * @nsNr: number of namespaces on the start tag
8176 *
8177 * parse an end of tag
8178 *
8179 * [42] ETag ::= '</' Name S? '>'
8180 *
8181 * With namespace
8182 *
8183 * [NS 9] ETag ::= '</' QName S? '>'
8184 */
8185
8186 static void
8187 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8188 const xmlChar *name;
8189
8190 GROW;
8191 if ((RAW != '<') || (NXT(1) != '/')) {
8192 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8193 "xmlParseEndTag: '</' not found\n");
8194 return;
8195 }
8196 SKIP(2);
8197
8198 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8199
8200 /*
8201 * We should definitely be at the ending "S? '>'" part
8202 */
8203 GROW;
8204 SKIP_BLANKS;
8205 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8206 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8207 } else
8208 NEXT1;
8209
8210 /*
8211 * [ WFC: Element Type Match ]
8212 * The Name in an element's end-tag must match the element type in the
8213 * start-tag.
8214 *
8215 */
8216 if (name != (xmlChar*)1) {
8217 if (name == NULL) name = BAD_CAST "unparseable";
8218 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8219 "Opening and ending tag mismatch: %s line %d and %s\n",
8220 ctxt->name, line, name);
8221 }
8222
8223 /*
8224 * SAX: End of Tag
8225 */
8226 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8227 (!ctxt->disableSAX))
8228 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8229
8230 namePop(ctxt);
8231 spacePop(ctxt);
8232 return;
8233 }
8234
8235 /**
8236 * xmlParseEndTag:
8237 * @ctxt: an XML parser context
8238 *
8239 * parse an end of tag
8240 *
8241 * [42] ETag ::= '</' Name S? '>'
8242 *
8243 * With namespace
8244 *
8245 * [NS 9] ETag ::= '</' QName S? '>'
8246 */
8247
8248 void
8249 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8250 xmlParseEndTag1(ctxt, 0);
8251 }
8252 #endif /* LIBXML_SAX1_ENABLED */
8253
8254 /************************************************************************
8255 * *
8256 * SAX 2 specific operations *
8257 * *
8258 ************************************************************************/
8259
8260 /*
8261 * xmlGetNamespace:
8262 * @ctxt: an XML parser context
8263 * @prefix: the prefix to lookup
8264 *
8265 * Lookup the namespace name for the @prefix (which can be NULL)
8266 * The prefix must come from the @ctxt->dict dictionary
8267 *
8268 * Returns the namespace name or NULL if not bound
8269 */
8270 static const xmlChar *
8271 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8272 int i;
8273
8274 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8275 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8276 if (ctxt->nsTab[i] == prefix) {
8277 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8278 return(NULL);
8279 return(ctxt->nsTab[i + 1]);
8280 }
8281 return(NULL);
8282 }
8283
8284 /**
8285 * xmlParseQName:
8286 * @ctxt: an XML parser context
8287 * @prefix: pointer to store the prefix part
8288 *
8289 * parse an XML Namespace QName
8290 *
8291 * [6] QName ::= (Prefix ':')? LocalPart
8292 * [7] Prefix ::= NCName
8293 * [8] LocalPart ::= NCName
8294 *
8295 * Returns the Name parsed or NULL
8296 */
8297
8298 static const xmlChar *
8299 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8300 const xmlChar *l, *p;
8301
8302 GROW;
8303
8304 l = xmlParseNCName(ctxt);
8305 if (l == NULL) {
8306 if (CUR == ':') {
8307 l = xmlParseName(ctxt);
8308 if (l != NULL) {
8309 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8310 "Failed to parse QName '%s'\n", l, NULL, NULL);
8311 *prefix = NULL;
8312 return(l);
8313 }
8314 }
8315 return(NULL);
8316 }
8317 if (CUR == ':') {
8318 NEXT;
8319 p = l;
8320 l = xmlParseNCName(ctxt);
8321 if (l == NULL) {
8322 xmlChar *tmp;
8323
8324 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8325 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8326 l = xmlParseNmtoken(ctxt);
8327 if (l == NULL)
8328 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8329 else {
8330 tmp = xmlBuildQName(l, p, NULL, 0);
8331 xmlFree((char *)l);
8332 }
8333 p = xmlDictLookup(ctxt->dict, tmp, -1);
8334 if (tmp != NULL) xmlFree(tmp);
8335 *prefix = NULL;
8336 return(p);
8337 }
8338 if (CUR == ':') {
8339 xmlChar *tmp;
8340
8341 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8342 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8343 NEXT;
8344 tmp = (xmlChar *) xmlParseName(ctxt);
8345 if (tmp != NULL) {
8346 tmp = xmlBuildQName(tmp, l, NULL, 0);
8347 l = xmlDictLookup(ctxt->dict, tmp, -1);
8348 if (tmp != NULL) xmlFree(tmp);
8349 *prefix = p;
8350 return(l);
8351 }
8352 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8353 l = xmlDictLookup(ctxt->dict, tmp, -1);
8354 if (tmp != NULL) xmlFree(tmp);
8355 *prefix = p;
8356 return(l);
8357 }
8358 *prefix = p;
8359 } else
8360 *prefix = NULL;
8361 return(l);
8362 }
8363
8364 /**
8365 * xmlParseQNameAndCompare:
8366 * @ctxt: an XML parser context
8367 * @name: the localname
8368 * @prefix: the prefix, if any.
8369 *
8370 * parse an XML name and compares for match
8371 * (specialized for endtag parsing)
8372 *
8373 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8374 * and the name for mismatch
8375 */
8376
8377 static const xmlChar *
8378 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8379 xmlChar const *prefix) {
8380 const xmlChar *cmp;
8381 const xmlChar *in;
8382 const xmlChar *ret;
8383 const xmlChar *prefix2;
8384
8385 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8386
8387 GROW;
8388 in = ctxt->input->cur;
8389
8390 cmp = prefix;
8391 while (*in != 0 && *in == *cmp) {
8392 ++in;
8393 ++cmp;
8394 }
8395 if ((*cmp == 0) && (*in == ':')) {
8396 in++;
8397 cmp = name;
8398 while (*in != 0 && *in == *cmp) {
8399 ++in;
8400 ++cmp;
8401 }
8402 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8403 /* success */
8404 ctxt->input->cur = in;
8405 return((const xmlChar*) 1);
8406 }
8407 }
8408 /*
8409 * all strings coms from the dictionary, equality can be done directly
8410 */
8411 ret = xmlParseQName (ctxt, &prefix2);
8412 if ((ret == name) && (prefix == prefix2))
8413 return((const xmlChar*) 1);
8414 return ret;
8415 }
8416
8417 /**
8418 * xmlParseAttValueInternal:
8419 * @ctxt: an XML parser context
8420 * @len: attribute len result
8421 * @alloc: whether the attribute was reallocated as a new string
8422 * @normalize: if 1 then further non-CDATA normalization must be done
8423 *
8424 * parse a value for an attribute.
8425 * NOTE: if no normalization is needed, the routine will return pointers
8426 * directly from the data buffer.
8427 *
8428 * 3.3.3 Attribute-Value Normalization:
8429 * Before the value of an attribute is passed to the application or
8430 * checked for validity, the XML processor must normalize it as follows:
8431 * - a character reference is processed by appending the referenced
8432 * character to the attribute value
8433 * - an entity reference is processed by recursively processing the
8434 * replacement text of the entity
8435 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8436 * appending #x20 to the normalized value, except that only a single
8437 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8438 * parsed entity or the literal entity value of an internal parsed entity
8439 * - other characters are processed by appending them to the normalized value
8440 * If the declared value is not CDATA, then the XML processor must further
8441 * process the normalized attribute value by discarding any leading and
8442 * trailing space (#x20) characters, and by replacing sequences of space
8443 * (#x20) characters by a single space (#x20) character.
8444 * All attributes for which no declaration has been read should be treated
8445 * by a non-validating parser as if declared CDATA.
8446 *
8447 * Returns the AttValue parsed or NULL. The value has to be freed by the
8448 * caller if it was copied, this can be detected by val[*len] == 0.
8449 */
8450
8451 static xmlChar *
8452 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8453 int normalize)
8454 {
8455 xmlChar limit = 0;
8456 const xmlChar *in = NULL, *start, *end, *last;
8457 xmlChar *ret = NULL;
8458
8459 GROW;
8460 in = (xmlChar *) CUR_PTR;
8461 if (*in != '"' && *in != '\'') {
8462 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8463 return (NULL);
8464 }
8465 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8466
8467 /*
8468 * try to handle in this routine the most common case where no
8469 * allocation of a new string is required and where content is
8470 * pure ASCII.
8471 */
8472 limit = *in++;
8473 end = ctxt->input->end;
8474 start = in;
8475 if (in >= end) {
8476 const xmlChar *oldbase = ctxt->input->base;
8477 GROW;
8478 if (oldbase != ctxt->input->base) {
8479 long delta = ctxt->input->base - oldbase;
8480 start = start + delta;
8481 in = in + delta;
8482 }
8483 end = ctxt->input->end;
8484 }
8485 if (normalize) {
8486 /*
8487 * Skip any leading spaces
8488 */
8489 while ((in < end) && (*in != limit) &&
8490 ((*in == 0x20) || (*in == 0x9) ||
8491 (*in == 0xA) || (*in == 0xD))) {
8492 in++;
8493 start = in;
8494 if (in >= end) {
8495 const xmlChar *oldbase = ctxt->input->base;
8496 GROW;
8497 if (oldbase != ctxt->input->base) {
8498 long delta = ctxt->input->base - oldbase;
8499 start = start + delta;
8500 in = in + delta;
8501 }
8502 end = ctxt->input->end;
8503 }
8504 }
8505 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8506 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8507 if ((*in++ == 0x20) && (*in == 0x20)) break;
8508 if (in >= end) {
8509 const xmlChar *oldbase = ctxt->input->base;
8510 GROW;
8511 if (oldbase != ctxt->input->base) {
8512 long delta = ctxt->input->base - oldbase;
8513 start = start + delta;
8514 in = in + delta;
8515 }
8516 end = ctxt->input->end;
8517 }
8518 }
8519 last = in;
8520 /*
8521 * skip the trailing blanks
8522 */
8523 while ((last[-1] == 0x20) && (last > start)) last--;
8524 while ((in < end) && (*in != limit) &&
8525 ((*in == 0x20) || (*in == 0x9) ||
8526 (*in == 0xA) || (*in == 0xD))) {
8527 in++;
8528 if (in >= end) {
8529 const xmlChar *oldbase = ctxt->input->base;
8530 GROW;
8531 if (oldbase != ctxt->input->base) {
8532 long delta = ctxt->input->base - oldbase;
8533 start = start + delta;
8534 in = in + delta;
8535 last = last + delta;
8536 }
8537 end = ctxt->input->end;
8538 }
8539 }
8540 if (*in != limit) goto need_complex;
8541 } else {
8542 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8543 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8544 in++;
8545 if (in >= end) {
8546 const xmlChar *oldbase = ctxt->input->base;
8547 GROW;
8548 if (oldbase != ctxt->input->base) {
8549 long delta = ctxt->input->base - oldbase;
8550 start = start + delta;
8551 in = in + delta;
8552 }
8553 end = ctxt->input->end;
8554 }
8555 }
8556 last = in;
8557 if (*in != limit) goto need_complex;
8558 }
8559 in++;
8560 if (len != NULL) {
8561 *len = last - start;
8562 ret = (xmlChar *) start;
8563 } else {
8564 if (alloc) *alloc = 1;
8565 ret = xmlStrndup(start, last - start);
8566 }
8567 CUR_PTR = in;
8568 if (alloc) *alloc = 0;
8569 return ret;
8570 need_complex:
8571 if (alloc) *alloc = 1;
8572 return xmlParseAttValueComplex(ctxt, len, normalize);
8573 }
8574
8575 /**
8576 * xmlParseAttribute2:
8577 * @ctxt: an XML parser context
8578 * @pref: the element prefix
8579 * @elem: the element name
8580 * @prefix: a xmlChar ** used to store the value of the attribute prefix
8581 * @value: a xmlChar ** used to store the value of the attribute
8582 * @len: an int * to save the length of the attribute
8583 * @alloc: an int * to indicate if the attribute was allocated
8584 *
8585 * parse an attribute in the new SAX2 framework.
8586 *
8587 * Returns the attribute name, and the value in *value.
8588 */
8589
8590 static const xmlChar *
8591 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8592 const xmlChar * pref, const xmlChar * elem,
8593 const xmlChar ** prefix, xmlChar ** value,
8594 int *len, int *alloc)
8595 {
8596 const xmlChar *name;
8597 xmlChar *val, *internal_val = NULL;
8598 int normalize = 0;
8599
8600 *value = NULL;
8601 GROW;
8602 name = xmlParseQName(ctxt, prefix);
8603 if (name == NULL) {
8604 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8605 "error parsing attribute name\n");
8606 return (NULL);
8607 }
8608
8609 /*
8610 * get the type if needed
8611 */
8612 if (ctxt->attsSpecial != NULL) {
8613 int type;
8614
8615 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
8616 pref, elem, *prefix, name);
8617 if (type != 0)
8618 normalize = 1;
8619 }
8620
8621 /*
8622 * read the value
8623 */
8624 SKIP_BLANKS;
8625 if (RAW == '=') {
8626 NEXT;
8627 SKIP_BLANKS;
8628 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8629 if (normalize) {
8630 /*
8631 * Sometimes a second normalisation pass for spaces is needed
8632 * but that only happens if charrefs or entities refernces
8633 * have been used in the attribute value, i.e. the attribute
8634 * value have been extracted in an allocated string already.
8635 */
8636 if (*alloc) {
8637 const xmlChar *val2;
8638
8639 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8640 if ((val2 != NULL) && (val2 != val)) {
8641 xmlFree(val);
8642 val = (xmlChar *) val2;
8643 }
8644 }
8645 }
8646 ctxt->instate = XML_PARSER_CONTENT;
8647 } else {
8648 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8649 "Specification mandate value for attribute %s\n",
8650 name);
8651 return (NULL);
8652 }
8653
8654 if (*prefix == ctxt->str_xml) {
8655 /*
8656 * Check that xml:lang conforms to the specification
8657 * No more registered as an error, just generate a warning now
8658 * since this was deprecated in XML second edition
8659 */
8660 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8661 internal_val = xmlStrndup(val, *len);
8662 if (!xmlCheckLanguageID(internal_val)) {
8663 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8664 "Malformed value for xml:lang : %s\n",
8665 internal_val, NULL);
8666 }
8667 }
8668
8669 /*
8670 * Check that xml:space conforms to the specification
8671 */
8672 if (xmlStrEqual(name, BAD_CAST "space")) {
8673 internal_val = xmlStrndup(val, *len);
8674 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8675 *(ctxt->space) = 0;
8676 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8677 *(ctxt->space) = 1;
8678 else {
8679 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8680 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8681 internal_val, NULL);
8682 }
8683 }
8684 if (internal_val) {
8685 xmlFree(internal_val);
8686 }
8687 }
8688
8689 *value = val;
8690 return (name);
8691 }
8692 /**
8693 * xmlParseStartTag2:
8694 * @ctxt: an XML parser context
8695 *
8696 * parse a start of tag either for rule element or
8697 * EmptyElement. In both case we don't parse the tag closing chars.
8698 * This routine is called when running SAX2 parsing
8699 *
8700 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8701 *
8702 * [ WFC: Unique Att Spec ]
8703 * No attribute name may appear more than once in the same start-tag or
8704 * empty-element tag.
8705 *
8706 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8707 *
8708 * [ WFC: Unique Att Spec ]
8709 * No attribute name may appear more than once in the same start-tag or
8710 * empty-element tag.
8711 *
8712 * With namespace:
8713 *
8714 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8715 *
8716 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8717 *
8718 * Returns the element name parsed
8719 */
8720
8721 static const xmlChar *
8722 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8723 const xmlChar **URI, int *tlen) {
8724 const xmlChar *localname;
8725 const xmlChar *prefix;
8726 const xmlChar *attname;
8727 const xmlChar *aprefix;
8728 const xmlChar *nsname;
8729 xmlChar *attvalue;
8730 const xmlChar **atts = ctxt->atts;
8731 int maxatts = ctxt->maxatts;
8732 int nratts, nbatts, nbdef;
8733 int i, j, nbNs, attval, oldline, oldcol;
8734 const xmlChar *base;
8735 unsigned long cur;
8736 int nsNr = ctxt->nsNr;
8737
8738 if (RAW != '<') return(NULL);
8739 NEXT1;
8740
8741 /*
8742 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8743 * point since the attribute values may be stored as pointers to
8744 * the buffer and calling SHRINK would destroy them !
8745 * The Shrinking is only possible once the full set of attribute
8746 * callbacks have been done.
8747 */
8748 reparse:
8749 SHRINK;
8750 base = ctxt->input->base;
8751 cur = ctxt->input->cur - ctxt->input->base;
8752 oldline = ctxt->input->line;
8753 oldcol = ctxt->input->col;
8754 nbatts = 0;
8755 nratts = 0;
8756 nbdef = 0;
8757 nbNs = 0;
8758 attval = 0;
8759 /* Forget any namespaces added during an earlier parse of this element. */
8760 ctxt->nsNr = nsNr;
8761
8762 localname = xmlParseQName(ctxt, &prefix);
8763 if (localname == NULL) {
8764 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8765 "StartTag: invalid element name\n");
8766 return(NULL);
8767 }
8768 *tlen = ctxt->input->cur - ctxt->input->base - cur;
8769
8770 /*
8771 * Now parse the attributes, it ends up with the ending
8772 *
8773 * (S Attribute)* S?
8774 */
8775 SKIP_BLANKS;
8776 GROW;
8777 if (ctxt->input->base != base) goto base_changed;
8778
8779 while ((RAW != '>') &&
8780 ((RAW != '/') || (NXT(1) != '>')) &&
8781 (IS_BYTE_CHAR(RAW))) {
8782 const xmlChar *q = CUR_PTR;
8783 unsigned int cons = ctxt->input->consumed;
8784 int len = -1, alloc = 0;
8785
8786 attname = xmlParseAttribute2(ctxt, prefix, localname,
8787 &aprefix, &attvalue, &len, &alloc);
8788 if (ctxt->input->base != base) {
8789 if ((attvalue != NULL) && (alloc != 0))
8790 xmlFree(attvalue);
8791 attvalue = NULL;
8792 goto base_changed;
8793 }
8794 if ((attname != NULL) && (attvalue != NULL)) {
8795 if (len < 0) len = xmlStrlen(attvalue);
8796 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8797 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8798 xmlURIPtr uri;
8799
8800 if (*URL != 0) {
8801 uri = xmlParseURI((const char *) URL);
8802 if (uri == NULL) {
8803 xmlNsErr(ctxt, XML_WAR_NS_URI,
8804 "xmlns: '%s' is not a valid URI\n",
8805 URL, NULL, NULL);
8806 } else {
8807 if (uri->scheme == NULL) {
8808 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8809 "xmlns: URI %s is not absolute\n",
8810 URL, NULL, NULL);
8811 }
8812 xmlFreeURI(uri);
8813 }
8814 if (URL == ctxt->str_xml_ns) {
8815 if (attname != ctxt->str_xml) {
8816 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8817 "xml namespace URI cannot be the default namespace\n",
8818 NULL, NULL, NULL);
8819 }
8820 goto skip_default_ns;
8821 }
8822 if ((len == 29) &&
8823 (xmlStrEqual(URL,
8824 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8825 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8826 "reuse of the xmlns namespace name is forbidden\n",
8827 NULL, NULL, NULL);
8828 goto skip_default_ns;
8829 }
8830 }
8831 /*
8832 * check that it's not a defined namespace
8833 */
8834 for (j = 1;j <= nbNs;j++)
8835 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8836 break;
8837 if (j <= nbNs)
8838 xmlErrAttributeDup(ctxt, NULL, attname);
8839 else
8840 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8841 skip_default_ns:
8842 if (alloc != 0) xmlFree(attvalue);
8843 SKIP_BLANKS;
8844 continue;
8845 }
8846 if (aprefix == ctxt->str_xmlns) {
8847 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8848 xmlURIPtr uri;
8849
8850 if (attname == ctxt->str_xml) {
8851 if (URL != ctxt->str_xml_ns) {
8852 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8853 "xml namespace prefix mapped to wrong URI\n",
8854 NULL, NULL, NULL);
8855 }
8856 /*
8857 * Do not keep a namespace definition node
8858 */
8859 goto skip_ns;
8860 }
8861 if (URL == ctxt->str_xml_ns) {
8862 if (attname != ctxt->str_xml) {
8863 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8864 "xml namespace URI mapped to wrong prefix\n",
8865 NULL, NULL, NULL);
8866 }
8867 goto skip_ns;
8868 }
8869 if (attname == ctxt->str_xmlns) {
8870 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8871 "redefinition of the xmlns prefix is forbidden\n",
8872 NULL, NULL, NULL);
8873 goto skip_ns;
8874 }
8875 if ((len == 29) &&
8876 (xmlStrEqual(URL,
8877 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8878 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8879 "reuse of the xmlns namespace name is forbidden\n",
8880 NULL, NULL, NULL);
8881 goto skip_ns;
8882 }
8883 if ((URL == NULL) || (URL[0] == 0)) {
8884 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8885 "xmlns:%s: Empty XML namespace is not allowed\n",
8886 attname, NULL, NULL);
8887 goto skip_ns;
8888 } else {
8889 uri = xmlParseURI((const char *) URL);
8890 if (uri == NULL) {
8891 xmlNsErr(ctxt, XML_WAR_NS_URI,
8892 "xmlns:%s: '%s' is not a valid URI\n",
8893 attname, URL, NULL);
8894 } else {
8895 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8896 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8897 "xmlns:%s: URI %s is not absolute\n",
8898 attname, URL, NULL);
8899 }
8900 xmlFreeURI(uri);
8901 }
8902 }
8903
8904 /*
8905 * check that it's not a defined namespace
8906 */
8907 for (j = 1;j <= nbNs;j++)
8908 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8909 break;
8910 if (j <= nbNs)
8911 xmlErrAttributeDup(ctxt, aprefix, attname);
8912 else
8913 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
8914 skip_ns:
8915 if (alloc != 0) xmlFree(attvalue);
8916 SKIP_BLANKS;
8917 if (ctxt->input->base != base) goto base_changed;
8918 continue;
8919 }
8920
8921 /*
8922 * Add the pair to atts
8923 */
8924 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8925 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8926 if (attvalue[len] == 0)
8927 xmlFree(attvalue);
8928 goto failed;
8929 }
8930 maxatts = ctxt->maxatts;
8931 atts = ctxt->atts;
8932 }
8933 ctxt->attallocs[nratts++] = alloc;
8934 atts[nbatts++] = attname;
8935 atts[nbatts++] = aprefix;
8936 atts[nbatts++] = NULL; /* the URI will be fetched later */
8937 atts[nbatts++] = attvalue;
8938 attvalue += len;
8939 atts[nbatts++] = attvalue;
8940 /*
8941 * tag if some deallocation is needed
8942 */
8943 if (alloc != 0) attval = 1;
8944 } else {
8945 if ((attvalue != NULL) && (attvalue[len] == 0))
8946 xmlFree(attvalue);
8947 }
8948
8949 failed:
8950
8951 GROW
8952 if (ctxt->input->base != base) goto base_changed;
8953 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8954 break;
8955 if (!IS_BLANK_CH(RAW)) {
8956 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8957 "attributes construct error\n");
8958 break;
8959 }
8960 SKIP_BLANKS;
8961 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8962 (attname == NULL) && (attvalue == NULL)) {
8963 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8964 "xmlParseStartTag: problem parsing attributes\n");
8965 break;
8966 }
8967 GROW;
8968 if (ctxt->input->base != base) goto base_changed;
8969 }
8970
8971 /*
8972 * The attributes defaulting
8973 */
8974 if (ctxt->attsDefault != NULL) {
8975 xmlDefAttrsPtr defaults;
8976
8977 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8978 if (defaults != NULL) {
8979 for (i = 0;i < defaults->nbAttrs;i++) {
8980 attname = defaults->values[5 * i];
8981 aprefix = defaults->values[5 * i + 1];
8982
8983 /*
8984 * special work for namespaces defaulted defs
8985 */
8986 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8987 /*
8988 * check that it's not a defined namespace
8989 */
8990 for (j = 1;j <= nbNs;j++)
8991 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8992 break;
8993 if (j <= nbNs) continue;
8994
8995 nsname = xmlGetNamespace(ctxt, NULL);
8996 if (nsname != defaults->values[5 * i + 2]) {
8997 if (nsPush(ctxt, NULL,
8998 defaults->values[5 * i + 2]) > 0)
8999 nbNs++;
9000 }
9001 } else if (aprefix == ctxt->str_xmlns) {
9002 /*
9003 * check that it's not a defined namespace
9004 */
9005 for (j = 1;j <= nbNs;j++)
9006 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9007 break;
9008 if (j <= nbNs) continue;
9009
9010 nsname = xmlGetNamespace(ctxt, attname);
9011 if (nsname != defaults->values[2]) {
9012 if (nsPush(ctxt, attname,
9013 defaults->values[5 * i + 2]) > 0)
9014 nbNs++;
9015 }
9016 } else {
9017 /*
9018 * check that it's not a defined attribute
9019 */
9020 for (j = 0;j < nbatts;j+=5) {
9021 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9022 break;
9023 }
9024 if (j < nbatts) continue;
9025
9026 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9027 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9028 return(NULL);
9029 }
9030 maxatts = ctxt->maxatts;
9031 atts = ctxt->atts;
9032 }
9033 atts[nbatts++] = attname;
9034 atts[nbatts++] = aprefix;
9035 if (aprefix == NULL)
9036 atts[nbatts++] = NULL;
9037 else
9038 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9039 atts[nbatts++] = defaults->values[5 * i + 2];
9040 atts[nbatts++] = defaults->values[5 * i + 3];
9041 if ((ctxt->standalone == 1) &&
9042 (defaults->values[5 * i + 4] != NULL)) {
9043 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9044 "standalone: attribute %s on %s defaulted from external subset\n",
9045 attname, localname);
9046 }
9047 nbdef++;
9048 }
9049 }
9050 }
9051 }
9052
9053 /*
9054 * The attributes checkings
9055 */
9056 for (i = 0; i < nbatts;i += 5) {
9057 /*
9058 * The default namespace does not apply to attribute names.
9059 */
9060 if (atts[i + 1] != NULL) {
9061 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9062 if (nsname == NULL) {
9063 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9064 "Namespace prefix %s for %s on %s is not defined\n",
9065 atts[i + 1], atts[i], localname);
9066 }
9067 atts[i + 2] = nsname;
9068 } else
9069 nsname = NULL;
9070 /*
9071 * [ WFC: Unique Att Spec ]
9072 * No attribute name may appear more than once in the same
9073 * start-tag or empty-element tag.
9074 * As extended by the Namespace in XML REC.
9075 */
9076 for (j = 0; j < i;j += 5) {
9077 if (atts[i] == atts[j]) {
9078 if (atts[i+1] == atts[j+1]) {
9079 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9080 break;
9081 }
9082 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9083 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9084 "Namespaced Attribute %s in '%s' redefined\n",
9085 atts[i], nsname, NULL);
9086 break;
9087 }
9088 }
9089 }
9090 }
9091
9092 nsname = xmlGetNamespace(ctxt, prefix);
9093 if ((prefix != NULL) && (nsname == NULL)) {
9094 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9095 "Namespace prefix %s on %s is not defined\n",
9096 prefix, localname, NULL);
9097 }
9098 *pref = prefix;
9099 *URI = nsname;
9100
9101 /*
9102 * SAX: Start of Element !
9103 */
9104 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9105 (!ctxt->disableSAX)) {
9106 if (nbNs > 0)
9107 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9108 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9109 nbatts / 5, nbdef, atts);
9110 else
9111 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9112 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9113 }
9114
9115 /*
9116 * Free up attribute allocated strings if needed
9117 */
9118 if (attval != 0) {
9119 for (i = 3,j = 0; j < nratts;i += 5,j++)
9120 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9121 xmlFree((xmlChar *) atts[i]);
9122 }
9123
9124 return(localname);
9125
9126 base_changed:
9127 /*
9128 * the attribute strings are valid iif the base didn't changed
9129 */
9130 if (attval != 0) {
9131 for (i = 3,j = 0; j < nratts;i += 5,j++)
9132 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9133 xmlFree((xmlChar *) atts[i]);
9134 }
9135 ctxt->input->cur = ctxt->input->base + cur;
9136 ctxt->input->line = oldline;
9137 ctxt->input->col = oldcol;
9138 if (ctxt->wellFormed == 1) {
9139 goto reparse;
9140 }
9141 return(NULL);
9142 }
9143
9144 /**
9145 * xmlParseEndTag2:
9146 * @ctxt: an XML parser context
9147 * @line: line of the start tag
9148 * @nsNr: number of namespaces on the start tag
9149 *
9150 * parse an end of tag
9151 *
9152 * [42] ETag ::= '</' Name S? '>'
9153 *
9154 * With namespace
9155 *
9156 * [NS 9] ETag ::= '</' QName S? '>'
9157 */
9158
9159 static void
9160 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9161 const xmlChar *URI, int line, int nsNr, int tlen) {
9162 const xmlChar *name;
9163
9164 GROW;
9165 if ((RAW != '<') || (NXT(1) != '/')) {
9166 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9167 return;
9168 }
9169 SKIP(2);
9170
9171 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9172 if (ctxt->input->cur[tlen] == '>') {
9173 ctxt->input->cur += tlen + 1;
9174 goto done;
9175 }
9176 ctxt->input->cur += tlen;
9177 name = (xmlChar*)1;
9178 } else {
9179 if (prefix == NULL)
9180 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9181 else
9182 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9183 }
9184
9185 /*
9186 * We should definitely be at the ending "S? '>'" part
9187 */
9188 GROW;
9189 SKIP_BLANKS;
9190 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9191 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9192 } else
9193 NEXT1;
9194
9195 /*
9196 * [ WFC: Element Type Match ]
9197 * The Name in an element's end-tag must match the element type in the
9198 * start-tag.
9199 *
9200 */
9201 if (name != (xmlChar*)1) {
9202 if (name == NULL) name = BAD_CAST "unparseable";
9203 if ((line == 0) && (ctxt->node != NULL))
9204 line = ctxt->node->line;
9205 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9206 "Opening and ending tag mismatch: %s line %d and %s\n",
9207 ctxt->name, line, name);
9208 }
9209
9210 /*
9211 * SAX: End of Tag
9212 */
9213 done:
9214 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9215 (!ctxt->disableSAX))
9216 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9217
9218 spacePop(ctxt);
9219 if (nsNr != 0)
9220 nsPop(ctxt, nsNr);
9221 return;
9222 }
9223
9224 /**
9225 * xmlParseCDSect:
9226 * @ctxt: an XML parser context
9227 *
9228 * Parse escaped pure raw content.
9229 *
9230 * [18] CDSect ::= CDStart CData CDEnd
9231 *
9232 * [19] CDStart ::= '<![CDATA['
9233 *
9234 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9235 *
9236 * [21] CDEnd ::= ']]>'
9237 */
9238 void
9239 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9240 xmlChar *buf = NULL;
9241 int len = 0;
9242 int size = XML_PARSER_BUFFER_SIZE;
9243 int r, rl;
9244 int s, sl;
9245 int cur, l;
9246 int count = 0;
9247
9248 /* Check 2.6.0 was NXT(0) not RAW */
9249 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9250 SKIP(9);
9251 } else
9252 return;
9253
9254 ctxt->instate = XML_PARSER_CDATA_SECTION;
9255 r = CUR_CHAR(rl);
9256 if (!IS_CHAR(r)) {
9257 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9258 ctxt->instate = XML_PARSER_CONTENT;
9259 return;
9260 }
9261 NEXTL(rl);
9262 s = CUR_CHAR(sl);
9263 if (!IS_CHAR(s)) {
9264 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9265 ctxt->instate = XML_PARSER_CONTENT;
9266 return;
9267 }
9268 NEXTL(sl);
9269 cur = CUR_CHAR(l);
9270 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9271 if (buf == NULL) {
9272 xmlErrMemory(ctxt, NULL);
9273 return;
9274 }
9275 while (IS_CHAR(cur) &&
9276 ((r != ']') || (s != ']') || (cur != '>'))) {
9277 if (len + 5 >= size) {
9278 xmlChar *tmp;
9279
9280 size *= 2;
9281 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9282 if (tmp == NULL) {
9283 xmlFree(buf);
9284 xmlErrMemory(ctxt, NULL);
9285 return;
9286 }
9287 buf = tmp;
9288 }
9289 COPY_BUF(rl,buf,len,r);
9290 r = s;
9291 rl = sl;
9292 s = cur;
9293 sl = l;
9294 count++;
9295 if (count > 50) {
9296 GROW;
9297 count = 0;
9298 }
9299 NEXTL(l);
9300 cur = CUR_CHAR(l);
9301 }
9302 buf[len] = 0;
9303 ctxt->instate = XML_PARSER_CONTENT;
9304 if (cur != '>') {
9305 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9306 "CData section not finished\n%.50s\n", buf);
9307 xmlFree(buf);
9308 return;
9309 }
9310 NEXTL(l);
9311
9312 /*
9313 * OK the buffer is to be consumed as cdata.
9314 */
9315 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9316 if (ctxt->sax->cdataBlock != NULL)
9317 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9318 else if (ctxt->sax->characters != NULL)
9319 ctxt->sax->characters(ctxt->userData, buf, len);
9320 }
9321 xmlFree(buf);
9322 }
9323
9324 /**
9325 * xmlParseContent:
9326 * @ctxt: an XML parser context
9327 *
9328 * Parse a content:
9329 *
9330 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9331 */
9332
9333 void
9334 xmlParseContent(xmlParserCtxtPtr ctxt) {
9335 GROW;
9336 while ((RAW != 0) &&
9337 ((RAW != '<') || (NXT(1) != '/')) &&
9338 (ctxt->instate != XML_PARSER_EOF)) {
9339 const xmlChar *test = CUR_PTR;
9340 unsigned int cons = ctxt->input->consumed;
9341 const xmlChar *cur = ctxt->input->cur;
9342
9343 /*
9344 * First case : a Processing Instruction.
9345 */
9346 if ((*cur == '<') && (cur[1] == '?')) {
9347 xmlParsePI(ctxt);
9348 }
9349
9350 /*
9351 * Second case : a CDSection
9352 */
9353 /* 2.6.0 test was *cur not RAW */
9354 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9355 xmlParseCDSect(ctxt);
9356 }
9357
9358 /*
9359 * Third case : a comment
9360 */
9361 else if ((*cur == '<') && (NXT(1) == '!') &&
9362 (NXT(2) == '-') && (NXT(3) == '-')) {
9363 xmlParseComment(ctxt);
9364 ctxt->instate = XML_PARSER_CONTENT;
9365 }
9366
9367 /*
9368 * Fourth case : a sub-element.
9369 */
9370 else if (*cur == '<') {
9371 xmlParseElement(ctxt);
9372 }
9373
9374 /*
9375 * Fifth case : a reference. If if has not been resolved,
9376 * parsing returns it's Name, create the node
9377 */
9378
9379 else if (*cur == '&') {
9380 xmlParseReference(ctxt);
9381 }
9382
9383 /*
9384 * Last case, text. Note that References are handled directly.
9385 */
9386 else {
9387 xmlParseCharData(ctxt, 0);
9388 }
9389
9390 GROW;
9391 /*
9392 * Pop-up of finished entities.
9393 */
9394 while ((RAW == 0) && (ctxt->inputNr > 1))
9395 xmlPopInput(ctxt);
9396 SHRINK;
9397
9398 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9399 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9400 "detected an error in element content\n");
9401 ctxt->instate = XML_PARSER_EOF;
9402 break;
9403 }
9404 }
9405 }
9406
9407 /**
9408 * xmlParseElement:
9409 * @ctxt: an XML parser context
9410 *
9411 * parse an XML element, this is highly recursive
9412 *
9413 * [39] element ::= EmptyElemTag | STag content ETag
9414 *
9415 * [ WFC: Element Type Match ]
9416 * The Name in an element's end-tag must match the element type in the
9417 * start-tag.
9418 *
9419 */
9420
9421 void
9422 xmlParseElement(xmlParserCtxtPtr ctxt) {
9423 const xmlChar *name;
9424 const xmlChar *prefix = NULL;
9425 const xmlChar *URI = NULL;
9426 xmlParserNodeInfo node_info;
9427 int line, tlen;
9428 xmlNodePtr ret;
9429 int nsNr = ctxt->nsNr;
9430
9431 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9432 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9433 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9434 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9435 xmlParserMaxDepth);
9436 ctxt->instate = XML_PARSER_EOF;
9437 return;
9438 }
9439
9440 /* Capture start position */
9441 if (ctxt->record_info) {
9442 node_info.begin_pos = ctxt->input->consumed +
9443 (CUR_PTR - ctxt->input->base);
9444 node_info.begin_line = ctxt->input->line;
9445 }
9446
9447 if (ctxt->spaceNr == 0)
9448 spacePush(ctxt, -1);
9449 else if (*ctxt->space == -2)
9450 spacePush(ctxt, -1);
9451 else
9452 spacePush(ctxt, *ctxt->space);
9453
9454 line = ctxt->input->line;
9455 #ifdef LIBXML_SAX1_ENABLED
9456 if (ctxt->sax2)
9457 #endif /* LIBXML_SAX1_ENABLED */
9458 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9459 #ifdef LIBXML_SAX1_ENABLED
9460 else
9461 name = xmlParseStartTag(ctxt);
9462 #endif /* LIBXML_SAX1_ENABLED */
9463 if (name == NULL) {
9464 spacePop(ctxt);
9465 return;
9466 }
9467 namePush(ctxt, name);
9468 ret = ctxt->node;
9469
9470 #ifdef LIBXML_VALID_ENABLED
9471 /*
9472 * [ VC: Root Element Type ]
9473 * The Name in the document type declaration must match the element
9474 * type of the root element.
9475 */
9476 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9477 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9478 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9479 #endif /* LIBXML_VALID_ENABLED */
9480
9481 /*
9482 * Check for an Empty Element.
9483 */
9484 if ((RAW == '/') && (NXT(1) == '>')) {
9485 SKIP(2);
9486 if (ctxt->sax2) {
9487 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9488 (!ctxt->disableSAX))
9489 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9490 #ifdef LIBXML_SAX1_ENABLED
9491 } else {
9492 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9493 (!ctxt->disableSAX))
9494 ctxt->sax->endElement(ctxt->userData, name);
9495 #endif /* LIBXML_SAX1_ENABLED */
9496 }
9497 namePop(ctxt);
9498 spacePop(ctxt);
9499 if (nsNr != ctxt->nsNr)
9500 nsPop(ctxt, ctxt->nsNr - nsNr);
9501 if ( ret != NULL && ctxt->record_info ) {
9502 node_info.end_pos = ctxt->input->consumed +
9503 (CUR_PTR - ctxt->input->base);
9504 node_info.end_line = ctxt->input->line;
9505 node_info.node = ret;
9506 xmlParserAddNodeInfo(ctxt, &node_info);
9507 }
9508 return;
9509 }
9510 if (RAW == '>') {
9511 NEXT1;
9512 } else {
9513 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9514 "Couldn't find end of Start Tag %s line %d\n",
9515 name, line, NULL);
9516
9517 /*
9518 * end of parsing of this node.
9519 */
9520 nodePop(ctxt);
9521 namePop(ctxt);
9522 spacePop(ctxt);
9523 if (nsNr != ctxt->nsNr)
9524 nsPop(ctxt, ctxt->nsNr - nsNr);
9525
9526 /*
9527 * Capture end position and add node
9528 */
9529 if ( ret != NULL && ctxt->record_info ) {
9530 node_info.end_pos = ctxt->input->consumed +
9531 (CUR_PTR - ctxt->input->base);
9532 node_info.end_line = ctxt->input->line;
9533 node_info.node = ret;
9534 xmlParserAddNodeInfo(ctxt, &node_info);
9535 }
9536 return;
9537 }
9538
9539 /*
9540 * Parse the content of the element:
9541 */
9542 xmlParseContent(ctxt);
9543 if (!IS_BYTE_CHAR(RAW)) {
9544 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9545 "Premature end of data in tag %s line %d\n",
9546 name, line, NULL);
9547
9548 /*
9549 * end of parsing of this node.
9550 */
9551 nodePop(ctxt);
9552 namePop(ctxt);
9553 spacePop(ctxt);
9554 if (nsNr != ctxt->nsNr)
9555 nsPop(ctxt, ctxt->nsNr - nsNr);
9556 return;
9557 }
9558
9559 /*
9560 * parse the end of tag: '</' should be here.
9561 */
9562 if (ctxt->sax2) {
9563 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
9564 namePop(ctxt);
9565 }
9566 #ifdef LIBXML_SAX1_ENABLED
9567 else
9568 xmlParseEndTag1(ctxt, line);
9569 #endif /* LIBXML_SAX1_ENABLED */
9570
9571 /*
9572 * Capture end position and add node
9573 */
9574 if ( ret != NULL && ctxt->record_info ) {
9575 node_info.end_pos = ctxt->input->consumed +
9576 (CUR_PTR - ctxt->input->base);
9577 node_info.end_line = ctxt->input->line;
9578 node_info.node = ret;
9579 xmlParserAddNodeInfo(ctxt, &node_info);
9580 }
9581 }
9582
9583 /**
9584 * xmlParseVersionNum:
9585 * @ctxt: an XML parser context
9586 *
9587 * parse the XML version value.
9588 *
9589 * [26] VersionNum ::= '1.' [0-9]+
9590 *
9591 * In practice allow [0-9].[0-9]+ at that level
9592 *
9593 * Returns the string giving the XML version number, or NULL
9594 */
9595 xmlChar *
9596 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9597 xmlChar *buf = NULL;
9598 int len = 0;
9599 int size = 10;
9600 xmlChar cur;
9601
9602 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9603 if (buf == NULL) {
9604 xmlErrMemory(ctxt, NULL);
9605 return(NULL);
9606 }
9607 cur = CUR;
9608 if (!((cur >= '0') && (cur <= '9'))) {
9609 xmlFree(buf);
9610 return(NULL);
9611 }
9612 buf[len++] = cur;
9613 NEXT;
9614 cur=CUR;
9615 if (cur != '.') {
9616 xmlFree(buf);
9617 return(NULL);
9618 }
9619 buf[len++] = cur;
9620 NEXT;
9621 cur=CUR;
9622 while ((cur >= '0') && (cur <= '9')) {
9623 if (len + 1 >= size) {
9624 xmlChar *tmp;
9625
9626 size *= 2;
9627 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9628 if (tmp == NULL) {
9629 xmlFree(buf);
9630 xmlErrMemory(ctxt, NULL);
9631 return(NULL);
9632 }
9633 buf = tmp;
9634 }
9635 buf[len++] = cur;
9636 NEXT;
9637 cur=CUR;
9638 }
9639 buf[len] = 0;
9640 return(buf);
9641 }
9642
9643 /**
9644 * xmlParseVersionInfo:
9645 * @ctxt: an XML parser context
9646 *
9647 * parse the XML version.
9648 *
9649 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9650 *
9651 * [25] Eq ::= S? '=' S?
9652 *
9653 * Returns the version string, e.g. "1.0"
9654 */
9655
9656 xmlChar *
9657 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9658 xmlChar *version = NULL;
9659
9660 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9661 SKIP(7);
9662 SKIP_BLANKS;
9663 if (RAW != '=') {
9664 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9665 return(NULL);
9666 }
9667 NEXT;
9668 SKIP_BLANKS;
9669 if (RAW == '"') {
9670 NEXT;
9671 version = xmlParseVersionNum(ctxt);
9672 if (RAW != '"') {
9673 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9674 } else
9675 NEXT;
9676 } else if (RAW == '\''){
9677 NEXT;
9678 version = xmlParseVersionNum(ctxt);
9679 if (RAW != '\'') {
9680 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9681 } else
9682 NEXT;
9683 } else {
9684 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9685 }
9686 }
9687 return(version);
9688 }
9689
9690 /**
9691 * xmlParseEncName:
9692 * @ctxt: an XML parser context
9693 *
9694 * parse the XML encoding name
9695 *
9696 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9697 *
9698 * Returns the encoding name value or NULL
9699 */
9700 xmlChar *
9701 xmlParseEncName(xmlParserCtxtPtr ctxt) {
9702 xmlChar *buf = NULL;
9703 int len = 0;
9704 int size = 10;
9705 xmlChar cur;
9706
9707 cur = CUR;
9708 if (((cur >= 'a') && (cur <= 'z')) ||
9709 ((cur >= 'A') && (cur <= 'Z'))) {
9710 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9711 if (buf == NULL) {
9712 xmlErrMemory(ctxt, NULL);
9713 return(NULL);
9714 }
9715
9716 buf[len++] = cur;
9717 NEXT;
9718 cur = CUR;
9719 while (((cur >= 'a') && (cur <= 'z')) ||
9720 ((cur >= 'A') && (cur <= 'Z')) ||
9721 ((cur >= '0') && (cur <= '9')) ||
9722 (cur == '.') || (cur == '_') ||
9723 (cur == '-')) {
9724 if (len + 1 >= size) {
9725 xmlChar *tmp;
9726
9727 size *= 2;
9728 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9729 if (tmp == NULL) {
9730 xmlErrMemory(ctxt, NULL);
9731 xmlFree(buf);
9732 return(NULL);
9733 }
9734 buf = tmp;
9735 }
9736 buf[len++] = cur;
9737 NEXT;
9738 cur = CUR;
9739 if (cur == 0) {
9740 SHRINK;
9741 GROW;
9742 cur = CUR;
9743 }
9744 }
9745 buf[len] = 0;
9746 } else {
9747 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
9748 }
9749 return(buf);
9750 }
9751
9752 /**
9753 * xmlParseEncodingDecl:
9754 * @ctxt: an XML parser context
9755 *
9756 * parse the XML encoding declaration
9757 *
9758 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9759 *
9760 * this setups the conversion filters.
9761 *
9762 * Returns the encoding value or NULL
9763 */
9764
9765 const xmlChar *
9766 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9767 xmlChar *encoding = NULL;
9768
9769 SKIP_BLANKS;
9770 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9771 SKIP(8);
9772 SKIP_BLANKS;
9773 if (RAW != '=') {
9774 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9775 return(NULL);
9776 }
9777 NEXT;
9778 SKIP_BLANKS;
9779 if (RAW == '"') {
9780 NEXT;
9781 encoding = xmlParseEncName(ctxt);
9782 if (RAW != '"') {
9783 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9784 } else
9785 NEXT;
9786 } else if (RAW == '\''){
9787 NEXT;
9788 encoding = xmlParseEncName(ctxt);
9789 if (RAW != '\'') {
9790 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9791 } else
9792 NEXT;
9793 } else {
9794 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9795 }
9796 /*
9797 * UTF-16 encoding stwich has already taken place at this stage,
9798 * more over the little-endian/big-endian selection is already done
9799 */
9800 if ((encoding != NULL) &&
9801 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9802 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
9803 /*
9804 * If no encoding was passed to the parser, that we are
9805 * using UTF-16 and no decoder is present i.e. the
9806 * document is apparently UTF-8 compatible, then raise an
9807 * encoding mismatch fatal error
9808 */
9809 if ((ctxt->encoding == NULL) &&
9810 (ctxt->input->buf != NULL) &&
9811 (ctxt->input->buf->encoder == NULL)) {
9812 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9813 "Document labelled UTF-16 but has UTF-8 content\n");
9814 }
9815 if (ctxt->encoding != NULL)
9816 xmlFree((xmlChar *) ctxt->encoding);
9817 ctxt->encoding = encoding;
9818 }
9819 /*
9820 * UTF-8 encoding is handled natively
9821 */
9822 else if ((encoding != NULL) &&
9823 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9824 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
9825 if (ctxt->encoding != NULL)
9826 xmlFree((xmlChar *) ctxt->encoding);
9827 ctxt->encoding = encoding;
9828 }
9829 else if (encoding != NULL) {
9830 xmlCharEncodingHandlerPtr handler;
9831
9832 if (ctxt->input->encoding != NULL)
9833 xmlFree((xmlChar *) ctxt->input->encoding);
9834 ctxt->input->encoding = encoding;
9835
9836 handler = xmlFindCharEncodingHandler((const char *) encoding);
9837 if (handler != NULL) {
9838 xmlSwitchToEncoding(ctxt, handler);
9839 } else {
9840 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
9841 "Unsupported encoding %s\n", encoding);
9842 return(NULL);
9843 }
9844 }
9845 }
9846 return(encoding);
9847 }
9848
9849 /**
9850 * xmlParseSDDecl:
9851 * @ctxt: an XML parser context
9852 *
9853 * parse the XML standalone declaration
9854 *
9855 * [32] SDDecl ::= S 'standalone' Eq
9856 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9857 *
9858 * [ VC: Standalone Document Declaration ]
9859 * TODO The standalone document declaration must have the value "no"
9860 * if any external markup declarations contain declarations of:
9861 * - attributes with default values, if elements to which these
9862 * attributes apply appear in the document without specifications
9863 * of values for these attributes, or
9864 * - entities (other than amp, lt, gt, apos, quot), if references
9865 * to those entities appear in the document, or
9866 * - attributes with values subject to normalization, where the
9867 * attribute appears in the document with a value which will change
9868 * as a result of normalization, or
9869 * - element types with element content, if white space occurs directly
9870 * within any instance of those types.
9871 *
9872 * Returns:
9873 * 1 if standalone="yes"
9874 * 0 if standalone="no"
9875 * -2 if standalone attribute is missing or invalid
9876 * (A standalone value of -2 means that the XML declaration was found,
9877 * but no value was specified for the standalone attribute).
9878 */
9879
9880 int
9881 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
9882 int standalone = -2;
9883
9884 SKIP_BLANKS;
9885 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
9886 SKIP(10);
9887 SKIP_BLANKS;
9888 if (RAW != '=') {
9889 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9890 return(standalone);
9891 }
9892 NEXT;
9893 SKIP_BLANKS;
9894 if (RAW == '\''){
9895 NEXT;
9896 if ((RAW == 'n') && (NXT(1) == 'o')) {
9897 standalone = 0;
9898 SKIP(2);
9899 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9900 (NXT(2) == 's')) {
9901 standalone = 1;
9902 SKIP(3);
9903 } else {
9904 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9905 }
9906 if (RAW != '\'') {
9907 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9908 } else
9909 NEXT;
9910 } else if (RAW == '"'){
9911 NEXT;
9912 if ((RAW == 'n') && (NXT(1) == 'o')) {
9913 standalone = 0;
9914 SKIP(2);
9915 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9916 (NXT(2) == 's')) {
9917 standalone = 1;
9918 SKIP(3);
9919 } else {
9920 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9921 }
9922 if (RAW != '"') {
9923 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9924 } else
9925 NEXT;
9926 } else {
9927 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9928 }
9929 }
9930 return(standalone);
9931 }
9932
9933 /**
9934 * xmlParseXMLDecl:
9935 * @ctxt: an XML parser context
9936 *
9937 * parse an XML declaration header
9938 *
9939 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9940 */
9941
9942 void
9943 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9944 xmlChar *version;
9945
9946 /*
9947 * This value for standalone indicates that the document has an
9948 * XML declaration but it does not have a standalone attribute.
9949 * It will be overwritten later if a standalone attribute is found.
9950 */
9951 ctxt->input->standalone = -2;
9952
9953 /*
9954 * We know that '<?xml' is here.
9955 */
9956 SKIP(5);
9957
9958 if (!IS_BLANK_CH(RAW)) {
9959 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9960 "Blank needed after '<?xml'\n");
9961 }
9962 SKIP_BLANKS;
9963
9964 /*
9965 * We must have the VersionInfo here.
9966 */
9967 version = xmlParseVersionInfo(ctxt);
9968 if (version == NULL) {
9969 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
9970 } else {
9971 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9972 /*
9973 * Changed here for XML-1.0 5th edition
9974 */
9975 if (ctxt->options & XML_PARSE_OLD10) {
9976 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9977 "Unsupported version '%s'\n",
9978 version);
9979 } else {
9980 if ((version[0] == '1') && ((version[1] == '.'))) {
9981 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9982 "Unsupported version '%s'\n",
9983 version, NULL);
9984 } else {
9985 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9986 "Unsupported version '%s'\n",
9987 version);
9988 }
9989 }
9990 }
9991 if (ctxt->version != NULL)
9992 xmlFree((void *) ctxt->version);
9993 ctxt->version = version;
9994 }
9995
9996 /*
9997 * We may have the encoding declaration
9998 */
9999 if (!IS_BLANK_CH(RAW)) {
10000 if ((RAW == '?') && (NXT(1) == '>')) {
10001 SKIP(2);
10002 return;
10003 }
10004 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10005 }
10006 xmlParseEncodingDecl(ctxt);
10007 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10008 /*
10009 * The XML REC instructs us to stop parsing right here
10010 */
10011 return;
10012 }
10013
10014 /*
10015 * We may have the standalone status.
10016 */
10017 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10018 if ((RAW == '?') && (NXT(1) == '>')) {
10019 SKIP(2);
10020 return;
10021 }
10022 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10023 }
10024
10025 /*
10026 * We can grow the input buffer freely at that point
10027 */
10028 GROW;
10029
10030 SKIP_BLANKS;
10031 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10032
10033 SKIP_BLANKS;
10034 if ((RAW == '?') && (NXT(1) == '>')) {
10035 SKIP(2);
10036 } else if (RAW == '>') {
10037 /* Deprecated old WD ... */
10038 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10039 NEXT;
10040 } else {
10041 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10042 MOVETO_ENDTAG(CUR_PTR);
10043 NEXT;
10044 }
10045 }
10046
10047 /**
10048 * xmlParseMisc:
10049 * @ctxt: an XML parser context
10050 *
10051 * parse an XML Misc* optional field.
10052 *
10053 * [27] Misc ::= Comment | PI | S
10054 */
10055
10056 void
10057 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10058 while (((RAW == '<') && (NXT(1) == '?')) ||
10059 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10060 IS_BLANK_CH(CUR)) {
10061 if ((RAW == '<') && (NXT(1) == '?')) {
10062 xmlParsePI(ctxt);
10063 } else if (IS_BLANK_CH(CUR)) {
10064 NEXT;
10065 } else
10066 xmlParseComment(ctxt);
10067 }
10068 }
10069
10070 /**
10071 * xmlParseDocument:
10072 * @ctxt: an XML parser context
10073 *
10074 * parse an XML document (and build a tree if using the standard SAX
10075 * interface).
10076 *
10077 * [1] document ::= prolog element Misc*
10078 *
10079 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10080 *
10081 * Returns 0, -1 in case of error. the parser context is augmented
10082 * as a result of the parsing.
10083 */
10084
10085 int
10086 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10087 xmlChar start[4];
10088 xmlCharEncoding enc;
10089
10090 xmlInitParser();
10091
10092 if ((ctxt == NULL) || (ctxt->input == NULL))
10093 return(-1);
10094
10095 GROW;
10096
10097 /*
10098 * SAX: detecting the level.
10099 */
10100 xmlDetectSAX2(ctxt);
10101
10102 /*
10103 * SAX: beginning of the document processing.
10104 */
10105 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10106 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10107
10108 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
10109 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10110 /*
10111 * Get the 4 first bytes and decode the charset
10112 * if enc != XML_CHAR_ENCODING_NONE
10113 * plug some encoding conversion routines.
10114 */
10115 start[0] = RAW;
10116 start[1] = NXT(1);
10117 start[2] = NXT(2);
10118 start[3] = NXT(3);
10119 enc = xmlDetectCharEncoding(&start[0], 4);
10120 if (enc != XML_CHAR_ENCODING_NONE) {
10121 xmlSwitchEncoding(ctxt, enc);
10122 }
10123 }
10124
10125
10126 if (CUR == 0) {
10127 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10128 }
10129
10130 /*
10131 * Check for the XMLDecl in the Prolog.
10132 * do not GROW here to avoid the detected encoder to decode more
10133 * than just the first line, unless the amount of data is really
10134 * too small to hold "<?xml version="1.0" encoding="foo"
10135 */
10136 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10137 GROW;
10138 }
10139 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10140
10141 /*
10142 * Note that we will switch encoding on the fly.
10143 */
10144 xmlParseXMLDecl(ctxt);
10145 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10146 /*
10147 * The XML REC instructs us to stop parsing right here
10148 */
10149 return(-1);
10150 }
10151 ctxt->standalone = ctxt->input->standalone;
10152 SKIP_BLANKS;
10153 } else {
10154 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10155 }
10156 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10157 ctxt->sax->startDocument(ctxt->userData);
10158
10159 /*
10160 * The Misc part of the Prolog
10161 */
10162 GROW;
10163 xmlParseMisc(ctxt);
10164
10165 /*
10166 * Then possibly doc type declaration(s) and more Misc
10167 * (doctypedecl Misc*)?
10168 */
10169 GROW;
10170 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10171
10172 ctxt->inSubset = 1;
10173 xmlParseDocTypeDecl(ctxt);
10174 if (RAW == '[') {
10175 ctxt->instate = XML_PARSER_DTD;
10176 xmlParseInternalSubset(ctxt);
10177 }
10178
10179 /*
10180 * Create and update the external subset.
10181 */
10182 ctxt->inSubset = 2;
10183 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10184 (!ctxt->disableSAX))
10185 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10186 ctxt->extSubSystem, ctxt->extSubURI);
10187 ctxt->inSubset = 0;
10188
10189 xmlCleanSpecialAttr(ctxt);
10190
10191 ctxt->instate = XML_PARSER_PROLOG;
10192 xmlParseMisc(ctxt);
10193 }
10194
10195 /*
10196 * Time to start parsing the tree itself
10197 */
10198 GROW;
10199 if (RAW != '<') {
10200 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10201 "Start tag expected, '<' not found\n");
10202 } else {
10203 ctxt->instate = XML_PARSER_CONTENT;
10204 xmlParseElement(ctxt);
10205 ctxt->instate = XML_PARSER_EPILOG;
10206
10207
10208 /*
10209 * The Misc part at the end
10210 */
10211 xmlParseMisc(ctxt);
10212
10213 if (RAW != 0) {
10214 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10215 }
10216 ctxt->instate = XML_PARSER_EOF;
10217 }
10218
10219 /*
10220 * SAX: end of the document processing.
10221 */
10222 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10223 ctxt->sax->endDocument(ctxt->userData);
10224
10225 /*
10226 * Remove locally kept entity definitions if the tree was not built
10227 */
10228 if ((ctxt->myDoc != NULL) &&
10229 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10230 xmlFreeDoc(ctxt->myDoc);
10231 ctxt->myDoc = NULL;
10232 }
10233
10234 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10235 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10236 if (ctxt->valid)
10237 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10238 if (ctxt->nsWellFormed)
10239 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10240 if (ctxt->options & XML_PARSE_OLD10)
10241 ctxt->myDoc->properties |= XML_DOC_OLD10;
10242 }
10243 if (! ctxt->wellFormed) {
10244 ctxt->valid = 0;
10245 return(-1);
10246 }
10247 return(0);
10248 }
10249
10250 /**
10251 * xmlParseExtParsedEnt:
10252 * @ctxt: an XML parser context
10253 *
10254 * parse a general parsed entity
10255 * An external general parsed entity is well-formed if it matches the
10256 * production labeled extParsedEnt.
10257 *
10258 * [78] extParsedEnt ::= TextDecl? content
10259 *
10260 * Returns 0, -1 in case of error. the parser context is augmented
10261 * as a result of the parsing.
10262 */
10263
10264 int
10265 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10266 xmlChar start[4];
10267 xmlCharEncoding enc;
10268
10269 if ((ctxt == NULL) || (ctxt->input == NULL))
10270 return(-1);
10271
10272 xmlDefaultSAXHandlerInit();
10273
10274 xmlDetectSAX2(ctxt);
10275
10276 GROW;
10277
10278 /*
10279 * SAX: beginning of the document processing.
10280 */
10281 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10282 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10283
10284 /*
10285 * Get the 4 first bytes and decode the charset
10286 * if enc != XML_CHAR_ENCODING_NONE
10287 * plug some encoding conversion routines.
10288 */
10289 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10290 start[0] = RAW;
10291 start[1] = NXT(1);
10292 start[2] = NXT(2);
10293 start[3] = NXT(3);
10294 enc = xmlDetectCharEncoding(start, 4);
10295 if (enc != XML_CHAR_ENCODING_NONE) {
10296 xmlSwitchEncoding(ctxt, enc);
10297 }
10298 }
10299
10300
10301 if (CUR == 0) {
10302 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10303 }
10304
10305 /*
10306 * Check for the XMLDecl in the Prolog.
10307 */
10308 GROW;
10309 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10310
10311 /*
10312 * Note that we will switch encoding on the fly.
10313 */
10314 xmlParseXMLDecl(ctxt);
10315 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10316 /*
10317 * The XML REC instructs us to stop parsing right here
10318 */
10319 return(-1);
10320 }
10321 SKIP_BLANKS;
10322 } else {
10323 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10324 }
10325 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10326 ctxt->sax->startDocument(ctxt->userData);
10327
10328 /*
10329 * Doing validity checking on chunk doesn't make sense
10330 */
10331 ctxt->instate = XML_PARSER_CONTENT;
10332 ctxt->validate = 0;
10333 ctxt->loadsubset = 0;
10334 ctxt->depth = 0;
10335
10336 xmlParseContent(ctxt);
10337
10338 if ((RAW == '<') && (NXT(1) == '/')) {
10339 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10340 } else if (RAW != 0) {
10341 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10342 }
10343
10344 /*
10345 * SAX: end of the document processing.
10346 */
10347 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10348 ctxt->sax->endDocument(ctxt->userData);
10349
10350 if (! ctxt->wellFormed) return(-1);
10351 return(0);
10352 }
10353
10354 #ifdef LIBXML_PUSH_ENABLED
10355 /************************************************************************
10356 * *
10357 * Progressive parsing interfaces *
10358 * *
10359 ************************************************************************/
10360
10361 /**
10362 * xmlParseLookupSequence:
10363 * @ctxt: an XML parser context
10364 * @first: the first char to lookup
10365 * @next: the next char to lookup or zero
10366 * @third: the next char to lookup or zero
10367 *
10368 * Try to find if a sequence (first, next, third) or just (first next) or
10369 * (first) is available in the input stream.
10370 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10371 * to avoid rescanning sequences of bytes, it DOES change the state of the
10372 * parser, do not use liberally.
10373 *
10374 * Returns the index to the current parsing point if the full sequence
10375 * is available, -1 otherwise.
10376 */
10377 static int
10378 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10379 xmlChar next, xmlChar third) {
10380 int base, len;
10381 xmlParserInputPtr in;
10382 const xmlChar *buf;
10383
10384 in = ctxt->input;
10385 if (in == NULL) return(-1);
10386 base = in->cur - in->base;
10387 if (base < 0) return(-1);
10388 if (ctxt->checkIndex > base)
10389 base = ctxt->checkIndex;
10390 if (in->buf == NULL) {
10391 buf = in->base;
10392 len = in->length;
10393 } else {
10394 buf = in->buf->buffer->content;
10395 len = in->buf->buffer->use;
10396 }
10397 /* take into account the sequence length */
10398 if (third) len -= 2;
10399 else if (next) len --;
10400 for (;base < len;base++) {
10401 if (buf[base] == first) {
10402 if (third != 0) {
10403 if ((buf[base + 1] != next) ||
10404 (buf[base + 2] != third)) continue;
10405 } else if (next != 0) {
10406 if (buf[base + 1] != next) continue;
10407 }
10408 ctxt->checkIndex = 0;
10409 #ifdef DEBUG_PUSH
10410 if (next == 0)
10411 xmlGenericError(xmlGenericErrorContext,
10412 "PP: lookup '%c' found at %d\n",
10413 first, base);
10414 else if (third == 0)
10415 xmlGenericError(xmlGenericErrorContext,
10416 "PP: lookup '%c%c' found at %d\n",
10417 first, next, base);
10418 else
10419 xmlGenericError(xmlGenericErrorContext,
10420 "PP: lookup '%c%c%c' found at %d\n",
10421 first, next, third, base);
10422 #endif
10423 return(base - (in->cur - in->base));
10424 }
10425 }
10426 ctxt->checkIndex = base;
10427 #ifdef DEBUG_PUSH
10428 if (next == 0)
10429 xmlGenericError(xmlGenericErrorContext,
10430 "PP: lookup '%c' failed\n", first);
10431 else if (third == 0)
10432 xmlGenericError(xmlGenericErrorContext,
10433 "PP: lookup '%c%c' failed\n", first, next);
10434 else
10435 xmlGenericError(xmlGenericErrorContext,
10436 "PP: lookup '%c%c%c' failed\n", first, next, third);
10437 #endif
10438 return(-1);
10439 }
10440
10441 /**
10442 * xmlParseGetLasts:
10443 * @ctxt: an XML parser context
10444 * @lastlt: pointer to store the last '<' from the input
10445 * @lastgt: pointer to store the last '>' from the input
10446 *
10447 * Lookup the last < and > in the current chunk
10448 */
10449 static void
10450 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10451 const xmlChar **lastgt) {
10452 const xmlChar *tmp;
10453
10454 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10455 xmlGenericError(xmlGenericErrorContext,
10456 "Internal error: xmlParseGetLasts\n");
10457 return;
10458 }
10459 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10460 tmp = ctxt->input->end;
10461 tmp--;
10462 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10463 if (tmp < ctxt->input->base) {
10464 *lastlt = NULL;
10465 *lastgt = NULL;
10466 } else {
10467 *lastlt = tmp;
10468 tmp++;
10469 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10470 if (*tmp == '\'') {
10471 tmp++;
10472 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10473 if (tmp < ctxt->input->end) tmp++;
10474 } else if (*tmp == '"') {
10475 tmp++;
10476 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10477 if (tmp < ctxt->input->end) tmp++;
10478 } else
10479 tmp++;
10480 }
10481 if (tmp < ctxt->input->end)
10482 *lastgt = tmp;
10483 else {
10484 tmp = *lastlt;
10485 tmp--;
10486 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10487 if (tmp >= ctxt->input->base)
10488 *lastgt = tmp;
10489 else
10490 *lastgt = NULL;
10491 }
10492 }
10493 } else {
10494 *lastlt = NULL;
10495 *lastgt = NULL;
10496 }
10497 }
10498 /**
10499 * xmlCheckCdataPush:
10500 * @cur: pointer to the bock of characters
10501 * @len: length of the block in bytes
10502 *
10503 * Check that the block of characters is okay as SCdata content [20]
10504 *
10505 * Returns the number of bytes to pass if okay, a negative index where an
10506 * UTF-8 error occured otherwise
10507 */
10508 static int
10509 xmlCheckCdataPush(const xmlChar *utf, int len) {
10510 int ix;
10511 unsigned char c;
10512 int codepoint;
10513
10514 if ((utf == NULL) || (len <= 0))
10515 return(0);
10516
10517 for (ix = 0; ix < len;) { /* string is 0-terminated */
10518 c = utf[ix];
10519 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10520 if (c >= 0x20)
10521 ix++;
10522 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10523 ix++;
10524 else
10525 return(-ix);
10526 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10527 if (ix + 2 > len) return(ix);
10528 if ((utf[ix+1] & 0xc0 ) != 0x80)
10529 return(-ix);
10530 codepoint = (utf[ix] & 0x1f) << 6;
10531 codepoint |= utf[ix+1] & 0x3f;
10532 if (!xmlIsCharQ(codepoint))
10533 return(-ix);
10534 ix += 2;
10535 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10536 if (ix + 3 > len) return(ix);
10537 if (((utf[ix+1] & 0xc0) != 0x80) ||
10538 ((utf[ix+2] & 0xc0) != 0x80))
10539 return(-ix);
10540 codepoint = (utf[ix] & 0xf) << 12;
10541 codepoint |= (utf[ix+1] & 0x3f) << 6;
10542 codepoint |= utf[ix+2] & 0x3f;
10543 if (!xmlIsCharQ(codepoint))
10544 return(-ix);
10545 ix += 3;
10546 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10547 if (ix + 4 > len) return(ix);
10548 if (((utf[ix+1] & 0xc0) != 0x80) ||
10549 ((utf[ix+2] & 0xc0) != 0x80) ||
10550 ((utf[ix+3] & 0xc0) != 0x80))
10551 return(-ix);
10552 codepoint = (utf[ix] & 0x7) << 18;
10553 codepoint |= (utf[ix+1] & 0x3f) << 12;
10554 codepoint |= (utf[ix+2] & 0x3f) << 6;
10555 codepoint |= utf[ix+3] & 0x3f;
10556 if (!xmlIsCharQ(codepoint))
10557 return(-ix);
10558 ix += 4;
10559 } else /* unknown encoding */
10560 return(-ix);
10561 }
10562 return(ix);
10563 }
10564
10565 /**
10566 * xmlParseTryOrFinish:
10567 * @ctxt: an XML parser context
10568 * @terminate: last chunk indicator
10569 *
10570 * Try to progress on parsing
10571 *
10572 * Returns zero if no parsing was possible
10573 */
10574 static int
10575 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10576 int ret = 0;
10577 int avail, tlen;
10578 xmlChar cur, next;
10579 const xmlChar *lastlt, *lastgt;
10580
10581 if (ctxt->input == NULL)
10582 return(0);
10583
10584 #ifdef DEBUG_PUSH
10585 switch (ctxt->instate) {
10586 case XML_PARSER_EOF:
10587 xmlGenericError(xmlGenericErrorContext,
10588 "PP: try EOF\n"); break;
10589 case XML_PARSER_START:
10590 xmlGenericError(xmlGenericErrorContext,
10591 "PP: try START\n"); break;
10592 case XML_PARSER_MISC:
10593 xmlGenericError(xmlGenericErrorContext,
10594 "PP: try MISC\n");break;
10595 case XML_PARSER_COMMENT:
10596 xmlGenericError(xmlGenericErrorContext,
10597 "PP: try COMMENT\n");break;
10598 case XML_PARSER_PROLOG:
10599 xmlGenericError(xmlGenericErrorContext,
10600 "PP: try PROLOG\n");break;
10601 case XML_PARSER_START_TAG:
10602 xmlGenericError(xmlGenericErrorContext,
10603 "PP: try START_TAG\n");break;
10604 case XML_PARSER_CONTENT:
10605 xmlGenericError(xmlGenericErrorContext,
10606 "PP: try CONTENT\n");break;
10607 case XML_PARSER_CDATA_SECTION:
10608 xmlGenericError(xmlGenericErrorContext,
10609 "PP: try CDATA_SECTION\n");break;
10610 case XML_PARSER_END_TAG:
10611 xmlGenericError(xmlGenericErrorContext,
10612 "PP: try END_TAG\n");break;
10613 case XML_PARSER_ENTITY_DECL:
10614 xmlGenericError(xmlGenericErrorContext,
10615 "PP: try ENTITY_DECL\n");break;
10616 case XML_PARSER_ENTITY_VALUE:
10617 xmlGenericError(xmlGenericErrorContext,
10618 "PP: try ENTITY_VALUE\n");break;
10619 case XML_PARSER_ATTRIBUTE_VALUE:
10620 xmlGenericError(xmlGenericErrorContext,
10621 "PP: try ATTRIBUTE_VALUE\n");break;
10622 case XML_PARSER_DTD:
10623 xmlGenericError(xmlGenericErrorContext,
10624 "PP: try DTD\n");break;
10625 case XML_PARSER_EPILOG:
10626 xmlGenericError(xmlGenericErrorContext,
10627 "PP: try EPILOG\n");break;
10628 case XML_PARSER_PI:
10629 xmlGenericError(xmlGenericErrorContext,
10630 "PP: try PI\n");break;
10631 case XML_PARSER_IGNORE:
10632 xmlGenericError(xmlGenericErrorContext,
10633 "PP: try IGNORE\n");break;
10634 }
10635 #endif
10636
10637 if ((ctxt->input != NULL) &&
10638 (ctxt->input->cur - ctxt->input->base > 4096)) {
10639 xmlSHRINK(ctxt);
10640 ctxt->checkIndex = 0;
10641 }
10642 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10643
10644 while (1) {
10645 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10646 return(0);
10647
10648
10649 /*
10650 * Pop-up of finished entities.
10651 */
10652 while ((RAW == 0) && (ctxt->inputNr > 1))
10653 xmlPopInput(ctxt);
10654
10655 if (ctxt->input == NULL) break;
10656 if (ctxt->input->buf == NULL)
10657 avail = ctxt->input->length -
10658 (ctxt->input->cur - ctxt->input->base);
10659 else {
10660 /*
10661 * If we are operating on converted input, try to flush
10662 * remainng chars to avoid them stalling in the non-converted
10663 * buffer.
10664 */
10665 if ((ctxt->input->buf->raw != NULL) &&
10666 (ctxt->input->buf->raw->use > 0)) {
10667 int base = ctxt->input->base -
10668 ctxt->input->buf->buffer->content;
10669 int current = ctxt->input->cur - ctxt->input->base;
10670
10671 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10672 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10673 ctxt->input->cur = ctxt->input->base + current;
10674 ctxt->input->end =
10675 &ctxt->input->buf->buffer->content[
10676 ctxt->input->buf->buffer->use];
10677 }
10678 avail = ctxt->input->buf->buffer->use -
10679 (ctxt->input->cur - ctxt->input->base);
10680 }
10681 if (avail < 1)
10682 goto done;
10683 switch (ctxt->instate) {
10684 case XML_PARSER_EOF:
10685 /*
10686 * Document parsing is done !
10687 */
10688 goto done;
10689 case XML_PARSER_START:
10690 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10691 xmlChar start[4];
10692 xmlCharEncoding enc;
10693
10694 /*
10695 * Very first chars read from the document flow.
10696 */
10697 if (avail < 4)
10698 goto done;
10699
10700 /*
10701 * Get the 4 first bytes and decode the charset
10702 * if enc != XML_CHAR_ENCODING_NONE
10703 * plug some encoding conversion routines,
10704 * else xmlSwitchEncoding will set to (default)
10705 * UTF8.
10706 */
10707 start[0] = RAW;
10708 start[1] = NXT(1);
10709 start[2] = NXT(2);
10710 start[3] = NXT(3);
10711 enc = xmlDetectCharEncoding(start, 4);
10712 xmlSwitchEncoding(ctxt, enc);
10713 break;
10714 }
10715
10716 if (avail < 2)
10717 goto done;
10718 cur = ctxt->input->cur[0];
10719 next = ctxt->input->cur[1];
10720 if (cur == 0) {
10721 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10722 ctxt->sax->setDocumentLocator(ctxt->userData,
10723 &xmlDefaultSAXLocator);
10724 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10725 ctxt->instate = XML_PARSER_EOF;
10726 #ifdef DEBUG_PUSH
10727 xmlGenericError(xmlGenericErrorContext,
10728 "PP: entering EOF\n");
10729 #endif
10730 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10731 ctxt->sax->endDocument(ctxt->userData);
10732 goto done;
10733 }
10734 if ((cur == '<') && (next == '?')) {
10735 /* PI or XML decl */
10736 if (avail < 5) return(ret);
10737 if ((!terminate) &&
10738 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10739 return(ret);
10740 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10741 ctxt->sax->setDocumentLocator(ctxt->userData,
10742 &xmlDefaultSAXLocator);
10743 if ((ctxt->input->cur[2] == 'x') &&
10744 (ctxt->input->cur[3] == 'm') &&
10745 (ctxt->input->cur[4] == 'l') &&
10746 (IS_BLANK_CH(ctxt->input->cur[5]))) {
10747 ret += 5;
10748 #ifdef DEBUG_PUSH
10749 xmlGenericError(xmlGenericErrorContext,
10750 "PP: Parsing XML Decl\n");
10751 #endif
10752 xmlParseXMLDecl(ctxt);
10753 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10754 /*
10755 * The XML REC instructs us to stop parsing right
10756 * here
10757 */
10758 ctxt->instate = XML_PARSER_EOF;
10759 return(0);
10760 }
10761 ctxt->standalone = ctxt->input->standalone;
10762 if ((ctxt->encoding == NULL) &&
10763 (ctxt->input->encoding != NULL))
10764 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10765 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10766 (!ctxt->disableSAX))
10767 ctxt->sax->startDocument(ctxt->userData);
10768 ctxt->instate = XML_PARSER_MISC;
10769 #ifdef DEBUG_PUSH
10770 xmlGenericError(xmlGenericErrorContext,
10771 "PP: entering MISC\n");
10772 #endif
10773 } else {
10774 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10775 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10776 (!ctxt->disableSAX))
10777 ctxt->sax->startDocument(ctxt->userData);
10778 ctxt->instate = XML_PARSER_MISC;
10779 #ifdef DEBUG_PUSH
10780 xmlGenericError(xmlGenericErrorContext,
10781 "PP: entering MISC\n");
10782 #endif
10783 }
10784 } else {
10785 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10786 ctxt->sax->setDocumentLocator(ctxt->userData,
10787 &xmlDefaultSAXLocator);
10788 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10789 if (ctxt->version == NULL) {
10790 xmlErrMemory(ctxt, NULL);
10791 break;
10792 }
10793 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10794 (!ctxt->disableSAX))
10795 ctxt->sax->startDocument(ctxt->userData);
10796 ctxt->instate = XML_PARSER_MISC;
10797 #ifdef DEBUG_PUSH
10798 xmlGenericError(xmlGenericErrorContext,
10799 "PP: entering MISC\n");
10800 #endif
10801 }
10802 break;
10803 case XML_PARSER_START_TAG: {
10804 const xmlChar *name;
10805 const xmlChar *prefix = NULL;
10806 const xmlChar *URI = NULL;
10807 int nsNr = ctxt->nsNr;
10808
10809 if ((avail < 2) && (ctxt->inputNr == 1))
10810 goto done;
10811 cur = ctxt->input->cur[0];
10812 if (cur != '<') {
10813 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10814 ctxt->instate = XML_PARSER_EOF;
10815 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10816 ctxt->sax->endDocument(ctxt->userData);
10817 goto done;
10818 }
10819 if (!terminate) {
10820 if (ctxt->progressive) {
10821 /* > can be found unescaped in attribute values */
10822 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10823 goto done;
10824 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10825 goto done;
10826 }
10827 }
10828 if (ctxt->spaceNr == 0)
10829 spacePush(ctxt, -1);
10830 else if (*ctxt->space == -2)
10831 spacePush(ctxt, -1);
10832 else
10833 spacePush(ctxt, *ctxt->space);
10834 #ifdef LIBXML_SAX1_ENABLED
10835 if (ctxt->sax2)
10836 #endif /* LIBXML_SAX1_ENABLED */
10837 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10838 #ifdef LIBXML_SAX1_ENABLED
10839 else
10840 name = xmlParseStartTag(ctxt);
10841 #endif /* LIBXML_SAX1_ENABLED */
10842 if (name == NULL) {
10843 spacePop(ctxt);
10844 ctxt->instate = XML_PARSER_EOF;
10845 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10846 ctxt->sax->endDocument(ctxt->userData);
10847 goto done;
10848 }
10849 #ifdef LIBXML_VALID_ENABLED
10850 /*
10851 * [ VC: Root Element Type ]
10852 * The Name in the document type declaration must match
10853 * the element type of the root element.
10854 */
10855 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10856 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10857 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10858 #endif /* LIBXML_VALID_ENABLED */
10859
10860 /*
10861 * Check for an Empty Element.
10862 */
10863 if ((RAW == '/') && (NXT(1) == '>')) {
10864 SKIP(2);
10865
10866 if (ctxt->sax2) {
10867 if ((ctxt->sax != NULL) &&
10868 (ctxt->sax->endElementNs != NULL) &&
10869 (!ctxt->disableSAX))
10870 ctxt->sax->endElementNs(ctxt->userData, name,
10871 prefix, URI);
10872 if (ctxt->nsNr - nsNr > 0)
10873 nsPop(ctxt, ctxt->nsNr - nsNr);
10874 #ifdef LIBXML_SAX1_ENABLED
10875 } else {
10876 if ((ctxt->sax != NULL) &&
10877 (ctxt->sax->endElement != NULL) &&
10878 (!ctxt->disableSAX))
10879 ctxt->sax->endElement(ctxt->userData, name);
10880 #endif /* LIBXML_SAX1_ENABLED */
10881 }
10882 spacePop(ctxt);
10883 if (ctxt->nameNr == 0) {
10884 ctxt->instate = XML_PARSER_EPILOG;
10885 } else {
10886 ctxt->instate = XML_PARSER_CONTENT;
10887 }
10888 break;
10889 }
10890 if (RAW == '>') {
10891 NEXT;
10892 } else {
10893 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
10894 "Couldn't find end of Start Tag %s\n",
10895 name);
10896 nodePop(ctxt);
10897 spacePop(ctxt);
10898 }
10899 if (ctxt->sax2)
10900 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
10901 #ifdef LIBXML_SAX1_ENABLED
10902 else
10903 namePush(ctxt, name);
10904 #endif /* LIBXML_SAX1_ENABLED */
10905
10906 ctxt->instate = XML_PARSER_CONTENT;
10907 break;
10908 }
10909 case XML_PARSER_CONTENT: {
10910 const xmlChar *test;
10911 unsigned int cons;
10912 if ((avail < 2) && (ctxt->inputNr == 1))
10913 goto done;
10914 cur = ctxt->input->cur[0];
10915 next = ctxt->input->cur[1];
10916
10917 test = CUR_PTR;
10918 cons = ctxt->input->consumed;
10919 if ((cur == '<') && (next == '/')) {
10920 ctxt->instate = XML_PARSER_END_TAG;
10921 break;
10922 } else if ((cur == '<') && (next == '?')) {
10923 if ((!terminate) &&
10924 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10925 goto done;
10926 xmlParsePI(ctxt);
10927 } else if ((cur == '<') && (next != '!')) {
10928 ctxt->instate = XML_PARSER_START_TAG;
10929 break;
10930 } else if ((cur == '<') && (next == '!') &&
10931 (ctxt->input->cur[2] == '-') &&
10932 (ctxt->input->cur[3] == '-')) {
10933 int term;
10934
10935 if (avail < 4)
10936 goto done;
10937 ctxt->input->cur += 4;
10938 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10939 ctxt->input->cur -= 4;
10940 if ((!terminate) && (term < 0))
10941 goto done;
10942 xmlParseComment(ctxt);
10943 ctxt->instate = XML_PARSER_CONTENT;
10944 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10945 (ctxt->input->cur[2] == '[') &&
10946 (ctxt->input->cur[3] == 'C') &&
10947 (ctxt->input->cur[4] == 'D') &&
10948 (ctxt->input->cur[5] == 'A') &&
10949 (ctxt->input->cur[6] == 'T') &&
10950 (ctxt->input->cur[7] == 'A') &&
10951 (ctxt->input->cur[8] == '[')) {
10952 SKIP(9);
10953 ctxt->instate = XML_PARSER_CDATA_SECTION;
10954 break;
10955 } else if ((cur == '<') && (next == '!') &&
10956 (avail < 9)) {
10957 goto done;
10958 } else if (cur == '&') {
10959 if ((!terminate) &&
10960 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10961 goto done;
10962 xmlParseReference(ctxt);
10963 } else {
10964 /* TODO Avoid the extra copy, handle directly !!! */
10965 /*
10966 * Goal of the following test is:
10967 * - minimize calls to the SAX 'character' callback
10968 * when they are mergeable
10969 * - handle an problem for isBlank when we only parse
10970 * a sequence of blank chars and the next one is
10971 * not available to check against '<' presence.
10972 * - tries to homogenize the differences in SAX
10973 * callbacks between the push and pull versions
10974 * of the parser.
10975 */
10976 if ((ctxt->inputNr == 1) &&
10977 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10978 if (!terminate) {
10979 if (ctxt->progressive) {
10980 if ((lastlt == NULL) ||
10981 (ctxt->input->cur > lastlt))
10982 goto done;
10983 } else if (xmlParseLookupSequence(ctxt,
10984 '<', 0, 0) < 0) {
10985 goto done;
10986 }
10987 }
10988 }
10989 ctxt->checkIndex = 0;
10990 xmlParseCharData(ctxt, 0);
10991 }
10992 /*
10993 * Pop-up of finished entities.
10994 */
10995 while ((RAW == 0) && (ctxt->inputNr > 1))
10996 xmlPopInput(ctxt);
10997 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10998 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10999 "detected an error in element content\n");
11000 ctxt->instate = XML_PARSER_EOF;
11001 break;
11002 }
11003 break;
11004 }
11005 case XML_PARSER_END_TAG:
11006 if (avail < 2)
11007 goto done;
11008 if (!terminate) {
11009 if (ctxt->progressive) {
11010 /* > can be found unescaped in attribute values */
11011 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11012 goto done;
11013 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11014 goto done;
11015 }
11016 }
11017 if (ctxt->sax2) {
11018 xmlParseEndTag2(ctxt,
11019 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11020 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11021 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11022 nameNsPop(ctxt);
11023 }
11024 #ifdef LIBXML_SAX1_ENABLED
11025 else
11026 xmlParseEndTag1(ctxt, 0);
11027 #endif /* LIBXML_SAX1_ENABLED */
11028 if (ctxt->nameNr == 0) {
11029 ctxt->instate = XML_PARSER_EPILOG;
11030 } else {
11031 ctxt->instate = XML_PARSER_CONTENT;
11032 }
11033 break;
11034 case XML_PARSER_CDATA_SECTION: {
11035 /*
11036 * The Push mode need to have the SAX callback for
11037 * cdataBlock merge back contiguous callbacks.
11038 */
11039 int base;
11040
11041 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11042 if (base < 0) {
11043 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11044 int tmp;
11045
11046 tmp = xmlCheckCdataPush(ctxt->input->cur,
11047 XML_PARSER_BIG_BUFFER_SIZE);
11048 if (tmp < 0) {
11049 tmp = -tmp;
11050 ctxt->input->cur += tmp;
11051 goto encoding_error;
11052 }
11053 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11054 if (ctxt->sax->cdataBlock != NULL)
11055 ctxt->sax->cdataBlock(ctxt->userData,
11056 ctxt->input->cur, tmp);
11057 else if (ctxt->sax->characters != NULL)
11058 ctxt->sax->characters(ctxt->userData,
11059 ctxt->input->cur, tmp);
11060 }
11061 SKIPL(tmp);
11062 ctxt->checkIndex = 0;
11063 }
11064 goto done;
11065 } else {
11066 int tmp;
11067
11068 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11069 if ((tmp < 0) || (tmp != base)) {
11070 tmp = -tmp;
11071 ctxt->input->cur += tmp;
11072 goto encoding_error;
11073 }
11074 if ((ctxt->sax != NULL) && (base == 0) &&
11075 (ctxt->sax->cdataBlock != NULL) &&
11076 (!ctxt->disableSAX)) {
11077 /*
11078 * Special case to provide identical behaviour
11079 * between pull and push parsers on enpty CDATA
11080 * sections
11081 */
11082 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11083 (!strncmp((const char *)&ctxt->input->cur[-9],
11084 "<![CDATA[", 9)))
11085 ctxt->sax->cdataBlock(ctxt->userData,
11086 BAD_CAST "", 0);
11087 } else if ((ctxt->sax != NULL) && (base > 0) &&
11088 (!ctxt->disableSAX)) {
11089 if (ctxt->sax->cdataBlock != NULL)
11090 ctxt->sax->cdataBlock(ctxt->userData,
11091 ctxt->input->cur, base);
11092 else if (ctxt->sax->characters != NULL)
11093 ctxt->sax->characters(ctxt->userData,
11094 ctxt->input->cur, base);
11095 }
11096 SKIPL(base + 3);
11097 ctxt->checkIndex = 0;
11098 ctxt->instate = XML_PARSER_CONTENT;
11099 #ifdef DEBUG_PUSH
11100 xmlGenericError(xmlGenericErrorContext,
11101 "PP: entering CONTENT\n");
11102 #endif
11103 }
11104 break;
11105 }
11106 case XML_PARSER_MISC:
11107 SKIP_BLANKS;
11108 if (ctxt->input->buf == NULL)
11109 avail = ctxt->input->length -
11110 (ctxt->input->cur - ctxt->input->base);
11111 else
11112 avail = ctxt->input->buf->buffer->use -
11113 (ctxt->input->cur - ctxt->input->base);
11114 if (avail < 2)
11115 goto done;
11116 cur = ctxt->input->cur[0];
11117 next = ctxt->input->cur[1];
11118 if ((cur == '<') && (next == '?')) {
11119 if ((!terminate) &&
11120 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11121 goto done;
11122 #ifdef DEBUG_PUSH
11123 xmlGenericError(xmlGenericErrorContext,
11124 "PP: Parsing PI\n");
11125 #endif
11126 xmlParsePI(ctxt);
11127 ctxt->checkIndex = 0;
11128 } else if ((cur == '<') && (next == '!') &&
11129 (ctxt->input->cur[2] == '-') &&
11130 (ctxt->input->cur[3] == '-')) {
11131 if ((!terminate) &&
11132 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11133 goto done;
11134 #ifdef DEBUG_PUSH
11135 xmlGenericError(xmlGenericErrorContext,
11136 "PP: Parsing Comment\n");
11137 #endif
11138 xmlParseComment(ctxt);
11139 ctxt->instate = XML_PARSER_MISC;
11140 ctxt->checkIndex = 0;
11141 } else if ((cur == '<') && (next == '!') &&
11142 (ctxt->input->cur[2] == 'D') &&
11143 (ctxt->input->cur[3] == 'O') &&
11144 (ctxt->input->cur[4] == 'C') &&
11145 (ctxt->input->cur[5] == 'T') &&
11146 (ctxt->input->cur[6] == 'Y') &&
11147 (ctxt->input->cur[7] == 'P') &&
11148 (ctxt->input->cur[8] == 'E')) {
11149 if ((!terminate) &&
11150 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11151 goto done;
11152 #ifdef DEBUG_PUSH
11153 xmlGenericError(xmlGenericErrorContext,
11154 "PP: Parsing internal subset\n");
11155 #endif
11156 ctxt->inSubset = 1;
11157 xmlParseDocTypeDecl(ctxt);
11158 if (RAW == '[') {
11159 ctxt->instate = XML_PARSER_DTD;
11160 #ifdef DEBUG_PUSH
11161 xmlGenericError(xmlGenericErrorContext,
11162 "PP: entering DTD\n");
11163 #endif
11164 } else {
11165 /*
11166 * Create and update the external subset.
11167 */
11168 ctxt->inSubset = 2;
11169 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11170 (ctxt->sax->externalSubset != NULL))
11171 ctxt->sax->externalSubset(ctxt->userData,
11172 ctxt->intSubName, ctxt->extSubSystem,
11173 ctxt->extSubURI);
11174 ctxt->inSubset = 0;
11175 xmlCleanSpecialAttr(ctxt);
11176 ctxt->instate = XML_PARSER_PROLOG;
11177 #ifdef DEBUG_PUSH
11178 xmlGenericError(xmlGenericErrorContext,
11179 "PP: entering PROLOG\n");
11180 #endif
11181 }
11182 } else if ((cur == '<') && (next == '!') &&
11183 (avail < 9)) {
11184 goto done;
11185 } else {
11186 ctxt->instate = XML_PARSER_START_TAG;
11187 ctxt->progressive = 1;
11188 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11189 #ifdef DEBUG_PUSH
11190 xmlGenericError(xmlGenericErrorContext,
11191 "PP: entering START_TAG\n");
11192 #endif
11193 }
11194 break;
11195 case XML_PARSER_PROLOG:
11196 SKIP_BLANKS;
11197 if (ctxt->input->buf == NULL)
11198 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11199 else
11200 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11201 if (avail < 2)
11202 goto done;
11203 cur = ctxt->input->cur[0];
11204 next = ctxt->input->cur[1];
11205 if ((cur == '<') && (next == '?')) {
11206 if ((!terminate) &&
11207 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11208 goto done;
11209 #ifdef DEBUG_PUSH
11210 xmlGenericError(xmlGenericErrorContext,
11211 "PP: Parsing PI\n");
11212 #endif
11213 xmlParsePI(ctxt);
11214 } else if ((cur == '<') && (next == '!') &&
11215 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11216 if ((!terminate) &&
11217 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11218 goto done;
11219 #ifdef DEBUG_PUSH
11220 xmlGenericError(xmlGenericErrorContext,
11221 "PP: Parsing Comment\n");
11222 #endif
11223 xmlParseComment(ctxt);
11224 ctxt->instate = XML_PARSER_PROLOG;
11225 } else if ((cur == '<') && (next == '!') &&
11226 (avail < 4)) {
11227 goto done;
11228 } else {
11229 ctxt->instate = XML_PARSER_START_TAG;
11230 if (ctxt->progressive == 0)
11231 ctxt->progressive = 1;
11232 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11233 #ifdef DEBUG_PUSH
11234 xmlGenericError(xmlGenericErrorContext,
11235 "PP: entering START_TAG\n");
11236 #endif
11237 }
11238 break;
11239 case XML_PARSER_EPILOG:
11240 SKIP_BLANKS;
11241 if (ctxt->input->buf == NULL)
11242 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11243 else
11244 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11245 if (avail < 2)
11246 goto done;
11247 cur = ctxt->input->cur[0];
11248 next = ctxt->input->cur[1];
11249 if ((cur == '<') && (next == '?')) {
11250 if ((!terminate) &&
11251 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11252 goto done;
11253 #ifdef DEBUG_PUSH
11254 xmlGenericError(xmlGenericErrorContext,
11255 "PP: Parsing PI\n");
11256 #endif
11257 xmlParsePI(ctxt);
11258 ctxt->instate = XML_PARSER_EPILOG;
11259 } else if ((cur == '<') && (next == '!') &&
11260 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11261 if ((!terminate) &&
11262 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11263 goto done;
11264 #ifdef DEBUG_PUSH
11265 xmlGenericError(xmlGenericErrorContext,
11266 "PP: Parsing Comment\n");
11267 #endif
11268 xmlParseComment(ctxt);
11269 ctxt->instate = XML_PARSER_EPILOG;
11270 } else if ((cur == '<') && (next == '!') &&
11271 (avail < 4)) {
11272 goto done;
11273 } else {
11274 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11275 ctxt->instate = XML_PARSER_EOF;
11276 #ifdef DEBUG_PUSH
11277 xmlGenericError(xmlGenericErrorContext,
11278 "PP: entering EOF\n");
11279 #endif
11280 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11281 ctxt->sax->endDocument(ctxt->userData);
11282 goto done;
11283 }
11284 break;
11285 case XML_PARSER_DTD: {
11286 /*
11287 * Sorry but progressive parsing of the internal subset
11288 * is not expected to be supported. We first check that
11289 * the full content of the internal subset is available and
11290 * the parsing is launched only at that point.
11291 * Internal subset ends up with "']' S? '>'" in an unescaped
11292 * section and not in a ']]>' sequence which are conditional
11293 * sections (whoever argued to keep that crap in XML deserve
11294 * a place in hell !).
11295 */
11296 int base, i;
11297 xmlChar *buf;
11298 xmlChar quote = 0;
11299
11300 base = ctxt->input->cur - ctxt->input->base;
11301 if (base < 0) return(0);
11302 if (ctxt->checkIndex > base)
11303 base = ctxt->checkIndex;
11304 buf = ctxt->input->buf->buffer->content;
11305 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11306 base++) {
11307 if (quote != 0) {
11308 if (buf[base] == quote)
11309 quote = 0;
11310 continue;
11311 }
11312 if ((quote == 0) && (buf[base] == '<')) {
11313 int found = 0;
11314 /* special handling of comments */
11315 if (((unsigned int) base + 4 <
11316 ctxt->input->buf->buffer->use) &&
11317 (buf[base + 1] == '!') &&
11318 (buf[base + 2] == '-') &&
11319 (buf[base + 3] == '-')) {
11320 for (;(unsigned int) base + 3 <
11321 ctxt->input->buf->buffer->use; base++) {
11322 if ((buf[base] == '-') &&
11323 (buf[base + 1] == '-') &&
11324 (buf[base + 2] == '>')) {
11325 found = 1;
11326 base += 2;
11327 break;
11328 }
11329 }
11330 if (!found) {
11331 #if 0
11332 fprintf(stderr, "unfinished comment\n");
11333 #endif
11334 break; /* for */
11335 }
11336 continue;
11337 }
11338 }
11339 if (buf[base] == '"') {
11340 quote = '"';
11341 continue;
11342 }
11343 if (buf[base] == '\'') {
11344 quote = '\'';
11345 continue;
11346 }
11347 if (buf[base] == ']') {
11348 #if 0
11349 fprintf(stderr, "%c%c%c%c: ", buf[base],
11350 buf[base + 1], buf[base + 2], buf[base + 3]);
11351 #endif
11352 if ((unsigned int) base +1 >=
11353 ctxt->input->buf->buffer->use)
11354 break;
11355 if (buf[base + 1] == ']') {
11356 /* conditional crap, skip both ']' ! */
11357 base++;
11358 continue;
11359 }
11360 for (i = 1;
11361 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11362 i++) {
11363 if (buf[base + i] == '>') {
11364 #if 0
11365 fprintf(stderr, "found\n");
11366 #endif
11367 goto found_end_int_subset;
11368 }
11369 if (!IS_BLANK_CH(buf[base + i])) {
11370 #if 0
11371 fprintf(stderr, "not found\n");
11372 #endif
11373 goto not_end_of_int_subset;
11374 }
11375 }
11376 #if 0
11377 fprintf(stderr, "end of stream\n");
11378 #endif
11379 break;
11380
11381 }
11382 not_end_of_int_subset:
11383 continue; /* for */
11384 }
11385 /*
11386 * We didn't found the end of the Internal subset
11387 */
11388 #ifdef DEBUG_PUSH
11389 if (next == 0)
11390 xmlGenericError(xmlGenericErrorContext,
11391 "PP: lookup of int subset end filed\n");
11392 #endif
11393 goto done;
11394
11395 found_end_int_subset:
11396 xmlParseInternalSubset(ctxt);
11397 ctxt->inSubset = 2;
11398 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11399 (ctxt->sax->externalSubset != NULL))
11400 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11401 ctxt->extSubSystem, ctxt->extSubURI);
11402 ctxt->inSubset = 0;
11403 xmlCleanSpecialAttr(ctxt);
11404 ctxt->instate = XML_PARSER_PROLOG;
11405 ctxt->checkIndex = 0;
11406 #ifdef DEBUG_PUSH
11407 xmlGenericError(xmlGenericErrorContext,
11408 "PP: entering PROLOG\n");
11409 #endif
11410 break;
11411 }
11412 case XML_PARSER_COMMENT:
11413 xmlGenericError(xmlGenericErrorContext,
11414 "PP: internal error, state == COMMENT\n");
11415 ctxt->instate = XML_PARSER_CONTENT;
11416 #ifdef DEBUG_PUSH
11417 xmlGenericError(xmlGenericErrorContext,
11418 "PP: entering CONTENT\n");
11419 #endif
11420 break;
11421 case XML_PARSER_IGNORE:
11422 xmlGenericError(xmlGenericErrorContext,
11423 "PP: internal error, state == IGNORE");
11424 ctxt->instate = XML_PARSER_DTD;
11425 #ifdef DEBUG_PUSH
11426 xmlGenericError(xmlGenericErrorContext,
11427 "PP: entering DTD\n");
11428 #endif
11429 break;
11430 case XML_PARSER_PI:
11431 xmlGenericError(xmlGenericErrorContext,
11432 "PP: internal error, state == PI\n");
11433 ctxt->instate = XML_PARSER_CONTENT;
11434 #ifdef DEBUG_PUSH
11435 xmlGenericError(xmlGenericErrorContext,
11436 "PP: entering CONTENT\n");
11437 #endif
11438 break;
11439 case XML_PARSER_ENTITY_DECL:
11440 xmlGenericError(xmlGenericErrorContext,
11441 "PP: internal error, state == ENTITY_DECL\n");
11442 ctxt->instate = XML_PARSER_DTD;
11443 #ifdef DEBUG_PUSH
11444 xmlGenericError(xmlGenericErrorContext,
11445 "PP: entering DTD\n");
11446 #endif
11447 break;
11448 case XML_PARSER_ENTITY_VALUE:
11449 xmlGenericError(xmlGenericErrorContext,
11450 "PP: internal error, state == ENTITY_VALUE\n");
11451 ctxt->instate = XML_PARSER_CONTENT;
11452 #ifdef DEBUG_PUSH
11453 xmlGenericError(xmlGenericErrorContext,
11454 "PP: entering DTD\n");
11455 #endif
11456 break;
11457 case XML_PARSER_ATTRIBUTE_VALUE:
11458 xmlGenericError(xmlGenericErrorContext,
11459 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11460 ctxt->instate = XML_PARSER_START_TAG;
11461 #ifdef DEBUG_PUSH
11462 xmlGenericError(xmlGenericErrorContext,
11463 "PP: entering START_TAG\n");
11464 #endif
11465 break;
11466 case XML_PARSER_SYSTEM_LITERAL:
11467 xmlGenericError(xmlGenericErrorContext,
11468 "PP: internal error, state == SYSTEM_LITERAL\n");
11469 ctxt->instate = XML_PARSER_START_TAG;
11470 #ifdef DEBUG_PUSH
11471 xmlGenericError(xmlGenericErrorContext,
11472 "PP: entering START_TAG\n");
11473 #endif
11474 break;
11475 case XML_PARSER_PUBLIC_LITERAL:
11476 xmlGenericError(xmlGenericErrorContext,
11477 "PP: internal error, state == PUBLIC_LITERAL\n");
11478 ctxt->instate = XML_PARSER_START_TAG;
11479 #ifdef DEBUG_PUSH
11480 xmlGenericError(xmlGenericErrorContext,
11481 "PP: entering START_TAG\n");
11482 #endif
11483 break;
11484 }
11485 }
11486 done:
11487 #ifdef DEBUG_PUSH
11488 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11489 #endif
11490 return(ret);
11491 encoding_error:
11492 {
11493 char buffer[150];
11494
11495 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11496 ctxt->input->cur[0], ctxt->input->cur[1],
11497 ctxt->input->cur[2], ctxt->input->cur[3]);
11498 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11499 "Input is not proper UTF-8, indicate encoding !\n%s",
11500 BAD_CAST buffer, NULL);
11501 }
11502 return(0);
11503 }
11504
11505 /**
11506 * xmlParseChunk:
11507 * @ctxt: an XML parser context
11508 * @chunk: an char array
11509 * @size: the size in byte of the chunk
11510 * @terminate: last chunk indicator
11511 *
11512 * Parse a Chunk of memory
11513 *
11514 * Returns zero if no error, the xmlParserErrors otherwise.
11515 */
11516 int
11517 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11518 int terminate) {
11519 int end_in_lf = 0;
11520 int remain = 0;
11521
11522 if (ctxt == NULL)
11523 return(XML_ERR_INTERNAL_ERROR);
11524 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11525 return(ctxt->errNo);
11526 if (ctxt->instate == XML_PARSER_START)
11527 xmlDetectSAX2(ctxt);
11528 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11529 (chunk[size - 1] == '\r')) {
11530 end_in_lf = 1;
11531 size--;
11532 }
11533
11534 xmldecl_done:
11535
11536 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11537 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11538 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11539 int cur = ctxt->input->cur - ctxt->input->base;
11540 int res;
11541
11542 /*
11543 * Specific handling if we autodetected an encoding, we should not
11544 * push more than the first line ... which depend on the encoding
11545 * And only push the rest once the final encoding was detected
11546 */
11547 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11548 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
11549 unsigned int len = 45;
11550
11551 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11552 BAD_CAST "UTF-16")) ||
11553 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11554 BAD_CAST "UTF16")))
11555 len = 90;
11556 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11557 BAD_CAST "UCS-4")) ||
11558 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11559 BAD_CAST "UCS4")))
11560 len = 180;
11561
11562 if (ctxt->input->buf->rawconsumed < len)
11563 len -= ctxt->input->buf->rawconsumed;
11564
11565 /*
11566 * Change size for reading the initial declaration only
11567 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11568 * will blindly copy extra bytes from memory.
11569 */
11570 if (size > len) {
11571 remain = size - len;
11572 size = len;
11573 } else {
11574 remain = 0;
11575 }
11576 }
11577 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11578 if (res < 0) {
11579 ctxt->errNo = XML_PARSER_EOF;
11580 ctxt->disableSAX = 1;
11581 return (XML_PARSER_EOF);
11582 }
11583 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11584 ctxt->input->cur = ctxt->input->base + cur;
11585 ctxt->input->end =
11586 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11587 #ifdef DEBUG_PUSH
11588 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11589 #endif
11590
11591 } else if (ctxt->instate != XML_PARSER_EOF) {
11592 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11593 xmlParserInputBufferPtr in = ctxt->input->buf;
11594 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11595 (in->raw != NULL)) {
11596 int nbchars;
11597
11598 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11599 if (nbchars < 0) {
11600 /* TODO 2.6.0 */
11601 xmlGenericError(xmlGenericErrorContext,
11602 "xmlParseChunk: encoder error\n");
11603 return(XML_ERR_INVALID_ENCODING);
11604 }
11605 }
11606 }
11607 }
11608 if (remain != 0)
11609 xmlParseTryOrFinish(ctxt, 0);
11610 else
11611 xmlParseTryOrFinish(ctxt, terminate);
11612 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11613 return(ctxt->errNo);
11614
11615 if (remain != 0) {
11616 chunk += size;
11617 size = remain;
11618 remain = 0;
11619 goto xmldecl_done;
11620 }
11621 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11622 (ctxt->input->buf != NULL)) {
11623 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11624 }
11625 if (terminate) {
11626 /*
11627 * Check for termination
11628 */
11629 int avail = 0;
11630
11631 if (ctxt->input != NULL) {
11632 if (ctxt->input->buf == NULL)
11633 avail = ctxt->input->length -
11634 (ctxt->input->cur - ctxt->input->base);
11635 else
11636 avail = ctxt->input->buf->buffer->use -
11637 (ctxt->input->cur - ctxt->input->base);
11638 }
11639
11640 if ((ctxt->instate != XML_PARSER_EOF) &&
11641 (ctxt->instate != XML_PARSER_EPILOG)) {
11642 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11643 }
11644 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
11645 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11646 }
11647 if (ctxt->instate != XML_PARSER_EOF) {
11648 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11649 ctxt->sax->endDocument(ctxt->userData);
11650 }
11651 ctxt->instate = XML_PARSER_EOF;
11652 }
11653 return((xmlParserErrors) ctxt->errNo);
11654 }
11655
11656 /************************************************************************
11657 * *
11658 * I/O front end functions to the parser *
11659 * *
11660 ************************************************************************/
11661
11662 /**
11663 * xmlCreatePushParserCtxt:
11664 * @sax: a SAX handler
11665 * @user_data: The user data returned on SAX callbacks
11666 * @chunk: a pointer to an array of chars
11667 * @size: number of chars in the array
11668 * @filename: an optional file name or URI
11669 *
11670 * Create a parser context for using the XML parser in push mode.
11671 * If @buffer and @size are non-NULL, the data is used to detect
11672 * the encoding. The remaining characters will be parsed so they
11673 * don't need to be fed in again through xmlParseChunk.
11674 * To allow content encoding detection, @size should be >= 4
11675 * The value of @filename is used for fetching external entities
11676 * and error/warning reports.
11677 *
11678 * Returns the new parser context or NULL
11679 */
11680
11681 xmlParserCtxtPtr
11682 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11683 const char *chunk, int size, const char *filename) {
11684 xmlParserCtxtPtr ctxt;
11685 xmlParserInputPtr inputStream;
11686 xmlParserInputBufferPtr buf;
11687 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11688
11689 /*
11690 * plug some encoding conversion routines
11691 */
11692 if ((chunk != NULL) && (size >= 4))
11693 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11694
11695 buf = xmlAllocParserInputBuffer(enc);
11696 if (buf == NULL) return(NULL);
11697
11698 ctxt = xmlNewParserCtxt();
11699 if (ctxt == NULL) {
11700 xmlErrMemory(NULL, "creating parser: out of memory\n");
11701 xmlFreeParserInputBuffer(buf);
11702 return(NULL);
11703 }
11704 ctxt->dictNames = 1;
11705 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11706 if (ctxt->pushTab == NULL) {
11707 xmlErrMemory(ctxt, NULL);
11708 xmlFreeParserInputBuffer(buf);
11709 xmlFreeParserCtxt(ctxt);
11710 return(NULL);
11711 }
11712 if (sax != NULL) {
11713 #ifdef LIBXML_SAX1_ENABLED
11714 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11715 #endif /* LIBXML_SAX1_ENABLED */
11716 xmlFree(ctxt->sax);
11717 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11718 if (ctxt->sax == NULL) {
11719 xmlErrMemory(ctxt, NULL);
11720 xmlFreeParserInputBuffer(buf);
11721 xmlFreeParserCtxt(ctxt);
11722 return(NULL);
11723 }
11724 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11725 if (sax->initialized == XML_SAX2_MAGIC)
11726 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11727 else
11728 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11729 if (user_data != NULL)
11730 ctxt->userData = user_data;
11731 }
11732 if (filename == NULL) {
11733 ctxt->directory = NULL;
11734 } else {
11735 ctxt->directory = xmlParserGetDirectory(filename);
11736 }
11737
11738 inputStream = xmlNewInputStream(ctxt);
11739 if (inputStream == NULL) {
11740 xmlFreeParserCtxt(ctxt);
11741 xmlFreeParserInputBuffer(buf);
11742 return(NULL);
11743 }
11744
11745 if (filename == NULL)
11746 inputStream->filename = NULL;
11747 else {
11748 inputStream->filename = (char *)
11749 xmlCanonicPath((const xmlChar *) filename);
11750 if (inputStream->filename == NULL) {
11751 xmlFreeParserCtxt(ctxt);
11752 xmlFreeParserInputBuffer(buf);
11753 return(NULL);
11754 }
11755 }
11756 inputStream->buf = buf;
11757 inputStream->base = inputStream->buf->buffer->content;
11758 inputStream->cur = inputStream->buf->buffer->content;
11759 inputStream->end =
11760 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11761
11762 inputPush(ctxt, inputStream);
11763
11764 /*
11765 * If the caller didn't provide an initial 'chunk' for determining
11766 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11767 * that it can be automatically determined later
11768 */
11769 if ((size == 0) || (chunk == NULL)) {
11770 ctxt->charset = XML_CHAR_ENCODING_NONE;
11771 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
11772 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11773 int cur = ctxt->input->cur - ctxt->input->base;
11774
11775 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11776
11777 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11778 ctxt->input->cur = ctxt->input->base + cur;
11779 ctxt->input->end =
11780 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11781 #ifdef DEBUG_PUSH
11782 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11783 #endif
11784 }
11785
11786 if (enc != XML_CHAR_ENCODING_NONE) {
11787 xmlSwitchEncoding(ctxt, enc);
11788 }
11789
11790 return(ctxt);
11791 }
11792 #endif /* LIBXML_PUSH_ENABLED */
11793
11794 /**
11795 * xmlStopParser:
11796 * @ctxt: an XML parser context
11797 *
11798 * Blocks further parser processing
11799 */
11800 void
11801 xmlStopParser(xmlParserCtxtPtr ctxt) {
11802 if (ctxt == NULL)
11803 return;
11804 ctxt->instate = XML_PARSER_EOF;
11805 ctxt->disableSAX = 1;
11806 if (ctxt->input != NULL) {
11807 ctxt->input->cur = BAD_CAST"";
11808 ctxt->input->base = ctxt->input->cur;
11809 }
11810 }
11811
11812 /**
11813 * xmlCreateIOParserCtxt:
11814 * @sax: a SAX handler
11815 * @user_data: The user data returned on SAX callbacks
11816 * @ioread: an I/O read function
11817 * @ioclose: an I/O close function
11818 * @ioctx: an I/O handler
11819 * @enc: the charset encoding if known
11820 *
11821 * Create a parser context for using the XML parser with an existing
11822 * I/O stream
11823 *
11824 * Returns the new parser context or NULL
11825 */
11826 xmlParserCtxtPtr
11827 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11828 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11829 void *ioctx, xmlCharEncoding enc) {
11830 xmlParserCtxtPtr ctxt;
11831 xmlParserInputPtr inputStream;
11832 xmlParserInputBufferPtr buf;
11833
11834 if (ioread == NULL) return(NULL);
11835
11836 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11837 if (buf == NULL) return(NULL);
11838
11839 ctxt = xmlNewParserCtxt();
11840 if (ctxt == NULL) {
11841 xmlFreeParserInputBuffer(buf);
11842 return(NULL);
11843 }
11844 if (sax != NULL) {
11845 #ifdef LIBXML_SAX1_ENABLED
11846 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11847 #endif /* LIBXML_SAX1_ENABLED */
11848 xmlFree(ctxt->sax);
11849 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11850 if (ctxt->sax == NULL) {
11851 xmlErrMemory(ctxt, NULL);
11852 xmlFreeParserCtxt(ctxt);
11853 return(NULL);
11854 }
11855 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11856 if (sax->initialized == XML_SAX2_MAGIC)
11857 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11858 else
11859 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11860 if (user_data != NULL)
11861 ctxt->userData = user_data;
11862 }
11863
11864 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11865 if (inputStream == NULL) {
11866 xmlFreeParserCtxt(ctxt);
11867 return(NULL);
11868 }
11869 inputPush(ctxt, inputStream);
11870
11871 return(ctxt);
11872 }
11873
11874 #ifdef LIBXML_VALID_ENABLED
11875 /************************************************************************
11876 * *
11877 * Front ends when parsing a DTD *
11878 * *
11879 ************************************************************************/
11880
11881 /**
11882 * xmlIOParseDTD:
11883 * @sax: the SAX handler block or NULL
11884 * @input: an Input Buffer
11885 * @enc: the charset encoding if known
11886 *
11887 * Load and parse a DTD
11888 *
11889 * Returns the resulting xmlDtdPtr or NULL in case of error.
11890 * @input will be freed by the function in any case.
11891 */
11892
11893 xmlDtdPtr
11894 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11895 xmlCharEncoding enc) {
11896 xmlDtdPtr ret = NULL;
11897 xmlParserCtxtPtr ctxt;
11898 xmlParserInputPtr pinput = NULL;
11899 xmlChar start[4];
11900
11901 if (input == NULL)
11902 return(NULL);
11903
11904 ctxt = xmlNewParserCtxt();
11905 if (ctxt == NULL) {
11906 xmlFreeParserInputBuffer(input);
11907 return(NULL);
11908 }
11909
11910 /*
11911 * Set-up the SAX context
11912 */
11913 if (sax != NULL) {
11914 if (ctxt->sax != NULL)
11915 xmlFree(ctxt->sax);
11916 ctxt->sax = sax;
11917 ctxt->userData = ctxt;
11918 }
11919 xmlDetectSAX2(ctxt);
11920
11921 /*
11922 * generate a parser input from the I/O handler
11923 */
11924
11925 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11926 if (pinput == NULL) {
11927 if (sax != NULL) ctxt->sax = NULL;
11928 xmlFreeParserInputBuffer(input);
11929 xmlFreeParserCtxt(ctxt);
11930 return(NULL);
11931 }
11932
11933 /*
11934 * plug some encoding conversion routines here.
11935 */
11936 if (xmlPushInput(ctxt, pinput) < 0) {
11937 if (sax != NULL) ctxt->sax = NULL;
11938 xmlFreeParserCtxt(ctxt);
11939 return(NULL);
11940 }
11941 if (enc != XML_CHAR_ENCODING_NONE) {
11942 xmlSwitchEncoding(ctxt, enc);
11943 }
11944
11945 pinput->filename = NULL;
11946 pinput->line = 1;
11947 pinput->col = 1;
11948 pinput->base = ctxt->input->cur;
11949 pinput->cur = ctxt->input->cur;
11950 pinput->free = NULL;
11951
11952 /*
11953 * let's parse that entity knowing it's an external subset.
11954 */
11955 ctxt->inSubset = 2;
11956 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11957 if (ctxt->myDoc == NULL) {
11958 xmlErrMemory(ctxt, "New Doc failed");
11959 return(NULL);
11960 }
11961 ctxt->myDoc->properties = XML_DOC_INTERNAL;
11962 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11963 BAD_CAST "none", BAD_CAST "none");
11964
11965 if ((enc == XML_CHAR_ENCODING_NONE) &&
11966 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
11967 /*
11968 * Get the 4 first bytes and decode the charset
11969 * if enc != XML_CHAR_ENCODING_NONE
11970 * plug some encoding conversion routines.
11971 */
11972 start[0] = RAW;
11973 start[1] = NXT(1);
11974 start[2] = NXT(2);
11975 start[3] = NXT(3);
11976 enc = xmlDetectCharEncoding(start, 4);
11977 if (enc != XML_CHAR_ENCODING_NONE) {
11978 xmlSwitchEncoding(ctxt, enc);
11979 }
11980 }
11981
11982 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11983
11984 if (ctxt->myDoc != NULL) {
11985 if (ctxt->wellFormed) {
11986 ret = ctxt->myDoc->extSubset;
11987 ctxt->myDoc->extSubset = NULL;
11988 if (ret != NULL) {
11989 xmlNodePtr tmp;
11990
11991 ret->doc = NULL;
11992 tmp = ret->children;
11993 while (tmp != NULL) {
11994 tmp->doc = NULL;
11995 tmp = tmp->next;
11996 }
11997 }
11998 } else {
11999 ret = NULL;
12000 }
12001 xmlFreeDoc(ctxt->myDoc);
12002 ctxt->myDoc = NULL;
12003 }
12004 if (sax != NULL) ctxt->sax = NULL;
12005 xmlFreeParserCtxt(ctxt);
12006
12007 return(ret);
12008 }
12009
12010 /**
12011 * xmlSAXParseDTD:
12012 * @sax: the SAX handler block
12013 * @ExternalID: a NAME* containing the External ID of the DTD
12014 * @SystemID: a NAME* containing the URL to the DTD
12015 *
12016 * Load and parse an external subset.
12017 *
12018 * Returns the resulting xmlDtdPtr or NULL in case of error.
12019 */
12020
12021 xmlDtdPtr
12022 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12023 const xmlChar *SystemID) {
12024 xmlDtdPtr ret = NULL;
12025 xmlParserCtxtPtr ctxt;
12026 xmlParserInputPtr input = NULL;
12027 xmlCharEncoding enc;
12028 xmlChar* systemIdCanonic;
12029
12030 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12031
12032 ctxt = xmlNewParserCtxt();
12033 if (ctxt == NULL) {
12034 return(NULL);
12035 }
12036
12037 /*
12038 * Set-up the SAX context
12039 */
12040 if (sax != NULL) {
12041 if (ctxt->sax != NULL)
12042 xmlFree(ctxt->sax);
12043 ctxt->sax = sax;
12044 ctxt->userData = ctxt;
12045 }
12046
12047 /*
12048 * Canonicalise the system ID
12049 */
12050 systemIdCanonic = xmlCanonicPath(SystemID);
12051 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12052 xmlFreeParserCtxt(ctxt);
12053 return(NULL);
12054 }
12055
12056 /*
12057 * Ask the Entity resolver to load the damn thing
12058 */
12059
12060 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12061 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12062 systemIdCanonic);
12063 if (input == NULL) {
12064 if (sax != NULL) ctxt->sax = NULL;
12065 xmlFreeParserCtxt(ctxt);
12066 if (systemIdCanonic != NULL)
12067 xmlFree(systemIdCanonic);
12068 return(NULL);
12069 }
12070
12071 /*
12072 * plug some encoding conversion routines here.
12073 */
12074 if (xmlPushInput(ctxt, input) < 0) {
12075 if (sax != NULL) ctxt->sax = NULL;
12076 xmlFreeParserCtxt(ctxt);
12077 if (systemIdCanonic != NULL)
12078 xmlFree(systemIdCanonic);
12079 return(NULL);
12080 }
12081 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12082 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12083 xmlSwitchEncoding(ctxt, enc);
12084 }
12085
12086 if (input->filename == NULL)
12087 input->filename = (char *) systemIdCanonic;
12088 else
12089 xmlFree(systemIdCanonic);
12090 input->line = 1;
12091 input->col = 1;
12092 input->base = ctxt->input->cur;
12093 input->cur = ctxt->input->cur;
12094 input->free = NULL;
12095
12096 /*
12097 * let's parse that entity knowing it's an external subset.
12098 */
12099 ctxt->inSubset = 2;
12100 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12101 if (ctxt->myDoc == NULL) {
12102 xmlErrMemory(ctxt, "New Doc failed");
12103 if (sax != NULL) ctxt->sax = NULL;
12104 xmlFreeParserCtxt(ctxt);
12105 return(NULL);
12106 }
12107 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12108 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12109 ExternalID, SystemID);
12110 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12111
12112 if (ctxt->myDoc != NULL) {
12113 if (ctxt->wellFormed) {
12114 ret = ctxt->myDoc->extSubset;
12115 ctxt->myDoc->extSubset = NULL;
12116 if (ret != NULL) {
12117 xmlNodePtr tmp;
12118
12119 ret->doc = NULL;
12120 tmp = ret->children;
12121 while (tmp != NULL) {
12122 tmp->doc = NULL;
12123 tmp = tmp->next;
12124 }
12125 }
12126 } else {
12127 ret = NULL;
12128 }
12129 xmlFreeDoc(ctxt->myDoc);
12130 ctxt->myDoc = NULL;
12131 }
12132 if (sax != NULL) ctxt->sax = NULL;
12133 xmlFreeParserCtxt(ctxt);
12134
12135 return(ret);
12136 }
12137
12138
12139 /**
12140 * xmlParseDTD:
12141 * @ExternalID: a NAME* containing the External ID of the DTD
12142 * @SystemID: a NAME* containing the URL to the DTD
12143 *
12144 * Load and parse an external subset.
12145 *
12146 * Returns the resulting xmlDtdPtr or NULL in case of error.
12147 */
12148
12149 xmlDtdPtr
12150 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12151 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12152 }
12153 #endif /* LIBXML_VALID_ENABLED */
12154
12155 /************************************************************************
12156 * *
12157 * Front ends when parsing an Entity *
12158 * *
12159 ************************************************************************/
12160
12161 /**
12162 * xmlParseCtxtExternalEntity:
12163 * @ctx: the existing parsing context
12164 * @URL: the URL for the entity to load
12165 * @ID: the System ID for the entity to load
12166 * @lst: the return value for the set of parsed nodes
12167 *
12168 * Parse an external general entity within an existing parsing context
12169 * An external general parsed entity is well-formed if it matches the
12170 * production labeled extParsedEnt.
12171 *
12172 * [78] extParsedEnt ::= TextDecl? content
12173 *
12174 * Returns 0 if the entity is well formed, -1 in case of args problem and
12175 * the parser error code otherwise
12176 */
12177
12178 int
12179 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12180 const xmlChar *ID, xmlNodePtr *lst) {
12181 xmlParserCtxtPtr ctxt;
12182 xmlDocPtr newDoc;
12183 xmlNodePtr newRoot;
12184 xmlSAXHandlerPtr oldsax = NULL;
12185 int ret = 0;
12186 xmlChar start[4];
12187 xmlCharEncoding enc;
12188
12189 if (ctx == NULL) return(-1);
12190
12191 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12192 (ctx->depth > 1024)) {
12193 return(XML_ERR_ENTITY_LOOP);
12194 }
12195
12196 if (lst != NULL)
12197 *lst = NULL;
12198 if ((URL == NULL) && (ID == NULL))
12199 return(-1);
12200 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12201 return(-1);
12202
12203 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12204 if (ctxt == NULL) {
12205 return(-1);
12206 }
12207
12208 oldsax = ctxt->sax;
12209 ctxt->sax = ctx->sax;
12210 xmlDetectSAX2(ctxt);
12211 newDoc = xmlNewDoc(BAD_CAST "1.0");
12212 if (newDoc == NULL) {
12213 xmlFreeParserCtxt(ctxt);
12214 return(-1);
12215 }
12216 newDoc->properties = XML_DOC_INTERNAL;
12217 if (ctx->myDoc->dict) {
12218 newDoc->dict = ctx->myDoc->dict;
12219 xmlDictReference(newDoc->dict);
12220 }
12221 if (ctx->myDoc != NULL) {
12222 newDoc->intSubset = ctx->myDoc->intSubset;
12223 newDoc->extSubset = ctx->myDoc->extSubset;
12224 }
12225 if (ctx->myDoc->URL != NULL) {
12226 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12227 }
12228 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12229 if (newRoot == NULL) {
12230 ctxt->sax = oldsax;
12231 xmlFreeParserCtxt(ctxt);
12232 newDoc->intSubset = NULL;
12233 newDoc->extSubset = NULL;
12234 xmlFreeDoc(newDoc);
12235 return(-1);
12236 }
12237 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12238 nodePush(ctxt, newDoc->children);
12239 if (ctx->myDoc == NULL) {
12240 ctxt->myDoc = newDoc;
12241 } else {
12242 ctxt->myDoc = ctx->myDoc;
12243 newDoc->children->doc = ctx->myDoc;
12244 }
12245
12246 /*
12247 * Get the 4 first bytes and decode the charset
12248 * if enc != XML_CHAR_ENCODING_NONE
12249 * plug some encoding conversion routines.
12250 */
12251 GROW
12252 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12253 start[0] = RAW;
12254 start[1] = NXT(1);
12255 start[2] = NXT(2);
12256 start[3] = NXT(3);
12257 enc = xmlDetectCharEncoding(start, 4);
12258 if (enc != XML_CHAR_ENCODING_NONE) {
12259 xmlSwitchEncoding(ctxt, enc);
12260 }
12261 }
12262
12263 /*
12264 * Parse a possible text declaration first
12265 */
12266 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12267 xmlParseTextDecl(ctxt);
12268 /*
12269 * An XML-1.0 document can't reference an entity not XML-1.0
12270 */
12271 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12272 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12273 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12274 "Version mismatch between document and entity\n");
12275 }
12276 }
12277
12278 /*
12279 * Doing validity checking on chunk doesn't make sense
12280 */
12281 ctxt->instate = XML_PARSER_CONTENT;
12282 ctxt->validate = ctx->validate;
12283 ctxt->valid = ctx->valid;
12284 ctxt->loadsubset = ctx->loadsubset;
12285 ctxt->depth = ctx->depth + 1;
12286 ctxt->replaceEntities = ctx->replaceEntities;
12287 if (ctxt->validate) {
12288 ctxt->vctxt.error = ctx->vctxt.error;
12289 ctxt->vctxt.warning = ctx->vctxt.warning;
12290 } else {
12291 ctxt->vctxt.error = NULL;
12292 ctxt->vctxt.warning = NULL;
12293 }
12294 ctxt->vctxt.nodeTab = NULL;
12295 ctxt->vctxt.nodeNr = 0;
12296 ctxt->vctxt.nodeMax = 0;
12297 ctxt->vctxt.node = NULL;
12298 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12299 ctxt->dict = ctx->dict;
12300 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12301 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12302 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12303 ctxt->dictNames = ctx->dictNames;
12304 ctxt->attsDefault = ctx->attsDefault;
12305 ctxt->attsSpecial = ctx->attsSpecial;
12306 ctxt->linenumbers = ctx->linenumbers;
12307
12308 xmlParseContent(ctxt);
12309
12310 ctx->validate = ctxt->validate;
12311 ctx->valid = ctxt->valid;
12312 if ((RAW == '<') && (NXT(1) == '/')) {
12313 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12314 } else if (RAW != 0) {
12315 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12316 }
12317 if (ctxt->node != newDoc->children) {
12318 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12319 }
12320
12321 if (!ctxt->wellFormed) {
12322 if (ctxt->errNo == 0)
12323 ret = 1;
12324 else
12325 ret = ctxt->errNo;
12326 } else {
12327 if (lst != NULL) {
12328 xmlNodePtr cur;
12329
12330 /*
12331 * Return the newly created nodeset after unlinking it from
12332 * they pseudo parent.
12333 */
12334 cur = newDoc->children->children;
12335 *lst = cur;
12336 while (cur != NULL) {
12337 cur->parent = NULL;
12338 cur = cur->next;
12339 }
12340 newDoc->children->children = NULL;
12341 }
12342 ret = 0;
12343 }
12344 ctxt->sax = oldsax;
12345 ctxt->dict = NULL;
12346 ctxt->attsDefault = NULL;
12347 ctxt->attsSpecial = NULL;
12348 xmlFreeParserCtxt(ctxt);
12349 newDoc->intSubset = NULL;
12350 newDoc->extSubset = NULL;
12351 xmlFreeDoc(newDoc);
12352
12353 return(ret);
12354 }
12355
12356 /**
12357 * xmlParseExternalEntityPrivate:
12358 * @doc: the document the chunk pertains to
12359 * @oldctxt: the previous parser context if available
12360 * @sax: the SAX handler bloc (possibly NULL)
12361 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12362 * @depth: Used for loop detection, use 0
12363 * @URL: the URL for the entity to load
12364 * @ID: the System ID for the entity to load
12365 * @list: the return value for the set of parsed nodes
12366 *
12367 * Private version of xmlParseExternalEntity()
12368 *
12369 * Returns 0 if the entity is well formed, -1 in case of args problem and
12370 * the parser error code otherwise
12371 */
12372
12373 static xmlParserErrors
12374 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12375 xmlSAXHandlerPtr sax,
12376 void *user_data, int depth, const xmlChar *URL,
12377 const xmlChar *ID, xmlNodePtr *list) {
12378 xmlParserCtxtPtr ctxt;
12379 xmlDocPtr newDoc;
12380 xmlNodePtr newRoot;
12381 xmlSAXHandlerPtr oldsax = NULL;
12382 xmlParserErrors ret = XML_ERR_OK;
12383 xmlChar start[4];
12384 xmlCharEncoding enc;
12385
12386 if (((depth > 40) &&
12387 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12388 (depth > 1024)) {
12389 return(XML_ERR_ENTITY_LOOP);
12390 }
12391
12392 if (list != NULL)
12393 *list = NULL;
12394 if ((URL == NULL) && (ID == NULL))
12395 return(XML_ERR_INTERNAL_ERROR);
12396 if (doc == NULL)
12397 return(XML_ERR_INTERNAL_ERROR);
12398
12399
12400 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
12401 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12402 ctxt->userData = ctxt;
12403 if (oldctxt != NULL) {
12404 ctxt->_private = oldctxt->_private;
12405 ctxt->loadsubset = oldctxt->loadsubset;
12406 ctxt->validate = oldctxt->validate;
12407 ctxt->external = oldctxt->external;
12408 ctxt->record_info = oldctxt->record_info;
12409 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12410 ctxt->node_seq.length = oldctxt->node_seq.length;
12411 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12412 } else {
12413 /*
12414 * Doing validity checking on chunk without context
12415 * doesn't make sense
12416 */
12417 ctxt->_private = NULL;
12418 ctxt->validate = 0;
12419 ctxt->external = 2;
12420 ctxt->loadsubset = 0;
12421 }
12422 if (sax != NULL) {
12423 oldsax = ctxt->sax;
12424 ctxt->sax = sax;
12425 if (user_data != NULL)
12426 ctxt->userData = user_data;
12427 }
12428 xmlDetectSAX2(ctxt);
12429 newDoc = xmlNewDoc(BAD_CAST "1.0");
12430 if (newDoc == NULL) {
12431 ctxt->node_seq.maximum = 0;
12432 ctxt->node_seq.length = 0;
12433 ctxt->node_seq.buffer = NULL;
12434 xmlFreeParserCtxt(ctxt);
12435 return(XML_ERR_INTERNAL_ERROR);
12436 }
12437 newDoc->properties = XML_DOC_INTERNAL;
12438 newDoc->intSubset = doc->intSubset;
12439 newDoc->extSubset = doc->extSubset;
12440 newDoc->dict = doc->dict;
12441 xmlDictReference(newDoc->dict);
12442
12443 if (doc->URL != NULL) {
12444 newDoc->URL = xmlStrdup(doc->URL);
12445 }
12446 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12447 if (newRoot == NULL) {
12448 if (sax != NULL)
12449 ctxt->sax = oldsax;
12450 ctxt->node_seq.maximum = 0;
12451 ctxt->node_seq.length = 0;
12452 ctxt->node_seq.buffer = NULL;
12453 xmlFreeParserCtxt(ctxt);
12454 newDoc->intSubset = NULL;
12455 newDoc->extSubset = NULL;
12456 xmlFreeDoc(newDoc);
12457 return(XML_ERR_INTERNAL_ERROR);
12458 }
12459 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12460 nodePush(ctxt, newDoc->children);
12461 ctxt->myDoc = doc;
12462 newRoot->doc = doc;
12463
12464 /*
12465 * Get the 4 first bytes and decode the charset
12466 * if enc != XML_CHAR_ENCODING_NONE
12467 * plug some encoding conversion routines.
12468 */
12469 GROW;
12470 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12471 start[0] = RAW;
12472 start[1] = NXT(1);
12473 start[2] = NXT(2);
12474 start[3] = NXT(3);
12475 enc = xmlDetectCharEncoding(start, 4);
12476 if (enc != XML_CHAR_ENCODING_NONE) {
12477 xmlSwitchEncoding(ctxt, enc);
12478 }
12479 }
12480
12481 /*
12482 * Parse a possible text declaration first
12483 */
12484 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12485 xmlParseTextDecl(ctxt);
12486 }
12487
12488 ctxt->instate = XML_PARSER_CONTENT;
12489 ctxt->depth = depth;
12490
12491 xmlParseContent(ctxt);
12492
12493 if ((RAW == '<') && (NXT(1) == '/')) {
12494 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12495 } else if (RAW != 0) {
12496 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12497 }
12498 if (ctxt->node != newDoc->children) {
12499 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12500 }
12501
12502 if (!ctxt->wellFormed) {
12503 if (ctxt->errNo == 0)
12504 ret = XML_ERR_INTERNAL_ERROR;
12505 else
12506 ret = (xmlParserErrors)ctxt->errNo;
12507 } else {
12508 if (list != NULL) {
12509 xmlNodePtr cur;
12510
12511 /*
12512 * Return the newly created nodeset after unlinking it from
12513 * they pseudo parent.
12514 */
12515 cur = newDoc->children->children;
12516 *list = cur;
12517 while (cur != NULL) {
12518 cur->parent = NULL;
12519 cur = cur->next;
12520 }
12521 newDoc->children->children = NULL;
12522 }
12523 ret = XML_ERR_OK;
12524 }
12525
12526 /*
12527 * Record in the parent context the number of entities replacement
12528 * done when parsing that reference.
12529 */
12530 if (oldctxt != NULL)
12531 oldctxt->nbentities += ctxt->nbentities;
12532
12533 /*
12534 * Also record the size of the entity parsed
12535 */
12536 if (ctxt->input != NULL) {
12537 oldctxt->sizeentities += ctxt->input->consumed;
12538 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12539 }
12540 /*
12541 * And record the last error if any
12542 */
12543 if (ctxt->lastError.code != XML_ERR_OK)
12544 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12545
12546 if (sax != NULL)
12547 ctxt->sax = oldsax;
12548 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12549 oldctxt->node_seq.length = ctxt->node_seq.length;
12550 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12551 ctxt->node_seq.maximum = 0;
12552 ctxt->node_seq.length = 0;
12553 ctxt->node_seq.buffer = NULL;
12554 xmlFreeParserCtxt(ctxt);
12555 newDoc->intSubset = NULL;
12556 newDoc->extSubset = NULL;
12557 xmlFreeDoc(newDoc);
12558
12559 return(ret);
12560 }
12561
12562 #ifdef LIBXML_SAX1_ENABLED
12563 /**
12564 * xmlParseExternalEntity:
12565 * @doc: the document the chunk pertains to
12566 * @sax: the SAX handler bloc (possibly NULL)
12567 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12568 * @depth: Used for loop detection, use 0
12569 * @URL: the URL for the entity to load
12570 * @ID: the System ID for the entity to load
12571 * @lst: the return value for the set of parsed nodes
12572 *
12573 * Parse an external general entity
12574 * An external general parsed entity is well-formed if it matches the
12575 * production labeled extParsedEnt.
12576 *
12577 * [78] extParsedEnt ::= TextDecl? content
12578 *
12579 * Returns 0 if the entity is well formed, -1 in case of args problem and
12580 * the parser error code otherwise
12581 */
12582
12583 int
12584 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12585 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12586 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12587 ID, lst));
12588 }
12589
12590 /**
12591 * xmlParseBalancedChunkMemory:
12592 * @doc: the document the chunk pertains to
12593 * @sax: the SAX handler bloc (possibly NULL)
12594 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12595 * @depth: Used for loop detection, use 0
12596 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12597 * @lst: the return value for the set of parsed nodes
12598 *
12599 * Parse a well-balanced chunk of an XML document
12600 * called by the parser
12601 * The allowed sequence for the Well Balanced Chunk is the one defined by
12602 * the content production in the XML grammar:
12603 *
12604 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12605 *
12606 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12607 * the parser error code otherwise
12608 */
12609
12610 int
12611 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12612 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12613 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12614 depth, string, lst, 0 );
12615 }
12616 #endif /* LIBXML_SAX1_ENABLED */
12617
12618 /**
12619 * xmlParseBalancedChunkMemoryInternal:
12620 * @oldctxt: the existing parsing context
12621 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12622 * @user_data: the user data field for the parser context
12623 * @lst: the return value for the set of parsed nodes
12624 *
12625 *
12626 * Parse a well-balanced chunk of an XML document
12627 * called by the parser
12628 * The allowed sequence for the Well Balanced Chunk is the one defined by
12629 * the content production in the XML grammar:
12630 *
12631 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12632 *
12633 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12634 * error code otherwise
12635 *
12636 * In case recover is set to 1, the nodelist will not be empty even if
12637 * the parsed chunk is not well balanced.
12638 */
12639 static xmlParserErrors
12640 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12641 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12642 xmlParserCtxtPtr ctxt;
12643 xmlDocPtr newDoc = NULL;
12644 xmlNodePtr newRoot;
12645 xmlSAXHandlerPtr oldsax = NULL;
12646 xmlNodePtr content = NULL;
12647 xmlNodePtr last = NULL;
12648 int size;
12649 xmlParserErrors ret = XML_ERR_OK;
12650 #ifdef SAX2
12651 int i;
12652 #endif
12653
12654 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12655 (oldctxt->depth > 1024)) {
12656 return(XML_ERR_ENTITY_LOOP);
12657 }
12658
12659
12660 if (lst != NULL)
12661 *lst = NULL;
12662 if (string == NULL)
12663 return(XML_ERR_INTERNAL_ERROR);
12664
12665 size = xmlStrlen(string);
12666
12667 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12668 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12669 if (user_data != NULL)
12670 ctxt->userData = user_data;
12671 else
12672 ctxt->userData = ctxt;
12673 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12674 ctxt->dict = oldctxt->dict;
12675 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12676 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12677 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12678
12679 #ifdef SAX2
12680 /* propagate namespaces down the entity */
12681 for (i = 0;i < oldctxt->nsNr;i += 2) {
12682 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12683 }
12684 #endif
12685
12686 oldsax = ctxt->sax;
12687 ctxt->sax = oldctxt->sax;
12688 xmlDetectSAX2(ctxt);
12689 ctxt->replaceEntities = oldctxt->replaceEntities;
12690 ctxt->options = oldctxt->options;
12691
12692 ctxt->_private = oldctxt->_private;
12693 if (oldctxt->myDoc == NULL) {
12694 newDoc = xmlNewDoc(BAD_CAST "1.0");
12695 if (newDoc == NULL) {
12696 ctxt->sax = oldsax;
12697 ctxt->dict = NULL;
12698 xmlFreeParserCtxt(ctxt);
12699 return(XML_ERR_INTERNAL_ERROR);
12700 }
12701 newDoc->properties = XML_DOC_INTERNAL;
12702 newDoc->dict = ctxt->dict;
12703 xmlDictReference(newDoc->dict);
12704 ctxt->myDoc = newDoc;
12705 } else {
12706 ctxt->myDoc = oldctxt->myDoc;
12707 content = ctxt->myDoc->children;
12708 last = ctxt->myDoc->last;
12709 }
12710 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12711 if (newRoot == NULL) {
12712 ctxt->sax = oldsax;
12713 ctxt->dict = NULL;
12714 xmlFreeParserCtxt(ctxt);
12715 if (newDoc != NULL) {
12716 xmlFreeDoc(newDoc);
12717 }
12718 return(XML_ERR_INTERNAL_ERROR);
12719 }
12720 ctxt->myDoc->children = NULL;
12721 ctxt->myDoc->last = NULL;
12722 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
12723 nodePush(ctxt, ctxt->myDoc->children);
12724 ctxt->instate = XML_PARSER_CONTENT;
12725 ctxt->depth = oldctxt->depth + 1;
12726
12727 ctxt->validate = 0;
12728 ctxt->loadsubset = oldctxt->loadsubset;
12729 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12730 /*
12731 * ID/IDREF registration will be done in xmlValidateElement below
12732 */
12733 ctxt->loadsubset |= XML_SKIP_IDS;
12734 }
12735 ctxt->dictNames = oldctxt->dictNames;
12736 ctxt->attsDefault = oldctxt->attsDefault;
12737 ctxt->attsSpecial = oldctxt->attsSpecial;
12738
12739 xmlParseContent(ctxt);
12740 if ((RAW == '<') && (NXT(1) == '/')) {
12741 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12742 } else if (RAW != 0) {
12743 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12744 }
12745 if (ctxt->node != ctxt->myDoc->children) {
12746 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12747 }
12748
12749 if (!ctxt->wellFormed) {
12750 if (ctxt->errNo == 0)
12751 ret = XML_ERR_INTERNAL_ERROR;
12752 else
12753 ret = (xmlParserErrors)ctxt->errNo;
12754 } else {
12755 ret = XML_ERR_OK;
12756 }
12757
12758 if ((lst != NULL) && (ret == XML_ERR_OK)) {
12759 xmlNodePtr cur;
12760
12761 /*
12762 * Return the newly created nodeset after unlinking it from
12763 * they pseudo parent.
12764 */
12765 cur = ctxt->myDoc->children->children;
12766 *lst = cur;
12767 while (cur != NULL) {
12768 #ifdef LIBXML_VALID_ENABLED
12769 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12770 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12771 (cur->type == XML_ELEMENT_NODE)) {
12772 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12773 oldctxt->myDoc, cur);
12774 }
12775 #endif /* LIBXML_VALID_ENABLED */
12776 cur->parent = NULL;
12777 cur = cur->next;
12778 }
12779 ctxt->myDoc->children->children = NULL;
12780 }
12781 if (ctxt->myDoc != NULL) {
12782 xmlFreeNode(ctxt->myDoc->children);
12783 ctxt->myDoc->children = content;
12784 ctxt->myDoc->last = last;
12785 }
12786
12787 /*
12788 * Record in the parent context the number of entities replacement
12789 * done when parsing that reference.
12790 */
12791 if (oldctxt != NULL)
12792 oldctxt->nbentities += ctxt->nbentities;
12793
12794 /*
12795 * Also record the last error if any
12796 */
12797 if (ctxt->lastError.code != XML_ERR_OK)
12798 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12799
12800 ctxt->sax = oldsax;
12801 ctxt->dict = NULL;
12802 ctxt->attsDefault = NULL;
12803 ctxt->attsSpecial = NULL;
12804 xmlFreeParserCtxt(ctxt);
12805 if (newDoc != NULL) {
12806 xmlFreeDoc(newDoc);
12807 }
12808
12809 return(ret);
12810 }
12811
12812 /**
12813 * xmlParseInNodeContext:
12814 * @node: the context node
12815 * @data: the input string
12816 * @datalen: the input string length in bytes
12817 * @options: a combination of xmlParserOption
12818 * @lst: the return value for the set of parsed nodes
12819 *
12820 * Parse a well-balanced chunk of an XML document
12821 * within the context (DTD, namespaces, etc ...) of the given node.
12822 *
12823 * The allowed sequence for the data is a Well Balanced Chunk defined by
12824 * the content production in the XML grammar:
12825 *
12826 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12827 *
12828 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12829 * error code otherwise
12830 */
12831 xmlParserErrors
12832 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12833 int options, xmlNodePtr *lst) {
12834 #ifdef SAX2
12835 xmlParserCtxtPtr ctxt;
12836 xmlDocPtr doc = NULL;
12837 xmlNodePtr fake, cur;
12838 int nsnr = 0;
12839
12840 xmlParserErrors ret = XML_ERR_OK;
12841
12842 /*
12843 * check all input parameters, grab the document
12844 */
12845 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12846 return(XML_ERR_INTERNAL_ERROR);
12847 switch (node->type) {
12848 case XML_ELEMENT_NODE:
12849 case XML_ATTRIBUTE_NODE:
12850 case XML_TEXT_NODE:
12851 case XML_CDATA_SECTION_NODE:
12852 case XML_ENTITY_REF_NODE:
12853 case XML_PI_NODE:
12854 case XML_COMMENT_NODE:
12855 case XML_DOCUMENT_NODE:
12856 case XML_HTML_DOCUMENT_NODE:
12857 break;
12858 default:
12859 return(XML_ERR_INTERNAL_ERROR);
12860
12861 }
12862 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12863 (node->type != XML_DOCUMENT_NODE) &&
12864 (node->type != XML_HTML_DOCUMENT_NODE))
12865 node = node->parent;
12866 if (node == NULL)
12867 return(XML_ERR_INTERNAL_ERROR);
12868 if (node->type == XML_ELEMENT_NODE)
12869 doc = node->doc;
12870 else
12871 doc = (xmlDocPtr) node;
12872 if (doc == NULL)
12873 return(XML_ERR_INTERNAL_ERROR);
12874
12875 /*
12876 * allocate a context and set-up everything not related to the
12877 * node position in the tree
12878 */
12879 if (doc->type == XML_DOCUMENT_NODE)
12880 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12881 #ifdef LIBXML_HTML_ENABLED
12882 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
12883 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12884 /*
12885 * When parsing in context, it makes no sense to add implied
12886 * elements like html/body/etc...
12887 */
12888 options |= HTML_PARSE_NOIMPLIED;
12889 }
12890 #endif
12891 else
12892 return(XML_ERR_INTERNAL_ERROR);
12893
12894 if (ctxt == NULL)
12895 return(XML_ERR_NO_MEMORY);
12896
12897 /*
12898 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12899 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12900 * we must wait until the last moment to free the original one.
12901 */
12902 if (doc->dict != NULL) {
12903 if (ctxt->dict != NULL)
12904 xmlDictFree(ctxt->dict);
12905 ctxt->dict = doc->dict;
12906 } else
12907 options |= XML_PARSE_NODICT;
12908
12909 if (doc->encoding != NULL) {
12910 xmlCharEncodingHandlerPtr hdlr;
12911
12912 if (ctxt->encoding != NULL)
12913 xmlFree((xmlChar *) ctxt->encoding);
12914 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
12915
12916 hdlr = xmlFindCharEncodingHandler(doc->encoding);
12917 if (hdlr != NULL) {
12918 xmlSwitchToEncoding(ctxt, hdlr);
12919 } else {
12920 return(XML_ERR_UNSUPPORTED_ENCODING);
12921 }
12922 }
12923
12924 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
12925 xmlDetectSAX2(ctxt);
12926 ctxt->myDoc = doc;
12927
12928 fake = xmlNewComment(NULL);
12929 if (fake == NULL) {
12930 xmlFreeParserCtxt(ctxt);
12931 return(XML_ERR_NO_MEMORY);
12932 }
12933 xmlAddChild(node, fake);
12934
12935 if (node->type == XML_ELEMENT_NODE) {
12936 nodePush(ctxt, node);
12937 /*
12938 * initialize the SAX2 namespaces stack
12939 */
12940 cur = node;
12941 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12942 xmlNsPtr ns = cur->nsDef;
12943 const xmlChar *iprefix, *ihref;
12944
12945 while (ns != NULL) {
12946 if (ctxt->dict) {
12947 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12948 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12949 } else {
12950 iprefix = ns->prefix;
12951 ihref = ns->href;
12952 }
12953
12954 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12955 nsPush(ctxt, iprefix, ihref);
12956 nsnr++;
12957 }
12958 ns = ns->next;
12959 }
12960 cur = cur->parent;
12961 }
12962 ctxt->instate = XML_PARSER_CONTENT;
12963 }
12964
12965 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12966 /*
12967 * ID/IDREF registration will be done in xmlValidateElement below
12968 */
12969 ctxt->loadsubset |= XML_SKIP_IDS;
12970 }
12971
12972 #ifdef LIBXML_HTML_ENABLED
12973 if (doc->type == XML_HTML_DOCUMENT_NODE)
12974 __htmlParseContent(ctxt);
12975 else
12976 #endif
12977 xmlParseContent(ctxt);
12978
12979 nsPop(ctxt, nsnr);
12980 if ((RAW == '<') && (NXT(1) == '/')) {
12981 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12982 } else if (RAW != 0) {
12983 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12984 }
12985 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12986 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12987 ctxt->wellFormed = 0;
12988 }
12989
12990 if (!ctxt->wellFormed) {
12991 if (ctxt->errNo == 0)
12992 ret = XML_ERR_INTERNAL_ERROR;
12993 else
12994 ret = (xmlParserErrors)ctxt->errNo;
12995 } else {
12996 ret = XML_ERR_OK;
12997 }
12998
12999 /*
13000 * Return the newly created nodeset after unlinking it from
13001 * the pseudo sibling.
13002 */
13003
13004 cur = fake->next;
13005 fake->next = NULL;
13006 node->last = fake;
13007
13008 if (cur != NULL) {
13009 cur->prev = NULL;
13010 }
13011
13012 *lst = cur;
13013
13014 while (cur != NULL) {
13015 cur->parent = NULL;
13016 cur = cur->next;
13017 }
13018
13019 xmlUnlinkNode(fake);
13020 xmlFreeNode(fake);
13021
13022
13023 if (ret != XML_ERR_OK) {
13024 xmlFreeNodeList(*lst);
13025 *lst = NULL;
13026 }
13027
13028 if (doc->dict != NULL)
13029 ctxt->dict = NULL;
13030 xmlFreeParserCtxt(ctxt);
13031
13032 return(ret);
13033 #else /* !SAX2 */
13034 return(XML_ERR_INTERNAL_ERROR);
13035 #endif
13036 }
13037
13038 #ifdef LIBXML_SAX1_ENABLED
13039 /**
13040 * xmlParseBalancedChunkMemoryRecover:
13041 * @doc: the document the chunk pertains to
13042 * @sax: the SAX handler bloc (possibly NULL)
13043 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13044 * @depth: Used for loop detection, use 0
13045 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13046 * @lst: the return value for the set of parsed nodes
13047 * @recover: return nodes even if the data is broken (use 0)
13048 *
13049 *
13050 * Parse a well-balanced chunk of an XML document
13051 * called by the parser
13052 * The allowed sequence for the Well Balanced Chunk is the one defined by
13053 * the content production in the XML grammar:
13054 *
13055 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13056 *
13057 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13058 * the parser error code otherwise
13059 *
13060 * In case recover is set to 1, the nodelist will not be empty even if
13061 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13062 * some extent.
13063 */
13064 int
13065 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13066 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13067 int recover) {
13068 xmlParserCtxtPtr ctxt;
13069 xmlDocPtr newDoc;
13070 xmlSAXHandlerPtr oldsax = NULL;
13071 xmlNodePtr content, newRoot;
13072 int size;
13073 int ret = 0;
13074
13075 if (depth > 40) {
13076 return(XML_ERR_ENTITY_LOOP);
13077 }
13078
13079
13080 if (lst != NULL)
13081 *lst = NULL;
13082 if (string == NULL)
13083 return(-1);
13084
13085 size = xmlStrlen(string);
13086
13087 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13088 if (ctxt == NULL) return(-1);
13089 ctxt->userData = ctxt;
13090 if (sax != NULL) {
13091 oldsax = ctxt->sax;
13092 ctxt->sax = sax;
13093 if (user_data != NULL)
13094 ctxt->userData = user_data;
13095 }
13096 newDoc = xmlNewDoc(BAD_CAST "1.0");
13097 if (newDoc == NULL) {
13098 xmlFreeParserCtxt(ctxt);
13099 return(-1);
13100 }
13101 newDoc->properties = XML_DOC_INTERNAL;
13102 if ((doc != NULL) && (doc->dict != NULL)) {
13103 xmlDictFree(ctxt->dict);
13104 ctxt->dict = doc->dict;
13105 xmlDictReference(ctxt->dict);
13106 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13107 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13108 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13109 ctxt->dictNames = 1;
13110 } else {
13111 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13112 }
13113 if (doc != NULL) {
13114 newDoc->intSubset = doc->intSubset;
13115 newDoc->extSubset = doc->extSubset;
13116 }
13117 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13118 if (newRoot == NULL) {
13119 if (sax != NULL)
13120 ctxt->sax = oldsax;
13121 xmlFreeParserCtxt(ctxt);
13122 newDoc->intSubset = NULL;
13123 newDoc->extSubset = NULL;
13124 xmlFreeDoc(newDoc);
13125 return(-1);
13126 }
13127 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13128 nodePush(ctxt, newRoot);
13129 if (doc == NULL) {
13130 ctxt->myDoc = newDoc;
13131 } else {
13132 ctxt->myDoc = newDoc;
13133 newDoc->children->doc = doc;
13134 /* Ensure that doc has XML spec namespace */
13135 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13136 newDoc->oldNs = doc->oldNs;
13137 }
13138 ctxt->instate = XML_PARSER_CONTENT;
13139 ctxt->depth = depth;
13140
13141 /*
13142 * Doing validity checking on chunk doesn't make sense
13143 */
13144 ctxt->validate = 0;
13145 ctxt->loadsubset = 0;
13146 xmlDetectSAX2(ctxt);
13147
13148 if ( doc != NULL ){
13149 content = doc->children;
13150 doc->children = NULL;
13151 xmlParseContent(ctxt);
13152 doc->children = content;
13153 }
13154 else {
13155 xmlParseContent(ctxt);
13156 }
13157 if ((RAW == '<') && (NXT(1) == '/')) {
13158 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13159 } else if (RAW != 0) {
13160 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13161 }
13162 if (ctxt->node != newDoc->children) {
13163 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13164 }
13165
13166 if (!ctxt->wellFormed) {
13167 if (ctxt->errNo == 0)
13168 ret = 1;
13169 else
13170 ret = ctxt->errNo;
13171 } else {
13172 ret = 0;
13173 }
13174
13175 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13176 xmlNodePtr cur;
13177
13178 /*
13179 * Return the newly created nodeset after unlinking it from
13180 * they pseudo parent.
13181 */
13182 cur = newDoc->children->children;
13183 *lst = cur;
13184 while (cur != NULL) {
13185 xmlSetTreeDoc(cur, doc);
13186 cur->parent = NULL;
13187 cur = cur->next;
13188 }
13189 newDoc->children->children = NULL;
13190 }
13191
13192 if (sax != NULL)
13193 ctxt->sax = oldsax;
13194 xmlFreeParserCtxt(ctxt);
13195 newDoc->intSubset = NULL;
13196 newDoc->extSubset = NULL;
13197 newDoc->oldNs = NULL;
13198 xmlFreeDoc(newDoc);
13199
13200 return(ret);
13201 }
13202
13203 /**
13204 * xmlSAXParseEntity:
13205 * @sax: the SAX handler block
13206 * @filename: the filename
13207 *
13208 * parse an XML external entity out of context and build a tree.
13209 * It use the given SAX function block to handle the parsing callback.
13210 * If sax is NULL, fallback to the default DOM tree building routines.
13211 *
13212 * [78] extParsedEnt ::= TextDecl? content
13213 *
13214 * This correspond to a "Well Balanced" chunk
13215 *
13216 * Returns the resulting document tree
13217 */
13218
13219 xmlDocPtr
13220 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13221 xmlDocPtr ret;
13222 xmlParserCtxtPtr ctxt;
13223
13224 ctxt = xmlCreateFileParserCtxt(filename);
13225 if (ctxt == NULL) {
13226 return(NULL);
13227 }
13228 if (sax != NULL) {
13229 if (ctxt->sax != NULL)
13230 xmlFree(ctxt->sax);
13231 ctxt->sax = sax;
13232 ctxt->userData = NULL;
13233 }
13234
13235 xmlParseExtParsedEnt(ctxt);
13236
13237 if (ctxt->wellFormed)
13238 ret = ctxt->myDoc;
13239 else {
13240 ret = NULL;
13241 xmlFreeDoc(ctxt->myDoc);
13242 ctxt->myDoc = NULL;
13243 }
13244 if (sax != NULL)
13245 ctxt->sax = NULL;
13246 xmlFreeParserCtxt(ctxt);
13247
13248 return(ret);
13249 }
13250
13251 /**
13252 * xmlParseEntity:
13253 * @filename: the filename
13254 *
13255 * parse an XML external entity out of context and build a tree.
13256 *
13257 * [78] extParsedEnt ::= TextDecl? content
13258 *
13259 * This correspond to a "Well Balanced" chunk
13260 *
13261 * Returns the resulting document tree
13262 */
13263
13264 xmlDocPtr
13265 xmlParseEntity(const char *filename) {
13266 return(xmlSAXParseEntity(NULL, filename));
13267 }
13268 #endif /* LIBXML_SAX1_ENABLED */
13269
13270 /**
13271 * xmlCreateEntityParserCtxtInternal:
13272 * @URL: the entity URL
13273 * @ID: the entity PUBLIC ID
13274 * @base: a possible base for the target URI
13275 * @pctx: parser context used to set options on new context
13276 *
13277 * Create a parser context for an external entity
13278 * Automatic support for ZLIB/Compress compressed document is provided
13279 * by default if found at compile-time.
13280 *
13281 * Returns the new parser context or NULL
13282 */
13283 static xmlParserCtxtPtr
13284 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13285 const xmlChar *base, xmlParserCtxtPtr pctx) {
13286 xmlParserCtxtPtr ctxt;
13287 xmlParserInputPtr inputStream;
13288 char *directory = NULL;
13289 xmlChar *uri;
13290
13291 ctxt = xmlNewParserCtxt();
13292 if (ctxt == NULL) {
13293 return(NULL);
13294 }
13295
13296 if (pctx != NULL) {
13297 ctxt->options = pctx->options;
13298 ctxt->_private = pctx->_private;
13299 }
13300
13301 uri = xmlBuildURI(URL, base);
13302
13303 if (uri == NULL) {
13304 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13305 if (inputStream == NULL) {
13306 xmlFreeParserCtxt(ctxt);
13307 return(NULL);
13308 }
13309
13310 inputPush(ctxt, inputStream);
13311
13312 if ((ctxt->directory == NULL) && (directory == NULL))
13313 directory = xmlParserGetDirectory((char *)URL);
13314 if ((ctxt->directory == NULL) && (directory != NULL))
13315 ctxt->directory = directory;
13316 } else {
13317 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13318 if (inputStream == NULL) {
13319 xmlFree(uri);
13320 xmlFreeParserCtxt(ctxt);
13321 return(NULL);
13322 }
13323
13324 inputPush(ctxt, inputStream);
13325
13326 if ((ctxt->directory == NULL) && (directory == NULL))
13327 directory = xmlParserGetDirectory((char *)uri);
13328 if ((ctxt->directory == NULL) && (directory != NULL))
13329 ctxt->directory = directory;
13330 xmlFree(uri);
13331 }
13332 return(ctxt);
13333 }
13334
13335 /**
13336 * xmlCreateEntityParserCtxt:
13337 * @URL: the entity URL
13338 * @ID: the entity PUBLIC ID
13339 * @base: a possible base for the target URI
13340 *
13341 * Create a parser context for an external entity
13342 * Automatic support for ZLIB/Compress compressed document is provided
13343 * by default if found at compile-time.
13344 *
13345 * Returns the new parser context or NULL
13346 */
13347 xmlParserCtxtPtr
13348 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13349 const xmlChar *base) {
13350 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13351
13352 }
13353
13354 /************************************************************************
13355 * *
13356 * Front ends when parsing from a file *
13357 * *
13358 ************************************************************************/
13359
13360 /**
13361 * xmlCreateURLParserCtxt:
13362 * @filename: the filename or URL
13363 * @options: a combination of xmlParserOption
13364 *
13365 * Create a parser context for a file or URL content.
13366 * Automatic support for ZLIB/Compress compressed document is provided
13367 * by default if found at compile-time and for file accesses
13368 *
13369 * Returns the new parser context or NULL
13370 */
13371 xmlParserCtxtPtr
13372 xmlCreateURLParserCtxt(const char *filename, int options)
13373 {
13374 xmlParserCtxtPtr ctxt;
13375 xmlParserInputPtr inputStream;
13376 char *directory = NULL;
13377
13378 ctxt = xmlNewParserCtxt();
13379 if (ctxt == NULL) {
13380 xmlErrMemory(NULL, "cannot allocate parser context");
13381 return(NULL);
13382 }
13383
13384 if (options)
13385 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13386 ctxt->linenumbers = 1;
13387
13388 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13389 if (inputStream == NULL) {
13390 xmlFreeParserCtxt(ctxt);
13391 return(NULL);
13392 }
13393
13394 inputPush(ctxt, inputStream);
13395 if ((ctxt->directory == NULL) && (directory == NULL))
13396 directory = xmlParserGetDirectory(filename);
13397 if ((ctxt->directory == NULL) && (directory != NULL))
13398 ctxt->directory = directory;
13399
13400 return(ctxt);
13401 }
13402
13403 /**
13404 * xmlCreateFileParserCtxt:
13405 * @filename: the filename
13406 *
13407 * Create a parser context for a file content.
13408 * Automatic support for ZLIB/Compress compressed document is provided
13409 * by default if found at compile-time.
13410 *
13411 * Returns the new parser context or NULL
13412 */
13413 xmlParserCtxtPtr
13414 xmlCreateFileParserCtxt(const char *filename)
13415 {
13416 return(xmlCreateURLParserCtxt(filename, 0));
13417 }
13418
13419 #ifdef LIBXML_SAX1_ENABLED
13420 /**
13421 * xmlSAXParseFileWithData:
13422 * @sax: the SAX handler block
13423 * @filename: the filename
13424 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13425 * documents
13426 * @data: the userdata
13427 *
13428 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13429 * compressed document is provided by default if found at compile-time.
13430 * It use the given SAX function block to handle the parsing callback.
13431 * If sax is NULL, fallback to the default DOM tree building routines.
13432 *
13433 * User data (void *) is stored within the parser context in the
13434 * context's _private member, so it is available nearly everywhere in libxml
13435 *
13436 * Returns the resulting document tree
13437 */
13438
13439 xmlDocPtr
13440 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13441 int recovery, void *data) {
13442 xmlDocPtr ret;
13443 xmlParserCtxtPtr ctxt;
13444
13445 xmlInitParser();
13446
13447 ctxt = xmlCreateFileParserCtxt(filename);
13448 if (ctxt == NULL) {
13449 return(NULL);
13450 }
13451 if (sax != NULL) {
13452 if (ctxt->sax != NULL)
13453 xmlFree(ctxt->sax);
13454 ctxt->sax = sax;
13455 }
13456 xmlDetectSAX2(ctxt);
13457 if (data!=NULL) {
13458 ctxt->_private = data;
13459 }
13460
13461 if (ctxt->directory == NULL)
13462 ctxt->directory = xmlParserGetDirectory(filename);
13463
13464 ctxt->recovery = recovery;
13465
13466 xmlParseDocument(ctxt);
13467
13468 if ((ctxt->wellFormed) || recovery) {
13469 ret = ctxt->myDoc;
13470 if (ret != NULL) {
13471 if (ctxt->input->buf->compressed > 0)
13472 ret->compression = 9;
13473 else
13474 ret->compression = ctxt->input->buf->compressed;
13475 }
13476 }
13477 else {
13478 ret = NULL;
13479 xmlFreeDoc(ctxt->myDoc);
13480 ctxt->myDoc = NULL;
13481 }
13482 if (sax != NULL)
13483 ctxt->sax = NULL;
13484 xmlFreeParserCtxt(ctxt);
13485
13486 return(ret);
13487 }
13488
13489 /**
13490 * xmlSAXParseFile:
13491 * @sax: the SAX handler block
13492 * @filename: the filename
13493 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13494 * documents
13495 *
13496 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13497 * compressed document is provided by default if found at compile-time.
13498 * It use the given SAX function block to handle the parsing callback.
13499 * If sax is NULL, fallback to the default DOM tree building routines.
13500 *
13501 * Returns the resulting document tree
13502 */
13503
13504 xmlDocPtr
13505 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13506 int recovery) {
13507 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13508 }
13509
13510 /**
13511 * xmlRecoverDoc:
13512 * @cur: a pointer to an array of xmlChar
13513 *
13514 * parse an XML in-memory document and build a tree.
13515 * In the case the document is not Well Formed, a attempt to build a
13516 * tree is tried anyway
13517 *
13518 * Returns the resulting document tree or NULL in case of failure
13519 */
13520
13521 xmlDocPtr
13522 xmlRecoverDoc(const xmlChar *cur) {
13523 return(xmlSAXParseDoc(NULL, cur, 1));
13524 }
13525
13526 /**
13527 * xmlParseFile:
13528 * @filename: the filename
13529 *
13530 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13531 * compressed document is provided by default if found at compile-time.
13532 *
13533 * Returns the resulting document tree if the file was wellformed,
13534 * NULL otherwise.
13535 */
13536
13537 xmlDocPtr
13538 xmlParseFile(const char *filename) {
13539 return(xmlSAXParseFile(NULL, filename, 0));
13540 }
13541
13542 /**
13543 * xmlRecoverFile:
13544 * @filename: the filename
13545 *
13546 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13547 * compressed document is provided by default if found at compile-time.
13548 * In the case the document is not Well Formed, it attempts to build
13549 * a tree anyway
13550 *
13551 * Returns the resulting document tree or NULL in case of failure
13552 */
13553
13554 xmlDocPtr
13555 xmlRecoverFile(const char *filename) {
13556 return(xmlSAXParseFile(NULL, filename, 1));
13557 }
13558
13559
13560 /**
13561 * xmlSetupParserForBuffer:
13562 * @ctxt: an XML parser context
13563 * @buffer: a xmlChar * buffer
13564 * @filename: a file name
13565 *
13566 * Setup the parser context to parse a new buffer; Clears any prior
13567 * contents from the parser context. The buffer parameter must not be
13568 * NULL, but the filename parameter can be
13569 */
13570 void
13571 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13572 const char* filename)
13573 {
13574 xmlParserInputPtr input;
13575
13576 if ((ctxt == NULL) || (buffer == NULL))
13577 return;
13578
13579 input = xmlNewInputStream(ctxt);
13580 if (input == NULL) {
13581 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13582 xmlClearParserCtxt(ctxt);
13583 return;
13584 }
13585
13586 xmlClearParserCtxt(ctxt);
13587 if (filename != NULL)
13588 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13589 input->base = buffer;
13590 input->cur = buffer;
13591 input->end = &buffer[xmlStrlen(buffer)];
13592 inputPush(ctxt, input);
13593 }
13594
13595 /**
13596 * xmlSAXUserParseFile:
13597 * @sax: a SAX handler
13598 * @user_data: The user data returned on SAX callbacks
13599 * @filename: a file name
13600 *
13601 * parse an XML file and call the given SAX handler routines.
13602 * Automatic support for ZLIB/Compress compressed document is provided
13603 *
13604 * Returns 0 in case of success or a error number otherwise
13605 */
13606 int
13607 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13608 const char *filename) {
13609 int ret = 0;
13610 xmlParserCtxtPtr ctxt;
13611
13612 ctxt = xmlCreateFileParserCtxt(filename);
13613 if (ctxt == NULL) return -1;
13614 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13615 xmlFree(ctxt->sax);
13616 ctxt->sax = sax;
13617 xmlDetectSAX2(ctxt);
13618
13619 if (user_data != NULL)
13620 ctxt->userData = user_data;
13621
13622 xmlParseDocument(ctxt);
13623
13624 if (ctxt->wellFormed)
13625 ret = 0;
13626 else {
13627 if (ctxt->errNo != 0)
13628 ret = ctxt->errNo;
13629 else
13630 ret = -1;
13631 }
13632 if (sax != NULL)
13633 ctxt->sax = NULL;
13634 if (ctxt->myDoc != NULL) {
13635 xmlFreeDoc(ctxt->myDoc);
13636 ctxt->myDoc = NULL;
13637 }
13638 xmlFreeParserCtxt(ctxt);
13639
13640 return ret;
13641 }
13642 #endif /* LIBXML_SAX1_ENABLED */
13643
13644 /************************************************************************
13645 * *
13646 * Front ends when parsing from memory *
13647 * *
13648 ************************************************************************/
13649
13650 /**
13651 * xmlCreateMemoryParserCtxt:
13652 * @buffer: a pointer to a char array
13653 * @size: the size of the array
13654 *
13655 * Create a parser context for an XML in-memory document.
13656 *
13657 * Returns the new parser context or NULL
13658 */
13659 xmlParserCtxtPtr
13660 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13661 xmlParserCtxtPtr ctxt;
13662 xmlParserInputPtr input;
13663 xmlParserInputBufferPtr buf;
13664
13665 if (buffer == NULL)
13666 return(NULL);
13667 if (size <= 0)
13668 return(NULL);
13669
13670 ctxt = xmlNewParserCtxt();
13671 if (ctxt == NULL)
13672 return(NULL);
13673
13674 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
13675 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13676 if (buf == NULL) {
13677 xmlFreeParserCtxt(ctxt);
13678 return(NULL);
13679 }
13680
13681 input = xmlNewInputStream(ctxt);
13682 if (input == NULL) {
13683 xmlFreeParserInputBuffer(buf);
13684 xmlFreeParserCtxt(ctxt);
13685 return(NULL);
13686 }
13687
13688 input->filename = NULL;
13689 input->buf = buf;
13690 input->base = input->buf->buffer->content;
13691 input->cur = input->buf->buffer->content;
13692 input->end = &input->buf->buffer->content[input->buf->buffer->use];
13693
13694 inputPush(ctxt, input);
13695 return(ctxt);
13696 }
13697
13698 #ifdef LIBXML_SAX1_ENABLED
13699 /**
13700 * xmlSAXParseMemoryWithData:
13701 * @sax: the SAX handler block
13702 * @buffer: an pointer to a char array
13703 * @size: the size of the array
13704 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13705 * documents
13706 * @data: the userdata
13707 *
13708 * parse an XML in-memory block and use the given SAX function block
13709 * to handle the parsing callback. If sax is NULL, fallback to the default
13710 * DOM tree building routines.
13711 *
13712 * User data (void *) is stored within the parser context in the
13713 * context's _private member, so it is available nearly everywhere in libxml
13714 *
13715 * Returns the resulting document tree
13716 */
13717
13718 xmlDocPtr
13719 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13720 int size, int recovery, void *data) {
13721 xmlDocPtr ret;
13722 xmlParserCtxtPtr ctxt;
13723
13724 xmlInitParser();
13725
13726 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13727 if (ctxt == NULL) return(NULL);
13728 if (sax != NULL) {
13729 if (ctxt->sax != NULL)
13730 xmlFree(ctxt->sax);
13731 ctxt->sax = sax;
13732 }
13733 xmlDetectSAX2(ctxt);
13734 if (data!=NULL) {
13735 ctxt->_private=data;
13736 }
13737
13738 ctxt->recovery = recovery;
13739
13740 xmlParseDocument(ctxt);
13741
13742 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13743 else {
13744 ret = NULL;
13745 xmlFreeDoc(ctxt->myDoc);
13746 ctxt->myDoc = NULL;
13747 }
13748 if (sax != NULL)
13749 ctxt->sax = NULL;
13750 xmlFreeParserCtxt(ctxt);
13751
13752 return(ret);
13753 }
13754
13755 /**
13756 * xmlSAXParseMemory:
13757 * @sax: the SAX handler block
13758 * @buffer: an pointer to a char array
13759 * @size: the size of the array
13760 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13761 * documents
13762 *
13763 * parse an XML in-memory block and use the given SAX function block
13764 * to handle the parsing callback. If sax is NULL, fallback to the default
13765 * DOM tree building routines.
13766 *
13767 * Returns the resulting document tree
13768 */
13769 xmlDocPtr
13770 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13771 int size, int recovery) {
13772 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13773 }
13774
13775 /**
13776 * xmlParseMemory:
13777 * @buffer: an pointer to a char array
13778 * @size: the size of the array
13779 *
13780 * parse an XML in-memory block and build a tree.
13781 *
13782 * Returns the resulting document tree
13783 */
13784
13785 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13786 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13787 }
13788
13789 /**
13790 * xmlRecoverMemory:
13791 * @buffer: an pointer to a char array
13792 * @size: the size of the array
13793 *
13794 * parse an XML in-memory block and build a tree.
13795 * In the case the document is not Well Formed, an attempt to
13796 * build a tree is tried anyway
13797 *
13798 * Returns the resulting document tree or NULL in case of error
13799 */
13800
13801 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13802 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13803 }
13804
13805 /**
13806 * xmlSAXUserParseMemory:
13807 * @sax: a SAX handler
13808 * @user_data: The user data returned on SAX callbacks
13809 * @buffer: an in-memory XML document input
13810 * @size: the length of the XML document in bytes
13811 *
13812 * A better SAX parsing routine.
13813 * parse an XML in-memory buffer and call the given SAX handler routines.
13814 *
13815 * Returns 0 in case of success or a error number otherwise
13816 */
13817 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13818 const char *buffer, int size) {
13819 int ret = 0;
13820 xmlParserCtxtPtr ctxt;
13821
13822 xmlInitParser();
13823
13824 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13825 if (ctxt == NULL) return -1;
13826 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13827 xmlFree(ctxt->sax);
13828 ctxt->sax = sax;
13829 xmlDetectSAX2(ctxt);
13830
13831 if (user_data != NULL)
13832 ctxt->userData = user_data;
13833
13834 xmlParseDocument(ctxt);
13835
13836 if (ctxt->wellFormed)
13837 ret = 0;
13838 else {
13839 if (ctxt->errNo != 0)
13840 ret = ctxt->errNo;
13841 else
13842 ret = -1;
13843 }
13844 if (sax != NULL)
13845 ctxt->sax = NULL;
13846 if (ctxt->myDoc != NULL) {
13847 xmlFreeDoc(ctxt->myDoc);
13848 ctxt->myDoc = NULL;
13849 }
13850 xmlFreeParserCtxt(ctxt);
13851
13852 return ret;
13853 }
13854 #endif /* LIBXML_SAX1_ENABLED */
13855
13856 /**
13857 * xmlCreateDocParserCtxt:
13858 * @cur: a pointer to an array of xmlChar
13859 *
13860 * Creates a parser context for an XML in-memory document.
13861 *
13862 * Returns the new parser context or NULL
13863 */
13864 xmlParserCtxtPtr
13865 xmlCreateDocParserCtxt(const xmlChar *cur) {
13866 int len;
13867
13868 if (cur == NULL)
13869 return(NULL);
13870 len = xmlStrlen(cur);
13871 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
13872 }
13873
13874 #ifdef LIBXML_SAX1_ENABLED
13875 /**
13876 * xmlSAXParseDoc:
13877 * @sax: the SAX handler block
13878 * @cur: a pointer to an array of xmlChar
13879 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13880 * documents
13881 *
13882 * parse an XML in-memory document and build a tree.
13883 * It use the given SAX function block to handle the parsing callback.
13884 * If sax is NULL, fallback to the default DOM tree building routines.
13885 *
13886 * Returns the resulting document tree
13887 */
13888
13889 xmlDocPtr
13890 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13891 xmlDocPtr ret;
13892 xmlParserCtxtPtr ctxt;
13893 xmlSAXHandlerPtr oldsax = NULL;
13894
13895 if (cur == NULL) return(NULL);
13896
13897
13898 ctxt = xmlCreateDocParserCtxt(cur);
13899 if (ctxt == NULL) return(NULL);
13900 if (sax != NULL) {
13901 oldsax = ctxt->sax;
13902 ctxt->sax = sax;
13903 ctxt->userData = NULL;
13904 }
13905 xmlDetectSAX2(ctxt);
13906
13907 xmlParseDocument(ctxt);
13908 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13909 else {
13910 ret = NULL;
13911 xmlFreeDoc(ctxt->myDoc);
13912 ctxt->myDoc = NULL;
13913 }
13914 if (sax != NULL)
13915 ctxt->sax = oldsax;
13916 xmlFreeParserCtxt(ctxt);
13917
13918 return(ret);
13919 }
13920
13921 /**
13922 * xmlParseDoc:
13923 * @cur: a pointer to an array of xmlChar
13924 *
13925 * parse an XML in-memory document and build a tree.
13926 *
13927 * Returns the resulting document tree
13928 */
13929
13930 xmlDocPtr
13931 xmlParseDoc(const xmlChar *cur) {
13932 return(xmlSAXParseDoc(NULL, cur, 0));
13933 }
13934 #endif /* LIBXML_SAX1_ENABLED */
13935
13936 #ifdef LIBXML_LEGACY_ENABLED
13937 /************************************************************************
13938 * *
13939 * Specific function to keep track of entities references *
13940 * and used by the XSLT debugger *
13941 * *
13942 ************************************************************************/
13943
13944 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13945
13946 /**
13947 * xmlAddEntityReference:
13948 * @ent : A valid entity
13949 * @firstNode : A valid first node for children of entity
13950 * @lastNode : A valid last node of children entity
13951 *
13952 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13953 */
13954 static void
13955 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13956 xmlNodePtr lastNode)
13957 {
13958 if (xmlEntityRefFunc != NULL) {
13959 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13960 }
13961 }
13962
13963
13964 /**
13965 * xmlSetEntityReferenceFunc:
13966 * @func: A valid function
13967 *
13968 * Set the function to call call back when a xml reference has been made
13969 */
13970 void
13971 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13972 {
13973 xmlEntityRefFunc = func;
13974 }
13975 #endif /* LIBXML_LEGACY_ENABLED */
13976
13977 /************************************************************************
13978 * *
13979 * Miscellaneous *
13980 * *
13981 ************************************************************************/
13982
13983 #ifdef LIBXML_XPATH_ENABLED
13984 #include <libxml/xpath.h>
13985 #endif
13986
13987 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
13988 static int xmlParserInitialized = 0;
13989
13990 /**
13991 * xmlInitParser:
13992 *
13993 * Initialization function for the XML parser.
13994 * This is not reentrant. Call once before processing in case of
13995 * use in multithreaded programs.
13996 */
13997
13998 void
13999 xmlInitParser(void) {
14000 if (xmlParserInitialized != 0)
14001 return;
14002
14003 #ifdef LIBXML_THREAD_ENABLED
14004 __xmlGlobalInitMutexLock();
14005 if (xmlParserInitialized == 0) {
14006 #endif
14007 xmlInitGlobals();
14008 xmlInitThreads();
14009 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14010 (xmlGenericError == NULL))
14011 initGenericErrorDefaultFunc(NULL);
14012 xmlInitMemory();
14013 xmlInitCharEncodingHandlers();
14014 xmlDefaultSAXHandlerInit();
14015 xmlRegisterDefaultInputCallbacks();
14016 #ifdef LIBXML_OUTPUT_ENABLED
14017 xmlRegisterDefaultOutputCallbacks();
14018 #endif /* LIBXML_OUTPUT_ENABLED */
14019 #ifdef LIBXML_HTML_ENABLED
14020 htmlInitAutoClose();
14021 htmlDefaultSAXHandlerInit();
14022 #endif
14023 #ifdef LIBXML_XPATH_ENABLED
14024 xmlXPathInit();
14025 #endif
14026 xmlParserInitialized = 1;
14027 #ifdef LIBXML_THREAD_ENABLED
14028 }
14029 __xmlGlobalInitMutexUnlock();
14030 #endif
14031 }
14032
14033 /**
14034 * xmlCleanupParser:
14035 *
14036 * This function name is somewhat misleading. It does not clean up
14037 * parser state, it cleans up memory allocated by the library itself.
14038 * It is a cleanup function for the XML library. It tries to reclaim all
14039 * related global memory allocated for the library processing.
14040 * It doesn't deallocate any document related memory. One should
14041 * call xmlCleanupParser() only when the process has finished using
14042 * the library and all XML/HTML documents built with it.
14043 * See also xmlInitParser() which has the opposite function of preparing
14044 * the library for operations.
14045 *
14046 * WARNING: if your application is multithreaded or has plugin support
14047 * calling this may crash the application if another thread or
14048 * a plugin is still using libxml2. It's sometimes very hard to
14049 * guess if libxml2 is in use in the application, some libraries
14050 * or plugins may use it without notice. In case of doubt abstain
14051 * from calling this function or do it just before calling exit()
14052 * to avoid leak reports from valgrind !
14053 */
14054
14055 void
14056 xmlCleanupParser(void) {
14057 if (!xmlParserInitialized)
14058 return;
14059
14060 xmlCleanupCharEncodingHandlers();
14061 #ifdef LIBXML_CATALOG_ENABLED
14062 xmlCatalogCleanup();
14063 #endif
14064 xmlDictCleanup();
14065 xmlCleanupInputCallbacks();
14066 #ifdef LIBXML_OUTPUT_ENABLED
14067 xmlCleanupOutputCallbacks();
14068 #endif
14069 #ifdef LIBXML_SCHEMAS_ENABLED
14070 xmlSchemaCleanupTypes();
14071 xmlRelaxNGCleanupTypes();
14072 #endif
14073 xmlCleanupGlobals();
14074 xmlResetLastError();
14075 xmlCleanupThreads(); /* must be last if called not from the main thread */
14076 xmlCleanupMemory();
14077 xmlParserInitialized = 0;
14078 }
14079
14080 /************************************************************************
14081 * *
14082 * New set (2.6.0) of simpler and more flexible APIs *
14083 * *
14084 ************************************************************************/
14085
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is used. The expansion is a bare "if" statement that already
 * ends with a semicolon, so it must not be placed directly before an
 * "else".
 */
#define DICT_FREE(str)						\
	if (((str) != NULL) &&					\
	    ((dict == NULL) ||					\
	     (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));
14097
14098 /**
14099 * xmlCtxtReset:
14100 * @ctxt: an XML parser context
14101 *
14102 * Reset a parser context
14103 */
14104 void
14105 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14106 {
14107 xmlParserInputPtr input;
14108 xmlDictPtr dict;
14109
14110 if (ctxt == NULL)
14111 return;
14112
14113 dict = ctxt->dict;
14114
14115 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14116 xmlFreeInputStream(input);
14117 }
14118 ctxt->inputNr = 0;
14119 ctxt->input = NULL;
14120
14121 ctxt->spaceNr = 0;
14122 if (ctxt->spaceTab != NULL) {
14123 ctxt->spaceTab[0] = -1;
14124 ctxt->space = &ctxt->spaceTab[0];
14125 } else {
14126 ctxt->space = NULL;
14127 }
14128
14129
14130 ctxt->nodeNr = 0;
14131 ctxt->node = NULL;
14132
14133 ctxt->nameNr = 0;
14134 ctxt->name = NULL;
14135
14136 DICT_FREE(ctxt->version);
14137 ctxt->version = NULL;
14138 DICT_FREE(ctxt->encoding);
14139 ctxt->encoding = NULL;
14140 DICT_FREE(ctxt->directory);
14141 ctxt->directory = NULL;
14142 DICT_FREE(ctxt->extSubURI);
14143 ctxt->extSubURI = NULL;
14144 DICT_FREE(ctxt->extSubSystem);
14145 ctxt->extSubSystem = NULL;
14146 if (ctxt->myDoc != NULL)
14147 xmlFreeDoc(ctxt->myDoc);
14148 ctxt->myDoc = NULL;
14149
14150 ctxt->standalone = -1;
14151 ctxt->hasExternalSubset = 0;
14152 ctxt->hasPErefs = 0;
14153 ctxt->html = 0;
14154 ctxt->external = 0;
14155 ctxt->instate = XML_PARSER_START;
14156 ctxt->token = 0;
14157
14158 ctxt->wellFormed = 1;
14159 ctxt->nsWellFormed = 1;
14160 ctxt->disableSAX = 0;
14161 ctxt->valid = 1;
14162 #if 0
14163 ctxt->vctxt.userData = ctxt;
14164 ctxt->vctxt.error = xmlParserValidityError;
14165 ctxt->vctxt.warning = xmlParserValidityWarning;
14166 #endif
14167 ctxt->record_info = 0;
14168 ctxt->nbChars = 0;
14169 ctxt->checkIndex = 0;
14170 ctxt->inSubset = 0;
14171 ctxt->errNo = XML_ERR_OK;
14172 ctxt->depth = 0;
14173 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14174 ctxt->catalogs = NULL;
14175 ctxt->nbentities = 0;
14176 ctxt->sizeentities = 0;
14177 xmlInitNodeInfoSeq(&ctxt->node_seq);
14178
14179 if (ctxt->attsDefault != NULL) {
14180 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14181 ctxt->attsDefault = NULL;
14182 }
14183 if (ctxt->attsSpecial != NULL) {
14184 xmlHashFree(ctxt->attsSpecial, NULL);
14185 ctxt->attsSpecial = NULL;
14186 }
14187
14188 #ifdef LIBXML_CATALOG_ENABLED
14189 if (ctxt->catalogs != NULL)
14190 xmlCatalogFreeLocal(ctxt->catalogs);
14191 #endif
14192 if (ctxt->lastError.code != XML_ERR_OK)
14193 xmlResetError(&ctxt->lastError);
14194 }
14195
14196 /**
14197 * xmlCtxtResetPush:
14198 * @ctxt: an XML parser context
14199 * @chunk: a pointer to an array of chars
14200 * @size: number of chars in the array
14201 * @filename: an optional file name or URI
14202 * @encoding: the document encoding, or NULL
14203 *
14204 * Reset a push parser context
14205 *
14206 * Returns 0 in case of success and 1 in case of error
14207 */
14208 int
14209 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14210 int size, const char *filename, const char *encoding)
14211 {
14212 xmlParserInputPtr inputStream;
14213 xmlParserInputBufferPtr buf;
14214 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14215
14216 if (ctxt == NULL)
14217 return(1);
14218
14219 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14220 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14221
14222 buf = xmlAllocParserInputBuffer(enc);
14223 if (buf == NULL)
14224 return(1);
14225
14226 if (ctxt == NULL) {
14227 xmlFreeParserInputBuffer(buf);
14228 return(1);
14229 }
14230
14231 xmlCtxtReset(ctxt);
14232
14233 if (ctxt->pushTab == NULL) {
14234 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14235 sizeof(xmlChar *));
14236 if (ctxt->pushTab == NULL) {
14237 xmlErrMemory(ctxt, NULL);
14238 xmlFreeParserInputBuffer(buf);
14239 return(1);
14240 }
14241 }
14242
14243 if (filename == NULL) {
14244 ctxt->directory = NULL;
14245 } else {
14246 ctxt->directory = xmlParserGetDirectory(filename);
14247 }
14248
14249 inputStream = xmlNewInputStream(ctxt);
14250 if (inputStream == NULL) {
14251 xmlFreeParserInputBuffer(buf);
14252 return(1);
14253 }
14254
14255 if (filename == NULL)
14256 inputStream->filename = NULL;
14257 else
14258 inputStream->filename = (char *)
14259 xmlCanonicPath((const xmlChar *) filename);
14260 inputStream->buf = buf;
14261 inputStream->base = inputStream->buf->buffer->content;
14262 inputStream->cur = inputStream->buf->buffer->content;
14263 inputStream->end =
14264 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14265
14266 inputPush(ctxt, inputStream);
14267
14268 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14269 (ctxt->input->buf != NULL)) {
14270 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14271 int cur = ctxt->input->cur - ctxt->input->base;
14272
14273 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14274
14275 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14276 ctxt->input->cur = ctxt->input->base + cur;
14277 ctxt->input->end =
14278 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14279 use];
14280 #ifdef DEBUG_PUSH
14281 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14282 #endif
14283 }
14284
14285 if (encoding != NULL) {
14286 xmlCharEncodingHandlerPtr hdlr;
14287
14288 if (ctxt->encoding != NULL)
14289 xmlFree((xmlChar *) ctxt->encoding);
14290 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14291
14292 hdlr = xmlFindCharEncodingHandler(encoding);
14293 if (hdlr != NULL) {
14294 xmlSwitchToEncoding(ctxt, hdlr);
14295 } else {
14296 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14297 "Unsupported encoding %s\n", BAD_CAST encoding);
14298 }
14299 } else if (enc != XML_CHAR_ENCODING_NONE) {
14300 xmlSwitchEncoding(ctxt, enc);
14301 }
14302
14303 return(0);
14304 }
14305
14306
14307 /**
14308 * xmlCtxtUseOptionsInternal:
14309 * @ctxt: an XML parser context
14310 * @options: a combination of xmlParserOption
14311 * @encoding: the user provided encoding to use
14312 *
14313 * Applies the options to the parser context
14314 *
14315 * Returns 0 in case of success, the set of unknown or unimplemented options
14316 * in case of error.
14317 */
14318 static int
14319 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14320 {
14321 if (ctxt == NULL)
14322 return(-1);
14323 if (encoding != NULL) {
14324 if (ctxt->encoding != NULL)
14325 xmlFree((xmlChar *) ctxt->encoding);
14326 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14327 }
14328 if (options & XML_PARSE_RECOVER) {
14329 ctxt->recovery = 1;
14330 options -= XML_PARSE_RECOVER;
14331 ctxt->options |= XML_PARSE_RECOVER;
14332 } else
14333 ctxt->recovery = 0;
14334 if (options & XML_PARSE_DTDLOAD) {
14335 ctxt->loadsubset = XML_DETECT_IDS;
14336 options -= XML_PARSE_DTDLOAD;
14337 ctxt->options |= XML_PARSE_DTDLOAD;
14338 } else
14339 ctxt->loadsubset = 0;
14340 if (options & XML_PARSE_DTDATTR) {
14341 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14342 options -= XML_PARSE_DTDATTR;
14343 ctxt->options |= XML_PARSE_DTDATTR;
14344 }
14345 if (options & XML_PARSE_NOENT) {
14346 ctxt->replaceEntities = 1;
14347 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14348 options -= XML_PARSE_NOENT;
14349 ctxt->options |= XML_PARSE_NOENT;
14350 } else
14351 ctxt->replaceEntities = 0;
14352 if (options & XML_PARSE_PEDANTIC) {
14353 ctxt->pedantic = 1;
14354 options -= XML_PARSE_PEDANTIC;
14355 ctxt->options |= XML_PARSE_PEDANTIC;
14356 } else
14357 ctxt->pedantic = 0;
14358 if (options & XML_PARSE_NOBLANKS) {
14359 ctxt->keepBlanks = 0;
14360 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14361 options -= XML_PARSE_NOBLANKS;
14362 ctxt->options |= XML_PARSE_NOBLANKS;
14363 } else
14364 ctxt->keepBlanks = 1;
14365 if (options & XML_PARSE_DTDVALID) {
14366 ctxt->validate = 1;
14367 if (options & XML_PARSE_NOWARNING)
14368 ctxt->vctxt.warning = NULL;
14369 if (options & XML_PARSE_NOERROR)
14370 ctxt->vctxt.error = NULL;
14371 options -= XML_PARSE_DTDVALID;
14372 ctxt->options |= XML_PARSE_DTDVALID;
14373 } else
14374 ctxt->validate = 0;
14375 if (options & XML_PARSE_NOWARNING) {
14376 ctxt->sax->warning = NULL;
14377 options -= XML_PARSE_NOWARNING;
14378 }
14379 if (options & XML_PARSE_NOERROR) {
14380 ctxt->sax->error = NULL;
14381 ctxt->sax->fatalError = NULL;
14382 options -= XML_PARSE_NOERROR;
14383 }
14384 #ifdef LIBXML_SAX1_ENABLED
14385 if (options & XML_PARSE_SAX1) {
14386 ctxt->sax->startElement = xmlSAX2StartElement;
14387 ctxt->sax->endElement = xmlSAX2EndElement;
14388 ctxt->sax->startElementNs = NULL;
14389 ctxt->sax->endElementNs = NULL;
14390 ctxt->sax->initialized = 1;
14391 options -= XML_PARSE_SAX1;
14392 ctxt->options |= XML_PARSE_SAX1;
14393 }
14394 #endif /* LIBXML_SAX1_ENABLED */
14395 if (options & XML_PARSE_NODICT) {
14396 ctxt->dictNames = 0;
14397 options -= XML_PARSE_NODICT;
14398 ctxt->options |= XML_PARSE_NODICT;
14399 } else {
14400 ctxt->dictNames = 1;
14401 }
14402 if (options & XML_PARSE_NOCDATA) {
14403 ctxt->sax->cdataBlock = NULL;
14404 options -= XML_PARSE_NOCDATA;
14405 ctxt->options |= XML_PARSE_NOCDATA;
14406 }
14407 if (options & XML_PARSE_NSCLEAN) {
14408 ctxt->options |= XML_PARSE_NSCLEAN;
14409 options -= XML_PARSE_NSCLEAN;
14410 }
14411 if (options & XML_PARSE_NONET) {
14412 ctxt->options |= XML_PARSE_NONET;
14413 options -= XML_PARSE_NONET;
14414 }
14415 if (options & XML_PARSE_COMPACT) {
14416 ctxt->options |= XML_PARSE_COMPACT;
14417 options -= XML_PARSE_COMPACT;
14418 }
14419 if (options & XML_PARSE_OLD10) {
14420 ctxt->options |= XML_PARSE_OLD10;
14421 options -= XML_PARSE_OLD10;
14422 }
14423 if (options & XML_PARSE_NOBASEFIX) {
14424 ctxt->options |= XML_PARSE_NOBASEFIX;
14425 options -= XML_PARSE_NOBASEFIX;
14426 }
14427 if (options & XML_PARSE_HUGE) {
14428 ctxt->options |= XML_PARSE_HUGE;
14429 options -= XML_PARSE_HUGE;
14430 }
14431 if (options & XML_PARSE_OLDSAX) {
14432 ctxt->options |= XML_PARSE_OLDSAX;
14433 options -= XML_PARSE_OLDSAX;
14434 }
14435 ctxt->linenumbers = 1;
14436 return (options);
14437 }
14438
14439 /**
14440 * xmlCtxtUseOptions:
14441 * @ctxt: an XML parser context
14442 * @options: a combination of xmlParserOption
14443 *
14444 * Applies the options to the parser context
14445 *
14446 * Returns 0 in case of success, the set of unknown or unimplemented options
14447 * in case of error.
14448 */
14449 int
14450 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14451 {
14452 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14453 }
14454
14455 /**
14456 * xmlDoRead:
14457 * @ctxt: an XML parser context
14458 * @URL: the base URL to use for the document
14459 * @encoding: the document encoding, or NULL
14460 * @options: a combination of xmlParserOption
14461 * @reuse: keep the context for reuse
14462 *
14463 * Common front-end for the xmlRead functions
14464 *
14465 * Returns the resulting document tree or NULL
14466 */
14467 static xmlDocPtr
14468 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14469 int options, int reuse)
14470 {
14471 xmlDocPtr ret;
14472
14473 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14474 if (encoding != NULL) {
14475 xmlCharEncodingHandlerPtr hdlr;
14476
14477 hdlr = xmlFindCharEncodingHandler(encoding);
14478 if (hdlr != NULL)
14479 xmlSwitchToEncoding(ctxt, hdlr);
14480 }
14481 if ((URL != NULL) && (ctxt->input != NULL) &&
14482 (ctxt->input->filename == NULL))
14483 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14484 xmlParseDocument(ctxt);
14485 if ((ctxt->wellFormed) || ctxt->recovery)
14486 ret = ctxt->myDoc;
14487 else {
14488 ret = NULL;
14489 if (ctxt->myDoc != NULL) {
14490 xmlFreeDoc(ctxt->myDoc);
14491 }
14492 }
14493 ctxt->myDoc = NULL;
14494 if (!reuse) {
14495 xmlFreeParserCtxt(ctxt);
14496 }
14497
14498 return (ret);
14499 }
14500
14501 /**
14502 * xmlReadDoc:
14503 * @cur: a pointer to a zero terminated string
14504 * @URL: the base URL to use for the document
14505 * @encoding: the document encoding, or NULL
14506 * @options: a combination of xmlParserOption
14507 *
14508 * parse an XML in-memory document and build a tree.
14509 *
14510 * Returns the resulting document tree
14511 */
14512 xmlDocPtr
14513 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14514 {
14515 xmlParserCtxtPtr ctxt;
14516
14517 if (cur == NULL)
14518 return (NULL);
14519
14520 ctxt = xmlCreateDocParserCtxt(cur);
14521 if (ctxt == NULL)
14522 return (NULL);
14523 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14524 }
14525
14526 /**
14527 * xmlReadFile:
14528 * @filename: a file or URL
14529 * @encoding: the document encoding, or NULL
14530 * @options: a combination of xmlParserOption
14531 *
14532 * parse an XML file from the filesystem or the network.
14533 *
14534 * Returns the resulting document tree
14535 */
14536 xmlDocPtr
14537 xmlReadFile(const char *filename, const char *encoding, int options)
14538 {
14539 xmlParserCtxtPtr ctxt;
14540
14541 ctxt = xmlCreateURLParserCtxt(filename, options);
14542 if (ctxt == NULL)
14543 return (NULL);
14544 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14545 }
14546
14547 /**
14548 * xmlReadMemory:
14549 * @buffer: a pointer to a char array
14550 * @size: the size of the array
14551 * @URL: the base URL to use for the document
14552 * @encoding: the document encoding, or NULL
14553 * @options: a combination of xmlParserOption
14554 *
14555 * parse an XML in-memory document and build a tree.
14556 *
14557 * Returns the resulting document tree
14558 */
14559 xmlDocPtr
14560 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14561 {
14562 xmlParserCtxtPtr ctxt;
14563
14564 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14565 if (ctxt == NULL)
14566 return (NULL);
14567 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14568 }
14569
14570 /**
14571 * xmlReadFd:
14572 * @fd: an open file descriptor
14573 * @URL: the base URL to use for the document
14574 * @encoding: the document encoding, or NULL
14575 * @options: a combination of xmlParserOption
14576 *
14577 * parse an XML from a file descriptor and build a tree.
14578 * NOTE that the file descriptor will not be closed when the
14579 * reader is closed or reset.
14580 *
14581 * Returns the resulting document tree
14582 */
14583 xmlDocPtr
14584 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14585 {
14586 xmlParserCtxtPtr ctxt;
14587 xmlParserInputBufferPtr input;
14588 xmlParserInputPtr stream;
14589
14590 if (fd < 0)
14591 return (NULL);
14592
14593 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14594 if (input == NULL)
14595 return (NULL);
14596 input->closecallback = NULL;
14597 ctxt = xmlNewParserCtxt();
14598 if (ctxt == NULL) {
14599 xmlFreeParserInputBuffer(input);
14600 return (NULL);
14601 }
14602 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14603 if (stream == NULL) {
14604 xmlFreeParserInputBuffer(input);
14605 xmlFreeParserCtxt(ctxt);
14606 return (NULL);
14607 }
14608 inputPush(ctxt, stream);
14609 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14610 }
14611
14612 /**
14613 * xmlReadIO:
14614 * @ioread: an I/O read function
14615 * @ioclose: an I/O close function
14616 * @ioctx: an I/O handler
14617 * @URL: the base URL to use for the document
14618 * @encoding: the document encoding, or NULL
14619 * @options: a combination of xmlParserOption
14620 *
14621 * parse an XML document from I/O functions and source and build a tree.
14622 *
14623 * Returns the resulting document tree
14624 */
14625 xmlDocPtr
14626 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14627 void *ioctx, const char *URL, const char *encoding, int options)
14628 {
14629 xmlParserCtxtPtr ctxt;
14630 xmlParserInputBufferPtr input;
14631 xmlParserInputPtr stream;
14632
14633 if (ioread == NULL)
14634 return (NULL);
14635
14636 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14637 XML_CHAR_ENCODING_NONE);
14638 if (input == NULL)
14639 return (NULL);
14640 ctxt = xmlNewParserCtxt();
14641 if (ctxt == NULL) {
14642 xmlFreeParserInputBuffer(input);
14643 return (NULL);
14644 }
14645 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14646 if (stream == NULL) {
14647 xmlFreeParserInputBuffer(input);
14648 xmlFreeParserCtxt(ctxt);
14649 return (NULL);
14650 }
14651 inputPush(ctxt, stream);
14652 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14653 }
14654
14655 /**
14656 * xmlCtxtReadDoc:
14657 * @ctxt: an XML parser context
14658 * @cur: a pointer to a zero terminated string
14659 * @URL: the base URL to use for the document
14660 * @encoding: the document encoding, or NULL
14661 * @options: a combination of xmlParserOption
14662 *
14663 * parse an XML in-memory document and build a tree.
14664 * This reuses the existing @ctxt parser context
14665 *
14666 * Returns the resulting document tree
14667 */
14668 xmlDocPtr
14669 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
14670 const char *URL, const char *encoding, int options)
14671 {
14672 xmlParserInputPtr stream;
14673
14674 if (cur == NULL)
14675 return (NULL);
14676 if (ctxt == NULL)
14677 return (NULL);
14678
14679 xmlCtxtReset(ctxt);
14680
14681 stream = xmlNewStringInputStream(ctxt, cur);
14682 if (stream == NULL) {
14683 return (NULL);
14684 }
14685 inputPush(ctxt, stream);
14686 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14687 }
14688
14689 /**
14690 * xmlCtxtReadFile:
14691 * @ctxt: an XML parser context
14692 * @filename: a file or URL
14693 * @encoding: the document encoding, or NULL
14694 * @options: a combination of xmlParserOption
14695 *
14696 * parse an XML file from the filesystem or the network.
14697 * This reuses the existing @ctxt parser context
14698 *
14699 * Returns the resulting document tree
14700 */
14701 xmlDocPtr
14702 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14703 const char *encoding, int options)
14704 {
14705 xmlParserInputPtr stream;
14706
14707 if (filename == NULL)
14708 return (NULL);
14709 if (ctxt == NULL)
14710 return (NULL);
14711
14712 xmlCtxtReset(ctxt);
14713
14714 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
14715 if (stream == NULL) {
14716 return (NULL);
14717 }
14718 inputPush(ctxt, stream);
14719 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
14720 }
14721
14722 /**
14723 * xmlCtxtReadMemory:
14724 * @ctxt: an XML parser context
14725 * @buffer: a pointer to a char array
14726 * @size: the size of the array
14727 * @URL: the base URL to use for the document
14728 * @encoding: the document encoding, or NULL
14729 * @options: a combination of xmlParserOption
14730 *
14731 * parse an XML in-memory document and build a tree.
14732 * This reuses the existing @ctxt parser context
14733 *
14734 * Returns the resulting document tree
14735 */
14736 xmlDocPtr
14737 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14738 const char *URL, const char *encoding, int options)
14739 {
14740 xmlParserInputBufferPtr input;
14741 xmlParserInputPtr stream;
14742
14743 if (ctxt == NULL)
14744 return (NULL);
14745 if (buffer == NULL)
14746 return (NULL);
14747
14748 xmlCtxtReset(ctxt);
14749
14750 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14751 if (input == NULL) {
14752 return(NULL);
14753 }
14754
14755 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14756 if (stream == NULL) {
14757 xmlFreeParserInputBuffer(input);
14758 return(NULL);
14759 }
14760
14761 inputPush(ctxt, stream);
14762 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14763 }
14764
14765 /**
14766 * xmlCtxtReadFd:
14767 * @ctxt: an XML parser context
14768 * @fd: an open file descriptor
14769 * @URL: the base URL to use for the document
14770 * @encoding: the document encoding, or NULL
14771 * @options: a combination of xmlParserOption
14772 *
14773 * parse an XML from a file descriptor and build a tree.
14774 * This reuses the existing @ctxt parser context
14775 * NOTE that the file descriptor will not be closed when the
14776 * reader is closed or reset.
14777 *
14778 * Returns the resulting document tree
14779 */
14780 xmlDocPtr
14781 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14782 const char *URL, const char *encoding, int options)
14783 {
14784 xmlParserInputBufferPtr input;
14785 xmlParserInputPtr stream;
14786
14787 if (fd < 0)
14788 return (NULL);
14789 if (ctxt == NULL)
14790 return (NULL);
14791
14792 xmlCtxtReset(ctxt);
14793
14794
14795 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14796 if (input == NULL)
14797 return (NULL);
14798 input->closecallback = NULL;
14799 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14800 if (stream == NULL) {
14801 xmlFreeParserInputBuffer(input);
14802 return (NULL);
14803 }
14804 inputPush(ctxt, stream);
14805 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14806 }
14807
14808 /**
14809 * xmlCtxtReadIO:
14810 * @ctxt: an XML parser context
14811 * @ioread: an I/O read function
14812 * @ioclose: an I/O close function
14813 * @ioctx: an I/O handler
14814 * @URL: the base URL to use for the document
14815 * @encoding: the document encoding, or NULL
14816 * @options: a combination of xmlParserOption
14817 *
14818 * parse an XML document from I/O functions and source and build a tree.
14819 * This reuses the existing @ctxt parser context
14820 *
14821 * Returns the resulting document tree
14822 */
14823 xmlDocPtr
14824 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14825 xmlInputCloseCallback ioclose, void *ioctx,
14826 const char *URL,
14827 const char *encoding, int options)
14828 {
14829 xmlParserInputBufferPtr input;
14830 xmlParserInputPtr stream;
14831
14832 if (ioread == NULL)
14833 return (NULL);
14834 if (ctxt == NULL)
14835 return (NULL);
14836
14837 xmlCtxtReset(ctxt);
14838
14839 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14840 XML_CHAR_ENCODING_NONE);
14841 if (input == NULL)
14842 return (NULL);
14843 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14844 if (stream == NULL) {
14845 xmlFreeParserInputBuffer(input);
14846 return (NULL);
14847 }
14848 inputPush(ctxt, stream);
14849 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14850 }
14851
14852 #define bottom_parser
14853 #include "elfgcchack.h"