2 * HTMLtree.c : implementation of access function for an HTML tree.
4 * See Copyright for the status of this software.
12 #ifdef LIBXML_HTML_ENABLED
14 #include <string.h> /* for memset() only ! */
23 #include <libxml/xmlmemory.h>
24 #include <libxml/HTMLparser.h>
25 #include <libxml/HTMLtree.h>
26 #include <libxml/entities.h>
27 #include <libxml/valid.h>
28 #include <libxml/xmlerror.h>
29 #include <libxml/parserInternals.h>
30 #include <libxml/globals.h>
31 #include <libxml/uri.h>
35 /************************************************************************
37 * Getting/Setting encoding meta tags *
39 ************************************************************************/
42 * htmlGetMetaEncoding:
45 * Encoding definition lookup in the Meta tags
47 * Returns the current encoding as flagged in the HTML source
50 htmlGetMetaEncoding(htmlDocPtr doc
) {
52 const xmlChar
*content
;
53 const xmlChar
*encoding
;
63 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
64 if (xmlStrEqual(cur
->name
, BAD_CAST
"html"))
66 if (xmlStrEqual(cur
->name
, BAD_CAST
"head"))
68 if (xmlStrEqual(cur
->name
, BAD_CAST
"meta"))
81 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
82 if (xmlStrEqual(cur
->name
, BAD_CAST
"head"))
84 if (xmlStrEqual(cur
->name
, BAD_CAST
"meta"))
95 * Search the meta elements
99 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
100 if (xmlStrEqual(cur
->name
, BAD_CAST
"meta")) {
101 xmlAttrPtr attr
= cur
->properties
;
103 const xmlChar
*value
;
107 while (attr
!= NULL
) {
108 if ((attr
->children
!= NULL
) &&
109 (attr
->children
->type
== XML_TEXT_NODE
) &&
110 (attr
->children
->next
== NULL
)) {
111 value
= attr
->children
->content
;
112 if ((!xmlStrcasecmp(attr
->name
, BAD_CAST
"http-equiv"))
113 && (!xmlStrcasecmp(value
, BAD_CAST
"Content-Type")))
115 else if ((value
!= NULL
)
116 && (!xmlStrcasecmp(attr
->name
, BAD_CAST
"content")))
118 if ((http
!= 0) && (content
!= NULL
))
130 encoding
= xmlStrstr(content
, BAD_CAST
"charset=");
131 if (encoding
== NULL
)
132 encoding
= xmlStrstr(content
, BAD_CAST
"Charset=");
133 if (encoding
== NULL
)
134 encoding
= xmlStrstr(content
, BAD_CAST
"CHARSET=");
135 if (encoding
!= NULL
) {
138 encoding
= xmlStrstr(content
, BAD_CAST
"charset =");
139 if (encoding
== NULL
)
140 encoding
= xmlStrstr(content
, BAD_CAST
"Charset =");
141 if (encoding
== NULL
)
142 encoding
= xmlStrstr(content
, BAD_CAST
"CHARSET =");
143 if (encoding
!= NULL
)
146 if (encoding
!= NULL
) {
147 while ((*encoding
== ' ') || (*encoding
== '\t')) encoding
++;
153 * htmlSetMetaEncoding:
155 * @encoding: the encoding string
157 * Sets the current encoding in the Meta tags
158 * NOTE: this will not change the document content encoding, just
159 * the META flag associated.
161 * Returns 0 in case of success and -1 in case of error
164 htmlSetMetaEncoding(htmlDocPtr doc
, const xmlChar
*encoding
) {
165 htmlNodePtr cur
, meta
= NULL
, head
= NULL
;
166 const xmlChar
*content
= NULL
;
167 char newcontent
[100];
174 /* html isn't a real encoding it's just libxml2 way to get entities */
175 if (!xmlStrcasecmp(encoding
, BAD_CAST
"html"))
178 if (encoding
!= NULL
) {
179 snprintf(newcontent
, sizeof(newcontent
), "text/html; charset=%s",
181 newcontent
[sizeof(newcontent
) - 1] = 0;
189 while (cur
!= NULL
) {
190 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
191 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"html") == 0)
193 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"head") == 0)
195 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"meta") == 0)
207 while (cur
!= NULL
) {
208 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
209 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"head") == 0)
211 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"meta") == 0) {
222 if (cur
->children
== NULL
)
228 * Search and update all the remaining meta elements carrying
229 * encoding information
231 while (cur
!= NULL
) {
232 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
233 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"meta") == 0) {
234 xmlAttrPtr attr
= cur
->properties
;
236 const xmlChar
*value
;
240 while (attr
!= NULL
) {
241 if ((attr
->children
!= NULL
) &&
242 (attr
->children
->type
== XML_TEXT_NODE
) &&
243 (attr
->children
->next
== NULL
)) {
244 value
= attr
->children
->content
;
245 if ((!xmlStrcasecmp(attr
->name
, BAD_CAST
"http-equiv"))
246 && (!xmlStrcasecmp(value
, BAD_CAST
"Content-Type")))
250 if ((value
!= NULL
) &&
251 (!xmlStrcasecmp(attr
->name
, BAD_CAST
"content")))
254 if ((http
!= 0) && (content
!= NULL
))
259 if ((http
!= 0) && (content
!= NULL
)) {
270 if ((encoding
!= NULL
) && (head
!= NULL
)) {
272 * Create a new Meta element with the right attributes
275 meta
= xmlNewDocNode(doc
, NULL
, BAD_CAST
"meta", NULL
);
276 if (head
->children
== NULL
)
277 xmlAddChild(head
, meta
);
279 xmlAddPrevSibling(head
->children
, meta
);
280 xmlNewProp(meta
, BAD_CAST
"http-equiv", BAD_CAST
"Content-Type");
281 xmlNewProp(meta
, BAD_CAST
"content", BAD_CAST newcontent
);
284 /* remove the meta tag if NULL is passed */
285 if (encoding
== NULL
) {
289 /* change the document only if there is a real encoding change */
290 else if (xmlStrcasestr(content
, encoding
) == NULL
) {
291 xmlSetProp(meta
, BAD_CAST
"content", BAD_CAST newcontent
);
302 * These are the HTML attributes which will be output
303 * in minimized form, i.e. <option selected="selected"> will be
304 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
307 static const char* htmlBooleanAttrs
[] = {
308 "checked", "compact", "declare", "defer", "disabled", "ismap",
309 "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
316 * @name: the name of the attribute to check
318 * Determine if a given attribute is a boolean attribute.
320 * returns: false if the attribute is not boolean, true otherwise.
323 htmlIsBooleanAttr(const xmlChar
*name
)
327 while (htmlBooleanAttrs
[i
] != NULL
) {
328 if (xmlStrcasecmp((const xmlChar
*)htmlBooleanAttrs
[i
], name
) == 0)
335 #ifdef LIBXML_OUTPUT_ENABLED
337 * private routine exported from xmlIO.c
340 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder
);
341 /************************************************************************
343 * Output error handlers *
345 ************************************************************************/
348 * @extra: extra information
350 * Handle an out of memory condition
353 htmlSaveErrMemory(const char *extra
)
355 __xmlSimpleError(XML_FROM_OUTPUT
, XML_ERR_NO_MEMORY
, NULL
, NULL
, extra
);
360 * @code: the error number
361 * @node: the location of the error.
362 * @extra: extra information
364 * Report an HTML output (save) error identified by @code
367 htmlSaveErr(int code
, xmlNodePtr node
, const char *extra
)
369 const char *msg
= NULL
;
372 case XML_SAVE_NOT_UTF8
:
373 msg
= "string is not in UTF-8\n";
375 case XML_SAVE_CHAR_INVALID
:
376 msg
= "invalid character value\n";
378 case XML_SAVE_UNKNOWN_ENCODING
:
379 msg
= "unknown encoding %s\n";
381 case XML_SAVE_NO_DOCTYPE
:
382 msg
= "HTML has no DOCTYPE\n";
385 msg
= "unexpected error number\n";
387 __xmlSimpleError(XML_FROM_OUTPUT
, code
, node
, msg
, extra
);
390 /************************************************************************
392 * Dumping HTML tree content to a simple buffer *
394 ************************************************************************/
397 * htmlBufNodeDumpFormat:
398 * @buf: the xmlBufPtr output
400 * @cur: the current node
401 * @format: should formatting spaces be added
403 * Dump an HTML node, recursive behaviour, children are printed too.
405 * Returns the number of bytes written or -1 in case of error
408 htmlBufNodeDumpFormat(xmlBufPtr buf
, xmlDocPtr doc
, xmlNodePtr cur
,
412 xmlOutputBufferPtr outbuf
;
420 outbuf
= (xmlOutputBufferPtr
) xmlMalloc(sizeof(xmlOutputBuffer
));
421 if (outbuf
== NULL
) {
422 htmlSaveErrMemory("allocating HTML output buffer");
425 memset(outbuf
, 0, (size_t) sizeof(xmlOutputBuffer
));
426 outbuf
->buffer
= buf
;
427 outbuf
->encoder
= NULL
;
428 outbuf
->writecallback
= NULL
;
429 outbuf
->closecallback
= NULL
;
430 outbuf
->context
= NULL
;
433 use
= xmlBufUse(buf
);
434 htmlNodeDumpFormatOutput(outbuf
, doc
, cur
, NULL
, format
);
436 ret
= xmlBufUse(buf
) - use
;
442 * @buf: the HTML buffer output
444 * @cur: the current node
446 * Dump an HTML node, recursive behaviour, children are printed too,
447 * and formatting returns are added.
449 * Returns the number of bytes written or -1 in case of error
452 htmlNodeDump(xmlBufferPtr buf
, xmlDocPtr doc
, xmlNodePtr cur
) {
456 if ((buf
== NULL
) || (cur
== NULL
))
460 buffer
= xmlBufFromBuffer(buf
);
464 ret
= htmlBufNodeDumpFormat(buffer
, doc
, cur
, 1);
466 xmlBufBackToBuffer(buffer
);
474 * htmlNodeDumpFileFormat:
475 * @out: the FILE pointer
477 * @cur: the current node
478 * @encoding: the document encoding
479 * @format: should formatting spaces be added
481 * Dump an HTML node, recursive behaviour, children are printed too.
483 * TODO: if encoding == NULL try to save in the doc encoding
485 * returns: the number of bytes written or -1 in case of failure.
488 htmlNodeDumpFileFormat(FILE *out
, xmlDocPtr doc
,
489 xmlNodePtr cur
, const char *encoding
, int format
) {
490 xmlOutputBufferPtr buf
;
491 xmlCharEncodingHandlerPtr handler
= NULL
;
496 if (encoding
!= NULL
) {
499 enc
= xmlParseCharEncoding(encoding
);
500 if (enc
!= XML_CHAR_ENCODING_UTF8
) {
501 handler
= xmlFindCharEncodingHandler(encoding
);
503 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING
, NULL
, encoding
);
507 * Fallback to HTML or ASCII when the encoding is unspecified
510 handler
= xmlFindCharEncodingHandler("HTML");
512 handler
= xmlFindCharEncodingHandler("ascii");
516 * save the content to a temp buffer.
518 buf
= xmlOutputBufferCreateFile(out
, handler
);
519 if (buf
== NULL
) return(0);
521 htmlNodeDumpFormatOutput(buf
, doc
, cur
, encoding
, format
);
523 ret
= xmlOutputBufferClose(buf
);
529 * @out: the FILE pointer
531 * @cur: the current node
533 * Dump an HTML node, recursive behaviour, children are printed too,
534 * and formatting returns are added.
537 htmlNodeDumpFile(FILE *out
, xmlDocPtr doc
, xmlNodePtr cur
) {
538 htmlNodeDumpFileFormat(out
, doc
, cur
, NULL
, 1);
542 * htmlDocDumpMemoryFormat:
544 * @mem: OUT: the memory pointer
545 * @size: OUT: the memory length
546 * @format: should formatting spaces be added
548 * Dump an HTML document in memory and return the xmlChar * and its size.
549 * It's up to the caller to free the memory.
552 htmlDocDumpMemoryFormat(xmlDocPtr cur
, xmlChar
**mem
, int *size
, int format
) {
553 xmlOutputBufferPtr buf
;
554 xmlCharEncodingHandlerPtr handler
= NULL
;
555 const char *encoding
;
559 if ((mem
== NULL
) || (size
== NULL
))
567 encoding
= (const char *) htmlGetMetaEncoding(cur
);
569 if (encoding
!= NULL
) {
572 enc
= xmlParseCharEncoding(encoding
);
573 if (enc
!= XML_CHAR_ENCODING_UTF8
) {
574 handler
= xmlFindCharEncodingHandler(encoding
);
576 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING
, NULL
, encoding
);
581 * Fallback to HTML or ASCII when the encoding is unspecified
584 handler
= xmlFindCharEncodingHandler("HTML");
586 handler
= xmlFindCharEncodingHandler("ascii");
589 buf
= xmlAllocOutputBufferInternal(handler
);
596 htmlDocContentDumpFormatOutput(buf
, cur
, NULL
, format
);
598 xmlOutputBufferFlush(buf
);
599 if (buf
->conv
!= NULL
) {
600 *size
= xmlBufUse(buf
->conv
);
601 *mem
= xmlStrndup(xmlBufContent(buf
->conv
), *size
);
603 *size
= xmlBufUse(buf
->buffer
);
604 *mem
= xmlStrndup(xmlBufContent(buf
->buffer
), *size
);
606 (void)xmlOutputBufferClose(buf
);
612 * @mem: OUT: the memory pointer
613 * @size: OUT: the memory length
615 * Dump an HTML document in memory and return the xmlChar * and its size.
616 * It's up to the caller to free the memory.
619 htmlDocDumpMemory(xmlDocPtr cur
, xmlChar
**mem
, int *size
) {
620 htmlDocDumpMemoryFormat(cur
, mem
, size
, 1);
624 /************************************************************************
626 * Dumping HTML tree content to an I/O output buffer *
628 ************************************************************************/
630 void xmlNsListDumpOutput(xmlOutputBufferPtr buf
, xmlNsPtr cur
);
634 * @buf: the HTML buffer output
636 * @encoding: the encoding string
638 * TODO: check whether encoding is needed
640 * Dump the HTML document DTD, if any.
643 htmlDtdDumpOutput(xmlOutputBufferPtr buf
, xmlDocPtr doc
,
644 const char *encoding ATTRIBUTE_UNUSED
) {
645 xmlDtdPtr cur
= doc
->intSubset
;
648 htmlSaveErr(XML_SAVE_NO_DOCTYPE
, (xmlNodePtr
) doc
, NULL
);
651 xmlOutputBufferWriteString(buf
, "<!DOCTYPE ");
652 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
653 if (cur
->ExternalID
!= NULL
) {
654 xmlOutputBufferWriteString(buf
, " PUBLIC ");
655 xmlBufWriteQuotedString(buf
->buffer
, cur
->ExternalID
);
656 if (cur
->SystemID
!= NULL
) {
657 xmlOutputBufferWriteString(buf
, " ");
658 xmlBufWriteQuotedString(buf
->buffer
, cur
->SystemID
);
660 } else if (cur
->SystemID
!= NULL
&&
661 xmlStrcmp(cur
->SystemID
, BAD_CAST
"about:legacy-compat")) {
662 xmlOutputBufferWriteString(buf
, " SYSTEM ");
663 xmlBufWriteQuotedString(buf
->buffer
, cur
->SystemID
);
665 xmlOutputBufferWriteString(buf
, ">\n");
669 * htmlAttrDumpOutput:
670 * @buf: the HTML buffer output
672 * @cur: the attribute pointer
673 * @encoding: the encoding string
675 * Dump an HTML attribute
678 htmlAttrDumpOutput(xmlOutputBufferPtr buf
, xmlDocPtr doc
, xmlAttrPtr cur
,
679 const char *encoding ATTRIBUTE_UNUSED
) {
683 * The html output method should not escape a & character
684 * occurring in an attribute value immediately followed by
685 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
686 * This is implemented in xmlEncodeEntitiesReentrant
692 xmlOutputBufferWriteString(buf
, " ");
693 if ((cur
->ns
!= NULL
) && (cur
->ns
->prefix
!= NULL
)) {
694 xmlOutputBufferWriteString(buf
, (const char *)cur
->ns
->prefix
);
695 xmlOutputBufferWriteString(buf
, ":");
697 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
698 if ((cur
->children
!= NULL
) && (!htmlIsBooleanAttr(cur
->name
))) {
699 value
= xmlNodeListGetString(doc
, cur
->children
, 0);
701 xmlOutputBufferWriteString(buf
, "=");
702 if ((cur
->ns
== NULL
) && (cur
->parent
!= NULL
) &&
703 (cur
->parent
->ns
== NULL
) &&
704 ((!xmlStrcasecmp(cur
->name
, BAD_CAST
"href")) ||
705 (!xmlStrcasecmp(cur
->name
, BAD_CAST
"action")) ||
706 (!xmlStrcasecmp(cur
->name
, BAD_CAST
"src")) ||
707 ((!xmlStrcasecmp(cur
->name
, BAD_CAST
"name")) &&
708 (!xmlStrcasecmp(cur
->parent
->name
, BAD_CAST
"a"))))) {
709 xmlChar
*tmp
= value
;
710 /* xmlURIEscapeStr() escapes '"' so it can be safely used. */
711 xmlBufCCat(buf
->buffer
, "\"");
713 while (IS_BLANK_CH(*tmp
)) tmp
++;
715 /* URI Escape everything, except server side includes. */
720 xmlChar
*start
= (xmlChar
*)xmlStrstr(tmp
, BAD_CAST
"<!--");
722 end
= (xmlChar
*)xmlStrstr(tmp
, BAD_CAST
"-->");
728 /* Escape the whole string, or until start (set to '\0'). */
729 escaped
= xmlURIEscapeStr(tmp
, BAD_CAST
"@/:=?;#%&,+");
730 if (escaped
!= NULL
) {
731 xmlBufCat(buf
->buffer
, escaped
);
734 xmlBufCat(buf
->buffer
, tmp
);
737 if (end
== NULL
) { /* Everything has been written. */
741 /* Do not escape anything within server side includes. */
742 *start
= '<'; /* Restore the first character of "<!--". */
743 end
+= 3; /* strlen("-->") */
746 xmlBufCat(buf
->buffer
, start
);
751 xmlBufCCat(buf
->buffer
, "\"");
753 xmlBufWriteQuotedString(buf
->buffer
, value
);
757 xmlOutputBufferWriteString(buf
, "=\"\"");
763 * htmlAttrListDumpOutput:
764 * @buf: the HTML buffer output
766 * @cur: the first attribute pointer
767 * @encoding: the encoding string
769 * Dump a list of HTML attributes
772 htmlAttrListDumpOutput(xmlOutputBufferPtr buf
, xmlDocPtr doc
, xmlAttrPtr cur
, const char *encoding
) {
776 while (cur
!= NULL
) {
777 htmlAttrDumpOutput(buf
, doc
, cur
, encoding
);
785 * htmlNodeListDumpOutput:
786 * @buf: the HTML buffer output
788 * @cur: the first node
789 * @encoding: the encoding string
790 * @format: should formatting spaces be added
792 * Dump an HTML node list, recursive behaviour, children are printed too.
795 htmlNodeListDumpOutput(xmlOutputBufferPtr buf
, xmlDocPtr doc
,
796 xmlNodePtr cur
, const char *encoding
, int format
) {
800 while (cur
!= NULL
) {
801 htmlNodeDumpFormatOutput(buf
, doc
, cur
, encoding
, format
);
807 * htmlNodeDumpFormatOutput:
808 * @buf: the HTML buffer output
810 * @cur: the current node
811 * @encoding: the encoding string
812 * @format: should formatting spaces be added
814 * Dump an HTML node, recursive behaviour, children are printed too.
817 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf
, xmlDocPtr doc
,
818 xmlNodePtr cur
, const char *encoding
, int format
) {
819 const htmlElemDesc
* info
;
823 if ((cur
== NULL
) || (buf
== NULL
)) {
829 if (cur
->type
== XML_DTD_NODE
)
831 if ((cur
->type
== XML_HTML_DOCUMENT_NODE
) ||
832 (cur
->type
== XML_DOCUMENT_NODE
)){
833 htmlDocContentDumpOutput(buf
, (xmlDocPtr
) cur
, encoding
);
836 if (cur
->type
== XML_ATTRIBUTE_NODE
) {
837 htmlAttrDumpOutput(buf
, doc
, (xmlAttrPtr
) cur
, encoding
);
840 if (cur
->type
== HTML_TEXT_NODE
) {
841 if (cur
->content
!= NULL
) {
842 if (((cur
->name
== (const xmlChar
*)xmlStringText
) ||
843 (cur
->name
!= (const xmlChar
*)xmlStringTextNoenc
)) &&
844 ((cur
->parent
== NULL
) ||
845 ((xmlStrcasecmp(cur
->parent
->name
, BAD_CAST
"script")) &&
846 (xmlStrcasecmp(cur
->parent
->name
, BAD_CAST
"style"))))) {
849 buffer
= xmlEncodeEntitiesReentrant(doc
, cur
->content
);
850 if (buffer
!= NULL
) {
851 xmlOutputBufferWriteString(buf
, (const char *)buffer
);
855 xmlOutputBufferWriteString(buf
, (const char *)cur
->content
);
860 if (cur
->type
== HTML_COMMENT_NODE
) {
861 if (cur
->content
!= NULL
) {
862 xmlOutputBufferWriteString(buf
, "<!--");
863 xmlOutputBufferWriteString(buf
, (const char *)cur
->content
);
864 xmlOutputBufferWriteString(buf
, "-->");
868 if (cur
->type
== HTML_PI_NODE
) {
869 if (cur
->name
== NULL
)
871 xmlOutputBufferWriteString(buf
, "<?");
872 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
873 if (cur
->content
!= NULL
) {
874 xmlOutputBufferWriteString(buf
, " ");
875 xmlOutputBufferWriteString(buf
, (const char *)cur
->content
);
877 xmlOutputBufferWriteString(buf
, ">");
880 if (cur
->type
== HTML_ENTITY_REF_NODE
) {
881 xmlOutputBufferWriteString(buf
, "&");
882 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
883 xmlOutputBufferWriteString(buf
, ";");
886 if (cur
->type
== HTML_PRESERVE_NODE
) {
887 if (cur
->content
!= NULL
) {
888 xmlOutputBufferWriteString(buf
, (const char *)cur
->content
);
894 * Get specific HTML info for that node.
897 info
= htmlTagLookup(cur
->name
);
901 xmlOutputBufferWriteString(buf
, "<");
902 if ((cur
->ns
!= NULL
) && (cur
->ns
->prefix
!= NULL
)) {
903 xmlOutputBufferWriteString(buf
, (const char *)cur
->ns
->prefix
);
904 xmlOutputBufferWriteString(buf
, ":");
906 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
908 xmlNsListDumpOutput(buf
, cur
->nsDef
);
909 if (cur
->properties
!= NULL
)
910 htmlAttrListDumpOutput(buf
, doc
, cur
->properties
, encoding
);
912 if ((info
!= NULL
) && (info
->empty
)) {
913 xmlOutputBufferWriteString(buf
, ">");
914 if ((format
) && (!info
->isinline
) && (cur
->next
!= NULL
)) {
915 if ((cur
->next
->type
!= HTML_TEXT_NODE
) &&
916 (cur
->next
->type
!= HTML_ENTITY_REF_NODE
) &&
917 (cur
->parent
!= NULL
) &&
918 (cur
->parent
->name
!= NULL
) &&
919 (cur
->parent
->name
[0] != 'p')) /* p, pre, param */
920 xmlOutputBufferWriteString(buf
, "\n");
924 if (((cur
->type
== XML_ELEMENT_NODE
) || (cur
->content
== NULL
)) &&
925 (cur
->children
== NULL
)) {
926 if ((info
!= NULL
) && (info
->saveEndTag
!= 0) &&
927 (xmlStrcmp(BAD_CAST info
->name
, BAD_CAST
"html")) &&
928 (xmlStrcmp(BAD_CAST info
->name
, BAD_CAST
"body"))) {
929 xmlOutputBufferWriteString(buf
, ">");
931 xmlOutputBufferWriteString(buf
, "></");
932 if ((cur
->ns
!= NULL
) && (cur
->ns
->prefix
!= NULL
)) {
933 xmlOutputBufferWriteString(buf
, (const char *)cur
->ns
->prefix
);
934 xmlOutputBufferWriteString(buf
, ":");
936 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
937 xmlOutputBufferWriteString(buf
, ">");
939 if ((format
) && (cur
->next
!= NULL
) &&
940 (info
!= NULL
) && (!info
->isinline
)) {
941 if ((cur
->next
->type
!= HTML_TEXT_NODE
) &&
942 (cur
->next
->type
!= HTML_ENTITY_REF_NODE
) &&
943 (cur
->parent
!= NULL
) &&
944 (cur
->parent
->name
!= NULL
) &&
945 (cur
->parent
->name
[0] != 'p')) /* p, pre, param */
946 xmlOutputBufferWriteString(buf
, "\n");
950 xmlOutputBufferWriteString(buf
, ">");
951 if ((cur
->type
!= XML_ELEMENT_NODE
) &&
952 (cur
->content
!= NULL
)) {
954 * Uses the OutputBuffer property to automatically convert
955 * invalids to charrefs
958 xmlOutputBufferWriteString(buf
, (const char *) cur
->content
);
960 if (cur
->children
!= NULL
) {
961 if ((format
) && (info
!= NULL
) && (!info
->isinline
) &&
962 (cur
->children
->type
!= HTML_TEXT_NODE
) &&
963 (cur
->children
->type
!= HTML_ENTITY_REF_NODE
) &&
964 (cur
->children
!= cur
->last
) &&
965 (cur
->name
!= NULL
) &&
966 (cur
->name
[0] != 'p')) /* p, pre, param */
967 xmlOutputBufferWriteString(buf
, "\n");
968 htmlNodeListDumpOutput(buf
, doc
, cur
->children
, encoding
, format
);
969 if ((format
) && (info
!= NULL
) && (!info
->isinline
) &&
970 (cur
->last
->type
!= HTML_TEXT_NODE
) &&
971 (cur
->last
->type
!= HTML_ENTITY_REF_NODE
) &&
972 (cur
->children
!= cur
->last
) &&
973 (cur
->name
!= NULL
) &&
974 (cur
->name
[0] != 'p')) /* p, pre, param */
975 xmlOutputBufferWriteString(buf
, "\n");
977 xmlOutputBufferWriteString(buf
, "</");
978 if ((cur
->ns
!= NULL
) && (cur
->ns
->prefix
!= NULL
)) {
979 xmlOutputBufferWriteString(buf
, (const char *)cur
->ns
->prefix
);
980 xmlOutputBufferWriteString(buf
, ":");
982 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
983 xmlOutputBufferWriteString(buf
, ">");
984 if ((format
) && (info
!= NULL
) && (!info
->isinline
) &&
985 (cur
->next
!= NULL
)) {
986 if ((cur
->next
->type
!= HTML_TEXT_NODE
) &&
987 (cur
->next
->type
!= HTML_ENTITY_REF_NODE
) &&
988 (cur
->parent
!= NULL
) &&
989 (cur
->parent
->name
!= NULL
) &&
990 (cur
->parent
->name
[0] != 'p')) /* p, pre, param */
991 xmlOutputBufferWriteString(buf
, "\n");
996 * htmlNodeDumpOutput:
997 * @buf: the HTML buffer output
999 * @cur: the current node
1000 * @encoding: the encoding string
1002 * Dump an HTML node, recursive behaviour, children are printed too,
1003 * and formatting returns/spaces are added.
1006 htmlNodeDumpOutput(xmlOutputBufferPtr buf
, xmlDocPtr doc
,
1007 xmlNodePtr cur
, const char *encoding
) {
1008 htmlNodeDumpFormatOutput(buf
, doc
, cur
, encoding
, 1);
1012 * htmlDocContentDumpFormatOutput:
1013 * @buf: the HTML buffer output
1014 * @cur: the document
1015 * @encoding: the encoding string
1016 * @format: should formatting spaces be added
1018 * Dump an HTML document.
1021 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf
, xmlDocPtr cur
,
1022 const char *encoding
, int format
) {
1027 if ((buf
== NULL
) || (cur
== NULL
))
1031 * force to output the stuff as HTML, especially for entities
1034 cur
->type
= XML_HTML_DOCUMENT_NODE
;
1035 if (cur
->intSubset
!= NULL
) {
1036 htmlDtdDumpOutput(buf
, cur
, NULL
);
1038 if (cur
->children
!= NULL
) {
1039 htmlNodeListDumpOutput(buf
, cur
, cur
->children
, encoding
, format
);
1041 xmlOutputBufferWriteString(buf
, "\n");
1042 cur
->type
= (xmlElementType
) type
;
1046 * htmlDocContentDumpOutput:
1047 * @buf: the HTML buffer output
1048 * @cur: the document
1049 * @encoding: the encoding string
1051 * Dump an HTML document. Formatting returns/spaces are added.
1054 htmlDocContentDumpOutput(xmlOutputBufferPtr buf
, xmlDocPtr cur
,
1055 const char *encoding
) {
1056 htmlDocContentDumpFormatOutput(buf
, cur
, encoding
, 1);
1059 /************************************************************************
1061 * Saving functions front-ends *
1063 ************************************************************************/
1068 * @cur: the document
1070 * Dump an HTML document to an open FILE.
1072 * returns: the number of bytes written or -1 in case of failure.
1075 htmlDocDump(FILE *f
, xmlDocPtr cur
) {
1076 xmlOutputBufferPtr buf
;
1077 xmlCharEncodingHandlerPtr handler
= NULL
;
1078 const char *encoding
;
1083 if ((cur
== NULL
) || (f
== NULL
)) {
1087 encoding
= (const char *) htmlGetMetaEncoding(cur
);
1089 if (encoding
!= NULL
) {
1090 xmlCharEncoding enc
;
1092 enc
= xmlParseCharEncoding(encoding
);
1093 if (enc
!= XML_CHAR_ENCODING_UTF8
) {
1094 handler
= xmlFindCharEncodingHandler(encoding
);
1095 if (handler
== NULL
)
1096 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING
, NULL
, encoding
);
1100 * Fallback to HTML or ASCII when the encoding is unspecified
1102 if (handler
== NULL
)
1103 handler
= xmlFindCharEncodingHandler("HTML");
1104 if (handler
== NULL
)
1105 handler
= xmlFindCharEncodingHandler("ascii");
1108 buf
= xmlOutputBufferCreateFile(f
, handler
);
1109 if (buf
== NULL
) return(-1);
1110 htmlDocContentDumpOutput(buf
, cur
, NULL
);
1112 ret
= xmlOutputBufferClose(buf
);
1118 * @filename: the filename (or URL)
1119 * @cur: the document
1121 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1123 * returns: the number of bytes written or -1 in case of failure.
1126 htmlSaveFile(const char *filename
, xmlDocPtr cur
) {
1127 xmlOutputBufferPtr buf
;
1128 xmlCharEncodingHandlerPtr handler
= NULL
;
1129 const char *encoding
;
1132 if ((cur
== NULL
) || (filename
== NULL
))
1137 encoding
= (const char *) htmlGetMetaEncoding(cur
);
1139 if (encoding
!= NULL
) {
1140 xmlCharEncoding enc
;
1142 enc
= xmlParseCharEncoding(encoding
);
1143 if (enc
!= XML_CHAR_ENCODING_UTF8
) {
1144 handler
= xmlFindCharEncodingHandler(encoding
);
1145 if (handler
== NULL
)
1146 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING
, NULL
, encoding
);
1150 * Fallback to HTML or ASCII when the encoding is unspecified
1152 if (handler
== NULL
)
1153 handler
= xmlFindCharEncodingHandler("HTML");
1154 if (handler
== NULL
)
1155 handler
= xmlFindCharEncodingHandler("ascii");
1159 * save the content to a temp buffer.
1161 buf
= xmlOutputBufferCreateFilename(filename
, handler
, cur
->compression
);
1162 if (buf
== NULL
) return(0);
1164 htmlDocContentDumpOutput(buf
, cur
, NULL
);
1166 ret
= xmlOutputBufferClose(buf
);
1171 * htmlSaveFileFormat:
1172 * @filename: the filename
1173 * @cur: the document
1174 * @format: should formatting spaces be added
1175 * @encoding: the document encoding
1177 * Dump an HTML document to a file using a given encoding.
1179 * returns: the number of bytes written or -1 in case of failure.
1182 htmlSaveFileFormat(const char *filename
, xmlDocPtr cur
,
1183 const char *encoding
, int format
) {
1184 xmlOutputBufferPtr buf
;
1185 xmlCharEncodingHandlerPtr handler
= NULL
;
1188 if ((cur
== NULL
) || (filename
== NULL
))
1193 if (encoding
!= NULL
) {
1194 xmlCharEncoding enc
;
1196 enc
= xmlParseCharEncoding(encoding
);
1197 if (enc
!= XML_CHAR_ENCODING_UTF8
) {
1198 handler
= xmlFindCharEncodingHandler(encoding
);
1199 if (handler
== NULL
)
1200 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING
, NULL
, encoding
);
1202 htmlSetMetaEncoding(cur
, (const xmlChar
*) encoding
);
1204 htmlSetMetaEncoding(cur
, (const xmlChar
*) "UTF-8");
1207 * Fallback to HTML or ASCII when the encoding is unspecified
1209 if (handler
== NULL
)
1210 handler
= xmlFindCharEncodingHandler("HTML");
1211 if (handler
== NULL
)
1212 handler
= xmlFindCharEncodingHandler("ascii");
1216 * save the content to a temp buffer.
1218 buf
= xmlOutputBufferCreateFilename(filename
, handler
, 0);
1219 if (buf
== NULL
) return(0);
1221 htmlDocContentDumpFormatOutput(buf
, cur
, encoding
, format
);
1223 ret
= xmlOutputBufferClose(buf
);
1229 * @filename: the filename
1230 * @cur: the document
1231 * @encoding: the document encoding
1233 * Dump an HTML document to a file using a given encoding
1234 * and formatting returns/spaces are added.
1236 * returns: the number of bytes written or -1 in case of failure.
1239 htmlSaveFileEnc(const char *filename
, xmlDocPtr cur
, const char *encoding
) {
1240 return(htmlSaveFileFormat(filename
, cur
, encoding
, 1));
1243 #endif /* LIBXML_OUTPUT_ENABLED */
1245 #define bottom_HTMLtree
1246 #include "elfgcchack.h"
1247 #endif /* LIBXML_HTML_ENABLED */