reactos/lib/3rdparty/libxml2/HTMLtree.c

   1 /*
   2  * HTMLtree.c : implementation of access function for an HTML tree.
   3  *
   4  * See Copyright for the status of this software.
   5  *
   6  * daniel@veillard.com
   7  */
   8
   9
  10 #define IN_LIBXML
  11 #include "libxml.h"
  12 #ifdef LIBXML_HTML_ENABLED
  13
  14 #include <string.h> /* for memset() only ! */
  15
  16 #ifdef HAVE_CTYPE_H
  17 #include <ctype.h>
  18 #endif
  19 #ifdef HAVE_STDLIB_H
  20 #include <stdlib.h>
  21 #endif
  22
  23 #include <libxml/xmlmemory.h>
  24 #include <libxml/HTMLparser.h>
  25 #include <libxml/HTMLtree.h>
  26 #include <libxml/entities.h>
  27 #include <libxml/valid.h>
  28 #include <libxml/xmlerror.h>
  29 #include <libxml/parserInternals.h>
  30 #include <libxml/globals.h>
  31 #include <libxml/uri.h>
  32
  33 #include "buf.h"
  34
  35 /************************************************************************
  36  *                                                                      *
  37  *              Getting/Setting encoding meta tags                      *
  38  *                                                                      *
  39  ************************************************************************/
  40
  41 /**
  42  * htmlGetMetaEncoding:
  43  * @doc:  the document
  44  *
  45  * Encoding definition lookup in the Meta tags
  46  *
  47  * Returns the current encoding as flagged in the HTML source
  48  */
  49 const xmlChar *
  50 htmlGetMetaEncoding(htmlDocPtr doc) {
  51     htmlNodePtr cur;
  52     const xmlChar *content;
  53     const xmlChar *encoding;
  54
  55     if (doc == NULL)
  56         return(NULL);
  57     cur = doc->children;
  58
  59     /*
  60      * Search the html
  61      */
  62     while (cur != NULL) {
  63         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
  64             if (xmlStrEqual(cur->name, BAD_CAST"html"))
  65                 break;
  66             if (xmlStrEqual(cur->name, BAD_CAST"head"))
  67                 goto found_head;
  68             if (xmlStrEqual(cur->name, BAD_CAST"meta"))
  69                 goto found_meta;
  70         }
  71         cur = cur->next;
  72     }
  73     if (cur == NULL)
  74         return(NULL);
  75     cur = cur->children;
  76
  77     /*
  78      * Search the head
  79      */
  80     while (cur != NULL) {
  81         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
  82             if (xmlStrEqual(cur->name, BAD_CAST"head"))
  83                 break;
  84             if (xmlStrEqual(cur->name, BAD_CAST"meta"))
  85                 goto found_meta;
  86         }
  87         cur = cur->next;
  88     }
  89     if (cur == NULL)
  90         return(NULL);
  91 found_head:
  92     cur = cur->children;
  93
  94     /*
  95      * Search the meta elements
  96      */
  97 found_meta:
  98     while (cur != NULL) {
  99         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 100             if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
 101                 xmlAttrPtr attr = cur->properties;
 102                 int http;
 103                 const xmlChar *value;
 104
 105                 content = NULL;
 106                 http = 0;
 107                 while (attr != NULL) {
 108                     if ((attr->children != NULL) &&
 109                         (attr->children->type == XML_TEXT_NODE) &&
 110                         (attr->children->next == NULL)) {
 111                         value = attr->children->content;
 112                         if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
 113                          && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
 114                             http = 1;
 115                         else if ((value != NULL)
 116                          && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
 117                             content = value;
 118                         if ((http != 0) && (content != NULL))
 119                             goto found_content;
 120                     }
 121                     attr = attr->next;
 122                 }
 123             }
 124         }
 125         cur = cur->next;
 126     }
 127     return(NULL);
 128
 129 found_content:
 130     encoding = xmlStrstr(content, BAD_CAST"charset=");
 131     if (encoding == NULL)
 132         encoding = xmlStrstr(content, BAD_CAST"Charset=");
 133     if (encoding == NULL)
 134         encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
 135     if (encoding != NULL) {
 136         encoding += 8;
 137     } else {
 138         encoding = xmlStrstr(content, BAD_CAST"charset =");
 139         if (encoding == NULL)
 140             encoding = xmlStrstr(content, BAD_CAST"Charset =");
 141         if (encoding == NULL)
 142             encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
 143         if (encoding != NULL)
 144             encoding += 9;
 145     }
 146     if (encoding != NULL) {
 147         while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
 148     }
 149     return(encoding);
 150 }
 151
 152 /**
 153  * htmlSetMetaEncoding:
 154  * @doc:  the document
 155  * @encoding:  the encoding string
 156  *
 157  * Sets the current encoding in the Meta tags
 158  * NOTE: this will not change the document content encoding, just
 159  * the META flag associated.
 160  *
 161  * Returns 0 in case of success and -1 in case of error
 162  */
 163 int
 164 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
 165     htmlNodePtr cur, meta = NULL, head = NULL;
 166     const xmlChar *content = NULL;
 167     char newcontent[100];
 168
 169     newcontent[0] = 0;
 170
 171     if (doc == NULL)
 172         return(-1);
 173
 174     /* html isn't a real encoding it's just libxml2 way to get entities */
 175     if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
 176         return(-1);
 177
 178     if (encoding != NULL) {
 179         snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
 180                 (char *)encoding);
 181         newcontent[sizeof(newcontent) - 1] = 0;
 182     }
 183
 184     cur = doc->children;
 185
 186     /*
 187      * Search the html
 188      */
 189     while (cur != NULL) {
 190         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 191             if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
 192                 break;
 193             if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
 194                 goto found_head;
 195             if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
 196                 goto found_meta;
 197         }
 198         cur = cur->next;
 199     }
 200     if (cur == NULL)
 201         return(-1);
 202     cur = cur->children;
 203
 204     /*
 205      * Search the head
 206      */
 207     while (cur != NULL) {
 208         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 209             if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
 210                 break;
 211             if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
 212                 head = cur->parent;
 213                 goto found_meta;
 214             }
 215         }
 216         cur = cur->next;
 217     }
 218     if (cur == NULL)
 219         return(-1);
 220 found_head:
 221     head = cur;
 222     if (cur->children == NULL)
 223         goto create;
 224     cur = cur->children;
 225
 226 found_meta:
 227     /*
 228      * Search and update all the remaining the meta elements carrying
 229      * encoding informations
 230      */
 231     while (cur != NULL) {
 232         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 233             if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
 234                 xmlAttrPtr attr = cur->properties;
 235                 int http;
 236                 const xmlChar *value;
 237
 238                 content = NULL;
 239                 http = 0;
 240                 while (attr != NULL) {
 241                     if ((attr->children != NULL) &&
 242                         (attr->children->type == XML_TEXT_NODE) &&
 243                         (attr->children->next == NULL)) {
 244                         value = attr->children->content;
 245                         if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
 246                          && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
 247                             http = 1;
 248                         else
 249                         {
 250                            if ((value != NULL) &&
 251                                (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
 252                                content = value;
 253                         }
 254                         if ((http != 0) && (content != NULL))
 255                             break;
 256                     }
 257                     attr = attr->next;
 258                 }
 259                 if ((http != 0) && (content != NULL)) {
 260                     meta = cur;
 261                     break;
 262                 }
 263
 264             }
 265         }
 266         cur = cur->next;
 267     }
 268 create:
 269     if (meta == NULL) {
 270         if ((encoding != NULL) && (head != NULL)) {
 271             /*
 272              * Create a new Meta element with the right attributes
 273              */
 274
 275             meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
 276             if (head->children == NULL)
 277                 xmlAddChild(head, meta);
 278             else
 279                 xmlAddPrevSibling(head->children, meta);
 280             xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
 281             xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
 282         }
 283     } else {
 284         /* remove the meta tag if NULL is passed */
 285         if (encoding == NULL) {
 286             xmlUnlinkNode(meta);
 287             xmlFreeNode(meta);
 288         }
 289         /* change the document only if there is a real encoding change */
 290         else if (xmlStrcasestr(content, encoding) == NULL) {
 291             xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
 292         }
 293     }
 294
 295
 296     return(0);
 297 }
 298
 299 /**
 300  * booleanHTMLAttrs:
 301  *
 302  * These are the HTML attributes which will be output
 303  * in minimized form, i.e. <option selected="selected"> will be
 304  * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 305  *
 306  */
 307 static const char* htmlBooleanAttrs[] = {
 308   "checked", "compact", "declare", "defer", "disabled", "ismap",
 309   "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
 310   "selected", NULL
 311 };
 312
 313
 314 /**
 315  * htmlIsBooleanAttr:
 316  * @name:  the name of the attribute to check
 317  *
 318  * Determine if a given attribute is a boolean attribute.
 319  *
 320  * returns: false if the attribute is not boolean, true otherwise.
 321  */
 322 int
 323 htmlIsBooleanAttr(const xmlChar *name)
 324 {
 325     int i = 0;
 326
 327     while (htmlBooleanAttrs[i] != NULL) {
 328         if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
 329             return 1;
 330         i++;
 331     }
 332     return 0;
 333 }
 334
 335 #ifdef LIBXML_OUTPUT_ENABLED
 336 /*
 337  * private routine exported from xmlIO.c
 338  */
 339 xmlOutputBufferPtr
 340 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
 341 /************************************************************************
 342  *                                                                      *
 343  *                      Output error handlers                           *
 344  *                                                                      *
 345  ************************************************************************/
 346 /**
 347  * htmlSaveErrMemory:
 348  * @extra:  extra informations
 349  *
 350  * Handle an out of memory condition
 351  */
 352 static void
 353 htmlSaveErrMemory(const char *extra)
 354 {
 355     __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
 356 }
 357
 358 /**
 359  * htmlSaveErr:
 360  * @code:  the error number
 361  * @node:  the location of the error.
 362  * @extra:  extra informations
 363  *
 364  * Handle an out of memory condition
 365  */
 366 static void
 367 htmlSaveErr(int code, xmlNodePtr node, const char *extra)
 368 {
 369     const char *msg = NULL;
 370
 371     switch(code) {
 372         case XML_SAVE_NOT_UTF8:
 373             msg = "string is not in UTF-8\n";
 374             break;
 375         case XML_SAVE_CHAR_INVALID:
 376             msg = "invalid character value\n";
 377             break;
 378         case XML_SAVE_UNKNOWN_ENCODING:
 379             msg = "unknown encoding %s\n";
 380             break;
 381         case XML_SAVE_NO_DOCTYPE:
 382             msg = "HTML has no DOCTYPE\n";
 383             break;
 384         default:
 385             msg = "unexpected error number\n";
 386     }
 387     __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
 388 }
 389
 390 /************************************************************************
 391  *                                                                      *
 392  *              Dumping HTML tree content to a simple buffer            *
 393  *                                                                      *
 394  ************************************************************************/
 395
 396 /**
 397  * htmlBufNodeDumpFormat:
 398  * @buf:  the xmlBufPtr output
 399  * @doc:  the document
 400  * @cur:  the current node
 401  * @format:  should formatting spaces been added
 402  *
 403  * Dump an HTML node, recursive behaviour,children are printed too.
 404  *
 405  * Returns the number of byte written or -1 in case of error
 406  */
 407 static size_t
 408 htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
 409                    int format) {
 410     size_t use;
 411     int ret;
 412     xmlOutputBufferPtr outbuf;
 413
 414     if (cur == NULL) {
 415         return (-1);
 416     }
 417     if (buf == NULL) {
 418         return (-1);
 419     }
 420     outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
 421     if (outbuf == NULL) {
 422         htmlSaveErrMemory("allocating HTML output buffer");
 423         return (-1);
 424     }
 425     memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
 426     outbuf->buffer = buf;
 427     outbuf->encoder = NULL;
 428     outbuf->writecallback = NULL;
 429     outbuf->closecallback = NULL;
 430     outbuf->context = NULL;
 431     outbuf->written = 0;
 432
 433     use = xmlBufUse(buf);
 434     htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
 435     xmlFree(outbuf);
 436     ret = xmlBufUse(buf) - use;
 437     return (ret);
 438 }
 439
 440 /**
 441  * htmlNodeDump:
 442  * @buf:  the HTML buffer output
 443  * @doc:  the document
 444  * @cur:  the current node
 445  *
 446  * Dump an HTML node, recursive behaviour,children are printed too,
 447  * and formatting returns are added.
 448  *
 449  * Returns the number of byte written or -1 in case of error
 450  */
 451 int
 452 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
 453     xmlBufPtr buffer;
 454     size_t ret;
 455
 456     if ((buf == NULL) || (cur == NULL))
 457         return(-1);
 458
 459     xmlInitParser();
 460     buffer = xmlBufFromBuffer(buf);
 461     if (buffer == NULL)
 462         return(-1);
 463
 464     ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
 465
 466     xmlBufBackToBuffer(buffer);
 467
 468     if (ret > INT_MAX)
 469         return(-1);
 470     return((int) ret);
 471 }
 472
 473 /**
 474  * htmlNodeDumpFileFormat:
 475  * @out:  the FILE pointer
 476  * @doc:  the document
 477  * @cur:  the current node
 478  * @encoding: the document encoding
 479  * @format:  should formatting spaces been added
 480  *
 481  * Dump an HTML node, recursive behaviour,children are printed too.
 482  *
 483  * TODO: if encoding == NULL try to save in the doc encoding
 484  *
 485  * returns: the number of byte written or -1 in case of failure.
 486  */
 487 int
 488 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
 489                        xmlNodePtr cur, const char *encoding, int format) {
 490     xmlOutputBufferPtr buf;
 491     xmlCharEncodingHandlerPtr handler = NULL;
 492     int ret;
 493
 494     xmlInitParser();
 495
 496     if (encoding != NULL) {
 497         xmlCharEncoding enc;
 498
 499         enc = xmlParseCharEncoding(encoding);
 500         if (enc != XML_CHAR_ENCODING_UTF8) {
 501             handler = xmlFindCharEncodingHandler(encoding);
 502             if (handler == NULL)
 503                 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
 504         }
 505     }
 506
 507     /*
 508      * Fallback to HTML or ASCII when the encoding is unspecified
 509      */
 510     if (handler == NULL)
 511         handler = xmlFindCharEncodingHandler("HTML");
 512     if (handler == NULL)
 513         handler = xmlFindCharEncodingHandler("ascii");
 514
 515     /*
 516      * save the content to a temp buffer.
 517      */
 518     buf = xmlOutputBufferCreateFile(out, handler);
 519     if (buf == NULL) return(0);
 520
 521     htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
 522
 523     ret = xmlOutputBufferClose(buf);
 524     return(ret);
 525 }
 526
 527 /**
 528  * htmlNodeDumpFile:
 529  * @out:  the FILE pointer
 530  * @doc:  the document
 531  * @cur:  the current node
 532  *
 533  * Dump an HTML node, recursive behaviour,children are printed too,
 534  * and formatting returns are added.
 535  */
 536 void
 537 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
 538     htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
 539 }
 540
 541 /**
 542  * htmlDocDumpMemoryFormat:
 543  * @cur:  the document
 544  * @mem:  OUT: the memory pointer
 545  * @size:  OUT: the memory length
 546  * @format:  should formatting spaces been added
 547  *
 548  * Dump an HTML document in memory and return the xmlChar * and it's size.
 549  * It's up to the caller to free the memory.
 550  */
 551 void
 552 htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
 553     xmlOutputBufferPtr buf;
 554     xmlCharEncodingHandlerPtr handler = NULL;
 555     const char *encoding;
 556
 557     xmlInitParser();
 558
 559     if ((mem == NULL) || (size == NULL))
 560         return;
 561     if (cur == NULL) {
 562         *mem = NULL;
 563         *size = 0;
 564         return;
 565     }
 566
 567     encoding = (const char *) htmlGetMetaEncoding(cur);
 568
 569     if (encoding != NULL) {
 570         xmlCharEncoding enc;
 571
 572         enc = xmlParseCharEncoding(encoding);
 573         if (enc != cur->charset) {
 574             if (cur->charset != XML_CHAR_ENCODING_UTF8) {
 575                 /*
 576                  * Not supported yet
 577                  */
 578                 *mem = NULL;
 579                 *size = 0;
 580                 return;
 581             }
 582
 583             handler = xmlFindCharEncodingHandler(encoding);
 584             if (handler == NULL)
 585                 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
 586
 587         } else {
 588             handler = xmlFindCharEncodingHandler(encoding);
 589         }
 590     }
 591
 592     /*
 593      * Fallback to HTML or ASCII when the encoding is unspecified
 594      */
 595     if (handler == NULL)
 596         handler = xmlFindCharEncodingHandler("HTML");
 597     if (handler == NULL)
 598         handler = xmlFindCharEncodingHandler("ascii");
 599
 600     buf = xmlAllocOutputBufferInternal(handler);
 601     if (buf == NULL) {
 602         *mem = NULL;
 603         *size = 0;
 604         return;
 605     }
 606
 607     htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
 608
 609     xmlOutputBufferFlush(buf);
 610     if (buf->conv != NULL) {
 611         *size = xmlBufUse(buf->conv);
 612         *mem = xmlStrndup(xmlBufContent(buf->conv), *size);
 613     } else {
 614         *size = xmlBufUse(buf->buffer);
 615         *mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
 616     }
 617     (void)xmlOutputBufferClose(buf);
 618 }
 619
 620 /**
 621  * htmlDocDumpMemory:
 622  * @cur:  the document
 623  * @mem:  OUT: the memory pointer
 624  * @size:  OUT: the memory length
 625  *
 626  * Dump an HTML document in memory and return the xmlChar * and it's size.
 627  * It's up to the caller to free the memory.
 628  */
 629 void
 630 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
 631         htmlDocDumpMemoryFormat(cur, mem, size, 1);
 632 }
 633
 634
 635 /************************************************************************
 636  *                                                                      *
 637  *              Dumping HTML tree content to an I/O output buffer       *
 638  *                                                                      *
 639  ************************************************************************/
 640
 641 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
 642
 643 /**
 644  * htmlDtdDumpOutput:
 645  * @buf:  the HTML buffer output
 646  * @doc:  the document
 647  * @encoding:  the encoding string
 648  *
 649  * TODO: check whether encoding is needed
 650  *
 651  * Dump the HTML document DTD, if any.
 652  */
 653 static void
 654 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 655                   const char *encoding ATTRIBUTE_UNUSED) {
 656     xmlDtdPtr cur = doc->intSubset;
 657
 658     if (cur == NULL) {
 659         htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
 660         return;
 661     }
 662     xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
 663     xmlOutputBufferWriteString(buf, (const char *)cur->name);
 664     if (cur->ExternalID != NULL) {
 665         xmlOutputBufferWriteString(buf, " PUBLIC ");
 666         xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
 667         if (cur->SystemID != NULL) {
 668             xmlOutputBufferWriteString(buf, " ");
 669             xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
 670         }
 671     } else if (cur->SystemID != NULL &&
 672                xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) {
 673         xmlOutputBufferWriteString(buf, " SYSTEM ");
 674         xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
 675     }
 676     xmlOutputBufferWriteString(buf, ">\n");
 677 }
 678
 679 /**
 680  * htmlAttrDumpOutput:
 681  * @buf:  the HTML buffer output
 682  * @doc:  the document
 683  * @cur:  the attribute pointer
 684  * @encoding:  the encoding string
 685  *
 686  * Dump an HTML attribute
 687  */
 688 static void
 689 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
 690                    const char *encoding ATTRIBUTE_UNUSED) {
 691     xmlChar *value;
 692
 693     /*
 694      * The html output method should not escape a & character
 695      * occurring in an attribute value immediately followed by
 696      * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
 697      * This is implemented in xmlEncodeEntitiesReentrant
 698      */
 699
 700     if (cur == NULL) {
 701         return;
 702     }
 703     xmlOutputBufferWriteString(buf, " ");
 704     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 705         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 706         xmlOutputBufferWriteString(buf, ":");
 707     }
 708     xmlOutputBufferWriteString(buf, (const char *)cur->name);
 709     if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
 710         value = xmlNodeListGetString(doc, cur->children, 0);
 711         if (value) {
 712             xmlOutputBufferWriteString(buf, "=");
 713             if ((cur->ns == NULL) && (cur->parent != NULL) &&
 714                 (cur->parent->ns == NULL) &&
 715                 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
 716                  (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
 717                  (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
 718                  ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
 719                   (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
 720                 xmlChar *tmp = value;
 721                 /* xmlURIEscapeStr() escapes '"' so it can be safely used. */
 722                 xmlBufCCat(buf->buffer, "\"");
 723
 724                 while (IS_BLANK_CH(*tmp)) tmp++;
 725
 726                 /* URI Escape everything, except server side includes. */
 727                 for ( ; ; ) {
 728                     xmlChar *escaped;
 729                     xmlChar endChar;
 730                     xmlChar *end = NULL;
 731                     xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--");
 732                     if (start != NULL) {
 733                         end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->");
 734                         if (end != NULL) {
 735                             *start = '\0';
 736                         }
 737                     }
 738
 739                     /* Escape the whole string, or until start (set to '\0'). */
 740                     escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
 741                     if (escaped != NULL) {
 742                         xmlBufCat(buf->buffer, escaped);
 743                         xmlFree(escaped);
 744                     } else {
 745                         xmlBufCat(buf->buffer, tmp);
 746                     }
 747
 748                     if (end == NULL) { /* Everything has been written. */
 749                         break;
 750                     }
 751
 752                     /* Do not escape anything within server side includes. */
 753                     *start = '<'; /* Restore the first character of "<!--". */
 754                     end += 3; /* strlen("-->") */
 755                     endChar = *end;
 756                     *end = '\0';
 757                     xmlBufCat(buf->buffer, start);
 758                     *end = endChar;
 759                     tmp = end;
 760                 }
 761
 762                 xmlBufCCat(buf->buffer, "\"");
 763             } else {
 764                 xmlBufWriteQuotedString(buf->buffer, value);
 765             }
 766             xmlFree(value);
 767         } else  {
 768             xmlOutputBufferWriteString(buf, "=\"\"");
 769         }
 770     }
 771 }
 772
 773 /**
 774  * htmlAttrListDumpOutput:
 775  * @buf:  the HTML buffer output
 776  * @doc:  the document
 777  * @cur:  the first attribute pointer
 778  * @encoding:  the encoding string
 779  *
 780  * Dump a list of HTML attributes
 781  */
 782 static void
 783 htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
 784     if (cur == NULL) {
 785         return;
 786     }
 787     while (cur != NULL) {
 788         htmlAttrDumpOutput(buf, doc, cur, encoding);
 789         cur = cur->next;
 790     }
 791 }
 792
 793
 794
 795 /**
 796  * htmlNodeListDumpOutput:
 797  * @buf:  the HTML buffer output
 798  * @doc:  the document
 799  * @cur:  the first node
 800  * @encoding:  the encoding string
 801  * @format:  should formatting spaces been added
 802  *
 803  * Dump an HTML node list, recursive behaviour,children are printed too.
 804  */
 805 static void
 806 htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 807                        xmlNodePtr cur, const char *encoding, int format) {
 808     if (cur == NULL) {
 809         return;
 810     }
 811     while (cur != NULL) {
 812         htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
 813         cur = cur->next;
 814     }
 815 }
 816
 817 /**
 818  * htmlNodeDumpFormatOutput:
 819  * @buf:  the HTML buffer output
 820  * @doc:  the document
 821  * @cur:  the current node
 822  * @encoding:  the encoding string
  823  * @format:  should formatting spaces be added
  824  *
  825  * Dump an HTML node, recursive behaviour, children are printed too.
 826  */
 827 void
 828 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 829                          xmlNodePtr cur, const char *encoding, int format) {
 830     const htmlElemDesc * info;
 831
 832     xmlInitParser();
 833
 834     if ((cur == NULL) || (buf == NULL)) {
 835         return;
 836     }
 837     /*
 838      * Special cases.
 839      */
 840     if (cur->type == XML_DTD_NODE)
 841         return;
 842     if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
 843         (cur->type == XML_DOCUMENT_NODE)){
 844         htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
 845         return;
 846     }
 847     if (cur->type == XML_ATTRIBUTE_NODE) {
 848         htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
 849         return;
 850     }
 851     if (cur->type == HTML_TEXT_NODE) {
 852         if (cur->content != NULL) {
 853             if (((cur->name == (const xmlChar *)xmlStringText) ||
 854                  (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
 855                 ((cur->parent == NULL) ||
 856                  ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
 857                   (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
 858                 xmlChar *buffer;
 859
 860                 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
 861                 if (buffer != NULL) {
 862                     xmlOutputBufferWriteString(buf, (const char *)buffer);
 863                     xmlFree(buffer);
 864                 }
 865             } else {
 866                 xmlOutputBufferWriteString(buf, (const char *)cur->content);
 867             }
 868         }
 869         return;
 870     }
 871     if (cur->type == HTML_COMMENT_NODE) {
 872         if (cur->content != NULL) {
 873             xmlOutputBufferWriteString(buf, "<!--");
 874             xmlOutputBufferWriteString(buf, (const char *)cur->content);
 875             xmlOutputBufferWriteString(buf, "-->");
 876         }
 877         return;
 878     }
 879     if (cur->type == HTML_PI_NODE) {
 880         if (cur->name == NULL)
 881             return;
 882         xmlOutputBufferWriteString(buf, "<?");
 883         xmlOutputBufferWriteString(buf, (const char *)cur->name);
 884         if (cur->content != NULL) {
 885             xmlOutputBufferWriteString(buf, " ");
 886             xmlOutputBufferWriteString(buf, (const char *)cur->content);
 887         }
 888         xmlOutputBufferWriteString(buf, ">");
 889         return;
 890     }
 891     if (cur->type == HTML_ENTITY_REF_NODE) {
 892         xmlOutputBufferWriteString(buf, "&");
 893         xmlOutputBufferWriteString(buf, (const char *)cur->name);
 894         xmlOutputBufferWriteString(buf, ";");
 895         return;
 896     }
 897     if (cur->type == HTML_PRESERVE_NODE) {
 898         if (cur->content != NULL) {
 899             xmlOutputBufferWriteString(buf, (const char *)cur->content);
 900         }
 901         return;
 902     }
 903
 904     /*
 905      * Get specific HTML info for that node.
 906      */
 907     if (cur->ns == NULL)
 908         info = htmlTagLookup(cur->name);
 909     else
 910         info = NULL;
 911
 912     xmlOutputBufferWriteString(buf, "<");
 913     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 914         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 915         xmlOutputBufferWriteString(buf, ":");
 916     }
 917     xmlOutputBufferWriteString(buf, (const char *)cur->name);
 918     if (cur->nsDef)
 919         xmlNsListDumpOutput(buf, cur->nsDef);
 920     if (cur->properties != NULL)
 921         htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
 922
 923     if ((info != NULL) && (info->empty)) {
 924         xmlOutputBufferWriteString(buf, ">");
 925         if ((format) && (!info->isinline) && (cur->next != NULL)) {
 926             if ((cur->next->type != HTML_TEXT_NODE) &&
 927                 (cur->next->type != HTML_ENTITY_REF_NODE) &&
 928                 (cur->parent != NULL) &&
 929                 (cur->parent->name != NULL) &&
 930                 (cur->parent->name[0] != 'p')) /* p, pre, param */
 931                 xmlOutputBufferWriteString(buf, "\n");
 932         }
 933         return;
 934     }
 935     if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
 936         (cur->children == NULL)) {
 937         if ((info != NULL) && (info->saveEndTag != 0) &&
 938             (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
 939             (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
 940             xmlOutputBufferWriteString(buf, ">");
 941         } else {
 942             xmlOutputBufferWriteString(buf, "></");
 943             if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 944                 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 945                 xmlOutputBufferWriteString(buf, ":");
 946             }
 947             xmlOutputBufferWriteString(buf, (const char *)cur->name);
 948             xmlOutputBufferWriteString(buf, ">");
 949         }
 950         if ((format) && (cur->next != NULL) &&
 951             (info != NULL) && (!info->isinline)) {
 952             if ((cur->next->type != HTML_TEXT_NODE) &&
 953                 (cur->next->type != HTML_ENTITY_REF_NODE) &&
 954                 (cur->parent != NULL) &&
 955                 (cur->parent->name != NULL) &&
 956                 (cur->parent->name[0] != 'p')) /* p, pre, param */
 957                 xmlOutputBufferWriteString(buf, "\n");
 958         }
 959         return;
 960     }
 961     xmlOutputBufferWriteString(buf, ">");
 962     if ((cur->type != XML_ELEMENT_NODE) &&
 963         (cur->content != NULL)) {
 964             /*
 965              * Uses the OutputBuffer property to automatically convert
 966              * invalids to charrefs
 967              */
 968
 969             xmlOutputBufferWriteString(buf, (const char *) cur->content);
 970     }
 971     if (cur->children != NULL) {
 972         if ((format) && (info != NULL) && (!info->isinline) &&
 973             (cur->children->type != HTML_TEXT_NODE) &&
 974             (cur->children->type != HTML_ENTITY_REF_NODE) &&
 975             (cur->children != cur->last) &&
 976             (cur->name != NULL) &&
 977             (cur->name[0] != 'p')) /* p, pre, param */
 978             xmlOutputBufferWriteString(buf, "\n");
 979         htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
 980         if ((format) && (info != NULL) && (!info->isinline) &&
 981             (cur->last->type != HTML_TEXT_NODE) &&
 982             (cur->last->type != HTML_ENTITY_REF_NODE) &&
 983             (cur->children != cur->last) &&
 984             (cur->name != NULL) &&
 985             (cur->name[0] != 'p')) /* p, pre, param */
 986             xmlOutputBufferWriteString(buf, "\n");
 987     }
 988     xmlOutputBufferWriteString(buf, "</");
 989     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 990         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 991         xmlOutputBufferWriteString(buf, ":");
 992     }
 993     xmlOutputBufferWriteString(buf, (const char *)cur->name);
 994     xmlOutputBufferWriteString(buf, ">");
 995     if ((format) && (info != NULL) && (!info->isinline) &&
 996         (cur->next != NULL)) {
 997         if ((cur->next->type != HTML_TEXT_NODE) &&
 998             (cur->next->type != HTML_ENTITY_REF_NODE) &&
 999             (cur->parent != NULL) &&
1000             (cur->parent->name != NULL) &&
1001             (cur->parent->name[0] != 'p')) /* p, pre, param */
1002             xmlOutputBufferWriteString(buf, "\n");
1003     }
1004 }
1005
1006 /**
1007  * htmlNodeDumpOutput:
1008  * @buf:  the HTML buffer output
1009  * @doc:  the document
1010  * @cur:  the current node
1011  * @encoding:  the encoding string
1012  *
1013  * Dump an HTML node, recursive behaviour,children are printed too,
1014  * and formatting returns/spaces are added.
1015  */
1016 void
1017 htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
1018                    xmlNodePtr cur, const char *encoding) {
1019     htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
1020 }
1021
1022 /**
1023  * htmlDocContentDumpFormatOutput:
1024  * @buf:  the HTML buffer output
1025  * @cur:  the document
1026  * @encoding:  the encoding string
1027  * @format:  should formatting spaces been added
1028  *
1029  * Dump an HTML document.
1030  */
1031 void
1032 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1033                                const char *encoding, int format) {
1034     int type;
1035
1036     xmlInitParser();
1037
1038     if ((buf == NULL) || (cur == NULL))
1039         return;
1040
1041     /*
1042      * force to output the stuff as HTML, especially for entities
1043      */
1044     type = cur->type;
1045     cur->type = XML_HTML_DOCUMENT_NODE;
1046     if (cur->intSubset != NULL) {
1047         htmlDtdDumpOutput(buf, cur, NULL);
1048     }
1049     if (cur->children != NULL) {
1050         htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
1051     }
1052     xmlOutputBufferWriteString(buf, "\n");
1053     cur->type = (xmlElementType) type;
1054 }
1055
1056 /**
1057  * htmlDocContentDumpOutput:
1058  * @buf:  the HTML buffer output
1059  * @cur:  the document
1060  * @encoding:  the encoding string
1061  *
1062  * Dump an HTML document. Formating return/spaces are added.
1063  */
1064 void
1065 htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1066                          const char *encoding) {
1067     htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1068 }
1069
1070 /************************************************************************
1071  *                                                                      *
1072  *              Saving functions front-ends                             *
1073  *                                                                      *
1074  ************************************************************************/
1075
1076 /**
1077  * htmlDocDump:
1078  * @f:  the FILE*
1079  * @cur:  the document
1080  *
1081  * Dump an HTML document to an open FILE.
1082  *
1083  * returns: the number of byte written or -1 in case of failure.
1084  */
1085 int
1086 htmlDocDump(FILE *f, xmlDocPtr cur) {
1087     xmlOutputBufferPtr buf;
1088     xmlCharEncodingHandlerPtr handler = NULL;
1089     const char *encoding;
1090     int ret;
1091
1092     xmlInitParser();
1093
1094     if ((cur == NULL) || (f == NULL)) {
1095         return(-1);
1096     }
1097
1098     encoding = (const char *) htmlGetMetaEncoding(cur);
1099
1100     if (encoding != NULL) {
1101         xmlCharEncoding enc;
1102
1103         enc = xmlParseCharEncoding(encoding);
1104         if (enc != cur->charset) {
1105             if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1106                 /*
1107                  * Not supported yet
1108                  */
1109                 return(-1);
1110             }
1111
1112             handler = xmlFindCharEncodingHandler(encoding);
1113             if (handler == NULL)
1114                 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1115         } else {
1116             handler = xmlFindCharEncodingHandler(encoding);
1117         }
1118     }
1119
1120     /*
1121      * Fallback to HTML or ASCII when the encoding is unspecified
1122      */
1123     if (handler == NULL)
1124         handler = xmlFindCharEncodingHandler("HTML");
1125     if (handler == NULL)
1126         handler = xmlFindCharEncodingHandler("ascii");
1127
1128     buf = xmlOutputBufferCreateFile(f, handler);
1129     if (buf == NULL) return(-1);
1130     htmlDocContentDumpOutput(buf, cur, NULL);
1131
1132     ret = xmlOutputBufferClose(buf);
1133     return(ret);
1134 }
1135
1136 /**
1137  * htmlSaveFile:
1138  * @filename:  the filename (or URL)
1139  * @cur:  the document
1140  *
1141  * Dump an HTML document to a file. If @filename is "-" the stdout file is
1142  * used.
1143  * returns: the number of byte written or -1 in case of failure.
1144  */
1145 int
1146 htmlSaveFile(const char *filename, xmlDocPtr cur) {
1147     xmlOutputBufferPtr buf;
1148     xmlCharEncodingHandlerPtr handler = NULL;
1149     const char *encoding;
1150     int ret;
1151
1152     if ((cur == NULL) || (filename == NULL))
1153         return(-1);
1154
1155     xmlInitParser();
1156
1157     encoding = (const char *) htmlGetMetaEncoding(cur);
1158
1159     if (encoding != NULL) {
1160         xmlCharEncoding enc;
1161
1162         enc = xmlParseCharEncoding(encoding);
1163         if (enc != cur->charset) {
1164             if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1165                 /*
1166                  * Not supported yet
1167                  */
1168                 return(-1);
1169             }
1170
1171             handler = xmlFindCharEncodingHandler(encoding);
1172             if (handler == NULL)
1173                 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1174         }
1175     }
1176
1177     /*
1178      * Fallback to HTML or ASCII when the encoding is unspecified
1179      */
1180     if (handler == NULL)
1181         handler = xmlFindCharEncodingHandler("HTML");
1182     if (handler == NULL)
1183         handler = xmlFindCharEncodingHandler("ascii");
1184
1185     /*
1186      * save the content to a temp buffer.
1187      */
1188     buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1189     if (buf == NULL) return(0);
1190
1191     htmlDocContentDumpOutput(buf, cur, NULL);
1192
1193     ret = xmlOutputBufferClose(buf);
1194     return(ret);
1195 }
1196
1197 /**
1198  * htmlSaveFileFormat:
1199  * @filename:  the filename
1200  * @cur:  the document
1201  * @format:  should formatting spaces been added
1202  * @encoding: the document encoding
1203  *
1204  * Dump an HTML document to a file using a given encoding.
1205  *
1206  * returns: the number of byte written or -1 in case of failure.
1207  */
1208 int
1209 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1210                    const char *encoding, int format) {
1211     xmlOutputBufferPtr buf;
1212     xmlCharEncodingHandlerPtr handler = NULL;
1213     int ret;
1214
1215     if ((cur == NULL) || (filename == NULL))
1216         return(-1);
1217
1218     xmlInitParser();
1219
1220     if (encoding != NULL) {
1221         xmlCharEncoding enc;
1222
1223         enc = xmlParseCharEncoding(encoding);
1224         if (enc != cur->charset) {
1225             if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1226                 /*
1227                  * Not supported yet
1228                  */
1229                 return(-1);
1230             }
1231
1232             handler = xmlFindCharEncodingHandler(encoding);
1233             if (handler == NULL)
1234                 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1235         }
1236         htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1237     } else {
1238         htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1239     }
1240
1241     /*
1242      * Fallback to HTML or ASCII when the encoding is unspecified
1243      */
1244     if (handler == NULL)
1245         handler = xmlFindCharEncodingHandler("HTML");
1246     if (handler == NULL)
1247         handler = xmlFindCharEncodingHandler("ascii");
1248
1249     /*
1250      * save the content to a temp buffer.
1251      */
1252     buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1253     if (buf == NULL) return(0);
1254
1255     htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1256
1257     ret = xmlOutputBufferClose(buf);
1258     return(ret);
1259 }
1260
1261 /**
1262  * htmlSaveFileEnc:
1263  * @filename:  the filename
1264  * @cur:  the document
1265  * @encoding: the document encoding
1266  *
1267  * Dump an HTML document to a file using a given encoding
1268  * and formatting returns/spaces are added.
1269  *
1270  * returns: the number of byte written or -1 in case of failure.
1271  */
1272 int
1273 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1274     return(htmlSaveFileFormat(filename, cur, encoding, 1));
1275 }
1276
1277 #endif /* LIBXML_OUTPUT_ENABLED */
1278
1279 #define bottom_HTMLtree
1280 #include "elfgcchack.h"
1281 #endif /* LIBXML_HTML_ENABLED */