sdk/lib/3rdparty/libxml2/uri.c

   1 /**
   2  * uri.c: set of generic URI related routines
   3  *
   4  * Reference: RFCs 3986, 2732 and 2373
   5  *
   6  * See Copyright for the status of this software.
   7  *
   8  * daniel@veillard.com
   9  */
  10
  11 #define IN_LIBXML
  12 #include "libxml.h"
  13
  14 #include <string.h>
  15
  16 #include <libxml/xmlmemory.h>
  17 #include <libxml/uri.h>
  18 #include <libxml/globals.h>
  19 #include <libxml/xmlerror.h>
  20
  21 /**
  22  * MAX_URI_LENGTH:
  23  *
  24  * The definition of the URI regexp in the above RFC has no size limit
  25  * In practice they are usually relativey short except for the
  26  * data URI scheme as defined in RFC 2397. Even for data URI the usual
  27  * maximum size before hitting random practical limits is around 64 KB
  28  * and 4KB is usually a maximum admitted limit for proper operations.
  29  * The value below is more a security limit than anything else and
  30  * really should never be hit by 'normal' operations
  31  * Set to 1 MByte in 2012, this is only enforced on output
  32  */
  33 #define MAX_URI_LENGTH 1024 * 1024
  34
  35 static void
  36 xmlURIErrMemory(const char *extra)
  37 {
  38     if (extra)
  39         __xmlRaiseError(NULL, NULL, NULL,
  40                         NULL, NULL, XML_FROM_URI,
  41                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
  42                         extra, NULL, NULL, 0, 0,
  43                         "Memory allocation failed : %s\n", extra);
  44     else
  45         __xmlRaiseError(NULL, NULL, NULL,
  46                         NULL, NULL, XML_FROM_URI,
  47                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
  48                         NULL, NULL, NULL, 0, 0,
  49                         "Memory allocation failed\n");
  50 }
  51
  52 static void xmlCleanURI(xmlURIPtr uri);
  53
  54 /*
  55  * Old rule from 2396 used in legacy handling code
  56  * alpha    = lowalpha | upalpha
  57  */
  58 #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
  59
  60
  61 /*
  62  * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
  63  *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
  64  *            "u" | "v" | "w" | "x" | "y" | "z"
  65  */
  66
  67 #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
  68
  69 /*
  70  * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
  71  *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
  72  *           "U" | "V" | "W" | "X" | "Y" | "Z"
  73  */
  74 #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
  75
  76 #ifdef IS_DIGIT
  77 #undef IS_DIGIT
  78 #endif
  79 /*
  80  * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
  81  */
  82 #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
  83
  84 /*
  85  * alphanum = alpha | digit
  86  */
  87
  88 #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
  89
  90 /*
  91  * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
  92  */
  93
  94 #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
  95     ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
  96     ((x) == '(') || ((x) == ')'))
  97
  98 /*
  99  * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 100  */
 101
 102 #define IS_UNWISE(p)                                                    \
 103       (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
 104        ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
 105        ((*(p) == ']')) || ((*(p) == '`')))
 106 /*
 107  * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 108  *            "[" | "]"
 109  */
 110
 111 #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
 112         ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
 113         ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
 114         ((x) == ']'))
 115
 116 /*
 117  * unreserved = alphanum | mark
 118  */
 119
 120 #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
 121
 122 /*
 123  * Skip to next pointer char, handle escaped sequences
 124  */
 125
 126 #define NEXT(p) ((*p == '%')? p += 3 : p++)
 127
 128 /*
 129  * Productions from the spec.
 130  *
 131  *    authority     = server | reg_name
 132  *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 133  *                        ";" | ":" | "@" | "&" | "=" | "+" )
 134  *
 135  * path          = [ abs_path | opaque_part ]
 136  */
 137
 138 #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
 139
 140 /************************************************************************
 141  *                                                                      *
 142  *                         RFC 3986 parser                              *
 143  *                                                                      *
 144  ************************************************************************/
 145
 146 #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
 147 #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
 148                       ((*(p) >= 'A') && (*(p) <= 'Z')))
 149 #define ISA_HEXDIG(p)                                                   \
 150        (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||             \
 151         ((*(p) >= 'A') && (*(p) <= 'F')))
 152
 153 /*
 154  *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 155  *                     / "*" / "+" / "," / ";" / "="
 156  */
 157 #define ISA_SUB_DELIM(p)                                                \
 158       (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||         \
 159        ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||         \
 160        ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||         \
 161        ((*(p) == '=')) || ((*(p) == '\'')))
 162
 163 /*
 164  *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 165  */
 166 #define ISA_GEN_DELIM(p)                                                \
 167       (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
 168        ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
 169        ((*(p) == '@')))
 170
 171 /*
 172  *    reserved      = gen-delims / sub-delims
 173  */
 174 #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
 175
 176 /*
 177  *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 178  */
 179 #define ISA_UNRESERVED(p)                                               \
 180       ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||           \
 181        ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
 182
 183 /*
 184  *    pct-encoded   = "%" HEXDIG HEXDIG
 185  */
 186 #define ISA_PCT_ENCODED(p)                                              \
 187      ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
 188
 189 /*
 190  *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 191  */
 192 #define ISA_PCHAR(p)                                                    \
 193      (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||    \
 194       ((*(p) == ':')) || ((*(p) == '@')))
 195
 196 /**
 197  * xmlParse3986Scheme:
 198  * @uri:  pointer to an URI structure
 199  * @str:  pointer to the string to analyze
 200  *
 201  * Parse an URI scheme
 202  *
 203  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 204  *
 205  * Returns 0 or the error code
 206  */
 207 static int
 208 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
 209     const char *cur;
 210
 211     if (str == NULL)
 212         return(-1);
 213
 214     cur = *str;
 215     if (!ISA_ALPHA(cur))
 216         return(2);
 217     cur++;
 218     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
 219            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
 220     if (uri != NULL) {
 221         if (uri->scheme != NULL) xmlFree(uri->scheme);
 222         uri->scheme = STRNDUP(*str, cur - *str);
 223     }
 224     *str = cur;
 225     return(0);
 226 }
 227
 228 /**
 229  * xmlParse3986Fragment:
 230  * @uri:  pointer to an URI structure
 231  * @str:  pointer to the string to analyze
 232  *
 233  * Parse the query part of an URI
 234  *
 235  * fragment      = *( pchar / "/" / "?" )
 236  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
 237  *       in the fragment identifier but this is used very broadly for
 238  *       xpointer scheme selection, so we are allowing it here to not break
 239  *       for example all the DocBook processing chains.
 240  *
 241  * Returns 0 or the error code
 242  */
 243 static int
 244 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
 245 {
 246     const char *cur;
 247
 248     if (str == NULL)
 249         return (-1);
 250
 251     cur = *str;
 252
 253     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
 254            (*cur == '[') || (*cur == ']') ||
 255            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
 256         NEXT(cur);
 257     if (uri != NULL) {
 258         if (uri->fragment != NULL)
 259             xmlFree(uri->fragment);
 260         if (uri->cleanup & 2)
 261             uri->fragment = STRNDUP(*str, cur - *str);
 262         else
 263             uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
 264     }
 265     *str = cur;
 266     return (0);
 267 }
 268
 269 /**
 270  * xmlParse3986Query:
 271  * @uri:  pointer to an URI structure
 272  * @str:  pointer to the string to analyze
 273  *
 274  * Parse the query part of an URI
 275  *
 276  * query = *uric
 277  *
 278  * Returns 0 or the error code
 279  */
 280 static int
 281 xmlParse3986Query(xmlURIPtr uri, const char **str)
 282 {
 283     const char *cur;
 284
 285     if (str == NULL)
 286         return (-1);
 287
 288     cur = *str;
 289
 290     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
 291            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
 292         NEXT(cur);
 293     if (uri != NULL) {
 294         if (uri->query != NULL)
 295             xmlFree(uri->query);
 296         if (uri->cleanup & 2)
 297             uri->query = STRNDUP(*str, cur - *str);
 298         else
 299             uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
 300
 301         /* Save the raw bytes of the query as well.
 302          * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
 303          */
 304         if (uri->query_raw != NULL)
 305             xmlFree (uri->query_raw);
 306         uri->query_raw = STRNDUP (*str, cur - *str);
 307     }
 308     *str = cur;
 309     return (0);
 310 }
 311
 312 /**
 313  * xmlParse3986Port:
 314  * @uri:  pointer to an URI structure
 315  * @str:  the string to analyze
 316  *
 317  * Parse a port part and fills in the appropriate fields
 318  * of the @uri structure
 319  *
 320  * port          = *DIGIT
 321  *
 322  * Returns 0 or the error code
 323  */
 324 static int
 325 xmlParse3986Port(xmlURIPtr uri, const char **str)
 326 {
 327     const char *cur = *str;
 328     unsigned port = 0; /* unsigned for defined overflow behavior */
 329
 330     if (ISA_DIGIT(cur)) {
 331         while (ISA_DIGIT(cur)) {
 332             port = port * 10 + (*cur - '0');
 333
 334             cur++;
 335         }
 336         if (uri != NULL)
 337             uri->port = port & INT_MAX; /* port value modulo INT_MAX+1 */
 338         *str = cur;
 339         return(0);
 340     }
 341     return(1);
 342 }
 343
 344 /**
 345  * xmlParse3986Userinfo:
 346  * @uri:  pointer to an URI structure
 347  * @str:  the string to analyze
 348  *
 349  * Parse an user informations part and fills in the appropriate fields
 350  * of the @uri structure
 351  *
 352  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 353  *
 354  * Returns 0 or the error code
 355  */
 356 static int
 357 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
 358 {
 359     const char *cur;
 360
 361     cur = *str;
 362     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
 363            ISA_SUB_DELIM(cur) || (*cur == ':'))
 364         NEXT(cur);
 365     if (*cur == '@') {
 366         if (uri != NULL) {
 367             if (uri->user != NULL) xmlFree(uri->user);
 368             if (uri->cleanup & 2)
 369                 uri->user = STRNDUP(*str, cur - *str);
 370             else
 371                 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
 372         }
 373         *str = cur;
 374         return(0);
 375     }
 376     return(1);
 377 }
 378
 379 /**
 380  * xmlParse3986DecOctet:
 381  * @str:  the string to analyze
 382  *
 383  *    dec-octet     = DIGIT                 ; 0-9
 384  *                  / %x31-39 DIGIT         ; 10-99
 385  *                  / "1" 2DIGIT            ; 100-199
 386  *                  / "2" %x30-34 DIGIT     ; 200-249
 387  *                  / "25" %x30-35          ; 250-255
 388  *
 389  * Skip a dec-octet.
 390  *
 391  * Returns 0 if found and skipped, 1 otherwise
 392  */
 393 static int
 394 xmlParse3986DecOctet(const char **str) {
 395     const char *cur = *str;
 396
 397     if (!(ISA_DIGIT(cur)))
 398         return(1);
 399     if (!ISA_DIGIT(cur+1))
 400         cur++;
 401     else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
 402         cur += 2;
 403     else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
 404         cur += 3;
 405     else if ((*cur == '2') && (*(cur + 1) >= '0') &&
 406              (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
 407         cur += 3;
 408     else if ((*cur == '2') && (*(cur + 1) == '5') &&
 409              (*(cur + 2) >= '0') && (*(cur + 1) <= '5'))
 410         cur += 3;
 411     else
 412         return(1);
 413     *str = cur;
 414     return(0);
 415 }
 416 /**
 417  * xmlParse3986Host:
 418  * @uri:  pointer to an URI structure
 419  * @str:  the string to analyze
 420  *
 421  * Parse an host part and fills in the appropriate fields
 422  * of the @uri structure
 423  *
 424  * host          = IP-literal / IPv4address / reg-name
 425  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 426  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
 427  * reg-name      = *( unreserved / pct-encoded / sub-delims )
 428  *
 429  * Returns 0 or the error code
 430  */
 431 static int
 432 xmlParse3986Host(xmlURIPtr uri, const char **str)
 433 {
 434     const char *cur = *str;
 435     const char *host;
 436
 437     host = cur;
 438     /*
 439      * IPv6 and future adressing scheme are enclosed between brackets
 440      */
 441     if (*cur == '[') {
 442         cur++;
 443         while ((*cur != ']') && (*cur != 0))
 444             cur++;
 445         if (*cur != ']')
 446             return(1);
 447         cur++;
 448         goto found;
 449     }
 450     /*
 451      * try to parse an IPv4
 452      */
 453     if (ISA_DIGIT(cur)) {
 454         if (xmlParse3986DecOctet(&cur) != 0)
 455             goto not_ipv4;
 456         if (*cur != '.')
 457             goto not_ipv4;
 458         cur++;
 459         if (xmlParse3986DecOctet(&cur) != 0)
 460             goto not_ipv4;
 461         if (*cur != '.')
 462             goto not_ipv4;
 463         if (xmlParse3986DecOctet(&cur) != 0)
 464             goto not_ipv4;
 465         if (*cur != '.')
 466             goto not_ipv4;
 467         if (xmlParse3986DecOctet(&cur) != 0)
 468             goto not_ipv4;
 469         goto found;
 470 not_ipv4:
 471         cur = *str;
 472     }
 473     /*
 474      * then this should be a hostname which can be empty
 475      */
 476     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
 477         NEXT(cur);
 478 found:
 479     if (uri != NULL) {
 480         if (uri->authority != NULL) xmlFree(uri->authority);
 481         uri->authority = NULL;
 482         if (uri->server != NULL) xmlFree(uri->server);
 483         if (cur != host) {
 484             if (uri->cleanup & 2)
 485                 uri->server = STRNDUP(host, cur - host);
 486             else
 487                 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
 488         } else
 489             uri->server = NULL;
 490     }
 491     *str = cur;
 492     return(0);
 493 }
 494
 495 /**
 496  * xmlParse3986Authority:
 497  * @uri:  pointer to an URI structure
 498  * @str:  the string to analyze
 499  *
 500  * Parse an authority part and fills in the appropriate fields
 501  * of the @uri structure
 502  *
 503  * authority     = [ userinfo "@" ] host [ ":" port ]
 504  *
 505  * Returns 0 or the error code
 506  */
 507 static int
 508 xmlParse3986Authority(xmlURIPtr uri, const char **str)
 509 {
 510     const char *cur;
 511     int ret;
 512
 513     cur = *str;
 514     /*
 515      * try to parse an userinfo and check for the trailing @
 516      */
 517     ret = xmlParse3986Userinfo(uri, &cur);
 518     if ((ret != 0) || (*cur != '@'))
 519         cur = *str;
 520     else
 521         cur++;
 522     ret = xmlParse3986Host(uri, &cur);
 523     if (ret != 0) return(ret);
 524     if (*cur == ':') {
 525         cur++;
 526         ret = xmlParse3986Port(uri, &cur);
 527         if (ret != 0) return(ret);
 528     }
 529     *str = cur;
 530     return(0);
 531 }
 532
 533 /**
 534  * xmlParse3986Segment:
 535  * @str:  the string to analyze
 536  * @forbid: an optional forbidden character
 537  * @empty: allow an empty segment
 538  *
 539  * Parse a segment and fills in the appropriate fields
 540  * of the @uri structure
 541  *
 542  * segment       = *pchar
 543  * segment-nz    = 1*pchar
 544  * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 545  *               ; non-zero-length segment without any colon ":"
 546  *
 547  * Returns 0 or the error code
 548  */
 549 static int
 550 xmlParse3986Segment(const char **str, char forbid, int empty)
 551 {
 552     const char *cur;
 553
 554     cur = *str;
 555     if (!ISA_PCHAR(cur)) {
 556         if (empty)
 557             return(0);
 558         return(1);
 559     }
 560     while (ISA_PCHAR(cur) && (*cur != forbid))
 561         NEXT(cur);
 562     *str = cur;
 563     return (0);
 564 }
 565
 566 /**
 567  * xmlParse3986PathAbEmpty:
 568  * @uri:  pointer to an URI structure
 569  * @str:  the string to analyze
 570  *
 571  * Parse an path absolute or empty and fills in the appropriate fields
 572  * of the @uri structure
 573  *
 574  * path-abempty  = *( "/" segment )
 575  *
 576  * Returns 0 or the error code
 577  */
 578 static int
 579 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
 580 {
 581     const char *cur;
 582     int ret;
 583
 584     cur = *str;
 585
 586     while (*cur == '/') {
 587         cur++;
 588         ret = xmlParse3986Segment(&cur, 0, 1);
 589         if (ret != 0) return(ret);
 590     }
 591     if (uri != NULL) {
 592         if (uri->path != NULL) xmlFree(uri->path);
 593         if (*str != cur) {
 594             if (uri->cleanup & 2)
 595                 uri->path = STRNDUP(*str, cur - *str);
 596             else
 597                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
 598         } else {
 599             uri->path = NULL;
 600         }
 601     }
 602     *str = cur;
 603     return (0);
 604 }
 605
 606 /**
 607  * xmlParse3986PathAbsolute:
 608  * @uri:  pointer to an URI structure
 609  * @str:  the string to analyze
 610  *
 611  * Parse an path absolute and fills in the appropriate fields
 612  * of the @uri structure
 613  *
 614  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
 615  *
 616  * Returns 0 or the error code
 617  */
 618 static int
 619 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
 620 {
 621     const char *cur;
 622     int ret;
 623
 624     cur = *str;
 625
 626     if (*cur != '/')
 627         return(1);
 628     cur++;
 629     ret = xmlParse3986Segment(&cur, 0, 0);
 630     if (ret == 0) {
 631         while (*cur == '/') {
 632             cur++;
 633             ret = xmlParse3986Segment(&cur, 0, 1);
 634             if (ret != 0) return(ret);
 635         }
 636     }
 637     if (uri != NULL) {
 638         if (uri->path != NULL) xmlFree(uri->path);
 639         if (cur != *str) {
 640             if (uri->cleanup & 2)
 641                 uri->path = STRNDUP(*str, cur - *str);
 642             else
 643                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
 644         } else {
 645             uri->path = NULL;
 646         }
 647     }
 648     *str = cur;
 649     return (0);
 650 }
 651
 652 /**
 653  * xmlParse3986PathRootless:
 654  * @uri:  pointer to an URI structure
 655  * @str:  the string to analyze
 656  *
 657  * Parse an path without root and fills in the appropriate fields
 658  * of the @uri structure
 659  *
 660  * path-rootless = segment-nz *( "/" segment )
 661  *
 662  * Returns 0 or the error code
 663  */
 664 static int
 665 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
 666 {
 667     const char *cur;
 668     int ret;
 669
 670     cur = *str;
 671
 672     ret = xmlParse3986Segment(&cur, 0, 0);
 673     if (ret != 0) return(ret);
 674     while (*cur == '/') {
 675         cur++;
 676         ret = xmlParse3986Segment(&cur, 0, 1);
 677         if (ret != 0) return(ret);
 678     }
 679     if (uri != NULL) {
 680         if (uri->path != NULL) xmlFree(uri->path);
 681         if (cur != *str) {
 682             if (uri->cleanup & 2)
 683                 uri->path = STRNDUP(*str, cur - *str);
 684             else
 685                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
 686         } else {
 687             uri->path = NULL;
 688         }
 689     }
 690     *str = cur;
 691     return (0);
 692 }
 693
 694 /**
 695  * xmlParse3986PathNoScheme:
 696  * @uri:  pointer to an URI structure
 697  * @str:  the string to analyze
 698  *
 699  * Parse an path which is not a scheme and fills in the appropriate fields
 700  * of the @uri structure
 701  *
 702  * path-noscheme = segment-nz-nc *( "/" segment )
 703  *
 704  * Returns 0 or the error code
 705  */
 706 static int
 707 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
 708 {
 709     const char *cur;
 710     int ret;
 711
 712     cur = *str;
 713
 714     ret = xmlParse3986Segment(&cur, ':', 0);
 715     if (ret != 0) return(ret);
 716     while (*cur == '/') {
 717         cur++;
 718         ret = xmlParse3986Segment(&cur, 0, 1);
 719         if (ret != 0) return(ret);
 720     }
 721     if (uri != NULL) {
 722         if (uri->path != NULL) xmlFree(uri->path);
 723         if (cur != *str) {
 724             if (uri->cleanup & 2)
 725                 uri->path = STRNDUP(*str, cur - *str);
 726             else
 727                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
 728         } else {
 729             uri->path = NULL;
 730         }
 731     }
 732     *str = cur;
 733     return (0);
 734 }
 735
 736 /**
 737  * xmlParse3986HierPart:
 738  * @uri:  pointer to an URI structure
 739  * @str:  the string to analyze
 740  *
 741  * Parse an hierarchical part and fills in the appropriate fields
 742  * of the @uri structure
 743  *
 744  * hier-part     = "//" authority path-abempty
 745  *                / path-absolute
 746  *                / path-rootless
 747  *                / path-empty
 748  *
 749  * Returns 0 or the error code
 750  */
 751 static int
 752 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
 753 {
 754     const char *cur;
 755     int ret;
 756
 757     cur = *str;
 758
 759     if ((*cur == '/') && (*(cur + 1) == '/')) {
 760         cur += 2;
 761         ret = xmlParse3986Authority(uri, &cur);
 762         if (ret != 0) return(ret);
 763         if (uri->server == NULL)
 764             uri->port = -1;
 765         ret = xmlParse3986PathAbEmpty(uri, &cur);
 766         if (ret != 0) return(ret);
 767         *str = cur;
 768         return(0);
 769     } else if (*cur == '/') {
 770         ret = xmlParse3986PathAbsolute(uri, &cur);
 771         if (ret != 0) return(ret);
 772     } else if (ISA_PCHAR(cur)) {
 773         ret = xmlParse3986PathRootless(uri, &cur);
 774         if (ret != 0) return(ret);
 775     } else {
 776         /* path-empty is effectively empty */
 777         if (uri != NULL) {
 778             if (uri->path != NULL) xmlFree(uri->path);
 779             uri->path = NULL;
 780         }
 781     }
 782     *str = cur;
 783     return (0);
 784 }
 785
 786 /**
 787  * xmlParse3986RelativeRef:
 788  * @uri:  pointer to an URI structure
 789  * @str:  the string to analyze
 790  *
 791  * Parse an URI string and fills in the appropriate fields
 792  * of the @uri structure
 793  *
 794  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
 795  * relative-part = "//" authority path-abempty
 796  *               / path-absolute
 797  *               / path-noscheme
 798  *               / path-empty
 799  *
 800  * Returns 0 or the error code
 801  */
 802 static int
 803 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
 804     int ret;
 805
 806     if ((*str == '/') && (*(str + 1) == '/')) {
 807         str += 2;
 808         ret = xmlParse3986Authority(uri, &str);
 809         if (ret != 0) return(ret);
 810         ret = xmlParse3986PathAbEmpty(uri, &str);
 811         if (ret != 0) return(ret);
 812     } else if (*str == '/') {
 813         ret = xmlParse3986PathAbsolute(uri, &str);
 814         if (ret != 0) return(ret);
 815     } else if (ISA_PCHAR(str)) {
 816         ret = xmlParse3986PathNoScheme(uri, &str);
 817         if (ret != 0) return(ret);
 818     } else {
 819         /* path-empty is effectively empty */
 820         if (uri != NULL) {
 821             if (uri->path != NULL) xmlFree(uri->path);
 822             uri->path = NULL;
 823         }
 824     }
 825
 826     if (*str == '?') {
 827         str++;
 828         ret = xmlParse3986Query(uri, &str);
 829         if (ret != 0) return(ret);
 830     }
 831     if (*str == '#') {
 832         str++;
 833         ret = xmlParse3986Fragment(uri, &str);
 834         if (ret != 0) return(ret);
 835     }
 836     if (*str != 0) {
 837         xmlCleanURI(uri);
 838         return(1);
 839     }
 840     return(0);
 841 }
 842
 843
 844 /**
 845  * xmlParse3986URI:
 846  * @uri:  pointer to an URI structure
 847  * @str:  the string to analyze
 848  *
 849  * Parse an URI string and fills in the appropriate fields
 850  * of the @uri structure
 851  *
 852  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 853  *
 854  * Returns 0 or the error code
 855  */
 856 static int
 857 xmlParse3986URI(xmlURIPtr uri, const char *str) {
 858     int ret;
 859
 860     ret = xmlParse3986Scheme(uri, &str);
 861     if (ret != 0) return(ret);
 862     if (*str != ':') {
 863         return(1);
 864     }
 865     str++;
 866     ret = xmlParse3986HierPart(uri, &str);
 867     if (ret != 0) return(ret);
 868     if (*str == '?') {
 869         str++;
 870         ret = xmlParse3986Query(uri, &str);
 871         if (ret != 0) return(ret);
 872     }
 873     if (*str == '#') {
 874         str++;
 875         ret = xmlParse3986Fragment(uri, &str);
 876         if (ret != 0) return(ret);
 877     }
 878     if (*str != 0) {
 879         xmlCleanURI(uri);
 880         return(1);
 881     }
 882     return(0);
 883 }
 884
 885 /**
 886  * xmlParse3986URIReference:
 887  * @uri:  pointer to an URI structure
 888  * @str:  the string to analyze
 889  *
 890  * Parse an URI reference string and fills in the appropriate fields
 891  * of the @uri structure
 892  *
 893  * URI-reference = URI / relative-ref
 894  *
 895  * Returns 0 or the error code
 896  */
 897 static int
 898 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
 899     int ret;
 900
 901     if (str == NULL)
 902         return(-1);
 903     xmlCleanURI(uri);
 904
 905     /*
 906      * Try first to parse absolute refs, then fallback to relative if
 907      * it fails.
 908      */
 909     ret = xmlParse3986URI(uri, str);
 910     if (ret != 0) {
 911         xmlCleanURI(uri);
 912         ret = xmlParse3986RelativeRef(uri, str);
 913         if (ret != 0) {
 914             xmlCleanURI(uri);
 915             return(ret);
 916         }
 917     }
 918     return(0);
 919 }
 920
 921 /**
 922  * xmlParseURI:
 923  * @str:  the URI string to analyze
 924  *
 925  * Parse an URI based on RFC 3986
 926  *
 927  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 928  *
 929  * Returns a newly built xmlURIPtr or NULL in case of error
 930  */
 931 xmlURIPtr
 932 xmlParseURI(const char *str) {
 933     xmlURIPtr uri;
 934     int ret;
 935
 936     if (str == NULL)
 937         return(NULL);
 938     uri = xmlCreateURI();
 939     if (uri != NULL) {
 940         ret = xmlParse3986URIReference(uri, str);
 941         if (ret) {
 942             xmlFreeURI(uri);
 943             return(NULL);
 944         }
 945     }
 946     return(uri);
 947 }
 948
 949 /**
 950  * xmlParseURIReference:
 951  * @uri:  pointer to an URI structure
 952  * @str:  the string to analyze
 953  *
 954  * Parse an URI reference string based on RFC 3986 and fills in the
 955  * appropriate fields of the @uri structure
 956  *
 957  * URI-reference = URI / relative-ref
 958  *
 959  * Returns 0 or the error code
 960  */
 961 int
 962 xmlParseURIReference(xmlURIPtr uri, const char *str) {
 963     return(xmlParse3986URIReference(uri, str));
 964 }
 965
 966 /**
 967  * xmlParseURIRaw:
 968  * @str:  the URI string to analyze
 969  * @raw:  if 1 unescaping of URI pieces are disabled
 970  *
 971  * Parse an URI but allows to keep intact the original fragments.
 972  *
 973  * URI-reference = URI / relative-ref
 974  *
 975  * Returns a newly built xmlURIPtr or NULL in case of error
 976  */
 977 xmlURIPtr
 978 xmlParseURIRaw(const char *str, int raw) {
 979     xmlURIPtr uri;
 980     int ret;
 981
 982     if (str == NULL)
 983         return(NULL);
 984     uri = xmlCreateURI();
 985     if (uri != NULL) {
 986         if (raw) {
 987             uri->cleanup |= 2;
 988         }
 989         ret = xmlParseURIReference(uri, str);
 990         if (ret) {
 991             xmlFreeURI(uri);
 992             return(NULL);
 993         }
 994     }
 995     return(uri);
 996 }
 997
 998 /************************************************************************
 999  *                                                                      *
1000  *                      Generic URI structure functions                 *
1001  *                                                                      *
1002  ************************************************************************/
1003
1004 /**
1005  * xmlCreateURI:
1006  *
1007  * Simply creates an empty xmlURI
1008  *
1009  * Returns the new structure or NULL in case of error
1010  */
1011 xmlURIPtr
1012 xmlCreateURI(void) {
1013     xmlURIPtr ret;
1014
1015     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1016     if (ret == NULL) {
1017         xmlURIErrMemory("creating URI structure\n");
1018         return(NULL);
1019     }
1020     memset(ret, 0, sizeof(xmlURI));
1021     return(ret);
1022 }
1023
1024 /**
1025  * xmlSaveUriRealloc:
1026  *
1027  * Function to handle properly a reallocation when saving an URI
1028  * Also imposes some limit on the length of an URI string output
1029  */
1030 static xmlChar *
1031 xmlSaveUriRealloc(xmlChar *ret, int *max) {
1032     xmlChar *temp;
1033     int tmp;
1034
1035     if (*max > MAX_URI_LENGTH) {
1036         xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1037         return(NULL);
1038     }
1039     tmp = *max * 2;
1040     temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1041     if (temp == NULL) {
1042         xmlURIErrMemory("saving URI\n");
1043         return(NULL);
1044     }
1045     *max = tmp;
1046     return(temp);
1047 }
1048
1049 /**
1050  * xmlSaveUri:
1051  * @uri:  pointer to an xmlURI
1052  *
1053  * Save the URI as an escaped string
1054  *
1055  * Returns a new string (to be deallocated by caller)
1056  */
1057 xmlChar *
1058 xmlSaveUri(xmlURIPtr uri) {
1059     xmlChar *ret = NULL;
1060     xmlChar *temp;
1061     const char *p;
1062     int len;
1063     int max;
1064
1065     if (uri == NULL) return(NULL);
1066
1067
1068     max = 80;
1069     ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1070     if (ret == NULL) {
1071         xmlURIErrMemory("saving URI\n");
1072         return(NULL);
1073     }
1074     len = 0;
1075
1076     if (uri->scheme != NULL) {
1077         p = uri->scheme;
1078         while (*p != 0) {
1079             if (len >= max) {
1080                 temp = xmlSaveUriRealloc(ret, &max);
1081                 if (temp == NULL) goto mem_error;
1082                 ret = temp;
1083             }
1084             ret[len++] = *p++;
1085         }
1086         if (len >= max) {
1087             temp = xmlSaveUriRealloc(ret, &max);
1088             if (temp == NULL) goto mem_error;
1089             ret = temp;
1090         }
1091         ret[len++] = ':';
1092     }
1093     if (uri->opaque != NULL) {
1094         p = uri->opaque;
1095         while (*p != 0) {
1096             if (len + 3 >= max) {
1097                 temp = xmlSaveUriRealloc(ret, &max);
1098                 if (temp == NULL) goto mem_error;
1099                 ret = temp;
1100             }
1101             if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1102                 ret[len++] = *p++;
1103             else {
1104                 int val = *(unsigned char *)p++;
1105                 int hi = val / 0x10, lo = val % 0x10;
1106                 ret[len++] = '%';
1107                 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1108                 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1109             }
1110         }
1111     } else {
1112         if ((uri->server != NULL) || (uri->port == -1)) {
1113             if (len + 3 >= max) {
1114                 temp = xmlSaveUriRealloc(ret, &max);
1115                 if (temp == NULL) goto mem_error;
1116                 ret = temp;
1117             }
1118             ret[len++] = '/';
1119             ret[len++] = '/';
1120             if (uri->user != NULL) {
1121                 p = uri->user;
1122                 while (*p != 0) {
1123                     if (len + 3 >= max) {
1124                         temp = xmlSaveUriRealloc(ret, &max);
1125                         if (temp == NULL) goto mem_error;
1126                         ret = temp;
1127                     }
1128                     if ((IS_UNRESERVED(*(p))) ||
1129                         ((*(p) == ';')) || ((*(p) == ':')) ||
1130                         ((*(p) == '&')) || ((*(p) == '=')) ||
1131                         ((*(p) == '+')) || ((*(p) == '$')) ||
1132                         ((*(p) == ',')))
1133                         ret[len++] = *p++;
1134                     else {
1135                         int val = *(unsigned char *)p++;
1136                         int hi = val / 0x10, lo = val % 0x10;
1137                         ret[len++] = '%';
1138                         ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1139                         ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1140                     }
1141                 }
1142                 if (len + 3 >= max) {
1143                     temp = xmlSaveUriRealloc(ret, &max);
1144                     if (temp == NULL) goto mem_error;
1145                     ret = temp;
1146                 }
1147                 ret[len++] = '@';
1148             }
1149             if (uri->server != NULL) {
1150                 p = uri->server;
1151                 while (*p != 0) {
1152                     if (len >= max) {
1153                         temp = xmlSaveUriRealloc(ret, &max);
1154                         if (temp == NULL) goto mem_error;
1155                         ret = temp;
1156                     }
1157                     ret[len++] = *p++;
1158                 }
1159                 if (uri->port > 0) {
1160                     if (len + 10 >= max) {
1161                         temp = xmlSaveUriRealloc(ret, &max);
1162                         if (temp == NULL) goto mem_error;
1163                         ret = temp;
1164                     }
1165                     len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1166                 }
1167             }
1168         } else if (uri->authority != NULL) {
1169             if (len + 3 >= max) {
1170                 temp = xmlSaveUriRealloc(ret, &max);
1171                 if (temp == NULL) goto mem_error;
1172                 ret = temp;
1173             }
1174             ret[len++] = '/';
1175             ret[len++] = '/';
1176             p = uri->authority;
1177             while (*p != 0) {
1178                 if (len + 3 >= max) {
1179                     temp = xmlSaveUriRealloc(ret, &max);
1180                     if (temp == NULL) goto mem_error;
1181                     ret = temp;
1182                 }
1183                 if ((IS_UNRESERVED(*(p))) ||
1184                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1185                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1186                     ((*(p) == '=')) || ((*(p) == '+')))
1187                     ret[len++] = *p++;
1188                 else {
1189                     int val = *(unsigned char *)p++;
1190                     int hi = val / 0x10, lo = val % 0x10;
1191                     ret[len++] = '%';
1192                     ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1193                     ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1194                 }
1195             }
1196         } else if (uri->scheme != NULL) {
1197             if (len + 3 >= max) {
1198                 temp = xmlSaveUriRealloc(ret, &max);
1199                 if (temp == NULL) goto mem_error;
1200                 ret = temp;
1201             }
1202         }
1203         if (uri->path != NULL) {
1204             p = uri->path;
1205             /*
1206              * the colon in file:///d: should not be escaped or
1207              * Windows accesses fail later.
1208              */
1209             if ((uri->scheme != NULL) &&
1210                 (p[0] == '/') &&
1211                 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1212                  ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1213                 (p[2] == ':') &&
1214                 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1215                 if (len + 3 >= max) {
1216                     temp = xmlSaveUriRealloc(ret, &max);
1217                     if (temp == NULL) goto mem_error;
1218                     ret = temp;
1219                 }
1220                 ret[len++] = *p++;
1221                 ret[len++] = *p++;
1222                 ret[len++] = *p++;
1223             }
1224             while (*p != 0) {
1225                 if (len + 3 >= max) {
1226                     temp = xmlSaveUriRealloc(ret, &max);
1227                     if (temp == NULL) goto mem_error;
1228                     ret = temp;
1229                 }
1230                 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1231                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1232                     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1233                     ((*(p) == ',')))
1234                     ret[len++] = *p++;
1235                 else {
1236                     int val = *(unsigned char *)p++;
1237                     int hi = val / 0x10, lo = val % 0x10;
1238                     ret[len++] = '%';
1239                     ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1240                     ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1241                 }
1242             }
1243         }
1244         if (uri->query_raw != NULL) {
1245             if (len + 1 >= max) {
1246                 temp = xmlSaveUriRealloc(ret, &max);
1247                 if (temp == NULL) goto mem_error;
1248                 ret = temp;
1249             }
1250             ret[len++] = '?';
1251             p = uri->query_raw;
1252             while (*p != 0) {
1253                 if (len + 1 >= max) {
1254                     temp = xmlSaveUriRealloc(ret, &max);
1255                     if (temp == NULL) goto mem_error;
1256                     ret = temp;
1257                 }
1258                 ret[len++] = *p++;
1259             }
1260         } else if (uri->query != NULL) {
1261             if (len + 3 >= max) {
1262                 temp = xmlSaveUriRealloc(ret, &max);
1263                 if (temp == NULL) goto mem_error;
1264                 ret = temp;
1265             }
1266             ret[len++] = '?';
1267             p = uri->query;
1268             while (*p != 0) {
1269                 if (len + 3 >= max) {
1270                     temp = xmlSaveUriRealloc(ret, &max);
1271                     if (temp == NULL) goto mem_error;
1272                     ret = temp;
1273                 }
1274                 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1275                     ret[len++] = *p++;
1276                 else {
1277                     int val = *(unsigned char *)p++;
1278                     int hi = val / 0x10, lo = val % 0x10;
1279                     ret[len++] = '%';
1280                     ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1281                     ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1282                 }
1283             }
1284         }
1285     }
1286     if (uri->fragment != NULL) {
1287         if (len + 3 >= max) {
1288             temp = xmlSaveUriRealloc(ret, &max);
1289             if (temp == NULL) goto mem_error;
1290             ret = temp;
1291         }
1292         ret[len++] = '#';
1293         p = uri->fragment;
1294         while (*p != 0) {
1295             if (len + 3 >= max) {
1296                 temp = xmlSaveUriRealloc(ret, &max);
1297                 if (temp == NULL) goto mem_error;
1298                 ret = temp;
1299             }
1300             if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1301                 ret[len++] = *p++;
1302             else {
1303                 int val = *(unsigned char *)p++;
1304                 int hi = val / 0x10, lo = val % 0x10;
1305                 ret[len++] = '%';
1306                 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1307                 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1308             }
1309         }
1310     }
1311     if (len >= max) {
1312         temp = xmlSaveUriRealloc(ret, &max);
1313         if (temp == NULL) goto mem_error;
1314         ret = temp;
1315     }
1316     ret[len] = 0;
1317     return(ret);
1318
1319 mem_error:
1320     xmlFree(ret);
1321     return(NULL);
1322 }
1323
1324 /**
1325  * xmlPrintURI:
1326  * @stream:  a FILE* for the output
1327  * @uri:  pointer to an xmlURI
1328  *
1329  * Prints the URI in the stream @stream.
1330  */
1331 void
1332 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1333     xmlChar *out;
1334
1335     out = xmlSaveUri(uri);
1336     if (out != NULL) {
1337         fprintf(stream, "%s", (char *) out);
1338         xmlFree(out);
1339     }
1340 }
1341
1342 /**
1343  * xmlCleanURI:
1344  * @uri:  pointer to an xmlURI
1345  *
1346  * Make sure the xmlURI struct is free of content
1347  */
1348 static void
1349 xmlCleanURI(xmlURIPtr uri) {
1350     if (uri == NULL) return;
1351
1352     if (uri->scheme != NULL) xmlFree(uri->scheme);
1353     uri->scheme = NULL;
1354     if (uri->server != NULL) xmlFree(uri->server);
1355     uri->server = NULL;
1356     if (uri->user != NULL) xmlFree(uri->user);
1357     uri->user = NULL;
1358     if (uri->path != NULL) xmlFree(uri->path);
1359     uri->path = NULL;
1360     if (uri->fragment != NULL) xmlFree(uri->fragment);
1361     uri->fragment = NULL;
1362     if (uri->opaque != NULL) xmlFree(uri->opaque);
1363     uri->opaque = NULL;
1364     if (uri->authority != NULL) xmlFree(uri->authority);
1365     uri->authority = NULL;
1366     if (uri->query != NULL) xmlFree(uri->query);
1367     uri->query = NULL;
1368     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1369     uri->query_raw = NULL;
1370 }
1371
1372 /**
1373  * xmlFreeURI:
1374  * @uri:  pointer to an xmlURI
1375  *
1376  * Free up the xmlURI struct
1377  */
1378 void
1379 xmlFreeURI(xmlURIPtr uri) {
1380     if (uri == NULL) return;
1381
1382     if (uri->scheme != NULL) xmlFree(uri->scheme);
1383     if (uri->server != NULL) xmlFree(uri->server);
1384     if (uri->user != NULL) xmlFree(uri->user);
1385     if (uri->path != NULL) xmlFree(uri->path);
1386     if (uri->fragment != NULL) xmlFree(uri->fragment);
1387     if (uri->opaque != NULL) xmlFree(uri->opaque);
1388     if (uri->authority != NULL) xmlFree(uri->authority);
1389     if (uri->query != NULL) xmlFree(uri->query);
1390     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1391     xmlFree(uri);
1392 }
1393
1394 /************************************************************************
1395  *                                                                      *
1396  *                      Helper functions                                *
1397  *                                                                      *
1398  ************************************************************************/
1399
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of '/' inside the path are
 * collapsed to a single '/' along the way.
 *
 * Normalization occurs directly on the string, no new allocation is done
 *
 * Returns 0, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Source and destination overlap, so strcpy must not be used. */
        segp += 3;
        memmove(cur, segp, strlen(segp) + 1);

        /* If there are no previous segments, then keep going from here.  */
        if (cur == path)
            continue;
        segp = cur - 1;
        while ((segp > path) && (segp[0] == '/'))
            --segp;
        /*
         * Still on a '/': only slashes precede "cur".  Testing the character
         * rather than "segp == path" matters when the previous segment is a
         * single character stored at path[0], as in "a/b/../../c".
         */
        if (segp[0] == '/')
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1588
/* Returns 1 if c is an ASCII hexadecimal digit (0-9, a-f, A-F), else 0. */
static int is_hex(char c) {
    int digit = (c >= '0') && (c <= '9');
    int lower = (c >= 'a') && (c <= 'f');
    int upper = (c >= 'A') && (c <= 'F');

    return(digit || lower || upper);
}
1596
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Copies @str while turning every "%XX" sequence made of two hexadecimal
 * digits into the single byte it encodes; no check is made that the string
 * is an URI and no character encoding is applied. A '%' not followed by two
 * hexadecimal digits inside the processed range is copied unchanged.
 * The result is never longer than the input, so a @target of at least
 * @len + 1 bytes is sufficient. When @target is NULL a new buffer is
 * allocated.
 *
 * Returns @target (or the newly allocated buffer) holding the 0 terminated
 * unescaped string, NULL only in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *result, *dst;
    const char *src;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = strlen(str);
    /* The length did not fit in an int. */
    if (len < 0) return(NULL);

    result = target;
    if (result == NULL) {
        result = (char *) xmlMallocAtomic(len + 1);
        if (result == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    src = str;
    dst = result;
    while (len > 0) {
        if ((len > 2) && (src[0] == '%') &&
            (is_hex(src[1])) && (is_hex(src[2]))) {
            int value = 0;
            int i;

            /* Both characters are known to be hexadecimal digits here. */
            for (i = 1; i <= 2; i++) {
                char c = src[i];
                int digit;

                if ((c >= '0') && (c <= '9'))
                    digit = c - '0';
                else if ((c >= 'a') && (c <= 'f'))
                    digit = (c - 'a') + 10;
                else
                    digit = (c - 'A') + 10;
                value = value * 16 + digit;
            }
            *dst++ = value;
            src += 3;
            len -= 3;
        } else {
            *dst++ = *src++;
            len--;
        }
    }
    *dst = 0;
    return(result);
}
1658
1659 /**
1660  * xmlURIEscapeStr:
1661  * @str:  string to escape
1662  * @list: exception list string of chars not to escape
1663  *
1664  * This routine escapes a string to hex, ignoring reserved characters (a-z)
1665  * and the characters in the exception list.
1666  *
1667  * Returns a new escaped string or NULL in case of error.
1668  */
1669 xmlChar *
1670 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1671     xmlChar *ret, ch;
1672     xmlChar *temp;
1673     const xmlChar *in;
1674     int len, out;
1675
1676     if (str == NULL)
1677         return(NULL);
1678     if (str[0] == 0)
1679         return(xmlStrdup(str));
1680     len = xmlStrlen(str);
1681     if (!(len > 0)) return(NULL);
1682
1683     len += 20;
1684     ret = (xmlChar *) xmlMallocAtomic(len);
1685     if (ret == NULL) {
1686         xmlURIErrMemory("escaping URI value\n");
1687         return(NULL);
1688     }
1689     in = (const xmlChar *) str;
1690     out = 0;
1691     while(*in != 0) {
1692         if (len - out <= 3) {
1693             temp = xmlSaveUriRealloc(ret, &len);
1694             if (temp == NULL) {
1695                 xmlURIErrMemory("escaping URI value\n");
1696                 xmlFree(ret);
1697                 return(NULL);
1698             }
1699             ret = temp;
1700         }
1701
1702         ch = *in;
1703
1704         if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1705             unsigned char val;
1706             ret[out++] = '%';
1707             val = ch >> 4;
1708             if (val <= 9)
1709                 ret[out++] = '0' + val;
1710             else
1711                 ret[out++] = 'A' + val - 0xA;
1712             val = ch & 0xF;
1713             if (val <= 9)
1714                 ret[out++] = '0' + val;
1715             else
1716                 ret[out++] = 'A' + val - 0xA;
1717             in++;
1718         } else {
1719             ret[out++] = *in++;
1720         }
1721
1722     }
1723     ret[out] = 0;
1724     return(ret);
1725 }
1726
1727 /**
1728  * xmlURIEscape:
1729  * @str:  the string of the URI to escape
1730  *
1731  * Escaping routine, does not do validity checks !
1732  * It will try to escape the chars needing this, but this is heuristic
1733  * based it's impossible to be sure.
1734  *
1735  * Returns an copy of the string, but escaped
1736  *
1737  * 25 May 2001
1738  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1739  * according to RFC2396.
1740  *   - Carl Douglas
1741  */
1742 xmlChar *
1743 xmlURIEscape(const xmlChar * str)
1744 {
1745     xmlChar *ret, *segment = NULL;
1746     xmlURIPtr uri;
1747     int ret2;
1748
1749 #define NULLCHK(p) if(!p) { \
1750          xmlURIErrMemory("escaping URI value\n"); \
1751          xmlFreeURI(uri); \
1752          return NULL; } \
1753
1754     if (str == NULL)
1755         return (NULL);
1756
1757     uri = xmlCreateURI();
1758     if (uri != NULL) {
1759         /*
1760          * Allow escaping errors in the unescaped form
1761          */
1762         uri->cleanup = 1;
1763         ret2 = xmlParseURIReference(uri, (const char *)str);
1764         if (ret2) {
1765             xmlFreeURI(uri);
1766             return (NULL);
1767         }
1768     }
1769
1770     if (!uri)
1771         return NULL;
1772
1773     ret = NULL;
1774
1775     if (uri->scheme) {
1776         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1777         NULLCHK(segment)
1778         ret = xmlStrcat(ret, segment);
1779         ret = xmlStrcat(ret, BAD_CAST ":");
1780         xmlFree(segment);
1781     }
1782
1783     if (uri->authority) {
1784         segment =
1785             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1786         NULLCHK(segment)
1787         ret = xmlStrcat(ret, BAD_CAST "//");
1788         ret = xmlStrcat(ret, segment);
1789         xmlFree(segment);
1790     }
1791
1792     if (uri->user) {
1793         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1794         NULLCHK(segment)
1795                 ret = xmlStrcat(ret,BAD_CAST "//");
1796         ret = xmlStrcat(ret, segment);
1797         ret = xmlStrcat(ret, BAD_CAST "@");
1798         xmlFree(segment);
1799     }
1800
1801     if (uri->server) {
1802         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1803         NULLCHK(segment)
1804                 if (uri->user == NULL)
1805                 ret = xmlStrcat(ret, BAD_CAST "//");
1806         ret = xmlStrcat(ret, segment);
1807         xmlFree(segment);
1808     }
1809
1810     if (uri->port) {
1811         xmlChar port[10];
1812
1813         snprintf((char *) port, 10, "%d", uri->port);
1814         ret = xmlStrcat(ret, BAD_CAST ":");
1815         ret = xmlStrcat(ret, port);
1816     }
1817
1818     if (uri->path) {
1819         segment =
1820             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1821         NULLCHK(segment)
1822         ret = xmlStrcat(ret, segment);
1823         xmlFree(segment);
1824     }
1825
1826     if (uri->query_raw) {
1827         ret = xmlStrcat(ret, BAD_CAST "?");
1828         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1829     }
1830     else if (uri->query) {
1831         segment =
1832             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1833         NULLCHK(segment)
1834         ret = xmlStrcat(ret, BAD_CAST "?");
1835         ret = xmlStrcat(ret, segment);
1836         xmlFree(segment);
1837     }
1838
1839     if (uri->opaque) {
1840         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1841         NULLCHK(segment)
1842         ret = xmlStrcat(ret, segment);
1843         xmlFree(segment);
1844     }
1845
1846     if (uri->fragment) {
1847         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1848         NULLCHK(segment)
1849         ret = xmlStrcat(ret, BAD_CAST "#");
1850         ret = xmlStrcat(ret, segment);
1851         xmlFree(segment);
1852     }
1853
1854     xmlFreeURI(uri);
1855 #undef NULLCHK
1856
1857     return (ret);
1858 }
1859
1860 /************************************************************************
1861  *                                                                      *
1862  *                      Public functions                                *
1863  *                                                                      *
1864  ************************************************************************/
1865
1866 /**
1867  * xmlBuildURI:
1868  * @URI:  the URI instance found in the document
1869  * @base:  the base value
1870  *
 1871  * Computes the final URI of the reference done by checking that
1872  * the given URI is valid, and building the final URI using the
1873  * base URI. This is processed according to section 5.2 of the
1874  * RFC 2396
1875  *
1876  * 5.2. Resolving Relative References to Absolute Form
1877  *
1878  * Returns a new URI string (to be freed by the caller) or NULL in case
1879  *         of error.
1880  */
1881 xmlChar *
1882 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1883     xmlChar *val = NULL;
1884     int ret, len, indx, cur, out;
1885     xmlURIPtr ref = NULL;
1886     xmlURIPtr bas = NULL;
1887     xmlURIPtr res = NULL;
1888
1889     /*
1890      * 1) The URI reference is parsed into the potential four components and
1891      *    fragment identifier, as described in Section 4.3.
1892      *
1893      *    NOTE that a completely empty URI is treated by modern browsers
1894      *    as a reference to "." rather than as a synonym for the current
1895      *    URI.  Should we do that here?
1896      */
1897     if (URI == NULL)
1898         ret = -1;
1899     else {
1900         if (*URI) {
1901             ref = xmlCreateURI();
1902             if (ref == NULL)
1903                 goto done;
1904             ret = xmlParseURIReference(ref, (const char *) URI);
1905         }
1906         else
1907             ret = 0;
1908     }
1909     if (ret != 0)
1910         goto done;
1911     if ((ref != NULL) && (ref->scheme != NULL)) {
1912         /*
1913          * The URI is absolute don't modify.
1914          */
1915         val = xmlStrdup(URI);
1916         goto done;
1917     }
1918     if (base == NULL)
1919         ret = -1;
1920     else {
1921         bas = xmlCreateURI();
1922         if (bas == NULL)
1923             goto done;
1924         ret = xmlParseURIReference(bas, (const char *) base);
1925     }
1926     if (ret != 0) {
1927         if (ref)
1928             val = xmlSaveUri(ref);
1929         goto done;
1930     }
1931     if (ref == NULL) {
1932         /*
1933          * the base fragment must be ignored
1934          */
1935         if (bas->fragment != NULL) {
1936             xmlFree(bas->fragment);
1937             bas->fragment = NULL;
1938         }
1939         val = xmlSaveUri(bas);
1940         goto done;
1941     }
1942
1943     /*
1944      * 2) If the path component is empty and the scheme, authority, and
1945      *    query components are undefined, then it is a reference to the
1946      *    current document and we are done.  Otherwise, the reference URI's
1947      *    query and fragment components are defined as found (or not found)
1948      *    within the URI reference and not inherited from the base URI.
1949      *
1950      *    NOTE that in modern browsers, the parsing differs from the above
1951      *    in the following aspect:  the query component is allowed to be
1952      *    defined while still treating this as a reference to the current
1953      *    document.
1954      */
1955     res = xmlCreateURI();
1956     if (res == NULL)
1957         goto done;
1958     if ((ref->scheme == NULL) && (ref->path == NULL) &&
1959         ((ref->authority == NULL) && (ref->server == NULL))) {
1960         if (bas->scheme != NULL)
1961             res->scheme = xmlMemStrdup(bas->scheme);
1962         if (bas->authority != NULL)
1963             res->authority = xmlMemStrdup(bas->authority);
1964         else if ((bas->server != NULL) || (bas->port == -1)) {
1965             if (bas->server != NULL)
1966                 res->server = xmlMemStrdup(bas->server);
1967             if (bas->user != NULL)
1968                 res->user = xmlMemStrdup(bas->user);
1969             res->port = bas->port;
1970         }
1971         if (bas->path != NULL)
1972             res->path = xmlMemStrdup(bas->path);
1973         if (ref->query_raw != NULL)
1974             res->query_raw = xmlMemStrdup (ref->query_raw);
1975         else if (ref->query != NULL)
1976             res->query = xmlMemStrdup(ref->query);
1977         else if (bas->query_raw != NULL)
1978             res->query_raw = xmlMemStrdup(bas->query_raw);
1979         else if (bas->query != NULL)
1980             res->query = xmlMemStrdup(bas->query);
1981         if (ref->fragment != NULL)
1982             res->fragment = xmlMemStrdup(ref->fragment);
1983         goto step_7;
1984     }
1985
1986     /*
1987      * 3) If the scheme component is defined, indicating that the reference
1988      *    starts with a scheme name, then the reference is interpreted as an
1989      *    absolute URI and we are done.  Otherwise, the reference URI's
1990      *    scheme is inherited from the base URI's scheme component.
1991      */
1992     if (ref->scheme != NULL) {
1993         val = xmlSaveUri(ref);
1994         goto done;
1995     }
1996     if (bas->scheme != NULL)
1997         res->scheme = xmlMemStrdup(bas->scheme);
1998
1999     if (ref->query_raw != NULL)
2000         res->query_raw = xmlMemStrdup(ref->query_raw);
2001     else if (ref->query != NULL)
2002         res->query = xmlMemStrdup(ref->query);
2003     if (ref->fragment != NULL)
2004         res->fragment = xmlMemStrdup(ref->fragment);
2005
2006     /*
2007      * 4) If the authority component is defined, then the reference is a
2008      *    network-path and we skip to step 7.  Otherwise, the reference
2009      *    URI's authority is inherited from the base URI's authority
2010      *    component, which will also be undefined if the URI scheme does not
2011      *    use an authority component.
2012      */
2013     if ((ref->authority != NULL) || (ref->server != NULL)) {
2014         if (ref->authority != NULL)
2015             res->authority = xmlMemStrdup(ref->authority);
2016         else {
2017             res->server = xmlMemStrdup(ref->server);
2018             if (ref->user != NULL)
2019                 res->user = xmlMemStrdup(ref->user);
2020             res->port = ref->port;
2021         }
2022         if (ref->path != NULL)
2023             res->path = xmlMemStrdup(ref->path);
2024         goto step_7;
2025     }
2026     if (bas->authority != NULL)
2027         res->authority = xmlMemStrdup(bas->authority);
2028     else if ((bas->server != NULL) || (bas->port == -1)) {
2029         if (bas->server != NULL)
2030             res->server = xmlMemStrdup(bas->server);
2031         if (bas->user != NULL)
2032             res->user = xmlMemStrdup(bas->user);
2033         res->port = bas->port;
2034     }
2035
2036     /*
2037      * 5) If the path component begins with a slash character ("/"), then
2038      *    the reference is an absolute-path and we skip to step 7.
2039      */
2040     if ((ref->path != NULL) && (ref->path[0] == '/')) {
2041         res->path = xmlMemStrdup(ref->path);
2042         goto step_7;
2043     }
2044
2045
2046     /*
2047      * 6) If this step is reached, then we are resolving a relative-path
2048      *    reference.  The relative path needs to be merged with the base
2049      *    URI's path.  Although there are many ways to do this, we will
2050      *    describe a simple method using a separate string buffer.
2051      *
2052      * Allocate a buffer large enough for the result string.
2053      */
2054     len = 2; /* extra / and 0 */
2055     if (ref->path != NULL)
2056         len += strlen(ref->path);
2057     if (bas->path != NULL)
2058         len += strlen(bas->path);
2059     res->path = (char *) xmlMallocAtomic(len);
2060     if (res->path == NULL) {
2061         xmlURIErrMemory("resolving URI against base\n");
2062         goto done;
2063     }
2064     res->path[0] = 0;
2065
2066     /*
2067      * a) All but the last segment of the base URI's path component is
2068      *    copied to the buffer.  In other words, any characters after the
2069      *    last (right-most) slash character, if any, are excluded.
2070      */
2071     cur = 0;
2072     out = 0;
2073     if (bas->path != NULL) {
2074         while (bas->path[cur] != 0) {
2075             while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2076                 cur++;
2077             if (bas->path[cur] == 0)
2078                 break;
2079
2080             cur++;
2081             while (out < cur) {
2082                 res->path[out] = bas->path[out];
2083                 out++;
2084             }
2085         }
2086     }
2087     res->path[out] = 0;
2088
2089     /*
2090      * b) The reference's path component is appended to the buffer
2091      *    string.
2092      */
2093     if (ref->path != NULL && ref->path[0] != 0) {
2094         indx = 0;
2095         /*
2096          * Ensure the path includes a '/'
2097          */
2098         if ((out == 0) && (bas->server != NULL))
2099             res->path[out++] = '/';
2100         while (ref->path[indx] != 0) {
2101             res->path[out++] = ref->path[indx++];
2102         }
2103     }
2104     res->path[out] = 0;
2105
2106     /*
2107      * Steps c) to h) are really path normalization steps
2108      */
2109     xmlNormalizeURIPath(res->path);
2110
2111 step_7:
2112
2113     /*
2114      * 7) The resulting URI components, including any inherited from the
2115      *    base URI, are recombined to give the absolute form of the URI
2116      *    reference.
2117      */
2118     val = xmlSaveUri(res);
2119
2120 done:
2121     if (ref != NULL)
2122         xmlFreeURI(ref);
2123     if (bas != NULL)
2124         xmlFreeURI(bas);
2125     if (res != NULL)
2126         xmlFreeURI(res);
2127     return(val);
2128 }
2129
2130 /**
2131  * xmlBuildRelativeURI:
2132  * @URI:  the URI reference under consideration
2133  * @base:  the base value
2134  *
2135  * Expresses the URI of the reference in terms relative to the
2136  * base.  Some examples of this operation include:
2137  *     base = "http://site1.com/docs/book1.html"
2138  *        URI input                        URI returned
2139  *     docs/pic1.gif                    pic1.gif
2140  *     docs/img/pic1.gif                img/pic1.gif
2141  *     img/pic1.gif                     ../img/pic1.gif
2142  *     http://site1.com/docs/pic1.gif   pic1.gif
2143  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2144  *
2145  *     base = "docs/book1.html"
2146  *        URI input                        URI returned
2147  *     docs/pic1.gif                    pic1.gif
2148  *     docs/img/pic1.gif                img/pic1.gif
2149  *     img/pic1.gif                     ../img/pic1.gif
2150  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2151  *
2152  *
2153  * Note: if the URI reference is really wierd or complicated, it may be
2154  *       worthwhile to first convert it into a "nice" one by calling
2155  *       xmlBuildURI (using 'base') before calling this routine,
2156  *       since this routine (for reasonable efficiency) assumes URI has
2157  *       already been through some validation.
2158  *
2159  * Returns a new URI string (to be freed by the caller) or NULL in case
2160  * error.
2161  */
2162 xmlChar *
2163 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2164 {
2165     xmlChar *val = NULL;
2166     int ret;
2167     int ix;
2168     int nbslash = 0;
2169     int len;
2170     xmlURIPtr ref = NULL;
2171     xmlURIPtr bas = NULL;
2172     xmlChar *bptr, *uptr, *vptr;
2173     int remove_path = 0;
2174
2175     if ((URI == NULL) || (*URI == 0))
2176         return NULL;
2177
2178     /*
2179      * First parse URI into a standard form
2180      */
2181     ref = xmlCreateURI ();
2182     if (ref == NULL)
2183         return NULL;
2184     /* If URI not already in "relative" form */
2185     if (URI[0] != '.') {
2186         ret = xmlParseURIReference (ref, (const char *) URI);
2187         if (ret != 0)
2188             goto done;          /* Error in URI, return NULL */
2189     } else
2190         ref->path = (char *)xmlStrdup(URI);
2191
2192     /*
2193      * Next parse base into the same standard form
2194      */
2195     if ((base == NULL) || (*base == 0)) {
2196         val = xmlStrdup (URI);
2197         goto done;
2198     }
2199     bas = xmlCreateURI ();
2200     if (bas == NULL)
2201         goto done;
2202     if (base[0] != '.') {
2203         ret = xmlParseURIReference (bas, (const char *) base);
2204         if (ret != 0)
2205             goto done;          /* Error in base, return NULL */
2206     } else
2207         bas->path = (char *)xmlStrdup(base);
2208
2209     /*
2210      * If the scheme / server on the URI differs from the base,
2211      * just return the URI
2212      */
2213     if ((ref->scheme != NULL) &&
2214         ((bas->scheme == NULL) ||
2215          (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2216          (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2217         val = xmlStrdup (URI);
2218         goto done;
2219     }
2220     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2221         val = xmlStrdup(BAD_CAST "");
2222         goto done;
2223     }
2224     if (bas->path == NULL) {
2225         val = xmlStrdup((xmlChar *)ref->path);
2226         goto done;
2227     }
2228     if (ref->path == NULL) {
2229         ref->path = (char *) "/";
2230         remove_path = 1;
2231     }
2232
2233     /*
2234      * At this point (at last!) we can compare the two paths
2235      *
2236      * First we take care of the special case where either of the
2237      * two path components may be missing (bug 316224)
2238      */
2239     if (bas->path == NULL) {
2240         if (ref->path != NULL) {
2241             uptr = (xmlChar *) ref->path;
2242             if (*uptr == '/')
2243                 uptr++;
2244             /* exception characters from xmlSaveUri */
2245             val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2246         }
2247         goto done;
2248     }
2249     bptr = (xmlChar *)bas->path;
2250     if (ref->path == NULL) {
2251         for (ix = 0; bptr[ix] != 0; ix++) {
2252             if (bptr[ix] == '/')
2253                 nbslash++;
2254         }
2255         uptr = NULL;
2256         len = 1;        /* this is for a string terminator only */
2257     } else {
2258         xmlChar *rptr = (xmlChar *) ref->path;
2259         int pos = 0;
2260
2261         /*
2262          * Next we compare the two strings and find where they first differ
2263          */
2264         if ((*rptr == '.') && (rptr[1] == '/'))
2265             rptr += 2;
2266         if ((*bptr == '.') && (bptr[1] == '/'))
2267             bptr += 2;
2268         else if ((*bptr == '/') && (*rptr != '/'))
2269             bptr++;
2270         while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2271             pos++;
2272
2273         if (bptr[pos] == rptr[pos]) {
2274             val = xmlStrdup(BAD_CAST "");
2275             goto done;          /* (I can't imagine why anyone would do this) */
2276         }
2277
2278         /*
2279          * In URI, "back up" to the last '/' encountered.  This will be the
2280          * beginning of the "unique" suffix of URI
2281          */
2282         ix = pos;
2283         if ((rptr[ix] == '/') && (ix > 0))
2284             ix--;
2285         else if ((rptr[ix] == 0) && (ix > 1) && (rptr[ix - 1] == '/'))
2286             ix -= 2;
2287         for (; ix > 0; ix--) {
2288             if (rptr[ix] == '/')
2289                 break;
2290         }
2291         if (ix == 0) {
2292             uptr = (xmlChar *)rptr;
2293         } else {
2294             ix++;
2295             uptr = (xmlChar *)&rptr[ix];
2296         }
2297
2298         /*
2299          * In base, count the number of '/' from the differing point
2300          */
2301         if (bptr[pos] != rptr[pos]) {/* check for trivial URI == base */
2302             for (; bptr[ix] != 0; ix++) {
2303                 if (bptr[ix] == '/')
2304                     nbslash++;
2305             }
2306         }
2307         len = xmlStrlen (uptr) + 1;
2308     }
2309
2310     if (nbslash == 0) {
2311         if (uptr != NULL)
2312             /* exception characters from xmlSaveUri */
2313             val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2314         goto done;
2315     }
2316
2317     /*
2318      * Allocate just enough space for the returned string -
2319      * length of the remainder of the URI, plus enough space
2320      * for the "../" groups, plus one for the terminator
2321      */
2322     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2323     if (val == NULL) {
2324         xmlURIErrMemory("building relative URI\n");
2325         goto done;
2326     }
2327     vptr = val;
2328     /*
2329      * Put in as many "../" as needed
2330      */
2331     for (; nbslash>0; nbslash--) {
2332         *vptr++ = '.';
2333         *vptr++ = '.';
2334         *vptr++ = '/';
2335     }
2336     /*
2337      * Finish up with the end of the URI
2338      */
2339     if (uptr != NULL) {
2340         if ((vptr > val) && (len > 0) &&
2341             (uptr[0] == '/') && (vptr[-1] == '/')) {
2342             memcpy (vptr, uptr + 1, len - 1);
2343             vptr[len - 2] = 0;
2344         } else {
2345             memcpy (vptr, uptr, len);
2346             vptr[len - 1] = 0;
2347         }
2348     } else {
2349         vptr[len - 1] = 0;
2350     }
2351
2352     /* escape the freshly-built path */
2353     vptr = val;
2354         /* exception characters from xmlSaveUri */
2355     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2356     xmlFree(vptr);
2357
2358 done:
2359     /*
2360      * Free the working variables
2361      */
2362     if (remove_path != 0)
2363         ref->path = NULL;
2364     if (ref != NULL)
2365         xmlFreeURI (ref);
2366     if (bas != NULL)
2367         xmlFreeURI (bas);
2368
2369     return val;
2370 }
2371
2372 /**
2373  * xmlCanonicPath:
2374  * @path:  the resource locator in a filesystem notation
2375  *
2376  * Constructs a canonic path from the specified path.
2377  *
2378  * Returns a new canonic path, or a duplicate of the path parameter if the
2379  * construction fails. The caller is responsible for freeing the memory occupied
2380  * by the returned string. If there is insufficient memory available, or the
2381  * argument is NULL, the function returns NULL.
2382  */
2383 #define IS_WINDOWS_PATH(p)                                      \
2384         ((p != NULL) &&                                         \
2385          (((p[0] >= 'a') && (p[0] <= 'z')) ||                   \
2386           ((p[0] >= 'A') && (p[0] <= 'Z'))) &&                  \
2387          (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2388 xmlChar *
2389 xmlCanonicPath(const xmlChar *path)
2390 {
2391 /*
2392  * For Windows implementations, additional work needs to be done to
2393  * replace backslashes in pathnames with "forward slashes"
2394  */
2395 #if defined(_WIN32) && !defined(__CYGWIN__)
2396     int len = 0;
2397     char *p = NULL;
2398 #endif
2399     xmlURIPtr uri;
2400     xmlChar *ret;
2401     const xmlChar *absuri;
2402
2403     if (path == NULL)
2404         return(NULL);
2405
2406 #if defined(_WIN32)
2407     /*
2408      * We must not change the backslashes to slashes if the the path
2409      * starts with \\?\
2410      * Those paths can be up to 32k characters long.
2411      * Was added specifically for OpenOffice, those paths can't be converted
2412      * to URIs anyway.
2413      */
2414     if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2415         (path[3] == '\\') )
2416         return xmlStrdup((const xmlChar *) path);
2417 #endif
2418
2419         /* sanitize filename starting with // so it can be used as URI */
2420     if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2421         path++;
2422
2423     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2424         xmlFreeURI(uri);
2425         return xmlStrdup(path);
2426     }
2427
2428     /* Check if this is an "absolute uri" */
2429     absuri = xmlStrstr(path, BAD_CAST "://");
2430     if (absuri != NULL) {
2431         int l, j;
2432         unsigned char c;
2433         xmlChar *escURI;
2434
2435         /*
2436          * this looks like an URI where some parts have not been
2437          * escaped leading to a parsing problem.  Check that the first
2438          * part matches a protocol.
2439          */
2440         l = absuri - path;
2441         /* Bypass if first part (part before the '://') is > 20 chars */
2442         if ((l <= 0) || (l > 20))
2443             goto path_processing;
2444         /* Bypass if any non-alpha characters are present in first part */
2445         for (j = 0;j < l;j++) {
2446             c = path[j];
2447             if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2448                 goto path_processing;
2449         }
2450
2451         /* Escape all except the characters specified in the supplied path */
2452         escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2453         if (escURI != NULL) {
2454             /* Try parsing the escaped path */
2455             uri = xmlParseURI((const char *) escURI);
2456             /* If successful, return the escaped string */
2457             if (uri != NULL) {
2458                 xmlFreeURI(uri);
2459                 return escURI;
2460             }
2461             xmlFree(escURI);
2462         }
2463     }
2464
2465 path_processing:
2466 /* For Windows implementations, replace backslashes with 'forward slashes' */
2467 #if defined(_WIN32) && !defined(__CYGWIN__)
2468     /*
2469      * Create a URI structure
2470      */
2471     uri = xmlCreateURI();
2472     if (uri == NULL) {          /* Guard against 'out of memory' */
2473         return(NULL);
2474     }
2475
2476     len = xmlStrlen(path);
2477     if ((len > 2) && IS_WINDOWS_PATH(path)) {
2478         /* make the scheme 'file' */
2479         uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2480         /* allocate space for leading '/' + path + string terminator */
2481         uri->path = xmlMallocAtomic(len + 2);
2482         if (uri->path == NULL) {
2483             xmlFreeURI(uri);    /* Guard agains 'out of memory' */
2484             return(NULL);
2485         }
2486         /* Put in leading '/' plus path */
2487         uri->path[0] = '/';
2488         p = uri->path + 1;
2489         strncpy(p, (char *) path, len + 1);
2490     } else {
2491         uri->path = (char *) xmlStrdup(path);
2492         if (uri->path == NULL) {
2493             xmlFreeURI(uri);
2494             return(NULL);
2495         }
2496         p = uri->path;
2497     }
2498     /* Now change all occurences of '\' to '/' */
2499     while (*p != '\0') {
2500         if (*p == '\\')
2501             *p = '/';
2502         p++;
2503     }
2504
2505     if (uri->scheme == NULL) {
2506         ret = xmlStrdup((const xmlChar *) uri->path);
2507     } else {
2508         ret = xmlSaveUri(uri);
2509     }
2510
2511     xmlFreeURI(uri);
2512 #else
2513     ret = xmlStrdup((const xmlChar *) path);
2514 #endif
2515     return(ret);
2516 }
2517
2518 /**
2519  * xmlPathToURI:
2520  * @path:  the resource locator in a filesystem notation
2521  *
2522  * Constructs an URI expressing the existing path
2523  *
2524  * Returns a new URI, or a duplicate of the path parameter if the
2525  * construction fails. The caller is responsible for freeing the memory
2526  * occupied by the returned string. If there is insufficient memory available,
2527  * or the argument is NULL, the function returns NULL.
2528  */
2529 xmlChar *
2530 xmlPathToURI(const xmlChar *path)
2531 {
2532     xmlURIPtr uri;
2533     xmlURI temp;
2534     xmlChar *ret, *cal;
2535
2536     if (path == NULL)
2537         return(NULL);
2538
2539     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2540         xmlFreeURI(uri);
2541         return xmlStrdup(path);
2542     }
2543     cal = xmlCanonicPath(path);
2544     if (cal == NULL)
2545         return(NULL);
2546 #if defined(_WIN32) && !defined(__CYGWIN__)
2547     /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2548        If 'cal' is a valid URI allready then we are done here, as continuing would make
2549        it invalid. */
2550     if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2551         xmlFreeURI(uri);
2552         return cal;
2553     }
2554     /* 'cal' can contain a relative path with backslashes. If that is processed
2555        by xmlSaveURI, they will be escaped and the external entity loader machinery
2556        will fail. So convert them to slashes. Misuse 'ret' for walking. */
2557     ret = cal;
2558     while (*ret != '\0') {
2559         if (*ret == '\\')
2560             *ret = '/';
2561         ret++;
2562     }
2563 #endif
2564     memset(&temp, 0, sizeof(temp));
2565     temp.path = (char *) cal;
2566     ret = xmlSaveUri(&temp);
2567     xmlFree(cal);
2568     return(ret);
2569 }
2570 #define bottom_uri
2571 #include "elfgcchack.h"