4ab0ce2e253900e30aae95cb7d89d0ad644bf9c9
2 * uri.c: set of generic URI related routines
4 * Reference: RFCs 3986, 2732 and 2373
6 * See Copyright for the status of this software.
16 #include <libxml/xmlmemory.h>
17 #include <libxml/uri.h>
18 #include <libxml/globals.h>
19 #include <libxml/xmlerror.h>
24 * The definition of the URI regexp in the above RFC has no size limit
25 * In practice they are usually relatively short except for the
26 * data URI scheme as defined in RFC 2397. Even for data URI the usual
27 * maximum size before hitting random practical limits is around 64 KB
28 * and 4KB is usually a maximum admitted limit for proper operations.
29 * The value below is more a security limit than anything else and
30 * really should never be hit by 'normal' operations
31 * Set to 1 MByte in 2012, this is only enforced on output
/* Output size limit in bytes (1 MByte); parenthesized so the macro
 * behaves as a single value inside any surrounding expression. */
#define MAX_URI_LENGTH (1024 * 1024)
36 xmlURIErrMemory(const char *extra
)
39 __xmlRaiseError(NULL
, NULL
, NULL
,
40 NULL
, NULL
, XML_FROM_URI
,
41 XML_ERR_NO_MEMORY
, XML_ERR_FATAL
, NULL
, 0,
42 extra
, NULL
, NULL
, 0, 0,
43 "Memory allocation failed : %s\n", extra
);
45 __xmlRaiseError(NULL
, NULL
, NULL
,
46 NULL
, NULL
, XML_FROM_URI
,
47 XML_ERR_NO_MEMORY
, XML_ERR_FATAL
, NULL
, 0,
48 NULL
, NULL
, NULL
, 0, 0,
49 "Memory allocation failed\n");
52 static void xmlCleanURI(xmlURIPtr uri
);
/*
 * Character classes from the old RFC 2396 rules, used by the legacy
 * handling code. Each macro takes a character value and may evaluate
 * its argument more than once.
 */

/* lowalpha = "a" .. "z" */
#define IS_LOWALPHA(x) (('a' <= (x)) && ((x) <= 'z'))

/* upalpha = "A" .. "Z" */
#define IS_UPALPHA(x) (('A' <= (x)) && ((x) <= 'Z'))

/* alpha = lowalpha | upalpha */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/* digit = "0" .. "9" */
#define IS_DIGIT(x) (('0' <= (x)) && ((x) <= '9'))

/* alphanum = alpha | digit */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/* mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" */
#define IS_MARK(x)                                          \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||        \
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||        \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))
/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 *
 * Takes a pointer and tests the character it points to. In addition to
 * the characters of the production above, "[" and "]" are matched too.
 */
#define IS_UNWISE(p)                                            \
    ((*(p) == '{') || (*(p) == '}') || (*(p) == '|') ||         \
     (*(p) == '\\') || (*(p) == '^') || (*(p) == '[') ||        \
     (*(p) == ']') || (*(p) == '`'))
107 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
111 #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
112 ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
113 ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Skip to next pointer char, handle escaped sequences: the pointer is
 * advanced by three when it points at '%', by one otherwise. The macro
 * does not check that two characters actually follow the '%'.
 *
 * The argument is parenthesized so that any modifiable pointer
 * expression (for example *pp) is advanced itself.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : (p)++)
129 * Productions from the spec.
131 * authority = server | reg_name
132 * reg_name = 1*( unreserved | escaped | "$" | "," |
133 * ";" | ":" | "@" | "&" | "=" | "+" )
135 * path = [ abs_path | opaque_part ]
/*
 * Duplicate the first n bytes of s through xmlStrndup() and hand the
 * result back as a char *. The whole expansion is parenthesized so the
 * cast applies before any operator written after the macro call.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
140 /************************************************************************
144 ************************************************************************/
/*
 * RFC 3986 character classes. Every macro takes a pointer and tests the
 * character(s) it points at; the argument may be evaluated several times.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||		\
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||		\
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following characters are only read after the previous one
 * matched, so a string terminator stops the test. The argument is
 * parenthesized before the offset is added so that any pointer
 * expression can be passed.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))
197 * xmlParse3986Scheme:
198 * @uri: pointer to an URI structure
199 * @str: pointer to the string to analyze
201 * Parse an URI scheme
203 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
205 * Returns 0 or the error code
208 xmlParse3986Scheme(xmlURIPtr uri
, const char **str
) {
218 while (ISA_ALPHA(cur
) || ISA_DIGIT(cur
) ||
219 (*cur
== '+') || (*cur
== '-') || (*cur
== '.')) cur
++;
221 if (uri
->scheme
!= NULL
) xmlFree(uri
->scheme
);
222 uri
->scheme
= STRNDUP(*str
, cur
- *str
);
229 * xmlParse3986Fragment:
230 * @uri: pointer to an URI structure
231 * @str: pointer to the string to analyze
233 * Parse the fragment part of an URI
235 * fragment = *( pchar / "/" / "?" )
236 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
237 * in the fragment identifier but this is used very broadly for
238 * xpointer scheme selection, so we are allowing it here to not break
239 * for example all the DocBook processing chains.
241 * Returns 0 or the error code
244 xmlParse3986Fragment(xmlURIPtr uri
, const char **str
)
253 while ((ISA_PCHAR(cur
)) || (*cur
== '/') || (*cur
== '?') ||
254 (*cur
== '[') || (*cur
== ']') ||
255 ((uri
!= NULL
) && (uri
->cleanup
& 1) && (IS_UNWISE(cur
))))
258 if (uri
->fragment
!= NULL
)
259 xmlFree(uri
->fragment
);
260 if (uri
->cleanup
& 2)
261 uri
->fragment
= STRNDUP(*str
, cur
- *str
);
263 uri
->fragment
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
271 * @uri: pointer to an URI structure
272 * @str: pointer to the string to analyze
274 * Parse the query part of an URI
278 * Returns 0 or the error code
281 xmlParse3986Query(xmlURIPtr uri
, const char **str
)
290 while ((ISA_PCHAR(cur
)) || (*cur
== '/') || (*cur
== '?') ||
291 ((uri
!= NULL
) && (uri
->cleanup
& 1) && (IS_UNWISE(cur
))))
294 if (uri
->query
!= NULL
)
296 if (uri
->cleanup
& 2)
297 uri
->query
= STRNDUP(*str
, cur
- *str
);
299 uri
->query
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
301 /* Save the raw bytes of the query as well.
302 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
304 if (uri
->query_raw
!= NULL
)
305 xmlFree (uri
->query_raw
);
306 uri
->query_raw
= STRNDUP (*str
, cur
- *str
);
314 * @uri: pointer to an URI structure
315 * @str: the string to analyze
317 * Parse a port part and fills in the appropriate fields
318 * of the @uri structure
322 * Returns 0 or the error code
325 xmlParse3986Port(xmlURIPtr uri
, const char **str
)
327 const char *cur
= *str
;
329 if (ISA_DIGIT(cur
)) {
332 while (ISA_DIGIT(cur
)) {
334 uri
->port
= uri
->port
* 10 + (*cur
- '0');
344 * xmlParse3986Userinfo:
345 * @uri: pointer to an URI structure
346 * @str: the string to analyze
348 * Parse a user information part and fill in the appropriate fields
349 * of the @uri structure
351 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
353 * Returns 0 or the error code
356 xmlParse3986Userinfo(xmlURIPtr uri
, const char **str
)
361 while (ISA_UNRESERVED(cur
) || ISA_PCT_ENCODED(cur
) ||
362 ISA_SUB_DELIM(cur
) || (*cur
== ':'))
366 if (uri
->user
!= NULL
) xmlFree(uri
->user
);
367 if (uri
->cleanup
& 2)
368 uri
->user
= STRNDUP(*str
, cur
- *str
);
370 uri
->user
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. On success *@str is moved past the octet; when no
 * alternative of the production matches, *@str is left unchanged.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;

    if (!(ISA_DIGIT(cur)))
        return(1);
    if (!ISA_DIGIT(cur+1))
        cur++;                          /* 0-9 */
    else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
        cur += 2;                       /* 10-99 */
    else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
        cur += 3;                       /* 100-199 */
    else if ((*cur == '2') && (*(cur + 1) >= '0') &&
             (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
        cur += 3;                       /* 200-249 */
    /* 250-255: the range check applies to the third character */
    else if ((*cur == '2') && (*(cur + 1) == '5') &&
             (*(cur + 2) >= '0') && (*(cur + 2) <= '5'))
        cur += 3;
    else
        return(1);
    *str = cur;
    return(0);
}
417 * @uri: pointer to an URI structure
418 * @str: the string to analyze
420 * Parse an host part and fills in the appropriate fields
421 * of the @uri structure
423 * host = IP-literal / IPv4address / reg-name
424 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
425 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
426 * reg-name = *( unreserved / pct-encoded / sub-delims )
428 * Returns 0 or the error code
431 xmlParse3986Host(xmlURIPtr uri
, const char **str
)
433 const char *cur
= *str
;
438 * IPv6 and future addressing schemes are enclosed between brackets
442 while ((*cur
!= ']') && (*cur
!= 0))
450 * try to parse an IPv4
452 if (ISA_DIGIT(cur
)) {
453 if (xmlParse3986DecOctet(&cur
) != 0)
458 if (xmlParse3986DecOctet(&cur
) != 0)
462 if (xmlParse3986DecOctet(&cur
) != 0)
466 if (xmlParse3986DecOctet(&cur
) != 0)
473 * then this should be a hostname which can be empty
475 while (ISA_UNRESERVED(cur
) || ISA_PCT_ENCODED(cur
) || ISA_SUB_DELIM(cur
))
479 if (uri
->authority
!= NULL
) xmlFree(uri
->authority
);
480 uri
->authority
= NULL
;
481 if (uri
->server
!= NULL
) xmlFree(uri
->server
);
483 if (uri
->cleanup
& 2)
484 uri
->server
= STRNDUP(host
, cur
- host
);
486 uri
->server
= xmlURIUnescapeString(host
, cur
- host
, NULL
);
495 * xmlParse3986Authority:
496 * @uri: pointer to an URI structure
497 * @str: the string to analyze
499 * Parse an authority part and fills in the appropriate fields
500 * of the @uri structure
502 * authority = [ userinfo "@" ] host [ ":" port ]
504 * Returns 0 or the error code
507 xmlParse3986Authority(xmlURIPtr uri
, const char **str
)
514 * try to parse an userinfo and check for the trailing @
516 ret
= xmlParse3986Userinfo(uri
, &cur
);
517 if ((ret
!= 0) || (*cur
!= '@'))
521 ret
= xmlParse3986Host(uri
, &cur
);
522 if (ret
!= 0) return(ret
);
525 ret
= xmlParse3986Port(uri
, &cur
);
526 if (ret
!= 0) return(ret
);
533 * xmlParse3986Segment:
534 * @str: the string to analyze
535 * @forbid: an optional forbidden character
536 * @empty: allow an empty segment
538 * Parse a segment and fills in the appropriate fields
539 * of the @uri structure
542 * segment-nz = 1*pchar
543 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
544 * ; non-zero-length segment without any colon ":"
546 * Returns 0 or the error code
549 xmlParse3986Segment(const char **str
, char forbid
, int empty
)
554 if (!ISA_PCHAR(cur
)) {
559 while (ISA_PCHAR(cur
) && (*cur
!= forbid
))
566 * xmlParse3986PathAbEmpty:
567 * @uri: pointer to an URI structure
568 * @str: the string to analyze
570 * Parse an path absolute or empty and fills in the appropriate fields
571 * of the @uri structure
573 * path-abempty = *( "/" segment )
575 * Returns 0 or the error code
578 xmlParse3986PathAbEmpty(xmlURIPtr uri
, const char **str
)
585 while (*cur
== '/') {
587 ret
= xmlParse3986Segment(&cur
, 0, 1);
588 if (ret
!= 0) return(ret
);
591 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
593 if (uri
->cleanup
& 2)
594 uri
->path
= STRNDUP(*str
, cur
- *str
);
596 uri
->path
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
606 * xmlParse3986PathAbsolute:
607 * @uri: pointer to an URI structure
608 * @str: the string to analyze
610 * Parse an path absolute and fills in the appropriate fields
611 * of the @uri structure
613 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
615 * Returns 0 or the error code
618 xmlParse3986PathAbsolute(xmlURIPtr uri
, const char **str
)
628 ret
= xmlParse3986Segment(&cur
, 0, 0);
630 while (*cur
== '/') {
632 ret
= xmlParse3986Segment(&cur
, 0, 1);
633 if (ret
!= 0) return(ret
);
637 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
639 if (uri
->cleanup
& 2)
640 uri
->path
= STRNDUP(*str
, cur
- *str
);
642 uri
->path
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
652 * xmlParse3986PathRootless:
653 * @uri: pointer to an URI structure
654 * @str: the string to analyze
656 * Parse an path without root and fills in the appropriate fields
657 * of the @uri structure
659 * path-rootless = segment-nz *( "/" segment )
661 * Returns 0 or the error code
664 xmlParse3986PathRootless(xmlURIPtr uri
, const char **str
)
671 ret
= xmlParse3986Segment(&cur
, 0, 0);
672 if (ret
!= 0) return(ret
);
673 while (*cur
== '/') {
675 ret
= xmlParse3986Segment(&cur
, 0, 1);
676 if (ret
!= 0) return(ret
);
679 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
681 if (uri
->cleanup
& 2)
682 uri
->path
= STRNDUP(*str
, cur
- *str
);
684 uri
->path
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
694 * xmlParse3986PathNoScheme:
695 * @uri: pointer to an URI structure
696 * @str: the string to analyze
698 * Parse an path which is not a scheme and fills in the appropriate fields
699 * of the @uri structure
701 * path-noscheme = segment-nz-nc *( "/" segment )
703 * Returns 0 or the error code
706 xmlParse3986PathNoScheme(xmlURIPtr uri
, const char **str
)
713 ret
= xmlParse3986Segment(&cur
, ':', 0);
714 if (ret
!= 0) return(ret
);
715 while (*cur
== '/') {
717 ret
= xmlParse3986Segment(&cur
, 0, 1);
718 if (ret
!= 0) return(ret
);
721 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
723 if (uri
->cleanup
& 2)
724 uri
->path
= STRNDUP(*str
, cur
- *str
);
726 uri
->path
= xmlURIUnescapeString(*str
, cur
- *str
, NULL
);
736 * xmlParse3986HierPart:
737 * @uri: pointer to an URI structure
738 * @str: the string to analyze
740 * Parse an hierarchical part and fills in the appropriate fields
741 * of the @uri structure
743 * hier-part = "//" authority path-abempty
748 * Returns 0 or the error code
751 xmlParse3986HierPart(xmlURIPtr uri
, const char **str
)
758 if ((*cur
== '/') && (*(cur
+ 1) == '/')) {
760 ret
= xmlParse3986Authority(uri
, &cur
);
761 if (ret
!= 0) return(ret
);
762 ret
= xmlParse3986PathAbEmpty(uri
, &cur
);
763 if (ret
!= 0) return(ret
);
766 } else if (*cur
== '/') {
767 ret
= xmlParse3986PathAbsolute(uri
, &cur
);
768 if (ret
!= 0) return(ret
);
769 } else if (ISA_PCHAR(cur
)) {
770 ret
= xmlParse3986PathRootless(uri
, &cur
);
771 if (ret
!= 0) return(ret
);
773 /* path-empty is effectively empty */
775 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
784 * xmlParse3986RelativeRef:
785 * @uri: pointer to an URI structure
786 * @str: the string to analyze
788 * Parse an URI string and fills in the appropriate fields
789 * of the @uri structure
791 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
792 * relative-part = "//" authority path-abempty
797 * Returns 0 or the error code
800 xmlParse3986RelativeRef(xmlURIPtr uri
, const char *str
) {
803 if ((*str
== '/') && (*(str
+ 1) == '/')) {
805 ret
= xmlParse3986Authority(uri
, &str
);
806 if (ret
!= 0) return(ret
);
807 ret
= xmlParse3986PathAbEmpty(uri
, &str
);
808 if (ret
!= 0) return(ret
);
809 } else if (*str
== '/') {
810 ret
= xmlParse3986PathAbsolute(uri
, &str
);
811 if (ret
!= 0) return(ret
);
812 } else if (ISA_PCHAR(str
)) {
813 ret
= xmlParse3986PathNoScheme(uri
, &str
);
814 if (ret
!= 0) return(ret
);
816 /* path-empty is effectively empty */
818 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
825 ret
= xmlParse3986Query(uri
, &str
);
826 if (ret
!= 0) return(ret
);
830 ret
= xmlParse3986Fragment(uri
, &str
);
831 if (ret
!= 0) return(ret
);
843 * @uri: pointer to an URI structure
844 * @str: the string to analyze
846 * Parse an URI string and fills in the appropriate fields
847 * of the @uri structure
849 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
851 * Returns 0 or the error code
854 xmlParse3986URI(xmlURIPtr uri
, const char *str
) {
857 ret
= xmlParse3986Scheme(uri
, &str
);
858 if (ret
!= 0) return(ret
);
863 ret
= xmlParse3986HierPart(uri
, &str
);
864 if (ret
!= 0) return(ret
);
867 ret
= xmlParse3986Query(uri
, &str
);
868 if (ret
!= 0) return(ret
);
872 ret
= xmlParse3986Fragment(uri
, &str
);
873 if (ret
!= 0) return(ret
);
883 * xmlParse3986URIReference:
884 * @uri: pointer to an URI structure
885 * @str: the string to analyze
887 * Parse an URI reference string and fills in the appropriate fields
888 * of the @uri structure
890 * URI-reference = URI / relative-ref
892 * Returns 0 or the error code
895 xmlParse3986URIReference(xmlURIPtr uri
, const char *str
) {
903 * Try first to parse absolute refs, then fallback to relative if
906 ret
= xmlParse3986URI(uri
, str
);
909 ret
= xmlParse3986RelativeRef(uri
, str
);
920 * @str: the URI string to analyze
922 * Parse an URI based on RFC 3986
924 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
926 * Returns a newly built xmlURIPtr or NULL in case of error
929 xmlParseURI(const char *str
) {
935 uri
= xmlCreateURI();
937 ret
= xmlParse3986URIReference(uri
, str
);
947 * xmlParseURIReference:
948 * @uri: pointer to an URI structure
949 * @str: the string to analyze
951 * Parse an URI reference string based on RFC 3986 and fills in the
952 * appropriate fields of the @uri structure
954 * URI-reference = URI / relative-ref
956 * Returns 0 or the error code
959 xmlParseURIReference(xmlURIPtr uri
, const char *str
) {
960 return(xmlParse3986URIReference(uri
, str
));
965 * @str: the URI string to analyze
966 * @raw: if 1, unescaping of URI pieces is disabled
968 * Parse an URI but allows to keep intact the original fragments.
970 * URI-reference = URI / relative-ref
972 * Returns a newly built xmlURIPtr or NULL in case of error
975 xmlParseURIRaw(const char *str
, int raw
) {
981 uri
= xmlCreateURI();
986 ret
= xmlParseURIReference(uri
, str
);
995 /************************************************************************
997 * Generic URI structure functions *
999 ************************************************************************/
1004 * Simply creates an empty xmlURI
1006 * Returns the new structure or NULL in case of error
1009 xmlCreateURI(void) {
1012 ret
= (xmlURIPtr
) xmlMalloc(sizeof(xmlURI
));
1014 xmlURIErrMemory("creating URI structure\n");
1017 memset(ret
, 0, sizeof(xmlURI
));
1022 * xmlSaveUriRealloc:
1024 * Function to handle properly a reallocation when saving an URI
1025 * Also imposes some limit on the length of an URI string output
1028 xmlSaveUriRealloc(xmlChar
*ret
, int *max
) {
1032 if (*max
> MAX_URI_LENGTH
) {
1033 xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1037 temp
= (xmlChar
*) xmlRealloc(ret
, (tmp
+ 1));
1039 xmlURIErrMemory("saving URI\n");
1048 * @uri: pointer to an xmlURI
1050 * Save the URI as an escaped string
1052 * Returns a new string (to be deallocated by caller)
1055 xmlSaveUri(xmlURIPtr uri
) {
1056 xmlChar
*ret
= NULL
;
1062 if (uri
== NULL
) return(NULL
);
1066 ret
= (xmlChar
*) xmlMallocAtomic((max
+ 1) * sizeof(xmlChar
));
1068 xmlURIErrMemory("saving URI\n");
1073 if (uri
->scheme
!= NULL
) {
1077 temp
= xmlSaveUriRealloc(ret
, &max
);
1078 if (temp
== NULL
) goto mem_error
;
1084 temp
= xmlSaveUriRealloc(ret
, &max
);
1085 if (temp
== NULL
) goto mem_error
;
1090 if (uri
->opaque
!= NULL
) {
1093 if (len
+ 3 >= max
) {
1094 temp
= xmlSaveUriRealloc(ret
, &max
);
1095 if (temp
== NULL
) goto mem_error
;
1098 if (IS_RESERVED(*(p
)) || IS_UNRESERVED(*(p
)))
1101 int val
= *(unsigned char *)p
++;
1102 int hi
= val
/ 0x10, lo
= val
% 0x10;
1104 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1105 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1109 if (uri
->server
!= NULL
) {
1110 if (len
+ 3 >= max
) {
1111 temp
= xmlSaveUriRealloc(ret
, &max
);
1112 if (temp
== NULL
) goto mem_error
;
1117 if (uri
->user
!= NULL
) {
1120 if (len
+ 3 >= max
) {
1121 temp
= xmlSaveUriRealloc(ret
, &max
);
1122 if (temp
== NULL
) goto mem_error
;
1125 if ((IS_UNRESERVED(*(p
))) ||
1126 ((*(p
) == ';')) || ((*(p
) == ':')) ||
1127 ((*(p
) == '&')) || ((*(p
) == '=')) ||
1128 ((*(p
) == '+')) || ((*(p
) == '$')) ||
1132 int val
= *(unsigned char *)p
++;
1133 int hi
= val
/ 0x10, lo
= val
% 0x10;
1135 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1136 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1139 if (len
+ 3 >= max
) {
1140 temp
= xmlSaveUriRealloc(ret
, &max
);
1141 if (temp
== NULL
) goto mem_error
;
1149 temp
= xmlSaveUriRealloc(ret
, &max
);
1150 if (temp
== NULL
) goto mem_error
;
1155 if (uri
->port
> 0) {
1156 if (len
+ 10 >= max
) {
1157 temp
= xmlSaveUriRealloc(ret
, &max
);
1158 if (temp
== NULL
) goto mem_error
;
1161 len
+= snprintf((char *) &ret
[len
], max
- len
, ":%d", uri
->port
);
1163 } else if (uri
->authority
!= NULL
) {
1164 if (len
+ 3 >= max
) {
1165 temp
= xmlSaveUriRealloc(ret
, &max
);
1166 if (temp
== NULL
) goto mem_error
;
1173 if (len
+ 3 >= max
) {
1174 temp
= xmlSaveUriRealloc(ret
, &max
);
1175 if (temp
== NULL
) goto mem_error
;
1178 if ((IS_UNRESERVED(*(p
))) ||
1179 ((*(p
) == '$')) || ((*(p
) == ',')) || ((*(p
) == ';')) ||
1180 ((*(p
) == ':')) || ((*(p
) == '@')) || ((*(p
) == '&')) ||
1181 ((*(p
) == '=')) || ((*(p
) == '+')))
1184 int val
= *(unsigned char *)p
++;
1185 int hi
= val
/ 0x10, lo
= val
% 0x10;
1187 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1188 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1191 } else if (uri
->scheme
!= NULL
) {
1192 if (len
+ 3 >= max
) {
1193 temp
= xmlSaveUriRealloc(ret
, &max
);
1194 if (temp
== NULL
) goto mem_error
;
1200 if (uri
->path
!= NULL
) {
1203 * the colon in file:///d: should not be escaped or
1204 * Windows accesses fail later.
1206 if ((uri
->scheme
!= NULL
) &&
1208 (((p
[1] >= 'a') && (p
[1] <= 'z')) ||
1209 ((p
[1] >= 'A') && (p
[1] <= 'Z'))) &&
1211 (xmlStrEqual(BAD_CAST uri
->scheme
, BAD_CAST
"file"))) {
1212 if (len
+ 3 >= max
) {
1213 temp
= xmlSaveUriRealloc(ret
, &max
);
1214 if (temp
== NULL
) goto mem_error
;
1222 if (len
+ 3 >= max
) {
1223 temp
= xmlSaveUriRealloc(ret
, &max
);
1224 if (temp
== NULL
) goto mem_error
;
1227 if ((IS_UNRESERVED(*(p
))) || ((*(p
) == '/')) ||
1228 ((*(p
) == ';')) || ((*(p
) == '@')) || ((*(p
) == '&')) ||
1229 ((*(p
) == '=')) || ((*(p
) == '+')) || ((*(p
) == '$')) ||
1233 int val
= *(unsigned char *)p
++;
1234 int hi
= val
/ 0x10, lo
= val
% 0x10;
1236 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1237 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1241 if (uri
->query_raw
!= NULL
) {
1242 if (len
+ 1 >= max
) {
1243 temp
= xmlSaveUriRealloc(ret
, &max
);
1244 if (temp
== NULL
) goto mem_error
;
1250 if (len
+ 1 >= max
) {
1251 temp
= xmlSaveUriRealloc(ret
, &max
);
1252 if (temp
== NULL
) goto mem_error
;
1257 } else if (uri
->query
!= NULL
) {
1258 if (len
+ 3 >= max
) {
1259 temp
= xmlSaveUriRealloc(ret
, &max
);
1260 if (temp
== NULL
) goto mem_error
;
1266 if (len
+ 3 >= max
) {
1267 temp
= xmlSaveUriRealloc(ret
, &max
);
1268 if (temp
== NULL
) goto mem_error
;
1271 if ((IS_UNRESERVED(*(p
))) || (IS_RESERVED(*(p
))))
1274 int val
= *(unsigned char *)p
++;
1275 int hi
= val
/ 0x10, lo
= val
% 0x10;
1277 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1278 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1283 if (uri
->fragment
!= NULL
) {
1284 if (len
+ 3 >= max
) {
1285 temp
= xmlSaveUriRealloc(ret
, &max
);
1286 if (temp
== NULL
) goto mem_error
;
1292 if (len
+ 3 >= max
) {
1293 temp
= xmlSaveUriRealloc(ret
, &max
);
1294 if (temp
== NULL
) goto mem_error
;
1297 if ((IS_UNRESERVED(*(p
))) || (IS_RESERVED(*(p
))))
1300 int val
= *(unsigned char *)p
++;
1301 int hi
= val
/ 0x10, lo
= val
% 0x10;
1303 ret
[len
++] = hi
+ (hi
> 9? 'A'-10 : '0');
1304 ret
[len
++] = lo
+ (lo
> 9? 'A'-10 : '0');
1309 temp
= xmlSaveUriRealloc(ret
, &max
);
1310 if (temp
== NULL
) goto mem_error
;
1323 * @stream: a FILE* for the output
1324 * @uri: pointer to an xmlURI
1326 * Prints the URI in the stream @stream.
1329 xmlPrintURI(FILE *stream
, xmlURIPtr uri
) {
1332 out
= xmlSaveUri(uri
);
1334 fprintf(stream
, "%s", (char *) out
);
1341 * @uri: pointer to an xmlURI
1343 * Make sure the xmlURI struct is free of content
1346 xmlCleanURI(xmlURIPtr uri
) {
1347 if (uri
== NULL
) return;
1349 if (uri
->scheme
!= NULL
) xmlFree(uri
->scheme
);
1351 if (uri
->server
!= NULL
) xmlFree(uri
->server
);
1353 if (uri
->user
!= NULL
) xmlFree(uri
->user
);
1355 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
1357 if (uri
->fragment
!= NULL
) xmlFree(uri
->fragment
);
1358 uri
->fragment
= NULL
;
1359 if (uri
->opaque
!= NULL
) xmlFree(uri
->opaque
);
1361 if (uri
->authority
!= NULL
) xmlFree(uri
->authority
);
1362 uri
->authority
= NULL
;
1363 if (uri
->query
!= NULL
) xmlFree(uri
->query
);
1365 if (uri
->query_raw
!= NULL
) xmlFree(uri
->query_raw
);
1366 uri
->query_raw
= NULL
;
1371 * @uri: pointer to an xmlURI
1373 * Free up the xmlURI struct
1376 xmlFreeURI(xmlURIPtr uri
) {
1377 if (uri
== NULL
) return;
1379 if (uri
->scheme
!= NULL
) xmlFree(uri
->scheme
);
1380 if (uri
->server
!= NULL
) xmlFree(uri
->server
);
1381 if (uri
->user
!= NULL
) xmlFree(uri
->user
);
1382 if (uri
->path
!= NULL
) xmlFree(uri
->path
);
1383 if (uri
->fragment
!= NULL
) xmlFree(uri
->fragment
);
1384 if (uri
->opaque
!= NULL
) xmlFree(uri
->opaque
);
1385 if (uri
->authority
!= NULL
) xmlFree(uri
->authority
);
1386 if (uri
->query
!= NULL
) xmlFree(uri
->query
);
1387 if (uri
->query_raw
!= NULL
) xmlFree(uri
->query_raw
);
1391 /************************************************************************
1393 * Helper functions *
1395 ************************************************************************/
1398 * xmlNormalizeURIPath:
1399 * @path: pointer to the path string
1401 * Applies the 5 normalization steps to a path string--that is, RFC 2396
1402 * Section 5.2, steps 6.c through 6.g.
1404 * Normalization occurs directly on the string, no new allocation is done
1406 * Returns 0 or an error code
1409 xmlNormalizeURIPath(char *path
) {
1415 /* Skip all initial "/" chars. We want to get to the beginning of the
1416 * first non-empty segment.
1419 while (cur
[0] == '/')
1424 /* Keep everything we've seen so far. */
1428 * Analyze each segment in sequence for cases (c) and (d).
1430 while (cur
[0] != '\0') {
1432 * c) All occurrences of "./", where "." is a complete path segment,
1433 * are removed from the buffer string.
1435 if ((cur
[0] == '.') && (cur
[1] == '/')) {
1437 /* '//' normalization should be done at this point too */
1438 while (cur
[0] == '/')
1444 * d) If the buffer string ends with "." as a complete path segment,
1445 * that "." is removed.
1447 if ((cur
[0] == '.') && (cur
[1] == '\0'))
1450 /* Otherwise keep the segment. */
1451 while (cur
[0] != '/') {
1454 (out
++)[0] = (cur
++)[0];
1457 while ((cur
[0] == '/') && (cur
[1] == '/'))
1460 (out
++)[0] = (cur
++)[0];
1465 /* Reset to the beginning of the first segment for the next sequence. */
1467 while (cur
[0] == '/')
1473 * Analyze each segment in sequence for cases (e) and (f).
1475 * e) All occurrences of "<segment>/../", where <segment> is a
1476 * complete path segment not equal to "..", are removed from the
1477 * buffer string. Removal of these path segments is performed
1478 * iteratively, removing the leftmost matching pattern on each
1479 * iteration, until no matching pattern remains.
1481 * f) If the buffer string ends with "<segment>/..", where <segment>
1482 * is a complete path segment not equal to "..", that
1483 * "<segment>/.." is removed.
1485 * To satisfy the "iterative" clause in (e), we need to collapse the
1486 * string every time we find something that needs to be removed. Thus,
1487 * we don't need to keep two pointers into the string: we only need a
1488 * "current position" pointer.
1493 /* At the beginning of each iteration of this loop, "cur" points to
1494 * the first character of the segment we want to examine.
1497 /* Find the end of the current segment. */
1499 while ((segp
[0] != '/') && (segp
[0] != '\0'))
1502 /* If this is the last segment, we're done (we need at least two
1503 * segments to meet the criteria for the (e) and (f) cases).
1505 if (segp
[0] == '\0')
1508 /* If the first segment is "..", or if the next segment _isn't_ "..",
1509 * keep this segment and try the next one.
1512 if (((cur
[0] == '.') && (cur
[1] == '.') && (segp
== cur
+3))
1513 || ((segp
[0] != '.') || (segp
[1] != '.')
1514 || ((segp
[2] != '/') && (segp
[2] != '\0')))) {
1519 /* If we get here, remove this segment and the next one and back up
1520 * to the previous segment (if there is one), to implement the
1521 * "iteratively" clause. It's pretty much impossible to back up
1522 * while maintaining two pointers into the buffer, so just compact
1523 * the whole buffer now.
1526 /* If this is the end of the buffer, we're done. */
1527 if (segp
[2] == '\0') {
1531 /* Valgrind complained, strcpy(cur, segp + 3); */
1532 /* string will overlap, do not use strcpy */
1535 while ((*tmp
++ = *segp
++) != 0)
1538 /* If there are no previous segments, then keep going from here. */
1540 while ((segp
> path
) && ((--segp
)[0] == '/'))
1545 /* "segp" is pointing to the end of a previous segment; find it's
1546 * start. We need to back up to the previous segment and start
1547 * over with that to handle things like "foo/bar/../..". If we
1548 * don't do this, then on the first pass we'll remove the "bar/..",
1549 * but be pointing at the second ".." so we won't realize we can also
1550 * remove the "foo/..".
1553 while ((cur
> path
) && (cur
[-1] != '/'))
1559 * g) If the resulting buffer string still begins with one or more
1560 * complete path segments of "..", then the reference is
1561 * considered to be in error. Implementations may handle this
1562 * error by retaining these components in the resolved path (i.e.,
1563 * treating them as part of the final URI), by removing them from
1564 * the resolved path (i.e., discarding relative levels above the
1565 * root), or by avoiding traversal of the reference.
1567 * We discard them from the final path.
1569 if (path
[0] == '/') {
1571 while ((cur
[0] == '/') && (cur
[1] == '.') && (cur
[2] == '.')
1572 && ((cur
[3] == '/') || (cur
[3] == '\0')))
1577 while (cur
[0] != '\0')
1578 (out
++)[0] = (cur
++)[0];
1586 static int is_hex(char c
) {
1587 if (((c
>= '0') && (c
<= '9')) ||
1588 ((c
>= 'a') && (c
<= 'f')) ||
1589 ((c
>= 'A') && (c
<= 'F')))
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the number of bytes of @str to process (<= 0 means the whole string)
 * @target:  optional destination buffer
 *
 * Replaces every "%XX" sequence (two hexadecimal digits) by the single byte
 * it encodes and copies all other bytes unchanged. No check is made that
 * the input is a valid URI and no character-set conversion is applied.
 * The result is never longer than the input; when @target is supplied the
 * output is written there, otherwise a buffer of @len + 1 bytes is
 * allocated.
 *
 * Returns the unescaped, zero-terminated string (@target when one was
 * given, a new allocation otherwise), or NULL in case of error.
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    const char *src;
    char *buf, *dst;
    int i;

    if (str == NULL)
        return(NULL);
    if (len <= 0)
        len = strlen(str);
    if (len < 0)
        return(NULL);

    buf = target;
    if (buf == NULL) {
        buf = (char *) xmlMallocAtomic(len + 1);
        if (buf == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    src = str;
    dst = buf;
    while (len > 0) {
        /* Anything that is not a complete %XX triplet is copied verbatim. */
        if ((len <= 2) || (*src != '%') ||
            (!is_hex(src[1])) || (!is_hex(src[2]))) {
            *dst++ = *src++;
            len--;
            continue;
        }

        /* Fold the two hex digits, high nibble first, into one byte. */
        *dst = 0;
        for (i = 1; i <= 2; i++) {
            char digit = src[i];

            if ((digit >= '0') && (digit <= '9'))
                *dst = *dst * 16 + (digit - '0');
            else if ((digit >= 'a') && (digit <= 'f'))
                *dst = *dst * 16 + (digit - 'a') + 10;
            else
                *dst = *dst * 16 + (digit - 'A') + 10;
        }
        src += 3;
        len -= 3;
        dst++;
    }
    *dst = 0;
    return(buf);
}
1658 * @str: string to escape
1659 * @list: exception list string of chars not to escape
1661 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1662 * and the characters in the exception list.
1664 * Returns a new escaped string or NULL in case of error.
1667 xmlURIEscapeStr(const xmlChar
*str
, const xmlChar
*list
) {
1676 return(xmlStrdup(str
));
1677 len
= xmlStrlen(str
);
1678 if (!(len
> 0)) return(NULL
);
1681 ret
= (xmlChar
*) xmlMallocAtomic(len
);
1683 xmlURIErrMemory("escaping URI value\n");
1686 in
= (const xmlChar
*) str
;
1689 if (len
- out
<= 3) {
1690 temp
= xmlSaveUriRealloc(ret
, &len
);
1692 xmlURIErrMemory("escaping URI value\n");
1701 if ((ch
!= '@') && (!IS_UNRESERVED(ch
)) && (!xmlStrchr(list
, ch
))) {
1706 ret
[out
++] = '0' + val
;
1708 ret
[out
++] = 'A' + val
- 0xA;
1711 ret
[out
++] = '0' + val
;
1713 ret
[out
++] = 'A' + val
- 0xA;
1726 * @str: the string of the URI to escape
1728 * Escaping routine, does not do validity checks !
1729 * It will try to escape the chars needing this, but this is heuristic
1730 * based it's impossible to be sure.
1732 * Returns an copy of the string, but escaped
1735 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1736 * according to RFC2396.
1740 xmlURIEscape(const xmlChar
* str
)
1742 xmlChar
*ret
, *segment
= NULL
;
1746 #define NULLCHK(p) if(!p) { \
1747 xmlURIErrMemory("escaping URI value\n"); \
1754 uri
= xmlCreateURI();
1757 * Allow escaping errors in the unescaped form
1760 ret2
= xmlParseURIReference(uri
, (const char *)str
);
1773 segment
= xmlURIEscapeStr(BAD_CAST uri
->scheme
, BAD_CAST
"+-.");
1775 ret
= xmlStrcat(ret
, segment
);
1776 ret
= xmlStrcat(ret
, BAD_CAST
":");
1780 if (uri
->authority
) {
1782 xmlURIEscapeStr(BAD_CAST uri
->authority
, BAD_CAST
"/?;:@");
1784 ret
= xmlStrcat(ret
, BAD_CAST
"//");
1785 ret
= xmlStrcat(ret
, segment
);
1790 segment
= xmlURIEscapeStr(BAD_CAST uri
->user
, BAD_CAST
";:&=+$,");
1792 ret
= xmlStrcat(ret
,BAD_CAST
"//");
1793 ret
= xmlStrcat(ret
, segment
);
1794 ret
= xmlStrcat(ret
, BAD_CAST
"@");
1799 segment
= xmlURIEscapeStr(BAD_CAST uri
->server
, BAD_CAST
"/?;:@");
1801 if (uri
->user
== NULL
)
1802 ret
= xmlStrcat(ret
, BAD_CAST
"//");
1803 ret
= xmlStrcat(ret
, segment
);
1810 snprintf((char *) port
, 10, "%d", uri
->port
);
1811 ret
= xmlStrcat(ret
, BAD_CAST
":");
1812 ret
= xmlStrcat(ret
, port
);
1817 xmlURIEscapeStr(BAD_CAST uri
->path
, BAD_CAST
":@&=+$,/?;");
1819 ret
= xmlStrcat(ret
, segment
);
1823 if (uri
->query_raw
) {
1824 ret
= xmlStrcat(ret
, BAD_CAST
"?");
1825 ret
= xmlStrcat(ret
, BAD_CAST uri
->query_raw
);
1827 else if (uri
->query
) {
1829 xmlURIEscapeStr(BAD_CAST uri
->query
, BAD_CAST
";/?:@&=+,$");
1831 ret
= xmlStrcat(ret
, BAD_CAST
"?");
1832 ret
= xmlStrcat(ret
, segment
);
1837 segment
= xmlURIEscapeStr(BAD_CAST uri
->opaque
, BAD_CAST
"");
1839 ret
= xmlStrcat(ret
, segment
);
1843 if (uri
->fragment
) {
1844 segment
= xmlURIEscapeStr(BAD_CAST uri
->fragment
, BAD_CAST
"#");
1846 ret
= xmlStrcat(ret
, BAD_CAST
"#");
1847 ret
= xmlStrcat(ret
, segment
);
1857 /************************************************************************
1859 * Public functions *
1861 ************************************************************************/
1865 * @URI: the URI instance found in the document
1866 * @base: the base value
1868 * Computes he final URI of the reference done by checking that
1869 * the given URI is valid, and building the final URI using the
1870 * base URI. This is processed according to section 5.2 of the
1873 * 5.2. Resolving Relative References to Absolute Form
1875 * Returns a new URI string (to be freed by the caller) or NULL in case
1879 xmlBuildURI(const xmlChar
*URI
, const xmlChar
*base
) {
1880 xmlChar
*val
= NULL
;
1881 int ret
, len
, indx
, cur
, out
;
1882 xmlURIPtr ref
= NULL
;
1883 xmlURIPtr bas
= NULL
;
1884 xmlURIPtr res
= NULL
;
1887 * 1) The URI reference is parsed into the potential four components and
1888 * fragment identifier, as described in Section 4.3.
1890 * NOTE that a completely empty URI is treated by modern browsers
1891 * as a reference to "." rather than as a synonym for the current
1892 * URI. Should we do that here?
1898 ref
= xmlCreateURI();
1901 ret
= xmlParseURIReference(ref
, (const char *) URI
);
1908 if ((ref
!= NULL
) && (ref
->scheme
!= NULL
)) {
1910 * The URI is absolute don't modify.
1912 val
= xmlStrdup(URI
);
1918 bas
= xmlCreateURI();
1921 ret
= xmlParseURIReference(bas
, (const char *) base
);
1925 val
= xmlSaveUri(ref
);
1930 * the base fragment must be ignored
1932 if (bas
->fragment
!= NULL
) {
1933 xmlFree(bas
->fragment
);
1934 bas
->fragment
= NULL
;
1936 val
= xmlSaveUri(bas
);
1941 * 2) If the path component is empty and the scheme, authority, and
1942 * query components are undefined, then it is a reference to the
1943 * current document and we are done. Otherwise, the reference URI's
1944 * query and fragment components are defined as found (or not found)
1945 * within the URI reference and not inherited from the base URI.
1947 * NOTE that in modern browsers, the parsing differs from the above
1948 * in the following aspect: the query component is allowed to be
1949 * defined while still treating this as a reference to the current
1952 res
= xmlCreateURI();
1955 if ((ref
->scheme
== NULL
) && (ref
->path
== NULL
) &&
1956 ((ref
->authority
== NULL
) && (ref
->server
== NULL
))) {
1957 if (bas
->scheme
!= NULL
)
1958 res
->scheme
= xmlMemStrdup(bas
->scheme
);
1959 if (bas
->authority
!= NULL
)
1960 res
->authority
= xmlMemStrdup(bas
->authority
);
1961 else if (bas
->server
!= NULL
) {
1962 res
->server
= xmlMemStrdup(bas
->server
);
1963 if (bas
->user
!= NULL
)
1964 res
->user
= xmlMemStrdup(bas
->user
);
1965 res
->port
= bas
->port
;
1967 if (bas
->path
!= NULL
)
1968 res
->path
= xmlMemStrdup(bas
->path
);
1969 if (ref
->query_raw
!= NULL
)
1970 res
->query_raw
= xmlMemStrdup (ref
->query_raw
);
1971 else if (ref
->query
!= NULL
)
1972 res
->query
= xmlMemStrdup(ref
->query
);
1973 else if (bas
->query_raw
!= NULL
)
1974 res
->query_raw
= xmlMemStrdup(bas
->query_raw
);
1975 else if (bas
->query
!= NULL
)
1976 res
->query
= xmlMemStrdup(bas
->query
);
1977 if (ref
->fragment
!= NULL
)
1978 res
->fragment
= xmlMemStrdup(ref
->fragment
);
1983 * 3) If the scheme component is defined, indicating that the reference
1984 * starts with a scheme name, then the reference is interpreted as an
1985 * absolute URI and we are done. Otherwise, the reference URI's
1986 * scheme is inherited from the base URI's scheme component.
1988 if (ref
->scheme
!= NULL
) {
1989 val
= xmlSaveUri(ref
);
1992 if (bas
->scheme
!= NULL
)
1993 res
->scheme
= xmlMemStrdup(bas
->scheme
);
1995 if (ref
->query_raw
!= NULL
)
1996 res
->query_raw
= xmlMemStrdup(ref
->query_raw
);
1997 else if (ref
->query
!= NULL
)
1998 res
->query
= xmlMemStrdup(ref
->query
);
1999 if (ref
->fragment
!= NULL
)
2000 res
->fragment
= xmlMemStrdup(ref
->fragment
);
2003 * 4) If the authority component is defined, then the reference is a
2004 * network-path and we skip to step 7. Otherwise, the reference
2005 * URI's authority is inherited from the base URI's authority
2006 * component, which will also be undefined if the URI scheme does not
2007 * use an authority component.
2009 if ((ref
->authority
!= NULL
) || (ref
->server
!= NULL
)) {
2010 if (ref
->authority
!= NULL
)
2011 res
->authority
= xmlMemStrdup(ref
->authority
);
2013 res
->server
= xmlMemStrdup(ref
->server
);
2014 if (ref
->user
!= NULL
)
2015 res
->user
= xmlMemStrdup(ref
->user
);
2016 res
->port
= ref
->port
;
2018 if (ref
->path
!= NULL
)
2019 res
->path
= xmlMemStrdup(ref
->path
);
2022 if (bas
->authority
!= NULL
)
2023 res
->authority
= xmlMemStrdup(bas
->authority
);
2024 else if (bas
->server
!= NULL
) {
2025 res
->server
= xmlMemStrdup(bas
->server
);
2026 if (bas
->user
!= NULL
)
2027 res
->user
= xmlMemStrdup(bas
->user
);
2028 res
->port
= bas
->port
;
2032 * 5) If the path component begins with a slash character ("/"), then
2033 * the reference is an absolute-path and we skip to step 7.
2035 if ((ref
->path
!= NULL
) && (ref
->path
[0] == '/')) {
2036 res
->path
= xmlMemStrdup(ref
->path
);
2042 * 6) If this step is reached, then we are resolving a relative-path
2043 * reference. The relative path needs to be merged with the base
2044 * URI's path. Although there are many ways to do this, we will
2045 * describe a simple method using a separate string buffer.
2047 * Allocate a buffer large enough for the result string.
2049 len
= 2; /* extra / and 0 */
2050 if (ref
->path
!= NULL
)
2051 len
+= strlen(ref
->path
);
2052 if (bas
->path
!= NULL
)
2053 len
+= strlen(bas
->path
);
2054 res
->path
= (char *) xmlMallocAtomic(len
);
2055 if (res
->path
== NULL
) {
2056 xmlURIErrMemory("resolving URI against base\n");
2062 * a) All but the last segment of the base URI's path component is
2063 * copied to the buffer. In other words, any characters after the
2064 * last (right-most) slash character, if any, are excluded.
2068 if (bas
->path
!= NULL
) {
2069 while (bas
->path
[cur
] != 0) {
2070 while ((bas
->path
[cur
] != 0) && (bas
->path
[cur
] != '/'))
2072 if (bas
->path
[cur
] == 0)
2077 res
->path
[out
] = bas
->path
[out
];
2085 * b) The reference's path component is appended to the buffer
2088 if (ref
->path
!= NULL
&& ref
->path
[0] != 0) {
2091 * Ensure the path includes a '/'
2093 if ((out
== 0) && (bas
->server
!= NULL
))
2094 res
->path
[out
++] = '/';
2095 while (ref
->path
[indx
] != 0) {
2096 res
->path
[out
++] = ref
->path
[indx
++];
2102 * Steps c) to h) are really path normalization steps
2104 xmlNormalizeURIPath(res
->path
);
2109 * 7) The resulting URI components, including any inherited from the
2110 * base URI, are recombined to give the absolute form of the URI
2113 val
= xmlSaveUri(res
);
2126 * xmlBuildRelativeURI:
2127 * @URI: the URI reference under consideration
2128 * @base: the base value
2130 * Expresses the URI of the reference in terms relative to the
2131 * base. Some examples of this operation include:
2132 * base = "http://site1.com/docs/book1.html"
2133 * URI input URI returned
2134 * docs/pic1.gif pic1.gif
2135 * docs/img/pic1.gif img/pic1.gif
2136 * img/pic1.gif ../img/pic1.gif
2137 * http://site1.com/docs/pic1.gif pic1.gif
2138 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2140 * base = "docs/book1.html"
2141 * URI input URI returned
2142 * docs/pic1.gif pic1.gif
2143 * docs/img/pic1.gif img/pic1.gif
2144 * img/pic1.gif ../img/pic1.gif
2145 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2148 * Note: if the URI reference is really wierd or complicated, it may be
2149 * worthwhile to first convert it into a "nice" one by calling
2150 * xmlBuildURI (using 'base') before calling this routine,
2151 * since this routine (for reasonable efficiency) assumes URI has
2152 * already been through some validation.
2154 * Returns a new URI string (to be freed by the caller) or NULL in case
2158 xmlBuildRelativeURI (const xmlChar
* URI
, const xmlChar
* base
)
2160 xmlChar
*val
= NULL
;
2166 xmlURIPtr ref
= NULL
;
2167 xmlURIPtr bas
= NULL
;
2168 xmlChar
*bptr
, *uptr
, *vptr
;
2169 int remove_path
= 0;
2171 if ((URI
== NULL
) || (*URI
== 0))
2175 * First parse URI into a standard form
2177 ref
= xmlCreateURI ();
2180 /* If URI not already in "relative" form */
2181 if (URI
[0] != '.') {
2182 ret
= xmlParseURIReference (ref
, (const char *) URI
);
2184 goto done
; /* Error in URI, return NULL */
2186 ref
->path
= (char *)xmlStrdup(URI
);
2189 * Next parse base into the same standard form
2191 if ((base
== NULL
) || (*base
== 0)) {
2192 val
= xmlStrdup (URI
);
2195 bas
= xmlCreateURI ();
2198 if (base
[0] != '.') {
2199 ret
= xmlParseURIReference (bas
, (const char *) base
);
2201 goto done
; /* Error in base, return NULL */
2203 bas
->path
= (char *)xmlStrdup(base
);
2206 * If the scheme / server on the URI differs from the base,
2207 * just return the URI
2209 if ((ref
->scheme
!= NULL
) &&
2210 ((bas
->scheme
== NULL
) ||
2211 (xmlStrcmp ((xmlChar
*)bas
->scheme
, (xmlChar
*)ref
->scheme
)) ||
2212 (xmlStrcmp ((xmlChar
*)bas
->server
, (xmlChar
*)ref
->server
)))) {
2213 val
= xmlStrdup (URI
);
2216 if (xmlStrEqual((xmlChar
*)bas
->path
, (xmlChar
*)ref
->path
)) {
2217 val
= xmlStrdup(BAD_CAST
"");
2220 if (bas
->path
== NULL
) {
2221 val
= xmlStrdup((xmlChar
*)ref
->path
);
2224 if (ref
->path
== NULL
) {
2225 ref
->path
= (char *) "/";
2230 * At this point (at last!) we can compare the two paths
2232 * First we take care of the special case where either of the
2233 * two path components may be missing (bug 316224)
2235 if (bas
->path
== NULL
) {
2236 if (ref
->path
!= NULL
) {
2237 uptr
= (xmlChar
*) ref
->path
;
2240 /* exception characters from xmlSaveUri */
2241 val
= xmlURIEscapeStr(uptr
, BAD_CAST
"/;&=+$,");
2245 bptr
= (xmlChar
*)bas
->path
;
2246 if (ref
->path
== NULL
) {
2247 for (ix
= 0; bptr
[ix
] != 0; ix
++) {
2248 if (bptr
[ix
] == '/')
2252 len
= 1; /* this is for a string terminator only */
2255 * Next we compare the two strings and find where they first differ
2257 if ((ref
->path
[pos
] == '.') && (ref
->path
[pos
+1] == '/'))
2259 if ((*bptr
== '.') && (bptr
[1] == '/'))
2261 else if ((*bptr
== '/') && (ref
->path
[pos
] != '/'))
2263 while ((bptr
[pos
] == ref
->path
[pos
]) && (bptr
[pos
] != 0))
2266 if (bptr
[pos
] == ref
->path
[pos
]) {
2267 val
= xmlStrdup(BAD_CAST
"");
2268 goto done
; /* (I can't imagine why anyone would do this) */
2272 * In URI, "back up" to the last '/' encountered. This will be the
2273 * beginning of the "unique" suffix of URI
2276 if ((ref
->path
[ix
] == '/') && (ix
> 0))
2278 else if ((ref
->path
[ix
] == 0) && (ix
> 1) && (ref
->path
[ix
- 1] == '/'))
2280 for (; ix
> 0; ix
--) {
2281 if (ref
->path
[ix
] == '/')
2285 uptr
= (xmlChar
*)ref
->path
;
2288 uptr
= (xmlChar
*)&ref
->path
[ix
];
2292 * In base, count the number of '/' from the differing point
2294 if (bptr
[pos
] != ref
->path
[pos
]) {/* check for trivial URI == base */
2295 for (; bptr
[ix
] != 0; ix
++) {
2296 if (bptr
[ix
] == '/')
2300 len
= xmlStrlen (uptr
) + 1;
2305 /* exception characters from xmlSaveUri */
2306 val
= xmlURIEscapeStr(uptr
, BAD_CAST
"/;&=+$,");
2311 * Allocate just enough space for the returned string -
2312 * length of the remainder of the URI, plus enough space
2313 * for the "../" groups, plus one for the terminator
2315 val
= (xmlChar
*) xmlMalloc (len
+ 3 * nbslash
);
2317 xmlURIErrMemory("building relative URI\n");
2322 * Put in as many "../" as needed
2324 for (; nbslash
>0; nbslash
--) {
2330 * Finish up with the end of the URI
2333 if ((vptr
> val
) && (len
> 0) &&
2334 (uptr
[0] == '/') && (vptr
[-1] == '/')) {
2335 memcpy (vptr
, uptr
+ 1, len
- 1);
2338 memcpy (vptr
, uptr
, len
);
2345 /* escape the freshly-built path */
2347 /* exception characters from xmlSaveUri */
2348 val
= xmlURIEscapeStr(vptr
, BAD_CAST
"/;&=+$,");
2353 * Free the working variables
2355 if (remove_path
!= 0)
2367 * @path: the resource locator in a filesystem notation
2369 * Constructs a canonic path from the specified path.
2371 * Returns a new canonic path, or a duplicate of the path parameter if the
2372 * construction fails. The caller is responsible for freeing the memory occupied
2373 * by the returned string. If there is insufficient memory available, or the
2374 * argument is NULL, the function returns NULL.
2376 #define IS_WINDOWS_PATH(p) \
2378 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2379 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2380 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2382 xmlCanonicPath(const xmlChar
*path
)
2385 * For Windows implementations, additional work needs to be done to
2386 * replace backslashes in pathnames with "forward slashes"
2388 #if defined(_WIN32) && !defined(__CYGWIN__)
2395 const xmlChar
*absuri
;
2402 * We must not change the backslashes to slashes if the the path
2404 * Those paths can be up to 32k characters long.
2405 * Was added specifically for OpenOffice, those paths can't be converted
2408 if ((path
[0] == '\\') && (path
[1] == '\\') && (path
[2] == '?') &&
2410 return xmlStrdup((const xmlChar
*) path
);
2413 /* sanitize filename starting with // so it can be used as URI */
2414 if ((path
[0] == '/') && (path
[1] == '/') && (path
[2] != '/'))
2417 if ((uri
= xmlParseURI((const char *) path
)) != NULL
) {
2419 return xmlStrdup(path
);
2422 /* Check if this is an "absolute uri" */
2423 absuri
= xmlStrstr(path
, BAD_CAST
"://");
2424 if (absuri
!= NULL
) {
2430 * this looks like an URI where some parts have not been
2431 * escaped leading to a parsing problem. Check that the first
2432 * part matches a protocol.
2435 /* Bypass if first part (part before the '://') is > 20 chars */
2436 if ((l
<= 0) || (l
> 20))
2437 goto path_processing
;
2438 /* Bypass if any non-alpha characters are present in first part */
2439 for (j
= 0;j
< l
;j
++) {
2441 if (!(((c
>= 'a') && (c
<= 'z')) || ((c
>= 'A') && (c
<= 'Z'))))
2442 goto path_processing
;
2445 /* Escape all except the characters specified in the supplied path */
2446 escURI
= xmlURIEscapeStr(path
, BAD_CAST
":/?_.#&;=");
2447 if (escURI
!= NULL
) {
2448 /* Try parsing the escaped path */
2449 uri
= xmlParseURI((const char *) escURI
);
2450 /* If successful, return the escaped string */
2459 /* For Windows implementations, replace backslashes with 'forward slashes' */
2460 #if defined(_WIN32) && !defined(__CYGWIN__)
2462 * Create a URI structure
2464 uri
= xmlCreateURI();
2465 if (uri
== NULL
) { /* Guard against 'out of memory' */
2469 len
= xmlStrlen(path
);
2470 if ((len
> 2) && IS_WINDOWS_PATH(path
)) {
2471 /* make the scheme 'file' */
2472 uri
->scheme
= xmlStrdup(BAD_CAST
"file");
2473 /* allocate space for leading '/' + path + string terminator */
2474 uri
->path
= xmlMallocAtomic(len
+ 2);
2475 if (uri
->path
== NULL
) {
2476 xmlFreeURI(uri
); /* Guard agains 'out of memory' */
2479 /* Put in leading '/' plus path */
2482 strncpy(p
, path
, len
+ 1);
2484 uri
->path
= xmlStrdup(path
);
2485 if (uri
->path
== NULL
) {
2491 /* Now change all occurences of '\' to '/' */
2492 while (*p
!= '\0') {
2498 if (uri
->scheme
== NULL
) {
2499 ret
= xmlStrdup((const xmlChar
*) uri
->path
);
2501 ret
= xmlSaveUri(uri
);
2506 ret
= xmlStrdup((const xmlChar
*) path
);
2513 * @path: the resource locator in a filesystem notation
2515 * Constructs an URI expressing the existing path
2517 * Returns a new URI, or a duplicate of the path parameter if the
2518 * construction fails. The caller is responsible for freeing the memory
2519 * occupied by the returned string. If there is insufficient memory available,
2520 * or the argument is NULL, the function returns NULL.
2523 xmlPathToURI(const xmlChar
*path
)
2532 if ((uri
= xmlParseURI((const char *) path
)) != NULL
) {
2534 return xmlStrdup(path
);
2536 cal
= xmlCanonicPath(path
);
2539 #if defined(_WIN32) && !defined(__CYGWIN__)
2540 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2541 If 'cal' is a valid URI allready then we are done here, as continuing would make
2543 if ((uri
= xmlParseURI((const char *) cal
)) != NULL
) {
2547 /* 'cal' can contain a relative path with backslashes. If that is processed
2548 by xmlSaveURI, they will be escaped and the external entity loader machinery
2549 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2551 while (*ret
!= '\0') {
2557 memset(&temp
, 0, sizeof(temp
));
2558 temp
.path
= (char *) cal
;
2559 ret
= xmlSaveUri(&temp
);
2564 #include "elfgcchack.h"