dll/win32/urlmon/uri.c

   1 /*
   2  * Copyright 2010 Jacek Caban for CodeWeavers
   3  * Copyright 2010 Thomas Mullaly
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2.1 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General Public
  16  * License along with this library; if not, write to the Free Software
  17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  18  */
  19
  20 #include "urlmon_main.h"
  21 #include "wine/debug.h"
  22
  23 #define NO_SHLWAPI_REG
  24 #include "shlwapi.h"
  25
  26 #define UINT_MAX 0xffffffff
  27 #define USHORT_MAX 0xffff
  28
  29 #define ALLOW_NULL_TERM_SCHEME          0x01
  30 #define ALLOW_NULL_TERM_USER_NAME       0x02
  31 #define ALLOW_NULL_TERM_PASSWORD        0x04
  32 #define ALLOW_BRACKETLESS_IP_LITERAL    0x08
  33 #define SKIP_IP_FUTURE_CHECK            0x10
  34 #define IGNORE_PORT_DELIMITER           0x20
  35
  36 WINE_DEFAULT_DEBUG_CHANNEL(urlmon);
  37
  38 static const IID IID_IUriObj = {0x4b364760,0x9f51,0x11df,{0x98,0x1c,0x08,0x00,0x20,0x0c,0x9a,0x66}};
  39
  40 typedef struct {
  41     const IUriVtbl  *lpIUriVtbl;
  42     LONG ref;
  43
  44     BSTR            raw_uri;
  45
  46     /* Information about the canonicalized URI's buffer. */
  47     WCHAR           *canon_uri;
  48     DWORD           canon_size;
  49     DWORD           canon_len;
  50     BOOL            display_absolute;
  51     DWORD           create_flags;
  52
  53     INT             scheme_start;
  54     DWORD           scheme_len;
  55     URL_SCHEME      scheme_type;
  56
  57     INT             userinfo_start;
  58     DWORD           userinfo_len;
  59     INT             userinfo_split;
  60
  61     INT             host_start;
  62     DWORD           host_len;
  63     Uri_HOST_TYPE   host_type;
  64
  65     USHORT          port;
  66     BOOL            has_port;
  67
  68     INT             authority_start;
  69     DWORD           authority_len;
  70
  71     INT             domain_offset;
  72
  73     INT             path_start;
  74     DWORD           path_len;
  75     INT             extension_offset;
  76
  77     INT             query_start;
  78     DWORD           query_len;
  79
  80     INT             fragment_start;
  81     DWORD           fragment_len;
  82 } Uri;
  83
  84 typedef struct {
  85     const IUriBuilderVtbl  *lpIUriBuilderVtbl;
  86     LONG ref;
  87
  88     Uri *uri;
  89     DWORD modified_props;
  90
  91     WCHAR   *fragment;
  92     DWORD   fragment_len;
  93
  94     WCHAR   *host;
  95     DWORD   host_len;
  96
  97     WCHAR   *password;
  98     DWORD   password_len;
  99
 100     WCHAR   *path;
 101     DWORD   path_len;
 102
 103     BOOL    has_port;
 104     DWORD   port;
 105
 106     WCHAR   *query;
 107     DWORD   query_len;
 108
 109     WCHAR   *scheme;
 110     DWORD   scheme_len;
 111
 112     WCHAR   *username;
 113     DWORD   username_len;
 114 } UriBuilder;
 115
 116 typedef struct {
 117     const WCHAR *str;
 118     DWORD       len;
 119 } h16;
 120
 121 typedef struct {
 122     /* IPv6 addresses can hold up to 8 h16 components. */
 123     h16         components[8];
 124     DWORD       h16_count;
 125
 126     /* An IPv6 can have 1 elision ("::"). */
 127     const WCHAR *elision;
 128
 129     /* An IPv6 can contain 1 IPv4 address as the last 32bits of the address. */
 130     const WCHAR *ipv4;
 131     DWORD       ipv4_len;
 132
 133     INT         components_size;
 134     INT         elision_size;
 135 } ipv6_address;
 136
 137 typedef struct {
 138     BSTR            uri;
 139
 140     BOOL            is_relative;
 141     BOOL            is_opaque;
 142     BOOL            has_implicit_scheme;
 143     BOOL            has_implicit_ip;
 144     UINT            implicit_ipv4;
 145
 146     const WCHAR     *scheme;
 147     DWORD           scheme_len;
 148     URL_SCHEME      scheme_type;
 149
 150     const WCHAR     *username;
 151     DWORD           username_len;
 152
 153     const WCHAR     *password;
 154     DWORD           password_len;
 155
 156     const WCHAR     *host;
 157     DWORD           host_len;
 158     Uri_HOST_TYPE   host_type;
 159
 160     BOOL            has_ipv6;
 161     ipv6_address    ipv6_address;
 162
 163     BOOL            has_port;
 164     const WCHAR     *port;
 165     DWORD           port_len;
 166     DWORD           port_value;
 167
 168     const WCHAR     *path;
 169     DWORD           path_len;
 170
 171     const WCHAR     *query;
 172     DWORD           query_len;
 173
 174     const WCHAR     *fragment;
 175     DWORD           fragment_len;
 176 } parse_data;
 177
 178 static const CHAR hexDigits[] = "0123456789ABCDEF";
 179
 180 /* List of scheme types/scheme names that are recognized by the IUri interface as of IE 7. */
 181 static const struct {
 182     URL_SCHEME  scheme;
 183     WCHAR       scheme_name[16];
 184 } recognized_schemes[] = {
 185     {URL_SCHEME_FTP,            {'f','t','p',0}},
 186     {URL_SCHEME_HTTP,           {'h','t','t','p',0}},
 187     {URL_SCHEME_GOPHER,         {'g','o','p','h','e','r',0}},
 188     {URL_SCHEME_MAILTO,         {'m','a','i','l','t','o',0}},
 189     {URL_SCHEME_NEWS,           {'n','e','w','s',0}},
 190     {URL_SCHEME_NNTP,           {'n','n','t','p',0}},
 191     {URL_SCHEME_TELNET,         {'t','e','l','n','e','t',0}},
 192     {URL_SCHEME_WAIS,           {'w','a','i','s',0}},
 193     {URL_SCHEME_FILE,           {'f','i','l','e',0}},
 194     {URL_SCHEME_MK,             {'m','k',0}},
 195     {URL_SCHEME_HTTPS,          {'h','t','t','p','s',0}},
 196     {URL_SCHEME_SHELL,          {'s','h','e','l','l',0}},
 197     {URL_SCHEME_SNEWS,          {'s','n','e','w','s',0}},
 198     {URL_SCHEME_LOCAL,          {'l','o','c','a','l',0}},
 199     {URL_SCHEME_JAVASCRIPT,     {'j','a','v','a','s','c','r','i','p','t',0}},
 200     {URL_SCHEME_VBSCRIPT,       {'v','b','s','c','r','i','p','t',0}},
 201     {URL_SCHEME_ABOUT,          {'a','b','o','u','t',0}},
 202     {URL_SCHEME_RES,            {'r','e','s',0}},
 203     {URL_SCHEME_MSSHELLROOTED,  {'m','s','-','s','h','e','l','l','-','r','o','o','t','e','d',0}},
 204     {URL_SCHEME_MSSHELLIDLIST,  {'m','s','-','s','h','e','l','l','-','i','d','l','i','s','t',0}},
 205     {URL_SCHEME_MSHELP,         {'h','c','p',0}},
 206     {URL_SCHEME_WILDCARD,       {'*',0}}
 207 };
 208
 209 /* List of default ports Windows recognizes. */
 210 static const struct {
 211     URL_SCHEME  scheme;
 212     USHORT      port;
 213 } default_ports[] = {
 214     {URL_SCHEME_FTP,    21},
 215     {URL_SCHEME_HTTP,   80},
 216     {URL_SCHEME_GOPHER, 70},
 217     {URL_SCHEME_NNTP,   119},
 218     {URL_SCHEME_TELNET, 23},
 219     {URL_SCHEME_WAIS,   210},
 220     {URL_SCHEME_HTTPS,  443},
 221 };
 222
 223 /* List of 3 character top level domain names Windows seems to recognize.
 224  * There might be more, but, these are the only ones I've found so far.
 225  */
 226 static const struct {
 227     WCHAR tld_name[4];
 228 } recognized_tlds[] = {
 229     {{'c','o','m',0}},
 230     {{'e','d','u',0}},
 231     {{'g','o','v',0}},
 232     {{'i','n','t',0}},
 233     {{'m','i','l',0}},
 234     {{'n','e','t',0}},
 235     {{'o','r','g',0}}
 236 };
 237
 238 static Uri *get_uri_obj(IUri *uri)
 239 {
 240     Uri *ret;
 241     HRESULT hres;
 242
 243     hres = IUri_QueryInterface(uri, &IID_IUriObj, (void**)&ret);
 244     return SUCCEEDED(hres) ? ret : NULL;
 245 }
 246
 247 static inline BOOL is_alpha(WCHAR val) {
 248         return ((val >= 'a' && val <= 'z') || (val >= 'A' && val <= 'Z'));
 249 }
 250
 251 static inline BOOL is_num(WCHAR val) {
 252         return (val >= '0' && val <= '9');
 253 }
 254
 255 static inline BOOL is_drive_path(const WCHAR *str) {
 256     return (is_alpha(str[0]) && (str[1] == ':' || str[1] == '|'));
 257 }
 258
 259 static inline BOOL is_unc_path(const WCHAR *str) {
 260     return (str[0] == '\\' && str[0] == '\\');
 261 }
 262
 263 static inline BOOL is_forbidden_dos_path_char(WCHAR val) {
 264     return (val == '>' || val == '<' || val == '\"');
 265 }
 266
 267 /* A URI is implicitly a file path if it begins with
 268  * a drive letter (eg X:) or starts with "\\" (UNC path).
 269  */
 270 static inline BOOL is_implicit_file_path(const WCHAR *str) {
 271     return (is_unc_path(str) || (is_alpha(str[0]) && str[1] == ':'));
 272 }
 273
 274 /* Checks if the URI is a hierarchical URI. A hierarchical
 275  * URI is one that has "//" after the scheme.
 276  */
 277 static BOOL check_hierarchical(const WCHAR **ptr) {
 278     const WCHAR *start = *ptr;
 279
 280     if(**ptr != '/')
 281         return FALSE;
 282
 283     ++(*ptr);
 284     if(**ptr != '/') {
 285         *ptr = start;
 286         return FALSE;
 287     }
 288
 289     ++(*ptr);
 290     return TRUE;
 291 }
 292
 293 /* unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~" */
 294 static inline BOOL is_unreserved(WCHAR val) {
 295     return (is_alpha(val) || is_num(val) || val == '-' || val == '.' ||
 296             val == '_' || val == '~');
 297 }
 298
 299 /* sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 300  *               / "*" / "+" / "," / ";" / "="
 301  */
 302 static inline BOOL is_subdelim(WCHAR val) {
 303     return (val == '!' || val == '$' || val == '&' ||
 304             val == '\'' || val == '(' || val == ')' ||
 305             val == '*' || val == '+' || val == ',' ||
 306             val == ';' || val == '=');
 307 }
 308
 309 /* gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@" */
 310 static inline BOOL is_gendelim(WCHAR val) {
 311     return (val == ':' || val == '/' || val == '?' ||
 312             val == '#' || val == '[' || val == ']' ||
 313             val == '@');
 314 }
 315
 316 /* Characters that delimit the end of the authority
 317  * section of a URI. Sometimes a '\\' is considered
 318  * an authority delimeter.
 319  */
 320 static inline BOOL is_auth_delim(WCHAR val, BOOL acceptSlash) {
 321     return (val == '#' || val == '/' || val == '?' ||
 322             val == '\0' || (acceptSlash && val == '\\'));
 323 }
 324
 325 /* reserved = gen-delims / sub-delims */
 326 static inline BOOL is_reserved(WCHAR val) {
 327     return (is_subdelim(val) || is_gendelim(val));
 328 }
 329
 330 static inline BOOL is_hexdigit(WCHAR val) {
 331     return ((val >= 'a' && val <= 'f') ||
 332             (val >= 'A' && val <= 'F') ||
 333             (val >= '0' && val <= '9'));
 334 }
 335
 336 static inline BOOL is_path_delim(WCHAR val) {
 337     return (!val || val == '#' || val == '?');
 338 }
 339
 340 /* List of schemes types Windows seems to expect to be hierarchical. */
 341 static inline BOOL is_hierarchical_scheme(URL_SCHEME type) {
 342     return(type == URL_SCHEME_HTTP || type == URL_SCHEME_FTP ||
 343            type == URL_SCHEME_GOPHER || type == URL_SCHEME_NNTP ||
 344            type == URL_SCHEME_TELNET || type == URL_SCHEME_WAIS ||
 345            type == URL_SCHEME_FILE || type == URL_SCHEME_HTTPS ||
 346            type == URL_SCHEME_RES);
 347 }
 348
 349 /* Checks if 'flags' contains an invalid combination of Uri_CREATE flags. */
 350 static inline BOOL has_invalid_flag_combination(DWORD flags) {
 351     return((flags & Uri_CREATE_DECODE_EXTRA_INFO && flags & Uri_CREATE_NO_DECODE_EXTRA_INFO) ||
 352            (flags & Uri_CREATE_CANONICALIZE && flags & Uri_CREATE_NO_CANONICALIZE) ||
 353            (flags & Uri_CREATE_CRACK_UNKNOWN_SCHEMES && flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES) ||
 354            (flags & Uri_CREATE_PRE_PROCESS_HTML_URI && flags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI) ||
 355            (flags & Uri_CREATE_IE_SETTINGS && flags & Uri_CREATE_NO_IE_SETTINGS));
 356 }
 357
 358 /* Applies each default Uri_CREATE flags to 'flags' if it
 359  * doesn't cause a flag conflict.
 360  */
 361 static void apply_default_flags(DWORD *flags) {
 362     if(!(*flags & Uri_CREATE_NO_CANONICALIZE))
 363         *flags |= Uri_CREATE_CANONICALIZE;
 364     if(!(*flags & Uri_CREATE_NO_DECODE_EXTRA_INFO))
 365         *flags |= Uri_CREATE_DECODE_EXTRA_INFO;
 366     if(!(*flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES))
 367         *flags |= Uri_CREATE_CRACK_UNKNOWN_SCHEMES;
 368     if(!(*flags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI))
 369         *flags |= Uri_CREATE_PRE_PROCESS_HTML_URI;
 370     if(!(*flags & Uri_CREATE_IE_SETTINGS))
 371         *flags |= Uri_CREATE_NO_IE_SETTINGS;
 372 }
 373
 374 /* Determines if the URI is hierarchical using the information already parsed into
 375  * data and using the current location of parsing in the URI string.
 376  *
 377  * Windows considers a URI hierarchical if on of the following is true:
 378  *  A.) It's a wildcard scheme.
 379  *  B.) It's an implicit file scheme.
 380  *  C.) It's a known hierarchical scheme and it has two '\\' after the scheme name.
 381  *      (the '\\' will be converted into "//" during canonicalization).
 382  *  D.) It's not a relative URI and "//" appears after the scheme name.
 383  */
 384 static inline BOOL is_hierarchical_uri(const WCHAR **ptr, const parse_data *data) {
 385     const WCHAR *start = *ptr;
 386
 387     if(data->scheme_type == URL_SCHEME_WILDCARD)
 388         return TRUE;
 389     else if(data->scheme_type == URL_SCHEME_FILE && data->has_implicit_scheme)
 390         return TRUE;
 391     else if(is_hierarchical_scheme(data->scheme_type) && (*ptr)[0] == '\\' && (*ptr)[1] == '\\') {
 392         *ptr += 2;
 393         return TRUE;
 394     } else if(!data->is_relative && check_hierarchical(ptr))
 395         return TRUE;
 396
 397     *ptr = start;
 398     return FALSE;
 399 }
 400
 401 /* Checks if the two Uri's are logically equivalent. It's a simple
 402  * comparison, since they are both of type Uri, and it can access
 403  * the properties of each Uri directly without the need to go
 404  * through the "IUri_Get*" interface calls.
 405  */
 406 static BOOL are_equal_simple(const Uri *a, const Uri *b) {
 407     if(a->scheme_type == b->scheme_type) {
 408         const BOOL known_scheme = a->scheme_type != URL_SCHEME_UNKNOWN;
 409         const BOOL are_hierarchical =
 410                 (a->authority_start > -1 && b->authority_start > -1);
 411
 412         if(a->scheme_type == URL_SCHEME_FILE) {
 413             if(a->canon_len == b->canon_len)
 414                 return !StrCmpIW(a->canon_uri, b->canon_uri);
 415         }
 416
 417         /* Only compare the scheme names (if any) if their unknown scheme types. */
 418         if(!known_scheme) {
 419             if((a->scheme_start > -1 && b->scheme_start > -1) &&
 420                (a->scheme_len == b->scheme_len)) {
 421                 /* Make sure the schemes are the same. */
 422                 if(StrCmpNW(a->canon_uri+a->scheme_start, b->canon_uri+b->scheme_start, a->scheme_len))
 423                     return FALSE;
 424             } else if(a->scheme_len != b->scheme_len)
 425                 /* One of the Uri's has a scheme name, while the other doesn't. */
 426                 return FALSE;
 427         }
 428
 429         /* If they have a userinfo component, perform case sensitive compare. */
 430         if((a->userinfo_start > -1 && b->userinfo_start > -1) &&
 431            (a->userinfo_len == b->userinfo_len)) {
 432             if(StrCmpNW(a->canon_uri+a->userinfo_start, b->canon_uri+b->userinfo_start, a->userinfo_len))
 433                 return FALSE;
 434         } else if(a->userinfo_len != b->userinfo_len)
 435             /* One of the Uri's had a userinfo, while the other one doesn't. */
 436             return FALSE;
 437
 438         /* Check if they have a host name. */
 439         if((a->host_start > -1 && b->host_start > -1) &&
 440            (a->host_len == b->host_len)) {
 441             /* Perform a case insensitive compare if they are a known scheme type. */
 442             if(known_scheme) {
 443                 if(StrCmpNIW(a->canon_uri+a->host_start, b->canon_uri+b->host_start, a->host_len))
 444                     return FALSE;
 445             } else if(StrCmpNW(a->canon_uri+a->host_start, b->canon_uri+b->host_start, a->host_len))
 446                 return FALSE;
 447         } else if(a->host_len != b->host_len)
 448             /* One of the Uri's had a host, while the other one didn't. */
 449             return FALSE;
 450
 451         if(a->has_port && b->has_port) {
 452             if(a->port != b->port)
 453                 return FALSE;
 454         } else if(a->has_port || b->has_port)
 455             /* One had a port, while the other one didn't. */
 456             return FALSE;
 457
 458         /* Windows is weird with how it handles paths. For example
 459          * One URI could be "http://google.com" (after canonicalization)
 460          * and one could be "http://google.com/" and the IsEqual function
 461          * would still evaluate to TRUE, but, only if they are both hierarchical
 462          * URIs.
 463          */
 464         if((a->path_start > -1 && b->path_start > -1) &&
 465            (a->path_len == b->path_len)) {
 466             if(StrCmpNW(a->canon_uri+a->path_start, b->canon_uri+b->path_start, a->path_len))
 467                 return FALSE;
 468         } else if(are_hierarchical && a->path_len == -1 && b->path_len == 0) {
 469             if(*(a->canon_uri+a->path_start) != '/')
 470                 return FALSE;
 471         } else if(are_hierarchical && b->path_len == 1 && a->path_len == 0) {
 472             if(*(b->canon_uri+b->path_start) != '/')
 473                 return FALSE;
 474         } else if(a->path_len != b->path_len)
 475             return FALSE;
 476
 477         /* Compare the query strings of the two URIs. */
 478         if((a->query_start > -1 && b->query_start > -1) &&
 479            (a->query_len == b->query_len)) {
 480             if(StrCmpNW(a->canon_uri+a->query_start, b->canon_uri+b->query_start, a->query_len))
 481                 return FALSE;
 482         } else if(a->query_len != b->query_len)
 483             return FALSE;
 484
 485         if((a->fragment_start > -1 && b->fragment_start > -1) &&
 486            (a->fragment_len == b->fragment_len)) {
 487             if(StrCmpNW(a->canon_uri+a->fragment_start, b->canon_uri+b->fragment_start, a->fragment_len))
 488                 return FALSE;
 489         } else if(a->fragment_len != b->fragment_len)
 490             return FALSE;
 491
 492         /* If we get here, the two URIs are equivalent. */
 493         return TRUE;
 494     }
 495
 496     return FALSE;
 497 }
 498
 499 /* Computes the size of the given IPv6 address.
 500  * Each h16 component is 16bits, if there is an IPv4 address, it's
 501  * 32bits. If there's an elision it can be 16bits to 128bits, depending
 502  * on the number of other components.
 503  *
 504  * Modeled after google-url's CheckIPv6ComponentsSize function
 505  */
 506 static void compute_ipv6_comps_size(ipv6_address *address) {
 507     address->components_size = address->h16_count * 2;
 508
 509     if(address->ipv4)
 510         /* IPv4 address is 4 bytes. */
 511         address->components_size += 4;
 512
 513     if(address->elision) {
 514         /* An elision can be anywhere from 2 bytes up to 16 bytes.
 515          * It size depends on the size of the h16 and IPv4 components.
 516          */
 517         address->elision_size = 16 - address->components_size;
 518         if(address->elision_size < 2)
 519             address->elision_size = 2;
 520     } else
 521         address->elision_size = 0;
 522 }
 523
 524 /* Taken from dlls/jscript/lex.c */
 525 static int hex_to_int(WCHAR val) {
 526     if(val >= '0' && val <= '9')
 527         return val - '0';
 528     else if(val >= 'a' && val <= 'f')
 529         return val - 'a' + 10;
 530     else if(val >= 'A' && val <= 'F')
 531         return val - 'A' + 10;
 532
 533     return -1;
 534 }
 535
 536 /* Helper function for converting a percent encoded string
 537  * representation of a WCHAR value into its actual WCHAR value. If
 538  * the two characters following the '%' aren't valid hex values then
 539  * this function returns the NULL character.
 540  *
 541  * Eg.
 542  *  "%2E" will result in '.' being returned by this function.
 543  */
 544 static WCHAR decode_pct_val(const WCHAR *ptr) {
 545     WCHAR ret = '\0';
 546
 547     if(*ptr == '%' && is_hexdigit(*(ptr + 1)) && is_hexdigit(*(ptr + 2))) {
 548         INT a = hex_to_int(*(ptr + 1));
 549         INT b = hex_to_int(*(ptr + 2));
 550
 551         ret = a << 4;
 552         ret += b;
 553     }
 554
 555     return ret;
 556 }
 557
 558 /* Helper function for percent encoding a given character
 559  * and storing the encoded value into a given buffer (dest).
 560  *
 561  * It's up to the calling function to ensure that there is
 562  * at least enough space in 'dest' for the percent encoded
 563  * value to be stored (so dest + 3 spaces available).
 564  */
 565 static inline void pct_encode_val(WCHAR val, WCHAR *dest) {
 566     dest[0] = '%';
 567     dest[1] = hexDigits[(val >> 4) & 0xf];
 568     dest[2] = hexDigits[val & 0xf];
 569 }
 570
 571 /* Scans the range of characters [str, end] and returns the last occurrence
 572  * of 'ch' or returns NULL.
 573  */
 574 static const WCHAR *str_last_of(const WCHAR *str, const WCHAR *end, WCHAR ch) {
 575     const WCHAR *ptr = end;
 576
 577     while(ptr >= str) {
 578         if(*ptr == ch)
 579             return ptr;
 580         --ptr;
 581     }
 582
 583     return NULL;
 584 }
 585
 586 /* Attempts to parse the domain name from the host.
 587  *
 588  * This function also includes the Top-level Domain (TLD) name
 589  * of the host when it tries to find the domain name. If it finds
 590  * a valid domain name it will assign 'domain_start' the offset
 591  * into 'host' where the domain name starts.
 592  *
 593  * It's implied that if a domain name its range is implied to be
 594  * [host+domain_start, host+host_len).
 595  */
 596 static void find_domain_name(const WCHAR *host, DWORD host_len,
 597                              INT *domain_start) {
 598     const WCHAR *last_tld, *sec_last_tld, *end;
 599
 600     end = host+host_len-1;
 601
 602     *domain_start = -1;
 603
 604     /* There has to be at least enough room for a '.' followed by a
 605      * 3 character TLD for a domain to even exist in the host name.
 606      */
 607     if(host_len < 4)
 608         return;
 609
 610     last_tld = str_last_of(host, end, '.');
 611     if(!last_tld)
 612         /* http://hostname -> has no domain name. */
 613         return;
 614
 615     sec_last_tld = str_last_of(host, last_tld-1, '.');
 616     if(!sec_last_tld) {
 617         /* If the '.' is at the beginning of the host there
 618          * has to be at least 3 characters in the TLD for it
 619          * to be valid.
 620          *  Ex: .com -> .com as the domain name.
 621          *      .co  -> has no domain name.
 622          */
 623         if(last_tld-host == 0) {
 624             if(end-(last_tld-1) < 3)
 625                 return;
 626         } else if(last_tld-host == 3) {
 627             DWORD i;
 628
 629             /* If there's three characters in front of last_tld and
 630              * they are on the list of recognized TLDs, then this
 631              * host doesn't have a domain (since the host only contains
 632              * a TLD name.
 633              *  Ex: edu.uk -> has no domain name.
 634              *      foo.uk -> foo.uk as the domain name.
 635              */
 636             for(i = 0; i < sizeof(recognized_tlds)/sizeof(recognized_tlds[0]); ++i) {
 637                 if(!StrCmpNIW(host, recognized_tlds[i].tld_name, 3))
 638                     return;
 639             }
 640         } else if(last_tld-host < 3)
 641             /* Anything less than 3 characters is considered part
 642              * of the TLD name.
 643              *  Ex: ak.uk -> Has no domain name.
 644              */
 645             return;
 646
 647         /* Otherwise the domain name is the whole host name. */
 648         *domain_start = 0;
 649     } else if(end+1-last_tld > 3) {
 650         /* If the last_tld has more than 3 characters, then it's automatically
 651          * considered the TLD of the domain name.
 652          *  Ex: www.winehq.org.uk.test -> uk.test as the domain name.
 653          */
 654         *domain_start = (sec_last_tld+1)-host;
 655     } else if(last_tld - (sec_last_tld+1) < 4) {
 656         DWORD i;
 657         /* If the sec_last_tld is 3 characters long it HAS to be on the list of
 658          * recognized to still be considered part of the TLD name, otherwise
 659          * its considered the domain name.
 660          *  Ex: www.google.com.uk -> google.com.uk as the domain name.
 661          *      www.google.foo.uk -> foo.uk as the domain name.
 662          */
 663         if(last_tld - (sec_last_tld+1) == 3) {
 664             for(i = 0; i < sizeof(recognized_tlds)/sizeof(recognized_tlds[0]); ++i) {
 665                 if(!StrCmpNIW(sec_last_tld+1, recognized_tlds[i].tld_name, 3)) {
 666                     const WCHAR *domain = str_last_of(host, sec_last_tld-1, '.');
 667
 668                     if(!domain)
 669                         *domain_start = 0;
 670                     else
 671                         *domain_start = (domain+1) - host;
 672                     TRACE("Found domain name %s\n", debugstr_wn(host+*domain_start,
 673                                                         (host+host_len)-(host+*domain_start)));
 674                     return;
 675                 }
 676             }
 677
 678             *domain_start = (sec_last_tld+1)-host;
 679         } else {
 680             /* Since the sec_last_tld is less than 3 characters it's considered
 681              * part of the TLD.
 682              *  Ex: www.google.fo.uk -> google.fo.uk as the domain name.
 683              */
 684             const WCHAR *domain = str_last_of(host, sec_last_tld-1, '.');
 685
 686             if(!domain)
 687                 *domain_start = 0;
 688             else
 689                 *domain_start = (domain+1) - host;
 690         }
 691     } else {
 692         /* The second to last TLD has more than 3 characters making it
 693          * the domain name.
 694          *  Ex: www.google.test.us -> test.us as the domain name.
 695          */
 696         *domain_start = (sec_last_tld+1)-host;
 697     }
 698
 699     TRACE("Found domain name %s\n", debugstr_wn(host+*domain_start,
 700                                         (host+host_len)-(host+*domain_start)));
 701 }
 702
 703 /* Removes the dot segments from a hierarchical URIs path component. This
 704  * function performs the removal in place.
 705  *
 706  * This is a modified version of Qt's QUrl function "removeDotsFromPath".
 707  *
 708  * This function returns the new length of the path string.
 709  */
 710 static DWORD remove_dot_segments(WCHAR *path, DWORD path_len) {
 711     WCHAR *out = path;
 712     const WCHAR *in = out;
 713     const WCHAR *end = out + path_len;
 714     DWORD len;
 715
 716     while(in < end) {
 717         /* A.  if the input buffer begins with a prefix of "/./" or "/.",
 718          *     where "." is a complete path segment, then replace that
 719          *     prefix with "/" in the input buffer; otherwise,
 720          */
 721         if(in <= end - 3 && in[0] == '/' && in[1] == '.' && in[2] == '/') {
 722             in += 2;
 723             continue;
 724         } else if(in == end - 2 && in[0] == '/' && in[1] == '.') {
 725             *out++ = '/';
 726             in += 2;
 727             break;
 728         }
 729
 730         /* B.  if the input buffer begins with a prefix of "/../" or "/..",
 731          *     where ".." is a complete path segment, then replace that
 732          *     prefix with "/" in the input buffer and remove the last
 733          *     segment and its preceding "/" (if any) from the output
 734          *     buffer; otherwise,
 735          */
 736         if(in <= end - 4 && in[0] == '/' && in[1] == '.' && in[2] == '.' && in[3] == '/') {
 737             while(out > path && *(--out) != '/');
 738
 739             in += 3;
 740             continue;
 741         } else if(in == end - 3 && in[0] == '/' && in[1] == '.' && in[2] == '.') {
 742             while(out > path && *(--out) != '/');
 743
 744             if(*out == '/')
 745                 ++out;
 746
 747             in += 3;
 748             break;
 749         }
 750
 751         /* C.  move the first path segment in the input buffer to the end of
 752          *     the output buffer, including the initial "/" character (if
 753          *     any) and any subsequent characters up to, but not including,
 754          *     the next "/" character or the end of the input buffer.
 755          */
 756         *out++ = *in++;
 757         while(in < end && *in != '/')
 758             *out++ = *in++;
 759     }
 760
 761     len = out - path;
 762     TRACE("(%p %d): Path after dot segments removed %s len=%d\n", path, path_len,
 763         debugstr_wn(path, len), len);
 764     return len;
 765 }
 766
 767 /* Attempts to find the file extension in a given path. */
 768 static INT find_file_extension(const WCHAR *path, DWORD path_len) {
 769     const WCHAR *end;
 770
 771     for(end = path+path_len-1; end >= path && *end != '/' && *end != '\\'; --end) {
 772         if(*end == '.')
 773             return end-path;
 774     }
 775
 776     return -1;
 777 }
 778
 779 /* Computes the location where the elision should occur in the IPv6
 780  * address using the numerical values of each component stored in
 781  * 'values'. If the address shouldn't contain an elision then 'index'
 782  * is assigned -1 as it's value. Otherwise 'index' will contain the
 783  * starting index (into values) where the elision should be, and 'count'
 784  * will contain the number of cells the elision covers.
 785  *
 786  * NOTES:
 787  *  Windows will expand an elision if the elision only represents 1 h16
 788  *  component of the URI.
 789  *
 790  *  Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7]
 791  *
 792  *  If the IPv6 address contains an IPv4 address, the IPv4 address is also
 793  *  considered for being included as part of an elision if all it's components
 794  *  are zeros.
 795  *
 796  *  Ex: [1:2:3:4:5:6:0.0.0.0] -> [1:2:3:4:5:6::]
 797  */
 798 static void compute_elision_location(const ipv6_address *address, const USHORT values[8],
 799                                      INT *index, DWORD *count) {
 800     DWORD i, max_len, cur_len;
 801     INT max_index, cur_index;
 802
 803     max_len = cur_len = 0;
 804     max_index = cur_index = -1;
 805     for(i = 0; i < 8; ++i) {
 806         BOOL check_ipv4 = (address->ipv4 && i == 6);
 807         BOOL is_end = (check_ipv4 || i == 7);
 808
 809         if(check_ipv4) {
 810             /* Check if the IPv4 address contains only zeros. */
 811             if(values[i] == 0 && values[i+1] == 0) {
 812                 if(cur_index == -1)
 813                     cur_index = i;
 814
 815                 cur_len += 2;
 816                 ++i;
 817             }
 818         } else if(values[i] == 0) {
 819             if(cur_index == -1)
 820                 cur_index = i;
 821
 822             ++cur_len;
 823         }
 824
 825         if(is_end || values[i] != 0) {
 826             /* We only consider it for an elision if it's
 827              * more than 1 component long.
 828              */
 829             if(cur_len > 1 && cur_len > max_len) {
 830                 /* Found the new elision location. */
 831                 max_len = cur_len;
 832                 max_index = cur_index;
 833             }
 834
 835             /* Reset the current range for the next range of zeros. */
 836             cur_index = -1;
 837             cur_len = 0;
 838         }
 839     }
 840
 841     *index = max_index;
 842     *count = max_len;
 843 }
 844
 845 /* Removes all the leading and trailing white spaces or
 846  * control characters from the URI and removes all control
 847  * characters inside of the URI string.
 848  */
 849 static BSTR pre_process_uri(LPCWSTR uri) {
 850     BSTR ret;
 851     DWORD len;
 852     const WCHAR *start, *end;
 853     WCHAR *buf, *ptr;
 854
 855     len = lstrlenW(uri);
 856
 857     start = uri;
 858     /* Skip leading controls and whitespace. */
 859     while(iscntrlW(*start) || isspaceW(*start)) ++start;
 860
 861     end = uri+len-1;
 862     if(start == end)
 863         /* URI consisted only of control/whitespace. */
 864         ret = SysAllocStringLen(NULL, 0);
 865     else {
 866         while(iscntrlW(*end) || isspaceW(*end)) --end;
 867
 868         buf = heap_alloc(((end+1)-start)*sizeof(WCHAR));
 869         if(!buf)
 870             return NULL;
 871
 872         for(ptr = buf; start < end+1; ++start) {
 873             if(!iscntrlW(*start))
 874                 *ptr++ = *start;
 875         }
 876
 877         ret = SysAllocStringLen(buf, ptr-buf);
 878         heap_free(buf);
 879     }
 880
 881     return ret;
 882 }
 883
 884 /* Converts the specified IPv4 address into an uint value.
 885  *
 886  * This function assumes that the IPv4 address has already been validated.
 887  */
 888 static UINT ipv4toui(const WCHAR *ip, DWORD len) {
 889     UINT ret = 0;
 890     DWORD comp_value = 0;
 891     const WCHAR *ptr;
 892
 893     for(ptr = ip; ptr < ip+len; ++ptr) {
 894         if(*ptr == '.') {
 895             ret <<= 8;
 896             ret += comp_value;
 897             comp_value = 0;
 898         } else
 899             comp_value = comp_value*10 + (*ptr-'0');
 900     }
 901
 902     ret <<= 8;
 903     ret += comp_value;
 904
 905     return ret;
 906 }
 907
 908 /* Converts an IPv4 address in numerical form into it's fully qualified
 909  * string form. This function returns the number of characters written
 910  * to 'dest'. If 'dest' is NULL this function will return the number of
 911  * characters that would have been written.
 912  *
 913  * It's up to the caller to ensure there's enough space in 'dest' for the
 914  * address.
 915  */
 916 static DWORD ui2ipv4(WCHAR *dest, UINT address) {
 917     static const WCHAR formatW[] =
 918         {'%','u','.','%','u','.','%','u','.','%','u',0};
 919     DWORD ret = 0;
 920     UCHAR digits[4];
 921
 922     digits[0] = (address >> 24) & 0xff;
 923     digits[1] = (address >> 16) & 0xff;
 924     digits[2] = (address >> 8) & 0xff;
 925     digits[3] = address & 0xff;
 926
 927     if(!dest) {
 928         WCHAR tmp[16];
 929         ret = sprintfW(tmp, formatW, digits[0], digits[1], digits[2], digits[3]);
 930     } else
 931         ret = sprintfW(dest, formatW, digits[0], digits[1], digits[2], digits[3]);
 932
 933     return ret;
 934 }
 935
 936 /* Converts an h16 component (from an IPv6 address) into it's
 937  * numerical value.
 938  *
 939  * This function assumes that the h16 component has already been validated.
 940  */
 941 static USHORT h16tous(h16 component) {
 942     DWORD i;
 943     USHORT ret = 0;
 944
 945     for(i = 0; i < component.len; ++i) {
 946         ret <<= 4;
 947         ret += hex_to_int(component.str[i]);
 948     }
 949
 950     return ret;
 951 }
 952
 953 /* Converts an IPv6 address into it's 128 bits (16 bytes) numerical value.
 954  *
 955  * This function assumes that the ipv6_address has already been validated.
 956  */
 957 static BOOL ipv6_to_number(const ipv6_address *address, USHORT number[8]) {
 958     DWORD i, cur_component = 0;
 959     BOOL already_passed_elision = FALSE;
 960
 961     for(i = 0; i < address->h16_count; ++i) {
 962         if(address->elision) {
 963             if(address->components[i].str > address->elision && !already_passed_elision) {
 964                 /* Means we just passed the elision and need to add it's values to
 965                  * 'number' before we do anything else.
 966                  */
 967                 DWORD j = 0;
 968                 for(j = 0; j < address->elision_size; j+=2)
 969                     number[cur_component++] = 0;
 970
 971                 already_passed_elision = TRUE;
 972             }
 973         }
 974
 975         number[cur_component++] = h16tous(address->components[i]);
 976     }
 977
 978     /* Case when the elision appears after the h16 components. */
 979     if(!already_passed_elision && address->elision) {
 980         for(i = 0; i < address->elision_size; i+=2)
 981             number[cur_component++] = 0;
 982         already_passed_elision = TRUE;
 983     }
 984
 985     if(address->ipv4) {
 986         UINT value = ipv4toui(address->ipv4, address->ipv4_len);
 987
 988         if(cur_component != 6) {
 989             ERR("(%p %p): Failed sanity check with %d\n", address, number, cur_component);
 990             return FALSE;
 991         }
 992
 993         number[cur_component++] = (value >> 16) & 0xffff;
 994         number[cur_component] = value & 0xffff;
 995     }
 996
 997     return TRUE;
 998 }
 999
1000 /* Checks if the characters pointed to by 'ptr' are
1001  * a percent encoded data octet.
1002  *
1003  * pct-encoded = "%" HEXDIG HEXDIG
1004  */
1005 static BOOL check_pct_encoded(const WCHAR **ptr) {
1006     const WCHAR *start = *ptr;
1007
1008     if(**ptr != '%')
1009         return FALSE;
1010
1011     ++(*ptr);
1012     if(!is_hexdigit(**ptr)) {
1013         *ptr = start;
1014         return FALSE;
1015     }
1016
1017     ++(*ptr);
1018     if(!is_hexdigit(**ptr)) {
1019         *ptr = start;
1020         return FALSE;
1021     }
1022
1023     ++(*ptr);
1024     return TRUE;
1025 }
1026
1027 /* dec-octet   = DIGIT                 ; 0-9
1028  *             / %x31-39 DIGIT         ; 10-99
1029  *             / "1" 2DIGIT            ; 100-199
1030  *             / "2" %x30-34 DIGIT     ; 200-249
1031  *             / "25" %x30-35          ; 250-255
1032  */
1033 static BOOL check_dec_octet(const WCHAR **ptr) {
1034     const WCHAR *c1, *c2, *c3;
1035
1036     c1 = *ptr;
1037     /* A dec-octet must be at least 1 digit long. */
1038     if(*c1 < '0' || *c1 > '9')
1039         return FALSE;
1040
1041     ++(*ptr);
1042
1043     c2 = *ptr;
1044     /* Since the 1 digit requirment was meet, it doesn't
1045      * matter if this is a DIGIT value, it's considered a
1046      * dec-octet.
1047      */
1048     if(*c2 < '0' || *c2 > '9')
1049         return TRUE;
1050
1051     ++(*ptr);
1052
1053     c3 = *ptr;
1054     /* Same explanation as above. */
1055     if(*c3 < '0' || *c3 > '9')
1056         return TRUE;
1057
1058     /* Anything > 255 isn't a valid IP dec-octet. */
1059     if(*c1 >= '2' && *c2 >= '5' && *c3 >= '5') {
1060         *ptr = c1;
1061         return FALSE;
1062     }
1063
1064     ++(*ptr);
1065     return TRUE;
1066 }
1067
1068 /* Checks if there is an implicit IPv4 address in the host component of the URI.
1069  * The max value of an implicit IPv4 address is UINT_MAX.
1070  *
1071  *  Ex:
1072  *      "234567" would be considered an implicit IPv4 address.
1073  */
1074 static BOOL check_implicit_ipv4(const WCHAR **ptr, UINT *val) {
1075     const WCHAR *start = *ptr;
1076     ULONGLONG ret = 0;
1077     *val = 0;
1078
1079     while(is_num(**ptr)) {
1080         ret = ret*10 + (**ptr - '0');
1081
1082         if(ret > UINT_MAX) {
1083             *ptr = start;
1084             return FALSE;
1085         }
1086         ++(*ptr);
1087     }
1088
1089     if(*ptr == start)
1090         return FALSE;
1091
1092     *val = ret;
1093     return TRUE;
1094 }
1095
1096 /* Checks if the string contains an IPv4 address.
1097  *
1098  * This function has a strict mode or a non-strict mode of operation
1099  * When 'strict' is set to FALSE this function will return TRUE if
1100  * the string contains at least 'dec-octet "." dec-octet' since partial
1101  * IPv4 addresses will be normalized out into full IPv4 addresses. When
1102  * 'strict' is set this function expects there to be a full IPv4 address.
1103  *
1104  * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
1105  */
1106 static BOOL check_ipv4address(const WCHAR **ptr, BOOL strict) {
1107     const WCHAR *start = *ptr;
1108
1109     if(!check_dec_octet(ptr)) {
1110         *ptr = start;
1111         return FALSE;
1112     }
1113
1114     if(**ptr != '.') {
1115         *ptr = start;
1116         return FALSE;
1117     }
1118
1119     ++(*ptr);
1120     if(!check_dec_octet(ptr)) {
1121         *ptr = start;
1122         return FALSE;
1123     }
1124
1125     if(**ptr != '.') {
1126         if(strict) {
1127             *ptr = start;
1128             return FALSE;
1129         } else
1130             return TRUE;
1131     }
1132
1133     ++(*ptr);
1134     if(!check_dec_octet(ptr)) {
1135         *ptr = start;
1136         return FALSE;
1137     }
1138
1139     if(**ptr != '.') {
1140         if(strict) {
1141             *ptr = start;
1142             return FALSE;
1143         } else
1144             return TRUE;
1145     }
1146
1147     ++(*ptr);
1148     if(!check_dec_octet(ptr)) {
1149         *ptr = start;
1150         return FALSE;
1151     }
1152
1153     /* Found a four digit ip address. */
1154     return TRUE;
1155 }
1156 /* Tries to parse the scheme name of the URI.
1157  *
1158  * scheme = ALPHA *(ALPHA | NUM | '+' | '-' | '.') as defined by RFC 3896.
1159  * NOTE: Windows accepts a number as the first character of a scheme.
1160  */
1161 static BOOL parse_scheme_name(const WCHAR **ptr, parse_data *data, DWORD extras) {
1162     const WCHAR *start = *ptr;
1163
1164     data->scheme = NULL;
1165     data->scheme_len = 0;
1166
1167     while(**ptr) {
1168         if(**ptr == '*' && *ptr == start) {
1169             /* Might have found a wildcard scheme. If it is the next
1170              * char has to be a ':' for it to be a valid URI
1171              */
1172             ++(*ptr);
1173             break;
1174         } else if(!is_num(**ptr) && !is_alpha(**ptr) && **ptr != '+' &&
1175            **ptr != '-' && **ptr != '.')
1176             break;
1177
1178         (*ptr)++;
1179     }
1180
1181     if(*ptr == start)
1182         return FALSE;
1183
1184     /* Schemes must end with a ':' */
1185     if(**ptr != ':' && !((extras & ALLOW_NULL_TERM_SCHEME) && !**ptr)) {
1186         *ptr = start;
1187         return FALSE;
1188     }
1189
1190     data->scheme = start;
1191     data->scheme_len = *ptr - start;
1192
1193     ++(*ptr);
1194     return TRUE;
1195 }
1196
1197 /* Tries to deduce the corresponding URL_SCHEME for the given URI. Stores
1198  * the deduced URL_SCHEME in data->scheme_type.
1199  */
1200 static BOOL parse_scheme_type(parse_data *data) {
1201     /* If there's scheme data then see if it's a recognized scheme. */
1202     if(data->scheme && data->scheme_len) {
1203         DWORD i;
1204
1205         for(i = 0; i < sizeof(recognized_schemes)/sizeof(recognized_schemes[0]); ++i) {
1206             if(lstrlenW(recognized_schemes[i].scheme_name) == data->scheme_len) {
1207                 /* Has to be a case insensitive compare. */
1208                 if(!StrCmpNIW(recognized_schemes[i].scheme_name, data->scheme, data->scheme_len)) {
1209                     data->scheme_type = recognized_schemes[i].scheme;
1210                     return TRUE;
1211                 }
1212             }
1213         }
1214
1215         /* If we get here it means it's not a recognized scheme. */
1216         data->scheme_type = URL_SCHEME_UNKNOWN;
1217         return TRUE;
1218     } else if(data->is_relative) {
1219         /* Relative URI's have no scheme. */
1220         data->scheme_type = URL_SCHEME_UNKNOWN;
1221         return TRUE;
1222     } else {
1223         /* Should never reach here! what happened... */
1224         FIXME("(%p): Unable to determine scheme type for URI %s\n", data, debugstr_w(data->uri));
1225         return FALSE;
1226     }
1227 }
1228
1229 /* Tries to parse (or deduce) the scheme_name of a URI. If it can't
1230  * parse a scheme from the URI it will try to deduce the scheme_name and scheme_type
1231  * using the flags specified in 'flags' (if any). Flags that affect how this function
1232  * operates are the Uri_CREATE_ALLOW_* flags.
1233  *
1234  * All parsed/deduced information will be stored in 'data' when the function returns.
1235  *
1236  * Returns TRUE if it was able to successfully parse the information.
1237  */
1238 static BOOL parse_scheme(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) {
1239     static const WCHAR fileW[] = {'f','i','l','e',0};
1240     static const WCHAR wildcardW[] = {'*',0};
1241
1242     /* First check to see if the uri could implicitly be a file path. */
1243     if(is_implicit_file_path(*ptr)) {
1244         if(flags & Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME) {
1245             data->scheme = fileW;
1246             data->scheme_len = lstrlenW(fileW);
1247             data->has_implicit_scheme = TRUE;
1248
1249             TRACE("(%p %p %x): URI is an implicit file path.\n", ptr, data, flags);
1250         } else {
1251             /* Window's does not consider anything that can implicitly be a file
1252              * path to be a valid URI if the ALLOW_IMPLICIT_FILE_SCHEME flag is not set...
1253              */
1254             TRACE("(%p %p %x): URI is implicitly a file path, but, the ALLOW_IMPLICIT_FILE_SCHEME flag wasn't set.\n",
1255                     ptr, data, flags);
1256             return FALSE;
1257         }
1258     } else if(!parse_scheme_name(ptr, data, extras)) {
1259         /* No Scheme was found, this means it could be:
1260          *      a) an implicit Wildcard scheme
1261          *      b) a relative URI
1262          *      c) a invalid URI.
1263          */
1264         if(flags & Uri_CREATE_ALLOW_IMPLICIT_WILDCARD_SCHEME) {
1265             data->scheme = wildcardW;
1266             data->scheme_len = lstrlenW(wildcardW);
1267             data->has_implicit_scheme = TRUE;
1268
1269             TRACE("(%p %p %x): URI is an implicit wildcard scheme.\n", ptr, data, flags);
1270         } else if (flags & Uri_CREATE_ALLOW_RELATIVE) {
1271             data->is_relative = TRUE;
1272             TRACE("(%p %p %x): URI is relative.\n", ptr, data, flags);
1273         } else {
1274             TRACE("(%p %p %x): Malformed URI found. Unable to deduce scheme name.\n", ptr, data, flags);
1275             return FALSE;
1276         }
1277     }
1278
1279     if(!data->is_relative)
1280         TRACE("(%p %p %x): Found scheme=%s scheme_len=%d\n", ptr, data, flags,
1281                 debugstr_wn(data->scheme, data->scheme_len), data->scheme_len);
1282
1283     if(!parse_scheme_type(data))
1284         return FALSE;
1285
1286     TRACE("(%p %p %x): Assigned %d as the URL_SCHEME.\n", ptr, data, flags, data->scheme_type);
1287     return TRUE;
1288 }
1289
1290 static BOOL parse_username(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) {
1291     data->username = *ptr;
1292
1293     while(**ptr != ':' && **ptr != '@') {
1294         if(**ptr == '%') {
1295             if(!check_pct_encoded(ptr)) {
1296                 if(data->scheme_type != URL_SCHEME_UNKNOWN) {
1297                     *ptr = data->username;
1298                     data->username = NULL;
1299                     return FALSE;
1300                 }
1301             } else
1302                 continue;
1303         } else if(extras & ALLOW_NULL_TERM_USER_NAME && !**ptr)
1304             break;
1305         else if(is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) {
1306             *ptr = data->username;
1307             data->username = NULL;
1308             return FALSE;
1309         }
1310
1311         ++(*ptr);
1312     }
1313
1314     data->username_len = *ptr - data->username;
1315     return TRUE;
1316 }
1317
1318 static BOOL parse_password(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) {
1319     const WCHAR *start = *ptr;
1320
1321     if(**ptr != ':')
1322         return TRUE;
1323
1324     ++(*ptr);
1325     data->password = *ptr;
1326
1327     while(**ptr != '@') {
1328         if(**ptr == '%') {
1329             if(!check_pct_encoded(ptr)) {
1330                 if(data->scheme_type != URL_SCHEME_UNKNOWN) {
1331                     *ptr = start;
1332                     data->password = NULL;
1333                     return FALSE;
1334                 }
1335             } else
1336                 continue;
1337         } else if(extras & ALLOW_NULL_TERM_PASSWORD && !**ptr)
1338             break;
1339         else if(is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) {
1340             *ptr = start;
1341             data->password = NULL;
1342             return FALSE;
1343         }
1344
1345         ++(*ptr);
1346     }
1347
1348     data->password_len = *ptr - data->password;
1349     return TRUE;
1350 }
1351
1352 /* Parses the userinfo part of the URI (if it exists). The userinfo field of
1353  * a URI can consist of "username:password@", or just "username@".
1354  *
1355  * RFC def:
1356  * userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )
1357  *
1358  * NOTES:
1359  *  1)  If there is more than one ':' in the userinfo part of the URI Windows
1360  *      uses the first occurrence of ':' to delimit the username and password
1361  *      components.
1362  *
1363  *      ex:
1364  *          ftp://user:pass:word@winehq.org
1365  *
1366  *      Would yield, "user" as the username and "pass:word" as the password.
1367  *
1368  *  2)  Windows allows any character to appear in the "userinfo" part of
1369  *      a URI, as long as it's not an authority delimeter character set.
1370  */
1371 static void parse_userinfo(const WCHAR **ptr, parse_data *data, DWORD flags) {
1372     const WCHAR *start = *ptr;
1373
1374     if(!parse_username(ptr, data, flags, 0)) {
1375         TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags);
1376         return;
1377     }
1378
1379     if(!parse_password(ptr, data, flags, 0)) {
1380         *ptr = start;
1381         data->username = NULL;
1382         data->username_len = 0;
1383         TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags);
1384         return;
1385     }
1386
1387     if(**ptr != '@') {
1388         *ptr = start;
1389         data->username = NULL;
1390         data->username_len = 0;
1391         data->password = NULL;
1392         data->password_len = 0;
1393
1394         TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags);
1395         return;
1396     }
1397
1398     if(data->username)
1399         TRACE("(%p %p %x): Found username %s len=%d.\n", ptr, data, flags,
1400             debugstr_wn(data->username, data->username_len), data->username_len);
1401
1402     if(data->password)
1403         TRACE("(%p %p %x): Found password %s len=%d.\n", ptr, data, flags,
1404             debugstr_wn(data->password, data->password_len), data->password_len);
1405
1406     ++(*ptr);
1407 }
1408
1409 /* Attempts to parse a port from the URI.
1410  *
1411  * NOTES:
1412  *  Windows seems to have a cap on what the maximum value
1413  *  for a port can be. The max value is USHORT_MAX.
1414  *
1415  * port = *DIGIT
1416  */
1417 static BOOL parse_port(const WCHAR **ptr, parse_data *data, DWORD flags) {
1418     UINT port = 0;
1419     data->port = *ptr;
1420
1421     while(!is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) {
1422         if(!is_num(**ptr)) {
1423             *ptr = data->port;
1424             data->port = NULL;
1425             return FALSE;
1426         }
1427
1428         port = port*10 + (**ptr-'0');
1429
1430         if(port > USHORT_MAX) {
1431             *ptr = data->port;
1432             data->port = NULL;
1433             return FALSE;
1434         }
1435
1436         ++(*ptr);
1437     }
1438
1439     data->has_port = TRUE;
1440     data->port_value = port;
1441     data->port_len = *ptr - data->port;
1442
1443     TRACE("(%p %p %x): Found port %s len=%d value=%u\n", ptr, data, flags,
1444         debugstr_wn(data->port, data->port_len), data->port_len, data->port_value);
1445     return TRUE;
1446 }
1447
1448 /* Attempts to parse a IPv4 address from the URI.
1449  *
1450  * NOTES:
1451  *  Window's normalizes IPv4 addresses, This means there's three
1452  *  possibilities for the URI to contain an IPv4 address.
1453  *      1)  A well formed address (ex. 192.2.2.2).
1454  *      2)  A partially formed address. For example "192.0" would
1455  *          normalize to "192.0.0.0" during canonicalization.
1456  *      3)  An implicit IPv4 address. For example "256" would
1457  *          normalize to "0.0.1.0" during canonicalization. Also
1458  *          note that the maximum value for an implicit IP address
1459  *          is UINT_MAX, if the value in the URI exceeds this then
1460  *          it is not considered an IPv4 address.
1461  */
1462 static BOOL parse_ipv4address(const WCHAR **ptr, parse_data *data, DWORD flags) {
1463     const BOOL is_unknown = data->scheme_type == URL_SCHEME_UNKNOWN;
1464     data->host = *ptr;
1465
1466     if(!check_ipv4address(ptr, FALSE)) {
1467         if(!check_implicit_ipv4(ptr, &data->implicit_ipv4)) {
1468             TRACE("(%p %p %x): URI didn't contain anything looking like an IPv4 address.\n",
1469                 ptr, data, flags);
1470             *ptr = data->host;
1471             data->host = NULL;
1472             return FALSE;
1473         } else
1474             data->has_implicit_ip = TRUE;
1475     }
1476
1477     /* Check if what we found is the only part of the host name (if it isn't
1478      * we don't have an IPv4 address).
1479      */
1480     if(**ptr == ':') {
1481         ++(*ptr);
1482         if(!parse_port(ptr, data, flags)) {
1483             *ptr = data->host;
1484             data->host = NULL;
1485             return FALSE;
1486         }
1487     } else if(!is_auth_delim(**ptr, !is_unknown)) {
1488         /* Found more data which belongs the host, so this isn't an IPv4. */
1489         *ptr = data->host;
1490         data->host = NULL;
1491         data->has_implicit_ip = FALSE;
1492         return FALSE;
1493     }
1494
1495     data->host_len = *ptr - data->host;
1496     data->host_type = Uri_HOST_IPV4;
1497
1498     TRACE("(%p %p %x): IPv4 address found. host=%s host_len=%d host_type=%d\n",
1499         ptr, data, flags, debugstr_wn(data->host, data->host_len),
1500         data->host_len, data->host_type);
1501     return TRUE;
1502 }
1503
1504 /* Attempts to parse the reg-name from the URI.
1505  *
1506  * Because of the way Windows handles ':' this function also
1507  * handles parsing the port.
1508  *
1509  * reg-name = *( unreserved / pct-encoded / sub-delims )
1510  *
1511  * NOTE:
1512  *  Windows allows everything, but, the characters in "auth_delims" and ':'
1513  *  to appear in a reg-name, unless it's an unknown scheme type then ':' is
1514  *  allowed to appear (even if a valid port isn't after it).
1515  *
1516  *  Windows doesn't like host names which start with '[' and end with ']'
1517  *  and don't contain a valid IP literal address in between them.
1518  *
1519  *  On Windows if an '[' is encountered in the host name the ':' no longer
1520  *  counts as a delimiter until you reach the next ']' or an "authority delimeter".
1521  *
1522  *  A reg-name CAN be empty.
1523  */
1524 static BOOL parse_reg_name(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) {
1525     const BOOL has_start_bracket = **ptr == '[';
1526     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
1527     BOOL inside_brackets = has_start_bracket;
1528     BOOL ignore_col = extras & IGNORE_PORT_DELIMITER;
1529
1530     /* We have to be careful with file schemes. */
1531     if(data->scheme_type == URL_SCHEME_FILE) {
1532         /* This is because an implicit file scheme could be "C:\\test" and it
1533          * would trick this function into thinking the host is "C", when after
1534          * canonicalization the host would end up being an empty string. A drive
1535          * path can also have a '|' instead of a ':' after the drive letter.
1536          */
1537         if(is_drive_path(*ptr)) {
1538             /* Regular old drive paths don't have a host type (or host name). */
1539             data->host_type = Uri_HOST_UNKNOWN;
1540             data->host = *ptr;
1541             data->host_len = 0;
1542             return TRUE;
1543         } else if(is_unc_path(*ptr))
1544             /* Skip past the "\\" of a UNC path. */
1545             *ptr += 2;
1546     }
1547
1548     data->host = *ptr;
1549
1550     while(!is_auth_delim(**ptr, known_scheme)) {
1551         if(**ptr == ':' && !ignore_col) {
1552             /* We can ignore ':' if were inside brackets.*/
1553             if(!inside_brackets) {
1554                 const WCHAR *tmp = (*ptr)++;
1555
1556                 /* Attempt to parse the port. */
1557                 if(!parse_port(ptr, data, flags)) {
1558                     /* Windows expects there to be a valid port for known scheme types. */
1559                     if(data->scheme_type != URL_SCHEME_UNKNOWN) {
1560                         *ptr = data->host;
1561                         data->host = NULL;
1562                         TRACE("(%p %p %x %x): Expected valid port\n", ptr, data, flags, extras);
1563                         return FALSE;
1564                     } else
1565                         /* Windows gives up on trying to parse a port when it
1566                          * encounters 1 invalid port.
1567                          */
1568                         ignore_col = TRUE;
1569                 } else {
1570                     data->host_len = tmp - data->host;
1571                     break;
1572                 }
1573             }
1574         } else if(**ptr == '%' && known_scheme) {
1575             /* Has to be a legit % encoded value. */
1576             if(!check_pct_encoded(ptr)) {
1577                 *ptr = data->host;
1578                 data->host = NULL;
1579                 return FALSE;
1580             } else
1581                 continue;
1582         } else if(**ptr == ']')
1583             inside_brackets = FALSE;
1584         else if(**ptr == '[')
1585             inside_brackets = TRUE;
1586
1587         ++(*ptr);
1588     }
1589
1590     if(has_start_bracket) {
1591         /* Make sure the last character of the host wasn't a ']'. */
1592         if(*(*ptr-1) == ']') {
1593             TRACE("(%p %p %x %x): Expected an IP literal inside of the host\n",
1594                 ptr, data, flags, extras);
1595             *ptr = data->host;
1596             data->host = NULL;
1597             return FALSE;
1598         }
1599     }
1600
1601     /* Don't overwrite our length if we found a port earlier. */
1602     if(!data->port)
1603         data->host_len = *ptr - data->host;
1604
1605     /* If the host is empty, then it's an unknown host type. */
1606     if(data->host_len == 0)
1607         data->host_type = Uri_HOST_UNKNOWN;
1608     else
1609         data->host_type = Uri_HOST_DNS;
1610
1611     TRACE("(%p %p %x %x): Parsed reg-name. host=%s len=%d\n", ptr, data, flags, extras,
1612         debugstr_wn(data->host, data->host_len), data->host_len);
1613     return TRUE;
1614 }
1615
1616 /* Attempts to parse an IPv6 address out of the URI.
1617  *
1618  * IPv6address =                               6( h16 ":" ) ls32
1619  *                /                       "::" 5( h16 ":" ) ls32
1620  *                / [               h16 ] "::" 4( h16 ":" ) ls32
1621  *                / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1622  *                / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1623  *                / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
1624  *                / [ *4( h16 ":" ) h16 ] "::"              ls32
1625  *                / [ *5( h16 ":" ) h16 ] "::"              h16
1626  *                / [ *6( h16 ":" ) h16 ] "::"
1627  *
1628  * ls32        = ( h16 ":" h16 ) / IPv4address
1629  *             ; least-significant 32 bits of address.
1630  *
1631  * h16         = 1*4HEXDIG
1632  *             ; 16 bits of address represented in hexadecimal.
1633  *
1634  * Modeled after google-url's 'DoParseIPv6' function.
1635  */
1636 static BOOL parse_ipv6address(const WCHAR **ptr, parse_data *data, DWORD flags) {
1637     const WCHAR *start, *cur_start;
1638     ipv6_address ip;
1639
1640     start = cur_start = *ptr;
1641     memset(&ip, 0, sizeof(ipv6_address));
1642
1643     for(;; ++(*ptr)) {
1644         /* Check if we're on the last character of the host. */
1645         BOOL is_end = (is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)
1646                         || **ptr == ']');
1647
1648         BOOL is_split = (**ptr == ':');
1649         BOOL is_elision = (is_split && !is_end && *(*ptr+1) == ':');
1650
1651         /* Check if we're at the end of a component, or
1652          * if we're at the end of the IPv6 address.
1653          */
1654         if(is_split || is_end) {
1655             DWORD cur_len = 0;
1656
1657             cur_len = *ptr - cur_start;
1658
1659             /* h16 can't have a length > 4. */
1660             if(cur_len > 4) {
1661                 *ptr = start;
1662
1663                 TRACE("(%p %p %x): h16 component to long.\n",
1664                     ptr, data, flags);
1665                 return FALSE;
1666             }
1667
1668             if(cur_len == 0) {
1669                 /* An h16 component can't have the length of 0 unless
1670                  * the elision is at the beginning of the address, or
1671                  * at the end of the address.
1672                  */
1673                 if(!((*ptr == start && is_elision) ||
1674                     (is_end && (*ptr-2) == ip.elision))) {
1675                     *ptr = start;
1676                     TRACE("(%p %p %x): IPv6 component cannot have a length of 0.\n",
1677                         ptr, data, flags);
1678                     return FALSE;
1679                 }
1680             }
1681
1682             if(cur_len > 0) {
1683                 /* An IPv6 address can have no more than 8 h16 components. */
1684                 if(ip.h16_count >= 8) {
1685                     *ptr = start;
1686                     TRACE("(%p %p %x): Not a IPv6 address, to many h16 components.\n",
1687                         ptr, data, flags);
1688                     return FALSE;
1689                 }
1690
1691                 ip.components[ip.h16_count].str = cur_start;
1692                 ip.components[ip.h16_count].len = cur_len;
1693
1694                 TRACE("(%p %p %x): Found h16 component %s, len=%d, h16_count=%d\n",
1695                     ptr, data, flags, debugstr_wn(cur_start, cur_len), cur_len,
1696                     ip.h16_count);
1697                 ++ip.h16_count;
1698             }
1699         }
1700
1701         if(is_end)
1702             break;
1703
1704         if(is_elision) {
1705             /* A IPv6 address can only have 1 elision ('::'). */
1706             if(ip.elision) {
1707                 *ptr = start;
1708
1709                 TRACE("(%p %p %x): IPv6 address cannot have 2 elisions.\n",
1710                     ptr, data, flags);
1711                 return FALSE;
1712             }
1713
1714             ip.elision = *ptr;
1715             ++(*ptr);
1716         }
1717
1718         if(is_split)
1719             cur_start = *ptr+1;
1720         else {
1721             if(!check_ipv4address(ptr, TRUE)) {
1722                 if(!is_hexdigit(**ptr)) {
1723                     /* Not a valid character for an IPv6 address. */
1724                     *ptr = start;
1725                     return FALSE;
1726                 }
1727             } else {
1728                 /* Found an IPv4 address. */
1729                 ip.ipv4 = cur_start;
1730                 ip.ipv4_len = *ptr - cur_start;
1731
1732                 TRACE("(%p %p %x): Found an attached IPv4 address %s len=%d.\n",
1733                     ptr, data, flags, debugstr_wn(ip.ipv4, ip.ipv4_len),
1734                     ip.ipv4_len);
1735
1736                 /* IPv4 addresses can only appear at the end of a IPv6. */
1737                 break;
1738             }
1739         }
1740     }
1741
1742     compute_ipv6_comps_size(&ip);
1743
1744     /* Make sure the IPv6 address adds up to 16 bytes. */
1745     if(ip.components_size + ip.elision_size != 16) {
1746         *ptr = start;
1747         TRACE("(%p %p %x): Invalid IPv6 address, did not add up to 16 bytes.\n",
1748             ptr, data, flags);
1749         return FALSE;
1750     }
1751
1752     if(ip.elision_size == 2) {
1753         /* For some reason on Windows if an elision that represents
1754          * only 1 h16 component is encountered at the very begin or
1755          * end of an IPv6 address, Windows does not consider it a
1756          * valid IPv6 address.
1757          *
1758          *  Ex: [::2:3:4:5:6:7] is not valid, even though the sum
1759          *      of all the components == 128bits.
1760          */
1761          if(ip.elision < ip.components[0].str ||
1762             ip.elision > ip.components[ip.h16_count-1].str) {
1763             *ptr = start;
1764             TRACE("(%p %p %x): Invalid IPv6 address. Detected elision of 2 bytes at the beginning or end of the address.\n",
1765                 ptr, data, flags);
1766             return FALSE;
1767         }
1768     }
1769
1770     data->host_type = Uri_HOST_IPV6;
1771     data->has_ipv6 = TRUE;
1772     data->ipv6_address = ip;
1773
1774     TRACE("(%p %p %x): Found valid IPv6 literal %s len=%d\n",
1775         ptr, data, flags, debugstr_wn(start, *ptr-start),
1776         *ptr-start);
1777     return TRUE;
1778 }
1779
1780 /*  IPvFuture  = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) */
1781 static BOOL parse_ipvfuture(const WCHAR **ptr, parse_data *data, DWORD flags) {
1782     const WCHAR *start = *ptr;
1783
1784     /* IPvFuture has to start with a 'v' or 'V'. */
1785     if(**ptr != 'v' && **ptr != 'V')
1786         return FALSE;
1787
1788     /* Following the v there must be at least 1 hex digit. */
1789     ++(*ptr);
1790     if(!is_hexdigit(**ptr)) {
1791         *ptr = start;
1792         return FALSE;
1793     }
1794
1795     ++(*ptr);
1796     while(is_hexdigit(**ptr))
1797         ++(*ptr);
1798
1799     /* End of the hexdigit sequence must be a '.' */
1800     if(**ptr != '.') {
1801         *ptr = start;
1802         return FALSE;
1803     }
1804
1805     ++(*ptr);
1806     if(!is_unreserved(**ptr) && !is_subdelim(**ptr) && **ptr != ':') {
1807         *ptr = start;
1808         return FALSE;
1809     }
1810
1811     ++(*ptr);
1812     while(is_unreserved(**ptr) || is_subdelim(**ptr) || **ptr == ':')
1813         ++(*ptr);
1814
1815     data->host_type = Uri_HOST_UNKNOWN;
1816
1817     TRACE("(%p %p %x): Parsed IPvFuture address %s len=%d\n", ptr, data, flags,
1818         debugstr_wn(start, *ptr-start), *ptr-start);
1819
1820     return TRUE;
1821 }
1822
1823 /* IP-literal = "[" ( IPv6address / IPvFuture  ) "]" */
1824 static BOOL parse_ip_literal(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) {
1825     data->host = *ptr;
1826
1827     if(**ptr != '[' && !(extras & ALLOW_BRACKETLESS_IP_LITERAL)) {
1828         data->host = NULL;
1829         return FALSE;
1830     } else if(**ptr == '[')
1831         ++(*ptr);
1832
1833     if(!parse_ipv6address(ptr, data, flags)) {
1834         if(extras & SKIP_IP_FUTURE_CHECK || !parse_ipvfuture(ptr, data, flags)) {
1835             *ptr = data->host;
1836             data->host = NULL;
1837             return FALSE;
1838         }
1839     }
1840
1841     if(**ptr != ']' && !(extras & ALLOW_BRACKETLESS_IP_LITERAL)) {
1842         *ptr = data->host;
1843         data->host = NULL;
1844         return FALSE;
1845     } else if(!**ptr && extras & ALLOW_BRACKETLESS_IP_LITERAL) {
1846         /* The IP literal didn't contain brackets and was followed by
1847          * a NULL terminator, so no reason to even check the port.
1848          */
1849         data->host_len = *ptr - data->host;
1850         return TRUE;
1851     }
1852
1853     ++(*ptr);
1854     if(**ptr == ':') {
1855         ++(*ptr);
1856         /* If a valid port is not found, then let it trickle down to
1857          * parse_reg_name.
1858          */
1859         if(!parse_port(ptr, data, flags)) {
1860             *ptr = data->host;
1861             data->host = NULL;
1862             return FALSE;
1863         }
1864     } else
1865         data->host_len = *ptr - data->host;
1866
1867     return TRUE;
1868 }
1869
1870 /* Parses the host information from the URI.
1871  *
1872  * host = IP-literal / IPv4address / reg-name
1873  */
1874 static BOOL parse_host(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) {
1875     if(!parse_ip_literal(ptr, data, flags, extras)) {
1876         if(!parse_ipv4address(ptr, data, flags)) {
1877             if(!parse_reg_name(ptr, data, flags, extras)) {
1878                 TRACE("(%p %p %x %x): Malformed URI, Unknown host type.\n",
1879                     ptr, data, flags, extras);
1880                 return FALSE;
1881             }
1882         }
1883     }
1884
1885     return TRUE;
1886 }
1887
1888 /* Parses the authority information from the URI.
1889  *
1890  * authority   = [ userinfo "@" ] host [ ":" port ]
1891  */
1892 static BOOL parse_authority(const WCHAR **ptr, parse_data *data, DWORD flags) {
1893     parse_userinfo(ptr, data, flags);
1894
1895     /* Parsing the port will happen during one of the host parsing
1896      * routines (if the URI has a port).
1897      */
1898     if(!parse_host(ptr, data, flags, 0))
1899         return FALSE;
1900
1901     return TRUE;
1902 }
1903
1904 /* Attempts to parse the path information of a hierarchical URI. */
1905 static BOOL parse_path_hierarchical(const WCHAR **ptr, parse_data *data, DWORD flags) {
1906     const WCHAR *start = *ptr;
1907     static const WCHAR slash[] = {'/',0};
1908     const BOOL is_file = data->scheme_type == URL_SCHEME_FILE;
1909
1910     if(is_path_delim(**ptr)) {
1911         if(data->scheme_type == URL_SCHEME_WILDCARD) {
1912             /* Wildcard schemes don't get a '/' attached if their path is
1913              * empty.
1914              */
1915             data->path = NULL;
1916             data->path_len = 0;
1917         } else if(!(flags & Uri_CREATE_NO_CANONICALIZE)) {
1918             /* If the path component is empty, then a '/' is added. */
1919             data->path = slash;
1920             data->path_len = 1;
1921         }
1922     } else {
1923         while(!is_path_delim(**ptr)) {
1924             if(**ptr == '%' && data->scheme_type != URL_SCHEME_UNKNOWN && !is_file) {
1925                 if(!check_pct_encoded(ptr)) {
1926                     *ptr = start;
1927                     return FALSE;
1928                 } else
1929                     continue;
1930             } else if(is_forbidden_dos_path_char(**ptr) && is_file &&
1931                       (flags & Uri_CREATE_FILE_USE_DOS_PATH)) {
1932                 /* File schemes with USE_DOS_PATH set aren't allowed to have
1933                  * a '<' or '>' or '\"' appear in them.
1934                  */
1935                 *ptr = start;
1936                 return FALSE;
1937             } else if(**ptr == '\\') {
1938                 /* Not allowed to have a backslash if NO_CANONICALIZE is set
1939                  * and the scheme is known type (but not a file scheme).
1940                  */
1941                 if(flags & Uri_CREATE_NO_CANONICALIZE) {
1942                     if(data->scheme_type != URL_SCHEME_FILE &&
1943                        data->scheme_type != URL_SCHEME_UNKNOWN) {
1944                         *ptr = start;
1945                         return FALSE;
1946                     }
1947                 }
1948             }
1949
1950             ++(*ptr);
1951         }
1952
1953         /* The only time a URI doesn't have a path is when
1954          * the NO_CANONICALIZE flag is set and the raw URI
1955          * didn't contain one.
1956          */
1957         if(*ptr == start) {
1958             data->path = NULL;
1959             data->path_len = 0;
1960         } else {
1961             data->path = start;
1962             data->path_len = *ptr - start;
1963         }
1964     }
1965
1966     if(data->path)
1967         TRACE("(%p %p %x): Parsed path %s len=%d\n", ptr, data, flags,
1968             debugstr_wn(data->path, data->path_len), data->path_len);
1969     else
1970         TRACE("(%p %p %x): The URI contained no path\n", ptr, data, flags);
1971
1972     return TRUE;
1973 }
1974
1975 /* Parses the path of a opaque URI (much less strict then the parser
1976  * for a hierarchical URI).
1977  *
1978  * NOTE:
1979  *  Windows allows invalid % encoded data to appear in opaque URI paths
1980  *  for unknown scheme types.
1981  *
1982  *  File schemes with USE_DOS_PATH set aren't allowed to have '<', '>', or '\"'
1983  *  appear in them.
1984  */
1985 static BOOL parse_path_opaque(const WCHAR **ptr, parse_data *data, DWORD flags) {
1986     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
1987     const BOOL is_file = data->scheme_type == URL_SCHEME_FILE;
1988
1989     data->path = *ptr;
1990
1991     while(!is_path_delim(**ptr)) {
1992         if(**ptr == '%' && known_scheme) {
1993             if(!check_pct_encoded(ptr)) {
1994                 *ptr = data->path;
1995                 data->path = NULL;
1996                 return FALSE;
1997             } else
1998                 continue;
1999         } else if(is_forbidden_dos_path_char(**ptr) && is_file &&
2000                   (flags & Uri_CREATE_FILE_USE_DOS_PATH)) {
2001             *ptr = data->path;
2002             data->path = NULL;
2003             return FALSE;
2004         }
2005
2006         ++(*ptr);
2007     }
2008
2009     data->path_len = *ptr - data->path;
2010     TRACE("(%p %p %x): Parsed opaque URI path %s len=%d\n", ptr, data, flags,
2011         debugstr_wn(data->path, data->path_len), data->path_len);
2012     return TRUE;
2013 }
2014
2015 /* Determines how the URI should be parsed after the scheme information.
2016  *
2017  * If the scheme is followed, by "//" then, it is treated as an hierarchical URI
2018  * which then the authority and path information will be parsed out. Otherwise, the
2019  * URI will be treated as an opaque URI which the authority information is not parsed
2020  * out.
2021  *
2022  * RFC 3896 definition of hier-part:
2023  *
2024  * hier-part   = "//" authority path-abempty
2025  *                 / path-absolute
2026  *                 / path-rootless
2027  *                 / path-empty
2028  *
2029  * MSDN opaque URI definition:
2030  *  scheme ":" path [ "#" fragment ]
2031  *
2032  * NOTES:
2033  *  If the URI is of an unknown scheme type and has a "//" following the scheme then it
2034  *  is treated as a hierarchical URI, but, if the CREATE_NO_CRACK_UNKNOWN_SCHEMES flag is
2035  *  set then it is considered an opaque URI reguardless of what follows the scheme information
2036  *  (per MSDN documentation).
2037  */
2038 static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) {
2039     const WCHAR *start = *ptr;
2040
2041     /* Checks if the authority information needs to be parsed. */
2042     if(is_hierarchical_uri(ptr, data)) {
2043         /* Only treat it as a hierarchical URI if the scheme_type is known or
2044          * the Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES flag is not set.
2045          */
2046         if(data->scheme_type != URL_SCHEME_UNKNOWN ||
2047            !(flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES)) {
2048             TRACE("(%p %p %x): Treating URI as an hierarchical URI.\n", ptr, data, flags);
2049             data->is_opaque = FALSE;
2050
2051             /* TODO: Handle hierarchical URI's, parse authority then parse the path. */
2052             if(!parse_authority(ptr, data, flags))
2053                 return FALSE;
2054
2055             return parse_path_hierarchical(ptr, data, flags);
2056         } else
2057             /* Reset ptr to it's starting position so opaque path parsing
2058              * begins at the correct location.
2059              */
2060             *ptr = start;
2061     }
2062
2063     /* If it reaches here, then the URI will be treated as an opaque
2064      * URI.
2065      */
2066
2067     TRACE("(%p %p %x): Treating URI as an opaque URI.\n", ptr, data, flags);
2068
2069     data->is_opaque = TRUE;
2070     if(!parse_path_opaque(ptr, data, flags))
2071         return FALSE;
2072
2073     return TRUE;
2074 }
2075
2076 /* Attempts to parse the query string from the URI.
2077  *
2078  * NOTES:
2079  *  If NO_DECODE_EXTRA_INFO flag is set, then invalid percent encoded
2080  *  data is allowed appear in the query string. For unknown scheme types
2081  *  invalid percent encoded data is allowed to appear reguardless.
2082  */
2083 static BOOL parse_query(const WCHAR **ptr, parse_data *data, DWORD flags) {
2084     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
2085
2086     if(**ptr != '?') {
2087         TRACE("(%p %p %x): URI didn't contain a query string.\n", ptr, data, flags);
2088         return TRUE;
2089     }
2090
2091     data->query = *ptr;
2092
2093     ++(*ptr);
2094     while(**ptr && **ptr != '#') {
2095         if(**ptr == '%' && known_scheme &&
2096            !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
2097             if(!check_pct_encoded(ptr)) {
2098                 *ptr = data->query;
2099                 data->query = NULL;
2100                 return FALSE;
2101             } else
2102                 continue;
2103         }
2104
2105         ++(*ptr);
2106     }
2107
2108     data->query_len = *ptr - data->query;
2109
2110     TRACE("(%p %p %x): Parsed query string %s len=%d\n", ptr, data, flags,
2111         debugstr_wn(data->query, data->query_len), data->query_len);
2112     return TRUE;
2113 }
2114
2115 /* Attempts to parse the fragment from the URI.
2116  *
2117  * NOTES:
2118  *  If NO_DECODE_EXTRA_INFO flag is set, then invalid percent encoded
2119  *  data is allowed appear in the query string. For unknown scheme types
2120  *  invalid percent encoded data is allowed to appear reguardless.
2121  */
2122 static BOOL parse_fragment(const WCHAR **ptr, parse_data *data, DWORD flags) {
2123     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
2124
2125     if(**ptr != '#') {
2126         TRACE("(%p %p %x): URI didn't contain a fragment.\n", ptr, data, flags);
2127         return TRUE;
2128     }
2129
2130     data->fragment = *ptr;
2131
2132     ++(*ptr);
2133     while(**ptr) {
2134         if(**ptr == '%' && known_scheme &&
2135            !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
2136             if(!check_pct_encoded(ptr)) {
2137                 *ptr = data->fragment;
2138                 data->fragment = NULL;
2139                 return FALSE;
2140             } else
2141                 continue;
2142         }
2143
2144         ++(*ptr);
2145     }
2146
2147     data->fragment_len = *ptr - data->fragment;
2148
2149     TRACE("(%p %p %x): Parsed fragment %s len=%d\n", ptr, data, flags,
2150         debugstr_wn(data->fragment, data->fragment_len), data->fragment_len);
2151     return TRUE;
2152 }
2153
2154 /* Parses and validates the components of the specified by data->uri
2155  * and stores the information it parses into 'data'.
2156  *
2157  * Returns TRUE if it successfully parsed the URI. False otherwise.
2158  */
2159 static BOOL parse_uri(parse_data *data, DWORD flags) {
2160     const WCHAR *ptr;
2161     const WCHAR **pptr;
2162
2163     ptr = data->uri;
2164     pptr = &ptr;
2165
2166     TRACE("(%p %x): BEGINNING TO PARSE URI %s.\n", data, flags, debugstr_w(data->uri));
2167
2168     if(!parse_scheme(pptr, data, flags, 0))
2169         return FALSE;
2170
2171     if(!parse_hierpart(pptr, data, flags))
2172         return FALSE;
2173
2174     if(!parse_query(pptr, data, flags))
2175         return FALSE;
2176
2177     if(!parse_fragment(pptr, data, flags))
2178         return FALSE;
2179
2180     TRACE("(%p %x): FINISHED PARSING URI.\n", data, flags);
2181     return TRUE;
2182 }
2183
2184 static BOOL canonicalize_username(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2185     const WCHAR *ptr;
2186
2187     if(!data->username) {
2188         uri->userinfo_start = -1;
2189         return TRUE;
2190     }
2191
2192     uri->userinfo_start = uri->canon_len;
2193     for(ptr = data->username; ptr < data->username+data->username_len; ++ptr) {
2194         if(*ptr == '%') {
2195             /* Only decode % encoded values for known scheme types. */
2196             if(data->scheme_type != URL_SCHEME_UNKNOWN) {
2197                 /* See if the value really needs decoded. */
2198                 WCHAR val = decode_pct_val(ptr);
2199                 if(is_unreserved(val)) {
2200                     if(!computeOnly)
2201                         uri->canon_uri[uri->canon_len] = val;
2202
2203                     ++uri->canon_len;
2204
2205                     /* Move pass the hex characters. */
2206                     ptr += 2;
2207                     continue;
2208                 }
2209             }
2210         } else if(!is_reserved(*ptr) && !is_unreserved(*ptr) && *ptr != '\\') {
2211             /* Only percent encode forbidden characters if the NO_ENCODE_FORBIDDEN_CHARACTERS flag
2212              * is NOT set.
2213              */
2214             if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) {
2215                 if(!computeOnly)
2216                     pct_encode_val(*ptr, uri->canon_uri + uri->canon_len);
2217
2218                 uri->canon_len += 3;
2219                 continue;
2220             }
2221         }
2222
2223         if(!computeOnly)
2224             /* Nothing special, so just copy the character over. */
2225             uri->canon_uri[uri->canon_len] = *ptr;
2226         ++uri->canon_len;
2227     }
2228
2229     return TRUE;
2230 }
2231
2232 static BOOL canonicalize_password(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2233     const WCHAR *ptr;
2234
2235     if(!data->password) {
2236         uri->userinfo_split = -1;
2237         return TRUE;
2238     }
2239
2240     if(uri->userinfo_start == -1)
2241         /* Has a password, but, doesn't have a username. */
2242         uri->userinfo_start = uri->canon_len;
2243
2244     uri->userinfo_split = uri->canon_len - uri->userinfo_start;
2245
2246     /* Add the ':' to the userinfo component. */
2247     if(!computeOnly)
2248         uri->canon_uri[uri->canon_len] = ':';
2249     ++uri->canon_len;
2250
2251     for(ptr = data->password; ptr < data->password+data->password_len; ++ptr) {
2252         if(*ptr == '%') {
2253             /* Only decode % encoded values for known scheme types. */
2254             if(data->scheme_type != URL_SCHEME_UNKNOWN) {
2255                 /* See if the value really needs decoded. */
2256                 WCHAR val = decode_pct_val(ptr);
2257                 if(is_unreserved(val)) {
2258                     if(!computeOnly)
2259                         uri->canon_uri[uri->canon_len] = val;
2260
2261                     ++uri->canon_len;
2262
2263                     /* Move pass the hex characters. */
2264                     ptr += 2;
2265                     continue;
2266                 }
2267             }
2268         } else if(!is_reserved(*ptr) && !is_unreserved(*ptr) && *ptr != '\\') {
2269             /* Only percent encode forbidden characters if the NO_ENCODE_FORBIDDEN_CHARACTERS flag
2270              * is NOT set.
2271              */
2272             if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) {
2273                 if(!computeOnly)
2274                     pct_encode_val(*ptr, uri->canon_uri + uri->canon_len);
2275
2276                 uri->canon_len += 3;
2277                 continue;
2278             }
2279         }
2280
2281         if(!computeOnly)
2282             /* Nothing special, so just copy the character over. */
2283             uri->canon_uri[uri->canon_len] = *ptr;
2284         ++uri->canon_len;
2285     }
2286
2287     return TRUE;
2288 }
2289
2290 /* Canonicalizes the userinfo of the URI represented by the parse_data.
2291  *
2292  * Canonicalization of the userinfo is a simple process. If there are any percent
2293  * encoded characters that fall in the "unreserved" character set, they are decoded
2294  * to their actual value. If a character is not in the "unreserved" or "reserved" sets
2295  * then it is percent encoded. Other than that the characters are copied over without
2296  * change.
2297  */
2298 static BOOL canonicalize_userinfo(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2299     uri->userinfo_start = uri->userinfo_split = -1;
2300     uri->userinfo_len = 0;
2301
2302     if(!data->username && !data->password)
2303         /* URI doesn't have userinfo, so nothing to do here. */
2304         return TRUE;
2305
2306     if(!canonicalize_username(data, uri, flags, computeOnly))
2307         return FALSE;
2308
2309     if(!canonicalize_password(data, uri, flags, computeOnly))
2310         return FALSE;
2311
2312     uri->userinfo_len = uri->canon_len - uri->userinfo_start;
2313     if(!computeOnly)
2314         TRACE("(%p %p %x %d): Canonicalized userinfo, userinfo_start=%d, userinfo=%s, userinfo_split=%d userinfo_len=%d.\n",
2315                 data, uri, flags, computeOnly, uri->userinfo_start, debugstr_wn(uri->canon_uri + uri->userinfo_start, uri->userinfo_len),
2316                 uri->userinfo_split, uri->userinfo_len);
2317
2318     /* Now insert the '@' after the userinfo. */
2319     if(!computeOnly)
2320         uri->canon_uri[uri->canon_len] = '@';
2321     ++uri->canon_len;
2322
2323     return TRUE;
2324 }
2325
2326 /* Attempts to canonicalize a reg_name.
2327  *
2328  * Things that happen:
2329  *  1)  If Uri_CREATE_NO_CANONICALIZE flag is not set, then the reg_name is
2330  *      lower cased. Unless it's an unknown scheme type, which case it's
2331  *      no lower cased reguardless.
2332  *
2333  *  2)  Unreserved % encoded characters are decoded for known
2334  *      scheme types.
2335  *
2336  *  3)  Forbidden characters are % encoded as long as
2337  *      Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS flag is not set and
2338  *      it isn't an unknown scheme type.
2339  *
2340  *  4)  If it's a file scheme and the host is "localhost" it's removed.
2341  */
2342 static BOOL canonicalize_reg_name(const parse_data *data, Uri *uri,
2343                                   DWORD flags, BOOL computeOnly) {
2344     static const WCHAR localhostW[] =
2345             {'l','o','c','a','l','h','o','s','t',0};
2346     const WCHAR *ptr;
2347     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
2348
2349     uri->host_start = uri->canon_len;
2350
2351     if(data->scheme_type == URL_SCHEME_FILE &&
2352        data->host_len == lstrlenW(localhostW)) {
2353         if(!StrCmpNIW(data->host, localhostW, data->host_len)) {
2354             uri->host_start = -1;
2355             uri->host_len = 0;
2356             uri->host_type = Uri_HOST_UNKNOWN;
2357             return TRUE;
2358         }
2359     }
2360
2361     for(ptr = data->host; ptr < data->host+data->host_len; ++ptr) {
2362         if(*ptr == '%' && known_scheme) {
2363             WCHAR val = decode_pct_val(ptr);
2364             if(is_unreserved(val)) {
2365                 /* If NO_CANONICALZE is not set, then windows lower cases the
2366                  * decoded value.
2367                  */
2368                 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && isupperW(val)) {
2369                     if(!computeOnly)
2370                         uri->canon_uri[uri->canon_len] = tolowerW(val);
2371                 } else {
2372                     if(!computeOnly)
2373                         uri->canon_uri[uri->canon_len] = val;
2374                 }
2375                 ++uri->canon_len;
2376
2377                 /* Skip past the % encoded character. */
2378                 ptr += 2;
2379                 continue;
2380             } else {
2381                 /* Just copy the % over. */
2382                 if(!computeOnly)
2383                     uri->canon_uri[uri->canon_len] = *ptr;
2384                 ++uri->canon_len;
2385             }
2386         } else if(*ptr == '\\') {
2387             /* Only unknown scheme types could have made it here with a '\\' in the host name. */
2388             if(!computeOnly)
2389                 uri->canon_uri[uri->canon_len] = *ptr;
2390             ++uri->canon_len;
2391         } else if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) &&
2392                   !is_unreserved(*ptr) && !is_reserved(*ptr) && known_scheme) {
2393             if(!computeOnly) {
2394                 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
2395
2396                 /* The percent encoded value gets lower cased also. */
2397                 if(!(flags & Uri_CREATE_NO_CANONICALIZE)) {
2398                     uri->canon_uri[uri->canon_len+1] = tolowerW(uri->canon_uri[uri->canon_len+1]);
2399                     uri->canon_uri[uri->canon_len+2] = tolowerW(uri->canon_uri[uri->canon_len+2]);
2400                 }
2401             }
2402
2403             uri->canon_len += 3;
2404         } else {
2405             if(!computeOnly) {
2406                 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && known_scheme)
2407                     uri->canon_uri[uri->canon_len] = tolowerW(*ptr);
2408                 else
2409                     uri->canon_uri[uri->canon_len] = *ptr;
2410             }
2411
2412             ++uri->canon_len;
2413         }
2414     }
2415
2416     uri->host_len = uri->canon_len - uri->host_start;
2417
2418     if(!computeOnly)
2419         TRACE("(%p %p %x %d): Canonicalize reg_name=%s len=%d\n", data, uri, flags,
2420             computeOnly, debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
2421             uri->host_len);
2422
2423     if(!computeOnly)
2424         find_domain_name(uri->canon_uri+uri->host_start, uri->host_len,
2425             &(uri->domain_offset));
2426
2427     return TRUE;
2428 }
2429
2430 /* Attempts to canonicalize an implicit IPv4 address. */
2431 static BOOL canonicalize_implicit_ipv4address(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2432     uri->host_start = uri->canon_len;
2433
2434     TRACE("%u\n", data->implicit_ipv4);
2435     /* For unknown scheme types Window's doesn't convert
2436      * the value into an IP address, but, it still considers
2437      * it an IPv4 address.
2438      */
2439     if(data->scheme_type == URL_SCHEME_UNKNOWN) {
2440         if(!computeOnly)
2441             memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
2442         uri->canon_len += data->host_len;
2443     } else {
2444         if(!computeOnly)
2445             uri->canon_len += ui2ipv4(uri->canon_uri+uri->canon_len, data->implicit_ipv4);
2446         else
2447             uri->canon_len += ui2ipv4(NULL, data->implicit_ipv4);
2448     }
2449
2450     uri->host_len = uri->canon_len - uri->host_start;
2451     uri->host_type = Uri_HOST_IPV4;
2452
2453     if(!computeOnly)
2454         TRACE("%p %p %x %d): Canonicalized implicit IP address=%s len=%d\n",
2455             data, uri, flags, computeOnly,
2456             debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
2457             uri->host_len);
2458
2459     return TRUE;
2460 }
2461
2462 /* Attempts to canonicalize an IPv4 address.
2463  *
2464  * If the parse_data represents a URI that has an implicit IPv4 address
2465  * (ex. http://256/, this function will convert 256 into 0.0.1.0). If
2466  * the implicit IP address exceeds the value of UINT_MAX (maximum value
2467  * for an IPv4 address) it's canonicalized as if were a reg-name.
2468  *
2469  * If the parse_data contains a partial or full IPv4 address it normalizes it.
2470  * A partial IPv4 address is something like "192.0" and would be normalized to
2471  * "192.0.0.0". With a full (or partial) IPv4 address like "192.002.01.003" would
2472  * be normalized to "192.2.1.3".
2473  *
2474  * NOTES:
2475  *  Window's ONLY normalizes IPv4 address for known scheme types (one that isn't
2476  *  URL_SCHEME_UNKNOWN). For unknown scheme types, it simply copies the data from
2477  *  the original URI into the canonicalized URI, but, it still recognizes URI's
2478  *  host type as HOST_IPV4.
2479  */
2480 static BOOL canonicalize_ipv4address(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2481     if(data->has_implicit_ip)
2482         return canonicalize_implicit_ipv4address(data, uri, flags, computeOnly);
2483     else {
2484         uri->host_start = uri->canon_len;
2485
2486         /* Windows only normalizes for known scheme types. */
2487         if(data->scheme_type != URL_SCHEME_UNKNOWN) {
2488             /* parse_data contains a partial or full IPv4 address, so normalize it. */
2489             DWORD i, octetDigitCount = 0, octetCount = 0;
2490             BOOL octetHasDigit = FALSE;
2491
2492             for(i = 0; i < data->host_len; ++i) {
2493                 if(data->host[i] == '0' && !octetHasDigit) {
2494                     /* Can ignore leading zeros if:
2495                      *  1) It isn't the last digit of the octet.
2496                      *  2) i+1 != data->host_len
2497                      *  3) i+1 != '.'
2498                      */
2499                     if(octetDigitCount == 2 ||
2500                        i+1 == data->host_len ||
2501                        data->host[i+1] == '.') {
2502                         if(!computeOnly)
2503                             uri->canon_uri[uri->canon_len] = data->host[i];
2504                         ++uri->canon_len;
2505                         TRACE("Adding zero\n");
2506                     }
2507                 } else if(data->host[i] == '.') {
2508                     if(!computeOnly)
2509                         uri->canon_uri[uri->canon_len] = data->host[i];
2510                     ++uri->canon_len;
2511
2512                     octetDigitCount = 0;
2513                     octetHasDigit = FALSE;
2514                     ++octetCount;
2515                 } else {
2516                     if(!computeOnly)
2517                         uri->canon_uri[uri->canon_len] = data->host[i];
2518                     ++uri->canon_len;
2519
2520                     ++octetDigitCount;
2521                     octetHasDigit = TRUE;
2522                 }
2523             }
2524
2525             /* Make sure the canonicalized IP address has 4 dec-octets.
2526              * If doesn't add "0" ones until there is 4;
2527              */
2528             for( ; octetCount < 3; ++octetCount) {
2529                 if(!computeOnly) {
2530                     uri->canon_uri[uri->canon_len] = '.';
2531                     uri->canon_uri[uri->canon_len+1] = '0';
2532                 }
2533
2534                 uri->canon_len += 2;
2535             }
2536         } else {
2537             /* Windows doesn't normalize addresses in unknown schemes. */
2538             if(!computeOnly)
2539                 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
2540             uri->canon_len += data->host_len;
2541         }
2542
2543         uri->host_len = uri->canon_len - uri->host_start;
2544         if(!computeOnly)
2545             TRACE("(%p %p %x %d): Canonicalized IPv4 address, ip=%s len=%d\n",
2546                 data, uri, flags, computeOnly,
2547                 debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
2548                 uri->host_len);
2549     }
2550
2551     return TRUE;
2552 }
2553
2554 /* Attempts to canonicalize the IPv6 address of the URI.
2555  *
2556  * Multiple things happen during the canonicalization of an IPv6 address:
2557  *  1)  Any leading zero's in an h16 component are removed.
2558  *      Ex: [0001:0022::] -> [1:22::]
2559  *
2560  *  2)  The longest sequence of zero h16 components are compressed
2561  *      into a "::" (elision). If there's a tie, the first is choosen.
2562  *
2563  *      Ex: [0:0:0:0:1:6:7:8]   -> [::1:6:7:8]
2564  *          [0:0:0:0:1:2::]     -> [::1:2:0:0]
2565  *          [0:0:1:2:0:0:7:8]   -> [::1:2:0:0:7:8]
2566  *
2567  *  3)  If an IPv4 address is attached to the IPv6 address, it's
2568  *      also normalized.
2569  *      Ex: [::001.002.022.000] -> [::1.2.22.0]
2570  *
2571  *  4)  If an elision is present, but, only represents 1 h16 component
2572  *      it's expanded.
2573  *
2574  *      Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7]
2575  *
2576  *  5)  If the IPv6 address contains an IPv4 address and there exists
2577  *      at least 1 non-zero h16 component the IPv4 address is converted
2578  *      into two h16 components, otherwise it's normalized and kept as is.
2579  *
2580  *      Ex: [::192.200.003.4]       -> [::192.200.3.4]
2581  *          [ffff::192.200.003.4]   -> [ffff::c0c8:3041]
2582  *
2583  * NOTE:
2584  *  For unknown scheme types Windows simply copies the address over without any
2585  *  changes.
2586  *
2587  *  IPv4 address can be included in an elision if all its components are 0's.
2588  */
2589 static BOOL canonicalize_ipv6address(const parse_data *data, Uri *uri,
2590                                      DWORD flags, BOOL computeOnly) {
2591     uri->host_start = uri->canon_len;
2592
2593     if(data->scheme_type == URL_SCHEME_UNKNOWN) {
2594         if(!computeOnly)
2595             memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
2596         uri->canon_len += data->host_len;
2597     } else {
2598         USHORT values[8];
2599         INT elision_start;
2600         DWORD i, elision_len;
2601
2602         if(!ipv6_to_number(&(data->ipv6_address), values)) {
2603             TRACE("(%p %p %x %d): Failed to compute numerical value for IPv6 address.\n",
2604                 data, uri, flags, computeOnly);
2605             return FALSE;
2606         }
2607
2608         if(!computeOnly)
2609             uri->canon_uri[uri->canon_len] = '[';
2610         ++uri->canon_len;
2611
2612         /* Find where the elision should occur (if any). */
2613         compute_elision_location(&(data->ipv6_address), values, &elision_start, &elision_len);
2614
2615         TRACE("%p %p %x %d): Elision starts at %d, len=%u\n", data, uri, flags,
2616             computeOnly, elision_start, elision_len);
2617
2618         for(i = 0; i < 8; ++i) {
2619             BOOL in_elision = (elision_start > -1 && i >= elision_start &&
2620                                i < elision_start+elision_len);
2621             BOOL do_ipv4 = (i == 6 && data->ipv6_address.ipv4 && !in_elision &&
2622                             data->ipv6_address.h16_count == 0);
2623
2624             if(i == elision_start) {
2625                 if(!computeOnly) {
2626                     uri->canon_uri[uri->canon_len] = ':';
2627                     uri->canon_uri[uri->canon_len+1] = ':';
2628                 }
2629                 uri->canon_len += 2;
2630             }
2631
2632             /* We can ignore the current component if we're in the elision. */
2633             if(in_elision)
2634                 continue;
2635
2636             /* We only add a ':' if we're not at i == 0, or when we're at
2637              * the very end of elision range since the ':' colon was handled
2638              * earlier. Otherwise we would end up with ":::" after elision.
2639              */
2640             if(i != 0 && !(elision_start > -1 && i == elision_start+elision_len)) {
2641                 if(!computeOnly)
2642                     uri->canon_uri[uri->canon_len] = ':';
2643                 ++uri->canon_len;
2644             }
2645
2646             if(do_ipv4) {
2647                 UINT val;
2648                 DWORD len;
2649
2650                 /* Combine the two parts of the IPv4 address values. */
2651                 val = values[i];
2652                 val <<= 16;
2653                 val += values[i+1];
2654
2655                 if(!computeOnly)
2656                     len = ui2ipv4(uri->canon_uri+uri->canon_len, val);
2657                 else
2658                     len = ui2ipv4(NULL, val);
2659
2660                 uri->canon_len += len;
2661                 ++i;
2662             } else {
2663                 /* Write a regular h16 component to the URI. */
2664
2665                 /* Short circuit for the trivial case. */
2666                 if(values[i] == 0) {
2667                     if(!computeOnly)
2668                         uri->canon_uri[uri->canon_len] = '0';
2669                     ++uri->canon_len;
2670                 } else {
2671                     static const WCHAR formatW[] = {'%','x',0};
2672
2673                     if(!computeOnly)
2674                         uri->canon_len += sprintfW(uri->canon_uri+uri->canon_len,
2675                                             formatW, values[i]);
2676                     else {
2677                         WCHAR tmp[5];
2678                         uri->canon_len += sprintfW(tmp, formatW, values[i]);
2679                     }
2680                 }
2681             }
2682         }
2683
2684         /* Add the closing ']'. */
2685         if(!computeOnly)
2686             uri->canon_uri[uri->canon_len] = ']';
2687         ++uri->canon_len;
2688     }
2689
2690     uri->host_len = uri->canon_len - uri->host_start;
2691
2692     if(!computeOnly)
2693         TRACE("(%p %p %x %d): Canonicalized IPv6 address %s, len=%d\n", data, uri, flags,
2694             computeOnly, debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len),
2695             uri->host_len);
2696
2697     return TRUE;
2698 }
2699
2700 /* Attempts to canonicalize the host of the URI (if any). */
2701 static BOOL canonicalize_host(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2702     uri->host_start = -1;
2703     uri->host_len = 0;
2704     uri->domain_offset = -1;
2705
2706     if(data->host) {
2707         switch(data->host_type) {
2708         case Uri_HOST_DNS:
2709             uri->host_type = Uri_HOST_DNS;
2710             if(!canonicalize_reg_name(data, uri, flags, computeOnly))
2711                 return FALSE;
2712
2713             break;
2714         case Uri_HOST_IPV4:
2715             uri->host_type = Uri_HOST_IPV4;
2716             if(!canonicalize_ipv4address(data, uri, flags, computeOnly))
2717                 return FALSE;
2718
2719             break;
2720         case Uri_HOST_IPV6:
2721             if(!canonicalize_ipv6address(data, uri, flags, computeOnly))
2722                 return FALSE;
2723
2724             uri->host_type = Uri_HOST_IPV6;
2725             break;
2726         case Uri_HOST_UNKNOWN:
2727             if(data->host_len > 0 || data->scheme_type != URL_SCHEME_FILE) {
2728                 uri->host_start = uri->canon_len;
2729
2730                 /* Nothing happens to unknown host types. */
2731                 if(!computeOnly)
2732                     memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
2733                 uri->canon_len += data->host_len;
2734                 uri->host_len = data->host_len;
2735             }
2736
2737             uri->host_type = Uri_HOST_UNKNOWN;
2738             break;
2739         default:
2740             FIXME("(%p %p %x %d): Canonicalization for host type %d not supported.\n", data,
2741                     uri, flags, computeOnly, data->host_type);
2742             return FALSE;
2743        }
2744    }
2745
2746    return TRUE;
2747 }
2748
2749 static BOOL canonicalize_port(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2750     BOOL has_default_port = FALSE;
2751     USHORT default_port = 0;
2752     DWORD i;
2753
2754     uri->has_port = FALSE;
2755
2756     /* Check if the scheme has a default port. */
2757     for(i = 0; i < sizeof(default_ports)/sizeof(default_ports[0]); ++i) {
2758         if(default_ports[i].scheme == data->scheme_type) {
2759             has_default_port = TRUE;
2760             default_port = default_ports[i].port;
2761             break;
2762         }
2763     }
2764
2765     if(data->port || has_default_port)
2766         uri->has_port = TRUE;
2767
2768     /* Possible cases:
2769      *  1)  Has a port which is the default port.
2770      *  2)  Has a port (not the default).
2771      *  3)  Doesn't have a port, but, scheme has a default port.
2772      *  4)  No port.
2773      */
2774     if(has_default_port && data->port && data->port_value == default_port) {
2775         /* If it's the default port and this flag isn't set, don't do anything. */
2776         if(flags & Uri_CREATE_NO_CANONICALIZE) {
2777             /* Copy the original port over. */
2778             if(!computeOnly) {
2779                 uri->canon_uri[uri->canon_len] = ':';
2780                 memcpy(uri->canon_uri+uri->canon_len+1, data->port, data->port_len*sizeof(WCHAR));
2781             }
2782             uri->canon_len += data->port_len+1;
2783         }
2784
2785         uri->port = default_port;
2786     } else if(data->port) {
2787         if(!computeOnly)
2788             uri->canon_uri[uri->canon_len] = ':';
2789         ++uri->canon_len;
2790
2791         if(flags & Uri_CREATE_NO_CANONICALIZE) {
2792             /* Copy the original over without changes. */
2793             if(!computeOnly)
2794                 memcpy(uri->canon_uri+uri->canon_len, data->port, data->port_len*sizeof(WCHAR));
2795             uri->canon_len += data->port_len;
2796         } else {
2797             const WCHAR formatW[] = {'%','u',0};
2798             INT len = 0;
2799             if(!computeOnly)
2800                 len = sprintfW(uri->canon_uri+uri->canon_len, formatW, data->port_value);
2801             else {
2802                 WCHAR tmp[6];
2803                 len = sprintfW(tmp, formatW, data->port_value);
2804             }
2805             uri->canon_len += len;
2806         }
2807
2808         uri->port = data->port_value;
2809     } else if(has_default_port)
2810         uri->port = default_port;
2811
2812     return TRUE;
2813 }
2814
2815 /* Canonicalizes the authority of the URI represented by the parse_data. */
2816 static BOOL canonicalize_authority(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
2817     uri->authority_start = uri->canon_len;
2818     uri->authority_len = 0;
2819
2820     if(!canonicalize_userinfo(data, uri, flags, computeOnly))
2821         return FALSE;
2822
2823     if(!canonicalize_host(data, uri, flags, computeOnly))
2824         return FALSE;
2825
2826     if(!canonicalize_port(data, uri, flags, computeOnly))
2827         return FALSE;
2828
2829     if(uri->host_start != -1)
2830         uri->authority_len = uri->canon_len - uri->authority_start;
2831     else
2832         uri->authority_start = -1;
2833
2834     return TRUE;
2835 }
2836
2837 /* Attempts to canonicalize the path of a hierarchical URI.
2838  *
2839  * Things that happen:
2840  *  1). Forbidden characters are percent encoded, unless the NO_ENCODE_FORBIDDEN
2841  *      flag is set or it's a file URI. Forbidden characters are always encoded
2842  *      for file schemes reguardless and forbidden characters are never encoded
2843  *      for unknown scheme types.
2844  *
2845  *  2). For known scheme types '\\' are changed to '/'.
2846  *
2847  *  3). Percent encoded, unreserved characters are decoded to their actual values.
2848  *      Unless the scheme type is unknown. For file schemes any percent encoded
2849  *      character in the unreserved or reserved set is decoded.
2850  *
2851  *  4). For File schemes if the path is starts with a drive letter and doesn't
2852  *      start with a '/' then one is appended.
2853  *      Ex: file://c:/test.mp3 -> file:///c:/test.mp3
2854  *
2855  *  5). Dot segments are removed from the path for all scheme types
2856  *      unless NO_CANONICALIZE flag is set. Dot segments aren't removed
2857  *      for wildcard scheme types.
2858  *
2859  * NOTES:
2860  *      file://c:/test%20test   -> file:///c:/test%2520test
2861  *      file://c:/test%3Etest   -> file:///c:/test%253Etest
2862  *      file:///c:/test%20test  -> file:///c:/test%20test
2863  *      file:///c:/test%test    -> file:///c:/test%25test
2864  */
2865 static BOOL canonicalize_path_hierarchical(const parse_data *data, Uri *uri,
2866                                            DWORD flags, BOOL computeOnly) {
2867     const WCHAR *ptr;
2868     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
2869     const BOOL is_file = data->scheme_type == URL_SCHEME_FILE;
2870
2871     BOOL escape_pct = FALSE;
2872
2873     if(!data->path) {
2874         uri->path_start = -1;
2875         uri->path_len = 0;
2876         return TRUE;
2877     }
2878
2879     uri->path_start = uri->canon_len;
2880     ptr = data->path;
2881
2882     if(is_file && uri->host_start == -1) {
2883         /* Check if a '/' needs to be appended for the file scheme. */
2884         if(data->path_len > 1 && is_drive_path(ptr) && !(flags & Uri_CREATE_FILE_USE_DOS_PATH)) {
2885             if(!computeOnly)
2886                 uri->canon_uri[uri->canon_len] = '/';
2887             uri->canon_len++;
2888             escape_pct = TRUE;
2889         } else if(*ptr == '/') {
2890             if(!(flags & Uri_CREATE_FILE_USE_DOS_PATH)) {
2891                 /* Copy the extra '/' over. */
2892                 if(!computeOnly)
2893                     uri->canon_uri[uri->canon_len] = '/';
2894                 ++uri->canon_len;
2895             }
2896             ++ptr;
2897         }
2898
2899         if(is_drive_path(ptr)) {
2900             if(!computeOnly) {
2901                 uri->canon_uri[uri->canon_len] = *ptr;
2902                 /* If theres a '|' after the drive letter, convert it to a ':'. */
2903                 uri->canon_uri[uri->canon_len+1] = ':';
2904             }
2905             ptr += 2;
2906             uri->canon_len += 2;
2907         }
2908     }
2909
2910     for(; ptr < data->path+data->path_len; ++ptr) {
2911         if(*ptr == '%') {
2912             const WCHAR *tmp = ptr;
2913             WCHAR val;
2914
2915             /* Check if the % represents a valid encoded char, or if it needs encoded. */
2916             BOOL force_encode = !check_pct_encoded(&tmp) && is_file;
2917             val = decode_pct_val(ptr);
2918
2919             if(force_encode || escape_pct) {
2920                 /* Escape the percent sign in the file URI. */
2921                 if(!computeOnly)
2922                     pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
2923                 uri->canon_len += 3;
2924             } else if((is_unreserved(val) && known_scheme) ||
2925                       (is_file && (is_unreserved(val) || is_reserved(val)))) {
2926                 if(!computeOnly)
2927                     uri->canon_uri[uri->canon_len] = val;
2928                 ++uri->canon_len;
2929
2930                 ptr += 2;
2931                 continue;
2932             } else {
2933                 if(!computeOnly)
2934                     uri->canon_uri[uri->canon_len] = *ptr;
2935                 ++uri->canon_len;
2936             }
2937         } else if(*ptr == '/' && is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) {
2938             /* Convert the '/' back to a '\\'. */
2939             if(!computeOnly)
2940                 uri->canon_uri[uri->canon_len] = '\\';
2941             ++uri->canon_len;
2942         } else if(*ptr == '\\' && known_scheme) {
2943             if(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) {
2944                 /* Don't convert the '\\' to a '/'. */
2945                 if(!computeOnly)
2946                     uri->canon_uri[uri->canon_len] = *ptr;
2947                 ++uri->canon_len;
2948             } else {
2949                 if(!computeOnly)
2950                     uri->canon_uri[uri->canon_len] = '/';
2951                 ++uri->canon_len;
2952             }
2953         } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr) &&
2954                   (!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) || is_file)) {
2955             if(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) {
2956                 /* Don't escape the character. */
2957                 if(!computeOnly)
2958                     uri->canon_uri[uri->canon_len] = *ptr;
2959                 ++uri->canon_len;
2960             } else {
2961                 /* Escape the forbidden character. */
2962                 if(!computeOnly)
2963                     pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
2964                 uri->canon_len += 3;
2965             }
2966         } else {
2967             if(!computeOnly)
2968                 uri->canon_uri[uri->canon_len] = *ptr;
2969             ++uri->canon_len;
2970         }
2971     }
2972
2973     uri->path_len = uri->canon_len - uri->path_start;
2974
2975     /* Removing the dot segments only happens when it's not in
2976      * computeOnly mode and it's not a wildcard scheme. File schemes
2977      * with USE_DOS_PATH set don't get dot segments removed.
2978      */
2979     if(!(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) &&
2980        data->scheme_type != URL_SCHEME_WILDCARD) {
2981         if(!(flags & Uri_CREATE_NO_CANONICALIZE) && !computeOnly) {
2982             /* Remove the dot segments (if any) and reset everything to the new
2983              * correct length.
2984              */
2985             DWORD new_len = remove_dot_segments(uri->canon_uri+uri->path_start, uri->path_len);
2986             uri->canon_len -= uri->path_len-new_len;
2987             uri->path_len = new_len;
2988         }
2989     }
2990
2991     if(!computeOnly)
2992         TRACE("Canonicalized path %s len=%d\n",
2993             debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len),
2994             uri->path_len);
2995
2996     return TRUE;
2997 }
2998
2999 /* Attempts to canonicalize the path for an opaque URI.
3000  *
3001  * For known scheme types:
3002  *  1)  forbidden characters are percent encoded if
3003  *      NO_ENCODE_FORBIDDEN_CHARACTERS isn't set.
3004  *
3005  *  2)  Percent encoded, unreserved characters are decoded
3006  *      to their actual values, for known scheme types.
3007  *
3008  *  3)  '\\' are changed to '/' for known scheme types
3009  *      except for mailto schemes.
3010  *
3011  *  4)  For file schemes, if USE_DOS_PATH is set all '/'
3012  *      are converted to backslashes.
3013  *
3014  *  5)  For file schemes, if USE_DOS_PATH isn't set all '\'
3015  *      are converted to forward slashes.
3016  */
3017 static BOOL canonicalize_path_opaque(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
3018     const WCHAR *ptr;
3019     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
3020     const BOOL is_file = data->scheme_type == URL_SCHEME_FILE;
3021
3022     if(!data->path) {
3023         uri->path_start = -1;
3024         uri->path_len = 0;
3025         return TRUE;
3026     }
3027
3028     uri->path_start = uri->canon_len;
3029
3030     /* Windows doesn't allow a "//" to appear after the scheme
3031      * of a URI, if it's an opaque URI.
3032      */
3033     if(data->scheme && *(data->path) == '/' && *(data->path+1) == '/') {
3034         /* So it inserts a "/." before the "//" if it exists. */
3035         if(!computeOnly) {
3036             uri->canon_uri[uri->canon_len] = '/';
3037             uri->canon_uri[uri->canon_len+1] = '.';
3038         }
3039
3040         uri->canon_len += 2;
3041     }
3042
3043     for(ptr = data->path; ptr < data->path+data->path_len; ++ptr) {
3044         if(*ptr == '%' && known_scheme) {
3045             WCHAR val = decode_pct_val(ptr);
3046
3047             if(is_unreserved(val)) {
3048                 if(!computeOnly)
3049                     uri->canon_uri[uri->canon_len] = val;
3050                 ++uri->canon_len;
3051
3052                 ptr += 2;
3053                 continue;
3054             } else {
3055                 if(!computeOnly)
3056                     uri->canon_uri[uri->canon_len] = *ptr;
3057                 ++uri->canon_len;
3058             }
3059         } else if(*ptr == '/' && is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) {
3060             if(!computeOnly)
3061                 uri->canon_uri[uri->canon_len] = '\\';
3062             ++uri->canon_len;
3063         } else if(*ptr == '\\' && is_file) {
3064             if(!(flags & Uri_CREATE_FILE_USE_DOS_PATH)) {
3065                 /* Convert to a '/'. */
3066                 if(!computeOnly)
3067                     uri->canon_uri[uri->canon_len] = '/';
3068                 ++uri->canon_len;
3069             } else {
3070                 /* Just copy it over. */
3071                 if(!computeOnly)
3072                     uri->canon_uri[uri->canon_len] = *ptr;
3073                 ++uri->canon_len;
3074             }
3075         } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr) &&
3076                   !(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) {
3077             if(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) {
3078                 /* Forbidden characters aren't percent encoded for file schemes
3079                  * with USE_DOS_PATH set.
3080                  */
3081                 if(!computeOnly)
3082                     uri->canon_uri[uri->canon_len] = *ptr;
3083                 ++uri->canon_len;
3084             } else if(data->scheme_type == URL_SCHEME_MK && *ptr == '\\') {
3085                 /* MK URIs don't get '\\' percent encoded. */
3086                 if(!computeOnly)
3087                     uri->canon_uri[uri->canon_len] = *ptr;
3088                 ++uri->canon_len;
3089             } else {
3090                 if(!computeOnly)
3091                     pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
3092                 uri->canon_len += 3;
3093             }
3094         } else {
3095             if(!computeOnly)
3096                 uri->canon_uri[uri->canon_len] = *ptr;
3097             ++uri->canon_len;
3098         }
3099     }
3100
3101     uri->path_len = uri->canon_len - uri->path_start;
3102
3103     TRACE("(%p %p %x %d): Canonicalized opaque URI path %s len=%d\n", data, uri, flags, computeOnly,
3104         debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len), uri->path_len);
3105     return TRUE;
3106 }
3107
3108 /* Determines how the URI represented by the parse_data should be canonicalized.
3109  *
3110  * Essentially, if the parse_data represents an hierarchical URI then it calls
3111  * canonicalize_authority and the canonicalization functions for the path. If the
3112  * URI is opaque it canonicalizes the path of the URI.
3113  */
3114 static BOOL canonicalize_hierpart(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
3115     uri->display_absolute = TRUE;
3116
3117     if(!data->is_opaque) {
3118         /* "//" is only added for non-wildcard scheme types. */
3119         if(data->scheme_type != URL_SCHEME_WILDCARD) {
3120             if(!computeOnly) {
3121                 INT pos = uri->canon_len;
3122
3123                 uri->canon_uri[pos] = '/';
3124                 uri->canon_uri[pos+1] = '/';
3125            }
3126            uri->canon_len += 2;
3127         }
3128
3129         if(!canonicalize_authority(data, uri, flags, computeOnly))
3130             return FALSE;
3131
3132         /* TODO: Canonicalize the path of the URI. */
3133         if(!canonicalize_path_hierarchical(data, uri, flags, computeOnly))
3134             return FALSE;
3135
3136     } else {
3137         /* Opaque URI's don't have an authority. */
3138         uri->userinfo_start = uri->userinfo_split = -1;
3139         uri->userinfo_len = 0;
3140         uri->host_start = -1;
3141         uri->host_len = 0;
3142         uri->host_type = Uri_HOST_UNKNOWN;
3143         uri->has_port = FALSE;
3144         uri->authority_start = -1;
3145         uri->authority_len = 0;
3146         uri->domain_offset = -1;
3147
3148         if(is_hierarchical_scheme(data->scheme_type)) {
3149             DWORD i;
3150
3151             /* Absolute URIs aren't displayed for known scheme types
3152              * which should be hierarchical URIs.
3153              */
3154             uri->display_absolute = FALSE;
3155
3156             /* Windows also sets the port for these (if they have one). */
3157             for(i = 0; i < sizeof(default_ports)/sizeof(default_ports[0]); ++i) {
3158                 if(data->scheme_type == default_ports[i].scheme) {
3159                     uri->has_port = TRUE;
3160                     uri->port = default_ports[i].port;
3161                     break;
3162                 }
3163             }
3164         }
3165
3166         if(!canonicalize_path_opaque(data, uri, flags, computeOnly))
3167             return FALSE;
3168     }
3169
3170     if(uri->path_start > -1 && !computeOnly)
3171         /* Finding file extensions happens for both types of URIs. */
3172         uri->extension_offset = find_file_extension(uri->canon_uri+uri->path_start, uri->path_len);
3173     else
3174         uri->extension_offset = -1;
3175
3176     return TRUE;
3177 }
3178
3179 /* Attempts to canonicalize the query string of the URI.
3180  *
3181  * Things that happen:
3182  *  1)  For known scheme types forbidden characters
3183  *      are percent encoded, unless the NO_DECODE_EXTRA_INFO flag is set
3184  *      or NO_ENCODE_FORBIDDEN_CHARACTERS is set.
3185  *
3186  *  2)  For known scheme types, percent encoded, unreserved characters
3187  *      are decoded as long as the NO_DECODE_EXTRA_INFO flag isn't set.
3188  */
3189 static BOOL canonicalize_query(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
3190     const WCHAR *ptr, *end;
3191     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
3192
3193     if(!data->query) {
3194         uri->query_start = -1;
3195         uri->query_len = 0;
3196         return TRUE;
3197     }
3198
3199     uri->query_start = uri->canon_len;
3200
3201     end = data->query+data->query_len;
3202     for(ptr = data->query; ptr < end; ++ptr) {
3203         if(*ptr == '%') {
3204             if(known_scheme && !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
3205                 WCHAR val = decode_pct_val(ptr);
3206                 if(is_unreserved(val)) {
3207                     if(!computeOnly)
3208                         uri->canon_uri[uri->canon_len] = val;
3209                     ++uri->canon_len;
3210
3211                     ptr += 2;
3212                     continue;
3213                 }
3214             }
3215         } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr)) {
3216             if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) &&
3217                !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
3218                 if(!computeOnly)
3219                     pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
3220                 uri->canon_len += 3;
3221                 continue;
3222             }
3223         }
3224
3225         if(!computeOnly)
3226             uri->canon_uri[uri->canon_len] = *ptr;
3227         ++uri->canon_len;
3228     }
3229
3230     uri->query_len = uri->canon_len - uri->query_start;
3231
3232     if(!computeOnly)
3233         TRACE("(%p %p %x %d): Canonicalized query string %s len=%d\n", data, uri, flags,
3234             computeOnly, debugstr_wn(uri->canon_uri+uri->query_start, uri->query_len),
3235             uri->query_len);
3236     return TRUE;
3237 }
3238
3239 static BOOL canonicalize_fragment(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
3240     const WCHAR *ptr, *end;
3241     const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
3242
3243     if(!data->fragment) {
3244         uri->fragment_start = -1;
3245         uri->fragment_len = 0;
3246         return TRUE;
3247     }
3248
3249     uri->fragment_start = uri->canon_len;
3250
3251     end = data->fragment + data->fragment_len;
3252     for(ptr = data->fragment; ptr < end; ++ptr) {
3253         if(*ptr == '%') {
3254             if(known_scheme && !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
3255                 WCHAR val = decode_pct_val(ptr);
3256                 if(is_unreserved(val)) {
3257                     if(!computeOnly)
3258                         uri->canon_uri[uri->canon_len] = val;
3259                     ++uri->canon_len;
3260
3261                     ptr += 2;
3262                     continue;
3263                 }
3264             }
3265         } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr)) {
3266             if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) &&
3267                !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
3268                 if(!computeOnly)
3269                     pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
3270                 uri->canon_len += 3;
3271                 continue;
3272             }
3273         }
3274
3275         if(!computeOnly)
3276             uri->canon_uri[uri->canon_len] = *ptr;
3277         ++uri->canon_len;
3278     }
3279
3280     uri->fragment_len = uri->canon_len - uri->fragment_start;
3281
3282     if(!computeOnly)
3283         TRACE("(%p %p %x %d): Canonicalized fragment %s len=%d\n", data, uri, flags,
3284             computeOnly, debugstr_wn(uri->canon_uri+uri->fragment_start, uri->fragment_len),
3285             uri->fragment_len);
3286     return TRUE;
3287 }
3288
3289 /* Canonicalizes the scheme information specified in the parse_data using the specified flags. */
3290 static BOOL canonicalize_scheme(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
3291     uri->scheme_start = -1;
3292     uri->scheme_len = 0;
3293
3294     if(!data->scheme) {
3295         /* The only type of URI that doesn't have to have a scheme is a relative
3296          * URI.
3297          */
3298         if(!data->is_relative) {
3299             FIXME("(%p %p %x): Unable to determine the scheme type of %s.\n", data,
3300                     uri, flags, debugstr_w(data->uri));
3301             return FALSE;
3302         }
3303     } else {
3304         if(!computeOnly) {
3305             DWORD i;
3306             INT pos = uri->canon_len;
3307
3308             for(i = 0; i < data->scheme_len; ++i) {
3309                 /* Scheme name must be lower case after canonicalization. */
3310                 uri->canon_uri[i + pos] = tolowerW(data->scheme[i]);
3311             }
3312
3313             uri->canon_uri[i + pos] = ':';
3314             uri->scheme_start = pos;
3315
3316             TRACE("(%p %p %x): Canonicalized scheme=%s, len=%d.\n", data, uri, flags,
3317                     debugstr_wn(uri->canon_uri,  uri->scheme_len), data->scheme_len);
3318         }
3319
3320         /* This happens in both computation modes. */
3321         uri->canon_len += data->scheme_len + 1;
3322         uri->scheme_len = data->scheme_len;
3323     }
3324     return TRUE;
3325 }
3326
3327 /* Compute's what the length of the URI specified by the parse_data will be
3328  * after canonicalization occurs using the specified flags.
3329  *
3330  * This function will return a non-zero value indicating the length of the canonicalized
3331  * URI, or -1 on error.
3332  */
3333 static int compute_canonicalized_length(const parse_data *data, DWORD flags) {
3334     Uri uri;
3335
3336     memset(&uri, 0, sizeof(Uri));
3337
3338     TRACE("(%p %x): Beginning to compute canonicalized length for URI %s\n", data, flags,
3339             debugstr_w(data->uri));
3340
3341     if(!canonicalize_scheme(data, &uri, flags, TRUE)) {
3342         ERR("(%p %x): Failed to compute URI scheme length.\n", data, flags);
3343         return -1;
3344     }
3345
3346     if(!canonicalize_hierpart(data, &uri, flags, TRUE)) {
3347         ERR("(%p %x): Failed to compute URI hierpart length.\n", data, flags);
3348         return -1;
3349     }
3350
3351     if(!canonicalize_query(data, &uri, flags, TRUE)) {
3352         ERR("(%p %x): Failed to compute query string length.\n", data, flags);
3353         return -1;
3354     }
3355
3356     if(!canonicalize_fragment(data, &uri, flags, TRUE)) {
3357         ERR("(%p %x): Failed to compute fragment length.\n", data, flags);
3358         return -1;
3359     }
3360
3361     TRACE("(%p %x): Finished computing canonicalized URI length. length=%d\n", data, flags, uri.canon_len);
3362
3363     return uri.canon_len;
3364 }
3365
3366 /* Canonicalizes the URI data specified in the parse_data, using the given flags. If the
3367  * canonicalization succeededs it will store all the canonicalization information
3368  * in the pointer to the Uri.
3369  *
3370  * To canonicalize a URI this function first computes what the length of the URI
3371  * specified by the parse_data will be. Once this is done it will then perfom the actual
3372  * canonicalization of the URI.
3373  */
3374 static HRESULT canonicalize_uri(const parse_data *data, Uri *uri, DWORD flags) {
3375     INT len;
3376
3377     uri->canon_uri = NULL;
3378     len = uri->canon_size = uri->canon_len = 0;
3379
3380     TRACE("(%p %p %x): beginning to canonicalize URI %s.\n", data, uri, flags, debugstr_w(data->uri));
3381
3382     /* First try to compute the length of the URI. */
3383     len = compute_canonicalized_length(data, flags);
3384     if(len == -1) {
3385         ERR("(%p %p %x): Could not compute the canonicalized length of %s.\n", data, uri, flags,
3386                 debugstr_w(data->uri));
3387         return E_INVALIDARG;
3388     }
3389
3390     uri->canon_uri = heap_alloc((len+1)*sizeof(WCHAR));
3391     if(!uri->canon_uri)
3392         return E_OUTOFMEMORY;
3393
3394     uri->canon_size = len;
3395     if(!canonicalize_scheme(data, uri, flags, FALSE)) {
3396         ERR("(%p %p %x): Unable to canonicalize the scheme of the URI.\n", data, uri, flags);
3397         heap_free(uri->canon_uri);
3398         return E_INVALIDARG;
3399     }
3400     uri->scheme_type = data->scheme_type;
3401
3402     if(!canonicalize_hierpart(data, uri, flags, FALSE)) {
3403         ERR("(%p %p %x): Unable to canonicalize the heirpart of the URI\n", data, uri, flags);
3404         heap_free(uri->canon_uri);
3405         return E_INVALIDARG;
3406     }
3407
3408     if(!canonicalize_query(data, uri, flags, FALSE)) {
3409         ERR("(%p %p %x): Unable to canonicalize query string of the URI.\n",
3410             data, uri, flags);
3411         return E_INVALIDARG;
3412     }
3413
3414     if(!canonicalize_fragment(data, uri, flags, FALSE)) {
3415         ERR("(%p %p %x): Unable to canonicalize fragment of the URI.\n",
3416             data, uri, flags);
3417         return E_INVALIDARG;
3418     }
3419
3420     /* There's a possibility we didn't use all the space we allocated
3421      * earlier.
3422      */
3423     if(uri->canon_len < uri->canon_size) {
3424         /* This happens if the URI is hierarchical and dot
3425          * segments were removed from it's path.
3426          */
3427         WCHAR *tmp = heap_realloc(uri->canon_uri, (uri->canon_len+1)*sizeof(WCHAR));
3428         if(!tmp)
3429             return E_OUTOFMEMORY;
3430
3431         uri->canon_uri = tmp;
3432         uri->canon_size = uri->canon_len;
3433     }
3434
3435     uri->canon_uri[uri->canon_len] = '\0';
3436     TRACE("(%p %p %x): finished canonicalizing the URI. uri=%s\n", data, uri, flags, debugstr_w(uri->canon_uri));
3437
3438     return S_OK;
3439 }
3440
3441 static HRESULT get_builder_component(LPWSTR *component, DWORD *component_len,
3442                                      LPCWSTR source, DWORD source_len,
3443                                      LPCWSTR *output, DWORD *output_len)
3444 {
3445     if(!output_len) {
3446         if(output)
3447             *output = NULL;
3448         return E_POINTER;
3449     }
3450
3451     if(!output) {
3452         *output_len = 0;
3453         return E_POINTER;
3454     }
3455
3456     if(!(*component) && source) {
3457         /* Allocate 'component', and copy the contents from 'source'
3458          * into the new allocation.
3459          */
3460         *component = heap_alloc((source_len+1)*sizeof(WCHAR));
3461         if(!(*component))
3462             return E_OUTOFMEMORY;
3463
3464         memcpy(*component, source, source_len*sizeof(WCHAR));
3465         (*component)[source_len] = '\0';
3466         *component_len = source_len;
3467     }
3468
3469     *output = *component;
3470     *output_len = *component_len;
3471     return *output ? S_OK : S_FALSE;
3472 }
3473
3474 /* Allocates 'component' and copies the string from 'new_value' into 'component'.
3475  * If 'prefix' is set and 'new_value' isn't NULL, then it checks if 'new_value'
3476  * starts with 'prefix'. If it doesn't then 'prefix' is prepended to 'component'.
3477  *
3478  * If everything is successful, then will set 'success_flag' in 'flags'.
3479  */
3480 static HRESULT set_builder_component(LPWSTR *component, DWORD *component_len, LPCWSTR new_value,
3481                                      WCHAR prefix, DWORD *flags, DWORD success_flag)
3482 {
3483     heap_free(*component);
3484
3485     if(!new_value) {
3486         *component = NULL;
3487         *component_len = 0;
3488     } else {
3489         BOOL add_prefix = FALSE;
3490         DWORD len = lstrlenW(new_value);
3491         DWORD pos = 0;
3492
3493         if(prefix && *new_value != prefix) {
3494             add_prefix = TRUE;
3495             *component = heap_alloc((len+2)*sizeof(WCHAR));
3496         } else
3497             *component = heap_alloc((len+1)*sizeof(WCHAR));
3498
3499         if(!(*component))
3500             return E_OUTOFMEMORY;
3501
3502         if(add_prefix)
3503             (*component)[pos++] = prefix;
3504
3505         memcpy(*component+pos, new_value, (len+1)*sizeof(WCHAR));
3506         *component_len = len+pos;
3507     }
3508
3509     *flags |= success_flag;
3510     return S_OK;
3511 }
3512
/* Turn a pointer to an implementation object into the matching interface
 * pointer by taking the address of the vtable pointer member stored in it.
 */
#define URI(x)         ((IUri*)  &(x)->lpIUriVtbl)
#define URIBUILDER(x)  ((IUriBuilder*)  &(x)->lpIUriBuilderVtbl)
3515
3516 static void reset_builder(UriBuilder *builder) {
3517     if(builder->uri)
3518         IUri_Release(URI(builder->uri));
3519     builder->uri = NULL;
3520
3521     heap_free(builder->fragment);
3522     builder->fragment = NULL;
3523     builder->fragment_len = 0;
3524
3525     heap_free(builder->host);
3526     builder->host = NULL;
3527     builder->host_len = 0;
3528
3529     heap_free(builder->password);
3530     builder->password = NULL;
3531     builder->password_len = 0;
3532
3533     heap_free(builder->path);
3534     builder->path = NULL;
3535     builder->path_len = 0;
3536
3537     heap_free(builder->query);
3538     builder->query = NULL;
3539     builder->query_len = 0;
3540
3541     heap_free(builder->scheme);
3542     builder->scheme = NULL;
3543     builder->scheme_len = 0;
3544
3545     heap_free(builder->username);
3546     builder->username = NULL;
3547     builder->username_len = 0;
3548
3549     builder->has_port = FALSE;
3550     builder->port = 0;
3551     builder->modified_props = 0;
3552 }
3553
3554 static HRESULT validate_scheme_name(const UriBuilder *builder, parse_data *data, DWORD flags) {
3555     const WCHAR *component;
3556     const WCHAR *ptr;
3557     const WCHAR **pptr;
3558     DWORD expected_len;
3559
3560     if(builder->scheme) {
3561         ptr = builder->scheme;
3562         expected_len = builder->scheme_len;
3563     } else if(builder->uri && builder->uri->scheme_start > -1) {
3564         ptr = builder->uri->canon_uri+builder->uri->scheme_start;
3565         expected_len = builder->uri->scheme_len;
3566     } else {
3567         static const WCHAR nullW[] = {0};
3568         ptr = nullW;
3569         expected_len = 0;
3570     }
3571
3572     component = ptr;
3573     pptr = &ptr;
3574     if(parse_scheme(pptr, data, flags, ALLOW_NULL_TERM_SCHEME) &&
3575        data->scheme_len == expected_len) {
3576         if(data->scheme)
3577             TRACE("(%p %p %x): Found valid scheme component %s len=%d.\n", builder, data, flags,
3578                debugstr_wn(data->scheme, data->scheme_len), data->scheme_len);
3579     } else {
3580         TRACE("(%p %p %x): Invalid scheme component found %s.\n", builder, data, flags,
3581             debugstr_wn(component, expected_len));
3582         return INET_E_INVALID_URL;
3583    }
3584
3585     return S_OK;
3586 }
3587
3588 static HRESULT validate_username(const UriBuilder *builder, parse_data *data, DWORD flags) {
3589     const WCHAR *ptr;
3590     const WCHAR **pptr;
3591     DWORD expected_len;
3592
3593     if(builder->username) {
3594         ptr = builder->username;
3595         expected_len = builder->username_len;
3596     } else if(!(builder->modified_props & Uri_HAS_USER_NAME) && builder->uri &&
3597               builder->uri->userinfo_start > -1 && builder->uri->userinfo_split != 0) {
3598         /* Just use the username from the base Uri. */
3599         data->username = builder->uri->canon_uri+builder->uri->userinfo_start;
3600         data->username_len = (builder->uri->userinfo_split > -1) ?
3601                                         builder->uri->userinfo_split : builder->uri->userinfo_len;
3602         ptr = NULL;
3603     } else {
3604         ptr = NULL;
3605         expected_len = 0;
3606     }
3607
3608     if(ptr) {
3609         const WCHAR *component = ptr;
3610         pptr = &ptr;
3611         if(parse_username(pptr, data, flags, ALLOW_NULL_TERM_USER_NAME) &&
3612            data->username_len == expected_len)
3613             TRACE("(%p %p %x): Found valid username component %s len=%d.\n", builder, data, flags,
3614                 debugstr_wn(data->username, data->username_len), data->username_len);
3615         else {
3616             TRACE("(%p %p %x): Invalid username component found %s.\n", builder, data, flags,
3617                 debugstr_wn(component, expected_len));
3618             return INET_E_INVALID_URL;
3619         }
3620     }
3621
3622     return S_OK;
3623 }
3624
3625 static HRESULT validate_password(const UriBuilder *builder, parse_data *data, DWORD flags) {
3626     const WCHAR *ptr;
3627     const WCHAR **pptr;
3628     DWORD expected_len;
3629
3630     if(builder->password) {
3631         ptr = builder->password;
3632         expected_len = builder->password_len;
3633     } else if(!(builder->modified_props & Uri_HAS_PASSWORD) && builder->uri &&
3634               builder->uri->userinfo_split > -1) {
3635         data->password = builder->uri->canon_uri+builder->uri->userinfo_start+builder->uri->userinfo_split+1;
3636         data->password_len = builder->uri->userinfo_len-builder->uri->userinfo_split-1;
3637         ptr = NULL;
3638     } else {
3639         ptr = NULL;
3640         expected_len = 0;
3641     }
3642
3643     if(ptr) {
3644         const WCHAR *component = ptr;
3645         pptr = &ptr;
3646         if(parse_password(pptr, data, flags, ALLOW_NULL_TERM_PASSWORD) &&
3647            data->password_len == expected_len)
3648             TRACE("(%p %p %x): Found valid password component %s len=%d.\n", builder, data, flags,
3649                 debugstr_wn(data->password, data->password_len), data->password_len);
3650         else {
3651             TRACE("(%p %p %x): Invalid password component found %s.\n", builder, data, flags,
3652                 debugstr_wn(component, expected_len));
3653             return INET_E_INVALID_URL;
3654         }
3655     }
3656
3657     return S_OK;
3658 }
3659
3660 static HRESULT validate_userinfo(const UriBuilder *builder, parse_data *data, DWORD flags) {
3661     HRESULT hr;
3662
3663     hr = validate_username(builder, data, flags);
3664     if(FAILED(hr))
3665         return hr;
3666
3667     hr = validate_password(builder, data, flags);
3668     if(FAILED(hr))
3669         return hr;
3670
3671     return S_OK;
3672 }
3673
3674 static HRESULT validate_host(const UriBuilder *builder, parse_data *data, DWORD flags) {
3675     const WCHAR *ptr;
3676     const WCHAR **pptr;
3677     DWORD expected_len;
3678
3679     if(builder->host) {
3680         ptr = builder->host;
3681         expected_len = builder->host_len;
3682     } else if(!(builder->modified_props & Uri_HAS_HOST) && builder->uri && builder->uri->host_start > -1) {
3683         ptr = builder->uri->canon_uri + builder->uri->host_start;
3684         expected_len = builder->uri->host_len;
3685     } else
3686         ptr = NULL;
3687
3688     if(ptr) {
3689         const WCHAR *component = ptr;
3690         DWORD extras = ALLOW_BRACKETLESS_IP_LITERAL|IGNORE_PORT_DELIMITER|SKIP_IP_FUTURE_CHECK;
3691         pptr = &ptr;
3692
3693         if(parse_host(pptr, data, flags, extras) && data->host_len == expected_len)
3694             TRACE("(%p %p %x): Found valid host name %s len=%d type=%d.\n", builder, data, flags,
3695                 debugstr_wn(data->host, data->host_len), data->host_len, data->host_type);
3696         else {
3697             TRACE("(%p %p %x): Invalid host name found %s.\n", builder, data, flags,
3698                 debugstr_wn(component, expected_len));
3699             return INET_E_INVALID_URL;
3700         }
3701     }
3702
3703     return S_OK;
3704 }
3705
3706 static void setup_port(const UriBuilder *builder, parse_data *data, DWORD flags) {
3707     if(builder->modified_props & Uri_HAS_PORT) {
3708         if(builder->has_port) {
3709             data->has_port = TRUE;
3710             data->port_value = builder->port;
3711         }
3712     } else if(builder->uri && builder->uri->has_port) {
3713         data->has_port = TRUE;
3714         data->port_value = builder->uri->port;
3715     }
3716
3717     if(data->has_port)
3718         TRACE("(%p %p %x): Using %u as port for IUri.\n", builder, data, flags, data->port_value);
3719 }
3720
3721 static HRESULT validate_path(const UriBuilder *builder, parse_data *data, DWORD flags) {
3722     const WCHAR *ptr = NULL;
3723     const WCHAR **pptr;
3724     DWORD expected_len;
3725
3726     if(builder->path) {
3727         ptr = builder->path;
3728         expected_len = builder->path_len;
3729     } else if(!(builder->modified_props & Uri_HAS_PATH) &&
3730               builder->uri && builder->uri->path_start > -1) {
3731         ptr = builder->uri->canon_uri+builder->uri->path_start;
3732         expected_len = builder->uri->path_len;
3733     }
3734
3735     if(ptr) {
3736         BOOL valid = FALSE;
3737         const WCHAR *component = ptr;
3738         pptr = &ptr;
3739
3740         /* How the path is validated depends on what type of
3741          * URI it is.
3742          */
3743         valid = data->is_opaque ?
3744             parse_path_opaque(pptr, data, flags) : parse_path_hierarchical(pptr, data, flags);
3745
3746         if(!valid || expected_len != data->path_len) {
3747             TRACE("(%p %p %x): Invalid path componet %s.\n", builder, data, flags,
3748                 debugstr_wn(component, expected_len));
3749             return INET_E_INVALID_URL;
3750         }
3751
3752         TRACE("(%p %p %x): Valid path component %s len=%d.\n", builder, data, flags,
3753             debugstr_wn(data->path, data->path_len), data->path_len);
3754     }
3755
3756     return S_OK;
3757 }
3758
3759 static HRESULT validate_query(const UriBuilder *builder, parse_data *data, DWORD flags) {
3760     const WCHAR *ptr = NULL;
3761     const WCHAR **pptr;
3762     DWORD expected_len;
3763
3764     if(builder->query) {
3765         ptr = builder->query;
3766         expected_len = builder->query_len;
3767     } else if(!(builder->modified_props & Uri_HAS_QUERY) && builder->uri &&
3768               builder->uri->query_start > -1) {
3769         ptr = builder->uri->canon_uri+builder->uri->query_start;
3770         expected_len = builder->uri->query_len;
3771     }
3772
3773     if(ptr) {
3774         const WCHAR *component = ptr;
3775         pptr = &ptr;
3776
3777         if(parse_query(pptr, data, flags) && expected_len == data->query_len)
3778             TRACE("(%p %p %x): Valid query component %s len=%d.\n", builder, data, flags,
3779                 debugstr_wn(data->query, data->query_len), data->query_len);
3780         else {
3781             TRACE("(%p %p %x): Invalid query component %s.\n", builder, data, flags,
3782                 debugstr_wn(component, expected_len));
3783             return INET_E_INVALID_URL;
3784         }
3785     }
3786
3787     return S_OK;
3788 }
3789
3790 static HRESULT validate_fragment(const UriBuilder *builder, parse_data *data, DWORD flags) {
3791     const WCHAR *ptr = NULL;
3792     const WCHAR **pptr;
3793     DWORD expected_len;
3794
3795     if(builder->fragment) {
3796         ptr = builder->fragment;
3797         expected_len = builder->fragment_len;
3798     } else if(!(builder->modified_props & Uri_HAS_FRAGMENT) && builder->uri &&
3799               builder->uri->fragment_start > -1) {
3800         ptr = builder->uri->canon_uri+builder->uri->fragment_start;
3801         expected_len = builder->uri->fragment_len;
3802     }
3803
3804     if(ptr) {
3805         const WCHAR *component = ptr;
3806         pptr = &ptr;
3807
3808         if(parse_query(pptr, data, flags) && expected_len == data->fragment_len)
3809             TRACE("(%p %p %x): Valid fragment component %s len=%d.\n", builder, data, flags,
3810                 debugstr_wn(data->fragment, data->fragment_len), data->fragment_len);
3811         else {
3812             TRACE("(%p %p %x): Invalid fragment component %s.\n", builder, data, flags,
3813                 debugstr_wn(component, expected_len));
3814             return INET_E_INVALID_URL;
3815         }
3816     }
3817
3818     return S_OK;
3819 }
3820
3821 static HRESULT validate_components(const UriBuilder *builder, parse_data *data, DWORD flags) {
3822     HRESULT hr;
3823
3824     memset(data, 0, sizeof(parse_data));
3825
3826     TRACE("(%p %p %x): Beginning to validate builder components.\n", builder, data, flags);
3827
3828     hr = validate_scheme_name(builder, data, flags);
3829     if(FAILED(hr))
3830         return hr;
3831
3832     /* Extra validation for file schemes. */
3833     if(data->scheme_type == URL_SCHEME_FILE) {
3834         if((builder->password || (builder->uri && builder->uri->userinfo_split > -1)) ||
3835            (builder->username || (builder->uri && builder->uri->userinfo_start > -1))) {
3836             TRACE("(%p %p %x): File schemes can't contain a username or password.\n",
3837                 builder, data, flags);
3838             return INET_E_INVALID_URL;
3839         }
3840     }
3841
3842     hr = validate_userinfo(builder, data, flags);
3843     if(FAILED(hr))
3844         return hr;
3845
3846     hr = validate_host(builder, data, flags);
3847     if(FAILED(hr))
3848         return hr;
3849
3850     /* The URI is opaque if it doesn't have an authority component. */
3851     data->is_opaque = !data->username && !data->password && !data->host;
3852
3853     setup_port(builder, data, flags);
3854
3855     hr = validate_path(builder, data, flags);
3856     if(FAILED(hr))
3857         return hr;
3858
3859     hr = validate_query(builder, data, flags);
3860     if(FAILED(hr))
3861         return hr;
3862
3863     hr = validate_fragment(builder, data, flags);
3864     if(FAILED(hr))
3865         return hr;
3866
3867     TRACE("(%p %p %x): Finished validating builder components.\n", builder, data, flags);
3868
3869     return S_OK;
3870 }
3871
3872 static HRESULT build_uri(const UriBuilder *builder, IUri **uri, DWORD create_flags,
3873                          DWORD use_orig_flags, DWORD encoding_mask)
3874 {
3875     HRESULT hr;
3876     parse_data data;
3877
3878     if(!uri)
3879         return E_POINTER;
3880
3881     if(encoding_mask && (!builder->uri || builder->modified_props)) {
3882         *uri = NULL;
3883         return E_NOTIMPL;
3884     }
3885
3886     /* Decide what flags should be used when creating the Uri. */
3887     if((use_orig_flags & UriBuilder_USE_ORIGINAL_FLAGS) && builder->uri)
3888         create_flags = builder->uri->create_flags;
3889     else {
3890         if(has_invalid_flag_combination(create_flags)) {
3891             *uri = NULL;
3892             return E_INVALIDARG;
3893         }
3894
3895         /* Set the default flags if they don't cause a conflict. */
3896         apply_default_flags(&create_flags);
3897     }
3898
3899     /* Return the base IUri if no changes have been made and the create_flags match. */
3900     if(builder->uri && !builder->modified_props && builder->uri->create_flags == create_flags) {
3901         *uri = URI(builder->uri);
3902         IUri_AddRef(*uri);
3903         return S_OK;
3904     }
3905
3906     hr = validate_components(builder, &data, create_flags);
3907     if(FAILED(hr)) {
3908         *uri = NULL;
3909         return hr;
3910     }
3911
3912     return E_NOTIMPL;
3913 }
3914
/* Used by the IUri methods that follow to get the Uri implementation pointer
 * out of the IUri interface pointer they are called with. DEFINE_THIS is not
 * defined in this part of the file.
 */
#define URI_THIS(iface) DEFINE_THIS(Uri, IUri, iface)
3916
3917 static HRESULT WINAPI Uri_QueryInterface(IUri *iface, REFIID riid, void **ppv)
3918 {
3919     Uri *This = URI_THIS(iface);
3920
3921     if(IsEqualGUID(&IID_IUnknown, riid)) {
3922         TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv);
3923         *ppv = URI(This);
3924     }else if(IsEqualGUID(&IID_IUri, riid)) {
3925         TRACE("(%p)->(IID_IUri %p)\n", This, ppv);
3926         *ppv = URI(This);
3927     }else if(IsEqualGUID(&IID_IUriObj, riid)) {
3928         TRACE("(%p)->(IID_IUriObj %p)\n", This, ppv);
3929         *ppv = This;
3930         return S_OK;
3931     }else {
3932         TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv);
3933         *ppv = NULL;
3934         return E_NOINTERFACE;
3935     }
3936
3937     IUnknown_AddRef((IUnknown*)*ppv);
3938     return S_OK;
3939 }
3940
3941 static ULONG WINAPI Uri_AddRef(IUri *iface)
3942 {
3943     Uri *This = URI_THIS(iface);
3944     LONG ref = InterlockedIncrement(&This->ref);
3945
3946     TRACE("(%p) ref=%d\n", This, ref);
3947
3948     return ref;
3949 }
3950
3951 static ULONG WINAPI Uri_Release(IUri *iface)
3952 {
3953     Uri *This = URI_THIS(iface);
3954     LONG ref = InterlockedDecrement(&This->ref);
3955
3956     TRACE("(%p) ref=%d\n", This, ref);
3957
3958     if(!ref) {
3959         SysFreeString(This->raw_uri);
3960         heap_free(This->canon_uri);
3961         heap_free(This);
3962     }
3963
3964     return ref;
3965 }
3966
3967 static HRESULT WINAPI Uri_GetPropertyBSTR(IUri *iface, Uri_PROPERTY uriProp, BSTR *pbstrProperty, DWORD dwFlags)
3968 {
3969     Uri *This = URI_THIS(iface);
3970     HRESULT hres;
3971     TRACE("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags);
3972
3973     if(!pbstrProperty)
3974         return E_POINTER;
3975
3976     if(uriProp > Uri_PROPERTY_STRING_LAST) {
3977         /* Windows allocates an empty BSTR for invalid Uri_PROPERTY's. */
3978         *pbstrProperty = SysAllocStringLen(NULL, 0);
3979         if(!(*pbstrProperty))
3980             return E_OUTOFMEMORY;
3981
3982         /* It only returns S_FALSE for the ZONE property... */
3983         if(uriProp == Uri_PROPERTY_ZONE)
3984             return S_FALSE;
3985         else
3986             return S_OK;
3987     }
3988
3989     /* Don't have support for flags yet. */
3990     if(dwFlags) {
3991         FIXME("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags);
3992         return E_NOTIMPL;
3993     }
3994
3995     switch(uriProp) {
3996     case Uri_PROPERTY_ABSOLUTE_URI:
3997         if(!This->display_absolute) {
3998             *pbstrProperty = SysAllocStringLen(NULL, 0);
3999             hres = S_FALSE;
4000         } else {
4001             if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1) {
4002                 if(This->userinfo_len == 0) {
4003                     /* Don't include the '@' after the userinfo component. */
4004                     *pbstrProperty = SysAllocStringLen(NULL, This->canon_len-1);
4005                     hres = S_OK;
4006                     if(*pbstrProperty) {
4007                         /* Copy everything before it. */
4008                         memcpy(*pbstrProperty, This->canon_uri, This->userinfo_start*sizeof(WCHAR));
4009
4010                         /* And everything after it. */
4011                         memcpy(*pbstrProperty+This->userinfo_start, This->canon_uri+This->userinfo_start+1,
4012                                (This->canon_len-This->userinfo_start-1)*sizeof(WCHAR));
4013                     }
4014                 } else if(This->userinfo_split == 0 && This->userinfo_len == 1) {
4015                     /* Don't include the ":@" */
4016                     *pbstrProperty = SysAllocStringLen(NULL, This->canon_len-2);
4017                     hres = S_OK;
4018                     if(*pbstrProperty) {
4019                         memcpy(*pbstrProperty, This->canon_uri, This->userinfo_start*sizeof(WCHAR));
4020                         memcpy(*pbstrProperty+This->userinfo_start, This->canon_uri+This->userinfo_start+2,
4021                                (This->canon_len-This->userinfo_start-2)*sizeof(WCHAR));
4022                     }
4023                 } else {
4024                     *pbstrProperty = SysAllocString(This->canon_uri);
4025                     hres = S_OK;
4026                 }
4027             } else {
4028                 *pbstrProperty = SysAllocString(This->canon_uri);
4029                 hres = S_OK;
4030             }
4031         }
4032
4033         if(!(*pbstrProperty))
4034             hres = E_OUTOFMEMORY;
4035
4036         break;
4037     case Uri_PROPERTY_AUTHORITY:
4038         if(This->authority_start > -1) {
4039             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->authority_start, This->authority_len);
4040             hres = S_OK;
4041         } else {
4042             *pbstrProperty = SysAllocStringLen(NULL, 0);
4043             hres = S_FALSE;
4044         }
4045
4046         if(!(*pbstrProperty))
4047             hres = E_OUTOFMEMORY;
4048
4049         break;
4050     case Uri_PROPERTY_DISPLAY_URI:
4051         /* The Display URI contains everything except for the userinfo for known
4052          * scheme types.
4053          */
4054         if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1) {
4055             *pbstrProperty = SysAllocStringLen(NULL, This->canon_len-This->userinfo_len);
4056
4057             if(*pbstrProperty) {
4058                 /* Copy everything before the userinfo over. */
4059                 memcpy(*pbstrProperty, This->canon_uri, This->userinfo_start*sizeof(WCHAR));
4060                 /* Copy everything after the userinfo over. */
4061                 memcpy(*pbstrProperty+This->userinfo_start,
4062                    This->canon_uri+This->userinfo_start+This->userinfo_len+1,
4063                    (This->canon_len-(This->userinfo_start+This->userinfo_len+1))*sizeof(WCHAR));
4064             }
4065         } else
4066             *pbstrProperty = SysAllocString(This->canon_uri);
4067
4068         if(!(*pbstrProperty))
4069             hres = E_OUTOFMEMORY;
4070         else
4071             hres = S_OK;
4072
4073         break;
4074     case Uri_PROPERTY_DOMAIN:
4075         if(This->domain_offset > -1) {
4076             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start+This->domain_offset,
4077                                                This->host_len-This->domain_offset);
4078             hres = S_OK;
4079         } else {
4080             *pbstrProperty = SysAllocStringLen(NULL, 0);
4081             hres = S_FALSE;
4082         }
4083
4084         if(!(*pbstrProperty))
4085             hres = E_OUTOFMEMORY;
4086
4087         break;
4088     case Uri_PROPERTY_EXTENSION:
4089         if(This->extension_offset > -1) {
4090             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start+This->extension_offset,
4091                                                This->path_len-This->extension_offset);
4092             hres = S_OK;
4093         } else {
4094             *pbstrProperty = SysAllocStringLen(NULL, 0);
4095             hres = S_FALSE;
4096         }
4097
4098         if(!(*pbstrProperty))
4099             hres = E_OUTOFMEMORY;
4100
4101         break;
4102     case Uri_PROPERTY_FRAGMENT:
4103         if(This->fragment_start > -1) {
4104             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->fragment_start, This->fragment_len);
4105             hres = S_OK;
4106         } else {
4107             *pbstrProperty = SysAllocStringLen(NULL, 0);
4108             hres = S_FALSE;
4109         }
4110
4111         if(!(*pbstrProperty))
4112             hres = E_OUTOFMEMORY;
4113
4114         break;
4115     case Uri_PROPERTY_HOST:
4116         if(This->host_start > -1) {
4117             /* The '[' and ']' aren't included for IPv6 addresses. */
4118             if(This->host_type == Uri_HOST_IPV6)
4119                 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start+1, This->host_len-2);
4120             else
4121                 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start, This->host_len);
4122
4123             hres = S_OK;
4124         } else {
4125             *pbstrProperty = SysAllocStringLen(NULL, 0);
4126             hres = S_FALSE;
4127         }
4128
4129         if(!(*pbstrProperty))
4130             hres = E_OUTOFMEMORY;
4131
4132         break;
4133     case Uri_PROPERTY_PASSWORD:
4134         if(This->userinfo_split > -1) {
4135             *pbstrProperty = SysAllocStringLen(
4136                 This->canon_uri+This->userinfo_start+This->userinfo_split+1,
4137                 This->userinfo_len-This->userinfo_split-1);
4138             hres = S_OK;
4139         } else {
4140             *pbstrProperty = SysAllocStringLen(NULL, 0);
4141             hres = S_FALSE;
4142         }
4143
4144         if(!(*pbstrProperty))
4145             return E_OUTOFMEMORY;
4146
4147         break;
4148     case Uri_PROPERTY_PATH:
4149         if(This->path_start > -1) {
4150             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start, This->path_len);
4151             hres = S_OK;
4152         } else {
4153             *pbstrProperty = SysAllocStringLen(NULL, 0);
4154             hres = S_FALSE;
4155         }
4156
4157         if(!(*pbstrProperty))
4158             hres = E_OUTOFMEMORY;
4159
4160         break;
4161     case Uri_PROPERTY_PATH_AND_QUERY:
4162         if(This->path_start > -1) {
4163             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start, This->path_len+This->query_len);
4164             hres = S_OK;
4165         } else if(This->query_start > -1) {
4166             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->query_start, This->query_len);
4167             hres = S_OK;
4168         } else {
4169             *pbstrProperty = SysAllocStringLen(NULL, 0);
4170             hres = S_FALSE;
4171         }
4172
4173         if(!(*pbstrProperty))
4174             hres = E_OUTOFMEMORY;
4175
4176         break;
4177     case Uri_PROPERTY_QUERY:
4178         if(This->query_start > -1) {
4179             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->query_start, This->query_len);
4180             hres = S_OK;
4181         } else {
4182             *pbstrProperty = SysAllocStringLen(NULL, 0);
4183             hres = S_FALSE;
4184         }
4185
4186         if(!(*pbstrProperty))
4187             hres = E_OUTOFMEMORY;
4188
4189         break;
4190     case Uri_PROPERTY_RAW_URI:
4191         *pbstrProperty = SysAllocString(This->raw_uri);
4192         if(!(*pbstrProperty))
4193             hres = E_OUTOFMEMORY;
4194         else
4195             hres = S_OK;
4196         break;
4197     case Uri_PROPERTY_SCHEME_NAME:
4198         if(This->scheme_start > -1) {
4199             *pbstrProperty = SysAllocStringLen(This->canon_uri + This->scheme_start, This->scheme_len);
4200             hres = S_OK;
4201         } else {
4202             *pbstrProperty = SysAllocStringLen(NULL, 0);
4203             hres = S_FALSE;
4204         }
4205
4206         if(!(*pbstrProperty))
4207             hres = E_OUTOFMEMORY;
4208
4209         break;
4210     case Uri_PROPERTY_USER_INFO:
4211         if(This->userinfo_start > -1) {
4212             *pbstrProperty = SysAllocStringLen(This->canon_uri+This->userinfo_start, This->userinfo_len);
4213             hres = S_OK;
4214         } else {
4215             *pbstrProperty = SysAllocStringLen(NULL, 0);
4216             hres = S_FALSE;
4217         }
4218
4219         if(!(*pbstrProperty))
4220             hres = E_OUTOFMEMORY;
4221
4222         break;
4223     case Uri_PROPERTY_USER_NAME:
4224         if(This->userinfo_start > -1 && This->userinfo_split != 0) {
4225             /* If userinfo_split is set, that means a password exists
4226              * so the username is only from userinfo_start to userinfo_split.
4227              */
4228             if(This->userinfo_split > -1) {
4229                 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->userinfo_start, This->userinfo_split);
4230                 hres = S_OK;
4231             } else {
4232                 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->userinfo_start, This->userinfo_len);
4233                 hres = S_OK;
4234             }
4235         } else {
4236             *pbstrProperty = SysAllocStringLen(NULL, 0);
4237             hres = S_FALSE;
4238         }
4239
4240         if(!(*pbstrProperty))
4241             return E_OUTOFMEMORY;
4242
4243         break;
4244     default:
4245         FIXME("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags);
4246         hres = E_NOTIMPL;
4247     }
4248
4249     return hres;
4250 }
4251
4252 static HRESULT WINAPI Uri_GetPropertyLength(IUri *iface, Uri_PROPERTY uriProp, DWORD *pcchProperty, DWORD dwFlags)
4253 {
4254     Uri *This = URI_THIS(iface);
4255     HRESULT hres;
4256     TRACE("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
4257
4258     if(!pcchProperty)
4259         return E_INVALIDARG;
4260
4261     /* Can only return a length for a property if it's a string. */
4262     if(uriProp > Uri_PROPERTY_STRING_LAST)
4263         return E_INVALIDARG;
4264
4265     /* Don't have support for flags yet. */
4266     if(dwFlags) {
4267         FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
4268         return E_NOTIMPL;
4269     }
4270
4271     switch(uriProp) {
4272     case Uri_PROPERTY_ABSOLUTE_URI:
4273         if(!This->display_absolute) {
4274             *pcchProperty = 0;
4275             hres = S_FALSE;
4276         } else {
4277             if(This->scheme_type != URL_SCHEME_UNKNOWN) {
4278                 if(This->userinfo_start > -1 && This->userinfo_len == 0)
4279                     /* Don't include the '@' in the length. */
4280                     *pcchProperty = This->canon_len-1;
4281                 else if(This->userinfo_start > -1 && This->userinfo_len == 1 &&
4282                         This->userinfo_split == 0)
4283                     /* Don't include the ":@" in the length. */
4284                     *pcchProperty = This->canon_len-2;
4285                 else
4286                     *pcchProperty = This->canon_len;
4287             } else
4288                 *pcchProperty = This->canon_len;
4289
4290             hres = S_OK;
4291         }
4292
4293         break;
4294     case Uri_PROPERTY_AUTHORITY:
4295         *pcchProperty = This->authority_len;
4296         hres = (This->authority_start > -1) ? S_OK : S_FALSE;
4297         break;
4298     case Uri_PROPERTY_DISPLAY_URI:
4299         if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1)
4300             *pcchProperty = This->canon_len-This->userinfo_len-1;
4301         else
4302             *pcchProperty = This->canon_len;
4303
4304         hres = S_OK;
4305         break;
4306     case Uri_PROPERTY_DOMAIN:
4307         if(This->domain_offset > -1)
4308             *pcchProperty = This->host_len - This->domain_offset;
4309         else
4310             *pcchProperty = 0;
4311
4312         hres = (This->domain_offset > -1) ? S_OK : S_FALSE;
4313         break;
4314     case Uri_PROPERTY_EXTENSION:
4315         if(This->extension_offset > -1) {
4316             *pcchProperty = This->path_len - This->extension_offset;
4317             hres = S_OK;
4318         } else {
4319             *pcchProperty = 0;
4320             hres = S_FALSE;
4321         }
4322
4323         break;
4324     case Uri_PROPERTY_FRAGMENT:
4325         *pcchProperty = This->fragment_len;
4326         hres = (This->fragment_start > -1) ? S_OK : S_FALSE;
4327         break;
4328     case Uri_PROPERTY_HOST:
4329         *pcchProperty = This->host_len;
4330
4331         /* '[' and ']' aren't included in the length. */
4332         if(This->host_type == Uri_HOST_IPV6)
4333             *pcchProperty -= 2;
4334
4335         hres = (This->host_start > -1) ? S_OK : S_FALSE;
4336         break;
4337     case Uri_PROPERTY_PASSWORD:
4338         *pcchProperty = (This->userinfo_split > -1) ? This->userinfo_len-This->userinfo_split-1 : 0;
4339         hres = (This->userinfo_split > -1) ? S_OK : S_FALSE;
4340         break;
4341     case Uri_PROPERTY_PATH:
4342         *pcchProperty = This->path_len;
4343         hres = (This->path_start > -1) ? S_OK : S_FALSE;
4344         break;
4345     case Uri_PROPERTY_PATH_AND_QUERY:
4346         *pcchProperty = This->path_len+This->query_len;
4347         hres = (This->path_start > -1 || This->query_start > -1) ? S_OK : S_FALSE;
4348         break;
4349     case Uri_PROPERTY_QUERY:
4350         *pcchProperty = This->query_len;
4351         hres = (This->query_start > -1) ? S_OK : S_FALSE;
4352         break;
4353     case Uri_PROPERTY_RAW_URI:
4354         *pcchProperty = SysStringLen(This->raw_uri);
4355         hres = S_OK;
4356         break;
4357     case Uri_PROPERTY_SCHEME_NAME:
4358         *pcchProperty = This->scheme_len;
4359         hres = (This->scheme_start > -1) ? S_OK : S_FALSE;
4360         break;
4361     case Uri_PROPERTY_USER_INFO:
4362         *pcchProperty = This->userinfo_len;
4363         hres = (This->userinfo_start > -1) ? S_OK : S_FALSE;
4364         break;
4365     case Uri_PROPERTY_USER_NAME:
4366         *pcchProperty = (This->userinfo_split > -1) ? This->userinfo_split : This->userinfo_len;
4367         if(This->userinfo_split == 0)
4368             hres = S_FALSE;
4369         else
4370             hres = (This->userinfo_start > -1) ? S_OK : S_FALSE;
4371         break;
4372     default:
4373         FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
4374         hres = E_NOTIMPL;
4375     }
4376
4377     return hres;
4378 }
4379
4380 static HRESULT WINAPI Uri_GetPropertyDWORD(IUri *iface, Uri_PROPERTY uriProp, DWORD *pcchProperty, DWORD dwFlags)
4381 {
4382     Uri *This = URI_THIS(iface);
4383     HRESULT hres;
4384
4385     TRACE("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
4386
4387     if(!pcchProperty)
4388         return E_INVALIDARG;
4389
4390     /* Microsoft's implementation for the ZONE property of a URI seems to be lacking...
4391      * From what I can tell, instead of checking which URLZONE the URI belongs to it
4392      * simply assigns URLZONE_INVALID and returns E_NOTIMPL. This also applies to the GetZone
4393      * function.
4394      */
4395     if(uriProp == Uri_PROPERTY_ZONE) {
4396         *pcchProperty = URLZONE_INVALID;
4397         return E_NOTIMPL;
4398     }
4399
4400     if(uriProp < Uri_PROPERTY_DWORD_START) {
4401         *pcchProperty = 0;
4402         return E_INVALIDARG;
4403     }
4404
4405     switch(uriProp) {
4406     case Uri_PROPERTY_HOST_TYPE:
4407         *pcchProperty = This->host_type;
4408         hres = S_OK;
4409         break;
4410     case Uri_PROPERTY_PORT:
4411         if(!This->has_port) {
4412             *pcchProperty = 0;
4413             hres = S_FALSE;
4414         } else {
4415             *pcchProperty = This->port;
4416             hres = S_OK;
4417         }
4418
4419         break;
4420     case Uri_PROPERTY_SCHEME:
4421         *pcchProperty = This->scheme_type;
4422         hres = S_OK;
4423         break;
4424     default:
4425         FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags);
4426         hres = E_NOTIMPL;
4427     }
4428
4429     return hres;
4430 }
4431
4432 static HRESULT WINAPI Uri_HasProperty(IUri *iface, Uri_PROPERTY uriProp, BOOL *pfHasProperty)
4433 {
4434     Uri *This = URI_THIS(iface);
4435     TRACE("(%p)->(%d %p)\n", This, uriProp, pfHasProperty);
4436
4437     if(!pfHasProperty)
4438         return E_INVALIDARG;
4439
4440     switch(uriProp) {
4441     case Uri_PROPERTY_ABSOLUTE_URI:
4442         *pfHasProperty = This->display_absolute;
4443         break;
4444     case Uri_PROPERTY_AUTHORITY:
4445         *pfHasProperty = This->authority_start > -1;
4446         break;
4447     case Uri_PROPERTY_DISPLAY_URI:
4448         *pfHasProperty = TRUE;
4449         break;
4450     case Uri_PROPERTY_DOMAIN:
4451         *pfHasProperty = This->domain_offset > -1;
4452         break;
4453     case Uri_PROPERTY_EXTENSION:
4454         *pfHasProperty = This->extension_offset > -1;
4455         break;
4456     case Uri_PROPERTY_FRAGMENT:
4457         *pfHasProperty = This->fragment_start > -1;
4458         break;
4459     case Uri_PROPERTY_HOST:
4460         *pfHasProperty = This->host_start > -1;
4461         break;
4462     case Uri_PROPERTY_PASSWORD:
4463         *pfHasProperty = This->userinfo_split > -1;
4464         break;
4465     case Uri_PROPERTY_PATH:
4466         *pfHasProperty = This->path_start > -1;
4467         break;
4468     case Uri_PROPERTY_PATH_AND_QUERY:
4469         *pfHasProperty = (This->path_start > -1 || This->query_start > -1);
4470         break;
4471     case Uri_PROPERTY_QUERY:
4472         *pfHasProperty = This->query_start > -1;
4473         break;
4474     case Uri_PROPERTY_RAW_URI:
4475         *pfHasProperty = TRUE;
4476         break;
4477     case Uri_PROPERTY_SCHEME_NAME:
4478         *pfHasProperty = This->scheme_start > -1;
4479         break;
4480     case Uri_PROPERTY_USER_INFO:
4481         *pfHasProperty = This->userinfo_start > -1;
4482         break;
4483     case Uri_PROPERTY_USER_NAME:
4484         if(This->userinfo_split == 0)
4485             *pfHasProperty = FALSE;
4486         else
4487             *pfHasProperty = This->userinfo_start > -1;
4488         break;
4489     case Uri_PROPERTY_HOST_TYPE:
4490         *pfHasProperty = TRUE;
4491         break;
4492     case Uri_PROPERTY_PORT:
4493         *pfHasProperty = This->has_port;
4494         break;
4495     case Uri_PROPERTY_SCHEME:
4496         *pfHasProperty = TRUE;
4497         break;
4498     case Uri_PROPERTY_ZONE:
4499         *pfHasProperty = FALSE;
4500         break;
4501     default:
4502         FIXME("(%p)->(%d %p): Unsupported property type.\n", This, uriProp, pfHasProperty);
4503         return E_NOTIMPL;
4504     }
4505
4506     return S_OK;
4507 }
4508
4509 static HRESULT WINAPI Uri_GetAbsoluteUri(IUri *iface, BSTR *pstrAbsoluteUri)
4510 {
4511     TRACE("(%p)->(%p)\n", iface, pstrAbsoluteUri);
4512     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_ABSOLUTE_URI, pstrAbsoluteUri, 0);
4513 }
4514
4515 static HRESULT WINAPI Uri_GetAuthority(IUri *iface, BSTR *pstrAuthority)
4516 {
4517     TRACE("(%p)->(%p)\n", iface, pstrAuthority);
4518     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_AUTHORITY, pstrAuthority, 0);
4519 }
4520
4521 static HRESULT WINAPI Uri_GetDisplayUri(IUri *iface, BSTR *pstrDisplayUri)
4522 {
4523     TRACE("(%p)->(%p)\n", iface, pstrDisplayUri);
4524     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_DISPLAY_URI, pstrDisplayUri, 0);
4525 }
4526
4527 static HRESULT WINAPI Uri_GetDomain(IUri *iface, BSTR *pstrDomain)
4528 {
4529     TRACE("(%p)->(%p)\n", iface, pstrDomain);
4530     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_DOMAIN, pstrDomain, 0);
4531 }
4532
4533 static HRESULT WINAPI Uri_GetExtension(IUri *iface, BSTR *pstrExtension)
4534 {
4535     TRACE("(%p)->(%p)\n", iface, pstrExtension);
4536     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_EXTENSION, pstrExtension, 0);
4537 }
4538
4539 static HRESULT WINAPI Uri_GetFragment(IUri *iface, BSTR *pstrFragment)
4540 {
4541     TRACE("(%p)->(%p)\n", iface, pstrFragment);
4542     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_FRAGMENT, pstrFragment, 0);
4543 }
4544
4545 static HRESULT WINAPI Uri_GetHost(IUri *iface, BSTR *pstrHost)
4546 {
4547     TRACE("(%p)->(%p)\n", iface, pstrHost);
4548     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_HOST, pstrHost, 0);
4549 }
4550
4551 static HRESULT WINAPI Uri_GetPassword(IUri *iface, BSTR *pstrPassword)
4552 {
4553     TRACE("(%p)->(%p)\n", iface, pstrPassword);
4554     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_PASSWORD, pstrPassword, 0);
4555 }
4556
4557 static HRESULT WINAPI Uri_GetPath(IUri *iface, BSTR *pstrPath)
4558 {
4559     TRACE("(%p)->(%p)\n", iface, pstrPath);
4560     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_PATH, pstrPath, 0);
4561 }
4562
4563 static HRESULT WINAPI Uri_GetPathAndQuery(IUri *iface, BSTR *pstrPathAndQuery)
4564 {
4565     TRACE("(%p)->(%p)\n", iface, pstrPathAndQuery);
4566     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_PATH_AND_QUERY, pstrPathAndQuery, 0);
4567 }
4568
4569 static HRESULT WINAPI Uri_GetQuery(IUri *iface, BSTR *pstrQuery)
4570 {
4571     TRACE("(%p)->(%p)\n", iface, pstrQuery);
4572     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_QUERY, pstrQuery, 0);
4573 }
4574
4575 static HRESULT WINAPI Uri_GetRawUri(IUri *iface, BSTR *pstrRawUri)
4576 {
4577     TRACE("(%p)->(%p)\n", iface, pstrRawUri);
4578     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_RAW_URI, pstrRawUri, 0);
4579 }
4580
4581 static HRESULT WINAPI Uri_GetSchemeName(IUri *iface, BSTR *pstrSchemeName)
4582 {
4583     TRACE("(%p)->(%p)\n", iface, pstrSchemeName);
4584     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_SCHEME_NAME, pstrSchemeName, 0);
4585 }
4586
4587 static HRESULT WINAPI Uri_GetUserInfo(IUri *iface, BSTR *pstrUserInfo)
4588 {
4589     TRACE("(%p)->(%p)\n", iface, pstrUserInfo);
4590     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_USER_INFO, pstrUserInfo, 0);
4591 }
4592
4593 static HRESULT WINAPI Uri_GetUserName(IUri *iface, BSTR *pstrUserName)
4594 {
4595     TRACE("(%p)->(%p)\n", iface, pstrUserName);
4596     return Uri_GetPropertyBSTR(iface, Uri_PROPERTY_USER_NAME, pstrUserName, 0);
4597 }
4598
4599 static HRESULT WINAPI Uri_GetHostType(IUri *iface, DWORD *pdwHostType)
4600 {
4601     TRACE("(%p)->(%p)\n", iface, pdwHostType);
4602     return Uri_GetPropertyDWORD(iface, Uri_PROPERTY_HOST_TYPE, pdwHostType, 0);
4603 }
4604
4605 static HRESULT WINAPI Uri_GetPort(IUri *iface, DWORD *pdwPort)
4606 {
4607     TRACE("(%p)->(%p)\n", iface, pdwPort);
4608     return Uri_GetPropertyDWORD(iface, Uri_PROPERTY_PORT, pdwPort, 0);
4609 }
4610
4611 static HRESULT WINAPI Uri_GetScheme(IUri *iface, DWORD *pdwScheme)
4612 {
4613     Uri *This = URI_THIS(iface);
4614     TRACE("(%p)->(%p)\n", This, pdwScheme);
4615     return Uri_GetPropertyDWORD(iface, Uri_PROPERTY_SCHEME, pdwScheme, 0);
4616 }
4617
4618 static HRESULT WINAPI Uri_GetZone(IUri *iface, DWORD *pdwZone)
4619 {
4620     TRACE("(%p)->(%p)\n", iface, pdwZone);
4621     return Uri_GetPropertyDWORD(iface, Uri_PROPERTY_ZONE,pdwZone, 0);
4622 }
4623
4624 static HRESULT WINAPI Uri_GetProperties(IUri *iface, DWORD *pdwProperties)
4625 {
4626     Uri *This = URI_THIS(iface);
4627     TRACE("(%p)->(%p)\n", This, pdwProperties);
4628
4629     if(!pdwProperties)
4630         return E_INVALIDARG;
4631
4632     /* All URIs have these. */
4633     *pdwProperties = Uri_HAS_DISPLAY_URI|Uri_HAS_RAW_URI|Uri_HAS_SCHEME|Uri_HAS_HOST_TYPE;
4634
4635     if(This->display_absolute)
4636         *pdwProperties |= Uri_HAS_ABSOLUTE_URI;
4637
4638     if(This->scheme_start > -1)
4639         *pdwProperties |= Uri_HAS_SCHEME_NAME;
4640
4641     if(This->authority_start > -1) {
4642         *pdwProperties |= Uri_HAS_AUTHORITY;
4643         if(This->userinfo_start > -1) {
4644             *pdwProperties |= Uri_HAS_USER_INFO;
4645             if(This->userinfo_split != 0)
4646                 *pdwProperties |= Uri_HAS_USER_NAME;
4647         }
4648         if(This->userinfo_split > -1)
4649             *pdwProperties |= Uri_HAS_PASSWORD;
4650         if(This->host_start > -1)
4651             *pdwProperties |= Uri_HAS_HOST;
4652         if(This->domain_offset > -1)
4653             *pdwProperties |= Uri_HAS_DOMAIN;
4654     }
4655
4656     if(This->has_port)
4657         *pdwProperties |= Uri_HAS_PORT;
4658     if(This->path_start > -1)
4659         *pdwProperties |= Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY;
4660     if(This->query_start > -1)
4661         *pdwProperties |= Uri_HAS_QUERY|Uri_HAS_PATH_AND_QUERY;
4662
4663     if(This->extension_offset > -1)
4664         *pdwProperties |= Uri_HAS_EXTENSION;
4665
4666     if(This->fragment_start > -1)
4667         *pdwProperties |= Uri_HAS_FRAGMENT;
4668
4669     return S_OK;
4670 }
4671
4672 static HRESULT WINAPI Uri_IsEqual(IUri *iface, IUri *pUri, BOOL *pfEqual)
4673 {
4674     Uri *This = URI_THIS(iface);
4675     Uri *other;
4676
4677     TRACE("(%p)->(%p %p)\n", This, pUri, pfEqual);
4678
4679     if(!pfEqual)
4680         return E_POINTER;
4681
4682     if(!pUri) {
4683         *pfEqual = FALSE;
4684
4685         /* For some reason Windows returns S_OK here... */
4686         return S_OK;
4687     }
4688
4689     /* Try to convert it to a Uri (allows for a more simple comparison). */
4690     if((other = get_uri_obj(pUri)))
4691         *pfEqual = are_equal_simple(This, other);
4692     else {
4693         /* Do it the hard way. */
4694         FIXME("(%p)->(%p %p) No support for unknown IUri's yet.\n", iface, pUri, pfEqual);
4695         return E_NOTIMPL;
4696     }
4697
4698     return S_OK;
4699 }
4700
4701 #undef URI_THIS
4702
4703 static const IUriVtbl UriVtbl = {
4704     Uri_QueryInterface,
4705     Uri_AddRef,
4706     Uri_Release,
4707     Uri_GetPropertyBSTR,
4708     Uri_GetPropertyLength,
4709     Uri_GetPropertyDWORD,
4710     Uri_HasProperty,
4711     Uri_GetAbsoluteUri,
4712     Uri_GetAuthority,
4713     Uri_GetDisplayUri,
4714     Uri_GetDomain,
4715     Uri_GetExtension,
4716     Uri_GetFragment,
4717     Uri_GetHost,
4718     Uri_GetPassword,
4719     Uri_GetPath,
4720     Uri_GetPathAndQuery,
4721     Uri_GetQuery,
4722     Uri_GetRawUri,
4723     Uri_GetSchemeName,
4724     Uri_GetUserInfo,
4725     Uri_GetUserName,
4726     Uri_GetHostType,
4727     Uri_GetPort,
4728     Uri_GetScheme,
4729     Uri_GetZone,
4730     Uri_GetProperties,
4731     Uri_IsEqual
4732 };
4733
4734 /***********************************************************************
4735  *           CreateUri (urlmon.@)
4736  *
4737  * Creates a new IUri object using the URI represented by pwzURI. This function
4738  * parses and validates the components of pwzURI and then canonicalizes the
4739  * parsed components.
4740  *
4741  * PARAMS
4742  *  pwzURI      [I] The URI to parse, validate, and canonicalize.
4743  *  dwFlags     [I] Flags which can affect how the parsing/canonicalization is performed.
4744  *  dwReserved  [I] Reserved (not used).
4745  *  ppURI       [O] The resulting IUri after parsing/canonicalization occurs.
4746  *
4747  * RETURNS
4748  *  Success: Returns S_OK. ppURI contains the pointer to the newly allocated IUri.
4749  *  Failure: E_INVALIDARG if there's invalid flag combinations in dwFlags, or an
4750  *           invalid parameters, or pwzURI doesn't represnt a valid URI.
4751  *           E_OUTOFMEMORY if any memory allocation fails.
4752  *
4753  * NOTES
4754  *  Default flags:
4755  *      Uri_CREATE_CANONICALIZE, Uri_CREATE_DECODE_EXTRA_INFO, Uri_CREATE_CRACK_UNKNOWN_SCHEMES,
4756  *      Uri_CREATE_PRE_PROCESS_HTML_URI, Uri_CREATE_NO_IE_SETTINGS.
4757  */
4758 HRESULT WINAPI CreateUri(LPCWSTR pwzURI, DWORD dwFlags, DWORD_PTR dwReserved, IUri **ppURI)
4759 {
4760     const DWORD supported_flags = Uri_CREATE_ALLOW_RELATIVE|Uri_CREATE_ALLOW_IMPLICIT_WILDCARD_SCHEME|
4761         Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME|Uri_CREATE_NO_CANONICALIZE|Uri_CREATE_CANONICALIZE|
4762         Uri_CREATE_DECODE_EXTRA_INFO|Uri_CREATE_NO_DECODE_EXTRA_INFO|Uri_CREATE_CRACK_UNKNOWN_SCHEMES|
4763         Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES|Uri_CREATE_PRE_PROCESS_HTML_URI|Uri_CREATE_NO_PRE_PROCESS_HTML_URI|
4764         Uri_CREATE_NO_IE_SETTINGS|Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS|Uri_CREATE_FILE_USE_DOS_PATH;
4765     Uri *ret;
4766     HRESULT hr;
4767     parse_data data;
4768
4769     TRACE("(%s %x %x %p)\n", debugstr_w(pwzURI), dwFlags, (DWORD)dwReserved, ppURI);
4770
4771     if(!ppURI)
4772         return E_INVALIDARG;
4773
4774     if(!pwzURI || !*pwzURI) {
4775         *ppURI = NULL;
4776         return E_INVALIDARG;
4777     }
4778
4779     /* Check for invalid flags. */
4780     if(has_invalid_flag_combination(dwFlags)) {
4781         *ppURI = NULL;
4782         return E_INVALIDARG;
4783     }
4784
4785     /* Currently unsupported. */
4786     if(dwFlags & ~supported_flags)
4787         FIXME("Ignoring unsupported flag(s) %x\n", dwFlags & ~supported_flags);
4788
4789     ret = heap_alloc(sizeof(Uri));
4790     if(!ret)
4791         return E_OUTOFMEMORY;
4792
4793     ret->lpIUriVtbl = &UriVtbl;
4794     ret->ref = 1;
4795
4796     /* Explicitly set the default flags if it doesn't cause a flag conflict. */
4797     apply_default_flags(&dwFlags);
4798
4799     /* Pre process the URI, unless told otherwise. */
4800     if(!(dwFlags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI))
4801         ret->raw_uri = pre_process_uri(pwzURI);
4802     else
4803         ret->raw_uri = SysAllocString(pwzURI);
4804
4805     if(!ret->raw_uri) {
4806         heap_free(ret);
4807         return E_OUTOFMEMORY;
4808     }
4809
4810     memset(&data, 0, sizeof(parse_data));
4811     data.uri = ret->raw_uri;
4812
4813     /* Validate and parse the URI into it's components. */
4814     if(!parse_uri(&data, dwFlags)) {
4815         /* Encountered an unsupported or invalid URI */
4816         SysFreeString(ret->raw_uri);
4817         heap_free(ret);
4818         *ppURI = NULL;
4819         return E_INVALIDARG;
4820     }
4821
4822     /* Canonicalize the URI. */
4823     hr = canonicalize_uri(&data, ret, dwFlags);
4824     if(FAILED(hr)) {
4825         SysFreeString(ret->raw_uri);
4826         heap_free(ret);
4827         *ppURI = NULL;
4828         return hr;
4829     }
4830
4831     ret->create_flags = dwFlags;
4832
4833     *ppURI = URI(ret);
4834     return S_OK;
4835 }
4836
4837 /***********************************************************************
4838  *           CreateUriWithFragment (urlmon.@)
4839  *
4840  * Creates a new IUri object. This is almost the same as CreateUri, expect that
4841  * it allows you to explicitly specify a fragment (pwzFragment) for pwzURI.
4842  *
4843  * PARAMS
4844  *  pwzURI      [I] The URI to parse and perform canonicalization on.
4845  *  pwzFragment [I] The explict fragment string which should be added to pwzURI.
4846  *  dwFlags     [I] The flags which will be passed to CreateUri.
4847  *  dwReserved  [I] Reserved (not used).
4848  *  ppURI       [O] The resulting IUri after parsing/canonicalization.
4849  *
4850  * RETURNS
4851  *  Success: S_OK. ppURI contains the pointer to the newly allocated IUri.
4852  *  Failure: E_INVALIDARG if pwzURI already contains a fragment and pwzFragment
4853  *           isn't NULL. Will also return E_INVALIDARG for the same reasons as
4854  *           CreateUri will. E_OUTOFMEMORY if any allocations fail.
4855  */
4856 HRESULT WINAPI CreateUriWithFragment(LPCWSTR pwzURI, LPCWSTR pwzFragment, DWORD dwFlags,
4857                                      DWORD_PTR dwReserved, IUri **ppURI)
4858 {
4859     HRESULT hres;
4860     TRACE("(%s %s %x %x %p)\n", debugstr_w(pwzURI), debugstr_w(pwzFragment), dwFlags, (DWORD)dwReserved, ppURI);
4861
4862     if(!ppURI)
4863         return E_INVALIDARG;
4864
4865     if(!pwzURI) {
4866         *ppURI = NULL;
4867         return E_INVALIDARG;
4868     }
4869
4870     /* Check if a fragment should be appended to the URI string. */
4871     if(pwzFragment) {
4872         WCHAR *uriW;
4873         DWORD uri_len, frag_len;
4874         BOOL add_pound;
4875
4876         /* Check if the original URI already has a fragment component. */
4877         if(StrChrW(pwzURI, '#')) {
4878             *ppURI = NULL;
4879             return E_INVALIDARG;
4880         }
4881
4882         uri_len = lstrlenW(pwzURI);
4883         frag_len = lstrlenW(pwzFragment);
4884
4885         /* If the fragment doesn't start with a '#', one will be added. */
4886         add_pound = *pwzFragment != '#';
4887
4888         if(add_pound)
4889             uriW = heap_alloc((uri_len+frag_len+2)*sizeof(WCHAR));
4890         else
4891             uriW = heap_alloc((uri_len+frag_len+1)*sizeof(WCHAR));
4892
4893         if(!uriW)
4894             return E_OUTOFMEMORY;
4895
4896         memcpy(uriW, pwzURI, uri_len*sizeof(WCHAR));
4897         if(add_pound)
4898             uriW[uri_len++] = '#';
4899         memcpy(uriW+uri_len, pwzFragment, (frag_len+1)*sizeof(WCHAR));
4900
4901         hres = CreateUri(uriW, dwFlags, 0, ppURI);
4902
4903         heap_free(uriW);
4904     } else
4905         /* A fragment string wasn't specified, so just forward the call. */
4906         hres = CreateUri(pwzURI, dwFlags, 0, ppURI);
4907
4908     return hres;
4909 }
4910
4911 #define URIBUILDER_THIS(iface) DEFINE_THIS(UriBuilder, IUriBuilder, iface)
4912
4913 static HRESULT WINAPI UriBuilder_QueryInterface(IUriBuilder *iface, REFIID riid, void **ppv)
4914 {
4915     UriBuilder *This = URIBUILDER_THIS(iface);
4916
4917     if(IsEqualGUID(&IID_IUnknown, riid)) {
4918         TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv);
4919         *ppv = URIBUILDER(This);
4920     }else if(IsEqualGUID(&IID_IUriBuilder, riid)) {
4921         TRACE("(%p)->(IID_IUri %p)\n", This, ppv);
4922         *ppv = URIBUILDER(This);
4923     }else {
4924         TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv);
4925         *ppv = NULL;
4926         return E_NOINTERFACE;
4927     }
4928
4929     IUnknown_AddRef((IUnknown*)*ppv);
4930     return S_OK;
4931 }
4932
4933 static ULONG WINAPI UriBuilder_AddRef(IUriBuilder *iface)
4934 {
4935     UriBuilder *This = URIBUILDER_THIS(iface);
4936     LONG ref = InterlockedIncrement(&This->ref);
4937
4938     TRACE("(%p) ref=%d\n", This, ref);
4939
4940     return ref;
4941 }
4942
4943 static ULONG WINAPI UriBuilder_Release(IUriBuilder *iface)
4944 {
4945     UriBuilder *This = URIBUILDER_THIS(iface);
4946     LONG ref = InterlockedDecrement(&This->ref);
4947
4948     TRACE("(%p) ref=%d\n", This, ref);
4949
4950     if(!ref) {
4951         if(This->uri) IUri_Release(URI(This->uri));
4952         heap_free(This->fragment);
4953         heap_free(This->host);
4954         heap_free(This->password);
4955         heap_free(This->path);
4956         heap_free(This->query);
4957         heap_free(This->scheme);
4958         heap_free(This->username);
4959         heap_free(This);
4960     }
4961
4962     return ref;
4963 }
4964
4965 static HRESULT WINAPI UriBuilder_CreateUriSimple(IUriBuilder *iface,
4966                                                  DWORD        dwAllowEncodingPropertyMask,
4967                                                  DWORD_PTR    dwReserved,
4968                                                  IUri       **ppIUri)
4969 {
4970     UriBuilder *This = URIBUILDER_THIS(iface);
4971     HRESULT hr;
4972     TRACE("(%p)->(%d %d %p)\n", This, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
4973
4974     hr = build_uri(This, ppIUri, 0, UriBuilder_USE_ORIGINAL_FLAGS, dwAllowEncodingPropertyMask);
4975     if(hr == E_NOTIMPL)
4976         FIXME("(%p)->(%d %d %p)\n", This, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
4977     return hr;
4978 }
4979
4980 static HRESULT WINAPI UriBuilder_CreateUri(IUriBuilder *iface,
4981                                            DWORD        dwCreateFlags,
4982                                            DWORD        dwAllowEncodingPropertyMask,
4983                                            DWORD_PTR    dwReserved,
4984                                            IUri       **ppIUri)
4985 {
4986     UriBuilder *This = URIBUILDER_THIS(iface);
4987     HRESULT hr;
4988     TRACE("(%p)->(0x%08x %d %d %p)\n", This, dwCreateFlags, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
4989
4990     if(dwCreateFlags == -1)
4991         hr = build_uri(This, ppIUri, 0, UriBuilder_USE_ORIGINAL_FLAGS, dwAllowEncodingPropertyMask);
4992     else
4993         hr = build_uri(This, ppIUri, dwCreateFlags, 0, dwAllowEncodingPropertyMask);
4994
4995     if(hr == E_NOTIMPL)
4996         FIXME("(%p)->(0x%08x %d %d %p)\n", This, dwCreateFlags, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
4997     return hr;
4998 }
4999
5000 static HRESULT WINAPI UriBuilder_CreateUriWithFlags(IUriBuilder *iface,
5001                                          DWORD        dwCreateFlags,
5002                                          DWORD        dwUriBuilderFlags,
5003                                          DWORD        dwAllowEncodingPropertyMask,
5004                                          DWORD_PTR    dwReserved,
5005                                          IUri       **ppIUri)
5006 {
5007     UriBuilder *This = URIBUILDER_THIS(iface);
5008     HRESULT hr;
5009     TRACE("(%p)->(0x%08x 0x%08x %d %d %p)\n", This, dwCreateFlags, dwUriBuilderFlags,
5010         dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
5011
5012     hr = build_uri(This, ppIUri, dwCreateFlags, dwUriBuilderFlags, dwAllowEncodingPropertyMask);
5013     if(hr == E_NOTIMPL)
5014         FIXME("(%p)->(0x%08x 0x%08x %d %d %p)\n", This, dwCreateFlags, dwUriBuilderFlags,
5015             dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri);
5016     return hr;
5017 }
5018
5019 static HRESULT WINAPI  UriBuilder_GetIUri(IUriBuilder *iface, IUri **ppIUri)
5020 {
5021     UriBuilder *This = URIBUILDER_THIS(iface);
5022     TRACE("(%p)->(%p)\n", This, ppIUri);
5023
5024     if(!ppIUri)
5025         return E_POINTER;
5026
5027     if(This->uri) {
5028         IUri *uri = URI(This->uri);
5029         IUri_AddRef(uri);
5030         *ppIUri = uri;
5031     } else
5032         *ppIUri = NULL;
5033
5034     return S_OK;
5035 }
5036
5037 static HRESULT WINAPI UriBuilder_SetIUri(IUriBuilder *iface, IUri *pIUri)
5038 {
5039     UriBuilder *This = URIBUILDER_THIS(iface);
5040     TRACE("(%p)->(%p)\n", This, pIUri);
5041
5042     if(pIUri) {
5043         Uri *uri;
5044
5045         if((uri = get_uri_obj(pIUri))) {
5046             /* Only reset the builder if it's Uri isn't the same as
5047              * the Uri passed to the function.
5048              */
5049             if(This->uri != uri) {
5050                 reset_builder(This);
5051
5052                 This->uri = uri;
5053                 if(uri->has_port)
5054                     This->port = uri->port;
5055
5056                 IUri_AddRef(pIUri);
5057             }
5058         } else {
5059             FIXME("(%p)->(%p) Unknown IUri types not supported yet.\n", This, pIUri);
5060             return E_NOTIMPL;
5061         }
5062     } else if(This->uri)
5063         /* Only reset the builder if it's Uri isn't NULL. */
5064         reset_builder(This);
5065
5066     return S_OK;
5067 }
5068
5069 static HRESULT WINAPI UriBuilder_GetFragment(IUriBuilder *iface, DWORD *pcchFragment, LPCWSTR *ppwzFragment)
5070 {
5071     UriBuilder *This = URIBUILDER_THIS(iface);
5072     TRACE("(%p)->(%p %p)\n", This, pcchFragment, ppwzFragment);
5073
5074     if(!This->uri || This->uri->fragment_start == -1 || This->modified_props & Uri_HAS_FRAGMENT)
5075         return get_builder_component(&This->fragment, &This->fragment_len, NULL, 0, ppwzFragment, pcchFragment);
5076     else
5077         return get_builder_component(&This->fragment, &This->fragment_len, This->uri->canon_uri+This->uri->fragment_start,
5078                                      This->uri->fragment_len, ppwzFragment, pcchFragment);
5079 }
5080
5081 static HRESULT WINAPI UriBuilder_GetHost(IUriBuilder *iface, DWORD *pcchHost, LPCWSTR *ppwzHost)
5082 {
5083     UriBuilder *This = URIBUILDER_THIS(iface);
5084     TRACE("(%p)->(%p %p)\n", This, pcchHost, ppwzHost);
5085
5086     if(!This->uri || This->uri->host_start == -1 || This->modified_props & Uri_HAS_HOST)
5087         return get_builder_component(&This->host, &This->host_len, NULL, 0, ppwzHost, pcchHost);
5088     else {
5089         if(This->uri->host_type == Uri_HOST_IPV6)
5090             /* Don't include the '[' and ']' around the address. */
5091             return get_builder_component(&This->host, &This->host_len, This->uri->canon_uri+This->uri->host_start+1,
5092                                          This->uri->host_len-2, ppwzHost, pcchHost);
5093         else
5094             return get_builder_component(&This->host, &This->host_len, This->uri->canon_uri+This->uri->host_start,
5095                                          This->uri->host_len, ppwzHost, pcchHost);
5096     }
5097 }
5098
5099 static HRESULT WINAPI UriBuilder_GetPassword(IUriBuilder *iface, DWORD *pcchPassword, LPCWSTR *ppwzPassword)
5100 {
5101     UriBuilder *This = URIBUILDER_THIS(iface);
5102     TRACE("(%p)->(%p %p)\n", This, pcchPassword, ppwzPassword);
5103
5104     if(!This->uri || This->uri->userinfo_split == -1 || This->modified_props & Uri_HAS_PASSWORD)
5105         return get_builder_component(&This->password, &This->password_len, NULL, 0, ppwzPassword, pcchPassword);
5106     else {
5107         const WCHAR *start = This->uri->canon_uri+This->uri->userinfo_start+This->uri->userinfo_split+1;
5108         DWORD len = This->uri->userinfo_len-This->uri->userinfo_split-1;
5109         return get_builder_component(&This->password, &This->password_len, start, len, ppwzPassword, pcchPassword);
5110     }
5111 }
5112
5113 static HRESULT WINAPI UriBuilder_GetPath(IUriBuilder *iface, DWORD *pcchPath, LPCWSTR *ppwzPath)
5114 {
5115     UriBuilder *This = URIBUILDER_THIS(iface);
5116     TRACE("(%p)->(%p %p)\n", This, pcchPath, ppwzPath);
5117
5118     if(!This->uri || This->uri->path_start == -1 || This->modified_props & Uri_HAS_PATH)
5119         return get_builder_component(&This->path, &This->path_len, NULL, 0, ppwzPath, pcchPath);
5120     else
5121         return get_builder_component(&This->path, &This->path_len, This->uri->canon_uri+This->uri->path_start,
5122                                      This->uri->path_len, ppwzPath, pcchPath);
5123 }
5124
5125 static HRESULT WINAPI UriBuilder_GetPort(IUriBuilder *iface, BOOL *pfHasPort, DWORD *pdwPort)
5126 {
5127     UriBuilder *This = URIBUILDER_THIS(iface);
5128     TRACE("(%p)->(%p %p)\n", This, pfHasPort, pdwPort);
5129
5130     if(!pfHasPort) {
5131         if(pdwPort)
5132             *pdwPort = 0;
5133         return E_POINTER;
5134     }
5135
5136     if(!pdwPort) {
5137         *pfHasPort = FALSE;
5138         return E_POINTER;
5139     }
5140
5141     *pfHasPort = This->has_port;
5142     *pdwPort = This->port;
5143     return S_OK;
5144 }
5145
5146 static HRESULT WINAPI UriBuilder_GetQuery(IUriBuilder *iface, DWORD *pcchQuery, LPCWSTR *ppwzQuery)
5147 {
5148     UriBuilder *This = URIBUILDER_THIS(iface);
5149     TRACE("(%p)->(%p %p)\n", This, pcchQuery, ppwzQuery);
5150
5151     if(!This->uri || This->uri->query_start == -1 || This->modified_props & Uri_HAS_QUERY)
5152         return get_builder_component(&This->query, &This->query_len, NULL, 0, ppwzQuery, pcchQuery);
5153     else
5154         return get_builder_component(&This->query, &This->query_len, This->uri->canon_uri+This->uri->query_start,
5155                                      This->uri->query_len, ppwzQuery, pcchQuery);
5156 }
5157
5158 static HRESULT WINAPI UriBuilder_GetSchemeName(IUriBuilder *iface, DWORD *pcchSchemeName, LPCWSTR *ppwzSchemeName)
5159 {
5160     UriBuilder *This = URIBUILDER_THIS(iface);
5161     TRACE("(%p)->(%p %p)\n", This, pcchSchemeName, ppwzSchemeName);
5162
5163     if(!This->uri || This->uri->scheme_start == -1 || This->modified_props & Uri_HAS_SCHEME_NAME)
5164         return get_builder_component(&This->scheme, &This->scheme_len, NULL, 0, ppwzSchemeName, pcchSchemeName);
5165     else
5166         return get_builder_component(&This->scheme, &This->scheme_len, This->uri->canon_uri+This->uri->scheme_start,
5167                                      This->uri->scheme_len, ppwzSchemeName, pcchSchemeName);
5168 }
5169
5170 static HRESULT WINAPI UriBuilder_GetUserName(IUriBuilder *iface, DWORD *pcchUserName, LPCWSTR *ppwzUserName)
5171 {
5172     UriBuilder *This = URIBUILDER_THIS(iface);
5173     TRACE("(%p)->(%p %p)\n", This, pcchUserName, ppwzUserName);
5174
5175     if(!This->uri || This->uri->userinfo_start == -1 || This->uri->userinfo_split == 0 ||
5176        This->modified_props & Uri_HAS_USER_NAME)
5177         return get_builder_component(&This->username, &This->username_len, NULL, 0, ppwzUserName, pcchUserName);
5178     else {
5179         const WCHAR *start = This->uri->canon_uri+This->uri->userinfo_start;
5180
5181         /* Check if there's a password in the userinfo section. */
5182         if(This->uri->userinfo_split > -1)
5183             /* Don't include the password. */
5184             return get_builder_component(&This->username, &This->username_len, start,
5185                                          This->uri->userinfo_split, ppwzUserName, pcchUserName);
5186         else
5187             return get_builder_component(&This->username, &This->username_len, start,
5188                                          This->uri->userinfo_len, ppwzUserName, pcchUserName);
5189     }
5190 }
5191
5192 static HRESULT WINAPI UriBuilder_SetFragment(IUriBuilder *iface, LPCWSTR pwzNewValue)
5193 {
5194     UriBuilder *This = URIBUILDER_THIS(iface);
5195     TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
5196     return set_builder_component(&This->fragment, &This->fragment_len, pwzNewValue, '#',
5197                                  &This->modified_props, Uri_HAS_FRAGMENT);
5198 }
5199
5200 static HRESULT WINAPI UriBuilder_SetHost(IUriBuilder *iface, LPCWSTR pwzNewValue)
5201 {
5202     UriBuilder *This = URIBUILDER_THIS(iface);
5203     TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
5204
5205     /* Host name can't be set to NULL. */
5206     if(!pwzNewValue)
5207         return E_INVALIDARG;
5208
5209     return set_builder_component(&This->host, &This->host_len, pwzNewValue, 0,
5210                                  &This->modified_props, Uri_HAS_HOST);
5211 }
5212
5213 static HRESULT WINAPI UriBuilder_SetPassword(IUriBuilder *iface, LPCWSTR pwzNewValue)
5214 {
5215     UriBuilder *This = URIBUILDER_THIS(iface);
5216     TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
5217     return set_builder_component(&This->password, &This->password_len, pwzNewValue, 0,
5218                                  &This->modified_props, Uri_HAS_PASSWORD);
5219 }
5220
5221 static HRESULT WINAPI UriBuilder_SetPath(IUriBuilder *iface, LPCWSTR pwzNewValue)
5222 {
5223     UriBuilder *This = URIBUILDER_THIS(iface);
5224     TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
5225     return set_builder_component(&This->path, &This->path_len, pwzNewValue, 0,
5226                                  &This->modified_props, Uri_HAS_PATH);
5227 }
5228
5229 static HRESULT WINAPI UriBuilder_SetPort(IUriBuilder *iface, BOOL fHasPort, DWORD dwNewValue)
5230 {
5231     UriBuilder *This = URIBUILDER_THIS(iface);
5232     TRACE("(%p)->(%d %d)\n", This, fHasPort, dwNewValue);
5233
5234     This->has_port = fHasPort;
5235     This->port = dwNewValue;
5236     This->modified_props |= Uri_HAS_PORT;
5237     return S_OK;
5238 }
5239
5240 static HRESULT WINAPI UriBuilder_SetQuery(IUriBuilder *iface, LPCWSTR pwzNewValue)
5241 {
5242     UriBuilder *This = URIBUILDER_THIS(iface);
5243     TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
5244     return set_builder_component(&This->query, &This->query_len, pwzNewValue, '?',
5245                                  &This->modified_props, Uri_HAS_QUERY);
5246 }
5247
5248 static HRESULT WINAPI UriBuilder_SetSchemeName(IUriBuilder *iface, LPCWSTR pwzNewValue)
5249 {
5250     UriBuilder *This = URIBUILDER_THIS(iface);
5251     TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
5252
5253     /* Only set the scheme name if it's not NULL or empty. */
5254     if(!pwzNewValue || !*pwzNewValue)
5255         return E_INVALIDARG;
5256
5257     return set_builder_component(&This->scheme, &This->scheme_len, pwzNewValue, 0,
5258                                  &This->modified_props, Uri_HAS_SCHEME_NAME);
5259 }
5260
5261 static HRESULT WINAPI UriBuilder_SetUserName(IUriBuilder *iface, LPCWSTR pwzNewValue)
5262 {
5263     UriBuilder *This = URIBUILDER_THIS(iface);
5264     TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue));
5265     return set_builder_component(&This->username, &This->username_len, pwzNewValue, 0,
5266                                  &This->modified_props, Uri_HAS_USER_NAME);
5267 }
5268
5269 static HRESULT WINAPI UriBuilder_RemoveProperties(IUriBuilder *iface, DWORD dwPropertyMask)
5270 {
5271     const DWORD accepted_flags = Uri_HAS_AUTHORITY|Uri_HAS_DOMAIN|Uri_HAS_EXTENSION|Uri_HAS_FRAGMENT|Uri_HAS_HOST|
5272                                  Uri_HAS_PASSWORD|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_QUERY|
5273                                  Uri_HAS_USER_INFO|Uri_HAS_USER_NAME;
5274
5275     UriBuilder *This = URIBUILDER_THIS(iface);
5276     TRACE("(%p)->(0x%08x)\n", This, dwPropertyMask);
5277
5278     if(dwPropertyMask & ~accepted_flags)
5279         return E_INVALIDARG;
5280
5281     if(dwPropertyMask & Uri_HAS_FRAGMENT)
5282         UriBuilder_SetFragment(iface, NULL);
5283
5284     /* Even though you can't set the host name to NULL or an
5285      * empty string, you can still remove it... for some reason.
5286      */
5287     if(dwPropertyMask & Uri_HAS_HOST)
5288         set_builder_component(&This->host, &This->host_len, NULL, 0,
5289                               &This->modified_props, Uri_HAS_HOST);
5290
5291     if(dwPropertyMask & Uri_HAS_PASSWORD)
5292         UriBuilder_SetPassword(iface, NULL);
5293
5294     if(dwPropertyMask & Uri_HAS_PATH)
5295         UriBuilder_SetPath(iface, NULL);
5296
5297     if(dwPropertyMask & Uri_HAS_PORT)
5298         UriBuilder_SetPort(iface, FALSE, 0);
5299
5300     if(dwPropertyMask & Uri_HAS_QUERY)
5301         UriBuilder_SetQuery(iface, NULL);
5302
5303     if(dwPropertyMask & Uri_HAS_USER_NAME)
5304         UriBuilder_SetUserName(iface, NULL);
5305
5306     return S_OK;
5307 }
5308
5309 static HRESULT WINAPI UriBuilder_HasBeenModified(IUriBuilder *iface, BOOL *pfModified)
5310 {
5311     UriBuilder *This = URIBUILDER_THIS(iface);
5312     TRACE("(%p)->(%p)\n", This, pfModified);
5313
5314     if(!pfModified)
5315         return E_POINTER;
5316
5317     *pfModified = This->modified_props > 0;
5318     return S_OK;
5319 }
5320
5321 #undef URIBUILDER_THIS
5322
5323 static const IUriBuilderVtbl UriBuilderVtbl = {
5324     UriBuilder_QueryInterface,
5325     UriBuilder_AddRef,
5326     UriBuilder_Release,
5327     UriBuilder_CreateUriSimple,
5328     UriBuilder_CreateUri,
5329     UriBuilder_CreateUriWithFlags,
5330     UriBuilder_GetIUri,
5331     UriBuilder_SetIUri,
5332     UriBuilder_GetFragment,
5333     UriBuilder_GetHost,
5334     UriBuilder_GetPassword,
5335     UriBuilder_GetPath,
5336     UriBuilder_GetPort,
5337     UriBuilder_GetQuery,
5338     UriBuilder_GetSchemeName,
5339     UriBuilder_GetUserName,
5340     UriBuilder_SetFragment,
5341     UriBuilder_SetHost,
5342     UriBuilder_SetPassword,
5343     UriBuilder_SetPath,
5344     UriBuilder_SetPort,
5345     UriBuilder_SetQuery,
5346     UriBuilder_SetSchemeName,
5347     UriBuilder_SetUserName,
5348     UriBuilder_RemoveProperties,
5349     UriBuilder_HasBeenModified,
5350 };
5351
5352 /***********************************************************************
5353  *           CreateIUriBuilder (urlmon.@)
5354  */
5355 HRESULT WINAPI CreateIUriBuilder(IUri *pIUri, DWORD dwFlags, DWORD_PTR dwReserved, IUriBuilder **ppIUriBuilder)
5356 {
5357     UriBuilder *ret;
5358
5359     TRACE("(%p %x %x %p)\n", pIUri, dwFlags, (DWORD)dwReserved, ppIUriBuilder);
5360
5361     if(!ppIUriBuilder)
5362         return E_POINTER;
5363
5364     ret = heap_alloc_zero(sizeof(UriBuilder));
5365     if(!ret)
5366         return E_OUTOFMEMORY;
5367
5368     ret->lpIUriBuilderVtbl = &UriBuilderVtbl;
5369     ret->ref = 1;
5370
5371     if(pIUri) {
5372         Uri *uri;
5373
5374         if((uri = get_uri_obj(pIUri))) {
5375             IUri_AddRef(pIUri);
5376             ret->uri = uri;
5377
5378             if(uri->has_port)
5379                 /* Windows doesn't set 'has_port' to TRUE in this case. */
5380                 ret->port = uri->port;
5381
5382         } else {
5383             heap_free(ret);
5384             *ppIUriBuilder = NULL;
5385             FIXME("(%p %x %x %p): Unknown IUri types not supported yet.\n", pIUri, dwFlags,
5386                 (DWORD)dwReserved, ppIUriBuilder);
5387             return E_NOTIMPL;
5388         }
5389     }
5390
5391     *ppIUriBuilder = URIBUILDER(ret);
5392     return S_OK;
5393 }