reactos/lib/3rdparty/icu4ros/icu/source/common/unames.c

   1 /*
   2 ******************************************************************************
   3 *
   4 *   Copyright (C) 1999-2007, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 ******************************************************************************
   8 *   file name:  unames.c
   9 *   encoding:   US-ASCII
  10 *   tab size:   8 (not used)
  11 *   indentation:4
  12 *
  13 *   created on: 1999oct04
  14 *   created by: Markus W. Scherer
  15 */
  16
  17 #include "unicode/utypes.h"
  18 #include "unicode/putil.h"
  19 #include "unicode/uchar.h"
  20 #include "unicode/udata.h"
  21 #include "ustr_imp.h"
  22 #include "umutex.h"
  23 #include "cmemory.h"
  24 #include "cstring.h"
  25 #include "ucln_cmn.h"
  26 #include "udataswp.h"
  27 #include "uprops.h"
  28
  29 /* prototypes ------------------------------------------------------------- */
  30
/* Number of elements of a real array (not meaningful for a pointer). */
#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))

/* Name and type handed to udata_openChoice() when the names data is loaded. */
static const char DATA_NAME[] = "unames";
static const char DATA_TYPE[] = "icu";

/*
 * Names are stored in groups of 32 consecutive code points:
 * code>>GROUP_SHIFT selects the group, code&GROUP_MASK the line inside it.
 */
#define GROUP_SHIFT 5
#define LINES_PER_GROUP (1UL<<GROUP_SHIFT)
#define GROUP_MASK (LINES_PER_GROUP-1)
  39
/*
 * One entry of the group table.
 * groupMSB is matched against code>>GROUP_SHIFT; offsetHigh and offsetLow
 * are combined as (offsetHigh<<16|offsetLow) to find the group's data
 * relative to UCharNames.groupStringOffset.
 */
typedef struct {
    uint16_t groupMSB,
             offsetHigh, offsetLow; /* avoid padding */
} Group;
  44
/*
 * Header of one algorithmic name range; the range-specific data follows
 * directly after the struct (it is read via range+1).
 * type selects the algorithm (0: prefix + hex digits, 1: prefix +
 * factorized elements); variant is the hex digit count for type 0 and the
 * number of factors for type 1. start is the first code point of the range.
 */
typedef struct {
    uint32_t start, end;
    uint8_t type, variant;
    uint16_t size;
} AlgorithmicRange;
  50
/*
 * Header of the loaded names data. tokenStringOffset, groupsOffset and
 * groupStringOffset are used as byte offsets from the start of this struct;
 * algNamesOffset is presumably one as well (not used in this part of the
 * file - confirm against its users).
 */
typedef struct {
    uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset;
} UCharNames;
  54
/*
 * Context passed to the enumeration functions when they are used to look
 * up a name (fn==DO_FIND_NAME): otherName is the name to search for and
 * code receives the code point whose name matched.
 */
typedef struct {
    const char *otherName;
    UChar32 code;
} FindName;
  59
/* Passed instead of an enumerator function to request a name lookup. */
#define DO_FIND_NAME NULL

/* Loaded data and its header; set in isDataLoaded(), reset in unames_cleanup(). */
static UDataMemory *uCharNamesData=NULL;
static UCharNames *uCharNames=NULL;
/* Error code of a failed load; reported again on later load attempts. */
static UErrorCode gLoadErrorCode=U_ZERO_ERROR;

/*
 * Maximum length of character names (regular & 1.0).
 */
static int32_t gMaxNameLength=0;

/*
 * Set of chars used in character names (regular & 1.0).
 * Chars are platform-dependent (can be EBCDIC).
 */
static uint32_t gNameSet[8]={ 0 };
  76
  77 #define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT
  78 #define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1
  79 #define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2
  80
  81 #define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)
  82
/*
 * Printable category names used for extended character names, indexed by
 * the value returned from getCharCat(). The final three entries belong to
 * the extended categories (noncharacter, lead surrogate, trail surrogate).
 */
static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {
    "unassigned",
    "uppercase letter",
    "lowercase letter",
    "titlecase letter",
    "modifier letter",
    "other letter",
    "non spacing mark",
    "enclosing mark",
    "combining spacing mark",
    "decimal digit number",
    "letter number",
    "other number",
    "space separator",
    "line separator",
    "paragraph separator",
    "control",
    "format",
    "private use area",
    "surrogate",
    "dash punctuation",
    "start punctuation",
    "end punctuation",
    "connector punctuation",
    "other punctuation",
    "math symbol",
    "currency symbol",
    "modifier symbol",
    "other symbol",
    "initial punctuation",
    "final punctuation",
    "noncharacter",
    "lead surrogate",
    "trail surrogate"
};
 118
 119 /* implementation ----------------------------------------------------------- */
 120
 121 static UBool U_CALLCONV unames_cleanup(void)
 122 {
 123     if(uCharNamesData) {
 124         udata_close(uCharNamesData);
 125         uCharNamesData = NULL;
 126     }
 127     if(uCharNames) {
 128         uCharNames = NULL;
 129     }
 130     gMaxNameLength=0;
 131     return TRUE;
 132 }
 133
 134 static UBool U_CALLCONV
 135 isAcceptable(void *context,
 136              const char *type, const char *name,
 137              const UDataInfo *pInfo) {
 138     return (UBool)(
 139         pInfo->size>=20 &&
 140         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
 141         pInfo->charsetFamily==U_CHARSET_FAMILY &&
 142         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
 143         pInfo->dataFormat[1]==0x6e &&
 144         pInfo->dataFormat[2]==0x61 &&
 145         pInfo->dataFormat[3]==0x6d &&
 146         pInfo->formatVersion[0]==1);
 147 }
 148
 149 static UBool
 150 isDataLoaded(UErrorCode *pErrorCode) {
 151     /* load UCharNames from file if necessary */
 152     UBool isCached;
 153
 154     /* do this because double-checked locking is broken */
 155     UMTX_CHECK(NULL, (uCharNames!=NULL), isCached);
 156
 157     if(!isCached) {
 158         UCharNames *names;
 159         UDataMemory *data;
 160
 161         /* check error code from previous attempt */
 162         if(U_FAILURE(gLoadErrorCode)) {
 163             *pErrorCode=gLoadErrorCode;
 164             return FALSE;
 165         }
 166
 167         /* open the data outside the mutex block */
 168         data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
 169         if(U_FAILURE(*pErrorCode)) {
 170             gLoadErrorCode=*pErrorCode;
 171             return FALSE;
 172         }
 173
 174         names=(UCharNames *)udata_getMemory(data);
 175
 176         /* in the mutex block, set the data for this process */
 177         {
 178             umtx_lock(NULL);
 179             if(uCharNames==NULL) {
 180                 uCharNames=names;
 181                 uCharNamesData=data;
 182                 data=NULL;
 183                 names=NULL;
 184                 ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);
 185             }
 186             umtx_unlock(NULL);
 187         }
 188
 189         /* if a different thread set it first, then close the extra data */
 190         if(data!=NULL) {
 191             udata_close(data); /* NULL if it was set correctly */
 192         }
 193     }
 194     return TRUE;
 195 }
 196
 197 #define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \
 198     if((bufferLength)>0) { \
 199         *(buffer)++=c; \
 200         --(bufferLength); \
 201     } \
 202     ++(bufferPos); \
 203 }
 204
 205 #define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT
 206
 207 /*
 208  * Important: expandName() and compareName() are almost the same -
 209  * apply fixes to both.
 210  *
 211  * UnicodeData.txt uses ';' as a field separator, so no
 212  * field can contain ';' as part of its contents.
 213  * In unames.dat, it is marked as token[';']==-1 only if the
 214  * semicolon is used in the data file - which is iff we
 215  * have Unicode 1.0 names or ISO comments.
 216  * So, it will be token[';']==-1 if we store U1.0 names/ISO comments
 217  * although we know that it will never be part of a name.
 218  */
/*
 * Expands one compressed name line into buffer.
 *
 * names        header of the names data; the token table (a uint16_t count
 *              followed by the token values) starts 8 uint16_t units in
 * name         compressed bytes of the line, nameLength bytes long
 * nameChoice   which ';'-separated field to expand; U_ISO_COMMENT selects
 *              the third field
 * buffer       output buffer with room for bufferLength chars
 *
 * Returns the full length of the expanded name, even if it did not fit;
 * the output is zero-terminated only if room is left after the name.
 */
static uint16_t
expandName(UCharNames *names,
           const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
           char *buffer, uint16_t bufferLength) {
    uint16_t *tokens=(uint16_t *)names+8;
    uint16_t token, tokenCount=*tokens++, bufferPos=0;
    uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
    uint8_t c;

    if(nameChoice==U_UNICODE_10_CHAR_NAME || nameChoice==U_ISO_COMMENT) {
        /*
         * skip the modern name if it is not requested _and_
         * if the semicolon byte value is a character, not a token number
         */
        if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
            while(nameLength>0) {
                --nameLength;
                if(*name++==';') {
                    break;
                }
            }
            if(nameChoice==U_ISO_COMMENT) {
                /* skip the Unicode 1.0 name as well to get the ISO comment */
                while(nameLength>0) {
                    --nameLength;
                    if(*name++==';') {
                        break;
                    }
                }
            }
        } else {
            /*
             * the semicolon byte value is a token number, therefore
             * only modern names are stored in unames.dat and there is no
             * such requested Unicode 1.0 name here
             */
            nameLength=0;
        }
    }

    /* write each letter directly, and write a token word per token */
    while(nameLength>0) {
        --nameLength;
        c=*name++;

        if(c>=tokenCount) {
            /* byte values at or above tokenCount have no token entry: literal chars */
            if(c!=';') {
                /* implicit letter */
                WRITE_CHAR(buffer, bufferLength, bufferPos, c);
            } else {
                /* finished */
                break;
            }
        } else {
            token=tokens[c];
            if(token==(uint16_t)(-2)) {
                /* this is a lead byte for a double-byte token */
                /* NOTE(review): the trail byte is read without checking nameLength - relies on well-formed data */
                token=tokens[c<<8|*name++];
                --nameLength;
            }
            if(token==(uint16_t)(-1)) {
                /* -1 marks a byte that stands for itself */
                if(c!=';') {
                    /* explicit letter */
                    WRITE_CHAR(buffer, bufferLength, bufferPos, c);
                } else {
                    /* stop, but skip the semicolon if we are seeking
                       extended names and there was no 2.0 name but there
                       is a 1.0 name. */
                    if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) {
                        if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
                            continue;
                        }
                    }
                    /* finished */
                    break;
                }
            } else {
                /* write token word */
                /* token is an offset to a zero-terminated word in tokenStrings */
                uint8_t *tokenString=tokenStrings+token;
                while((c=*tokenString++)!=0) {
                    WRITE_CHAR(buffer, bufferLength, bufferPos, c);
                }
            }
        }
    }

    /* zero-terminate */
    if(bufferLength>0) {
        *buffer=0;
    }

    return bufferPos;
}
 312
 313 /*
 314  * compareName() is almost the same as expandName() except that it compares
 315  * the currently expanded name to an input name.
 316  * It returns the match/no match result as soon as possible.
 317  */
/*
 * Parameters name, nameLength and nameChoice are as for expandName();
 * otherName is the zero-terminated name to compare against.
 * Returns TRUE only if the expanded name equals otherName completely.
 * Unlike expandName(), U_ISO_COMMENT gets no field skipping here.
 */
static UBool
compareName(UCharNames *names,
            const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
            const char *otherName) {
    uint16_t *tokens=(uint16_t *)names+8;
    uint16_t token, tokenCount=*tokens++;
    uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
    uint8_t c;
    /* remembered to detect "nothing matched yet" in the ';' handling below */
    const char *origOtherName = otherName;

    if(nameChoice==U_UNICODE_10_CHAR_NAME) {
        /*
         * skip the modern name if it is not requested _and_
         * if the semicolon byte value is a character, not a token number
         */
        if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
            while(nameLength>0) {
                --nameLength;
                if(*name++==';') {
                    break;
                }
            }
        } else {
            /*
             * the semicolon byte value is a token number, therefore
             * only modern names are stored in unames.dat and there is no
             * such requested Unicode 1.0 name here
             */
            nameLength=0;
        }
    }

    /* compare each letter directly, and compare a token word per token */
    while(nameLength>0) {
        --nameLength;
        c=*name++;

        if(c>=tokenCount) {
            /* byte values at or above tokenCount have no token entry: literal chars */
            if(c!=';') {
                /* implicit letter */
                if((char)c!=*otherName++) {
                    return FALSE;
                }
            } else {
                /* finished */
                break;
            }
        } else {
            token=tokens[c];
            if(token==(uint16_t)(-2)) {
                /* this is a lead byte for a double-byte token */
                /* NOTE(review): the trail byte is read without checking nameLength - relies on well-formed data */
                token=tokens[c<<8|*name++];
                --nameLength;
            }
            if(token==(uint16_t)(-1)) {
                /* -1 marks a byte that stands for itself */
                if(c!=';') {
                    /* explicit letter */
                    if((char)c!=*otherName++) {
                        return FALSE;
                    }
                } else {
                    /* stop, but skip the semicolon if we are seeking
                       extended names and there was no 2.0 name but there
                       is a 1.0 name. */
                    if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) {
                        if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
                            continue;
                        }
                    }
                    /* finished */
                    break;
                }
            } else {
                /* compare token word */
                /* token is an offset to a zero-terminated word in tokenStrings */
                uint8_t *tokenString=tokenStrings+token;
                while((c=*tokenString++)!=0) {
                    if((char)c!=*otherName++) {
                        return FALSE;
                    }
                }
            }
        }
    }

    /* complete match? */
    return (UBool)(*otherName==0);
}
 405
 406 static uint8_t getCharCat(UChar32 cp) {
 407     uint8_t cat;
 408
 409     if (UTF_IS_UNICODE_NONCHAR(cp)) {
 410         return U_NONCHARACTER_CODE_POINT;
 411     }
 412
 413     if ((cat = u_charType(cp)) == U_SURROGATE) {
 414         cat = UTF_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
 415     }
 416
 417     return cat;
 418 }
 419
 420 static const char *getCharCatName(UChar32 cp) {
 421     uint8_t cat = getCharCat(cp);
 422
 423     /* Return unknown if the table of names above is not up to
 424        date. */
 425
 426     if (cat >= LENGTHOF(charCatNames)) {
 427         return "unknown";
 428     } else {
 429         return charCatNames[cat];
 430     }
 431 }
 432
 433 static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
 434     const char *catname = getCharCatName(code);
 435     uint16_t length = 0;
 436
 437     UChar32 cp;
 438     int ndigits, i;
 439
 440     WRITE_CHAR(buffer, bufferLength, length, '<');
 441     while (catname[length - 1]) {
 442         WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);
 443     }
 444     WRITE_CHAR(buffer, bufferLength, length, '-');
 445     for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)
 446         ;
 447     if (ndigits < 4)
 448         ndigits = 4;
 449     for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) {
 450         uint8_t v = (uint8_t)(cp & 0xf);
 451         buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);
 452     }
 453     buffer += ndigits;
 454     length += ndigits;
 455     WRITE_CHAR(buffer, bufferLength, length, '>');
 456
 457     return length;
 458 }
 459
 460 /*
 461  * getGroup() does a binary search for the group that contains the
 462  * Unicode code point "code".
 463  * The return value is always a valid Group* that may contain "code"
 464  * or else is the highest group before "code".
 465  * If the lowest group is after "code", then that one is returned.
 466  */
 467 static Group *
 468 getGroup(UCharNames *names, uint32_t code) {
 469     uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),
 470              start=0,
 471              limit=*(uint16_t *)((char *)names+names->groupsOffset),
 472              number;
 473     Group *groups=(Group *)((char *)names+names->groupsOffset+2);
 474
 475     /* binary search for the group of names that contains the one for code */
 476     while(start<limit-1) {
 477         number=(uint16_t)((start+limit)/2);
 478         if(groupMSB<groups[number].groupMSB) {
 479             limit=number;
 480         } else {
 481             start=number;
 482         }
 483     }
 484
 485     /* return this regardless of whether it is an exact match */
 486     return groups+start;
 487 }
 488
 489 /*
 490  * expandGroupLengths() reads a block of compressed lengths of 32 strings and
 491  * expands them into offsets and lengths for each string.
 492  * Lengths are stored with a variable-width encoding in consecutive nibbles:
 493  * If a nibble<0xc, then it is the length itself (0=empty string).
 494  * If a nibble>=0xc, then it forms a length value with the following nibble.
 495  * Calculation see below.
 496  * The offsets and lengths arrays must be at least 33 (one more) long because
 497  * there is no check here at the end if the last nibble is still used.
 498  */
 499 static const uint8_t *
 500 expandGroupLengths(const uint8_t *s,
 501                    uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) {
 502     /* read the lengths of the 32 strings in this group and get each string's offset */
 503     uint16_t i=0, offset=0, length=0;
 504     uint8_t lengthByte;
 505
 506     /* all 32 lengths must be read to get the offset of the first group string */
 507     while(i<LINES_PER_GROUP) {
 508         lengthByte=*s++;
 509
 510         /* read even nibble - MSBs of lengthByte */
 511         if(length>=12) {
 512             /* double-nibble length spread across two bytes */
 513             length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);
 514             lengthByte&=0xf;
 515         } else if((lengthByte /* &0xf0 */)>=0xc0) {
 516             /* double-nibble length spread across this one byte */
 517             length=(uint16_t)((lengthByte&0x3f)+12);
 518         } else {
 519             /* single-nibble length in MSBs */
 520             length=(uint16_t)(lengthByte>>4);
 521             lengthByte&=0xf;
 522         }
 523
 524         *offsets++=offset;
 525         *lengths++=length;
 526
 527         offset+=length;
 528         ++i;
 529
 530         /* read odd nibble - LSBs of lengthByte */
 531         if((lengthByte&0xf0)==0) {
 532             /* this nibble was not consumed for a double-nibble length above */
 533             length=lengthByte;
 534             if(length<12) {
 535                 /* single-nibble length in LSBs */
 536                 *offsets++=offset;
 537                 *lengths++=length;
 538
 539                 offset+=length;
 540                 ++i;
 541             }
 542         } else {
 543             length=0;   /* prevent double-nibble detection in the next iteration */
 544         }
 545     }
 546
 547     /* now, s is at the first group string */
 548     return s;
 549 }
 550
 551 static uint16_t
 552 expandGroupName(UCharNames *names, Group *group,
 553                 uint16_t lineNumber, UCharNameChoice nameChoice,
 554                 char *buffer, uint16_t bufferLength) {
 555     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
 556     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+
 557                                     (group->offsetHigh<<16|group->offsetLow);
 558     s=expandGroupLengths(s, offsets, lengths);
 559     return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,
 560                       buffer, bufferLength);
 561 }
 562
 563 static uint16_t
 564 getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
 565         char *buffer, uint16_t bufferLength) {
 566     Group *group=getGroup(names, code);
 567     if((uint16_t)(code>>GROUP_SHIFT)==group->groupMSB) {
 568         return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
 569                                buffer, bufferLength);
 570     } else {
 571         /* group not found */
 572         /* zero-terminate */
 573         if(bufferLength>0) {
 574             *buffer=0;
 575         }
 576         return 0;
 577     }
 578 }
 579
 580 /*
 581  * enumGroupNames() enumerates all the names in a 32-group
 582  * and either calls the enumerator function or finds a given input name.
 583  */
 584 static UBool
 585 enumGroupNames(UCharNames *names, Group *group,
 586                UChar32 start, UChar32 end,
 587                UEnumCharNamesFn *fn, void *context,
 588                UCharNameChoice nameChoice) {
 589     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
 590     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+
 591                                     (group->offsetHigh<<16|group->offsetLow);
 592
 593     s=expandGroupLengths(s, offsets, lengths);
 594     if(fn!=DO_FIND_NAME) {
 595         char buffer[200];
 596         uint16_t length;
 597
 598         while(start<=end) {
 599             length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer));
 600             if (!length && nameChoice == U_EXTENDED_CHAR_NAME) {
 601                 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
 602             }
 603             /* here, we assume that the buffer is large enough */
 604             if(length>0) {
 605                 if(!fn(context, start, nameChoice, buffer, length)) {
 606                     return FALSE;
 607                 }
 608             }
 609             ++start;
 610         }
 611     } else {
 612         const char *otherName=((FindName *)context)->otherName;
 613         while(start<=end) {
 614             if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) {
 615                 ((FindName *)context)->code=start;
 616                 return FALSE;
 617             }
 618             ++start;
 619         }
 620     }
 621     return TRUE;
 622 }
 623
 624 /*
 * enumExtNames() enumerates extended names.
 626  * It only needs to do it if it is called with a real function and not
 627  * with the dummy DO_FIND_NAME, because u_charFromName() does a check
 628  * for extended names by itself.
 629  */
 630 static UBool
 631 enumExtNames(UChar32 start, UChar32 end,
 632              UEnumCharNamesFn *fn, void *context)
 633 {
 634     if(fn!=DO_FIND_NAME) {
 635         char buffer[200];
 636         uint16_t length;
 637
 638         while(start<=end) {
 639             buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
 640             /* here, we assume that the buffer is large enough */
 641             if(length>0) {
 642                 if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) {
 643                     return FALSE;
 644                 }
 645             }
 646             ++start;
 647         }
 648     }
 649
 650     return TRUE;
 651 }
 652
/*
 * Enumerates (or, with fn==DO_FIND_NAME, searches) the names of the code
 * points in [start, limit) by walking the groups that cover the range.
 * For U_EXTENDED_CHAR_NAME, stretches without any group are passed to
 * enumExtNames() so that every code point gets a name.
 * Returns FALSE as soon as a callee returns FALSE, otherwise TRUE.
 */
static UBool
enumNames(UCharNames *names,
          UChar32 start, UChar32 limit,
          UEnumCharNamesFn *fn, void *context,
          UCharNameChoice nameChoice) {
    uint16_t startGroupMSB, endGroupMSB, groupCount;
    Group *group, *groupLimit;

    startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);
    endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);

    /* find the group that contains start, or the highest before it */
    group=getGroup(names, start);

    if(startGroupMSB==endGroupMSB) {
        if(startGroupMSB==group->groupMSB) {
            /* if start and limit-1 are in the same group, then enumerate only in that one */
            return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice);
        }
        /* otherwise fall through to the extended-names handling at the end */
    } else {
        groupCount=*(uint16_t *)((char *)names+names->groupsOffset);
        groupLimit=(Group *)((char *)names+names->groupsOffset+2)+groupCount;

        if(startGroupMSB==group->groupMSB) {
            /* enumerate characters in the partial start group */
            /* a start on a group boundary is handled by the whole-group loop below */
            if((start&GROUP_MASK)!=0) {
                if(!enumGroupNames(names, group,
                                   start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1,
                                   fn, context, nameChoice)) {
                    return FALSE;
                }
                ++group; /* continue with the next group */
            }
        } else if(startGroupMSB>group->groupMSB) {
            /* make sure that we start enumerating with the first group after start */
            /* extended names fill the gap between start and that next group */
            if (group + 1 < groupLimit && (group + 1)->groupMSB > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) {
                UChar32 end = (group + 1)->groupMSB << GROUP_SHIFT;
                if (end > limit) {
                    end = limit;
                }
                if (!enumExtNames(start, end - 1, fn, context)) {
                    return FALSE;
                }
            }
            ++group;
        }

        /* enumerate entire groups between the start- and end-groups */
        while(group<groupLimit && group->groupMSB<endGroupMSB) {
            start=(UChar32)group->groupMSB<<GROUP_SHIFT;
            if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) {
                return FALSE;
            }
            /* extended names for a gap between this group and the next one */
            if (group + 1 < groupLimit && (group + 1)->groupMSB > group->groupMSB + 1 && nameChoice == U_EXTENDED_CHAR_NAME) {
                UChar32 end = (group + 1)->groupMSB << GROUP_SHIFT;
                if (end > limit) {
                    end = limit;
                }
                if (!enumExtNames((group->groupMSB + 1) << GROUP_SHIFT, end - 1, fn, context)) {
                    return FALSE;
                }
            }
            ++group;
        }

        /* enumerate within the end group (group->groupMSB==endGroupMSB) */
        if(group<groupLimit && group->groupMSB==endGroupMSB) {
            return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);
        } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) {
            /* ran past the last group: continue after it with extended names below */
            /* NOTE(review): reads (group - 1), so this assumes the group table is not empty */
            UChar32 next = ((group - 1)->groupMSB + 1) << GROUP_SHIFT;
            if (next > start) {
                start = next;
            }
        } else {
            return TRUE;
        }
    }

    /* we have not found a group, which means everything is made of
       extended names. */
    if (nameChoice == U_EXTENDED_CHAR_NAME) {
        if (limit > UCHAR_MAX_VALUE + 1) {
            limit = UCHAR_MAX_VALUE + 1;
        }
        return enumExtNames(start, limit - 1, fn, context);
    }

    return TRUE;
}
 742
 743 static uint16_t
 744 writeFactorSuffix(const uint16_t *factors, uint16_t count,
 745                   const char *s, /* suffix elements */
 746                   uint32_t code,
 747                   uint16_t indexes[8], /* output fields from here */
 748                   const char *elementBases[8], const char *elements[8],
 749                   char *buffer, uint16_t bufferLength) {
 750     uint16_t i, factor, bufferPos=0;
 751     char c;
 752
 753     /* write elements according to the factors */
 754
 755     /*
 756      * the factorized elements are determined by modulo arithmetic
 757      * with the factors of this algorithm
 758      *
 759      * note that for fewer operations, count is decremented here
 760      */
 761     --count;
 762     for(i=count; i>0; --i) {
 763         factor=factors[i];
 764         indexes[i]=(uint16_t)(code%factor);
 765         code/=factor;
 766     }
 767     /*
 768      * we don't need to calculate the last modulus because start<=code<=end
 769      * guarantees here that code<=factors[0]
 770      */
 771     indexes[0]=(uint16_t)code;
 772
 773     /* write each element */
 774     for(;;) {
 775         if(elementBases!=NULL) {
 776             *elementBases++=s;
 777         }
 778
 779         /* skip indexes[i] strings */
 780         factor=indexes[i];
 781         while(factor>0) {
 782             while(*s++!=0) {}
 783             --factor;
 784         }
 785         if(elements!=NULL) {
 786             *elements++=s;
 787         }
 788
 789         /* write element */
 790         while((c=*s++)!=0) {
 791             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 792         }
 793
 794         /* we do not need to perform the rest of this loop for i==count - break here */
 795         if(i>=count) {
 796             break;
 797         }
 798
 799         /* skip the rest of the strings for this factors[i] */
 800         factor=(uint16_t)(factors[i]-indexes[i]-1);
 801         while(factor>0) {
 802             while(*s++!=0) {}
 803             --factor;
 804         }
 805
 806         ++i;
 807     }
 808
 809     /* zero-terminate */
 810     if(bufferLength>0) {
 811         *buffer=0;
 812     }
 813
 814     return bufferPos;
 815 }
 816
 817 /*
 818  * Important:
 819  * Parts of findAlgName() are almost the same as some of getAlgName().
 820  * Fixes must be applied to both.
 821  */
 822 static uint16_t
 823 getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
 824         char *buffer, uint16_t bufferLength) {
 825     uint16_t bufferPos=0;
 826
 827     /*
 828      * Do not write algorithmic Unicode 1.0 names because
 829      * Unihan names are the same as the modern ones,
 830      * extension A was only introduced with Unicode 3.0, and
 831      * the Hangul syllable block was moved and changed around Unicode 1.1.5.
 832      */
 833     if(nameChoice==U_UNICODE_10_CHAR_NAME) {
 834         /* zero-terminate */
 835         if(bufferLength>0) {
 836             *buffer=0;
 837         }
 838         return 0;
 839     }
 840
 841     switch(range->type) {
 842     case 0: {
 843         /* name = prefix hex-digits */
 844         const char *s=(const char *)(range+1);
 845         char c;
 846
 847         uint16_t i, count;
 848
 849         /* copy prefix */
 850         while((c=*s++)!=0) {
 851             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 852         }
 853
 854         /* write hexadecimal code point value */
 855         count=range->variant;
 856
 857         /* zero-terminate */
 858         if(count<bufferLength) {
 859             buffer[count]=0;
 860         }
 861
 862         for(i=count; i>0;) {
 863             if(--i<bufferLength) {
 864                 c=(char)(code&0xf);
 865                 if(c<10) {
 866                     c+='0';
 867                 } else {
 868                     c+='A'-10;
 869                 }
 870                 buffer[i]=c;
 871             }
 872             code>>=4;
 873         }
 874
 875         bufferPos+=count;
 876         break;
 877     }
 878     case 1: {
 879         /* name = prefix factorized-elements */
 880         uint16_t indexes[8];
 881         const uint16_t *factors=(const uint16_t *)(range+1);
 882         uint16_t count=range->variant;
 883         const char *s=(const char *)(factors+count);
 884         char c;
 885
 886         /* copy prefix */
 887         while((c=*s++)!=0) {
 888             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 889         }
 890
 891         bufferPos+=writeFactorSuffix(factors, count,
 892                                      s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);
 893         break;
 894     }
 895     default:
 896         /* undefined type */
 897         /* zero-terminate */
 898         if(bufferLength>0) {
 899             *buffer=0;
 900         }
 901         break;
 902     }
 903
 904     return bufferPos;
 905 }
 906
 907 /*
 908  * Important: enumAlgNames() and findAlgName() are almost the same.
 909  * Any fix must be applied to both.
 910  */
 911 static UBool
 912 enumAlgNames(AlgorithmicRange *range,
 913              UChar32 start, UChar32 limit,
 914              UEnumCharNamesFn *fn, void *context,
 915              UCharNameChoice nameChoice) {
 916     char buffer[200];
 917     uint16_t length;
 918
 919     if(nameChoice==U_UNICODE_10_CHAR_NAME) {
 920         return TRUE;
 921     }
 922
 923     switch(range->type) {
 924     case 0: {
 925         char *s, *end;
 926         char c;
 927
 928         /* get the full name of the start character */
 929         length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));
 930         if(length<=0) {
 931             return TRUE;
 932         }
 933
 934         /* call the enumerator function with this first character */
 935         if(!fn(context, start, nameChoice, buffer, length)) {
 936             return FALSE;
 937         }
 938
 939         /* go to the end of the name; all these names have the same length */
 940         end=buffer;
 941         while(*end!=0) {
 942             ++end;
 943         }
 944
 945         /* enumerate the rest of the names */
 946         while(++start<limit) {
 947             /* increment the hexadecimal number on a character-basis */
 948             s=end;
 949             for (;;) {
 950                 c=*--s;
 951                 if(('0'<=c && c<'9') || ('A'<=c && c<'F')) {
 952                     *s=(char)(c+1);
 953                     break;
 954                 } else if(c=='9') {
 955                     *s='A';
 956                     break;
 957                 } else if(c=='F') {
 958                     *s='0';
 959                 }
 960             }
 961
 962             if(!fn(context, start, nameChoice, buffer, length)) {
 963                 return FALSE;
 964             }
 965         }
 966         break;
 967     }
 968     case 1: {
 969         uint16_t indexes[8];
 970         const char *elementBases[8], *elements[8];
 971         const uint16_t *factors=(const uint16_t *)(range+1);
 972         uint16_t count=range->variant;
 973         const char *s=(const char *)(factors+count);
 974         char *suffix, *t;
 975         uint16_t prefixLength, i, index;
 976
 977         char c;
 978
 979         /* name = prefix factorized-elements */
 980
 981         /* copy prefix */
 982         suffix=buffer;
 983         prefixLength=0;
 984         while((c=*s++)!=0) {
 985             *suffix++=c;
 986             ++prefixLength;
 987         }
 988
 989         /* append the suffix of the start character */
 990         length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,
 991                                               s, (uint32_t)start-range->start,
 992                                               indexes, elementBases, elements,
 993                                               suffix, (uint16_t)(sizeof(buffer)-prefixLength)));
 994
 995         /* call the enumerator function with this first character */
 996         if(!fn(context, start, nameChoice, buffer, length)) {
 997             return FALSE;
 998         }
 999
1000         /* enumerate the rest of the names */
1001         while(++start<limit) {
1002             /* increment the indexes in lexical order bound by the factors */
1003             i=count;
1004             for (;;) {
1005                 index=(uint16_t)(indexes[--i]+1);
1006                 if(index<factors[i]) {
1007                     /* skip one index and its element string */
1008                     indexes[i]=index;
1009                     s=elements[i];
1010                     while(*s++!=0) {
1011                     }
1012                     elements[i]=s;
1013                     break;
1014                 } else {
1015                     /* reset this index to 0 and its element string to the first one */
1016                     indexes[i]=0;
1017                     elements[i]=elementBases[i];
1018                 }
1019             }
1020
1021             /* to make matters a little easier, just append all elements to the suffix */
1022             t=suffix;
1023             length=prefixLength;
1024             for(i=0; i<count; ++i) {
1025                 s=elements[i];
1026                 while((c=*s++)!=0) {
1027                     *t++=c;
1028                     ++length;
1029                 }
1030             }
1031             /* zero-terminate */
1032             *t=0;
1033
1034             if(!fn(context, start, nameChoice, buffer, length)) {
1035                 return FALSE;
1036             }
1037         }
1038         break;
1039     }
1040     default:
1041         /* undefined type */
1042         break;
1043     }
1044
1045     return TRUE;
1046 }
1047
1048 /*
1049  * findAlgName() is almost the same as enumAlgNames() except that it
1050  * returns the code point for a name if it fits into the range.
1051  * It returns 0xffff otherwise.
1052  */
1053 static UChar32
1054 findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) {
1055     UChar32 code;
1056
1057     if(nameChoice==U_UNICODE_10_CHAR_NAME) {
1058         return 0xffff;
1059     }
1060
1061     switch(range->type) {
1062     case 0: {
1063         /* name = prefix hex-digits */
1064         const char *s=(const char *)(range+1);
1065         char c;
1066
1067         uint16_t i, count;
1068
1069         /* compare prefix */
1070         while((c=*s++)!=0) {
1071             if((char)c!=*otherName++) {
1072                 return 0xffff;
1073             }
1074         }
1075
1076         /* read hexadecimal code point value */
1077         count=range->variant;
1078         code=0;
1079         for(i=0; i<count; ++i) {
1080             c=*otherName++;
1081             if('0'<=c && c<='9') {
1082                 code=(code<<4)|(c-'0');
1083             } else if('A'<=c && c<='F') {
1084                 code=(code<<4)|(c-'A'+10);
1085             } else {
1086                 return 0xffff;
1087             }
1088         }
1089
1090         /* does it fit into the range? */
1091         if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) {
1092             return code;
1093         }
1094         break;
1095     }
1096     case 1: {
1097         char buffer[64];
1098         uint16_t indexes[8];
1099         const char *elementBases[8], *elements[8];
1100         const uint16_t *factors=(const uint16_t *)(range+1);
1101         uint16_t count=range->variant;
1102         const char *s=(const char *)(factors+count), *t;
1103         UChar32 start, limit;
1104         uint16_t i, index;
1105
1106         char c;
1107
1108         /* name = prefix factorized-elements */
1109
1110         /* compare prefix */
1111         while((c=*s++)!=0) {
1112             if((char)c!=*otherName++) {
1113                 return 0xffff;
1114             }
1115         }
1116
1117         start=(UChar32)range->start;
1118         limit=(UChar32)(range->end+1);
1119
1120         /* initialize the suffix elements for enumeration; indexes should all be set to 0 */
1121         writeFactorSuffix(factors, count, s, 0,
1122                           indexes, elementBases, elements, buffer, sizeof(buffer));
1123
1124         /* compare the first suffix */
1125         if(0==uprv_strcmp(otherName, buffer)) {
1126             return start;
1127         }
1128
1129         /* enumerate and compare the rest of the suffixes */
1130         while(++start<limit) {
1131             /* increment the indexes in lexical order bound by the factors */
1132             i=count;
1133             for (;;) {
1134                 index=(uint16_t)(indexes[--i]+1);
1135                 if(index<factors[i]) {
1136                     /* skip one index and its element string */
1137                     indexes[i]=index;
1138                     s=elements[i];
1139                     while(*s++!=0) {}
1140                     elements[i]=s;
1141                     break;
1142                 } else {
1143                     /* reset this index to 0 and its element string to the first one */
1144                     indexes[i]=0;
1145                     elements[i]=elementBases[i];
1146                 }
1147             }
1148
1149             /* to make matters a little easier, just compare all elements of the suffix */
1150             t=otherName;
1151             for(i=0; i<count; ++i) {
1152                 s=elements[i];
1153                 while((c=*s++)!=0) {
1154                     if(c!=*t++) {
1155                         s=""; /* does not match */
1156                         i=99;
1157                     }
1158                 }
1159             }
1160             if(i<99 && *t==0) {
1161                 return start;
1162             }
1163         }
1164         break;
1165     }
1166     default:
1167         /* undefined type */
1168         break;
1169     }
1170
1171     return 0xffff;
1172 }
1173
1174 /* sets of name characters, maximum name lengths ---------------------------- */
1175
/*
 * Bit-set helpers for a set of byte values stored in an array of
 * 8 uint32_t (256 bits): bits 7..5 of the byte select the word,
 * bits 4..0 select the bit inside that word.
 * The argument c is parenthesized so that expressions such as 'A'+1
 * are converted to uint8_t as a whole; note that c is evaluated twice.
 */
#define SET_ADD(set, c) ((set)[(uint8_t)(c)>>5]|=((uint32_t)1<<((uint8_t)(c)&0x1f)))
#define SET_CONTAINS(set, c) (((set)[(uint8_t)(c)>>5]&((uint32_t)1<<((uint8_t)(c)&0x1f)))!=0)
1178
/*
 * Adds every char of the NUL-terminated string s to set
 * and returns the number of chars before the terminator.
 */
static int32_t
calcStringSetLength(uint32_t set[8], const char *s) {
    const char *p;
    char ch;

    for(p=s; (ch=*p)!=0; ++p) {
        SET_ADD(set, ch);
    }
    return (int32_t)(p-s);
}
1190
1191 static int32_t
1192 calcAlgNameSetsLengths(int32_t maxNameLength) {
1193     AlgorithmicRange *range;
1194     uint32_t *p;
1195     uint32_t rangeCount;
1196     int32_t length;
1197
1198     /* enumerate algorithmic ranges */
1199     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1200     rangeCount=*p;
1201     range=(AlgorithmicRange *)(p+1);
1202     while(rangeCount>0) {
1203         switch(range->type) {
1204         case 0:
1205             /* name = prefix + (range->variant times) hex-digits */
1206             /* prefix */
1207             length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant;
1208             if(length>maxNameLength) {
1209                 maxNameLength=length;
1210             }
1211             break;
1212         case 1: {
1213             /* name = prefix factorized-elements */
1214             const uint16_t *factors=(const uint16_t *)(range+1);
1215             const char *s;
1216             int32_t i, count=range->variant, factor, factorLength, maxFactorLength;
1217
1218             /* prefix length */
1219             s=(const char *)(factors+count);
1220             length=calcStringSetLength(gNameSet, s);
1221             s+=length+1; /* start of factor suffixes */
1222
1223             /* get the set and maximum factor suffix length for each factor */
1224             for(i=0; i<count; ++i) {
1225                 maxFactorLength=0;
1226                 for(factor=factors[i]; factor>0; --factor) {
1227                     factorLength=calcStringSetLength(gNameSet, s);
1228                     s+=factorLength+1;
1229                     if(factorLength>maxFactorLength) {
1230                         maxFactorLength=factorLength;
1231                     }
1232                 }
1233                 length+=maxFactorLength;
1234             }
1235
1236             if(length>maxNameLength) {
1237                 maxNameLength=length;
1238             }
1239             break;
1240         }
1241         default:
1242             /* unknown type */
1243             break;
1244         }
1245
1246         range=(AlgorithmicRange *)((uint8_t *)range+range->size);
1247         --rangeCount;
1248     }
1249     return maxNameLength;
1250 }
1251
1252 static int32_t
1253 calcExtNameSetsLengths(int32_t maxNameLength) {
1254     int32_t i, length;
1255
1256     for(i=0; i<LENGTHOF(charCatNames); ++i) {
1257         /*
1258          * for each category, count the length of the category name
1259          * plus 9=
1260          * 2 for <>
1261          * 1 for -
1262          * 6 for most hex digits per code point
1263          */
1264         length=9+calcStringSetLength(gNameSet, charCatNames[i]);
1265         if(length>maxNameLength) {
1266             maxNameLength=length;
1267         }
1268     }
1269     return maxNameLength;
1270 }
1271
/*
 * Scans one name field of a line: from *pLine up to a ';' or lineLimit.
 * Every char of the expanded name is added to set.
 * On return *pLine points behind the ';' (or at lineLimit), and the
 * return value is the length of the expanded name.
 * tokenLengths may be NULL; otherwise it caches token word lengths,
 * indexed by token number, with 0 meaning "not computed yet".
 */
static int32_t
calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths,
                  uint32_t set[8],
                  const uint8_t **pLine, const uint8_t *lineLimit) {
    const uint8_t *cursor=*pLine;
    int32_t total=0, wordLength;
    uint16_t b, token;

    while(cursor!=lineLimit) {
        b=*cursor++;
        if(b==(uint8_t)';') {
            break;
        }

        if(b>=tokenCount) {
            /* implicit letter */
            SET_ADD(set, b);
            ++total;
            continue;
        }

        token=tokens[b];
        if(token==(uint16_t)(-2)) {
            /* this is a lead byte for a double-byte token */
            b=(uint16_t)(b<<8|*cursor++);
            token=tokens[b];
        }

        if(token==(uint16_t)(-1)) {
            /* explicit letter */
            SET_ADD(set, b);
            ++total;
            continue;
        }

        /* token word: take its length from the cache, or compute (and cache) it */
        wordLength=0;
        if(tokenLengths!=NULL) {
            wordLength=tokenLengths[b];
        }
        if(wordLength==0) {
            wordLength=calcStringSetLength(set, (const char *)tokenStrings+token);
            if(tokenLengths!=NULL) {
                tokenLengths[b]=(int8_t)wordLength;
            }
        }
        total+=wordLength;
    }

    *pLine=cursor;
    return total;
}
1316
1317 static void
1318 calcGroupNameSetsLengths(int32_t maxNameLength) {
1319     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
1320
1321     uint16_t *tokens=(uint16_t *)uCharNames+8;
1322     uint16_t tokenCount=*tokens++;
1323     uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset;
1324
1325     int8_t *tokenLengths;
1326
1327     uint16_t *groups;
1328     Group *group;
1329     const uint8_t *s, *line, *lineLimit;
1330
1331     int32_t groupCount, lineNumber, length;
1332
1333     tokenLengths=(int8_t *)uprv_malloc(tokenCount);
1334     if(tokenLengths!=NULL) {
1335         uprv_memset(tokenLengths, 0, tokenCount);
1336     }
1337
1338     groups=(uint16_t *)((char *)uCharNames+uCharNames->groupsOffset);
1339     groupCount=*groups++;
1340     group=(Group *)groups;
1341
1342     /* enumerate all groups */
1343     while(groupCount>0) {
1344         s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+
1345                                     ((int32_t)group->offsetHigh<<16|group->offsetLow);
1346         s=expandGroupLengths(s, offsets, lengths);
1347
1348         /* enumerate all lines in each group */
1349         for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) {
1350             line=s+offsets[lineNumber];
1351             length=lengths[lineNumber];
1352             if(length==0) {
1353                 continue;
1354             }
1355
1356             lineLimit=line+length;
1357
1358             /* read regular name */
1359             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
1360             if(length>maxNameLength) {
1361                 maxNameLength=length;
1362             }
1363             if(line==lineLimit) {
1364                 continue;
1365             }
1366
1367             /* read Unicode 1.0 name */
1368             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
1369             if(length>maxNameLength) {
1370                 maxNameLength=length;
1371             }
1372             if(line==lineLimit) {
1373                 continue;
1374             }
1375
1376             /* read ISO comment */
1377             /*length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);*/
1378         }
1379
1380         ++group;
1381         --groupCount;
1382     }
1383
1384     if(tokenLengths!=NULL) {
1385         uprv_free(tokenLengths);
1386     }
1387
1388     /* set gMax... - name length last for threading */
1389     gMaxNameLength=maxNameLength;
1390 }
1391
1392 static UBool
1393 calcNameSetsLengths(UErrorCode *pErrorCode) {
1394     static const char extChars[]="0123456789ABCDEF<>-";
1395     int32_t i, maxNameLength;
1396
1397     if(gMaxNameLength!=0) {
1398         return TRUE;
1399     }
1400
1401     if(!isDataLoaded(pErrorCode)) {
1402         return FALSE;
1403     }
1404
1405     /* set hex digits, used in various names, and <>-, used in extended names */
1406     for(i=0; i<sizeof(extChars)-1; ++i) {
1407         SET_ADD(gNameSet, extChars[i]);
1408     }
1409
1410     /* set sets and lengths from algorithmic names */
1411     maxNameLength=calcAlgNameSetsLengths(0);
1412
1413     /* set sets and lengths from extended names */
1414     maxNameLength=calcExtNameSetsLengths(maxNameLength);
1415
1416     /* set sets and lengths from group names, set global maximum values */
1417     calcGroupNameSetsLengths(maxNameLength);
1418
1419     return TRUE;
1420 }
1421
1422 /* public API --------------------------------------------------------------- */
1423
1424 U_CAPI int32_t U_EXPORT2
1425 u_charName(UChar32 code, UCharNameChoice nameChoice,
1426            char *buffer, int32_t bufferLength,
1427            UErrorCode *pErrorCode) {
1428     AlgorithmicRange *algRange;
1429     uint32_t *p;
1430     uint32_t i;
1431     int32_t length;
1432
1433     /* check the argument values */
1434     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1435         return 0;
1436     } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT ||
1437               bufferLength<0 || (bufferLength>0 && buffer==NULL)
1438     ) {
1439         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1440         return 0;
1441     }
1442
1443     if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
1444         return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
1445     }
1446
1447     length=0;
1448
1449     /* try algorithmic names first */
1450     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1451     i=*p;
1452     algRange=(AlgorithmicRange *)(p+1);
1453     while(i>0) {
1454         if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
1455             length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
1456             break;
1457         }
1458         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1459         --i;
1460     }
1461
1462     if(i==0) {
1463         if (nameChoice == U_EXTENDED_CHAR_NAME) {
1464             length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength);
1465             if (!length) {
1466                 /* extended character name */
1467                 length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength);
1468             }
1469         } else {
1470             /* normal character name */
1471             length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
1472         }
1473     }
1474
1475     return u_terminateChars(buffer, bufferLength, length, pErrorCode);
1476 }
1477
1478 U_CAPI int32_t U_EXPORT2
1479 u_getISOComment(UChar32 c,
1480                 char *dest, int32_t destCapacity,
1481                 UErrorCode *pErrorCode) {
1482     int32_t length;
1483
1484     /* check the argument values */
1485     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1486         return 0;
1487     } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
1488         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1489         return 0;
1490     }
1491
1492     if((uint32_t)c>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
1493         return u_terminateChars(dest, destCapacity, 0, pErrorCode);
1494     }
1495
1496     /* the ISO comment is stored like a normal character name */
1497     length=getName(uCharNames, (uint32_t)c, U_ISO_COMMENT, dest, (uint16_t)destCapacity);
1498     return u_terminateChars(dest, destCapacity, length, pErrorCode);
1499 }
1500
1501 U_CAPI UChar32 U_EXPORT2
1502 u_charFromName(UCharNameChoice nameChoice,
1503                const char *name,
1504                UErrorCode *pErrorCode) {
1505     char upper[120], lower[120];
1506     FindName findName;
1507     AlgorithmicRange *algRange;
1508     uint32_t *p;
1509     uint32_t i;
1510     UChar32 cp = 0;
1511     char c0;
1512     UChar32 error = 0xffff;     /* Undefined, but use this for backwards compatibility. */
1513
1514     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1515         return error;
1516     }
1517
1518     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) {
1519         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1520         return error;
1521     }
1522
1523     if(!isDataLoaded(pErrorCode)) {
1524         return error;
1525     }
1526
1527     /* construct the uppercase and lowercase of the name first */
1528     for(i=0; i<sizeof(upper); ++i) {
1529         if((c0=*name++)!=0) {
1530             upper[i]=uprv_toupper(c0);
1531             lower[i]=uprv_tolower(c0);
1532         } else {
1533             upper[i]=lower[i]=0;
1534             break;
1535         }
1536     }
1537     if(i==sizeof(upper)) {
1538         /* name too long, there is no such character */
1539         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1540         return error;
1541     }
1542
1543     /* try extended names first */
1544     if (lower[0] == '<') {
1545         if (nameChoice == U_EXTENDED_CHAR_NAME) {
1546             if (lower[--i] == '>') {
1547                 for (--i; lower[i] && lower[i] != '-'; --i) {
1548                 }
1549
1550                 if (lower[i] == '-') { /* We've got a category. */
1551                     uint32_t cIdx;
1552
1553                     lower[i] = 0;
1554
1555                     for (++i; lower[i] != '>'; ++i) {
1556                         if (lower[i] >= '0' && lower[i] <= '9') {
1557                             cp = (cp << 4) + lower[i] - '0';
1558                         } else if (lower[i] >= 'a' && lower[i] <= 'f') {
1559                             cp = (cp << 4) + lower[i] - 'a' + 10;
1560                         } else {
1561                             *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1562                             return error;
1563                         }
1564                     }
1565
1566                     /* Now validate the category name.
1567                        We could use a binary search, or a trie, if
1568                        we really wanted to. */
1569
1570                     for (lower[i] = 0, cIdx = 0; cIdx < LENGTHOF(charCatNames); ++cIdx) {
1571
1572                         if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
1573                             if (getCharCat(cp) == cIdx) {
1574                                 return cp;
1575                             }
1576                             break;
1577                         }
1578                     }
1579                 }
1580             }
1581         }
1582
1583         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1584         return error;
1585     }
1586
1587     /* try algorithmic names now */
1588     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1589     i=*p;
1590     algRange=(AlgorithmicRange *)(p+1);
1591     while(i>0) {
1592         if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) {
1593             return cp;
1594         }
1595         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1596         --i;
1597     }
1598
1599     /* normal character name */
1600     findName.otherName=upper;
1601     findName.code=error;
1602     enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice);
1603     if (findName.code == error) {
1604          *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1605     }
1606     return findName.code;
1607 }
1608
1609 U_CAPI void U_EXPORT2
1610 u_enumCharNames(UChar32 start, UChar32 limit,
1611                 UEnumCharNamesFn *fn,
1612                 void *context,
1613                 UCharNameChoice nameChoice,
1614                 UErrorCode *pErrorCode) {
1615     AlgorithmicRange *algRange;
1616     uint32_t *p;
1617     uint32_t i;
1618
1619     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1620         return;
1621     }
1622
1623     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) {
1624         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1625         return;
1626     }
1627
1628     if((uint32_t) limit > UCHAR_MAX_VALUE + 1) {
1629         limit = UCHAR_MAX_VALUE + 1;
1630     }
1631     if((uint32_t)start>=(uint32_t)limit) {
1632         return;
1633     }
1634
1635     if(!isDataLoaded(pErrorCode)) {
1636         return;
1637     }
1638
1639     /* interleave the data-driven ones with the algorithmic ones */
1640     /* iterate over all algorithmic ranges; assume that they are in ascending order */
1641     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1642     i=*p;
1643     algRange=(AlgorithmicRange *)(p+1);
1644     while(i>0) {
1645         /* enumerate the character names before the current algorithmic range */
1646         /* here: start<limit */
1647         if((uint32_t)start<algRange->start) {
1648             if((uint32_t)limit<=algRange->start) {
1649                 enumNames(uCharNames, start, limit, fn, context, nameChoice);
1650                 return;
1651             }
1652             if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) {
1653                 return;
1654             }
1655             start=(UChar32)algRange->start;
1656         }
1657         /* enumerate the character names in the current algorithmic range */
1658         /* here: algRange->start<=start<limit */
1659         if((uint32_t)start<=algRange->end) {
1660             if((uint32_t)limit<=(algRange->end+1)) {
1661                 enumAlgNames(algRange, start, limit, fn, context, nameChoice);
1662                 return;
1663             }
1664             if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) {
1665                 return;
1666             }
1667             start=(UChar32)algRange->end+1;
1668         }
1669         /* continue to the next algorithmic range (here: start<limit) */
1670         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1671         --i;
1672     }
1673     /* enumerate the character names after the last algorithmic range */
1674     enumNames(uCharNames, start, limit, fn, context, nameChoice);
1675 }
1676
1677 U_CAPI int32_t U_EXPORT2
1678 uprv_getMaxCharNameLength() {
1679     UErrorCode errorCode=U_ZERO_ERROR;
1680     if(calcNameSetsLengths(&errorCode)) {
1681         return gMaxNameLength;
1682     } else {
1683         return 0;
1684     }
1685 }
1686
1687 /**
1688  * Converts the char set cset into a Unicode set uset.
1689  * @param cset Set of 256 bit flags corresponding to a set of chars.
1690  * @param uset USet to receive characters. Existing contents are deleted.
1691  */
1692 static void
1693 charSetToUSet(uint32_t cset[8], const USetAdder *sa) {
1694     UChar us[256];
1695     char cs[256];
1696
1697     int32_t i, length;
1698     UErrorCode errorCode;
1699
1700     errorCode=U_ZERO_ERROR;
1701
1702     if(!calcNameSetsLengths(&errorCode)) {
1703         return;
1704     }
1705
1706     /* build a char string with all chars that are used in character names */
1707     length=0;
1708     for(i=0; i<256; ++i) {
1709         if(SET_CONTAINS(cset, i)) {
1710             cs[length++]=(char)i;
1711         }
1712     }
1713
1714     /* convert the char string to a UChar string */
1715     u_charsToUChars(cs, us, length);
1716
1717     /* add each UChar to the USet */
1718     for(i=0; i<length; ++i) {
1719         if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */
1720             sa->add(sa->set, us[i]);
1721         }
1722     }
1723 }
1724
/**
 * Fills a set with characters that are used in Unicode character names.
 * This simply passes gNameSet and sa on to charSetToUSet().
 * @param sa USetAdder that is handed through to charSetToUSet().
 */
U_CAPI void U_EXPORT2
uprv_getCharNameCharacters(const USetAdder *sa) {
    charSetToUSet(gNameSet, sa);
}
1733
1734 /* data swapping ------------------------------------------------------------ */
1735
1736 /*
1737  * The token table contains non-negative entries for token bytes,
1738  * and -1 for bytes that represent themselves in the data file's charset.
1739  * -2 entries are used for lead bytes.
1740  *
1741  * Direct bytes (-1 entries) must be translated from the input charset family
1742  * to the output charset family.
1743  * makeTokenMap() writes a permutation mapping for this.
1744  * Use it once for single-/lead-byte tokens and once more for all trail byte
1745  * tokens. (';' is an unused trail byte marked with -1.)
1746  */
1747 static void
1748 makeTokenMap(const UDataSwapper *ds,
1749              int16_t tokens[], uint16_t tokenCount,
1750              uint8_t map[256],
1751              UErrorCode *pErrorCode) {
1752     UBool usedOutChar[256];
1753     uint16_t i, j;
1754     uint8_t c1, c2;
1755
1756     if(U_FAILURE(*pErrorCode)) {
1757         return;
1758     }
1759
1760     if(ds->inCharset==ds->outCharset) {
1761         /* Same charset family: identity permutation */
1762         for(i=0; i<256; ++i) {
1763             map[i]=(uint8_t)i;
1764         }
1765     } else {
1766         uprv_memset(map, 0, 256);
1767         uprv_memset(usedOutChar, 0, 256);
1768
1769         if(tokenCount>256) {
1770             tokenCount=256;
1771         }
1772
1773         /* set the direct bytes (byte 0 always maps to itself) */
1774         for(i=1; i<tokenCount; ++i) {
1775             if(tokens[i]==-1) {
1776                 /* convert the direct byte character */
1777                 c1=(uint8_t)i;
1778                 ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode);
1779                 if(U_FAILURE(*pErrorCode)) {
1780                     udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n",
1781                                      i, ds->inCharset);
1782                     return;
1783                 }
1784
1785                 /* enter the converted character into the map and mark it used */
1786                 map[c1]=c2;
1787                 usedOutChar[c2]=TRUE;
1788             }
1789         }
1790
1791         /* set the mappings for the rest of the permutation */
1792         for(i=j=1; i<tokenCount; ++i) {
1793             /* set mappings that were not set for direct bytes */
1794             if(map[i]==0) {
1795                 /* set an output byte value that was not used as an output byte above */
1796                 while(usedOutChar[j]) {
1797                     ++j;
1798                 }
1799                 map[i]=(uint8_t)j++;
1800             }
1801         }
1802
1803         /*
1804          * leave mappings at tokenCount and above unset if tokenCount<256
1805          * because they won't be used
1806          */
1807     }
1808 }
1809
1810 U_CAPI int32_t U_EXPORT2
1811 uchar_swapNames(const UDataSwapper *ds,
1812                 const void *inData, int32_t length, void *outData,
1813                 UErrorCode *pErrorCode) {
1814     const UDataInfo *pInfo;
1815     int32_t headerSize;
1816
1817     const uint8_t *inBytes;
1818     uint8_t *outBytes;
1819
1820     uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,
1821              offset, i, count, stringsCount;
1822
1823     const AlgorithmicRange *inRange;
1824     AlgorithmicRange *outRange;
1825
1826     /* udata_swapDataHeader checks the arguments */
1827     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
1828     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1829         return 0;
1830     }
1831
1832     /* check data format and format version */
1833     pInfo=(const UDataInfo *)((const char *)inData+4);
1834     if(!(
1835         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
1836         pInfo->dataFormat[1]==0x6e &&
1837         pInfo->dataFormat[2]==0x61 &&
1838         pInfo->dataFormat[3]==0x6d &&
1839         pInfo->formatVersion[0]==1
1840     )) {
1841         udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",
1842                          pInfo->dataFormat[0], pInfo->dataFormat[1],
1843                          pInfo->dataFormat[2], pInfo->dataFormat[3],
1844                          pInfo->formatVersion[0]);
1845         *pErrorCode=U_UNSUPPORTED_ERROR;
1846         return 0;
1847     }
1848
1849     inBytes=(const uint8_t *)inData+headerSize;
1850     outBytes=(uint8_t *)outData+headerSize;
1851     if(length<0) {
1852         algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]);
1853     } else {
1854         length-=headerSize;
1855         if( length<20 ||
1856             (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]))
1857         ) {
1858             udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",
1859                              length);
1860             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1861             return 0;
1862         }
1863     }
1864
1865     if(length<0) {
1866         /* preflighting: iterate through algorithmic ranges */
1867         offset=algNamesOffset;
1868         count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
1869         offset+=4;
1870
1871         for(i=0; i<count; ++i) {
1872             inRange=(const AlgorithmicRange *)(inBytes+offset);
1873             offset+=ds->readUInt16(inRange->size);
1874         }
1875     } else {
1876         /* swap data */
1877         const uint16_t *p;
1878         uint16_t *q, *temp;
1879
1880         int16_t tokens[512];
1881         uint16_t tokenCount;
1882
1883         uint8_t map[256], trailMap[256];
1884
1885         /* copy the data for inaccessible bytes */
1886         if(inBytes!=outBytes) {
1887             uprv_memcpy(outBytes, inBytes, length);
1888         }
1889
1890         /* the initial 4 offsets first */
1891         tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);
1892         groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);
1893         groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);
1894         ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode);
1895
1896         /*
1897          * now the tokens table
1898          * it needs to be permutated along with the compressed name strings
1899          */
1900         p=(const uint16_t *)(inBytes+16);
1901         q=(uint16_t *)(outBytes+16);
1902
1903         /* read and swap the tokenCount */
1904         tokenCount=ds->readUInt16(*p);
1905         ds->swapArray16(ds, p, 2, q, pErrorCode);
1906         ++p;
1907         ++q;
1908
1909         /* read the first 512 tokens and make the token maps */
1910         if(tokenCount<=512) {
1911             count=tokenCount;
1912         } else {
1913             count=512;
1914         }
1915         for(i=0; i<count; ++i) {
1916             tokens[i]=udata_readInt16(ds, p[i]);
1917         }
1918         for(; i<512; ++i) {
1919             tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */
1920         }
1921         makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);
1922         makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode);
1923         if(U_FAILURE(*pErrorCode)) {
1924             return 0;
1925         }
1926
1927         /*
1928          * swap and permutate the tokens
1929          * go through a temporary array to support in-place swapping
1930          */
1931         temp=(uint16_t *)uprv_malloc(tokenCount*2);
1932         if(temp==NULL) {
1933             udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",
1934                              tokenCount);
1935             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1936             return 0;
1937         }
1938
1939         /* swap and permutate single-/lead-byte tokens */
1940         for(i=0; i<tokenCount && i<256; ++i) {
1941             ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode);
1942         }
1943
1944         /* swap and permutate trail-byte tokens */
1945         for(; i<tokenCount; ++i) {
1946             ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode);
1947         }
1948
1949         /* copy the result into the output and free the temporary array */
1950         uprv_memcpy(q, temp, tokenCount*2);
1951         uprv_free(temp);
1952
1953         /*
1954          * swap the token strings but not a possible padding byte after
1955          * the terminating NUL of the last string
1956          */
1957         udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),
1958                                     outBytes+tokenStringOffset, pErrorCode);
1959         if(U_FAILURE(*pErrorCode)) {
1960             udata_printError(ds, "uchar_swapNames(token strings) failed\n");
1961             return 0;
1962         }
1963
1964         /* swap the group table */
1965         count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset)));
1966         ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2),
1967                            outBytes+groupsOffset, pErrorCode);
1968
1969         /*
1970          * swap the group strings
1971          * swap the string bytes but not the nibble-encoded string lengths
1972          */
1973         if(ds->inCharset!=ds->outCharset) {
1974             uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1];
1975
1976             const uint8_t *inStrings, *nextInStrings;
1977             uint8_t *outStrings;
1978
1979             uint8_t c;
1980
1981             inStrings=inBytes+groupStringOffset;
1982             outStrings=outBytes+groupStringOffset;
1983
1984             stringsCount=algNamesOffset-groupStringOffset;
1985
1986             /* iterate through string groups until only a few padding bytes are left */
1987             while(stringsCount>32) {
1988                 nextInStrings=expandGroupLengths(inStrings, offsets, lengths);
1989
1990                 /* move past the length bytes */
1991                 stringsCount-=(uint32_t)(nextInStrings-inStrings);
1992                 outStrings+=nextInStrings-inStrings;
1993                 inStrings=nextInStrings;
1994
1995                 count=offsets[31]+lengths[31]; /* total number of string bytes in this group */
1996                 stringsCount-=count;
1997
1998                 /* swap the string bytes using map[] and trailMap[] */
1999                 while(count>0) {
2000                     c=*inStrings++;
2001                     *outStrings++=map[c];
2002                     if(tokens[c]!=-2) {
2003                         --count;
2004                     } else {
2005                         /* token lead byte: swap the trail byte, too */
2006                         *outStrings++=trailMap[*inStrings++];
2007                         count-=2;
2008                     }
2009                 }
2010             }
2011         }
2012
2013         /* swap the algorithmic ranges */
2014         offset=algNamesOffset;
2015         count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
2016         ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);
2017         offset+=4;
2018
2019         for(i=0; i<count; ++i) {
2020             if(offset>(uint32_t)length) {
2021                 udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",
2022                                  length, i);
2023                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
2024                 return 0;
2025             }
2026
2027             inRange=(const AlgorithmicRange *)(inBytes+offset);
2028             outRange=(AlgorithmicRange *)(outBytes+offset);
2029             offset+=ds->readUInt16(inRange->size);
2030
2031             ds->swapArray32(ds, inRange, 8, outRange, pErrorCode);
2032             ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);
2033             switch(inRange->type) {
2034             case 0:
2035                 /* swap prefix string */
2036                 ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)),
2037                                     outRange+1, pErrorCode);
2038                 if(U_FAILURE(*pErrorCode)) {
2039                     udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n",
2040                                      i);
2041                     return 0;
2042                 }
2043                 break;
2044             case 1:
2045                 {
2046                     /* swap factors and the prefix and factor strings */
2047                     uint32_t factorsCount;
2048
2049                     factorsCount=inRange->variant;
2050                     p=(const uint16_t *)(inRange+1);
2051                     q=(uint16_t *)(outRange+1);
2052                     ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode);
2053
2054                     /* swap the strings, up to the last terminating NUL */
2055                     p+=factorsCount;
2056                     q+=factorsCount;
2057                     stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p);
2058                     while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) {
2059                         --stringsCount;
2060                     }
2061                     ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode);
2062                 }
2063                 break;
2064             default:
2065                 udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",
2066                                  inRange->type, i);
2067                 *pErrorCode=U_UNSUPPORTED_ERROR;
2068                 return 0;
2069             }
2070         }
2071     }
2072
2073     return headerSize+(int32_t)offset;
2074 }
2075
2076 /*
2077  * Hey, Emacs, please set the following:
2078  *
2079  * Local Variables:
2080  * indent-tabs-mode: nil
2081  * End:
2082  *
2083  */