reactos/lib/3rdparty/icu4ros/icu/source/common/ucnv2022.c

   1 /*
   2 **********************************************************************
   3 *   Copyright (C) 2000-2007, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6 *   file name:  ucnv2022.c
   7 *   encoding:   US-ASCII
   8 *   tab size:   8 (not used)
   9 *   indentation:4
  10 *
  11 *   created on: 2000feb03
  12 *   created by: Markus W. Scherer
  13 *
  14 *   Change history:
  15 *
  16 *   06/29/2000  helena  Major rewrite of the callback APIs.
  17 *   08/08/2000  Ram     Included support for ISO-2022-JP-2
  18 *                       Changed implementation of toUnicode
  19 *                       function
  20 *   08/21/2000  Ram     Added support for ISO-2022-KR
   21 *   08/29/2000  Ram     Separated implementation of EBCDIC to
  22 *                       ucnvebdc.c
  23 *   09/20/2000  Ram     Added support for ISO-2022-CN
  24 *                       Added implementations for getNextUChar()
  25 *                       for specific 2022 country variants.
  26 *   10/31/2000  Ram     Implemented offsets logic functions
  27 */
  28
  29 #include "unicode/utypes.h"
  30
  31 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
  32
  33 #include "unicode/ucnv.h"
  34 #include "unicode/uset.h"
  35 #include "unicode/ucnv_err.h"
  36 #include "unicode/ucnv_cb.h"
  37 #include "ucnv_imp.h"
  38 #include "ucnv_bld.h"
  39 #include "ucnv_cnv.h"
  40 #include "ucnvmbcs.h"
  41 #include "cstring.h"
  42 #include "cmemory.h"
  43
   /* Element count of a real array (not a pointer), cast to int32_t. */
   44 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
  45
  46 #ifdef U_ENABLE_GENERIC_ISO_2022
  47 /*
  48  * I am disabling the generic ISO-2022 converter after proposing to do so on
  49  * the icu mailing list two days ago.
  50  *
  51  * Reasons:
  52  * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of
  53  *    its designation sequences, single shifts with return to the previous state,
  54  *    switch-with-no-return to UTF-16BE or similar, etc.
  55  *    This is unlike the language-specific variants like ISO-2022-JP which
  56  *    require a much smaller repertoire of ISO-2022 features.
  57  *    These variants continue to be supported.
  58  * 2. I believe that no one is really using the generic ISO-2022 converter
  59  *    but rather always one of the language-specific variants.
  60  *    Note that ICU's generic ISO-2022 converter has always output one escape
  61  *    sequence followed by UTF-8 for the whole stream.
  62  * 3. Switching between subcharsets is extremely slow, because each time
  63  *    the previous converter is closed and a new one opened,
  64  *    without any kind of caching, least-recently-used list, etc.
  65  * 4. The code is currently buggy, and given the above it does not seem
  66  *    reasonable to spend the time on maintenance.
  67  * 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings.
  68  *    This means, for example, that when ISO-8859-7 is designated, the following
  69  *    ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.
  70  *    The ICU ISO-2022 converter does not handle this - and has no information
  71  *    about which subconverter would have to be shifted vs. which is designed
  72  *    for 7-bit ISO-2022.
  73  *
  74  * Markus Scherer 2003-dec-03
  75  */
  76 #endif
  77
   /* The shift controls as one-byte strings: 0x0F is SI (shift in), 0x0E is SO (shift out). */
   78 static const char SHIFT_IN_STR[]  = "\x0F";
   79 static const char SHIFT_OUT_STR[] = "\x0E";
  80
   /* Code values of common ASCII control and whitespace characters. */
   81 #define CR      0x0D /* carriage return */
   82 #define LF      0x0A /* line feed */
   83 #define H_TAB   0x09 /* horizontal tab */
   84 #define V_TAB   0x0B /* vertical tab */
   85 #define SPACE   0x20 /* space */
  86
   /* First and last code point of the halfwidth Katakana range (U+FF61..U+FF9F). */
   87 enum {
   88     HWKANA_START=0xff61,
   89     HWKANA_END=0xff9f
   90 };
  91
  92 /*
  93  * 94-character sets with native byte values A1..FE are encoded in ISO 2022
  94  * as bytes 21..7E. (Subtract 0x80.)
  95  * 96-character sets with native byte values A0..FF are encoded in ISO 2022
  96  * as bytes 20..7F. (Subtract 0x80.)
  97  * Do not encode C1 control codes with native bytes 80..9F
  98  * as bytes 00..1F (C0 control codes).
  99  */
  /* Native (8-bit) byte ranges of 94- and 96-character sets; subtract 0x80 to get the ISO 2022 byte. */
  100 enum {
  101     GR94_START=0xa1, /* first native byte of a 94-character set */
  102     GR94_END=0xfe, /* last native byte of a 94-character set */
  103     GR96_START=0xa0, /* first native byte of a 96-character set */
  104     GR96_END=0xff /* last native byte of a 96-character set */
  105 };
 106
 107 /*
 108  * ISO 2022 control codes must not be converted from Unicode
 109  * because they would mess up the byte stream.
 110  * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
 111  * corresponding to SO, SI, and ESC.
 112  */
 113 #define IS_2022_CONTROL(c) (((c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0)
 114
  115 /* for ISO-2022-JP and -CN implementations */
      /*
       * Charset/state identifiers. The JP group and the CN group reuse the same
       * small values (1..3), so a value is only meaningful together with the
       * variant (JP or CN) it is used for.
       */
  116 typedef enum  {
  117         /* shared values */
  118         INVALID_STATE=-1,
  119         ASCII = 0,
  120
  121         SS2_STATE=0x10,
  122         SS3_STATE,
  123
  124         /* JP */
  125         ISO8859_1 = 1 ,
  126         ISO8859_7 = 2 ,
  127         JISX201  = 3,
  128         JISX208 = 4,
  129         JISX212 = 5,
  130         GB2312  =6,
  131         KSC5601 =7,
  132         HWKANA_7BIT=8,    /* Halfwidth Katakana 7 bit */
  133
  134         /* CN */
  135         /* the first few enum constants must keep their values because they correspond to myConverterArray[] */
  136         GB2312_1=1,
  137         ISO_IR_165=2,
  138         CNS_11643=3,
  139
  140         /*
  141          * these are used in StateEnum and ISO2022State variables,
  142          * but CNS_11643 must be used to index into myConverterArray[]
  143          */
  144         CNS_11643_0=0x20,
  145         CNS_11643_1,
  146         CNS_11643_2,
  147         CNS_11643_3,
  148         CNS_11643_4,
  149         CNS_11643_5,
  150         CNS_11643_6,
  151         CNS_11643_7
  152 } StateEnum;
 153
  154 /* is the StateEnum charset value for a DBCS charset? */
      /* True for the JP values JISX208 through KSC5601 (4..7); cs is evaluated twice. */
  155 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
  156
      /* Charset mask: the 16-bit value with only bit number cs set; used to build and test jpCharsetMasks[]. */
  157 #define CSM(cs) ((uint16_t)1<<(cs))
 158
 159 /*
 160  * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
 161  * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x
 162  *
 163  * Note: The converter uses some leniency:
 164  * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
 165  *   all versions, not just JIS7 and JIS8.
 166  * - ICU does not distinguish between different versions of JIS X 0208.
 167  */
  /* Indexed by the ISO-2022-JP version number; there are exactly 5 entries (versions 0..4). */
  168 static const uint16_t jpCharsetMasks[5]={
  169     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
  170     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
  171     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
  172     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
  173     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
  174 };
 175
  /* Kind of charset recorded in UConverterDataISO2022.currentType (set to ASCII1 when a converter is opened). */
  176 typedef enum {
  177         ASCII1=0,
  178         LATIN1,
  179         SBCS,
  180         DBCS,
  181         MBCS,
  182         HWKANA
  183 }Cnv2022Type;
 184
  /* Designation/shift state for one conversion direction; zero-filled on reset. */
  185 typedef struct ISO2022State {
  186     int8_t cs[4];       /* charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
  187     int8_t g;           /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
  188     int8_t prevG;       /* g before single shift (SS2 or SS3) */
  189 } ISO2022State;
 190
  /* The low four bits of the open options carry the variant version number. */
  191 #define UCNV_OPTIONS_VERSION_MASK 0xf
  /* Number of slots in UConverterDataISO2022.myConverterArray[]. */
  192 #define UCNV_2022_MAX_CONVERTERS 10
 193
  /* Per-converter ISO-2022 state; allocated by _ISO2022Open() and stored in UConverter.extraInfo. */
  194 typedef struct{
          /* shared data of the sub-charsets, loaded in _ISO2022Open() and unloaded in _ISO2022Close(); unused slots are NULL */
  195     UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS];
          /* sub-converter opened with ucnv_open() for the KR variant; closed in _ISO2022Close() */
  196     UConverter *currentConverter;
  197     Cnv2022Type currentType;
          /* designation/shift state, one per direction */
  198     ISO2022State toU2022State, fromU2022State;
          /* cleared on a to-Unicode reset; presumably the escape-sequence key accumulated so far -- TODO confirm */
  199     uint32_t key;
          /* variant version taken from the open options */
  200     uint32_t version;
  201 #ifdef U_ENABLE_GENERIC_ISO_2022
  202     UBool isFirstBuffer;
  203 #endif
          /* converter name returned by _ISO2022getName(), e.g. "ISO_2022,locale=ja,version=0" */
  204     char name[30];
          /* "ja", "ko" or "cn"; stays empty for the generic converter */
  205     char locale[3];
  206 }UConverterDataISO2022;
 207
 208 /* Protos */
 209 /* ISO-2022 ----------------------------------------------------------------- */
 210
  211 /*Forward declaration */
      /* UTF-8 from-Unicode functions (plain and offsets-tracking); not defined in the visible part of this file. */
  212 U_CFUNC void
  213 ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args,
  214                       UErrorCode * err);
  215 U_CFUNC void
  216 ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,
  217                                     UErrorCode * err);
  218
  219 #define ESC_2022 0x1B /*ESC*/
 218
 219 #define ESC_2022 0x1B /*ESC*/
 220
  /* Classification of the escape-sequence prefix read so far; element type of escSeqStateTable_Value_2022[]. */
  221 typedef enum
  222 {
  223         INVALID_2022 = -1, /*Doesn't correspond to a valid iso 2022 escape sequence*/
  224         VALID_NON_TERMINAL_2022 = 0, /*so far corresponds to a valid iso 2022 escape sequence*/
  225         VALID_TERMINAL_2022 = 1, /*corresponds to a valid iso 2022 escape sequence*/
  226         VALID_MAYBE_TERMINAL_2022 = 2 /*so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence*/
  227 } UCNV_TableStates_2022;
 228
 229 /*
 230 * The way these state transition arrays work is:
 231 * ex : ESC$B is the sequence for JISX208
 232 *      a) First Iteration: char is ESC
 233 *          i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
 234 *             int x = normalize_esq_chars_2022[27] which is equal to 1
 235 *         ii) Search for this value in escSeqStateTable_Key_2022[]
 236 *             value of x is stored at escSeqStateTable_Key_2022[0]
 237 *        iii) Save this index as offset
 238 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
 239 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
 240 *     b) Switch on this state and continue to next char
 241 *          i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
 242 *             which is normalize_esq_chars_2022[36] == 4
 243 *         ii) x is currently 1(from above)
 244 *               x<<=5 -- x is now 32
 245 *               x+=normalize_esq_chars_2022[36]
 246 *               now x is 36
 247 *        iii) Search for this value in escSeqStateTable_Key_2022[]
 248 *             value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
 249 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
 250 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
 251 *     c) Switch on this state and continue to next char
 252 *        i)  Get the value of B from normalize_esq_chars_2022[] with int value of B as index
 253 *        ii) x is currently 36 (from above)
 254 *            x<<=5 -- x is now 1152
 255 *            x+=normalize_esq_chars_2022[66]
 256 *            now x is 1161
 257 *       iii) Search for this value in escSeqStateTable_Key_2022[]
  258 *            value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
  259 *        iv) Get state of this sequence from escSeqStateTable_Value_2022[24]
  260 *            escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
  261 *         v) Get the converter name from escSeqStateTable_Result_2022[24] which is JISX-208
 262 */
 263
 264
  265 /*Below are the 3 arrays depicting a state transition table*/
      /*
       * normalize_esq_chars_2022[b] maps byte value b to a small nonzero code if b
       * takes part in a listed escape sequence, and to 0 otherwise. All codes are
       * below 32, so each fits in the 5 bits by which the key is shifted per byte.
       * Each row below holds 10 entries (indices 0..9, 10..19, ...); 256 in total.
       */
  266 static const int8_t normalize_esq_chars_2022[256] = {
  267 /*       0      1       2       3       4      5       6        7       8       9           */
  268
  269          0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
  270         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
  271         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,1      ,0      ,0
  272         ,0     ,0      ,0      ,0      ,0      ,0      ,4      ,7      ,29      ,0
  273         ,2     ,24     ,26     ,27     ,0      ,3      ,23     ,6      ,0      ,0
  274         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
  275         ,0     ,0      ,0      ,0      ,5      ,8      ,9      ,10     ,11     ,12
  276         ,13    ,14     ,15     ,16     ,17     ,18     ,19     ,20     ,25     ,28
  277         ,0     ,0      ,21     ,0      ,0      ,0      ,0      ,0      ,0      ,0
  278         ,22    ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
  279         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
  280         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
  281         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
  282         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
  283         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
  284         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
  285         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
  286         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
  287         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
  288         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
  289         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
  290         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
  291         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
  292         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
  293         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
  294         ,0     ,0      ,0      ,0      ,0      ,0
  295 };
 296
 297 #ifdef U_ENABLE_GENERIC_ISO_2022
 298 /*
 299  * When the generic ISO-2022 converter is completely removed, not just disabled
 300  * per #ifdef, then the following state table and the associated tables that are
 301  * dimensioned with MAX_STATES_2022 should be trimmed.
 302  *
 303  * Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of
 304  * the associated escape sequences starting with ESC ( B should be removed.
 305  * This includes the ones with key values 1097 and all of the ones above 1000000.
 306  *
 307  * For the latter, the tables can simply be truncated.
 308  * For the former, since the tables must be kept parallel, it is probably best
 309  * to simply duplicate an adjacent table cell, parallel in all tables.
 310  *
 311  * It may make sense to restructure the tables, especially by using small search
 312  * tables for the variants instead of indexing them parallel to the table here.
 313  */
 314 #endif
 315
  /*
   * MAX_STATES_2022 is the common length of the parallel escape-sequence tables.
   * escSeqStateTable_Key_2022[] holds the keys of all recognized escape-sequence
   * prefixes, in ascending order. A key is built by shifting the previous key
   * left by 5 bits and adding the normalized code of the next byte. The index at
   * which a key is found is the offset into the other tables of this length.
   */
  316 #define MAX_STATES_2022 74
  317 static const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
  318 /*   0           1           2           3           4           5           6           7           8           9           */
  319
  320      1          ,34         ,36         ,39         ,55         ,57         ,60         ,61         ,1093       ,1096
  321     ,1097       ,1098       ,1099       ,1100       ,1101       ,1102       ,1103       ,1104       ,1105       ,1106
  322     ,1109       ,1154       ,1157       ,1160       ,1161       ,1176       ,1178       ,1179       ,1254       ,1257
  323     ,1768       ,1773       ,1957       ,35105      ,36933      ,36936      ,36937      ,36938      ,36939      ,36940
  324     ,36942      ,36943      ,36944      ,36945      ,36946      ,36947      ,36948      ,37640      ,37642      ,37644
  325     ,37646      ,37711      ,37744      ,37745      ,37746      ,37747      ,37748      ,40133      ,40136      ,40138
  326     ,40139      ,40140      ,40141      ,1123363    ,35947624   ,35947625   ,35947626   ,35947627   ,35947629   ,35947630
  327     ,35947631   ,35947635   ,35947636   ,35947638
  328 };
 329
  330 #ifdef U_ENABLE_GENERIC_ISO_2022
  331
      /*
       * Parallel to escSeqStateTable_Key_2022[]: the converter name selected by the
       * escape sequence with the same index, or NULL where the entry names none.
       * Only compiled for the generic ISO-2022 converter.
       */
  332 static const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = {
  333  /*  0                      1                        2                      3                   4                   5                        6                      7                       8                       9    */
  334
  335      NULL                   ,NULL                   ,NULL                   ,NULL               ,NULL               ,NULL                   ,NULL                   ,NULL                   ,"latin1"               ,"latin1"
  336     ,"latin1"               ,"ibm-865"              ,"ibm-865"              ,"ibm-865"          ,"ibm-865"          ,"ibm-865"              ,"ibm-865"              ,"JISX0201"             ,"JISX0201"             ,"latin1"
  337     ,"latin1"               ,NULL                   ,"JISX-208"             ,"ibm-5478"         ,"JISX-208"         ,NULL                   ,NULL                   ,NULL                   ,NULL                   ,"UTF8"
  338     ,"ISO-8859-1"           ,"ISO-8859-7"           ,"JIS-X-208"            ,NULL               ,"ibm-955"          ,"ibm-367"              ,"ibm-952"              ,"ibm-949"              ,"JISX-212"             ,"ibm-1383"
  339     ,"ibm-952"              ,"ibm-964"              ,"ibm-964"              ,"ibm-964"          ,"ibm-964"          ,"ibm-964"              ,"ibm-964"              ,"ibm-5478"         ,"ibm-949"              ,"ISO-IR-165"
  340     ,"CNS-11643-1992,1"     ,"CNS-11643-1992,2"     ,"CNS-11643-1992,3"     ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6"     ,"CNS-11643-1992,7"     ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian"
  341     ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL               ,"latin1"           ,"ibm-912"              ,"ibm-913"              ,"ibm-914"              ,"ibm-813"              ,"ibm-1089"
  342     ,"ibm-920"              ,"ibm-915"              ,"ibm-915"              ,"latin1"
  343 };
  344
  345 #endif
 346
  /*
   * Parallel to escSeqStateTable_Key_2022[]: whether the key with the same index
   * is a complete escape sequence (terminal), a proper prefix of one
   * (non-terminal), or both at once (maybe-terminal; only index 10, key 1097).
   */
  347 static const UCNV_TableStates_2022 escSeqStateTable_Value_2022[MAX_STATES_2022] = {
  348 /*          0                           1                         2                             3                           4                           5                               6                        7                          8                           9       */
  349      VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022     ,VALID_NON_TERMINAL_2022   ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
  350     ,VALID_MAYBE_TERMINAL_2022  ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
  351     ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022
  352     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
  353     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
  354     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
  355     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
  356     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
  357 };
 358
 359
  360 /* Type def for refactoring changeState_2022 code*/
      /* Identifies the ISO-2022 variant; the generic value 0 exists only when U_ENABLE_GENERIC_ISO_2022 is defined. */
  361 typedef enum{
  362 #ifdef U_ENABLE_GENERIC_ISO_2022
  363     ISO_2022=0,
  364 #endif
  365     ISO_2022_JP=1,
  366     ISO_2022_KR=2,
  367     ISO_2022_CN=3
  368 } Variant2022;
 369
  370 /*********** ISO 2022 Converter Protos ***********/
      /* allocates and initializes the per-converter state for the variant selected by the locale */
  371 static void
  372 _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t options, UErrorCode *errorCode);
  373
      /* releases the sub-converters and the per-converter state */
  374 static void
  375  _ISO2022Close(UConverter *converter);
  376
      /* resets the to-Unicode and/or from-Unicode state, depending on choice */
  377 static void
  378 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice);
  379
      /* returns the name stored in the per-converter state, or NULL if there is no state */
  380 static const char*
  381 _ISO2022getName(const UConverter* cnv);
  382
  383 static void
  384 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err);
  385
  386 static UConverter *
  387 _ISO_2022_SafeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status);
  388
  389 #ifdef U_ENABLE_GENERIC_ISO_2022
  390 static void
  391 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err);
  392 #endif
  393
      /* shared-data objects of the three language variants; _ISO2022Open() points cnv->sharedData at one of them */
  394 /*const UConverterSharedData _ISO2022Data;*/
  395 static const UConverterSharedData _ISO2022JPData;
  396 static const UConverterSharedData _ISO2022KRData;
  397 static const UConverterSharedData _ISO2022CNData;
 398
 399 /*************** Converter implementations ******************/
 400
 401 /* The purpose of this function is to get around gcc compiler warnings. */
 402 static U_INLINE void
 403 fromUWriteUInt8(UConverter *cnv,
 404                  const char *bytes, int32_t length,
 405                  uint8_t **target, const char *targetLimit,
 406                  int32_t **offsets,
 407                  int32_t sourceIndex,
 408                  UErrorCode *pErrorCode)
 409 {
 410     char *targetChars = (char *)*target;
 411     ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit,
 412                          offsets, sourceIndex, pErrorCode);
 413     *target = (uint8_t*)targetChars;
 414
 415 }
 416
 417 static U_INLINE void
 418 setInitialStateToUnicodeKR(UConverter* converter, UConverterDataISO2022 *myConverterData){
 419     if(myConverterData->version == 1) {
 420         UConverter *cnv = myConverterData->currentConverter;
 421
 422         cnv->toUnicodeStatus=0;     /* offset */
 423         cnv->mode=0;                /* state */
 424         cnv->toULength=0;           /* byteIndex */
 425     }
 426 }
 427
 428 static U_INLINE void
 429 setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){
 430    /* in ISO-2022-KR the designator sequence appears only once
 431     * in a file so we append it only once
 432     */
 433     if( converter->charErrorBufferLength==0){
 434
 435         converter->charErrorBufferLength = 4;
 436         converter->charErrorBuffer[0] = 0x1b;
 437         converter->charErrorBuffer[1] = 0x24;
 438         converter->charErrorBuffer[2] = 0x29;
 439         converter->charErrorBuffer[3] = 0x43;
 440     }
 441     if(myConverterData->version == 1) {
 442         UConverter *cnv = myConverterData->currentConverter;
 443
 444         cnv->fromUChar32=0;
 445         cnv->fromUnicodeStatus=1;   /* prevLength */
 446     }
 447 }
 448
 449 static void
 450 _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t options, UErrorCode *errorCode){
 451
 452     char myLocale[6]={' ',' ',' ',' ',' ',' '};
 453
 454     cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
 455     if(cnv->extraInfo != NULL) {
 456         UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
 457         uint32_t version;
 458
 459         uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022));
 460         myConverterData->currentType = ASCII1;
 461         cnv->fromUnicodeStatus =FALSE;
 462         if(locale){
 463             uprv_strncpy(myLocale, locale, sizeof(myLocale));
 464         }
 465         version = options & UCNV_OPTIONS_VERSION_MASK;
 466         myConverterData->version = version;
 467         if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') &&
 468             (myLocale[2]=='_' || myLocale[2]=='\0'))
 469         {
 470             size_t len=0;
 471             /* open the required converters and cache them */
 472             if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
 473                 myConverterData->myConverterArray[ISO8859_7]= ucnv_loadSharedData("ISO8859_7", NULL, errorCode);
 474             }
 475             myConverterData->myConverterArray[JISX201]      = ucnv_loadSharedData("JISX0201", NULL, errorCode);
 476             myConverterData->myConverterArray[JISX208]      = ucnv_loadSharedData("jisx-208", NULL, errorCode);
 477             if(jpCharsetMasks[version]&CSM(JISX212)) {
 478                 myConverterData->myConverterArray[JISX212]  = ucnv_loadSharedData("jisx-212", NULL, errorCode);
 479             }
 480             if(jpCharsetMasks[version]&CSM(GB2312)) {
 481                 myConverterData->myConverterArray[GB2312]   = ucnv_loadSharedData("ibm-5478", NULL, errorCode);   /* gb_2312_80-1 */
 482             }
 483             if(jpCharsetMasks[version]&CSM(KSC5601)) {
 484                 myConverterData->myConverterArray[KSC5601]  = ucnv_loadSharedData("ksc_5601", NULL, errorCode);
 485             }
 486
 487             /* set the function pointers to appropriate funtions */
 488             cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
 489             uprv_strcpy(myConverterData->locale,"ja");
 490
 491             (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");
 492             len = uprv_strlen(myConverterData->name);
 493             myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
 494             myConverterData->name[len+1]='\0';
 495         }
 496         else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
 497             (myLocale[2]=='_' || myLocale[2]=='\0'))
 498         {
 499             if (version==1){
 500                 myConverterData->currentConverter=
 501                     ucnv_open("icu-internal-25546",errorCode);
 502
 503                 if (U_FAILURE(*errorCode)) {
 504                     _ISO2022Close(cnv);
 505                     return;
 506                 }
 507
 508                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1");
 509                 uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4);
 510                 cnv->subCharLen = myConverterData->currentConverter->subCharLen;
 511             }else{
 512                 myConverterData->currentConverter=ucnv_open("ibm-949",errorCode);
 513
 514                 if (U_FAILURE(*errorCode)) {
 515                     _ISO2022Close(cnv);
 516                     return;
 517                 }
 518
 519                 myConverterData->version = 0;
 520                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0");
 521             }
 522
 523             /* initialize the state variables */
 524             setInitialStateToUnicodeKR(cnv, myConverterData);
 525             setInitialStateFromUnicodeKR(cnv, myConverterData);
 526
 527             /* set the function pointers to appropriate funtions */
 528             cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;
 529             uprv_strcpy(myConverterData->locale,"ko");
 530         }
 531         else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&&
 532             (myLocale[2]=='_' || myLocale[2]=='\0'))
 533         {
 534
 535             /* open the required converters and cache them */
 536             myConverterData->myConverterArray[GB2312_1]         = ucnv_loadSharedData("ibm-5478", NULL, errorCode);
 537             if(version==1) {
 538                 myConverterData->myConverterArray[ISO_IR_165]   = ucnv_loadSharedData("iso-ir-165", NULL, errorCode);
 539             }
 540             myConverterData->myConverterArray[CNS_11643]        = ucnv_loadSharedData("cns-11643-1992", NULL, errorCode);
 541
 542
 543             /* set the function pointers to appropriate funtions */
 544             cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;
 545             uprv_strcpy(myConverterData->locale,"cn");
 546
 547             if (version==1){
 548                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1");
 549             }else{
 550                 myConverterData->version = 0;
 551                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0");
 552             }
 553         }
 554         else{
 555 #ifdef U_ENABLE_GENERIC_ISO_2022
 556             myConverterData->isFirstBuffer = TRUE;
 557
 558             /* append the UTF-8 escape sequence */
 559             cnv->charErrorBufferLength = 3;
 560             cnv->charErrorBuffer[0] = 0x1b;
 561             cnv->charErrorBuffer[1] = 0x25;
 562             cnv->charErrorBuffer[2] = 0x42;
 563
 564             cnv->sharedData=(UConverterSharedData*)&_ISO2022Data;
 565             /* initialize the state variables */
 566             uprv_strcpy(myConverterData->name,"ISO_2022");
 567 #else
 568             *errorCode = U_UNSUPPORTED_ERROR;
 569             return;
 570 #endif
 571         }
 572
 573         cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;
 574
 575         if(U_FAILURE(*errorCode)) {
 576             _ISO2022Close(cnv);
 577         }
 578     } else {
 579         *errorCode = U_MEMORY_ALLOCATION_ERROR;
 580     }
 581 }
 582
 583
 584 static void
 585 _ISO2022Close(UConverter *converter) {
 586     UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo);
 587     UConverterSharedData **array = myData->myConverterArray;
 588     int32_t i;
 589
 590     if (converter->extraInfo != NULL) {
 591         /*close the array of converter pointers and free the memory*/
 592         for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
 593             if(array[i]!=NULL) {
 594                 ucnv_unloadSharedDataIfReady(array[i]);
 595             }
 596         }
 597
 598         ucnv_close(myData->currentConverter);
 599
 600         if(!converter->isExtraLocal){
 601             uprv_free (converter->extraInfo);
 602             converter->extraInfo = NULL;
 603         }
 604     }
 605 }
 606
 607 static void
 608 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {
 609     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter->extraInfo);
 610     if(choice<=UCNV_RESET_TO_UNICODE) {
 611         uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));
 612         myConverterData->key = 0;
 613     }
 614     if(choice!=UCNV_RESET_TO_UNICODE) {
 615         uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));
 616     }
 617 #ifdef U_ENABLE_GENERIC_ISO_2022
 618     if(myConverterData->locale[0] == 0){
 619         if(choice<=UCNV_RESET_TO_UNICODE) {
 620             myConverterData->isFirstBuffer = TRUE;
 621             myConverterData->key = 0;
 622             if (converter->mode == UCNV_SO){
 623                 ucnv_close (myConverterData->currentConverter);
 624                 myConverterData->currentConverter=NULL;
 625             }
 626             converter->mode = UCNV_SI;
 627         }
 628         if(choice!=UCNV_RESET_TO_UNICODE) {
 629             /* re-append UTF-8 escape sequence */
 630             converter->charErrorBufferLength = 3;
 631             converter->charErrorBuffer[0] = 0x1b;
 632             converter->charErrorBuffer[1] = 0x28;
 633             converter->charErrorBuffer[2] = 0x42;
 634         }
 635     }
 636     else
 637 #endif
 638     {
 639         /* reset the state variables */
 640         if(myConverterData->locale[0] == 'k'){
 641             if(choice<=UCNV_RESET_TO_UNICODE) {
 642                 setInitialStateToUnicodeKR(converter, myConverterData);
 643             }
 644             if(choice!=UCNV_RESET_TO_UNICODE) {
 645                 setInitialStateFromUnicodeKR(converter, myConverterData);
 646             }
 647         }
 648     }
 649 }
 650
 651 static const char*
 652 _ISO2022getName(const UConverter* cnv){
 653     if(cnv->extraInfo){
 654         UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo;
 655         return myData->name;
 656     }
 657     return NULL;
 658 }
 659
 660
 661 /*************** to unicode *******************/
 662 /****************************************************************************
 663  * Recognized escape sequences are
 664  * <ESC>(B  ASCII
 665  * <ESC>.A  ISO-8859-1
 666  * <ESC>.F  ISO-8859-7
 667  * <ESC>(J  JISX-201
 668  * <ESC>(I  JISX-201
 669  * <ESC>$B  JISX-208
 670  * <ESC>$@  JISX-208
 671  * <ESC>$(D JISX-212
 672  * <ESC>$A  GB2312
 673  * <ESC>$(C KSC5601
 674  */
 675 static const StateEnum nextStateToUnicodeJP[MAX_STATES_2022]= {
 676 /*      0                1               2               3               4               5               6               7               8               9    */
 677     INVALID_STATE   ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 678     ,ASCII          ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,JISX201        ,HWKANA_7BIT    ,JISX201        ,INVALID_STATE
 679     ,INVALID_STATE  ,INVALID_STATE  ,JISX208        ,GB2312         ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 680     ,ISO8859_1      ,ISO8859_7      ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,KSC5601        ,JISX212        ,INVALID_STATE
 681     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 682     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 683     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 684     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 685 };
 686
 687 /*************** to unicode *******************/
 688 static const StateEnum nextStateToUnicodeCN[MAX_STATES_2022]= {
 689 /*      0                1               2               3               4               5               6               7               8               9    */
 690      INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,SS3_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 691     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 692     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 693     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 694     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,GB2312_1       ,INVALID_STATE  ,ISO_IR_165
 695     ,CNS_11643_1    ,CNS_11643_2    ,CNS_11643_3    ,CNS_11643_4    ,CNS_11643_5    ,CNS_11643_6    ,CNS_11643_7    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 696     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 697     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 698 };
 699
 700
 701 static UCNV_TableStates_2022
 702 getKey_2022(char c,int32_t* key,int32_t* offset){
 703     int32_t togo;
 704     int32_t low = 0;
 705     int32_t hi = MAX_STATES_2022;
 706     int32_t oldmid=0;
 707
 708     togo = normalize_esq_chars_2022[(uint8_t)c];
 709     if(togo == 0) {
 710         /* not a valid character anywhere in an escape sequence */
 711         *key = 0;
 712         *offset = 0;
 713         return INVALID_2022;
 714     }
 715     togo = (*key << 5) + togo;
 716
 717     while (hi != low)  /*binary search*/{
 718
 719         register int32_t mid = (hi+low) >> 1; /*Finds median*/
 720
 721         if (mid == oldmid)
 722             break;
 723
 724         if (escSeqStateTable_Key_2022[mid] > togo){
 725             hi = mid;
 726         }
 727         else if (escSeqStateTable_Key_2022[mid] < togo){
 728             low = mid;
 729         }
 730         else /*we found it*/{
 731             *key = togo;
 732             *offset = mid;
 733             return escSeqStateTable_Value_2022[mid];
 734         }
 735         oldmid = mid;
 736
 737     }
 738
 739     *key = 0;
 740     *offset = 0;
 741     return INVALID_2022;
 742 }
 743
 744 /*runs through a state machine to determine the escape sequence - codepage correspondance
 745  */
 746 static void
 747 changeState_2022(UConverter* _this,
 748                 const char** source,
 749                 const char* sourceLimit,
 750                 Variant2022 var,
 751                 UErrorCode* err){
 752     UCNV_TableStates_2022 value;
 753     UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);
 754     uint32_t key = myData2022->key;
 755     int32_t offset = 0;
 756     char c;
 757
 758     value = VALID_NON_TERMINAL_2022;
 759     while (*source < sourceLimit) {
 760         c = *(*source)++;
 761         _this->toUBytes[_this->toULength++]=(uint8_t)c;
 762         value = getKey_2022(c,(int32_t *) &key, &offset);
 763
 764         switch (value){
 765
 766         case VALID_NON_TERMINAL_2022 :
 767             /* continue with the loop */
 768             break;
 769
 770         case VALID_TERMINAL_2022:
 771             key = 0;
 772             goto DONE;
 773
 774         case INVALID_2022:
 775             goto DONE;
 776
 777         case VALID_MAYBE_TERMINAL_2022:
 778 #ifdef U_ENABLE_GENERIC_ISO_2022
 779             /* ESC ( B is ambiguous only for ISO_2022 itself */
 780             if(var == ISO_2022) {
 781                 /* discard toUBytes[] for ESC ( B because this sequence is correct and complete */
 782                 _this->toULength = 0;
 783
 784                 /* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */
 785
 786                 /* continue with the loop */
 787                 value = VALID_NON_TERMINAL_2022;
 788                 break;
 789             } else
 790 #endif
 791             {
 792                 /* not ISO_2022 itself, finish here */
 793                 value = VALID_TERMINAL_2022;
 794                 key = 0;
 795                 goto DONE;
 796             }
 797         }
 798     }
 799
 800 DONE:
 801     myData2022->key = key;
 802
 803     if (value == VALID_NON_TERMINAL_2022) {
 804         /* indicate that the escape sequence is incomplete: key!=0 */
 805         return;
 806     } else if (value == INVALID_2022 ) {
 807         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
 808         return;
 809     } else /* value == VALID_TERMINAL_2022 */ {
 810         switch(var){
 811 #ifdef U_ENABLE_GENERIC_ISO_2022
 812         case ISO_2022:
 813         {
 814             const char *chosenConverterName = escSeqStateTable_Result_2022[offset];
 815             if(chosenConverterName == NULL) {
 816                 /* SS2 or SS3 */
 817                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
 818                 return;
 819             }
 820
 821             _this->mode = UCNV_SI;
 822             ucnv_close(myData2022->currentConverter);
 823             myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err);
 824             if(U_SUCCESS(*err)) {
 825                 myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
 826                 _this->mode = UCNV_SO;
 827             }
 828             break;
 829         }
 830 #endif
 831         case ISO_2022_JP:
 832             {
 833                 StateEnum tempState=nextStateToUnicodeJP[offset];
 834                 switch(tempState) {
 835                 case INVALID_STATE:
 836                     *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
 837                     break;
 838                 case SS2_STATE:
 839                     if(myData2022->toU2022State.cs[2]!=0) {
 840                         if(myData2022->toU2022State.g<2) {
 841                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
 842                         }
 843                         myData2022->toU2022State.g=2;
 844                     } else {
 845                         /* illegal to have SS2 before a matching designator */
 846                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
 847                     }
 848                     break;
 849                 /* case SS3_STATE: not used in ISO-2022-JP-x */
 850                 case ISO8859_1:
 851                 case ISO8859_7:
 852                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
 853                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
 854                     } else {
 855                         /* G2 charset for SS2 */
 856                         myData2022->toU2022State.cs[2]=(int8_t)tempState;
 857                     }
 858                     break;
 859                 default:
 860                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
 861                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
 862                     } else {
 863                         /* G0 charset */
 864                         myData2022->toU2022State.cs[0]=(int8_t)tempState;
 865                     }
 866                     break;
 867                 }
 868             }
 869             break;
 870         case ISO_2022_CN:
 871             {
 872                 StateEnum tempState=nextStateToUnicodeCN[offset];
 873                 switch(tempState) {
 874                 case INVALID_STATE:
 875                     *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
 876                     break;
 877                 case SS2_STATE:
 878                     if(myData2022->toU2022State.cs[2]!=0) {
 879                         if(myData2022->toU2022State.g<2) {
 880                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
 881                         }
 882                         myData2022->toU2022State.g=2;
 883                     } else {
 884                         /* illegal to have SS2 before a matching designator */
 885                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
 886                     }
 887                     break;
 888                 case SS3_STATE:
 889                     if(myData2022->toU2022State.cs[3]!=0) {
 890                         if(myData2022->toU2022State.g<2) {
 891                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
 892                         }
 893                         myData2022->toU2022State.g=3;
 894                     } else {
 895                         /* illegal to have SS3 before a matching designator */
 896                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
 897                     }
 898                     break;
 899                 case ISO_IR_165:
 900                     if(myData2022->version==0) {
 901                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
 902                         break;
 903                     }
 904                     /*fall through*/
 905                 case GB2312_1:
 906                     /*fall through*/
 907                 case CNS_11643_1:
 908                     myData2022->toU2022State.cs[1]=(int8_t)tempState;
 909                     break;
 910                 case CNS_11643_2:
 911                     myData2022->toU2022State.cs[2]=(int8_t)tempState;
 912                     break;
 913                 default:
 914                     /* other CNS 11643 planes */
 915                     if(myData2022->version==0) {
 916                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
 917                     } else {
 918                        myData2022->toU2022State.cs[3]=(int8_t)tempState;
 919                     }
 920                     break;
 921                 }
 922             }
 923             break;
 924         case ISO_2022_KR:
 925             if(offset==0x30){
 926                 /* nothing to be done, just accept this one escape sequence */
 927             } else {
 928                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
 929             }
 930             break;
 931
 932         default:
 933             *err = U_ILLEGAL_ESCAPE_SEQUENCE;
 934             break;
 935         }
 936     }
 937     if(U_SUCCESS(*err)) {
 938         _this->toULength = 0;
 939     }
 940 }
 941
 942 /*Checks the characters of the buffer against valid 2022 escape sequences
 943 *if the match we return a pointer to the initial start of the sequence otherwise
 944 *we return sourceLimit
 945 */
 946 /*for 2022 looks ahead in the stream
 947  *to determine the longest possible convertible
 948  *data stream
 949  */
 950 static U_INLINE const char*
 951 getEndOfBuffer_2022(const char** source,
 952                    const char* sourceLimit,
 953                    UBool flush){
 954
 955     const char* mySource = *source;
 956
 957 #ifdef U_ENABLE_GENERIC_ISO_2022
 958     if (*source >= sourceLimit)
 959         return sourceLimit;
 960
 961     do{
 962
 963         if (*mySource == ESC_2022){
 964             int8_t i;
 965             int32_t key = 0;
 966             int32_t offset;
 967             UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;
 968
 969             /* Kludge: I could not
 970             * figure out the reason for validating an escape sequence
 971             * twice - once here and once in changeState_2022().
 972             * is it possible to have an ESC character in a ISO2022
 973             * byte stream which is valid in a code page? Is it legal?
 974             */
 975             for (i=0;
 976             (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);
 977             i++) {
 978                 value =  getKey_2022(*(mySource+i), &key, &offset);
 979             }
 980             if (value > 0 || *mySource==ESC_2022)
 981                 return mySource;
 982
 983             if ((value == VALID_NON_TERMINAL_2022)&&(!flush) )
 984                 return sourceLimit;
 985         }
 986     }while (++mySource < sourceLimit);
 987
 988     return sourceLimit;
 989 #else
 990     while(mySource < sourceLimit && *mySource != ESC_2022) {
 991         ++mySource;
 992     }
 993     return mySource;
 994 #endif
 995 }
 996
 997
 998 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
 999  * any future change in _MBCSFromUChar32() function should be reflected here.
1000  * @return number of bytes in *value; negative number if fallback; 0 if no mapping
1001  */
1002 static U_INLINE int32_t
1003 MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
1004                                          UChar32 c,
1005                                          uint32_t* value,
1006                                          UBool useFallback,
1007                                          int outputType)
1008 {
1009     const int32_t *cx;
1010     const uint16_t *table;
1011     uint32_t stage2Entry;
1012     uint32_t myValue;
1013     int32_t length;
1014     const uint8_t *p;
1015     /*
1016      * TODO(markus): Use and require new, faster MBCS conversion table structures.
1017      * Use internal version of ucnv_open() that verifies that the new structures are available,
1018      * else U_INTERNAL_PROGRAM_ERROR.
1019      */
1020     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
1021     if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
1022         table=sharedData->mbcs.fromUnicodeTable;
1023         stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
1024         /* get the bytes and the length for the output */
1025         if(outputType==MBCS_OUTPUT_2){
1026             myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
1027             if(myValue<=0xff) {
1028                 length=1;
1029             } else {
1030                 length=2;
1031             }
1032         } else /* outputType==MBCS_OUTPUT_3 */ {
1033             p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
1034             myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
1035             if(myValue<=0xff) {
1036                 length=1;
1037             } else if(myValue<=0xffff) {
1038                 length=2;
1039             } else {
1040                 length=3;
1041             }
1042         }
1043         /*
1044          * TODO(markus): Use Shift-JIS table for JIS X 0208, to save mapping table space.
1045          * Pass in parameter for type of output bytes, for validation and shifting:
1046          * - Direct: Pass bytes through, but forbid control codes 00-1F (except SI/SO/ESC) and space 20?
1047          *   (Need to allow some (TAB/LF/CR) or most of them for ASCII and maybe JIS X 0201.)
1048          * - A1-FE: Subtract 80 after range check.
1049          * - SJIS: Shift DBCS result to 21-7E x 21-7E.
1050          */
1051         /* is this code point assigned, or do we use fallbacks? */
1052         if((stage2Entry&(1<<(16+(c&0xf))))!=0) {
1053             /* assigned */
1054             *value=myValue;
1055             return length;
1056         } else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) {
1057             /*
1058              * We allow a 0 byte output if the "assigned" bit is set for this entry.
1059              * There is no way with this data structure for fallback output
1060              * to be a zero byte.
1061              */
1062             *value=myValue;
1063             return -length;
1064         }
1065     }
1066
1067     cx=sharedData->mbcs.extIndexes;
1068     if(cx!=NULL) {
1069         return ucnv_extSimpleMatchFromU(cx, c, value, useFallback);
1070     }
1071
1072     /* unassigned */
1073     return 0;
1074 }
1075
1076 /* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c
1077  * any future change in _MBCSSingleFromUChar32() function should be reflected here.
1078  * @param retval pointer to output byte
1079  * @return 1 roundtrip byte  0 no mapping  -1 fallback byte
1080  */
1081 static U_INLINE int32_t
1082 MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
1083                                        UChar32 c,
1084                                        uint32_t* retval,
1085                                        UBool useFallback)
1086 {
1087     const uint16_t *table;
1088     int32_t value;
1089     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
1090     if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
1091         return 0;
1092     }
1093     /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
1094     table=sharedData->mbcs.fromUnicodeTable;
1095     /* get the byte for the output */
1096     value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
1097     /* is this code point assigned, or do we use fallbacks? */
1098     *retval=(uint32_t)(value&0xff);
1099     if(value>=0xf00) {
1100         return 1;  /* roundtrip */
1101     } else if(useFallback ? value>=0x800 : value>=0xc00) {
1102         return -1;  /* fallback taken */
1103     } else {
1104         return 0;  /* no mapping */
1105     }
1106 }
1107
#ifdef U_ENABLE_GENERIC_ISO_2022

/**********************************************************************************
*  ISO-2022 Converter
*
*  toUnicode implementation of the generic converter: the input is split at
*  ESC bytes; each stretch between escape sequences is handed to the current
*  sub-converter, and each escape sequence is handed to changeState_2022().
*/

static void
T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
                                                           UErrorCode* err){
    UConverter *outer = args->converter;
    UConverterDataISO2022 *data = (UConverterDataISO2022 *)outer->extraInfo;
    const char *inputLimit = args->sourceLimit;
    const char *chunkLimit;
    const char *chunkStart;
    const UChar *targetStart;
    UConverter *sub;
    int8_t n;

    while(args->source < inputLimit) {
        /* key==0: we are not in the middle of an escape sequence */
        if(data->key == 0) {
            /* the chunk ends at the next ESC or at the end of the buffer */
            chunkLimit = getEndOfBuffer_2022(&(args->source), inputLimit, args->flush);

            if(args->source < chunkLimit) {
                /* no designation seen yet: start out with ASCII */
                if(data->currentConverter == NULL) {
                    data->currentConverter = ucnv_open("ASCII",err);
                    if(U_FAILURE(*err)) {
                        return;
                    }

                    data->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
                    outer->mode = UCNV_SO;
                }
                sub = data->currentConverter;

                /* convert to before the ESC or until the end of the buffer */
                data->isFirstBuffer = FALSE;
                chunkStart = args->source;
                targetStart = args->target;
                args->converter = sub;
                ucnv_toUnicode(args->converter,
                    &args->target,
                    args->targetLimit,
                    &args->source,
                    chunkLimit,
                    args->offsets,
                    (UBool)(args->flush && chunkLimit == inputLimit),
                    err);
                args->converter = outer;

                if(*err == U_BUFFER_OVERFLOW_ERROR) {
                    /* move the overflow buffer from the sub-converter to the outer one */
                    outer->UCharErrorBufferLength = sub->UCharErrorBufferLength;
                    n = outer->UCharErrorBufferLength;
                    sub->UCharErrorBufferLength = 0;
                    if(n > 0) {
                        uprv_memcpy(outer->UCharErrorBuffer,
                                    sub->UCharErrorBuffer,
                                    n*U_SIZEOF_UCHAR);
                    }
                    return;
                }

                /*
                 * Return to the caller if at least one of these holds:
                 * - there was an error while converting
                 * - the entire buffer is done
                 * - offsets need to be written or the current offset updated
                 *   (leave that up to the code in ucnv.c)
                 * - the sub-converter holds partial input before an ESC
                 *
                 * Otherwise we just stopped at an ESC byte and continue
                 * with changeState_2022().
                 */
                if( U_FAILURE(*err) ||
                    args->source == inputLimit ||
                    (args->offsets != NULL &&
                        (args->target != targetStart || args->source != chunkStart)) ||
                    (chunkLimit < inputLimit && sub->toULength > 0)
                ) {
                    /* copy partial or error input for truncated detection and error handling */
                    if(U_FAILURE(*err)) {
                        outer->invalidCharLength = sub->invalidCharLength;
                        n = outer->invalidCharLength;
                        if(n > 0) {
                            uprv_memcpy(outer->invalidCharBuffer, sub->invalidCharBuffer, n);
                        }
                    } else {
                        outer->toULength = sub->toULength;
                        n = outer->toULength;
                        if(n > 0) {
                            uprv_memcpy(outer->toUBytes, sub->toUBytes, n);
                            if(args->source < chunkLimit) {
                                *err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */
                            }
                        }
                    }
                    return;
                }
            }
        }

        /* at an ESC byte, or continuing a sequence from the previous buffer */
        chunkStart = args->source;
        changeState_2022(args->converter,
               &(args->source),
               inputLimit,
               ISO_2022,
               err);
        if(U_FAILURE(*err) || (args->source != chunkStart && args->offsets != NULL)) {
            /* let the ucnv.c code update its current offset */
            return;
        }
    }
}

#endif
1221
1222 /*
1223  * To Unicode Callback helper function
1224  */
1225 static void
1226 toUnicodeCallback(UConverter *cnv,
1227                   const uint32_t sourceChar, const uint32_t targetUniChar,
1228                   UErrorCode* err){
1229     if(sourceChar>0xff){
1230         cnv->toUBytes[0] = (uint8_t)(sourceChar>>8);
1231         cnv->toUBytes[1] = (uint8_t)sourceChar;
1232         cnv->toULength = 2;
1233     }
1234     else{
1235         cnv->toUBytes[0] =(char) sourceChar;
1236         cnv->toULength = 2;
1237     }
1238
1239     if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){
1240         *err = U_INVALID_CHAR_FOUND;
1241     }
1242     else{
1243         *err = U_ILLEGAL_CHAR_FOUND;
1244     }
1245 }
1246
1247 /**************************************ISO-2022-JP*************************************************/
1248
1249 /************************************** IMPORTANT **************************************************
1250 * The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and
1251 * MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32().
1252 * The converter iterates over each Unicode codepoint
1253 * to obtain the equivalent codepoints from the codepages supported. Since the source buffer is
1254 * processed one char at a time it would make sense to reduce the extra processing a canned converter
1255 * would do as far as possible.
1256 *
1257 * If the implementation of these macros or structure of sharedData struct change in the future, make
1258 * sure that ISO-2022 is also changed.
1259 ***************************************************************************************************
1260 */
1261
1262 /***************************************************************************************************
1263 * Rules for ISO-2022-jp encoding
1264 * (i)   Escape sequences must be fully contained within a line they should not
1265 *       span new lines or CRs
1266 * (ii)  If the last character on a line is represented by two bytes then an ASCII or
1267 *       JIS-Roman character escape sequence should follow before the line terminates
1268 * (iii) If the first character on the line is represented by two bytes then a two
1269 *       byte character escape sequence should precede it
1270 * (iv)  If no escape sequence is encountered then the characters are ASCII
1271 * (v)   Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2,
1272 *       and invoked with SS2 (ESC N).
1273 * (vi)  If there is any G0 designation in text, there must be a switch to
1274 *       ASCII or to JIS X 0201-Roman before a space character (but not
1275 *       necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control
1276 *       characters such as tab or CRLF.
* (vii) Supported encodings:
1278 *          ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7
1279 *
1280 *  source : RFC-1554
1281 *
1282 *          JISX201, JISX208,JISX212 : new .cnv data files created
1283 *          KSC5601 : alias to ibm-949 mapping table
1284 *          GB2312 : alias to ibm-1386 mapping table
1285 *          ISO-8859-1 : Algorithmic implemented as LATIN1 case
*          ISO-8859-7 : alias to ibm-9409 mapping table
1287 */
1288
1289 /* preference order of JP charsets */
1290 static const StateEnum jpCharsetPref[]={
1291     ASCII,
1292     JISX201,
1293     ISO8859_1,
1294     ISO8859_7,
1295     JISX208,
1296     JISX212,
1297     GB2312,
1298     KSC5601,
1299     HWKANA_7BIT
1300 };
1301
/*
 * Designation escape sequences, one per JP charset.
 * The escape sequences must be in order of the enum constants like JISX201  = 3,
 * not in order of jpCharsetPref[]!
 */
static const char escSeqChars[][6] ={
    /* ASCII        <ESC>(B  */ "\x1B\x28\x42",
    /* ISO-8859-1   <ESC>.A  */ "\x1B\x2E\x41",
    /* ISO-8859-7   <ESC>.F  */ "\x1B\x2E\x46",
    /* JISX-201     <ESC>(J  */ "\x1B\x28\x4A",
    /* JISX-208     <ESC>$B  */ "\x1B\x24\x42",
    /* JISX-212     <ESC>$(D */ "\x1B\x24\x28\x44",
    /* GB2312       <ESC>$A  */ "\x1B\x24\x41",
    /* KSC5601      <ESC>$(C */ "\x1B\x24\x28\x43",
    /* HWKANA_7BIT  <ESC>(I  */ "\x1B\x28\x49"
};
1318 static  const int32_t escSeqCharsLen[] ={
1319     3, /* length of <ESC>(B  ASCII       */
1320     3, /* length of <ESC>.A  ISO-8859-1  */
1321     3, /* length of <ESC>.F  ISO-8859-7  */
1322     3, /* length of <ESC>(J  JISX-201    */
1323     3, /* length of <ESC>$B  JISX-208    */
1324     4, /* length of <ESC>$(D JISX-212    */
1325     3, /* length of <ESC>$A  GB2312      */
1326     4, /* length of <ESC>$(C KSC5601     */
1327     3  /* length of <ESC>(I  HWKANA_7BIT */
1328 };
1329
1330 /*
1331 * The iteration over various code pages works this way:
1332 * i)   Get the currentState from myConverterData->currentState
1333 * ii)  Check if the character is mapped to a valid character in the currentState
1334 *      Yes ->  a) set the initIterState to currentState
1335 *       b) remain in this state until an invalid character is found
1336 *      No  ->  a) go to the next code page and find the character
* iii) Before changing the state, increment the current state and check whether it
*      is equal to the initIterState
1339 *      Yes ->  A character that cannot be represented in any of the supported encodings
1340 *       break and return a U_INVALID_CHARACTER error
1341 *      No  ->  Continue and find the character in next code page
1342 *
1343 *
1344 * TODO: Implement a priority technique where the users are allowed to set the priority of code pages
1345 */
1346
/*
 * Converts UTF-16 input from args->source to ISO-2022-JP bytes in
 * args->target and, when args->offsets is not NULL, records a source index
 * for every output byte.
 *
 * For each code point the function walks a list of candidate charsets
 * (choices[]): half-width Katakana first for versions 3 and 4, then the
 * charset currently designated to G0, then the one designated to G2 (if
 * any), then the remaining charsets allowed for this version in
 * jpCharsetPref[] order. A roundtrip mapping ends the search; a fallback
 * mapping is remembered while the search continues for a roundtrip.
 *
 * The output for one code point is assembled in buffer[]: SI, a designation
 * escape sequence, SO or ESC N, and then the one or two character bytes,
 * each part only if it is needed.
 *
 * On return args->source and args->target are advanced. *err is set to
 *   U_ILLEGAL_CHAR_FOUND    for unpaired surrogates and for SO/SI/ESC input,
 *   U_INVALID_CHAR_FOUND    when no candidate charset maps the code point,
 *   U_BUFFER_OVERFLOW_ERROR when the target is full while input remains.
 * In the first two cases, and for a lead surrogate at the end of the input,
 * the pending code point is stored in cnv->fromUChar32.
 *
 * NOTE(review): the older comment block preceding this function describes an
 * initIterState-based iteration; that does not appear to match the choices[]
 * search implemented below - confirm and update.
 */
1347 static void
1348 UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {
1349     UConverter *cnv = args->converter;
1350     UConverterDataISO2022 *converterData;
1351     ISO2022State *pFromU2022State;
1352     uint8_t *target = (uint8_t *) args->target;
1353     const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
1354     const UChar* source = args->source;
1355     const UChar* sourceLimit = args->sourceLimit;
1356     int32_t* offsets = args->offsets;
1357     UChar32 sourceChar;
         /* staging area for all bytes produced for one code point (or for the final reset) */
1358     char buffer[8];
1359     int32_t len, outLen;
         /* candidate charsets for the current code point, in the order they are tried */
1360     int8_t choices[10];
1361     int32_t choiceCount;
1362     uint32_t targetValue = 0;
1363     UBool useFallback;
1364
1365     int32_t i;
         /* cs: charset chosen for the current code point; g: the G0..G2 slot it is used through */
1366     int8_t cs, g;
1367
1368     /* set up the state */
1369     converterData     = (UConverterDataISO2022*)cnv->extraInfo;
1370     pFromU2022State   = &converterData->fromU2022State;
1371
         /* 0 means choices[] must be (re)built before the next lookup */
1372     choiceCount = 0;
1373
1374     /* check if the last codepoint of previous buffer was a lead surrogate*/
1375     if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
1376         goto getTrail;
1377     }
1378
1379     while(source < sourceLimit) {
1380         if(target < targetLimit) {
1381
1382             sourceChar  = *(source++);
1383             /*check if the char is a First surrogate*/
1384             if(UTF_IS_SURROGATE(sourceChar)) {
1385                 if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
1386 getTrail:
1387                     /*look ahead to find the trail surrogate*/
1388                     if(source < sourceLimit) {
1389                         /* test the following code unit */
1390                         UChar trail=(UChar) *source;
1391                         if(UTF_IS_SECOND_SURROGATE(trail)) {
1392                             source++;
1393                             sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
1394                             cnv->fromUChar32=0x00;
1395                             /* convert this supplementary code point */
1396                             /* exit this condition tree */
1397                         } else {
1398                             /* this is an unmatched lead code unit (1st surrogate) */
1399                             /* callback(illegal) */
1400                             *err=U_ILLEGAL_CHAR_FOUND;
1401                             cnv->fromUChar32=sourceChar;
1402                             break;
1403                         }
1404                     } else {
1405                         /* no more input */
1406                         cnv->fromUChar32=sourceChar;
1407                         break;
1408                     }
1409                 } else {
1410                     /* this is an unmatched trail code unit (2nd surrogate) */
1411                     /* callback(illegal) */
1412                     *err=U_ILLEGAL_CHAR_FOUND;
1413                     cnv->fromUChar32=sourceChar;
1414                     break;
1415                 }
1416             }
1417
1418             /* do not convert SO/SI/ESC */
1419             if(IS_2022_CONTROL(sourceChar)) {
1420                 /* callback(illegal) */
1421                 *err=U_ILLEGAL_CHAR_FOUND;
1422                 cnv->fromUChar32=sourceChar;
1423                 break;
1424             }
1425
1426             /* do the conversion */
1427
                 /*
                  * choices[] is rebuilt only after it was invalidated: at function
                  * entry, after a designation change, and after CR/LF (see below).
                  */
1428             if(choiceCount == 0) {
1429                 uint16_t csm;
1430
1431                 /*
1432                  * The csm variable keeps track of which charsets are allowed
1433                  * and not used yet while building the choices[].
1434                  */
1435                 csm = jpCharsetMasks[converterData->version];
1436                 choiceCount = 0;
1437
1438                 /* JIS7/8: try single-byte half-width Katakana before JISX208 */
1439                 if(converterData->version == 3 || converterData->version == 4) {
1440                     choices[choiceCount++] = (int8_t)HWKANA_7BIT;
1441                 }
1442                 /* Do not try single-byte half-width Katakana for other versions. */
1443                 csm &= ~CSM(HWKANA_7BIT);
1444
1445                 /* try the current G0 charset */
1446                 choices[choiceCount++] = cs = pFromU2022State->cs[0];
1447                 csm &= ~CSM(cs);
1448
1449                 /* try the current G2 charset */
1450                 if((cs = pFromU2022State->cs[2]) != 0) {
1451                     choices[choiceCount++] = cs;
1452                     csm &= ~CSM(cs);
1453                 }
1454
1455                 /* try all the other possible charsets */
1456                 for(i = 0; i < LENGTHOF(jpCharsetPref); ++i) {
1457                     cs = (int8_t)jpCharsetPref[i];
1458                     if(CSM(cs) & csm) {
1459                         choices[choiceCount++] = cs;
1460                         csm &= ~CSM(cs);
1461                     }
1462                 }
1463             }
1464
1465             cs = g = 0;
1466             /*
1467              * len==0: no mapping found yet
1468              * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
1469              * len>0: found a roundtrip result, done
1470              */
1471             len = 0;
1472             /*
1473              * We will turn off useFallback after finding a fallback,
1474              * but we still get fallbacks from PUA code points as usual.
1475              * Therefore, we will also need to check that we don't overwrite
1476              * an early fallback with a later one.
1477              */
1478             useFallback = cnv->useFallback;
1479
1480             for(i = 0; i < choiceCount && len <= 0; ++i) {
1481                 uint32_t value;
1482                 int32_t len2;
1483                 int8_t cs0 = choices[i];
1484                 switch(cs0) {
1485                 case ASCII:
1486                     if(sourceChar <= 0x7f) {
1487                         targetValue = (uint32_t)sourceChar;
1488                         len = 1;
1489                         cs = cs0;
1490                         g = 0;
1491                     }
1492                     break;
1493                 case ISO8859_1:
1494                     if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
1495                         targetValue = (uint32_t)sourceChar - 0x80;
1496                         len = 1;
1497                         cs = cs0;
1498                         g = 2;
1499                     }
1500                     break;
1501                 case HWKANA_7BIT:
                         /* one unsigned compare: true only for HWKANA_START <= sourceChar <= HWKANA_END */
1502                     if((uint32_t)(HWKANA_END-sourceChar)<=(HWKANA_END-HWKANA_START)) {
1503                         if(converterData->version==3) {
1504                             /* JIS7: use G1 (SO) */
1505                             /* Shift U+FF61..U+FF9F to bytes 21..5F. */
1506                             targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));
1507                             len = 1;
1508                             pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
1509                             g = 1;
1510                         } else if(converterData->version==4) {
1511                             /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
1512                             /* Shift U+FF61..U+FF9F to bytes A1..DF. */
1513                             targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1));
1514                             len = 1;
1515
1516                             cs = pFromU2022State->cs[0];
1517                             if(IS_JP_DBCS(cs)) {
1518                                 /* switch from a DBCS charset to JISX201 */
1519                                 cs = (int8_t)JISX201;
1520                             }
1521                             /* else stay in the current G0 charset */
1522                             g = 0;
1523                         }
1524                         /* else do not use HWKANA_7BIT with other versions */
1525                     }
1526                     break;
1527                 case JISX201:
1528                     /* G0 SBCS */
1529                     len2 = MBCS_SINGLE_FROM_UCHAR32(
1530                                 converterData->myConverterArray[cs0],
1531                                 sourceChar, &value,
1532                                 useFallback);
                         /*
                          * Accept a roundtrip (len2>0) always, a fallback (len2<0) only if
                          * nothing was found before (len==0), and only 7-bit results.
                          */
1533                     if(len2 != 0 && !(len2 < 0 && len != 0) && value <= 0x7f) {
1534                         targetValue = value;
1535                         len = len2;
1536                         cs = cs0;
1537                         g = 0;
1538                         useFallback = FALSE;
1539                     }
1540                     break;
1541                 case ISO8859_7:
1542                     /* G0 SBCS forced to 7-bit output */
1543                     len2 = MBCS_SINGLE_FROM_UCHAR32(
1544                                 converterData->myConverterArray[cs0],
1545                                 sourceChar, &value,
1546                                 useFallback);
                         /* same acceptance rule as JISX201, but only for bytes in GR96_START..GR96_END */
1547                     if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) {
1548                         targetValue = value - 0x80;
1549                         len = len2;
1550                         cs = cs0;
1551                         g = 2;
1552                         useFallback = FALSE;
1553                     }
1554                     break;
1555                 default:
1556                     /* G0 DBCS */
1557                     len2 = MBCS_FROM_UCHAR32_ISO2022(
1558                                 converterData->myConverterArray[cs0],
1559                                 sourceChar, &value,
1560                                 useFallback, MBCS_OUTPUT_2);
1561                     if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
1562                         if(cs0 == KSC5601) {
1563                             /*
1564                              * Check for valid bytes for the encoding scheme.
1565                              * This is necessary because the sub-converter (windows-949)
1566                              * has a broader encoding scheme than is valid for 2022.
1567                              *
1568                              * Check that the result is a 2-byte value with each byte in the range A1..FE
1569                              * (strict EUC-KR DBCS) before accepting it and subtracting 0x80 from each byte
1570                              * to move it to the ISO 2022 range 21..7E.
1571                              */
1572                             if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&
1573                                 (uint8_t)(value - 0xa1) <= (0xfe - 0xa1)
1574                             ) {
1575                                 value -= 0x8080;  /* shift down to 21..7e byte range */
1576                             } else {
1577                                 break;  /* not valid for ISO 2022 */
1578                             }
1579                         }
1580                         targetValue = value;
1581                         len = len2;
1582                         cs = cs0;
1583                         g = 0;
1584                         useFallback = FALSE;
1585                     }
1586                     break;
1587                 }
1588             }
1589
1590             if(len != 0) {
1591                 if(len < 0) {
1592                     len = -len;  /* fallback */
1593                 }
1594                 outLen = 0; /* count output bytes */
1595
1596                 /* write SI if necessary (only for JIS7) */
1597                 if(pFromU2022State->g == 1 && g == 0) {
1598                     buffer[outLen++] = UCNV_SI;
1599                     pFromU2022State->g = 0;
1600                 }
1601
1602                 /* write the designation sequence if necessary */
1603                 if(cs != pFromU2022State->cs[g]) {
1604                     int32_t escLen = escSeqCharsLen[cs];
1605                     uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen);
1606                     outLen += escLen;
1607                     pFromU2022State->cs[g] = cs;
1608
1609                     /* invalidate the choices[] */
1610                     choiceCount = 0;
1611                 }
1612
1613                 /* write the shift sequence if necessary */
1614                 if(g != pFromU2022State->g) {
1615                     switch(g) {
1616                     /* case 0 handled before writing escapes */
1617                     case 1:
1618                         buffer[outLen++] = UCNV_SO;
1619                         pFromU2022State->g = 1;
1620                         break;
1621                     default: /* case 2 */
                             /* ESC N (0x1b 0x4e); pFromU2022State->g is deliberately not changed here */
1622                         buffer[outLen++] = 0x1b;
1623                         buffer[outLen++] = 0x4e;
1624                         break;
1625                     /* no case 3: no SS3 in ISO-2022-JP-x */
1626                     }
1627                 }
1628
1629                 /* write the output bytes */
1630                 if(len == 1) {
1631                     buffer[outLen++] = (char)targetValue;
1632                 } else /* len == 2 */ {
1633                     buffer[outLen++] = (char)(targetValue >> 8);
1634                     buffer[outLen++] = (char)targetValue;
1635                 }
1636             } else {
1637                 /*
1638                  * if we cannot find the character after checking all codepages
1639                  * then this is an error
1640                  */
1641                 *err = U_INVALID_CHAR_FOUND;
1642                 cnv->fromUChar32=sourceChar;
1643                 break;
1644             }
1645
1646             if(sourceChar == CR || sourceChar == LF) {
1647                 /* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */
1648                 pFromU2022State->cs[2] = 0;
1649                 choiceCount = 0;
1650             }
1651
1652             /* output outLen>0 bytes in buffer[] */
                 /*
                  * Fast paths for one and two bytes write directly to the target
                  * (target < targetLimit was checked at the top of the loop, so one
                  * byte always fits); everything else goes through fromUWriteUInt8().
                  */
1653             if(outLen == 1) {
1654                 *target++ = buffer[0];
1655                 if(offsets) {
1656                     *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
1657                 }
1658             } else if(outLen == 2 && (target + 2) <= targetLimit) {
1659                 *target++ = buffer[0];
1660                 *target++ = buffer[1];
1661                 if(offsets) {
1662                     int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
1663                     *offsets++ = sourceIndex;
1664                     *offsets++ = sourceIndex;
1665                 }
1666             } else {
1667                 fromUWriteUInt8(
1668                     cnv,
1669                     buffer, outLen,
1670                     &target, (const char *)targetLimit,
1671                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
1672                     err);
1673                 if(U_FAILURE(*err)) {
1674                     break;
1675                 }
1676             }
1677         } /* end if(myTargetIndex<myTargetLength) */
1678         else{
1679             *err =U_BUFFER_OVERFLOW_ERROR;
1680             break;
1681         }
1682
1683     }/* end while(mySourceIndex<mySourceLength) */
1684
1685     /*
1686      * the end of the input stream and detection of truncated input
1687      * are handled by the framework, but for ISO-2022-JP conversion
1688      * we need to be in ASCII mode at the very end
1689      *
1690      * conditions:
1691      *   successful
1692      *   in SO mode or not in ASCII mode
1693      *   end of input and no truncated input
1694      */
1695     if( U_SUCCESS(*err) &&
1696         (pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) &&
1697         args->flush && source>=sourceLimit && cnv->fromUChar32==0
1698     ) {
1699         int32_t sourceIndex;
1700
1701         outLen = 0;
1702
1703         if(pFromU2022State->g != 0) {
1704             buffer[outLen++] = UCNV_SI;
1705             pFromU2022State->g = 0;
1706         }
1707
1708         if(pFromU2022State->cs[0] != ASCII) {
1709             int32_t escLen = escSeqCharsLen[ASCII];
1710             uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen);
1711             outLen += escLen;
1712             pFromU2022State->cs[0] = (int8_t)ASCII;
1713         }
1714
1715         /* get the source index of the last input character */
1716         /*
1717          * TODO this would be simpler and more reliable if we used a pair
1718          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
1719          * so that we could simply use the prevSourceIndex here;
1720          * this code gives an incorrect result for the rare case of an unmatched
1721          * trail surrogate that is alone in the last buffer of the text stream
1722          */
1723         sourceIndex=(int32_t)(source-args->source);
1724         if(sourceIndex>0) {
1725             --sourceIndex;
1726             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
1727                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
1728             ) {
1729                 --sourceIndex;
1730             }
1731         } else {
1732             sourceIndex=-1;
1733         }
1734
1735         fromUWriteUInt8(
1736             cnv,
1737             buffer, outLen,
1738             &target, (const char *)targetLimit,
1739             &offsets, sourceIndex,
1740             err);
1741     }
1742
1743     /*save the state and return */
1744     args->source = source;
1745     args->target = (char*)target;
1746 }
1747
1748 /*************** to unicode *******************/
1749
/*
 * Converts ISO-2022-JP bytes from args->source to UTF-16 in args->target
 * and, when args->offsets is not NULL, records a source index for every
 * output code unit.
 *
 * Control handling inside the loop:
 *   - SI and SO change the active G set only for version 3; for other
 *     versions they leave targetUniChar at missingCharMarker and therefore
 *     end up in toUnicodeCallback().
 *   - ESC hands the input to changeState_2022(); on failure the function
 *     stores source/target and returns immediately.
 *   - CR and LF reset the state (G0 stays JISX201 if it was JISX201,
 *     otherwise becomes ASCII; G2 is cleared; g becomes 0) and are then
 *     converted like any other byte.
 *
 * State carried between calls: myData->key != 0 resumes a partial escape
 * sequence; converter->toULength == 1 resumes a double-byte character whose
 * lead byte was saved in converter->toUBytes[0].
 *
 * A full target with input remaining sets U_BUFFER_OVERFLOW_ERROR. On every
 * exit path args->source and args->target are updated.
 */
1750 static void
1751 UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
1752                                                UErrorCode* err){
         /* lead and trail byte of a double-byte character for the MBCS lookup */
1753     char tempBuf[3];
1754     const char *mySource = (char *) args->source;
1755     UChar *myTarget = args->target;
1756     const char *mySourceLimit = args->sourceLimit;
1757     uint32_t targetUniChar = 0x0000;
1758     uint32_t mySourceChar = 0x0000;
1759     UConverterDataISO2022* myData;
1760     ISO2022State *pToU2022State;
1761     StateEnum cs;
1762
1763     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
1764     pToU2022State = &myData->toU2022State;
1765
1766     if(myData->key != 0) {
1767         /* continue with a partial escape sequence */
1768         goto escape;
1769     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
1770         /* continue with a partial double-byte character */
1771         mySourceChar = args->converter->toUBytes[0];
1772         args->converter->toULength = 0;
1773         cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
1774         goto getTrailByte;
1775     }
1776
1777     while(mySource < mySourceLimit){
1778
1779         targetUniChar =missingCharMarker;
1780
1781         if(myTarget < args->targetLimit){
1782
1783             mySourceChar= (unsigned char) *mySource++;
1784
1785             switch(mySourceChar) {
1786             case UCNV_SI:
1787                 if(myData->version==3) {
1788                     pToU2022State->g=0;
1789                     continue;
1790                 } else {
1791                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
1792                     break;
1793                 }
1794
1795             case UCNV_SO:
1796                 if(myData->version==3) {
1797                     /* JIS7: switch to G1 half-width Katakana */
1798                     pToU2022State->cs[1] = (int8_t)HWKANA_7BIT;
1799                     pToU2022State->g=1;
1800                     continue;
1801                 } else {
1802                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
1803                     break;
1804                 }
1805
1806             case ESC_2022:
                     /* step back so that changeState_2022() sees the ESC byte itself */
1807                 mySource--;
1808 escape:
1809                 changeState_2022(args->converter,&(mySource),
1810                     mySourceLimit, ISO_2022_JP,err);
1811
1812                 /* invalid or illegal escape sequence */
1813                 if(U_FAILURE(*err)){
1814                     args->target = myTarget;
1815                     args->source = mySource;
1816                     return;
1817                 }
1818                 continue;
1819
1820             /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
1821
1822             case CR:
1823                 /*falls through*/
1824             case LF:
1825                 /* automatically reset to single-byte mode */
1826                 if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) {
1827                     pToU2022State->cs[0] = (int8_t)ASCII;
1828                 }
1829                 pToU2022State->cs[2] = 0;
1830                 pToU2022State->g = 0;
1831                 /* falls through */
1832             default:
1833                 /* convert one or two bytes */
1834                 cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
                     /* the uint8_t compare is a range check for bytes A1..DF */
1835                 if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 &&
1836                     !IS_JP_DBCS(cs)
1837                 ) {
1838                     /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
1839                     targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
1840
1841                     /* return from a single-shift state to the previous one */
1842                     if(pToU2022State->g >= 2) {
1843                         pToU2022State->g=pToU2022State->prevG;
1844                     }
1845                 } else switch(cs) {
1846                 case ASCII:
1847                     if(mySourceChar <= 0x7f) {
1848                         targetUniChar = mySourceChar;
1849                     }
1850                     break;
1851                 case ISO8859_1:
1852                     if(mySourceChar <= 0x7f) {
1853                         targetUniChar = mySourceChar + 0x80;
1854                     }
1855                     /* return from a single-shift state to the previous one */
1856                     pToU2022State->g=pToU2022State->prevG;
1857                     break;
1858                 case ISO8859_7:
1859                     if(mySourceChar <= 0x7f) {
1860                         /* convert mySourceChar+0x80 to use a normal 8-bit table */
1861                         targetUniChar =
1862                             _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
1863                                 myData->myConverterArray[cs],
1864                                 mySourceChar + 0x80);
1865                     }
1866                     /* return from a single-shift state to the previous one */
1867                     pToU2022State->g=pToU2022State->prevG;
1868                     break;
1869                 case JISX201:
1870                     if(mySourceChar <= 0x7f) {
1871                         targetUniChar =
1872                             _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
1873                                 myData->myConverterArray[cs],
1874                                 mySourceChar);
1875                     }
1876                     break;
1877                 case HWKANA_7BIT:
1878                     if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {
1879                         /* 7-bit halfwidth Katakana */
1880                         targetUniChar = mySourceChar + (HWKANA_START - 0x21);
1881                     }
1882                     break;
1883                 default:
1884                     /* G0 DBCS */
1885                     if(mySource < mySourceLimit) {
1886                         char trailByte;
1887 getTrailByte:
1888                         tempBuf[0] = (char) (mySourceChar);
1889                         tempBuf[1] = trailByte = *mySource++;
                             /* mySourceChar now holds lead<<8 | trail; used for the callback and the offset math below */
1890                         mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);
1891                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
1892                     } else {
                             /* input ends after the lead byte: save it and resume at getTrailByte on the next call */
1893                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
1894                         args->converter->toULength = 1;
1895                         goto endloop;
1896                     }
1897                 }  /* End of inner switch */
1898                 break;
1899             }  /* End of outer switch */
1900             if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
1901                 if(args->offsets){
                         /* mySourceChar > 0xff means two bytes were combined above, so step back 2 instead of 1 */
1902                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
1903                 }
1904                 *(myTarget++)=(UChar)targetUniChar;
1905             }
1906             else if(targetUniChar > missingCharMarker){
1907                 /* disassemble the surrogate pair and write to output*/
1908                 targetUniChar-=0x0010000;
1909                 *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
1910                 if(args->offsets){
1911                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
1912                 }
1913                 ++myTarget;
1914                 if(myTarget< args->targetLimit){
1915                     *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
1916                     if(args->offsets){
1917                         args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
1918                     }
1919                     ++myTarget;
1920                 }else{
                         /* no room for the trail surrogate: park it in the converter overflow buffer */
1921                     args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
1922                                     (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
1923                 }
1924
1925             }
1926             else{
1927                 /* Call the callback function*/
1928                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
1929                 break;
1930             }
1931         }
1932         else{    /* goes with "if(myTarget < args->targetLimit)"  way up near top of function */
1933             *err =U_BUFFER_OVERFLOW_ERROR;
1934             break;
1935         }
1936     }
1937 endloop:
1938     args->target = myTarget;
1939     args->source = mySource;
1940 }
1941
1942
1943 /***************************************************************
1944 *   Rules for ISO-2022-KR encoding
1945 *   i) The KSC5601 designator sequence should appear only once in a file,
1946 *      at the beginning of a line before any KSC5601 characters. This usually
1947 *      means that it appears by itself on the first line of the file.
1948 *  ii) There are only 2 shifting sequences: SO to shift into double-byte mode
1949 *      and SI to shift into single-byte mode.
1950 */
/*
 * ISO-2022-KR fromUnicode path that delegates the whole conversion to the
 * sub-converter stored in myConverterData->currentConverter by calling
 * ucnv_MBCSFromUnicodeWithOffsets(). The ISO-2022-KR encoder in this file
 * uses it when converterData->version is 1.
 *
 * args->converter is pointed at the sub-converter for the duration of the
 * call and restored before returning. The pending code point (fromUChar32)
 * is copied to the sub-converter before the call and back afterwards. If the
 * call ends with U_BUFFER_OVERFLOW_ERROR, the overflow bytes held by the
 * sub-converter are moved into the outer converter so that the caller sees
 * them there.
 */
1951 static void
1952 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
1953
         /* remember the outer converter so that it can be restored at the end */
1954     UConverter* saveConv = args->converter;
1955     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)saveConv->extraInfo;
1956     args->converter=myConverterData->currentConverter;
1957
         /* hand the pending code point to the sub-converter and take it back after the call */
1958     myConverterData->currentConverter->fromUChar32 = saveConv->fromUChar32;
1959     ucnv_MBCSFromUnicodeWithOffsets(args,err);
1960     saveConv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
1961
1962     if(*err == U_BUFFER_OVERFLOW_ERROR) {
             /* move the overflow bytes to the outer converter and empty the sub-converter buffer */
1963         if(myConverterData->currentConverter->charErrorBufferLength > 0) {
1964             uprv_memcpy(
1965                 saveConv->charErrorBuffer,
1966                 myConverterData->currentConverter->charErrorBuffer,
1967                 myConverterData->currentConverter->charErrorBufferLength);
1968         }
1969         saveConv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
1970         myConverterData->currentConverter->charErrorBufferLength = 0;
1971     }
1972     args->converter=saveConv;
1973 }
1974
1975 static void
1976 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
1977
1978     const UChar *source = args->source;
1979     const UChar *sourceLimit = args->sourceLimit;
1980     unsigned char *target = (unsigned char *) args->target;
1981     unsigned char *targetLimit = (unsigned char *) args->targetLimit;
1982     int32_t* offsets = args->offsets;
1983     uint32_t targetByteUnit = 0x0000;
1984     UChar32 sourceChar = 0x0000;
1985     UBool isTargetByteDBCS;
1986     UBool oldIsTargetByteDBCS;
1987     UConverterDataISO2022 *converterData;
1988     UConverterSharedData* sharedData;
1989     UBool useFallback;
1990     int32_t length =0;
1991
1992     converterData=(UConverterDataISO2022*)args->converter->extraInfo;
1993     /* if the version is 1 then the user is requesting
1994      * conversion with ibm-25546 pass the arguments to
1995      * MBCS converter and return
1996      */
1997     if(converterData->version==1){
1998         UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
1999         return;
2000     }
2001
2002     /* initialize data */
2003     sharedData = converterData->currentConverter->sharedData;
2004     useFallback = args->converter->useFallback;
2005     isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus;
2006     oldIsTargetByteDBCS = isTargetByteDBCS;
2007
2008     isTargetByteDBCS   = (UBool) args->converter->fromUnicodeStatus;
2009     if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) {
2010         goto getTrail;
2011     }
2012     while(source < sourceLimit){
2013
2014         targetByteUnit = missingCharMarker;
2015
2016         if(target < (unsigned char*) args->targetLimit){
2017             sourceChar = *source++;
2018
2019             /* do not convert SO/SI/ESC */
2020             if(IS_2022_CONTROL(sourceChar)) {
2021                 /* callback(illegal) */
2022                 *err=U_ILLEGAL_CHAR_FOUND;
2023                 args->converter->fromUChar32=sourceChar;
2024                 break;
2025             }
2026
2027             length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2);
2028             if(length < 0) {
2029                 length = -length;  /* fallback */
2030             }
2031             /* only DBCS or SBCS characters are expected*/
2032             /* DB characters with high bit set to 1 are expected */
2033             if(length > 2 || length==0 ||(((targetByteUnit & 0x8080) != 0x8080)&& length==2)){
2034                 targetByteUnit=missingCharMarker;
2035             }
2036             if (targetByteUnit != missingCharMarker){
2037
2038                 oldIsTargetByteDBCS = isTargetByteDBCS;
2039                 isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF);
2040                   /* append the shift sequence */
2041                 if (oldIsTargetByteDBCS != isTargetByteDBCS ){
2042
2043                     if (isTargetByteDBCS)
2044                         *target++ = UCNV_SO;
2045                     else
2046                         *target++ = UCNV_SI;
2047                     if(offsets)
2048                         *(offsets++) = (int32_t)(source - args->source-1);
2049                 }
2050                 /* write the targetUniChar  to target */
2051                 if(targetByteUnit <= 0x00FF){
2052                     if( target < targetLimit){
2053                         *(target++) = (unsigned char) targetByteUnit;
2054                         if(offsets){
2055                             *(offsets++) = (int32_t)(source - args->source-1);
2056                         }
2057
2058                     }else{
2059                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);
2060                         *err = U_BUFFER_OVERFLOW_ERROR;
2061                     }
2062                 }else{
2063                     if(target < targetLimit){
2064                         *(target++) =(unsigned char) ((targetByteUnit>>8) -0x80);
2065                         if(offsets){
2066                             *(offsets++) = (int32_t)(source - args->source-1);
2067                         }
2068                         if(target < targetLimit){
2069                             *(target++) =(unsigned char) (targetByteUnit -0x80);
2070                             if(offsets){
2071                                 *(offsets++) = (int32_t)(source - args->source-1);
2072                             }
2073                         }else{
2074                             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80);
2075                             *err = U_BUFFER_OVERFLOW_ERROR;
2076                         }
2077                     }else{
2078                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80);
2079                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-0x80);
2080                         *err = U_BUFFER_OVERFLOW_ERROR;
2081                     }
2082                 }
2083
2084             }
2085             else{
2086                 /* oops.. the code point is unassingned
2087                  * set the error and reason
2088                  */
2089
2090                 /*check if the char is a First surrogate*/
2091                 if(UTF_IS_SURROGATE(sourceChar)) {
2092                     if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
2093 getTrail:
2094                         /*look ahead to find the trail surrogate*/
2095                         if(source <  sourceLimit) {
2096                             /* test the following code unit */
2097                             UChar trail=(UChar) *source;
2098                             if(UTF_IS_SECOND_SURROGATE(trail)) {
2099                                 source++;
2100                                 sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
2101                                 *err = U_INVALID_CHAR_FOUND;
2102                                 /* convert this surrogate code point */
2103                                 /* exit this condition tree */
2104                             } else {
2105                                 /* this is an unmatched lead code unit (1st surrogate) */
2106                                 /* callback(illegal) */
2107                                 *err=U_ILLEGAL_CHAR_FOUND;
2108                             }
2109                         } else {
2110                             /* no more input */
2111                             *err = U_ZERO_ERROR;
2112                         }
2113                     } else {
2114                         /* this is an unmatched trail code unit (2nd surrogate) */
2115                         /* callback(illegal) */
2116                         *err=U_ILLEGAL_CHAR_FOUND;
2117                     }
2118                 } else {
2119                     /* callback(unassigned) for a BMP code point */
2120                     *err = U_INVALID_CHAR_FOUND;
2121                 }
2122
2123                 args->converter->fromUChar32=sourceChar;
2124                 break;
2125             }
2126         } /* end if(myTargetIndex<myTargetLength) */
2127         else{
2128             *err =U_BUFFER_OVERFLOW_ERROR;
2129             break;
2130         }
2131
2132     }/* end while(mySourceIndex<mySourceLength) */
2133
2134     /*
2135      * the end of the input stream and detection of truncated input
2136      * are handled by the framework, but for ISO-2022-KR conversion
2137      * we need to be in ASCII mode at the very end
2138      *
2139      * conditions:
2140      *   successful
2141      *   not in ASCII mode
2142      *   end of input and no truncated input
2143      */
2144     if( U_SUCCESS(*err) &&
2145         isTargetByteDBCS &&
2146         args->flush && source>=sourceLimit && args->converter->fromUChar32==0
2147     ) {
2148         int32_t sourceIndex;
2149
2150         /* we are switching to ASCII */
2151         isTargetByteDBCS=FALSE;
2152
2153         /* get the source index of the last input character */
2154         /*
2155          * TODO this would be simpler and more reliable if we used a pair
2156          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
2157          * so that we could simply use the prevSourceIndex here;
2158          * this code gives an incorrect result for the rare case of an unmatched
2159          * trail surrogate that is alone in the last buffer of the text stream
2160          */
2161         sourceIndex=(int32_t)(source-args->source);
2162         if(sourceIndex>0) {
2163             --sourceIndex;
2164             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
2165                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
2166             ) {
2167                 --sourceIndex;
2168             }
2169         } else {
2170             sourceIndex=-1;
2171         }
2172
2173         fromUWriteUInt8(
2174             args->converter,
2175             SHIFT_IN_STR, 1,
2176             &target, (const char *)targetLimit,
2177             &offsets, sourceIndex,
2178             err);
2179     }
2180
2181     /*save the state and return */
2182     args->source = source;
2183     args->target = (char*)target;
2184     args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS;
2185 }
2186
2187 /************************ To Unicode ***************************************/
2188
/*
 * ISO-2022-KR -> Unicode conversion (with offsets) that delegates the byte
 * conversion to the subconverter stored in myData->currentConverter.
 * UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC() dispatches here when
 * myData->version==1.
 *
 * The input is processed in segments: getEndOfBuffer_2022() supplies the limit
 * of the next segment, ucnv_MBCSToUnicodeWithOffsets() converts that segment
 * through a private copy of the arguments (subArgs), and changeState_2022()
 * is then called on whatever input remains.
 *
 * args  in/out: source, target and offsets are advanced; partial input bytes
 *               (toUBytes/toULength) and overflow output (UCharErrorBuffer)
 *               are mirrored between the public converter and the subconverter
 * err   in/out: the loop stops as soon as this holds a failure code
 */
2189 static void
2190 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args,
2191                                                             UErrorCode* err){
2192     char const* sourceStart;
2193     UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
2194
2195     UConverterToUnicodeArgs subArgs;
2196     int32_t minArgsSize;
2197
2198     /* set up the subconverter arguments */
         /* copy no more than the smaller of the caller's args->size and our own struct size */
2199     if(args->size<sizeof(UConverterToUnicodeArgs)) {
2200         minArgsSize = args->size;
2201     } else {
2202         minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs);
2203     }
2204
2205     uprv_memcpy(&subArgs, args, minArgsSize);
2206     subArgs.size = (uint16_t)minArgsSize;
2207     subArgs.converter = myData->currentConverter;
2208
2209     /* remember the original start of the input for offsets */
2210     sourceStart = args->source;
2211
2212     if(myData->key != 0) {
2213         /* continue with a partial escape sequence */
             /* note: this jumps into the body of the while loop below */
2214         goto escape;
2215     }
2216
2217     while(U_SUCCESS(*err) && args->source < args->sourceLimit) {
2218         /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
2219         subArgs.source = args->source;
2220         subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush);
             /* an empty segment skips the subconverter and goes straight to the escape handling */
2221         if(subArgs.source != subArgs.sourceLimit) {
2222             /*
2223              * get the current partial byte sequence
2224              *
2225              * it needs to be moved between the public and the subconverter
2226              * so that the conversion framework, which only sees the public
2227              * converter, can handle truncated and illegal input etc.
2228              */
2229             if(args->converter->toULength > 0) {
2230                 uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength);
2231             }
2232             subArgs.converter->toULength = args->converter->toULength;
2233
2234             /*
2235              * Convert up to the end of the input, or to before the next escape character.
2236              * Does not handle conversion extensions because the preToU[] state etc.
2237              * is not copied.
2238              */
2239             ucnv_MBCSToUnicodeWithOffsets(&subArgs, err);
2240
2241             if(args->offsets != NULL && sourceStart != args->source) {
2242                 /* update offsets to base them on the actual start of the input */
                     /*
                      * One offsets entry is visited per UChar written in this pass
                      * (args->target up to subArgs.target); negative entries are
                      * left unchanged.
                      * NOTE(review): presumably the subconverter reports offsets
                      * relative to subArgs.source and uses negative values for
                      * "no source index" - confirm against ucnv_MBCSToUnicodeWithOffsets.
                      */
2243                 int32_t *offsets = args->offsets;
2244                 UChar *target = args->target;
2245                 int32_t delta = (int32_t)(args->source - sourceStart);
2246                 while(target < subArgs.target) {
2247                     if(*offsets >= 0) {
2248                         *offsets += delta;
2249                     }
2250                     ++offsets;
2251                     ++target;
2252                 }
2253             }
                 /* publish the subconverter's progress through the public arguments */
2254             args->source = subArgs.source;
2255             args->target = subArgs.target;
2256             args->offsets = subArgs.offsets;
2257
2258             /* copy input/error/overflow buffers */
2259             if(subArgs.converter->toULength > 0) {
2260                 uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength);
2261             }
2262             args->converter->toULength = subArgs.converter->toULength;
2263
2264             if(*err == U_BUFFER_OVERFLOW_ERROR) {
                     /* hand pending overflow output to the public converter and clear it on the subconverter */
2265                 if(subArgs.converter->UCharErrorBufferLength > 0) {
2266                     uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer,
2267                                 subArgs.converter->UCharErrorBufferLength);
2268                 }
2269                 args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength;
2270                 subArgs.converter->UCharErrorBufferLength = 0;
2271             }
2272         }
2273
             /* stop on any failure, or when the whole input has been consumed */
2274         if (U_FAILURE(*err) || (args->source == args->sourceLimit)) {
2275             return;
2276         }
2277
             /* also entered directly from the top of the function when myData->key != 0 */
2278 escape:
2279         changeState_2022(args->converter,
2280                &(args->source),
2281                args->sourceLimit,
2282                ISO_2022_KR,
2283                err);
2284     }
2285 }
2286
/*
 * ISO-2022-KR -> Unicode conversion with offsets.
 *
 * When myData->version==1 the work is handed to
 * UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(). Otherwise the bytes
 * are decoded here, one character at a time, with lookups in
 * myData->currentConverter->sharedData:
 *   - UCNV_SI sets toU2022State.g to 0, UCNV_SO sets it to 1
 *   - ESC_2022 is handed to changeState_2022()
 *   - with g==1 two input bytes form one character; 0x80 is added to each
 *     byte before the table lookup
 *   - with g==0 each input byte is looked up on its own
 *
 * State that survives between calls: a pending escape sequence
 * (myData->key != 0) and a pending lead byte (toUBytes[0] with toULength==1).
 *
 * args  in/out: source and target are written back on every exit path of the
 *               in-place decoder; offsets, when non-NULL, receive one source
 *               index per UChar written
 * err   in/out: set to U_BUFFER_OVERFLOW_ERROR when the target is full while
 *               input remains; it is also passed to changeState_2022() and
 *               toUnicodeCallback(), which may set it
 */
2287 static void
2288 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
2289                                                             UErrorCode* err){
2290     char tempBuf[2];
2291     const char *mySource = ( char *) args->source;
2292     UChar *myTarget = args->target;
2293     const char *mySourceLimit = args->sourceLimit;
2294     UChar32 targetUniChar = 0x0000;
2295     UChar mySourceChar = 0x0000;
2296     UConverterDataISO2022* myData;
2297     UConverterSharedData* sharedData ;
2298     UBool useFallback;
2299
2300     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
2301     if(myData->version==1){
2302         UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
2303         return;
2304     }
2305
2306     /* initialize state */
2307     sharedData = myData->currentConverter->sharedData;
2308     useFallback = args->converter->useFallback;
2309
         /* both branches below jump into the body of the main loop */
2310     if(myData->key != 0) {
2311         /* continue with a partial escape sequence */
2312         goto escape;
2313     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
2314         /* continue with a partial double-byte character */
             /* taken only with at least one input byte and room for one UChar; otherwise the stored lead byte is left untouched */
2315         mySourceChar = args->converter->toUBytes[0];
2316         args->converter->toULength = 0;
2317         goto getTrailByte;
2318     }
2319
2320     while(mySource< mySourceLimit){
2321
2322         if(myTarget < args->targetLimit){
2323
2324             mySourceChar= (unsigned char) *mySource++;
2325
2326             if(mySourceChar==UCNV_SI){
2327                 myData->toU2022State.g = 0;
2328                 /*consume the source */
2329                 continue;
2330             }else if(mySourceChar==UCNV_SO){
2331                 myData->toU2022State.g = 1;
2332                 /*consume the source */
2333                 continue;
2334             }else if(mySourceChar==ESC_2022){
                     /* un-read the ESC so that changeState_2022() sees the sequence from its first byte */
2335                 mySource--;
2336 escape:
2337                 changeState_2022(args->converter,&(mySource),
2338                                 mySourceLimit, ISO_2022_KR, err);
2339                 if(U_FAILURE(*err)){
2340                     args->target = myTarget;
2341                     args->source = mySource;
2342                     return;
2343                 }
2344                 continue;
2345             }
2346
2347             if(myData->toU2022State.g == 1) {
2348                 if(mySource < mySourceLimit) {
2349                     char trailByte;
                         /* also entered from the top of the function with mySourceChar holding the stored lead byte */
2350 getTrailByte:
2351                     trailByte = *mySource++;
                         /* tempBuf holds both bytes with 0x80 added for the table lookup; mySourceChar becomes the raw lead/trail pair */
2352                     tempBuf[0] = (char)(mySourceChar + 0x80);
2353                     tempBuf[1] = (char)(trailByte + 0x80);
2354                     mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);
                         /* the lookup is done only when neither raw byte has its top bit set */
2355                     if((mySourceChar & 0x8080) == 0) {
2356                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);
2357                     } else {
2358                         /* illegal bytes > 0x7f */
2359                         targetUniChar = missingCharMarker;
2360                     }
2361                 } else {
                         /* input ends after the lead byte: store it for the next call and stop */
2362                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
2363                     args->converter->toULength = 1;
2364                     break;
2365                 }
2366             }
2367             else{
                     /* g != 1: look up the single byte that was just read */
2368                 targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);
2369             }
                 /* results below 0xfffe are written out; everything else goes to the callback */
2370             if(targetUniChar < 0xfffe){
2371                 if(args->offsets) {
                         /*
                          * The offsets index is relative to args->target, which is only
                          * updated when the function exits. A value above 0xff marks a
                          * combined two-byte unit, so the offset steps back over both bytes.
                          * NOTE(review): when the lead byte was carried over from the
                          * previous call (and is non-zero) this evaluates to -1 - confirm
                          * that callers expect that.
                          */
2372                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
2373                 }
2374                 *(myTarget++)=(UChar)targetUniChar;
2375             }
2376             else {
2377                 /* Call the callback function*/
2378                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
2379                 break;
2380             }
2381         }
2382         else{
                 /* input remains but the target buffer is full */
2383             *err =U_BUFFER_OVERFLOW_ERROR;
2384             break;
2385         }
2386     }
         /* write the local cursors back to the caller's arguments */
2387     args->target = myTarget;
2388     args->source = mySource;
2389 }
2390
2391 /*************************** END ISO2022-KR *********************************/
2392
2393 /*************************** ISO-2022-CN *********************************
2394 *
2395 * Rules for ISO-2022-CN Encoding:
2396 * i)   The designator sequence must appear once on a line before any instance
2397 *      of the character set it designates.
2398 * ii)  If two lines contain characters from the same character set, both lines
2399 *      must include the designator sequence.
2400 * iii) Once the designator sequence is known, a shift sequence has to be found
2401 *      to invoke the designated character set
2402 * iv)  All lines start in ASCII and end in ASCII.
2403 * v)   Four shifting sequences are employed for this purpose:
2404 *
2405 *      Sequence    ASCII Eq    Charsets
2406 *      ----------  -------    ---------
2407 *      SI           <SI>        US-ASCII
2408 *      SO           <SO>        CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
2409 *      SS2          <ESC>N      CNS-11643-1992 Plane 2
2410 *      SS3          <ESC>O      CNS-11643-1992 Planes 3-7
2411 *
2412 * vi)
2413 *      SOdesignator  : ESC "$" ")" finalchar_for_SO
2414 *      SS2designator : ESC "$" "*" finalchar_for_SS2
2415 *      SS3designator : ESC "$" "+" finalchar_for_SS3
2416 *
2417 *      ESC $ ) A       Indicates the bytes following SO are Chinese
2418 *       characters as defined in GB 2312-80, until
2419 *       another SOdesignation appears
2420 *
2421 *
2422 *      ESC $ ) E       Indicates the bytes following SO are as defined
2423 *       in ISO-IR-165 (for details, see section 2.1),
2424 *       until another SOdesignation appears
2425 *
2426 *      ESC $ ) G       Indicates the bytes following SO are as defined
2427 *       in CNS 11643-plane-1, until another
2428 *       SOdesignation appears
2429 *
2430 *      ESC $ * H       Indicates the two bytes immediately following
2431 *       SS2 is a Chinese character as defined in CNS
2432 *       11643-plane-2, until another SS2designation
2433 *       appears
2434 *       (Meaning <ESC>N must precede every 2 byte
2435 *        sequence.)
2436 *
2437 *      ESC $ + I       Indicates the immediate two bytes following SS3
2438 *       is a Chinese character as defined in CNS
2439 *       11643-plane-3, until another SS3designation
2440 *       appears
2441 *       (Meaning <ESC>O must precede every 2 byte
2442 *        sequence.)
2443 *
2444 *      ESC $ + J       Indicates the immediate two bytes following SS3
2445 *       is a Chinese character as defined in CNS
2446 *       11643-plane-4, until another SS3designation
2447 *       appears
2448 *       (In English: <ESC>O must precede every 2 byte
2449 *        sequence.)
2450 *
2451 *      ESC $ + K       Indicates the immediate two bytes following SS3
2452 *       is a Chinese character as defined in CNS
2453 *       11643-plane-5, until another SS3designation
2454 *       appears
2455 *
2456 *      ESC $ + L       Indicates the immediate two bytes following SS3
2457 *       is a Chinese character as defined in CNS
2458 *       11643-plane-6, until another SS3designation
2459 *       appears
2460 *
2461 *      ESC $ + M       Indicates the immediate two bytes following SS3
2462 *       is a Chinese character as defined in CNS
2463 *       11643-plane-7, until another SS3designation
2464 *       appears
2465 *
2466 *       As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
2467 *       has its own designation information before any Chinese characters
2468 *       appear
2469 *
2470 */
2471
2472 /* The following are defined this way to make the strings truly read-only */
/*
 * ISO-2022-CN designator sequences, four bytes each plus the terminating NUL:
 * ESC (0x1B), '$' (0x24), an intermediate byte - ')' (0x29), '*' (0x2A) or
 * '+' (0x2B) - and a final byte 'A'..'M' (0x41..0x4D) in ASCII.
 * NOTE(review): the bytes are presumably spelled as hex escapes so that their
 * values do not depend on the compiler's execution character set - confirm.
 */
2473 static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41"; /* ESC $ ) A */
2474 static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45"; /* ESC $ ) E */
2475 static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47"; /* ESC $ ) G */
2476 static const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48"; /* ESC $ * H */
2477 static const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49"; /* ESC $ + I */
2478 static const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A"; /* ESC $ + J */
2479 static const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B"; /* ESC $ + K */
2480 static const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C"; /* ESC $ + L */
2481 static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D"; /* ESC $ + M */
2482
2483 /********************** ISO2022-CN Data **************************/
/*
 * Designator sequences written by the ISO-2022-CN from-Unicode code.
 * UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC() indexes this table with
 * the charset id cs when cs < CNS_11643, and with
 * CNS_11643 + (cs - CNS_11643_1) for the CNS 11643 planes, and copies 4 bytes
 * from the selected entry - so the order of the entries must not change.
 */
2484 static const char* const escSeqCharsCN[10] ={
2485         SHIFT_IN_STR,           /* ASCII */
2486         GB_2312_80_STR,         /* ESC $ ) A */
2487         ISO_IR_165_STR,         /* ESC $ ) E */
2488         CNS_11643_1992_Plane_1_STR, /* ESC $ ) G */
2489         CNS_11643_1992_Plane_2_STR, /* ESC $ * H */
2490         CNS_11643_1992_Plane_3_STR, /* ESC $ + I */
2491         CNS_11643_1992_Plane_4_STR, /* ESC $ + J */
2492         CNS_11643_1992_Plane_5_STR, /* ESC $ + K */
2493         CNS_11643_1992_Plane_6_STR, /* ESC $ + L */
2494         CNS_11643_1992_Plane_7_STR  /* ESC $ + M */
2495 };
2496
2497 static void
2498 UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
2499     UConverter *cnv = args->converter;
2500     UConverterDataISO2022 *converterData;
2501     ISO2022State *pFromU2022State;
2502     uint8_t *target = (uint8_t *) args->target;
2503     const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
2504     const UChar* source = args->source;
2505     const UChar* sourceLimit = args->sourceLimit;
2506     int32_t* offsets = args->offsets;
2507     UChar32 sourceChar;
2508     char buffer[8];
2509     int32_t len;
2510     int8_t choices[3];
2511     int32_t choiceCount;
2512     uint32_t targetValue = 0;
2513     UBool useFallback;
2514
2515     /* set up the state */
2516     converterData     = (UConverterDataISO2022*)cnv->extraInfo;
2517     pFromU2022State   = &converterData->fromU2022State;
2518
2519     choiceCount = 0;
2520
2521     /* check if the last codepoint of previous buffer was a lead surrogate*/
2522     if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
2523         goto getTrail;
2524     }
2525
2526     while( source < sourceLimit){
2527         if(target < targetLimit){
2528
2529             sourceChar  = *(source++);
2530             /*check if the char is a First surrogate*/
2531              if(UTF_IS_SURROGATE(sourceChar)) {
2532                 if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
2533 getTrail:
2534                     /*look ahead to find the trail surrogate*/
2535                     if(source < sourceLimit) {
2536                         /* test the following code unit */
2537                         UChar trail=(UChar) *source;
2538                         if(UTF_IS_SECOND_SURROGATE(trail)) {
2539                             source++;
2540                             sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
2541                             cnv->fromUChar32=0x00;
2542                             /* convert this supplementary code point */
2543                             /* exit this condition tree */
2544                         } else {
2545                             /* this is an unmatched lead code unit (1st surrogate) */
2546                             /* callback(illegal) */
2547                             *err=U_ILLEGAL_CHAR_FOUND;
2548                             cnv->fromUChar32=sourceChar;
2549                             break;
2550                         }
2551                     } else {
2552                         /* no more input */
2553                         cnv->fromUChar32=sourceChar;
2554                         break;
2555                     }
2556                 } else {
2557                     /* this is an unmatched trail code unit (2nd surrogate) */
2558                     /* callback(illegal) */
2559                     *err=U_ILLEGAL_CHAR_FOUND;
2560                     cnv->fromUChar32=sourceChar;
2561                     break;
2562                 }
2563             }
2564
2565             /* do the conversion */
2566             if(sourceChar <= 0x007f ){
2567                 /* do not convert SO/SI/ESC */
2568                 if(IS_2022_CONTROL(sourceChar)) {
2569                     /* callback(illegal) */
2570                     *err=U_ILLEGAL_CHAR_FOUND;
2571                     cnv->fromUChar32=sourceChar;
2572                     break;
2573                 }
2574
2575                 /* US-ASCII */
2576                 if(pFromU2022State->g == 0) {
2577                     buffer[0] = (char)sourceChar;
2578                     len = 1;
2579                 } else {
2580                     buffer[0] = UCNV_SI;
2581                     buffer[1] = (char)sourceChar;
2582                     len = 2;
2583                     pFromU2022State->g = 0;
2584                     choiceCount = 0;
2585                 }
2586                 if(sourceChar == CR || sourceChar == LF) {
2587                     /* reset the state at the end of a line */
2588                     uprv_memset(pFromU2022State, 0, sizeof(ISO2022State));
2589                     choiceCount = 0;
2590                 }
2591             }
2592             else{
2593                 /* convert U+0080..U+10ffff */
2594                 int32_t i;
2595                 int8_t cs, g;
2596
2597                 if(choiceCount == 0) {
2598                     /* try the current SO/G1 converter first */
2599                     choices[0] = pFromU2022State->cs[1];
2600
2601                     /* default to GB2312_1 if none is designated yet */
2602                     if(choices[0] == 0) {
2603                         choices[0] = GB2312_1;
2604                     }
2605
2606                     if(converterData->version == 0) {
2607                         /* ISO-2022-CN */
2608
2609                         /* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */
2610                         if(choices[0] == GB2312_1) {
2611                             choices[1] = (int8_t)CNS_11643_1;
2612                         } else {
2613                             choices[1] = (int8_t)GB2312_1;
2614                         }
2615
2616                         choiceCount = 2;
2617                     } else {
2618                         /* ISO-2022-CN-EXT */
2619
2620                         /* try one of the other converters */
2621                         switch(choices[0]) {
2622                         case GB2312_1:
2623                             choices[1] = (int8_t)CNS_11643_1;
2624                             choices[2] = (int8_t)ISO_IR_165;
2625                             break;
2626                         case ISO_IR_165:
2627                             choices[1] = (int8_t)GB2312_1;
2628                             choices[2] = (int8_t)CNS_11643_1;
2629                             break;
2630                         default: /* CNS_11643_x */
2631                             choices[1] = (int8_t)GB2312_1;
2632                             choices[2] = (int8_t)ISO_IR_165;
2633                             break;
2634                         }
2635
2636                         choiceCount = 3;
2637                     }
2638                 }
2639
2640                 cs = g = 0;
2641                 /*
2642                  * len==0: no mapping found yet
2643                  * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
2644                  * len>0: found a roundtrip result, done
2645                  */
2646                 len = 0;
2647                 /*
2648                  * We will turn off useFallback after finding a fallback,
2649                  * but we still get fallbacks from PUA code points as usual.
2650                  * Therefore, we will also need to check that we don't overwrite
2651                  * an early fallback with a later one.
2652                  */
2653                 useFallback = cnv->useFallback;
2654
2655                 for(i = 0; i < choiceCount && len <= 0; ++i) {
2656                     int8_t cs0 = choices[i];
2657                     if(cs0 > 0) {
2658                         uint32_t value;
2659                         int32_t len2;
2660                         if(cs0 > CNS_11643_0) {
2661                             len2 = MBCS_FROM_UCHAR32_ISO2022(
2662                                         converterData->myConverterArray[CNS_11643],
2663                                         sourceChar,
2664                                         &value,
2665                                         useFallback,
2666                                         MBCS_OUTPUT_3);
2667                             if(len2 == 3 || (len2 == -3 && len == 0)) {
2668                                 targetValue = value;
2669                                 cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80);
2670                                 if(len2 >= 0) {
2671                                     len = 2;
2672                                 } else {
2673                                     len = -2;
2674                                     useFallback = FALSE;
2675                                 }
2676                                 if(cs == CNS_11643_1) {
2677                                     g = 1;
2678                                 } else if(cs == CNS_11643_2) {
2679                                     g = 2;
2680                                 } else /* plane 3..7 */ if(converterData->version == 1) {
2681                                     g = 3;
2682                                 } else {
2683                                     /* ISO-2022-CN (without -EXT) does not support plane 3..7 */
2684                                     len = 0;
2685                                 }
2686                             }
2687                         } else {
2688                             /* GB2312_1 or ISO-IR-165 */
2689                             len2 = MBCS_FROM_UCHAR32_ISO2022(
2690                                         converterData->myConverterArray[cs0],
2691                                         sourceChar,
2692                                         &value,
2693                                         useFallback,
2694                                         MBCS_OUTPUT_2);
2695                             if(len2 == 2 || (len2 == -2 && len == 0)) {
2696                                 targetValue = value;
2697                                 len = len2;
2698                                 cs = cs0;
2699                                 g = 1;
2700                                 useFallback = FALSE;
2701                             }
2702                         }
2703                     }
2704                 }
2705
2706                 if(len != 0) {
2707                     len = 0; /* count output bytes; it must have been abs(len) == 2 */
2708
2709                     /* write the designation sequence if necessary */
2710                     if(cs != pFromU2022State->cs[g]) {
2711                         if(cs < CNS_11643) {
2712                             uprv_memcpy(buffer, escSeqCharsCN[cs], 4);
2713                         } else {
2714                             uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4);
2715                         }
2716                         len = 4;
2717                         pFromU2022State->cs[g] = cs;
2718                         if(g == 1) {
2719                             /* changing the SO/G1 charset invalidates the choices[] */
2720                             choiceCount = 0;
2721                         }
2722                     }
2723
2724                     /* write the shift sequence if necessary */
2725                     if(g != pFromU2022State->g) {
2726                         switch(g) {
2727                         case 1:
2728                             buffer[len++] = UCNV_SO;
2729
2730                             /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */
2731                             pFromU2022State->g = 1;
2732                             break;
2733                         case 2:
2734                             buffer[len++] = 0x1b;
2735                             buffer[len++] = 0x4e;
2736                             break;
2737                         default: /* case 3 */
2738                             buffer[len++] = 0x1b;
2739                             buffer[len++] = 0x4f;
2740                             break;
2741                         }
2742                     }
2743
2744                     /* write the two output bytes */
2745                     buffer[len++] = (char)(targetValue >> 8);
2746                     buffer[len++] = (char)targetValue;
2747                 } else {
2748                     /* if we cannot find the character after checking all codepages
2749                      * then this is an error
2750                      */
2751                     *err = U_INVALID_CHAR_FOUND;
2752                     cnv->fromUChar32=sourceChar;
2753                     break;
2754                 }
2755             }
2756
2757             /* output len>0 bytes in buffer[] */
2758             if(len == 1) {
2759                 *target++ = buffer[0];
2760                 if(offsets) {
2761                     *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
2762                 }
2763             } else if(len == 2 && (target + 2) <= targetLimit) {
2764                 *target++ = buffer[0];
2765                 *target++ = buffer[1];
2766                 if(offsets) {
2767                     int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
2768                     *offsets++ = sourceIndex;
2769                     *offsets++ = sourceIndex;
2770                 }
2771             } else {
2772                 fromUWriteUInt8(
2773                     cnv,
2774                     buffer, len,
2775                     &target, (const char *)targetLimit,
2776                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
2777                     err);
2778                 if(U_FAILURE(*err)) {
2779                     break;
2780                 }
2781             }
2782         } /* end if(myTargetIndex<myTargetLength) */
2783         else{
2784             *err =U_BUFFER_OVERFLOW_ERROR;
2785             break;
2786         }
2787
2788     }/* end while(mySourceIndex<mySourceLength) */
2789
2790     /*
2791      * the end of the input stream and detection of truncated input
2792      * are handled by the framework, but for ISO-2022-CN conversion
2793      * we need to be in ASCII mode at the very end
2794      *
2795      * conditions:
2796      *   successful
2797      *   not in ASCII mode
2798      *   end of input and no truncated input
2799      */
2800     if( U_SUCCESS(*err) &&
2801         pFromU2022State->g!=0 &&
2802         args->flush && source>=sourceLimit && cnv->fromUChar32==0
2803     ) {
2804         int32_t sourceIndex;
2805
2806         /* we are switching to ASCII */
2807         pFromU2022State->g=0;
2808
2809         /* get the source index of the last input character */
2810         /*
2811          * TODO this would be simpler and more reliable if we used a pair
2812          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
2813          * so that we could simply use the prevSourceIndex here;
2814          * this code gives an incorrect result for the rare case of an unmatched
2815          * trail surrogate that is alone in the last buffer of the text stream
2816          */
2817         sourceIndex=(int32_t)(source-args->source);
2818         if(sourceIndex>0) {
2819             --sourceIndex;
2820             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
2821                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
2822             ) {
2823                 --sourceIndex;
2824             }
2825         } else {
2826             sourceIndex=-1;
2827         }
2828
2829         fromUWriteUInt8(
2830             cnv,
2831             SHIFT_IN_STR, 1,
2832             &target, (const char *)targetLimit,
2833             &offsets, sourceIndex,
2834             err);
2835     }
2836
2837     /*save the state and return */
2838     args->source = source;
2839     args->target = (char*)target;
2840 }
2841
2842
/*
 * Converts ISO-2022-CN bytes to UTF-16, optionally recording source offsets.
 *
 * Consumes bytes from args->source (up to args->sourceLimit) and appends
 * UChars to args->target (up to args->targetLimit).  When args->offsets is
 * non-NULL, the slot parallel to each written UChar receives the source index
 * of the first byte of the character it was decoded from.
 *
 * Resumable state carried between calls:
 *   - myData->key != 0: an escape sequence was cut off; processing re-enters
 *     at the "escape" label.
 *   - converter->toULength == 1: the lead byte of a double-byte character is
 *     saved in converter->toUBytes[0]; processing re-enters at "getTrailByte"
 *     (only if there is both input and output room, otherwise the main loop
 *     runs and reports what is missing).
 *
 * Byte handling:
 *   - SI selects G0 (g=0); SO selects G1, but only after cs[1] was designated.
 *   - ESC is handed to changeState_2022(); on failure the function returns
 *     immediately with *err as set by that call.
 *   - CR and LF zero the whole to-Unicode ISO2022State and are then converted
 *     like any other byte.
 *   - With g==0 only bytes <= 0x7f are converted (to the same code point);
 *     with g!=0 two bytes are looked up via ucnv_MBCSSimpleGetNextUChar().
 *
 * Sets *err to U_BUFFER_OVERFLOW_ERROR when the target is full while input
 * remains.  A result equal to missingCharMarker or missingCharMarker-1 is
 * passed to toUnicodeCallback() and ends the loop.  args->source and
 * args->target are updated before returning.
 */
2843 static void
2844 UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
2845                                                UErrorCode* err){
2846     char tempBuf[3]; /* bytes handed to the sub-converter: [prefix,] lead, trail */
2847     const char *mySource = (char *) args->source;
2848     UChar *myTarget = args->target;
2849     const char *mySourceLimit = args->sourceLimit;
2850     uint32_t targetUniChar = 0x0000;
2851     uint32_t mySourceChar = 0x0000;
2852     UConverterDataISO2022* myData;
2853     ISO2022State *pToU2022State;
2854
2855     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
2856     pToU2022State = &myData->toU2022State;
2857
2858     if(myData->key != 0) {
2859         /* continue with a partial escape sequence */
2860         goto escape;
2861     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
2862         /* continue with a partial double-byte character */
2863         mySourceChar = args->converter->toUBytes[0];
2864         args->converter->toULength = 0;
2865         goto getTrailByte;
2866     }
2867
2868     while(mySource < mySourceLimit){
2869
             /* assume "no mapping" until a character is actually decoded */
2870         targetUniChar =missingCharMarker;
2871
2872         if(myTarget < args->targetLimit){
2873
2874             mySourceChar= (unsigned char) *mySource++;
2875
2876             switch(mySourceChar){
2877             case UCNV_SI:
2878                 pToU2022State->g=0;
2879                 continue;
2880
2881             case UCNV_SO:
2882                 if(pToU2022State->cs[1] != 0) {
2883                     pToU2022State->g=1;
2884                     continue;
2885                 } else {
2886                     /* illegal to have SO before a matching designator */
                         /* targetUniChar is still missingCharMarker, so the callback path below runs */
2887                     break;
2888                 }
2889
2890             case ESC_2022:
                     /* un-read the ESC so that changeState_2022() sees the whole sequence */
2891                 mySource--;
2892 escape:
2893                 changeState_2022(args->converter,&(mySource),
2894                     mySourceLimit, ISO_2022_CN,err);
2895
2896                 /* invalid or illegal escape sequence */
2897                 if(U_FAILURE(*err)){
2898                     args->target = myTarget;
2899                     args->source = mySource;
2900                     return;
2901                 }
2902                 continue;
2903
2904             /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
2905
2906             case CR:
2907                 /*falls through*/
2908             case LF:
                     /* line end: forget all designations and shifts (g becomes 0) */
2909                 uprv_memset(pToU2022State, 0, sizeof(ISO2022State));
2910                 /* falls through */
2911             default:
2912                 /* convert one or two bytes */
2913                 if(pToU2022State->g != 0) {
2914                     if(mySource < mySourceLimit) {
2915                         UConverterSharedData *cnv;
2916                         StateEnum tempState;
2917                         int32_t tempBufLen;
2918                         char trailByte;
2919 getTrailByte:
2920                         trailByte = *mySource++;
2921                         tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
2922                         if(tempState > CNS_11643_0) {
                                 /*
                                  * States above CNS_11643_0 all use the single CNS_11643
                                  * converter with a 3-byte input: 0x80 + (state - CNS_11643_0)
                                  * followed by the two source bytes (presumably the prefix
                                  * selects the CNS plane -- confirm against the CNS table).
                                  */
2923                             cnv = myData->myConverterArray[CNS_11643];
2924                             tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0));
2925                             tempBuf[1] = (char) (mySourceChar);
2926                             tempBuf[2] = trailByte;
2927                             tempBufLen = 3;
2928
2929                         }else{
2930                             cnv = myData->myConverterArray[tempState];
2931                             tempBuf[0] = (char) (mySourceChar);
2932                             tempBuf[1] = trailByte;
2933                             tempBufLen = 2;
2934                         }
                             /* now > 0xff, which the offset computation below uses to subtract 2 instead of 1 */
2935                         mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);
2936                         if(pToU2022State->g>=2) {
2937                             /* return from a single-shift state to the previous one */
2938                             pToU2022State->g=pToU2022State->prevG;
2939                         }
2940                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
2941                     } else {
                             /* input ends after the lead byte: save it and resume at getTrailByte next call */
2942                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
2943                         args->converter->toULength = 1;
2944                         goto endloop;
2945                     }
2946                 }
2947                 else{
                         /* G0: only 7-bit bytes map (to themselves); others stay missingCharMarker */
2948                     if(mySourceChar <= 0x7f) {
2949                         targetUniChar = (UChar) mySourceChar;
2950                     }
2951                 }
2952                 break;
2953             }
                 /* result fits in a single UChar */
2954             if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
2955                 if(args->offsets){
2956                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
2957                 }
2958                 *(myTarget++)=(UChar)targetUniChar;
2959             }
2960             else if(targetUniChar > missingCharMarker){
2961                 /* disassemble the surrogate pair and write to output*/
2962                 targetUniChar-=0x0010000;
2963                 *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
2964                 if(args->offsets){
2965                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
2966                 }
2967                 ++myTarget;
2968                 if(myTarget< args->targetLimit){
2969                     *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
2970                     if(args->offsets){
2971                         args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
2972                     }
2973                     ++myTarget;
2974                 }else{
                         /* no room for the trail surrogate: park it in the converter's UCharErrorBuffer */
2975                     args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
2976                                     (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
2977                 }
2978
2979             }
2980             else{
2981                 /* Call the callback function*/
2982                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
2983                 break;
2984             }
2985         }
2986         else{
2987             *err =U_BUFFER_OVERFLOW_ERROR;
2988             break;
2989         }
2990     }
2991 endloop:
         /* publish how far source and target advanced */
2992     args->target = myTarget;
2993     args->source = mySource;
2994 }
2995
/*
 * Writes the converter's substitution character(s) for the ISO-2022 variants,
 * preceded by whatever shift/escape bytes are needed for the current
 * from-Unicode state, and updates that state to match.
 *
 * The variant is chosen by myConverterData->locale[0]:
 *   'j': emit SI if g==1; emit ESC ( B and set cs[0]=ASCII unless cs[0] is
 *        already ASCII or JISX201; then emit subchar[0].
 *   'c': emit SI and set g=0 if g!=0; then emit subchar[0].
 *   'k', version 0: for a 1-byte subchar switch to SBCS (SI) if
 *        fromUnicodeStatus is set; otherwise switch to DBCS (SO) if it is
 *        clear and emit two subchar bytes.
 *   'k', other versions: temporarily install this converter's subchar in
 *        currentConverter, let ucnv_cbFromUWriteSub() write it through that
 *        sub-converter, restore the sub-converter's own subchar, and on
 *        U_BUFFER_OVERFLOW_ERROR move the sub-converter's charErrorBuffer
 *        contents to this converter.  This path returns directly.
 *
 * All other paths hand the assembled bytes to ucnv_cbFromUWriteBytes() with
 * the caller's offsetIndex; for an unexpected locale that is zero bytes.
 *
 * args        from-Unicode arguments; args->converter is the ISO-2022 converter
 * offsetIndex source index passed through to ucnv_cbFromUWriteBytes()
 * err         receives errors from the write helpers
 */
2996 static void
2997 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
2998     UConverter *cnv = args->converter;
2999     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
3000     ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
3001     char *p, *subchar;
3002     char buffer[8]; /* longest sequence built below is 5 bytes: SI + ESC ( B + subchar */
3003     int32_t length;
3004
3005     subchar=(char *)cnv->subChars;
3006     length=cnv->subCharLen; /* assume length==1 for most variants */
3007
3008     p = buffer;
3009     switch(myConverterData->locale[0]){
3010     case 'j':
3011         {
3012             int8_t cs;
3013
3014             if(pFromU2022State->g == 1) {
3015                 /* JIS7: switch from G1 to G0 */
3016                 pFromU2022State->g = 0;
3017                 *p++ = UCNV_SI;
3018             }
3019
3020             cs = pFromU2022State->cs[0];
3021             if(cs != ASCII && cs != JISX201) {
3022                 /* not in ASCII or JIS X 0201: switch to ASCII */
3023                 pFromU2022State->cs[0] = (int8_t)ASCII;
                     /* ESC ( B */
3024                 *p++ = '\x1b';
3025                 *p++ = '\x28';
3026                 *p++ = '\x42';
3027             }
3028
3029             *p++ = subchar[0];
3030             break;
3031         }
3032     case 'c':
3033         if(pFromU2022State->g != 0) {
3034             /* not in ASCII mode: switch to ASCII */
3035             pFromU2022State->g = 0;
3036             *p++ = UCNV_SI;
3037         }
3038         *p++ = subchar[0];
3039         break;
3040     case 'k':
3041         if(myConverterData->version == 0) {
3042             if(length == 1) {
3043                 if((UBool)args->converter->fromUnicodeStatus) {
3044                     /* in DBCS mode: switch to SBCS */
3045                     args->converter->fromUnicodeStatus = 0;
3046                     *p++ = UCNV_SI;
3047                 }
3048                 *p++ = subchar[0];
3049             } else /* length == 2*/ {
3050                 if(!(UBool)args->converter->fromUnicodeStatus) {
3051                     /* in SBCS mode: switch to DBCS */
3052                     args->converter->fromUnicodeStatus = 1;
3053                     *p++ = UCNV_SO;
3054                 }
3055                 *p++ = subchar[0];
3056                 *p++ = subchar[1];
3057             }
3058             break;
3059         } else {
3060             /* save the subconverter's substitution string */
3061             uint8_t *currentSubChars = myConverterData->currentConverter->subChars;
3062             int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen;
3063
3064             /* set our substitution string into the subconverter */
3065             myConverterData->currentConverter->subChars = (uint8_t *)subchar;
3066             myConverterData->currentConverter->subCharLen = (int8_t)length;
3067
3068             /* let the subconverter write the subchar, set/retrieve fromUChar32 state */
3069             args->converter = myConverterData->currentConverter;
3070             myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32;
                 /* NOTE(review): a literal 0 is passed here rather than offsetIndex -- confirm this is intended */
3071             ucnv_cbFromUWriteSub(args, 0, err);
3072             cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
3073             args->converter = cnv;
3074
3075             /* restore the subconverter's substitution string */
3076             myConverterData->currentConverter->subChars = currentSubChars;
3077             myConverterData->currentConverter->subCharLen = currentSubCharLen;
3078
                 /* bytes that did not fit were buffered in the sub-converter: move them to this converter */
3079             if(*err == U_BUFFER_OVERFLOW_ERROR) {
3080                 if(myConverterData->currentConverter->charErrorBufferLength > 0) {
3081                     uprv_memcpy(
3082                         cnv->charErrorBuffer,
3083                         myConverterData->currentConverter->charErrorBuffer,
3084                         myConverterData->currentConverter->charErrorBufferLength);
3085                 }
3086                 cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
3087                 myConverterData->currentConverter->charErrorBufferLength = 0;
3088             }
                 /* output was already written by the sub-converter; skip the buffer write below */
3089             return;
3090         }
3091     default:
3092         /* not expected */
3093         break;
3094     }
         /* write the p-buffer bytes collected above */
3095     ucnv_cbFromUWriteBytes(args,
3096                            buffer, (int32_t)(p - buffer),
3097                            offsetIndex, err);
3098 }
3099
3100 /*
3101  * Structure for cloning an ISO 2022 converter into a single memory block.
3102  * ucnv_safeClone() of the converter will align the entire cloneStruct,
3103  * and then ucnv_safeClone() of the sub-converter may additionally align
3104  * currentConverter inside the cloneStruct, for which we need the deadSpace
3105  * after currentConverter.
3106  * This is because UAlignedMemory may be larger than the actually
3107  * necessary alignment size for the platform.
3108  * The other cloneStruct fields will not be moved around,
3109  * and are aligned properly with cloneStruct's alignment.
3110  */
3111 struct cloneStruct
3112 {
3113     UConverter cnv;                /* the cloned ISO-2022 converter; _ISO_2022_SafeClone() returns its address */
3114     UConverter currentConverter;   /* storage offered to ucnv_safeClone() for the cloned sub-converter */
3115     UAlignedMemory deadSpace;      /* padding counted in the size offered for the sub-converter clone */
3116     UConverterDataISO2022 mydata;  /* copy of the source converter's extraInfo; cnv.extraInfo points here */
3117 };
3118
3119
/*
 * Completes a clone of an ISO-2022 converter inside a caller-supplied buffer
 * laid out as a struct cloneStruct.
 *
 * cnv          the converter being cloned
 * stackBuffer  destination memory, interpreted as struct cloneStruct; its cnv
 *              member is expected to be filled in already by the caller (see
 *              the comment in the body)
 * pBufferSize  if *pBufferSize is 0 this is a sizing request: the function
 *              stores sizeof(struct cloneStruct) there and returns NULL
 * status       receives any failure from cloning the sub-converter
 *
 * Returns the address of the clone's UConverter, or NULL for a sizing request
 * or when cloning currentConverter fails.  Each non-NULL entry of
 * myConverterArray[] is shared with the clone by incrementing its reference
 * count.
 */
3120 static UConverter *
3121 _ISO_2022_SafeClone(
3122             const UConverter *cnv,
3123             void *stackBuffer,
3124             int32_t *pBufferSize,
3125             UErrorCode *status)
3126 {
3127     struct cloneStruct * localClone;
3128     UConverterDataISO2022 *cnvData;
3129     int32_t i, size;
3130
3131     if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
3132         *pBufferSize = (int32_t)sizeof(struct cloneStruct);
3133         return NULL;
3134     }
3135
3136     cnvData = (UConverterDataISO2022 *)cnv->extraInfo;
3137     localClone = (struct cloneStruct *)stackBuffer;
3138
3139     /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
3140
         /* bitwise copy of the private data; pointer members still refer to the original's objects */
3141     uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022));
3142     localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */
3143     localClone->cnv.isExtraLocal = TRUE;
3144
3145     /* share the subconverters */
3146
3147     if(cnvData->currentConverter != NULL) {
             /* give the clone its own currentConverter, built in the space reserved inside cloneStruct */
3148         size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */
3149         localClone->mydata.currentConverter =
3150             ucnv_safeClone(cnvData->currentConverter,
3151                             &localClone->currentConverter,
3152                             &size, status);
3153         if(U_FAILURE(*status)) {
3154             return NULL;
3155         }
3156     }
3157
         /* the myConverterArray[] pointers were copied by the memcpy above; account for the extra user */
3158     for(i=0; i<UCNV_2022_MAX_CONVERTERS; ++i) {
3159         if(cnvData->myConverterArray[i] != NULL) {
3160             ucnv_incrementRefCount(cnvData->myConverterArray[i]);
3161         }
3162     }
3163
3164     return &localClone->cnv;
3165 }
3166
/*
 * Adds to the set behind `sa` the code points this ISO-2022 converter handles.
 *
 * cnv         the ISO-2022 converter
 * sa          set adder; its addRange/remove callbacks are applied to sa->set
 * which       set selector, forwarded to the sub-converter queries
 * pErrorCode  nothing is done if this already indicates failure; otherwise it
 *             is passed on to the sub-converter queries
 *
 * Steps:
 *   1. (only with U_ENABLE_GENERIC_ISO_2022) the generic converter adds
 *      U+0000..U+D7FF and U+E000..U+10FFFF and returns.
 *   2. Per locale[0]: 'j' adds U+0000..U+00FF or U+0000..U+007F depending on
 *      the version's charset mask, plus HWKANA_START..HWKANA_END if that mask
 *      includes HWKANA_7BIT; 'c'/'z' add U+0000..U+007F; 'k' delegates to
 *      currentConverter's own getUnicodeSet.
 *   3. Every non-NULL myConverterArray[] entry contributes its set; for
 *      CN version 0 the CNS_11643 entry is limited to lead bytes 0x81..0x82.
 *   4. U+000E, U+000F and U+001B are removed again.
 */
3167 static void
3168 _ISO_2022_GetUnicodeSet(const UConverter *cnv,
3169                     const USetAdder *sa,
3170                     UConverterUnicodeSet which,
3171                     UErrorCode *pErrorCode)
3172 {
3173     int32_t i;
3174     UConverterDataISO2022* cnvData;
3175
3176     if (U_FAILURE(*pErrorCode)) {
3177         return;
3178     }
3179 #ifdef U_ENABLE_GENERIC_ISO_2022
3180     if (cnv->sharedData == &_ISO2022Data) {
3181         /* We use UTF-8 in this case */
             /* everything except the surrogate range U+D800..U+DFFF */
3182         sa->addRange(sa->set, 0, 0xd7FF);
3183         sa->addRange(sa->set, 0xE000, 0x10FFFF);
3184         return;
3185     }
3186 #endif
3187
3188     cnvData = (UConverterDataISO2022*)cnv->extraInfo;
3189
3190     /* open a set and initialize it with code points that are algorithmically round-tripped */
3191     switch(cnvData->locale[0]){
3192     case 'j':
3193         if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
3194             /* include Latin-1 for some variants of JP */
3195             sa->addRange(sa->set, 0, 0xff);
3196         } else {
3197             /* include ASCII for JP */
3198             sa->addRange(sa->set, 0, 0x7f);
3199         }
3200         if(jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT)) {
3201             /* include half-width Katakana for JP */
3202             sa->addRange(sa->set, HWKANA_START, HWKANA_END);
3203         }
3204         break;
3205     case 'c':
3206     case 'z':
3207         /* include ASCII for CN */
3208         sa->addRange(sa->set, 0, 0x7f);
3209         break;
3210     case 'k':
3211         /* there is only one converter for KR, and it is not in the myConverterArray[] */
3212         cnvData->currentConverter->sharedData->impl->getUnicodeSet(
3213                 cnvData->currentConverter, sa, which, pErrorCode);
3214         /* the loop over myConverterArray[] will simply not find another converter */
3215         break;
3216     default:
3217         break;
3218     }
3219
3220     /*
3221      * Version-specific for CN:
3222      * CN version 0 does not map CNS planes 3..7 although
3223      * they are all available in the CNS conversion table;
3224      * CN version 1 does map them all.
3225      * The two versions create different Unicode sets.
3226      */
3227     for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
3228         if(cnvData->myConverterArray[i]!=NULL) {
3229             if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
3230                 cnvData->version==0 && i==CNS_11643
3231             ) {
3232                 /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */
                     /* note: this call passes UCNV_ROUNDTRIP_SET, not the caller's `which` */
3233                 ucnv_MBCSGetUnicodeSetForBytes(
3234                         cnvData->myConverterArray[i],
3235                         sa, UCNV_ROUNDTRIP_SET,
3236                         0, 0x81, 0x82,
3237                         pErrorCode);
3238             } else {
3239                 ucnv_MBCSGetUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, pErrorCode);
3240             }
3241         }
3242     }
3243
3244     /*
3245      * ISO 2022 converters must not convert SO/SI/ESC despite what
3246      * sub-converters do by themselves.
3247      * Remove these characters from the set.
3248      */
3249     sa->remove(sa->set, 0x0e);
3250     sa->remove(sa->set, 0x0f);
3251     sa->remove(sa->set, 0x1b);
3252 }
3253
/*
 * Function table for the generic ISO-2022 converter.
 * The initializer is positional; slot notes below are inferred from the names
 * of the functions installed here and in the sibling JP/KR/CN tables and
 * should be confirmed against the UConverterImpl declaration.
 * The four conversion slots are only populated when U_ENABLE_GENERIC_ISO_2022
 * is defined; otherwise they are NULL.
 */
3254 static const UConverterImpl _ISO2022Impl={
3255     UCNV_ISO_2022,
3256
3257     NULL,
3258     NULL,
3259
3260     _ISO2022Open,
3261     _ISO2022Close,
3262     _ISO2022Reset,
3263
3264 #ifdef U_ENABLE_GENERIC_ISO_2022
         /* two to-Unicode slots, then two from-Unicode slots (presumably plain and with-offsets) */
3265     T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
3266     T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
3267     ucnv_fromUnicode_UTF8,
3268     ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
3269 #else
3270     NULL,
3271     NULL,
3272     NULL,
3273     NULL,
3274 #endif
3275     NULL,
3276
3277     NULL,
3278     _ISO2022getName,
3279     _ISO_2022_WriteSub,
3280     _ISO_2022_SafeClone,
3281     _ISO_2022_GetUnicodeSet
3282 };
/*
 * Static description of the generic ISO-2022 converter.
 * Positional initializer: only the name string and the commented max-bytes
 * value are self-explanatory here.  NOTE(review): confirm the remaining slots
 * against the UConverterStaticData declaration.
 */
3283 static const UConverterStaticData _ISO2022StaticData={
3284     sizeof(UConverterStaticData),
3285     "ISO_2022",
3286     2022,
3287     UCNV_IBM,
3288     UCNV_ISO_2022,
3289     1,
3290     3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
3291     { 0x1a, 0, 0, 0 }, /* presumably the default substitution byte(s), with the next value as their length -- confirm */
3292     1,
3293     FALSE,
3294     FALSE,
3295     0,
3296     0,
3297     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
3298 };
/*
 * Shared-data record tying the generic ISO-2022 static data to its function
 * table.  Unlike the JP/KR/CN records below it is not declared static, so it
 * has external linkage; _ISO_2022_GetUnicodeSet() compares against its address
 * when U_ENABLE_GENERIC_ISO_2022 is defined.
 * NOTE(review): the meaning of the ~0, NULL, FALSE and 0 slots is not visible
 * here -- confirm against the UConverterSharedData declaration.
 */
3299 const UConverterSharedData _ISO2022Data={
3300     sizeof(UConverterSharedData),
3301     ~((uint32_t) 0),
3302     NULL,
3303     NULL,
3304     &_ISO2022StaticData, /* static description defined above */
3305     FALSE,
3306     &_ISO2022Impl,       /* function table defined above */
3307     0
3308 };
3309
3310 /*************JP****************/
/*
 * Function table for the ISO-2022-JP converter.
 * Positional initializer; slot notes are inferred from the function names and
 * should be confirmed against the UConverterImpl declaration.  Open, close,
 * reset, getName, writeSub, safeClone and getUnicodeSet are the functions
 * shared by all ISO-2022 variants in this file.
 */
3311 static const UConverterImpl _ISO2022JPImpl={
3312     UCNV_ISO_2022,
3313
3314     NULL,
3315     NULL,
3316
3317     _ISO2022Open,
3318     _ISO2022Close,
3319     _ISO2022Reset,
3320
         /* the same routine fills both to-Unicode slots, and likewise both from-Unicode slots */
3321     UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
3322     UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
3323     UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
3324     UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
3325     NULL,
3326
3327     NULL,
3328     _ISO2022getName,
3329     _ISO_2022_WriteSub,
3330     _ISO_2022_SafeClone,
3331     _ISO_2022_GetUnicodeSet
3332 };
/*
 * Static description of the ISO-2022-JP converter.
 * Positional initializer: only the name string and the commented max-bytes
 * value are self-explanatory here.  NOTE(review): confirm the remaining slots
 * against the UConverterStaticData declaration.
 */
3333 static const UConverterStaticData _ISO2022JPStaticData={
3334     sizeof(UConverterStaticData),
3335     "ISO_2022_JP",
3336     0,
3337     UCNV_IBM,
3338     UCNV_ISO_2022,
3339     1,
3340     6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */
3341     { 0x1a, 0, 0, 0 }, /* presumably the default substitution byte(s), with the next value as their length -- confirm */
3342     1,
3343     FALSE,
3344     FALSE,
3345     0,
3346     0,
3347     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
3348 };
/*
 * Shared-data record tying the ISO-2022-JP static data to its function table.
 * NOTE(review): the meaning of the ~0, NULL, FALSE and 0 slots is not visible
 * here -- confirm against the UConverterSharedData declaration.
 */
3349 static const UConverterSharedData _ISO2022JPData={
3350     sizeof(UConverterSharedData),
3351     ~((uint32_t) 0),
3352     NULL,
3353     NULL,
3354     &_ISO2022JPStaticData, /* static description defined above */
3355     FALSE,
3356     &_ISO2022JPImpl,       /* function table defined above */
3357     0
3358 };
3359
3360 /************* KR ***************/
/*
 * Function table for the ISO-2022-KR converter.
 * Positional initializer; slot notes are inferred from the function names and
 * should be confirmed against the UConverterImpl declaration.  Open, close,
 * reset, getName, writeSub, safeClone and getUnicodeSet are the functions
 * shared by all ISO-2022 variants in this file.
 */
3361 static const UConverterImpl _ISO2022KRImpl={
3362     UCNV_ISO_2022,
3363
3364     NULL,
3365     NULL,
3366
3367     _ISO2022Open,
3368     _ISO2022Close,
3369     _ISO2022Reset,
3370
         /* the same routine fills both to-Unicode slots, and likewise both from-Unicode slots */
3371     UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
3372     UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
3373     UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
3374     UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
3375     NULL,
3376
3377     NULL,
3378     _ISO2022getName,
3379     _ISO_2022_WriteSub,
3380     _ISO_2022_SafeClone,
3381     _ISO_2022_GetUnicodeSet
3382 };
/*
 * Static description of the ISO-2022-KR converter.
 * Positional initializer: only the name string and the commented max-bytes
 * value are self-explanatory here.  NOTE(review): confirm the remaining slots
 * against the UConverterStaticData declaration.
 */
3383 static const UConverterStaticData _ISO2022KRStaticData={
3384     sizeof(UConverterStaticData),
3385     "ISO_2022_KR",
3386     0,
3387     UCNV_IBM,
3388     UCNV_ISO_2022,
3389     1,
3390     3, /* max 3 bytes per UChar: SO+DBCS */
3391     { 0x1a, 0, 0, 0 }, /* presumably the default substitution byte(s), with the next value as their length -- confirm */
3392     1,
3393     FALSE,
3394     FALSE,
3395     0,
3396     0,
3397     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
3398 };
/*
 * Shared-data record tying the ISO-2022-KR static data to its function table.
 * NOTE(review): the meaning of the ~0, NULL, FALSE and 0 slots is not visible
 * here -- confirm against the UConverterSharedData declaration.
 */
3399 static const UConverterSharedData _ISO2022KRData={
3400     sizeof(UConverterSharedData),
3401     ~((uint32_t) 0),
3402     NULL,
3403     NULL,
3404     &_ISO2022KRStaticData, /* static description defined above */
3405     FALSE,
3406     &_ISO2022KRImpl,       /* function table defined above */
3407     0
3408 };
3409
3410 /*************** CN ***************/
/*
 * Function table for the ISO-2022-CN converter.
 * Positional initializer; slot notes are inferred from the function names and
 * should be confirmed against the UConverterImpl declaration.  Open, close,
 * reset, getName, writeSub, safeClone and getUnicodeSet are the functions
 * shared by all ISO-2022 variants in this file.
 */
3411 static const UConverterImpl _ISO2022CNImpl={
3412
3413     UCNV_ISO_2022,
3414
3415     NULL,
3416     NULL,
3417
3418     _ISO2022Open,
3419     _ISO2022Close,
3420     _ISO2022Reset,
3421
         /* the same routine fills both to-Unicode slots, and likewise both from-Unicode slots */
3422     UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
3423     UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
3424     UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
3425     UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
3426     NULL,
3427
3428     NULL,
3429     _ISO2022getName,
3430     _ISO_2022_WriteSub,
3431     _ISO_2022_SafeClone,
3432     _ISO_2022_GetUnicodeSet
3433 };
/*
 * Static description of the ISO-2022-CN converter.
 * Positional initializer: only the name string and the commented max-bytes
 * value are self-explanatory here.  NOTE(review): confirm the remaining slots
 * against the UConverterStaticData declaration.
 */
3434 static const UConverterStaticData _ISO2022CNStaticData={
3435     sizeof(UConverterStaticData),
3436     "ISO_2022_CN",
3437     0,
3438     UCNV_IBM,
3439     UCNV_ISO_2022,
3440     1,
3441     8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */
3442     { 0x1a, 0, 0, 0 }, /* presumably the default substitution byte(s), with the next value as their length -- confirm */
3443     1,
3444     FALSE,
3445     FALSE,
3446     0,
3447     0,
3448     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
3449 };
/*
 * Shared-data record tying the ISO-2022-CN static data to its function table.
 * NOTE(review): the meaning of the ~0, NULL, FALSE and 0 slots is not visible
 * here -- confirm against the UConverterSharedData declaration.
 */
3450 static const UConverterSharedData _ISO2022CNData={
3451     sizeof(UConverterSharedData),
3452     ~((uint32_t) 0),
3453     NULL,
3454     NULL,
3455     &_ISO2022CNStaticData, /* static description defined above */
3456     FALSE,
3457     &_ISO2022CNImpl,       /* function table defined above */
3458     0
3459 };
3460
3461
3462
3463 #endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION */