modified dll/win32/kernel32/misc/lang.c
[reactos.git] / reactos / lib / 3rdparty / icu4ros / icu / source / common / rbbiscan.h
1 //
2 // rbbiscan.h
3 //
4 // Copyright (C) 2002-2007, International Business Machines Corporation and others.
5 // All Rights Reserved.
6 //
7 // This file contains declarations for class RBBIRuleScanner
8 //
9
10
11 #ifndef RBBISCAN_H
12 #define RBBISCAN_H
13
14 #include "unicode/utypes.h"
15 #include "unicode/uobject.h"
16 #include "unicode/rbbi.h"
17 #include "unicode/uniset.h"
18 #include "unicode/parseerr.h"
19 #include "uhash.h"
20 #include "uvector.h"
21 #include "unicode/symtable.h"// For UnicodeSet parsing; this is the interface that
22 // looks up references to $variables within a set.
23 #include "rbbinode.h"
24 //#include "rbbitblb.h"
25
26
27
28 U_NAMESPACE_BEGIN
29
30 class RBBIRuleBuilder; // Forward declaration; this header uses the type only through pointers.
31 class RBBISymbolTable; // Forward declaration; this header uses the type only through a pointer.
32
33
34 //--------------------------------------------------------------------------------
35 //
36 // class RBBIRuleScanner does the lowest level, character-at-a-time
37 // scanning of break iterator rules.
38 //
39 // The output of the scanner is parse trees for
40 // the rule expressions and a list of all Unicode Sets
41 // encountered.
42 //
43 //--------------------------------------------------------------------------------
44 static const int kStackSize = 100; // The size of the state stack for
45 // rules parsing. Corresponds roughly
46 // to the depth of parentheses nesting
47 // that is allowed in the rules.
48 // Used as the array size of both RBBIRuleScanner::fStack and RBBIRuleScanner::fNodeStack.
49 class RBBIRuleScanner : public UMemory { // Scanner for break iterator rules; this header only declares its members.
50 public:
51 // One scanned character (fChar) paired with a flag (fEscaped).
52 struct RBBIRuleChar {
53 UChar32 fChar;
54 UBool fEscaped; // NOTE(review): presumably true when the char was escaped or quoted in the rule text - confirm.
55 };
56
57 RBBIRuleScanner(RBBIRuleBuilder *rb); // rb: the rule builder (presumably kept in fRB - confirm).
58
59
60 virtual ~RBBIRuleScanner();
61
62 void nextChar(RBBIRuleChar &c); // Get the next char from the input stream into c.
63 // Declared void, so nothing is returned. NOTE(review): how end
64 // of input is reported is not visible in this header - confirm.
65 UBool push(const RBBIRuleChar &c); // Push (unget) one character.
66 // Only a single character may be pushed.
67 // NOTE(review): the meaning of the UBool result is not shown here - confirm.
68 void parse(); // Parse the rules, generating two parse
69 // trees, one each for the forward and
70 // reverse rules,
71 // and a list of UnicodeSets encountered.
72
73 /**
74 * Return a rules string without unnecessary
75 * characters. Static, so no scanner instance is needed to call it.
76 */
77 static UnicodeString stripRules(const UnicodeString &rules);
78 private:
79
80 UBool doParseActions(int32_t a); // NOTE(review): presumably a is an action code from the state table - confirm.
81 void error(UErrorCode e); // error reporting convenience function.
82 void fixOpStack(RBBINode::OpPrecedence p);
83 // findSetFor (next line): setToAdopt is optional and defaults to NULL.
84 void findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt = NULL);
85
86 UChar32 nextCharLL(); // NOTE(review): presumably the low-level fetch behind nextChar() - confirm.
87 #ifdef RBBI_DEBUG
88 void printNodeStack(const char *title); // Declared only when RBBI_DEBUG is defined.
89 #endif
90 RBBINode *pushNewNode(RBBINode::NodeType t);
91 void scanSet();
92
93
94 RBBIRuleBuilder *fRB; // The rule builder that we are part of.
95
96 int32_t fScanIndex; // Index of current character being processed
97 // in the rule input string.
98 int32_t fNextIndex; // Index of the next character, which
99 // is the first character not yet scanned.
100 UBool fQuoteMode; // Scan is in a 'quoted region'
101 int32_t fLineNum; // Line number in input file.
102 int32_t fCharNum; // Char position within the line.
103 UChar32 fLastChar; // Previous char, needed to count CR-LF
104 // as a single line, not two.
105
106 RBBIRuleChar fC; // Current char for parse state machine
107 // processing.
108 UnicodeString fVarName; // $variableName, valid when we've just
109 // scanned one.
110
111 RBBIRuleTableEl **fStateTable; // State Transition Table for RBBI Rule
112 // parsing. index by p[state][char-class]
113 // NOTE(review): RBBIRuleTableEl is not declared in this header; it must come from another include - confirm.
114 uint16_t fStack[kStackSize]; // State stack, holds state pushes
115 int32_t fStackPtr; // and pops as specified in the state
116 // transition rules.
117
118 RBBINode *fNodeStack[kStackSize]; // Node stack, holds nodes created
119 // during the parse of a rule
120 int32_t fNodeStackPtr; // NOTE(review): presumably the index of the top of fNodeStack - confirm.
121
122
123 UBool fReverseRule; // True if the rule currently being scanned
124 // is a reverse direction rule (if it
125 // starts with a '!')
126
127 UBool fLookAheadRule; // True if the rule includes a '/'
128 // somewhere within it.
129
130 RBBISymbolTable *fSymbolTable; // symbol table, holds definitions of
131 // $variable symbols.
132
133 UHashtable *fSetTable; // UnicodeSet hash table, holds indexes to
134 // the sets created while parsing rules.
135 // The key is the string used for creating
136 // the set.
137
138 UnicodeSet *fRuleSets[10]; // Unicode Sets that are needed during
139 // the scanning of RBBI rules. The
140 // indices for these are assigned by the
141 // perl script that builds the state tables.
142 // See rbbirpt.h.
143 // NOTE(review): the literal 10 presumably has to cover every index the generated tables use - confirm.
144 int32_t fRuleNum; // Counts each rule as it is scanned.
145
146 int32_t fOptionStart; // Input index of start of a !!option
147 // keyword, while being scanned.
148 // The four UnicodeSet pointers below are non-static members, so each scanner has its own despite the g prefix.
149 UnicodeSet *gRuleSet_rule_char;
150 UnicodeSet *gRuleSet_white_space;
151 UnicodeSet *gRuleSet_name_char;
152 UnicodeSet *gRuleSet_name_start_char;
153 // Copy constructor and copy assignment are declared in the private section.
154 RBBIRuleScanner(const RBBIRuleScanner &other); // forbid copying of this class
155 RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // forbid copying of this class
156 };
157
158 U_NAMESPACE_END
159
160 #endif