fix include file case
[reactos.git] / irc / TechBot / CHMLibrary / CHMDecoding / HHKParser.cs
1 using System;
2 using System.IO;
3 using System.Collections;
4 using System.Text;
5 using System.Text.RegularExpressions;
6
7 namespace HtmlHelp.ChmDecoding
8 {
9 /// <summary>
10 /// The class <c>HHKParser</c> implements a parser for HHK index files.
11 /// </summary>
12 internal sealed class HHKParser
13 {
/// <summary>
/// Regular expression patterns for the sitemap boundary tags.
/// ParseHHK replaces every match with a parenthesis so that the nesting of the
/// lists can be matched by <see cref="RE_NestedBoundaries"/>.
/// </summary>
private const string RE_ULOpening = @"\<ul\>"; // will be replaced by a '(' for nested parsing
private const string RE_ULClosing = @"\</ul\>"; // will be replaced by a ')' for nested parsing

/// <summary>
/// Matching ul-tags (greedy: spans from the first opening to the last closing tag)
/// </summary>
private const string RE_ULBoundaries = @"\<ul\>(?<innerText>.*)\</ul\>";
/// <summary>
/// Matching the nested tree structure: one balanced pair of parentheses
/// including everything in between. Written for RegexOptions.IgnorePatternWhitespace.
/// </summary>
private const string RE_NestedBoundaries = @"\( (?> [^()]+ | \( (?<DEPTH>) | \) (?<-DEPTH>) )* (?(DEPTH)(?!)) \)";
/// <summary>
/// Matching object-tags
/// </summary>
private const string RE_ObjectBoundaries = @"\<object(?<innerText>.*?)\</object\>";
/// <summary>
/// Matching param tags
/// </summary>
private const string RE_ParamBoundaries = @"\<param(?<innerText>.*?)\>";
/// <summary>
/// Extracting tag attributes (name, optional quote character and value)
/// </summary>
private const string RE_QuoteAttributes = @"( |\t)*(?<attributeName>[\-a-zA-Z0-9]*)( |\t)*=( |\t)*(?<attributeTD>[\""\'])?(?<attributeValue>.*?(?(attributeTD)\k<attributeTD>|([\s>]|.$)))";

/// <summary>
/// Private regular expression objects. They are assigned by ParseHHK and
/// used by all parsing methods of this class.
/// </summary>
private static Regex ulRE;
private static Regex NestedRE;
private static Regex ObjectRE;
private static Regex ParamRE;
private static Regex AttributesRE;
49
/// <summary>
/// Parses a HHK file and returns an ArrayList with the index tree
/// </summary>
/// <param name="hhkFile">string content of the hhk file</param>
/// <param name="chmFile">CHMFile instance</param>
/// <returns>Returns an ArrayList with the index tree. The list is empty if
/// <paramref name="hhkFile"/> is null, empty or contains no &lt;ul&gt; block.</returns>
public static ArrayList ParseHHK(string hhkFile, CHMFile chmFile)
{
	ArrayList indexList = new ArrayList();

	// nothing to parse
	if( (hhkFile == null) || (hhkFile.Length == 0) )
	{
		return indexList;
	}

	RegexOptions commonOptions = RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline;

	// Compiled expressions are expensive to construct and never change, so each
	// one is built only once and then reused by later calls. Every field is
	// checked on its own so that a partially initialized set is completed.
	if( ulRE == null )
	{
		ulRE = new Regex(RE_ULBoundaries, commonOptions);
	}
	if( NestedRE == null )
	{
		NestedRE = new Regex(RE_NestedBoundaries, RegexOptions.IgnorePatternWhitespace | commonOptions);
	}
	if( ObjectRE == null )
	{
		ObjectRE = new Regex(RE_ObjectBoundaries, commonOptions);
	}
	if( ParamRE == null )
	{
		ParamRE = new Regex(RE_ParamBoundaries, commonOptions);
	}
	if( AttributesRE == null )
	{
		AttributesRE = new Regex(RE_QuoteAttributes, commonOptions);
	}

	// run the expression once and test the result instead of IsMatch + Match
	Match mUL = ulRE.Match(hhkFile, 0);

	if( !mUL.Success )
	{
		return indexList;
	}

	// first object block contains information types and categories
	Match mO = ObjectRE.Match(hhkFile, 0);

	if( mO.Success )
	{
		ParseGlobalSettings( mO.Groups["innerText"].Value, chmFile );
	}

	string innerText = mUL.Groups["innerText"].Value;

	// Escape literal parentheses first, then turn the list tags into
	// parentheses so that ParseTree can match the nesting.
	innerText = innerText.Replace("(", "&#040;");
	innerText = innerText.Replace(")", "&#041;");
	innerText = Regex.Replace(innerText, RE_ULOpening, "(", RegexOptions.IgnoreCase);
	innerText = Regex.Replace(innerText, RE_ULClosing, ")", RegexOptions.IgnoreCase);

	ParseTree( innerText, null, indexList, chmFile );

	return indexList;
}
94
/// <summary>
/// Recursively parses a sitemap tree
/// </summary>
/// <param name="text">content text in which the list tags were already replaced by parentheses</param>
/// <param name="parent">Parent for all read items</param>
/// <param name="arrNodes">arraylist which receives the extracted nodes</param>
/// <param name="chmFile">CHMFile instance</param>
private static void ParseTree( string text, IndexItem parent, ArrayList arrNodes, CHMFile chmFile )
{
	int nIndex = 0;

	// The balancing expression is costly, so it is evaluated once per
	// iteration and the Match result is tested instead of IsMatch + Match.
	Match m = NestedRE.Match(text, nIndex);

	while( m.Success )
	{
		// content of the nested block without the enclosing parentheses
		string innerText = m.Value.Substring(1, m.Length - 2);

		// items located between the previous nested block and this one
		string strPreItems = text.Substring(nIndex, m.Index - nIndex);

		ParseItems(strPreItems, parent, arrNodes, chmFile);

		if( (arrNodes.Count > 0) && (innerText.Length > 0) )
		{
			// the most recently added node becomes the parent of the nested block
			IndexItem p = (IndexItem)arrNodes[arrNodes.Count - 1];
			ParseTree( innerText, p, arrNodes, chmFile );
		}

		nIndex = m.Index + m.Length;
		m = NestedRE.Match(text, nIndex);
	}

	// Remaining text holds no nested block anymore (the loop above has just
	// established that), so its items are parsed directly. A remainder of
	// at most one character behind a nested block is ignored.
	if( (nIndex == 0) || (nIndex < text.Length - 1) )
	{
		string strPostItems = text.Substring(nIndex, text.Length - nIndex);
		ParseItems(strPostItems, parent, arrNodes, chmFile);
	}
}
139
140
/// <summary>
/// Parses nodes from the text. Every &lt;object&gt; block found in the text
/// yields one entry that is appended to <paramref name="arrNodes"/>.
/// </summary>
/// <param name="itemstext">text containing the items</param>
/// <param name="parentItem">parent index item (null for top level items)</param>
/// <param name="arrNodes">arraylist where the nodes should be added</param>
/// <param name="chmFile">CHMFile instance</param>
private static void ParseItems( string itemstext, IndexItem parentItem, ArrayList arrNodes, CHMFile chmFile)
{
	// names are compared culture independent
	System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

	int innerTextIdx = ObjectRE.GroupNumberFromName("innerText");
	int innerPTextIdx = ParamRE.GroupNumberFromName("innerText");

	// get group-name indexes
	int nameIndex = AttributesRE.GroupNumberFromName("attributeName");
	int valueIndex = AttributesRE.GroupNumberFromName("attributeValue");
	int tdIndex = AttributesRE.GroupNumberFromName("attributeTD");

	Match m = ObjectRE.Match(itemstext, 0);

	while( m.Success )
	{
		string innerText = m.Groups[innerTextIdx].Value;

		IndexItem idxItem = new IndexItem();

		// number of "name" attributes read so far in this object block
		int nNameCnt = 0;

		string paramTitle = "";
		string paramLocal = "";
		bool bAdded = false;        // a topic or a see-also entry was already stored
		bool bTopicFlushed = false; // a topic was stored because a further title followed
		bool bSeeAlso = false;      // this object block contains a see-also parameter

		// read parameters
		Match mP = ParamRE.Match(innerText, 0);

		while( mP.Success )
		{
			string innerP = mP.Groups[innerPTextIdx].Value;

			string paramName = "";
			string paramValue = "";

			Match mA = AttributesRE.Match(innerP, 0);

			while( mA.Success )
			{
				string attributeName = mA.Groups[nameIndex].Value.ToLower(invariant);
				string attributeValue = mA.Groups[valueIndex].Value;
				string attributeTD = mA.Groups[tdIndex].Value;

				// delete the trailing textqualifier
				if( (attributeTD.Length > 0) && (attributeValue.Length > 0) )
				{
					int ltqi = attributeValue.LastIndexOf( attributeTD );

					if( ltqi >= 0 )
					{
						attributeValue = attributeValue.Substring(0, ltqi);
					}
				}

				if( attributeName == "name" )
				{
					paramName = HttpUtility.HtmlDecode(attributeValue); // for unicode encoded values
					nNameCnt++;
				}

				if( attributeName == "value" )
				{
					paramValue = HttpUtility.HtmlDecode(attributeValue); // for unicode encoded values
					// delete trailing /
					paramValue = paramValue.TrimEnd('/');
				}

				mA = AttributesRE.Match(innerP, mA.Index + mA.Length);
			}

			string paramNameLower = paramName.ToLower(invariant);

			if( nNameCnt == 1 ) // first "Name" param = keyword
			{
				// the keyword path is the parent's path followed by this keyword
				string sKeyword = paramValue;

				if( parentItem != null )
				{
					sKeyword = parentItem.KeyWordPath + "," + paramValue;
				}

				IndexItem idxFind = FindByKeyword(arrNodes, sKeyword);

				if( idxFind != null )
				{
					// keyword already known: further data is merged into the existing item
					idxItem = idxFind;
				}
				else
				{
					int nSegments = sKeyword.Split(new char[] {','}).Length;

					if( nSegments > 1 )
					{
						idxItem.CharIndex = sKeyword.Length - paramValue.Length;
					}
					else
					{
						idxItem.CharIndex = 0;
					}

					idxItem.KeyWordPath = sKeyword;
					idxItem.Indent = nSegments - 1;
					idxItem.IsSeeAlso = false;
				}
			}
			else
			{
				if( (nNameCnt > 2) && (paramNameLower == "name") )
				{
					// a further topic title starts: store the topic collected so far
					bAdded = true;
					bTopicFlushed = true;

					IndexTopic idxTopic = new IndexTopic(paramTitle, paramLocal, chmFile.CompileFile, chmFile.ChmFilePath);
					idxItem.Topics.Add( idxTopic );

					paramTitle = "";
					paramLocal = "";
				}

				switch( paramNameLower )
				{
					case "name":
						paramTitle = paramValue;
						break;
					case "local":
						paramLocal = paramValue.Replace("../", "").Replace("./", "");
						break;
					case "type": // information type assignment for item
						idxItem.InfoTypeStrings.Add( paramValue );
						break;
					case "see also":
						idxItem.AddSeeAlso(paramValue);
						idxItem.IsSeeAlso = true;
						bAdded = true;
						bSeeAlso = true;
						break;
				}
			}

			mP = ParamRE.Match(innerText, mP.Index + mP.Length);
		}

		// When topics were stored inside the loop, the title/local pair read
		// after the last store is still pending. Previously it was dropped,
		// so the last topic of a multi-topic entry was lost.
		bool bPendingTopic = bTopicFlushed && !bSeeAlso
			&& ((paramTitle.Length > 0) || (paramLocal.Length > 0));

		if( !bAdded || bPendingTopic )
		{
			IndexTopic idxTopic = new IndexTopic(paramTitle, paramLocal, chmFile.CompileFile, chmFile.ChmFilePath);
			idxItem.Topics.Add( idxTopic );
		}

		idxItem.ChmFile = chmFile;

		// NOTE(review): an item found by FindByKeyword is appended a second
		// time here. ParseTree takes the last list entry as parent of a nested
		// block, so this is kept as is - confirm before removing duplicates.
		arrNodes.Add( idxItem );

		m = ObjectRE.Match(itemstext, m.Index + m.Length);
	}
}
325
/// <summary>
/// Looks up the first item of the index list whose keyword path equals the given keyword
/// </summary>
/// <param name="indexList">index list to search</param>
/// <param name="Keyword">keyword to find</param>
/// <returns>Returns the first matching <see cref="IndexItem">IndexItem</see>, or null if the list holds no such item.</returns>
private static IndexItem FindByKeyword(ArrayList indexList, string Keyword)
{
	IndexItem found = null;

	// walk the list front to back and stop at the first hit
	for( int pos = 0; (found == null) && (pos < indexList.Count); pos++ )
	{
		IndexItem candidate = (IndexItem)indexList[pos];

		if( candidate.KeyWordPath == Keyword )
		{
			found = candidate;
		}
	}

	return found;
}
342
/// <summary>
/// Parses the very first &lt;OBJECT&gt; tag in the sitemap file and extracts
/// information types and categories.
/// </summary>
/// <param name="sText">text of the object tag</param>
/// <param name="chmFile">CHMFile instance which receives the information types and categories</param>
private static void ParseGlobalSettings(string sText, CHMFile chmFile)
{
	// Parameter names are compared culture independent. A culture-sensitive
	// ToLower() does not map "I" to "i" under e.g. the Turkish culture, which
	// would break names like "SAVEEXCLUSIVE" or "TYPEHIDDEN".
	System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

	int innerPTextIdx = ParamRE.GroupNumberFromName("innerText");

	// get group-name indexes
	int nameIndex = AttributesRE.GroupNumberFromName("attributeName");
	int valueIndex = AttributesRE.GroupNumberFromName("attributeValue");
	int tdIndex = AttributesRE.GroupNumberFromName("attributeTD");

	// kind of the previously read parameter:
	// 0... unknown
	// 1... inclusive info type name
	// 2... exclusive info type name
	// 3... hidden info type name
	// 4... category name
	// 5... incl infotype name for category
	// 6... excl infotype name for category
	// 7... hidden infotype name for category
	int prevItem = 0;

	string sName = "";
	string sDescription = "";
	string curCategory = "";

	// read parameters; each expression is evaluated once per iteration
	Match mP = ParamRE.Match(sText, 0);

	while( mP.Success )
	{
		string innerP = mP.Groups[innerPTextIdx].Value;

		string paramName = "";
		string paramValue = "";

		Match mA = AttributesRE.Match(innerP, 0);

		while( mA.Success )
		{
			string attributeName = mA.Groups[nameIndex].Value.ToLower(invariant);
			string attributeValue = mA.Groups[valueIndex].Value;
			string attributeTD = mA.Groups[tdIndex].Value;

			// delete the trailing textqualifier
			if( (attributeTD.Length > 0) && (attributeValue.Length > 0) )
			{
				int ltqi = attributeValue.LastIndexOf( attributeTD );

				if( ltqi >= 0 )
				{
					attributeValue = attributeValue.Substring(0, ltqi);
				}
			}

			if( attributeName == "name" )
			{
				paramName = HttpUtility.HtmlDecode(attributeValue); // for unicode encoded values
			}

			if( attributeName == "value" )
			{
				paramValue = HttpUtility.HtmlDecode(attributeValue); // for unicode encoded values
				// delete trailing /
				paramValue = paramValue.TrimEnd('/');
			}

			mA = AttributesRE.Match(innerP, mA.Index + mA.Length);
		}

		switch( paramName.ToLower(invariant) )
		{
			case "savetype": // inclusive information type name
				prevItem = 1;
				sName = paramValue;
				break;

			case "savetypedesc": // description of information type
			{
				sDescription = paramValue;

				// the mode results from the kind of the name parameter read before
				InformationTypeMode mode = InformationTypeMode.Inclusive;

				if( prevItem == 2 )
				{
					mode = InformationTypeMode.Exclusive;
				}
				else if( prevItem == 3 )
				{
					mode = InformationTypeMode.Hidden;
				}

				if( chmFile.GetInformationType( sName ) == null )
				{
					// check if the HtmlHelpSystem already holds such an information type
					InformationType sysType = chmFile.SystemInstance.GetInformationType( sName );

					if( sysType == null )
					{
						// info type not found yet
						chmFile.InformationTypes.Add( new InformationType(sName, sDescription, mode) );
					}
					else
					{
						chmFile.InformationTypes.Add( sysType );
					}
				}

				prevItem = 0;
				break;
			}

			case "saveexclusive": // exclusive information type name
				prevItem = 2;
				sName = paramValue;
				break;

			case "savehidden": // hidden information type name
				prevItem = 3;
				sName = paramValue;
				break;

			case "category": // category name
				prevItem = 4;
				sName = paramValue;
				curCategory = sName;
				break;

			case "categorydesc": // category description
			{
				sDescription = paramValue;

				if( chmFile.GetCategory( sName ) == null )
				{
					// check if the HtmlHelpSystem already holds such a category
					Category sysCat = chmFile.SystemInstance.GetCategory( sName );

					if( sysCat == null )
					{
						// add category
						chmFile.Categories.Add( new Category(sName, sDescription) );
					}
					else
					{
						chmFile.Categories.Add( sysCat );
					}
				}

				prevItem = 0;
				break;
			}

			case "type": // inclusive information type which is member of the previously read category
				prevItem = 5;
				sName = paramValue;
				break;

			case "typedesc": // description of type for category
			{
				sDescription = paramValue;
				Category cat = chmFile.GetCategory( curCategory );

				if( cat != null )
				{
					// category found
					InformationType infoType = chmFile.GetInformationType( sName );

					if( (infoType != null) && !cat.ContainsInformationType(infoType) )
					{
						infoType.SetCategoryFlag(true);
						cat.AddInformationType(infoType);
					}
				}

				prevItem = 0;
				break;
			}

			case "typeexclusive": // exclusive information type which is member of the previously read category
				prevItem = 6;
				sName = paramValue;
				break;

			case "typehidden": // hidden information type which is member of the previously read category
				prevItem = 7;
				sName = paramValue;
				break;

			default: // unknown parameter: forget the previously read name
				prevItem = 0;
				sName = "";
				sDescription = "";
				break;
		}

		mP = ParamRE.Match(sText, mP.Index + mP.Length);
	}
}
549 }
550 }