Merging r37048, r37051, r37052, r37055 from the-real-msvc branch
[reactos.git] / irc / TechBot / CHMLibrary / CHMDecoding / HHCParser2.cs
1 using System;
2 using System.Collections;
3 using System.Text;
4 using System.Text.RegularExpressions;
5
6 namespace HtmlHelp.ChmDecoding
7 {
/// <summary>
/// The class <c>HHCParser2</c> implements a parser for HHC contents files.
/// </summary>
/// <remarks>
/// The parser keeps its scanning state in static fields, so <see cref="ParseHHC"/>
/// must not be called concurrently from multiple threads.
/// </remarks>
// internal sealed class HHCParser : IHHCParser
public class HHCParser2
{
    /// <summary>Regular expression source matching a single <c>&lt;param ...&gt;</c> tag.</summary>
    private const string RE_ParamBoundaries = @"\<param(?<innerText>.*?)\>";

    /// <summary>Regular expression source matching one <c>name=value</c> attribute inside a tag.</summary>
    private const string RE_QuoteAttributes = @"( |\t)*(?<attributeName>[\-a-zA-Z0-9]*)( |\t)*=( |\t)*(?<attributeTD>[\""\'])?(?<attributeValue>.*?(?(attributeTD)\k<attributeTD>|([\s>]|.$)))";

    // The regular expressions are built once; compiling them on every call is expensive.
    private static readonly Regex ParamRE = new Regex(RE_ParamBoundaries, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
    private static readonly Regex AttributesRE = new Regex(RE_QuoteAttributes, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);

    /// <summary>Lower-cased copy of the HHC text; used only to locate tags.</summary>
    private static string m_text1 = "";

    /// <summary>Original HHC text; item contents are extracted from this copy to keep their casing.</summary>
    private static string m_text2 = "";

    /// <summary>Current scan offset, valid for both text copies.</summary>
    private static int m_CurrentPos = 0;

    /// <summary>
    /// Parses a HHC file and returns an ArrayList with the table of contents (TOC) tree
    /// </summary>
    /// <param name="hhcFile">string content of the hhc file</param>
    /// <param name="chmFile">CHMFile instance</param>
    /// <returns>
    /// Returns an ArrayList with the table of contents (TOC) tree, or <c>null</c> if
    /// <paramref name="hhcFile"/> is <c>null</c> or contains no <c>&lt;ul&gt;</c> tag.
    /// </returns>
    public static ArrayList ParseHHC(string hhcFile, CHMFile chmFile)
    {
        if (hhcFile == null)
        {
            return null;
        }

        // Invariant lower-casing: culture-sensitive casing (e.g. Turkish 'I' -> dotless 'i')
        // would otherwise hide tags such as <LI> or <UL> from the ordinal searches below.
        string lowered = hhcFile.ToLowerInvariant();

        int idx = lowered.IndexOf("<ul>", StringComparison.Ordinal);
        if (idx == -1)
        {
            return null;
        }

        ArrayList tocList = new ArrayList();

        m_text2 = hhcFile;
        m_text1 = lowered;
        m_CurrentPos = idx + 4; // continue right behind the root "<ul>"

        try
        {
            ParseTree(tocList, chmFile);
        }
        finally
        {
            // Do not keep the (potentially large) file content alive after parsing.
            m_text1 = "";
            m_text2 = "";
            m_CurrentPos = 0;
        }

        return tocList;
    }

    /// <summary>
    /// Searches the lower-cased text for <paramref name="tag"/>, starting at the current scan position.
    /// </summary>
    /// <param name="tag">lower-case tag text to search for</param>
    /// <returns>
    /// The index of the tag, or <see cref="Int32.MaxValue"/> if it does not occur any more,
    /// so that a missing tag never compares as "nearest".
    /// </returns>
    private static int FindTag(string tag)
    {
        int idx = m_text1.IndexOf(tag, m_CurrentPos, StringComparison.Ordinal);
        return (idx < 0) ? Int32.MaxValue : idx;
    }

    /// <summary>
    /// Recursively parses a sitemap tree, starting at the current scan position.
    /// Returns when the closing <c>&lt;/ul&gt;</c> of the current level has been consumed
    /// or when no further list tags are found.
    /// </summary>
    /// <param name="arrNodes">arraylist which receives the extracted nodes</param>
    /// <param name="chmFile">CHMFile instance</param>
    private static void ParseTree(ArrayList arrNodes, CHMFile chmFile)
    {
        while (true)
        {
            int idxIndent = FindTag("<ul>");
            int idxItem = FindTag("<li>");
            int idxUndent = FindTag("</ul>");

            if ((idxIndent < idxItem) && (idxIndent < idxUndent))
            {
                // Indent: a nested list belongs to the most recently added item.
                m_CurrentPos = idxIndent + 4;
                if (arrNodes.Count < 1)
                {
                    // No parent item available; keep collecting into the current list.
                    ParseTree(arrNodes, chmFile);
                }
                else
                {
                    ParseTree(((TOCItem)(arrNodes[arrNodes.Count - 1])).Children, chmFile);
                }
            }
            else if ((idxItem < idxIndent) && (idxItem < idxUndent))
            {
                // New item
                m_CurrentPos = idxItem + 4;

                int idxObject = m_text1.IndexOf("<object", m_CurrentPos, StringComparison.Ordinal);
                if (idxObject != -1)
                {
                    int idxObjectEnd = m_text1.IndexOf("</object>", idxObject + 7, StringComparison.Ordinal);
                    if (idxObjectEnd != -1)
                    {
                        // Extract from the original text so parameter values keep their casing.
                        string text = m_text2.Substring(idxObject, idxObjectEnd - idxObject);

                        m_CurrentPos = idxObjectEnd + 9;

                        // Parse items in text.
                        TOCItem tocItem = ParseItems(text, chmFile);
                        if (tocItem != null)
                        {
                            arrNodes.Add(tocItem);
                        }
                    }
                }
            }
            else if ((idxUndent < idxIndent) && (idxUndent < idxItem))
            {
                // Undent: the current level is complete.
                m_CurrentPos = idxUndent + 5;
                return;
            }
            else
            {
                // None of the tags occurs any more.
                return;
            }
        }
    }

    /// <summary>
    /// Builds a TOC item from the <c>&lt;param&gt;</c> tags found in the text
    /// </summary>
    /// <param name="itemstext">text containing the items (content of one <c>&lt;object&gt;</c> element)</param>
    /// <param name="chmFile">CHMFile instance; its <c>ChmFilePath</c> is stored in the item</param>
    /// <returns>The TOC item filled with the parameters found in <paramref name="itemstext"/></returns>
    private static TOCItem ParseItems(string itemstext, CHMFile chmFile)
    {
        TOCItem tocItem = new TOCItem();

        // read parameters
        Match mP = ParamRE.Match(itemstext, 0);
        while (mP.Success)
        {
            string innerP = mP.Groups["innerText"].Value;

            string paramName = "";
            string paramValue = "";

            Match mA = AttributesRE.Match(innerP, 0);
            while (mA.Success)
            {
                string attributeName = mA.Groups["attributeName"].Value;
                string attributeValue = mA.Groups["attributeValue"].Value;
                string attributeTD = mA.Groups["attributeTD"].Value;

                if ((attributeTD.Length > 0) && (attributeValue.Length > 0))
                {
                    // delete the trailing textqualifier (the value group includes the closing quote)
                    int ltqi = attributeValue.LastIndexOf(attributeTD, StringComparison.Ordinal);
                    if (ltqi >= 0)
                    {
                        attributeValue = attributeValue.Substring(0, ltqi);
                    }
                }

                if (String.Compare(attributeName, "name", StringComparison.OrdinalIgnoreCase) == 0)
                {
                    paramName = attributeValue;
                }

                if (String.Compare(attributeName, "value", StringComparison.OrdinalIgnoreCase) == 0)
                {
                    paramValue = attributeValue;
                }

                mA = AttributesRE.Match(innerP, mA.Index + mA.Length);
            }

            tocItem.Params[paramName] = paramValue;

            // Invariant lower-casing keeps e.g. "ImageNumber" recognisable under every culture.
            switch (paramName.ToLowerInvariant())
            {
                case "name":
                    tocItem.Name = paramValue;
                    break;
                case "local":
                    tocItem.Local = paramValue;
                    break;
                case "imagenumber":
                    // A malformed image number must not abort parsing of the whole TOC;
                    // in that case the item keeps its default image index.
                    int imageIndex;
                    if (Int32.TryParse(paramValue, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out imageIndex))
                    {
                        tocItem.ImageIndex = imageIndex;

                        if (tocItem.ImageIndex == 2)
                        {
                            tocItem.ImageIndex = TOCItem.STD_FOLDER_HH1;
                        }
                    }
                    break;
            }

            mP = ParamRE.Match(itemstext, mP.Index + mP.Length);
        }

        tocItem.ChmFile = chmFile.ChmFilePath;
        return tocItem;
    }
}
220 }