2 using System.Collections;
4 using System.Text.RegularExpressions;
6 namespace HtmlHelp.ChmDecoding
9 /// The class <c>HHCParser2</c> implements a parser for HHC contents files.
11 // internal sealed class HHCParser : IHHCParser
12 public class HHCParser2
// All parser state lives in static fields, so it is shared by every call to the static methods of this class.
// NOTE(review): no synchronization is visible in this excerpt - presumably concurrent parses are unsafe; confirm.
// m_text1: lower-cased copy of the HHC text; every tag search (IndexOf) runs against this string.
14 static private string m_text1="";
// m_text2: string from which the <object> blocks are cut (Substring) using offsets found in m_text1.
// NOTE(review): its assignment is not visible in this excerpt - presumably the original-case HHC text; confirm.
15 static private string m_text2="";
// m_CurrentPos: start offset handed to the IndexOf searches in ParseTree.
16 static private int m_CurrentPos=0;
19 /// Parses a HHC file and returns an ArrayList with the table of contents (TOC) tree
21 /// <param name="hhcFile">string content of the hhc file</param>
22 /// <param name="chmFile">CHMFile instance; handed on to ParseTree, which passes it to ParseItems</param>
23 /// <returns>Returns an ArrayList with the table of contents (TOC) tree</returns>
24 public static ArrayList ParseHHC(string hhcFile, CHMFile chmFile)
// Timestamp taken on entry; in the visible code it only feeds the elapsed-time value computed at the end.
26 DateTime StartTime=DateTime.Now;
// Root list that ParseTree fills with the top-level nodes.
28 ArrayList tocList = new ArrayList();
// Lower-cased copy of the input so the tag searches can use lower-case literals ("<ul>", "<li>", ...).
// NOTE(review): String.ToLower() uses the current culture; under a Turkish culture "<LI>" lower-cases to a
// dotless-i form and the "<li>" searches would miss it - ToLowerInvariant() looks safer; confirm.
31 m_text1=hhcFile.ToLower();
// Position of the first <ul> tag in the lower-cased text (-1 when absent).
// NOTE(review): how idx is used, and whether -1 is handled, is not visible in this excerpt - confirm.
33 int idx=m_text1.IndexOf("<ul>");
// Both regexes are constructed on every call and stored in the static fields that ParseItems reads.
// NOTE(review): RegexOptions.Compiled makes construction comparatively expensive; building them once may be preferable.
38 ParamRE = new Regex(RE_ParamBoundaries, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
39 AttributesRE = new Regex(RE_QuoteAttributes, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
// Walk the document; extracted nodes are appended to tocList (and to the Children lists of its items).
41 ParseTree(tocList,chmFile);
// Elapsed parse time rendered as a string.
// NOTE(review): no consumer of x is visible in this excerpt - looks like leftover diagnostics; confirm.
43 DateTime EndTime=DateTime.Now;
44 TimeSpan Diff=EndTime-StartTime;
45 string x=Diff.ToString();
51 /// Recursively parses a sitemap tree
53 /// <remarks>Takes no text argument: the input is read from the static fields m_text1 / m_text2, searching from m_CurrentPos.</remarks>
54 /// <param name="arrNodes">arraylist which receives the extracted nodes</param>
55 /// <param name="chmFile">CHMFile instance; passed on to ParseItems</param>
56 static private void ParseTree( ArrayList arrNodes, CHMFile chmFile )
// NOTE(review): presumably the control flag of the scanning loop - the loop itself is not visible in this excerpt.
58 bool bProcessing=true;
// Find the next opening list tag, list item tag and closing list tag at or after the current position.
// IndexOf returns -1 for a tag that does not occur again.
64 int idxa=m_text1.IndexOf("<ul>",m_CurrentPos);
65 int idxb=m_text1.IndexOf("<li>",m_CurrentPos);
66 int idxc=m_text1.IndexOf("</ul>",m_CurrentPos);
// Case 1: <ul> is the nearest of the three tags.
// NOTE(review): all three case conditions use strict "<" against the other two indexes, so a case is only taken
// when BOTH other tags are still present after m_CurrentPos (a missing tag yields -1, and nothing > -1 is < -1).
// E.g. at the final </ul>, where no further <ul>/<li> exists, none of the visible conditions holds - confirm
// that code outside this excerpt handles that situation.
68 if ((idxa<idxb) && (idxa<idxc) && (idxa>-1))
// Recurse, adding to the same list ...
// NOTE(review): the condition selecting between this call and the next one is not visible in this excerpt.
74 ParseTree(arrNodes,chmFile);
// ... or recurse into the Children list of the most recently added TOCItem.
78 ParseTree(((TOCItem)(arrNodes[arrNodes.Count-1])).Children,chmFile);
// Case 2: <li> is the nearest tag - a new node follows.
84 if ((idxb<idxa) && (idxb<idxc) && (idxb>-1))
// Locate the <object ...> element of this item in the lower-cased text.
90 int idx2=m_text1.IndexOf("<object",m_CurrentPos);
// Search for the closing tag starting just behind "<object" (7 characters).
93 int idx3=m_text1.IndexOf("</object>",idx2+7);
// Cut the element out of m_text2 using the offsets found in m_text1; the closing tag itself is excluded.
// NOTE(review): no check for idx2 == -1 or idx3 == -1 is visible here; Substring would throw
// ArgumentOutOfRangeException in that case - confirm a guard exists outside this excerpt.
96 string text=m_text2.Substring(idx2,idx3-idx2);
100 // Parse items in text.
101 TOCItem tocItem=ParseItems(text, chmFile);
// Append the parsed node to the list being filled at this recursion level.
104 arrNodes.Add(tocItem);
// Case 3: </ul> is the nearest tag.
// NOTE(review): the body of this case is not visible in this excerpt.
111 if ((idxc<idxa) && (idxc<idxb) && (idxc>-1))
// Pattern for one <param ...> tag: everything between "<param" and the next ">" is captured (lazily) as group "innerText".
// NOTE(review): declared as a mutable static string while RE_QuoteAttributes below is const - looks unintentional; confirm.
122 private static string RE_ParamBoundaries = @"\<param(?<innerText>.*?)\>";
// Pattern for one name=value attribute: optional blanks/tabs, a name made of letters, digits and hyphens
// ("attributeName"), "=", an optional single or double quote ("attributeTD"), then the value ("attributeValue").
// If a quote was captured, the value runs lazily up to and INCLUDING the same quote character; otherwise it runs
// up to and including the first whitespace character or ">", or to the end of the input.
123 private const string RE_QuoteAttributes = @"( |\t)*(?<attributeName>[\-a-zA-Z0-9]*)( |\t)*=( |\t)*(?<attributeTD>[\""\'])?(?<attributeValue>.*?(?(attributeTD)\k<attributeTD>|([\s>]|.$)))";
// Regex instances for the two patterns above; assigned in ParseHHC and read in ParseItems.
124 private static Regex ParamRE;
125 private static Regex AttributesRE;
128 /// Parses tree nodes from the text
130 /// <param name="itemstext">text containing the items</param>
131 /// <returns>a TOCItem; presumably the instance filled from the param tags below (the return statement is not visible in this excerpt)</returns>
132 /// <param name="chmFile">CHMFile instance; its ChmFilePath is stored on the item</param>
133 private static TOCItem ParseItems( string itemstext, CHMFile chmFile)
// Resolve the numeric index of the "innerText" group of the param regex.
135 int innerPTextIdx = ParamRE.GroupNumberFromName("innerText");
137 // get group-name indexes
138 int nameIndex = AttributesRE.GroupNumberFromName("attributeName");
139 int valueIndex = AttributesRE.GroupNumberFromName("attributeValue");
140 int tdIndex = AttributesRE.GroupNumberFromName("attributeTD");
// Item that collects the values of all param tags found in itemstext.
142 TOCItem tocItem = new TOCItem();
// Outer loop: one iteration per <param ...> tag, scanning from nParamIndex (declared on a line not visible here).
// NOTE(review): IsMatch followed by Match with the same arguments evaluates the regex twice per iteration.
147 while( ParamRE.IsMatch(itemstext, nParamIndex) )
149 Match mP = ParamRE.Match(itemstext, nParamIndex);
// Attribute text of the current param tag.
151 string innerP = mP.Groups[innerPTextIdx].Value;
// Receive the values of the tag's name= and value= attributes.
153 string paramName = "";
154 string paramValue = "";
// Inner loop: one iteration per attribute inside the param tag, scanning from nAttrIdx (declaration not visible here).
158 while( AttributesRE.IsMatch( innerP, nAttrIdx ) )
160 Match mA = AttributesRE.Match(innerP, nAttrIdx);
162 string attributeName = mA.Groups[nameIndex].Value;
163 string attributeValue = mA.Groups[valueIndex].Value;
// Quote character that delimited the value; empty when the value was unquoted.
164 string attributeTD = mA.Groups[tdIndex].Value;
// For quoted values the pattern includes the closing quote in attributeValue, so it is cut off here.
166 if(attributeTD.Length > 0)
168 // delete the trailing textqualifier
169 if( attributeValue.Length > 0)
171 int ltqi = attributeValue.LastIndexOf( attributeTD );
// NOTE(review): any guard on ltqi before this Substring is not visible in this excerpt - confirm.
175 attributeValue = attributeValue.Substring(0,ltqi);
// Attribute names are compared case-insensitively by lower-casing them first.
180 if( attributeName.ToLower() == "name")
182 paramName = attributeValue;
185 if( attributeName.ToLower() == "value")
187 paramValue = attributeValue;
// Continue the attribute scan directly behind the current match.
190 nAttrIdx = mA.Index+mA.Length;
// Every param is stored in the Params collection under its name exactly as written in the file ...
193 tocItem.Params[paramName] = paramValue;
// ... and selected params are also mapped to properties; the switch is on the lower-cased name.
// NOTE(review): the case labels are not visible in this excerpt.
194 switch(paramName.ToLower())
198 tocItem.Name = paramValue;
202 tocItem.Local = paramValue;
// NOTE(review): Int32.Parse throws FormatException/OverflowException on a non-numeric or out-of-range value;
// no guard is visible here - confirm, or consider Int32.TryParse.
206 tocItem.ImageIndex = Int32.Parse(paramValue);
// An image index of 2 is replaced by the TOCItem.STD_FOLDER_HH1 constant.
208 if( tocItem.ImageIndex == 2)
209 tocItem.ImageIndex = TOCItem.STD_FOLDER_HH1;
// Continue the param scan directly behind the current match.
213 nParamIndex = mP.Index+mP.Length;
// Record the path of the CHM file on the item.
216 tocItem.ChmFile = chmFile.ChmFilePath;