Import TechBot
[reactos.git] / irc / TechBot / CHMLibrary / CHMDecoding / CHMBtree.cs
1 using System;
2 using System.IO;
3 using System.Collections;
4 using System.Collections.Specialized;
5
6 namespace HtmlHelp.ChmDecoding
7 {
/// <summary>
/// The class <c>CHMBtree</c> decodes the binary help index (keyword B-tree) of a help file.
/// All decoding happens in the constructor: the header fields are stored in private members,
/// every decoded entry is appended to an index list (see <see cref="IndexList"/>), and the
/// reference to the raw binary data is dropped as soon as decoding has finished.
/// </summary>
/// <remarks>The binary index can be found in the storage file $WWKeywordLinks/BTree</remarks>
internal sealed class CHMBtree : IDisposable
{
    /// <summary>
    /// Constant specifying the size (in bytes) of one block following the header
    /// </summary>
    private const int BLOCK_SIZE = 2048;
    /// <summary>
    /// Internal flag specifying if the object has already been disposed
    /// </summary>
    private bool disposed = false;
    /// <summary>
    /// Internal member storing the binary file data (only set while the constructor decodes it)
    /// </summary>
    private byte[] _binaryFileData = null;
    /// <summary>
    /// Internal member storing the header flags (WORD)
    /// </summary>
    private int _flags = 0;
    /// <summary>
    /// Internal member storing the 16 byte data format field of the header
    /// </summary>
    private byte[] _dataFormat = new byte[16];
    /// <summary>
    /// Internal member storing the index of the last listing block
    /// </summary>
    private int _indexOfLastListingBlock = 0;
    /// <summary>
    /// Internal member storing the index of the root block
    /// </summary>
    private int _indexOfRootBlock = 0;
    /// <summary>
    /// Internal member storing the number of blocks
    /// </summary>
    private int _numberOfBlocks = 0;
    /// <summary>
    /// Internal member storing the tree depth.
    /// (1 if no index blocks, 2 one level of index blocks, ...)
    /// </summary>
    private int _treeDepth = 0;
    /// <summary>
    /// Internal member storing the number of keywords in the file
    /// </summary>
    private int _numberOfKeywords = 0;
    /// <summary>
    /// Internal member storing the codepage
    /// </summary>
    private int _codePage = 0;
    /// <summary>
    /// true if the index is from a CHI or CHM file, else CHW
    /// (set when the corresponding header DWORD equals 1)
    /// </summary>
    private bool _isCHI_CHM = true;
    /// <summary>
    /// Internal member storing the associated chmfile object
    /// </summary>
    private readonly CHMFile _associatedFile;
    /// <summary>
    /// Internal flag specifying if we have to read listing or index blocks.
    /// Starts as true and is switched to false after the listing block whose
    /// "next block" index is -1 has been decoded.
    /// </summary>
    private bool _readListingBlocks = true;
    /// <summary>
    /// Internal member storing an indexlist of the current file.
    /// </summary>
    private readonly ArrayList _indexList = new ArrayList();

    /// <summary>
    /// Constructor of the class. Decodes the given binary data immediately.
    /// </summary>
    /// <param name="binaryFileData">binary file data of the $WWKeywordLinks/BTree file</param>
    /// <param name="associatedFile">associated chm file</param>
    /// <exception cref="ArgumentNullException">
    /// Thrown if <paramref name="associatedFile"/> or <paramref name="binaryFileData"/> is null.
    /// </exception>
    public CHMBtree(byte[] binaryFileData, CHMFile associatedFile)
    {
        // ArgumentNullException derives from ArgumentException, so callers that
        // catch ArgumentException keep working.
        if( associatedFile == null )
        {
            throw new ArgumentNullException("associatedFile", "CHMBtree.ctor() - Associated CHMFile must not be null !");
        }

        if( binaryFileData == null )
        {
            throw new ArgumentNullException("binaryFileData", "CHMBtree.ctor() - Binary file data must not be null !");
        }

        _binaryFileData = binaryFileData;
        _associatedFile = associatedFile;
        DecodeData();

        // clear internal binary data after extraction
        _binaryFileData = null;
    }

    /// <summary>
    /// Decodes the binary file data: reads the header into the internal members and then
    /// decodes the remaining data block by block (<see cref="BLOCK_SIZE"/> bytes each).
    /// </summary>
    /// <returns>true if succeeded</returns>
    private bool DecodeData()
    {
        bool bRet = true;

        using( MemoryStream memStream = new MemoryStream(_binaryFileData) )
        using( BinaryReader binReader = new BinaryReader(memStream) )
        {
            ReadHeader(binReader);

            while( (memStream.Position < memStream.Length) && bRet )
            {
                int nCurOffset = (int)memStream.Position;
                byte[] dataBlock = binReader.ReadBytes(BLOCK_SIZE);
                bRet &= DecodeBlock(dataBlock, ref nCurOffset, _treeDepth-1);
            }
        }

        return bRet;
    }

    /// <summary>
    /// Reads the file header from the current reader position and fills the internal members.
    /// </summary>
    /// <param name="binReader">reader positioned at the start of the file data</param>
    private void ReadHeader(BinaryReader binReader)
    {
        // 2 byte signature (not important). Read as raw bytes: ReadChars would decode
        // the bytes as text and could consume a different number of bytes.
        binReader.ReadBytes(2);

        _flags = (int)binReader.ReadInt16(); // WORD flags

        binReader.ReadInt16(); // size of blocks (always 2048)

        _dataFormat = binReader.ReadBytes(16);

        binReader.ReadInt32(); // unknown DWORD

        _indexOfLastListingBlock = binReader.ReadInt32();
        _indexOfRootBlock = binReader.ReadInt32();

        binReader.ReadInt32(); // unknown DWORD

        _numberOfBlocks = binReader.ReadInt32();
        _treeDepth = binReader.ReadInt16();
        _numberOfKeywords = binReader.ReadInt32();
        _codePage = binReader.ReadInt32();

        binReader.ReadInt32(); // lcid DWORD

        _isCHI_CHM = (binReader.ReadInt32() == 1);

        // four trailing unknown DWORDs
        binReader.ReadInt32();
        binReader.ReadInt32();
        binReader.ReadInt32();
        binReader.ReadInt32();
    }

    /// <summary>
    /// Decodes one block of index data and appends an item for each of its entries
    /// to the internal index list.
    /// </summary>
    /// <param name="dataBlock">block of data</param>
    /// <param name="nOffset">current file offset (currently neither read nor modified)</param>
    /// <param name="indexBlocks">number of index blocks (currently not used)</param>
    /// <returns>true if succeeded (failures surface as exceptions from the reader)</returns>
    private bool DecodeBlock( byte[] dataBlock, ref int nOffset, int indexBlocks )
    {
        using( MemoryStream memStream = new MemoryStream(dataBlock) )
        {
            // The reader is a plain local (not a using-variable) because it is handed
            // on by ref; disposing the underlying stream above releases everything.
            BinaryReader binReader = new BinaryReader(memStream);

            int freeSpace = binReader.ReadInt16(); // length of freespace
            int nrOfEntries = binReader.ReadInt16(); // number of entries

            bool isListingBlock = _readListingBlocks;
            int nIndexOfNextBlock = -1;

            if( isListingBlock )
            {
                binReader.ReadInt32(); // index of previous block, -1 if this is the header
                nIndexOfNextBlock = binReader.ReadInt32(); // -1 if this is the last block
            }
            else
            {
                binReader.ReadInt32(); // index of child block
            }

            for( int nE = 0; nE < nrOfEntries; nE++ )
            {
                _indexList.Add( ReadIndexItem(ref binReader, isListingBlock) );
            }

            binReader.ReadBytes(freeSpace);

            // The end of the listing blocks is only recognised on a block
            // that actually contained entries.
            if( isListingBlock && (nrOfEntries > 0) && (nIndexOfNextBlock == -1) )
            {
                _readListingBlocks = false;
            }
        }

        return true;
    }

    /// <summary>
    /// Reads a single entry from the current reader position and wraps it in an index item.
    /// Listing and index entries share the same layout except for the trailing fields.
    /// </summary>
    /// <param name="binReader">reader positioned at the start of the entry</param>
    /// <param name="isListingEntry">true for an entry of a listing block, false for an index block</param>
    /// <returns>the decoded index item</returns>
    private IndexItem ReadIndexItem( ref BinaryReader binReader, bool isListingEntry )
    {
        string keyWord = BinaryReaderHelp.ExtractUTF16String(ref binReader, 0, true, _associatedFile.TextEncoding);

        bool isSeeAlsoKeyword = (binReader.ReadInt16() != 0);

        int indent = binReader.ReadInt16(); // indent of entry
        int nCharIndex = binReader.ReadInt32();

        binReader.ReadInt32(); // unknown

        int numberOfPairs = binReader.ReadInt32();

        int[] nTopics = new int[numberOfPairs];
        string[] seeAlso = new string[numberOfPairs];

        for( int i = 0; i < numberOfPairs; i++ )
        {
            if( isSeeAlsoKeyword )
            {
                seeAlso[i] = BinaryReaderHelp.ExtractUTF16String(ref binReader, 0, true, _associatedFile.TextEncoding);
            }
            else
            {
                nTopics[i] = binReader.ReadInt32();
            }
        }

        int nIndexOfThisEntry = -1;

        if( isListingEntry )
        {
            binReader.ReadInt32(); // unknown
            nIndexOfThisEntry = binReader.ReadInt32();
        }
        else
        {
            binReader.ReadInt32(); // index of child block; entry index stays -1
        }

        return new IndexItem(_associatedFile, keyWord, isSeeAlsoKeyword, indent, nCharIndex, nIndexOfThisEntry, seeAlso, nTopics);
    }

    /// <summary>
    /// Gets the internal generated index list
    /// </summary>
    internal ArrayList IndexList
    {
        get { return _indexList; }
    }

    /// <summary>
    /// Implement IDisposable.
    /// </summary>
    public void Dispose()
    {
        Dispose(true);
        // Take this object off the finalization queue so that no
        // finalization code runs for it after an explicit Dispose.
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases the reference to the binary file data on the first call
    /// with <paramref name="disposing"/> set to true; later calls do nothing.
    /// </summary>
    /// <param name="disposing">true if called directly or indirectly from user code</param>
    private void Dispose(bool disposing)
    {
        // Only the first call with disposing == true has anything to release.
        if( !disposed && disposing )
        {
            _binaryFileData = null;
        }

        disposed = true;
    }
}
325 }