2 using System.Diagnostics;
5 using System.Text.RegularExpressions;
6 using System.Collections;
7 using System.Collections.Specialized;
9 using System.Runtime.InteropServices;
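14 /// Reads a CHM (compiled HTML Help) archive: enumerates its directory entries, looks entries up by name, index or extension, and extracts their content.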
17 public class CHMStream : IDisposable
// Extracts the entry described by Info into a newly allocated MemoryStream
// by delegating to ExtractFile(Info, st).
// NOTE(review): the guard and return statements are not visible in this
// excerpt - presumably the filled stream is returned; confirm.
19 public MemoryStream OpenStream(chmUnitInfo Info)
24 MemoryStream st=new MemoryStream();
25 this.ExtractFile(Info,st);
// Convenience overload: resolves FileName to a directory entry with
// GetFileInfo and forwards to OpenStream(chmUnitInfo).
29 public MemoryStream OpenStream(string FileName)
31 chmUnitInfo info=this.GetFileInfo(FileName);
34 return OpenStream(info);
37 private string m_CHMFileName;
38 public string CHMFileName
50 public CHMStream(string CHMFileName)
// Remembers the archive path in m_CHMFileName and opens the archive with
// chm_open.
55 public void OpenCHM(string CHMFileName)
57 m_CHMFileName=CHMFileName;
// FileInfo for the archive path; the statements that use it are not visible
// in this excerpt.
58 FileInfo fi=new FileInfo(m_CHMFileName);
// NOTE(review): chm_open returns a bool, which is not consumed on this line -
// confirm how failure to open is reported to the caller.
62 chm_open(m_CHMFileName);
65 private bool m_bCHMLoaded=false;
74 private string m_CHMName="";
83 private string Dir="";
84 private string m_FileFind="";
85 private string m_FileFindLastPart="";
86 private chmUnitInfo m_FileInfo=null;
87 private int m_FileCount=0;
/// <summary>
/// Enumeration callback attached by GetFileInfo(string). Stops the enumeration
/// at the first entry whose normalised path ends with the search pattern held
/// in m_FileFind and stores that entry in m_FileInfo.
/// </summary>
/// <param name="Info">Directory entry currently being enumerated.</param>
/// <param name="Status">
/// Set to CHM_ENUMERATOR_SUCCESS when the entry matches, otherwise to
/// CHM_ENUMERATOR_CONTINUE so the enumeration moves on.
/// </param>
public void FindFile(chmUnitInfo Info, ref CHMStream.CHM_ENUMERATOR Status)
{
    // Normalise the entry path the same way GetFileInfo normalises the
    // pattern: backslash separators, leading backslash, lower case.
    string LocalFile=Info.path.Replace("/",@"\");
    if (!LocalFile.StartsWith(@"\"))
        LocalFile=@"\"+LocalFile;
    LocalFile=LocalFile.ToLower();

    // Suffix test. The earlier IndexOf(...)==Length-Length form only looked at
    // the FIRST occurrence of the pattern, so "\doc" was never found in
    // "\docs\doc". An empty pattern keeps its old meaning of "no match".
    if (m_FileFind.Length>0 && LocalFile.EndsWith(m_FileFind, StringComparison.Ordinal))
    {
        Status=CHMStream.CHM_ENUMERATOR.CHM_ENUMERATOR_SUCCESS;
        m_FileInfo=Info;
        return;
    }

    Status=CHMStream.CHM_ENUMERATOR.CHM_ENUMERATOR_CONTINUE;
}
// Enumeration callback attached by GetFileCount(); always tells the
// enumeration to continue.
// NOTE(review): the counting statement is not visible in this excerpt -
// presumably m_FileCount is incremented here; confirm.
108 public void FileCount(chmUnitInfo Info, ref CHMStream.CHM_ENUMERATOR Status)
111 Status=CHMStream.CHM_ENUMERATOR.CHM_ENUMERATOR_CONTINUE;
114 private ArrayList m_FileList=null;
115 private string m_strByExt="";
/// <summary>
/// Enumeration callback attached by GetFileList(): appends the path of every
/// reported entry to m_FileList and never stops the enumeration.
/// </summary>
/// <param name="Info">Directory entry currently being enumerated.</param>
/// <param name="Status">Always set to CHM_ENUMERATOR_CONTINUE.</param>
public void FileList(chmUnitInfo Info, ref CHMStream.CHM_ENUMERATOR Status)
{
    string EntryPath=Info.path;
    this.m_FileList.Add(EntryPath);

    Status=CHM_ENUMERATOR.CHM_ENUMERATOR_CONTINUE;
}
/// <summary>
/// Enumeration callback attached by GetFileListByExtenstion: appends the path
/// of every entry whose extension equals m_strByExt (case-insensitively) to
/// m_FileList and never stops the enumeration.
/// </summary>
/// <param name="Info">Directory entry currently being enumerated.</param>
/// <param name="Status">Always set to CHM_ENUMERATOR_CONTINUE.</param>
public void FileListByExtension(chmUnitInfo Info, ref CHMStream.CHM_ENUMERATOR Status)
{
    // Path.GetExtension only parses the string. The FileInfo constructor used
    // before treats the entry name as a real file-system path and validates
    // it, which is not appropriate for archive-internal names such as
    // "::DataSpace/..." (this callback, unlike FindFileByExtension, does not
    // filter those out first).
    string Extension=Path.GetExtension(Info.path);

    // Ordinal, case-insensitive comparison instead of lower-casing both sides
    // with the current culture.
    if (String.Equals(Extension, m_strByExt, StringComparison.OrdinalIgnoreCase))
        m_FileList.Add(Info.path);

    Status=CHMStream.CHM_ENUMERATOR.CHM_ENUMERATOR_CONTINUE;
}
// Enumeration callback attached by GetFileInfo(int): reports success once the
// running counter m_FileCount equals the requested index m_FileFindIndex,
// otherwise asks the enumeration to continue.
// NOTE(review): the statements that store the entry and advance m_FileCount
// are not visible in this excerpt; confirm.
130 public void FindFileIndex(chmUnitInfo Info, ref CHMStream.CHM_ENUMERATOR Status)
132 if (m_FileCount==m_FileFindIndex)
135 Status=CHMStream.CHM_ENUMERATOR.CHM_ENUMERATOR_SUCCESS;
140 Status=CHMStream.CHM_ENUMERATOR.CHM_ENUMERATOR_CONTINUE;
// Runs a full enumeration of the archive with the FileCount callback attached
// and detaches the callback afterwards.
// NOTE(review): no try/finally is visible around the enumeration in this
// excerpt; if chm_enumerate throws, FileCount would stay subscribed - confirm.
144 public int GetFileCount()
150 this.CHMFileFoundEvent+=new CHMStream.CHMFileFound(FileCount);
151 this.chm_enumerate(CHM_ENUMERATE.CHM_ENUMERATE_ALL);
152 this.CHMFileFoundEvent-=new CHMStream.CHMFileFound(FileCount);
// Collects the path of every archive entry: m_FileList is replaced by a fresh
// ArrayList (initial capacity 1000) which the FileList callback fills during a
// full enumeration.
// NOTE(review): no try/finally is visible around the enumeration in this
// excerpt; if chm_enumerate throws, FileList would stay subscribed - confirm.
156 public ArrayList GetFileList()
162 m_FileList=new ArrayList(1000);
163 this.CHMFileFoundEvent+=new CHMStream.CHMFileFound(FileList);
164 this.chm_enumerate(CHM_ENUMERATE.CHM_ENUMERATE_ALL);
165 this.CHMFileFoundEvent-=new CHMStream.CHMFileFound(FileList);
// Collects the paths of archive entries selected by the FileListByExtension
// callback, which compares each entry's extension with m_strByExt.
// NOTE(review): the statement that copies Ext into m_strByExt is not visible
// in this excerpt - confirm it runs before the enumeration starts.
169 public ArrayList GetFileListByExtenstion(string Ext)
175 m_FileList=new ArrayList(1000);
177 this.CHMFileFoundEvent+=new CHMStream.CHMFileFound(FileListByExtension);
178 this.chm_enumerate(CHM_ENUMERATE.CHM_ENUMERATE_ALL);
179 this.CHMFileFoundEvent-=new CHMStream.CHMFileFound(FileListByExtension);
// Looks up an archive entry by name. The name is normalised into m_FileFind
// and a full enumeration is run with the FindFile callback, which compares
// each entry path against that pattern.
183 public chmUnitInfo GetFileInfo(string FileName)
// Normalise the pattern: lower case, backslash separators.
188 m_FileFind=FileName.ToLower().Replace("/",@"\");
190 // Remove all leading '..\'
193 if (m_FileFind.StartsWith(@"..\"))
194 m_FileFind=m_FileFind.Substring(3);
// Make sure the pattern starts with a backslash.
200 if (!m_FileFind.StartsWith(@"\"))
201 m_FileFind=@"\"+m_FileFind;
// Keep the last path component (prefixed with a backslash) separately.
203 string []parts=m_FileFind.Split('\\');
204 m_FileFindLastPart=@"\"+parts[parts.GetUpperBound(0)];
// Enumerate every entry with FindFile attached, then detach it again.
// NOTE(review): no try/finally is visible here; confirm the handler cannot
// stay subscribed when chm_enumerate throws.
206 this.CHMFileFoundEvent+=new CHMStream.CHMFileFound(FindFile);
208 this.chm_enumerate(CHM_ENUMERATE.CHM_ENUMERATE_ALL);
209 this.CHMFileFoundEvent-=new CHMStream.CHMFileFound(FindFile);
213 private int m_FileFindIndex=0;
// Looks up an archive entry by position: stores the requested index in
// m_FileFindIndex and runs a full enumeration with the FindFileIndex callback,
// which compares the running counter m_FileCount against that index.
// NOTE(review): resetting m_FileCount and returning the result are not visible
// in this excerpt; confirm.
214 public chmUnitInfo GetFileInfo(int FileIndex)
219 m_FileFindIndex=FileIndex;
221 this.CHMFileFoundEvent+=new CHMStream.CHMFileFound(FindFileIndex);
224 this.chm_enumerate(CHM_ENUMERATE.CHM_ENUMERATE_ALL);
225 this.CHMFileFoundEvent-=new CHMStream.CHMFileFound(FindFileIndex);
// Looks up an archive entry by extension: runs a full enumeration with the
// FindFileByExtension callback, which compares each entry's lower-cased
// extension with m_FileFind.
229 public chmUnitInfo GetFileInfoByExtension(string Ext)
231 this.CHMFileFoundEvent+=new CHMStream.CHMFileFound(FindFileByExtension);
// The callback compares against this lower-cased value, so Ext is expected in
// the same form FileInfo.Extension produces (leading dot included).
233 m_FileFind=Ext.ToLower();
234 this.chm_enumerate(CHMStream.CHM_ENUMERATE.CHM_ENUMERATE_ALL);
235 this.CHMFileFoundEvent-=new CHMStream.CHMFileFound(FindFileByExtension);
// Convenience overload: resolves FileName with GetFileInfo(string) and
// forwards the resulting entry to ExtractFile(chmUnitInfo, Stream).
239 public bool ExtractFile(string FileName, System.IO.Stream st)
244 chmUnitInfo Info=GetFileInfo(FileName);
245 return ExtractFile(Info,st);
// Writes the complete content of the entry (offset 0, Info.length bytes) to st
// through chm_retrieve_object.
// NOTE(review): guards and the returned value are not visible in this excerpt;
// confirm how a null Info or a short read is reported.
248 public bool ExtractFile(chmUnitInfo Info, System.IO.Stream st)
257 chm_retrieve_object(Info,st,0,Info.length);
// Convenience overload: resolves FileName with GetFileInfo(string) and
// forwards the entry to ExtractTextFile(chmUnitInfo).
262 public string ExtractTextFile(string FileName)
// Failure is reported in-band as a message string.
// NOTE(review): the guarding condition is not visible here - presumably
// "archive not loaded"; confirm.
265 return "CHM File not loaded";
267 chmUnitInfo Info=GetFileInfo(FileName);
268 return ExtractTextFile(Info);
// Extracts an entry and decodes it to text. The raw bytes are first read as
// ASCII to look for the marker "UTF-8" near the start; if present the content
// is decoded as UTF-8, otherwise as ASCII.
271 public string ExtractTextFile(chmUnitInfo Info)
// Failure is reported in-band as a message string (guard not visible here).
274 return "CHM File not loaded";
// Entries whose path starts with "/#" or "/$" get special treatment; the
// statements taken in those branches are not visible in this excerpt.
279 if (Info.path.Length>=2)
281 if (Info.path.Substring(0,2).CompareTo("/#")==0)
283 if (Info.path.Substring(0,2).CompareTo("/$")==0)
// Pull the whole entry into memory.
287 MemoryStream st=new MemoryStream((int)Info.length);
288 this.chm_retrieve_object(Info,st,0,Info.length);
295 ASCIIEncoding ascii=new ASCIIEncoding();
// NOTE(review): a fixed count of 50 bytes is requested; if the extracted
// content is shorter, GetString is expected to throw unless a length guard
// exists on a line not visible here - confirm.
296 Text=ascii.GetString(st.ToArray(),0,50);
// The "UTF-8" marker is searched case-sensitively in those first bytes.
299 if (Text.IndexOf("UTF-8")!=-1)
301 UTF8Encoding utf8 = new UTF8Encoding();
302 Text=utf8.GetString(st.ToArray(),0,(int)st.Length);
// ASCII decoding of the full content (the non-UTF-8 path).
305 Text=ascii.GetString(st.ToArray(),0,(int)st.Length);
// Enumeration callback attached by GetFileInfoByExtension: skips entries whose
// path starts with "::", "#" or "$", and reports success for the first
// remaining entry whose lower-cased extension equals m_FileFind.
310 public void FindFileByExtension(chmUnitInfo Info, ref CHMStream.CHM_ENUMERATOR Status)
// NOTE(review): other code in this file tests for "/#" and "/$" (leading
// slash), so the "#" / "$" prefix tests here may never match - confirm.
312 if ((Info.path.StartsWith("::")) || (Info.path.StartsWith("#")) ||(Info.path.StartsWith("$")))
314 Status=CHMStream.CHM_ENUMERATOR.CHM_ENUMERATOR_CONTINUE;
// FileInfo is used only to obtain the extension of the entry name.
318 FileInfo Fi=new FileInfo(Info.path);
319 if (Fi.Extension.ToLower()==m_FileFind)
321 Status=CHMStream.CHM_ENUMERATOR.CHM_ENUMERATOR_SUCCESS;
325 Status=CHMStream.CHM_ENUMERATOR.CHM_ENUMERATOR_CONTINUE;
// Splits an "ms-its:" URL into the CHM file name, the topic file name and the
// anchor (the text after the first '#' of the topic).
// NOTE(review): the handling of a topic without '#' and the returned value are
// not visible in this excerpt; confirm.
328 public bool GetCHMParts(string Url, ref string CHMFileName, ref string FileName, ref string Anchor)
// The regex is constructed on every call; groups: CHMFile = text between
// "ms-its:" and the last "::" (greedy), Topic = everything after it.
330 Regex ParseURLRegEx= new Regex( @"ms-its:(?'CHMFile'.*)::(?'Topic'.*)", RegexOptions.IgnoreCase| RegexOptions.Singleline | RegexOptions.ExplicitCapture| RegexOptions.IgnorePatternWhitespace| RegexOptions.Compiled);
332 // Parse URL - Get CHM Filename & Page Name
333 // Format 'ms-its:file name.chm::/topic.htm'
// The URL is matched twice (IsMatch, then Match).
334 if (ParseURLRegEx.IsMatch(Url))
336 Match m=ParseURLRegEx.Match(Url);
337 CHMFileName=m.Groups["CHMFile"].Value;
338 string Topic=m.Groups["Topic"].Value;
// Split the topic at the first '#': file name before it, anchor after it.
339 int idx=Topic.IndexOf("#");
342 FileName=Topic.Substring(0,idx);
343 Anchor=Topic.Substring(idx+1);
352 private string m_TempDir="";
// MatchEvaluator used by GetFileList(ref string, string): rewrites the file
// reference captured in the "FileName" group so that it points at the bare
// file name inside m_TempDir, using a "file://" URL.
353 string ReplaceFileName(Match m)
355 string strReplace = m.ToString();
// NOTE(review): per the .NET documentation the Groups indexer returns an
// unsuccessful Group rather than null for a missing group, so this test is
// likely always false - confirm.
358 if (m.Groups["FileName"]==null)
361 string FileName=m.Groups["FileName"].Value;
362 string FileName2=FileName.Replace("/",@"\");
// Drop an "...::" prefix when one is present (the guard on idx is not visible
// in this excerpt).
363 int idx=FileName2.IndexOf("::");
365 FileName2=FileName2.Substring(idx+2);
// Keep only the last path component and prefix it with the temp directory.
366 string []parts=FileName2.Split('\\');
367 string NewName=@"file://"+m_TempDir+parts[parts.GetUpperBound(0)];
// Substitute the original reference inside the matched text.
369 strReplace=strReplace.Replace(FileName,NewName);
// Scans HTML text for referenced files (stylesheet links and src attributes),
// collects the referenced names and rewrites the references in Text through
// ReplaceFileName. Script and XML sections are removed from Text first.
373 public ArrayList GetFileList(ref string Text, string TempDir)
380 ArrayList FilesList=new ArrayList();
382 // Parse HTML for stylesheet links and src references
// regexContent: run of characters allowed between attributes of a tag.
383 string regexContent=@"[\x2f a-zA-Z0-9\x5C\x2E\x28\x29\x23\x24\x25\x26\x27\x22\x21\x3F\x3E\x3D\x3C\x3B\x3A\x5B\x5D\x5E\x5F\x7D\x7C\x7B\x7E\x40\x2D\x2C\x2B\x2A]*\s*";
// regexFileName: = "value" or = 'value', capturing the value as "FileName".
384 string regexFileName=@"\s*=\s*[""|'](?'FileName'[^""^']*)[""|']\s*";
// NOTE(review): ".*" is greedy and Singleline is set, so this matches from the
// first <script ...> to the LAST </script> in the text - confirm that removing
// everything in between is intended.
386 Regex ScriptRegex = new Regex(@"<script[^>]*>.*</script>",
387 RegexOptions.IgnoreCase
388 | RegexOptions.Multiline
389 | RegexOptions.Singleline
390 | RegexOptions.IgnorePatternWhitespace
391 | RegexOptions.Compiled);
// XML declaration / processing instruction (same greedy caveat as above).
393 Regex XMLRegex = new Regex(@"<\?xml.*\?>",
394 RegexOptions.IgnoreCase
395 | RegexOptions.Multiline
396 | RegexOptions.Singleline
397 | RegexOptions.IgnorePatternWhitespace
398 | RegexOptions.Compiled);
// <xml ...> ... </xml> sections (same greedy caveat as above).
400 Regex XMLRegex2 = new Regex(@"<xml[^>]*>.*</xml>",
401 RegexOptions.IgnoreCase
402 | RegexOptions.Multiline
403 | RegexOptions.Singleline
404 | RegexOptions.IgnorePatternWhitespace
405 | RegexOptions.Compiled);
// Any src="..." attribute.
407 Regex SRCRegex = new Regex(
408 @"src"+regexFileName,
409 RegexOptions.IgnoreCase
410 | RegexOptions.Multiline
411 | RegexOptions.Singleline
412 | RegexOptions.IgnorePatternWhitespace
413 | RegexOptions.Compiled);
// <link ... rel="stylesheet" ... href="..."> (rel must precede href).
415 Regex StyleSheetRegex = new Regex(
416 @"<link\s*"+regexContent+@"rel\s*=\s*[""|']stylesheet[""|']\s*"+regexContent + "href"+regexFileName,
417 RegexOptions.IgnoreCase
418 | RegexOptions.Multiline
419 | RegexOptions.Singleline
420 | RegexOptions.IgnorePatternWhitespace
421 | RegexOptions.Compiled);
423 // Remove Script Tags
424 Text=ScriptRegex.Replace(Text,"");
// Remove XML declarations and <xml> sections.
427 Text=XMLRegex.Replace(Text,"");
428 Text=XMLRegex2.Replace(Text,"");
431 StringBuilder s=new StringBuilder(Text);
// Stylesheets: record the referenced file name(s), then rewrite the links.
// NOTE(review): the loop over successive matches is not visible in this
// excerpt; confirm that every match is added, not just the first.
433 if (StyleSheetRegex.IsMatch(Text))
435 Match m = StyleSheetRegex.Match(Text);
438 string FileName=m.Groups["FileName"].ToString();
439 FilesList.Add(FileName);
442 Text=StyleSheetRegex.Replace(Text,new MatchEvaluator(ReplaceFileName));
// src attributes: same two steps.
445 if (SRCRegex.IsMatch(Text))
447 Match m = SRCRegex.Match(Text);
450 string FileName=m.Groups["FileName"].ToString();
451 FilesList.Add(FileName);
454 Text=SRCRegex.Replace(Text,new MatchEvaluator(ReplaceFileName));
// Extracts the topic addressed by Url, together with the files it references,
// into TempDir and returns the topic's HTML (processed by GetFiles). Errors
// are reported in-band as strings starting with '#'.
// WARNING: an existing TempDir is deleted recursively before use.
460 public string GetHTMLAndFiles(string TempDir, string Url)
// NOTE(review): Substring(TempDir.Length-1) keeps ONLY the last character
// (the backslash) instead of removing it; stripping the trailing backslash
// would be Substring(0,TempDir.Length-1). As written, a TempDir ending in "\"
// becomes "\" and the delete below would then target that path - confirm/fix.
463 if (TempDir.EndsWith(@"\")) TempDir=TempDir.Substring(TempDir.Length-1);
465 // Delete Temp Directory
466 if (Directory.Exists(TempDir))
467 Directory.Delete(TempDir,true);
469 // Create Temp Directory
470 if (!Directory.Exists(TempDir))
471 Directory.CreateDirectory(TempDir);
// From here on TempDir always ends with a backslash.
473 if (!TempDir.EndsWith(@"\")) TempDir+=@"\";
475 string m_TopicName="";
477 string m_CHMFile=CHMFileName;
// Split the URL; when it is not an ms-its URL fall back to this archive.
479 if (!GetCHMParts(Url,ref m_CHMFile, ref m_TopicName, ref Anchor))
481 m_CHMFile=this.CHMFileName;
486 return "#No TopicName defined in Url : "+ Url;
488 m_TopicName=m_TopicName.Replace("/",@"\");
// A CHM name that does not start with a backslash is resolved against Dir.
489 if (!m_CHMFile.StartsWith(@"\"))
490 m_CHMFile=this.Dir+@"\"+m_CHMFile;
// Use this instance unless the URL points at a different CHM file.
493 CHMStream LocalCHM=this;
495 if (this.CHMFileName.ToLower().CompareTo(m_CHMFile.ToLower())!=0)
496 LocalCHM=new CHMStream(m_CHMFile);
499 HTMLText=LocalCHM.ExtractTextFile(m_TopicName);
501 return "#Failed to find Topic in CHM File : "+Url;
// Extract referenced files and rewrite the references in the HTML.
503 HTMLText=GetFiles(TempDir, HTMLText, LocalCHM);
/// <summary>
/// Extracts the files referenced by <paramref name="HTMLText"/> into
/// <paramref name="TempDir"/>, starting at nesting level 0.
/// </summary>
/// <param name="TempDir">Target directory for the extracted files.</param>
/// <param name="HTMLText">HTML text whose references are processed.</param>
/// <param name="chm">Archive the referenced files are read from.</param>
/// <returns>The value returned by the four-argument overload.</returns>
public string GetFiles(string TempDir, string HTMLText, CHMStream chm)
{
    const int TopLevel=0;
    string Result=GetFiles(TempDir, HTMLText, chm, TopLevel);
    return Result;
}
// Extracts every file referenced by HTMLText into TempDir. Referenced
// .htm/.html files are processed recursively while Level is below 2; all other
// files are extracted unchanged.
513 public string GetFiles(string TempDir, string HTMLText, CHMStream chm, int Level)
515 // Get FilesList & Extract Files to Temp Dir
// Note: this call also rewrites the references inside HTMLText (ref argument).
516 ArrayList FileList=chm.GetFileList(ref HTMLText, TempDir);
519 foreach(object obj in FileList)
521 string FileName=(string)obj;
523 string CHMFileName="";
// Default to the archive passed in; switch when the reference is an ms-its URL
// naming another CHM file (resolved against chm.Dir).
526 CHMStream NewCHM=chm;
527 if (GetCHMParts(FileName,ref CHMFileName, ref TopicName, ref Anchor))
529 NewCHM=new CHMStream(chm.Dir+@"\"+CHMFileName);
530 if (!NewCHM.CHMLoaded)
536 CHMFileName=chm.CHMFileName;
// HTML pages: extract as text, process their own references one level deeper,
// then write the result into TempDir under the bare file name.
543 if (((FileName.ToLower().EndsWith(".htm")) || (FileName.ToLower().EndsWith(".html"))) && (Level<2))
545 string HTMLText2=NewCHM.ExtractTextFile(FileName);
546 FileInfo Fi=new FileInfo(FileName);
547 string path=TempDir+Fi.Name;
// NOTE(review): the recursion passes chm, not NewCHM, although the page was
// read from NewCHM - confirm that is intended.
548 HTMLText2=GetFiles(TempDir,HTMLText2, chm, Level+1);
550 if (File.Exists(path))
// NOTE(review): no Close/Dispose of the writer is visible in this excerpt;
// confirm the stream is flushed and released.
553 StreamWriter st=new StreamWriter(path);
554 st.WriteLine(HTMLText2);
559 // Extract all other files as is
560 string FileName2=FileName.Replace("/",@"\");
// NOTE(review): Substring(0,1) throws for an empty FileName2 - confirm empty
// references cannot reach this point.
561 if (FileName2.Substring(0,1)==@"\")
562 FileName2=FileName2.Substring(1);
// Target path: TempDir plus the last path component.
564 string []parts=FileName2.Split('\\');
565 string path=TempDir+parts[parts.GetUpperBound(0)];
566 if (File.Exists(path))
// FileMode.CreateNew fails if the file already exists (the action taken by the
// File.Exists test above is not visible in this excerpt).
568 System.IO.FileStream st=new FileStream(path,FileMode.CreateNew);
569 NewCHM.ExtractFile(FileName2,st);
575 // return HTML string of main page
579 #region CHMStream Enums
580 // the two available spaces in a CHM file
581 // N.B.: The format supports arbitrarily many spaces, but only
582 // two appear to be used at present.
583 public enum CHM_COMPRESSION { CHM_UNCOMPRESSED=0, CHM_COMPRESSED=1};
585 // resolve a particular object from the archive
586 public enum CHM_RESOLVE { CHM_RESOLVE_SUCCESS=0, CHM_RESOLVE_FAILURE=1};
588 // bit flags selecting which kinds of directory entries chm_enumerate reports
589 public enum CHM_ENUMERATE
// Entry class flags (chm_enumerate assigns exactly one of NORMAL, META or
// SPECIAL to each entry based on its path).
592 CHM_ENUMERATE_NORMAL =1,
593 CHM_ENUMERATE_META =2,
594 CHM_ENUMERATE_SPECIAL =4,
// Entry kind flags: chm_enumerate treats entries with length 0 as directories
// and all others as files.
595 CHM_ENUMERATE_FILES =8,
596 CHM_ENUMERATE_DIRS =16,
// All five flags combined (1+2+4+8+16).
597 CHM_ENUMERATE_ALL =31};
// Status an enumeration callback (CHMFileFound) hands back to chm_enumerate
// through its ref parameter; chm_enumerate starts each callback with CONTINUE.
599 public enum CHM_ENUMERATOR
601 CHM_ENUMERATOR_FAILURE =0,
602 CHM_ENUMERATOR_SUCCESS =2,
603 CHM_ENUMERATOR_CONTINUE =1
607 #region Internal Parameters
// Scans val with a moving single-bit mask: the loop continues while the mask
// is non-zero and the masked bit of val is clear.
// NOTE(review): initialisation and the returned value are not visible in this
// excerpt - presumably this mirrors C ffs() (1-based index of the lowest set
// bit); the caller uses ffs(window_size) - 1. Confirm.
608 private int ffs(int val)
612 while (bit != 0 && (val & bit) == 0)
623 // names of sections essential to decompression
624 private const string _CHMU_RESET_TABLE = @"::DataSpace/Storage/MSCompressed/Transform/{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable";
625 private const string _CHMU_LZXC_CONTROLDATA = @"::DataSpace/Storage/MSCompressed/ControlData";
626 private const string _CHMU_CONTENT = @"::DataSpace/Storage/MSCompressed/Content";
627 private const string _CHMU_SPANINFO = @"::DataSpace/Storage/MSCompressed/SpanInfo";
629 private UInt64 dir_offset=0;
630 private UInt64 dir_len=0;
631 private UInt64 data_offset=0;
632 private Int32 index_root=0;
633 private Int32 index_head=0;
634 private UInt32 block_len=0;
636 private chmUnitInfo rt_unit;
637 private chmUnitInfo cn_unit;
638 private chmLzxcResetTable reset_table;
639 private bool compression_enabled=false;
642 private int window_size=0;
643 private UInt32 reset_interval=0;
644 private UInt32 reset_blkcount=0;
645 private BinaryReader st=null;
647 // decompressor state
648 private lzw lzx_state;
649 private int lzx_last_block=0;
652 #region Open CHM Stream
// Compares an expected signature string with the characters read from the
// file by walking the characters of Sig1.
// NOTE(review): the comparison body is not visible in this excerpt; confirm
// how a Sig2 shorter than Sig1 is handled.
653 private bool CheckSig(string Sig1, char[] Sig2)
656 foreach(char ch in Sig1.ToCharArray())
665 // open an ITS archive
// Opens the archive file, reads the ITSF and ITSP headers, caches the
// directory geometry in fields, and tries to set up LZX decompression
// (reset table + control data). When any of the compression metadata cannot
// be resolved or read, compression_enabled is switched off.
666 private bool chm_open(string filename)
668 chmItsfHeader itsfHeader=new chmItsfHeader();
669 chmItspHeader itspHeader=new chmItspHeader();
670 chmUnitInfo uiSpan=new chmUnitInfo();
671 chmUnitInfo uiLzxc=new chmUnitInfo();
672 chmLzxcControlData ctlData=new chmLzxcControlData();
675 if (!File.Exists(filename))
// The reader stays open in the field st for later lookups and extraction.
678 st=new BinaryReader(File.OpenRead(filename));
682 // read and verify header
683 if (itsfHeader.Read_itsf_header(st)==0)
// Position the reader on the directory header.
688 st.BaseStream.Seek((long)itsfHeader.dir_offset,SeekOrigin.Begin);
690 // stash important values from header
691 dir_offset = itsfHeader.dir_offset;
692 dir_len = itsfHeader.dir_len;
693 data_offset = itsfHeader.data_offset;
695 // now, read and verify the directory header chunk
696 if (itspHeader.Read_itsp_header(st)==0)
702 // grab essential information from ITSP header
// dir_offset/dir_len now describe the directory blocks after the ITSP header.
703 dir_offset += (UInt64)itspHeader.header_len;
704 dir_len -= (UInt64)itspHeader.header_len;
705 index_root = itspHeader.index_root;
706 index_head = itspHeader.index_head;
707 block_len = itspHeader.block_len;
709 // if the index root is -1, this means we don't have any PMGI blocks.
710 // as a result, we must use the sole PMGL block as the index root
712 if (index_root == -1)
713 index_root = index_head;
// Assume compression is usable until one of the checks below fails.
715 compression_enabled=true;
717 // prefetch most commonly needed unit infos
718 // if (CHM_RESOLVE.CHM_RESOLVE_SUCCESS != chm_resolve_object(_CHMU_SPANINFO, ref uiSpan)
719 // || uiSpan.space == CHM_COMPRESSION.CHM_COMPRESSED ||
// Each metadata object must resolve and must itself be stored uncompressed.
720 if (CHM_RESOLVE.CHM_RESOLVE_SUCCESS != chm_resolve_object(_CHMU_RESET_TABLE, ref rt_unit)
721 || rt_unit.space == CHM_COMPRESSION.CHM_COMPRESSED
722 || CHM_RESOLVE.CHM_RESOLVE_SUCCESS != chm_resolve_object(_CHMU_CONTENT,ref cn_unit)
723 || cn_unit.space == CHM_COMPRESSION.CHM_COMPRESSED
724 || CHM_RESOLVE.CHM_RESOLVE_SUCCESS != chm_resolve_object(_CHMU_LZXC_CONTROLDATA, ref uiLzxc)
725 || uiLzxc.space == CHM_COMPRESSION.CHM_COMPRESSED)
727 compression_enabled=false;
// NOTE(review): the comment below refers to a uiSpan check, but that check is
// commented out above - the remark looks stale; confirm.
733 // N.B.: we've already checked that uiSpan is in the uncompressed section,
734 // so this should not require attempting to decompress, which may
735 // rely on having a valid "span"
// Read the LZX reset table from its position in the data area.
737 if (compression_enabled)
739 reset_table=new chmLzxcResetTable();
740 st.BaseStream.Seek((long)((long)data_offset + (long)rt_unit.start),SeekOrigin.Begin);
741 if (reset_table.Read_lzxc_reset_table(st)!=1)
743 compression_enabled=false;
// Read the LZX control data and derive the decompressor parameters.
747 if (compression_enabled)
750 ctlData=new chmLzxcControlData();
751 st.BaseStream.Seek((long)((long)data_offset + (long)uiLzxc.start),SeekOrigin.Begin);
752 if (ctlData.Read_lzxc_control_data(st)!=1)
754 compression_enabled=false;
757 window_size = (int)ctlData.windowSize;
758 reset_interval = ctlData.resetInterval;
// Number of blocks between decompressor resets.
// NOTE(review): the middle of this expression is not visible in this excerpt;
// confirm the formula against the original source.
761 reset_blkcount = (uint)(reset_interval /
763 ctlData.windowsPerReset);
777 #region Close CHM Stream
778 // close an ITS archive
// Releases the state held for the open archive; the visible part tears down
// the LZX decompressor.
// NOTE(review): guards (e.g. lzx_state being null) and the closing of the
// reader st are not visible in this excerpt; confirm.
779 private void chm_close()
785 lzx_state.LZXteardown();
790 #region Find File in CHM Stream
791 // resolve a particular object from the archive
// Resolves objPath to its directory entry by walking the directory index from
// index_root: PMGI (index) blocks select the next page to visit, a PMGL
// (listing) block is searched for the entry itself.
792 private CHMStream.CHM_RESOLVE chm_resolve_object(string objPath, ref chmUnitInfo ui)
797 curPage = index_root;
799 // until we have either returned or given up
800 while (curPage != -1)
// Jump to the start of the current directory block.
802 st.BaseStream.Seek((long)((long)dir_offset + (long)(curPage*block_len)),SeekOrigin.Begin);
// Peek at the 4-character block signature, then rewind so the header reader
// sees it again.
804 char[] sig=st.ReadChars(4);
805 st.BaseStream.Seek(-4,SeekOrigin.Current);
806 if (CheckSig("PMGL",sig))
808 chmPmglHeader PmglHeader=new chmPmglHeader();
809 if (PmglHeader.Read_pmgl_header(st)==1)
// Scan the listing block for objPath.
812 ui=PmglHeader.FindObject(st,block_len,objPath);
814 return CHMStream.CHM_RESOLVE.CHM_RESOLVE_FAILURE;
816 // parse entry and return
817 return CHMStream.CHM_RESOLVE.CHM_RESOLVE_SUCCESS;
820 else if (CheckSig("PMGI",sig))
822 chmPmgiHeader pmgiHeader=new chmPmgiHeader();
// NOTE(review): the result of Read_pmgi_header is not checked here.
823 pmgiHeader.Read_pmgi_header(st);
// The index block yields the page to visit next.
824 curPage = pmgiHeader._chm_find_in_PMGI(st, block_len, objPath);
827 // else, we are confused. give up.
828 return CHMStream.CHM_RESOLVE.CHM_RESOLVE_FAILURE;
831 // didn't find anything. fail.
832 return CHMStream.CHM_RESOLVE.CHM_RESOLVE_FAILURE;
836 #region Extract File from CHM Stream
838 // * utility methods for dealing with compressed data
839 // get the bounds of a compressed block. return 0 on failure
// Computes where compressed block number `block` starts in the file and how
// long it is, using the 8-byte entries of the LZX reset table.
// start/len are outputs; per the comment preceding this method, 0 is returned
// on failure.
840 private int _chm_get_cmpblock_bounds(System.IO.BinaryReader st, UInt64 block, ref UInt64 start, ref UInt64 len)
842 // for all but the last block, use the reset table
843 if (block < reset_table.block_count-1)
845 // unpack the start address
// Table entry i lives at data_offset + rt_unit.start + table_offset + i*8.
846 st.BaseStream.Seek((long)data_offset + (long)rt_unit.start + (long)reset_table.table_offset + (long)(block*8),SeekOrigin.Begin);
847 start=st.ReadUInt64();
851 // for the last block, use the span in addition to the reset table
854 // unpack the start address
855 st.BaseStream.Seek((long)data_offset + (long)rt_unit.start + (long)reset_table.table_offset + (long)(block*8),SeekOrigin.Begin);
856 start=st.ReadUInt64();
// NOTE(review): len is set to the total compressed length here; the statement
// that turns it into the block's own length is not visible in this excerpt -
// confirm.
857 len = reset_table.compressed_len;
860 // compute the length and absolute start address
// Convert the content-relative start into an absolute file position.
862 start += data_offset + cn_unit.start;
867 // decompress the block. must have lzx_mutex.
// Decompresses block number `block` into OutBuffer. Because LZX state carries
// over between blocks, any blocks since the last reset point that were not
// just decompressed are decompressed first.
868 private ulong _chm_decompress_block(UInt64 block, System.IO.Stream OutBuffer)
870 // byte []cbuffer = new byte(reset_table.block_len + 6144);
871 ulong cmpStart=0; // compressed start
872 ulong cmpLen=0; // compressed len
// Position of the block inside its reset interval (0 = block right at a reset).
873 UInt32 blockAlign = (UInt32)(block % reset_blkcount); // reset intvl. aln.
875 // check if we need previous blocks
878 /* fetch all required previous blocks since last reset */
879 for (UInt32 i = blockAlign; i > 0; i--)
881 UInt32 curBlockIdx = (UInt32)(block-i);
883 /* check if we most recently decompressed the previous block */
884 if ((ulong)lzx_last_block != curBlockIdx)
// Reset the decompressor when this preceding block starts a reset interval.
886 if ((curBlockIdx % reset_blkcount)==0)
888 lzx_state.LZXreset();
// NOTE(review): the return value of the bounds lookup is ignored here, unlike
// for the target block below.
891 _chm_get_cmpblock_bounds(st,curBlockIdx, ref cmpStart, ref cmpLen);
892 st.BaseStream.Seek((long)cmpStart,SeekOrigin.Begin);
// NOTE(review): preceding blocks are written into the same OutBuffer as the
// target block; confirm how the caller locates the wanted block's data.
893 if (lzx_state.LZXdecompress(st,OutBuffer, ref cmpLen, ref reset_table.block_len) != lzw.DECR_OK)
896 lzx_last_block = (int)(curBlockIdx);
// Reset the decompressor when the target block itself starts an interval.
901 if ((block % reset_blkcount)==0)
903 lzx_state.LZXreset();
907 // decompress the block we actually want
908 if (_chm_get_cmpblock_bounds(st, block, ref cmpStart, ref cmpLen)==0)
911 st.BaseStream.Seek((long)cmpStart,SeekOrigin.Begin);
913 if (lzx_state.LZXdecompress(st, OutBuffer, ref cmpLen,ref reset_table.block_len) != lzw.DECR_OK)
916 lzx_last_block = (int)block;
918 // XXX: modify LZX routines to return the length of the data they
919 // * decompressed and return that instead, for an extra sanity check.
// The nominal block length is returned, not the number of bytes produced.
920 return reset_table.block_len;
923 // grab a region from a compressed block
// Copies up to len bytes, starting at offset `start` of the uncompressed
// content, into buf. A single call never crosses a block boundary: the request
// is clipped to the end of the block containing `start`.
924 private ulong _chm_decompress_region(Stream buf, ulong start, ulong len)
926 ulong nBlock, nOffset;
929 // byte [] ubuffer=null;
934 // figure out what we need to read
// Block index and offset within that block.
935 nBlock = start / reset_table.block_len;
936 nOffset = start % reset_table.block_len;
// Clip the request to the remainder of the block.
938 if (nLen > (reset_table.block_len - nOffset))
939 nLen = reset_table.block_len - nOffset;
941 // data request not satisfied, so... start up the decompressor machine
// Local window_size (shadows the field) is derived from the field via ffs();
// it is what LZXinit receives below.
944 int window_size = ffs(this.window_size) - 1;
948 lzx_state.LZXinit(window_size);
951 // decompress some data
952 MemoryStream ms=new MemoryStream((int)reset_table.block_len+6144);
953 gotLen = _chm_decompress_block(nBlock, ms);
957 // memcpy(buf, ubuffer+nOffset, (unsigned int)nLen);
// Copy nLen bytes starting at nOffset of the decompressed data, one byte at a
// time.
// NOTE(review): ReadByte returns -1 at end of stream, which the cast turns
// into 0xFF - confirm ms always holds at least nOffset+nLen bytes.
958 ms.Position=(long)nOffset;
959 for(ulong i=0;i<nLen;i++)
960 buf.WriteByte((byte)ms.ReadByte());
965 // retrieve (part of) an object
// Writes up to len bytes of the entry ui, starting at offset addr inside the
// entry, to buf. Uncompressed entries are read straight from the file;
// compressed entries go through _chm_decompress_region.
966 private ulong chm_retrieve_object(chmUnitInfo ui, Stream buf, ulong addr, ulong len)
968 // starting address must be in correct range
// addr is unsigned, so the "addr < 0" half of this test can never be true.
969 if (addr < 0 || addr >= (ulong)ui.length)
// Clip the request to the end of the entry.
973 if (addr + (ulong)len > (ulong)ui.length)
974 len = (ulong)ui.length - (ulong)addr;
976 // if the file is uncompressed, it's simple
977 if (ui.space == CHMStream.CHM_COMPRESSION.CHM_UNCOMPRESSED)
// Read at data_offset + ui.start + addr, then restore the reader position.
980 long FilePos=st.BaseStream.Position;
981 st.BaseStream.Seek((long)((long)data_offset + (long)ui.start + (long)addr),SeekOrigin.Begin);
982 // byte [] buffer=st.ReadBytes((int)len);
// NOTE(review): len is narrowed to int, and ReadBytes may return fewer bytes
// than requested at end of file, in which case Write would be given a count
// larger than the array - confirm.
983 buf.Write(st.ReadBytes((int)len),0,(int) len);
984 st.BaseStream.Seek(FilePos,SeekOrigin.Begin);
988 // else if the file is compressed, it's a little trickier
989 else // ui->space == CHM_COMPRESSED
993 lzx_state.LZXteardown();
// swath: bytes obtained by one decompress call; total: bytes obtained so far.
996 ulong swath=0, total=0;
999 if (!compression_enabled)
1002 // swill another mouthful
1003 swath = _chm_decompress_region(buf, ui.start + addr, len);
1005 // if we didn't get any...
// NOTE(review): Trace.Assert reports when its condition is FALSE, so these two
// asserts fire when the lengths are EQUAL, which looks inverted for a
// "De-compress failed" message - confirm.
1008 Trace.Assert((total!=ui.length),"De-compress failed","Length Required = "+ui.length.ToString()+" Length returned = "+total.ToString());
1019 Trace.Assert((len!=ui.length),"De-compress failed","Length Required = "+ui.length.ToString()+" Length returned = "+len.ToString());
1025 #region Enumerate functions
1026 // Enumerate the objects in the .chm archive
1027 // Use delegate to handle callback
1029 public delegate void CHMFileFound(chmUnitInfo Info, ref CHMStream.CHM_ENUMERATOR Status);
1030 public event CHMFileFound CHMFileFoundEvent;
/// <summary>
/// Raises CHMFileFoundEvent for one directory entry; does nothing when no
/// handler is attached.
/// </summary>
/// <param name="Info">Directory entry being reported.</param>
/// <param name="Status">Status value the handlers may change.</param>
public void OnFileFound(chmUnitInfo Info, ref CHMStream.CHM_ENUMERATOR Status)
{
    // Nothing subscribed: leave Status untouched.
    if (CHMFileFoundEvent==null)
    {
        return;
    }

    CHMFileFoundEvent(Info,ref Status);
}
// Walks the chain of PMGL listing blocks starting at index_head and reports
// every entry selected by `what` through OnFileFound. The callback's status
// decides whether the walk continues.
1037 private int chm_enumerate(CHM_ENUMERATE what)
1041 // buffer to hold whatever page we're looking at
1042 chmPmglHeader header;
// A single chmUnitInfo instance is reused for every entry.
// NOTE(review): callbacks that keep a reference to it (rather than a copy) may
// see it overwritten by later entries - confirm.
1047 chmUnitInfo ui= new chmUnitInfo();
1048 CHMStream.CHM_ENUMERATE flag=CHMStream.CHM_ENUMERATE.None;
1051 curPage = index_head;
1053 // until we have either returned or given up
1054 while (curPage != -1)
1056 st.BaseStream.Seek((long)((long)dir_offset + (long)(curPage*block_len)),SeekOrigin.Begin);
1058 // figure out start and end for this page
1059 cur = (uint)st.BaseStream.Position;
1061 header=new chmPmglHeader();
1062 if (header.Read_pmgl_header(st)==0)
// End of the used part of the block, computed from the position right after
// the header: block length minus free space minus header length.
1065 end = (uint)(st.BaseStream.Position + block_len - (header.free_space)- chmPmglHeader._CHM_PMGL_LEN);
1067 // loop over this page
1068 while (st.BaseStream.Position < end)
1070 if (header._chm_parse_PMGL_entry(st,ref ui)==0)
// Length 0 marks a directory; skip it unless directories were requested.
1074 if (ui.length == 0 && ((what & CHM_ENUMERATE.CHM_ENUMERATE_DIRS)==0))
// Non-zero length marks a file; skip it unless files were requested.
1078 if (ui.length != 0 && ((what & CHM_ENUMERATE.CHM_ENUMERATE_FILES)==0))
1081 // check for NORMAL vs. META
// Paths starting with '/' are NORMAL or SPECIAL; everything else is META.
1082 if (ui.path[0] == '/')
1084 // check for NORMAL vs. SPECIAL
1085 if (ui.path.Length>2)
// '/#...' and '/$...' are SPECIAL.
1087 if (ui.path[1] == '#' || ui.path[1] == '$')
1088 flag = CHMStream.CHM_ENUMERATE.CHM_ENUMERATE_SPECIAL;
1090 flag = CHMStream.CHM_ENUMERATE.CHM_ENUMERATE_NORMAL;
1093 flag = CHMStream.CHM_ENUMERATE.CHM_ENUMERATE_META;
// Skip entries whose class was not requested.
1094 if ((what & flag)==0)
1098 // call the enumerator
1100 CHMStream.CHM_ENUMERATOR status = CHMStream.CHM_ENUMERATOR.CHM_ENUMERATOR_CONTINUE;
1101 OnFileFound(ui,ref status);
// React to the status set by the callback (the actions of each case are not
// visible in this excerpt).
1105 case CHMStream.CHM_ENUMERATOR.CHM_ENUMERATOR_FAILURE:
1108 case CHMStream.CHM_ENUMERATOR.CHM_ENUMERATOR_CONTINUE:
1111 case CHMStream.CHM_ENUMERATOR.CHM_ENUMERATOR_SUCCESS:
1120 // advance to next page
1121 curPage = header.block_next;
1128 #region IDisposable Members
1130 private bool disposed=false;
// IDisposable entry point.
// NOTE(review): the call that performs the actual cleanup (presumably
// Dispose(true)) is not visible in this excerpt; confirm.
1131 public void Dispose()
1134 // This object will be cleaned up by the Dispose method.
1135 // Therefore, you should call GC.SuppressFinalize to
1136 // take this object off the finalization queue
1137 // and prevent finalization code for this object
1138 // from executing a second time.
1139 GC.SuppressFinalize(this);
1143 /// Dispose(bool disposing) executes in two distinct scenarios.
1144 /// If disposing equals true, the method has been called directly
1145 /// or indirectly by a user's code. Managed and unmanaged resources
1146 /// can be disposed.
1147 /// If disposing equals false, the method has been called by the
1148 /// runtime from inside the finalizer and you should not reference
1149 /// other objects. Only unmanaged resources can be disposed.
1151 /// <param name="disposing">disposing flag</param>
1152 private void Dispose(bool disposing)
1154 // Check to see if Dispose has already been called.
1157 // If disposing equals true, dispose all managed
1158 // and unmanaged resources.
1161 // Dispose managed resources.
1170 #region Structures used by CHM Storage
// Base class of the header/structure readers below; provides signature
// checking and the variable-length integer ("compressed word") and string
// decoding shared by them.
1171 public class BaseStructure
// Compares an expected signature string with characters read from the file.
// NOTE(review): the comparison body is not visible in this excerpt.
1173 public bool CheckSig(string Sig1, char[] Sig2)
1176 foreach(char ch in Sig1.ToCharArray())
1185 // skip a compressed dword
1186 public void skip_cword(BinaryReader st)
// Consume bytes until one with the high bit clear (< 0x80) has been read.
1189 while ((b=st.ReadByte())>= 0x80);
1192 // skip the data from a PMGL entry
1193 public void _chm_skip_PMGL_entry_data(BinaryReader st)
1200 // parse a compressed dword
// Encoding: 7 payload bits per byte, most significant group first; a set high
// bit means another byte follows.
1201 public UInt64 _chm_parse_cword(BinaryReader st)
1205 while ((temp=st.ReadByte()) >= 0x80)
1208 accum += (ulong)(temp & 0x7f);
// The final byte (high bit clear) contributes the lowest 7 bits.
1211 return (accum << 7) + temp;
1214 // decode count bytes of UTF-8 from the reader into the string path
1215 public int _chm_parse_UTF8(BinaryReader st, UInt64 count, ref string path)
1217 UTF8Encoding utf8=new UTF8Encoding();
// NOTE(review): ReadBytes may return fewer than count bytes at end of stream,
// in which case GetString(..., 0, count) would be out of range - confirm.
1218 path=utf8.GetString(st.ReadBytes((int)count),0,(int)count);
/// <summary>
/// Describes one directory entry of a CHM archive as parsed from a PMGL
/// listing block.
/// </summary>
public class chmUnitInfo
{
    /// <summary>Start offset of the entry's data as stored in the directory.</summary>
    public ulong start;

    /// <summary>Length of the entry in bytes; 0 is treated as a directory by the enumerator.</summary>
    public ulong length;

    /// <summary>Whether the entry is stored compressed or uncompressed.</summary>
    public CHMStream.CHM_COMPRESSION space=CHMStream.CHM_COMPRESSION.CHM_UNCOMPRESSED;

    /// <summary>Path of the entry inside the archive.</summary>
    public string path=String.Empty;
}
1231 // structure of ITSF headers
// In-memory form of the ITSF file header plus the reader that fills it.
// Trailing comments on the fields give each field's byte offset (hex).
1232 public class chmItsfHeader : BaseStructure
// Expected header_len for version 2 and version 3 headers.
1234 public const int _CHM_ITSF_V2_LEN=0x58;
1235 public const int _CHM_ITSF_V3_LEN=0x60;
1237 public char[] signature=null; // 0 (ITSF)
1238 public Int32 version=0; // 4
1239 public Int32 header_len=0; // 8
1240 public Int32 unknown_000c=0; // c
1241 public UInt32 last_modified=0; // 10
1242 public UInt32 lang_id=0; // 14
1243 public Guid dir_uuid; // 18
1244 public Guid stream_uuid; // 28
1245 public UInt64 unknown_offset=0; // 38
1246 public UInt64 unknown_len=0; // 40
1247 public UInt64 dir_offset=0; // 48
1248 public UInt64 dir_len=0; // 50
1249 public UInt64 data_offset=0; // 58 (Not present before V3)
// Reads the header from the reader's current position, field by field in file
// order. chm_open treats a return value of 0 as failure.
1251 public int Read_itsf_header(BinaryReader st)
1253 signature=st.ReadChars(4);
1254 if (CheckSig("ITSF",signature)==false)
1257 version=st.ReadInt32();
1258 header_len=st.ReadInt32();
1259 unknown_000c=st.ReadInt32();
1260 last_modified=st.ReadUInt32();
1261 lang_id=st.ReadUInt32();
1262 dir_uuid=new Guid(st.ReadBytes(16));
1263 stream_uuid=new Guid(st.ReadBytes(16));
1264 unknown_offset=st.ReadUInt64();
1265 unknown_len=st.ReadUInt64();
1266 dir_offset=st.ReadUInt64();
1267 dir_len=st.ReadUInt64();
// header_len must match the length expected for the header version.
1271 if (header_len != chmItsfHeader._CHM_ITSF_V2_LEN)
1274 else if (version==3)
1276 if (header_len != chmItsfHeader._CHM_ITSF_V3_LEN)
// data_offset is either read from the file or derived as the first byte after
// the directory (the conditions selecting each are not visible in this
// excerpt; per the field comment the stored value exists from V3 on).
1282 data_offset=st.ReadUInt64();
1284 data_offset = dir_offset + dir_len;
1290 // structure of ITSP headers
// In-memory form of the ITSP directory header plus the reader that fills it.
1291 public class chmItspHeader : BaseStructure
// The only header length accepted by Read_itsp_header.
1293 const int CHM_ITSP_V1_LEN=0x54;
1295 public char[] signature=null; // 0 (ITSP)
1296 public Int32 version=0;
1297 public Int32 header_len=0;
1298 public Int32 unknown_000c=0;
// Size of one directory block; used by CHMStream to locate block N at
// dir_offset + N*block_len.
1299 public UInt32 block_len=0;
1300 public Int32 blockidx_intvl=0;
1301 public Int32 index_depth=0;
// Root index block (-1 when there are no PMGI blocks) and first listing block.
1302 public Int32 index_root=0;
1303 public Int32 index_head=0;
1304 public Int32 unknown_0024=0;
1305 public Int32 num_blocks=0;
1306 public Int32 unknown_002c=0;
1307 public UInt32 lang_id=0;
1308 public Guid system_uuid;
1309 public Guid unknown_0044;
// Reads the header from the reader's current position, field by field in file
// order. chm_open treats a return value of 0 as failure.
1311 public int Read_itsp_header(BinaryReader st)
1313 signature=st.ReadChars(4); // 0 (ITSP)
1314 if (CheckSig("ITSP",signature)==false)
1317 version=st.ReadInt32();
1318 header_len=st.ReadInt32();
// Only headers of exactly CHM_ITSP_V1_LEN bytes are supported.
1320 if (header_len!=CHM_ITSP_V1_LEN)
1323 unknown_000c=st.ReadInt32();
1324 block_len=st.ReadUInt32();
1325 blockidx_intvl=st.ReadInt32();
1326 index_depth=st.ReadInt32();
1327 index_root=st.ReadInt32();
1328 index_head=st.ReadInt32();
1329 unknown_0024=st.ReadInt32();
1330 num_blocks=st.ReadInt32();
1331 unknown_002c=st.ReadInt32();
1332 lang_id=st.ReadUInt32();
1333 system_uuid=new Guid(st.ReadBytes(16));
1334 unknown_0044=new Guid(st.ReadBytes(16));
// Header of a PMGL (directory listing) block, with helpers to parse the
// entries that follow it and to search a block for a given path.
1340 public class chmPmglHeader : BaseStructure
// Size of the PMGL header in bytes.
1342 public const int _CHM_PMGL_LEN=0x14;
1343 public char[] signature=null; // 0 (PMGL)
1344 public UInt32 free_space=0; // 4
1345 public UInt32 unknown_0008=0; // 8
// Block numbers of the neighbouring listing blocks; chm_enumerate follows
// block_next until it is -1.
1346 public Int32 block_prev=0; // c
1347 public Int32 block_next=0; // 10
// Reads the header at the reader's current position; callers compare the
// result with 1 (success) and 0 (failure).
1349 public int Read_pmgl_header(BinaryReader st)
1351 signature=st.ReadChars(4);
1352 if (CheckSig("PMGL",signature)==false)
1355 free_space=st.ReadUInt32();
1356 unknown_0008=st.ReadUInt32();
1357 block_prev=st.ReadInt32();
1358 block_next=st.ReadInt32();
1362 // parse a PMGL entry into a chmUnitInfo struct; return 1 on success.
// Entry layout: path length (cword), UTF-8 path, then space, start and length
// as cwords.
1363 public int _chm_parse_PMGL_entry(BinaryReader st, ref chmUnitInfo ui)
1368 strLen = _chm_parse_cword(st);
1371 if (_chm_parse_UTF8(st, strLen, ref ui.path)==0)
1375 ui.space = (CHMStream.CHM_COMPRESSION)_chm_parse_cword(st);
1376 ui.start = _chm_parse_cword(st);
1377 ui.length = _chm_parse_cword(st);
// Scans the entries of this block (reader positioned right after the header)
// for objPath, comparing lower-cased paths.
1381 public chmUnitInfo FindObject(BinaryReader st, UInt32 block_len, string objPath)
// End of the used part of the block.
// NOTE(review): this is computed from the position AFTER the header yet still
// subtracts _CHM_PMGL_LEN; chm_enumerate uses the same formula - confirm it is
// intended.
1383 UInt32 end = (UInt32)st.BaseStream.Position+ block_len - free_space - _CHM_PMGL_LEN;
1385 // now, scan progressively
1386 chmUnitInfo FoundObject=new chmUnitInfo();
1388 while (st.BaseStream.Position < end)
// NOTE(review): the parse result is not checked here.
1390 _chm_parse_PMGL_entry(st,ref FoundObject);
1391 if (FoundObject.path.ToLower().CompareTo(objPath.ToLower())==0)
// Header of a PMGI (directory index) block, with the search that selects
// which block to visit next for a given path.
1400 public class chmPmgiHeader : BaseStructure
// Size of the PMGI header in bytes.
1402 public const int _CHM_PMGI_LEN=0x8;
1404 public char[] signature=null; // 0 (PMGI)
1405 public UInt32 free_space=0; // 4
// Reads the header at the reader's current position.
1407 public int Read_pmgi_header(BinaryReader st)
1409 signature=st.ReadChars(4);
// Signature check written out character by character (equivalent in intent to
// CheckSig("PMGI", ...) used by the other headers).
1411 if ((signature[0]!='P') || (signature[1]!='M') || (signature[2]!='G') || (signature[3]!='I'))
1414 free_space=st.ReadUInt32();
// Walks the (name, page) pairs of this index block and returns a page number
// for objPath; chm_resolve_object uses the result as the next block to read.
1418 public Int32 _chm_find_in_PMGI(BinaryReader st, UInt32 block_len, string objPath)
// End of the used part of the block.
1423 uint end = (uint)st.BaseStream.Position + block_len - free_space - _CHM_PMGI_LEN;
1425 // now, scan progressively
1426 while (st.BaseStream.Position < end)
// Each index entry: name length (cword), UTF-8 name, page number (cword).
1429 strLen = _chm_parse_cword(st);
1431 if (_chm_parse_UTF8(st, strLen, ref buffer)==0)
1434 // check if it is the right name
// The scan stops at the first entry name that sorts after objPath
// (lower-cased comparison); the action taken is not visible in this excerpt.
1435 if (buffer.ToLower().CompareTo(objPath.ToLower())>0)
1438 // load next value for path
1439 page = (int)_chm_parse_cword(st);
// In-memory form of the LZX reset table header plus the reader that fills it.
// CHMStream uses block_count, table_offset, compressed_len and block_len to
// locate compressed blocks.
1445 public class chmLzxcResetTable:BaseStructure
1447 public UInt32 version=0;
// Number of blocks described by the table.
1448 public UInt32 block_count=0;
1449 public UInt32 unknown=0;
// Offset of the first 8-byte table entry, relative to the start of the table
// object (see _chm_get_cmpblock_bounds).
1450 public UInt32 table_offset=0;
1451 public UInt64 uncompressed_len=0;
1452 public UInt64 compressed_len=0;
// Uncompressed size of one block.
1453 public UInt64 block_len=0;
// Reads the fields in file order from the reader's current position; chm_open
// expects 1 on success.
// NOTE(review): validation statements are not visible in this excerpt.
1455 public int Read_lzxc_reset_table(BinaryReader st)
1457 version=st.ReadUInt32();
1458 block_count=st.ReadUInt32();
1459 unknown=st.ReadUInt32();
1460 table_offset=st.ReadUInt32();
1461 uncompressed_len=st.ReadUInt64();
1462 compressed_len=st.ReadUInt64();
1463 block_len=st.ReadUInt64();
1473 // structure of LZXC control data block
/// <summary>
/// In-memory copy of the LZXC control data block, with a reader that also validates the
/// signature and the window / reset-interval values.
/// </summary>
1474 public class chmLzxcControlData:BaseStructure
// Only _CHM_LZXC_V2_LEN is used in the visible code: it gates reading of unknown_18.
1476 public const int _CHM_LZXC_MIN_LEN=0x18;
1477 public const int _CHM_LZXC_V2_LEN=0x1c;
// The trailing comments give each field's byte offset within the block.
1479 public UInt32 size=0; // 0
1480 public char[] signature=null; // 4 (LZXC)
1481 public UInt32 version=0; // 8
1482 public UInt32 resetInterval=0; // c
1483 public UInt32 windowSize=0; // 10
1484 public UInt32 windowsPerReset=0; // 14
1485 public UInt32 unknown_18=0; // 18
/// <summary>
/// Reads the control data from <paramref name="st"/>, checks the "LZXC" signature, scales
/// resetInterval and windowSize, and then validates them.
/// </summary>
/// <returns>NOTE(review): return statements are not visible here - confirm the success and failure values.</returns>
1487 public int Read_lzxc_control_data(BinaryReader st)
1489 size=st.ReadUInt32();
1490 signature=st.ReadChars(4);
1492 if (CheckSig("LZXC",signature)==false)
1495 version=st.ReadUInt32();
1496 resetInterval=st.ReadUInt32();
1497 windowSize=st.ReadUInt32();
1498 windowsPerReset=st.ReadUInt32();
// The trailing field is only read when the declared size reaches the V2 length.
1500 if (size>=_CHM_LZXC_V2_LEN)
1501 unknown_18=st.ReadUInt32();
// Scale both values by 0x8000 (32768).
// NOTE(review): the source lines between the read above and these two statements are not visible,
// so any guarding condition (for example on version) cannot be confirmed from here.
1507 resetInterval *= 0x8000;
1508 windowSize *= 0x8000;
1510 if (windowSize == 0 || resetInterval == 0)
1513 // for now, only support resetInterval a multiple of windowSize/2
// windowSize == 1 is tested on its own first: windowSize/2 is 0 for that value and the
// modulo below would then divide by zero.
1514 if (windowSize == 1)
1516 if ((resetInterval % (windowSize/2)) != 0)
/// <summary>
/// Bit-reader state handed to lzx_read_lens by reference: that method loads <c>bb</c> into its
/// 32-bit bit buffer and <c>bl</c> into its bits-left counter.
/// NOTE(review): the member declarations are not visible here - confirm their exact types.
/// </summary>
1526 internal class lzx_bits
1538 /* $Id: lzx.c,v 1.5 2002/10/09 01:16:33 jedwin Exp $ */
1539 /***************************************************************************
1540 * lzx.c - LZX decompression routines *
1541 * ------------------- *
1543 * maintainer: Jed Wing <jedwin@ugcs.caltech.edu> *
1544 * source: modified lzx.c from cabextract v0.5 *
1545 * notes: This file was taken from cabextract v0.5, which was, *
1546 * itself, a modified version of the lzx decompression code *
1549 * platforms: In its current incarnation, this file has been tested on *
1550 * two different Linux platforms (one, redhat-based, with a *
1551 * 2.1.2 glibc and gcc 2.95.x, and the other, Debian, with *
1552 * 2.2.4 glibc and both gcc 2.95.4 and gcc 3.0.2). Both were *
1553 * Intel x86 compatible machines. *
1554 ***************************************************************************/
1556 /***************************************************************************
1558 * This program is free software; you can redistribute it and/or modify *
1559 * it under the terms of the GNU General Public License as published by *
1560 * the Free Software Foundation; either version 2 of the License, or *
1561 * (at your option) any later version. Note that an exemption to this *
1562 * license has been granted by Stuart Caie for the purposes of *
1563 * distribution with CHMFile. This does not, to the best of my *
1564 * knowledge, constitute a change in the license of this (the LZX) code *
1567 ***************************************************************************/
1569 /* some constants defined by the LZX specification */
// NOTE: much of the decoder below spells these values out as literals (256, 20, 249+1, 12, 7, 6 ...)
// instead of referencing the constants, so changing a constant here alone does not change the decoder.
// LZX_MIN_MATCH is the value added to every decoded match length in LZXdecompress.
1570 private const int LZX_MIN_MATCH = 2;
1571 private const int LZX_MAX_MATCH = 257;
1572 private const int LZX_NUM_CHARS = 256;
1573 private const int LZX_BLOCKTYPE_INVALID = 0; /* also blocktypes 4-7 invalid */
1574 private const int LZX_BLOCKTYPE_VERBATIM = 1;
1575 private const int LZX_BLOCKTYPE_ALIGNED = 2;
1576 private const int LZX_BLOCKTYPE_UNCOMPRESSED = 3;
1577 private const int LZX_PRETREE_NUM_ELEMENTS = 20;
1578 private const int LZX_ALIGNED_NUM_ELEMENTS = 8; /* aligned offset tree #elements */
// In LZXdecompress a match-length header equal to 7 means a symbol from the length tree is added to it.
1579 private const int LZX_NUM_PRIMARY_LENGTHS = 7; /* this one missing from spec! */
1580 private const int LZX_NUM_SECONDARY_LENGTHS = 249; /* length tree #elements */
1582 /* LZX huffman defines: tweak tablebits as desired */
// For each tree, MAXSYMBOLS is the symbol count and TABLEBITS is the width of the direct-lookup
// part of its decode table (the nbits argument given to make_decode_table).
1583 private const int LZX_PRETREE_MAXSYMBOLS = LZX_PRETREE_NUM_ELEMENTS;
1584 private const int LZX_PRETREE_TABLEBITS = 6;
// 256 literal symbols plus 8 symbols for each of at most 50 position slots; LZXinit derives
// main_elements with the same formula from the actual slot count.
1585 private const int LZX_MAINTREE_MAXSYMBOLS = LZX_NUM_CHARS + 50*8;
1586 private const int LZX_MAINTREE_TABLEBITS = 12;
1587 private const int LZX_LENGTH_MAXSYMBOLS = LZX_NUM_SECONDARY_LENGTHS+1;
1588 private const int LZX_LENGTH_TABLEBITS = 12;
1589 private const int LZX_ALIGNED_MAXSYMBOLS = LZX_ALIGNED_NUM_ELEMENTS;
1590 private const int LZX_ALIGNED_TABLEBITS = 7;
1591 private const int LZX_LENTABLE_SAFETY = 64; /* we allow length table decoding overruns */
// Status codes. On the visible error paths LZXdecompress returns DECR_ILLEGALDATA or DECR_DATAFORMAT.
1593 public const int DECR_OK = 0;
1594 public const int DECR_DATAFORMAT = 1;
1595 public const int DECR_ILLEGALDATA = 2;
1596 public const int DECR_NOMEMORY = 3;
// Decoder state kept between calls. LZXinit allocates the window and sets the sizes;
// LZXreset clears the block bookkeeping and the length tables.
1598 private byte[] window; /* the actual decoding window */
1599 private ulong window_size; /* window size (32Kb through 2Mb) */
1600 private ulong actual_size; /* window size when it was first allocated */
1601 private ulong window_posn; /* current offset within the window */
1602 private ulong R0, R1, R2; /* for the LRU offset system */
1603 private UInt32 main_elements; /* number of main tree elements */
1604 private int header_read; /* have we started decoding at all yet? */
1605 private UInt32 block_type; /* type of this block */
1606 private ulong block_length; /* uncompressed length of this block */
1607 private ulong block_remaining; /* uncompressed bytes still left to decode */
1608 private ulong frames_read; /* the number of CFDATA blocks processed */
1609 private long intel_filesize; /* magic header value used for transform */
1610 private long intel_curpos; /* current offset in transform space */
1611 private int intel_started; /* have we seen any translatable data yet? */
// Storage for the four Huffman trees. Each *_table holds (1 << TABLEBITS) + (MAXSYMBOLS << 1)
// entries and each *_len holds MAXSYMBOLS + LZX_LENTABLE_SAFETY entries; the literals below are
// the values of the corresponding LZX_* constants written out by hand.
1614 private uint [] PRETREE_table = new uint[(1<<(6)) + (((20))<<1)];
1615 private byte [] PRETREE_len = new byte [((20)) + (64)];
1617 private uint [] MAINTREE_table= new uint[(1<<(12)) + (((256) + 50*8)<<1)];
1618 private byte [] MAINTREE_len = new byte [((256) + 50*8) + (64)];
1620 private uint [] LENGTH_table= new uint[(1<<(12)) + (((249)+1)<<1)];
1621 private byte [] LENGTH_len = new byte [((249)+1) + (64)];
1623 private uint [] ALIGNED_table= new uint[(1<<(7)) + (((8))<<1)];
1624 private byte [] ALIGNED_len = new byte [((8)) + (64)];
// BitSource is the compressed input consumed by ReadUInt16; OutputStream is assigned from the
// outpos argument at the start of LZXdecompress.
1625 private System.IO.BinaryReader BitSource=null;
1626 private System.IO.Stream OutputStream=null;
1628 /* LZX decruncher */
1630 /* Microsoft's LZX document and their implementation of the
1631 * com.ms.util.cab Java package do not concur.
1633 * In the LZX document, there is a table showing the correlation between
1634 * window size and the number of position slots. It states that the 1MB
1635 * window = 40 slots and the 2MB window = 42 slots. In the implementation,
1636 * 1MB = 42 slots, 2MB = 50 slots. The actual calculation is 'find the
1637 * first slot whose position base is equal to or more than the required
1638 * window size'. This would explain why other tables in the document refer
1639 * to 50 slots rather than 42.
1641 * The constant NUM_PRIMARY_LENGTHS used in the decompression pseudocode
1642 * is not defined in the specification.
1644 * The LZX document does not state the uncompressed block has an
1645 * uncompressed length field. Where does this length field come from, so
1646 * we can know how large the block is? The implementation has it as the 24
1647 * bits following after the 3 blocktype bits, before the alignment
1650 * The LZX document states that aligned offset blocks have their aligned
1651 * offset huffman tree AFTER the main and length trees. The implementation
1652 * suggests that the aligned offset tree is BEFORE the main and length
1655 * The LZX document decoding algorithm states that, in an aligned offset
1656 * block, if an extra_bits value is 1, 2 or 3, then that number of bits
1657 * should be read and the result added to the match offset. This is
1658 * correct for 1 and 2, but not 3, where just a huffman symbol (using the
1659 * aligned tree) should be read.
1661 * Regarding the E8 preprocessing, the LZX document states 'No translation
1662 * may be performed on the last 6 bytes of the input block'. This is
1663 * correct. However, the pseudocode provided checks for the *E8 leader*
1664 * up to the last 6 bytes. If the leader appears between -10 and -7 bytes
1665 * from the end, this would cause the next four bytes to be modified, at
1666 * least one of which would be in the last 6 bytes, which is not allowed
1667 * according to the spec.
1669 * The specification states that the huffman trees must always contain at
1670 * least one element. However, many CAB files contain blocks where the
1671 * length tree is completely empty (because there are no matches), and
1672 * this is expected to succeed.
1675 /* LZX uses what it calls 'position slots' to represent match offsets.
1676 * What this means is that a small 'position slot' number and a small
1677 * offset from that slot are encoded instead of one large offset for
1679 * - position_base is an index to the position slot bases
1680 * - extra_bits states how many bits of offset-from-base data is needed.
// Position-slot tables, indexed by slot number. For the entries visible here the two tables are
// linked by position_base[n + 1] == position_base[n] + (1 << extra_bits[n]).
// LZXdecompress uses them as extra_bits[slot] (how many further offset bits to read) and
// position_base[slot] - 2 (the base the extra bits are added to).
1682 private byte [] extra_bits = {
1683 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
1684 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14,
1685 15, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
1689 private ulong [] position_base = {
1690 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192,
1691 256, 384, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152,
1692 65536, 98304, 131072, 196608, 262144, 393216, 524288, 655360, 786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936,
1693 1835008, 1966080, 2097152
/// <summary>
/// Reads the next two bytes from BitSource and combines them low byte first into a 16-bit
/// value, which is returned widened to UInt32.
/// NOTE(review): the lines around the two reads are not visible here - confirm how an
/// end-of-stream condition is handled.
/// </summary>
1696 private UInt32 ReadUInt16()
1703 Byte1=BitSource.ReadByte();
1704 Byte2=BitSource.ReadByte();
// First byte read is the low half, second byte the high half.
1709 rc=(Byte2<<8)+Byte1;
/// <summary>
/// Prepares the decoder for a window of 2^<paramref name="WindowSize"/> bytes: allocates the
/// window, derives the number of main-tree elements and clears the block state and length tables.
/// </summary>
/// <param name="WindowSize">Base-2 logarithm of the window size in bytes; accepted range is 15 to 21.</param>
/// <returns>
/// false when <paramref name="WindowSize"/> is outside 15..21.
/// NOTE(review): the final return statement is not visible here; presumably true on success - confirm.
/// </returns>
1713 public bool LZXinit(int WindowSize)
// Computed before the range check below; the value is only used after the check has passed.
1715 ulong wndsize = (ulong)(1 << WindowSize);
1718 /* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */
// NOTE(review): the next comment does not match the visible code, which allocates a new window unconditionally.
1719 /* if a previously allocated window is big enough, keep it */
1720 if (WindowSize< 15 || WindowSize> 21) return false;
1722 /* allocate state and associated window */
1723 window = new byte[wndsize];
1729 actual_size = wndsize;
1730 window_size = wndsize;
1732 /* calculate required position slots */
// 2 * WindowSize slots, except for the two largest windows which use 42 and 50.
1733 if (WindowSize == 20) posn_slots = 42;
1734 else if (WindowSize== 21) posn_slots = 50;
1735 else posn_slots = WindowSize << 1;
1737 /** alternatively **/
1738 /* posn_slots=i=0; while (i < wndsize) i += 1 << extra_bits[posn_slots++]; */
1740 /* initialize other state */
// 256 literals plus 8 main-tree symbols per position slot.
1742 main_elements = (uint)(LZX_NUM_CHARS + (posn_slots << 3));
1745 block_remaining = 0;
1746 block_type = LZX_BLOCKTYPE_INVALID;
1751 /* initialise tables to 0 (because deltas will be applied to them) */
// Only the first MAXSYMBOLS entries are cleared here; LZXreset also clears the
// LZX_LENTABLE_SAFETY tail of both arrays.
1752 for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) MAINTREE_len[i] = 0;
1753 for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++) LENGTH_len[i] = 0;
/// <summary>
/// Tear-down entry point of the decoder.
/// NOTE(review): the body is not visible here; presumably it releases the window allocated by LZXinit - confirm.
/// </summary>
1758 public void LZXteardown()
/// <summary>
/// Returns the decoder to its between-blocks state without reallocating the window: no block
/// is in progress, the block type is invalid, and both length tables are zeroed.
/// </summary>
/// <returns>NOTE(review): the return statement and several other assignments are not visible here - confirm the value and what else is reset.</returns>
1763 public int LZXreset()
1768 block_remaining = 0;
1769 block_type = LZX_BLOCKTYPE_INVALID;
// Clears the whole of each array, including the LZX_LENTABLE_SAFETY tail.
1774 for (int i = 0; i < LZX_MAINTREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++) MAINTREE_len[i] = 0;
1775 for (int i = 0; i < LZX_LENGTH_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++) LENGTH_len[i] = 0;
1781 /* Bitstream reading macros:
1783 * INIT_BITSTREAM should be used first to set up the system
1784 * READ_BITS(var,n) takes N bits from the buffer and puts them in var
1786 * ENSURE_BITS(n) ensures there are at least N bits in the bit buffer
1787 * PEEK_BITS(n) extracts (without removing) N bits from the bit buffer
1788 * REMOVE_BITS(n) removes N bits from the bit buffer
1790 * These bit access routines work by using the area beyond the MSB and the
1791 * LSB as a free source of zeroes. This avoids having to mask any bits.
1792 * So we have to know the bit width of the bitbuffer variable. This is
1793 * sizeof(ulong) * 8, also defined as ULONG_BITS
1796 /* number of bits in ulong. Note: This must be a multiple of 16, and at
1797 * least 32 for the bitbuffer code to work (ie, it must be able to ensure
1798 * up to 17 bits - that's adding 16 bits when there's one bit left, or
1799 * adding 32 bits when there are no bits left. The code should work fine
1800 * for machines where ulong >= 32 bits.
/// <summary>
/// Width in bits of the bit buffer. Despite the name, the size measured is that of
/// System.UInt32 (4 bytes, shifted left by 3 = 32), which matches the UInt32 bitbuf used by the readers.
/// NOTE(review): the value never changes, yet it is recomputed through Marshal.SizeOf on every
/// call, and it is called inside every bit-refill loop - a constant would do.
/// </summary>
1802 private int ULONG_BITS()
1804 int rc=(System.Runtime.InteropServices.Marshal.SizeOf(typeof(System.UInt32))<<3);
1808 /* make_decode_table(nsyms, nbits, length[], table[])
1810 * This function was coded by David Tritscher. It builds a fast huffman
1811 * decoding table out of just a canonical huffman code lengths table.
1813 * nsyms = total number of symbols in this huffman tree.
1814 * nbits = any symbols with a code length of nbits or less can be decoded
1815 * in one lookup of the table.
1816 * length = A table to get code lengths from [0 to nsyms-1]
1817 * table = The table to fill up with decoded symbols and pointers.
1819 * Returns 0 for OK or 1 for error
/// <summary>
/// Builds a Huffman decoding table from a table of code lengths. Codes of at most
/// <paramref name="nbits"/> bits are resolved by one direct lookup; longer codes (up to 16 bits)
/// are resolved by following two-entry nodes allocated behind the direct-lookup area.
/// </summary>
/// <param name="nsyms">Number of symbols whose lengths are examined.</param>
/// <param name="nbits">Width in bits of the direct-lookup area (1 &lt;&lt; nbits entries).</param>
/// <param name="length">Code length per symbol; 0 means the symbol is unused. Not reassigned in the visible code.</param>
/// <param name="table">Table to fill. Not reassigned in the visible code.</param>
/// <returns>
/// 1 when the code lengths over-fill the table (visible below); 0 for OK per the header comment.
/// NOTE(review): the final return statements are not visible here - confirm.
/// </returns>
1822 private int make_decode_table(ulong nsyms, byte nbits, ref byte [] length, ref UInt32[] table)
1828 ulong pos = 0; /* the current position in the decode table */
1829 ulong table_mask = (ulong)(1 << nbits);
1830 ulong bit_mask = table_mask >> 1; /* don't do 0 length codes */
// Nodes for long codes are stored at index next_symbol << 1, so the first one lands at
// table_mask, directly after the direct-lookup area.
1831 ulong next_symbol = bit_mask; /* base of allocation for long codes */
1833 /* fill entries for codes short enough for a direct mapping */
// One pass over all symbols per code length, shortest lengths first.
1834 while (bit_num <= nbits)
1836 for (sym = 0; sym < nsyms; sym++)
1838 if (length[sym] == bit_num)
1842 if((pos += bit_mask) > table_mask) return 1; /* table overrun */
1844 /* fill all possible lookups of this symbol with the symbol itself */
1846 while (fill-- > 0) table[leaf++] = (uint)sym;
1853 /* if there are any codes longer than nbits */
1854 if (pos != table_mask)
1856 /* clear the remainder of the table */
1857 for (sym = pos; sym < table_mask; sym++) table[sym] = 0;
1859 /* give ourselves room for codes to grow by up to 16 more bits */
// NOTE(review): the statements that rescale pos, table_mask and bit_mask for this phase are
// not visible here - confirm them against the reference implementation.
1864 while (bit_num <= 16)
1866 for (sym = 0; sym < nsyms; sym++)
1868 if (length[sym] == bit_num)
// One step down the tree for every code bit beyond the first nbits.
1871 for (fill = 0; fill < (ulong)(bit_num - nbits); fill++)
1873 /* if this path hasn't been taken yet, 'allocate' two entries */
1874 if (table[leaf] == 0)
1876 table[(next_symbol << 1)] = 0;
1877 table[(next_symbol << 1) + 1] = 0;
1878 table[leaf] = (uint)next_symbol++;
1880 /* follow the path and select either left or right for next bit */
1881 leaf = table[leaf] << 1;
// Bit (15 - fill) of pos decides which of the two child entries is taken.
1882 if (((pos >> (byte)(15-fill)) & 1)==1)
1885 table[leaf] = (uint)sym;
1887 if ((pos += bit_mask) > table_mask)
1888 return 1; /* table overflow */
// pos == table_mask means the code lengths filled the code space exactly.
1897 if (pos == table_mask)
1900 /* either erroneous table, or all elements are 0 - let's find out. */
1901 for (sym = 0; sym < nsyms; sym++) if (length[(uint)sym]!=0)
/// <summary>
/// Reads code lengths for symbols <paramref name="first"/> up to (not including)
/// <paramref name="last"/> into <paramref name="lens"/>. It first reads twenty 4-bit pre-tree
/// lengths and builds the pre-tree decode table, then decodes one pre-tree symbol per loop
/// iteration and stores the resulting length(s).
/// </summary>
/// <param name="lens">Length table being updated.</param>
/// <param name="first">Index of the first entry to fill.</param>
/// <param name="last">Index one past the last entry to fill.</param>
/// <param name="lb">Bit-reader state; bb and bl are loaded into the local bit buffer and bit count on entry.
/// NOTE(review): the write-back of the updated state is not visible here - confirm.</param>
/// <returns>NOTE(review): return statements are not visible here; callers treat a non-zero result as failure.</returns>
1907 private int lzx_read_lens(byte []lens, ulong first, ulong last, ref lzx_bits lb)
1912 UInt32 bitbuf = lb.bb;
1913 int bitsleft = lb.bl;
1915 UInt32 [] hufftbl=null;
// Bits are consumed from the top of bitbuf. Every refill ORs 16 new bits in just below the
// bits already held; an n-bit field is extracted by shifting right by (32 - n).
1917 for (x = 0; x < 20; x++)
1921 while (bitsleft < (4))
1923 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
1926 y = (bitbuf >> (ULONG_BITS()- (4)));
1931 PRETREE_len[x] = (byte)y;
// 20 symbols with a 6-bit direct lookup; a non-zero result means the lengths were invalid.
1933 if (make_decode_table( 20, 6, ref PRETREE_len, ref PRETREE_table)!=0)
// x is advanced inside the body (lens[x++] below), not in the loop header.
1936 for (x = first; x < last; )
1940 while (bitsleft < 16)
1942 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
1945 hufftbl = PRETREE_table;
// A table value of 20 or more is not a symbol but a link to follow for a longer code.
1946 if ((i = hufftbl[((ulong)bitbuf >> (ULONG_BITS()- 6))]) >= 20)
1948 j = (ulong)(1 << (byte)(ULONG_BITS()- ((6))));
1953 if ((bitbuf & j)!=0)
1963 while ((i = hufftbl[i]) >= 20);
// Extra fields of 4, 5 and 1 bits are read below.
// NOTE(review): the conditions selecting each case are not visible here - confirm which pre-tree symbol uses which width.
1976 while (bitsleft < (4))
1978 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
1981 y = (bitbuf >> (ULONG_BITS()- (4)));
1995 while (bitsleft < (5))
1997 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
2000 (y) = (bitbuf >> (ULONG_BITS()- (5)));
2015 while (bitsleft < (1))
2017 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
2020 y = (bitbuf >> (ULONG_BITS()- (1)));
// Second pre-tree symbol decode, same table and lookup as the first one above.
2028 while (bitsleft < (16))
2030 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
2033 hufftbl = (PRETREE_table);
2034 if ((i = hufftbl[(bitbuf >> (ULONG_BITS()- 6))]) >= 20)
2036 j = (ulong)1 << (byte)(ULONG_BITS()- 6);
// NOTE(review): this copy tests (bitbuf & j)==0 while the first tree walk tests !=0. The branch
// bodies are not visible here - confirm that both end up setting the same bit of i.
2041 if ((bitbuf & j)==0)
2050 while ((i = hufftbl[i]) >= 20);
2064 lens[x++] = (byte)z;
2071 lens[x++] = (byte)z;
2079 public int LZXdecompress(System.IO.BinaryReader inpos, System.IO.Stream outpos, ref ulong inlen, ref ulong outlen)
2082 OutputStream=outpos;
2084 long endinp = BitSource.BaseStream.Position+(long)inlen;
2085 ulong runsrc, rundest;
2086 UInt32 [] hufftbl; /* used in READ_HUFFSYM macro as chosen decoding table */
2090 ulong match_offset, i,j,k; /* ijk used in READ_HUFFSYM macro */
2091 lzx_bits lb; /* used in READ_LENGTHS macro */
2094 int togo = (int)outlen, this_run, main_element, aligned_bits;
2095 int match_length, length_footer, extra, verbatim_bits;
2100 /* read header if necessary */
2106 while (bitsleft < (1))
2108 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
2111 k = (bitbuf >> (ULONG_BITS()- (1)));
2121 while (bitsleft < (16))
2123 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() -16 - bitsleft);
2126 i = (bitbuf >> (ULONG_BITS()- (16)));
2134 while (bitsleft < (16))
2136 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
2139 j = (bitbuf >> (ULONG_BITS()- (16)));
2145 intel_filesize = (long)((i << 16) | j);
2149 /* main decoding loop */
2152 if (block_remaining == 0)
2154 if (block_type == (3))
2156 if ((block_length & 1)!=0)
2157 BitSource.ReadByte();
2164 while (bitsleft < (3))
2166 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
2169 (block_type) = (uint)(bitbuf >> (ULONG_BITS()- (3)));
2177 while (bitsleft < (16))
2179 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
2182 (i) = (bitbuf >> (ULONG_BITS()- (16)));
2190 while (bitsleft < (8))
2192 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
2195 (j) = (bitbuf >> (ULONG_BITS()- (8)));
2200 block_remaining = block_length = (i << 8) | j;
2204 case (LZX_BLOCKTYPE_ALIGNED):
2205 for (i = 0; i < 8; i++)
2209 while (bitsleft < (3))
2211 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
2214 (j) = (bitbuf >> (ULONG_BITS()- (3)));
2219 (ALIGNED_len)[i] = (byte)j;
2221 if (make_decode_table( 8, 7, ref ALIGNED_len, ref ALIGNED_table)!=0)
2230 if (lzx_read_lens(MAINTREE_len,0,256,ref lb)!=0)
2242 if (lzx_read_lens(MAINTREE_len,256,main_elements,ref lb)!=0)
2251 if (make_decode_table( (256 + 50*8), 12, ref MAINTREE_len, ref MAINTREE_table)!=0)
2255 if (MAINTREE_len[0xE8] != 0) intel_started = 1;
2261 if (lzx_read_lens(LENGTH_len,0,249,ref lb)!=0)
2269 if (make_decode_table( (249+1), 12, ref LENGTH_len, ref LENGTH_table)!=0)
2275 case (LZX_BLOCKTYPE_VERBATIM):
2280 if (lzx_read_lens(MAINTREE_len,0,256,ref lb)!=0)
2292 if (lzx_read_lens(MAINTREE_len,256,main_elements,ref lb)!=0)
2301 if (make_decode_table( (256 + 50*8), 12, ref MAINTREE_len, ref MAINTREE_table)!=0)
2305 if (MAINTREE_len[0xE8] != 0) intel_started = 1;
2311 if (lzx_read_lens(LENGTH_len,0,249,ref lb)!=0)
2319 if (make_decode_table( (249+1), 12, ref LENGTH_len, ref LENGTH_table)!=0)
2325 case (LZX_BLOCKTYPE_UNCOMPRESSED):
2327 while (bitsleft < (16))
2329 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - bitsleft);
2334 BitSource.BaseStream.Seek(-2,System.IO.SeekOrigin.Current);
2336 R0 = (ulong)(BitSource.ReadByte()+(BitSource.ReadByte()<<8)+(BitSource.ReadByte()<<16)+(BitSource.ReadByte()<<24));
2337 R1 = (ulong)(BitSource.ReadByte()+(BitSource.ReadByte()<<8)+(BitSource.ReadByte()<<16)+(BitSource.ReadByte()<<24));
2338 R2 = (ulong)(BitSource.ReadByte()+(BitSource.ReadByte()<<8)+(BitSource.ReadByte()<<16)+(BitSource.ReadByte()<<24));
2342 return (DECR_ILLEGALDATA);
2346 /* buffer exhaustion check */
2347 if (BitSource.BaseStream.Position > (long) endinp)
2349 /* it's possible to have a file where the next run is less than
2350 * 16 bits in size. In this case, the READ_HUFFSYM() macro used
2351 * in building the tables will exhaust the buffer, so we should
2352 * allow for this, but not allow those accidentally read bits to
2353 * be used (so we check that there are at least 16 bits
2354 * remaining - in this boundary case they aren't really part of
2355 * the compressed data)
2357 if (BitSource.BaseStream.Position> (long)(endinp+2) || bitsleft < 16)
2358 return DECR_ILLEGALDATA;
2361 while ((this_run = (int)block_remaining) > 0 && togo > 0)
2363 if (this_run > togo)
2367 block_remaining -= (ulong)this_run;
2369 /* apply 2^x-1 mask */
2370 window_posn &= window_size - 1;
2372 /* runs can't straddle the window wraparound */
2373 if ((window_posn + (ulong)this_run) > window_size)
2374 return DECR_DATAFORMAT;
2378 case LZX_BLOCKTYPE_VERBATIM:
2379 while (this_run > 0)
2383 while (bitsleft < (16))
2385 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
2388 hufftbl = MAINTREE_table;
2389 if ((i = hufftbl[(bitbuf >> (ULONG_BITS()- 12))]) >= 256 + 50*8)
2391 j = (ulong)(1 << (ULONG_BITS()- 12));
2396 if ((bitbuf & j)!=0)
2405 while ((i = hufftbl[i]) >= (((256) + 50*8)));
2407 j = MAINTREE_len[main_element = (int)i];
2409 bitsleft -= (byte)j;
2413 if (main_element < (256))
2415 window[window_posn++] = (byte)main_element;
2420 main_element -= (256);
2422 match_length = main_element & (7);
2423 if (match_length == (7))
2427 while (bitsleft < (16))
2429 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
2432 hufftbl = (LENGTH_table);
2433 if ((i = hufftbl[(bitbuf >> (ULONG_BITS()- 12))]) >= (((249)+1)))
2435 j = (ulong)(1 << (ULONG_BITS()- ((12))));
2440 if ((bitbuf & j)!=0)
2449 while ((i = hufftbl[i]) >= (((249)+1)));
2451 j = LENGTH_len[(length_footer) = (int)i];
2453 bitsleft -= (byte)j;
2457 match_length += length_footer;
2459 match_length += (2);
2461 match_offset = (ulong)(main_element >> 3);
2463 if (match_offset > 2)
2465 if (match_offset != 3)
2467 extra = extra_bits[match_offset];
2470 while (bitsleft < (extra))
2472 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
2475 verbatim_bits = (int)(bitbuf >> (ULONG_BITS()- (extra)));
2480 match_offset = position_base[match_offset] - 2 + (ulong)verbatim_bits;
2487 R2 = R1; R1 = R0; R0 = match_offset;
2489 else if (match_offset == 0)
2493 else if (match_offset == 1)
2496 R1 = R0; R0 = match_offset;
2501 R2 = R0; R0 = match_offset;
2504 rundest = window_posn;
2505 // rundest= window+window_posn
2506 runsrc = rundest - match_offset;
2507 window_posn += (ulong)match_length;
2508 this_run -= match_length;
2511 while ((runsrc<0) && (match_length-- > 0))
2513 window[rundest++]=window[runsrc+window_size];
2514 // *rundest++ = *(runsrc + window_size);
2518 while (match_length-- > 0)
2520 window[rundest++]=window[runsrc++];
2521 // *rundest++ = *runsrc++;
2527 case LZX_BLOCKTYPE_ALIGNED:
2528 while (this_run > 0)
2532 while (bitsleft < (16))
2534 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
2537 hufftbl = MAINTREE_table;
2538 if ((i = hufftbl[(bitbuf >> (ULONG_BITS()- 12))]) >= (((256) + 50*8)))
2540 j = (ulong)1 << (ULONG_BITS()- ((12)));
2545 if ((bitbuf & j)!=0)
2554 while ((i = hufftbl[i]) >= (((256) + 50*8)));
2556 j = MAINTREE_len[(main_element) = (int)i];
2562 if (main_element < (256))
2564 window[window_posn++] = (byte)main_element;
2569 main_element -= (256);
2570 match_length = main_element & (7);
2571 if (match_length == (7))
2575 while (bitsleft < (16))
2577 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
2580 hufftbl = LENGTH_table;
2581 if ((i = hufftbl[(bitbuf >> (ULONG_BITS()- 12))]) >= (((249)+1)))
2583 j = (ulong) 1 << (ULONG_BITS()- ((12)));
2588 if ((bitbuf & j)!=0)
2598 while ((i = hufftbl[i]) >= (((249)+1)));
2600 j = LENGTH_len[length_footer = (int)i];
2605 match_length += length_footer;
2607 match_length += (2);
2609 match_offset = (ulong)(main_element >> 3);
2611 if (match_offset > 2)
2613 extra = extra_bits[match_offset];
2614 match_offset = position_base[match_offset] - 2;
2620 while (bitsleft < (extra))
2622 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
2625 verbatim_bits = (int)(bitbuf >> (ULONG_BITS()- (extra)));
2630 match_offset += (ulong)(verbatim_bits << 3);
2633 while (bitsleft < (16))
2635 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
2638 hufftbl = (ALIGNED_table);
2639 if ((i = hufftbl[(bitbuf >> (ULONG_BITS()- 7))]) >= 8)
2641 j = (ulong)1 << (ULONG_BITS()- ((7)));
2646 if ((bitbuf & j)!=0)
2655 while ((i = hufftbl[i]) >= (((8))));
2658 j = (ALIGNED_len)[(aligned_bits) = (int)i];
2663 match_offset += (ulong)aligned_bits;
2665 else if (extra == 3)
2669 while (bitsleft < (16))
2671 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
2674 hufftbl = (ALIGNED_table);
2675 if ((i = hufftbl[(bitbuf >> (ULONG_BITS()- 7))]) >= 8)
2677 j = (ulong)1 << (ULONG_BITS()- ((7)));
2682 if ((bitbuf & j)!=0)
2691 while ((i = hufftbl[i]) >= 8);
2693 j = (ALIGNED_len)[(aligned_bits) = (int)i];
2698 match_offset += (ulong)aligned_bits;
2704 while (bitsleft < (extra))
2706 bitbuf |= (UInt32)ReadUInt16() << (ULONG_BITS() - 16 - bitsleft);
2709 (verbatim_bits) = (int)(bitbuf >> (int)(ULONG_BITS()- (extra)));
2714 match_offset += (ulong)verbatim_bits;
2721 R2 = R1; R1 = R0; R0 = match_offset;
2723 else if (match_offset == 0)
2727 else if (match_offset == 1)
2730 R1 = R0; R0 = match_offset;
2735 R2 = R0; R0 = match_offset;
2738 rundest = window_posn;
2739 runsrc = rundest - match_offset;
2740 window_posn += (ulong)match_length;
2741 this_run -= match_length;
2743 while ((runsrc<0) && (match_length-- > 0))
2745 // *rundest++ = *(runsrc + window_size); runsrc++;
2746 window[rundest++]=window[runsrc + window_size];
2750 while (match_length-- > 0)
2752 // *rundest++ = *runsrc++;
2753 window[rundest++]=window[runsrc++];
2759 case LZX_BLOCKTYPE_UNCOMPRESSED:
2760 if ((BitSource.BaseStream.Position + (long)this_run) > (long)endinp)
2763 // memcpy(window + window_posn, inposCount, this_run);
2764 for(i=0; i<(ulong)this_run;i++)
2766 window[window_posn+i]=BitSource.ReadByte();
2768 window_posn += (ulong)this_run;
2772 return DECR_ILLEGALDATA; /* might as well */
2778 if (togo != 0) return DECR_ILLEGALDATA;
2780 // memcpy(outpos, window + ((!window_posn) ? window_size : window_posn) - outlen, (size_t) outlen);
2783 start=(ulong)window_size;
2785 start=(ulong)window_posn;
2787 start-=(ulong)outlen;
2789 long Pos=OutputStream.Position;
2790 for(i=0;i<(ulong)outlen;i++)
2792 OutputStream.WriteByte(window[start+i]);
2794 OutputStream.Seek(Pos,System.IO.SeekOrigin.Begin);
2796 /* intel E8 decoding */
2797 if ((frames_read++ < 32768) && intel_filesize != 0)
2799 if (outlen <= 6 || (intel_started==0))
2801 intel_curpos += (long)outlen;
2805 // UBYTE *data = outpos;
2806 long dataend = OutputStream.Position + (int)outlen - 10;
2807 long curpos = intel_curpos;
2808 long filesize = intel_filesize;
2809 long abs_off, rel_off;
2811 intel_curpos = (long)curpos + (long)outlen;
2813 while (OutputStream.Position < dataend)
2815 if (OutputStream.ReadByte() != 0xE8)
2821 abs_off = (long)(OutputStream.ReadByte() | (OutputStream.ReadByte() <<8) | (OutputStream.ReadByte() <<16) | (OutputStream.ReadByte() <<24));
2822 if (abs_off < filesize)
2825 rel_off = (long)(abs_off - curpos);
2827 rel_off = (long)abs_off + filesize;
2828 OutputStream.WriteByte((byte)(rel_off & 0x000000ff));
2829 OutputStream.WriteByte((byte)((rel_off & 0x0000ff00)>>8));
2830 OutputStream.WriteByte((byte)((rel_off & 0x00ff0000)>>16));
2831 OutputStream.WriteByte((byte)((rel_off & 0xff000000)>>24));