SmartPDF - lightweight pdf viewer app for rosapps
[reactos.git] / rosapps / smartpdf / fitz / mupdf / pdf_cmap.c
1 /*
2 * The CMap data structure here is constructed on the fly by
3 * adding simple range-to-range mappings. Then the data structure
4 * is optimized to contain both range-to-range and range-to-table
5 * lookups.
6 *
7 * Any one-to-many mappings are inserted as one-to-table
8 * lookups in the beginning, and are not affected by the optimization
9 * stage.
10 *
11 * There is a special function to add a 256-length range-to-table mapping.
12 * The ranges do not have to be added in order.
13 *
14 * This code can be a lot simpler if we don't care about wasting memory,
15 * or can trust the parser to give us optimal mappings.
16 */
17
18 #include "fitz.h"
19 #include "mupdf.h"
20
typedef struct pdf_range_s pdf_range;

/* Capacity of the codespace array embedded in a cmap. */
enum { MAXCODESPACE = 10 };

/* Kinds of lookup a range can describe (stored in pdf_range.flag). */
enum { SINGLE, RANGE, TABLE, MULTI };

struct pdf_range_s
{
	int low;	/* first source code covered by the range (inclusive) */
	int high;	/* last source code covered by the range (inclusive) */
	int flag;	/* what kind of lookup is this */
	int offset;	/* either range-delta or table-index */
};

/*
 * qsort() comparator ordering ranges by ascending 'low'.
 *
 * The values are compared instead of subtracted: codes built from
 * 4-byte strings can span the whole int range, and 'a - b' would
 * then overflow (undefined behaviour, and a wrong sort order).
 *
 * Returns a negative, zero or positive value as qsort() expects.
 */
static int
cmprange(const void *va, const void *vb)
{
	int a = ((const pdf_range *) va)->low;
	int b = ((const pdf_range *) vb)->low;

	return (a > b) - (a < b);
}
39
40 struct pdf_cmap_s
41 {
42 int refs;
43 char cmapname[32];
44
45 char usecmapname[32];
46 pdf_cmap *usecmap;
47
48 int wmode;
49
50 int ncspace;
51 struct {
52 int n;
53 unsigned char lo[4];
54 unsigned char hi[4];
55 } cspace[MAXCODESPACE];
56
57 int rlen, rcap;
58 pdf_range *ranges;
59
60 int tlen, tcap;
61 int *table;
62 };
63
64 /*
65 * Allocate, destroy and simple parameters.
66 */
67
68 fz_error *
69 pdf_newcmap(pdf_cmap **cmapp)
70 {
71 pdf_cmap *cmap;
72
73 cmap = *cmapp = fz_malloc(sizeof(pdf_cmap));
74 if (!cmap)
75 return fz_outofmem;
76
77 cmap->refs = 1;
78 strcpy(cmap->cmapname, "");
79
80 strcpy(cmap->usecmapname, "");
81 cmap->usecmap = nil;
82
83 cmap->wmode = 0;
84
85 cmap->ncspace = 0;
86
87 cmap->rlen = 0;
88 cmap->rcap = 0;
89 cmap->ranges = nil;
90
91 cmap->tlen = 0;
92 cmap->tcap = 0;
93 cmap->table = nil;
94
95 return nil;
96 }
97
98 pdf_cmap *
99 pdf_keepcmap(pdf_cmap *cmap)
100 {
101 cmap->refs ++;
102 return cmap;
103 }
104
105 void
106 pdf_dropcmap(pdf_cmap *cmap)
107 {
108 if (--cmap->refs == 0)
109 {
110 if (cmap->usecmap)
111 pdf_dropcmap(cmap->usecmap);
112 fz_free(cmap->ranges);
113 fz_free(cmap->table);
114 fz_free(cmap);
115 }
116 }
117
118 pdf_cmap *
119 pdf_getusecmap(pdf_cmap *cmap)
120 {
121 return cmap->usecmap;
122 }
123
124 void
125 pdf_setusecmap(pdf_cmap *cmap, pdf_cmap *usecmap)
126 {
127 int i;
128
129 if (cmap->usecmap)
130 pdf_dropcmap(cmap->usecmap);
131 cmap->usecmap = pdf_keepcmap(usecmap);
132
133 if (cmap->ncspace == 0)
134 {
135 cmap->ncspace = usecmap->ncspace;
136 for (i = 0; i < usecmap->ncspace; i++)
137 cmap->cspace[i] = usecmap->cspace[i];
138 }
139 }
140
141 int
142 pdf_getwmode(pdf_cmap *cmap)
143 {
144 return cmap->wmode;
145 }
146
147 void
148 pdf_setwmode(pdf_cmap *cmap, int wmode)
149 {
150 cmap->wmode = wmode;
151 }
152
153 void
154 pdf_debugcmap(pdf_cmap *cmap)
155 {
156 int i, k, n;
157
158 printf("cmap $%p /%s {\n", cmap, cmap->cmapname);
159
160 if (cmap->usecmapname[0])
161 printf(" usecmap /%s\n", cmap->usecmapname);
162 if (cmap->usecmap)
163 printf(" usecmap $%p\n", cmap->usecmap);
164
165 printf(" wmode %d\n", cmap->wmode);
166
167 printf(" codespaces {\n");
168 for (i = 0; i < cmap->ncspace; i++)
169 {
170 printf(" <");
171 for (k = 0; k < cmap->cspace[i].n; k++)
172 printf("%02x", cmap->cspace[i].lo[k]);
173 printf("> <");
174 for (k = 0; k < cmap->cspace[i].n; k++)
175 printf("%02x", cmap->cspace[i].hi[k]);
176 printf(">\n");
177 }
178 printf(" }\n");
179
180 printf(" ranges (%d,%d) {\n", cmap->rlen, cmap->tlen);
181 for (i = 0; i < cmap->rlen; i++)
182 {
183 pdf_range *r = &cmap->ranges[i];
184 printf(" <%04x> <%04x> ", r->low, r->high);
185 if (r->flag == TABLE)
186 {
187 printf("[ ");
188 for (k = 0; k < r->high - r->low + 1; k++)
189 printf("%d ", cmap->table[r->offset + k]);
190 printf("]\n");
191 }
192 else if (r->flag == MULTI)
193 {
194 printf("< ");
195 n = cmap->table[r->offset];
196 for (k = 0; k < n; k++)
197 printf("%04x ", cmap->table[r->offset + 1 + k]);
198 printf(">\n");
199 }
200 else
201 printf("%d\n", r->offset);
202 }
203 printf(" }\n}\n");
204 }
205
206 /*
207 * Add a codespacerange section.
208 * These ranges are used by pdf_decodecmap to decode
209 * multi-byte encoded strings.
210 */
211 fz_error *
212 pdf_addcodespace(pdf_cmap *cmap, unsigned lo, unsigned hi, int n)
213 {
214 int i;
215
216 if (cmap->ncspace + 1 == MAXCODESPACE)
217 return fz_throw("rangelimit: too many code space ranges");
218
219 cmap->cspace[cmap->ncspace].n = n;
220
221 for (i = 0; i < n; i++)
222 {
223 int o = (n - i - 1) * 8;
224 cmap->cspace[cmap->ncspace].lo[i] = (lo >> o) & 0xFF;
225 cmap->cspace[cmap->ncspace].hi[i] = (hi >> o) & 0xFF;
226 }
227
228 cmap->ncspace ++;
229
230 return nil;
231 }
232
233 /*
234 * Add an integer to the table.
235 */
236 static fz_error *
237 addtable(pdf_cmap *cmap, int value)
238 {
239 if (cmap->tlen + 1 > cmap->tcap)
240 {
241 int newcap = cmap->tcap == 0 ? 256 : cmap->tcap * 2;
242 int *newtable = fz_realloc(cmap->table, newcap * sizeof(int));
243 if (!newtable)
244 return fz_outofmem;
245 cmap->tcap = newcap;
246 cmap->table = newtable;
247 }
248
249 cmap->table[cmap->tlen++] = value;
250
251 return nil;
252 }
253
254 /*
255 * Add a range.
256 */
257 static fz_error *
258 addrange(pdf_cmap *cmap, int low, int high, int flag, int offset)
259 {
260 if (cmap->rlen + 1 > cmap->rcap)
261 {
262 pdf_range *newranges;
263 int newcap = cmap->rcap == 0 ? 256 : cmap->rcap * 2;
264 newranges = fz_realloc(cmap->ranges, newcap * sizeof(pdf_range));
265 if (!newranges)
266 return fz_outofmem;
267 cmap->rcap = newcap;
268 cmap->ranges = newranges;
269 }
270
271 cmap->ranges[cmap->rlen].low = low;
272 cmap->ranges[cmap->rlen].high = high;
273 cmap->ranges[cmap->rlen].flag = flag;
274 cmap->ranges[cmap->rlen].offset = offset;
275 cmap->rlen ++;
276
277 return nil;
278 }
279
280 /*
281 * Add a range-to-table mapping.
282 */
283 fz_error *
284 pdf_maprangetotable(pdf_cmap *cmap, int low, int *table, int len)
285 {
286 fz_error *error;
287 int offset;
288 int high;
289 int i;
290
291 high = low + len;
292 offset = cmap->tlen;
293
294 for (i = 0; i < len; i++)
295 {
296 error = addtable(cmap, table[i]);
297 if (error)
298 return error;
299 }
300
301 return addrange(cmap, low, high, TABLE, offset);
302 }
303
304 /*
305 * Add a range of contiguous one-to-one mappings (ie 1..5 maps to 21..25)
306 */
307 fz_error *
308 pdf_maprangetorange(pdf_cmap *cmap, int low, int high, int offset)
309 {
310 return addrange(cmap, low, high, high - low == 0 ? SINGLE : RANGE, offset);
311 }
312
313 /*
314 * Add a single one-to-many mapping.
315 */
316 fz_error *
317 pdf_maponetomany(pdf_cmap *cmap, int low, int *values, int len)
318 {
319 fz_error *error;
320 int offset;
321 int i;
322
323 if (len == 1)
324 return addrange(cmap, low, low, SINGLE, values[0]);
325
326 offset = cmap->tlen;
327
328 error = addtable(cmap, len);
329 if (error)
330 return error;
331
332 for (i = 0; i < len; i++)
333 {
334 addtable(cmap, values[i]);
335 if (error)
336 return error;
337 }
338
339 return addrange(cmap, low, low, MULTI, offset);
340 }
341
342 /*
343 * Sort the input ranges.
344 * Merge contiguous input ranges to range-to-range if the output is contiguos.
345 * Merge contiguous input ranges to range-to-table if the output is random.
346 */
347 fz_error *
348 pdf_sortcmap(pdf_cmap *cmap)
349 {
350 fz_error *error;
351 pdf_range *newranges;
352 int *newtable;
353 pdf_range *a; /* last written range on output */
354 pdf_range *b; /* current range examined on input */
355
356 qsort(cmap->ranges, cmap->rlen, sizeof(pdf_range), cmprange);
357
358 a = cmap->ranges;
359 b = cmap->ranges + 1;
360
361 while (b < cmap->ranges + cmap->rlen)
362 {
363 /* ignore one-to-many mappings */
364 if (b->flag == MULTI)
365 {
366 *(++a) = *b;
367 }
368
369 /* input contiguous */
370 else if (a->high + 1 == b->low)
371 {
372 /* output contiguous */
373 if (a->high - a->low + a->offset + 1 == b->offset)
374 {
375 /* SR -> R and SS -> R and RR -> R and RS -> R */
376 if (a->flag == SINGLE || a->flag == RANGE)
377 {
378 a->flag = RANGE;
379 a->high = b->high;
380 }
381
382 /* LS -> L */
383 else if (a->flag == TABLE && b->flag == SINGLE)
384 {
385 a->high = b->high;
386 error = addtable(cmap, b->offset);
387 if (error)
388 return error;
389 }
390
391 /* LR -> LR */
392 else if (a->flag == TABLE && b->flag == RANGE)
393 {
394 *(++a) = *b;
395 }
396
397 /* XX -> XX */
398 else
399 {
400 *(++a) = *b;
401 }
402 }
403
404 /* output separated */
405 else
406 {
407 /* SS -> L */
408 if (a->flag == SINGLE && b->flag == SINGLE)
409 {
410 a->flag = TABLE;
411 a->high = b->high;
412
413 error = addtable(cmap, a->offset);
414 if (error)
415 return error;
416
417 error = addtable(cmap, b->offset);
418 if (error)
419 return error;
420
421 a->offset = cmap->tlen - 2;
422 }
423
424 /* LS -> L */
425 else if (a->flag == TABLE && b->flag == SINGLE)
426 {
427 a->high = b->high;
428 error = addtable(cmap, b->offset);
429 if (error)
430 return error;
431 }
432
433 /* XX -> XX */
434 else
435 {
436 *(++a) = *b;
437 }
438 }
439 }
440
441 /* input separated: XX -> XX */
442 else
443 {
444 *(++a) = *b;
445 }
446
447 b ++;
448 }
449
450 cmap->rlen = a - cmap->ranges + 1;
451
452 assert(cmap->rlen > 0);
453
454 newranges = fz_realloc(cmap->ranges, cmap->rlen * sizeof(pdf_range));
455 if (!newranges)
456 return fz_outofmem;
457 cmap->rcap = cmap->rlen;
458 cmap->ranges = newranges;
459
460 if (cmap->tlen)
461 {
462 newtable = fz_realloc(cmap->table, cmap->tlen * sizeof(int));
463 if (!newtable)
464 return fz_outofmem;
465 cmap->tcap = cmap->tlen;
466 cmap->table = newtable;
467 }
468
469 return nil;
470 }
471
472 /*
473 * Lookup the mapping of a codepoint.
474 */
475 int
476 pdf_lookupcmap(pdf_cmap *cmap, int cpt)
477 {
478 int l = 0;
479 int r = cmap->rlen - 1;
480 int m;
481
482 while (l <= r)
483 {
484 m = (l + r) >> 1;
485 if (cpt < cmap->ranges[m].low)
486 r = m - 1;
487 else if (cpt > cmap->ranges[m].high)
488 l = m + 1;
489 else
490 {
491 int i = cpt - cmap->ranges[m].low + cmap->ranges[m].offset;
492 if (cmap->ranges[m].flag == TABLE)
493 return cmap->table[i];
494 if (cmap->ranges[m].flag == MULTI)
495 return -1;
496 return i;
497 }
498 }
499
500 if (cmap->usecmap)
501 return pdf_lookupcmap(cmap->usecmap, cpt);
502
503 return -1;
504 }
505
506 /*
507 * Use the codespace ranges to extract a codepoint from a
508 * multi-byte encoded string.
509 */
510 unsigned char *
511 pdf_decodecmap(pdf_cmap *cmap, unsigned char *buf, int *cpt)
512 {
513 int i, k;
514
515 for (k = 0; k < cmap->ncspace; k++)
516 {
517 unsigned char *lo = cmap->cspace[k].lo;
518 unsigned char *hi = cmap->cspace[k].hi;
519 int n = cmap->cspace[k].n;
520 int c = 0;
521
522 for (i = 0; i < n; i++)
523 {
524 if (lo[i] <= buf[i] && buf[i] <= hi[i])
525 c = (c << 8) | buf[i];
526 else
527 break;
528 }
529
530 if (i == n) {
531 *cpt = c;
532 return buf + n;
533 }
534 }
535
536 *cpt = 0;
537 return buf + 1;
538 }
539
540 /*
541 * CMap parser
542 */
543
544 enum
545 {
546 TUSECMAP = PDF_NTOKENS,
547 TBEGINCODESPACERANGE,
548 TENDCODESPACERANGE,
549 TBEGINBFCHAR,
550 TENDBFCHAR,
551 TBEGINBFRANGE,
552 TENDBFRANGE,
553 TBEGINCIDCHAR,
554 TENDCIDCHAR,
555 TBEGINCIDRANGE,
556 TENDCIDRANGE
557 };
558
559 static int tokenfromkeyword(char *key)
560 {
561 if (!strcmp(key, "usecmap")) return TUSECMAP;
562 if (!strcmp(key, "begincodespacerange")) return TBEGINCODESPACERANGE;
563 if (!strcmp(key, "endcodespacerange")) return TENDCODESPACERANGE;
564 if (!strcmp(key, "beginbfchar")) return TBEGINBFCHAR;
565 if (!strcmp(key, "endbfchar")) return TENDBFCHAR;
566 if (!strcmp(key, "beginbfrange")) return TBEGINBFRANGE;
567 if (!strcmp(key, "endbfrange")) return TENDBFRANGE;
568 if (!strcmp(key, "begincidchar")) return TBEGINCIDCHAR;
569 if (!strcmp(key, "endcidchar")) return TENDCIDCHAR;
570 if (!strcmp(key, "begincidrange")) return TBEGINCIDRANGE;
571 if (!strcmp(key, "endcidrange")) return TENDCIDRANGE;
572 return PDF_TKEYWORD;
573 }
574
/*
 * Pack the first 'len' bytes of 'buf' into an integer, most
 * significant byte first.
 *
 * The value is accumulated as unsigned so that strings of four or
 * more bytes wrap instead of overflowing a signed shift (undefined
 * behaviour); only the last sizeof(int) bytes survive in that case.
 * A zero or negative 'len' yields 0.
 */
static int codefromstring(unsigned char *buf, int len)
{
	unsigned int code = 0;

	while (len-- > 0)
		code = (code << 8) | *buf++;

	return (int) code;
}
582
583 static int mylex(fz_stream *file, char *buf, int n, int *sl)
584 {
585 int token = pdf_lex(file, buf, n, sl);
586 if (token == PDF_TKEYWORD)
587 token = tokenfromkeyword(buf);
588 return token;
589 }
590
591 static fz_error *parsecmapname(pdf_cmap *cmap, fz_stream *file)
592 {
593 char buf[256];
594 int token;
595 int len;
596
597 token = mylex(file, buf, sizeof buf, &len);
598 if (token == PDF_TNAME) {
599 strlcpy(cmap->cmapname, buf, sizeof(cmap->cmapname));
600 return nil;
601 }
602
603 return fz_throw("syntaxerror in CMap after /CMapName");
604 }
605
606 static fz_error *parsewmode(pdf_cmap *cmap, fz_stream *file)
607 {
608 char buf[256];
609 int token;
610 int len;
611
612 token = mylex(file, buf, sizeof buf, &len);
613 if (token == PDF_TINT) {
614 pdf_setwmode(cmap, atoi(buf));
615 return nil;
616 }
617
618 return fz_throw("syntaxerror in CMap after /WMode");
619 }
620
621 static fz_error *parsecodespacerange(pdf_cmap *cmap, fz_stream *file)
622 {
623 char buf[256];
624 int token;
625 int len;
626 fz_error *error;
627 int lo, hi;
628
629 while (1)
630 {
631 token = mylex(file, buf, sizeof buf, &len);
632
633 if (token == TENDCODESPACERANGE)
634 return nil;
635
636 else if (token == PDF_TSTRING)
637 {
638 lo = codefromstring(buf, len);
639 token = mylex(file, buf, sizeof buf, &len);
640 if (token == PDF_TSTRING)
641 {
642 hi = codefromstring(buf, len);
643 error = pdf_addcodespace(cmap, lo, hi, len);
644 if (error)
645 return error;
646 }
647 else break;
648 }
649
650 else break;
651 }
652
653 return fz_throw("syntaxerror in CMap codespacerange section");
654 }
655
656 static fz_error *parsecidrange(pdf_cmap *cmap, fz_stream *file)
657 {
658 char buf[256];
659 int token;
660 int len;
661 fz_error *error;
662 int lo, hi, dst;
663
664 while (1)
665 {
666 token = mylex(file, buf, sizeof buf, &len);
667
668 if (token == TENDCIDRANGE)
669 return nil;
670
671 else if (token != PDF_TSTRING)
672 goto cleanup;
673
674 lo = codefromstring(buf, len);
675
676 token = mylex(file, buf, sizeof buf, &len);
677 if (token != PDF_TSTRING)
678 goto cleanup;
679
680 hi = codefromstring(buf, len);
681
682 token = mylex(file, buf, sizeof buf, &len);
683 if (token != PDF_TINT)
684 goto cleanup;
685
686 dst = atoi(buf);
687
688 error = pdf_maprangetorange(cmap, lo, hi, dst);
689 if (error)
690 return error;
691 }
692
693 cleanup:
694 return fz_throw("syntaxerror in CMap cidrange section");
695 }
696
697 static fz_error *parsecidchar(pdf_cmap *cmap, fz_stream *file)
698 {
699 char buf[256];
700 int token;
701 int len;
702 fz_error *error;
703 int src, dst;
704
705 while (1)
706 {
707 token = mylex(file, buf, sizeof buf, &len);
708
709 if (token == TENDCIDCHAR)
710 return nil;
711
712 else if (token != PDF_TSTRING)
713 goto cleanup;
714
715 src = codefromstring(buf, len);
716
717 token = mylex(file, buf, sizeof buf, &len);
718 if (token != PDF_TINT)
719 goto cleanup;
720
721 dst = atoi(buf);
722
723 error = pdf_maprangetorange(cmap, src, src, dst);
724 if (error)
725 return error;
726 }
727
728 cleanup:
729 return fz_throw("syntaxerror in CMap cidchar section");
730 }
731
732 static fz_error *parsebfrangearray(pdf_cmap *cmap, fz_stream *file, int lo, int hi)
733 {
734 char buf[256];
735 int token;
736 int len;
737 fz_error *error;
738 int dst[256];
739 int i;
740
741 while (1)
742 {
743 token = mylex(file, buf, sizeof buf, &len);
744 /* Note: does not handle [ /Name /Name ... ] */
745
746 if (token == PDF_TCARRAY)
747 return nil;
748
749 else if (token != PDF_TSTRING)
750 return fz_throw("syntaxerror in CMap bfrange array section");
751
752 if (len / 2)
753 {
754 for (i = 0; i < len / 2; i++)
755 dst[i] = codefromstring(buf + i * 2, 2);
756
757 error = pdf_maponetomany(cmap, lo, dst, len / 2);
758 if (error)
759 return error;
760 }
761
762 lo ++;
763 }
764 }
765
766 static fz_error *parsebfrange(pdf_cmap *cmap, fz_stream *file)
767 {
768 char buf[256];
769 int token;
770 int len;
771 fz_error *error;
772 int lo, hi, dst;
773
774 while (1)
775 {
776 token = mylex(file, buf, sizeof buf, &len);
777
778 if (token == TENDBFRANGE)
779 return nil;
780
781 else if (token != PDF_TSTRING)
782 goto cleanup;
783
784 lo = codefromstring(buf, len);
785
786 token = mylex(file, buf, sizeof buf, &len);
787 if (token != PDF_TSTRING)
788 goto cleanup;
789
790 hi = codefromstring(buf, len);
791
792 token = mylex(file, buf, sizeof buf, &len);
793
794 if (token == PDF_TSTRING)
795 {
796 if (len == 2)
797 {
798 dst = codefromstring(buf, len);
799 error = pdf_maprangetorange(cmap, lo, hi, dst);
800 if (error)
801 return error;
802 }
803 else
804 {
805 int dststr[256];
806 int i;
807
808 if (len / 2)
809 {
810 for (i = 0; i < len / 2; i++)
811 dststr[i] = codefromstring(buf + i * 2, 2);
812
813 while (lo <= hi)
814 {
815 dststr[i-1] ++;
816 error = pdf_maponetomany(cmap, lo, dststr, i);
817 if (error)
818 return error;
819 lo ++;
820 }
821 }
822 }
823 }
824
825 else if (token == PDF_TOARRAY)
826 {
827 error = parsebfrangearray(cmap, file, lo, hi);
828 if (error)
829 return error;
830 }
831
832 else
833 {
834 goto cleanup;
835 }
836 }
837
838 cleanup:
839 return fz_throw("syntaxerror in CMap bfrange section");
840 }
841
842 static fz_error *parsebfchar(pdf_cmap *cmap, fz_stream *file)
843 {
844 char buf[256];
845 int token;
846 int len;
847 fz_error *error;
848 int dst[256];
849 int src;
850 int i;
851
852 while (1)
853 {
854 token = mylex(file, buf, sizeof buf, &len);
855
856 if (token == TENDBFCHAR)
857 return nil;
858
859 else if (token != PDF_TSTRING)
860 goto cleanup;
861
862 src = codefromstring(buf, len);
863
864 token = mylex(file, buf, sizeof buf, &len);
865 /* Note: does not handle /dstName */
866 if (token != PDF_TSTRING)
867 goto cleanup;
868
869 if (len / 2)
870 {
871 for (i = 0; i < len / 2; i++)
872 dst[i] = codefromstring(buf + i * 2, 2);
873
874 error = pdf_maponetomany(cmap, src, dst, i);
875 if (error)
876 return error;
877 }
878 }
879
880 cleanup:
881 return fz_throw("syntaxerror in CMap bfchar section");
882 }
883
884 fz_error *
885 pdf_parsecmap(pdf_cmap **cmapp, fz_stream *file)
886 {
887 fz_error *error;
888 pdf_cmap *cmap;
889 char key[64];
890 char buf[256];
891 int token;
892 int len;
893
894 error = pdf_newcmap(&cmap);
895 if (error)
896 return error;
897
898 strcpy(key, ".notdef");
899
900 while (1)
901 {
902 token = mylex(file, buf, sizeof buf, &len);
903
904 if (token == PDF_TEOF)
905 break;
906
907 else if (token == PDF_TERROR)
908 {
909 error = fz_throw("syntaxerror in CMap");
910 goto cleanup;
911 }
912
913 else if (token == PDF_TNAME)
914 {
915 if (!strcmp(buf, "CMapName"))
916 {
917 error = parsecmapname(cmap, file);
918 if (error)
919 goto cleanup;
920 }
921 else if (!strcmp(buf, "WMode"))
922 {
923 error = parsewmode(cmap, file);
924 if (error)
925 goto cleanup;
926 }
927 else
928 strlcpy(key, buf, sizeof key);
929 }
930
931 else if (token == TUSECMAP)
932 {
933 strlcpy(cmap->usecmapname, key, sizeof(cmap->usecmapname));
934 }
935
936 else if (token == TBEGINCODESPACERANGE)
937 {
938 error = parsecodespacerange(cmap, file);
939 if (error)
940 goto cleanup;
941 }
942
943 else if (token == TBEGINBFCHAR)
944 {
945 error = parsebfchar(cmap, file);
946 if (error)
947 goto cleanup;
948 }
949
950 else if (token == TBEGINCIDCHAR)
951 {
952 error = parsecidchar(cmap, file);
953 if (error)
954 goto cleanup;
955 }
956
957 else if (token == TBEGINBFRANGE)
958 {
959 error = parsebfrange(cmap, file);
960 if (error)
961 goto cleanup;
962 }
963
964 else if (token == TBEGINCIDRANGE)
965 {
966 error = parsecidrange(cmap, file);
967 if (error)
968 goto cleanup;
969 }
970
971 /* ignore everything else */
972 }
973
974 error = pdf_sortcmap(cmap);
975 if (error)
976 goto cleanup;
977
978 *cmapp = cmap;
979 return nil;
980
981 cleanup:
982 pdf_dropcmap(cmap);
983 return error;
984 }
985
986 /*
987 * Load CMap stream in PDF file
988 */
989 fz_error *
990 pdf_loadembeddedcmap(pdf_cmap **cmapp, pdf_xref *xref, fz_obj *stmref)
991 {
992 fz_obj *stmobj = stmref;
993 fz_error *error = nil;
994 fz_stream *file;
995 pdf_cmap *cmap = nil;
996 pdf_cmap *usecmap;
997 fz_obj *wmode;
998 fz_obj *obj;
999
1000 if ((*cmapp = pdf_finditem(xref->store, PDF_KCMAP, stmref)))
1001 {
1002 pdf_keepcmap(*cmapp);
1003 return nil;
1004 }
1005
1006 pdf_logfont("load embedded cmap %d %d {\n", fz_tonum(stmref), fz_togen(stmref));
1007
1008 error = pdf_resolve(&stmobj, xref);
1009 if (error)
1010 return error;
1011
1012 error = pdf_openstream(&file, xref, fz_tonum(stmref), fz_togen(stmref));
1013 if (error)
1014 goto cleanup;
1015
1016 error = pdf_parsecmap(&cmap, file);
1017 if (error)
1018 goto cleanup;
1019
1020 fz_dropstream(file);
1021
1022 wmode = fz_dictgets(stmobj, "WMode");
1023 if (fz_isint(wmode))
1024 {
1025 pdf_logfont("wmode %d\n", wmode);
1026 pdf_setwmode(cmap, fz_toint(wmode));
1027 }
1028
1029 obj = fz_dictgets(stmobj, "UseCMap");
1030 if (fz_isname(obj))
1031 {
1032 pdf_logfont("usecmap /%s\n", fz_toname(obj));
1033 error = pdf_loadsystemcmap(&usecmap, fz_toname(obj));
1034 if (error)
1035 goto cleanup;
1036 pdf_setusecmap(cmap, usecmap);
1037 pdf_dropcmap(usecmap);
1038 }
1039 else if (fz_isindirect(obj))
1040 {
1041 pdf_logfont("usecmap %d %d R\n", fz_tonum(obj), fz_togen(obj));
1042 error = pdf_loadembeddedcmap(&usecmap, xref, obj);
1043 if (error)
1044 goto cleanup;
1045 pdf_setusecmap(cmap, usecmap);
1046 pdf_dropcmap(usecmap);
1047 }
1048
1049 pdf_logfont("}\n");
1050
1051 error = pdf_storeitem(xref->store, PDF_KCMAP, stmref, cmap);
1052 if (error)
1053 goto cleanup;
1054
1055 fz_dropobj(stmobj);
1056
1057 *cmapp = cmap;
1058 return nil;
1059
1060 cleanup:
1061 if (cmap)
1062 pdf_dropcmap(cmap);
1063 fz_dropobj(stmobj);
1064 return error;
1065 }
1066
1067 /*
1068 * Load predefined CMap from system
1069 */
1070 fz_error *
1071 pdf_loadsystemcmap(pdf_cmap **cmapp, char *name)
1072 {
1073 fz_error *error = nil;
1074 fz_stream *file;
1075 char *cmapdir;
1076 char *usecmapname;
1077 pdf_cmap *usecmap;
1078 pdf_cmap *cmap;
1079 char path[1024];
1080
1081 cmap = nil;
1082 file = nil;
1083
1084 pdf_logfont("load system cmap %s {\n", name);
1085
1086 cmapdir = getenv("CMAPDIR");
1087 if (!cmapdir)
1088 return fz_throw("ioerror: CMAPDIR environment not set");
1089
1090 strlcpy(path, cmapdir, sizeof path);
1091 strlcat(path, "/", sizeof path);
1092 strlcat(path, name, sizeof path);
1093
1094 error = fz_openrfile(&file, path);
1095 if (error)
1096 goto cleanup;
1097
1098 error = pdf_parsecmap(&cmap, file);
1099 if (error)
1100 goto cleanup;
1101
1102 fz_dropstream(file);
1103
1104 usecmapname = cmap->usecmapname;
1105 if (usecmapname[0])
1106 {
1107 pdf_logfont("usecmap %s\n", usecmapname);
1108 error = pdf_loadsystemcmap(&usecmap, usecmapname);
1109 if (error)
1110 goto cleanup;
1111 pdf_setusecmap(cmap, usecmap);
1112 pdf_dropcmap(usecmap);
1113 }
1114
1115 pdf_logfont("}\n");
1116
1117 *cmapp = cmap;
1118 return nil;
1119
1120 cleanup:
1121 if (cmap)
1122 pdf_dropcmap(cmap);
1123 if (file)
1124 fz_dropstream(file);
1125 return error;
1126 }
1127
1128 /*
1129 * Create an Identity-* CMap (for both 1 and 2-byte encodings)
1130 */
1131 fz_error *
1132 pdf_newidentitycmap(pdf_cmap **cmapp, int wmode, int bytes)
1133 {
1134 fz_error *error;
1135 pdf_cmap *cmap;
1136
1137 error = pdf_newcmap(&cmap);
1138 if (error)
1139 return error;
1140
1141 sprintf(cmap->cmapname, "Identity-%c", wmode ? 'V' : 'H');
1142
1143 error = pdf_addcodespace(cmap, 0x0000, 0xffff, bytes);
1144 if (error) {
1145 pdf_dropcmap(cmap);
1146 return error;
1147 }
1148
1149 error = pdf_maprangetorange(cmap, 0x0000, 0xffff, 0);
1150 if (error) {
1151 pdf_dropcmap(cmap);
1152 return error;
1153 }
1154
1155 error = pdf_sortcmap(cmap);
1156 if (error) {
1157 pdf_dropcmap(cmap);
1158 return error;
1159 }
1160
1161 pdf_setwmode(cmap, wmode);
1162
1163 *cmapp = cmap;
1164 return nil;
1165 }
1166