/*
 * Change the translation of the "Help" menu item to "?", so that the menu can be displa...
 * [reactos.git] / rosapps / smartpdf / fitz / mupdf / pdf_open.c
 */
1 #include <fitz.h>
2 #include <mupdf.h>
3
/*
 * Return 1 when ch is one of the white-space bytes this parser skips
 * (NUL, TAB, LF, FF, CR, SPACE), otherwise 0.
 */
static inline int iswhite(int ch)
{
	switch (ch)
	{
	case '\000':
	case '\011':
	case '\012':
	case '\014':
	case '\015':
	case '\040':
		return 1;
	default:
		return 0;
	}
}
9
10 /*
11 * magic version tag and startxref
12 */
13
14 static fz_error *
15 loadversion(pdf_xref *xref)
16 {
17 char buf[20];
18 int n;
19
20 n = fz_seek(xref->file, 0, 0);
21 if (n < 0)
22 return fz_ioerror(xref->file);
23
24 fz_readline(xref->file, buf, sizeof buf);
25 if (memcmp(buf, "%PDF-", 5) != 0)
26 return fz_throw("syntaxerror: corrupt version marker");
27
28 xref->version = atof(buf + 5);
29
30 pdf_logxref("version %g\n", xref->version);
31
32 return nil;
33 }
34
35 static fz_error *
36 readstartxref(pdf_xref *xref)
37 {
38 char buf[1024];
39 int t, n;
40 int i;
41
42 t = fz_seek(xref->file, 0, 2);
43 if (t == -1)
44 return fz_ioerror(xref->file);
45
46 t = fz_seek(xref->file, MAX(0, t - ((int)sizeof buf)), 0);
47 if (t == -1)
48 return fz_ioerror(xref->file);
49
50 n = fz_read(xref->file, buf, sizeof buf);
51 if (n == -1)
52 return fz_ioerror(xref->file);
53
54 for (i = n - 9; i >= 0; i--)
55 {
56 if (memcmp(buf + i, "startxref", 9) == 0)
57 {
58 i += 9;
59 while (iswhite(buf[i]) && i < n)
60 i ++;
61 xref->startxref = atoi(buf + i);
62 return nil;
63 }
64 }
65
66 return fz_throw("syntaxerror: could not find startxref");
67 }
68
/* characters removed from the end of a line by str_strip_ws_right() */
#define WHITE_SPACE_CHARS " \n\t\r"
70
/*
 * Return a pointer to the first occurrence of c in txt, or NULL when the
 * end of the string is reached first. Searching for '\0' yields a pointer
 * to the terminator itself.
 */
static const char *str_find_char(const char *txt, char c)
{
	const char *p;

	for (p = txt; *p != c; ++p) {
		if (*p == 0)
			return NULL;
	}
	return p;
}
80
/* Return 1 when str_find_char() locates c in str, otherwise 0. */
static int str_contains(const char *str, char c)
{
	return str_find_char(str, c) ? 1 : 0;
}
88
/*
 * Truncate txt in place so that it no longer ends in any character listed
 * in to_strip. A NULL argument or an empty txt is left untouched; a txt
 * made up entirely of strippable characters becomes the empty string.
 */
static void str_strip_right(char *txt, const char *to_strip)
{
	size_t keep;

	if (!txt || !to_strip)
		return;
	if (*txt == 0)
		return;

	/* shrink the kept length while the last kept character is strippable */
	keep = strlen(txt);
	while (keep > 0 && str_contains(to_strip, txt[keep - 1]))
		--keep;

	txt[keep] = 0;
}
112
113 static void str_strip_ws_right(char *txt)
114 {
115 str_strip_right(txt, WHITE_SPACE_CHARS);
116 }
117
118
119 /*
120 * trailer dictionary
121 */
122
123 static fz_error *
124 readoldtrailer(pdf_xref *xref, char *buf, int cap)
125 {
126 int ofs, len;
127 char *s;
128 int n;
129 int t;
130 int c;
131
132 pdf_logxref("load old xref format trailer\n");
133
134 fz_readline(xref->file, buf, cap);
135 str_strip_ws_right(buf);
136 if (strcmp(buf, "xref") != 0)
137 return fz_throw("ioerror: missing xref");
138
139 while (1)
140 {
141 c = fz_peekbyte(xref->file);
142 if (!(c >= '0' && c <= '9'))
143 break;
144
145 n = fz_readline(xref->file, buf, cap);
146 if (n < 0)
147 return fz_ioerror(xref->file);
148
149 s = buf;
150 ofs = atoi(strsep(&s, " "));
151 len = atoi(strsep(&s, " "));
152
153 /* broken pdfs where the section is not on a separate line */
154 if (s && *s != '\0')
155 fz_seek(xref->file, -(n + buf - s + 2), 1);
156
157 t = fz_tell(xref->file);
158 if (t < 0)
159 return fz_ioerror(xref->file);
160
161 n = fz_seek(xref->file, t + 20 * len, 0);
162 if (n < 0)
163 return fz_ioerror(xref->file);
164 }
165
166 t = pdf_lex(xref->file, buf, cap, &n);
167 if (t != PDF_TTRAILER)
168 return fz_throw("syntaxerror: expected trailer");
169
170 t = pdf_lex(xref->file, buf, cap, &n);
171 if (t != PDF_TODICT)
172 return fz_throw("syntaxerror: expected trailer dictionary");
173
174 return pdf_parsedict(&xref->trailer, xref->file, buf, cap);
175 }
176
177 static fz_error *
178 readnewtrailer(pdf_xref *xref, char *buf, int cap)
179 {
180 pdf_logxref("load new xref format trailer\n");
181 return pdf_parseindobj(&xref->trailer, xref->file, buf, cap, nil, nil, nil);
182 }
183
184 static fz_error *
185 readtrailer(pdf_xref *xref, char *buf, int cap)
186 {
187 int n;
188 int c;
189
190 n = fz_seek(xref->file, xref->startxref, 0);
191 if (n < 0)
192 return fz_ioerror(xref->file);
193
194 c = fz_peekbyte(xref->file);
195 if (c == 'x')
196 return readoldtrailer(xref, buf, cap);
197 else if (c >= '0' && c <= '9')
198 return readnewtrailer(xref, buf, cap);
199
200 return fz_throw("syntaxerror: could not find xref");
201 }
202
203 /*
204 * xref tables
205 */
206
207 static fz_error *
208 readoldxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
209 {
210 int ofs, len;
211 char *s;
212 int n;
213 int t;
214 int i;
215 int c;
216
217 pdf_logxref("load old xref format\n");
218
219 fz_readline(xref->file, buf, cap);
220 str_strip_ws_right(buf);
221 if (strcmp(buf, "xref") != 0)
222 return fz_throw("syntaxerror: expected xref");
223
224 while (1)
225 {
226 c = fz_peekbyte(xref->file);
227 if (!(c >= '0' && c <= '9'))
228 break;
229
230 n = fz_readline(xref->file, buf, cap);
231 if (n < 0)
232 return fz_ioerror(xref->file);
233
234 s = buf;
235 ofs = atoi(strsep(&s, " "));
236 len = atoi(strsep(&s, " "));
237
238 /* broken pdfs where the section is not on a separate line */
239 if (s && *s != '\0')
240 {
241 fz_warn("syntaxerror: broken xref section");
242 fz_seek(xref->file, -(n + buf - s + 2), 1);
243 }
244
245 for (i = 0; i < len; i++)
246 {
247 n = fz_read(xref->file, buf, 20);
248 if (n < 0)
249 return fz_ioerror(xref->file);
250 if (n != 20)
251 return fz_throw("syntaxerror: truncated xref table");
252 if (!xref->table[ofs + i].type)
253 {
254 s = buf;
255 xref->table[ofs + i].ofs = atoi(s);
256 xref->table[ofs + i].gen = atoi(s + 11);
257 xref->table[ofs + i].type = s[17];
258 }
259 }
260 }
261
262 t = pdf_lex(xref->file, buf, cap, &n);
263 if (t != PDF_TTRAILER)
264 return fz_throw("syntaxerror: expected trailer");
265 t = pdf_lex(xref->file, buf, cap, &n);
266 if (t != PDF_TODICT)
267 return fz_throw("syntaxerror: expected trailer dictionary");
268
269 return pdf_parsedict(trailerp, xref->file, buf, cap);
270 }
271
272 static fz_error *
273 readnewxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
274 {
275 fz_error *error;
276 fz_stream *stm;
277 fz_obj *trailer;
278 fz_obj *obj;
279 int oid, gen, stmofs;
280 int size, w0, w1, w2, i0, i1;
281 int i, n;
282
283 pdf_logxref("load new xref format\n");
284
285 error = pdf_parseindobj(&trailer, xref->file, buf, cap, &oid, &gen, &stmofs);
286 if (error)
287 return error;
288
289 if (oid < 0 || oid >= xref->len) {
290 error = fz_throw("rangecheck: object id out of range");
291 goto cleanup;
292 }
293
294 xref->table[oid].type = 'n';
295 xref->table[oid].gen = gen;
296 xref->table[oid].obj = fz_keepobj(trailer);
297 xref->table[oid].stmofs = stmofs;
298
299 obj = fz_dictgets(trailer, "Size");
300 if (!obj) {
301 error = fz_throw("syntaxerror: xref stream missing Size entry");
302 goto cleanup;
303 }
304 size = fz_toint(obj);
305
306 obj = fz_dictgets(trailer, "W");
307 if (!obj) {
308 error = fz_throw("syntaxerror: xref stream missing W entry");
309 goto cleanup;
310 }
311 w0 = fz_toint(fz_arrayget(obj, 0));
312 w1 = fz_toint(fz_arrayget(obj, 1));
313 w2 = fz_toint(fz_arrayget(obj, 2));
314
315 obj = fz_dictgets(trailer, "Index");
316 if (obj) {
317 i0 = fz_toint(fz_arrayget(obj, 0));
318 i1 = fz_toint(fz_arrayget(obj, 1));
319 }
320 else {
321 i0 = 0;
322 i1 = size;
323 }
324
325 if (i0 < 0 || i1 > xref->len) {
326 error = fz_throw("syntaxerror: xref stream has too many entries");
327 goto cleanup;
328 }
329
330 error = pdf_openstream(&stm, xref, oid, gen);
331 if (error)
332 goto cleanup;
333
334 for (i = i0; i < i0 + i1; i++)
335 {
336 int a = 0;
337 int b = 0;
338 int c = 0;
339
340 if (fz_peekbyte(stm) == EOF)
341 {
342 error = fz_throw("syntaxerror: truncated xref stream");
343 fz_dropstream(stm);
344 goto cleanup;
345 }
346
347 for (n = 0; n < w0; n++)
348 a = (a << 8) + fz_readbyte(stm);
349 for (n = 0; n < w1; n++)
350 b = (b << 8) + fz_readbyte(stm);
351 for (n = 0; n < w2; n++)
352 c = (c << 8) + fz_readbyte(stm);
353
354 if (!xref->table[i].type)
355 {
356 int t = w0 ? a : 1;
357 xref->table[i].type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0;
358 xref->table[i].ofs = w2 ? b : 0;
359 xref->table[i].gen = w1 ? c : 0;
360 }
361 }
362
363 fz_dropstream(stm);
364
365 *trailerp = trailer;
366
367 return nil;
368
369 cleanup:
370 fz_dropobj(trailer);
371 return error;
372 }
373
374 static fz_error *
375 readxref(fz_obj **trailerp, pdf_xref *xref, int ofs, char *buf, int cap)
376 {
377 int n;
378 int c;
379
380 n = fz_seek(xref->file, ofs, 0);
381 if (n < 0)
382 return fz_ioerror(xref->file);
383
384 c = fz_peekbyte(xref->file);
385 if (c == 'x')
386 return readoldxref(trailerp, xref, buf, cap);
387 else if (c >= '0' && c <= '9')
388 return readnewxref(trailerp, xref, buf, cap);
389
390 return fz_throw("syntaxerror: expected xref");
391 }
392
393 static fz_error *
394 readxrefsections(pdf_xref *xref, int ofs, char *buf, int cap)
395 {
396 fz_error *error;
397 fz_obj *trailer;
398 fz_obj *prev;
399 fz_obj *xrefstm;
400
401 error = readxref(&trailer, xref, ofs, buf, cap);
402 if (error)
403 return error;
404
405 /* FIXME: do we overwrite free entries properly? */
406 xrefstm = fz_dictgets(trailer, "XrefStm");
407 if (xrefstm)
408 {
409 pdf_logxref("load xrefstm\n");
410 error = readxrefsections(xref, fz_toint(xrefstm), buf, cap);
411 if (error)
412 goto cleanup;
413 }
414
415 prev = fz_dictgets(trailer, "Prev");
416 if (prev)
417 {
418 pdf_logxref("load prev\n");
419 error = readxrefsections(xref, fz_toint(prev), buf, cap);
420 if (error)
421 goto cleanup;
422 }
423
424 fz_dropobj(trailer);
425 return nil;
426
427 cleanup:
428 fz_dropobj(trailer);
429 return error;
430 }
431
432 /*
433 * compressed object streams
434 */
435
436 fz_error *
437 pdf_loadobjstm(pdf_xref *xref, int oid, int gen, char *buf, int cap)
438 {
439 fz_error *error;
440 fz_stream *stm;
441 fz_obj *objstm;
442 int *oidbuf;
443 int *ofsbuf;
444
445 fz_obj *obj;
446 int first;
447 int count;
448 int i, n, t;
449
450 pdf_logxref("loadobjstm %d %d\n", oid, gen);
451
452 error = pdf_loadobject(&objstm, xref, oid, gen);
453 if (error)
454 return error;
455
456 count = fz_toint(fz_dictgets(objstm, "N"));
457 first = fz_toint(fz_dictgets(objstm, "First"));
458
459 pdf_logxref(" count %d\n", count);
460
461 oidbuf = fz_malloc(count * sizeof(int));
462 if (!oidbuf) { error = fz_outofmem; goto cleanupobj; }
463
464 ofsbuf = fz_malloc(count * sizeof(int));
465 if (!ofsbuf) { error = fz_outofmem; goto cleanupoid; }
466
467 error = pdf_openstream(&stm, xref, oid, gen);
468 if (error)
469 goto cleanupofs;
470
471 for (i = 0; i < count; i++)
472 {
473 t = pdf_lex(stm, buf, cap, &n);
474 if (t != PDF_TINT)
475 {
476 error = fz_throw("syntaxerror: corrupt object stream");
477 goto cleanupstm;
478 }
479 oidbuf[i] = atoi(buf);
480
481 t = pdf_lex(stm, buf, cap, &n);
482 if (t != PDF_TINT)
483 {
484 error = fz_throw("syntaxerror: corrupt object stream");
485 goto cleanupstm;
486 }
487 ofsbuf[i] = atoi(buf);
488 }
489
490 n = fz_seek(stm, first, 0);
491 if (n < 0)
492 {
493 error = fz_ioerror(stm);
494 goto cleanupstm;
495 }
496
497 for (i = 0; i < count; i++)
498 {
499 /* FIXME: seek to first + ofsbuf[i] */
500
501 error = pdf_parsestmobj(&obj, stm, buf, cap);
502 if (error)
503 goto cleanupstm;
504
505 if (oidbuf[i] < 1 || oidbuf[i] >= xref->len)
506 {
507 error = fz_throw("rangecheck: object number out of range");
508 goto cleanupstm;
509 }
510
511 if (xref->table[oidbuf[i]].obj)
512 fz_dropobj(xref->table[oidbuf[i]].obj);
513 xref->table[oidbuf[i]].obj = obj;
514 }
515
516 fz_dropstream(stm);
517 fz_free(ofsbuf);
518 fz_free(oidbuf);
519 fz_dropobj(objstm);
520 return nil;
521
522 cleanupstm:
523 fz_dropstream(stm);
524 cleanupofs:
525 fz_free(ofsbuf);
526 cleanupoid:
527 fz_free(oidbuf);
528 cleanupobj:
529 fz_dropobj(objstm);
530 return error;
531 }
532
533 /*
534 * open and load xref tables from pdf
535 */
536
537 fz_error *
538 pdf_loadxref(pdf_xref *xref, char *filename)
539 {
540 fz_error *error;
541 fz_obj *size;
542 int i;
543
544 char buf[65536]; /* yeowch! */
545
546 pdf_logxref("loadxref '%s' %p\n", filename, xref);
547
548 error = fz_openrfile(&xref->file, filename);
549 if (error)
550 return error;
551
552 error = loadversion(xref);
553 if (error)
554 return error;
555
556 error = readstartxref(xref);
557 if (error)
558 return error;
559
560 error = readtrailer(xref, buf, sizeof buf);
561 if (error)
562 return error;
563
564 size = fz_dictgets(xref->trailer, "Size");
565 if (!size)
566 return fz_throw("syntaxerror: trailer missing Size entry");
567
568 pdf_logxref(" size %d\n", fz_toint(size));
569
570 assert(xref->table == nil);
571
572 xref->cap = fz_toint(size);
573 xref->len = fz_toint(size);
574 xref->table = fz_malloc(xref->cap * sizeof(pdf_xrefentry));
575 if (!xref->table)
576 return fz_outofmem;
577
578 for (i = 0; i < xref->len; i++)
579 {
580 xref->table[i].ofs = 0;
581 xref->table[i].gen = 0;
582 xref->table[i].type = 0;
583 xref->table[i].mark = 0;
584 xref->table[i].stmbuf = nil;
585 xref->table[i].stmofs = 0;
586 xref->table[i].obj = nil;
587 }
588
589 error = readxrefsections(xref, xref->startxref, buf, sizeof buf);
590 if (error)
591 return error;
592
593 return nil;
594 }
595