4 static inline int iswhite(int ch
)
6 return ch
== '\000' || ch
== '\011' || ch
== '\012' ||
7 ch
== '\014' || ch
== '\015' || ch
== '\040';
11 * magic version tag and startxref
15 loadversion(pdf_xref
*xref
)
20 n
= fz_seek(xref
->file
, 0, 0);
22 return fz_ioerror(xref
->file
);
24 fz_readline(xref
->file
, buf
, sizeof buf
);
25 if (memcmp(buf
, "%PDF-", 5) != 0)
26 return fz_throw("syntaxerror: corrupt version marker");
28 xref
->version
= atof(buf
+ 5);
30 pdf_logxref("version %g\n", xref
->version
);
36 readstartxref(pdf_xref
*xref
)
42 t
= fz_seek(xref
->file
, 0, 2);
44 return fz_ioerror(xref
->file
);
46 t
= fz_seek(xref
->file
, MAX(0, t
- ((int)sizeof buf
)), 0);
48 return fz_ioerror(xref
->file
);
50 n
= fz_read(xref
->file
, buf
, sizeof buf
);
52 return fz_ioerror(xref
->file
);
54 for (i
= n
- 9; i
>= 0; i
--)
56 if (memcmp(buf
+ i
, "startxref", 9) == 0)
59 while (iswhite(buf
[i
]) && i
< n
)
61 xref
->startxref
= atoi(buf
+ i
);
66 return fz_throw("syntaxerror: could not find startxref");
69 #define WHITE_SPACE_CHARS " \n\t\r"
71 static const char *str_find_char(const char *txt
, char c
)
81 static int str_contains(const char *str
, char c
)
83 const char *pos
= str_find_char(str
, c
);
89 static void str_strip_right(char *txt
, const char *to_strip
)
93 if (!txt
|| !to_strip
)
97 /* point at the last character in the string */
98 new_end
= txt
+ strlen(txt
) - 1;
101 if (!str_contains(to_strip
, c
))
107 if (str_contains(to_strip
, *new_end
))
113 static void str_strip_ws_right(char *txt
)
115 str_strip_right(txt
, WHITE_SPACE_CHARS
);
124 readoldtrailer(pdf_xref
*xref
, char *buf
, int cap
)
132 pdf_logxref("load old xref format trailer\n");
134 fz_readline(xref
->file
, buf
, cap
);
135 str_strip_ws_right(buf
);
136 if (strcmp(buf
, "xref") != 0)
137 return fz_throw("ioerror: missing xref");
141 c
= fz_peekbyte(xref
->file
);
142 if (!(c
>= '0' && c
<= '9'))
145 n
= fz_readline(xref
->file
, buf
, cap
);
147 return fz_ioerror(xref
->file
);
150 ofs
= atoi(strsep(&s
, " "));
151 len
= atoi(strsep(&s
, " "));
153 /* broken pdfs where the section is not on a separate line */
155 fz_seek(xref
->file
, -(n
+ buf
- s
+ 2), 1);
157 t
= fz_tell(xref
->file
);
159 return fz_ioerror(xref
->file
);
161 n
= fz_seek(xref
->file
, t
+ 20 * len
, 0);
163 return fz_ioerror(xref
->file
);
166 t
= pdf_lex(xref
->file
, buf
, cap
, &n
);
167 if (t
!= PDF_TTRAILER
)
168 return fz_throw("syntaxerror: expected trailer");
170 t
= pdf_lex(xref
->file
, buf
, cap
, &n
);
172 return fz_throw("syntaxerror: expected trailer dictionary");
174 return pdf_parsedict(&xref
->trailer
, xref
->file
, buf
, cap
);
178 readnewtrailer(pdf_xref
*xref
, char *buf
, int cap
)
180 pdf_logxref("load new xref format trailer\n");
181 return pdf_parseindobj(&xref
->trailer
, xref
->file
, buf
, cap
, nil
, nil
, nil
);
185 readtrailer(pdf_xref
*xref
, char *buf
, int cap
)
190 n
= fz_seek(xref
->file
, xref
->startxref
, 0);
192 return fz_ioerror(xref
->file
);
194 c
= fz_peekbyte(xref
->file
);
196 return readoldtrailer(xref
, buf
, cap
);
197 else if (c
>= '0' && c
<= '9')
198 return readnewtrailer(xref
, buf
, cap
);
200 return fz_throw("syntaxerror: could not find xref");
208 readoldxref(fz_obj
**trailerp
, pdf_xref
*xref
, char *buf
, int cap
)
217 pdf_logxref("load old xref format\n");
219 fz_readline(xref
->file
, buf
, cap
);
220 str_strip_ws_right(buf
);
221 if (strcmp(buf
, "xref") != 0)
222 return fz_throw("syntaxerror: expected xref");
226 c
= fz_peekbyte(xref
->file
);
227 if (!(c
>= '0' && c
<= '9'))
230 n
= fz_readline(xref
->file
, buf
, cap
);
232 return fz_ioerror(xref
->file
);
235 ofs
= atoi(strsep(&s
, " "));
236 len
= atoi(strsep(&s
, " "));
238 /* broken pdfs where the section is not on a separate line */
241 fz_warn("syntaxerror: broken xref section");
242 fz_seek(xref
->file
, -(n
+ buf
- s
+ 2), 1);
245 for (i
= 0; i
< len
; i
++)
247 n
= fz_read(xref
->file
, buf
, 20);
249 return fz_ioerror(xref
->file
);
251 return fz_throw("syntaxerror: truncated xref table");
252 if (!xref
->table
[ofs
+ i
].type
)
255 xref
->table
[ofs
+ i
].ofs
= atoi(s
);
256 xref
->table
[ofs
+ i
].gen
= atoi(s
+ 11);
257 xref
->table
[ofs
+ i
].type
= s
[17];
262 t
= pdf_lex(xref
->file
, buf
, cap
, &n
);
263 if (t
!= PDF_TTRAILER
)
264 return fz_throw("syntaxerror: expected trailer");
265 t
= pdf_lex(xref
->file
, buf
, cap
, &n
);
267 return fz_throw("syntaxerror: expected trailer dictionary");
269 return pdf_parsedict(trailerp
, xref
->file
, buf
, cap
);
273 readnewxref(fz_obj
**trailerp
, pdf_xref
*xref
, char *buf
, int cap
)
279 int oid
, gen
, stmofs
;
280 int size
, w0
, w1
, w2
, i0
, i1
;
283 pdf_logxref("load new xref format\n");
285 error
= pdf_parseindobj(&trailer
, xref
->file
, buf
, cap
, &oid
, &gen
, &stmofs
);
289 if (oid
< 0 || oid
>= xref
->len
) {
290 error
= fz_throw("rangecheck: object id out of range");
294 xref
->table
[oid
].type
= 'n';
295 xref
->table
[oid
].gen
= gen
;
296 xref
->table
[oid
].obj
= fz_keepobj(trailer
);
297 xref
->table
[oid
].stmofs
= stmofs
;
299 obj
= fz_dictgets(trailer
, "Size");
301 error
= fz_throw("syntaxerror: xref stream missing Size entry");
304 size
= fz_toint(obj
);
306 obj
= fz_dictgets(trailer
, "W");
308 error
= fz_throw("syntaxerror: xref stream missing W entry");
311 w0
= fz_toint(fz_arrayget(obj
, 0));
312 w1
= fz_toint(fz_arrayget(obj
, 1));
313 w2
= fz_toint(fz_arrayget(obj
, 2));
315 obj
= fz_dictgets(trailer
, "Index");
317 i0
= fz_toint(fz_arrayget(obj
, 0));
318 i1
= fz_toint(fz_arrayget(obj
, 1));
325 if (i0
< 0 || i1
> xref
->len
) {
326 error
= fz_throw("syntaxerror: xref stream has too many entries");
330 error
= pdf_openstream(&stm
, xref
, oid
, gen
);
334 for (i
= i0
; i
< i0
+ i1
; i
++)
340 if (fz_peekbyte(stm
) == EOF
)
342 error
= fz_throw("syntaxerror: truncated xref stream");
347 for (n
= 0; n
< w0
; n
++)
348 a
= (a
<< 8) + fz_readbyte(stm
);
349 for (n
= 0; n
< w1
; n
++)
350 b
= (b
<< 8) + fz_readbyte(stm
);
351 for (n
= 0; n
< w2
; n
++)
352 c
= (c
<< 8) + fz_readbyte(stm
);
354 if (!xref
->table
[i
].type
)
357 xref
->table
[i
].type
= t
== 0 ? 'f' : t
== 1 ? 'n' : t
== 2 ? 'o' : 0;
358 xref
->table
[i
].ofs
= w2
? b
: 0;
359 xref
->table
[i
].gen
= w1
? c
: 0;
375 readxref(fz_obj
**trailerp
, pdf_xref
*xref
, int ofs
, char *buf
, int cap
)
380 n
= fz_seek(xref
->file
, ofs
, 0);
382 return fz_ioerror(xref
->file
);
384 c
= fz_peekbyte(xref
->file
);
386 return readoldxref(trailerp
, xref
, buf
, cap
);
387 else if (c
>= '0' && c
<= '9')
388 return readnewxref(trailerp
, xref
, buf
, cap
);
390 return fz_throw("syntaxerror: expected xref");
394 readxrefsections(pdf_xref
*xref
, int ofs
, char *buf
, int cap
)
401 error
= readxref(&trailer
, xref
, ofs
, buf
, cap
);
405 /* FIXME: do we overwrite free entries properly? */
406 xrefstm
= fz_dictgets(trailer
, "XrefStm");
409 pdf_logxref("load xrefstm\n");
410 error
= readxrefsections(xref
, fz_toint(xrefstm
), buf
, cap
);
415 prev
= fz_dictgets(trailer
, "Prev");
418 pdf_logxref("load prev\n");
419 error
= readxrefsections(xref
, fz_toint(prev
), buf
, cap
);
433 * compressed object streams
437 pdf_loadobjstm(pdf_xref
*xref
, int oid
, int gen
, char *buf
, int cap
)
450 pdf_logxref("loadobjstm %d %d\n", oid
, gen
);
452 error
= pdf_loadobject(&objstm
, xref
, oid
, gen
);
456 count
= fz_toint(fz_dictgets(objstm
, "N"));
457 first
= fz_toint(fz_dictgets(objstm
, "First"));
459 pdf_logxref(" count %d\n", count
);
461 oidbuf
= fz_malloc(count
* sizeof(int));
462 if (!oidbuf
) { error
= fz_outofmem
; goto cleanupobj
; }
464 ofsbuf
= fz_malloc(count
* sizeof(int));
465 if (!ofsbuf
) { error
= fz_outofmem
; goto cleanupoid
; }
467 error
= pdf_openstream(&stm
, xref
, oid
, gen
);
471 for (i
= 0; i
< count
; i
++)
473 t
= pdf_lex(stm
, buf
, cap
, &n
);
476 error
= fz_throw("syntaxerror: corrupt object stream");
479 oidbuf
[i
] = atoi(buf
);
481 t
= pdf_lex(stm
, buf
, cap
, &n
);
484 error
= fz_throw("syntaxerror: corrupt object stream");
487 ofsbuf
[i
] = atoi(buf
);
490 n
= fz_seek(stm
, first
, 0);
493 error
= fz_ioerror(stm
);
497 for (i
= 0; i
< count
; i
++)
499 /* FIXME: seek to first + ofsbuf[i] */
501 error
= pdf_parsestmobj(&obj
, stm
, buf
, cap
);
505 if (oidbuf
[i
] < 1 || oidbuf
[i
] >= xref
->len
)
507 error
= fz_throw("rangecheck: object number out of range");
511 if (xref
->table
[oidbuf
[i
]].obj
)
512 fz_dropobj(xref
->table
[oidbuf
[i
]].obj
);
513 xref
->table
[oidbuf
[i
]].obj
= obj
;
534 * open and load xref tables from pdf
538 pdf_loadxref(pdf_xref
*xref
, char *filename
)
544 char buf
[65536]; /* yeowch! */
546 pdf_logxref("loadxref '%s' %p\n", filename
, xref
);
548 error
= fz_openrfile(&xref
->file
, filename
);
552 error
= loadversion(xref
);
556 error
= readstartxref(xref
);
560 error
= readtrailer(xref
, buf
, sizeof buf
);
564 size
= fz_dictgets(xref
->trailer
, "Size");
566 return fz_throw("syntaxerror: trailer missing Size entry");
568 pdf_logxref(" size %d\n", fz_toint(size
));
570 assert(xref
->table
== nil
);
572 xref
->cap
= fz_toint(size
);
573 xref
->len
= fz_toint(size
);
574 xref
->table
= fz_malloc(xref
->cap
* sizeof(pdf_xrefentry
));
578 for (i
= 0; i
< xref
->len
; i
++)
580 xref
->table
[i
].ofs
= 0;
581 xref
->table
[i
].gen
= 0;
582 xref
->table
[i
].type
= 0;
583 xref
->table
[i
].mark
= 0;
584 xref
->table
[i
].stmbuf
= nil
;
585 xref
->table
[i
].stmofs
= 0;
586 xref
->table
[i
].obj
= nil
;
589 error
= readxrefsections(xref
, xref
->startxref
, buf
, sizeof buf
);