186942b3e86bfecd039630a644671520e49bda58
[reactos.git] / reactos / drivers / filesystems / ext2 / src / ext3 / htree.c
1 /*
2 * COPYRIGHT: See COPYRIGHT.TXT
3 * PROJECT: Ext2 File System Driver for WinNT/2K/XP
4 * FILE: lock.c
5 * PROGRAMMER: Matt Wu <mattwu@163.com>
6 * HOMEPAGE: http://www.ext2fsd.com
7 * UPDATE HISTORY:
8 * Copied from linux/lib/halfmd4.c
9 * linux/fs/ext3/hash.c
10 */
11
12 /* INCLUDES *****************************************************************/
13
14 #include "ext2fs.h"
15
16 #ifdef EXT2_HTREE_INDEX
17
18 #define DX_DEBUG 0
19
20 #if DX_DEBUG
21 #define dxtrace(command) command
22 #else
23 #define dxtrace(command)
24 #endif
25
26 #ifndef swap
27 #define swap(type, x, y) do { type z = x; x = y; y = z; } while (0)
28 #endif
29
30 /* F, G and H are basic MD4 functions: selection, majority, parity */
31 #define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
32 #define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z)))
33 #define H(x, y, z) ((x) ^ (y) ^ (z))
34
35 /*
36 * The generic round function. The application is so specific that
37 * we don't bother protecting all the arguments with parens, as is generally
38 * good macro practice, in favor of extra legibility.
39 * Rotation is separate from addition to prevent recomputation
40 */
41 #define ROUND(f, a, b, c, d, x, s) \
42 (a += f(b, c, d) + x, a = (a << s) | (a >> (32 - s)))
43 #define K1 0
44 #define K2 013240474631
45 #define K3 015666365641
46
47 /*
48 * Basic cut-down MD4 transform. Returns only 32 bits of result.
49 */
50 __u32 half_md4_transform(__u32 buf[4], __u32 const in[8])
51 {
52 __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3];
53
54 /* Round 1 */
55 ROUND(F, a, b, c, d, in[0] + K1, 3);
56 ROUND(F, d, a, b, c, in[1] + K1, 7);
57 ROUND(F, c, d, a, b, in[2] + K1, 11);
58 ROUND(F, b, c, d, a, in[3] + K1, 19);
59 ROUND(F, a, b, c, d, in[4] + K1, 3);
60 ROUND(F, d, a, b, c, in[5] + K1, 7);
61 ROUND(F, c, d, a, b, in[6] + K1, 11);
62 ROUND(F, b, c, d, a, in[7] + K1, 19);
63
64 /* Round 2 */
65 ROUND(G, a, b, c, d, in[1] + K2, 3);
66 ROUND(G, d, a, b, c, in[3] + K2, 5);
67 ROUND(G, c, d, a, b, in[5] + K2, 9);
68 ROUND(G, b, c, d, a, in[7] + K2, 13);
69 ROUND(G, a, b, c, d, in[0] + K2, 3);
70 ROUND(G, d, a, b, c, in[2] + K2, 5);
71 ROUND(G, c, d, a, b, in[4] + K2, 9);
72 ROUND(G, b, c, d, a, in[6] + K2, 13);
73
74 /* Round 3 */
75 ROUND(H, a, b, c, d, in[3] + K3, 3);
76 ROUND(H, d, a, b, c, in[7] + K3, 9);
77 ROUND(H, c, d, a, b, in[2] + K3, 11);
78 ROUND(H, b, c, d, a, in[6] + K3, 15);
79 ROUND(H, a, b, c, d, in[1] + K3, 3);
80 ROUND(H, d, a, b, c, in[5] + K3, 9);
81 ROUND(H, c, d, a, b, in[0] + K3, 11);
82 ROUND(H, b, c, d, a, in[4] + K3, 15);
83
84 buf[0] += a;
85 buf[1] += b;
86 buf[2] += c;
87 buf[3] += d;
88
89 return buf[1]; /* "most hashed" word */
90 }
91
92 #define DELTA 0x9E3779B9
93
94 static void TEA_transform(__u32 buf[4], __u32 const in[])
95 {
96 __u32 sum = 0;
97 __u32 b0 = buf[0], b1 = buf[1];
98 __u32 a = in[0], b = in[1], c = in[2], d = in[3];
99 int n = 16;
100
101 do {
102 sum += DELTA;
103 b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);
104 b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);
105 } while (--n);
106
107 buf[0] += b0;
108 buf[1] += b1;
109 }
110
111
112 /* The old legacy hash */
113 static __u32 dx_hack_hash_unsigned(const char *name, int len)
114 {
115 __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
116 const unsigned char *ucp = (const unsigned char *) name;
117
118 while (len--) {
119 hash = hash1 + (hash0 ^ (((int) *ucp++) * 7152373));
120
121 if (hash & 0x80000000)
122 hash -= 0x7fffffff;
123 hash1 = hash0;
124 hash0 = hash;
125 }
126 return hash0 << 1;
127 }
128
129 static __u32 dx_hack_hash_signed(const char *name, int len)
130 {
131 __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
132 const signed char *scp = (const signed char *) name;
133
134 while (len--) {
135 hash = hash1 + (hash0 ^ (((int) *scp++) * 7152373));
136
137 if (hash & 0x80000000)
138 hash -= 0x7fffffff;
139 hash1 = hash0;
140 hash0 = hash;
141 }
142 return hash0 << 1;
143 }
144
145 static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num)
146 {
147 __u32 pad, val;
148 int i;
149 const signed char *scp = (const signed char *) msg;
150
151 pad = (__u32)len | ((__u32)len << 8);
152 pad |= pad << 16;
153
154 val = pad;
155 if (len > num*4)
156 len = num * 4;
157 for (i = 0; i < len; i++) {
158 if ((i % 4) == 0)
159 val = pad;
160 val = ((int) scp[i]) + (val << 8);
161 if ((i % 4) == 3) {
162 *buf++ = val;
163 val = pad;
164 num--;
165 }
166 }
167 if (--num >= 0)
168 *buf++ = val;
169 while (--num >= 0)
170 *buf++ = pad;
171 }
172
173 static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num)
174 {
175 __u32 pad, val;
176 int i;
177 const unsigned char *ucp = (const unsigned char *) msg;
178
179 pad = (__u32)len | ((__u32)len << 8);
180 pad |= pad << 16;
181
182 val = pad;
183 if (len > num*4)
184 len = num * 4;
185 for (i = 0; i < len; i++) {
186 if ((i % 4) == 0)
187 val = pad;
188 val = ((int) ucp[i]) + (val << 8);
189 if ((i % 4) == 3) {
190 *buf++ = val;
191 val = pad;
192 num--;
193 }
194 }
195 if (--num >= 0)
196 *buf++ = val;
197 while (--num >= 0)
198 *buf++ = pad;
199 }
200
201
202 #endif /* EXT2_HTREE_INDEX */
203
204 __u32 ext3_current_time(struct inode *in)
205 {
206 LARGE_INTEGER SysTime;
207 KeQuerySystemTime(&SysTime);
208
209 return Ext2LinuxTime(SysTime);
210 }
211
212 void ext3_warning (struct super_block * sb, const char * function,
213 char * fmt, ...)
214 {
215 #if DX_DEBUG
216 va_list args;
217
218 va_start(args, fmt);
219 printk("EXT3-fs warning (device %s): %s: ",
220 sb->s_id, function);
221 printk(fmt, args);
222 printk("\n");
223 va_end(args);
224 #endif
225 }
226
227
228 /* ext3_bread is safe for meta-data blocks. it's not safe to read file data,
229 since file data is managed by file cache, not volume cache */
230 struct buffer_head *ext3_bread(struct ext2_icb *icb, struct inode *inode,
231 unsigned long block, int *err)
232 {
233 struct buffer_head * bh = NULL;
234 NTSTATUS status = STATUS_SUCCESS;
235 ULONG lbn = 0, num = 0;
236
237 PEXT2_MCB Mcb = CONTAINING_RECORD(inode, EXT2_MCB, Inode);
238
239 /* for symlink file, read it's target instead */
240 if (NULL != Mcb && IsMcbSymLink(Mcb))
241 Mcb = Mcb->Target;
242 if (NULL == Mcb) {
243 *err = -EINVAL;
244 return NULL;
245 }
246
247 /* mapping file offset to ext2 block */
248 if (INODE_HAS_EXTENT(&Mcb->Inode)) {
249 status = Ext2MapExtent(icb, inode->i_sb->s_priv,
250 Mcb, block, FALSE,
251 &lbn, &num);
252 } else {
253 status = Ext2MapIndirect(icb, inode->i_sb->s_priv,
254 Mcb, block, FALSE,
255 &lbn, &num);
256 }
257
258 if (!NT_SUCCESS(status)) {
259 *err = Ext2LinuxError(status);
260 return bh;
261 }
262
263 bh = sb_getblk(inode->i_sb, lbn);
264 if (!bh) {
265 *err = -ENOMEM;
266 return bh;
267 }
268 if (buffer_uptodate(bh))
269 return bh;
270
271 *err = bh_submit_read(bh);
272 if (*err) {
273 brelse(bh);
274 return NULL;
275 }
276 return bh;
277 }
278
279 struct buffer_head *ext3_append(struct ext2_icb *icb, struct inode *inode,
280 ext3_lblk_t *block, int *err)
281 {
282 PEXT2_MCB mcb = CONTAINING_RECORD(inode, EXT2_MCB, Inode);
283 PEXT2_FCB dcb = mcb->Fcb;
284 NTSTATUS status;
285
286 ASSERT(dcb);
287 ASSERT(inode == dcb->Inode);
288
289 /* allocate new block since there's no space for us */
290 *block = (ext3_lblk_t)(inode->i_size >> inode->i_sb->s_blocksize_bits);
291 dcb->Header.AllocationSize.QuadPart += dcb->Vcb->BlockSize;
292 status = Ext2ExpandFile(icb, dcb->Vcb, mcb, &(dcb->Header.AllocationSize));
293 if (NT_SUCCESS(status)) {
294
295 /* update Dcb */
296 dcb->Header.ValidDataLength = dcb->Header.FileSize = dcb->Header.AllocationSize;
297 mcb->Inode.i_size = dcb->Header.AllocationSize.QuadPart;
298
299 /* save parent directory's inode */
300 Ext2SaveInode(icb, dcb->Vcb, inode);
301 }
302
303 return ext3_bread(icb, inode, *block, err);
304 }
305
306
307 void ext3_inc_count(struct inode *inode)
308 {
309 inode->i_nlink++;
310 }
311
312 void ext3_dec_count(struct inode *inode)
313 {
314 inode->i_nlink--;
315 }
316
317 unsigned char ext3_type_by_mode(umode_t mode)
318 {
319 unsigned char type = 0;
320
321 switch (mode & S_IFMT) {
322 case S_IFREG:
323 type = EXT3_FT_REG_FILE;
324 break;
325 case S_IFDIR:
326 type = EXT3_FT_DIR;
327 break;
328 case S_IFCHR:
329 type = EXT3_FT_CHRDEV;
330 break;
331 case S_IFBLK:
332 type = EXT3_FT_BLKDEV;
333 break;
334 case S_IFIFO:
335 type = EXT3_FT_FIFO;
336 break;
337 case S_IFSOCK:
338 type = EXT3_FT_SOCK;
339 break;
340 case S_IFLNK:
341 type = EXT3_FT_SYMLINK;
342 }
343
344 return type;
345 };
346
347 void ext3_set_de_type(struct super_block *sb,
348 struct ext3_dir_entry_2 *de,
349 umode_t mode)
350 {
351 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE))
352 de->file_type = ext3_type_by_mode(mode);
353 }
354
355 /*
356 * ext3_mark_inode_dirty is somewhat expensive, so unlike ext2 we
357 * do not perform it in these functions. We perform it at the call site,
358 * if it is needed.
359 */
360 int ext3_mark_inode_dirty(struct ext2_icb *icb, struct inode *in)
361 {
362 if (Ext2SaveInode(icb, in->i_sb->s_priv, in))
363 return 0;
364
365 return -ENOMEM;
366 }
367
368 void ext3_update_dx_flag(struct inode *inode)
369 {
370 if (!EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
371 EXT3_FEATURE_COMPAT_DIR_INDEX))
372 EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL;
373 }
374
375 /*
376 * Add a new entry into a directory (leaf) block. If de is non-NULL,
377 * it points to a directory entry which is guaranteed to be large
378 * enough for new directory entry. If de is NULL, then
379 * add_dirent_to_buf will attempt search the directory block for
380 * space. It will return -ENOSPC if no space is available, and -EIO
381 * and -EEXIST if directory entry already exists.
382 *
383 * NOTE! bh is NOT released in the case where ENOSPC is returned. In
384 * all other cases bh is released.
385 */
386 int add_dirent_to_buf(struct ext2_icb *icb, struct dentry *dentry,
387 struct inode *inode, struct ext3_dir_entry_2 *de,
388 struct buffer_head *bh)
389 {
390 struct inode *dir = dentry->d_parent->d_inode;
391 const char *name = dentry->d_name.name;
392 int namelen = dentry->d_name.len;
393 unsigned int offset = 0;
394 unsigned short reclen;
395 int nlen, rlen, err;
396 char *top;
397
398 reclen = EXT3_DIR_REC_LEN(namelen);
399 if (!de) {
400 de = (struct ext3_dir_entry_2 *)bh->b_data;
401 top = bh->b_data + dir->i_sb->s_blocksize - reclen;
402 while ((char *) de <= top) {
403 if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
404 bh, offset)) {
405 brelse(bh);
406 return -EIO;
407 }
408 if (ext3_match(namelen, name, de)) {
409 brelse(bh);
410 return -EEXIST;
411 }
412 nlen = EXT3_DIR_REC_LEN(de->name_len);
413 rlen = ext3_rec_len_from_disk(de->rec_len);
414 if ((de->inode? rlen - nlen: rlen) >= reclen)
415 break;
416 de = (struct ext3_dir_entry_2 *)((char *)de + rlen);
417 offset += rlen;
418 }
419 if ((char *) de > top)
420 return -ENOSPC;
421 }
422
423 /* By now the buffer is marked for journaling */
424 nlen = EXT3_DIR_REC_LEN(de->name_len);
425 rlen = ext3_rec_len_from_disk(de->rec_len);
426 if (de->inode) {
427 struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
428 de1->rec_len = ext3_rec_len_to_disk(rlen - nlen);
429 de->rec_len = ext3_rec_len_to_disk(nlen);
430 de = de1;
431 }
432 de->file_type = EXT3_FT_UNKNOWN;
433 if (inode) {
434 de->inode = cpu_to_le32(inode->i_ino);
435 ext3_set_de_type(dir->i_sb, de, inode->i_mode);
436 } else
437 de->inode = 0;
438 de->name_len = (__u8)namelen;
439 memcpy(de->name, name, namelen);
440
441 /*
442 * XXX shouldn't update any times until successful
443 * completion of syscall, but too many callers depend
444 * on this.
445 *
446 * XXX similarly, too many callers depend on
447 * ext4_new_inode() setting the times, but error
448 * recovery deletes the inode, so the worst that can
449 * happen is that the times are slightly out of date
450 * and/or different from the directory change time.
451 */
452 dir->i_mtime = dir->i_ctime = ext3_current_time(dir);
453 ext3_update_dx_flag(dir);
454 dir->i_version++;
455 ext3_mark_inode_dirty(icb, dir);
456 set_buffer_dirty(bh);
457 brelse(bh);
458 return 0;
459 }
460
461 #ifdef EXT2_HTREE_INDEX
462
463 /*
464 * Returns the hash of a filename. If len is 0 and name is NULL, then
465 * this function can be used to test whether or not a hash version is
466 * supported.
467 *
468 * The seed is an 4 longword (32 bits) "secret" which can be used to
469 * uniquify a hash. If the seed is all zero's, then some default seed
470 * may be used.
471 *
472 * A particular hash version specifies whether or not the seed is
473 * represented, and whether or not the returned hash is 32 bits or 64
474 * bits. 32 bit hashes will return 0 for the minor hash.
475 */
476 int ext3_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
477 {
478 __u32 hash;
479 __u32 minor_hash = 0;
480 const char *p;
481 int i;
482 __u32 in[8], buf[4];
483
484 void (*str2hashbuf)(const char *, int, __u32 *, int) =
485 str2hashbuf_signed;
486
487 /* Initialize the default seed for the hash checksum functions */
488 buf[0] = 0x67452301;
489 buf[1] = 0xefcdab89;
490 buf[2] = 0x98badcfe;
491 buf[3] = 0x10325476;
492
493 /* Check to see if the seed is all zero's */
494 if (hinfo->seed) {
495 for (i = 0; i < 4; i++) {
496 if (hinfo->seed[i])
497 break;
498 }
499 if (i < 4)
500 memcpy(buf, hinfo->seed, sizeof(buf));
501 }
502
503 switch (hinfo->hash_version) {
504 case DX_HASH_LEGACY_UNSIGNED:
505 hash = dx_hack_hash_unsigned(name, len);
506 break;
507 case DX_HASH_LEGACY:
508 hash = dx_hack_hash_signed(name, len);
509 break;
510 case DX_HASH_HALF_MD4_UNSIGNED:
511 str2hashbuf = str2hashbuf_unsigned;
512 case DX_HASH_HALF_MD4:
513 p = name;
514 while (len > 0) {
515 (*str2hashbuf)(p, len, in, 8);
516 half_md4_transform(buf, in);
517 len -= 32;
518 p += 32;
519 }
520 minor_hash = buf[2];
521 hash = buf[1];
522 break;
523 case DX_HASH_TEA_UNSIGNED:
524 str2hashbuf = str2hashbuf_unsigned;
525 case DX_HASH_TEA:
526 p = name;
527 while (len > 0) {
528 (*str2hashbuf)(p, len, in, 4);
529 TEA_transform(buf, in);
530 len -= 16;
531 p += 16;
532 }
533 hash = buf[0];
534 minor_hash = buf[1];
535 break;
536 default:
537 hinfo->hash = 0;
538 return -1;
539 }
540 hash = hash & ~1;
541 if (hash == (EXT4_HTREE_EOF_32BIT << 1))
542 hash = (EXT4_HTREE_EOF_32BIT - 1) << 1;
543 hinfo->hash = hash;
544 hinfo->minor_hash = minor_hash;
545 return 0;
546 }
547 EXPORT_SYMBOL(ext3_dirhash);
548
549
550 /*
551 * These functions convert from the major/minor hash to an f_pos
552 * value.
553 *
554 * Currently we only use major hash numer. This is unfortunate, but
555 * on 32-bit machines, the same VFS interface is used for lseek and
556 * llseek, so if we use the 64 bit offset, then the 32-bit versions of
557 * lseek/telldir/seekdir will blow out spectacularly, and from within
558 * the ext2 low-level routine, we don't know if we're being called by
559 * a 64-bit version of the system call or the 32-bit version of the
560 * system call. Worse yet, NFSv2 only allows for a 32-bit readdir
561 * cookie. Sigh.
562 */
563 #define hash2pos(major, minor) (major >> 1)
564 #define pos2maj_hash(pos) ((pos << 1) & 0xffffffff)
565 #define pos2min_hash(pos) (0)
566
567 /*
568 * This structure holds the nodes of the red-black tree used to store
569 * the directory entry in hash order.
570 */
571 struct fname {
572 __u32 hash;
573 __u32 minor_hash;
574 struct rb_node rb_hash;
575 struct fname *next;
576 __u32 inode;
577 __u8 name_len;
578 __u8 file_type;
579 char name[0];
580 };
581
582 /*
583 * This functoin implements a non-recursive way of freeing all of the
584 * nodes in the red-black tree.
585 */
586 static void free_rb_tree_fname(struct rb_root *root)
587 {
588 struct rb_node *n = root->rb_node;
589 struct rb_node *parent;
590 struct fname *fname;
591
592 while (n) {
593 /* Do the node's children first */
594 if ((n)->rb_left) {
595 n = n->rb_left;
596 continue;
597 }
598 if (n->rb_right) {
599 n = n->rb_right;
600 continue;
601 }
602 /*
603 * The node has no children; free it, and then zero
604 * out parent's link to it. Finally go to the
605 * beginning of the loop and try to free the parent
606 * node.
607 */
608 parent = rb_parent(n);
609 fname = rb_entry(n, struct fname, rb_hash);
610 while (fname) {
611 struct fname * old = fname;
612 fname = fname->next;
613 kfree (old);
614 }
615 if (!parent)
616 root->rb_node = NULL;
617 else if (parent->rb_left == n)
618 parent->rb_left = NULL;
619 else if (parent->rb_right == n)
620 parent->rb_right = NULL;
621 n = parent;
622 }
623 root->rb_node = NULL;
624 }
625
626
627 static struct dir_private_info *create_dir_info(loff_t pos)
628 {
629 struct dir_private_info *p;
630
631 p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL);
632 if (!p)
633 return NULL;
634 p->root.rb_node = NULL;
635 p->curr_node = NULL;
636 p->extra_fname = NULL;
637 p->last_pos = 0;
638 p->curr_hash = (__u32)pos2maj_hash(pos);
639 p->curr_minor_hash = (__u32)pos2min_hash(pos);
640 p->next_hash = 0;
641 return p;
642 }
643
644 void ext3_htree_free_dir_info(struct dir_private_info *p)
645 {
646 free_rb_tree_fname(&p->root);
647 kfree(p);
648 }
649
650 /*
651 * Given a directory entry, enter it into the fname rb tree.
652 */
653 int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
654 __u32 minor_hash,
655 struct ext3_dir_entry_2 *dirent)
656 {
657 struct rb_node **p, *parent = NULL;
658 struct fname * fname, *new_fn;
659 struct dir_private_info *info;
660 int extra_data = 0;
661 int len;
662
663 info = (struct dir_private_info *) dir_file->private_data;
664 p = &info->root.rb_node;
665
666 /* Create and allocate the fname structure */
667 if (dirent->file_type & EXT3_DIRENT_LUFID)
668 extra_data = ext3_get_dirent_data_len(dirent);
669
670 len = sizeof(struct fname) + dirent->name_len + extra_data;
671 new_fn = kmalloc(len, GFP_KERNEL);
672 if (!new_fn)
673 return -ENOMEM;
674 memset(new_fn, 0, len);
675 new_fn->hash = hash;
676 new_fn->minor_hash = minor_hash;
677 new_fn->inode = le32_to_cpu(dirent->inode);
678 new_fn->name_len = dirent->name_len;
679 new_fn->file_type = dirent->file_type;
680 memcpy(&new_fn->name[0], &dirent->name[0],
681 dirent->name_len + extra_data);
682 new_fn->name[dirent->name_len] = 0;
683
684 while (*p) {
685 parent = *p;
686 fname = rb_entry(parent, struct fname, rb_hash);
687
688 /*
689 * If the hash and minor hash match up, then we put
690 * them on a linked list. This rarely happens...
691 */
692 if ((new_fn->hash == fname->hash) &&
693 (new_fn->minor_hash == fname->minor_hash)) {
694 new_fn->next = fname->next;
695 fname->next = new_fn;
696 return 0;
697 }
698
699 if (new_fn->hash < fname->hash)
700 p = &(*p)->rb_left;
701 else if (new_fn->hash > fname->hash)
702 p = &(*p)->rb_right;
703 else if (new_fn->minor_hash < fname->minor_hash)
704 p = &(*p)->rb_left;
705 else /* if (new_fn->minor_hash > fname->minor_hash) */
706 p = &(*p)->rb_right;
707 }
708
709 rb_link_node(&new_fn->rb_hash, parent, p);
710 rb_insert_color(&new_fn->rb_hash, &info->root);
711 return 0;
712 }
713
714 static unsigned char ext3_filetype_table[] = {
715 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
716 };
717
718 static unsigned char get_dtype(struct super_block *sb, int filetype)
719 {
720 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) ||
721 (filetype >= EXT3_FT_MAX))
722 return DT_UNKNOWN;
723
724 return (ext3_filetype_table[filetype]);
725 }
726
727 /*
728 * This is a helper function for ext3_dx_readdir. It calls filldir
729 * for all entres on the fname linked list. (Normally there is only
730 * one entry on the linked list, unless there are 62 bit hash collisions.)
731 */
732 static int call_filldir(struct file * filp, void * cookie,
733 filldir_t filldir, struct fname *fname)
734 {
735 struct dir_private_info *info = filp->private_data;
736 loff_t curr_pos;
737 struct inode *inode = filp->f_dentry->d_inode;
738 struct super_block * sb;
739 int error;
740
741 sb = inode->i_sb;
742
743 if (!fname) {
744 printk("call_filldir: called with null fname?!?\n");
745 return 0;
746 }
747 curr_pos = hash2pos(fname->hash, fname->minor_hash);
748 while (fname) {
749 error = filldir(cookie, fname->name,
750 fname->name_len, (ULONG)curr_pos,
751 fname->inode, get_dtype(sb, fname->file_type));
752 if (error) {
753 filp->f_pos = curr_pos;
754 info->extra_fname = fname;
755 return error;
756 }
757 fname = fname->next;
758 }
759 return 0;
760 }
761
762 struct fake_dirent
763 {
764 __le32 inode;
765 __le16 rec_len;
766 __u8 name_len;
767 __u8 file_type;
768 };
769
770 struct dx_countlimit
771 {
772 __le16 limit;
773 __le16 count;
774 };
775
776 struct dx_entry
777 {
778 __le32 hash;
779 __le32 block;
780 };
781
782 /*
783 * dx_root_info is laid out so that if it should somehow get overlaid by a
784 * dirent the two low bits of the hash version will be zero. Therefore, the
785 * hash version mod 4 should never be 0. Sincerely, the paranoia department.
786 */
787
788 struct dx_root
789 {
790 struct fake_dirent dot;
791 char dot_name[4];
792 struct fake_dirent dotdot;
793 char dotdot_name[4];
794 struct dx_root_info
795 {
796 __le32 reserved_zero;
797 __u8 hash_version;
798 __u8 info_length; /* 8 */
799 __u8 indirect_levels;
800 __u8 unused_flags;
801 }
802 info;
803 struct dx_entry entries[0];
804 };
805
806 struct dx_node
807 {
808 struct fake_dirent fake;
809 struct dx_entry entries[0];
810 };
811
812
813 struct dx_frame
814 {
815 struct buffer_head *bh;
816 struct dx_entry *entries;
817 struct dx_entry *at;
818 };
819
820 struct dx_map_entry
821 {
822 __u32 hash;
823 __u16 offs;
824 __u16 size;
825 };
826
827 #if defined(__REACTOS__) && !defined(_MSC_VER)
828 struct ext3_dir_entry_2 *
829 do_split(struct ext2_icb *icb, struct inode *dir,
830 struct buffer_head **bh,struct dx_frame *frame,
831 struct dx_hash_info *hinfo, int *error);
832 #endif
833
834 /*
835 * Future: use high four bits of block for coalesce-on-delete flags
836 * Mask them off for now.
837 */
838
839 static inline unsigned dx_get_block (struct dx_entry *entry)
840 {
841 return le32_to_cpu(entry->block) & 0x00ffffff;
842 }
843
844 static inline void dx_set_block (struct dx_entry *entry, unsigned value)
845 {
846 entry->block = cpu_to_le32(value);
847 }
848
849 static inline unsigned dx_get_hash (struct dx_entry *entry)
850 {
851 return le32_to_cpu(entry->hash);
852 }
853
854 static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
855 {
856 entry->hash = cpu_to_le32(value);
857 }
858
859 static inline unsigned dx_get_count (struct dx_entry *entries)
860 {
861 return le16_to_cpu(((struct dx_countlimit *) entries)->count);
862 }
863
864 static inline unsigned dx_get_limit (struct dx_entry *entries)
865 {
866 return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
867 }
868
869 static inline void dx_set_count (struct dx_entry *entries, unsigned value)
870 {
871 ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
872 }
873
874 static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
875 {
876 ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
877 }
878
879 static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
880 {
881 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
882 EXT3_DIR_REC_LEN(2) - infosize;
883 return 0? 20: entry_space / sizeof(struct dx_entry);
884 }
885
886 static inline unsigned dx_node_limit (struct inode *dir)
887 {
888 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
889 return 0? 22: entry_space / sizeof(struct dx_entry);
890 }
891
892 /*
893 * Debug
894 */
895 #if DX_DEBUG
896 static void dx_show_index (char * label, struct dx_entry *entries)
897 {
898 int i, n = dx_get_count (entries);
899 printk("%s index ", label);
900 for (i = 0; i < n; i++)
901 {
902 printk("%x->%u ", i? dx_get_hash(entries + i): 0, dx_get_block(entries + i));
903 }
904 printk("\n");
905 }
906
907 struct stats
908 {
909 unsigned names;
910 unsigned space;
911 unsigned bcount;
912 };
913
914 struct stats dx_show_leaf(struct ext2_icb *icb, struct dx_hash_info *hinfo,
915 struct ext3_dir_entry_2 *de, int size, int show_names)
916 {
917 struct stats rc;
918 unsigned names = 0, space = 0;
919 char *base = (char *) de;
920 struct dx_hash_info h = *hinfo;
921
922 printk("names: ");
923 while ((char *) de < base + size)
924 {
925 if (de->inode)
926 {
927 if (show_names)
928 {
929 int len = de->name_len;
930 char *name = de->name;
931 while (len--) printk("%c", *name++);
932 ext3_dirhash(de->name, de->name_len, &h);
933 printk(":%x.%u ", h.hash,
934 ((char *) de - base));
935 }
936 space += EXT3_DIR_REC_LEN(de->name_len);
937 names++;
938 }
939 de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
940 }
941 printk("(%i)\n", names);
942
943 rc.names = names;
944 rc.space = space;
945 rc.bcount = 1;
946
947 return rc;
948 }
949
950 struct stats dx_show_entries(struct ext2_icb *icb, struct dx_hash_info *hinfo,
951 struct inode *dir, struct dx_entry *entries, int levels)
952 {
953 unsigned blocksize = dir->i_sb->s_blocksize;
954 unsigned count = dx_get_count (entries), names = 0, space = 0, i;
955 unsigned bcount = 0;
956 struct buffer_head *bh;
957 struct stats rc;
958 int err;
959
960 printk("%i indexed blocks...\n", count);
961 for (i = 0; i < count; i++, entries++)
962 {
963 u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0;
964 u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
965 struct stats stats;
966 printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range);
967 if (!(bh = ext3_bread (icb, dir, block, &err))) continue;
968 stats = levels?
969 dx_show_entries(icb, hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
970 dx_show_leaf(icb, hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0);
971 names += stats.names;
972 space += stats.space;
973 bcount += stats.bcount;
974 brelse (bh);
975 }
976 if (bcount)
977 printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ",
978 names, space/bcount,(space/bcount)*100/blocksize);
979
980 rc.names = names;
981 rc.space = space;
982 rc.bcount = 1;
983
984 return rc;
985 }
986 #endif /* DX_DEBUG */
987
988
989 int ext3_save_inode ( struct ext2_icb *icb, struct inode *in)
990 {
991 return Ext2SaveInode(icb, in->i_sb->s_priv, in);
992 }
993
994 /*
995 * Probe for a directory leaf block to search.
996 *
997 * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
998 * error in the directory index, and the caller should fall back to
999 * searching the directory normally. The callers of dx_probe **MUST**
1000 * check for this error code, and make sure it never gets reflected
1001 * back to userspace.
1002 */
1003 static struct dx_frame *
1004 dx_probe(struct ext2_icb *icb, struct dentry *dentry, struct inode *dir,
1005 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
1006 {
1007 unsigned count, indirect;
1008 struct dx_entry *at, *entries, *p, *q, *m;
1009 struct dx_root *root;
1010 struct buffer_head *bh;
1011 struct dx_frame *frame = frame_in;
1012 u32 hash;
1013
1014 frame->bh = NULL;
1015 if (dentry)
1016 dir = dentry->d_parent->d_inode;
1017 if (!(bh = ext3_bread (icb, dir, 0, err)))
1018 goto fail;
1019 root = (struct dx_root *) bh->b_data;
1020 if (root->info.hash_version != DX_HASH_TEA &&
1021 root->info.hash_version != DX_HASH_HALF_MD4 &&
1022 root->info.hash_version != DX_HASH_LEGACY) {
1023 ext3_warning(dir->i_sb, __FUNCTION__,
1024 "Unrecognised inode hash code %d",
1025 root->info.hash_version);
1026 brelse(bh);
1027 *err = ERR_BAD_DX_DIR;
1028 goto fail;
1029 }
1030 hinfo->hash_version = root->info.hash_version;
1031 hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed;
1032 if (dentry)
1033 ext3_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
1034 hash = hinfo->hash;
1035
1036 if (root->info.unused_flags & 1) {
1037 ext3_warning(dir->i_sb, __FUNCTION__,
1038 "Unimplemented inode hash flags: %#06x",
1039 root->info.unused_flags);
1040 brelse(bh);
1041 *err = ERR_BAD_DX_DIR;
1042 goto fail;
1043 }
1044
1045 if ((indirect = root->info.indirect_levels) > 1) {
1046 ext3_warning(dir->i_sb, __FUNCTION__,
1047 "Unimplemented inode hash depth: %#06x",
1048 root->info.indirect_levels);
1049 brelse(bh);
1050 *err = ERR_BAD_DX_DIR;
1051 goto fail;
1052 }
1053
1054 entries = (struct dx_entry *) (((char *)&root->info) +
1055 root->info.info_length);
1056
1057 if (dx_get_limit(entries) != dx_root_limit(dir,
1058 root->info.info_length)) {
1059 ext3_warning(dir->i_sb, __FUNCTION__,
1060 "dx entry: limit != root limit");
1061 brelse(bh);
1062 *err = ERR_BAD_DX_DIR;
1063 goto fail;
1064 }
1065
1066 dxtrace(printk("Look up %x", hash));
1067 while (1)
1068 {
1069 count = dx_get_count(entries);
1070 if (!count || count > dx_get_limit(entries)) {
1071 ext3_warning(dir->i_sb, __FUNCTION__,
1072 "dx entry: no count or count > limit");
1073 brelse(bh);
1074 *err = ERR_BAD_DX_DIR;
1075 goto fail2;
1076 }
1077
1078 p = entries + 1;
1079 q = entries + count - 1;
1080 while (p <= q)
1081 {
1082 m = p + (q - p)/2;
1083 if (dx_get_hash(m) > hash)
1084 q = m - 1;
1085 else
1086 p = m + 1;
1087 }
1088
1089 if (0) // linear search cross check
1090 {
1091 unsigned n = count - 1;
1092 at = entries;
1093 while (n--)
1094 {
1095 if (dx_get_hash(++at) > hash)
1096 {
1097 at--;
1098 break;
1099 }
1100 }
1101 ASSERT(at == p - 1);
1102 }
1103
1104 at = p - 1;
1105 frame->bh = bh;
1106 frame->entries = entries;
1107 frame->at = at;
1108 if (!indirect--) return frame;
1109 if (!(bh = ext3_bread(icb, dir, dx_get_block(at), err)))
1110 goto fail2;
1111 at = entries = ((struct dx_node *) bh->b_data)->entries;
1112 if (dx_get_limit(entries) != dx_node_limit (dir)) {
1113 ext3_warning(dir->i_sb, __FUNCTION__,
1114 "dx entry: limit != node limit");
1115 brelse(bh);
1116 *err = ERR_BAD_DX_DIR;
1117 goto fail2;
1118 }
1119 frame++;
1120 frame->bh = NULL;
1121 }
1122 fail2:
1123 while (frame >= frame_in) {
1124 brelse(frame->bh);
1125 frame--;
1126 }
1127 fail:
1128 if (*err == ERR_BAD_DX_DIR)
1129 ext3_warning(dir->i_sb, __FUNCTION__,
1130 "Corrupt dir inode %ld, running e2fsck is "
1131 "recommended.", dir->i_ino);
1132 return NULL;
1133 }
1134
1135 static void dx_release (struct dx_frame *frames)
1136 {
1137 if (frames[0].bh == NULL)
1138 return;
1139
1140 if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
1141 brelse(frames[1].bh);
1142 brelse(frames[0].bh);
1143 }
1144
1145 /*
1146 * This function increments the frame pointer to search the next leaf
1147 * block, and reads in the necessary intervening nodes if the search
1148 * should be necessary. Whether or not the search is necessary is
1149 * controlled by the hash parameter. If the hash value is even, then
1150 * the search is only continued if the next block starts with that
1151 * hash value. This is used if we are searching for a specific file.
1152 *
1153 * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
1154 *
1155 * This function returns 1 if the caller should continue to search,
1156 * or 0 if it should not. If there is an error reading one of the
1157 * index blocks, it will a negative error code.
1158 *
1159 * If start_hash is non-null, it will be filled in with the starting
1160 * hash of the next page.
1161 */
1162 int ext3_htree_next_block(struct ext2_icb *icb, struct inode *dir,
1163 __u32 hash, struct dx_frame *frame,
1164 struct dx_frame *frames, __u32 *start_hash)
1165 {
1166 struct dx_frame *p;
1167 struct buffer_head *bh;
1168 int err, num_frames = 0;
1169 __u32 bhash;
1170
1171 p = frame;
1172 /*
1173 * Find the next leaf page by incrementing the frame pointer.
1174 * If we run out of entries in the interior node, loop around and
1175 * increment pointer in the parent node. When we break out of
1176 * this loop, num_frames indicates the number of interior
1177 * nodes need to be read.
1178 */
1179 while (1) {
1180 if (++(p->at) < p->entries + dx_get_count(p->entries))
1181 break;
1182 if (p == frames)
1183 return 0;
1184 num_frames++;
1185 p--;
1186 }
1187
1188 /*
1189 * If the hash is 1, then continue only if the next page has a
1190 * continuation hash of any value. This is used for readdir
1191 * handling. Otherwise, check to see if the hash matches the
1192 * desired contiuation hash. If it doesn't, return since
1193 * there's no point to read in the successive index pages.
1194 */
1195 bhash = dx_get_hash(p->at);
1196 if (start_hash)
1197 *start_hash = bhash;
1198 if ((hash & 1) == 0) {
1199 if ((bhash & ~1) != hash)
1200 return 0;
1201 }
1202 /*
1203 * If the hash is HASH_NB_ALWAYS, we always go to the next
1204 * block so no check is necessary
1205 */
1206 while (num_frames--) {
1207 if (!(bh = ext3_bread(icb, dir, dx_get_block(p->at), &err)))
1208 return err; /* Failure */
1209 p++;
1210 brelse (p->bh);
1211 p->bh = bh;
1212 p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
1213 }
1214 return 1;
1215 }
1216
1217 /*
1218 * This function fills a red-black tree with information from a
1219 * directory block. It returns the number directory entries loaded
1220 * into the tree. If there is an error it is returned in err.
1221 */
1222 int htree_dirblock_to_tree(struct ext2_icb *icb, struct file *dir_file,
1223 struct inode *dir, int block,
1224 struct dx_hash_info *hinfo,
1225 __u32 start_hash, __u32 start_minor_hash)
1226 {
1227 struct buffer_head *bh;
1228 struct ext3_dir_entry_2 *de, *top;
1229 int err, count = 0;
1230
1231 dxtrace(printk("In htree dirblock_to_tree: block %d\n", block));
1232 if (!(bh = ext3_bread (icb, dir, block, &err)))
1233 return err;
1234
1235 de = (struct ext3_dir_entry_2 *) bh->b_data;
1236 top = (struct ext3_dir_entry_2 *) ((char *) de +
1237 dir->i_sb->s_blocksize -
1238 EXT3_DIR_REC_LEN(0));
1239 for (; de < top; de = ext3_next_entry(de)) {
1240 if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
1241 ((unsigned long)block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
1242 + (unsigned long)((char *)de - bh->b_data))) {
1243 /* On error, skip the f_pos to the next block. */
1244 dir_file->f_pos = (dir_file->f_pos |
1245 (dir->i_sb->s_blocksize - 1)) + 1;
1246 brelse (bh);
1247 return count;
1248 }
1249 ext3_dirhash(de->name, de->name_len, hinfo);
1250 if ((hinfo->hash < start_hash) ||
1251 ((hinfo->hash == start_hash) &&
1252 (hinfo->minor_hash < start_minor_hash)))
1253 continue;
1254 if (de->inode == 0)
1255 continue;
1256 if ((err = ext3_htree_store_dirent(dir_file,
1257 hinfo->hash, hinfo->minor_hash, de)) != 0) {
1258 brelse(bh);
1259 return err;
1260 }
1261 count++;
1262 }
1263 brelse(bh);
1264 return count;
1265 }
1266
1267 /*
1268 * This function fills a red-black tree with information from a
1269 * directory. We start scanning the directory in hash order, starting
1270 * at start_hash and start_minor_hash.
1271 *
1272 * This function returns the number of entries inserted into the tree,
1273 * or a negative error code.
1274 */
1275 int ext3_htree_fill_tree(struct ext2_icb *icb, struct file *dir_file,
1276 __u32 start_hash, __u32 start_minor_hash,
1277 __u32 *next_hash)
1278 {
1279 struct dx_hash_info hinfo;
1280 struct ext3_dir_entry_2 *de;
1281 struct dx_frame frames[2], *frame;
1282 int block, err = 0;
1283 struct inode *dir;
1284 int count = 0;
1285 int ret;
1286 __u32 hashval;
1287
1288 dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
1289 start_minor_hash));
1290 dir = dir_file->f_dentry->d_inode;
1291 if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
1292 hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
1293 hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
1294 count = htree_dirblock_to_tree(icb, dir_file, dir, 0, &hinfo,
1295 start_hash, start_minor_hash);
1296 *next_hash = ~0;
1297 return count;
1298 }
1299
1300 hinfo.hash = start_hash;
1301 hinfo.minor_hash = 0;
1302 frame = dx_probe(icb, NULL, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
1303 if (!frame)
1304 return err;
1305
1306 /* Add '.' and '..' from the htree header */
1307 if (!start_hash && !start_minor_hash) {
1308 de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
1309 if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0)
1310 goto errout;
1311 count++;
1312 }
1313 if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
1314 de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
1315 de = ext3_next_entry(de);
1316 if ((err = ext3_htree_store_dirent(dir_file, 2, 0, de)) != 0)
1317 goto errout;
1318 count++;
1319 }
1320
1321 while (1) {
1322 block = dx_get_block(frame->at);
1323 ret = htree_dirblock_to_tree(icb, dir_file, dir, block, &hinfo,
1324 start_hash, start_minor_hash);
1325 if (ret < 0) {
1326 err = ret;
1327 goto errout;
1328 }
1329 count += ret;
1330 hashval = ~0;
1331 ret = ext3_htree_next_block(icb, dir, HASH_NB_ALWAYS,
1332 frame, frames, &hashval);
1333 *next_hash = hashval;
1334 if (ret < 0) {
1335 err = ret;
1336 goto errout;
1337 }
1338 /*
1339 * Stop if: (a) there are no more entries, or
1340 * (b) we have inserted at least one entry and the
1341 * next hash value is not a continuation
1342 */
1343 if ((ret == 0) ||
1344 (count && ((hashval & 1) == 0)))
1345 break;
1346 }
1347 dx_release(frames);
1348 dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
1349 count, *next_hash));
1350 return count;
1351 errout:
1352 dx_release(frames);
1353
1354 return (err);
1355 }
1356
1357
1358 /*
1359 * Directory block splitting, compacting
1360 */
1361
1362 /*
1363 * Create map of hash values, offsets, and sizes, stored at end of block.
1364 * Returns number of entries mapped.
1365 */
1366 static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
1367 struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
1368 {
1369 int count = 0;
1370 char *base = (char *) de;
1371 struct dx_hash_info h = *hinfo;
1372
1373 while ((char *) de < base + size)
1374 {
1375 if (de->name_len && de->inode) {
1376 ext3_dirhash(de->name, de->name_len, &h);
1377 map_tail--;
1378 map_tail->hash = h.hash;
1379 map_tail->offs = (u16) ((char *) de - base);
1380 map_tail->size = le16_to_cpu(de->rec_len);
1381 count++;
1382 cond_resched();
1383 }
1384 /* XXX: do we need to check rec_len == 0 case? -Chris */
1385 de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
1386 }
1387 return count;
1388 }
1389
1390 /* Sort map by hash value */
1391 static void dx_sort_map (struct dx_map_entry *map, unsigned count)
1392 {
1393 struct dx_map_entry *p, *q, *top = map + count - 1;
1394 int more;
1395 /* Combsort until bubble sort doesn't suck */
1396 while (count > 2)
1397 {
1398 count = count*10/13;
1399 if (count - 9 < 2) /* 9, 10 -> 11 */
1400 count = 11;
1401 for (p = top, q = p - count; q >= map; p--, q--)
1402 if (p->hash < q->hash)
1403 swap(struct dx_map_entry, *p, *q);
1404 }
1405 /* Garden variety bubble sort */
1406 do {
1407 more = 0;
1408 q = top;
1409 while (q-- > map)
1410 {
1411 if (q[1].hash >= q[0].hash)
1412 continue;
1413 swap(struct dx_map_entry, *(q+1), *q);
1414 more = 1;
1415 }
1416 } while (more);
1417 }
1418
1419 static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
1420 {
1421 struct dx_entry *entries = frame->entries;
1422 struct dx_entry *old = frame->at, *new = old + 1;
1423 unsigned int count = dx_get_count(entries);
1424
1425 ASSERT(count < dx_get_limit(entries));
1426 ASSERT(old < entries + count);
1427 memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
1428 dx_set_hash(new, hash);
1429 dx_set_block(new, block);
1430 dx_set_count(entries, count + 1);
1431 }
1432
1433 struct buffer_head *
1434 ext3_dx_find_entry(struct ext2_icb *icb, struct dentry *dentry,
1435 struct ext3_dir_entry_2 **res_dir, int *err)
1436 {
1437 struct super_block * sb;
1438 struct dx_hash_info hinfo = {0};
1439 u32 hash;
1440 struct dx_frame frames[2], *frame;
1441 struct ext3_dir_entry_2 *de, *top;
1442 struct buffer_head *bh;
1443 unsigned long block;
1444 int retval;
1445 int namelen = dentry->d_name.len;
1446 const __u8 *name = dentry->d_name.name;
1447 struct inode *dir = dentry->d_parent->d_inode;
1448
1449 sb = dir->i_sb;
1450 /* NFS may look up ".." - look at dx_root directory block */
1451 if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')) {
1452 if (!(frame = dx_probe(icb, dentry, NULL, &hinfo, frames, err)))
1453 return NULL;
1454 } else {
1455 frame = frames;
1456 frame->bh = NULL; /* for dx_release() */
1457 frame->at = (struct dx_entry *)frames; /* hack for zero entry*/
1458 dx_set_block(frame->at, 0); /* dx_root block is 0 */
1459 }
1460 hash = hinfo.hash;
1461 do {
1462 block = dx_get_block(frame->at);
1463 if (!(bh = ext3_bread (icb, dir, block, err)))
1464 goto errout;
1465 de = (struct ext3_dir_entry_2 *) bh->b_data;
1466 top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
1467 EXT3_DIR_REC_LEN(0));
1468 for (; de < top; de = ext3_next_entry(de))
1469 if (ext3_match (namelen, name, de)) {
1470 if (!ext3_check_dir_entry("ext3_find_entry",
1471 dir, de, bh,
1472 (block<<EXT3_BLOCK_SIZE_BITS(sb))
1473 + (unsigned long)((char *)de - bh->b_data))) {
1474 brelse (bh);
1475 goto errout;
1476 }
1477 *res_dir = de;
1478 dx_release (frames);
1479 return bh;
1480 }
1481 brelse (bh);
1482 /* Check to see if we should continue to search */
1483 retval = ext3_htree_next_block(icb, dir, hash, frame,
1484 frames, NULL);
1485 if (retval < 0) {
1486 ext3_warning(sb, __FUNCTION__,
1487 "error reading index page in directory #%lu",
1488 dir->i_ino);
1489 *err = retval;
1490 goto errout;
1491 }
1492 } while (retval == 1);
1493
1494 *err = -ENOENT;
1495 errout:
1496 dxtrace(printk("%s not found\n", name));
1497 dx_release (frames);
1498 return NULL;
1499 }
1500
1501 int ext3_dx_readdir(struct file *filp, filldir_t filldir,
1502 void * context)
1503 {
1504 struct dir_private_info *info = filp->private_data;
1505 struct inode *inode = filp->f_dentry->d_inode;
1506 struct fname *fname;
1507 PEXT2_FILLDIR_CONTEXT fc = context;
1508 int ret;
1509
1510 if (!info) {
1511 info = create_dir_info(filp->f_pos);
1512 if (!info)
1513 return -ENOMEM;
1514 filp->private_data = info;
1515 }
1516
1517 if (filp->f_pos == EXT3_HTREE_EOF)
1518 return 0; /* EOF */
1519
1520 /* Some one has messed with f_pos; reset the world */
1521 if (info->last_pos != filp->f_pos) {
1522 free_rb_tree_fname(&info->root);
1523 info->curr_node = NULL;
1524 info->extra_fname = NULL;
1525 info->curr_hash = (__u32)pos2maj_hash(filp->f_pos);
1526 info->curr_minor_hash = (__u32)pos2min_hash(filp->f_pos);
1527 }
1528
1529 /*
1530 * If there are any leftover names on the hash collision
1531 * chain, return them first.
1532 */
1533 if (info->extra_fname) {
1534 if (call_filldir(filp, context, filldir, info->extra_fname))
1535 goto finished;
1536 info->extra_fname = NULL;
1537 goto next_node;
1538 } else if (!info->curr_node)
1539 info->curr_node = rb_first(&info->root);
1540
1541 while (1) {
1542 /*
1543 * Fill the rbtree if we have no more entries,
1544 * or the inode has changed since we last read in the
1545 * cached entries.
1546 */
1547 if ((!info->curr_node) ||
1548 (filp->f_version != inode->i_version)) {
1549 info->curr_node = NULL;
1550 free_rb_tree_fname(&info->root);
1551 filp->f_version = inode->i_version;
1552 ret = ext3_htree_fill_tree(fc->efc_irp, filp, info->curr_hash,
1553 info->curr_minor_hash, &info->next_hash);
1554 if (ret < 0)
1555 return ret;
1556 if (ret == 0) {
1557 filp->f_pos = EXT3_HTREE_EOF;
1558 break;
1559 }
1560 info->curr_node = rb_first(&info->root);
1561 }
1562
1563 fname = rb_entry(info->curr_node, struct fname, rb_hash);
1564 info->curr_hash = fname->hash;
1565 info->curr_minor_hash = fname->minor_hash;
1566 if (call_filldir(filp, context, filldir, fname))
1567 break;
1568 next_node:
1569 info->curr_node = rb_next(info->curr_node);
1570 if (info->curr_node) {
1571 fname = rb_entry(info->curr_node, struct fname,
1572 rb_hash);
1573 info->curr_hash = fname->hash;
1574 info->curr_minor_hash = fname->minor_hash;
1575 } else {
1576 if (info->next_hash == ~0) {
1577 filp->f_pos = EXT3_HTREE_EOF;
1578 break;
1579 }
1580 info->curr_hash = info->next_hash;
1581 info->curr_minor_hash = 0;
1582 }
1583 }
1584 finished:
1585 info->last_pos = filp->f_pos;
1586 return 0;
1587 }
1588
1589 int ext3_release_dir (struct inode * inode, struct file * filp)
1590 {
1591 if (filp->private_data) {
1592 ext3_htree_free_dir_info(filp->private_data);
1593 filp->private_data = NULL;
1594 }
1595
1596 return 0;
1597 }
1598
1599 /*
1600 * Returns 0 for success, or a negative error value
1601 */
1602 int ext3_dx_add_entry(struct ext2_icb *icb, struct dentry *dentry,
1603 struct inode *inode)
1604 {
1605 struct dx_frame frames[2], *frame;
1606 struct dx_entry *entries, *at;
1607 struct dx_hash_info hinfo;
1608 struct buffer_head * bh;
1609 struct inode *dir = dentry->d_parent->d_inode;
1610 struct super_block * sb = dir->i_sb;
1611 struct ext3_dir_entry_2 *de;
1612 int err;
1613
1614 frame = dx_probe(icb, dentry, NULL, &hinfo, frames, &err);
1615 if (!frame)
1616 return err;
1617 entries = frame->entries;
1618 at = frame->at;
1619
1620 if (!(bh = ext3_bread(icb, dir, dx_get_block(frame->at), &err)))
1621 goto cleanup;
1622
1623 err = add_dirent_to_buf(icb, dentry, inode, NULL, bh);
1624 if (err != -ENOSPC) {
1625 bh = NULL;
1626 goto cleanup;
1627 }
1628
1629 /* Block full, should compress but for now just split */
1630 dxtrace(printk("using %u of %u node entries\n",
1631 dx_get_count(entries), dx_get_limit(entries)));
1632 /* Need to split index? */
1633 if (dx_get_count(entries) == dx_get_limit(entries)) {
1634 u32 newblock;
1635 unsigned icount = dx_get_count(entries);
1636 int levels = (int)(frame - frames);
1637 struct dx_entry *entries2;
1638 struct dx_node *node2;
1639 struct buffer_head *bh2;
1640
1641 if (levels && (dx_get_count(frames->entries) ==
1642 dx_get_limit(frames->entries))) {
1643 ext3_warning(sb, __FUNCTION__,
1644 "Directory index full!");
1645 err = -ENOSPC;
1646 goto cleanup;
1647 }
1648 bh2 = ext3_append (icb, dir, &newblock, &err);
1649 if (!(bh2))
1650 goto cleanup;
1651 node2 = (struct dx_node *)(bh2->b_data);
1652 entries2 = node2->entries;
1653 node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
1654 node2->fake.inode = 0;
1655
1656 if (levels) {
1657 unsigned icount1 = icount/2, icount2 = icount - icount1;
1658 unsigned hash2 = dx_get_hash(entries + icount1);
1659 dxtrace(printk("Split index %i/%i\n", icount1, icount2));
1660
1661 memcpy ((char *) entries2, (char *) (entries + icount1),
1662 icount2 * sizeof(struct dx_entry));
1663 dx_set_count (entries, icount1);
1664 dx_set_count (entries2, icount2);
1665 dx_set_limit (entries2, dx_node_limit(dir));
1666
1667 /* Which index block gets the new entry? */
1668 if ((unsigned int)(at - entries) >= icount1) {
1669 frame->at = at = at - entries - icount1 + entries2;
1670 frame->entries = entries = entries2;
1671 swap(struct buffer_head *, frame->bh, bh2);
1672 }
1673 dx_insert_block (frames + 0, hash2, newblock);
1674 dxtrace(dx_show_index ("node", frames[1].entries));
1675 dxtrace(dx_show_index ("node",
1676 ((struct dx_node *) bh2->b_data)->entries));
1677 set_buffer_dirty(bh2);
1678 brelse (bh2);
1679 } else {
1680 dxtrace(printk("Creating second level index...\n"));
1681 memcpy((char *) entries2, (char *) entries,
1682 icount * sizeof(struct dx_entry));
1683 dx_set_limit(entries2, dx_node_limit(dir));
1684
1685 /* Set up root */
1686 dx_set_count(entries, 1);
1687 dx_set_block(entries + 0, newblock);
1688 ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
1689
1690 /* Add new access path frame */
1691 frame = frames + 1;
1692 frame->at = at = at - entries + entries2;
1693 frame->entries = entries = entries2;
1694 frame->bh = bh2;
1695 }
1696 // ext3_journal_dirty_metadata(handle, frames[0].bh);
1697 set_buffer_dirty(frames[0].bh);
1698 }
1699 de = do_split(icb, dir, &bh, frame, &hinfo, &err);
1700 if (!de)
1701 goto cleanup;
1702 err = add_dirent_to_buf(icb, dentry, inode, de, bh);
1703 bh = NULL;
1704 goto cleanup;
1705
1706 cleanup:
1707 if (bh)
1708 brelse(bh);
1709 dx_release(frames);
1710 return err;
1711 }
1712
1713 /*
1714 * Move count entries from end of map between two memory locations.
1715 * Returns pointer to last entry moved.
1716 */
1717 struct ext3_dir_entry_2 *
1718 dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
1719 {
1720 unsigned rec_len = 0;
1721
1722 while (count--) {
1723 struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
1724 rec_len = EXT3_DIR_REC_LEN(de->name_len);
1725 memcpy (to, de, rec_len);
1726 ((struct ext3_dir_entry_2 *) to)->rec_len =
1727 cpu_to_le16(rec_len);
1728 de->inode = 0;
1729 map++;
1730 to += rec_len;
1731 }
1732 return (struct ext3_dir_entry_2 *) (to - rec_len);
1733 }
1734
1735 /*
1736 * Compact each dir entry in the range to the minimal rec_len.
1737 * Returns pointer to last entry in range.
1738 */
1739 struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
1740 {
1741 struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
1742 unsigned rec_len = 0;
1743
1744 prev = to = de;
1745 while ((char*)de < base + size) {
1746 next = (struct ext3_dir_entry_2 *) ((char *) de +
1747 le16_to_cpu(de->rec_len));
1748 if (de->inode && de->name_len) {
1749 rec_len = EXT3_DIR_REC_LEN(de->name_len);
1750 if (de > to)
1751 memmove(to, de, rec_len);
1752 to->rec_len = cpu_to_le16(rec_len);
1753 prev = to;
1754 to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
1755 }
1756 de = next;
1757 }
1758 return prev;
1759 }
1760
1761 /*
1762 * Split a full leaf block to make room for a new dir entry.
1763 * Allocate a new block, and move entries so that they are approx. equally full.
1764 * Returns pointer to de in block into which the new entry will be inserted.
1765 */
1766 struct ext3_dir_entry_2 *
1767 do_split(struct ext2_icb *icb, struct inode *dir,
1768 struct buffer_head **bh,struct dx_frame *frame,
1769 struct dx_hash_info *hinfo, int *error)
1770 {
1771 unsigned blocksize = dir->i_sb->s_blocksize;
1772 unsigned count, continued;
1773 struct buffer_head *bh2;
1774 u32 newblock;
1775 u32 hash2;
1776 struct dx_map_entry *map;
1777 char *data1 = (*bh)->b_data, *data2;
1778 unsigned split, move, size;
1779 struct ext3_dir_entry_2 *de = NULL, *de2;
1780 int err, i;
1781
1782 bh2 = ext3_append (icb, dir, &newblock, error);
1783 if (!(bh2)) {
1784 brelse(*bh);
1785 *bh = NULL;
1786 goto errout;
1787 }
1788
1789 data2 = bh2->b_data;
1790
1791 /* create map in the end of data2 block */
1792 map = (struct dx_map_entry *) (data2 + blocksize);
1793 count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
1794 blocksize, hinfo, map);
1795 map -= count;
1796 dx_sort_map (map, count);
1797 /* Split the existing block in the middle, size-wise */
1798 size = 0;
1799 move = 0;
1800 for (i = count-1; i >= 0; i--) {
1801 /* is more than half of this entry in 2nd half of the block? */
1802 if (size + map[i].size/2 > blocksize/2)
1803 break;
1804 size += map[i].size;
1805 move++;
1806 }
1807 /* map index at which we will split */
1808 split = count - move;
1809 hash2 = map[split].hash;
1810 continued = hash2 == map[split - 1].hash;
1811 dxtrace(printk("Split block %i at %x, %i/%i\n",
1812 dx_get_block(frame->at), hash2, split, count-split));
1813
1814 /* Fancy dance to stay within two buffers */
1815 de2 = dx_move_dirents(data1, data2, map + split, count - split);
1816 de = dx_pack_dirents(data1,blocksize);
1817 de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
1818 de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
1819 dxtrace(dx_show_leaf (icb, hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1));
1820 dxtrace(dx_show_leaf (icb, hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1));
1821
1822 /* Which block gets the new entry? */
1823 if (hinfo->hash >= hash2)
1824 {
1825 swap(struct buffer_head *, *bh, bh2);
1826 de = de2;
1827 }
1828 dx_insert_block (frame, hash2 + continued, newblock);
1829 set_buffer_dirty(bh2);
1830 set_buffer_dirty(frame->bh);
1831
1832 brelse (bh2);
1833 dxtrace(dx_show_index ("frame", frame->entries));
1834 errout:
1835 return de;
1836 }
1837
1838 /*
1839 * This converts a one block unindexed directory to a 3 block indexed
1840 * directory, and adds the dentry to the indexed directory.
1841 */
1842 int make_indexed_dir(struct ext2_icb *icb, struct dentry *dentry,
1843 struct inode *inode, struct buffer_head *bh)
1844 {
1845 struct inode *dir = dentry->d_parent->d_inode;
1846 const char *name = dentry->d_name.name;
1847 int namelen = dentry->d_name.len;
1848 struct buffer_head *bh2;
1849 struct dx_root *root;
1850 struct dx_frame frames[2], *frame;
1851 struct dx_entry *entries;
1852 struct ext3_dir_entry_2 *de, *de2;
1853 char *data1, *top;
1854 unsigned len;
1855 int retval;
1856 unsigned blocksize;
1857 struct dx_hash_info hinfo;
1858 ext3_lblk_t block;
1859 struct fake_dirent *fde;
1860
1861 blocksize = dir->i_sb->s_blocksize;
1862 dxtrace(printk("Creating index: inode %lu\n", dir->i_ino));
1863
1864 root = (struct dx_root *) bh->b_data;
1865
1866 /* The 0th block becomes the root, move the dirents out */
1867 fde = &root->dotdot;
1868 de = (struct ext3_dir_entry_2 *)((char *)fde +
1869 ext3_rec_len_from_disk(fde->rec_len));
1870 if ((char *) de >= (((char *) root) + blocksize)) {
1871 DEBUG(DL_ERR, ( "%s: invalid rec_len for '..' in inode %lu", __FUNCTION__,
1872 dir->i_ino));
1873 brelse(bh);
1874 return -EIO;
1875 }
1876 len = (unsigned int)((char *) root + blocksize - (char *) de);
1877
1878 /* Allocate new block for the 0th block's dirents */
1879 bh2 = ext3_append(icb, dir, &block, &retval);
1880 if (!(bh2)) {
1881 brelse(bh);
1882 return retval;
1883 }
1884 EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
1885 data1 = bh2->b_data;
1886
1887 memcpy (data1, de, len);
1888 de = (struct ext3_dir_entry_2 *) data1;
1889 top = data1 + len;
1890 while ((char *)(de2 = ext3_next_entry(de)) < top)
1891 de = de2;
1892 de->rec_len = ext3_rec_len_to_disk(blocksize + (__u32)(data1 - (char *)de));
1893 /* Initialize the root; the dot dirents already exist */
1894 de = (struct ext3_dir_entry_2 *) (&root->dotdot);
1895 de->rec_len = ext3_rec_len_to_disk(blocksize - EXT3_DIR_REC_LEN(2));
1896 memset (&root->info, 0, sizeof(root->info));
1897 root->info.info_length = sizeof(root->info);
1898 root->info.hash_version = (__u8)(EXT3_SB(dir->i_sb)->s_def_hash_version);
1899 entries = root->entries;
1900 dx_set_block(entries, 1);
1901 dx_set_count(entries, 1);
1902 dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
1903
1904 /* Initialize as for dx_probe */
1905 hinfo.hash_version = root->info.hash_version;
1906 hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
1907 ext3_dirhash(name, namelen, &hinfo);
1908 frame = frames;
1909 frame->entries = entries;
1910 frame->at = entries;
1911 frame->bh = bh;
1912 bh = bh2;
1913 /* bh and bh2 are to be marked as dirty in do_split */
1914 de = do_split(icb, dir, &bh, frame, &hinfo, &retval);
1915 dx_release (frames);
1916 if (!(de))
1917 return retval;
1918
1919 return add_dirent_to_buf(icb, dentry, inode, de, bh);
1920 }
1921
1922 #else /* EXT2_HTREE_INDEX */
1923
1924 int ext3_release_dir (struct inode * inode, struct file * filp)
1925 {
1926 return 0;
1927 }
1928
1929 #endif /* !EXT2_HTREE_INDEX */
1930
1931 /*
1932 * ext3_add_entry()
1933 *
1934 * adds a file entry to the specified directory, using the same
1935 * semantics as ext3_find_entry(). It returns NULL if it failed.
1936 *
1937 * NOTE!! The inode part of 'de' is left at 0 - which means you
1938 * may not sleep between calling this and putting something into
1939 * the entry, as someone else might have used it while you slept.
1940 */
1941 int ext3_add_entry(struct ext2_icb *icb, struct dentry *dentry, struct inode *inode)
1942 {
1943 struct inode *dir = dentry->d_parent->d_inode;
1944 struct buffer_head *bh;
1945 struct ext3_dir_entry_2 *de;
1946 struct super_block *sb;
1947 int retval;
1948 #ifdef EXT2_HTREE_INDEX
1949 int dx_fallback=0;
1950 #endif
1951 unsigned blocksize;
1952 ext3_lblk_t block, blocks;
1953
1954 sb = dir->i_sb;
1955 blocksize = sb->s_blocksize;
1956 if (!dentry->d_name.len)
1957 return -EINVAL;
1958
1959 #ifdef EXT2_HTREE_INDEX
1960 if (is_dx(dir)) {
1961 retval = ext3_dx_add_entry(icb, dentry, inode);
1962 if (!retval || (retval != ERR_BAD_DX_DIR))
1963 return retval;
1964 EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL;
1965 dx_fallback++;
1966 ext3_save_inode(icb, dir);
1967 }
1968 #endif
1969
1970 blocks = (ext3_lblk_t)(dir->i_size >> sb->s_blocksize_bits);
1971 for (block = 0; block < blocks; block++) {
1972 bh = ext3_bread(icb, dir, block, &retval);
1973 if (!bh)
1974 return retval;
1975 retval = add_dirent_to_buf(icb, dentry, inode, NULL, bh);
1976 if (retval != -ENOSPC)
1977 return retval;
1978
1979 #ifdef EXT2_HTREE_INDEX
1980 if (blocks == 1 && !dx_fallback &&
1981 EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
1982 return make_indexed_dir(icb, dentry, inode, bh);
1983 #endif
1984
1985 brelse(bh);
1986 }
1987 bh = ext3_append(icb, dir, &block, &retval);
1988 if (!bh)
1989 return retval;
1990 de = (struct ext3_dir_entry_2 *) bh->b_data;
1991 de->inode = 0;
1992 de->rec_len = ext3_rec_len_to_disk(blocksize);
1993 return add_dirent_to_buf(icb, dentry, inode, de, bh);
1994 }
1995
1996 /*
1997 * ext3_delete_entry deletes a directory entry by merging it with the
1998 * previous entry
1999 */
2000 int ext3_delete_entry(struct ext2_icb *icb, struct inode *dir,
2001 struct ext3_dir_entry_2 *de_del,
2002 struct buffer_head *bh)
2003 {
2004 struct ext3_dir_entry_2 *de, *pde = NULL;
2005 size_t i = 0;
2006
2007 de = (struct ext3_dir_entry_2 *) bh->b_data;
2008 while (i < bh->b_size) {
2009 if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i))
2010 return -EIO;
2011 if (de == de_del) {
2012 if (pde)
2013 pde->rec_len = ext3_rec_len_to_disk(
2014 ext3_rec_len_from_disk(pde->rec_len) +
2015 ext3_rec_len_from_disk(de->rec_len));
2016 else
2017 de->inode = 0;
2018 dir->i_version++;
2019 /* ext3_journal_dirty_metadata(handle, bh); */
2020 set_buffer_dirty(bh);
2021 return 0;
2022 }
2023 i += ext3_rec_len_from_disk(de->rec_len);
2024 pde = de;
2025 de = ext3_next_entry(de);
2026 }
2027 return -ENOENT;
2028 }
2029
2030 /*
2031 * routine to check that the specified directory is empty (for rmdir)
2032 */
2033 int ext3_is_dir_empty(struct ext2_icb *icb, struct inode *inode)
2034 {
2035 unsigned int offset;
2036 struct buffer_head *bh;
2037 struct ext3_dir_entry_2 *de, *de1;
2038 struct super_block *sb;
2039 int err = 0;
2040
2041 sb = inode->i_sb;
2042 if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) ||
2043 !(bh = ext3_bread(icb, inode, 0, &err))) {
2044 if (err)
2045 ext3_error(inode->i_sb, __FUNCTION__,
2046 "error %d reading directory #%lu offset 0",
2047 err, inode->i_ino);
2048 else
2049 ext3_warning(inode->i_sb, __FUNCTION__,
2050 "bad directory (dir #%lu) - no data block",
2051 inode->i_ino);
2052 return 1;
2053 }
2054 de = (struct ext3_dir_entry_2 *) bh->b_data;
2055 de1 = ext3_next_entry(de);
2056 if (le32_to_cpu(de->inode) != inode->i_ino ||
2057 !le32_to_cpu(de1->inode) ||
2058 strcmp(".", de->name) ||
2059 strcmp("..", de1->name)) {
2060 ext3_warning(inode->i_sb, "empty_dir",
2061 "bad directory (dir #%lu) - no `.' or `..'",
2062 inode->i_ino);
2063 brelse(bh);
2064 return 1;
2065 }
2066 offset = ext3_rec_len_from_disk(de->rec_len) +
2067 ext3_rec_len_from_disk(de1->rec_len);
2068 de = ext3_next_entry(de1);
2069 while (offset < inode->i_size) {
2070 if (!bh ||
2071 (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
2072 err = 0;
2073 brelse(bh);
2074 bh = ext3_bread(icb, inode, offset >> EXT3_BLOCK_SIZE_BITS(sb), &err);
2075 if (!bh) {
2076 if (err)
2077 ext3_error(sb, __FUNCTION__, "error %d reading directory"
2078 " #%lu offset %u", err, inode->i_ino, offset);
2079 offset += sb->s_blocksize;
2080 continue;
2081 }
2082 de = (struct ext3_dir_entry_2 *) bh->b_data;
2083 }
2084 if (!ext3_check_dir_entry("empty_dir", inode, de, bh, offset)) {
2085 de = (struct ext3_dir_entry_2 *)(bh->b_data +
2086 sb->s_blocksize);
2087 offset = (offset | (sb->s_blocksize - 1)) + 1;
2088 continue;
2089 }
2090 if (le32_to_cpu(de->inode)) {
2091 brelse(bh);
2092 return 0;
2093 }
2094 offset += ext3_rec_len_from_disk(de->rec_len);
2095 de = ext3_next_entry(de);
2096 }
2097 brelse(bh);
2098 return 1;
2099 }
2100
2101 /*
2102 * Returns 0 if not found, -1 on failure, and 1 on success
2103 */
2104 static inline int search_dirblock(struct buffer_head * bh,
2105 struct inode *dir,
2106 struct dentry *dentry,
2107 unsigned long offset,
2108 struct ext3_dir_entry_2 ** res_dir)
2109 {
2110 struct ext3_dir_entry_2 * de;
2111 char * dlimit;
2112 int de_len;
2113 const char *name = dentry->d_name.name;
2114 int namelen = dentry->d_name.len;
2115
2116 de = (struct ext3_dir_entry_2 *) bh->b_data;
2117 dlimit = bh->b_data + dir->i_sb->s_blocksize;
2118 while ((char *) de < dlimit) {
2119 /* this code is executed quadratically often */
2120 /* do minimal checking `by hand' */
2121
2122 if ((char *) de + namelen <= dlimit &&
2123 ext3_match (namelen, name, de)) {
2124 /* found a match - just to be sure, do a full check */
2125 if (!ext3_check_dir_entry("ext3_find_entry",
2126 dir, de, bh, offset))
2127 return -1;
2128 *res_dir = de;
2129 return 1;
2130 }
2131 /* prevent looping on a bad block */
2132 de_len = ext3_rec_len_from_disk(de->rec_len);
2133
2134 if (de_len <= 0)
2135 return -1;
2136 offset += de_len;
2137 de = (struct ext3_dir_entry_2 *) ((char *) de + de_len);
2138 }
2139 return 0;
2140 }
2141
2142 /*
2143 * define how far ahead to read directories while searching them.
2144 */
2145 #define NAMEI_RA_CHUNKS 2
2146 #define NAMEI_RA_BLOCKS 4
2147 #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
2148 #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
2149
2150 /*
2151 * ext4_find_entry()
2152 *
2153 * finds an entry in the specified directory with the wanted name. It
2154 * returns the cache buffer in which the entry was found, and the entry
2155 * itself (as a parameter - res_dir). It does NOT read the inode of the
2156 * entry - you'll have to do that yourself if you want to.
2157 *
2158 * The returned buffer_head has ->b_count elevated. The caller is expected
2159 * to brelse() it when appropriate.
2160 */
2161 struct buffer_head * ext3_find_entry (struct ext2_icb *icb,
2162 struct dentry *dentry,
2163 struct ext3_dir_entry_2 ** res_dir)
2164 {
2165 struct inode *dir = dentry->d_parent->d_inode;
2166 struct super_block *sb = dir->i_sb;
2167 struct buffer_head *bh_use[NAMEI_RA_SIZE];
2168 struct buffer_head *bh, *ret = NULL;
2169 ext3_lblk_t start, block, b;
2170 int ra_max = 0; /* Number of bh's in the readahead
2171 buffer, bh_use[] */
2172 int ra_ptr = 0; /* Current index into readahead
2173 buffer */
2174 int num = 0;
2175 ext3_lblk_t nblocks;
2176 int i, err;
2177 int namelen = dentry->d_name.len;
2178
2179 *res_dir = NULL;
2180 if (namelen > EXT3_NAME_LEN)
2181 return NULL;
2182
2183 #ifdef EXT2_HTREE_INDEX
2184 if (is_dx(dir)) {
2185 bh = ext3_dx_find_entry(icb, dentry, res_dir, &err);
2186 /*
2187 * On success, or if the error was file not found,
2188 * return. Otherwise, fall back to doing a search the
2189 * old fashioned way.
2190 */
2191 if (bh || (err != ERR_BAD_DX_DIR))
2192 return bh;
2193 dxtrace(printk("ext4_find_entry: dx failed, "
2194 "falling back\n"));
2195 }
2196 #endif
2197
2198 nblocks = (ext3_lblk_t)(dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb));
2199 start = 0;
2200 block = start;
2201 restart:
2202 do {
2203 /*
2204 * We deal with the read-ahead logic here.
2205 */
2206 if (ra_ptr >= ra_max) {
2207 /* Refill the readahead buffer */
2208 ra_ptr = 0;
2209 b = block;
2210 for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
2211 /*
2212 * Terminate if we reach the end of the
2213 * directory and must wrap, or if our
2214 * search has finished at this block.
2215 */
2216 if (b >= nblocks || (num && block == start)) {
2217 bh_use[ra_max] = NULL;
2218 break;
2219 }
2220 num++;
2221 bh = ext3_bread(icb, dir, b++, &err);
2222 bh_use[ra_max] = bh;
2223 }
2224 }
2225 if ((bh = bh_use[ra_ptr++]) == NULL)
2226 goto next;
2227 wait_on_buffer(bh);
2228 if (!buffer_uptodate(bh)) {
2229 /* read error, skip block & hope for the best */
2230 ext3_error(sb, __FUNCTION__, "reading directory #%lu "
2231 "offset %lu", dir->i_ino,
2232 (unsigned long)block);
2233 brelse(bh);
2234 goto next;
2235 }
2236 i = search_dirblock(bh, dir, dentry,
2237 block << EXT3_BLOCK_SIZE_BITS(sb), res_dir);
2238 if (i == 1) {
2239 ret = bh;
2240 goto cleanup_and_exit;
2241 } else {
2242 brelse(bh);
2243 if (i < 0)
2244 goto cleanup_and_exit;
2245 }
2246 next:
2247 if (++block >= nblocks)
2248 block = 0;
2249 } while (block != start);
2250
2251 /*
2252 * If the directory has grown while we were searching, then
2253 * search the last part of the directory before giving up.
2254 */
2255 block = nblocks;
2256 nblocks = (ext3_lblk_t)(dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb));
2257 if (block < nblocks) {
2258 start = 0;
2259 goto restart;
2260 }
2261
2262 cleanup_and_exit:
2263 /* Clean up the read-ahead blocks */
2264 for (; ra_ptr < ra_max; ra_ptr++)
2265 brelse(bh_use[ra_ptr]);
2266 return ret;
2267 }