2 * COPYRIGHT: See COPYRIGHT.TXT
3 * PROJECT: Ext2 File System Driver for WinNT/2K/XP
5 * PROGRAMMER: Matt Wu <mattwu@163.com>
6 * HOMEPAGE: http://www.ext2fsd.com
8 * Copied from linux/lib/halfmd4.c
12 /* INCLUDES *****************************************************************/
16 #ifdef EXT2_HTREE_INDEX
21 #define dxtrace(command) command
23 #define dxtrace(command)
27 #define swap(type, x, y) do { type z = x; x = y; y = z; } while (0)
30 /* F, G and H are basic MD4 functions: selection, majority, parity */
31 #define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
32 #define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z)))
33 #define H(x, y, z) ((x) ^ (y) ^ (z))
36 * The generic round function. The application is so specific that
37 * we don't bother protecting all the arguments with parens, as is generally
38 * good macro practice, in favor of extra legibility.
39 * Rotation is separate from addition to prevent recomputation
41 #define ROUND(f, a, b, c, d, x, s) \
42 (a += f(b, c, d) + x, a = (a << s) | (a >> (32 - s)))
44 #define K2 013240474631
45 #define K3 015666365641
48 * Basic cut-down MD4 transform. Returns only 32 bits of result.
50 __u32
half_md4_transform(__u32 buf
[4], __u32
const in
[8])
52 __u32 a
= buf
[0], b
= buf
[1], c
= buf
[2], d
= buf
[3];
55 ROUND(F
, a
, b
, c
, d
, in
[0] + K1
, 3);
56 ROUND(F
, d
, a
, b
, c
, in
[1] + K1
, 7);
57 ROUND(F
, c
, d
, a
, b
, in
[2] + K1
, 11);
58 ROUND(F
, b
, c
, d
, a
, in
[3] + K1
, 19);
59 ROUND(F
, a
, b
, c
, d
, in
[4] + K1
, 3);
60 ROUND(F
, d
, a
, b
, c
, in
[5] + K1
, 7);
61 ROUND(F
, c
, d
, a
, b
, in
[6] + K1
, 11);
62 ROUND(F
, b
, c
, d
, a
, in
[7] + K1
, 19);
65 ROUND(G
, a
, b
, c
, d
, in
[1] + K2
, 3);
66 ROUND(G
, d
, a
, b
, c
, in
[3] + K2
, 5);
67 ROUND(G
, c
, d
, a
, b
, in
[5] + K2
, 9);
68 ROUND(G
, b
, c
, d
, a
, in
[7] + K2
, 13);
69 ROUND(G
, a
, b
, c
, d
, in
[0] + K2
, 3);
70 ROUND(G
, d
, a
, b
, c
, in
[2] + K2
, 5);
71 ROUND(G
, c
, d
, a
, b
, in
[4] + K2
, 9);
72 ROUND(G
, b
, c
, d
, a
, in
[6] + K2
, 13);
75 ROUND(H
, a
, b
, c
, d
, in
[3] + K3
, 3);
76 ROUND(H
, d
, a
, b
, c
, in
[7] + K3
, 9);
77 ROUND(H
, c
, d
, a
, b
, in
[2] + K3
, 11);
78 ROUND(H
, b
, c
, d
, a
, in
[6] + K3
, 15);
79 ROUND(H
, a
, b
, c
, d
, in
[1] + K3
, 3);
80 ROUND(H
, d
, a
, b
, c
, in
[5] + K3
, 9);
81 ROUND(H
, c
, d
, a
, b
, in
[0] + K3
, 11);
82 ROUND(H
, b
, c
, d
, a
, in
[4] + K3
, 15);
89 return buf
[1]; /* "most hashed" word */
92 #define DELTA 0x9E3779B9
94 static void TEA_transform(__u32 buf
[4], __u32
const in
[])
97 __u32 b0
= buf
[0], b1
= buf
[1];
98 __u32 a
= in
[0], b
= in
[1], c
= in
[2], d
= in
[3];
103 b0
+= ((b1
<< 4)+a
) ^ (b1
+sum
) ^ ((b1
>> 5)+b
);
104 b1
+= ((b0
<< 4)+c
) ^ (b0
+sum
) ^ ((b0
>> 5)+d
);
112 /* The old legacy hash */
113 static __u32
dx_hack_hash_unsigned(const char *name
, int len
)
115 __u32 hash
, hash0
= 0x12a3fe2d, hash1
= 0x37abe8f9;
116 const unsigned char *ucp
= (const unsigned char *) name
;
119 hash
= hash1
+ (hash0
^ (((int) *ucp
++) * 7152373));
121 if (hash
& 0x80000000)
129 static __u32
dx_hack_hash_signed(const char *name
, int len
)
131 __u32 hash
, hash0
= 0x12a3fe2d, hash1
= 0x37abe8f9;
132 const signed char *scp
= (const signed char *) name
;
135 hash
= hash1
+ (hash0
^ (((int) *scp
++) * 7152373));
137 if (hash
& 0x80000000)
145 static void str2hashbuf_signed(const char *msg
, int len
, __u32
*buf
, int num
)
149 const signed char *scp
= (const signed char *) msg
;
151 pad
= (__u32
)len
| ((__u32
)len
<< 8);
157 for (i
= 0; i
< len
; i
++) {
160 val
= ((int) scp
[i
]) + (val
<< 8);
173 static void str2hashbuf_unsigned(const char *msg
, int len
, __u32
*buf
, int num
)
177 const unsigned char *ucp
= (const unsigned char *) msg
;
179 pad
= (__u32
)len
| ((__u32
)len
<< 8);
185 for (i
= 0; i
< len
; i
++) {
188 val
= ((int) ucp
[i
]) + (val
<< 8);
202 #endif /* EXT2_HTREE_INDEX */
204 __u32
ext3_current_time(struct inode
*in
)
206 LARGE_INTEGER SysTime
;
207 KeQuerySystemTime(&SysTime
);
209 return Ext2LinuxTime(SysTime
);
212 void ext3_warning (struct super_block
* sb
, const char * function
,
219 printk("EXT3-fs warning (device %s): %s: ",
228 /* ext3_bread is safe for meta-data blocks. it's not safe to read file data,
229 since file data is managed by file cache, not volume cache */
230 struct buffer_head
*ext3_bread(struct ext2_icb
*icb
, struct inode
*inode
,
231 unsigned long block
, int *err
)
233 struct buffer_head
* bh
= NULL
;
234 NTSTATUS status
= STATUS_SUCCESS
;
235 ULONG lbn
= 0, num
= 0;
237 PEXT2_MCB Mcb
= CONTAINING_RECORD(inode
, EXT2_MCB
, Inode
);
239 /* for symlink file, read it's target instead */
240 if (NULL
!= Mcb
&& IsMcbSymLink(Mcb
))
247 /* mapping file offset to ext2 block */
248 if (INODE_HAS_EXTENT(&Mcb
->Inode
)) {
249 status
= Ext2MapExtent(icb
, inode
->i_sb
->s_priv
,
253 status
= Ext2MapIndirect(icb
, inode
->i_sb
->s_priv
,
258 if (!NT_SUCCESS(status
)) {
259 *err
= Ext2LinuxError(status
);
263 bh
= sb_getblk(inode
->i_sb
, lbn
);
268 if (buffer_uptodate(bh
))
271 *err
= bh_submit_read(bh
);
279 struct buffer_head
*ext3_append(struct ext2_icb
*icb
, struct inode
*inode
,
280 ext3_lblk_t
*block
, int *err
)
282 PEXT2_MCB mcb
= CONTAINING_RECORD(inode
, EXT2_MCB
, Inode
);
283 PEXT2_FCB dcb
= mcb
->Fcb
;
287 ASSERT(inode
== dcb
->Inode
);
289 /* allocate new block since there's no space for us */
290 *block
= (ext3_lblk_t
)(inode
->i_size
>> inode
->i_sb
->s_blocksize_bits
);
291 dcb
->Header
.AllocationSize
.QuadPart
+= dcb
->Vcb
->BlockSize
;
292 status
= Ext2ExpandFile(icb
, dcb
->Vcb
, mcb
, &(dcb
->Header
.AllocationSize
));
293 if (NT_SUCCESS(status
)) {
296 dcb
->Header
.ValidDataLength
= dcb
->Header
.FileSize
= dcb
->Header
.AllocationSize
;
297 mcb
->Inode
.i_size
= dcb
->Header
.AllocationSize
.QuadPart
;
299 /* save parent directory's inode */
300 Ext2SaveInode(icb
, dcb
->Vcb
, inode
);
303 return ext3_bread(icb
, inode
, *block
, err
);
307 void ext3_inc_count(struct inode
*inode
)
312 void ext3_dec_count(struct inode
*inode
)
317 unsigned char ext3_type_by_mode(umode_t mode
)
319 unsigned char type
= 0;
321 switch (mode
& S_IFMT
) {
323 type
= EXT3_FT_REG_FILE
;
329 type
= EXT3_FT_CHRDEV
;
332 type
= EXT3_FT_BLKDEV
;
341 type
= EXT3_FT_SYMLINK
;
347 void ext3_set_de_type(struct super_block
*sb
,
348 struct ext3_dir_entry_2
*de
,
351 if (EXT3_HAS_INCOMPAT_FEATURE(sb
, EXT3_FEATURE_INCOMPAT_FILETYPE
))
352 de
->file_type
= ext3_type_by_mode(mode
);
356 * ext3_mark_inode_dirty is somewhat expensive, so unlike ext2 we
357 * do not perform it in these functions. We perform it at the call site,
360 int ext3_mark_inode_dirty(struct ext2_icb
*icb
, struct inode
*in
)
362 if (Ext2SaveInode(icb
, in
->i_sb
->s_priv
, in
))
368 void ext3_update_dx_flag(struct inode
*inode
)
370 if (!EXT3_HAS_COMPAT_FEATURE(inode
->i_sb
,
371 EXT3_FEATURE_COMPAT_DIR_INDEX
))
372 EXT3_I(inode
)->i_flags
&= ~EXT3_INDEX_FL
;
376 * Add a new entry into a directory (leaf) block. If de is non-NULL,
377 * it points to a directory entry which is guaranteed to be large
378 * enough for new directory entry. If de is NULL, then
379 * add_dirent_to_buf will attempt search the directory block for
380 * space. It will return -ENOSPC if no space is available, and -EIO
381 * and -EEXIST if directory entry already exists.
383 * NOTE! bh is NOT released in the case where ENOSPC is returned. In
384 * all other cases bh is released.
386 int add_dirent_to_buf(struct ext2_icb
*icb
, struct dentry
*dentry
,
387 struct inode
*inode
, struct ext3_dir_entry_2
*de
,
388 struct buffer_head
*bh
)
390 struct inode
*dir
= dentry
->d_parent
->d_inode
;
391 const char *name
= dentry
->d_name
.name
;
392 int namelen
= dentry
->d_name
.len
;
393 unsigned int offset
= 0;
394 unsigned short reclen
;
398 reclen
= EXT3_DIR_REC_LEN(namelen
);
400 de
= (struct ext3_dir_entry_2
*)bh
->b_data
;
401 top
= bh
->b_data
+ dir
->i_sb
->s_blocksize
- reclen
;
402 while ((char *) de
<= top
) {
403 if (!ext3_check_dir_entry("ext3_add_entry", dir
, de
,
408 if (ext3_match(namelen
, name
, de
)) {
412 nlen
= EXT3_DIR_REC_LEN(de
->name_len
);
413 rlen
= ext3_rec_len_from_disk(de
->rec_len
);
414 if ((de
->inode
? rlen
- nlen
: rlen
) >= reclen
)
416 de
= (struct ext3_dir_entry_2
*)((char *)de
+ rlen
);
419 if ((char *) de
> top
)
423 /* By now the buffer is marked for journaling */
424 nlen
= EXT3_DIR_REC_LEN(de
->name_len
);
425 rlen
= ext3_rec_len_from_disk(de
->rec_len
);
427 struct ext3_dir_entry_2
*de1
= (struct ext3_dir_entry_2
*)((char *)de
+ nlen
);
428 de1
->rec_len
= ext3_rec_len_to_disk(rlen
- nlen
);
429 de
->rec_len
= ext3_rec_len_to_disk(nlen
);
432 de
->file_type
= EXT3_FT_UNKNOWN
;
434 de
->inode
= cpu_to_le32(inode
->i_ino
);
435 ext3_set_de_type(dir
->i_sb
, de
, inode
->i_mode
);
438 de
->name_len
= (__u8
)namelen
;
439 memcpy(de
->name
, name
, namelen
);
442 * XXX shouldn't update any times until successful
443 * completion of syscall, but too many callers depend
446 * XXX similarly, too many callers depend on
447 * ext4_new_inode() setting the times, but error
448 * recovery deletes the inode, so the worst that can
449 * happen is that the times are slightly out of date
450 * and/or different from the directory change time.
452 dir
->i_mtime
= dir
->i_ctime
= ext3_current_time(dir
);
453 ext3_update_dx_flag(dir
);
455 ext3_mark_inode_dirty(icb
, dir
);
456 set_buffer_dirty(bh
);
461 #ifdef EXT2_HTREE_INDEX
464 * Returns the hash of a filename. If len is 0 and name is NULL, then
465 * this function can be used to test whether or not a hash version is
468 * The seed is an 4 longword (32 bits) "secret" which can be used to
469 * uniquify a hash. If the seed is all zero's, then some default seed
472 * A particular hash version specifies whether or not the seed is
473 * represented, and whether or not the returned hash is 32 bits or 64
474 * bits. 32 bit hashes will return 0 for the minor hash.
476 int ext3_dirhash(const char *name
, int len
, struct dx_hash_info
*hinfo
)
479 __u32 minor_hash
= 0;
484 void (*str2hashbuf
)(const char *, int, __u32
*, int) =
487 /* Initialize the default seed for the hash checksum functions */
493 /* Check to see if the seed is all zero's */
495 for (i
= 0; i
< 4; i
++) {
500 memcpy(buf
, hinfo
->seed
, sizeof(buf
));
503 switch (hinfo
->hash_version
) {
504 case DX_HASH_LEGACY_UNSIGNED
:
505 hash
= dx_hack_hash_unsigned(name
, len
);
508 hash
= dx_hack_hash_signed(name
, len
);
510 case DX_HASH_HALF_MD4_UNSIGNED
:
511 str2hashbuf
= str2hashbuf_unsigned
;
512 case DX_HASH_HALF_MD4
:
515 (*str2hashbuf
)(p
, len
, in
, 8);
516 half_md4_transform(buf
, in
);
523 case DX_HASH_TEA_UNSIGNED
:
524 str2hashbuf
= str2hashbuf_unsigned
;
528 (*str2hashbuf
)(p
, len
, in
, 4);
529 TEA_transform(buf
, in
);
541 if (hash
== (EXT4_HTREE_EOF_32BIT
<< 1))
542 hash
= (EXT4_HTREE_EOF_32BIT
- 1) << 1;
544 hinfo
->minor_hash
= minor_hash
;
547 EXPORT_SYMBOL(ext3_dirhash
);
551 * These functions convert from the major/minor hash to an f_pos
554 * Currently we only use major hash numer. This is unfortunate, but
555 * on 32-bit machines, the same VFS interface is used for lseek and
556 * llseek, so if we use the 64 bit offset, then the 32-bit versions of
557 * lseek/telldir/seekdir will blow out spectacularly, and from within
558 * the ext2 low-level routine, we don't know if we're being called by
559 * a 64-bit version of the system call or the 32-bit version of the
560 * system call. Worse yet, NFSv2 only allows for a 32-bit readdir
563 #define hash2pos(major, minor) (major >> 1)
564 #define pos2maj_hash(pos) ((pos << 1) & 0xffffffff)
565 #define pos2min_hash(pos) (0)
568 * This structure holds the nodes of the red-black tree used to store
569 * the directory entry in hash order.
574 struct rb_node rb_hash
;
583 * This functoin implements a non-recursive way of freeing all of the
584 * nodes in the red-black tree.
586 static void free_rb_tree_fname(struct rb_root
*root
)
588 struct rb_node
*n
= root
->rb_node
;
589 struct rb_node
*parent
;
593 /* Do the node's children first */
603 * The node has no children; free it, and then zero
604 * out parent's link to it. Finally go to the
605 * beginning of the loop and try to free the parent
608 parent
= rb_parent(n
);
609 fname
= rb_entry(n
, struct fname
, rb_hash
);
611 struct fname
* old
= fname
;
616 root
->rb_node
= NULL
;
617 else if (parent
->rb_left
== n
)
618 parent
->rb_left
= NULL
;
619 else if (parent
->rb_right
== n
)
620 parent
->rb_right
= NULL
;
623 root
->rb_node
= NULL
;
627 static struct dir_private_info
*create_dir_info(loff_t pos
)
629 struct dir_private_info
*p
;
631 p
= kmalloc(sizeof(struct dir_private_info
), GFP_KERNEL
);
634 p
->root
.rb_node
= NULL
;
636 p
->extra_fname
= NULL
;
638 p
->curr_hash
= (__u32
)pos2maj_hash(pos
);
639 p
->curr_minor_hash
= (__u32
)pos2min_hash(pos
);
644 void ext3_htree_free_dir_info(struct dir_private_info
*p
)
646 free_rb_tree_fname(&p
->root
);
651 * Given a directory entry, enter it into the fname rb tree.
653 int ext3_htree_store_dirent(struct file
*dir_file
, __u32 hash
,
655 struct ext3_dir_entry_2
*dirent
)
657 struct rb_node
**p
, *parent
= NULL
;
658 struct fname
* fname
, *new_fn
;
659 struct dir_private_info
*info
;
663 info
= (struct dir_private_info
*) dir_file
->private_data
;
664 p
= &info
->root
.rb_node
;
666 /* Create and allocate the fname structure */
667 if (dirent
->file_type
& EXT3_DIRENT_LUFID
)
668 extra_data
= ext3_get_dirent_data_len(dirent
);
670 len
= sizeof(struct fname
) + dirent
->name_len
+ extra_data
;
671 new_fn
= kmalloc(len
, GFP_KERNEL
);
674 memset(new_fn
, 0, len
);
676 new_fn
->minor_hash
= minor_hash
;
677 new_fn
->inode
= le32_to_cpu(dirent
->inode
);
678 new_fn
->name_len
= dirent
->name_len
;
679 new_fn
->file_type
= dirent
->file_type
;
680 memcpy(&new_fn
->name
[0], &dirent
->name
[0],
681 dirent
->name_len
+ extra_data
);
682 new_fn
->name
[dirent
->name_len
] = 0;
686 fname
= rb_entry(parent
, struct fname
, rb_hash
);
689 * If the hash and minor hash match up, then we put
690 * them on a linked list. This rarely happens...
692 if ((new_fn
->hash
== fname
->hash
) &&
693 (new_fn
->minor_hash
== fname
->minor_hash
)) {
694 new_fn
->next
= fname
->next
;
695 fname
->next
= new_fn
;
699 if (new_fn
->hash
< fname
->hash
)
701 else if (new_fn
->hash
> fname
->hash
)
703 else if (new_fn
->minor_hash
< fname
->minor_hash
)
705 else /* if (new_fn->minor_hash > fname->minor_hash) */
709 rb_link_node(&new_fn
->rb_hash
, parent
, p
);
710 rb_insert_color(&new_fn
->rb_hash
, &info
->root
);
714 static unsigned char ext3_filetype_table
[] = {
715 DT_UNKNOWN
, DT_REG
, DT_DIR
, DT_CHR
, DT_BLK
, DT_FIFO
, DT_SOCK
, DT_LNK
718 static unsigned char get_dtype(struct super_block
*sb
, int filetype
)
720 if (!EXT3_HAS_INCOMPAT_FEATURE(sb
, EXT3_FEATURE_INCOMPAT_FILETYPE
) ||
721 (filetype
>= EXT3_FT_MAX
))
724 return (ext3_filetype_table
[filetype
]);
728 * This is a helper function for ext3_dx_readdir. It calls filldir
729 * for all entres on the fname linked list. (Normally there is only
730 * one entry on the linked list, unless there are 62 bit hash collisions.)
732 static int call_filldir(struct file
* filp
, void * cookie
,
733 filldir_t filldir
, struct fname
*fname
)
735 struct dir_private_info
*info
= filp
->private_data
;
737 struct inode
*inode
= filp
->f_dentry
->d_inode
;
738 struct super_block
* sb
;
744 printk("call_filldir: called with null fname?!?\n");
747 curr_pos
= hash2pos(fname
->hash
, fname
->minor_hash
);
749 error
= filldir(cookie
, fname
->name
,
750 fname
->name_len
, (ULONG
)curr_pos
,
751 fname
->inode
, get_dtype(sb
, fname
->file_type
));
753 filp
->f_pos
= curr_pos
;
754 info
->extra_fname
= fname
;
783 * dx_root_info is laid out so that if it should somehow get overlaid by a
784 * dirent the two low bits of the hash version will be zero. Therefore, the
785 * hash version mod 4 should never be 0. Sincerely, the paranoia department.
790 struct fake_dirent dot
;
792 struct fake_dirent dotdot
;
796 __le32 reserved_zero
;
798 __u8 info_length
; /* 8 */
799 __u8 indirect_levels
;
803 struct dx_entry entries
[0];
808 struct fake_dirent fake
;
809 struct dx_entry entries
[0];
815 struct buffer_head
*bh
;
816 struct dx_entry
*entries
;
827 #if defined(__REACTOS__) && !defined(_MSC_VER)
828 struct ext3_dir_entry_2
*
829 do_split(struct ext2_icb
*icb
, struct inode
*dir
,
830 struct buffer_head
**bh
,struct dx_frame
*frame
,
831 struct dx_hash_info
*hinfo
, int *error
);
835 * Future: use high four bits of block for coalesce-on-delete flags
836 * Mask them off for now.
839 static inline unsigned dx_get_block (struct dx_entry
*entry
)
841 return le32_to_cpu(entry
->block
) & 0x00ffffff;
844 static inline void dx_set_block (struct dx_entry
*entry
, unsigned value
)
846 entry
->block
= cpu_to_le32(value
);
849 static inline unsigned dx_get_hash (struct dx_entry
*entry
)
851 return le32_to_cpu(entry
->hash
);
854 static inline void dx_set_hash (struct dx_entry
*entry
, unsigned value
)
856 entry
->hash
= cpu_to_le32(value
);
859 static inline unsigned dx_get_count (struct dx_entry
*entries
)
861 return le16_to_cpu(((struct dx_countlimit
*) entries
)->count
);
864 static inline unsigned dx_get_limit (struct dx_entry
*entries
)
866 return le16_to_cpu(((struct dx_countlimit
*) entries
)->limit
);
869 static inline void dx_set_count (struct dx_entry
*entries
, unsigned value
)
871 ((struct dx_countlimit
*) entries
)->count
= cpu_to_le16(value
);
874 static inline void dx_set_limit (struct dx_entry
*entries
, unsigned value
)
876 ((struct dx_countlimit
*) entries
)->limit
= cpu_to_le16(value
);
879 static inline unsigned dx_root_limit (struct inode
*dir
, unsigned infosize
)
881 unsigned entry_space
= dir
->i_sb
->s_blocksize
- EXT3_DIR_REC_LEN(1) -
882 EXT3_DIR_REC_LEN(2) - infosize
;
883 return 0? 20: entry_space
/ sizeof(struct dx_entry
);
886 static inline unsigned dx_node_limit (struct inode
*dir
)
888 unsigned entry_space
= dir
->i_sb
->s_blocksize
- EXT3_DIR_REC_LEN(0);
889 return 0? 22: entry_space
/ sizeof(struct dx_entry
);
896 static void dx_show_index (char * label
, struct dx_entry
*entries
)
898 int i
, n
= dx_get_count (entries
);
899 printk("%s index ", label
);
900 for (i
= 0; i
< n
; i
++)
902 printk("%x->%u ", i
? dx_get_hash(entries
+ i
): 0, dx_get_block(entries
+ i
));
914 struct stats
dx_show_leaf(struct ext2_icb
*icb
, struct dx_hash_info
*hinfo
,
915 struct ext3_dir_entry_2
*de
, int size
, int show_names
)
918 unsigned names
= 0, space
= 0;
919 char *base
= (char *) de
;
920 struct dx_hash_info h
= *hinfo
;
923 while ((char *) de
< base
+ size
)
929 int len
= de
->name_len
;
930 char *name
= de
->name
;
931 while (len
--) printk("%c", *name
++);
932 ext3_dirhash(de
->name
, de
->name_len
, &h
);
933 printk(":%x.%u ", h
.hash
,
934 ((char *) de
- base
));
936 space
+= EXT3_DIR_REC_LEN(de
->name_len
);
939 de
= (struct ext3_dir_entry_2
*) ((char *) de
+ le16_to_cpu(de
->rec_len
));
941 printk("(%i)\n", names
);
950 struct stats
dx_show_entries(struct ext2_icb
*icb
, struct dx_hash_info
*hinfo
,
951 struct inode
*dir
, struct dx_entry
*entries
, int levels
)
953 unsigned blocksize
= dir
->i_sb
->s_blocksize
;
954 unsigned count
= dx_get_count (entries
), names
= 0, space
= 0, i
;
956 struct buffer_head
*bh
;
960 printk("%i indexed blocks...\n", count
);
961 for (i
= 0; i
< count
; i
++, entries
++)
963 u32 block
= dx_get_block(entries
), hash
= i
? dx_get_hash(entries
): 0;
964 u32 range
= i
< count
- 1? (dx_get_hash(entries
+ 1) - hash
): ~hash
;
966 printk("%s%3u:%03u hash %8x/%8x ",levels
?"":" ", i
, block
, hash
, range
);
967 if (!(bh
= ext3_bread (icb
, dir
, block
, &err
))) continue;
969 dx_show_entries(icb
, hinfo
, dir
, ((struct dx_node
*) bh
->b_data
)->entries
, levels
- 1):
970 dx_show_leaf(icb
, hinfo
, (struct ext3_dir_entry_2
*) bh
->b_data
, blocksize
, 0);
971 names
+= stats
.names
;
972 space
+= stats
.space
;
973 bcount
+= stats
.bcount
;
977 printk("%snames %u, fullness %u (%u%%)\n", levels
?"":" ",
978 names
, space
/bcount
,(space
/bcount
)*100/blocksize
);
986 #endif /* DX_DEBUG */
989 int ext3_save_inode ( struct ext2_icb
*icb
, struct inode
*in
)
991 return Ext2SaveInode(icb
, in
->i_sb
->s_priv
, in
);
995 * Probe for a directory leaf block to search.
997 * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
998 * error in the directory index, and the caller should fall back to
999 * searching the directory normally. The callers of dx_probe **MUST**
1000 * check for this error code, and make sure it never gets reflected
1001 * back to userspace.
1003 static struct dx_frame
*
1004 dx_probe(struct ext2_icb
*icb
, struct dentry
*dentry
, struct inode
*dir
,
1005 struct dx_hash_info
*hinfo
, struct dx_frame
*frame_in
, int *err
)
1007 unsigned count
, indirect
;
1008 struct dx_entry
*at
, *entries
, *p
, *q
, *m
;
1009 struct dx_root
*root
;
1010 struct buffer_head
*bh
;
1011 struct dx_frame
*frame
= frame_in
;
1016 dir
= dentry
->d_parent
->d_inode
;
1017 if (!(bh
= ext3_bread (icb
, dir
, 0, err
)))
1019 root
= (struct dx_root
*) bh
->b_data
;
1020 if (root
->info
.hash_version
!= DX_HASH_TEA
&&
1021 root
->info
.hash_version
!= DX_HASH_HALF_MD4
&&
1022 root
->info
.hash_version
!= DX_HASH_LEGACY
) {
1023 ext3_warning(dir
->i_sb
, __FUNCTION__
,
1024 "Unrecognised inode hash code %d",
1025 root
->info
.hash_version
);
1027 *err
= ERR_BAD_DX_DIR
;
1030 hinfo
->hash_version
= root
->info
.hash_version
;
1031 hinfo
->seed
= EXT3_SB(dir
->i_sb
)->s_hash_seed
;
1033 ext3_dirhash(dentry
->d_name
.name
, dentry
->d_name
.len
, hinfo
);
1036 if (root
->info
.unused_flags
& 1) {
1037 ext3_warning(dir
->i_sb
, __FUNCTION__
,
1038 "Unimplemented inode hash flags: %#06x",
1039 root
->info
.unused_flags
);
1041 *err
= ERR_BAD_DX_DIR
;
1045 if ((indirect
= root
->info
.indirect_levels
) > 1) {
1046 ext3_warning(dir
->i_sb
, __FUNCTION__
,
1047 "Unimplemented inode hash depth: %#06x",
1048 root
->info
.indirect_levels
);
1050 *err
= ERR_BAD_DX_DIR
;
1054 entries
= (struct dx_entry
*) (((char *)&root
->info
) +
1055 root
->info
.info_length
);
1057 if (dx_get_limit(entries
) != dx_root_limit(dir
,
1058 root
->info
.info_length
)) {
1059 ext3_warning(dir
->i_sb
, __FUNCTION__
,
1060 "dx entry: limit != root limit");
1062 *err
= ERR_BAD_DX_DIR
;
1066 dxtrace(printk("Look up %x", hash
));
1069 count
= dx_get_count(entries
);
1070 if (!count
|| count
> dx_get_limit(entries
)) {
1071 ext3_warning(dir
->i_sb
, __FUNCTION__
,
1072 "dx entry: no count or count > limit");
1074 *err
= ERR_BAD_DX_DIR
;
1079 q
= entries
+ count
- 1;
1083 if (dx_get_hash(m
) > hash
)
1089 if (0) // linear search cross check
1091 unsigned n
= count
- 1;
1095 if (dx_get_hash(++at
) > hash
)
1101 ASSERT(at
== p
- 1);
1106 frame
->entries
= entries
;
1108 if (!indirect
--) return frame
;
1109 if (!(bh
= ext3_bread(icb
, dir
, dx_get_block(at
), err
)))
1111 at
= entries
= ((struct dx_node
*) bh
->b_data
)->entries
;
1112 if (dx_get_limit(entries
) != dx_node_limit (dir
)) {
1113 ext3_warning(dir
->i_sb
, __FUNCTION__
,
1114 "dx entry: limit != node limit");
1116 *err
= ERR_BAD_DX_DIR
;
1123 while (frame
>= frame_in
) {
1128 if (*err
== ERR_BAD_DX_DIR
)
1129 ext3_warning(dir
->i_sb
, __FUNCTION__
,
1130 "Corrupt dir inode %ld, running e2fsck is "
1131 "recommended.", dir
->i_ino
);
1135 static void dx_release (struct dx_frame
*frames
)
1137 if (frames
[0].bh
== NULL
)
1140 if (((struct dx_root
*) frames
[0].bh
->b_data
)->info
.indirect_levels
)
1141 brelse(frames
[1].bh
);
1142 brelse(frames
[0].bh
);
1146 * This function increments the frame pointer to search the next leaf
1147 * block, and reads in the necessary intervening nodes if the search
1148 * should be necessary. Whether or not the search is necessary is
1149 * controlled by the hash parameter. If the hash value is even, then
1150 * the search is only continued if the next block starts with that
1151 * hash value. This is used if we are searching for a specific file.
1153 * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
1155 * This function returns 1 if the caller should continue to search,
1156 * or 0 if it should not. If there is an error reading one of the
1157 * index blocks, it will a negative error code.
1159 * If start_hash is non-null, it will be filled in with the starting
1160 * hash of the next page.
1162 int ext3_htree_next_block(struct ext2_icb
*icb
, struct inode
*dir
,
1163 __u32 hash
, struct dx_frame
*frame
,
1164 struct dx_frame
*frames
, __u32
*start_hash
)
1167 struct buffer_head
*bh
;
1168 int err
, num_frames
= 0;
1173 * Find the next leaf page by incrementing the frame pointer.
1174 * If we run out of entries in the interior node, loop around and
1175 * increment pointer in the parent node. When we break out of
1176 * this loop, num_frames indicates the number of interior
1177 * nodes need to be read.
1180 if (++(p
->at
) < p
->entries
+ dx_get_count(p
->entries
))
1189 * If the hash is 1, then continue only if the next page has a
1190 * continuation hash of any value. This is used for readdir
1191 * handling. Otherwise, check to see if the hash matches the
1192 * desired contiuation hash. If it doesn't, return since
1193 * there's no point to read in the successive index pages.
1195 bhash
= dx_get_hash(p
->at
);
1197 *start_hash
= bhash
;
1198 if ((hash
& 1) == 0) {
1199 if ((bhash
& ~1) != hash
)
1203 * If the hash is HASH_NB_ALWAYS, we always go to the next
1204 * block so no check is necessary
1206 while (num_frames
--) {
1207 if (!(bh
= ext3_bread(icb
, dir
, dx_get_block(p
->at
), &err
)))
1208 return err
; /* Failure */
1212 p
->at
= p
->entries
= ((struct dx_node
*) bh
->b_data
)->entries
;
1218 * This function fills a red-black tree with information from a
1219 * directory block. It returns the number directory entries loaded
1220 * into the tree. If there is an error it is returned in err.
1222 int htree_dirblock_to_tree(struct ext2_icb
*icb
, struct file
*dir_file
,
1223 struct inode
*dir
, int block
,
1224 struct dx_hash_info
*hinfo
,
1225 __u32 start_hash
, __u32 start_minor_hash
)
1227 struct buffer_head
*bh
;
1228 struct ext3_dir_entry_2
*de
, *top
;
1231 dxtrace(printk("In htree dirblock_to_tree: block %d\n", block
));
1232 if (!(bh
= ext3_bread (icb
, dir
, block
, &err
)))
1235 de
= (struct ext3_dir_entry_2
*) bh
->b_data
;
1236 top
= (struct ext3_dir_entry_2
*) ((char *) de
+
1237 dir
->i_sb
->s_blocksize
-
1238 EXT3_DIR_REC_LEN(0));
1239 for (; de
< top
; de
= ext3_next_entry(de
)) {
1240 if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir
, de
, bh
,
1241 ((unsigned long)block
<<EXT3_BLOCK_SIZE_BITS(dir
->i_sb
))
1242 + (unsigned long)((char *)de
- bh
->b_data
))) {
1243 /* On error, skip the f_pos to the next block. */
1244 dir_file
->f_pos
= (dir_file
->f_pos
|
1245 (dir
->i_sb
->s_blocksize
- 1)) + 1;
1249 ext3_dirhash(de
->name
, de
->name_len
, hinfo
);
1250 if ((hinfo
->hash
< start_hash
) ||
1251 ((hinfo
->hash
== start_hash
) &&
1252 (hinfo
->minor_hash
< start_minor_hash
)))
1256 if ((err
= ext3_htree_store_dirent(dir_file
,
1257 hinfo
->hash
, hinfo
->minor_hash
, de
)) != 0) {
1268 * This function fills a red-black tree with information from a
1269 * directory. We start scanning the directory in hash order, starting
1270 * at start_hash and start_minor_hash.
1272 * This function returns the number of entries inserted into the tree,
1273 * or a negative error code.
1275 int ext3_htree_fill_tree(struct ext2_icb
*icb
, struct file
*dir_file
,
1276 __u32 start_hash
, __u32 start_minor_hash
,
1279 struct dx_hash_info hinfo
;
1280 struct ext3_dir_entry_2
*de
;
1281 struct dx_frame frames
[2], *frame
;
1288 dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash
,
1290 dir
= dir_file
->f_dentry
->d_inode
;
1291 if (!(EXT3_I(dir
)->i_flags
& EXT3_INDEX_FL
)) {
1292 hinfo
.hash_version
= EXT3_SB(dir
->i_sb
)->s_def_hash_version
;
1293 hinfo
.seed
= EXT3_SB(dir
->i_sb
)->s_hash_seed
;
1294 count
= htree_dirblock_to_tree(icb
, dir_file
, dir
, 0, &hinfo
,
1295 start_hash
, start_minor_hash
);
1300 hinfo
.hash
= start_hash
;
1301 hinfo
.minor_hash
= 0;
1302 frame
= dx_probe(icb
, NULL
, dir_file
->f_dentry
->d_inode
, &hinfo
, frames
, &err
);
1306 /* Add '.' and '..' from the htree header */
1307 if (!start_hash
&& !start_minor_hash
) {
1308 de
= (struct ext3_dir_entry_2
*) frames
[0].bh
->b_data
;
1309 if ((err
= ext3_htree_store_dirent(dir_file
, 0, 0, de
)) != 0)
1313 if (start_hash
< 2 || (start_hash
==2 && start_minor_hash
==0)) {
1314 de
= (struct ext3_dir_entry_2
*) frames
[0].bh
->b_data
;
1315 de
= ext3_next_entry(de
);
1316 if ((err
= ext3_htree_store_dirent(dir_file
, 2, 0, de
)) != 0)
1322 block
= dx_get_block(frame
->at
);
1323 ret
= htree_dirblock_to_tree(icb
, dir_file
, dir
, block
, &hinfo
,
1324 start_hash
, start_minor_hash
);
1331 ret
= ext3_htree_next_block(icb
, dir
, HASH_NB_ALWAYS
,
1332 frame
, frames
, &hashval
);
1333 *next_hash
= hashval
;
1339 * Stop if: (a) there are no more entries, or
1340 * (b) we have inserted at least one entry and the
1341 * next hash value is not a continuation
1344 (count
&& ((hashval
& 1) == 0)))
1348 dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
1349 count
, *next_hash
));
1359 * Directory block splitting, compacting
1363 * Create map of hash values, offsets, and sizes, stored at end of block.
1364 * Returns number of entries mapped.
1366 static int dx_make_map (struct ext3_dir_entry_2
*de
, int size
,
1367 struct dx_hash_info
*hinfo
, struct dx_map_entry
*map_tail
)
1370 char *base
= (char *) de
;
1371 struct dx_hash_info h
= *hinfo
;
1373 while ((char *) de
< base
+ size
)
1375 if (de
->name_len
&& de
->inode
) {
1376 ext3_dirhash(de
->name
, de
->name_len
, &h
);
1378 map_tail
->hash
= h
.hash
;
1379 map_tail
->offs
= (u16
) ((char *) de
- base
);
1380 map_tail
->size
= le16_to_cpu(de
->rec_len
);
1384 /* XXX: do we need to check rec_len == 0 case? -Chris */
1385 de
= (struct ext3_dir_entry_2
*) ((char *) de
+ le16_to_cpu(de
->rec_len
));
1390 /* Sort map by hash value */
1391 static void dx_sort_map (struct dx_map_entry
*map
, unsigned count
)
1393 struct dx_map_entry
*p
, *q
, *top
= map
+ count
- 1;
1395 /* Combsort until bubble sort doesn't suck */
1398 count
= count
*10/13;
1399 if (count
- 9 < 2) /* 9, 10 -> 11 */
1401 for (p
= top
, q
= p
- count
; q
>= map
; p
--, q
--)
1402 if (p
->hash
< q
->hash
)
1403 swap(struct dx_map_entry
, *p
, *q
);
1405 /* Garden variety bubble sort */
1411 if (q
[1].hash
>= q
[0].hash
)
1413 swap(struct dx_map_entry
, *(q
+1), *q
);
1419 static void dx_insert_block(struct dx_frame
*frame
, u32 hash
, u32 block
)
1421 struct dx_entry
*entries
= frame
->entries
;
1422 struct dx_entry
*old
= frame
->at
, *new = old
+ 1;
1423 unsigned int count
= dx_get_count(entries
);
1425 ASSERT(count
< dx_get_limit(entries
));
1426 ASSERT(old
< entries
+ count
);
1427 memmove(new + 1, new, (char *)(entries
+ count
) - (char *)(new));
1428 dx_set_hash(new, hash
);
1429 dx_set_block(new, block
);
1430 dx_set_count(entries
, count
+ 1);
1433 struct buffer_head
*
1434 ext3_dx_find_entry(struct ext2_icb
*icb
, struct dentry
*dentry
,
1435 struct ext3_dir_entry_2
**res_dir
, int *err
)
1437 struct super_block
* sb
;
1438 struct dx_hash_info hinfo
= {0};
1440 struct dx_frame frames
[2], *frame
;
1441 struct ext3_dir_entry_2
*de
, *top
;
1442 struct buffer_head
*bh
;
1443 unsigned long block
;
1445 int namelen
= dentry
->d_name
.len
;
1446 const __u8
*name
= dentry
->d_name
.name
;
1447 struct inode
*dir
= dentry
->d_parent
->d_inode
;
1450 /* NFS may look up ".." - look at dx_root directory block */
1451 if (namelen
> 2 || name
[0] != '.'||(name
[1] != '.' && name
[1] != '\0')) {
1452 if (!(frame
= dx_probe(icb
, dentry
, NULL
, &hinfo
, frames
, err
)))
1456 frame
->bh
= NULL
; /* for dx_release() */
1457 frame
->at
= (struct dx_entry
*)frames
; /* hack for zero entry*/
1458 dx_set_block(frame
->at
, 0); /* dx_root block is 0 */
1462 block
= dx_get_block(frame
->at
);
1463 if (!(bh
= ext3_bread (icb
, dir
, block
, err
)))
1465 de
= (struct ext3_dir_entry_2
*) bh
->b_data
;
1466 top
= (struct ext3_dir_entry_2
*) ((char *) de
+ sb
->s_blocksize
-
1467 EXT3_DIR_REC_LEN(0));
1468 for (; de
< top
; de
= ext3_next_entry(de
))
1469 if (ext3_match (namelen
, name
, de
)) {
1470 if (!ext3_check_dir_entry("ext3_find_entry",
1472 (block
<<EXT3_BLOCK_SIZE_BITS(sb
))
1473 + (unsigned long)((char *)de
- bh
->b_data
))) {
1478 dx_release (frames
);
1482 /* Check to see if we should continue to search */
1483 retval
= ext3_htree_next_block(icb
, dir
, hash
, frame
,
1486 ext3_warning(sb
, __FUNCTION__
,
1487 "error reading index page in directory #%lu",
1492 } while (retval
== 1);
1496 dxtrace(printk("%s not found\n", name
));
1497 dx_release (frames
);
1501 int ext3_dx_readdir(struct file
*filp
, filldir_t filldir
,
1504 struct dir_private_info
*info
= filp
->private_data
;
1505 struct inode
*inode
= filp
->f_dentry
->d_inode
;
1506 struct fname
*fname
;
1507 PEXT2_FILLDIR_CONTEXT fc
= context
;
1511 info
= create_dir_info(filp
->f_pos
);
1514 filp
->private_data
= info
;
1517 if (filp
->f_pos
== EXT3_HTREE_EOF
)
1520 /* Some one has messed with f_pos; reset the world */
1521 if (info
->last_pos
!= filp
->f_pos
) {
1522 free_rb_tree_fname(&info
->root
);
1523 info
->curr_node
= NULL
;
1524 info
->extra_fname
= NULL
;
1525 info
->curr_hash
= (__u32
)pos2maj_hash(filp
->f_pos
);
1526 info
->curr_minor_hash
= (__u32
)pos2min_hash(filp
->f_pos
);
1530 * If there are any leftover names on the hash collision
1531 * chain, return them first.
1533 if (info
->extra_fname
) {
1534 if (call_filldir(filp
, context
, filldir
, info
->extra_fname
))
1536 info
->extra_fname
= NULL
;
1538 } else if (!info
->curr_node
)
1539 info
->curr_node
= rb_first(&info
->root
);
1543 * Fill the rbtree if we have no more entries,
1544 * or the inode has changed since we last read in the
1547 if ((!info
->curr_node
) ||
1548 (filp
->f_version
!= inode
->i_version
)) {
1549 info
->curr_node
= NULL
;
1550 free_rb_tree_fname(&info
->root
);
1551 filp
->f_version
= inode
->i_version
;
1552 ret
= ext3_htree_fill_tree(fc
->efc_irp
, filp
, info
->curr_hash
,
1553 info
->curr_minor_hash
, &info
->next_hash
);
1557 filp
->f_pos
= EXT3_HTREE_EOF
;
1560 info
->curr_node
= rb_first(&info
->root
);
1563 fname
= rb_entry(info
->curr_node
, struct fname
, rb_hash
);
1564 info
->curr_hash
= fname
->hash
;
1565 info
->curr_minor_hash
= fname
->minor_hash
;
1566 if (call_filldir(filp
, context
, filldir
, fname
))
1569 info
->curr_node
= rb_next(info
->curr_node
);
1570 if (info
->curr_node
) {
1571 fname
= rb_entry(info
->curr_node
, struct fname
,
1573 info
->curr_hash
= fname
->hash
;
1574 info
->curr_minor_hash
= fname
->minor_hash
;
1576 if (info
->next_hash
== ~0) {
1577 filp
->f_pos
= EXT3_HTREE_EOF
;
1580 info
->curr_hash
= info
->next_hash
;
1581 info
->curr_minor_hash
= 0;
1585 info
->last_pos
= filp
->f_pos
;
1589 int ext3_release_dir (struct inode
* inode
, struct file
* filp
)
1591 if (filp
->private_data
) {
1592 ext3_htree_free_dir_info(filp
->private_data
);
1593 filp
->private_data
= NULL
;
1600 * Returns 0 for success, or a negative error value
1602 int ext3_dx_add_entry(struct ext2_icb
*icb
, struct dentry
*dentry
,
1603 struct inode
*inode
)
1605 struct dx_frame frames
[2], *frame
;
1606 struct dx_entry
*entries
, *at
;
1607 struct dx_hash_info hinfo
;
1608 struct buffer_head
* bh
;
1609 struct inode
*dir
= dentry
->d_parent
->d_inode
;
1610 struct super_block
* sb
= dir
->i_sb
;
1611 struct ext3_dir_entry_2
*de
;
1614 frame
= dx_probe(icb
, dentry
, NULL
, &hinfo
, frames
, &err
);
1617 entries
= frame
->entries
;
1620 if (!(bh
= ext3_bread(icb
, dir
, dx_get_block(frame
->at
), &err
)))
1623 err
= add_dirent_to_buf(icb
, dentry
, inode
, NULL
, bh
);
1624 if (err
!= -ENOSPC
) {
1629 /* Block full, should compress but for now just split */
1630 dxtrace(printk("using %u of %u node entries\n",
1631 dx_get_count(entries
), dx_get_limit(entries
)));
1632 /* Need to split index? */
1633 if (dx_get_count(entries
) == dx_get_limit(entries
)) {
1635 unsigned icount
= dx_get_count(entries
);
1636 int levels
= (int)(frame
- frames
);
1637 struct dx_entry
*entries2
;
1638 struct dx_node
*node2
;
1639 struct buffer_head
*bh2
;
1641 if (levels
&& (dx_get_count(frames
->entries
) ==
1642 dx_get_limit(frames
->entries
))) {
1643 ext3_warning(sb
, __FUNCTION__
,
1644 "Directory index full!");
1648 bh2
= ext3_append (icb
, dir
, &newblock
, &err
);
1651 node2
= (struct dx_node
*)(bh2
->b_data
);
1652 entries2
= node2
->entries
;
1653 node2
->fake
.rec_len
= cpu_to_le16(sb
->s_blocksize
);
1654 node2
->fake
.inode
= 0;
1657 unsigned icount1
= icount
/2, icount2
= icount
- icount1
;
1658 unsigned hash2
= dx_get_hash(entries
+ icount1
);
1659 dxtrace(printk("Split index %i/%i\n", icount1
, icount2
));
1661 memcpy ((char *) entries2
, (char *) (entries
+ icount1
),
1662 icount2
* sizeof(struct dx_entry
));
1663 dx_set_count (entries
, icount1
);
1664 dx_set_count (entries2
, icount2
);
1665 dx_set_limit (entries2
, dx_node_limit(dir
));
1667 /* Which index block gets the new entry? */
1668 if ((unsigned int)(at
- entries
) >= icount1
) {
1669 frame
->at
= at
= at
- entries
- icount1
+ entries2
;
1670 frame
->entries
= entries
= entries2
;
1671 swap(struct buffer_head
*, frame
->bh
, bh2
);
1673 dx_insert_block (frames
+ 0, hash2
, newblock
);
1674 dxtrace(dx_show_index ("node", frames
[1].entries
));
1675 dxtrace(dx_show_index ("node",
1676 ((struct dx_node
*) bh2
->b_data
)->entries
));
1677 set_buffer_dirty(bh2
);
1680 dxtrace(printk("Creating second level index...\n"));
1681 memcpy((char *) entries2
, (char *) entries
,
1682 icount
* sizeof(struct dx_entry
));
1683 dx_set_limit(entries2
, dx_node_limit(dir
));
1686 dx_set_count(entries
, 1);
1687 dx_set_block(entries
+ 0, newblock
);
1688 ((struct dx_root
*) frames
[0].bh
->b_data
)->info
.indirect_levels
= 1;
1690 /* Add new access path frame */
1692 frame
->at
= at
= at
- entries
+ entries2
;
1693 frame
->entries
= entries
= entries2
;
1696 // ext3_journal_dirty_metadata(handle, frames[0].bh);
1697 set_buffer_dirty(frames
[0].bh
);
1699 de
= do_split(icb
, dir
, &bh
, frame
, &hinfo
, &err
);
1702 err
= add_dirent_to_buf(icb
, dentry
, inode
, de
, bh
);
1714 * Move count entries from end of map between two memory locations.
1715 * Returns pointer to last entry moved.
1717 struct ext3_dir_entry_2
*
1718 dx_move_dirents(char *from
, char *to
, struct dx_map_entry
*map
, int count
)
1720 unsigned rec_len
= 0;
1723 struct ext3_dir_entry_2
*de
= (struct ext3_dir_entry_2
*) (from
+ map
->offs
);
1724 rec_len
= EXT3_DIR_REC_LEN(de
->name_len
);
1725 memcpy (to
, de
, rec_len
);
1726 ((struct ext3_dir_entry_2
*) to
)->rec_len
=
1727 cpu_to_le16(rec_len
);
1732 return (struct ext3_dir_entry_2
*) (to
- rec_len
);
1736 * Compact each dir entry in the range to the minimal rec_len.
1737 * Returns pointer to last entry in range.
1739 struct ext3_dir_entry_2
* dx_pack_dirents(char *base
, int size
)
1741 struct ext3_dir_entry_2
*next
, *to
, *prev
, *de
= (struct ext3_dir_entry_2
*) base
;
1742 unsigned rec_len
= 0;
1745 while ((char*)de
< base
+ size
) {
1746 next
= (struct ext3_dir_entry_2
*) ((char *) de
+
1747 le16_to_cpu(de
->rec_len
));
1748 if (de
->inode
&& de
->name_len
) {
1749 rec_len
= EXT3_DIR_REC_LEN(de
->name_len
);
1751 memmove(to
, de
, rec_len
);
1752 to
->rec_len
= cpu_to_le16(rec_len
);
1754 to
= (struct ext3_dir_entry_2
*) (((char *) to
) + rec_len
);
1762 * Split a full leaf block to make room for a new dir entry.
1763 * Allocate a new block, and move entries so that they are approx. equally full.
1764 * Returns pointer to de in block into which the new entry will be inserted.
1766 struct ext3_dir_entry_2
*
1767 do_split(struct ext2_icb
*icb
, struct inode
*dir
,
1768 struct buffer_head
**bh
,struct dx_frame
*frame
,
1769 struct dx_hash_info
*hinfo
, int *error
)
1771 unsigned blocksize
= dir
->i_sb
->s_blocksize
;
1772 unsigned count
, continued
;
1773 struct buffer_head
*bh2
;
1776 struct dx_map_entry
*map
;
1777 char *data1
= (*bh
)->b_data
, *data2
;
1778 unsigned split
, move
, size
;
1779 struct ext3_dir_entry_2
*de
= NULL
, *de2
;
1782 bh2
= ext3_append (icb
, dir
, &newblock
, error
);
1789 data2
= bh2
->b_data
;
1791 /* create map in the end of data2 block */
1792 map
= (struct dx_map_entry
*) (data2
+ blocksize
);
1793 count
= dx_make_map ((struct ext3_dir_entry_2
*) data1
,
1794 blocksize
, hinfo
, map
);
1796 dx_sort_map (map
, count
);
1797 /* Split the existing block in the middle, size-wise */
1800 for (i
= count
-1; i
>= 0; i
--) {
1801 /* is more than half of this entry in 2nd half of the block? */
1802 if (size
+ map
[i
].size
/2 > blocksize
/2)
1804 size
+= map
[i
].size
;
1807 /* map index at which we will split */
1808 split
= count
- move
;
1809 hash2
= map
[split
].hash
;
1810 continued
= hash2
== map
[split
- 1].hash
;
1811 dxtrace(printk("Split block %i at %x, %i/%i\n",
1812 dx_get_block(frame
->at
), hash2
, split
, count
-split
));
1814 /* Fancy dance to stay within two buffers */
1815 de2
= dx_move_dirents(data1
, data2
, map
+ split
, count
- split
);
1816 de
= dx_pack_dirents(data1
,blocksize
);
1817 de
->rec_len
= cpu_to_le16(data1
+ blocksize
- (char *) de
);
1818 de2
->rec_len
= cpu_to_le16(data2
+ blocksize
- (char *) de2
);
1819 dxtrace(dx_show_leaf (icb
, hinfo
, (struct ext3_dir_entry_2
*) data1
, blocksize
, 1));
1820 dxtrace(dx_show_leaf (icb
, hinfo
, (struct ext3_dir_entry_2
*) data2
, blocksize
, 1));
1822 /* Which block gets the new entry? */
1823 if (hinfo
->hash
>= hash2
)
1825 swap(struct buffer_head
*, *bh
, bh2
);
1828 dx_insert_block (frame
, hash2
+ continued
, newblock
);
1829 set_buffer_dirty(bh2
);
1830 set_buffer_dirty(frame
->bh
);
1833 dxtrace(dx_show_index ("frame", frame
->entries
));
1839 * This converts a one block unindexed directory to a 3 block indexed
1840 * directory, and adds the dentry to the indexed directory.
1842 int make_indexed_dir(struct ext2_icb
*icb
, struct dentry
*dentry
,
1843 struct inode
*inode
, struct buffer_head
*bh
)
1845 struct inode
*dir
= dentry
->d_parent
->d_inode
;
1846 const char *name
= dentry
->d_name
.name
;
1847 int namelen
= dentry
->d_name
.len
;
1848 struct buffer_head
*bh2
;
1849 struct dx_root
*root
;
1850 struct dx_frame frames
[2], *frame
;
1851 struct dx_entry
*entries
;
1852 struct ext3_dir_entry_2
*de
, *de2
;
1857 struct dx_hash_info hinfo
;
1859 struct fake_dirent
*fde
;
1861 blocksize
= dir
->i_sb
->s_blocksize
;
1862 dxtrace(printk("Creating index: inode %lu\n", dir
->i_ino
));
1864 root
= (struct dx_root
*) bh
->b_data
;
1866 /* The 0th block becomes the root, move the dirents out */
1867 fde
= &root
->dotdot
;
1868 de
= (struct ext3_dir_entry_2
*)((char *)fde
+
1869 ext3_rec_len_from_disk(fde
->rec_len
));
1870 if ((char *) de
>= (((char *) root
) + blocksize
)) {
1871 DEBUG(DL_ERR
, ( "%s: invalid rec_len for '..' in inode %lu", __FUNCTION__
,
1876 len
= (unsigned int)((char *) root
+ blocksize
- (char *) de
);
1878 /* Allocate new block for the 0th block's dirents */
1879 bh2
= ext3_append(icb
, dir
, &block
, &retval
);
1884 EXT3_I(dir
)->i_flags
|= EXT3_INDEX_FL
;
1885 data1
= bh2
->b_data
;
1887 memcpy (data1
, de
, len
);
1888 de
= (struct ext3_dir_entry_2
*) data1
;
1890 while ((char *)(de2
= ext3_next_entry(de
)) < top
)
1892 de
->rec_len
= ext3_rec_len_to_disk(blocksize
+ (__u32
)(data1
- (char *)de
));
1893 /* Initialize the root; the dot dirents already exist */
1894 de
= (struct ext3_dir_entry_2
*) (&root
->dotdot
);
1895 de
->rec_len
= ext3_rec_len_to_disk(blocksize
- EXT3_DIR_REC_LEN(2));
1896 memset (&root
->info
, 0, sizeof(root
->info
));
1897 root
->info
.info_length
= sizeof(root
->info
);
1898 root
->info
.hash_version
= (__u8
)(EXT3_SB(dir
->i_sb
)->s_def_hash_version
);
1899 entries
= root
->entries
;
1900 dx_set_block(entries
, 1);
1901 dx_set_count(entries
, 1);
1902 dx_set_limit(entries
, dx_root_limit(dir
, sizeof(root
->info
)));
1904 /* Initialize as for dx_probe */
1905 hinfo
.hash_version
= root
->info
.hash_version
;
1906 hinfo
.seed
= EXT3_SB(dir
->i_sb
)->s_hash_seed
;
1907 ext3_dirhash(name
, namelen
, &hinfo
);
1909 frame
->entries
= entries
;
1910 frame
->at
= entries
;
1913 /* bh and bh2 are to be marked as dirty in do_split */
1914 de
= do_split(icb
, dir
, &bh
, frame
, &hinfo
, &retval
);
1915 dx_release (frames
);
1919 return add_dirent_to_buf(icb
, dentry
, inode
, de
, bh
);
1922 #else /* EXT2_HTREE_INDEX */
1924 int ext3_release_dir (struct inode
* inode
, struct file
* filp
)
1929 #endif /* !EXT2_HTREE_INDEX */
1934 * adds a file entry to the specified directory, using the same
1935 * semantics as ext3_find_entry(). It returns NULL if it failed.
1937 * NOTE!! The inode part of 'de' is left at 0 - which means you
1938 * may not sleep between calling this and putting something into
1939 * the entry, as someone else might have used it while you slept.
1941 int ext3_add_entry(struct ext2_icb
*icb
, struct dentry
*dentry
, struct inode
*inode
)
1943 struct inode
*dir
= dentry
->d_parent
->d_inode
;
1944 struct buffer_head
*bh
;
1945 struct ext3_dir_entry_2
*de
;
1946 struct super_block
*sb
;
1948 #ifdef EXT2_HTREE_INDEX
1952 ext3_lblk_t block
, blocks
;
1955 blocksize
= sb
->s_blocksize
;
1956 if (!dentry
->d_name
.len
)
1959 #ifdef EXT2_HTREE_INDEX
1961 retval
= ext3_dx_add_entry(icb
, dentry
, inode
);
1962 if (!retval
|| (retval
!= ERR_BAD_DX_DIR
))
1964 EXT3_I(dir
)->i_flags
&= ~EXT3_INDEX_FL
;
1966 ext3_save_inode(icb
, dir
);
1970 blocks
= (ext3_lblk_t
)(dir
->i_size
>> sb
->s_blocksize_bits
);
1971 for (block
= 0; block
< blocks
; block
++) {
1972 bh
= ext3_bread(icb
, dir
, block
, &retval
);
1975 retval
= add_dirent_to_buf(icb
, dentry
, inode
, NULL
, bh
);
1976 if (retval
!= -ENOSPC
)
1979 #ifdef EXT2_HTREE_INDEX
1980 if (blocks
== 1 && !dx_fallback
&&
1981 EXT3_HAS_COMPAT_FEATURE(sb
, EXT3_FEATURE_COMPAT_DIR_INDEX
))
1982 return make_indexed_dir(icb
, dentry
, inode
, bh
);
1987 bh
= ext3_append(icb
, dir
, &block
, &retval
);
1990 de
= (struct ext3_dir_entry_2
*) bh
->b_data
;
1992 de
->rec_len
= ext3_rec_len_to_disk(blocksize
);
1993 return add_dirent_to_buf(icb
, dentry
, inode
, de
, bh
);
1997 * ext3_delete_entry deletes a directory entry by merging it with the
2000 int ext3_delete_entry(struct ext2_icb
*icb
, struct inode
*dir
,
2001 struct ext3_dir_entry_2
*de_del
,
2002 struct buffer_head
*bh
)
2004 struct ext3_dir_entry_2
*de
, *pde
= NULL
;
2007 de
= (struct ext3_dir_entry_2
*) bh
->b_data
;
2008 while (i
< bh
->b_size
) {
2009 if (!ext3_check_dir_entry("ext3_delete_entry", dir
, de
, bh
, i
))
2013 pde
->rec_len
= ext3_rec_len_to_disk(
2014 ext3_rec_len_from_disk(pde
->rec_len
) +
2015 ext3_rec_len_from_disk(de
->rec_len
));
2019 /* ext3_journal_dirty_metadata(handle, bh); */
2020 set_buffer_dirty(bh
);
2023 i
+= ext3_rec_len_from_disk(de
->rec_len
);
2025 de
= ext3_next_entry(de
);
2031 * routine to check that the specified directory is empty (for rmdir)
2033 int ext3_is_dir_empty(struct ext2_icb
*icb
, struct inode
*inode
)
2035 unsigned int offset
;
2036 struct buffer_head
*bh
;
2037 struct ext3_dir_entry_2
*de
, *de1
;
2038 struct super_block
*sb
;
2042 if (inode
->i_size
< EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) ||
2043 !(bh
= ext3_bread(icb
, inode
, 0, &err
))) {
2045 ext3_error(inode
->i_sb
, __FUNCTION__
,
2046 "error %d reading directory #%lu offset 0",
2049 ext3_warning(inode
->i_sb
, __FUNCTION__
,
2050 "bad directory (dir #%lu) - no data block",
2054 de
= (struct ext3_dir_entry_2
*) bh
->b_data
;
2055 de1
= ext3_next_entry(de
);
2056 if (le32_to_cpu(de
->inode
) != inode
->i_ino
||
2057 !le32_to_cpu(de1
->inode
) ||
2058 strcmp(".", de
->name
) ||
2059 strcmp("..", de1
->name
)) {
2060 ext3_warning(inode
->i_sb
, "empty_dir",
2061 "bad directory (dir #%lu) - no `.' or `..'",
2066 offset
= ext3_rec_len_from_disk(de
->rec_len
) +
2067 ext3_rec_len_from_disk(de1
->rec_len
);
2068 de
= ext3_next_entry(de1
);
2069 while (offset
< inode
->i_size
) {
2071 (void *) de
>= (void *) (bh
->b_data
+sb
->s_blocksize
)) {
2074 bh
= ext3_bread(icb
, inode
, offset
>> EXT3_BLOCK_SIZE_BITS(sb
), &err
);
2077 ext3_error(sb
, __FUNCTION__
, "error %d reading directory"
2078 " #%lu offset %u", err
, inode
->i_ino
, offset
);
2079 offset
+= sb
->s_blocksize
;
2082 de
= (struct ext3_dir_entry_2
*) bh
->b_data
;
2084 if (!ext3_check_dir_entry("empty_dir", inode
, de
, bh
, offset
)) {
2085 de
= (struct ext3_dir_entry_2
*)(bh
->b_data
+
2087 offset
= (offset
| (sb
->s_blocksize
- 1)) + 1;
2090 if (le32_to_cpu(de
->inode
)) {
2094 offset
+= ext3_rec_len_from_disk(de
->rec_len
);
2095 de
= ext3_next_entry(de
);
2102 * Returns 0 if not found, -1 on failure, and 1 on success
2104 static inline int search_dirblock(struct buffer_head
* bh
,
2106 struct dentry
*dentry
,
2107 unsigned long offset
,
2108 struct ext3_dir_entry_2
** res_dir
)
2110 struct ext3_dir_entry_2
* de
;
2113 const char *name
= dentry
->d_name
.name
;
2114 int namelen
= dentry
->d_name
.len
;
2116 de
= (struct ext3_dir_entry_2
*) bh
->b_data
;
2117 dlimit
= bh
->b_data
+ dir
->i_sb
->s_blocksize
;
2118 while ((char *) de
< dlimit
) {
2119 /* this code is executed quadratically often */
2120 /* do minimal checking `by hand' */
2122 if ((char *) de
+ namelen
<= dlimit
&&
2123 ext3_match (namelen
, name
, de
)) {
2124 /* found a match - just to be sure, do a full check */
2125 if (!ext3_check_dir_entry("ext3_find_entry",
2126 dir
, de
, bh
, offset
))
2131 /* prevent looping on a bad block */
2132 de_len
= ext3_rec_len_from_disk(de
->rec_len
);
2137 de
= (struct ext3_dir_entry_2
*) ((char *) de
+ de_len
);
2143 * define how far ahead to read directories while searching them.
2145 #define NAMEI_RA_CHUNKS 2
2146 #define NAMEI_RA_BLOCKS 4
2147 #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
2148 #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
2153 * finds an entry in the specified directory with the wanted name. It
2154 * returns the cache buffer in which the entry was found, and the entry
2155 * itself (as a parameter - res_dir). It does NOT read the inode of the
2156 * entry - you'll have to do that yourself if you want to.
2158 * The returned buffer_head has ->b_count elevated. The caller is expected
2159 * to brelse() it when appropriate.
2161 struct buffer_head
* ext3_find_entry (struct ext2_icb
*icb
,
2162 struct dentry
*dentry
,
2163 struct ext3_dir_entry_2
** res_dir
)
2165 struct inode
*dir
= dentry
->d_parent
->d_inode
;
2166 struct super_block
*sb
= dir
->i_sb
;
2167 struct buffer_head
*bh_use
[NAMEI_RA_SIZE
];
2168 struct buffer_head
*bh
, *ret
= NULL
;
2169 ext3_lblk_t start
, block
, b
;
2170 int ra_max
= 0; /* Number of bh's in the readahead
2172 int ra_ptr
= 0; /* Current index into readahead
2175 ext3_lblk_t nblocks
;
2177 int namelen
= dentry
->d_name
.len
;
2180 if (namelen
> EXT3_NAME_LEN
)
2183 #ifdef EXT2_HTREE_INDEX
2185 bh
= ext3_dx_find_entry(icb
, dentry
, res_dir
, &err
);
2187 * On success, or if the error was file not found,
2188 * return. Otherwise, fall back to doing a search the
2189 * old fashioned way.
2191 if (bh
|| (err
!= ERR_BAD_DX_DIR
))
2193 dxtrace(printk("ext4_find_entry: dx failed, "
2198 nblocks
= (ext3_lblk_t
)(dir
->i_size
>> EXT3_BLOCK_SIZE_BITS(sb
));
2204 * We deal with the read-ahead logic here.
2206 if (ra_ptr
>= ra_max
) {
2207 /* Refill the readahead buffer */
2210 for (ra_max
= 0; ra_max
< NAMEI_RA_SIZE
; ra_max
++) {
2212 * Terminate if we reach the end of the
2213 * directory and must wrap, or if our
2214 * search has finished at this block.
2216 if (b
>= nblocks
|| (num
&& block
== start
)) {
2217 bh_use
[ra_max
] = NULL
;
2221 bh
= ext3_bread(icb
, dir
, b
++, &err
);
2222 bh_use
[ra_max
] = bh
;
2225 if ((bh
= bh_use
[ra_ptr
++]) == NULL
)
2228 if (!buffer_uptodate(bh
)) {
2229 /* read error, skip block & hope for the best */
2230 ext3_error(sb
, __FUNCTION__
, "reading directory #%lu "
2231 "offset %lu", dir
->i_ino
,
2232 (unsigned long)block
);
2236 i
= search_dirblock(bh
, dir
, dentry
,
2237 block
<< EXT3_BLOCK_SIZE_BITS(sb
), res_dir
);
2240 goto cleanup_and_exit
;
2244 goto cleanup_and_exit
;
2247 if (++block
>= nblocks
)
2249 } while (block
!= start
);
2252 * If the directory has grown while we were searching, then
2253 * search the last part of the directory before giving up.
2256 nblocks
= (ext3_lblk_t
)(dir
->i_size
>> EXT3_BLOCK_SIZE_BITS(sb
));
2257 if (block
< nblocks
) {
2263 /* Clean up the read-ahead blocks */
2264 for (; ra_ptr
< ra_max
; ra_ptr
++)
2265 brelse(bh_use
[ra_ptr
]);