2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License version 2 as
4 * published by the Free Software Foundation.
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
17 #include <linux/ext4.h>
21 #pragma warning(disable: 4018)
22 #pragma warning(disable: 4242)
23 #pragma warning(disable: 4244)
27 * used by extent splitting.
29 #define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \
31 #define EXT4_EXT_MARK_UNWRIT1 0x2 /* mark first half unwritten */
32 #define EXT4_EXT_MARK_UNWRIT2 0x4 /* mark second half unwritten */
34 #define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */
35 #define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */
37 #define CONFIG_EXTENT_TEST
38 #ifdef CONFIG_EXTENT_TEST
40 #define ext4_mark_inode_dirty(icb, handle, n) ext3_mark_inode_dirty(icb, n)
41 static inline ext4_fsblk_t
ext4_inode_to_goal_block(struct inode
*inode
)
44 Vcb
= inode
->i_sb
->s_priv
;
45 return (inode
->i_ino
- 1) / BLOCKS_PER_GROUP
;
48 static ext4_fsblk_t
ext4_new_meta_blocks(void *icb
, handle_t
*handle
, struct inode
*inode
,
51 unsigned long *count
, int *errp
)
54 ULONG blockcnt
= (count
)?*count
:1;
57 status
= Ext2NewBlock((PEXT2_IRP_CONTEXT
)icb
,
65 if (!NT_SUCCESS(status
)) {
66 *errp
= Ext2LinuxError(status
);
69 inode
->i_blocks
+= (blockcnt
* (inode
->i_sb
->s_blocksize
>> 9));
73 static void ext4_free_blocks(void *icb
, handle_t
*handle
, struct inode
*inode
, void *fake
,
74 ext4_fsblk_t block
, int count
, int flags
)
76 Ext2FreeBlock((PEXT2_IRP_CONTEXT
)icb
, inode
->i_sb
->s_priv
, block
, count
);
77 inode
->i_blocks
-= count
* (inode
->i_sb
->s_blocksize
>> 9);
81 static inline void ext_debug(char *str
, ...)
85 #define EXT4_ERROR_INODE(inode, str, ...) do { \
86 DbgPrint("inode[%p]: " str "\n", inode, ##__VA_ARGS__); \
89 #define EXT4_ERROR_INODE
92 #define ext4_std_error(s, err)
98 * Return the right sibling of a tree node (either a leaf or an index node)
101 #define EXT_MAX_BLOCKS 0xffffffff
104 static inline int ext4_ext_space_block(struct inode
*inode
, int check
)
108 size
= (inode
->i_sb
->s_blocksize
- sizeof(struct ext4_extent_header
))
109 / sizeof(struct ext4_extent
);
110 #ifdef AGGRESSIVE_TEST
111 if (!check
&& size
> 6)
117 static inline int ext4_ext_space_block_idx(struct inode
*inode
, int check
)
121 size
= (inode
->i_sb
->s_blocksize
- sizeof(struct ext4_extent_header
))
122 / sizeof(struct ext4_extent_idx
);
123 #ifdef AGGRESSIVE_TEST
124 if (!check
&& size
> 5)
130 static inline int ext4_ext_space_root(struct inode
*inode
, int check
)
134 size
= sizeof(EXT4_I(inode
)->i_block
);
135 size
-= sizeof(struct ext4_extent_header
);
136 size
/= sizeof(struct ext4_extent
);
137 #ifdef AGGRESSIVE_TEST
138 if (!check
&& size
> 3)
144 static inline int ext4_ext_space_root_idx(struct inode
*inode
, int check
)
148 size
= sizeof(EXT4_I(inode
)->i_block
);
149 size
-= sizeof(struct ext4_extent_header
);
150 size
/= sizeof(struct ext4_extent_idx
);
151 #ifdef AGGRESSIVE_TEST
152 if (!check
&& size
> 4)
159 ext4_ext_max_entries(struct inode
*inode
, int depth
)
163 if (depth
== ext_depth(inode
)) {
165 max
= ext4_ext_space_root(inode
, 1);
167 max
= ext4_ext_space_root_idx(inode
, 1);
170 max
= ext4_ext_space_block(inode
, 1);
172 max
= ext4_ext_space_block_idx(inode
, 1);
178 static int __ext4_ext_check(const char *function
, unsigned int line
,
180 struct ext4_extent_header
*eh
, int depth
,
184 * read_extent_tree_block:
185 * Get a buffer_head by extents_bread, and read fresh data from the storage.
187 static struct buffer_head
*
188 __read_extent_tree_block(const char *function
, unsigned int line
,
189 struct inode
*inode
, ext4_fsblk_t pblk
, int depth
,
192 struct buffer_head
*bh
;
195 bh
= extents_bread(inode
->i_sb
, pblk
);
197 return ERR_PTR(-ENOMEM
);
199 if (!buffer_uptodate(bh
)) {
203 if (buffer_verified(bh
))
205 err
= __ext4_ext_check(function
, line
, inode
,
206 ext_block_hdr(bh
), depth
, pblk
);
209 set_buffer_verified(bh
);
217 #define read_extent_tree_block(inode, pblk, depth, flags) \
218 __read_extent_tree_block("", __LINE__, (inode), (pblk), \
221 #define ext4_ext_check(inode, eh, depth, pblk) \
222 __ext4_ext_check("", __LINE__, (inode), (eh), (depth), (pblk))
224 int ext4_ext_check_inode(struct inode
*inode
)
226 return ext4_ext_check(inode
, ext_inode_hdr(inode
), ext_depth(inode
), 0);
229 static uint32_t ext4_ext_block_csum(struct inode
*inode
,
230 struct ext4_extent_header
*eh
)
232 /*return ext4_crc32c(inode->i_csum, eh, EXT4_EXTENT_TAIL_OFFSET(eh));*/
236 static void ext4_extent_block_csum_set(struct inode
*inode
,
237 struct ext4_extent_header
*eh
)
239 struct ext4_extent_tail
*tail
;
241 tail
= find_ext4_extent_tail(eh
);
242 tail
->et_checksum
= ext4_ext_block_csum(
246 static int ext4_split_extent_at(void *icb
,
249 struct ext4_ext_path
**ppath
,
255 ext4_force_split_extent_at(void *icb
, handle_t
*handle
, struct inode
*inode
,
256 struct ext4_ext_path
**ppath
, ext4_lblk_t lblk
,
259 struct ext4_ext_path
*path
= *ppath
;
260 int unwritten
= ext4_ext_is_unwritten(path
[path
->p_depth
].p_ext
);
262 return ext4_split_extent_at(icb
, handle
, inode
, ppath
, lblk
, unwritten
?
263 EXT4_EXT_MARK_UNWRIT1
|EXT4_EXT_MARK_UNWRIT2
: 0,
264 EXT4_EX_NOCACHE
| EXT4_GET_BLOCKS_PRE_IO
|
265 (nofail
? EXT4_GET_BLOCKS_METADATA_NOFAIL
:0));
274 static int ext4_ext_get_access(void *icb
, handle_t
*handle
, struct inode
*inode
,
275 struct ext4_ext_path
*path
)
278 /* path points to block */
280 return ext4_journal_get_write_access(icb
, handle
, path
->p_bh
);
283 /* path points to leaf/index in inode body */
284 /* we use in-core data, no need to protect them */
289 static ext4_fsblk_t
ext4_ext_find_goal(struct inode
*inode
,
290 struct ext4_ext_path
*path
,
294 int depth
= path
->p_depth
;
295 struct ext4_extent
*ex
;
298 * Try to predict block placement assuming that we are
299 * filling in a file which will eventually be
300 * non-sparse --- i.e., in the case of libbfd writing
301 * an ELF object sections out-of-order but in a way
302 * that eventually results in a contiguous object or
303 * executable file, or some database extending a table
304 * space file. However, this is actually somewhat
305 * non-ideal if we are writing a sparse file such as
306 * qemu or KVM writing a raw image file that is going
307 * to stay fairly sparse, since it will end up
308 * fragmenting the file system's free space. Maybe we
309 * should have some heuristics or some way to allow
310 * userspace to pass a hint to file system,
311 * especially if the latter case turns out to be
314 ex
= path
[depth
].p_ext
;
316 ext4_fsblk_t ext_pblk
= ext4_ext_pblock(ex
);
317 ext4_lblk_t ext_block
= le32_to_cpu(ex
->ee_block
);
319 if (block
> ext_block
)
320 return ext_pblk
+ (block
- ext_block
);
322 return ext_pblk
- (ext_block
- block
);
325 /* it looks like index is empty;
326 * try to find starting block from index itself */
327 if (path
[depth
].p_bh
)
328 return path
[depth
].p_bh
->b_blocknr
;
331 /* OK. use inode's group */
332 return ext4_inode_to_goal_block(inode
);
336 * Allocation for a meta data block
339 ext4_ext_new_meta_block(void *icb
, handle_t
*handle
, struct inode
*inode
,
340 struct ext4_ext_path
*path
,
341 struct ext4_extent
*ex
, int *err
, unsigned int flags
)
343 ext4_fsblk_t goal
, newblock
;
345 goal
= ext4_ext_find_goal(inode
, path
, le32_to_cpu(ex
->ee_block
));
346 newblock
= ext4_new_meta_blocks(icb
, handle
, inode
, goal
, flags
,
351 int __ext4_ext_dirty(const char *where
, unsigned int line
,
352 void *icb
, handle_t
*handle
,
354 struct ext4_ext_path
*path
)
359 ext4_extent_block_csum_set(inode
, ext_block_hdr(path
->p_bh
));
360 /* path points to block */
361 err
= __ext4_handle_dirty_metadata(where
, line
, icb
, handle
, inode
, path
->p_bh
);
363 /* path points to leaf/index in inode body */
364 err
= ext4_mark_inode_dirty(icb
, handle
, inode
);
369 void ext4_ext_drop_refs(struct ext4_ext_path
*path
)
375 depth
= path
->p_depth
;
376 for (i
= 0; i
<= depth
; i
++, path
++)
378 extents_brelse(path
->p_bh
);
384 * Check that whether the basic information inside the extent header
387 static int __ext4_ext_check(const char *function
, unsigned int line
,
389 struct ext4_extent_header
*eh
, int depth
,
392 struct ext4_extent_tail
*tail
;
393 const char *error_msg
;
397 if (eh
->eh_magic
!= EXT4_EXT_MAGIC
) {
398 error_msg
= "invalid magic";
401 if (le16_to_cpu(eh
->eh_depth
) != depth
) {
402 error_msg
= "unexpected eh_depth";
405 if (eh
->eh_max
== 0) {
406 error_msg
= "invalid eh_max";
409 if (eh
->eh_entries
> eh
->eh_max
) {
410 error_msg
= "invalid eh_entries";
414 tail
= find_ext4_extent_tail(eh
);
415 if (tail
->et_checksum
!= ext4_ext_block_csum(inode
, eh
)) {
416 ext_debug("Warning: extent checksum damaged? tail->et_checksum = "
417 "%lu, ext4_ext_block_csum = %lu\n",
418 tail
->et_checksum
, ext4_ext_block_csum(inode
, eh
));
424 ext_debug("corrupted! %s\n", error_msg
);
429 * ext4_ext_binsearch_idx:
430 * binary search for the closest index of the given block
431 * the header must be checked before calling this
434 ext4_ext_binsearch_idx(struct inode
*inode
,
435 struct ext4_ext_path
*path
, ext4_lblk_t block
)
437 struct ext4_extent_header
*eh
= path
->p_hdr
;
438 struct ext4_extent_idx
*r
, *l
, *m
;
440 ext_debug("binsearch for %u(idx): ", block
);
442 l
= EXT_FIRST_INDEX(eh
) + 1;
443 r
= EXT_LAST_INDEX(eh
);
446 if (block
< (m
->ei_block
))
450 ext_debug("%p(%u):%p(%u):%p(%u) ", l
, (l
->ei_block
),
456 ext_debug(" -> %u->%lld ", (path
->p_idx
->ei_block
),
457 ext4_idx_pblock(path
->p_idx
));
459 #ifdef CHECK_BINSEARCH
461 struct ext4_extent_idx
*chix
, *ix
;
464 chix
= ix
= EXT_FIRST_INDEX(eh
);
465 for (k
= 0; k
< (eh
->eh_entries
); k
++, ix
++) {
467 (ix
->ei_block
) <= (ix
[-1].ei_block
)) {
468 printk(KERN_DEBUG
"k=%d, ix=0x%p, "
470 ix
, EXT_FIRST_INDEX(eh
));
471 printk(KERN_DEBUG
"%u <= %u\n",
475 BUG_ON(k
&& (ix
->ei_block
)
476 <= (ix
[-1].ei_block
));
477 if (block
< (ix
->ei_block
))
481 BUG_ON(chix
!= path
->p_idx
);
488 * ext4_ext_binsearch:
489 * binary search for closest extent of the given block
490 * the header must be checked before calling this
493 ext4_ext_binsearch(struct inode
*inode
,
494 struct ext4_ext_path
*path
, ext4_lblk_t block
)
496 struct ext4_extent_header
*eh
= path
->p_hdr
;
497 struct ext4_extent
*r
, *l
, *m
;
499 if (eh
->eh_entries
== 0) {
501 * this leaf is empty:
502 * we get such a leaf in split/add case
507 ext_debug("binsearch for %u: ", block
);
509 l
= EXT_FIRST_EXTENT(eh
) + 1;
510 r
= EXT_LAST_EXTENT(eh
);
514 if (block
< m
->ee_block
)
518 ext_debug("%p(%u):%p(%u):%p(%u) ", l
, l
->ee_block
,
524 ext_debug(" -> %d:%llu:[%d]%d ",
525 (path
->p_ext
->ee_block
),
526 ext4_ext_pblock(path
->p_ext
),
527 ext4_ext_is_unwritten(path
->p_ext
),
528 ext4_ext_get_actual_len(path
->p_ext
));
530 #ifdef CHECK_BINSEARCH
532 struct ext4_extent
*chex
, *ex
;
535 chex
= ex
= EXT_FIRST_EXTENT(eh
);
536 for (k
= 0; k
< le16_to_cpu(eh
->eh_entries
); k
++, ex
++) {
537 BUG_ON(k
&& (ex
->ee_block
)
538 <= (ex
[-1].ee_block
));
539 if (block
< (ex
->ee_block
))
543 BUG_ON(chex
!= path
->p_ext
);
550 static void ext4_ext_show_path(struct inode
*inode
, struct ext4_ext_path
*path
)
552 int k
, l
= path
->p_depth
;
555 for (k
= 0; k
<= l
; k
++, path
++) {
557 ext_debug(" %d->%llu", le32_to_cpu(path
->p_idx
->ei_block
),
558 ext4_idx_pblock(path
->p_idx
));
559 } else if (path
->p_ext
) {
560 ext_debug(" %d:[%d]%d:%llu ",
561 le32_to_cpu(path
->p_ext
->ee_block
),
562 ext4_ext_is_unwritten(path
->p_ext
),
563 ext4_ext_get_actual_len(path
->p_ext
),
564 ext4_ext_pblock(path
->p_ext
));
571 static void ext4_ext_show_leaf(struct inode
*inode
, struct ext4_ext_path
*path
)
573 int depth
= ext_depth(inode
);
574 struct ext4_extent_header
*eh
;
575 struct ext4_extent
*ex
;
581 eh
= path
[depth
].p_hdr
;
582 ex
= EXT_FIRST_EXTENT(eh
);
584 ext_debug("Displaying leaf extents for inode %lu\n", inode
->i_ino
);
586 for (i
= 0; i
< le16_to_cpu(eh
->eh_entries
); i
++, ex
++) {
587 ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex
->ee_block
),
588 ext4_ext_is_unwritten(ex
),
589 ext4_ext_get_actual_len(ex
), ext4_ext_pblock(ex
));
594 static void ext4_ext_show_move(struct inode
*inode
, struct ext4_ext_path
*path
,
595 ext4_fsblk_t newblock
, int level
)
597 int depth
= ext_depth(inode
);
598 struct ext4_extent
*ex
;
600 if (depth
!= level
) {
601 struct ext4_extent_idx
*idx
;
602 idx
= path
[level
].p_idx
;
603 while (idx
<= EXT_MAX_INDEX(path
[level
].p_hdr
)) {
604 ext_debug("%d: move %d:%llu in new index %llu\n", level
,
605 le32_to_cpu(idx
->ei_block
),
606 ext4_idx_pblock(idx
),
614 ex
= path
[depth
].p_ext
;
615 while (ex
<= EXT_MAX_EXTENT(path
[depth
].p_hdr
)) {
616 ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
617 le32_to_cpu(ex
->ee_block
),
619 ext4_ext_is_unwritten(ex
),
620 ext4_ext_get_actual_len(ex
),
627 #define ext4_ext_show_path(inode, path)
628 #define ext4_ext_show_leaf(inode, path)
629 #define ext4_ext_show_move(inode, path, newblock, level)
632 struct ext4_ext_path
*
633 ext4_find_extent(struct inode
*inode
, ext4_lblk_t block
,
634 struct ext4_ext_path
**orig_path
, int flags
)
636 struct ext4_extent_header
*eh
;
637 struct buffer_head
*bh
;
638 struct ext4_ext_path
*path
= orig_path
? *orig_path
: NULL
;
639 short int depth
, i
, ppos
= 0;
642 eh
= ext_inode_hdr(inode
);
643 depth
= ext_depth(inode
);
646 ext4_ext_drop_refs(path
);
647 if (depth
> path
[0].p_maxdepth
) {
649 *orig_path
= path
= NULL
;
653 /* account possible depth increase */
654 path
= kzalloc(sizeof(struct ext4_ext_path
) * (depth
+ 2),
657 return ERR_PTR(-ENOMEM
);
658 path
[0].p_maxdepth
= depth
+ 1;
664 /* walk through the tree */
666 ext_debug("depth %d: num %d, max %d\n",
667 ppos
, le16_to_cpu(eh
->eh_entries
), le16_to_cpu(eh
->eh_max
));
669 ext4_ext_binsearch_idx(inode
, path
+ ppos
, block
);
670 path
[ppos
].p_block
= ext4_idx_pblock(path
[ppos
].p_idx
);
671 path
[ppos
].p_depth
= i
;
672 path
[ppos
].p_ext
= NULL
;
674 bh
= read_extent_tree_block(inode
, path
[ppos
].p_block
, --i
,
676 if (unlikely(IS_ERR(bh
))) {
681 eh
= ext_block_hdr(bh
);
683 if (unlikely(ppos
> depth
)) {
685 EXT4_ERROR_INODE(inode
,
686 "ppos %d > depth %d", ppos
, depth
);
690 path
[ppos
].p_bh
= bh
;
691 path
[ppos
].p_hdr
= eh
;
694 path
[ppos
].p_depth
= i
;
695 path
[ppos
].p_ext
= NULL
;
696 path
[ppos
].p_idx
= NULL
;
699 ext4_ext_binsearch(inode
, path
+ ppos
, block
);
700 /* if not an empty leaf */
701 if (path
[ppos
].p_ext
)
702 path
[ppos
].p_block
= ext4_ext_pblock(path
[ppos
].p_ext
);
704 ext4_ext_show_path(inode
, path
);
709 ext4_ext_drop_refs(path
);
719 * ext4_ext_insert_index:
720 * insert new index [@logical;@ptr] into the block at @curp;
721 * check where to insert: before @curp or after @curp
723 static int ext4_ext_insert_index(void *icb
, handle_t
*handle
, struct inode
*inode
,
724 struct ext4_ext_path
*curp
,
725 int logical
, ext4_fsblk_t ptr
)
727 struct ext4_extent_idx
*ix
;
730 err
= ext4_ext_get_access(icb
, handle
, inode
, curp
);
734 if (unlikely(logical
== le32_to_cpu(curp
->p_idx
->ei_block
))) {
735 EXT4_ERROR_INODE(inode
,
736 "logical %d == ei_block %d!",
737 logical
, le32_to_cpu(curp
->p_idx
->ei_block
));
741 if (unlikely(le16_to_cpu(curp
->p_hdr
->eh_entries
)
742 >= le16_to_cpu(curp
->p_hdr
->eh_max
))) {
743 EXT4_ERROR_INODE(inode
,
744 "eh_entries %d >= eh_max %d!",
745 le16_to_cpu(curp
->p_hdr
->eh_entries
),
746 le16_to_cpu(curp
->p_hdr
->eh_max
));
750 if (logical
> le32_to_cpu(curp
->p_idx
->ei_block
)) {
752 ext_debug("insert new index %d after: %llu\n", logical
, ptr
);
753 ix
= curp
->p_idx
+ 1;
756 ext_debug("insert new index %d before: %llu\n", logical
, ptr
);
760 len
= EXT_LAST_INDEX(curp
->p_hdr
) - ix
+ 1;
763 ext_debug("insert new index %d: "
764 "move %d indices from 0x%p to 0x%p\n",
765 logical
, len
, ix
, ix
+ 1);
766 memmove(ix
+ 1, ix
, len
* sizeof(struct ext4_extent_idx
));
769 if (unlikely(ix
> EXT_MAX_INDEX(curp
->p_hdr
))) {
770 EXT4_ERROR_INODE(inode
, "ix > EXT_MAX_INDEX!");
774 ix
->ei_block
= cpu_to_le32(logical
);
775 ext4_idx_store_pblock(ix
, ptr
);
776 le16_add_cpu(&curp
->p_hdr
->eh_entries
, 1);
778 if (unlikely(ix
> EXT_LAST_INDEX(curp
->p_hdr
))) {
779 EXT4_ERROR_INODE(inode
, "ix > EXT_LAST_INDEX!");
783 err
= ext4_ext_dirty(icb
, handle
, inode
, curp
);
784 ext4_std_error(inode
->i_sb
, err
);
791 * inserts new subtree into the path, using free index entry
793 * - allocates all needed blocks (new leaf and all intermediate index blocks)
794 * - makes decision where to split
795 * - moves remaining extents and index entries (right to the split point)
796 * into the newly allocated blocks
797 * - initializes subtree
799 static int ext4_ext_split(void *icb
, handle_t
*handle
, struct inode
*inode
,
801 struct ext4_ext_path
*path
,
802 struct ext4_extent
*newext
, int at
)
804 struct buffer_head
*bh
= NULL
;
805 int depth
= ext_depth(inode
);
806 struct ext4_extent_header
*neh
;
807 struct ext4_extent_idx
*fidx
;
809 ext4_fsblk_t newblock
, oldblock
;
811 ext4_fsblk_t
*ablocks
= NULL
; /* array of allocated blocks */
814 /* make decision: where to split? */
815 /* FIXME: now decision is simplest: at current extent */
817 /* if current leaf will be split, then we should use
818 * border from split point */
819 if (unlikely(path
[depth
].p_ext
> EXT_MAX_EXTENT(path
[depth
].p_hdr
))) {
820 EXT4_ERROR_INODE(inode
, "p_ext > EXT_MAX_EXTENT!");
823 if (path
[depth
].p_ext
!= EXT_MAX_EXTENT(path
[depth
].p_hdr
)) {
824 border
= path
[depth
].p_ext
[1].ee_block
;
825 ext_debug("leaf will be split."
826 " next leaf starts at %d\n",
827 le32_to_cpu(border
));
829 border
= newext
->ee_block
;
830 ext_debug("leaf will be added."
831 " next leaf starts at %d\n",
832 le32_to_cpu(border
));
836 * If error occurs, then we break processing
837 * and mark filesystem read-only. index won't
838 * be inserted and tree will be in consistent
839 * state. Next mount will repair buffers too.
843 * Get array to track all allocated blocks.
844 * We need this to handle errors and free blocks
847 ablocks
= kzalloc(sizeof(ext4_fsblk_t
) * depth
, GFP_NOFS
);
851 /* allocate all needed blocks */
852 ext_debug("allocate %d blocks for indexes/leaf\n", depth
- at
);
853 for (a
= 0; a
< depth
- at
; a
++) {
854 newblock
= ext4_ext_new_meta_block(icb
, handle
, inode
, path
,
855 newext
, &err
, flags
);
858 ablocks
[a
] = newblock
;
861 /* initialize new leaf */
862 newblock
= ablocks
[--a
];
863 if (unlikely(newblock
== 0)) {
864 EXT4_ERROR_INODE(inode
, "newblock == 0!");
868 bh
= extents_bwrite(inode
->i_sb
, newblock
);
874 err
= ext4_journal_get_create_access(icb
, handle
, bh
);
878 neh
= ext_block_hdr(bh
);
880 neh
->eh_max
= cpu_to_le16(ext4_ext_space_block(inode
, 0));
881 neh
->eh_magic
= EXT4_EXT_MAGIC
;
884 /* move remainder of path[depth] to the new leaf */
885 if (unlikely(path
[depth
].p_hdr
->eh_entries
!=
886 path
[depth
].p_hdr
->eh_max
)) {
887 EXT4_ERROR_INODE(inode
, "eh_entries %d != eh_max %d!",
888 path
[depth
].p_hdr
->eh_entries
,
889 path
[depth
].p_hdr
->eh_max
);
893 /* start copy from next extent */
894 m
= EXT_MAX_EXTENT(path
[depth
].p_hdr
) - path
[depth
].p_ext
++;
895 ext4_ext_show_move(inode
, path
, newblock
, depth
);
897 struct ext4_extent
*ex
;
898 ex
= EXT_FIRST_EXTENT(neh
);
899 memmove(ex
, path
[depth
].p_ext
, sizeof(struct ext4_extent
) * m
);
900 le16_add_cpu(&neh
->eh_entries
, m
);
903 ext4_extent_block_csum_set(inode
, neh
);
904 set_buffer_uptodate(bh
);
906 err
= ext4_handle_dirty_metadata(icb
, handle
, inode
, bh
);
912 /* correct old leaf */
914 err
= ext4_ext_get_access(icb
, handle
, inode
, path
+ depth
);
917 le16_add_cpu(&path
[depth
].p_hdr
->eh_entries
, -m
);
918 err
= ext4_ext_dirty(icb
, handle
, inode
, path
+ depth
);
924 /* create intermediate indexes */
926 if (unlikely(k
< 0)) {
927 EXT4_ERROR_INODE(inode
, "k %d < 0!", k
);
932 ext_debug("create %d intermediate indices\n", k
);
933 /* insert new index into current index block */
934 /* current depth stored in i var */
938 newblock
= ablocks
[--a
];
939 bh
= extents_bwrite(inode
->i_sb
, newblock
);
945 err
= ext4_journal_get_create_access(icb
, handle
, bh
);
949 neh
= ext_block_hdr(bh
);
950 neh
->eh_entries
= cpu_to_le16(1);
951 neh
->eh_magic
= EXT4_EXT_MAGIC
;
952 neh
->eh_max
= cpu_to_le16(ext4_ext_space_block_idx(inode
, 0));
953 neh
->eh_depth
= cpu_to_le16(depth
- i
);
954 fidx
= EXT_FIRST_INDEX(neh
);
955 fidx
->ei_block
= border
;
956 ext4_idx_store_pblock(fidx
, oldblock
);
958 ext_debug("int.index at %d (block %llu): %u -> %llu\n",
959 i
, newblock
, le32_to_cpu(border
), oldblock
);
961 /* move remainder of path[i] to the new index block */
962 if (unlikely(EXT_MAX_INDEX(path
[i
].p_hdr
) !=
963 EXT_LAST_INDEX(path
[i
].p_hdr
))) {
964 EXT4_ERROR_INODE(inode
,
965 "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
966 le32_to_cpu(path
[i
].p_ext
->ee_block
));
970 /* start copy indexes */
971 m
= EXT_MAX_INDEX(path
[i
].p_hdr
) - path
[i
].p_idx
++;
972 ext_debug("cur 0x%p, last 0x%p\n", path
[i
].p_idx
,
973 EXT_MAX_INDEX(path
[i
].p_hdr
));
974 ext4_ext_show_move(inode
, path
, newblock
, i
);
976 memmove(++fidx
, path
[i
].p_idx
,
977 sizeof(struct ext4_extent_idx
) * m
);
978 le16_add_cpu(&neh
->eh_entries
, m
);
980 ext4_extent_block_csum_set(inode
, neh
);
981 set_buffer_uptodate(bh
);
983 err
= ext4_handle_dirty_metadata(icb
, handle
, inode
, bh
);
989 /* correct old index */
991 err
= ext4_ext_get_access(icb
, handle
, inode
, path
+ i
);
994 le16_add_cpu(&path
[i
].p_hdr
->eh_entries
, -m
);
995 err
= ext4_ext_dirty(icb
, handle
, inode
, path
+ i
);
1003 /* insert new index */
1004 err
= ext4_ext_insert_index(icb
, handle
, inode
, path
+ at
,
1005 le32_to_cpu(border
), newblock
);
1012 /* free all allocated blocks in error case */
1013 for (i
= 0; i
< depth
; i
++) {
1016 ext4_free_blocks(icb
, handle
, inode
, NULL
, ablocks
[i
], 1,
1017 EXT4_FREE_BLOCKS_METADATA
);
1026 * ext4_ext_grow_indepth:
1027 * implements tree growing procedure:
1028 * - allocates new block
1029 * - moves top-level data (index block or leaf) into the new block
1030 * - initializes new top-level, creating index that points to the
1031 * just created block
1033 static int ext4_ext_grow_indepth(void *icb
, handle_t
*handle
, struct inode
*inode
,
1036 struct ext4_extent_header
*neh
;
1037 struct buffer_head
*bh
;
1038 ext4_fsblk_t newblock
, goal
= 0;
1041 /* Try to prepend new index to old one */
1042 if (ext_depth(inode
))
1043 goal
= ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode
)));
1044 goal
= ext4_inode_to_goal_block(inode
);
1045 newblock
= ext4_new_meta_blocks(icb
, handle
, inode
, goal
, flags
,
1050 bh
= extents_bwrite(inode
->i_sb
, newblock
);
1054 err
= ext4_journal_get_create_access(icb
, handle
, bh
);
1058 /* move top-level index/leaf into new block */
1059 memmove(bh
->b_data
, EXT4_I(inode
)->i_block
,
1060 sizeof(EXT4_I(inode
)->i_block
));
1062 /* set size of new block */
1063 neh
= ext_block_hdr(bh
);
1064 /* old root could have indexes or leaves
1065 * so calculate e_max right way */
1066 if (ext_depth(inode
))
1067 neh
->eh_max
= (ext4_ext_space_block_idx(inode
, 0));
1069 neh
->eh_max
= (ext4_ext_space_block(inode
, 0));
1070 neh
->eh_magic
= EXT4_EXT_MAGIC
;
1071 ext4_extent_block_csum_set(inode
, neh
);
1072 set_buffer_uptodate(bh
);
1074 err
= ext4_handle_dirty_metadata(icb
, handle
, inode
, bh
);
1078 /* Update top-level index: num,max,pointer */
1079 neh
= ext_inode_hdr(inode
);
1080 neh
->eh_entries
= (1);
1081 ext4_idx_store_pblock(EXT_FIRST_INDEX(neh
), newblock
);
1082 if (neh
->eh_depth
== 0) {
1083 /* Root extent block becomes index block */
1084 neh
->eh_max
= (ext4_ext_space_root_idx(inode
, 0));
1085 EXT_FIRST_INDEX(neh
)->ei_block
=
1086 EXT_FIRST_EXTENT(neh
)->ee_block
;
1088 ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
1089 (neh
->eh_entries
), (neh
->eh_max
),
1090 (EXT_FIRST_INDEX(neh
)->ei_block
),
1091 ext4_idx_pblock(EXT_FIRST_INDEX(neh
)));
1093 le16_add_cpu(&neh
->eh_depth
, 1);
1094 ext4_mark_inode_dirty(icb
, handle
, inode
);
1102 * ext4_ext_create_new_leaf:
1103 * finds empty index and adds new leaf.
1104 * if no free index is found, then it requests in-depth growing.
1106 static int ext4_ext_create_new_leaf(void *icb
, handle_t
*handle
, struct inode
*inode
,
1107 unsigned int mb_flags
,
1108 unsigned int gb_flags
,
1109 struct ext4_ext_path
**ppath
,
1110 struct ext4_extent
*newext
)
1112 struct ext4_ext_path
*path
= *ppath
;
1113 struct ext4_ext_path
*curp
;
1114 int depth
, i
, err
= 0;
1117 i
= depth
= ext_depth(inode
);
1119 /* walk up to the tree and look for free index entry */
1120 curp
= path
+ depth
;
1121 while (i
> 0 && !EXT_HAS_FREE_INDEX(curp
)) {
1126 /* we use already allocated block for index block,
1127 * so subsequent data blocks should be contiguous */
1128 if (EXT_HAS_FREE_INDEX(curp
)) {
1129 /* if we found index with free entry, then use that
1130 * entry: create all needed subtree and add new leaf */
1131 err
= ext4_ext_split(icb
, handle
, inode
, mb_flags
, path
, newext
, i
);
1136 path
= ext4_find_extent(inode
,
1137 (ext4_lblk_t
)le32_to_cpu(newext
->ee_block
),
1140 err
= PTR_ERR(path
);
1142 /* tree is full, time to grow in depth */
1143 err
= ext4_ext_grow_indepth(icb
, handle
, inode
, mb_flags
);
1148 path
= ext4_find_extent(inode
,
1149 (ext4_lblk_t
)le32_to_cpu(newext
->ee_block
),
1152 err
= PTR_ERR(path
);
1157 * only first (depth 0 -> 1) produces free space;
1158 * in all other cases we have to split the grown tree
1160 depth
= ext_depth(inode
);
1161 if (path
[depth
].p_hdr
->eh_entries
== path
[depth
].p_hdr
->eh_max
) {
1162 /* now we need to split */
1172 * search the closest allocated block to the left for *logical
1173 * and returns it at @logical + it's physical address at @phys
1174 * if *logical is the smallest allocated block, the function
1175 * returns 0 at @phys
1176 * return value contains 0 (success) or error code
1178 static int ext4_ext_search_left(struct inode
*inode
,
1179 struct ext4_ext_path
*path
,
1180 ext4_lblk_t
*logical
, ext4_fsblk_t
*phys
)
1182 struct ext4_extent_idx
*ix
;
1183 struct ext4_extent
*ex
;
1186 if (unlikely(path
== NULL
)) {
1187 EXT4_ERROR_INODE(inode
, "path == NULL *logical %d!", *logical
);
1190 depth
= path
->p_depth
;
1193 if (depth
== 0 && path
->p_ext
== NULL
)
1196 /* usually extent in the path covers blocks smaller
1197 * then *logical, but it can be that extent is the
1198 * first one in the file */
1200 ex
= path
[depth
].p_ext
;
1201 ee_len
= ext4_ext_get_actual_len(ex
);
1202 if (*logical
< le32_to_cpu(ex
->ee_block
)) {
1203 if (unlikely(EXT_FIRST_EXTENT(path
[depth
].p_hdr
) != ex
)) {
1204 EXT4_ERROR_INODE(inode
,
1205 "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
1206 *logical
, le32_to_cpu(ex
->ee_block
));
1209 while (--depth
>= 0) {
1210 ix
= path
[depth
].p_idx
;
1211 if (unlikely(ix
!= EXT_FIRST_INDEX(path
[depth
].p_hdr
))) {
1212 EXT4_ERROR_INODE(inode
,
1213 "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
1214 ix
!= NULL
? le32_to_cpu(ix
->ei_block
) : 0,
1215 EXT_FIRST_INDEX(path
[depth
].p_hdr
) != NULL
?
1216 le32_to_cpu(EXT_FIRST_INDEX(path
[depth
].p_hdr
)->ei_block
) : 0,
1224 if (unlikely(*logical
< (le32_to_cpu(ex
->ee_block
) + ee_len
))) {
1225 EXT4_ERROR_INODE(inode
,
1226 "logical %d < ee_block %d + ee_len %d!",
1227 *logical
, le32_to_cpu(ex
->ee_block
), ee_len
);
1231 *logical
= le32_to_cpu(ex
->ee_block
) + ee_len
- 1;
1232 *phys
= ext4_ext_pblock(ex
) + ee_len
- 1;
1237 * search the closest allocated block to the right for *logical
1238 * and returns it at @logical + it's physical address at @phys
1239 * if *logical is the largest allocated block, the function
1240 * returns 0 at @phys
1241 * return value contains 0 (success) or error code
1243 static int ext4_ext_search_right(struct inode
*inode
,
1244 struct ext4_ext_path
*path
,
1245 ext4_lblk_t
*logical
, ext4_fsblk_t
*phys
,
1246 struct ext4_extent
**ret_ex
)
1248 struct buffer_head
*bh
= NULL
;
1249 struct ext4_extent_header
*eh
;
1250 struct ext4_extent_idx
*ix
;
1251 struct ext4_extent
*ex
;
1253 int depth
; /* Note, NOT eh_depth; depth from top of tree */
1256 if ((path
== NULL
)) {
1257 EXT4_ERROR_INODE(inode
, "path == NULL *logical %d!", *logical
);
1260 depth
= path
->p_depth
;
1263 if (depth
== 0 && path
->p_ext
== NULL
)
1266 /* usually extent in the path covers blocks smaller
1267 * then *logical, but it can be that extent is the
1268 * first one in the file */
1270 ex
= path
[depth
].p_ext
;
1271 ee_len
= ext4_ext_get_actual_len(ex
);
1272 /*if (*logical < le32_to_cpu(ex->ee_block)) {*/
1273 if (*logical
< (ex
->ee_block
)) {
1274 if (unlikely(EXT_FIRST_EXTENT(path
[depth
].p_hdr
) != ex
)) {
1275 EXT4_ERROR_INODE(inode
,
1276 "first_extent(path[%d].p_hdr) != ex",
1280 while (--depth
>= 0) {
1281 ix
= path
[depth
].p_idx
;
1282 if (unlikely(ix
!= EXT_FIRST_INDEX(path
[depth
].p_hdr
))) {
1283 EXT4_ERROR_INODE(inode
,
1284 "ix != EXT_FIRST_INDEX *logical %d!",
1292 /*if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {*/
1293 if (unlikely(*logical
< ((ex
->ee_block
) + ee_len
))) {
1294 EXT4_ERROR_INODE(inode
,
1295 "logical %d < ee_block %d + ee_len %d!",
1296 /**logical, le32_to_cpu(ex->ee_block), ee_len);*/
1297 *logical
, (ex
->ee_block
), ee_len
);
1301 if (ex
!= EXT_LAST_EXTENT(path
[depth
].p_hdr
)) {
1302 /* next allocated block in this leaf */
1307 /* go up and search for index to the right */
1308 while (--depth
>= 0) {
1309 ix
= path
[depth
].p_idx
;
1310 if (ix
!= EXT_LAST_INDEX(path
[depth
].p_hdr
))
1314 /* we've gone up to the root and found no index to the right */
1318 /* we've found index to the right, let's
1319 * follow it and find the closest allocated
1320 * block to the right */
1322 block
= ext4_idx_pblock(ix
);
1323 while (++depth
< path
->p_depth
) {
1324 /* subtract from p_depth to get proper eh_depth */
1325 bh
= read_extent_tree_block(inode
, block
,
1326 path
->p_depth
- depth
, 0);
1329 eh
= ext_block_hdr(bh
);
1330 ix
= EXT_FIRST_INDEX(eh
);
1331 block
= ext4_idx_pblock(ix
);
1335 bh
= read_extent_tree_block(inode
, block
, path
->p_depth
- depth
, 0);
1338 eh
= ext_block_hdr(bh
);
1339 ex
= EXT_FIRST_EXTENT(eh
);
1341 /**logical = le32_to_cpu(ex->ee_block);*/
1342 *logical
= (ex
->ee_block
);
1343 *phys
= ext4_ext_pblock(ex
);
1351 * ext4_ext_next_allocated_block:
1352 * returns allocated block in subsequent extent or EXT_MAX_BLOCKS.
1353 * NOTE: it considers block number from index entry as
1354 * allocated block. Thus, index entries have to be consistent
1358 ext4_ext_next_allocated_block(struct ext4_ext_path
*path
)
1362 depth
= path
->p_depth
;
1364 if (depth
== 0 && path
->p_ext
== NULL
)
1365 return EXT_MAX_BLOCKS
;
1367 while (depth
>= 0) {
1368 if (depth
== path
->p_depth
) {
1370 if (path
[depth
].p_ext
&&
1371 path
[depth
].p_ext
!=
1372 EXT_LAST_EXTENT(path
[depth
].p_hdr
))
1373 return le32_to_cpu(path
[depth
].p_ext
[1].ee_block
);
1376 if (path
[depth
].p_idx
!=
1377 EXT_LAST_INDEX(path
[depth
].p_hdr
))
1378 return le32_to_cpu(path
[depth
].p_idx
[1].ei_block
);
1383 return EXT_MAX_BLOCKS
;
/*
 * NOTE(review): extraction-garbled listing; statements are split across
 * lines and some original lines are missing.  Code preserved verbatim;
 * only comments added.
 *
 * ext4_ext_next_leaf_block(): return the first logical block covered by
 * the next leaf, or EXT_MAX_BLOCKS when the current leaf is the last.
 */
1387 * ext4_ext_next_leaf_block:
1388 * returns first allocated block from next leaf or EXT_MAX_BLOCKS
1390 static ext4_lblk_t
ext4_ext_next_leaf_block(struct ext4_ext_path
*path
)
1394 BUG_ON(path
== NULL
);
1395 depth
= path
->p_depth
;
1397 /* zero-tree has no leaf blocks at all */
1399 return EXT_MAX_BLOCKS
;
1401 /* go to index block */
/* Walk up the index levels; the first index that is not the last in its
 * node starts the next leaf. */
1404 while (depth
>= 0) {
1405 if (path
[depth
].p_idx
!=
1406 EXT_LAST_INDEX(path
[depth
].p_hdr
))
1407 return (ext4_lblk_t
)
1408 le32_to_cpu(path
[depth
].p_idx
[1].ei_block
);
1412 return EXT_MAX_BLOCKS
;
/*
 * NOTE(review): extraction-garbled listing; statements are split across
 * lines and some original lines (loop headers, returns, braces) are
 * missing.  Code preserved verbatim; only comments added/repaired.
 *
 * ext4_ext_correct_indexes(): after the first extent of a leaf changes,
 * propagate its new starting logical block ("border") into the index
 * entries above it.  Returns 0 or a negative error from the journal
 * access/dirty helpers.
 */
1416 * ext4_ext_correct_indexes:
1417 * if leaf gets modified and modified extent is first in the leaf,
1418 * then we have to correct all indexes above.
1419 * TODO: do we need to correct tree in all cases?
1421 static int ext4_ext_correct_indexes(void *icb
, handle_t
*handle
, struct inode
*inode
,
1422 struct ext4_ext_path
*path
)
1424 struct ext4_extent_header
*eh
;
1425 int depth
= ext_depth(inode
);
1426 struct ext4_extent
*ex
;
1430 eh
= path
[depth
].p_hdr
;
1431 ex
= path
[depth
].p_ext
;
/* Defensive check: a corrupt path must not be dereferenced. */
1433 if (unlikely(ex
== NULL
|| eh
== NULL
)) {
1434 EXT4_ERROR_INODE(inode
,
1435 "ex %p == NULL or eh %p == NULL", ex
, eh
);
1440 /* there is no tree at all */
1444 if (ex
!= EXT_FIRST_EXTENT(eh
)) {
1445 /* we correct tree if first leaf got modified only */
1450 * TODO: we need correction if border is smaller than current one
/* New first logical block of the leaf to be pushed into parent indexes. */
1453 border
= path
[depth
].p_ext
->ee_block
;
1454 err
= ext4_ext_get_access(icb
, handle
, inode
, path
+ k
);
1457 path
[k
].p_idx
->ei_block
= border
;
1458 err
= ext4_ext_dirty(icb
, handle
, inode
, path
+ k
);
1463 /* change all left-side indexes */
/* Stop once an index is not the first in its node — higher levels are
 * unaffected then. */
1464 if (path
[k
+1].p_idx
!= EXT_FIRST_INDEX(path
[k
+1].p_hdr
))
1466 err
= ext4_ext_get_access(icb
, handle
, inode
, path
+ k
);
1469 path
[k
].p_idx
->ei_block
= border
;
1470 err
= ext4_ext_dirty(icb
, handle
, inode
, path
+ k
);
/*
 * NOTE(review): extraction-garbled listing; code preserved verbatim,
 * only comments added/repaired.
 *
 * ext4_can_extents_be_merged(): predicate — can ex2 be appended to ex1?
 * Requires same written/unwritten state, logical contiguity, physical
 * contiguity, and a combined length that fits in ee_len.
 */
1479 ext4_can_extents_be_merged(struct inode
*inode
, struct ext4_extent
*ex1
,
1480 struct ext4_extent
*ex2
)
1482 unsigned short ext1_ee_len
, ext2_ee_len
;
1485 * Make sure that both extents are initialized. We don't merge
1486 * unwritten extents so that we can be sure that end_io code has
1487 * the extent that was written properly split out and conversion to
1488 * initialized is trivial.
1490 if (ext4_ext_is_unwritten(ex1
) != ext4_ext_is_unwritten(ex2
))
1493 ext1_ee_len
= ext4_ext_get_actual_len(ex1
);
1494 ext2_ee_len
= ext4_ext_get_actual_len(ex2
);
/* Logical contiguity: ex2 must start exactly where ex1 ends. */
1496 if (le32_to_cpu(ex1
->ee_block
) + ext1_ee_len
!=
1497 le32_to_cpu(ex2
->ee_block
))
1501 * To allow future support for preallocated extents to be added
1502 * as an RO_COMPAT feature, refuse to merge two extents if
1503 * this can result in the top bit of ee_len being set.
1505 if (ext1_ee_len
+ ext2_ee_len
> EXT_INIT_MAX_LEN
)
/* Unwritten extents have a smaller maximum length (top bit is the
 * unwritten flag). */
1507 if (ext4_ext_is_unwritten(ex1
) &&
1508 (ext1_ee_len
+ ext2_ee_len
> EXT_UNWRITTEN_MAX_LEN
))
1510 #ifdef AGGRESSIVE_TEST
1511 if (ext1_ee_len
>= 4)
/* Physical contiguity: the data blocks must be adjacent on disk. */
1515 if (ext4_ext_pblock(ex1
) + ext1_ee_len
== ext4_ext_pblock(ex2
))
/*
 * NOTE(review): extraction-garbled listing; statements split across lines,
 * some original lines missing.  Code preserved verbatim; comments added.
 *
 * ext4_ext_try_to_merge_right(): repeatedly coalesce @ex with its right
 * neighbour while ext4_can_extents_be_merged() allows it, compacting the
 * leaf with memmove and decrementing eh_entries each time.
 */
1521 * This function tries to merge the "ex" extent to the next extent in the tree.
1522 * It always tries to merge towards right. If you want to merge towards
1523 * left, pass "ex - 1" as argument instead of "ex".
1524 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
1525 * 1 if they got merged.
1527 static int ext4_ext_try_to_merge_right(struct inode
*inode
,
1528 struct ext4_ext_path
*path
,
1529 struct ext4_extent
*ex
)
1531 struct ext4_extent_header
*eh
;
1532 unsigned int depth
, len
;
1533 int merge_done
= 0, unwritten
;
1535 depth
= ext_depth(inode
);
1536 assert(path
[depth
].p_hdr
!= NULL
);
1537 eh
= path
[depth
].p_hdr
;
1539 while (ex
< EXT_LAST_EXTENT(eh
)) {
1540 if (!ext4_can_extents_be_merged(inode
, ex
, ex
+ 1))
1542 /* merge with next extent! */
/* Save the unwritten flag: rewriting ee_len clears it, so it is
 * re-applied below after the combined length is stored. */
1543 unwritten
= ext4_ext_is_unwritten(ex
);
1544 ex
->ee_len
= cpu_to_le16(ext4_ext_get_actual_len(ex
)
1545 + ext4_ext_get_actual_len(ex
+ 1));
1547 ext4_ext_mark_unwritten(ex
);
/* Close the gap left by the swallowed extent. */
1549 if (ex
+ 1 < EXT_LAST_EXTENT(eh
)) {
1550 len
= (EXT_LAST_EXTENT(eh
) - ex
- 1)
1551 * sizeof(struct ext4_extent
);
1552 memmove(ex
+ 1, ex
+ 2, len
);
1554 le16_add_cpu(&eh
->eh_entries
, -1);
/* A leaf should never end up empty after a merge — flag corruption. */
1556 if (!eh
->eh_entries
)
1557 EXT4_ERROR_INODE(inode
, "eh->eh_entries = 0!");
/*
 * NOTE(review): extraction-garbled listing; code preserved verbatim,
 * comments added.
 *
 * ext4_ext_try_to_merge_up(): when the tree is depth 1 with a single
 * leaf whose contents fit in the inode's root header, copy the leaf up
 * into the inode, drop the leaf buffer, and free its block.
 */
1564 * This function does a very simple check to see if we can collapse
1565 * an extent tree with a single extent tree leaf block into the inode.
1567 static void ext4_ext_try_to_merge_up(void *icb
, handle_t
*handle
,
1568 struct inode
*inode
,
1569 struct ext4_ext_path
*path
)
1572 unsigned max_root
= ext4_ext_space_root(inode
, 0);
/* Collapse only: depth exactly 1, exactly one index in the root, and
 * the leaf's entries must fit in the inode root. */
1575 if ((path
[0].p_depth
!= 1) ||
1576 (le16_to_cpu(path
[0].p_hdr
->eh_entries
) != 1) ||
1577 (le16_to_cpu(path
[1].p_hdr
->eh_entries
) > max_root
))
1581 * We need to modify the block allocation bitmap and the block
1582 * group descriptor to release the extent tree block. If we
1583 * can't get the journal credits, give up.
1585 if (ext4_journal_extend(icb
, handle
, 2))
1589 * Copy the extent data up to the inode
1591 blk
= ext4_idx_pblock(path
[0].p_idx
);
1592 s
= le16_to_cpu(path
[1].p_hdr
->eh_entries
) *
1593 sizeof(struct ext4_extent_idx
);
1594 s
+= sizeof(struct ext4_extent_header
);
/* Rewire path[0] so it now describes the (new) root leaf. */
1596 path
[1].p_maxdepth
= path
[0].p_maxdepth
;
1597 memcpy(path
[0].p_hdr
, path
[1].p_hdr
, s
);
1598 path
[0].p_depth
= 0;
1599 path
[0].p_ext
= EXT_FIRST_EXTENT(path
[0].p_hdr
) +
1600 (path
[1].p_ext
- EXT_FIRST_EXTENT(path
[1].p_hdr
));
1601 path
[0].p_hdr
->eh_max
= cpu_to_le16(max_root
);
/* Release the old leaf buffer and return its block to the allocator. */
1603 extents_brelse(path
[1].p_bh
);
1604 ext4_free_blocks(icb
, handle
, inode
, NULL
, blk
, 1,
1605 EXT4_FREE_BLOCKS_METADATA
| EXT4_FREE_BLOCKS_FORGET
);
/*
 * NOTE(review): extraction-garbled listing; code preserved verbatim,
 * comments added.
 *
 * ext4_ext_try_to_merge(): merge @ex with its left neighbour first
 * (by passing ex - 1 to the right-merge helper), then with its right
 * neighbour, then attempt to collapse the whole tree into the inode.
 */
1609 * This function tries to merge the @ex extent to neighbours in the tree.
1610 * return 1 if merge left else 0.
1612 static void ext4_ext_try_to_merge(void *icb
, handle_t
*handle
,
1613 struct inode
*inode
,
1614 struct ext4_ext_path
*path
,
1615 struct ext4_extent
*ex
) {
1616 struct ext4_extent_header
*eh
;
1620 depth
= ext_depth(inode
);
1621 BUG_ON(path
[depth
].p_hdr
== NULL
);
1622 eh
= path
[depth
].p_hdr
;
/* Left merge: only possible when ex is not the first extent. */
1624 if (ex
> EXT_FIRST_EXTENT(eh
))
1625 merge_done
= ext4_ext_try_to_merge_right(inode
, path
, ex
- 1);
/* Right merge; the return value is deliberately ignored here. */
1628 (void) ext4_ext_try_to_merge_right(inode
, path
, ex
);
1630 ext4_ext_try_to_merge_up(icb
, handle
, inode
, path
);
/*
 * NOTE(review): extraction-garbled listing; statements are split across
 * physical lines and a number of original lines (labels, braces, some
 * branches) are missing.  Code tokens preserved byte-for-byte; only
 * comment text added or repaired.
 *
 * ext4_ext_insert_extent(): insert @newext into the tree.  Strategy, in
 * order: (1) append/prepend-merge into the extent found by the path,
 * (2) insert into the current leaf if it has room, (3) move to the next
 * leaf if it has room, (4) grow the tree with a new leaf, then place the
 * extent relative to its neighbours and re-run merging and index fixup.
 */
1634 * ext4_ext_insert_extent:
1635 * tries to merge requested extent into the existing extent or
1636 * inserts requested extent as new one into the tree,
1637 * creating new leaf in the no-space case.
1639 int ext4_ext_insert_extent(void *icb
, handle_t
*handle
, struct inode
*inode
,
1640 struct ext4_ext_path
**ppath
,
1641 struct ext4_extent
*newext
,
1644 struct ext4_ext_path
*path
= *ppath
;
1645 struct ext4_extent_header
*eh
;
1646 struct ext4_extent
*ex
, *fex
;
1647 struct ext4_extent
*nearex
; /* nearest extent */
1648 struct ext4_ext_path
*npath
= NULL
;
1649 int depth
, len
, err
;
1651 int mb_flags
= 0, unwritten
;
1653 if (gb_flags
& EXT4_GET_BLOCKS_DELALLOC_RESERVE
)
1654 mb_flags
|= EXT4_MB_DELALLOC_RESERVED
;
/* A zero-length extent is never valid input. */
1655 if (unlikely(ext4_ext_get_actual_len(newext
) == 0)) {
1656 EXT4_ERROR_INODE(inode
, "ext4_ext_get_actual_len(newext) == 0");
1659 depth
= ext_depth(inode
);
1660 ex
= path
[depth
].p_ext
;
1661 eh
= path
[depth
].p_hdr
;
1662 if (unlikely(path
[depth
].p_hdr
== NULL
)) {
1663 EXT4_ERROR_INODE(inode
, "path[%d].p_hdr == NULL", depth
);
1667 /* try to insert block into found extent and return */
1668 if (ex
&& !(gb_flags
& EXT4_GET_BLOCKS_PRE_IO
)) {
1671 * Try to see whether we should rather test the extent on
1672 * right from ex, or from the left of ex. This is because
1673 * ext4_find_extent() can return either extent on the
1674 * left, or on the right from the searched position. This
1675 * will make merging more effective.
1677 if (ex
< EXT_LAST_EXTENT(eh
) &&
1678 (le32_to_cpu(ex
->ee_block
) +
1679 ext4_ext_get_actual_len(ex
) <
1680 le32_to_cpu(newext
->ee_block
))) {
1683 } else if ((ex
> EXT_FIRST_EXTENT(eh
)) &&
1684 (le32_to_cpu(newext
->ee_block
) +
1685 ext4_ext_get_actual_len(newext
) <
1686 le32_to_cpu(ex
->ee_block
)))
1689 /* Try to append newex to the ex */
1690 if (ext4_can_extents_be_merged(inode
, ex
, newext
)) {
1691 ext_debug("append [%d]%d block to %u:[%d]%d"
1693 ext4_ext_is_unwritten(newext
),
1694 ext4_ext_get_actual_len(newext
),
1695 le32_to_cpu(ex
->ee_block
),
1696 ext4_ext_is_unwritten(ex
),
1697 ext4_ext_get_actual_len(ex
),
1698 ext4_ext_pblock(ex
));
1699 err
= ext4_ext_get_access(icb
, handle
, inode
,
/* Preserve the unwritten flag across the ee_len rewrite. */
1703 unwritten
= ext4_ext_is_unwritten(ex
);
1704 ex
->ee_len
= cpu_to_le16(ext4_ext_get_actual_len(ex
)
1705 + ext4_ext_get_actual_len(newext
));
1707 ext4_ext_mark_unwritten(ex
);
1708 eh
= path
[depth
].p_hdr
;
1714 /* Try to prepend newex to the ex */
1715 if (ext4_can_extents_be_merged(inode
, newext
, ex
)) {
1716 ext_debug("prepend %u[%d]%d block to %u:[%d]%d"
1718 le32_to_cpu(newext
->ee_block
),
1719 ext4_ext_is_unwritten(newext
),
1720 ext4_ext_get_actual_len(newext
),
1721 le32_to_cpu(ex
->ee_block
),
1722 ext4_ext_is_unwritten(ex
),
1723 ext4_ext_get_actual_len(ex
),
1724 ext4_ext_pblock(ex
));
1725 err
= ext4_ext_get_access(icb
, handle
, inode
,
/* Prepend: ex takes over newext's start block and physical block. */
1730 unwritten
= ext4_ext_is_unwritten(ex
);
1731 ex
->ee_block
= newext
->ee_block
;
1732 ext4_ext_store_pblock(ex
, ext4_ext_pblock(newext
));
1733 ex
->ee_len
= cpu_to_le16(ext4_ext_get_actual_len(ex
)
1734 + ext4_ext_get_actual_len(newext
))ex
;
1736 ext4_ext_mark_unwritten(ex
);
1737 eh
= path
[depth
].p_hdr
;
1743 depth
= ext_depth(inode
);
1744 eh
= path
[depth
].p_hdr
;
1745 if (le16_to_cpu(eh
->eh_entries
) < le16_to_cpu(eh
->eh_max
))
1748 /* probably next leaf has space for us? */
1749 fex
= EXT_LAST_EXTENT(eh
);
1750 next
= EXT_MAX_BLOCKS
;
1751 if (le32_to_cpu(newext
->ee_block
) > le32_to_cpu(fex
->ee_block
))
1752 next
= ext4_ext_next_leaf_block(path
);
1753 if (next
!= EXT_MAX_BLOCKS
) {
1754 ext_debug("next leaf block - %u\n", next
);
1755 BUG_ON(npath
!= NULL
);
1756 npath
= ext4_find_extent(inode
, next
, NULL
, 0);
1758 return PTR_ERR(npath
);
1759 BUG_ON(npath
->p_depth
!= path
->p_depth
);
1760 eh
= npath
[depth
].p_hdr
;
1761 if (le16_to_cpu(eh
->eh_entries
) < le16_to_cpu(eh
->eh_max
)) {
1762 ext_debug("next leaf isn't full(%d)\n",
1763 le16_to_cpu(eh
->eh_entries
));
1767 ext_debug("next leaf has no free space(%d,%d)\n",
1768 le16_to_cpu(eh
->eh_entries
), le16_to_cpu(eh
->eh_max
));
1772 * There is no free space in the found leaf.
1773 * We're gonna add a new leaf in the tree.
1775 if (gb_flags
& EXT4_GET_BLOCKS_METADATA_NOFAIL
)
1776 mb_flags
|= EXT4_MB_USE_RESERVED
;
1777 err
= ext4_ext_create_new_leaf(icb
, handle
, inode
, mb_flags
, gb_flags
,
1781 depth
= ext_depth(inode
);
1782 eh
= path
[depth
].p_hdr
;
1785 nearex
= path
[depth
].p_ext
;
1787 err
= ext4_ext_get_access(icb
, handle
, inode
, path
+ depth
);
1792 /* there is no extent in this leaf, create first one */
1793 ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n",
1794 le32_to_cpu(newext
->ee_block
),
1795 ext4_ext_pblock(newext
),
1796 ext4_ext_is_unwritten(newext
),
1797 ext4_ext_get_actual_len(newext
));
1798 nearex
= EXT_FIRST_EXTENT(eh
);
1800 if (le32_to_cpu(newext
->ee_block
)
1801 > le32_to_cpu(nearex
->ee_block
)) {
1803 ext_debug("insert %u:%llu:[%d]%d before: "
1805 le32_to_cpu(newext
->ee_block
),
1806 ext4_ext_pblock(newext
),
1807 ext4_ext_is_unwritten(newext
),
1808 ext4_ext_get_actual_len(newext
),
/* Inserting after nearex: the two must never share a start block. */
1813 BUG_ON(newext
->ee_block
== nearex
->ee_block
);
1814 ext_debug("insert %u:%llu:[%d]%d after: "
1816 le32_to_cpu(newext
->ee_block
),
1817 ext4_ext_pblock(newext
),
1818 ext4_ext_is_unwritten(newext
),
1819 ext4_ext_get_actual_len(newext
),
/* Shift the tail of the leaf right by one slot to open a hole. */
1822 len
= EXT_LAST_EXTENT(eh
) - nearex
+ 1;
1824 ext_debug("insert %u:%llu:[%d]%d: "
1825 "move %d extents from 0x%p to 0x%p\n",
1826 le32_to_cpu(newext
->ee_block
),
1827 ext4_ext_pblock(newext
),
1828 ext4_ext_is_unwritten(newext
),
1829 ext4_ext_get_actual_len(newext
),
1830 len
, nearex
, nearex
+ 1);
1831 memmove(nearex
+ 1, nearex
,
1832 len
* sizeof(struct ext4_extent
));
/* Fill the freed slot with the new extent's fields. */
1836 le16_add_cpu(&eh
->eh_entries
, 1);
1837 path
[depth
].p_ext
= nearex
;
1838 nearex
->ee_block
= newext
->ee_block
;
1839 ext4_ext_store_pblock(nearex
, ext4_ext_pblock(newext
));
1840 nearex
->ee_len
= newext
->ee_len
;
1843 /* try to merge extents */
1844 if (!(gb_flags
& EXT4_GET_BLOCKS_PRE_IO
))
1845 ext4_ext_try_to_merge(icb
, handle
, inode
, path
, nearex
);
1848 /* time to correct all indexes above */
1849 err
= ext4_ext_correct_indexes(icb
, handle
, inode
, path
);
1853 err
= ext4_ext_dirty(icb
, handle
, inode
, path
+ path
->p_depth
);
/* Release the auxiliary "next leaf" path if one was taken. */
1857 ext4_ext_drop_refs(npath
);
/*
 * get_default_free_blocks_flags(): flags passed to ext4_free_blocks()
 * for ordinary data-block frees (used by ext4_ext_get_blocks below).
 * NOTE(review): the function body is missing from this garbled
 * extraction; only the signature line survives.
 */
1863 static inline int get_default_free_blocks_flags(struct inode
*inode
)
/*
 * ext4_ext_zeroout(): write zeros over the physical blocks covered by
 * @ex (used when a failed split is recovered by zero-filling instead).
 * NOTE(review): garbled extraction — the lines that perform the actual
 * zeroing are missing here; only the length/pblock setup survives.
 */
1868 /* FIXME!! we need to try to merge to left or right after zero-out */
1869 static int ext4_ext_zeroout(struct inode
*inode
, struct ext4_extent
*ex
)
1871 ext4_fsblk_t ee_pblock
;
1872 unsigned int ee_len
;
1875 ee_len
= ext4_ext_get_actual_len(ex
);
1876 ee_pblock
= ext4_ext_pblock(ex
);
/*
 * NOTE(review): extraction-garbled listing; code preserved verbatim,
 * comments added.
 *
 * ext4_remove_blocks(): free the physical blocks backing the logical
 * range [from, to] of extent @ex.  Visible here: the "tail removal"
 * case frees the last `num` blocks; the head-removal branch's body is
 * missing from this extraction.
 */
1883 static int ext4_remove_blocks(void *icb
, handle_t
*handle
, struct inode
*inode
,
1884 struct ext4_extent
*ex
,
1885 unsigned long from
, unsigned long to
)
1887 struct buffer_head
*bh
;
/* Case 1: range ends exactly at the extent's last block — free tail. */
1890 if (from
>= le32_to_cpu(ex
->ee_block
)
1891 && to
== le32_to_cpu(ex
->ee_block
) + ext4_ext_get_actual_len(ex
) - 1) {
1893 unsigned long num
, start
;
/* num = blocks from `from` to the end; start = first pblock to free. */
1894 num
= le32_to_cpu(ex
->ee_block
) + ext4_ext_get_actual_len(ex
) - from
;
1895 start
= ext4_ext_pblock(ex
) + ext4_ext_get_actual_len(ex
) - num
;
1896 ext4_free_blocks(icb
, handle
, inode
, NULL
, start
, num
, 0);
/* Case 2: range starts at the extent's first block (head removal). */
1897 } else if (from
== le32_to_cpu(ex
->ee_block
)
1898 && to
<= le32_to_cpu(ex
->ee_block
) + ext4_ext_get_actual_len(ex
) - 1) {
/*
 * NOTE(review): extraction-garbled listing; code preserved verbatim,
 * comments added.
 *
 * ext4_ext_rm_idx(): drop the (last) index entry of the node addressed
 * by @path and free the index block it pointed to.  Truncate-only
 * helper: callers guarantee the removed index is the last in its node.
 */
1905 * routine removes index from the index block
1906 * it's used in truncate case only. thus all requests are for
1907 * last index in the block only
1909 int ext4_ext_rm_idx(void *icb
, handle_t
*handle
, struct inode
*inode
,
1910 struct ext4_ext_path
*path
)
1915 /* free index block */
1917 leaf
= ext4_idx_pblock(path
->p_idx
);
1918 BUG_ON(path
->p_hdr
->eh_entries
== 0);
1919 if ((err
= ext4_ext_get_access(icb
, handle
, inode
, path
)))
/* Shrink the entry count, journal the node, then free the child block. */
1921 path
->p_hdr
->eh_entries
= cpu_to_le16(le16_to_cpu(path
->p_hdr
->eh_entries
)-1);
1922 if ((err
= ext4_ext_dirty(icb
, handle
, inode
, path
)))
1924 ext4_free_blocks(icb
, handle
, inode
, NULL
, leaf
, 1, 0);
/*
 * NOTE(review): extraction-garbled listing; statements split across
 * lines, several original lines (returns, else-branches, braces)
 * missing.  Code preserved byte-for-byte; comments added.
 *
 * ext4_ext_rm_leaf(): walk a leaf from its last extent backwards and
 * remove every extent (or extent tail) whose range extends past
 * @start, freeing the underlying data blocks via ext4_remove_blocks()
 * and finally removing the leaf from its parent index when it empties.
 */
1929 ext4_ext_rm_leaf(void *icb
, handle_t
*handle
, struct inode
*inode
,
1930 struct ext4_ext_path
*path
, unsigned long start
)
1932 int err
= 0, correct_index
= 0;
1933 int depth
= ext_depth(inode
), credits
;
1934 struct ext4_extent_header
*eh
;
1935 unsigned a
, b
, block
, num
;
1936 unsigned long ex_ee_block
;
1937 unsigned short ex_ee_len
;
1938 struct ext4_extent
*ex
;
1940 /* the header must be checked already in ext4_ext_remove_space() */
1941 if (!path
[depth
].p_hdr
)
1942 path
[depth
].p_hdr
= ext_block_hdr(path
[depth
].p_bh
);
1943 eh
= path
[depth
].p_hdr
;
1946 /* find where to start removing */
1947 ex
= EXT_LAST_EXTENT(eh
);
1949 ex_ee_block
= le32_to_cpu(ex
->ee_block
);
1950 ex_ee_len
= ext4_ext_get_actual_len(ex
);
/* Iterate right-to-left over every extent overlapping [start, ...]. */
1952 while (ex
>= EXT_FIRST_EXTENT(eh
) &&
1953 ex_ee_block
+ ex_ee_len
> start
) {
1954 path
[depth
].p_ext
= ex
;
/* [a, b] = portion of this extent that falls inside the removal range. */
1956 a
= ex_ee_block
> start
? ex_ee_block
: start
;
1957 b
= (unsigned long long)ex_ee_block
+ ex_ee_len
- 1 <
1958 EXT_MAX_BLOCKS
? ex_ee_block
+ ex_ee_len
- 1 : EXT_MAX_BLOCKS
;
1961 if (a
!= ex_ee_block
&& b
!= ex_ee_block
+ ex_ee_len
- 1) {
1965 } else if (a
!= ex_ee_block
) {
1966 /* remove tail of the extent */
1967 block
= ex_ee_block
;
1969 } else if (b
!= ex_ee_block
+ ex_ee_len
- 1) {
1970 /* remove head of the extent */
1973 /* there is no "make a hole" API yet */
1976 /* remove whole extent: excellent! */
1977 block
= ex_ee_block
;
1979 BUG_ON(a
!= ex_ee_block
);
1980 BUG_ON(b
!= ex_ee_block
+ ex_ee_len
- 1);
1983 /* at present, extent can't cross block group */
1984 /* leaf + bitmap + group desc + sb + inode */
/* Removing the leaf's first extent means parent indexes need fixing
 * afterwards, which costs extra journal credits. */
1986 if (ex
== EXT_FIRST_EXTENT(eh
)) {
1988 credits
+= (ext_depth(inode
)) + 1;
1991 /*handle = ext4_ext_journal_restart(icb, handle, credits);*/
1992 /*if (IS_ERR(icb, handle)) {*/
1993 /*err = PTR_ERR(icb, handle);*/
1997 err
= ext4_ext_get_access(icb
, handle
, inode
, path
+ depth
);
2001 err
= ext4_remove_blocks(icb
, handle
, inode
, ex
, a
, b
);
2006 /* this extent is removed entirely mark slot unused */
2007 ext4_ext_store_pblock(ex
, 0);
2008 eh
->eh_entries
= cpu_to_le16(le16_to_cpu(eh
->eh_entries
)-1);
/* Record the surviving (shortened) extent, journal the leaf. */
2011 ex
->ee_block
= cpu_to_le32(block
);
2012 ex
->ee_len
= cpu_to_le16(num
);
2014 err
= ext4_ext_dirty(icb
, handle
, inode
, path
+ depth
);
/* Step to the previous extent in the leaf. */
2019 ex_ee_block
= le32_to_cpu(ex
->ee_block
);
2020 ex_ee_len
= ext4_ext_get_actual_len(ex
);
2023 if (correct_index
&& eh
->eh_entries
)
2024 err
= ext4_ext_correct_indexes(icb
, handle
, inode
, path
);
2026 /* if this leaf is free, then we should
2027 * remove it from index block above */
2028 if (err
== 0 && eh
->eh_entries
== 0 && path
[depth
].p_bh
!= NULL
)
2029 err
= ext4_ext_rm_idx(icb
, handle
, inode
, path
+ depth
);
/*
 * NOTE(review): extraction-garbled listing; statements split across
 * lines, several original lines (labels, else-branches, braces)
 * missing.  Code preserved byte-for-byte; comments added/repaired.
 *
 * ext4_split_extent_at(): split the extent under *ppath at logical
 * block @split into [a, split) and [split, b], marking each half
 * written/unwritten per @split_flag; falls back to zero-out (when
 * EXT4_EXT_MAY_ZEROOUT is allowed) if inserting the second half hits
 * ENOSPC, and restores the original ee_len on unrecoverable failure.
 */
2036 * ext4_split_extent_at() splits an extent at given block.
2038 * @handle: the journal handle
2039 * @inode: the file inode
2040 * @path: the path to the extent
2041 * @split: the logical block where the extent is split.
2042 * @split_flags: indicates if the extent could be zeroout if split fails, and
2043 * the states(init or unwritten) of new extents.
2044 * @flags: flags used to insert new extent to extent tree.
2047 * Splits extent [a, b] into two extents [a, @split) and [@split, b], states
2048 * of which are determined by split_flag.
2050 * There are two cases:
2051 * a> the extent is split into two extents.
2052 * b> split is not needed, and just mark the extent.
2054 * return 0 on success.
2056 static int ext4_split_extent_at(void *icb
, handle_t
*handle
,
2057 struct inode
*inode
,
2058 struct ext4_ext_path
**ppath
,
2063 struct ext4_ext_path
*path
= *ppath
;
2064 ext4_fsblk_t newblock
;
2065 ext4_lblk_t ee_block
;
2066 struct ext4_extent
*ex
, newex
, orig_ex
, zero_ex
;
2067 struct ext4_extent
*ex2
= NULL
;
2068 unsigned int ee_len
, depth
;
2071 ext4_ext_show_leaf(inode
, path
);
2073 depth
= ext_depth(inode
);
2074 ex
= path
[depth
].p_ext
;
2075 ee_block
= le32_to_cpu(ex
->ee_block
);
2076 ee_len
= ext4_ext_get_actual_len(ex
);
/* Physical block where the second half will start. */
2077 newblock
= split
- ee_block
+ ext4_ext_pblock(ex
);
/* @split must fall strictly inside the extent's logical range. */
2079 BUG_ON(split
< ee_block
|| split
>= (ee_block
+ ee_len
));
2081 err
= ext4_ext_get_access(icb
, handle
, inode
, path
+ depth
);
2085 if (split
== ee_block
) {
2087 * case b: block @split is the block that the extent begins with
2088 * then we just change the state of the extent, and splitting
2091 if (split_flag
& EXT4_EXT_MARK_UNWRIT2
)
2092 ext4_ext_mark_unwritten(ex
);
2094 ext4_ext_mark_initialized(ex
);
2096 if (!(flags
& EXT4_GET_BLOCKS_PRE_IO
))
2097 ext4_ext_try_to_merge(icb
, handle
, inode
, path
, ex
);
2099 err
= ext4_ext_dirty(icb
, handle
, inode
, path
+ path
->p_depth
);
/* case a: keep a pristine copy for rollback, then shrink ex to the
 * first half. */
2104 memcpy(&orig_ex
, ex
, sizeof(orig_ex
));
2105 ex
->ee_len
= cpu_to_le16(split
- ee_block
);
2106 if (split_flag
& EXT4_EXT_MARK_UNWRIT1
)
2107 ext4_ext_mark_unwritten(ex
);
2110 * path may lead to new leaf, not to original leaf any more
2111 * after ext4_ext_insert_extent() returns,
2113 err
= ext4_ext_dirty(icb
, handle
, inode
, path
+ depth
);
2115 goto fix_extent_len
;
/* Build the second half and insert it as a new extent. */
2118 ex2
->ee_block
= cpu_to_le32(split
);
2119 ex2
->ee_len
= cpu_to_le16(ee_len
- (split
- ee_block
));
2120 ext4_ext_store_pblock(ex2
, newblock
);
2121 if (split_flag
& EXT4_EXT_MARK_UNWRIT2
)
2122 ext4_ext_mark_unwritten(ex2
);
2124 err
= ext4_ext_insert_extent(icb
, handle
, inode
, ppath
, &newex
, flags
);
/* ENOSPC recovery: zero-fill instead of splitting when permitted. */
2125 if (err
== -ENOSPC
&& (EXT4_EXT_MAY_ZEROOUT
& split_flag
)) {
2126 if (split_flag
& (EXT4_EXT_DATA_VALID1
|EXT4_EXT_DATA_VALID2
)) {
2127 if (split_flag
& EXT4_EXT_DATA_VALID1
) {
/* first half holds valid data: zero the second half */
2128 err
= ext4_ext_zeroout(inode
, ex2
);
2129 zero_ex
.ee_block
= ex2
->ee_block
;
2130 zero_ex
.ee_len
= cpu_to_le16(
2131 ext4_ext_get_actual_len(ex2
));
2132 ext4_ext_store_pblock(&zero_ex
,
2133 ext4_ext_pblock(ex2
));
/* second half holds valid data: zero the first half */
2135 err
= ext4_ext_zeroout(inode
, ex
);
2136 zero_ex
.ee_block
= ex
->ee_block
;
2137 zero_ex
.ee_len
= cpu_to_le16(
2138 ext4_ext_get_actual_len(ex
));
2139 ext4_ext_store_pblock(&zero_ex
,
2140 ext4_ext_pblock(ex
));
/* no half is marked valid: zero the whole original extent */
2143 err
= ext4_ext_zeroout(inode
, &orig_ex
);
2144 zero_ex
.ee_block
= orig_ex
.ee_block
;
2145 zero_ex
.ee_len
= cpu_to_le16(
2146 ext4_ext_get_actual_len(&orig_ex
));
2147 ext4_ext_store_pblock(&zero_ex
,
2148 ext4_ext_pblock(&orig_ex
));
2152 goto fix_extent_len
;
2153 /* update the extent length and mark as initialized */
2154 ex
->ee_len
= cpu_to_le16(ee_len
);
2155 ext4_ext_try_to_merge(icb
, handle
, inode
, path
, ex
);
2156 err
= ext4_ext_dirty(icb
, handle
, inode
, path
+ path
->p_depth
);
2158 goto fix_extent_len
;
2162 goto fix_extent_len
;
2165 ext4_ext_show_leaf(inode
, path
);
/* fix_extent_len: roll ee_len back to the saved original value. */
2169 ex
->ee_len
= orig_ex
.ee_len
;
2170 ext4_ext_dirty(icb
, handle
, inode
, path
+ path
->p_depth
);
/*
 * NOTE(review): extraction-garbled listing; code preserved verbatim,
 * comments added/repaired.
 *
 * ext4_ext_more_to_rm(): during top-down removal, decide whether the
 * current index node still has children to descend into (comparing the
 * live entry count against the count cached in p_block).
 */
2175 * returns 1 if current index has to be freed (even partial)
2178 ext4_ext_more_to_rm(struct ext4_ext_path
*path
)
2180 BUG_ON(path
->p_idx
== NULL
);
/* p_idx walked past the first index: this node is exhausted. */
2182 if (path
->p_idx
< EXT_FIRST_INDEX(path
->p_hdr
))
2186 * if truncate on deeper level happened, it wasn't partial —
2187 * so we have to consider current index for truncation
2189 if (le16_to_cpu(path
->p_hdr
->eh_entries
) == path
->p_block
)
/*
 * NOTE(review): extraction-garbled listing; statements split across
 * lines, multiple original lines (loop control, error paths, braces)
 * missing.  Code preserved byte-for-byte; comments added/repaired.
 *
 * ext4_ext_remove_space(): truncate helper — free every extent at or
 * beyond logical block @start by iteratively walking the tree right-to-
 * left with an explicit path array (no recursion), removing leaves via
 * ext4_ext_rm_leaf() and emptied index nodes via ext4_ext_rm_idx(),
 * then resetting the root header when the whole tree was freed.
 */
2194 int ext4_ext_remove_space(void *icb
, struct inode
*inode
, unsigned long start
)
2197 struct super_block
*sb
= inode
->i_sb
;
2199 int depth
= ext_depth(inode
);
2200 struct ext4_ext_path
*path
;
2201 handle_t
*handle
= NULL
;
2204 /* probably first extent we're gonna free will be last in block */
2205 /*handle = ext4_journal_start(inode, depth + 1);*/
2206 /*if (IS_ERR(icb, handle))*/
2207 /*return PTR_ERR(icb, handle);*/
2210 * we start scanning from right side freeing all the blocks
2211 * after i_size and walking into the deep
2213 path
= kmalloc(sizeof(struct ext4_ext_path
) * (depth
+ 1), GFP_KERNEL
);
2215 ext4_journal_stop(icb
, handle
);
2218 memset(path
, 0, sizeof(struct ext4_ext_path
) * (depth
+ 1));
2219 path
[0].p_hdr
= ext_inode_hdr(inode
);
2220 if (ext4_ext_check_inode(inode
)) {
2224 path
[0].p_depth
= depth
;
/* i is the current tree level; iterate until the walk unwinds past
 * the root or an error occurs. */
2226 while (i
>= 0 && err
== 0) {
2228 /* this is leaf block */
2229 err
= ext4_ext_rm_leaf(icb
, handle
, inode
, path
, start
);
2230 /* root level has p_bh == NULL, extents_brelse() eats this */
2231 extents_brelse(path
[i
].p_bh
);
2232 path
[i
].p_bh
= NULL
;
2237 /* this is index block */
2238 if (!path
[i
].p_hdr
) {
2239 path
[i
].p_hdr
= ext_block_hdr(path
[i
].p_bh
);
2242 if (!path
[i
].p_idx
) {
2243 /* this level hasn't been touched yet */
2244 path
[i
].p_idx
= EXT_LAST_INDEX(path
[i
].p_hdr
);
2245 path
[i
].p_block
= le16_to_cpu(path
[i
].p_hdr
->eh_entries
)+1;
2247 /* we've already been here; move to the next index */
2251 if (ext4_ext_more_to_rm(path
+ i
)) {
2252 struct buffer_head
*bh
;
2253 /* go to the next level */
2254 memset(path
+ i
+ 1, 0, sizeof(*path
));
2255 bh
= read_extent_tree_block(inode
, ext4_idx_pblock(path
[i
].p_idx
), path
[0].p_depth
- (i
+ 1), 0);
2257 /* should we reset i_size? */
2261 path
[i
+1].p_bh
= bh
;
2263 /* put actual number of indexes to know is this
2264 * number got changed at the next iteration */
2265 path
[i
].p_block
= le16_to_cpu(path
[i
].p_hdr
->eh_entries
);
2268 /* we finish processing this index, go up */
2269 if (path
[i
].p_hdr
->eh_entries
== 0 && i
> 0) {
2270 /* index is empty, remove it
2271 * handle must be already prepared by the
2272 * truncatei_leaf() */
2273 err
= ext4_ext_rm_idx(icb
, handle
, inode
, path
+ i
);
2275 /* root level has p_bh == NULL, extents_brelse() eats this */
2276 extents_brelse(path
[i
].p_bh
);
2277 path
[i
].p_bh
= NULL
;
2282 /* TODO: flexible tree reduction should be here */
2283 if (path
->p_hdr
->eh_entries
== 0) {
2285 * truncate to zero freed all the tree
2286 * so, we need to correct eh_depth
2288 err
= ext4_ext_get_access(icb
, handle
, inode
, path
);
2290 ext_inode_hdr(inode
)->eh_depth
= 0;
2291 ext_inode_hdr(inode
)->eh_max
=
2292 cpu_to_le16(ext4_ext_space_root(inode
, 0));
2293 err
= ext4_ext_dirty(icb
, handle
, inode
, path
);
2298 ext4_ext_drop_refs(path
);
2301 ext4_journal_stop(icb
, handle
);
/*
 * NOTE(review): extraction-garbled listing; code preserved verbatim,
 * comments added.
 *
 * ext4_ext_tree_init(): initialize an empty extent tree in the inode
 * body (magic + root capacity) and mark the inode dirty.
 */
2306 int ext4_ext_tree_init(void *icb
, handle_t
*handle
, struct inode
*inode
)
2308 struct ext4_extent_header
*eh
;
2310 eh
= ext_inode_hdr(inode
);
2313 eh
->eh_magic
= EXT4_EXT_MAGIC
;
2314 eh
->eh_max
= cpu_to_le16(ext4_ext_space_root(inode
, 0));
2315 ext4_mark_inode_dirty(icb
, handle
, inode
);
/*
 * ext4_ext_init(): mount-time hook for the extents subsystem.
 * Intentionally a no-op in this port (see comment retained below).
 */
2320 * called at mount time
2322 void ext4_ext_init(struct super_block
*sb
)
2325 * possible initialization would be here
/*
 * NOTE(review): extraction-garbled listing; parameter lines and some
 * branches are missing from this extraction.  Code preserved verbatim;
 * comments added.
 *
 * ext4_ext_convert_to_initialized(): convert the @blocks-long unwritten
 * sub-range starting at logical block @split to the written state by
 * splitting the covering extent one or two times, choosing the cheapest
 * split pattern based on where the sub-range falls within the extent.
 */
2329 static int ext4_ext_convert_to_initialized (
2332 struct inode
*inode
,
2333 struct ext4_ext_path
**ppath
,
2335 unsigned long blocks
,
2338 int depth
= ext_depth(inode
), err
;
2339 struct ext4_extent
*ex
= (*ppath
)[depth
].p_ext
;
2341 assert (le32_to_cpu(ex
->ee_block
) <= split
);
/* Sub-range reaches the extent's end: one split, right part written. */
2343 if (split
+ blocks
== le32_to_cpu(ex
->ee_block
) +
2344 ext4_ext_get_actual_len(ex
)) {
2346 /* split and initialize right part */
2347 err
= ext4_split_extent_at(icb
, handle
, inode
, ppath
, split
,
2348 EXT4_EXT_MARK_UNWRIT1
, flags
);
/* Sub-range begins at the extent's start: one split, left part written. */
2350 } else if (le32_to_cpu(ex
->ee_block
) == split
) {
2352 /* split and initialize left part */
2353 err
= ext4_split_extent_at(icb
, handle
, inode
, ppath
, split
+ blocks
,
2354 EXT4_EXT_MARK_UNWRIT2
, flags
);
/* Sub-range is strictly interior: two splits, middle part written. */
2358 /* split 1 extent to 3 and initialize the 2nd */
2359 err
= ext4_split_extent_at(icb
, handle
, inode
, ppath
, split
+ blocks
,
2360 EXT4_EXT_MARK_UNWRIT1
|
2361 EXT4_EXT_MARK_UNWRIT2
, flags
);
2363 err
= ext4_split_extent_at(icb
, handle
, inode
, ppath
, split
,
2364 EXT4_EXT_MARK_UNWRIT1
, flags
);
/*
 * NOTE(review): extraction-garbled listing; statements split across
 * lines, several original lines (error paths, gotos, braces, the
 * convert_to_initialized argument list) missing.  Code preserved
 * byte-for-byte; comments added/repaired.
 *
 * ext4_ext_get_blocks(): map logical block @iblock (+ up to
 * @max_blocks) of @inode to physical blocks.  Looks the block up in
 * the extent tree; on a hit returns the mapping (converting an
 * unwritten extent when needed), otherwise allocates new blocks,
 * inserts the new extent, and fills @bh_result.  Returns the number
 * of mapped blocks, or a negative error.
 */
2371 int ext4_ext_get_blocks(void *icb
, handle_t
*handle
, struct inode
*inode
, ext4_fsblk_t iblock
,
2372 unsigned long max_blocks
, struct buffer_head
*bh_result
,
2373 int create
, int flags
)
2375 struct ext4_ext_path
*path
= NULL
;
2376 struct ext4_extent newex
, *ex
;
2377 int goal
, err
= 0, depth
;
2378 unsigned long allocated
= 0;
2379 ext4_fsblk_t next
, newblock
;
2381 clear_buffer_new(bh_result
);
2382 /*mutex_lock(&ext4_I(inode)->truncate_mutex);*/
2384 /* find extent for this block */
2385 path
= ext4_find_extent(inode
, iblock
, NULL
, 0);
2387 err
= PTR_ERR(path
);
2392 depth
= ext_depth(inode
);
2395 * consistent leaf must not be empty
2396 * this situation is possible, though, _during_ tree modification
2397 * this is why assert can't be put in ext4_ext_find_extent()
2399 BUG_ON(path
[depth
].p_ext
== NULL
&& depth
!= 0);
2401 if ((ex
= path
[depth
].p_ext
)) {
2402 ext4_lblk_t ee_block
= le32_to_cpu(ex
->ee_block
);
2403 ext4_fsblk_t ee_start
= ext4_ext_pblock(ex
);
2404 unsigned short ee_len
= ext4_ext_get_actual_len(ex
);
2405 /* if found extent covers block, simple return it */
2406 if (iblock
>= ee_block
&& iblock
< ee_block
+ ee_len
) {
2408 /* number of remaining blocks in the extent */
2409 allocated
= ee_len
+ ee_block
- iblock
;
/* Unwritten hit: convert the covered range to written first. */
2411 if (ext4_ext_is_unwritten(ex
)) {
2413 newblock
= iblock
- ee_block
+ ee_start
;
2414 err
= ext4_ext_convert_to_initialized (
/* Written hit: translate logical offset into the physical run. */
2428 newblock
= iblock
- ee_block
+ ee_start
;
2435 * requested block isn't allocated yet
2436 * we couldn't try to create block if create flag is zero
2442 /* find next allocated block so that we know how many
2443 * blocks we can allocate without overlapping next extent */
2444 next
= ext4_ext_next_allocated_block(path
);
2445 BUG_ON(next
<= iblock
);
2446 allocated
= next
- iblock
;
/* Cap unwritten (fallocate-style) allocations to the flag-bit limit. */
2447 if (flags
& EXT4_GET_BLOCKS_PRE_IO
&& max_blocks
> EXT_UNWRITTEN_MAX_LEN
)
2448 max_blocks
= EXT_UNWRITTEN_MAX_LEN
;
2449 if (allocated
> max_blocks
)
2450 allocated
= max_blocks
;
2452 /* allocate new block */
2453 goal
= ext4_ext_find_goal(inode
, path
, iblock
);
2454 newblock
= ext4_new_meta_blocks(icb
, handle
, inode
, goal
, 0,
2459 /* try to insert new extent into found leaf and return */
2460 newex
.ee_block
= cpu_to_le32(iblock
);
2461 ext4_ext_store_pblock(&newex
, newblock
);
2462 newex
.ee_len
= cpu_to_le16(allocated
);
2463 /* if it's fallocate, mark ex as unwritten */
2464 if (flags
& EXT4_GET_BLOCKS_PRE_IO
) {
2465 ext4_ext_mark_unwritten(&newex
);
2467 err
= ext4_ext_insert_extent(icb
, handle
, inode
, &path
, &newex
,
2468 flags
& EXT4_GET_BLOCKS_PRE_IO
);
/* Insert failed: give the just-allocated data blocks back. */
2471 /* free data blocks we just allocated */
2472 ext4_free_blocks(icb
, handle
, inode
, NULL
, ext4_ext_pblock(&newex
),
2473 le16_to_cpu(newex
.ee_len
), get_default_free_blocks_flags(inode
));
2477 ext4_mark_inode_dirty(icb
, handle
, inode
);
2479 /* previous routine could use block we allocated */
2480 if (ext4_ext_is_unwritten(&newex
))
2483 newblock
= ext4_ext_pblock(&newex
);
2485 set_buffer_new(bh_result
);
2488 if (allocated
> max_blocks
)
2489 allocated
= max_blocks
;
2491 ext4_ext_show_leaf(inode
, path
);
/* Publish the mapping to the caller's buffer head. */
2492 set_buffer_mapped(bh_result
);
2493 bh_result
->b_bdev
= inode
->i_sb
->s_bdev
;
2494 bh_result
->b_blocknr
= newblock
;
2497 ext4_ext_drop_refs(path
);
2500 /*mutex_unlock(&ext4_I(inode)->truncate_mutex);*/
2502 return err
? err
: allocated
;
2506 #pragma warning(pop)