/* reactos/drivers/filesystems/ext2/src/ext4/ext4_extents.c */
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 */

#include "ext2fs.h"
#include <linux/ext4.h>

#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable: 4018)
#pragma warning(disable: 4242)
#pragma warning(disable: 4244)
#endif


/*
 * used by extent splitting.
 */
#define EXT4_EXT_MAY_ZEROOUT    0x1  /* safe to zeroout if split fails
                                        due to ENOSPC */
#define EXT4_EXT_MARK_UNWRIT1   0x2  /* mark first half unwritten */
#define EXT4_EXT_MARK_UNWRIT2   0x4  /* mark second half unwritten */

#define EXT4_EXT_DATA_VALID1    0x8  /* first half contains valid data */
#define EXT4_EXT_DATA_VALID2    0x10 /* second half contains valid data */

#define CONFIG_EXTENT_TEST
#ifdef CONFIG_EXTENT_TEST

#define ext4_mark_inode_dirty(icb, handle, n) ext3_mark_inode_dirty(icb, n)
static inline ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode)
{
    PEXT2_VCB Vcb;
    Vcb = inode->i_sb->s_priv;
    return (inode->i_ino - 1) / BLOCKS_PER_GROUP;
}

static ext4_fsblk_t ext4_new_meta_blocks(void *icb, handle_t *handle, struct inode *inode,
        ext4_fsblk_t goal,
        unsigned int flags,
        unsigned long *count, int *errp)
{
    NTSTATUS status;
    ULONG blockcnt = (count) ? *count : 1;
    ULONG block = 0;

    status = Ext2NewBlock((PEXT2_IRP_CONTEXT)icb,
                          inode->i_sb->s_priv,
                          0, goal,
                          &block,
                          &blockcnt);
    if (count)
        *count = blockcnt;

    if (!NT_SUCCESS(status)) {
        *errp = Ext2LinuxError(status);
        return 0;
    }
    inode->i_blocks += (blockcnt * (inode->i_sb->s_blocksize >> 9));
    return block;
}

static void ext4_free_blocks(void *icb, handle_t *handle, struct inode *inode, void *fake,
        ext4_fsblk_t block, int count, int flags)
{
    Ext2FreeBlock((PEXT2_IRP_CONTEXT)icb, inode->i_sb->s_priv, block, count);
    inode->i_blocks -= count * (inode->i_sb->s_blocksize >> 9);
    return;
}

static inline void ext_debug(char *str, ...)
{
}
#if TRUE
#define EXT4_ERROR_INODE(inode, str, ...) do {                  \
        DbgPrint("inode[%p]: " str "\n", inode, ##__VA_ARGS__); \
    } while(0)
#else
#define EXT4_ERROR_INODE
#endif

#define ext4_std_error(s, err)
#define assert ASSERT

#endif

/*
 * Return the right sibling of a tree node (either leaf or index node)
 */

#define EXT_MAX_BLOCKS 0xffffffff

static inline int ext4_ext_space_block(struct inode *inode, int check)
{
    int size;

    size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
           / sizeof(struct ext4_extent);
#ifdef AGGRESSIVE_TEST
    if (!check && size > 6)
        size = 6;
#endif
    return size;
}

static inline int ext4_ext_space_block_idx(struct inode *inode, int check)
{
    int size;

    size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
           / sizeof(struct ext4_extent_idx);
#ifdef AGGRESSIVE_TEST
    if (!check && size > 5)
        size = 5;
#endif
    return size;
}

static inline int ext4_ext_space_root(struct inode *inode, int check)
{
    int size;

    size = sizeof(EXT4_I(inode)->i_block);
    size -= sizeof(struct ext4_extent_header);
    size /= sizeof(struct ext4_extent);
#ifdef AGGRESSIVE_TEST
    if (!check && size > 3)
        size = 3;
#endif
    return size;
}

static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
{
    int size;

    size = sizeof(EXT4_I(inode)->i_block);
    size -= sizeof(struct ext4_extent_header);
    size /= sizeof(struct ext4_extent_idx);
#ifdef AGGRESSIVE_TEST
    if (!check && size > 4)
        size = 4;
#endif
    return size;
}
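
/*
 * Worked example for the capacity helpers above (illustrative only,
 * assuming the standard 12-byte on-disk ext4_extent_header,
 * ext4_extent and ext4_extent_idx layouts, the 60-byte i_block
 * area, a 4 KB block size and AGGRESSIVE_TEST disabled):
 *
 *   ext4_ext_space_block():     (4096 - 12) / 12 = 340 extents per leaf
 *   ext4_ext_space_block_idx(): (4096 - 12) / 12 = 340 indexes per node
 *   ext4_ext_space_root():      (60 - 12) / 12   = 4 extents in i_block
 *   ext4_ext_space_root_idx():  (60 - 12) / 12   = 4 indexes in i_block
 *
 * So a depth-1 tree already covers up to 4 * 340 = 1360 extents, and
 * every additional level multiplies the fan-out by roughly 340.
 */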

static int
ext4_ext_max_entries(struct inode *inode, int depth)
{
    int max;

    if (depth == ext_depth(inode)) {
        if (depth == 0)
            max = ext4_ext_space_root(inode, 1);
        else
            max = ext4_ext_space_root_idx(inode, 1);
    } else {
        if (depth == 0)
            max = ext4_ext_space_block(inode, 1);
        else
            max = ext4_ext_space_block_idx(inode, 1);
    }

    return max;
}

static int __ext4_ext_check(const char *function, unsigned int line,
                            struct inode *inode,
                            struct ext4_extent_header *eh, int depth,
                            ext4_fsblk_t pblk);

/*
 * read_extent_tree_block:
 * Get a buffer_head by extents_bread, and read fresh data from the storage.
 */
static struct buffer_head *
__read_extent_tree_block(const char *function, unsigned int line,
                         struct inode *inode, ext4_fsblk_t pblk, int depth,
                         int flags)
{
    struct buffer_head *bh;
    int err;

    bh = extents_bread(inode->i_sb, pblk);
    if (!bh)
        return ERR_PTR(-ENOMEM);

    if (!buffer_uptodate(bh)) {
        err = -EIO;
        goto errout;
    }
    if (buffer_verified(bh))
        return bh;
    err = __ext4_ext_check(function, line, inode,
                           ext_block_hdr(bh), depth, pblk);
    if (err)
        goto errout;
    set_buffer_verified(bh);
    return bh;
errout:
    extents_brelse(bh);
    return ERR_PTR(err);
}

#define read_extent_tree_block(inode, pblk, depth, flags) \
    __read_extent_tree_block("", __LINE__, (inode), (pblk), \
                             (depth), (flags))

#define ext4_ext_check(inode, eh, depth, pblk) \
    __ext4_ext_check("", __LINE__, (inode), (eh), (depth), (pblk))

int ext4_ext_check_inode(struct inode *inode)
{
    return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0);
}

static uint32_t ext4_ext_block_csum(struct inode *inode,
                                    struct ext4_extent_header *eh)
{
    /*return ext4_crc32c(inode->i_csum, eh, EXT4_EXTENT_TAIL_OFFSET(eh));*/
    return 0;
}

static void ext4_extent_block_csum_set(struct inode *inode,
                                       struct ext4_extent_header *eh)
{
    struct ext4_extent_tail *tail;

    tail = find_ext4_extent_tail(eh);
    tail->et_checksum = ext4_ext_block_csum(inode, eh);
}

static int ext4_split_extent_at(void *icb,
                                handle_t *handle,
                                struct inode *inode,
                                struct ext4_ext_path **ppath,
                                ext4_lblk_t split,
                                int split_flag,
                                int flags);

static inline int
ext4_force_split_extent_at(void *icb, handle_t *handle, struct inode *inode,
                           struct ext4_ext_path **ppath, ext4_lblk_t lblk,
                           int nofail)
{
    struct ext4_ext_path *path = *ppath;
    int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);

    return ext4_split_extent_at(icb, handle, inode, ppath, lblk, unwritten ?
            EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0,
            EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO |
            (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL : 0));
}

/*
 * could return:
 * - EROFS
 * - ENOMEM
 */

static int ext4_ext_get_access(void *icb, handle_t *handle, struct inode *inode,
                               struct ext4_ext_path *path)
{
    if (path->p_bh) {
        /* path points to block */
        return ext4_journal_get_write_access(icb, handle, path->p_bh);
    }
    /* path points to leaf/index in inode body */
    /* we use in-core data, no need to protect them */
    return 0;
}

static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
                                       struct ext4_ext_path *path,
                                       ext4_lblk_t block)
{
    if (path) {
        int depth = path->p_depth;
        struct ext4_extent *ex;

        /*
         * Try to predict block placement assuming that we are
         * filling in a file which will eventually be
         * non-sparse --- i.e., in the case of libbfd writing
         * an ELF object sections out-of-order but in a way
         * that eventually results in a contiguous object or
         * executable file, or some database extending a table
         * space file.  However, this is actually somewhat
         * non-ideal if we are writing a sparse file such as
         * qemu or KVM writing a raw image file that is going
         * to stay fairly sparse, since it will end up
         * fragmenting the file system's free space.  Maybe we
         * should have some heuristics or some way to allow
         * userspace to pass a hint to the file system,
         * especially if the latter case turns out to be
         * common.
         */
        ex = path[depth].p_ext;
        if (ex) {
            ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
            ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);

            if (block > ext_block)
                return ext_pblk + (block - ext_block);
            else
                return ext_pblk - (ext_block - block);
        }

        /* it looks like index is empty;
         * try to find starting block from index itself */
        if (path[depth].p_bh)
            return path[depth].p_bh->b_blocknr;
    }

    /* OK. use inode's group */
    return ext4_inode_to_goal_block(inode);
}

/*
 * Allocation for a meta data block
 */
static ext4_fsblk_t
ext4_ext_new_meta_block(void *icb, handle_t *handle, struct inode *inode,
                        struct ext4_ext_path *path,
                        struct ext4_extent *ex, int *err, unsigned int flags)
{
    ext4_fsblk_t goal, newblock;

    goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
    newblock = ext4_new_meta_blocks(icb, handle, inode, goal, flags,
                                    NULL, err);
    return newblock;
}

int __ext4_ext_dirty(const char *where, unsigned int line,
                     void *icb, handle_t *handle,
                     struct inode *inode,
                     struct ext4_ext_path *path)
{
    int err;

    if (path->p_bh) {
        ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh));
        /* path points to block */
        err = __ext4_handle_dirty_metadata(where, line, icb, handle, inode, path->p_bh);
    } else {
        /* path points to leaf/index in inode body */
        err = ext4_mark_inode_dirty(icb, handle, inode);
    }
    return err;
}

void ext4_ext_drop_refs(struct ext4_ext_path *path)
{
    int depth, i;

    if (!path)
        return;
    depth = path->p_depth;
    for (i = 0; i <= depth; i++, path++)
        if (path->p_bh) {
            extents_brelse(path->p_bh);
            path->p_bh = NULL;
        }
}

/*
 * Check whether the basic information inside the extent header
 * is correct.
 */
static int __ext4_ext_check(const char *function, unsigned int line,
                            struct inode *inode,
                            struct ext4_extent_header *eh, int depth,
                            ext4_fsblk_t pblk)
{
    struct ext4_extent_tail *tail;
    const char *error_msg;
#ifndef __REACTOS__
    int max = 0;
#endif
    if (eh->eh_magic != EXT4_EXT_MAGIC) {
        error_msg = "invalid magic";
        goto corrupted;
    }
    if (le16_to_cpu(eh->eh_depth) != depth) {
        error_msg = "unexpected eh_depth";
        goto corrupted;
    }
    if (eh->eh_max == 0) {
        error_msg = "invalid eh_max";
        goto corrupted;
    }
    if (eh->eh_entries > eh->eh_max) {
        error_msg = "invalid eh_entries";
        goto corrupted;
    }

    tail = find_ext4_extent_tail(eh);
    if (tail->et_checksum != ext4_ext_block_csum(inode, eh)) {
        ext_debug("Warning: extent checksum damaged? tail->et_checksum = "
                  "%lu, ext4_ext_block_csum = %lu\n",
                  tail->et_checksum, ext4_ext_block_csum(inode, eh));
    }

    return 0;

corrupted:
    ext_debug("corrupted! %s\n", error_msg);
    return -EIO;
}

/*
 * ext4_ext_binsearch_idx:
 * binary search for the closest index of the given block
 * the header must be checked before calling this
 */
static void
ext4_ext_binsearch_idx(struct inode *inode,
                       struct ext4_ext_path *path, ext4_lblk_t block)
{
    struct ext4_extent_header *eh = path->p_hdr;
    struct ext4_extent_idx *r, *l, *m;

    ext_debug("binsearch for %u(idx): ", block);

    l = EXT_FIRST_INDEX(eh) + 1;
    r = EXT_LAST_INDEX(eh);
    while (l <= r) {
        m = l + (r - l) / 2;
        if (block < (m->ei_block))
            r = m - 1;
        else
            l = m + 1;
        ext_debug("%p(%u):%p(%u):%p(%u) ", l, (l->ei_block),
                  m, (m->ei_block),
                  r, (r->ei_block));
    }

    path->p_idx = l - 1;
    ext_debug("  -> %u->%lld ", (path->p_idx->ei_block),
              ext4_idx_pblock(path->p_idx));

#ifdef CHECK_BINSEARCH
    {
        struct ext4_extent_idx *chix, *ix;
        int k;

        chix = ix = EXT_FIRST_INDEX(eh);
        for (k = 0; k < (eh->eh_entries); k++, ix++) {
            if (k != 0 &&
                (ix->ei_block) <= (ix[-1].ei_block)) {
                printk(KERN_DEBUG "k=%d, ix=0x%p, "
                       "first=0x%p\n", k,
                       ix, EXT_FIRST_INDEX(eh));
                printk(KERN_DEBUG "%u <= %u\n",
                       (ix->ei_block),
                       (ix[-1].ei_block));
            }
            BUG_ON(k && (ix->ei_block)
                   <= (ix[-1].ei_block));
            if (block < (ix->ei_block))
                break;
            chix = ix;
        }
        BUG_ON(chix != path->p_idx);
    }
#endif
}
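
/*
 * Illustrative walk-through of the search above (sketch, not part of
 * the original driver).  The loop maintains the invariant that every
 * index left of 'l' starts at or before 'block', so 'l - 1' is the
 * rightmost index with ei_block <= block.  E.g. for index entries
 * starting at logical blocks {0, 100, 300} and block == 150:
 *
 *   l = &idx[1], r = &idx[2]:  m = &idx[1], 100 <= 150 -> l = &idx[2]
 *   l = &idx[2], r = &idx[2]:  m = &idx[2], 300 >  150 -> r = &idx[1]
 *
 * The loop exits with l == &idx[2], so p_idx = l - 1 = &idx[1],
 * i.e. the subtree covering logical blocks [100, 300).
 */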

/*
 * ext4_ext_binsearch:
 * binary search for closest extent of the given block
 * the header must be checked before calling this
 */
static void
ext4_ext_binsearch(struct inode *inode,
                   struct ext4_ext_path *path, ext4_lblk_t block)
{
    struct ext4_extent_header *eh = path->p_hdr;
    struct ext4_extent *r, *l, *m;

    if (eh->eh_entries == 0) {
        /*
         * this leaf is empty:
         * we get such a leaf in split/add case
         */
        return;
    }

    ext_debug("binsearch for %u: ", block);

    l = EXT_FIRST_EXTENT(eh) + 1;
    r = EXT_LAST_EXTENT(eh);

    while (l <= r) {
        m = l + (r - l) / 2;
        if (block < m->ee_block)
            r = m - 1;
        else
            l = m + 1;
        ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ee_block,
                  m, (m->ee_block),
                  r, (r->ee_block));
    }

    path->p_ext = l - 1;
    ext_debug("  -> %d:%llu:[%d]%d ",
              (path->p_ext->ee_block),
              ext4_ext_pblock(path->p_ext),
              ext4_ext_is_unwritten(path->p_ext),
              ext4_ext_get_actual_len(path->p_ext));

#ifdef CHECK_BINSEARCH
    {
        struct ext4_extent *chex, *ex;
        int k;

        chex = ex = EXT_FIRST_EXTENT(eh);
        for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
            BUG_ON(k && (ex->ee_block)
                   <= (ex[-1].ee_block));
            if (block < (ex->ee_block))
                break;
            chex = ex;
        }
        BUG_ON(chex != path->p_ext);
    }
#endif
}

#ifdef EXT_DEBUG
static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
{
    int k, l = path->p_depth;

    ext_debug("path:");
    for (k = 0; k <= l; k++, path++) {
        if (path->p_idx) {
            ext_debug("  %d->%llu", le32_to_cpu(path->p_idx->ei_block),
                      ext4_idx_pblock(path->p_idx));
        } else if (path->p_ext) {
            ext_debug("  %d:[%d]%d:%llu ",
                      le32_to_cpu(path->p_ext->ee_block),
                      ext4_ext_is_unwritten(path->p_ext),
                      ext4_ext_get_actual_len(path->p_ext),
                      ext4_ext_pblock(path->p_ext));
        } else
            ext_debug("  []");
    }
    ext_debug("\n");
}

static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
{
    int depth = ext_depth(inode);
    struct ext4_extent_header *eh;
    struct ext4_extent *ex;
    int i;

    if (!path)
        return;

    eh = path[depth].p_hdr;
    ex = EXT_FIRST_EXTENT(eh);

    ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino);

    for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
        ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
                  ext4_ext_is_unwritten(ex),
                  ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
    }
    ext_debug("\n");
}

static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
                               ext4_fsblk_t newblock, int level)
{
    int depth = ext_depth(inode);
    struct ext4_extent *ex;

    if (depth != level) {
        struct ext4_extent_idx *idx;
        idx = path[level].p_idx;
        while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) {
            ext_debug("%d: move %d:%llu in new index %llu\n", level,
                      le32_to_cpu(idx->ei_block),
                      ext4_idx_pblock(idx),
                      newblock);
            idx++;
        }

        return;
    }

    ex = path[depth].p_ext;
    while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) {
        ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
                  le32_to_cpu(ex->ee_block),
                  ext4_ext_pblock(ex),
                  ext4_ext_is_unwritten(ex),
                  ext4_ext_get_actual_len(ex),
                  newblock);
        ex++;
    }
}

#else
#define ext4_ext_show_path(inode, path)
#define ext4_ext_show_leaf(inode, path)
#define ext4_ext_show_move(inode, path, newblock, level)
#endif

struct ext4_ext_path *
ext4_find_extent(struct inode *inode, ext4_lblk_t block,
                 struct ext4_ext_path **orig_path, int flags)
{
    struct ext4_extent_header *eh;
    struct buffer_head *bh;
    struct ext4_ext_path *path = orig_path ? *orig_path : NULL;
    short int depth, i, ppos = 0;
    int ret;

    eh = ext_inode_hdr(inode);
    depth = ext_depth(inode);

    if (path) {
        ext4_ext_drop_refs(path);
        if (depth > path[0].p_maxdepth) {
            kfree(path);
            *orig_path = path = NULL;
        }
    }
    if (!path) {
        /* account possible depth increase */
        path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
                       GFP_NOFS);
        if (unlikely(!path))
            return ERR_PTR(-ENOMEM);
        path[0].p_maxdepth = depth + 1;
    }
    path[0].p_hdr = eh;
    path[0].p_bh = NULL;

    i = depth;
    /* walk through the tree */
    while (i) {
        ext_debug("depth %d: num %d, max %d\n",
                  ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));

        ext4_ext_binsearch_idx(inode, path + ppos, block);
        path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
        path[ppos].p_depth = i;
        path[ppos].p_ext = NULL;

        bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
                                    flags);
        if (unlikely(IS_ERR(bh))) {
            ret = PTR_ERR(bh);
            goto err;
        }

        eh = ext_block_hdr(bh);
        ppos++;
        if (unlikely(ppos > depth)) {
            extents_brelse(bh);
            EXT4_ERROR_INODE(inode,
                             "ppos %d > depth %d", ppos, depth);
            ret = -EIO;
            goto err;
        }
        path[ppos].p_bh = bh;
        path[ppos].p_hdr = eh;
    }

    path[ppos].p_depth = i;
    path[ppos].p_ext = NULL;
    path[ppos].p_idx = NULL;

    /* find extent */
    ext4_ext_binsearch(inode, path + ppos, block);
    /* if not an empty leaf */
    if (path[ppos].p_ext)
        path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);

    ext4_ext_show_path(inode, path);

    return path;

err:
    ext4_ext_drop_refs(path);
    if (path) {
        kfree(path);
        if (orig_path)
            *orig_path = NULL;
    }
    return ERR_PTR(ret);
}
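
/*
 * Illustrative sketch (not part of the original driver): for a
 * depth-1 tree, a successful ext4_find_extent(inode, block, ...)
 * returns a two-element path:
 *
 *   path[0]: p_hdr -> header in inode->i_block, p_idx -> the index
 *            whose subtree covers 'block', p_bh == NULL
 *   path[1]: p_hdr -> header of the leaf block read from disk,
 *            p_ext -> closest extent starting at or before 'block'
 *            (or NULL for an empty leaf), p_bh -> leaf buffer_head
 *
 * Callers must release the buffer references with
 * ext4_ext_drop_refs() and kfree() the array when done.
 */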

/*
 * ext4_ext_insert_index:
 * insert new index [@logical;@ptr] into the block at @curp;
 * check where to insert: before @curp or after @curp
 */
static int ext4_ext_insert_index(void *icb, handle_t *handle, struct inode *inode,
                                 struct ext4_ext_path *curp,
                                 int logical, ext4_fsblk_t ptr)
{
    struct ext4_extent_idx *ix;
    int len, err;

    err = ext4_ext_get_access(icb, handle, inode, curp);
    if (err)
        return err;

    if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
        EXT4_ERROR_INODE(inode,
                         "logical %d == ei_block %d!",
                         logical, le32_to_cpu(curp->p_idx->ei_block));
        return -EIO;
    }

    if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
                 >= le16_to_cpu(curp->p_hdr->eh_max))) {
        EXT4_ERROR_INODE(inode,
                         "eh_entries %d >= eh_max %d!",
                         le16_to_cpu(curp->p_hdr->eh_entries),
                         le16_to_cpu(curp->p_hdr->eh_max));
        return -EIO;
    }

    if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
        /* insert after */
        ext_debug("insert new index %d after: %llu\n", logical, ptr);
        ix = curp->p_idx + 1;
    } else {
        /* insert before */
        ext_debug("insert new index %d before: %llu\n", logical, ptr);
        ix = curp->p_idx;
    }

    len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
    BUG_ON(len < 0);
    if (len > 0) {
        ext_debug("insert new index %d: "
                  "move %d indices from 0x%p to 0x%p\n",
                  logical, len, ix, ix + 1);
        memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
    }

    if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
        EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
        return -EIO;
    }

    ix->ei_block = cpu_to_le32(logical);
    ext4_idx_store_pblock(ix, ptr);
    le16_add_cpu(&curp->p_hdr->eh_entries, 1);

    if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
        EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
        return -EIO;
    }

    err = ext4_ext_dirty(icb, handle, inode, curp);
    ext4_std_error(inode->i_sb, err);

    return err;
}

/*
 * ext4_ext_split:
 * inserts new subtree into the path, using free index entry
 * at depth @at:
 * - allocates all needed blocks (new leaf and all intermediate index blocks)
 * - makes decision where to split
 * - moves remaining extents and index entries (right to the split point)
 *   into the newly allocated blocks
 * - initializes subtree
 */
static int ext4_ext_split(void *icb, handle_t *handle, struct inode *inode,
                          unsigned int flags,
                          struct ext4_ext_path *path,
                          struct ext4_extent *newext, int at)
{
    struct buffer_head *bh = NULL;
    int depth = ext_depth(inode);
    struct ext4_extent_header *neh;
    struct ext4_extent_idx *fidx;
    int i = at, k, m, a;
    ext4_fsblk_t newblock, oldblock;
    __le32 border;
    ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
    int err = 0;

    /* make decision: where to split? */
    /* FIXME: now decision is simplest: at current extent */

    /* if current leaf will be split, then we should use
     * border from split point */
    if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
        EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
        return -EIO;
    }
    if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
        border = path[depth].p_ext[1].ee_block;
        ext_debug("leaf will be split."
                  " next leaf starts at %d\n",
                  le32_to_cpu(border));
    } else {
        border = newext->ee_block;
        ext_debug("leaf will be added."
                  " next leaf starts at %d\n",
                  le32_to_cpu(border));
    }

    /*
     * If error occurs, then we break processing
     * and mark filesystem read-only. index won't
     * be inserted and tree will be in consistent
     * state. Next mount will repair buffers too.
     */

    /*
     * Get array to track all allocated blocks.
     * We need this to handle errors and free blocks
     * upon them.
     */
    ablocks = kzalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS);
    if (!ablocks)
        return -ENOMEM;

    /* allocate all needed blocks */
    ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
    for (a = 0; a < depth - at; a++) {
        newblock = ext4_ext_new_meta_block(icb, handle, inode, path,
                                           newext, &err, flags);
        if (newblock == 0)
            goto cleanup;
        ablocks[a] = newblock;
    }

    /* initialize new leaf */
    newblock = ablocks[--a];
    if (unlikely(newblock == 0)) {
        EXT4_ERROR_INODE(inode, "newblock == 0!");
        err = -EIO;
        goto cleanup;
    }
    bh = extents_bwrite(inode->i_sb, newblock);
    if (unlikely(!bh)) {
        err = -ENOMEM;
        goto cleanup;
    }

    err = ext4_journal_get_create_access(icb, handle, bh);
    if (err)
        goto cleanup;

    neh = ext_block_hdr(bh);
    neh->eh_entries = 0;
    neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
    neh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC);
    neh->eh_depth = 0;

    /* move remainder of path[depth] to the new leaf */
    if (unlikely(path[depth].p_hdr->eh_entries !=
                 path[depth].p_hdr->eh_max)) {
        EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
                         path[depth].p_hdr->eh_entries,
                         path[depth].p_hdr->eh_max);
        err = -EIO;
        goto cleanup;
    }
    /* start copy from next extent */
    m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++;
    ext4_ext_show_move(inode, path, newblock, depth);
    if (m) {
        struct ext4_extent *ex;
        ex = EXT_FIRST_EXTENT(neh);
        memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m);
        le16_add_cpu(&neh->eh_entries, m);
    }

    ext4_extent_block_csum_set(inode, neh);
    set_buffer_uptodate(bh);

    err = ext4_handle_dirty_metadata(icb, handle, inode, bh);
    if (err)
        goto cleanup;
    extents_brelse(bh);
    bh = NULL;

    /* correct old leaf */
    if (m) {
        err = ext4_ext_get_access(icb, handle, inode, path + depth);
        if (err)
            goto cleanup;
        le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
        err = ext4_ext_dirty(icb, handle, inode, path + depth);
        if (err)
            goto cleanup;
    }

    /* create intermediate indexes */
    k = depth - at - 1;
    if (unlikely(k < 0)) {
        EXT4_ERROR_INODE(inode, "k %d < 0!", k);
        err = -EIO;
        goto cleanup;
    }
    if (k)
        ext_debug("create %d intermediate indices\n", k);
    /* insert new index into current index block */
    /* current depth stored in i var */
    i = depth - 1;
    while (k--) {
        oldblock = newblock;
        newblock = ablocks[--a];
        bh = extents_bwrite(inode->i_sb, newblock);
        if (unlikely(!bh)) {
            err = -ENOMEM;
            goto cleanup;
        }

        err = ext4_journal_get_create_access(icb, handle, bh);
        if (err)
            goto cleanup;

        neh = ext_block_hdr(bh);
        neh->eh_entries = cpu_to_le16(1);
        neh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC);
        neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
        neh->eh_depth = cpu_to_le16(depth - i);
        fidx = EXT_FIRST_INDEX(neh);
        fidx->ei_block = border;
        ext4_idx_store_pblock(fidx, oldblock);

        ext_debug("int.index at %d (block %llu): %u -> %llu\n",
                  i, newblock, le32_to_cpu(border), oldblock);

        /* move remainder of path[i] to the new index block */
        if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
                     EXT_LAST_INDEX(path[i].p_hdr))) {
            EXT4_ERROR_INODE(inode,
                             "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
                             le32_to_cpu(path[i].p_ext->ee_block));
            err = -EIO;
            goto cleanup;
        }
        /* start copy indexes */
        m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++;
        ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
                  EXT_MAX_INDEX(path[i].p_hdr));
        ext4_ext_show_move(inode, path, newblock, i);
        if (m) {
            memmove(++fidx, path[i].p_idx,
                    sizeof(struct ext4_extent_idx) * m);
            le16_add_cpu(&neh->eh_entries, m);
        }
        ext4_extent_block_csum_set(inode, neh);
        set_buffer_uptodate(bh);

        err = ext4_handle_dirty_metadata(icb, handle, inode, bh);
        if (err)
            goto cleanup;
        extents_brelse(bh);
        bh = NULL;

        /* correct old index */
        if (m) {
            err = ext4_ext_get_access(icb, handle, inode, path + i);
            if (err)
                goto cleanup;
            le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
            err = ext4_ext_dirty(icb, handle, inode, path + i);
            if (err)
                goto cleanup;
        }

        i--;
    }

    /* insert new index */
    err = ext4_ext_insert_index(icb, handle, inode, path + at,
                                le32_to_cpu(border), newblock);

cleanup:
    if (bh)
        extents_brelse(bh);

    if (err) {
        /* free all allocated blocks in error case */
        for (i = 0; i < depth; i++) {
            if (!ablocks[i])
                continue;
            ext4_free_blocks(icb, handle, inode, NULL, ablocks[i], 1,
                             EXT4_FREE_BLOCKS_METADATA);
        }
    }
    kfree(ablocks);

    return err;
}
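
/*
 * Illustrative example (sketch, not from the original source):
 * splitting a depth-1 tree at the leaf level (at == 1) when the leaf
 * is full.  The extents at and after the split point move into a
 * freshly allocated leaf, and a new index entry for 'border' is
 * inserted into the parent:
 *
 *   before:  root --> [e1 e2 ... ek-1 ek ... eN]      (one full leaf)
 *   after:   root --> [e1 ... ek-1]   [ek ... eN]     (two leaves)
 *                      ^ old leaf      ^ new block, border = ek.ee_block
 *
 * When at < depth, the same copy-right-half step is repeated for each
 * intermediate index level, using the blocks collected in ablocks[].
 */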

/*
 * ext4_ext_grow_indepth:
 * implements tree growing procedure:
 * - allocates new block
 * - moves top-level data (index block or leaf) into the new block
 * - initializes new top-level, creating index that points to the
 *   just created block
 */
static int ext4_ext_grow_indepth(void *icb, handle_t *handle, struct inode *inode,
                                 unsigned int flags)
{
    struct ext4_extent_header *neh;
    struct buffer_head *bh;
    ext4_fsblk_t newblock, goal = 0;
    int err = 0;

    /* Try to prepend new index to old one */
    if (ext_depth(inode))
        goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode)));
    else
        goal = ext4_inode_to_goal_block(inode);
    newblock = ext4_new_meta_blocks(icb, handle, inode, goal, flags,
                                    NULL, &err);
    if (newblock == 0)
        return err;

    bh = extents_bwrite(inode->i_sb, newblock);
    if (!bh)
        return -ENOMEM;

    err = ext4_journal_get_create_access(icb, handle, bh);
    if (err)
        goto out;

    /* move top-level index/leaf into new block */
    memmove(bh->b_data, EXT4_I(inode)->i_block,
            sizeof(EXT4_I(inode)->i_block));

    /* set size of new block */
    neh = ext_block_hdr(bh);
    /* old root could have indexes or leaves
     * so calculate eh_max the right way */
    if (ext_depth(inode))
        neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
    else
        neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
    neh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC);
    ext4_extent_block_csum_set(inode, neh);
    set_buffer_uptodate(bh);

    err = ext4_handle_dirty_metadata(icb, handle, inode, bh);
    if (err)
        goto out;

    /* Update top-level index: num,max,pointer */
    neh = ext_inode_hdr(inode);
    neh->eh_entries = cpu_to_le16(1);
    ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
    if (neh->eh_depth == 0) {
        /* Root extent block becomes index block */
        neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
        EXT_FIRST_INDEX(neh)->ei_block =
                EXT_FIRST_EXTENT(neh)->ee_block;
    }
    ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
              (neh->eh_entries), (neh->eh_max),
              (EXT_FIRST_INDEX(neh)->ei_block),
              ext4_idx_pblock(EXT_FIRST_INDEX(neh)));

    le16_add_cpu(&neh->eh_depth, 1);
    ext4_mark_inode_dirty(icb, handle, inode);
out:
    extents_brelse(bh);

    return err;
}
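
/*
 * Illustrative before/after of ext4_ext_grow_indepth() (sketch, not
 * part of the original source).  Growing a depth-0 tree:
 *
 *   before:  inode->i_block = hdr(depth 0) + up to 4 extents
 *   after:   inode->i_block = hdr(depth 1) + 1 index --> new block
 *            new block      = hdr(depth 0) + the original extents
 *
 * Only the top level is copied out to the new block; deeper levels
 * (if any) are untouched, so growing is a constant-cost operation.
 */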

static int ext4_ext_shrink_indepth(void *icb,
                                   handle_t *handle,
                                   struct inode *inode,
                                   unsigned int flags,
                                   int *shrinked)
{
    struct ext4_extent_header *eh, *neh;
    struct ext4_extent_idx *ix;
    struct buffer_head *bh = NULL;
    ext4_fsblk_t block;
    int err = 0, depth = ext_depth(inode);
    int neh_entries;
    *shrinked = 0;

    if (!depth)
        return 0;

    eh = ext_inode_hdr(inode);
    if (le16_to_cpu(eh->eh_entries) != 1)
        return 0;

    ix = EXT_FIRST_INDEX(eh);
    block = ext4_idx_pblock(ix);
    bh = extents_bread(inode->i_sb, block);
    if (!bh)
        goto out;

    /* set size of new block */
    neh = ext_block_hdr(bh);
    neh_entries = le16_to_cpu(neh->eh_entries);
    if (!neh->eh_depth &&
        neh_entries > ext4_ext_space_root(inode, 0))
        goto out;

    if (neh->eh_depth &&
        neh_entries > ext4_ext_space_root_idx(inode, 0))
        goto out;

    /* old root could have indexes or leaves
     * so calculate eh_max the right way */
    if (neh->eh_depth)
        eh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
    else
        eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));

    eh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC);
    eh->eh_entries = cpu_to_le16(neh_entries);
    if (neh->eh_depth) {
        memmove(EXT_FIRST_INDEX(eh),
                EXT_FIRST_INDEX(neh),
                sizeof(struct ext4_extent_idx) * neh_entries);
    } else {
        memmove(EXT_FIRST_EXTENT(eh),
                EXT_FIRST_EXTENT(neh),
                sizeof(struct ext4_extent) * neh_entries);
    }
    le16_add_cpu(&eh->eh_depth, -1);

    ext4_mark_inode_dirty(icb, handle, inode);
    *shrinked = 1;
out:
    if (bh)
        extents_brelse(bh);

    if (*shrinked)
        ext4_free_blocks(icb, handle, inode, NULL,
                         block, 1, flags);

    return err;
}

/*
 * ext4_ext_create_new_leaf:
 * finds empty index and adds new leaf.
 * if no free index is found, then it requests in-depth growing.
 */
static int ext4_ext_create_new_leaf(void *icb, handle_t *handle, struct inode *inode,
                                    unsigned int mb_flags,
                                    unsigned int gb_flags,
                                    struct ext4_ext_path **ppath,
                                    struct ext4_extent *newext)
{
    struct ext4_ext_path *path = *ppath;
    struct ext4_ext_path *curp;
    int depth, i, err = 0;

repeat:
    i = depth = ext_depth(inode);

    /* walk up to the tree and look for free index entry */
    curp = path + depth;
    while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
        i--;
        curp--;
    }

    /* we use already allocated block for index block,
     * so subsequent data blocks should be contiguous */
    if (EXT_HAS_FREE_INDEX(curp)) {
        /* if we found index with free entry, then use that
         * entry: create all needed subtree and add new leaf */
        err = ext4_ext_split(icb, handle, inode, mb_flags, path, newext, i);
        if (err)
            goto out;

        /* refill path */
        path = ext4_find_extent(inode,
                                (ext4_lblk_t)le32_to_cpu(newext->ee_block),
                                ppath, gb_flags);
        if (IS_ERR(path))
            err = PTR_ERR(path);
    } else {
        /* tree is full, time to grow in depth */
        err = ext4_ext_grow_indepth(icb, handle, inode, mb_flags);
        if (err)
            goto out;

        /* refill path */
        path = ext4_find_extent(inode,
                                (ext4_lblk_t)le32_to_cpu(newext->ee_block),
                                ppath, gb_flags);
        if (IS_ERR(path)) {
            err = PTR_ERR(path);
            goto out;
        }

        /*
         * only first (depth 0 -> 1) produces free space;
         * in all other cases we have to split the grown tree
         */
        depth = ext_depth(inode);
        if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
            /* now we need to split */
            goto repeat;
        }
    }

out:
    return err;
}

/*
 * search the closest allocated block to the left for *logical
 * and returns it at @logical + its physical address at @phys
 * if *logical is the smallest allocated block, the function
 * returns 0 at @phys
 * return value contains 0 (success) or error code
 */
static int ext4_ext_search_left(struct inode *inode,
                                struct ext4_ext_path *path,
                                ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
    struct ext4_extent_idx *ix;
    struct ext4_extent *ex;
    int depth, ee_len;

    if (unlikely(path == NULL)) {
        EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
        return -EIO;
    }
    depth = path->p_depth;
    *phys = 0;

    if (depth == 0 && path->p_ext == NULL)
        return 0;

    /* usually extent in the path covers blocks smaller
     * than *logical, but it can be that extent is the
     * first one in the file */

    ex = path[depth].p_ext;
    ee_len = ext4_ext_get_actual_len(ex);
    if (*logical < le32_to_cpu(ex->ee_block)) {
        if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
            EXT4_ERROR_INODE(inode,
                             "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
                             *logical, le32_to_cpu(ex->ee_block));
            return -EIO;
        }
        while (--depth >= 0) {
            ix = path[depth].p_idx;
            if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
                EXT4_ERROR_INODE(inode,
                                 "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
                                 ix != NULL ? le32_to_cpu(ix->ei_block) : 0,
                                 EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
                                 le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0,
                                 depth);
                return -EIO;
            }
        }
        return 0;
    }

    if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
        EXT4_ERROR_INODE(inode,
                         "logical %d < ee_block %d + ee_len %d!",
                         *logical, le32_to_cpu(ex->ee_block), ee_len);
        return -EIO;
    }

    *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
    *phys = ext4_ext_pblock(ex) + ee_len - 1;
    return 0;
}

/*
 * search the closest allocated block to the right for *logical
 * and returns it at @logical + its physical address at @phys
 * if *logical is the largest allocated block, the function
 * returns 0 at @phys
 * return value contains 0 (success) or error code
 */
static int ext4_ext_search_right(struct inode *inode,
                                 struct ext4_ext_path *path,
                                 ext4_lblk_t *logical, ext4_fsblk_t *phys,
                                 struct ext4_extent **ret_ex)
{
    struct buffer_head *bh = NULL;
    struct ext4_extent_header *eh;
    struct ext4_extent_idx *ix;
    struct ext4_extent *ex;
    ext4_fsblk_t block;
    int depth;  /* Note, NOT eh_depth; depth from top of tree */
    int ee_len;

    if (path == NULL) {
        EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
        return -EIO;
    }
    depth = path->p_depth;
    *phys = 0;

    if (depth == 0 && path->p_ext == NULL)
        return 0;

    /* usually extent in the path covers blocks smaller
     * than *logical, but it can be that extent is the
     * first one in the file */

    ex = path[depth].p_ext;
    ee_len = ext4_ext_get_actual_len(ex);
    /*if (*logical < le32_to_cpu(ex->ee_block)) {*/
    if (*logical < (ex->ee_block)) {
        if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
            EXT4_ERROR_INODE(inode,
                             "first_extent(path[%d].p_hdr) != ex",
                             depth);
            return -EIO;
        }
        while (--depth >= 0) {
            ix = path[depth].p_idx;
            if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
                EXT4_ERROR_INODE(inode,
                                 "ix != EXT_FIRST_INDEX *logical %d!",
                                 *logical);
                return -EIO;
            }
        }
        goto found_extent;
    }

    /*if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {*/
    if (unlikely(*logical < ((ex->ee_block) + ee_len))) {
        EXT4_ERROR_INODE(inode,
                         "logical %d < ee_block %d + ee_len %d!",
                         /**logical, le32_to_cpu(ex->ee_block), ee_len);*/
                         *logical, (ex->ee_block), ee_len);
        return -EIO;
    }

    if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
        /* next allocated block in this leaf */
        ex++;
        goto found_extent;
    }

    /* go up and search for index to the right */
    while (--depth >= 0) {
        ix = path[depth].p_idx;
        if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
            goto got_index;
    }

    /* we've gone up to the root and found no index to the right */
    return 0;

got_index:
    /* we've found index to the right, let's
     * follow it and find the closest allocated
     * block to the right */
    ix++;
    block = ext4_idx_pblock(ix);
    while (++depth < path->p_depth) {
        /* subtract from p_depth to get proper eh_depth */
        bh = read_extent_tree_block(inode, block,
                                    path->p_depth - depth, 0);
        if (IS_ERR(bh))
            return PTR_ERR(bh);
        eh = ext_block_hdr(bh);
        ix = EXT_FIRST_INDEX(eh);
        block = ext4_idx_pblock(ix);
        extents_brelse(bh);
    }

    bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0);
    if (IS_ERR(bh))
        return PTR_ERR(bh);
    eh = ext_block_hdr(bh);
    ex = EXT_FIRST_EXTENT(eh);
found_extent:
    /**logical = le32_to_cpu(ex->ee_block);*/
    *logical = (ex->ee_block);
    *phys = ext4_ext_pblock(ex);
    *ret_ex = ex;
    if (bh)
        extents_brelse(bh);
    return 0;
}

/*
 * ext4_ext_next_allocated_block:
 * returns allocated block in subsequent extent or EXT_MAX_BLOCKS.
 * NOTE: it considers block number from index entry as
 * allocated block. Thus, index entries have to be consistent
 * with leaves.
 */
ext4_lblk_t
ext4_ext_next_allocated_block(struct ext4_ext_path *path, int at)
{
    if (at == 0 && !path->p_ext && !path->p_idx)
        return EXT_MAX_BLOCKS;

    while (at >= 0) {
        if (at == path->p_depth) {
            /* leaf */
            if (path[at].p_ext &&
                path[at].p_ext !=
                EXT_LAST_EXTENT(path[at].p_hdr))
                return le32_to_cpu(path[at].p_ext[1].ee_block);
        } else {
            /* index */
            if (path[at].p_idx !=
                EXT_LAST_INDEX(path[at].p_hdr))
                return le32_to_cpu(path[at].p_idx[1].ei_block);
        }
        at--;
    }

    return EXT_MAX_BLOCKS;
}
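
/*
 * Example (illustrative, not part of the original source): with a
 * path[] from ext4_find_extent() and at == path->p_depth, a leaf
 * whose extents start at {100, 200} and p_ext pointing at the extent
 * at 100 makes this return 200.  If p_ext is already the last extent
 * of its leaf, the walk moves up one level and reports the start of
 * the next index's subtree instead, or EXT_MAX_BLOCKS when nothing
 * at all is allocated to the right.
 */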

/*
 * ext4_ext_next_leaf_block:
 * returns first allocated block from next leaf or EXT_MAX_BLOCKS
 */
static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
{
    int depth;

    BUG_ON(path == NULL);
    depth = path->p_depth;

    /* zero-tree has no leaf blocks at all */
    if (depth == 0)
        return EXT_MAX_BLOCKS;

    /* go to index block */
    depth--;

    while (depth >= 0) {
        if (path[depth].p_idx !=
            EXT_LAST_INDEX(path[depth].p_hdr))
            return (ext4_lblk_t)
                   le32_to_cpu(path[depth].p_idx[1].ei_block);
        depth--;
    }

    return EXT_MAX_BLOCKS;
}

/*
 * ext4_ext_correct_indexes:
 * if leaf/node gets modified and modified extent/index
 * is first in the leaf/node,
 * then we have to correct all indexes above.
 */
static int ext4_ext_correct_indexes(void *icb, handle_t *handle,
                                    struct inode *inode,
                                    struct ext4_ext_path *path,
                                    int at)
{
    struct ext4_extent_header *eh;
    int depth = ext_depth(inode);
    struct ext4_extent *ex;
    __le32 border;
    int k, err = 0;

    assert(at >= 0);
    if (!at)
        return 0;

    if (depth == at) {
        eh = path[at].p_hdr;
        ex = path[at].p_ext;

        if (ex == NULL || eh == NULL)
            return -EIO;

        if (at == 0) {
            /* there is no tree at all */
            return 0;
        }

        if (ex != EXT_FIRST_EXTENT(eh)) {
            /* we correct tree if first leaf got modified only */
            return 0;
        }

        k = at - 1;
        border = path[at].p_ext->ee_block;
        path[k].p_idx->ei_block = border;
        err = ext4_ext_dirty(icb, handle, inode, path + k);
        if (err)
            return err;
    } else {
        border = path[at].p_idx->ei_block;
        k = at;
    }

    while (k) {
        /* change all left-side indexes */
        if (path[k].p_idx != EXT_FIRST_INDEX(path[k].p_hdr))
            break;
        path[k-1].p_idx->ei_block = border;
        err = ext4_ext_dirty(icb, handle, inode, path + k - 1);
        if (err)
            break;

        k--;
    }

    return err;
}
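
/*
 * Illustrative sketch (not in the original source): if the first
 * extent of a leaf changes from logical block 100 to 96, every index
 * on the path whose entry is the *leftmost* entry of its node must
 * be updated, so a later lookup for block 96 descends into the right
 * subtree:
 *
 *   root.idx[0].ei_block:    100 -> 96
 *   level1.idx[0].ei_block:  100 -> 96
 *   leaf.ext[0].ee_block:    96       (already updated by the caller)
 *
 * The loop stops at the first level whose entry is not the leftmost
 * one, because indexes above that point are unaffected.
 */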

int
ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
                           struct ext4_extent *ex2)
{
    unsigned short ext1_ee_len, ext2_ee_len;

    /*
     * Make sure that both extents are initialized. We don't merge
     * unwritten extents so that we can be sure that end_io code has
     * the extent that was written properly split out and conversion to
     * initialized is trivial.
     */
    if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2))
        return 0;

    ext1_ee_len = ext4_ext_get_actual_len(ex1);
    ext2_ee_len = ext4_ext_get_actual_len(ex2);

    if (le32_to_cpu(ex1->ee_block) + ext1_ee_len !=
        le32_to_cpu(ex2->ee_block))
        return 0;

    /*
     * To allow future support for preallocated extents to be added
     * as an RO_COMPAT feature, refuse to merge two extents if
     * this can result in the top bit of ee_len being set.
     */
    if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
        return 0;
    if (ext4_ext_is_unwritten(ex1) &&
        (ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN))
        return 0;
#ifdef AGGRESSIVE_TEST
    if (ext1_ee_len >= 4)
        return 0;
#endif

    if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
        return 1;
    return 0;
}
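
/*
 * Worked example (illustrative): ex1 = {ee_block 100, len 8,
 * pblock 500} and ex2 = {ee_block 108, len 4, pblock 508} are
 * mergeable: both are written, 100 + 8 == 108 logically,
 * 500 + 8 == 508 physically, and 8 + 4 stays below
 * EXT_INIT_MAX_LEN.  Merging yields {ee_block 100, len 12,
 * pblock 500}.  Moving ex2 to pblock 509 instead breaks physical
 * contiguity and the function returns 0.
 */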

/*
 * This function tries to merge the "ex" extent to the next extent in the tree.
 * It always tries to merge towards right. If you want to merge towards
 * left, pass "ex - 1" as argument instead of "ex".
 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
 * 1 if they got merged.
 */
static int ext4_ext_try_to_merge_right(struct inode *inode,
                                       struct ext4_ext_path *path,
                                       struct ext4_extent *ex)
{
    struct ext4_extent_header *eh;
    unsigned int depth, len;
    int merge_done = 0, unwritten;

    depth = ext_depth(inode);
    assert(path[depth].p_hdr != NULL);
    eh = path[depth].p_hdr;

    while (ex < EXT_LAST_EXTENT(eh)) {
        if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
            break;
        /* merge with next extent! */
        unwritten = ext4_ext_is_unwritten(ex);
        ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
                                 + ext4_ext_get_actual_len(ex + 1));
        if (unwritten)
            ext4_ext_mark_unwritten(ex);

        if (ex + 1 < EXT_LAST_EXTENT(eh)) {
            len = (EXT_LAST_EXTENT(eh) - ex - 1)
                  * sizeof(struct ext4_extent);
            memmove(ex + 1, ex + 2, len);
        }
        le16_add_cpu(&eh->eh_entries, -1);
        merge_done = 1;
        if (!eh->eh_entries)
            EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");
    }

    return merge_done;
}

/*
 * This function does a very simple check to see if we can collapse
 * an extent tree with a single extent tree leaf block into the inode.
 */
static void ext4_ext_try_to_merge_up(void *icb, handle_t *handle,
                                     struct inode *inode,
                                     struct ext4_ext_path *path)
{
    size_t s;
    unsigned max_root = ext4_ext_space_root(inode, 0);
    ext4_fsblk_t blk;

    if ((path[0].p_depth != 1) ||
        (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
        (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
        return;

    /*
     * We need to modify the block allocation bitmap and the block
     * group descriptor to release the extent tree block.  If we
     * can't get the journal credits, give up.
     */
    if (ext4_journal_extend(icb, handle, 2))
        return;

    /*
     * Copy the extent data up to the inode
     */
    blk = ext4_idx_pblock(path[0].p_idx);
    s = le16_to_cpu(path[1].p_hdr->eh_entries) *
        sizeof(struct ext4_extent_idx);
    s += sizeof(struct ext4_extent_header);

    path[1].p_maxdepth = path[0].p_maxdepth;
    memcpy(path[0].p_hdr, path[1].p_hdr, s);
    path[0].p_depth = 0;
    path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
                    (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
    path[0].p_hdr->eh_max = cpu_to_le16(max_root);

    extents_brelse(path[1].p_bh);
    ext4_free_blocks(icb, handle, inode, NULL, blk, 1,
                     EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
}

/*
 * This function tries to merge the @ex extent to neighbours in the tree,
 * trying the left neighbour first; afterwards it tries to collapse a
 * single-leaf tree back into the inode.
 */
static void ext4_ext_try_to_merge(void *icb, handle_t *handle,
                                  struct inode *inode,
                                  struct ext4_ext_path *path,
                                  struct ext4_extent *ex)
{
    struct ext4_extent_header *eh;
    unsigned int depth;
    int merge_done = 0;

    depth = ext_depth(inode);
    BUG_ON(path[depth].p_hdr == NULL);
    eh = path[depth].p_hdr;

    if (ex > EXT_FIRST_EXTENT(eh))
        merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);

    if (!merge_done)
        (void) ext4_ext_try_to_merge_right(inode, path, ex);

    ext4_ext_try_to_merge_up(icb, handle, inode, path);
}

/*
 * ext4_ext_insert_extent:
 * tries to merge requested extent into the existing extent or
 * inserts requested extent as new one into the tree,
 * creating new leaf in the no-space case.
 */
int ext4_ext_insert_extent(void *icb, handle_t *handle, struct inode *inode,
                           struct ext4_ext_path **ppath,
                           struct ext4_extent *newext,
                           int gb_flags)
{
    struct ext4_ext_path *path = *ppath;
    struct ext4_extent_header *eh;
    struct ext4_extent *ex, *fex;
    struct ext4_extent *nearex; /* nearest extent */
    struct ext4_ext_path *npath = NULL;
    int depth, len, err;
    ext4_lblk_t next;
    int mb_flags = 0, unwritten;

    if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
        mb_flags |= EXT4_MB_DELALLOC_RESERVED;
    if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
        EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
        return -EIO;
    }
    depth = ext_depth(inode);
    ex = path[depth].p_ext;
    eh = path[depth].p_hdr;
    if (unlikely(path[depth].p_hdr == NULL)) {
        EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
        return -EIO;
    }

    /* try to insert block into found extent and return */
    if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) {

        /*
         * Try to see whether we should rather test the extent on
         * right from ex, or from the left of ex. This is because
         * ext4_find_extent() can return either extent on the
         * left, or on the right from the searched position. This
         * will make merging more effective.
         */
        if (ex < EXT_LAST_EXTENT(eh) &&
            (le32_to_cpu(ex->ee_block) +
             ext4_ext_get_actual_len(ex) <
             le32_to_cpu(newext->ee_block))) {
            ex += 1;
            goto prepend;
        } else if ((ex > EXT_FIRST_EXTENT(eh)) &&
                   (le32_to_cpu(newext->ee_block) +
                    ext4_ext_get_actual_len(newext) <
                    le32_to_cpu(ex->ee_block)))
            ex -= 1;

        /* Try to append newex to the ex */
        if (ext4_can_extents_be_merged(inode, ex, newext)) {
            ext_debug("append [%d]%d block to %u:[%d]%d"
                      "(from %llu)\n",
                      ext4_ext_is_unwritten(newext),
                      ext4_ext_get_actual_len(newext),
                      le32_to_cpu(ex->ee_block),
                      ext4_ext_is_unwritten(ex),
                      ext4_ext_get_actual_len(ex),
                      ext4_ext_pblock(ex));
            err = ext4_ext_get_access(icb, handle, inode,
                                      path + depth);
            if (err)
                return err;
            unwritten = ext4_ext_is_unwritten(ex);
            ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
                                     + ext4_ext_get_actual_len(newext));
            if (unwritten)
                ext4_ext_mark_unwritten(ex);
            eh = path[depth].p_hdr;
            nearex = ex;
            goto merge;
        }

prepend:
        /* Try to prepend newex to the ex */
        if (ext4_can_extents_be_merged(inode, newext, ex)) {
            ext_debug("prepend %u[%d]%d block to %u:[%d]%d"
                      "(from %llu)\n",
                      le32_to_cpu(newext->ee_block),
                      ext4_ext_is_unwritten(newext),
                      ext4_ext_get_actual_len(newext),
                      le32_to_cpu(ex->ee_block),
                      ext4_ext_is_unwritten(ex),
                      ext4_ext_get_actual_len(ex),
                      ext4_ext_pblock(ex));
            err = ext4_ext_get_access(icb, handle, inode,
                                      path + depth);
            if (err)
                return err;

            unwritten = ext4_ext_is_unwritten(ex);
            ex->ee_block = newext->ee_block;
            ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
            ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
                                     + ext4_ext_get_actual_len(newext));
            if (unwritten)
                ext4_ext_mark_unwritten(ex);
            eh = path[depth].p_hdr;
            nearex = ex;
            goto merge;
        }
    }

    depth = ext_depth(inode);
    eh = path[depth].p_hdr;
    if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
        goto has_space;

    /* probably next leaf has space for us? */
    fex = EXT_LAST_EXTENT(eh);
    next = EXT_MAX_BLOCKS;
    if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
        next = ext4_ext_next_leaf_block(path);
    if (next != EXT_MAX_BLOCKS) {
        ext_debug("next leaf block - %u\n", next);
        BUG_ON(npath != NULL);
        npath = ext4_find_extent(inode, next, NULL, 0);
        if (IS_ERR(npath))
            return PTR_ERR(npath);
        BUG_ON(npath->p_depth != path->p_depth);
        eh = npath[depth].p_hdr;
        if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
            ext_debug("next leaf isn't full(%d)\n",
                      le16_to_cpu(eh->eh_entries));
            path = npath;
            goto has_space;
        }
        ext_debug("next leaf has no free space(%d,%d)\n",
                  le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
    }

    /*
     * There is no free space in the found leaf.
     * We're gonna add a new leaf in the tree.
     */
    if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
        mb_flags |= EXT4_MB_USE_RESERVED;
    err = ext4_ext_create_new_leaf(icb, handle, inode, mb_flags, gb_flags,
                                   ppath, newext);
    if (err)
        goto cleanup;
    depth = ext_depth(inode);
    eh = path[depth].p_hdr;

has_space:
    nearex = path[depth].p_ext;

    err = ext4_ext_get_access(icb, handle, inode, path + depth);
    if (err)
        goto cleanup;

    if (!nearex) {
        /* there is no extent in this leaf, create first one */
        ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n",
                  le32_to_cpu(newext->ee_block),
                  ext4_ext_pblock(newext),
                  ext4_ext_is_unwritten(newext),
                  ext4_ext_get_actual_len(newext));
        nearex = EXT_FIRST_EXTENT(eh);
    } else {
        if (le32_to_cpu(newext->ee_block)
            > le32_to_cpu(nearex->ee_block)) {
            /* Insert after */
            ext_debug("insert %u:%llu:[%d]%d before: "
                      "nearest %p\n",
                      le32_to_cpu(newext->ee_block),
                      ext4_ext_pblock(newext),
                      ext4_ext_is_unwritten(newext),
                      ext4_ext_get_actual_len(newext),
                      nearex);
            nearex++;
        } else {
            /* Insert before */
            BUG_ON(newext->ee_block == nearex->ee_block);
            ext_debug("insert %u:%llu:[%d]%d after: "
                      "nearest %p\n",
                      le32_to_cpu(newext->ee_block),
                      ext4_ext_pblock(newext),
                      ext4_ext_is_unwritten(newext),
                      ext4_ext_get_actual_len(newext),
                      nearex);
        }
        len = EXT_LAST_EXTENT(eh) - nearex + 1;
        if (len > 0) {
            ext_debug("insert %u:%llu:[%d]%d: "
                      "move %d extents from 0x%p to 0x%p\n",
                      le32_to_cpu(newext->ee_block),
                      ext4_ext_pblock(newext),
                      ext4_ext_is_unwritten(newext),
                      ext4_ext_get_actual_len(newext),
                      len, nearex, nearex + 1);
            memmove(nearex + 1, nearex,
                    len * sizeof(struct ext4_extent));
        }
    }

    le16_add_cpu(&eh->eh_entries, 1);
    path[depth].p_ext = nearex;
    nearex->ee_block = newext->ee_block;
    ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
    nearex->ee_len = newext->ee_len;

merge:
    /* try to merge extents */
    if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO))
        ext4_ext_try_to_merge(icb, handle, inode, path, nearex);

    depth = ext_depth(inode);

    /* time to correct all indexes above */
    err = ext4_ext_correct_indexes(icb, handle,
                                   inode, path, depth);
    if (err)
        goto cleanup;

    err = ext4_ext_dirty(icb, handle, inode, path + path->p_depth);

cleanup:
    if (npath) {
        ext4_ext_drop_refs(npath);
        kfree(npath);
    }
    return err;
}

static inline int get_default_free_blocks_flags(struct inode *inode)
{
    return 0;
}

/* FIXME!! we need to try to merge to left or right after zero-out */
static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
{
    ext4_fsblk_t ee_pblock;
    unsigned int ee_len;
    int ret;

    ee_len = ext4_ext_get_actual_len(ex);
    ee_pblock = ext4_ext_pblock(ex);

    ret = 0;

    return ret;
}

static void ext4_ext_remove_blocks(
        void *icb,
        handle_t *handle,
        struct inode *inode, struct ext4_extent *ex,
        ext4_lblk_t from, ext4_lblk_t to)
{
    int len = to - from + 1;
    ext4_lblk_t num;
    ext4_fsblk_t start;
    num = from - le32_to_cpu(ex->ee_block);
    start = ext4_ext_pblock(ex) + num;
    ext_debug("Freeing %lu at %I64u, %d\n", from, start, len);
    ext4_free_blocks(icb, handle, inode, NULL,
                     start, len, 0);
}
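
/*
 * Worked example (illustrative): for ex = {ee_block 100, len 16,
 * pblock 500}, a call with from == 104 and to == 115 computes
 * num = 104 - 100 = 4 and start = 500 + 4 = 504, then frees the
 * len = 115 - 104 + 1 = 12 physical blocks 504..515.  Note that the
 * caller is responsible for keeping [from, to] inside the extent;
 * the helper does not re-check the bounds.
 */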

static int ext4_ext_remove_idx(void *icb,
                               handle_t *handle,
                               struct inode *inode,
                               struct ext4_ext_path *path,
                               int depth)
{
    int err, i = depth;
    ext4_fsblk_t leaf;

    /* free index block */
    leaf = ext4_idx_pblock(path[i].p_idx);

    if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) {
        int len = EXT_LAST_INDEX(path[i].p_hdr) - path[i].p_idx;
        memmove(path[i].p_idx, path[i].p_idx + 1,
                len * sizeof(struct ext4_extent_idx));
    }

    le16_add_cpu(&path[i].p_hdr->eh_entries, -1);
    err = ext4_ext_dirty(icb, handle, inode, path + i);
    if (err)
        return err;

    ext_debug("IDX: Freeing %lu at %I64u, %d\n",
              le32_to_cpu(path[i].p_idx->ei_block), leaf, 1);
    ext4_free_blocks(icb, handle, inode, NULL,
                     leaf, 1, 0);
    return err;
}

static int ext4_ext_amalgamate(void *icb,
                               handle_t *handle,
                               struct inode *inode,
                               struct ext4_ext_path *path,
                               int at)
{
    int new_entries, right_entries;
    int depth = ext_depth(inode);
    struct ext4_ext_path *right_path = NULL;
    ext4_lblk_t now;
    int ret = 0;
    if (!at)
        return 0;

    now = ext4_ext_next_allocated_block(path, at - 1);
    if (now == EXT_MAX_BLOCKS)
        goto out;

    right_path = ext4_find_extent(inode, now, NULL, 0);
    if (IS_ERR(right_path)) {
        ret = PTR_ERR(right_path);
        right_path = NULL;
        goto out;
    }

    right_entries = le16_to_cpu(right_path[at].p_hdr->eh_entries);
    new_entries = le16_to_cpu(path[at].p_hdr->eh_entries) +
                  right_entries;
    if (new_entries > path[at].p_hdr->eh_max) {
        ret = 0;
        goto out;
    }
    if (at == depth) {
        struct ext4_extent *last_ex = EXT_LAST_EXTENT(path[at].p_hdr);
        memmove(last_ex + 1,
                EXT_FIRST_EXTENT(right_path[at].p_hdr),
                right_entries * sizeof(struct ext4_extent));
    } else {
        struct ext4_extent_idx *last_ix = EXT_LAST_INDEX(path[at].p_hdr);
        memmove(last_ix + 1,
                EXT_FIRST_INDEX(right_path[at].p_hdr),
                right_entries * sizeof(struct ext4_extent_idx));
    }
    path[at].p_hdr->eh_entries = cpu_to_le16(new_entries);
    right_path[at].p_hdr->eh_entries = 0;
    ext4_ext_dirty(icb, handle, inode, path + at);

    /*
     * remove the empty node from index block above.
     */
    depth = at;
    while (depth > 0) {
        struct ext4_extent_header *eh = right_path[depth].p_hdr;
        if (eh->eh_entries == 0 && right_path[depth].p_bh != NULL) {
            ext4_ext_drop_refs(right_path + depth);
            ret = ext4_ext_remove_idx(icb, handle, inode, right_path, depth - 1);
            if (ret)
                goto out;
        } else
            break;

        depth--;
    }
    ret = ext4_ext_correct_indexes(icb, handle,
                                   inode, right_path, depth);
out:
    if (right_path) {
        ext4_ext_drop_refs(right_path);
        kfree(right_path);
    }

    return ret;
}
2075
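/*
 * Rebalance after a removal: walk from level @at back toward the root,
 * amalgamating each node with its right sibling where possible, then
 * repeatedly let ext4_ext_shrink_indepth() reduce the tree depth for as
 * long as it reports progress through @shrinked.
 */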
static int ext4_ext_balance(void *icb,
                            handle_t *handle,
                            struct inode *inode,
                            struct ext4_ext_path **path,
                            int at)
{
    int ret, shrinked = 0;
    int depth = at;

    while (depth > 0) {
        ret = ext4_ext_amalgamate(icb, handle, inode, *path, depth);
        if (ret)
            goto out;

        depth--;
    }

    do {
        ret = ext4_ext_shrink_indepth(icb, handle, inode, 0, &shrinked);
    } while (!ret && shrinked);

out:
    return ret;
}

/*
 * NOTE: After removal, path should not be reused.
 */
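/*
 * Remove the extent at (*path)[depth] entirely: free its data blocks,
 * close the gap in the leaf, drop any nodes that became empty, fix the
 * indexes above, and finally rebalance the tree.
 */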
int ext4_ext_remove_extent(void *icb,
                           handle_t *handle,
                           struct inode *inode, struct ext4_ext_path **path)
{
    int len;
    int err = 0;
    ext4_lblk_t start;
    uint16_t new_entries;
    int depth = ext_depth(inode);
    struct ext4_extent *ex = (*path)[depth].p_ext,
                       *ex2 = ex + 1;
    struct ext4_extent_header *eh = (*path)[depth].p_hdr;

    if (!ex)
        return -EINVAL;

    start = le32_to_cpu(ex->ee_block);
    len = ext4_ext_get_actual_len(ex);
    new_entries = le16_to_cpu(eh->eh_entries) - 1;

    ext4_ext_remove_blocks(icb, handle, inode, ex, start, start + len - 1);
    if (ex2 <= EXT_LAST_EXTENT(eh))
        memmove(ex, ex2,
                (EXT_LAST_EXTENT(eh) - ex2 + 1) * sizeof(struct ext4_extent));
    eh->eh_entries = cpu_to_le16(new_entries);

    ext4_ext_dirty(icb, handle, inode, (*path) + depth);

    /*
     * If a node became empty, remove it from the index block above.
     */
    while (depth > 0) {
        eh = (*path)[depth].p_hdr;
        if (eh->eh_entries == 0 && (*path)[depth].p_bh != NULL) {
            ext4_ext_drop_refs((*path) + depth);
            err = ext4_ext_remove_idx(icb, handle, inode, *path, depth - 1);
            if (err)
                break;
        } else
            break;

        depth--;
    }
    err = ext4_ext_correct_indexes(icb, handle, inode, *path, depth);

    if ((*path)->p_hdr->eh_entries == 0) {
        /*
         * truncate to zero freed the whole tree,
         * so we need to correct eh_depth
         */
        ext_inode_hdr(inode)->eh_depth = 0;
        ext_inode_hdr(inode)->eh_max =
            cpu_to_le16(ext4_ext_space_root(inode, 0));
        err = ext4_ext_dirty(icb, handle, inode, *path);
    }
    err = ext4_ext_balance(icb, handle, inode, path, depth);

    return err;
}

int __ext4_ext_truncate(void *icb,
                        handle_t *handle,
                        struct inode *inode,
                        ext4_lblk_t from, ext4_lblk_t to)
{
    int depth = ext_depth(inode), ret = -EIO;
    struct ext4_extent *ex;
    struct ext4_ext_path *path = NULL, *npath;
    ext4_lblk_t now = from;

    if (to < from)
        return -EINVAL;

    npath = ext4_find_extent(inode, from, &path, 0);
    if (IS_ERR(npath))
        goto out;

    path = npath;
    ex = path[depth].p_ext;
    if (!ex)
        goto out;

    if (from < le32_to_cpu(ex->ee_block) &&
        to < le32_to_cpu(ex->ee_block)) {
        /* the whole range lies in a hole before this extent */
        ret = 0;
        goto out;
    }
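    /*
     * Example for the in-extent case below: extent [100..199] at pblock
     * 5000, removing 120..150, keeps [100..119] -> 5000..5019, frees
     * physical blocks 5020..5050, and re-inserts [151..199] -> 5051..5099.
     */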
    /* the range to remove lies strictly inside this one extent */
    if ((le32_to_cpu(ex->ee_block) < from) &&
        (to < le32_to_cpu(ex->ee_block) +
              ext4_ext_get_actual_len(ex) - 1)) {

        struct ext4_extent newex;
        int unwritten = ext4_ext_is_unwritten(ex);
        ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
        int len = ext4_ext_get_actual_len(ex);
        ext4_fsblk_t newblock =
            to + 1 - ee_block + ext4_ext_pblock(ex);

        /* trim the original extent down to [ee_block, from - 1] */
        ex->ee_len = cpu_to_le16(from - ee_block);
        if (unwritten)
            ext4_ext_mark_unwritten(ex);

        ext4_ext_dirty(icb, handle, inode, path + depth);

        ext4_ext_remove_blocks(icb, handle, inode, ex, from, to);

        /* insert the surviving tail as a new extent */
        newex.ee_block = cpu_to_le32(to + 1);
        newex.ee_len = cpu_to_le16(ee_block + len - 1 - to);
        ext4_ext_store_pblock(&newex, newblock);
        if (unwritten)
            ext4_ext_mark_unwritten(&newex);

        ret = ext4_ext_insert_extent(icb, handle, inode, &path, &newex, 0);
        goto out;
    }

    /* the found extent ends before @from: step to the next one */
    if (le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex) - 1 < from) {
        now = ext4_ext_next_allocated_block(path, depth);
        npath = ext4_find_extent(inode, now, &path, 0);
        if (IS_ERR(npath))
            goto out;

        path = npath;
        ex = path[depth].p_ext;
    }
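    /*
     * Walk the remaining extents overlapping [from, to]: trim partially
     * covered head/tail extents in place and remove fully covered ones,
     * advancing with ext4_ext_next_allocated_block().
     */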
    while (ex &&
           le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex) - 1 >= now &&
           le32_to_cpu(ex->ee_block) <= to) {
        int len, new_len = 0;
        int unwritten;
        ext4_lblk_t start, new_start;
        ext4_fsblk_t newblock;

        new_start = start = le32_to_cpu(ex->ee_block);
        len = ext4_ext_get_actual_len(ex);
        newblock = ext4_ext_pblock(ex);
        if (start < from) {
            /* keep the head [start, from - 1] */
            len -= from - start;
            new_len = from - start;
            start = from;
        } else {
            if (start + len - 1 > to) {
                /* keep the tail [to + 1, start + len - 1] */
                new_len = start + len - 1 - to;
                len -= new_len;
                new_start = to + 1;
                newblock += to + 1 - start;
            }
        }

        now = ext4_ext_next_allocated_block(path, depth);
        if (!new_len) {
            /* the extent is fully covered: remove it */
            ret = ext4_ext_remove_extent(icb, handle, inode, &path);
            if (ret)
                goto out;
        } else {
            /* free the covered part and shrink the extent in place */
            ext4_ext_remove_blocks(icb, handle, inode,
                                   ex, start, start + len - 1);
            ex->ee_block = cpu_to_le32(new_start);
            unwritten = ext4_ext_is_unwritten(ex);
            ex->ee_len = cpu_to_le16(new_len);
            ext4_ext_store_pblock(ex, newblock);
            if (unwritten)
                ext4_ext_mark_unwritten(ex);

            ext4_ext_dirty(icb, handle, inode, path + depth);
        }

        npath = ext4_find_extent(inode, now, &path, 0);
        if (IS_ERR(npath))
            goto out;

        path = npath;
        depth = ext_depth(inode);
        ex = path[depth].p_ext;
    }

out:
    if (path) {
        ext4_ext_drop_refs(path);
        kfree(path);
    }

    if (IS_ERR(npath))
        ret = PTR_ERR(npath);

    return ret;
}

/*
 * ext4_split_extent_at() splits an extent at a given block.
 *
 * @handle: the journal handle
 * @inode: the file inode
 * @path: the path to the extent
 * @split: the logical block where the extent is split.
 * @split_flag: indicates whether the extent may be zeroed out if the split
 *              fails, and the states (initialized or unwritten) of the two
 *              new extents.
 * @flags: flags used when inserting the new extent into the extent tree.
 *
 * Splits extent [a, b] into two extents [a, @split) and [@split, b], whose
 * states are determined by @split_flag.
 *
 * There are two cases:
 * a> the extent is split into two extents.
 * b> no split is needed, and the extent is only marked.
 *
 * Returns 0 on success.
 */
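/*
 * Example: splitting the extent [ee_block = 100, len = 50, pblock = 5000]
 * at split = 120 trims ex to [100..119] -> 5000..5019 and inserts
 * [120..149] -> 5020..5049; the MARK_UNWRIT flags pick each half's state.
 */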
static int ext4_split_extent_at(void *icb, handle_t *handle,
                                struct inode *inode,
                                struct ext4_ext_path **ppath,
                                ext4_lblk_t split,
                                int split_flag,
                                int flags)
{
    struct ext4_ext_path *path = *ppath;
    ext4_fsblk_t newblock;
    ext4_lblk_t ee_block;
    struct ext4_extent *ex, newex, orig_ex, zero_ex;
    struct ext4_extent *ex2 = NULL;
    unsigned int ee_len, depth;
    int err = 0;

    ext4_ext_show_leaf(inode, path);

    depth = ext_depth(inode);
    ex = path[depth].p_ext;
    ee_block = le32_to_cpu(ex->ee_block);
    ee_len = ext4_ext_get_actual_len(ex);
    newblock = split - ee_block + ext4_ext_pblock(ex);

    BUG_ON(split < ee_block || split >= (ee_block + ee_len));

    err = ext4_ext_get_access(icb, handle, inode, path + depth);
    if (err)
        goto out;

    if (split == ee_block) {
        /*
         * case b: block @split is the block the extent begins with,
         * so we just change the state of the extent and no split
         * is needed.
         */
        if (split_flag & EXT4_EXT_MARK_UNWRIT2)
            ext4_ext_mark_unwritten(ex);
        else
            ext4_ext_mark_initialized(ex);

        if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
            ext4_ext_try_to_merge(icb, handle, inode, path, ex);

        err = ext4_ext_dirty(icb, handle, inode, path + path->p_depth);
        goto out;
    }

    /* case a */
    memcpy(&orig_ex, ex, sizeof(orig_ex));
    ex->ee_len = cpu_to_le16(split - ee_block);
    if (split_flag & EXT4_EXT_MARK_UNWRIT1)
        ext4_ext_mark_unwritten(ex);

    /*
     * The path may point to a new leaf, not the original one any more,
     * after ext4_ext_insert_extent() returns.
     */
    err = ext4_ext_dirty(icb, handle, inode, path + depth);
    if (err)
        goto fix_extent_len;

    ex2 = &newex;
    ex2->ee_block = cpu_to_le32(split);
    ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block));
    ext4_ext_store_pblock(ex2, newblock);
    if (split_flag & EXT4_EXT_MARK_UNWRIT2)
        ext4_ext_mark_unwritten(ex2);

    err = ext4_ext_insert_extent(icb, handle, inode, ppath, &newex, flags);
    if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
        if (split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) {
            if (split_flag & EXT4_EXT_DATA_VALID1) {
                err = ext4_ext_zeroout(inode, ex2);
                zero_ex.ee_block = ex2->ee_block;
                zero_ex.ee_len = cpu_to_le16(
                    ext4_ext_get_actual_len(ex2));
                ext4_ext_store_pblock(&zero_ex,
                                      ext4_ext_pblock(ex2));
            } else {
                err = ext4_ext_zeroout(inode, ex);
                zero_ex.ee_block = ex->ee_block;
                zero_ex.ee_len = cpu_to_le16(
                    ext4_ext_get_actual_len(ex));
                ext4_ext_store_pblock(&zero_ex,
                                      ext4_ext_pblock(ex));
            }
        } else {
            err = ext4_ext_zeroout(inode, &orig_ex);
            zero_ex.ee_block = orig_ex.ee_block;
            zero_ex.ee_len = cpu_to_le16(
                ext4_ext_get_actual_len(&orig_ex));
            ext4_ext_store_pblock(&zero_ex,
                                  ext4_ext_pblock(&orig_ex));
        }

        if (err)
            goto fix_extent_len;
        /* update the extent length and mark as initialized */
        ex->ee_len = cpu_to_le16(ee_len);
        ext4_ext_try_to_merge(icb, handle, inode, path, ex);
        err = ext4_ext_dirty(icb, handle, inode, path + path->p_depth);
        if (err)
            goto fix_extent_len;

        goto out;
    } else if (err)
        goto fix_extent_len;

out:
    ext4_ext_show_leaf(inode, path);
    return err;

fix_extent_len:
    ex->ee_len = orig_ex.ee_len;
    ext4_ext_dirty(icb, handle, inode, path + path->p_depth);
    return err;
}

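/*
 * Reset the inode's in-body extent header to an empty root of depth 0.
 */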
int ext4_ext_tree_init(void *icb, handle_t *handle, struct inode *inode)
{
    struct ext4_extent_header *eh;

    eh = ext_inode_hdr(inode);
    eh->eh_depth = 0;
    eh->eh_entries = 0;
    eh->eh_magic = cpu_to_le16(EXT4_EXT_MAGIC);
    eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
    ext4_mark_inode_dirty(icb, handle, inode);
    return 0;
}

/*
 * called at mount time
 */
void ext4_ext_init(struct super_block *sb)
{
    /*
     * possible initialization would be here
     */
}

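/*
 * Split the unwritten extent at (*ppath)[depth] so that the range
 * [split, split + blocks) becomes a separate, initialized extent;
 * any untouched head and/or tail stays marked unwritten.
 */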
static int ext4_ext_convert_to_initialized(
    void *icb,
    handle_t *handle,
    struct inode *inode,
    struct ext4_ext_path **ppath,
    ext4_lblk_t split,
    unsigned long blocks,
    int flags)
{
    int depth = ext_depth(inode), err;
    struct ext4_extent *ex = (*ppath)[depth].p_ext;

    assert(le32_to_cpu(ex->ee_block) <= split);

    if (split + blocks == le32_to_cpu(ex->ee_block) +
                          ext4_ext_get_actual_len(ex)) {

        /* the range is the tail of the extent:
         * split once and initialize the right part */
        err = ext4_split_extent_at(icb, handle, inode, ppath, split,
                                   EXT4_EXT_MARK_UNWRIT1, flags);

    } else if (le32_to_cpu(ex->ee_block) == split) {

        /* the range is the head of the extent:
         * split once and initialize the left part */
        err = ext4_split_extent_at(icb, handle, inode, ppath, split + blocks,
                                   EXT4_EXT_MARK_UNWRIT2, flags);

    } else {

        /* split one extent into three and initialize the middle one */
        err = ext4_split_extent_at(icb, handle, inode, ppath, split + blocks,
                                   EXT4_EXT_MARK_UNWRIT1 |
                                   EXT4_EXT_MARK_UNWRIT2, flags);
        if (0 == err) {
            err = ext4_split_extent_at(icb, handle, inode, ppath, split,
                                       EXT4_EXT_MARK_UNWRIT1, flags);
        }
    }

    return err;
}

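/*
 * Map @iblock (and up to @max_blocks following blocks) for @inode.
 * Returns the number of blocks mapped, or a negative errno; @bh_result
 * is filled with the physical mapping. When @create is set, missing
 * blocks are allocated and a new extent is inserted into the tree.
 */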
int ext4_ext_get_blocks(void *icb, handle_t *handle, struct inode *inode, ext4_fsblk_t iblock,
                        unsigned long max_blocks, struct buffer_head *bh_result,
                        int create, int flags)
{
    struct ext4_ext_path *path = NULL;
    struct ext4_extent newex, *ex;
    int goal, err = 0, depth;
    unsigned long allocated = 0;
    ext4_fsblk_t next, newblock;

    clear_buffer_new(bh_result);
    /*mutex_lock(&ext4_I(inode)->truncate_mutex);*/

    /* find extent for this block */
    path = ext4_find_extent(inode, iblock, NULL, 0);
    if (IS_ERR(path)) {
        err = PTR_ERR(path);
        path = NULL;
        goto out2;
    }

    depth = ext_depth(inode);

    /*
     * A consistent leaf must not be empty. This situation is possible,
     * though, _during_ tree modification, which is why the assert can't
     * be put in ext4_ext_find_extent().
     */
    BUG_ON(path[depth].p_ext == NULL && depth != 0);

    if ((ex = path[depth].p_ext)) {
        ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
        ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
        unsigned short ee_len = ext4_ext_get_actual_len(ex);

        /* if the found extent covers the block, simply return it */
        if (iblock >= ee_block && iblock < ee_block + ee_len) {

            /* number of remaining blocks in the extent */
            allocated = ee_len + ee_block - iblock;

            if (ext4_ext_is_unwritten(ex)) {
                if (create) {
                    newblock = iblock - ee_block + ee_start;
                    err = ext4_ext_convert_to_initialized(
                        icb, handle, inode, &path,
                        iblock, allocated, flags);
                    if (err)
                        goto out2;
                } else {
                    newblock = 0;
                }
            } else {
                newblock = iblock - ee_block + ee_start;
            }
            goto out;
        }
    }

    /*
     * The requested block isn't allocated yet; we must not create it
     * if the create flag is zero.
     */
    if (!create) {
        goto out2;
    }

    /* find the next allocated block so that we know how many blocks
     * we can allocate without overlapping the next extent */
    next = ext4_ext_next_allocated_block(path, depth);
    BUG_ON(next <= iblock);
    allocated = next - iblock;
    if (flags & EXT4_GET_BLOCKS_PRE_IO && max_blocks > EXT_UNWRITTEN_MAX_LEN)
        max_blocks = EXT_UNWRITTEN_MAX_LEN;
    if (allocated > max_blocks)
        allocated = max_blocks;

    /* allocate new block */
    goal = ext4_ext_find_goal(inode, path, iblock);
    newblock = ext4_new_meta_blocks(icb, handle, inode, goal, 0,
                                    &allocated, &err);
    if (!newblock)
        goto out2;

    /* try to insert the new extent into the found leaf and return */
    newex.ee_block = cpu_to_le32(iblock);
    ext4_ext_store_pblock(&newex, newblock);
    newex.ee_len = cpu_to_le16(allocated);
    /* if this is fallocate, mark the extent as unwritten */
    if (flags & EXT4_GET_BLOCKS_PRE_IO) {
        ext4_ext_mark_unwritten(&newex);
    }
    err = ext4_ext_insert_extent(icb, handle, inode, &path, &newex,
                                 flags & EXT4_GET_BLOCKS_PRE_IO);

    if (err) {
        /* free the data blocks we just allocated; use the actual
         * length so the unwritten bit in ee_len is not counted */
        ext4_free_blocks(icb, handle, inode, NULL, ext4_ext_pblock(&newex),
                         ext4_ext_get_actual_len(&newex),
                         get_default_free_blocks_flags(inode));
        goto out2;
    }

    ext4_mark_inode_dirty(icb, handle, inode);

    /* the previous routine could have used the block we allocated */
    if (ext4_ext_is_unwritten(&newex))
        newblock = 0;
    else
        newblock = ext4_ext_pblock(&newex);

    set_buffer_new(bh_result);

out:
    if (allocated > max_blocks)
        allocated = max_blocks;

    ext4_ext_show_leaf(inode, path);
    set_buffer_mapped(bh_result);
    bh_result->b_bdev = inode->i_sb->s_bdev;
    bh_result->b_blocknr = newblock;
out2:
    if (path) {
        ext4_ext_drop_refs(path);
        kfree(path);
    }
    /*mutex_unlock(&ext4_I(inode)->truncate_mutex);*/

    return err ? err : allocated;
}

int ext4_ext_truncate(void *icb, struct inode *inode, unsigned long start)
{
    int ret = __ext4_ext_truncate(icb, NULL, inode,
                                  start, EXT_MAX_BLOCKS);

    /* Save modifications on the i_blocks field of the inode. */
    if (!ret)
        ret = ext4_mark_inode_dirty(icb, NULL, inode);

    return ret;
}

#ifdef _MSC_VER
#pragma warning(pop)
#endif