/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include "ext2fs.h"
#include <linux/ext4.h>

#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable: 4018)
#pragma warning(disable: 4242)
#pragma warning(disable: 4244)
#endif

/*
 * used by extent splitting.
 */
#define EXT4_EXT_MAY_ZEROOUT  0x1  /* safe to zeroout if split fails due to ENOSPC */
#define EXT4_EXT_MARK_UNWRIT1 0x2  /* mark first half unwritten */
#define EXT4_EXT_MARK_UNWRIT2 0x4  /* mark second half unwritten */

#define EXT4_EXT_DATA_VALID1  0x8  /* first half contains valid data */
#define EXT4_EXT_DATA_VALID2  0x10 /* second half contains valid data */

#define CONFIG_EXTENT_TEST
#ifdef CONFIG_EXTENT_TEST

#define ext4_mark_inode_dirty(icb, handle, n) ext3_mark_inode_dirty(icb, n)
static inline ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode)
{
    PEXT2_VCB Vcb;
    Vcb = inode->i_sb->s_priv;
    /* Vcb looks unused, but BLOCKS_PER_GROUP is assumed to be a macro
     * that expands in terms of the local Vcb */
    return (inode->i_ino - 1) / BLOCKS_PER_GROUP;
}

static ext4_fsblk_t ext4_new_meta_blocks(void *icb, handle_t *handle, struct inode *inode,
                                         ext4_fsblk_t goal,
                                         unsigned int flags,
                                         unsigned long *count, int *errp)
{
    NTSTATUS status;
    ULONG blockcnt = (count) ? *count : 1;
    ULONG block = 0;

    status = Ext2NewBlock((PEXT2_IRP_CONTEXT)icb,
                          inode->i_sb->s_priv,
                          0, goal,
                          &block,
                          &blockcnt);
    if (count)
        *count = blockcnt;

    if (!NT_SUCCESS(status)) {
        *errp = Ext2LinuxError(status);
        return 0;
    }
    inode->i_blocks += (blockcnt * (inode->i_sb->s_blocksize >> 9));
    return block;
}
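
/*
 * Note on the i_blocks accounting above: i_blocks is kept in 512-byte
 * sectors, so each allocated filesystem block contributes
 * (s_blocksize >> 9) units. For example, with a 4096-byte block size,
 * one new block adds 8 to i_blocks.
 */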

static void ext4_free_blocks(void *icb, handle_t *handle, struct inode *inode, void *fake,
                             ext4_fsblk_t block, int count, int flags)
{
    Ext2FreeBlock((PEXT2_IRP_CONTEXT)icb, inode->i_sb->s_priv, block, count);
    inode->i_blocks -= count * (inode->i_sb->s_blocksize >> 9);
    return;
}

static inline void ext_debug(char *str, ...)
{
}
#if TRUE
#define EXT4_ERROR_INODE(inode, str, ...) do {                      \
        DbgPrint("inode[%p]: " str "\n", inode, ##__VA_ARGS__);     \
    } while(0)
#else
#define EXT4_ERROR_INODE
#endif

#define ext4_std_error(s, err)
#define assert ASSERT

#endif

/*
 * Return the right sibling of a tree node (either leaf or index node).
 */

#define EXT_MAX_BLOCKS 0xffffffff

static inline int ext4_ext_space_block(struct inode *inode, int check)
{
    int size;

    size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
           / sizeof(struct ext4_extent);
#ifdef AGGRESSIVE_TEST
    if (!check && size > 6)
        size = 6;
#endif
    return size;
}

static inline int ext4_ext_space_block_idx(struct inode *inode, int check)
{
    int size;

    size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
           / sizeof(struct ext4_extent_idx);
#ifdef AGGRESSIVE_TEST
    if (!check && size > 5)
        size = 5;
#endif
    return size;
}

static inline int ext4_ext_space_root(struct inode *inode, int check)
{
    int size;

    size = sizeof(EXT4_I(inode)->i_block);
    size -= sizeof(struct ext4_extent_header);
    size /= sizeof(struct ext4_extent);
#ifdef AGGRESSIVE_TEST
    if (!check && size > 3)
        size = 3;
#endif
    return size;
}

static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
{
    int size;

    size = sizeof(EXT4_I(inode)->i_block);
    size -= sizeof(struct ext4_extent_header);
    size /= sizeof(struct ext4_extent_idx);
#ifdef AGGRESSIVE_TEST
    if (!check && size > 4)
        size = 4;
#endif
    return size;
}
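
/*
 * Worked example for the four capacity helpers above, assuming the
 * standard on-disk ext4 layout where the header, an extent entry and an
 * index entry are all 12 bytes:
 *   - 4096-byte block:  (4096 - 12) / 12 = 340 extents (or indexes) per block
 *   - inode root: i_block is 60 bytes, so (60 - 12) / 12 = 4 entries at depth 0
 * The AGGRESSIVE_TEST clamps shrink these capacities so that deep trees
 * can be exercised with small files.
 */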

static int
ext4_ext_max_entries(struct inode *inode, int depth)
{
    int max;

    if (depth == ext_depth(inode)) {
        if (depth == 0)
            max = ext4_ext_space_root(inode, 1);
        else
            max = ext4_ext_space_root_idx(inode, 1);
    } else {
        if (depth == 0)
            max = ext4_ext_space_block(inode, 1);
        else
            max = ext4_ext_space_block_idx(inode, 1);
    }

    return max;
}

static int __ext4_ext_check(const char *function, unsigned int line,
                            struct inode *inode,
                            struct ext4_extent_header *eh, int depth,
                            ext4_fsblk_t pblk);

/*
 * read_extent_tree_block:
 * Get a buffer_head via extents_bread, reading fresh data from storage.
 */
static struct buffer_head *
__read_extent_tree_block(const char *function, unsigned int line,
                         struct inode *inode, ext4_fsblk_t pblk, int depth,
                         int flags)
{
    struct buffer_head *bh;
    int err;

    bh = extents_bread(inode->i_sb, pblk);
    if (!bh)
        return ERR_PTR(-ENOMEM);

    if (!buffer_uptodate(bh)) {
        err = -EIO;
        goto errout;
    }
    if (buffer_verified(bh))
        return bh;
    err = __ext4_ext_check(function, line, inode,
                           ext_block_hdr(bh), depth, pblk);
    if (err)
        goto errout;
    set_buffer_verified(bh);
    return bh;
errout:
    extents_brelse(bh);
    return ERR_PTR(err);
}
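
/*
 * The function above follows the kernel ERR_PTR convention: on failure it
 * returns an error code encoded in the pointer value rather than NULL,
 * so callers must test with IS_ERR()/PTR_ERR(), e.g.:
 *
 *     bh = read_extent_tree_block(inode, pblk, depth, 0);
 *     if (IS_ERR(bh))
 *         return PTR_ERR(bh);
 */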

#define read_extent_tree_block(inode, pblk, depth, flags) \
    __read_extent_tree_block("", __LINE__, (inode), (pblk), \
                             (depth), (flags))

#define ext4_ext_check(inode, eh, depth, pblk) \
    __ext4_ext_check("", __LINE__, (inode), (eh), (depth), (pblk))

int ext4_ext_check_inode(struct inode *inode)
{
    return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0);
}

static uint32_t ext4_ext_block_csum(struct inode *inode,
                                    struct ext4_extent_header *eh)
{
    /* return ext4_crc32c(inode->i_csum, eh, EXT4_EXTENT_TAIL_OFFSET(eh)); */
    return 0;
}

static void ext4_extent_block_csum_set(struct inode *inode,
                                       struct ext4_extent_header *eh)
{
    struct ext4_extent_tail *tail;

    tail = find_ext4_extent_tail(eh);
    tail->et_checksum = ext4_ext_block_csum(inode, eh);
}

static int ext4_split_extent_at(void *icb,
                                handle_t *handle,
                                struct inode *inode,
                                struct ext4_ext_path **ppath,
                                ext4_lblk_t split,
                                int split_flag,
                                int flags);

static inline int
ext4_force_split_extent_at(void *icb, handle_t *handle, struct inode *inode,
                           struct ext4_ext_path **ppath, ext4_lblk_t lblk,
                           int nofail)
{
    struct ext4_ext_path *path = *ppath;
    int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);

    return ext4_split_extent_at(icb, handle, inode, ppath, lblk, unwritten ?
                                EXT4_EXT_MARK_UNWRIT1 | EXT4_EXT_MARK_UNWRIT2 : 0,
                                EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO |
                                (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL : 0));
}

/*
 * could return:
 *  - EROFS
 *  - ENOMEM
 */

static int ext4_ext_get_access(void *icb, handle_t *handle, struct inode *inode,
                               struct ext4_ext_path *path)
{
    if (path->p_bh) {
        /* path points to block */
        return ext4_journal_get_write_access(icb, handle, path->p_bh);
    }
    /* path points to leaf/index in inode body */
    /* we use in-core data, no need to protect them */
    return 0;
}

static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
                                       struct ext4_ext_path *path,
                                       ext4_lblk_t block)
{
    if (path) {
        int depth = path->p_depth;
        struct ext4_extent *ex;

        /*
         * Try to predict block placement assuming that we are
         * filling in a file which will eventually be
         * non-sparse --- i.e., in the case of libbfd writing
         * an ELF object sections out-of-order but in a way
         * that eventually results in a contiguous object or
         * executable file, or some database extending a table
         * space file.  However, this is actually somewhat
         * non-ideal if we are writing a sparse file such as
         * qemu or KVM writing a raw image file that is going
         * to stay fairly sparse, since it will end up
         * fragmenting the file system's free space.  Maybe we
         * should have some heuristics or some way to allow
         * userspace to pass a hint to the file system,
         * especially if the latter case turns out to be
         * common.
         */
        ex = path[depth].p_ext;
        if (ex) {
            ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
            ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);

            if (block > ext_block)
                return ext_pblk + (block - ext_block);
            else
                return ext_pblk - (ext_block - block);
        }

        /* it looks like index is empty;
         * try to find starting block from index itself */
        if (path[depth].p_bh)
            return path[depth].p_bh->b_blocknr;
    }

    /* OK. use inode's group */
    return ext4_inode_to_goal_block(inode);
}
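
/*
 * Example of the goal arithmetic above: if the closest extent maps
 * logical block 100 to physical block 500, the goal for logical block
 * 104 is 500 + (104 - 100) = 504, and for logical block 96 it is
 * 500 - (100 - 96) = 496, i.e. the allocation hint stays physically
 * adjacent to already-mapped data.
 */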

/*
 * Allocation for a meta data block
 */
static ext4_fsblk_t
ext4_ext_new_meta_block(void *icb, handle_t *handle, struct inode *inode,
                        struct ext4_ext_path *path,
                        struct ext4_extent *ex, int *err, unsigned int flags)
{
    ext4_fsblk_t goal, newblock;

    goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
    newblock = ext4_new_meta_blocks(icb, handle, inode, goal, flags,
                                    NULL, err);
    return newblock;
}

int __ext4_ext_dirty(const char *where, unsigned int line,
                     void *icb, handle_t *handle,
                     struct inode *inode,
                     struct ext4_ext_path *path)
{
    int err;

    if (path->p_bh) {
        ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh));
        /* path points to block */
        err = __ext4_handle_dirty_metadata(where, line, icb, handle, inode, path->p_bh);
    } else {
        /* path points to leaf/index in inode body */
        err = ext4_mark_inode_dirty(icb, handle, inode);
    }
    return err;
}

void ext4_ext_drop_refs(struct ext4_ext_path *path)
{
    int depth, i;

    if (!path)
        return;
    depth = path->p_depth;
    for (i = 0; i <= depth; i++, path++)
        if (path->p_bh) {
            extents_brelse(path->p_bh);
            path->p_bh = NULL;
        }
}

/*
 * Check whether the basic information inside the extent header
 * is correct.
 */
static int __ext4_ext_check(const char *function, unsigned int line,
                            struct inode *inode,
                            struct ext4_extent_header *eh, int depth,
                            ext4_fsblk_t pblk)
{
    struct ext4_extent_tail *tail;
    const char *error_msg;
#ifndef __REACTOS__
    int max = 0;
#endif
    if (eh->eh_magic != EXT4_EXT_MAGIC) {
        error_msg = "invalid magic";
        goto corrupted;
    }
    if (le16_to_cpu(eh->eh_depth) != depth) {
        error_msg = "unexpected eh_depth";
        goto corrupted;
    }
    if (eh->eh_max == 0) {
        error_msg = "invalid eh_max";
        goto corrupted;
    }
    if (eh->eh_entries > eh->eh_max) {
        error_msg = "invalid eh_entries";
        goto corrupted;
    }

    tail = find_ext4_extent_tail(eh);
    if (tail->et_checksum != ext4_ext_block_csum(inode, eh)) {
        ext_debug("Warning: extent checksum damaged? tail->et_checksum = "
                  "%lu, ext4_ext_block_csum = %lu\n",
                  tail->et_checksum, ext4_ext_block_csum(inode, eh));
    }

    return 0;

corrupted:
    ext_debug("corrupted! %s\n", error_msg);
    return -EIO;
}

/*
 * ext4_ext_binsearch_idx:
 * binary search for the closest index of the given block
 * the header must be checked before calling this
 */
static void
ext4_ext_binsearch_idx(struct inode *inode,
                       struct ext4_ext_path *path, ext4_lblk_t block)
{
    struct ext4_extent_header *eh = path->p_hdr;
    struct ext4_extent_idx *r, *l, *m;

    ext_debug("binsearch for %u(idx): ", block);

    l = EXT_FIRST_INDEX(eh) + 1;
    r = EXT_LAST_INDEX(eh);
    while (l <= r) {
        m = l + (r - l) / 2;
        if (block < (m->ei_block))
            r = m - 1;
        else
            l = m + 1;
        ext_debug("%p(%u):%p(%u):%p(%u) ", l, (l->ei_block),
                  m, (m->ei_block),
                  r, (r->ei_block));
    }

    path->p_idx = l - 1;
    ext_debug(" -> %u->%lld ", (path->p_idx->ei_block),
              ext4_idx_pblock(path->p_idx));

#ifdef CHECK_BINSEARCH
    {
        struct ext4_extent_idx *chix, *ix;
        int k;

        chix = ix = EXT_FIRST_INDEX(eh);
        for (k = 0; k < (eh->eh_entries); k++, ix++) {
            if (k != 0 &&
                (ix->ei_block) <= (ix[-1].ei_block)) {
                printk(KERN_DEBUG "k=%d, ix=0x%p, "
                       "first=0x%p\n", k,
                       ix, EXT_FIRST_INDEX(eh));
                printk(KERN_DEBUG "%u <= %u\n",
                       (ix->ei_block),
                       (ix[-1].ei_block));
            }
            BUG_ON(k && (ix->ei_block)
                   <= (ix[-1].ei_block));
            if (block < (ix->ei_block))
                break;
            chix = ix;
        }
        BUG_ON(chix != path->p_idx);
    }
#endif
}
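
/*
 * Invariant behind the search above: l starts at EXT_FIRST_INDEX(eh) + 1,
 * so the loop can never move l left of the first entry and "l - 1" is
 * always a valid index. The function selects the last index whose
 * ei_block is <= block, i.e. exactly the subtree that can contain the
 * target logical block.
 */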

/*
 * ext4_ext_binsearch:
 * binary search for closest extent of the given block
 * the header must be checked before calling this
 */
static void
ext4_ext_binsearch(struct inode *inode,
                   struct ext4_ext_path *path, ext4_lblk_t block)
{
    struct ext4_extent_header *eh = path->p_hdr;
    struct ext4_extent *r, *l, *m;

    if (eh->eh_entries == 0) {
        /*
         * this leaf is empty:
         * we get such a leaf in split/add case
         */
        return;
    }

    ext_debug("binsearch for %u: ", block);

    l = EXT_FIRST_EXTENT(eh) + 1;
    r = EXT_LAST_EXTENT(eh);

    while (l <= r) {
        m = l + (r - l) / 2;
        if (block < m->ee_block)
            r = m - 1;
        else
            l = m + 1;
        ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ee_block,
                  m, (m->ee_block),
                  r, (r->ee_block));
    }

    path->p_ext = l - 1;
    ext_debug(" -> %d:%llu:[%d]%d ",
              (path->p_ext->ee_block),
              ext4_ext_pblock(path->p_ext),
              ext4_ext_is_unwritten(path->p_ext),
              ext4_ext_get_actual_len(path->p_ext));

#ifdef CHECK_BINSEARCH
    {
        struct ext4_extent *chex, *ex;
        int k;

        chex = ex = EXT_FIRST_EXTENT(eh);
        for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
            BUG_ON(k && (ex->ee_block)
                   <= (ex[-1].ee_block));
            if (block < (ex->ee_block))
                break;
            chex = ex;
        }
        BUG_ON(chex != path->p_ext);
    }
#endif
}
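
/*
 * Worked example: a leaf holding extents that begin at logical blocks
 * 0, 100 and 200. A lookup for block 150 ends with l pointing at the
 * 200-extent, so p_ext = l - 1 selects the extent starting at 100 --
 * the closest extent at or before the target. The caller still has to
 * check whether block 150 actually falls inside that extent's length.
 */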

#ifdef EXT_DEBUG
static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
{
    int k, l = path->p_depth;

    ext_debug("path:");
    for (k = 0; k <= l; k++, path++) {
        if (path->p_idx) {
            ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block),
                      ext4_idx_pblock(path->p_idx));
        } else if (path->p_ext) {
            ext_debug(" %d:[%d]%d:%llu ",
                      le32_to_cpu(path->p_ext->ee_block),
                      ext4_ext_is_unwritten(path->p_ext),
                      ext4_ext_get_actual_len(path->p_ext),
                      ext4_ext_pblock(path->p_ext));
        } else
            ext_debug(" []");
    }
    ext_debug("\n");
}

static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
{
    int depth = ext_depth(inode);
    struct ext4_extent_header *eh;
    struct ext4_extent *ex;
    int i;

    if (!path)
        return;

    eh = path[depth].p_hdr;
    ex = EXT_FIRST_EXTENT(eh);

    ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino);

    for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
        ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
                  ext4_ext_is_unwritten(ex),
                  ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
    }
    ext_debug("\n");
}

static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
                               ext4_fsblk_t newblock, int level)
{
    int depth = ext_depth(inode);
    struct ext4_extent *ex;

    if (depth != level) {
        struct ext4_extent_idx *idx;
        idx = path[level].p_idx;
        while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) {
            ext_debug("%d: move %d:%llu in new index %llu\n", level,
                      le32_to_cpu(idx->ei_block),
                      ext4_idx_pblock(idx),
                      newblock);
            idx++;
        }

        return;
    }

    ex = path[depth].p_ext;
    while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) {
        ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
                  le32_to_cpu(ex->ee_block),
                  ext4_ext_pblock(ex),
                  ext4_ext_is_unwritten(ex),
                  ext4_ext_get_actual_len(ex),
                  newblock);
        ex++;
    }
}

#else
#define ext4_ext_show_path(inode, path)
#define ext4_ext_show_leaf(inode, path)
#define ext4_ext_show_move(inode, path, newblock, level)
#endif

struct ext4_ext_path *
ext4_find_extent(struct inode *inode, ext4_lblk_t block,
                 struct ext4_ext_path **orig_path, int flags)
{
    struct ext4_extent_header *eh;
    struct buffer_head *bh;
    struct ext4_ext_path *path = orig_path ? *orig_path : NULL;
    short int depth, i, ppos = 0;
    int ret;

    eh = ext_inode_hdr(inode);
    depth = ext_depth(inode);

    if (path) {
        ext4_ext_drop_refs(path);
        if (depth > path[0].p_maxdepth) {
            kfree(path);
            *orig_path = path = NULL;
        }
    }
    if (!path) {
        /* account possible depth increase */
        path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
                       GFP_NOFS);
        if (unlikely(!path))
            return ERR_PTR(-ENOMEM);
        path[0].p_maxdepth = depth + 1;
    }
    path[0].p_hdr = eh;
    path[0].p_bh = NULL;

    i = depth;
    /* walk through the tree */
    while (i) {
        ext_debug("depth %d: num %d, max %d\n",
                  ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));

        ext4_ext_binsearch_idx(inode, path + ppos, block);
        path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
        path[ppos].p_depth = i;
        path[ppos].p_ext = NULL;

        bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
                                    flags);
        if (unlikely(IS_ERR(bh))) {
            ret = PTR_ERR(bh);
            goto err;
        }

        eh = ext_block_hdr(bh);
        ppos++;
        if (unlikely(ppos > depth)) {
            extents_brelse(bh);
            EXT4_ERROR_INODE(inode,
                             "ppos %d > depth %d", ppos, depth);
            ret = -EIO;
            goto err;
        }
        path[ppos].p_bh = bh;
        path[ppos].p_hdr = eh;
    }

    path[ppos].p_depth = i;
    path[ppos].p_ext = NULL;
    path[ppos].p_idx = NULL;

    /* find extent */
    ext4_ext_binsearch(inode, path + ppos, block);
    /* if not an empty leaf */
    if (path[ppos].p_ext)
        path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);

    ext4_ext_show_path(inode, path);

    return path;

err:
    ext4_ext_drop_refs(path);
    if (path) {
        kfree(path);
        if (orig_path)
            *orig_path = NULL;
    }
    return ERR_PTR(ret);
}
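
/*
 * Shape of the path array returned above, e.g. for a depth-2 tree:
 *   path[0] -- root header in the inode body (p_bh == NULL), p_idx set
 *   path[1] -- intermediate index block, p_bh/p_hdr/p_idx set
 *   path[2] -- leaf block, p_ext points at the closest extent (or NULL
 *              for an empty leaf)
 * The array is allocated with depth + 2 slots so that one level of
 * future tree growth can reuse the same allocation.
 */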

/*
 * ext4_ext_insert_index:
 * insert new index [@logical;@ptr] into the block at @curp;
 * check where to insert: before @curp or after @curp
 */
static int ext4_ext_insert_index(void *icb, handle_t *handle, struct inode *inode,
                                 struct ext4_ext_path *curp,
                                 int logical, ext4_fsblk_t ptr)
{
    struct ext4_extent_idx *ix;
    int len, err;

    err = ext4_ext_get_access(icb, handle, inode, curp);
    if (err)
        return err;

    if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
        EXT4_ERROR_INODE(inode,
                         "logical %d == ei_block %d!",
                         logical, le32_to_cpu(curp->p_idx->ei_block));
        return -EIO;
    }

    if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
                 >= le16_to_cpu(curp->p_hdr->eh_max))) {
        EXT4_ERROR_INODE(inode,
                         "eh_entries %d >= eh_max %d!",
                         le16_to_cpu(curp->p_hdr->eh_entries),
                         le16_to_cpu(curp->p_hdr->eh_max));
        return -EIO;
    }

    if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
        /* insert after */
        ext_debug("insert new index %d after: %llu\n", logical, ptr);
        ix = curp->p_idx + 1;
    } else {
        /* insert before */
        ext_debug("insert new index %d before: %llu\n", logical, ptr);
        ix = curp->p_idx;
    }

    len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
    BUG_ON(len < 0);
    if (len > 0) {
        ext_debug("insert new index %d: "
                  "move %d indices from 0x%p to 0x%p\n",
                  logical, len, ix, ix + 1);
        memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
    }

    if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
        EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
        return -EIO;
    }

    ix->ei_block = cpu_to_le32(logical);
    ext4_idx_store_pblock(ix, ptr);
    le16_add_cpu(&curp->p_hdr->eh_entries, 1);

    if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
        EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
        return -EIO;
    }

    err = ext4_ext_dirty(icb, handle, inode, curp);
    ext4_std_error(inode->i_sb, err);

    return err;
}

/*
 * ext4_ext_split:
 * inserts new subtree into the path, using free index entry
 * at depth @at:
 * - allocates all needed blocks (new leaf and all intermediate index blocks)
 * - makes decision where to split
 * - moves remaining extents and index entries (right to the split point)
 *   into the newly allocated blocks
 * - initializes subtree
 */
static int ext4_ext_split(void *icb, handle_t *handle, struct inode *inode,
                          unsigned int flags,
                          struct ext4_ext_path *path,
                          struct ext4_extent *newext, int at)
{
    struct buffer_head *bh = NULL;
    int depth = ext_depth(inode);
    struct ext4_extent_header *neh;
    struct ext4_extent_idx *fidx;
    int i = at, k, m, a;
    ext4_fsblk_t newblock, oldblock;
    __le32 border;
    ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
    int err = 0;

    /* make decision: where to split? */
    /* FIXME: now decision is simplest: at current extent */

    /* if current leaf will be split, then we should use
     * border from split point */
    if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
        EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
        return -EIO;
    }
    if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
        border = path[depth].p_ext[1].ee_block;
        ext_debug("leaf will be split."
                  " next leaf starts at %d\n",
                  le32_to_cpu(border));
    } else {
        border = newext->ee_block;
        ext_debug("leaf will be added."
                  " next leaf starts at %d\n",
                  le32_to_cpu(border));
    }

    /*
     * If an error occurs, we break processing and mark the filesystem
     * read-only. The index won't be inserted and the tree will stay in
     * a consistent state. The next mount will repair buffers too.
     */

    /*
     * Get array to track all allocated blocks.
     * We need this to handle errors and free blocks
     * upon them.
     */
    ablocks = kzalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS);
    if (!ablocks)
        return -ENOMEM;

    /* allocate all needed blocks */
    ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
    for (a = 0; a < depth - at; a++) {
        newblock = ext4_ext_new_meta_block(icb, handle, inode, path,
                                           newext, &err, flags);
        if (newblock == 0)
            goto cleanup;
        ablocks[a] = newblock;
    }

    /* initialize new leaf */
    newblock = ablocks[--a];
    if (unlikely(newblock == 0)) {
        EXT4_ERROR_INODE(inode, "newblock == 0!");
        err = -EIO;
        goto cleanup;
    }
    bh = extents_bwrite(inode->i_sb, newblock);
    if (unlikely(!bh)) {
        err = -ENOMEM;
        goto cleanup;
    }

    err = ext4_journal_get_create_access(icb, handle, bh);
    if (err)
        goto cleanup;

    neh = ext_block_hdr(bh);
    neh->eh_entries = 0;
    neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
    neh->eh_magic = EXT4_EXT_MAGIC;
    neh->eh_depth = 0;

    /* move remainder of path[depth] to the new leaf */
    if (unlikely(path[depth].p_hdr->eh_entries !=
                 path[depth].p_hdr->eh_max)) {
        EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
                         path[depth].p_hdr->eh_entries,
                         path[depth].p_hdr->eh_max);
        err = -EIO;
        goto cleanup;
    }
    /* start copy from next extent */
    m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++;
    ext4_ext_show_move(inode, path, newblock, depth);
    if (m) {
        struct ext4_extent *ex;
        ex = EXT_FIRST_EXTENT(neh);
        memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m);
        le16_add_cpu(&neh->eh_entries, m);
    }

    ext4_extent_block_csum_set(inode, neh);
    set_buffer_uptodate(bh);

    err = ext4_handle_dirty_metadata(icb, handle, inode, bh);
    if (err)
        goto cleanup;
    extents_brelse(bh);
    bh = NULL;

    /* correct old leaf */
    if (m) {
        err = ext4_ext_get_access(icb, handle, inode, path + depth);
        if (err)
            goto cleanup;
        le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
        err = ext4_ext_dirty(icb, handle, inode, path + depth);
        if (err)
            goto cleanup;
    }

    /* create intermediate indexes */
    k = depth - at - 1;
    if (unlikely(k < 0)) {
        EXT4_ERROR_INODE(inode, "k %d < 0!", k);
        err = -EIO;
        goto cleanup;
    }
    if (k)
        ext_debug("create %d intermediate indices\n", k);
    /* insert new index into current index block */
    /* current depth stored in i var */
    i = depth - 1;
    while (k--) {
        oldblock = newblock;
        newblock = ablocks[--a];
        bh = extents_bwrite(inode->i_sb, newblock);
        if (unlikely(!bh)) {
            err = -ENOMEM;
            goto cleanup;
        }

        err = ext4_journal_get_create_access(icb, handle, bh);
        if (err)
            goto cleanup;

        neh = ext_block_hdr(bh);
        neh->eh_entries = cpu_to_le16(1);
        neh->eh_magic = EXT4_EXT_MAGIC;
        neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
        neh->eh_depth = cpu_to_le16(depth - i);
        fidx = EXT_FIRST_INDEX(neh);
        fidx->ei_block = border;
        ext4_idx_store_pblock(fidx, oldblock);

        ext_debug("int.index at %d (block %llu): %u -> %llu\n",
                  i, newblock, le32_to_cpu(border), oldblock);

        /* move remainder of path[i] to the new index block */
        if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
                     EXT_LAST_INDEX(path[i].p_hdr))) {
            EXT4_ERROR_INODE(inode,
                             "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
                             le32_to_cpu(path[i].p_ext->ee_block));
            err = -EIO;
            goto cleanup;
        }
        /* start copy indexes */
        m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++;
        ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
                  EXT_MAX_INDEX(path[i].p_hdr));
        ext4_ext_show_move(inode, path, newblock, i);
        if (m) {
            memmove(++fidx, path[i].p_idx,
                    sizeof(struct ext4_extent_idx) * m);
            le16_add_cpu(&neh->eh_entries, m);
        }
        ext4_extent_block_csum_set(inode, neh);
        set_buffer_uptodate(bh);

        err = ext4_handle_dirty_metadata(icb, handle, inode, bh);
        if (err)
            goto cleanup;
        extents_brelse(bh);
        bh = NULL;

        /* correct old index */
        if (m) {
            err = ext4_ext_get_access(icb, handle, inode, path + i);
            if (err)
                goto cleanup;
            le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
            err = ext4_ext_dirty(icb, handle, inode, path + i);
            if (err)
                goto cleanup;
        }

        i--;
    }

    /* insert new index */
    err = ext4_ext_insert_index(icb, handle, inode, path + at,
                                le32_to_cpu(border), newblock);

cleanup:
    if (bh)
        extents_brelse(bh);

    if (err) {
        /* free all allocated blocks in error case */
        for (i = 0; i < depth; i++) {
            if (!ablocks[i])
                continue;
            ext4_free_blocks(icb, handle, inode, NULL, ablocks[i], 1,
                             EXT4_FREE_BLOCKS_METADATA);
        }
    }
    kfree(ablocks);

    return err;
}
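
/*
 * Split shape, informally: with depth 2 and a free index slot found at
 * depth @at = 1, the routine allocates depth - at = 1 new leaf, copies
 * every extent to the right of the split point into it, shrinks the old
 * leaf accordingly, and inserts a single new index entry (border ->
 * new leaf) at depth 1. Deeper splits additionally chain new index
 * blocks, each pointing at the block created in the previous step.
 */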

/*
 * ext4_ext_grow_indepth:
 * implements tree growing procedure:
 * - allocates new block
 * - moves top-level data (index block or leaf) into the new block
 * - initializes new top-level, creating index that points to the
 *   just created block
 */
static int ext4_ext_grow_indepth(void *icb, handle_t *handle, struct inode *inode,
                                 unsigned int flags)
{
    struct ext4_extent_header *neh;
    struct buffer_head *bh;
    ext4_fsblk_t newblock, goal = 0;
    int err = 0;

    /* Try to prepend new index to old one */
    if (ext_depth(inode))
        goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode)));
    else
        goal = ext4_inode_to_goal_block(inode);
    newblock = ext4_new_meta_blocks(icb, handle, inode, goal, flags,
                                    NULL, &err);
    if (newblock == 0)
        return err;

    bh = extents_bwrite(inode->i_sb, newblock);
    if (!bh)
        return -ENOMEM;

    err = ext4_journal_get_create_access(icb, handle, bh);
    if (err)
        goto out;

    /* move top-level index/leaf into new block */
    memmove(bh->b_data, EXT4_I(inode)->i_block,
            sizeof(EXT4_I(inode)->i_block));

    /* set size of new block */
    neh = ext_block_hdr(bh);
    /* old root could have indexes or leaves
     * so calculate eh_max the right way */
    if (ext_depth(inode))
        neh->eh_max = (ext4_ext_space_block_idx(inode, 0));
    else
        neh->eh_max = (ext4_ext_space_block(inode, 0));
    neh->eh_magic = EXT4_EXT_MAGIC;
    ext4_extent_block_csum_set(inode, neh);
    set_buffer_uptodate(bh);

    err = ext4_handle_dirty_metadata(icb, handle, inode, bh);
    if (err)
        goto out;

    /* Update top-level index: num,max,pointer */
    neh = ext_inode_hdr(inode);
    neh->eh_entries = (1);
    ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
    if (neh->eh_depth == 0) {
        /* Root extent block becomes index block */
        neh->eh_max = (ext4_ext_space_root_idx(inode, 0));
        EXT_FIRST_INDEX(neh)->ei_block =
            EXT_FIRST_EXTENT(neh)->ee_block;
    }
    ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
              (neh->eh_entries), (neh->eh_max),
              (EXT_FIRST_INDEX(neh)->ei_block),
              ext4_idx_pblock(EXT_FIRST_INDEX(neh)));

    le16_add_cpu(&neh->eh_depth, 1);
    ext4_mark_inode_dirty(icb, handle, inode);
out:
    extents_brelse(bh);

    return err;
}

/*
 * ext4_ext_create_new_leaf:
 * finds empty index and adds new leaf.
 * if no free index is found, then it requests in-depth growing.
 */
static int ext4_ext_create_new_leaf(void *icb, handle_t *handle, struct inode *inode,
                                    unsigned int mb_flags,
                                    unsigned int gb_flags,
                                    struct ext4_ext_path **ppath,
                                    struct ext4_extent *newext)
{
    struct ext4_ext_path *path = *ppath;
    struct ext4_ext_path *curp;
    int depth, i, err = 0;

repeat:
    i = depth = ext_depth(inode);

    /* walk up to the tree and look for free index entry */
    curp = path + depth;
    while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
        i--;
        curp--;
    }

    /* we use already allocated block for index block,
     * so subsequent data blocks should be contiguous */
    if (EXT_HAS_FREE_INDEX(curp)) {
        /* if we found index with free entry, then use that
         * entry: create all needed subtree and add new leaf */
        err = ext4_ext_split(icb, handle, inode, mb_flags, path, newext, i);
        if (err)
            goto out;

        /* refill path */
        path = ext4_find_extent(inode,
                                (ext4_lblk_t)le32_to_cpu(newext->ee_block),
                                ppath, gb_flags);
        if (IS_ERR(path))
            err = PTR_ERR(path);
    } else {
        /* tree is full, time to grow in depth */
        err = ext4_ext_grow_indepth(icb, handle, inode, mb_flags);
        if (err)
            goto out;

        /* refill path */
        path = ext4_find_extent(inode,
                                (ext4_lblk_t)le32_to_cpu(newext->ee_block),
                                ppath, gb_flags);
        if (IS_ERR(path)) {
            err = PTR_ERR(path);
            goto out;
        }

        /*
         * only first (depth 0 -> 1) produces free space;
         * in all other cases we have to split the grown tree
         */
        depth = ext_depth(inode);
        if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
            /* now we need to split */
            goto repeat;
        }
    }

out:
    return err;
}

/*
 * search the closest allocated block to the left for *logical
 * and returns it at @logical + its physical address at @phys
 * if *logical is the smallest allocated block, the function
 * returns 0 at @phys
 * return value contains 0 (success) or error code
 */
static int ext4_ext_search_left(struct inode *inode,
                                struct ext4_ext_path *path,
                                ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
    struct ext4_extent_idx *ix;
    struct ext4_extent *ex;
    int depth, ee_len;

    if (unlikely(path == NULL)) {
        EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
        return -EIO;
    }
    depth = path->p_depth;
    *phys = 0;

    if (depth == 0 && path->p_ext == NULL)
        return 0;

    /* usually the extent in the path covers blocks smaller
     * than *logical, but it can be that the extent is the
     * first one in the file */

    ex = path[depth].p_ext;
    ee_len = ext4_ext_get_actual_len(ex);
    if (*logical < le32_to_cpu(ex->ee_block)) {
        if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
            EXT4_ERROR_INODE(inode,
                             "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
                             *logical, le32_to_cpu(ex->ee_block));
            return -EIO;
        }
        while (--depth >= 0) {
            ix = path[depth].p_idx;
            if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
                EXT4_ERROR_INODE(inode,
                                 "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
                                 ix != NULL ? le32_to_cpu(ix->ei_block) : 0,
                                 EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
                                 le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0,
                                 depth);
                return -EIO;
            }
        }
        return 0;
    }

    if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
        EXT4_ERROR_INODE(inode,
                         "logical %d < ee_block %d + ee_len %d!",
                         *logical, le32_to_cpu(ex->ee_block), ee_len);
        return -EIO;
    }

    *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
    *phys = ext4_ext_pblock(ex) + ee_len - 1;
    return 0;
}

/*
 * search the closest allocated block to the right for *logical
 * and returns it at @logical + its physical address at @phys
 * if *logical is the largest allocated block, the function
 * returns 0 at @phys
 * return value contains 0 (success) or error code
 */
static int ext4_ext_search_right(struct inode *inode,
                                 struct ext4_ext_path *path,
                                 ext4_lblk_t *logical, ext4_fsblk_t *phys,
                                 struct ext4_extent **ret_ex)
{
    struct buffer_head *bh = NULL;
    struct ext4_extent_header *eh;
    struct ext4_extent_idx *ix;
    struct ext4_extent *ex;
    ext4_fsblk_t block;
    int depth; /* Note, NOT eh_depth; depth from top of tree */
    int ee_len;

    if (path == NULL) {
        EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
        return -EIO;
    }
    depth = path->p_depth;
    *phys = 0;

    if (depth == 0 && path->p_ext == NULL)
        return 0;

    /* usually the extent in the path covers blocks smaller
     * than *logical, but it can be that the extent is the
     * first one in the file */

    ex = path[depth].p_ext;
    ee_len = ext4_ext_get_actual_len(ex);
    /* if (*logical < le32_to_cpu(ex->ee_block)) { */
    if (*logical < (ex->ee_block)) {
        if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
            EXT4_ERROR_INODE(inode,
                             "first_extent(path[%d].p_hdr) != ex",
                             depth);
            return -EIO;
        }
        while (--depth >= 0) {
            ix = path[depth].p_idx;
            if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
                EXT4_ERROR_INODE(inode,
                                 "ix != EXT_FIRST_INDEX *logical %d!",
                                 *logical);
                return -EIO;
            }
        }
        goto found_extent;
    }

    /* if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { */
    if (unlikely(*logical < ((ex->ee_block) + ee_len))) {
        EXT4_ERROR_INODE(inode,
                         "logical %d < ee_block %d + ee_len %d!",
                         /* *logical, le32_to_cpu(ex->ee_block), ee_len); */
                         *logical, (ex->ee_block), ee_len);
        return -EIO;
    }

    if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
        /* next allocated block in this leaf */
        ex++;
        goto found_extent;
    }

    /* go up and search for index to the right */
    while (--depth >= 0) {
        ix = path[depth].p_idx;
        if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
            goto got_index;
    }

    /* we've gone up to the root and found no index to the right */
    return 0;

got_index:
    /* we've found index to the right, let's
     * follow it and find the closest allocated
     * block to the right */
    ix++;
    block = ext4_idx_pblock(ix);
    while (++depth < path->p_depth) {
        /* subtract from p_depth to get proper eh_depth */
        bh = read_extent_tree_block(inode, block,
                                    path->p_depth - depth, 0);
        if (IS_ERR(bh))
            return PTR_ERR(bh);
        eh = ext_block_hdr(bh);
        ix = EXT_FIRST_INDEX(eh);
        block = ext4_idx_pblock(ix);
        extents_brelse(bh);
    }

    bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0);
    if (IS_ERR(bh))
        return PTR_ERR(bh);
    eh = ext_block_hdr(bh);
    ex = EXT_FIRST_EXTENT(eh);
found_extent:
    /* *logical = le32_to_cpu(ex->ee_block); */
    *logical = (ex->ee_block);
    *phys = ext4_ext_pblock(ex);
    *ret_ex = ex;
    if (bh)
        extents_brelse(bh);
    return 0;
}

/*
 * ext4_ext_next_allocated_block:
 * returns allocated block in subsequent extent or EXT_MAX_BLOCKS.
 * NOTE: it considers block number from index entry as
 * allocated block. Thus, index entries have to be consistent
 * with leaves.
 */
ext4_lblk_t
ext4_ext_next_allocated_block(struct ext4_ext_path *path)
{
    int depth;

    depth = path->p_depth;

    if (depth == 0 && path->p_ext == NULL)
        return EXT_MAX_BLOCKS;

    while (depth >= 0) {
        if (depth == path->p_depth) {
            /* leaf */
            if (path[depth].p_ext &&
                path[depth].p_ext !=
                EXT_LAST_EXTENT(path[depth].p_hdr))
                return le32_to_cpu(path[depth].p_ext[1].ee_block);
        } else {
            /* index */
            if (path[depth].p_idx !=
                EXT_LAST_INDEX(path[depth].p_hdr))
                return le32_to_cpu(path[depth].p_idx[1].ei_block);
        }
        depth--;
    }

    return EXT_MAX_BLOCKS;
}

/*
 * ext4_ext_next_leaf_block:
 * returns first allocated block from next leaf or EXT_MAX_BLOCKS
 */
static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
{
    int depth;

    BUG_ON(path == NULL);
    depth = path->p_depth;

    /* zero-tree has no leaf blocks at all */
    if (depth == 0)
        return EXT_MAX_BLOCKS;

    /* go to index block */
    depth--;

    while (depth >= 0) {
        if (path[depth].p_idx !=
            EXT_LAST_INDEX(path[depth].p_hdr))
            return (ext4_lblk_t)
                   le32_to_cpu(path[depth].p_idx[1].ei_block);
        depth--;
    }

    return EXT_MAX_BLOCKS;
}

/*
 * ext4_ext_correct_indexes:
 * if the leaf gets modified and the modified extent is first in the leaf,
 * then we have to correct all indexes above.
 * TODO: do we need to correct the tree in all cases?
 */
static int ext4_ext_correct_indexes(void *icb, handle_t *handle, struct inode *inode,
                                    struct ext4_ext_path *path)
{
    struct ext4_extent_header *eh;
    int depth = ext_depth(inode);
    struct ext4_extent *ex;
    __le32 border;
    int k, err = 0;

    eh = path[depth].p_hdr;
    ex = path[depth].p_ext;

    if (unlikely(ex == NULL || eh == NULL)) {
        EXT4_ERROR_INODE(inode,
                         "ex %p == NULL or eh %p == NULL", ex, eh);
        return -EIO;
    }

    if (depth == 0) {
        /* there is no tree at all */
        return 0;
    }

    if (ex != EXT_FIRST_EXTENT(eh)) {
        /* we correct tree if first leaf got modified only */
        return 0;
    }

    /*
     * TODO: we need correction if border is smaller than current one
     */
    k = depth - 1;
    border = path[depth].p_ext->ee_block;
    err = ext4_ext_get_access(icb, handle, inode, path + k);
    if (err)
        return err;
    path[k].p_idx->ei_block = border;
    err = ext4_ext_dirty(icb, handle, inode, path + k);
    if (err)
        return err;

    while (k--) {
        /* change all left-side indexes */
        if (path[k + 1].p_idx != EXT_FIRST_INDEX(path[k + 1].p_hdr))
            break;
        err = ext4_ext_get_access(icb, handle, inode, path + k);
        if (err)
            break;
        path[k].p_idx->ei_block = border;
        err = ext4_ext_dirty(icb, handle, inode, path + k);
        if (err)
            break;
    }

    return err;
}

int
ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
                           struct ext4_extent *ex2)
{
    unsigned short ext1_ee_len, ext2_ee_len;

    /*
     * Make sure that both extents are initialized. We don't merge
     * unwritten extents so that we can be sure that end_io code has
     * the extent that was written properly split out and conversion to
     * initialized is trivial.
     */
    if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2))
        return 0;

    ext1_ee_len = ext4_ext_get_actual_len(ex1);
    ext2_ee_len = ext4_ext_get_actual_len(ex2);

    if (le32_to_cpu(ex1->ee_block) + ext1_ee_len !=
        le32_to_cpu(ex2->ee_block))
        return 0;

    /*
     * To allow future support for preallocated extents to be added
     * as an RO_COMPAT feature, refuse to merge two extents if
     * this can result in the top bit of ee_len being set.
     */
    if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
        return 0;
    if (ext4_ext_is_unwritten(ex1) &&
        (ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN))
        return 0;
#ifdef AGGRESSIVE_TEST
    if (ext1_ee_len >= 4)
        return 0;
#endif

    if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
        return 1;
    return 0;
}
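
/*
 * Merge example: ex1 maps logical blocks 10..14 (length 5) to physical
 * blocks 100..104, and ex2 starts at logical 15 mapped to physical 105.
 * The pair is logically and physically contiguous, shares the
 * written/unwritten state, and the combined length stays within
 * EXT_INIT_MAX_LEN, so the check above returns 1 and the two can
 * collapse into one extent: logical 10, length 6, physical 100.
 */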

/*
 * This function tries to merge the "ex" extent to the next extent in the tree.
 * It always tries to merge towards the right. If you want to merge towards
 * the left, pass "ex - 1" as argument instead of "ex".
 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
 * 1 if they got merged.
 */
static int ext4_ext_try_to_merge_right(struct inode *inode,
                                       struct ext4_ext_path *path,
                                       struct ext4_extent *ex)
{
    struct ext4_extent_header *eh;
    unsigned int depth, len;
    int merge_done = 0, unwritten;

    depth = ext_depth(inode);
    assert(path[depth].p_hdr != NULL);
    eh = path[depth].p_hdr;

    while (ex < EXT_LAST_EXTENT(eh)) {
        if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
            break;
        /* merge with next extent! */
        unwritten = ext4_ext_is_unwritten(ex);
        ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
                                 + ext4_ext_get_actual_len(ex + 1));
        if (unwritten)
            ext4_ext_mark_unwritten(ex);

        if (ex + 1 < EXT_LAST_EXTENT(eh)) {
            len = (EXT_LAST_EXTENT(eh) - ex - 1)
                  * sizeof(struct ext4_extent);
            memmove(ex + 1, ex + 2, len);
        }
        le16_add_cpu(&eh->eh_entries, -1);
        merge_done = 1;
        if (!eh->eh_entries)
            EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");
    }

    return merge_done;
}

/*
 * This function does a very simple check to see if we can collapse
 * an extent tree with a single extent tree leaf block into the inode.
 */
static void ext4_ext_try_to_merge_up(void *icb, handle_t *handle,
                                     struct inode *inode,
                                     struct ext4_ext_path *path)
{
    size_t s;
    unsigned max_root = ext4_ext_space_root(inode, 0);
    ext4_fsblk_t blk;

    if ((path[0].p_depth != 1) ||
        (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
        (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
        return;

    /*
     * We need to modify the block allocation bitmap and the block
     * group descriptor to release the extent tree block.  If we
     * can't get the journal credits, give up.
     */
    if (ext4_journal_extend(icb, handle, 2))
        return;

    /*
     * Copy the extent data up to the inode
     */
    blk = ext4_idx_pblock(path[0].p_idx);
    s = le16_to_cpu(path[1].p_hdr->eh_entries) *
        sizeof(struct ext4_extent_idx);
    s += sizeof(struct ext4_extent_header);

    path[1].p_maxdepth = path[0].p_maxdepth;
    memcpy(path[0].p_hdr, path[1].p_hdr, s);
    path[0].p_depth = 0;
    path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
                    (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
    path[0].p_hdr->eh_max = cpu_to_le16(max_root);

    extents_brelse(path[1].p_bh);
    ext4_free_blocks(icb, handle, inode, NULL, blk, 1,
                     EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
}

/*
 * This function tries to merge the @ex extent to its neighbours in the
 * tree, first to the left and then to the right, and finally tries to
 * collapse the whole tree into the inode if it shrank to a single leaf.
 */
static void ext4_ext_try_to_merge(void *icb, handle_t *handle,
                                  struct inode *inode,
                                  struct ext4_ext_path *path,
                                  struct ext4_extent *ex)
{
    struct ext4_extent_header *eh;
    unsigned int depth;
    int merge_done = 0;

    depth = ext_depth(inode);
    BUG_ON(path[depth].p_hdr == NULL);
    eh = path[depth].p_hdr;

    if (ex > EXT_FIRST_EXTENT(eh))
        merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);

    if (!merge_done)
        (void) ext4_ext_try_to_merge_right(inode, path, ex);

    ext4_ext_try_to_merge_up(icb, handle, inode, path);
}

/*
 * ext4_ext_insert_extent:
 * tries to merge requested extent into the existing extent or
 * inserts requested extent as new one into the tree,
 * creating new leaf in the no-space case.
 */
int ext4_ext_insert_extent(void *icb, handle_t *handle, struct inode *inode,
                           struct ext4_ext_path **ppath,
                           struct ext4_extent *newext,
                           int gb_flags)
{
    struct ext4_ext_path *path = *ppath;
    struct ext4_extent_header *eh;
    struct ext4_extent *ex, *fex;
    struct ext4_extent *nearex; /* nearest extent */
    struct ext4_ext_path *npath = NULL;
    int depth, len, err;
    ext4_lblk_t next;
    int mb_flags = 0, unwritten;

    if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
        mb_flags |= EXT4_MB_DELALLOC_RESERVED;
    if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
        EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
        return -EIO;
    }
    depth = ext_depth(inode);
    ex = path[depth].p_ext;
    eh = path[depth].p_hdr;
    if (unlikely(path[depth].p_hdr == NULL)) {
        EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
        return -EIO;
    }

    /* try to insert block into found extent and return */
    if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) {

        /*
         * Try to see whether we should rather test the extent on
         * right from ex, or from the left of ex. This is because
         * ext4_find_extent() can return either extent on the
         * left, or on the right from the searched position. This
         * will make merging more effective.
         */
        if (ex < EXT_LAST_EXTENT(eh) &&
            (le32_to_cpu(ex->ee_block) +
             ext4_ext_get_actual_len(ex) <
             le32_to_cpu(newext->ee_block))) {
            ex += 1;
            goto prepend;
        } else if ((ex > EXT_FIRST_EXTENT(eh)) &&
                   (le32_to_cpu(newext->ee_block) +
                    ext4_ext_get_actual_len(newext) <
                    le32_to_cpu(ex->ee_block)))
            ex -= 1;

        /* Try to append newex to the ex */
        if (ext4_can_extents_be_merged(inode, ex, newext)) {
            ext_debug("append [%d]%d block to %u:[%d]%d"
                      "(from %llu)\n",
                      ext4_ext_is_unwritten(newext),
                      ext4_ext_get_actual_len(newext),
                      le32_to_cpu(ex->ee_block),
                      ext4_ext_is_unwritten(ex),
                      ext4_ext_get_actual_len(ex),
                      ext4_ext_pblock(ex));
            err = ext4_ext_get_access(icb, handle, inode,
                                      path + depth);
            if (err)
                return err;
            unwritten = ext4_ext_is_unwritten(ex);
            ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
                                     + ext4_ext_get_actual_len(newext));
            if (unwritten)
                ext4_ext_mark_unwritten(ex);
            eh = path[depth].p_hdr;
            nearex = ex;
            goto merge;
        }

prepend:
        /* Try to prepend newex to the ex */
        if (ext4_can_extents_be_merged(inode, newext, ex)) {
            ext_debug("prepend %u[%d]%d block to %u:[%d]%d"
                      "(from %llu)\n",
                      le32_to_cpu(newext->ee_block),
                      ext4_ext_is_unwritten(newext),
                      ext4_ext_get_actual_len(newext),
                      le32_to_cpu(ex->ee_block),
                      ext4_ext_is_unwritten(ex),
                      ext4_ext_get_actual_len(ex),
                      ext4_ext_pblock(ex));
            err = ext4_ext_get_access(icb, handle, inode,
                                      path + depth);
            if (err)
                return err;

            unwritten = ext4_ext_is_unwritten(ex);
            ex->ee_block = newext->ee_block;
            ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
            ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
                                     + ext4_ext_get_actual_len(newext));
            if (unwritten)
                ext4_ext_mark_unwritten(ex);
            eh = path[depth].p_hdr;
            nearex = ex;
            goto merge;
        }
    }

    depth = ext_depth(inode);
    eh = path[depth].p_hdr;
    if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
        goto has_space;

    /* probably next leaf has space for us? */
    fex = EXT_LAST_EXTENT(eh);
    next = EXT_MAX_BLOCKS;
    if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
        next = ext4_ext_next_leaf_block(path);
    if (next != EXT_MAX_BLOCKS) {
        ext_debug("next leaf block - %u\n", next);
        BUG_ON(npath != NULL);
        npath = ext4_find_extent(inode, next, NULL, 0);
        if (IS_ERR(npath))
            return PTR_ERR(npath);
        BUG_ON(npath->p_depth != path->p_depth);
        eh = npath[depth].p_hdr;
        if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
            ext_debug("next leaf isn't full(%d)\n",
                      le16_to_cpu(eh->eh_entries));
            path = npath;
            goto has_space;
        }
        ext_debug("next leaf has no free space(%d,%d)\n",
                  le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
    }

    /*
     * There is no free space in the found leaf.
     * We're gonna add a new leaf in the tree.
     */
    if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
        mb_flags |= EXT4_MB_USE_RESERVED;
    err = ext4_ext_create_new_leaf(icb, handle, inode, mb_flags, gb_flags,
                                   ppath, newext);
    if (err)
        goto cleanup;
    depth = ext_depth(inode);
    eh = path[depth].p_hdr;

has_space:
    nearex = path[depth].p_ext;

    err = ext4_ext_get_access(icb, handle, inode, path + depth);
    if (err)
        goto cleanup;

    if (!nearex) {
        /* there is no extent in this leaf, create first one */
        ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n",
                  le32_to_cpu(newext->ee_block),
                  ext4_ext_pblock(newext),
                  ext4_ext_is_unwritten(newext),
                  ext4_ext_get_actual_len(newext));
        nearex = EXT_FIRST_EXTENT(eh);
    } else {
        if (le32_to_cpu(newext->ee_block)
            > le32_to_cpu(nearex->ee_block)) {
            /* Insert after */
            ext_debug("insert %u:%llu:[%d]%d after: "
                      "nearest %p\n",
                      le32_to_cpu(newext->ee_block),
                      ext4_ext_pblock(newext),
                      ext4_ext_is_unwritten(newext),
                      ext4_ext_get_actual_len(newext),
                      nearex);
            nearex++;
        } else {
            /* Insert before */
            BUG_ON(newext->ee_block == nearex->ee_block);
            ext_debug("insert %u:%llu:[%d]%d before: "
                      "nearest %p\n",
                      le32_to_cpu(newext->ee_block),
                      ext4_ext_pblock(newext),
                      ext4_ext_is_unwritten(newext),
                      ext4_ext_get_actual_len(newext),
                      nearex);
        }
        len = EXT_LAST_EXTENT(eh) - nearex + 1;
        if (len > 0) {
            ext_debug("insert %u:%llu:[%d]%d: "
                      "move %d extents from 0x%p to 0x%p\n",
                      le32_to_cpu(newext->ee_block),
                      ext4_ext_pblock(newext),
                      ext4_ext_is_unwritten(newext),
                      ext4_ext_get_actual_len(newext),
                      len, nearex, nearex + 1);
            memmove(nearex + 1, nearex,
                    len * sizeof(struct ext4_extent));
        }
    }

    le16_add_cpu(&eh->eh_entries, 1);
    path[depth].p_ext = nearex;
    nearex->ee_block = newext->ee_block;
    ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
    nearex->ee_len = newext->ee_len;

merge:
    /* try to merge extents */
    if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO))
        ext4_ext_try_to_merge(icb, handle, inode, path, nearex);

    /* time to correct all indexes above */
    err = ext4_ext_correct_indexes(icb, handle, inode, path);
    if (err)
        goto cleanup;

    err = ext4_ext_dirty(icb, handle, inode, path + path->p_depth);

cleanup:
    if (npath) {
        ext4_ext_drop_refs(npath);
        kfree(npath);
    }
    return err;
}

static inline int get_default_free_blocks_flags(struct inode *inode)
{
    return 0;
}

/* FIXME!! we need to try to merge to left or right after zero-out */
static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
{
    ext4_fsblk_t ee_pblock;
    unsigned int ee_len;
    int ret;

    ee_len = ext4_ext_get_actual_len(ex);
    ee_pblock = ext4_ext_pblock(ex);

    /* the actual zero-out of the on-disk range is not implemented in
     * this port; report success unconditionally */
    ret = 0;

    return ret;
}

static int ext4_remove_blocks(void *icb, handle_t *handle, struct inode *inode,
                              struct ext4_extent *ex,
                              unsigned long from, unsigned long to)
{
    struct buffer_head *bh;
    int i;

    if (from >= le32_to_cpu(ex->ee_block)
        && to == le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex) - 1) {
        /* tail removal */
        unsigned long num, start;
        num = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex) - from;
        start = ext4_ext_pblock(ex) + ext4_ext_get_actual_len(ex) - num;
        ext4_free_blocks(icb, handle, inode, NULL, start, num, 0);
    } else if (from == le32_to_cpu(ex->ee_block)
               && to <= le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex) - 1) {
        /* head removal: not implemented here */
    } else {
        /* removal from the middle of an extent: not implemented here */
    }
    return 0;
}

/*
 * routine removes index from the index block.
 * it's used in truncate case only; thus all requests are for
 * the last index in the block only.
 */
int ext4_ext_rm_idx(void *icb, handle_t *handle, struct inode *inode,
                    struct ext4_ext_path *path)
{
    int err;
    ext4_fsblk_t leaf;

    /* free index block */
    path--;
    leaf = ext4_idx_pblock(path->p_idx);
    BUG_ON(path->p_hdr->eh_entries == 0);
    if ((err = ext4_ext_get_access(icb, handle, inode, path)))
        return err;
    path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries) - 1);
    if ((err = ext4_ext_dirty(icb, handle, inode, path)))
        return err;
    ext4_free_blocks(icb, handle, inode, NULL, leaf, 1, 0);
    return err;
}
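
/*
 * Note the "path--" above: the caller passes the path entry for the leaf
 * level, and stepping back one slot yields the parent index block, which
 * is where the entry pointing at the now-empty leaf has to be removed.
 */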

static int
ext4_ext_rm_leaf(void *icb, handle_t *handle, struct inode *inode,
        struct ext4_ext_path *path, unsigned long start)
{
    int err = 0, correct_index = 0;
    int depth = ext_depth(inode), credits;
    struct ext4_extent_header *eh;
    unsigned a, b, block, num;
    unsigned long ex_ee_block;
    unsigned short ex_ee_len;
    struct ext4_extent *ex;

    /* the header must already have been checked in ext4_ext_remove_space() */
    if (!path[depth].p_hdr)
        path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
    eh = path[depth].p_hdr;
    BUG_ON(eh == NULL);

    /* find where to start removing */
    ex = EXT_LAST_EXTENT(eh);

    ex_ee_block = le32_to_cpu(ex->ee_block);
    ex_ee_len = ext4_ext_get_actual_len(ex);

    while (ex >= EXT_FIRST_EXTENT(eh) &&
            ex_ee_block + ex_ee_len > start) {
        path[depth].p_ext = ex;

        /* clamp the removal range [a, b] to this extent */
        a = ex_ee_block > start ? ex_ee_block : start;
        b = (unsigned long long)ex_ee_block + ex_ee_len - 1 <
            EXT_MAX_BLOCKS ? ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCKS;

        if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) {
            block = 0;
            num = 0;
            BUG();
        } else if (a != ex_ee_block) {
            /* remove the tail of the extent */
            block = ex_ee_block;
            num = a - block;
        } else if (b != ex_ee_block + ex_ee_len - 1) {
            /* remove the head of the extent */
            block = a;
            num = b - a;
            /* there is no "make a hole" API yet */
            BUG();
        } else {
            /* remove the whole extent: excellent! */
            block = ex_ee_block;
            num = 0;
            BUG_ON(a != ex_ee_block);
            BUG_ON(b != ex_ee_block + ex_ee_len - 1);
        }

        /* at present, an extent can't cross a block group boundary */
        /* leaf + bitmap + group desc + sb + inode */
        credits = 5;
        if (ex == EXT_FIRST_EXTENT(eh)) {
            correct_index = 1;
            credits += (ext_depth(inode)) + 1;
        }

        /*handle = ext4_ext_journal_restart(icb, handle, credits);*/
        /*if (IS_ERR(icb, handle)) {*/
        /*err = PTR_ERR(icb, handle);*/
        /*goto out;*/
        /*}*/

        err = ext4_ext_get_access(icb, handle, inode, path + depth);
        if (err)
            goto out;

        err = ext4_remove_blocks(icb, handle, inode, ex, a, b);
        if (err)
            goto out;

        if (num == 0) {
            /* this extent is removed entirely, mark the slot unused */
            ext4_ext_store_pblock(ex, 0);
            eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1);
        }

        ex->ee_block = cpu_to_le32(block);
        ex->ee_len = cpu_to_le16(num);

        err = ext4_ext_dirty(icb, handle, inode, path + depth);
        if (err)
            goto out;

        ex--;
        ex_ee_block = le32_to_cpu(ex->ee_block);
        ex_ee_len = ext4_ext_get_actual_len(ex);
    }

    if (correct_index && eh->eh_entries)
        err = ext4_ext_correct_indexes(icb, handle, inode, path);

    /* if this leaf became free, we should
     * remove it from the index block above */
    if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
        err = ext4_ext_rm_idx(icb, handle, inode, path + depth);

out:
    return err;
}
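
/*
 * Worked example for the case analysis above (illustrative numbers): take
 * an extent covering logical blocks 100..107 (ex_ee_block = 100,
 * ex_ee_len = 8).
 *
 *   start = 104  ->  a = 104, b = 107: tail removal; the extent is
 *                    shortened to blocks 100..103 (num = a - block = 4).
 *   start = 100  ->  a = 100, b = 107: the whole extent is removed and
 *                    its slot in the leaf is marked unused (num = 0).
 *
 * Head or middle removal (a == ex_ee_block with b short of the extent end,
 * or both ends strictly inside the extent) would need a "make a hole"
 * operation, which does not exist here, hence the BUG() calls.
 */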

/*
 * ext4_split_extent_at() splits an extent at a given block.
 *
 * @handle: the journal handle
 * @inode: the file inode
 * @path: the path to the extent
 * @split: the logical block where the extent is split.
 * @split_flag: indicates whether the extent may be zeroed out if the split
 *              fails, and the state (initialized or unwritten) of the new
 *              extents.
 * @flags: flags used to insert the new extent into the extent tree.
 *
 * Splits extent [a, b] into two extents [a, @split) and [@split, b], whose
 * states are determined by split_flag.
 *
 * There are two cases:
 *  a> the extent is split into two extents.
 *  b> no split is needed, just mark the extent.
 *
 * Returns 0 on success.
 */
static int ext4_split_extent_at(void *icb, handle_t *handle,
        struct inode *inode,
        struct ext4_ext_path **ppath,
        ext4_lblk_t split,
        int split_flag,
        int flags)
{
    struct ext4_ext_path *path = *ppath;
    ext4_fsblk_t newblock;
    ext4_lblk_t ee_block;
    struct ext4_extent *ex, newex, orig_ex, zero_ex;
    struct ext4_extent *ex2 = NULL;
    unsigned int ee_len, depth;
    int err = 0;

    ext4_ext_show_leaf(inode, path);

    depth = ext_depth(inode);
    ex = path[depth].p_ext;
    ee_block = le32_to_cpu(ex->ee_block);
    ee_len = ext4_ext_get_actual_len(ex);
    newblock = split - ee_block + ext4_ext_pblock(ex);

    BUG_ON(split < ee_block || split >= (ee_block + ee_len));

    err = ext4_ext_get_access(icb, handle, inode, path + depth);
    if (err)
        goto out;

    if (split == ee_block) {
        /*
         * case b: block @split is the block that the extent begins with,
         * so we just change the state of the extent and no splitting
         * is needed.
         */
        if (split_flag & EXT4_EXT_MARK_UNWRIT2)
            ext4_ext_mark_unwritten(ex);
        else
            ext4_ext_mark_initialized(ex);

        if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
            ext4_ext_try_to_merge(icb, handle, inode, path, ex);

        err = ext4_ext_dirty(icb, handle, inode, path + path->p_depth);
        goto out;
    }

    /* case a */
    memcpy(&orig_ex, ex, sizeof(orig_ex));
    ex->ee_len = cpu_to_le16(split - ee_block);
    if (split_flag & EXT4_EXT_MARK_UNWRIT1)
        ext4_ext_mark_unwritten(ex);

    /*
     * path may lead to a new leaf, not to the original leaf any more,
     * after ext4_ext_insert_extent() returns.
     */
    err = ext4_ext_dirty(icb, handle, inode, path + depth);
    if (err)
        goto fix_extent_len;

    ex2 = &newex;
    ex2->ee_block = cpu_to_le32(split);
    ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block));
    ext4_ext_store_pblock(ex2, newblock);
    if (split_flag & EXT4_EXT_MARK_UNWRIT2)
        ext4_ext_mark_unwritten(ex2);

    err = ext4_ext_insert_extent(icb, handle, inode, ppath, &newex, flags);
    if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
        if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
            if (split_flag & EXT4_EXT_DATA_VALID1) {
                err = ext4_ext_zeroout(inode, ex2);
                zero_ex.ee_block = ex2->ee_block;
                zero_ex.ee_len = cpu_to_le16(
                        ext4_ext_get_actual_len(ex2));
                ext4_ext_store_pblock(&zero_ex,
                        ext4_ext_pblock(ex2));
            } else {
                err = ext4_ext_zeroout(inode, ex);
                zero_ex.ee_block = ex->ee_block;
                zero_ex.ee_len = cpu_to_le16(
                        ext4_ext_get_actual_len(ex));
                ext4_ext_store_pblock(&zero_ex,
                        ext4_ext_pblock(ex));
            }
        } else {
            err = ext4_ext_zeroout(inode, &orig_ex);
            zero_ex.ee_block = orig_ex.ee_block;
            zero_ex.ee_len = cpu_to_le16(
                    ext4_ext_get_actual_len(&orig_ex));
            ext4_ext_store_pblock(&zero_ex,
                    ext4_ext_pblock(&orig_ex));
        }

        if (err)
            goto fix_extent_len;
        /* update the extent length and mark it as initialized */
        ex->ee_len = cpu_to_le16(ee_len);
        ext4_ext_try_to_merge(icb, handle, inode, path, ex);
        err = ext4_ext_dirty(icb, handle, inode, path + path->p_depth);
        if (err)
            goto fix_extent_len;

        goto out;
    } else if (err)
        goto fix_extent_len;

out:
    ext4_ext_show_leaf(inode, path);
    return err;

fix_extent_len:
    ex->ee_len = orig_ex.ee_len;
    ext4_ext_dirty(icb, handle, inode, path + path->p_depth);
    return err;
}
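
/*
 * Worked example for ext4_split_extent_at() (illustrative numbers): given
 * an unwritten extent covering logical blocks 100..119 and split = 108
 * with split_flag = EXT4_EXT_MARK_UNWRIT1, the result is
 *
 *   [100, 107]  unwritten   (first half, MARK_UNWRIT1 set)
 *   [108, 119]  initialized (second half, MARK_UNWRIT2 not set)
 *
 * where the second half keeps the original physical blocks, offset by
 * split - ee_block from the start of the extent.
 */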

/*
 * Returns 1 if the current index has to be freed (even partially).
 */
static inline int
ext4_ext_more_to_rm(struct ext4_ext_path *path)
{
    BUG_ON(path->p_idx == NULL);

    if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
        return 0;

    /*
     * if a truncate on a deeper level happened, it wasn't partial,
     * so we have to consider the current index for truncation
     */
    if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block)
        return 0;
    return 1;
}

int ext4_ext_remove_space(void *icb, struct inode *inode, unsigned long start)
{
#ifndef __REACTOS__
    struct super_block *sb = inode->i_sb;
#endif
    int depth = ext_depth(inode);
    struct ext4_ext_path *path;
    handle_t *handle = NULL;
    int i = 0, err = 0;

    /* probably the first extent we are going to free will be the last in the block */
    /*handle = ext4_journal_start(inode, depth + 1);*/
    /*if (IS_ERR(icb, handle))*/
    /*return PTR_ERR(icb, handle);*/

    /*
     * we start scanning from the right side, freeing all the blocks
     * after i_size and walking into the tree depth
     */
    path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL);
    if (path == NULL) {
        ext4_journal_stop(icb, handle);
        return -ENOMEM;
    }
    memset(path, 0, sizeof(struct ext4_ext_path) * (depth + 1));
    path[0].p_hdr = ext_inode_hdr(inode);
    if (ext4_ext_check_inode(inode)) {
        err = -EIO;
        goto out;
    }
    path[0].p_depth = depth;

    while (i >= 0 && err == 0) {
        if (i == depth) {
            /* this is a leaf block */
            err = ext4_ext_rm_leaf(icb, handle, inode, path, start);
            /* the root level has p_bh == NULL, extents_brelse() eats this */
            extents_brelse(path[i].p_bh);
            path[i].p_bh = NULL;
            i--;
            continue;
        }

        /* this is an index block */
        if (!path[i].p_hdr) {
            path[i].p_hdr = ext_block_hdr(path[i].p_bh);
        }

        if (!path[i].p_idx) {
            /* this level hasn't been touched yet */
            path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
            path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries) + 1;
        } else {
            /* we were already here, look at the next index */
            path[i].p_idx--;
        }

        if (ext4_ext_more_to_rm(path + i)) {
            struct buffer_head *bh;
            /* go to the next level */
            memset(path + i + 1, 0, sizeof(*path));
            bh = read_extent_tree_block(inode, ext4_idx_pblock(path[i].p_idx), path[0].p_depth - (i + 1), 0);
            if (IS_ERR(bh)) {
                /* should we reset i_size? */
                err = -EIO;
                break;
            }
            path[i+1].p_bh = bh;

            /* record the actual number of indexes so that we can tell
             * whether it changed at the next iteration */
            path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
            i++;
        } else {
            /* we finished processing this index, go up */
            if (path[i].p_hdr->eh_entries == 0 && i > 0) {
                /* the index is empty, remove it;
                 * the handle must already be prepared by
                 * truncatei_leaf() */
                err = ext4_ext_rm_idx(icb, handle, inode, path + i);
            }
            /* the root level has p_bh == NULL, extents_brelse() eats this */
            extents_brelse(path[i].p_bh);
            path[i].p_bh = NULL;
            i--;
        }
    }

    /* TODO: flexible tree reduction should be here */
    if (path->p_hdr->eh_entries == 0) {
        /*
         * truncating to zero freed the whole tree,
         * so we need to correct eh_depth
         */
        err = ext4_ext_get_access(icb, handle, inode, path);
        if (err == 0) {
            ext_inode_hdr(inode)->eh_depth = 0;
            ext_inode_hdr(inode)->eh_max =
                cpu_to_le16(ext4_ext_space_root(inode, 0));
            err = ext4_ext_dirty(icb, handle, inode, path);
        }
    }
out:
    if (path) {
        ext4_ext_drop_refs(path);
        kfree(path);
    }
    ext4_journal_stop(icb, handle);

    return err;
}
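
/*
 * Traversal sketch for ext4_ext_remove_space() (illustrative): for a tree
 * of depth 1 the loop above runs roughly as follows.
 *
 *   i = 0 (index level): pick EXT_LAST_INDEX, descend -> i = 1
 *   i = 1 (leaf level):  ext4_ext_rm_leaf() trims or frees the extents
 *                        past 'start', release the leaf buffer -> i = 0
 *   i = 0: step p_idx one slot left and descend again, or, once the level
 *          is exhausted, remove emptied index blocks via ext4_ext_rm_idx()
 *          and go up -> i = -1, and the loop ends.
 */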

int ext4_ext_tree_init(void *icb, handle_t *handle, struct inode *inode)
{
    struct ext4_extent_header *eh;

    eh = ext_inode_hdr(inode);
    eh->eh_depth = 0;
    eh->eh_entries = 0;
    eh->eh_magic = EXT4_EXT_MAGIC;
    eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
    ext4_mark_inode_dirty(icb, handle, inode);
    return 0;
}

/*
 * called at mount time
 */
void ext4_ext_init(struct super_block *sb)
{
    /*
     * possible initialization would be here
     */
}

static int ext4_ext_convert_to_initialized(
        void *icb,
        handle_t *handle,
        struct inode *inode,
        struct ext4_ext_path **ppath,
        ext4_lblk_t split,
        unsigned long blocks,
        int flags)
{
    int depth = ext_depth(inode), err;
    struct ext4_extent *ex = (*ppath)[depth].p_ext;

    assert(le32_to_cpu(ex->ee_block) <= split);

    if (split + blocks == le32_to_cpu(ex->ee_block) +
            ext4_ext_get_actual_len(ex)) {

        /* split and initialize the right part */
        err = ext4_split_extent_at(icb, handle, inode, ppath, split,
                EXT4_EXT_MARK_UNWRIT1, flags);

    } else if (le32_to_cpu(ex->ee_block) == split) {

        /* split and initialize the left part */
        err = ext4_split_extent_at(icb, handle, inode, ppath, split + blocks,
                EXT4_EXT_MARK_UNWRIT2, flags);

    } else {

        /* split one extent into three and initialize the middle one */
        err = ext4_split_extent_at(icb, handle, inode, ppath, split + blocks,
                EXT4_EXT_MARK_UNWRIT1 |
                EXT4_EXT_MARK_UNWRIT2, flags);
        if (0 == err) {
            err = ext4_split_extent_at(icb, handle, inode, ppath, split,
                    EXT4_EXT_MARK_UNWRIT1, flags);
        }
    }

    return err;
}
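
/*
 * Worked example for the three-way case above (illustrative numbers): for
 * an unwritten extent covering blocks 100..119, a write into blocks
 * 104..107 (split = 104, blocks = 4) ends up as
 *
 *   [100, 103] unwritten | [104, 107] initialized | [108, 119] unwritten
 *
 * produced by first splitting at 108 (both halves kept unwritten) and
 * then at 104 (only the left part kept unwritten).
 */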

int ext4_ext_get_blocks(void *icb, handle_t *handle, struct inode *inode, ext4_fsblk_t iblock,
        unsigned long max_blocks, struct buffer_head *bh_result,
        int create, int flags)
{
    struct ext4_ext_path *path = NULL;
    struct ext4_extent newex, *ex;
    int goal, err = 0, depth;
    unsigned long allocated = 0;
    ext4_fsblk_t next, newblock;

    clear_buffer_new(bh_result);
    /*mutex_lock(&ext4_I(inode)->truncate_mutex);*/

    /* find the extent for this block */
    path = ext4_find_extent(inode, iblock, NULL, 0);
    if (IS_ERR(path)) {
        err = PTR_ERR(path);
        path = NULL;
        goto out2;
    }

    depth = ext_depth(inode);

    /*
     * a consistent leaf must not be empty;
     * this situation is possible, though, _during_ tree modification,
     * which is why the assert can't be put in ext4_ext_find_extent()
     */
    BUG_ON(path[depth].p_ext == NULL && depth != 0);

    if ((ex = path[depth].p_ext)) {
        ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
        ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
        unsigned short ee_len = ext4_ext_get_actual_len(ex);
        /* if the found extent covers the block, simply return it */
        if (iblock >= ee_block && iblock < ee_block + ee_len) {

            /* number of remaining blocks in the extent */
            allocated = ee_len + ee_block - iblock;

            if (ext4_ext_is_unwritten(ex)) {
                if (create) {
                    newblock = iblock - ee_block + ee_start;
                    err = ext4_ext_convert_to_initialized(
                            icb, handle,
                            inode,
                            &path,
                            iblock,
                            allocated,
                            flags);
                    if (err)
                        goto out2;

                } else {
                    newblock = 0;
                }
            } else {
                newblock = iblock - ee_block + ee_start;
            }
            goto out;
        }
    }

    /*
     * the requested block isn't allocated yet;
     * we can't create the block if the create flag is zero
     */
    if (!create) {
        goto out2;
    }

    /* find the next allocated block so that we know how many
     * blocks we can allocate without overlapping the next extent */
    next = ext4_ext_next_allocated_block(path);
    BUG_ON(next <= iblock);
    allocated = next - iblock;
    if (flags & EXT4_GET_BLOCKS_PRE_IO && max_blocks > EXT_UNWRITTEN_MAX_LEN)
        max_blocks = EXT_UNWRITTEN_MAX_LEN;
    if (allocated > max_blocks)
        allocated = max_blocks;

    /* allocate a new block */
    goal = ext4_ext_find_goal(inode, path, iblock);
    newblock = ext4_new_meta_blocks(icb, handle, inode, goal, 0,
            &allocated, &err);
    if (!newblock)
        goto out2;

    /* try to insert the new extent into the found leaf and return */
    newex.ee_block = cpu_to_le32(iblock);
    ext4_ext_store_pblock(&newex, newblock);
    newex.ee_len = cpu_to_le16(allocated);
    /* if this is fallocate, mark the extent as unwritten */
    if (flags & EXT4_GET_BLOCKS_PRE_IO) {
        ext4_ext_mark_unwritten(&newex);
    }
    err = ext4_ext_insert_extent(icb, handle, inode, &path, &newex,
            flags & EXT4_GET_BLOCKS_PRE_IO);

    if (err) {
        /* free the data blocks we just allocated */
        ext4_free_blocks(icb, handle, inode, NULL, ext4_ext_pblock(&newex),
                le16_to_cpu(newex.ee_len), get_default_free_blocks_flags(inode));
        goto out2;
    }

    ext4_mark_inode_dirty(icb, handle, inode);

    /* a previous routine could have used the block we just allocated */
    if (ext4_ext_is_unwritten(&newex))
        newblock = 0;
    else
        newblock = ext4_ext_pblock(&newex);

    set_buffer_new(bh_result);

out:
    if (allocated > max_blocks)
        allocated = max_blocks;

    ext4_ext_show_leaf(inode, path);
    set_buffer_mapped(bh_result);
    bh_result->b_bdev = inode->i_sb->s_bdev;
    bh_result->b_blocknr = newblock;
out2:
    if (path) {
        ext4_ext_drop_refs(path);
        kfree(path);
    }
    /*mutex_unlock(&ext4_I(inode)->truncate_mutex);*/

    return err ? err : allocated;
}
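
/*
 * Minimal usage sketch for ext4_ext_get_blocks(), compiled out by default:
 * the EXT4_EXT_GET_BLOCKS_SAMPLE guard and the helper name below are
 * illustrative, not part of the driver. It assumes a zero-initialized
 * buffer_head on the stack is acceptable as bh_result, which matches how
 * the function uses it here (state bits, b_bdev, b_blocknr).
 */
#ifdef EXT4_EXT_GET_BLOCKS_SAMPLE
static int ext4_ext_map_one_block_sample(void *icb, handle_t *handle,
        struct inode *inode, ext4_fsblk_t lblock, ext4_fsblk_t *pblock)
{
    struct buffer_head bh_result;
    int ret;

    memset(&bh_result, 0, sizeof(bh_result));

    /* create = 1: allocate the block if no extent covers it yet */
    ret = ext4_ext_get_blocks(icb, handle, inode, lblock, 1,
            &bh_result, 1, 0);
    if (ret <= 0)
        return ret ? ret : -EIO;    /* error, or nothing was mapped */

    /* on success, the mapped physical block is in b_blocknr */
    *pblock = bh_result.b_blocknr;
    return 0;
}
#endif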

#ifdef _MSC_VER
#pragma warning(pop)
#endif
