1 /* Copyright (c) Mark Harmstone 2016-17
3 * This file is part of WinBtrfs.
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
18 #include "btrfs_drv.h"
20 // Number of increments in the size of each cache inode, in sectors. Should
21 // this be a constant number of sectors, a constant 256 KB, or what?
22 #define CACHE_INCREMENTS 64
24 static NTSTATUS
remove_free_space_inode(device_extension
* Vcb
, UINT64 inode
, LIST_ENTRY
* batchlist
, PIRP Irp
, LIST_ENTRY
* rollback
) {
28 Status
= open_fcb(Vcb
, Vcb
->root_root
, inode
, BTRFS_TYPE_FILE
, NULL
, NULL
, &fcb
, PagedPool
, Irp
);
29 if (!NT_SUCCESS(Status
)) {
30 ERR("open_fcb returned %08x\n", Status
);
36 if (fcb
->inode_item
.st_size
> 0) {
37 Status
= excise_extents(fcb
->Vcb
, fcb
, 0, sector_align(fcb
->inode_item
.st_size
, fcb
->Vcb
->superblock
.sector_size
), Irp
, rollback
);
38 if (!NT_SUCCESS(Status
)) {
39 ERR("excise_extents returned %08x\n", Status
);
46 Status
= flush_fcb(fcb
, FALSE
, batchlist
, Irp
);
47 if (!NT_SUCCESS(Status
)) {
48 ERR("flush_fcb returned %08x\n", Status
);
55 return STATUS_SUCCESS
;
58 NTSTATUS
clear_free_space_cache(device_extension
* Vcb
, LIST_ENTRY
* batchlist
, PIRP Irp
) {
60 traverse_ptr tp
, next_tp
;
65 InitializeListHead(&rollback
);
67 searchkey
.obj_id
= FREE_SPACE_CACHE_ID
;
68 searchkey
.obj_type
= 0;
71 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, FALSE
, Irp
);
72 if (!NT_SUCCESS(Status
)) {
73 ERR("error - find_item returned %08x\n", Status
);
78 if (tp
.item
->key
.obj_id
> searchkey
.obj_id
|| (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
> searchkey
.obj_type
))
81 if (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
== searchkey
.obj_type
) {
82 Status
= delete_tree_item(Vcb
, &tp
);
83 if (!NT_SUCCESS(Status
)) {
84 ERR("delete_tree_item returned %08x\n", Status
);
88 if (tp
.item
->size
>= sizeof(FREE_SPACE_ITEM
)) {
89 FREE_SPACE_ITEM
* fsi
= (FREE_SPACE_ITEM
*)tp
.item
->data
;
91 if (fsi
->key
.obj_type
!= TYPE_INODE_ITEM
)
92 WARN("key (%llx,%x,%llx) does not point to an INODE_ITEM\n", fsi
->key
.obj_id
, fsi
->key
.obj_type
, fsi
->key
.offset
);
96 Status
= remove_free_space_inode(Vcb
, fsi
->key
.obj_id
, batchlist
, Irp
, &rollback
);
98 if (!NT_SUCCESS(Status
))
99 ERR("remove_free_space_inode for (%llx,%x,%llx) returned %08x\n", fsi
->key
.obj_id
, fsi
->key
.obj_type
, fsi
->key
.offset
, Status
);
101 le
= Vcb
->chunks
.Flink
;
102 while (le
!= &Vcb
->chunks
) {
103 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
105 if (c
->offset
== tp
.item
->key
.offset
&& c
->cache
) {
106 free_fcb(Vcb
, c
->cache
);
114 WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, sizeof(FREE_SPACE_ITEM
));
117 b
= find_next_item(Vcb
, &tp
, &next_tp
, FALSE
, Irp
);
122 Status
= STATUS_SUCCESS
;
124 if (NT_SUCCESS(Status
))
125 clear_rollback(&rollback
);
127 do_rollback(Vcb
, &rollback
);
129 if (Vcb
->space_root
) {
130 searchkey
.obj_id
= 0;
131 searchkey
.obj_type
= 0;
132 searchkey
.offset
= 0;
134 Status
= find_item(Vcb
, Vcb
->space_root
, &tp
, &searchkey
, FALSE
, Irp
);
135 if (!NT_SUCCESS(Status
)) {
136 ERR("find_item returned %08x\n", Status
);
141 Status
= delete_tree_item(Vcb
, &tp
);
142 if (!NT_SUCCESS(Status
)) {
143 ERR("delete_tree_item returned %08x\n", Status
);
147 b
= find_next_item(Vcb
, &tp
, &next_tp
, FALSE
, Irp
);
153 // regenerate free space tree
154 if (Vcb
->superblock
.compat_ro_flags
& BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE
) {
157 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, TRUE
);
159 le
= Vcb
->chunks
.Flink
;
160 while (le
!= &Vcb
->chunks
) {
161 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
163 if (!c
->cache_loaded
) {
164 ExAcquireResourceExclusiveLite(&c
->lock
, TRUE
);
166 Status
= load_cache_chunk(Vcb
, c
, NULL
);
167 if (!NT_SUCCESS(Status
)) {
168 ERR("load_cache_chunk(%llx) returned %08x\n", c
->offset
, Status
);
169 ExReleaseResourceLite(&c
->lock
);
170 ExReleaseResourceLite(&Vcb
->chunk_lock
);
175 c
->space_changed
= TRUE
;
177 ExReleaseResourceLite(&c
->lock
);
183 ExReleaseResourceLite(&Vcb
->chunk_lock
);
189 NTSTATUS
add_space_entry(LIST_ENTRY
* list
, LIST_ENTRY
* list_size
, UINT64 offset
, UINT64 size
) {
192 s
= ExAllocatePoolWithTag(PagedPool
, sizeof(space
), ALLOC_TAG
);
195 ERR("out of memory\n");
196 return STATUS_INSUFFICIENT_RESOURCES
;
202 if (IsListEmpty(list
))
203 InsertTailList(list
, &s
->list_entry
);
205 space
* s2
= CONTAINING_RECORD(list
->Blink
, space
, list_entry
);
207 if (s2
->address
< offset
)
208 InsertTailList(list
, &s
->list_entry
);
214 s2
= CONTAINING_RECORD(le
, space
, list_entry
);
216 if (s2
->address
> offset
) {
217 InsertTailList(le
, &s
->list_entry
);
228 return STATUS_SUCCESS
;
230 if (IsListEmpty(list_size
))
231 InsertTailList(list_size
, &s
->list_entry_size
);
233 space
* s2
= CONTAINING_RECORD(list_size
->Blink
, space
, list_entry_size
);
235 if (s2
->size
>= size
)
236 InsertTailList(list_size
, &s
->list_entry_size
);
240 le
= list_size
->Flink
;
241 while (le
!= list_size
) {
242 s2
= CONTAINING_RECORD(le
, space
, list_entry_size
);
244 if (s2
->size
<= size
) {
245 InsertHeadList(le
->Blink
, &s
->list_entry_size
);
246 return STATUS_SUCCESS
;
254 return STATUS_SUCCESS
;
257 static void load_free_space_bitmap(device_extension
* Vcb
, chunk
* c
, UINT64 offset
, void* data
, UINT64
* total_space
) {
259 UINT32 i
, *dwords
= data
;
260 ULONG runlength
, index
;
263 for (i
= 0; i
< Vcb
->superblock
.sector_size
/ sizeof(UINT32
); i
++) {
264 dwords
[i
] = ~dwords
[i
];
267 RtlInitializeBitMap(&bmph
, data
, Vcb
->superblock
.sector_size
* 8);
270 runlength
= RtlFindFirstRunClear(&bmph
, &index
);
272 while (runlength
!= 0) {
275 addr
= offset
+ (index
* Vcb
->superblock
.sector_size
);
276 length
= Vcb
->superblock
.sector_size
* runlength
;
278 add_space_entry(&c
->space
, &c
->space_size
, addr
, length
);
280 *total_space
+= length
;
282 runlength
= RtlFindNextForwardRunClear(&bmph
, index
, &index
);
286 static void order_space_entry(space
* s
, LIST_ENTRY
* list_size
) {
289 if (IsListEmpty(list_size
)) {
290 InsertHeadList(list_size
, &s
->list_entry_size
);
294 le
= list_size
->Flink
;
296 while (le
!= list_size
) {
297 space
* s2
= CONTAINING_RECORD(le
, space
, list_entry_size
);
299 if (s2
->size
<= s
->size
) {
300 InsertHeadList(le
->Blink
, &s
->list_entry_size
);
307 InsertTailList(list_size
, &s
->list_entry_size
);
312 LIST_ENTRY list_entry
;
315 static NTSTATUS
add_superblock_stripe(LIST_ENTRY
* stripes
, UINT64 off
, UINT64 len
) {
318 for (i
= 0; i
< len
; i
++) {
320 superblock_stripe
* ss
;
324 while (le
!= stripes
) {
325 ss
= CONTAINING_RECORD(le
, superblock_stripe
, list_entry
);
327 if (ss
->stripe
== off
+ i
) {
338 ss
= ExAllocatePoolWithTag(PagedPool
, sizeof(superblock_stripe
), ALLOC_TAG
);
340 ERR("out of memory\n");
341 return STATUS_INSUFFICIENT_RESOURCES
;
344 ss
->stripe
= off
+ i
;
345 InsertTailList(stripes
, &ss
->list_entry
);
348 return STATUS_SUCCESS
;
351 static NTSTATUS
get_superblock_size(chunk
* c
, UINT64
* size
) {
353 CHUNK_ITEM
* ci
= c
->chunk_item
;
354 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&ci
[1];
355 UINT64 off_start
, off_end
, space
= 0;
359 InitializeListHead(&stripes
);
361 while (superblock_addrs
[i
] != 0) {
362 if (ci
->type
& BLOCK_FLAG_RAID0
|| ci
->type
& BLOCK_FLAG_RAID10
) {
363 for (j
= 0; j
< ci
->num_stripes
; j
++) {
364 ULONG sub_stripes
= max(ci
->sub_stripes
, 1);
366 if (cis
[j
].offset
+ (ci
->size
* ci
->num_stripes
/ sub_stripes
) > superblock_addrs
[i
] && cis
[j
].offset
<= superblock_addrs
[i
] + sizeof(superblock
)) {
367 off_start
= superblock_addrs
[i
] - cis
[j
].offset
;
368 off_start
-= off_start
% ci
->stripe_length
;
369 off_start
*= ci
->num_stripes
/ sub_stripes
;
370 off_start
+= (j
/ sub_stripes
) * ci
->stripe_length
;
372 off_end
= off_start
+ ci
->stripe_length
;
374 Status
= add_superblock_stripe(&stripes
, off_start
/ ci
->stripe_length
, 1);
375 if (!NT_SUCCESS(Status
)) {
376 ERR("add_superblock_stripe returned %08x\n", Status
);
381 } else if (ci
->type
& BLOCK_FLAG_RAID5
) {
382 for (j
= 0; j
< ci
->num_stripes
; j
++) {
383 UINT64 stripe_size
= ci
->size
/ (ci
->num_stripes
- 1);
385 if (cis
[j
].offset
+ stripe_size
> superblock_addrs
[i
] && cis
[j
].offset
<= superblock_addrs
[i
] + sizeof(superblock
)) {
386 off_start
= superblock_addrs
[i
] - cis
[j
].offset
;
387 off_start
-= off_start
% (ci
->stripe_length
* (ci
->num_stripes
- 1));
388 off_start
*= ci
->num_stripes
- 1;
390 off_end
= off_start
+ (ci
->stripe_length
* (ci
->num_stripes
- 1));
392 Status
= add_superblock_stripe(&stripes
, off_start
/ ci
->stripe_length
, (off_end
- off_start
) / ci
->stripe_length
);
393 if (!NT_SUCCESS(Status
)) {
394 ERR("add_superblock_stripe returned %08x\n", Status
);
399 } else if (ci
->type
& BLOCK_FLAG_RAID6
) {
400 for (j
= 0; j
< ci
->num_stripes
; j
++) {
401 UINT64 stripe_size
= ci
->size
/ (ci
->num_stripes
- 2);
403 if (cis
[j
].offset
+ stripe_size
> superblock_addrs
[i
] && cis
[j
].offset
<= superblock_addrs
[i
] + sizeof(superblock
)) {
404 off_start
= superblock_addrs
[i
] - cis
[j
].offset
;
405 off_start
-= off_start
% (ci
->stripe_length
* (ci
->num_stripes
- 2));
406 off_start
*= ci
->num_stripes
- 2;
408 off_end
= off_start
+ (ci
->stripe_length
* (ci
->num_stripes
- 2));
410 Status
= add_superblock_stripe(&stripes
, off_start
/ ci
->stripe_length
, (off_end
- off_start
) / ci
->stripe_length
);
411 if (!NT_SUCCESS(Status
)) {
412 ERR("add_superblock_stripe returned %08x\n", Status
);
417 } else { // SINGLE, DUPLICATE, RAID1
418 for (j
= 0; j
< ci
->num_stripes
; j
++) {
419 if (cis
[j
].offset
+ ci
->size
> superblock_addrs
[i
] && cis
[j
].offset
<= superblock_addrs
[i
] + sizeof(superblock
)) {
420 off_start
= ((superblock_addrs
[i
] - cis
[j
].offset
) / c
->chunk_item
->stripe_length
) * c
->chunk_item
->stripe_length
;
421 off_end
= sector_align(superblock_addrs
[i
] - cis
[j
].offset
+ sizeof(superblock
), c
->chunk_item
->stripe_length
);
423 Status
= add_superblock_stripe(&stripes
, off_start
/ ci
->stripe_length
, (off_end
- off_start
) / ci
->stripe_length
);
424 if (!NT_SUCCESS(Status
)) {
425 ERR("add_superblock_stripe returned %08x\n", Status
);
435 Status
= STATUS_SUCCESS
;
438 while (!IsListEmpty(&stripes
)) {
439 LIST_ENTRY
* le
= RemoveHeadList(&stripes
);
440 superblock_stripe
* ss
= CONTAINING_RECORD(le
, superblock_stripe
, list_entry
);
447 if (NT_SUCCESS(Status
))
448 *size
= space
* ci
->stripe_length
;
453 NTSTATUS
load_stored_free_space_cache(device_extension
* Vcb
, chunk
* c
, BOOL load_only
, PIRP Irp
) {
456 FREE_SPACE_ITEM
* fsi
;
457 UINT64 inode
, *generation
;
460 UINT32
*checksums
, crc32
, i
, num_sectors
, num_valid_sectors
, size
;
461 FREE_SPACE_ENTRY
* fse
;
462 UINT64 num_entries
, num_bitmaps
, extent_length
, bmpnum
, off
, total_space
= 0, superblock_size
;
463 LIST_ENTRY
*le
, rollback
;
465 // FIXME - does this break if Vcb->superblock.sector_size is not 4096?
467 TRACE("(%p, %llx)\n", Vcb
, c
->offset
);
469 searchkey
.obj_id
= FREE_SPACE_CACHE_ID
;
470 searchkey
.obj_type
= 0;
471 searchkey
.offset
= c
->offset
;
473 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, FALSE
, Irp
);
474 if (!NT_SUCCESS(Status
)) {
475 ERR("error - find_item returned %08x\n", Status
);
479 if (keycmp(tp
.item
->key
, searchkey
)) {
480 TRACE("(%llx,%x,%llx) not found\n", searchkey
.obj_id
, searchkey
.obj_type
, searchkey
.offset
);
481 return STATUS_NOT_FOUND
;
484 if (tp
.item
->size
< sizeof(FREE_SPACE_ITEM
)) {
485 WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, sizeof(FREE_SPACE_ITEM
));
486 return STATUS_NOT_FOUND
;
489 fsi
= (FREE_SPACE_ITEM
*)tp
.item
->data
;
491 if (fsi
->key
.obj_type
!= TYPE_INODE_ITEM
) {
492 WARN("cache pointed to something other than an INODE_ITEM\n");
493 return STATUS_NOT_FOUND
;
496 inode
= fsi
->key
.obj_id
;
497 num_entries
= fsi
->num_entries
;
498 num_bitmaps
= fsi
->num_bitmaps
;
500 Status
= open_fcb(Vcb
, Vcb
->root_root
, inode
, BTRFS_TYPE_FILE
, NULL
, NULL
, &c
->cache
, PagedPool
, Irp
);
501 if (!NT_SUCCESS(Status
)) {
502 ERR("open_fcb returned %08x\n", Status
);
503 return STATUS_NOT_FOUND
;
507 return STATUS_SUCCESS
;
509 if (c
->cache
->inode_item
.st_size
== 0) {
510 WARN("cache had zero length\n");
511 free_fcb(Vcb
, c
->cache
);
513 return STATUS_NOT_FOUND
;
516 c
->cache
->inode_item
.flags
|= BTRFS_INODE_NODATACOW
;
518 if (num_entries
== 0 && num_bitmaps
== 0)
519 return STATUS_SUCCESS
;
521 size
= (UINT32
)sector_align(c
->cache
->inode_item
.st_size
, Vcb
->superblock
.sector_size
);
523 data
= ExAllocatePoolWithTag(PagedPool
, size
, ALLOC_TAG
);
526 ERR("out of memory\n");
527 free_fcb(Vcb
, c
->cache
);
529 return STATUS_INSUFFICIENT_RESOURCES
;
532 Status
= read_file(c
->cache
, data
, 0, c
->cache
->inode_item
.st_size
, NULL
, NULL
);
533 if (!NT_SUCCESS(Status
)) {
534 ERR("read_file returned %08x\n", Status
);
537 c
->cache
->deleted
= TRUE
;
538 mark_fcb_dirty(c
->cache
);
540 free_fcb(Vcb
, c
->cache
);
542 return STATUS_NOT_FOUND
;
545 if (size
> c
->cache
->inode_item
.st_size
)
546 RtlZeroMemory(&data
[c
->cache
->inode_item
.st_size
], (ULONG
)(size
- c
->cache
->inode_item
.st_size
));
548 num_sectors
= size
/ Vcb
->superblock
.sector_size
;
550 generation
= (UINT64
*)(data
+ (num_sectors
* sizeof(UINT32
)));
552 if (*generation
!= fsi
->generation
) {
553 WARN("free space cache generation for %llx was %llx, expected %llx\n", c
->offset
, *generation
, fsi
->generation
);
557 extent_length
= (num_sectors
* sizeof(UINT32
)) + sizeof(UINT64
) + (num_entries
* sizeof(FREE_SPACE_ENTRY
));
559 num_valid_sectors
= (ULONG
)((sector_align(extent_length
, Vcb
->superblock
.sector_size
) / Vcb
->superblock
.sector_size
) + num_bitmaps
);
561 if (num_valid_sectors
> num_sectors
) {
562 ERR("free space cache for %llx was %llx sectors, expected at least %llx\n", c
->offset
, num_sectors
, num_valid_sectors
);
566 checksums
= (UINT32
*)data
;
568 for (i
= 0; i
< num_valid_sectors
; i
++) {
569 if (i
* Vcb
->superblock
.sector_size
> sizeof(UINT32
) * num_sectors
)
570 crc32
= ~calc_crc32c(0xffffffff, &data
[i
* Vcb
->superblock
.sector_size
], Vcb
->superblock
.sector_size
);
571 else if ((i
+ 1) * Vcb
->superblock
.sector_size
< sizeof(UINT32
) * num_sectors
)
572 crc32
= 0; // FIXME - test this
574 crc32
= ~calc_crc32c(0xffffffff, &data
[sizeof(UINT32
) * num_sectors
], ((i
+ 1) * Vcb
->superblock
.sector_size
) - (sizeof(UINT32
) * num_sectors
));
576 if (crc32
!= checksums
[i
]) {
577 WARN("checksum %llu was %08x, expected %08x\n", i
, crc32
, checksums
[i
]);
582 off
= (sizeof(UINT32
) * num_sectors
) + sizeof(UINT64
);
585 for (i
= 0; i
< num_entries
; i
++) {
586 if ((off
+ sizeof(FREE_SPACE_ENTRY
)) / Vcb
->superblock
.sector_size
!= off
/ Vcb
->superblock
.sector_size
)
587 off
= sector_align(off
, Vcb
->superblock
.sector_size
);
589 fse
= (FREE_SPACE_ENTRY
*)&data
[off
];
591 if (fse
->type
== FREE_SPACE_EXTENT
) {
592 Status
= add_space_entry(&c
->space
, &c
->space_size
, fse
->offset
, fse
->size
);
593 if (!NT_SUCCESS(Status
)) {
594 ERR("add_space_entry returned %08x\n", Status
);
599 total_space
+= fse
->size
;
600 } else if (fse
->type
!= FREE_SPACE_BITMAP
) {
601 ERR("unknown free-space type %x\n", fse
->type
);
604 off
+= sizeof(FREE_SPACE_ENTRY
);
607 if (num_bitmaps
> 0) {
608 bmpnum
= sector_align(off
, Vcb
->superblock
.sector_size
) / Vcb
->superblock
.sector_size
;
609 off
= (sizeof(UINT32
) * num_sectors
) + sizeof(UINT64
);
611 for (i
= 0; i
< num_entries
; i
++) {
612 if ((off
+ sizeof(FREE_SPACE_ENTRY
)) / Vcb
->superblock
.sector_size
!= off
/ Vcb
->superblock
.sector_size
)
613 off
= sector_align(off
, Vcb
->superblock
.sector_size
);
615 fse
= (FREE_SPACE_ENTRY
*)&data
[off
];
617 if (fse
->type
== FREE_SPACE_BITMAP
) {
618 // FIXME - make sure we don't overflow the buffer here
619 load_free_space_bitmap(Vcb
, c
, fse
->offset
, &data
[bmpnum
* Vcb
->superblock
.sector_size
], &total_space
);
623 off
+= sizeof(FREE_SPACE_ENTRY
);
629 Status
= get_superblock_size(c
, &superblock_size
);
630 if (!NT_SUCCESS(Status
)) {
631 ERR("get_superblock_size returned %08x\n", Status
);
636 if (c
->chunk_item
->size
- c
->used
!= total_space
+ superblock_size
) {
637 WARN("invalidating cache for chunk %llx: space was %llx, expected %llx\n", c
->offset
, total_space
+ superblock_size
, c
->chunk_item
->size
- c
->used
);
642 while (le
!= &c
->space
) {
643 space
* s
= CONTAINING_RECORD(le
, space
, list_entry
);
644 LIST_ENTRY
* le2
= le
->Flink
;
646 if (le2
!= &c
->space
) {
647 space
* s2
= CONTAINING_RECORD(le2
, space
, list_entry
);
649 if (s2
->address
== s
->address
+ s
->size
) {
652 RemoveEntryList(&s2
->list_entry
);
653 RemoveEntryList(&s2
->list_entry_size
);
656 RemoveEntryList(&s
->list_entry_size
);
657 order_space_entry(s
, &c
->space_size
);
668 return STATUS_SUCCESS
;
673 InitializeListHead(&rollback
);
675 Status
= delete_tree_item(Vcb
, &tp
);
676 if (!NT_SUCCESS(Status
)) {
677 ERR("delete_tree_item returned %08x\n", Status
);
681 Status
= excise_extents(Vcb
, c
->cache
, 0, c
->cache
->inode_item
.st_size
, Irp
, &rollback
);
682 if (!NT_SUCCESS(Status
)) {
683 ERR("excise_extents returned %08x\n", Status
);
684 do_rollback(Vcb
, &rollback
);
688 clear_rollback(&rollback
);
690 c
->cache
->deleted
= TRUE
;
691 mark_fcb_dirty(c
->cache
);
693 c
->old_cache
= c
->cache
;
697 while (le
!= &c
->space
) {
698 space
* s
= CONTAINING_RECORD(le
, space
, list_entry
);
699 LIST_ENTRY
* le2
= le
->Flink
;
701 RemoveEntryList(&s
->list_entry
);
702 RemoveEntryList(&s
->list_entry_size
);
708 return STATUS_NOT_FOUND
;
711 static NTSTATUS
load_stored_free_space_tree(device_extension
* Vcb
, chunk
* c
, PIRP Irp
) {
713 traverse_ptr tp
, next_tp
;
715 ULONG
* bmparr
= NULL
;
719 TRACE("(%p, %llx)\n", Vcb
, c
->offset
);
721 if (!Vcb
->space_root
)
722 return STATUS_NOT_FOUND
;
724 searchkey
.obj_id
= c
->offset
;
725 searchkey
.obj_type
= TYPE_FREE_SPACE_INFO
;
726 searchkey
.offset
= c
->chunk_item
->size
;
728 Status
= find_item(Vcb
, Vcb
->space_root
, &tp
, &searchkey
, FALSE
, Irp
);
729 if (!NT_SUCCESS(Status
)) {
730 ERR("find_item returned %08x\n", Status
);
734 if (keycmp(tp
.item
->key
, searchkey
)) {
735 TRACE("(%llx,%x,%llx) not found\n", searchkey
.obj_id
, searchkey
.obj_type
, searchkey
.offset
);
736 return STATUS_NOT_FOUND
;
739 if (tp
.item
->size
< sizeof(FREE_SPACE_INFO
)) {
740 WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, sizeof(FREE_SPACE_INFO
));
741 return STATUS_NOT_FOUND
;
744 while (find_next_item(Vcb
, &tp
, &next_tp
, FALSE
, Irp
)) {
747 if (tp
.item
->key
.obj_id
>= c
->offset
+ c
->chunk_item
->size
)
750 if (tp
.item
->key
.obj_type
== TYPE_FREE_SPACE_EXTENT
) {
751 Status
= add_space_entry(&c
->space
, &c
->space_size
, tp
.item
->key
.obj_id
, tp
.item
->key
.offset
);
752 if (!NT_SUCCESS(Status
)) {
753 ERR("add_space_entry returned %08x\n", Status
);
754 if (bmparr
) ExFreePool(bmparr
);
757 } else if (tp
.item
->key
.obj_type
== TYPE_FREE_SPACE_BITMAP
) {
758 ULONG explen
, index
, runlength
;
762 explen
= (ULONG
)(tp
.item
->key
.offset
/ (Vcb
->superblock
.sector_size
* 8));
764 if (tp
.item
->size
< explen
) {
765 WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, explen
);
766 return STATUS_NOT_FOUND
;
767 } else if (tp
.item
->size
== 0) {
768 WARN("(%llx,%x,%llx) has size of 0\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
);
769 return STATUS_NOT_FOUND
;
772 if (bmplen
< tp
.item
->size
) {
776 bmplen
= (ULONG
)sector_align(tp
.item
->size
, sizeof(ULONG
));
777 bmparr
= ExAllocatePoolWithTag(PagedPool
, bmplen
, ALLOC_TAG
);
779 ERR("out of memory\n");
780 return STATUS_INSUFFICIENT_RESOURCES
;
784 // We copy the bitmap because it supposedly has to be ULONG-aligned
785 RtlCopyMemory(bmparr
, tp
.item
->data
, tp
.item
->size
);
787 RtlInitializeBitMap(&bmp
, bmparr
, (ULONG
)(tp
.item
->key
.offset
/ Vcb
->superblock
.sector_size
));
789 lastoff
= tp
.item
->key
.obj_id
;
791 runlength
= RtlFindFirstRunClear(&bmp
, &index
);
793 while (runlength
!= 0) {
794 UINT64 runstart
= tp
.item
->key
.obj_id
+ (index
* Vcb
->superblock
.sector_size
);
795 UINT64 runend
= runstart
+ (runlength
* Vcb
->superblock
.sector_size
);
797 if (runstart
> lastoff
) {
798 Status
= add_space_entry(&c
->space
, &c
->space_size
, lastoff
, runstart
- lastoff
);
799 if (!NT_SUCCESS(Status
)) {
800 ERR("add_space_entry returned %08x\n", Status
);
801 if (bmparr
) ExFreePool(bmparr
);
808 runlength
= RtlFindNextForwardRunClear(&bmp
, index
+ runlength
, &index
);
811 if (lastoff
< tp
.item
->key
.obj_id
+ tp
.item
->key
.offset
) {
812 Status
= add_space_entry(&c
->space
, &c
->space_size
, lastoff
, tp
.item
->key
.obj_id
+ tp
.item
->key
.offset
- lastoff
);
813 if (!NT_SUCCESS(Status
)) {
814 ERR("add_space_entry returned %08x\n", Status
);
815 if (bmparr
) ExFreePool(bmparr
);
826 while (le
!= &c
->space
) {
827 space
* s
= CONTAINING_RECORD(le
, space
, list_entry
);
828 LIST_ENTRY
* le2
= le
->Flink
;
830 if (le2
!= &c
->space
) {
831 space
* s2
= CONTAINING_RECORD(le2
, space
, list_entry
);
833 if (s2
->address
== s
->address
+ s
->size
) {
836 RemoveEntryList(&s2
->list_entry
);
837 RemoveEntryList(&s2
->list_entry_size
);
840 RemoveEntryList(&s
->list_entry_size
);
841 order_space_entry(s
, &c
->space_size
);
850 return STATUS_SUCCESS
;
853 static NTSTATUS
load_free_space_cache(device_extension
* Vcb
, chunk
* c
, PIRP Irp
) {
854 traverse_ptr tp
, next_tp
;
861 if (Vcb
->superblock
.compat_ro_flags
& BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE
&& Vcb
->superblock
.compat_ro_flags
& BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE_VALID
) {
862 Status
= load_stored_free_space_tree(Vcb
, c
, Irp
);
864 if (!NT_SUCCESS(Status
) && Status
!= STATUS_NOT_FOUND
) {
865 ERR("load_stored_free_space_tree returned %08x\n", Status
);
868 } else if (Vcb
->superblock
.generation
- 1 == Vcb
->superblock
.cache_generation
) {
869 Status
= load_stored_free_space_cache(Vcb
, c
, FALSE
, Irp
);
871 if (!NT_SUCCESS(Status
) && Status
!= STATUS_NOT_FOUND
) {
872 ERR("load_stored_free_space_cache returned %08x\n", Status
);
876 Status
= STATUS_NOT_FOUND
;
878 if (Status
== STATUS_NOT_FOUND
) {
879 TRACE("generating free space cache for chunk %llx\n", c
->offset
);
881 searchkey
.obj_id
= c
->offset
;
882 searchkey
.obj_type
= TYPE_EXTENT_ITEM
;
883 searchkey
.offset
= 0;
885 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, FALSE
, Irp
);
886 if (!NT_SUCCESS(Status
)) {
887 ERR("error - find_item returned %08x\n", Status
);
891 lastaddr
= c
->offset
;
894 if (tp
.item
->key
.obj_id
>= c
->offset
+ c
->chunk_item
->size
)
897 if (tp
.item
->key
.obj_id
>= c
->offset
&& (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
|| tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
)) {
898 if (tp
.item
->key
.obj_id
> lastaddr
) {
899 s
= ExAllocatePoolWithTag(PagedPool
, sizeof(space
), ALLOC_TAG
);
902 ERR("out of memory\n");
903 return STATUS_INSUFFICIENT_RESOURCES
;
906 s
->address
= lastaddr
;
907 s
->size
= tp
.item
->key
.obj_id
- lastaddr
;
908 InsertTailList(&c
->space
, &s
->list_entry
);
910 order_space_entry(s
, &c
->space_size
);
912 TRACE("(%llx,%llx)\n", s
->address
, s
->size
);
915 if (tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
)
916 lastaddr
= tp
.item
->key
.obj_id
+ Vcb
->superblock
.node_size
;
918 lastaddr
= tp
.item
->key
.obj_id
+ tp
.item
->key
.offset
;
921 b
= find_next_item(Vcb
, &tp
, &next_tp
, FALSE
, Irp
);
926 if (lastaddr
< c
->offset
+ c
->chunk_item
->size
) {
927 s
= ExAllocatePoolWithTag(PagedPool
, sizeof(space
), ALLOC_TAG
);
930 ERR("out of memory\n");
931 return STATUS_INSUFFICIENT_RESOURCES
;
934 s
->address
= lastaddr
;
935 s
->size
= c
->offset
+ c
->chunk_item
->size
- lastaddr
;
936 InsertTailList(&c
->space
, &s
->list_entry
);
938 order_space_entry(s
, &c
->space_size
);
940 TRACE("(%llx,%llx)\n", s
->address
, s
->size
);
944 return STATUS_SUCCESS
;
947 NTSTATUS
load_cache_chunk(device_extension
* Vcb
, chunk
* c
, PIRP Irp
) {
951 return STATUS_SUCCESS
;
953 Status
= load_free_space_cache(Vcb
, c
, Irp
);
954 if (!NT_SUCCESS(Status
)) {
955 ERR("load_free_space_cache returned %08x\n", Status
);
959 protect_superblocks(c
);
961 c
->cache_loaded
= TRUE
;
963 return STATUS_SUCCESS
;
966 static NTSTATUS
insert_cache_extent(fcb
* fcb
, UINT64 start
, UINT64 length
, LIST_ENTRY
* rollback
) {
968 LIST_ENTRY
* le
= fcb
->Vcb
->chunks
.Flink
;
972 flags
= fcb
->Vcb
->data_flags
;
974 while (le
!= &fcb
->Vcb
->chunks
) {
975 c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
977 if (!c
->readonly
&& !c
->reloc
) {
978 ExAcquireResourceExclusiveLite(&c
->lock
, TRUE
);
980 if (c
->chunk_item
->type
== flags
&& (c
->chunk_item
->size
- c
->used
) >= length
) {
981 if (insert_extent_chunk(fcb
->Vcb
, fcb
, c
, start
, length
, FALSE
, NULL
, NULL
, rollback
, BTRFS_COMPRESSION_NONE
, length
, FALSE
, 0))
982 return STATUS_SUCCESS
;
985 ExReleaseResourceLite(&c
->lock
);
991 Status
= alloc_chunk(fcb
->Vcb
, flags
, &c
, FALSE
);
993 if (!NT_SUCCESS(Status
)) {
994 ERR("alloc_chunk returned %08x\n", Status
);
998 ExAcquireResourceExclusiveLite(&c
->lock
, TRUE
);
1000 if (c
->chunk_item
->type
== flags
&& (c
->chunk_item
->size
- c
->used
) >= length
) {
1001 if (insert_extent_chunk(fcb
->Vcb
, fcb
, c
, start
, length
, FALSE
, NULL
, NULL
, rollback
, BTRFS_COMPRESSION_NONE
, length
, FALSE
, 0))
1002 return STATUS_SUCCESS
;
1005 ExReleaseResourceLite(&c
->lock
);
1007 return STATUS_DISK_FULL
;
1010 static NTSTATUS
allocate_cache_chunk(device_extension
* Vcb
, chunk
* c
, BOOL
* changed
, LIST_ENTRY
* batchlist
, PIRP Irp
, LIST_ENTRY
* rollback
) {
1013 UINT64 num_entries
, new_cache_size
, i
;
1015 BOOL realloc_extents
= FALSE
;
1017 // FIXME - also do bitmaps
1018 // FIXME - make sure this works when sector_size is not 4096
1024 // num_entries is the number of entries in c->space and c->deleting - it might
1025 // be slightly higher then what we end up writing, but doing it this way is much
1026 // quicker and simpler.
1027 if (!IsListEmpty(&c
->space
)) {
1028 le
= c
->space
.Flink
;
1029 while (le
!= &c
->space
) {
1036 if (!IsListEmpty(&c
->deleting
)) {
1037 le
= c
->deleting
.Flink
;
1038 while (le
!= &c
->deleting
) {
1045 new_cache_size
= sizeof(UINT64
) + (num_entries
* sizeof(FREE_SPACE_ENTRY
));
1047 num_sectors
= (UINT32
)sector_align(new_cache_size
, Vcb
->superblock
.sector_size
) / Vcb
->superblock
.sector_size
;
1048 num_sectors
= (UINT32
)sector_align(num_sectors
, CACHE_INCREMENTS
);
1050 // adjust for padding
1051 // FIXME - there must be a more efficient way of doing this
1052 new_cache_size
= sizeof(UINT64
) + (sizeof(UINT32
) * num_sectors
);
1053 for (i
= 0; i
< num_entries
; i
++) {
1054 if ((new_cache_size
/ Vcb
->superblock
.sector_size
) != ((new_cache_size
+ sizeof(FREE_SPACE_ENTRY
)) / Vcb
->superblock
.sector_size
))
1055 new_cache_size
= sector_align(new_cache_size
, Vcb
->superblock
.sector_size
);
1057 new_cache_size
+= sizeof(FREE_SPACE_ENTRY
);
1060 new_cache_size
= sector_align(new_cache_size
, CACHE_INCREMENTS
* Vcb
->superblock
.sector_size
);
1062 TRACE("chunk %llx: cache_size = %llx, new_cache_size = %llx\n", c
->offset
, c
->cache
? c
->cache
->inode_item
.st_size
: 0, new_cache_size
);
1065 if (new_cache_size
> c
->cache
->inode_item
.st_size
)
1066 realloc_extents
= TRUE
;
1068 le
= c
->cache
->extents
.Flink
;
1070 while (le
!= &c
->cache
->extents
) {
1071 extent
* ext
= CONTAINING_RECORD(le
, extent
, list_entry
);
1073 if (!ext
->ignore
&& (ext
->extent_data
.type
== EXTENT_TYPE_REGULAR
|| ext
->extent_data
.type
== EXTENT_TYPE_PREALLOC
)) {
1074 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)&ext
->extent_data
.data
[0];
1076 if (ed2
->size
!= 0) {
1077 chunk
* c2
= get_chunk_from_address(Vcb
, ed2
->address
);
1079 if (c2
&& (c2
->readonly
|| c2
->reloc
)) {
1080 realloc_extents
= TRUE
;
1092 FREE_SPACE_ITEM
* fsi
;
1098 c
->cache
= create_fcb(Vcb
, PagedPool
);
1100 ERR("out of memory\n");
1101 return STATUS_INSUFFICIENT_RESOURCES
;
1104 c
->cache
->Vcb
= Vcb
;
1106 c
->cache
->inode_item
.st_size
= new_cache_size
;
1107 c
->cache
->inode_item
.st_blocks
= new_cache_size
;
1108 c
->cache
->inode_item
.st_nlink
= 1;
1109 c
->cache
->inode_item
.st_mode
= S_IRUSR
| S_IWUSR
| __S_IFREG
;
1110 c
->cache
->inode_item
.flags
= BTRFS_INODE_NODATASUM
| BTRFS_INODE_NODATACOW
| BTRFS_INODE_NOCOMPRESS
| BTRFS_INODE_PREALLOC
;
1112 c
->cache
->Header
.IsFastIoPossible
= fast_io_possible(c
->cache
);
1113 c
->cache
->Header
.AllocationSize
.QuadPart
= 0;
1114 c
->cache
->Header
.FileSize
.QuadPart
= 0;
1115 c
->cache
->Header
.ValidDataLength
.QuadPart
= 0;
1117 c
->cache
->subvol
= Vcb
->root_root
;
1119 c
->cache
->inode
= InterlockedIncrement64(&Vcb
->root_root
->lastinode
);
1121 c
->cache
->type
= BTRFS_TYPE_FILE
;
1122 c
->cache
->created
= TRUE
;
1124 // create new free space entry
1126 fsi
= ExAllocatePoolWithTag(PagedPool
, sizeof(FREE_SPACE_ITEM
), ALLOC_TAG
);
1128 ERR("out of memory\n");
1129 free_fcb(Vcb
, c
->cache
);
1131 return STATUS_INSUFFICIENT_RESOURCES
;
1134 searchkey
.obj_id
= FREE_SPACE_CACHE_ID
;
1135 searchkey
.obj_type
= 0;
1136 searchkey
.offset
= c
->offset
;
1138 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, FALSE
, Irp
);
1139 if (!NT_SUCCESS(Status
)) {
1140 ERR("error - find_item returned %08x\n", Status
);
1142 free_fcb(Vcb
, c
->cache
);
1147 if (!keycmp(searchkey
, tp
.item
->key
)) {
1148 Status
= delete_tree_item(Vcb
, &tp
);
1149 if (!NT_SUCCESS(Status
)) {
1150 ERR("delete_tree_item returned %08x\n", Status
);
1152 free_fcb(Vcb
, c
->cache
);
1158 fsi
->key
.obj_id
= c
->cache
->inode
;
1159 fsi
->key
.obj_type
= TYPE_INODE_ITEM
;
1160 fsi
->key
.offset
= 0;
1162 Status
= insert_tree_item(Vcb
, Vcb
->root_root
, FREE_SPACE_CACHE_ID
, 0, c
->offset
, fsi
, sizeof(FREE_SPACE_ITEM
), NULL
, Irp
);
1163 if (!NT_SUCCESS(Status
)) {
1164 ERR("insert_tree_item returned %08x\n", Status
);
1166 free_fcb(Vcb
, c
->cache
);
1173 Status
= insert_cache_extent(c
->cache
, 0, new_cache_size
, rollback
);
1174 if (!NT_SUCCESS(Status
)) {
1175 ERR("insert_cache_extent returned %08x\n", Status
);
1176 free_fcb(Vcb
, c
->cache
);
1181 c
->cache
->extents_changed
= TRUE
;
1182 InsertTailList(&Vcb
->all_fcbs
, &c
->cache
->list_entry_all
);
1184 Status
= flush_fcb(c
->cache
, TRUE
, batchlist
, Irp
);
1185 if (!NT_SUCCESS(Status
)) {
1186 ERR("flush_fcb returned %08x\n", Status
);
1187 free_fcb(Vcb
, c
->cache
);
1193 } else if (realloc_extents
) {
1197 TRACE("reallocating extents\n");
1199 // add free_space entry to tree cache
1201 searchkey
.obj_id
= FREE_SPACE_CACHE_ID
;
1202 searchkey
.obj_type
= 0;
1203 searchkey
.offset
= c
->offset
;
1205 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, FALSE
, Irp
);
1206 if (!NT_SUCCESS(Status
)) {
1207 ERR("error - find_item returned %08x\n", Status
);
1211 if (keycmp(searchkey
, tp
.item
->key
)) {
1212 ERR("could not find (%llx,%x,%llx) in root_root\n", searchkey
.obj_id
, searchkey
.obj_type
, searchkey
.offset
);
1213 return STATUS_INTERNAL_ERROR
;
1216 if (tp
.item
->size
< sizeof(FREE_SPACE_ITEM
)) {
1217 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, sizeof(FREE_SPACE_ITEM
));
1218 return STATUS_INTERNAL_ERROR
;
1221 tp
.tree
->write
= TRUE
;
1223 // remove existing extents
1225 if (c
->cache
->inode_item
.st_size
> 0) {
1226 le
= c
->cache
->extents
.Flink
;
1228 while (le
!= &c
->cache
->extents
) {
1229 extent
* ext
= CONTAINING_RECORD(le
, extent
, list_entry
);
1231 if (!ext
->ignore
&& (ext
->extent_data
.type
== EXTENT_TYPE_REGULAR
|| ext
->extent_data
.type
== EXTENT_TYPE_PREALLOC
)) {
1232 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)&ext
->extent_data
.data
[0];
1234 if (ed2
->size
!= 0) {
1235 chunk
* c2
= get_chunk_from_address(Vcb
, ed2
->address
);
1239 c2
->space_changed
= TRUE
;
1247 Status
= excise_extents(Vcb
, c
->cache
, 0, c
->cache
->inode_item
.st_size
, Irp
, rollback
);
1248 if (!NT_SUCCESS(Status
)) {
1249 ERR("excise_extents returned %08x\n", Status
);
1256 Status
= insert_cache_extent(c
->cache
, 0, new_cache_size
, rollback
);
1257 if (!NT_SUCCESS(Status
)) {
1258 ERR("insert_cache_extent returned %08x\n", Status
);
1262 // modify INODE_ITEM
1264 c
->cache
->inode_item
.st_size
= new_cache_size
;
1265 c
->cache
->inode_item
.st_blocks
= new_cache_size
;
1267 Status
= flush_fcb(c
->cache
, TRUE
, batchlist
, Irp
);
1268 if (!NT_SUCCESS(Status
)) {
1269 ERR("flush_fcb returned %08x\n", Status
);
1278 // add INODE_ITEM and free_space entry to tree cache, for writing later
1280 searchkey
.obj_id
= c
->cache
->inode
;
1281 searchkey
.obj_type
= TYPE_INODE_ITEM
;
1282 searchkey
.offset
= 0;
1284 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, FALSE
, Irp
);
1285 if (!NT_SUCCESS(Status
)) {
1286 ERR("error - find_item returned %08x\n", Status
);
1290 if (keycmp(searchkey
, tp
.item
->key
)) {
1293 ii
= ExAllocatePoolWithTag(PagedPool
, sizeof(INODE_ITEM
), ALLOC_TAG
);
1295 ERR("out of memory\n");
1296 return STATUS_INSUFFICIENT_RESOURCES
;
1299 RtlCopyMemory(ii
, &c
->cache
->inode_item
, sizeof(INODE_ITEM
));
1301 Status
= insert_tree_item(Vcb
, Vcb
->root_root
, c
->cache
->inode
, TYPE_INODE_ITEM
, 0, ii
, sizeof(INODE_ITEM
), NULL
, Irp
);
1302 if (!NT_SUCCESS(Status
)) {
1303 ERR("insert_tree_item returned %08x\n", Status
);
1310 if (tp
.item
->size
< sizeof(INODE_ITEM
)) {
1311 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, sizeof(INODE_ITEM
));
1312 return STATUS_INTERNAL_ERROR
;
1315 tp
.tree
->write
= TRUE
;
1318 searchkey
.obj_id
= FREE_SPACE_CACHE_ID
;
1319 searchkey
.obj_type
= 0;
1320 searchkey
.offset
= c
->offset
;
1322 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, FALSE
, Irp
);
1323 if (!NT_SUCCESS(Status
)) {
1324 ERR("error - find_item returned %08x\n", Status
);
1328 if (keycmp(searchkey
, tp
.item
->key
)) {
1329 ERR("could not find (%llx,%x,%llx) in root_root\n", searchkey
.obj_id
, searchkey
.obj_type
, searchkey
.offset
);
1330 return STATUS_INTERNAL_ERROR
;
1333 if (tp
.item
->size
< sizeof(FREE_SPACE_ITEM
)) {
1334 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, sizeof(FREE_SPACE_ITEM
));
1335 return STATUS_INTERNAL_ERROR
;
1338 tp
.tree
->write
= TRUE
;
1341 // FIXME - reduce inode allocation if cache is shrinking. Make sure to avoid infinite write loops
1343 return STATUS_SUCCESS
;
1346 NTSTATUS
allocate_cache(device_extension
* Vcb
, BOOL
* changed
, PIRP Irp
, LIST_ENTRY
* rollback
) {
1347 LIST_ENTRY
*le
, batchlist
;
1352 InitializeListHead(&batchlist
);
1354 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, TRUE
);
1356 le
= Vcb
->chunks
.Flink
;
1357 while (le
!= &Vcb
->chunks
) {
1358 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
1360 if (c
->space_changed
) {
1363 ExAcquireResourceExclusiveLite(&c
->lock
, TRUE
);
1364 Status
= allocate_cache_chunk(Vcb
, c
, &b
, &batchlist
, Irp
, rollback
);
1365 ExReleaseResourceLite(&c
->lock
);
1370 if (!NT_SUCCESS(Status
)) {
1371 ERR("allocate_cache_chunk(%llx) returned %08x\n", c
->offset
, Status
);
1372 ExReleaseResourceLite(&Vcb
->chunk_lock
);
1373 clear_batch_list(Vcb
, &batchlist
);
1381 ExReleaseResourceLite(&Vcb
->chunk_lock
);
1383 Status
= commit_batch_list(Vcb
, &batchlist
, Irp
);
1384 if (!NT_SUCCESS(Status
)) {
1385 ERR("commit_batch_list returned %08x\n", Status
);
1389 return STATUS_SUCCESS
;
1392 static void add_rollback_space(LIST_ENTRY
* rollback
, BOOL add
, LIST_ENTRY
* list
, LIST_ENTRY
* list_size
, UINT64 address
, UINT64 length
, chunk
* c
) {
1395 rs
= ExAllocatePoolWithTag(PagedPool
, sizeof(rollback_space
), ALLOC_TAG
);
1397 ERR("out of memory\n");
1402 rs
->list_size
= list_size
;
1403 rs
->address
= address
;
1404 rs
->length
= length
;
1407 add_rollback(rollback
, add
? ROLLBACK_ADD_SPACE
: ROLLBACK_SUBTRACT_SPACE
, rs
);
1410 void space_list_add2(LIST_ENTRY
* list
, LIST_ENTRY
* list_size
, UINT64 address
, UINT64 length
, chunk
* c
, LIST_ENTRY
* rollback
) {
1414 if (IsListEmpty(list
)) {
1415 s
= ExAllocatePoolWithTag(PagedPool
, sizeof(space
), ALLOC_TAG
);
1418 ERR("out of memory\n");
1422 s
->address
= address
;
1424 InsertTailList(list
, &s
->list_entry
);
1427 InsertTailList(list_size
, &s
->list_entry_size
);
1430 add_rollback_space(rollback
, TRUE
, list
, list_size
, address
, length
, c
);
1437 s2
= CONTAINING_RECORD(le
, space
, list_entry
);
1439 // old entry envelops new one completely
1440 if (s2
->address
<= address
&& s2
->address
+ s2
->size
>= address
+ length
)
1443 // new entry envelops old one completely
1444 if (address
<= s2
->address
&& address
+ length
>= s2
->address
+ s2
->size
) {
1445 if (address
< s2
->address
) {
1447 add_rollback_space(rollback
, TRUE
, list
, list_size
, address
, s2
->address
- address
, c
);
1449 s2
->size
+= s2
->address
- address
;
1450 s2
->address
= address
;
1452 while (s2
->list_entry
.Blink
!= list
) {
1453 space
* s3
= CONTAINING_RECORD(s2
->list_entry
.Blink
, space
, list_entry
);
1455 if (s3
->address
+ s3
->size
== s2
->address
) {
1456 s2
->address
= s3
->address
;
1457 s2
->size
+= s3
->size
;
1459 RemoveEntryList(&s3
->list_entry
);
1462 RemoveEntryList(&s3
->list_entry_size
);
1470 if (length
> s2
->size
) {
1472 add_rollback_space(rollback
, TRUE
, list
, list_size
, s2
->address
+ s2
->size
, address
+ length
- s2
->address
- s2
->size
, c
);
1476 while (s2
->list_entry
.Flink
!= list
) {
1477 space
* s3
= CONTAINING_RECORD(s2
->list_entry
.Flink
, space
, list_entry
);
1479 if (s3
->address
<= s2
->address
+ s2
->size
) {
1480 s2
->size
= max(s2
->size
, s3
->address
+ s3
->size
- s2
->address
);
1482 RemoveEntryList(&s3
->list_entry
);
1485 RemoveEntryList(&s3
->list_entry_size
);
1494 RemoveEntryList(&s2
->list_entry_size
);
1495 order_space_entry(s2
, list_size
);
1501 // new entry overlaps start of old one
1502 if (address
< s2
->address
&& address
+ length
>= s2
->address
) {
1504 add_rollback_space(rollback
, TRUE
, list
, list_size
, address
, s2
->address
- address
, c
);
1506 s2
->size
+= s2
->address
- address
;
1507 s2
->address
= address
;
1509 while (s2
->list_entry
.Blink
!= list
) {
1510 space
* s3
= CONTAINING_RECORD(s2
->list_entry
.Blink
, space
, list_entry
);
1512 if (s3
->address
+ s3
->size
== s2
->address
) {
1513 s2
->address
= s3
->address
;
1514 s2
->size
+= s3
->size
;
1516 RemoveEntryList(&s3
->list_entry
);
1519 RemoveEntryList(&s3
->list_entry_size
);
1527 RemoveEntryList(&s2
->list_entry_size
);
1528 order_space_entry(s2
, list_size
);
1534 // new entry overlaps end of old one
1535 if (address
<= s2
->address
+ s2
->size
&& address
+ length
> s2
->address
+ s2
->size
) {
1537 add_rollback_space(rollback
, TRUE
, list
, list_size
, address
, s2
->address
+ s2
->size
- address
, c
);
1539 s2
->size
= address
+ length
- s2
->address
;
1541 while (s2
->list_entry
.Flink
!= list
) {
1542 space
* s3
= CONTAINING_RECORD(s2
->list_entry
.Flink
, space
, list_entry
);
1544 if (s3
->address
<= s2
->address
+ s2
->size
) {
1545 s2
->size
= max(s2
->size
, s3
->address
+ s3
->size
- s2
->address
);
1547 RemoveEntryList(&s3
->list_entry
);
1550 RemoveEntryList(&s3
->list_entry_size
);
1558 RemoveEntryList(&s2
->list_entry_size
);
1559 order_space_entry(s2
, list_size
);
1565 // add completely separate entry
1566 if (s2
->address
> address
+ length
) {
1567 s
= ExAllocatePoolWithTag(PagedPool
, sizeof(space
), ALLOC_TAG
);
1570 ERR("out of memory\n");
1575 add_rollback_space(rollback
, TRUE
, list
, list_size
, address
, length
, c
);
1577 s
->address
= address
;
1579 InsertHeadList(s2
->list_entry
.Blink
, &s
->list_entry
);
1582 order_space_entry(s
, list_size
);
1588 } while (le
!= list
);
1590 // check if contiguous with last entry
1591 if (s2
->address
+ s2
->size
== address
) {
1595 RemoveEntryList(&s2
->list_entry_size
);
1596 order_space_entry(s2
, list_size
);
1602 // otherwise, insert at end
1603 s
= ExAllocatePoolWithTag(PagedPool
, sizeof(space
), ALLOC_TAG
);
1606 ERR("out of memory\n");
1610 s
->address
= address
;
1612 InsertTailList(list
, &s
->list_entry
);
1615 order_space_entry(s
, list_size
);
1618 add_rollback_space(rollback
, TRUE
, list
, list_size
, address
, length
, c
);
1621 static void space_list_merge(LIST_ENTRY
* spacelist
, LIST_ENTRY
* spacelist_size
, LIST_ENTRY
* deleting
) {
1624 if (!IsListEmpty(deleting
)) {
1625 le
= deleting
->Flink
;
1626 while (le
!= deleting
) {
1627 space
* s
= CONTAINING_RECORD(le
, space
, list_entry
);
1629 space_list_add2(spacelist
, spacelist_size
, s
->address
, s
->size
, NULL
, NULL
);
1636 static NTSTATUS
update_chunk_cache(device_extension
* Vcb
, chunk
* c
, BTRFS_TIME
* now
, LIST_ENTRY
* batchlist
, PIRP Irp
, LIST_ENTRY
* rollback
) {
1640 FREE_SPACE_ITEM
* fsi
;
1642 UINT64 num_entries
, *cachegen
, off
;
1643 UINT32
*checksums
, num_sectors
, i
;
1646 space_list_merge(&c
->space
, &c
->space_size
, &c
->deleting
);
1648 data
= ExAllocatePoolWithTag(NonPagedPool
, (ULONG
)c
->cache
->inode_item
.st_size
, ALLOC_TAG
);
1650 ERR("out of memory\n");
1651 return STATUS_INSUFFICIENT_RESOURCES
;
1654 RtlZeroMemory(data
, (ULONG
)c
->cache
->inode_item
.st_size
);
1657 num_sectors
= (UINT32
)(c
->cache
->inode_item
.st_size
/ Vcb
->superblock
.sector_size
);
1658 off
= (sizeof(UINT32
) * num_sectors
) + sizeof(UINT64
);
1660 le
= c
->space
.Flink
;
1661 while (le
!= &c
->space
) {
1662 FREE_SPACE_ENTRY
* fse
;
1664 space
* s
= CONTAINING_RECORD(le
, space
, list_entry
);
1666 if ((off
+ sizeof(FREE_SPACE_ENTRY
)) / Vcb
->superblock
.sector_size
!= off
/ Vcb
->superblock
.sector_size
)
1667 off
= sector_align(off
, Vcb
->superblock
.sector_size
);
1669 fse
= (FREE_SPACE_ENTRY
*)((UINT8
*)data
+ off
);
1671 fse
->offset
= s
->address
;
1672 fse
->size
= s
->size
;
1673 fse
->type
= FREE_SPACE_EXTENT
;
1676 off
+= sizeof(FREE_SPACE_ENTRY
);
1681 // update INODE_ITEM
1683 c
->cache
->inode_item
.generation
= Vcb
->superblock
.generation
;
1684 c
->cache
->inode_item
.transid
= Vcb
->superblock
.generation
;
1685 c
->cache
->inode_item
.sequence
++;
1686 c
->cache
->inode_item
.st_ctime
= *now
;
1688 Status
= flush_fcb(c
->cache
, TRUE
, batchlist
, Irp
);
1689 if (!NT_SUCCESS(Status
)) {
1690 ERR("flush_fcb returned %08x\n", Status
);
1694 // update free_space item
1696 searchkey
.obj_id
= FREE_SPACE_CACHE_ID
;
1697 searchkey
.obj_type
= 0;
1698 searchkey
.offset
= c
->offset
;
1700 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, FALSE
, Irp
);
1701 if (!NT_SUCCESS(Status
)) {
1702 ERR("error - find_item returned %08x\n", Status
);
1706 if (keycmp(searchkey
, tp
.item
->key
)) {
1707 ERR("could not find (%llx,%x,%llx) in root_root\n", searchkey
.obj_id
, searchkey
.obj_type
, searchkey
.offset
);
1708 Status
= STATUS_INTERNAL_ERROR
;
1712 if (tp
.item
->size
< sizeof(FREE_SPACE_ITEM
)) {
1713 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, sizeof(FREE_SPACE_ITEM
));
1714 Status
= STATUS_INTERNAL_ERROR
;
1718 fsi
= (FREE_SPACE_ITEM
*)tp
.item
->data
;
1720 fsi
->generation
= Vcb
->superblock
.generation
;
1721 fsi
->num_entries
= num_entries
;
1722 fsi
->num_bitmaps
= 0;
1724 // set cache generation
1726 cachegen
= (UINT64
*)((UINT8
*)data
+ (sizeof(UINT32
) * num_sectors
));
1727 *cachegen
= Vcb
->superblock
.generation
;
1729 // calculate cache checksums
1731 checksums
= (UINT32
*)data
;
1733 // FIXME - if we know sector is fully zeroed, use cached checksum
1735 for (i
= 0; i
< num_sectors
; i
++) {
1736 if (i
* Vcb
->superblock
.sector_size
> sizeof(UINT32
) * num_sectors
)
1737 checksums
[i
] = ~calc_crc32c(0xffffffff, (UINT8
*)data
+ (i
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1738 else if ((i
+ 1) * Vcb
->superblock
.sector_size
< sizeof(UINT32
) * num_sectors
)
1739 checksums
[i
] = 0; // FIXME - test this
1741 checksums
[i
] = ~calc_crc32c(0xffffffff, (UINT8
*)data
+ (sizeof(UINT32
) * num_sectors
), ((i
+ 1) * Vcb
->superblock
.sector_size
) - (sizeof(UINT32
) * num_sectors
));
1746 Status
= do_write_file(c
->cache
, 0, c
->cache
->inode_item
.st_size
, data
, NULL
, FALSE
, 0, rollback
);
1747 if (!NT_SUCCESS(Status
)) {
1748 ERR("do_write_file returned %08x\n", Status
);
1750 // Writing the cache isn't critical, so we don't return an error if writing fails. This means
1751 // we can still flush on a degraded mount if metadata is RAID1 but data is RAID0.
1754 Status
= STATUS_SUCCESS
;
1762 static NTSTATUS
update_chunk_cache_tree(device_extension
* Vcb
, chunk
* c
, LIST_ENTRY
* batchlist
) {
1765 FREE_SPACE_INFO
* fsi
;
1767 fsi
= ExAllocatePoolWithTag(PagedPool
, sizeof(FREE_SPACE_INFO
), ALLOC_TAG
);
1769 ERR("out of memory\n");
1770 return STATUS_INSUFFICIENT_RESOURCES
;
1773 space_list_merge(&c
->space
, &c
->space_size
, &c
->deleting
);
1778 le
= c
->space
.Flink
;
1779 while (le
!= &c
->space
) {
1780 space
* s
= CONTAINING_RECORD(le
, space
, list_entry
);
1784 Status
= insert_tree_item_batch(batchlist
, Vcb
, Vcb
->space_root
, s
->address
, TYPE_FREE_SPACE_EXTENT
, s
->size
,
1785 NULL
, 0, Batch_Insert
);
1786 if (!NT_SUCCESS(Status
)) {
1787 ERR("insert_tree_item_batch returned %08x\n", Status
);
1795 Status
= insert_tree_item_batch(batchlist
, Vcb
, Vcb
->space_root
, c
->offset
, TYPE_FREE_SPACE_INFO
, c
->chunk_item
->size
,
1796 NULL
, 0, Batch_DeleteFreeSpace
);
1797 if (!NT_SUCCESS(Status
)) {
1798 ERR("insert_tree_item_batch returned %08x\n", Status
);
1803 Status
= insert_tree_item_batch(batchlist
, Vcb
, Vcb
->space_root
, c
->offset
, TYPE_FREE_SPACE_INFO
, c
->chunk_item
->size
,
1804 fsi
, sizeof(FREE_SPACE_INFO
), Batch_Insert
);
1805 if (!NT_SUCCESS(Status
)) {
1806 ERR("insert_tree_item_batch returned %08x\n", Status
);
1811 return STATUS_SUCCESS
;
1814 NTSTATUS
update_chunk_caches(device_extension
* Vcb
, PIRP Irp
, LIST_ENTRY
* rollback
) {
1815 LIST_ENTRY
*le
, batchlist
;
1821 KeQuerySystemTime(&time
);
1822 win_time_to_unix(time
, &now
);
1824 InitializeListHead(&batchlist
);
1826 le
= Vcb
->chunks
.Flink
;
1827 while (le
!= &Vcb
->chunks
) {
1828 c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
1830 if (c
->space_changed
) {
1831 ExAcquireResourceExclusiveLite(&c
->lock
, TRUE
);
1832 Status
= update_chunk_cache(Vcb
, c
, &now
, &batchlist
, Irp
, rollback
);
1833 ExReleaseResourceLite(&c
->lock
);
1835 if (!NT_SUCCESS(Status
)) {
1836 ERR("update_chunk_cache(%llx) returned %08x\n", c
->offset
, Status
);
1837 clear_batch_list(Vcb
, &batchlist
);
1845 Status
= commit_batch_list(Vcb
, &batchlist
, Irp
);
1846 if (!NT_SUCCESS(Status
)) {
1847 ERR("commit_batch_list returned %08x\n", Status
);
1851 le
= Vcb
->chunks
.Flink
;
1852 while (le
!= &Vcb
->chunks
) {
1853 c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
1855 if (c
->changed
&& (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
|| c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)) {
1856 ExAcquireResourceExclusiveLite(&c
->partial_stripes_lock
, TRUE
);
1858 while (!IsListEmpty(&c
->partial_stripes
)) {
1859 partial_stripe
* ps
= CONTAINING_RECORD(RemoveHeadList(&c
->partial_stripes
), partial_stripe
, list_entry
);
1861 Status
= flush_partial_stripe(Vcb
, c
, ps
);
1864 ExFreePool(ps
->bmparr
);
1868 if (!NT_SUCCESS(Status
)) {
1869 ERR("flush_partial_stripe returned %08x\n", Status
);
1870 ExReleaseResourceLite(&c
->partial_stripes_lock
);
1875 ExReleaseResourceLite(&c
->partial_stripes_lock
);
1881 return STATUS_SUCCESS
;
1884 NTSTATUS
update_chunk_caches_tree(device_extension
* Vcb
, PIRP Irp
) {
1885 LIST_ENTRY
*le
, batchlist
;
1889 Vcb
->superblock
.compat_ro_flags
|= BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE_VALID
;
1891 InitializeListHead(&batchlist
);
1893 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, TRUE
);
1895 le
= Vcb
->chunks
.Flink
;
1896 while (le
!= &Vcb
->chunks
) {
1897 c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
1899 if (c
->space_changed
) {
1900 ExAcquireResourceExclusiveLite(&c
->lock
, TRUE
);
1901 Status
= update_chunk_cache_tree(Vcb
, c
, &batchlist
);
1902 ExReleaseResourceLite(&c
->lock
);
1904 if (!NT_SUCCESS(Status
)) {
1905 ERR("update_chunk_cache_tree(%llx) returned %08x\n", c
->offset
, Status
);
1906 ExReleaseResourceLite(&Vcb
->chunk_lock
);
1907 clear_batch_list(Vcb
, &batchlist
);
1915 ExReleaseResourceLite(&Vcb
->chunk_lock
);
1917 Status
= commit_batch_list(Vcb
, &batchlist
, Irp
);
1918 if (!NT_SUCCESS(Status
)) {
1919 ERR("commit_batch_list returned %08x\n", Status
);
1923 return STATUS_SUCCESS
;
1926 void space_list_add(chunk
* c
, UINT64 address
, UINT64 length
, LIST_ENTRY
* rollback
) {
1927 TRACE("(%p, %llx, %llx, %p)\n", c
, address
, length
, rollback
);
1930 c
->space_changed
= TRUE
;
1932 space_list_add2(&c
->deleting
, NULL
, address
, length
, c
, rollback
);
1935 void space_list_subtract2(LIST_ENTRY
* list
, LIST_ENTRY
* list_size
, UINT64 address
, UINT64 length
, chunk
* c
, LIST_ENTRY
* rollback
) {
1936 LIST_ENTRY
*le
, *le2
;
1939 if (IsListEmpty(list
))
1943 while (le
!= list
) {
1944 s2
= CONTAINING_RECORD(le
, space
, list_entry
);
1947 if (s2
->address
>= address
+ length
)
1950 if (s2
->address
>= address
&& s2
->address
+ s2
->size
<= address
+ length
) { // remove entry entirely
1952 add_rollback_space(rollback
, FALSE
, list
, list_size
, s2
->address
, s2
->size
, c
);
1954 RemoveEntryList(&s2
->list_entry
);
1957 RemoveEntryList(&s2
->list_entry_size
);
1960 } else if (address
+ length
> s2
->address
&& address
+ length
< s2
->address
+ s2
->size
) {
1961 if (address
> s2
->address
) { // cut out hole
1963 add_rollback_space(rollback
, FALSE
, list
, list_size
, address
, length
, c
);
1965 s
= ExAllocatePoolWithTag(PagedPool
, sizeof(space
), ALLOC_TAG
);
1968 ERR("out of memory\n");
1972 s
->address
= s2
->address
;
1973 s
->size
= address
- s2
->address
;
1974 InsertHeadList(s2
->list_entry
.Blink
, &s
->list_entry
);
1976 s2
->size
= s2
->address
+ s2
->size
- address
- length
;
1977 s2
->address
= address
+ length
;
1980 RemoveEntryList(&s2
->list_entry_size
);
1981 order_space_entry(s2
, list_size
);
1982 order_space_entry(s
, list_size
);
1986 } else { // remove start of entry
1988 add_rollback_space(rollback
, FALSE
, list
, list_size
, s2
->address
, address
+ length
- s2
->address
, c
);
1990 s2
->size
-= address
+ length
- s2
->address
;
1991 s2
->address
= address
+ length
;
1994 RemoveEntryList(&s2
->list_entry_size
);
1995 order_space_entry(s2
, list_size
);
1998 } else if (address
> s2
->address
&& address
< s2
->address
+ s2
->size
) { // remove end of entry
2000 add_rollback_space(rollback
, FALSE
, list
, list_size
, address
, s2
->address
+ s2
->size
- address
, c
);
2002 s2
->size
= address
- s2
->address
;
2005 RemoveEntryList(&s2
->list_entry_size
);
2006 order_space_entry(s2
, list_size
);
2014 void space_list_subtract(chunk
* c
, BOOL deleting
, UINT64 address
, UINT64 length
, LIST_ENTRY
* rollback
) {
2017 list
= deleting
? &c
->deleting
: &c
->space
;
2020 c
->space_changed
= TRUE
;
2022 space_list_subtract2(list
, deleting
? NULL
: &c
->space_size
, address
, length
, c
, rollback
);