1 /* Copyright (c) Mark Harmstone 2016-17
3 * This file is part of WinBtrfs.
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
18 #include "btrfs_drv.h"
19 #include "btrfsioctl.h"
30 LIST_ENTRY list_entry
;
42 metadata_reloc
* parent
;
44 LIST_ENTRY list_entry
;
54 LIST_ENTRY list_entry
;
66 metadata_reloc
* parent
;
67 LIST_ENTRY list_entry
;
71 #ifndef _MSC_VER // not in mingw yet
72 #define DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED 0x80000000
76 #define BALANCE_UNIT 0x100000 // only read 1 MB at a time
// Creates a metadata_reloc record for the tree-block extent item that tp points at
// and appends it to the `items` list.
//
// Visible steps: allocate the record, remember the extent's address and EXTENT_ITEM,
// delete the item from the tree, hand the block's space back to its chunk, collect the
// TREE_BLOCK_REF / SHARED_BLOCK_REF backrefs (inline ones first, then stand-alone items
// with the same object id) on mr->refs, and queue the record.
//
// Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES when an allocation fails, or
// STATUS_INTERNAL_ERROR when the inline extent data is truncated or of an unknown type.
// NOTE(review): how `skinny` and `mr2` are consumed is not visible in this listing -
// presumably *mr2 receives the new record; confirm against the full source.
78 static NTSTATUS
add_metadata_reloc(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, LIST_ENTRY
* items
, traverse_ptr
* tp
,
79 BOOL skinny
, metadata_reloc
** mr2
, chunk
* c
, LIST_ENTRY
* rollback
) {
87 mr
= ExAllocatePoolWithTag(PagedPool
, sizeof(metadata_reloc
), ALLOC_TAG
);
89 ERR("out of memory\n");
90 return STATUS_INSUFFICIENT_RESOURCES
;
// the extent's address is the object id of its key
93 mr
->address
= tp
->item
->key
.obj_id
;
95 mr
->ei
= (EXTENT_ITEM
*)tp
->item
->data
;
97 InitializeListHead(&mr
->refs
);
// the old extent item is removed from the tree
99 Status
= delete_tree_item(Vcb
, tp
);
100 if (!NT_SUCCESS(Status
)) {
101 ERR("delete_tree_item returned %08x\n", Status
);
// look up the chunk that contains this extent
// (presumably only when the caller did not supply one - TODO confirm)
107 c
= get_chunk_from_address(Vcb
, tp
->item
->key
.obj_id
);
// under the chunk lock, give the node's space back: lower `used` by one node
// and add the range to the chunk's space list
110 ExAcquireResourceExclusiveLite(&c
->lock
, TRUE
);
112 c
->used
-= Vcb
->superblock
.node_size
;
114 space_list_add(c
, tp
->item
->key
.obj_id
, Vcb
->superblock
.node_size
, rollback
);
116 ExReleaseResourceLite(&c
->lock
);
119 ei
= (EXTENT_ITEM
*)tp
->item
->data
;
// len / ptr track the inline data that follows the EXTENT_ITEM header
122 len
= tp
->item
->size
- sizeof(EXTENT_ITEM
);
123 ptr
= (UINT8
*)tp
->item
->data
+ sizeof(EXTENT_ITEM
);
// also step over an EXTENT_ITEM2
// (presumably only for non-skinny items - the guarding condition is not shown here)
125 len
-= sizeof(EXTENT_ITEM2
);
126 ptr
+= sizeof(EXTENT_ITEM2
);
// each inline entry is a one-byte type followed by a type-specific payload
130 UINT8 secttype
= *ptr
;
// payload size of the two recognised ref types; 0 for anything else
131 UINT16 sectlen
= secttype
== TYPE_TREE_BLOCK_REF
? sizeof(TREE_BLOCK_REF
) : (secttype
== TYPE_SHARED_BLOCK_REF
? sizeof(SHARED_BLOCK_REF
) : 0);
132 metadata_reloc_ref
* ref
;
137 ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp
->item
->key
.obj_id
, tp
->item
->key
.obj_type
, tp
->item
->key
.offset
, len
, sectlen
);
138 return STATUS_INTERNAL_ERROR
;
142 ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp
->item
->key
.obj_id
, tp
->item
->key
.obj_type
, tp
->item
->key
.offset
, secttype
);
143 return STATUS_INTERNAL_ERROR
;
146 ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(metadata_reloc_ref
), ALLOC_TAG
);
148 ERR("out of memory\n");
149 return STATUS_INSUFFICIENT_RESOURCES
;
// copy the payload (which starts one byte after the type) into the matching member
152 if (secttype
== TYPE_TREE_BLOCK_REF
) {
153 ref
->type
= TYPE_TREE_BLOCK_REF
;
154 RtlCopyMemory(&ref
->tbr
, ptr
+ sizeof(UINT8
), sizeof(TREE_BLOCK_REF
));
156 } else if (secttype
== TYPE_SHARED_BLOCK_REF
) {
157 ref
->type
= TYPE_SHARED_BLOCK_REF
;
158 RtlCopyMemory(&ref
->sbr
, ptr
+ sizeof(UINT8
), sizeof(SHARED_BLOCK_REF
));
161 ERR("unexpected tree type %x\n", secttype
);
163 return STATUS_INTERNAL_ERROR
;
168 InsertTailList(&mr
->refs
, &ref
->list_entry
);
// advance past the type byte and its payload
171 ptr
+= sizeof(UINT8
) + sectlen
;
174 if (inline_rc
< ei
->refcount
) { // look for non-inline entries
// walk forward from a copy of tp so that the caller's traverse_ptr is left alone
175 traverse_ptr tp2
= *tp
, next_tp
;
177 while (find_next_item(Vcb
, &tp2
, &next_tp
, FALSE
, NULL
)) {
// only items with the same object id as the extent are considered
180 if (tp2
.item
->key
.obj_id
== tp
->item
->key
.obj_id
) {
181 if (tp2
.item
->key
.obj_type
== TYPE_TREE_BLOCK_REF
) {
182 metadata_reloc_ref
* ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(metadata_reloc_ref
), ALLOC_TAG
);
184 ERR("out of memory\n");
185 return STATUS_INSUFFICIENT_RESOURCES
;
// for stand-alone backref items the ref's offset is taken from the key offset
188 ref
->type
= TYPE_TREE_BLOCK_REF
;
189 ref
->tbr
.offset
= tp2
.item
->key
.offset
;
192 InsertTailList(&mr
->refs
, &ref
->list_entry
);
194 Status
= delete_tree_item(Vcb
, &tp2
);
195 if (!NT_SUCCESS(Status
)) {
196 ERR("delete_tree_item returned %08x\n", Status
);
199 } else if (tp2
.item
->key
.obj_type
== TYPE_SHARED_BLOCK_REF
) {
200 metadata_reloc_ref
* ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(metadata_reloc_ref
), ALLOC_TAG
);
202 ERR("out of memory\n");
203 return STATUS_INSUFFICIENT_RESOURCES
;
206 ref
->type
= TYPE_SHARED_BLOCK_REF
;
207 ref
->sbr
.offset
= tp2
.item
->key
.offset
;
210 InsertTailList(&mr
->refs
, &ref
->list_entry
);
212 Status
= delete_tree_item(Vcb
, &tp2
);
213 if (!NT_SUCCESS(Status
)) {
214 ERR("delete_tree_item returned %08x\n", Status
);
// queue the finished record on the caller's list
223 InsertTailList(items
, &mr
->list_entry
);
228 return STATUS_SUCCESS
;
// Makes sure the tree block at `address` has a metadata_reloc record on `items`.
//
// If a record with that address is already queued the function returns STATUS_SUCCESS
// straight away. Otherwise it looks the block up in the extent root, accepts either a
// METADATA_ITEM or a tree-block EXTENT_ITEM, and passes the item to add_metadata_reloc
// (with a NULL chunk argument), forwarding mr2 and rollback.
//
// Returns STATUS_INTERNAL_ERROR when no usable extent item exists for the address.
// NOTE(review): the assignment to *mr2 for an already-queued record is not visible in
// this listing - presumably it is set to the matching record; confirm.
231 static NTSTATUS
add_metadata_reloc_parent(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, LIST_ENTRY
* items
,
232 UINT64 address
, metadata_reloc
** mr2
, LIST_ENTRY
* rollback
) {
// first see whether this block is already queued
240 while (le
!= items
) {
241 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
243 if (mr
->address
== address
) {
245 return STATUS_SUCCESS
;
// not queued yet: search the extent tree for the block's item
251 searchkey
.obj_id
= address
;
252 searchkey
.obj_type
= TYPE_METADATA_ITEM
;
253 searchkey
.offset
= 0xffffffffffffffff;
255 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, FALSE
, NULL
);
256 if (!NT_SUCCESS(Status
)) {
257 ERR("find_item returned %08x\n", Status
);
// accepted forms: a METADATA_ITEM for this address, or an EXTENT_ITEM for this address
// whose key offset equals node_size; in both cases the data must hold an EXTENT_ITEM
261 if (tp
.item
->key
.obj_id
== address
&& tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
&& tp
.item
->size
>= sizeof(EXTENT_ITEM
))
263 else if (tp
.item
->key
.obj_id
== address
&& tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
&& tp
.item
->key
.offset
== Vcb
->superblock
.node_size
&&
264 tp
.item
->size
>= sizeof(EXTENT_ITEM
)) {
265 EXTENT_ITEM
* ei
= (EXTENT_ITEM
*)tp
.item
->data
;
// an EXTENT_ITEM must be flagged as a tree block to be accepted
267 if (!(ei
->flags
& EXTENT_ITEM_TREE_BLOCK
)) {
268 ERR("EXTENT_ITEM for %llx found, but tree flag not set\n", address
);
269 return STATUS_INTERNAL_ERROR
;
// neither form matched
272 ERR("could not find valid EXTENT_ITEM for address %llx\n", address
);
273 return STATUS_INTERNAL_ERROR
;
// build and queue the record; no chunk is supplied (NULL)
276 Status
= add_metadata_reloc(Vcb
, items
, &tp
, skinny
, mr2
, NULL
, rollback
);
277 if (!NT_SUCCESS(Status
)) {
278 ERR("add_metadata_reloc returned %08x\n", Status
);
282 return STATUS_SUCCESS
;
// Re-orders mr->refs in place using an insertion sort through a temporary list.
//
// Each ref first gets a hash: tbr.offset for a TREE_BLOCK_REF, the parent's new_address
// for a SHARED_BLOCK_REF. A ref is then placed in front of the first already-sorted
// entry whose type is greater, or whose type is equal and whose hash is smaller; if no
// such entry exists it goes to the tail. The result is ordered by ascending type and,
// within a type, by descending hash.
285 static void sort_metadata_reloc_refs(metadata_reloc
* mr
) {
286 LIST_ENTRY newlist
, *le
;
// head's forward and backward links coincide: nothing to sort
288 if (mr
->refs
.Flink
== mr
->refs
.Blink
) // 0 or 1 items
293 InitializeListHead(&newlist
);
// move the refs one at a time from mr->refs into their sorted position in newlist
295 while (!IsListEmpty(&mr
->refs
)) {
296 metadata_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&mr
->refs
), metadata_reloc_ref
, list_entry
);
297 BOOL inserted
= FALSE
;
// sort key within a type
299 if (ref
->type
== TYPE_TREE_BLOCK_REF
)
300 ref
->hash
= ref
->tbr
.offset
;
301 else if (ref
->type
== TYPE_SHARED_BLOCK_REF
)
302 ref
->hash
= ref
->parent
->new_address
;
305 while (le
!= &newlist
) {
306 metadata_reloc_ref
* ref2
= CONTAINING_RECORD(le
, metadata_reloc_ref
, list_entry
);
308 if (ref
->type
< ref2
->type
|| (ref
->type
== ref2
->type
&& ref
->hash
> ref2
->hash
)) {
// InsertHeadList on the predecessor places ref immediately before ref2
309 InsertHeadList(le
->Blink
, &ref
->list_entry
);
// fallback: append at the end of the sorted list
318 InsertTailList(&newlist
, &ref
->list_entry
);
// graft the sorted chain back onto mr->refs by re-pointing the end nodes and the head
321 newlist
.Flink
->Blink
= &mr
->refs
;
322 newlist
.Blink
->Flink
= &mr
->refs
;
323 mr
->refs
.Flink
= newlist
.Flink
;
324 mr
->refs
.Blink
= newlist
.Blink
;
// Inserts the extent-tree entries describing a relocated tree block at mr->new_address
// and moves the shared backrefs that the block's children hold on its old address.
//
// Visible steps:
//  1. sort the refs and decide how many of them are stored inline in the item;
//  2. allocate and fill an EXTENT_ITEM (plus EXTENT_ITEM2 on non-skinny filesystems)
//     followed by the inline refs;
//  3. insert it as a METADATA_ITEM (skinny) or EXTENT_ITEM, then insert the remaining
//     refs as stand-alone TREE_BLOCK_REF / SHARED_BLOCK_REF items;
//  4. when shared backrefs apply, for every child node (internal node) or every data
//     extent (leaf) add a shared ref for the new address and drop the one for the old
//     address, and patch matching entries on the chunk's changed_extents list.
//
// Returns STATUS_SUCCESS, or STATUS_INSUFFICIENT_RESOURCES if the item cannot be allocated.
327 static NTSTATUS
add_metadata_reloc_extent_item(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, metadata_reloc
* mr
) {
332 BOOL all_inline
= TRUE
;
333 metadata_reloc_ref
* first_noninline
= NULL
;
// fixed part of the item: EXTENT_ITEM, plus EXTENT_ITEM2 when skinny metadata is not in use
337 inline_len
= sizeof(EXTENT_ITEM
);
338 if (!(Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA
))
339 inline_len
+= sizeof(EXTENT_ITEM2
);
341 sort_metadata_reloc_refs(mr
);
// size the inline area: refs are added (1 type byte + payload each) until one would
// push the item past a quarter of node_size; that ref is remembered as first_noninline
344 while (le
!= &mr
->refs
) {
345 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le
, metadata_reloc_ref
, list_entry
);
350 if (ref
->type
== TYPE_TREE_BLOCK_REF
)
351 extlen
+= sizeof(TREE_BLOCK_REF
);
352 else if (ref
->type
== TYPE_SHARED_BLOCK_REF
)
353 extlen
+= sizeof(SHARED_BLOCK_REF
);
356 if ((ULONG
)(inline_len
+ 1 + extlen
) > (Vcb
->superblock
.node_size
>> 2)) {
358 first_noninline
= ref
;
360 inline_len
+= extlen
+ 1;
366 ei
= ExAllocatePoolWithTag(PagedPool
, inline_len
, ALLOC_TAG
);
368 ERR("out of memory\n");
369 return STATUS_INSUFFICIENT_RESOURCES
;
// generation and flags are carried over from the old extent item
373 ei
->generation
= mr
->ei
->generation
;
374 ei
->flags
= mr
->ei
->flags
;
// variable-length data starts right after the EXTENT_ITEM
375 ptr
= (UINT8
*)&ei
[1];
377 if (!(Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA
)) {
378 EXTENT_ITEM2
* ei2
= (EXTENT_ITEM2
*)ptr
;
// first key (read from directly after the tree header) and level come from the node itself
380 ei2
->firstitem
= *(KEY
*)&mr
->data
[1];
381 ei2
->level
= mr
->data
->level
;
383 ptr
+= sizeof(EXTENT_ITEM2
);
// write the inline refs; first_noninline marks where the inline part ends
387 while (le
!= &mr
->refs
) {
388 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le
, metadata_reloc_ref
, list_entry
);
390 if (ref
== first_noninline
)
396 if (ref
->type
== TYPE_TREE_BLOCK_REF
) {
397 TREE_BLOCK_REF
* tbr
= (TREE_BLOCK_REF
*)ptr
;
399 tbr
->offset
= ref
->tbr
.offset
;
401 ptr
+= sizeof(TREE_BLOCK_REF
);
402 } else if (ref
->type
== TYPE_SHARED_BLOCK_REF
) {
403 SHARED_BLOCK_REF
* sbr
= (SHARED_BLOCK_REF
*)ptr
;
// a shared ref points at the parent's relocated address
405 sbr
->offset
= ref
->parent
->new_address
;
407 ptr
+= sizeof(SHARED_BLOCK_REF
);
// skinny metadata: METADATA_ITEM keyed with the level as offset;
// otherwise EXTENT_ITEM keyed with node_size as offset
413 if (Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA
)
414 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, mr
->new_address
, TYPE_METADATA_ITEM
, mr
->data
->level
, ei
, inline_len
, NULL
, NULL
);
416 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, mr
->new_address
, TYPE_EXTENT_ITEM
, Vcb
->superblock
.node_size
, ei
, inline_len
, NULL
, NULL
);
418 if (!NT_SUCCESS(Status
)) {
419 ERR("insert_tree_item returned %08x\n", Status
);
// refs that did not fit inline become stand-alone, data-less items
425 le
= &first_noninline
->list_entry
;
427 while (le
!= &mr
->refs
) {
428 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le
, metadata_reloc_ref
, list_entry
);
430 if (ref
->type
== TYPE_TREE_BLOCK_REF
) {
431 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, mr
->new_address
, TYPE_TREE_BLOCK_REF
, ref
->tbr
.offset
, NULL
, 0, NULL
, NULL
);
432 if (!NT_SUCCESS(Status
)) {
433 ERR("insert_tree_item returned %08x\n", Status
);
436 } else if (ref
->type
== TYPE_SHARED_BLOCK_REF
) {
437 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, mr
->new_address
, TYPE_SHARED_BLOCK_REF
, ref
->parent
->new_address
, NULL
, 0, NULL
, NULL
);
438 if (!NT_SUCCESS(Status
)) {
439 ERR("insert_tree_item returned %08x\n", Status
);
// shared backrefs may refer to this block by its old address: taken when the extent item
// or node header carries the shared-backref flag, or the header lacks the mixed-backref flag
448 if (ei
->flags
& EXTENT_ITEM_SHARED_BACKREFS
|| mr
->data
->flags
& HEADER_FLAG_SHARED_BACKREF
|| !(mr
->data
->flags
& HEADER_FLAG_MIXED_BACKREF
)) {
449 if (mr
->data
->level
> 0) {
// internal node: its entries follow the tree header
451 internal_node
* in
= (internal_node
*)&mr
->data
[1];
453 for (i
= 0; i
< mr
->data
->num_items
; i
++) {
// number of shared refs the child holds on the old address
454 UINT64 sbrrc
= find_extent_shared_tree_refcount(Vcb
, in
[i
].address
, mr
->address
, NULL
);
457 SHARED_BLOCK_REF sbr
;
// add a shared ref for the new address ...
459 sbr
.offset
= mr
->new_address
;
461 Status
= increase_extent_refcount(Vcb
, in
[i
].address
, Vcb
->superblock
.node_size
, TYPE_SHARED_BLOCK_REF
, &sbr
, NULL
, 0, NULL
);
462 if (!NT_SUCCESS(Status
)) {
463 ERR("increase_extent_refcount returned %08x\n", Status
);
// ... and drop the one for the old address
467 sbr
.offset
= mr
->address
;
469 Status
= decrease_extent_refcount(Vcb
, in
[i
].address
, Vcb
->superblock
.node_size
, TYPE_SHARED_BLOCK_REF
, &sbr
, NULL
, 0,
470 sbr
.offset
, FALSE
, NULL
);
471 if (!NT_SUCCESS(Status
)) {
472 ERR("decrease_extent_refcount returned %08x\n", Status
);
// leaf: walk its items looking for file extents
479 leaf_node
* ln
= (leaf_node
*)&mr
->data
[1];
481 for (i
= 0; i
< mr
->data
->num_items
; i
++) {
482 if (ln
[i
].key
.obj_type
== TYPE_EXTENT_DATA
&& ln
[i
].size
>= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
)) {
// item payloads are addressed relative to the end of the tree header
483 EXTENT_DATA
* ed
= (EXTENT_DATA
*)((UINT8
*)mr
->data
+ sizeof(tree_header
) + ln
[i
].offset
);
485 if (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) {
486 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
488 if (ed2
->size
> 0) { // not sparse
// number of shared data refs the extent holds on the old address
489 UINT32 sdrrc
= find_extent_shared_data_refcount(Vcb
, ed2
->address
, mr
->address
, NULL
);
// add a shared data ref for the new address ...
495 sdr
.offset
= mr
->new_address
;
498 Status
= increase_extent_refcount(Vcb
, ed2
->address
, ed2
->size
, TYPE_SHARED_DATA_REF
, &sdr
, NULL
, 0, NULL
);
499 if (!NT_SUCCESS(Status
)) {
500 ERR("increase_extent_refcount returned %08x\n", Status
);
// ... and drop the one for the old address
504 sdr
.offset
= mr
->address
;
506 Status
= decrease_extent_refcount(Vcb
, ed2
->address
, ed2
->size
, TYPE_SHARED_DATA_REF
, &sdr
, NULL
, 0,
507 sdr
.offset
, FALSE
, NULL
);
508 if (!NT_SUCCESS(Status
)) {
509 ERR("decrease_extent_refcount returned %08x\n", Status
);
513 c
= get_chunk_from_address(Vcb
, ed2
->address
);
516 // check changed_extents
518 ExAcquireResourceExclusiveLite(&c
->changed_extents_lock
, TRUE
);
520 le
= c
->changed_extents
.Flink
;
522 while (le
!= &c
->changed_extents
) {
523 changed_extent
* ce
= CONTAINING_RECORD(le
, changed_extent
, list_entry
);
525 if (ce
->address
== ed2
->address
) {
// retarget shared data refs on both the refs and old_refs lists of this changed extent
528 le2
= ce
->refs
.Flink
;
529 while (le2
!= &ce
->refs
) {
530 changed_extent_ref
* cer
= CONTAINING_RECORD(le2
, changed_extent_ref
, list_entry
);
532 if (cer
->type
== TYPE_SHARED_DATA_REF
&& cer
->sdr
.offset
== mr
->address
) {
533 cer
->sdr
.offset
= mr
->new_address
;
540 le2
= ce
->old_refs
.Flink
;
541 while (le2
!= &ce
->old_refs
) {
542 changed_extent_ref
* cer
= CONTAINING_RECORD(le2
, changed_extent_ref
, list_entry
);
544 if (cer
->type
== TYPE_SHARED_DATA_REF
&& cer
->sdr
.offset
== mr
->address
) {
545 cer
->sdr
.offset
= mr
->new_address
;
558 ExReleaseResourceLite(&c
->changed_extents_lock
);
568 return STATUS_SUCCESS
;
// Relocates every tree block queued on `items` and writes the moved blocks out.
//
// Visible phases:
//  1. For each queued block: read the node into mr->data, redirect EXTENT_DATA items of
//     leaves to the new addresses of the extents on data_items (if given), and make sure
//     the block behind every backref is queued as well (add_metadata_reloc_parent, which
//     may append to `items` while this loop is running).
//  2. For each queued block: look it up in the hash-ordered list of loaded trees.
//  3. Level by level, from 0 to the highest level seen: choose a new address (the chunk
//     used last, any other suitable chunk, or a freshly allocated one), patch parents,
//     roots, the superblock and ROOT_ITEMs, re-hash the in-memory tree, recompute the
//     node checksum and queue a tree_write sorted by address.
//  4. do_tree_writes, then add_metadata_reloc_extent_item for every block.
//
// `c` is the chunk being emptied; it is only passed to read_data for addresses inside it.
// Returns an NTSTATUS; STATUS_INSUFFICIENT_RESOURCES, STATUS_INTERNAL_ERROR and
// STATUS_DISK_FULL are produced directly by this function.
//
// Fix relative to the previous revision: the "tree loaded more than once" check now tests
// list_entry_hash.Flink (not list_entry.Flink) against the trees_hash list head, so the
// list head is no longer treated as a tree when t3 is the last entry on the hash list.
571 static NTSTATUS
write_metadata_items(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, LIST_ENTRY
* items
,
572 LIST_ENTRY
* data_items
, chunk
* c
, LIST_ENTRY
* rollback
) {
573 LIST_ENTRY tree_writes
, *le
;
576 UINT8 level
, max_level
= 0;
577 chunk
* newchunk
= NULL
;
579 InitializeListHead(&tree_writes
);
// phase 1: load each block and queue the blocks that reference it
582 while (le
!= items
) {
583 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
// buffer for the raw node
587 mr
->data
= ExAllocatePoolWithTag(PagedPool
, Vcb
->superblock
.node_size
, ALLOC_TAG
);
589 ERR("out of memory\n");
590 return STATUS_INSUFFICIENT_RESOURCES
;
// the caller's chunk is only passed on when the address lies inside it
593 Status
= read_data(Vcb
, mr
->address
, Vcb
->superblock
.node_size
, NULL
, TRUE
, (UINT8
*)mr
->data
,
594 c
&& mr
->address
>= c
->offset
&& mr
->address
< c
->offset
+ c
->chunk_item
->size
? c
: NULL
, &pc
, NULL
, 0, FALSE
, NormalPagePriority
);
595 if (!NT_SUCCESS(Status
)) {
596 ERR("read_data returned %08x\n", Status
);
600 if (pc
->chunk_item
->type
& BLOCK_FLAG_SYSTEM
)
// a leaf relocated together with data extents: point its file extents at the new addresses
603 if (data_items
&& mr
->data
->level
== 0) {
604 le2
= data_items
->Flink
;
605 while (le2
!= data_items
) {
606 data_reloc
* dr
= CONTAINING_RECORD(le2
, data_reloc
, list_entry
);
607 leaf_node
* ln
= (leaf_node
*)&mr
->data
[1];
610 for (i
= 0; i
< mr
->data
->num_items
; i
++) {
611 if (ln
[i
].key
.obj_type
== TYPE_EXTENT_DATA
&& ln
[i
].size
>= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
)) {
612 EXTENT_DATA
* ed
= (EXTENT_DATA
*)((UINT8
*)mr
->data
+ sizeof(tree_header
) + ln
[i
].offset
);
614 if (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) {
615 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
617 if (ed2
->address
== dr
->address
)
618 ed2
->address
= dr
->new_address
;
// remember the highest level seen; phase 3 iterates up to it
627 if (mr
->data
->level
> max_level
)
628 max_level
= mr
->data
->level
;
// resolve the parent block behind every backref
630 le2
= mr
->refs
.Flink
;
631 while (le2
!= &mr
->refs
) {
632 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le2
, metadata_reloc_ref
, list_entry
);
634 if (ref
->type
== TYPE_TREE_BLOCK_REF
) {
// the node's first key (stored right after the tree header) is used to search the owning root
640 firstitem
= (KEY
*)&mr
->data
[1];
// find the root whose id matches the backref
642 le3
= Vcb
->roots
.Flink
;
643 while (le3
!= &Vcb
->roots
) {
644 root
* r2
= CONTAINING_RECORD(le3
, root
, list_entry
);
646 if (r2
->id
== ref
->tbr
.offset
) {
655 ERR("could not find subvol with id %llx\n", ref
->tbr
.offset
);
656 return STATUS_INTERNAL_ERROR
;
// search down to the level directly above this block; STATUS_NOT_FOUND is tolerated
659 Status
= find_item_to_level(Vcb
, r
, &tp
, firstitem
, FALSE
, mr
->data
->level
+ 1, NULL
);
660 if (!NT_SUCCESS(Status
) && Status
!= STATUS_NOT_FOUND
) {
661 ERR("find_item_to_level returned %08x\n", Status
);
666 while (t
&& t
->header
.level
< mr
->data
->level
+ 1) {
675 Status
= add_metadata_reloc_parent(Vcb
, items
, t
->header
.address
, &mr2
, rollback
);
676 if (!NT_SUCCESS(Status
)) {
677 ERR("add_metadata_reloc_parent returned %08x\n", Status
);
683 } else if (ref
->type
== TYPE_SHARED_BLOCK_REF
) {
// a shared backref names the parent block directly by address
686 Status
= add_metadata_reloc_parent(Vcb
, items
, ref
->sbr
.offset
, &mr2
, rollback
);
687 if (!NT_SUCCESS(Status
)) {
688 ERR("add_metadata_reloc_parent returned %08x\n", Status
);
// phase 2: match each block against the trees currently loaded in memory
702 while (le
!= items
) {
703 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
// trees are hashed by the crc32c of their address; trees_ptrs is indexed by the top 8 bits
709 hash
= calc_crc32c(0xffffffff, (UINT8
*)&mr
->address
, sizeof(UINT64
));
711 le2
= Vcb
->trees_ptrs
[hash
>> 24];
714 while (le2
!= &Vcb
->trees_hash
) {
715 tree
* t
= CONTAINING_RECORD(le2
, tree
, list_entry_hash
);
717 if (t
->header
.address
== mr
->address
) {
720 } else if (t
->hash
> hash
)
// phase 3: relocate one level at a time, starting with level 0
730 for (level
= 0; level
<= max_level
; level
++) {
732 while (le
!= items
) {
733 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
735 if (mr
->data
->level
== level
) {
// block-group flags the destination chunk must have
743 flags
= Vcb
->system_flags
;
744 else if (Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS
)
745 flags
= Vcb
->data_flags
;
747 flags
= Vcb
->metadata_flags
;
// first try the chunk remembered in newchunk
750 ExAcquireResourceExclusiveLite(&newchunk
->lock
, TRUE
);
752 if (newchunk
->chunk_item
->type
== flags
&& find_metadata_address_in_chunk(Vcb
, newchunk
, &mr
->new_address
)) {
753 newchunk
->used
+= Vcb
->superblock
.node_size
;
754 space_list_subtract(newchunk
, FALSE
, mr
->new_address
, Vcb
->superblock
.node_size
, rollback
);
758 ExReleaseResourceLite(&newchunk
->lock
);
// otherwise scan all chunks for a writable, non-relocating one of the right type with room for a node
762 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, TRUE
);
764 le2
= Vcb
->chunks
.Flink
;
765 while (le2
!= &Vcb
->chunks
) {
766 chunk
* c2
= CONTAINING_RECORD(le2
, chunk
, list_entry
);
768 if (!c2
->readonly
&& !c2
->reloc
&& c2
!= newchunk
&& c2
->chunk_item
->type
== flags
) {
769 ExAcquireResourceExclusiveLite(&c2
->lock
, TRUE
);
771 if ((c2
->chunk_item
->size
- c2
->used
) >= Vcb
->superblock
.node_size
) {
772 if (find_metadata_address_in_chunk(Vcb
, c2
, &mr
->new_address
)) {
773 c2
->used
+= Vcb
->superblock
.node_size
;
774 space_list_subtract(c2
, FALSE
, mr
->new_address
, Vcb
->superblock
.node_size
, rollback
);
775 ExReleaseResourceLite(&c2
->lock
);
782 ExReleaseResourceLite(&c2
->lock
);
788 // allocate new chunk if necessary
790 Status
= alloc_chunk(Vcb
, flags
, &newchunk
, FALSE
);
792 if (!NT_SUCCESS(Status
)) {
793 ERR("alloc_chunk returned %08x\n", Status
);
794 ExReleaseResourceLite(&Vcb
->chunk_lock
);
798 ExAcquireResourceExclusiveLite(&newchunk
->lock
, TRUE
);
// tag the fresh chunk with the current balance number
800 newchunk
->balance_num
= Vcb
->balance
.balance_num
;
802 if (!find_metadata_address_in_chunk(Vcb
, newchunk
, &mr
->new_address
)) {
803 ExReleaseResourceLite(&newchunk
->lock
);
804 ExReleaseResourceLite(&Vcb
->chunk_lock
);
805 ERR("could not find address in new chunk\n");
806 Status
= STATUS_DISK_FULL
;
809 newchunk
->used
+= Vcb
->superblock
.node_size
;
810 space_list_subtract(newchunk
, FALSE
, mr
->new_address
, Vcb
->superblock
.node_size
, rollback
);
813 ExReleaseResourceLite(&newchunk
->lock
);
816 ExReleaseResourceLite(&Vcb
->chunk_lock
);
// update whatever points at this block: parent nodes, or the owning root for a top-level block
820 le2
= mr
->refs
.Flink
;
821 while (le2
!= &mr
->refs
) {
822 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le2
, metadata_reloc_ref
, list_entry
);
// patch the child pointer in the parent's raw node data
826 internal_node
* in
= (internal_node
*)&ref
->parent
->data
[1];
828 for (i
= 0; i
< ref
->parent
->data
->num_items
; i
++) {
829 if (in
[i
].address
== mr
->address
) {
830 in
[i
].address
= mr
->new_address
;
// if the parent is also loaded as a tree, patch its in-memory items too
835 if (ref
->parent
->t
) {
838 le3
= ref
->parent
->t
->itemlist
.Flink
;
839 while (le3
!= &ref
->parent
->t
->itemlist
) {
840 tree_data
* td
= CONTAINING_RECORD(le3
, tree_data
, list_entry
);
842 if (!td
->inserted
&& td
->treeholder
.address
== mr
->address
)
843 td
->treeholder
.address
= mr
->new_address
;
848 } else if (ref
->top
&& ref
->type
== TYPE_TREE_BLOCK_REF
) {
// top-level block: find the root that owns it
854 le3
= Vcb
->roots
.Flink
;
855 while (le3
!= &Vcb
->roots
) {
856 root
* r2
= CONTAINING_RECORD(le3
, root
, list_entry
);
858 if (r2
->id
== ref
->tbr
.offset
) {
867 r
->treeholder
.address
= mr
->new_address
;
// the root tree and chunk tree are located through the superblock;
// every other root through its ROOT_ITEM in the root tree
869 if (r
== Vcb
->root_root
)
870 Vcb
->superblock
.root_tree_addr
= mr
->new_address
;
871 else if (r
== Vcb
->chunk_root
)
872 Vcb
->superblock
.chunk_tree_addr
= mr
->new_address
;
873 else if (r
->root_item
.block_number
== mr
->address
) {
877 r
->root_item
.block_number
= mr
->new_address
;
879 searchkey
.obj_id
= r
->id
;
880 searchkey
.obj_type
= TYPE_ROOT_ITEM
;
881 searchkey
.offset
= 0xffffffffffffffff;
883 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, FALSE
, NULL
);
884 if (!NT_SUCCESS(Status
)) {
885 ERR("find_item returned %08x\n", Status
);
889 if (tp
.item
->key
.obj_id
!= searchkey
.obj_id
|| tp
.item
->key
.obj_type
!= searchkey
.obj_type
) {
890 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey
.obj_id
);
891 Status
= STATUS_INTERNAL_ERROR
;
// replace the on-disk ROOT_ITEM with a copy of the updated in-memory one, under the same key
895 ri
= ExAllocatePoolWithTag(PagedPool
, sizeof(ROOT_ITEM
), ALLOC_TAG
);
897 ERR("out of memory\n");
898 Status
= STATUS_INSUFFICIENT_RESOURCES
;
902 RtlCopyMemory(ri
, &r
->root_item
, sizeof(ROOT_ITEM
));
904 Status
= delete_tree_item(Vcb
, &tp
);
905 if (!NT_SUCCESS(Status
)) {
906 ERR("delete_tree_item returned %08x\n", Status
);
910 Status
= insert_tree_item(Vcb
, Vcb
->root_root
, tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, ri
, sizeof(ROOT_ITEM
), NULL
, NULL
);
911 if (!NT_SUCCESS(Status
)) {
912 ERR("insert_tree_item returned %08x\n", Status
);
// the node header records its own address
922 mr
->data
->address
= mr
->new_address
;
931 // check if tree loaded more than once
// the successor is only a real tree when it is not the hash list's head
932 if (t3
->list_entry_hash
.Flink
!= &Vcb
->trees_hash
) {
933 tree
* nt
= CONTAINING_RECORD(t3
->list_entry_hash
.Flink
, tree
, list_entry_hash
);
935 if (nt
->header
.address
== t3
->header
.address
)
939 t3
->header
.address
= mr
->new_address
;
// if the bucket pointer referred to t3, move it to the next tree in the same bucket or clear it
943 if (Vcb
->trees_ptrs
[h
] == &t3
->list_entry_hash
) {
944 if (t3
->list_entry_hash
.Flink
== &Vcb
->trees_hash
)
945 Vcb
->trees_ptrs
[h
] = NULL
;
947 tree
* t2
= CONTAINING_RECORD(t3
->list_entry_hash
.Flink
, tree
, list_entry_hash
);
949 if (t2
->hash
>> 24 == h
)
950 Vcb
->trees_ptrs
[h
] = &t2
->list_entry_hash
;
952 Vcb
->trees_ptrs
[h
] = NULL
;
956 RemoveEntryList(&t3
->list_entry_hash
);
// re-hash under the new address and reinsert in hash order
958 t3
->hash
= calc_crc32c(0xffffffff, (UINT8
*)&t3
->header
.address
, sizeof(UINT64
));
// choose where to start scanning: this bucket's pointer if set, otherwise another
// bucket's pointer or the list head
961 if (!Vcb
->trees_ptrs
[h
]) {
964 le2
= Vcb
->trees_hash
.Flink
;
969 if (Vcb
->trees_ptrs
[h2
]) {
970 le2
= Vcb
->trees_ptrs
[h2
];
978 le2
= Vcb
->trees_ptrs
[h
];
981 while (le2
!= &Vcb
->trees_hash
) {
982 tree
* t2
= CONTAINING_RECORD(le2
, tree
, list_entry_hash
);
984 if (t2
->hash
>= t3
->hash
) {
985 InsertHeadList(le2
->Blink
, &t3
->list_entry_hash
);
994 InsertTailList(&Vcb
->trees_hash
, &t3
->list_entry_hash
);
// t3 becomes the bucket's first entry if the bucket was empty or t3 now precedes its old first entry
996 if (!Vcb
->trees_ptrs
[h
] || t3
->list_entry_hash
.Flink
== Vcb
->trees_ptrs
[h
])
997 Vcb
->trees_ptrs
[h
] = &t3
->list_entry_hash
;
// also redirect file extents in the in-memory copy of a leaf
999 if (data_items
&& level
== 0) {
1000 le2
= data_items
->Flink
;
1002 while (le2
!= data_items
) {
1003 data_reloc
* dr
= CONTAINING_RECORD(le2
, data_reloc
, list_entry
);
1004 LIST_ENTRY
* le3
= t3
->itemlist
.Flink
;
1006 while (le3
!= &t3
->itemlist
) {
1007 tree_data
* td
= CONTAINING_RECORD(le3
, tree_data
, list_entry
);
1009 if (!td
->inserted
&& td
->key
.obj_type
== TYPE_EXTENT_DATA
&& td
->size
>= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
)) {
1010 EXTENT_DATA
* ed
= (EXTENT_DATA
*)td
->data
;
1012 if (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) {
1013 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
1015 if (ed2
->address
== dr
->address
)
1016 ed2
->address
= dr
->new_address
;
// recompute the checksum stored at the start of the node: inverted crc32c over the
// bytes from fs_uuid onwards (node_size minus the size of the csum field)
1030 *((UINT32
*)mr
->data
) = ~calc_crc32c(0xffffffff, (UINT8
*)&mr
->data
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(mr
->data
->csum
));
// queue the node for writing; tree_writes is kept sorted by ascending address
1032 tw
= ExAllocatePoolWithTag(PagedPool
, sizeof(tree_write
), ALLOC_TAG
);
1034 ERR("out of memory\n");
1035 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1039 tw
->address
= mr
->new_address
;
1040 tw
->length
= Vcb
->superblock
.node_size
;
1041 tw
->data
= (UINT8
*)mr
->data
;
1043 if (IsListEmpty(&tree_writes
))
1044 InsertTailList(&tree_writes
, &tw
->list_entry
);
1046 BOOL inserted
= FALSE
;
1048 le2
= tree_writes
.Flink
;
1049 while (le2
!= &tree_writes
) {
1050 tree_write
* tw2
= CONTAINING_RECORD(le2
, tree_write
, list_entry
);
1052 if (tw2
->address
> tw
->address
) {
1053 InsertHeadList(le2
->Blink
, &tw
->list_entry
);
1062 InsertTailList(&tree_writes
, &tw
->list_entry
);
// phase 4: write out all relocated nodes ...
1070 Status
= do_tree_writes(Vcb
, &tree_writes
, TRUE
);
1071 if (!NT_SUCCESS(Status
)) {
1072 ERR("do_tree_writes returned %08x\n", Status
);
// ... then create the extent items for their new locations
1077 while (le
!= items
) {
1078 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
1080 Status
= add_metadata_reloc_extent_item(Vcb
, mr
);
1081 if (!NT_SUCCESS(Status
)) {
1082 ERR("add_metadata_reloc_extent_item returned %08x\n", Status
);
1089 Status
= STATUS_SUCCESS
;
// drain the tree_writes list
1092 while (!IsListEmpty(&tree_writes
)) {
1093 tree_write
* tw
= CONTAINING_RECORD(RemoveHeadList(&tree_writes
), tree_write
, list_entry
);
// Moves a batch of tree blocks out of chunk c.
//
// With Vcb->tree_lock held exclusively, the extent root is scanned starting at the
// chunk's offset; each tree-block extent item found inside the chunk is queued with
// add_metadata_reloc (the scan has a cut-off of 64 loaded blocks). The queued blocks
// are relocated by write_metadata_items and the result is committed with do_write;
// on success the rollback list is cleared, otherwise it is replayed. The queued
// records and their refs are removed from their lists after the lock is released.
//
// NOTE(review): where *changed is set is not visible in this listing - presumably it
// reports whether any block was moved; confirm against the full source.
1100 static NTSTATUS
balance_metadata_chunk(device_extension
* Vcb
, chunk
* c
, BOOL
* changed
) {
1105 LIST_ENTRY items
, rollback
;
1108 TRACE("chunk %llx\n", c
->offset
);
1110 InitializeListHead(&rollback
);
1111 InitializeListHead(&items
);
1113 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, TRUE
);
// start the scan of the extent tree at the chunk's first address
1115 searchkey
.obj_id
= c
->offset
;
1116 searchkey
.obj_type
= TYPE_METADATA_ITEM
;
1117 searchkey
.offset
= 0xffffffffffffffff;
1119 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, FALSE
, NULL
);
1120 if (!NT_SUCCESS(Status
)) {
1121 ERR("find_item returned %08x\n", Status
);
1126 traverse_ptr next_tp
;
// items at or beyond the end of the chunk are outside the range of interest
1128 if (tp
.item
->key
.obj_id
>= c
->offset
+ c
->chunk_item
->size
)
1131 if (tp
.item
->key
.obj_id
>= c
->offset
&& (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
|| tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
)) {
1132 BOOL tree
= FALSE
, skinny
= FALSE
;
// a METADATA_ITEM, or an EXTENT_ITEM of node_size carrying the tree-block flag,
// describes a tree block
1134 if (tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
&& tp
.item
->size
>= sizeof(EXTENT_ITEM
)) {
1137 } else if (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
&& tp
.item
->key
.offset
== Vcb
->superblock
.node_size
&&
1138 tp
.item
->size
>= sizeof(EXTENT_ITEM
)) {
1139 EXTENT_ITEM
* ei
= (EXTENT_ITEM
*)tp
.item
->data
;
1141 if (ei
->flags
& EXTENT_ITEM_TREE_BLOCK
)
// queue the block; the chunk is passed along so it need not be looked up again
1146 Status
= add_metadata_reloc(Vcb
, &items
, &tp
, skinny
, NULL
, c
, &rollback
);
1148 if (!NT_SUCCESS(Status
)) {
1149 ERR("add_metadata_reloc returned %08x\n", Status
);
1155 if (loaded
>= 64) // only do 64 at a time
1160 b
= find_next_item(Vcb
, &tp
, &next_tp
, FALSE
, NULL
);
// nothing queued: there is no relocation work for this pass
1166 if (IsListEmpty(&items
)) {
1168 Status
= STATUS_SUCCESS
;
// relocate the queued blocks (no data relocations accompany them: data_items is NULL)
1173 Status
= write_metadata_items(Vcb
, &items
, NULL
, c
, &rollback
);
1174 if (!NT_SUCCESS(Status
)) {
1175 ERR("write_metadata_items returned %08x\n", Status
);
1179 Status
= STATUS_SUCCESS
;
1181 Vcb
->need_write
= TRUE
;
// commit; afterwards either discard the rollback log or replay it
1184 if (NT_SUCCESS(Status
)) {
1185 Status
= do_write(Vcb
, NULL
);
1186 if (!NT_SUCCESS(Status
))
1187 ERR("do_write returned %08x\n", Status
);
1190 if (NT_SUCCESS(Status
))
1191 clear_rollback(&rollback
);
1193 do_rollback(Vcb
, &rollback
);
1197 ExReleaseResourceLite(&Vcb
->tree_lock
);
// tear down the queue of relocation records and each record's ref list
1199 while (!IsListEmpty(&items
)) {
1200 metadata_reloc
* mr
= CONTAINING_RECORD(RemoveHeadList(&items
), metadata_reloc
, list_entry
);
1202 while (!IsListEmpty(&mr
->refs
)) {
1203 metadata_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&mr
->refs
), metadata_reloc_ref
, list_entry
);
// Records, on dr->refs, the leaves through which an EXTENT_DATA_REF reaches the data
// extent described by dr, and queues those leaves for relocation.
//
// The root named by edr->root is looked up, then the inode's EXTENT_DATA items
// (object id edr->objid) are walked from offset 0. For every regular or preallocated
// extent that matches dr's address and size and whose file offset minus extent offset
// equals edr->offset, a data_reloc_ref is allocated as a copy of *edr, its leaf is
// passed to add_metadata_reloc_parent, and it is appended to dr->refs. A match in the
// same leaf as the previous one (tracked via last_tree) takes a different branch that
// does not allocate.
//
// Returns STATUS_SUCCESS, STATUS_INTERNAL_ERROR when the root or the inode's extent data
// cannot be found, or STATUS_INSUFFICIENT_RESOURCES on allocation failure.
//
// Fix relative to the previous revision: the "could not find subvol" message now reports
// edr->root, the id that was searched for, rather than edr->count.
1214 static NTSTATUS
data_reloc_add_tree_edr(_Requires_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, LIST_ENTRY
* metadata_items
,
1215 data_reloc
* dr
, EXTENT_DATA_REF
* edr
, LIST_ENTRY
* rollback
) {
1222 UINT64 last_tree
= 0;
1223 data_reloc_ref
* ref
;
// locate the root (subvolume) the reference belongs to
1225 le
= Vcb
->roots
.Flink
;
1226 while (le
!= &Vcb
->roots
) {
1227 root
* r2
= CONTAINING_RECORD(le
, root
, list_entry
);
1229 if (r2
->id
== edr
->root
) {
// report the root id that could not be found
1238 ERR("could not find subvol %llx\n", edr
->root
);
1239 return STATUS_INTERNAL_ERROR
;
// start at the inode's first possible EXTENT_DATA key
1242 searchkey
.obj_id
= edr
->objid
;
1243 searchkey
.obj_type
= TYPE_EXTENT_DATA
;
1244 searchkey
.offset
= 0;
1246 Status
= find_item(Vcb
, r
, &tp
, &searchkey
, FALSE
, NULL
);
1247 if (!NT_SUCCESS(Status
)) {
1248 ERR("find_item returned %08x\n", Status
);
// find_item returned an item sorting before the search key: step to the next item
1252 if (tp
.item
->key
.obj_id
< searchkey
.obj_id
|| (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
< searchkey
.obj_type
)) {
1255 if (find_next_item(Vcb
, &tp
, &tp2
, FALSE
, NULL
))
1258 ERR("could not find EXTENT_DATA for inode %llx in root %llx\n", searchkey
.obj_id
, r
->id
);
1259 return STATUS_INTERNAL_ERROR
;
// visit every EXTENT_DATA item of the inode
1265 while (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
== searchkey
.obj_type
) {
1268 if (tp
.item
->size
>= sizeof(EXTENT_DATA
)) {
1269 EXTENT_DATA
* ed
= (EXTENT_DATA
*)tp
.item
->data
;
// only regular / preallocated extents large enough to carry an EXTENT_DATA2 are considered
1271 if ((ed
->type
== EXTENT_TYPE_PREALLOC
|| ed
->type
== EXTENT_TYPE_REGULAR
) && tp
.item
->size
>= offsetof(EXTENT_DATA
, data
[0]) + sizeof(EXTENT_DATA2
)) {
1272 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
// same extent, and same (file offset - extent offset) as the backref records
1274 if (ed2
->address
== dr
->address
&& ed2
->size
== dr
->size
&& tp
.item
->key
.offset
- ed2
->offset
== edr
->offset
) {
// another match in the leaf that produced the previous ref
1275 if (ref
&& last_tree
== tp
.tree
->header
.address
)
1278 ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(data_reloc_ref
), ALLOC_TAG
);
1280 ERR("out of memory\n");
1281 return STATUS_INSUFFICIENT_RESOURCES
;
1284 ref
->type
= TYPE_EXTENT_DATA_REF
;
1285 RtlCopyMemory(&ref
->edr
, edr
, sizeof(EXTENT_DATA_REF
));
// make sure the leaf holding this item is queued for relocation
1288 Status
= add_metadata_reloc_parent(Vcb
, metadata_items
, tp
.tree
->header
.address
, &mr
, rollback
);
1289 if (!NT_SUCCESS(Status
)) {
1290 ERR("add_metadata_reloc_parent returned %08x\n", Status
);
1295 last_tree
= tp
.tree
->header
.address
;
1298 InsertTailList(&dr
->refs
, &ref
->list_entry
);
1304 if (find_next_item(Vcb
, &tp
, &tp2
, FALSE
, NULL
))
1310 return STATUS_SUCCESS
;
1313 static NTSTATUS
add_data_reloc(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, LIST_ENTRY
* items
, LIST_ENTRY
* metadata_items
,
1314 traverse_ptr
* tp
, chunk
* c
, LIST_ENTRY
* rollback
) {
1322 dr
= ExAllocatePoolWithTag(PagedPool
, sizeof(data_reloc
), ALLOC_TAG
);
1324 ERR("out of memory\n");
1325 return STATUS_INSUFFICIENT_RESOURCES
;
1328 dr
->address
= tp
->item
->key
.obj_id
;
1329 dr
->size
= tp
->item
->key
.offset
;
1330 dr
->ei
= (EXTENT_ITEM
*)tp
->item
->data
;
1331 InitializeListHead(&dr
->refs
);
1333 Status
= delete_tree_item(Vcb
, tp
);
1334 if (!NT_SUCCESS(Status
)) {
1335 ERR("delete_tree_item returned %08x\n", Status
);
1340 c
= get_chunk_from_address(Vcb
, tp
->item
->key
.obj_id
);
1343 ExAcquireResourceExclusiveLite(&c
->lock
, TRUE
);
1345 c
->used
-= tp
->item
->key
.offset
;
1347 space_list_add(c
, tp
->item
->key
.obj_id
, tp
->item
->key
.offset
, rollback
);
1349 ExReleaseResourceLite(&c
->lock
);
1352 ei
= (EXTENT_ITEM
*)tp
->item
->data
;
1355 len
= tp
->item
->size
- sizeof(EXTENT_ITEM
);
1356 ptr
= (UINT8
*)tp
->item
->data
+ sizeof(EXTENT_ITEM
);
1359 UINT8 secttype
= *ptr
;
1360 UINT16 sectlen
= secttype
== TYPE_EXTENT_DATA_REF
? sizeof(EXTENT_DATA_REF
) : (secttype
== TYPE_SHARED_DATA_REF
? sizeof(SHARED_DATA_REF
) : 0);
1364 if (sectlen
> len
) {
1365 ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp
->item
->key
.obj_id
, tp
->item
->key
.obj_type
, tp
->item
->key
.offset
, len
, sectlen
);
1366 return STATUS_INTERNAL_ERROR
;
1370 ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp
->item
->key
.obj_id
, tp
->item
->key
.obj_type
, tp
->item
->key
.offset
, secttype
);
1371 return STATUS_INTERNAL_ERROR
;
1374 if (secttype
== TYPE_EXTENT_DATA_REF
) {
1375 EXTENT_DATA_REF
* edr
= (EXTENT_DATA_REF
*)(ptr
+ sizeof(UINT8
));
1377 inline_rc
+= edr
->count
;
1379 Status
= data_reloc_add_tree_edr(Vcb
, metadata_items
, dr
, edr
, rollback
);
1380 if (!NT_SUCCESS(Status
)) {
1381 ERR("data_reloc_add_tree_edr returned %08x\n", Status
);
1384 } else if (secttype
== TYPE_SHARED_DATA_REF
) {
1386 data_reloc_ref
* ref
;
1388 ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(data_reloc_ref
), ALLOC_TAG
);
1390 ERR("out of memory\n");
1391 return STATUS_INSUFFICIENT_RESOURCES
;
1394 ref
->type
= TYPE_SHARED_DATA_REF
;
1395 RtlCopyMemory(&ref
->sdr
, ptr
+ sizeof(UINT8
), sizeof(SHARED_DATA_REF
));
1396 inline_rc
+= ref
->sdr
.count
;
1398 Status
= add_metadata_reloc_parent(Vcb
, metadata_items
, ref
->sdr
.offset
, &mr
, rollback
);
1399 if (!NT_SUCCESS(Status
)) {
1400 ERR("add_metadata_reloc_parent returned %08x\n", Status
);
1407 InsertTailList(&dr
->refs
, &ref
->list_entry
);
1409 ERR("unexpected tree type %x\n", secttype
);
1410 return STATUS_INTERNAL_ERROR
;
1415 ptr
+= sizeof(UINT8
) + sectlen
;
1418 if (inline_rc
< ei
->refcount
) { // look for non-inline entries
1419 traverse_ptr tp2
= *tp
, next_tp
;
1421 while (find_next_item(Vcb
, &tp2
, &next_tp
, FALSE
, NULL
)) {
1424 if (tp2
.item
->key
.obj_id
== tp
->item
->key
.obj_id
) {
1425 if (tp2
.item
->key
.obj_type
== TYPE_EXTENT_DATA_REF
&& tp2
.item
->size
>= sizeof(EXTENT_DATA_REF
)) {
1426 Status
= data_reloc_add_tree_edr(Vcb
, metadata_items
, dr
, (EXTENT_DATA_REF
*)tp2
.item
->data
, rollback
);
1427 if (!NT_SUCCESS(Status
)) {
1428 ERR("data_reloc_add_tree_edr returned %08x\n", Status
);
1432 Status
= delete_tree_item(Vcb
, &tp2
);
1433 if (!NT_SUCCESS(Status
)) {
1434 ERR("delete_tree_item returned %08x\n", Status
);
1437 } else if (tp2
.item
->key
.obj_type
== TYPE_SHARED_DATA_REF
&& tp2
.item
->size
>= sizeof(UINT32
)) {
1439 data_reloc_ref
* ref
;
1441 ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(data_reloc_ref
), ALLOC_TAG
);
1443 ERR("out of memory\n");
1444 return STATUS_INSUFFICIENT_RESOURCES
;
1447 ref
->type
= TYPE_SHARED_DATA_REF
;
1448 ref
->sdr
.offset
= tp2
.item
->key
.offset
;
1449 ref
->sdr
.count
= *((UINT32
*)tp2
.item
->data
);
1451 Status
= add_metadata_reloc_parent(Vcb
, metadata_items
, ref
->sdr
.offset
, &mr
, rollback
);
1452 if (!NT_SUCCESS(Status
)) {
1453 ERR("add_metadata_reloc_parent returned %08x\n", Status
);
1459 InsertTailList(&dr
->refs
, &ref
->list_entry
);
1461 Status
= delete_tree_item(Vcb
, &tp2
);
1462 if (!NT_SUCCESS(Status
)) {
1463 ERR("delete_tree_item returned %08x\n", Status
);
1472 InsertTailList(items
, &dr
->list_entry
);
1474 return STATUS_SUCCESS
;
1477 static void sort_data_reloc_refs(data_reloc
* dr
) {
1478 LIST_ENTRY newlist
, *le
;
1480 if (IsListEmpty(&dr
->refs
))
1485 InitializeListHead(&newlist
);
1487 while (!IsListEmpty(&dr
->refs
)) {
1488 data_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&dr
->refs
), data_reloc_ref
, list_entry
);
1489 BOOL inserted
= FALSE
;
1491 if (ref
->type
== TYPE_EXTENT_DATA_REF
)
1492 ref
->hash
= get_extent_data_ref_hash2(ref
->edr
.root
, ref
->edr
.objid
, ref
->edr
.offset
);
1493 else if (ref
->type
== TYPE_SHARED_DATA_REF
)
1494 ref
->hash
= ref
->parent
->new_address
;
1497 while (le
!= &newlist
) {
1498 data_reloc_ref
* ref2
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1500 if (ref
->type
< ref2
->type
|| (ref
->type
== ref2
->type
&& ref
->hash
> ref2
->hash
)) {
1501 InsertHeadList(le
->Blink
, &ref
->list_entry
);
1510 InsertTailList(&newlist
, &ref
->list_entry
);
1514 while (le
!= &newlist
) {
1515 data_reloc_ref
* ref
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1517 if (le
->Flink
!= &newlist
) {
1518 data_reloc_ref
* ref2
= CONTAINING_RECORD(le
->Flink
, data_reloc_ref
, list_entry
);
1520 if (ref
->type
== TYPE_EXTENT_DATA_REF
&& ref2
->type
== TYPE_EXTENT_DATA_REF
&& ref
->edr
.root
== ref2
->edr
.root
&&
1521 ref
->edr
.objid
== ref2
->edr
.objid
&& ref
->edr
.offset
== ref2
->edr
.offset
) {
1522 RemoveEntryList(&ref2
->list_entry
);
1523 ref
->edr
.count
+= ref2
->edr
.count
;
1532 newlist
.Flink
->Blink
= &dr
->refs
;
1533 newlist
.Blink
->Flink
= &dr
->refs
;
1534 dr
->refs
.Flink
= newlist
.Flink
;
1535 dr
->refs
.Blink
= newlist
.Blink
;
1538 static NTSTATUS
add_data_reloc_extent_item(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, data_reloc
* dr
) {
1543 BOOL all_inline
= TRUE
;
1544 data_reloc_ref
* first_noninline
= NULL
;
1548 inline_len
= sizeof(EXTENT_ITEM
);
1550 sort_data_reloc_refs(dr
);
1552 le
= dr
->refs
.Flink
;
1553 while (le
!= &dr
->refs
) {
1554 data_reloc_ref
* ref
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1557 if (ref
->type
== TYPE_EXTENT_DATA_REF
) {
1558 extlen
+= sizeof(EXTENT_DATA_REF
);
1559 rc
+= ref
->edr
.count
;
1560 } else if (ref
->type
== TYPE_SHARED_DATA_REF
) {
1561 extlen
+= sizeof(SHARED_DATA_REF
);
1566 if ((ULONG
)(inline_len
+ 1 + extlen
) > (Vcb
->superblock
.node_size
>> 2)) {
1568 first_noninline
= ref
;
1570 inline_len
+= extlen
+ 1;
1576 ei
= ExAllocatePoolWithTag(PagedPool
, inline_len
, ALLOC_TAG
);
1578 ERR("out of memory\n");
1579 return STATUS_INSUFFICIENT_RESOURCES
;
1583 ei
->generation
= dr
->ei
->generation
;
1584 ei
->flags
= dr
->ei
->flags
;
1585 ptr
= (UINT8
*)&ei
[1];
1587 le
= dr
->refs
.Flink
;
1588 while (le
!= &dr
->refs
) {
1589 data_reloc_ref
* ref
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1591 if (ref
== first_noninline
)
1597 if (ref
->type
== TYPE_EXTENT_DATA_REF
) {
1598 EXTENT_DATA_REF
* edr
= (EXTENT_DATA_REF
*)ptr
;
1600 RtlCopyMemory(edr
, &ref
->edr
, sizeof(EXTENT_DATA_REF
));
1602 ptr
+= sizeof(EXTENT_DATA_REF
);
1603 } else if (ref
->type
== TYPE_SHARED_DATA_REF
) {
1604 SHARED_DATA_REF
* sdr
= (SHARED_DATA_REF
*)ptr
;
1606 sdr
->offset
= ref
->parent
->new_address
;
1607 sdr
->count
= ref
->sdr
.count
;
1609 ptr
+= sizeof(SHARED_DATA_REF
);
1615 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, dr
->new_address
, TYPE_EXTENT_ITEM
, dr
->size
, ei
, inline_len
, NULL
, NULL
);
1616 if (!NT_SUCCESS(Status
)) {
1617 ERR("insert_tree_item returned %08x\n", Status
);
1622 le
= &first_noninline
->list_entry
;
1624 while (le
!= &dr
->refs
) {
1625 data_reloc_ref
* ref
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1627 if (ref
->type
== TYPE_EXTENT_DATA_REF
) {
1628 EXTENT_DATA_REF
* edr
;
1630 edr
= ExAllocatePoolWithTag(PagedPool
, sizeof(EXTENT_DATA_REF
), ALLOC_TAG
);
1632 ERR("out of memory\n");
1633 return STATUS_INSUFFICIENT_RESOURCES
;
1636 RtlCopyMemory(edr
, &ref
->edr
, sizeof(EXTENT_DATA_REF
));
1638 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, dr
->new_address
, TYPE_EXTENT_DATA_REF
, ref
->hash
, edr
, sizeof(EXTENT_DATA_REF
), NULL
, NULL
);
1639 if (!NT_SUCCESS(Status
)) {
1640 ERR("insert_tree_item returned %08x\n", Status
);
1643 } else if (ref
->type
== TYPE_SHARED_DATA_REF
) {
1646 sdr
= ExAllocatePoolWithTag(PagedPool
, sizeof(UINT32
), ALLOC_TAG
);
1648 ERR("out of memory\n");
1649 return STATUS_INSUFFICIENT_RESOURCES
;
1652 *sdr
= ref
->sdr
.count
;
1654 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, dr
->new_address
, TYPE_SHARED_DATA_REF
, ref
->parent
->new_address
, sdr
, sizeof(UINT32
), NULL
, NULL
);
1655 if (!NT_SUCCESS(Status
)) {
1656 ERR("insert_tree_item returned %08x\n", Status
);
1665 return STATUS_SUCCESS
;
1668 static NTSTATUS
balance_data_chunk(device_extension
* Vcb
, chunk
* c
, BOOL
* changed
) {
1673 LIST_ENTRY items
, metadata_items
, rollback
, *le
;
1674 UINT64 loaded
= 0, num_loaded
= 0;
1675 chunk
* newchunk
= NULL
;
1678 TRACE("chunk %llx\n", c
->offset
);
1680 InitializeListHead(&rollback
);
1681 InitializeListHead(&items
);
1682 InitializeListHead(&metadata_items
);
1684 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, TRUE
);
1686 searchkey
.obj_id
= c
->offset
;
1687 searchkey
.obj_type
= TYPE_EXTENT_ITEM
;
1688 searchkey
.offset
= 0xffffffffffffffff;
1690 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, FALSE
, NULL
);
1691 if (!NT_SUCCESS(Status
)) {
1692 ERR("find_item returned %08x\n", Status
);
1697 traverse_ptr next_tp
;
1699 if (tp
.item
->key
.obj_id
>= c
->offset
+ c
->chunk_item
->size
)
1702 if (tp
.item
->key
.obj_id
>= c
->offset
&& tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
) {
1705 if (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
&& tp
.item
->size
>= sizeof(EXTENT_ITEM
)) {
1706 EXTENT_ITEM
* ei
= (EXTENT_ITEM
*)tp
.item
->data
;
1708 if (ei
->flags
& EXTENT_ITEM_TREE_BLOCK
)
1713 Status
= add_data_reloc(Vcb
, &items
, &metadata_items
, &tp
, c
, &rollback
);
1715 if (!NT_SUCCESS(Status
)) {
1716 ERR("add_data_reloc returned %08x\n", Status
);
1720 loaded
+= tp
.item
->key
.offset
;
1723 if (loaded
>= 0x1000000 || num_loaded
>= 100) // only do so much at a time, so we don't block too obnoxiously
1728 b
= find_next_item(Vcb
, &tp
, &next_tp
, FALSE
, NULL
);
1734 if (IsListEmpty(&items
)) {
1736 Status
= STATUS_SUCCESS
;
1741 data
= ExAllocatePoolWithTag(PagedPool
, BALANCE_UNIT
, ALLOC_TAG
);
1743 ERR("out of memory\n");
1744 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1749 while (le
!= &items
) {
1750 data_reloc
* dr
= CONTAINING_RECORD(le
, data_reloc
, list_entry
);
1756 ULONG runlength
, index
, lastoff
;
1759 ExAcquireResourceExclusiveLite(&newchunk
->lock
, TRUE
);
1761 if (find_data_address_in_chunk(Vcb
, newchunk
, dr
->size
, &dr
->new_address
)) {
1762 newchunk
->used
+= dr
->size
;
1763 space_list_subtract(newchunk
, FALSE
, dr
->new_address
, dr
->size
, &rollback
);
1767 ExReleaseResourceLite(&newchunk
->lock
);
1771 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, TRUE
);
1773 le2
= Vcb
->chunks
.Flink
;
1774 while (le2
!= &Vcb
->chunks
) {
1775 chunk
* c2
= CONTAINING_RECORD(le2
, chunk
, list_entry
);
1777 if (!c2
->readonly
&& !c2
->reloc
&& c2
!= newchunk
&& c2
->chunk_item
->type
== Vcb
->data_flags
) {
1778 ExAcquireResourceExclusiveLite(&c2
->lock
, TRUE
);
1780 if ((c2
->chunk_item
->size
- c2
->used
) >= dr
->size
) {
1781 if (find_data_address_in_chunk(Vcb
, c2
, dr
->size
, &dr
->new_address
)) {
1782 c2
->used
+= dr
->size
;
1783 space_list_subtract(c2
, FALSE
, dr
->new_address
, dr
->size
, &rollback
);
1784 ExReleaseResourceLite(&c2
->lock
);
1791 ExReleaseResourceLite(&c2
->lock
);
1797 // allocate new chunk if necessary
1799 Status
= alloc_chunk(Vcb
, Vcb
->data_flags
, &newchunk
, FALSE
);
1801 if (!NT_SUCCESS(Status
)) {
1802 ERR("alloc_chunk returned %08x\n", Status
);
1803 ExReleaseResourceLite(&Vcb
->chunk_lock
);
1807 ExAcquireResourceExclusiveLite(&newchunk
->lock
, TRUE
);
1809 newchunk
->balance_num
= Vcb
->balance
.balance_num
;
1811 if (!find_data_address_in_chunk(Vcb
, newchunk
, dr
->size
, &dr
->new_address
)) {
1812 ExReleaseResourceLite(&newchunk
->lock
);
1813 ExReleaseResourceLite(&Vcb
->chunk_lock
);
1814 ERR("could not find address in new chunk\n");
1815 Status
= STATUS_DISK_FULL
;
1818 newchunk
->used
+= dr
->size
;
1819 space_list_subtract(newchunk
, FALSE
, dr
->new_address
, dr
->size
, &rollback
);
1822 ExReleaseResourceLite(&newchunk
->lock
);
1825 ExReleaseResourceLite(&Vcb
->chunk_lock
);
1828 dr
->newchunk
= newchunk
;
1830 bmparr
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)sector_align((dr
->size
/ Vcb
->superblock
.sector_size
) + 1, sizeof(ULONG
)), ALLOC_TAG
);
1832 ERR("out of memory\n");
1833 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1837 csum
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)(dr
->size
* sizeof(UINT32
) / Vcb
->superblock
.sector_size
), ALLOC_TAG
);
1839 ERR("out of memory\n");
1841 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1845 RtlInitializeBitMap(&bmp
, bmparr
, (ULONG
)(dr
->size
/ Vcb
->superblock
.sector_size
));
1846 RtlSetAllBits(&bmp
); // 1 = no csum, 0 = csum
1848 searchkey
.obj_id
= EXTENT_CSUM_ID
;
1849 searchkey
.obj_type
= TYPE_EXTENT_CSUM
;
1850 searchkey
.offset
= dr
->address
;
1852 Status
= find_item(Vcb
, Vcb
->checksum_root
, &tp
, &searchkey
, FALSE
, NULL
);
1853 if (!NT_SUCCESS(Status
) && Status
!= STATUS_NOT_FOUND
) {
1854 ERR("find_item returned %08x\n", Status
);
1860 if (Status
!= STATUS_NOT_FOUND
) {
1862 traverse_ptr next_tp
;
1864 if (tp
.item
->key
.obj_type
== TYPE_EXTENT_CSUM
) {
1865 if (tp
.item
->key
.offset
>= dr
->address
+ dr
->size
)
1867 else if (tp
.item
->size
>= sizeof(UINT32
) && tp
.item
->key
.offset
+ (tp
.item
->size
* Vcb
->superblock
.sector_size
/ sizeof(UINT32
)) >= dr
->address
) {
1868 UINT64 cs
= max(dr
->address
, tp
.item
->key
.offset
);
1869 UINT64 ce
= min(dr
->address
+ dr
->size
, tp
.item
->key
.offset
+ (tp
.item
->size
* Vcb
->superblock
.sector_size
/ sizeof(UINT32
)));
1871 RtlCopyMemory(csum
+ ((cs
- dr
->address
) / Vcb
->superblock
.sector_size
),
1872 tp
.item
->data
+ ((cs
- tp
.item
->key
.offset
) * sizeof(UINT32
) / Vcb
->superblock
.sector_size
),
1873 (ULONG
)((ce
- cs
) * sizeof(UINT32
) / Vcb
->superblock
.sector_size
));
1875 RtlClearBits(&bmp
, (ULONG
)((cs
- dr
->address
) / Vcb
->superblock
.sector_size
), (ULONG
)((ce
- cs
) / Vcb
->superblock
.sector_size
));
1877 if (ce
== dr
->address
+ dr
->size
)
1882 if (find_next_item(Vcb
, &tp
, &next_tp
, FALSE
, NULL
))
1890 runlength
= RtlFindFirstRunClear(&bmp
, &index
);
1892 while (runlength
!= 0) {
1893 if (index
> lastoff
) {
1894 ULONG off
= lastoff
;
1895 ULONG size
= index
- lastoff
;
1897 // handle no csum run
1901 if (size
* Vcb
->superblock
.sector_size
> BALANCE_UNIT
)
1902 rl
= BALANCE_UNIT
/ Vcb
->superblock
.sector_size
;
1906 Status
= read_data(Vcb
, dr
->address
+ (off
* Vcb
->superblock
.sector_size
), rl
* Vcb
->superblock
.sector_size
, NULL
, FALSE
, data
,
1907 c
, NULL
, NULL
, 0, FALSE
, NormalPagePriority
);
1908 if (!NT_SUCCESS(Status
)) {
1909 ERR("read_data returned %08x\n", Status
);
1915 Status
= write_data_complete(Vcb
, dr
->new_address
+ (off
* Vcb
->superblock
.sector_size
), data
, rl
* Vcb
->superblock
.sector_size
,
1916 NULL
, newchunk
, FALSE
, 0, NormalPagePriority
);
1917 if (!NT_SUCCESS(Status
)) {
1918 ERR("write_data_complete returned %08x\n", Status
);
1929 add_checksum_entry(Vcb
, dr
->new_address
+ (index
* Vcb
->superblock
.sector_size
), runlength
, &csum
[index
], NULL
);
1930 add_checksum_entry(Vcb
, dr
->address
+ (index
* Vcb
->superblock
.sector_size
), runlength
, NULL
, NULL
);
1936 if (runlength
* Vcb
->superblock
.sector_size
> BALANCE_UNIT
)
1937 rl
= BALANCE_UNIT
/ Vcb
->superblock
.sector_size
;
1941 Status
= read_data(Vcb
, dr
->address
+ (index
* Vcb
->superblock
.sector_size
), rl
* Vcb
->superblock
.sector_size
, &csum
[index
], FALSE
, data
,
1942 c
, NULL
, NULL
, 0, FALSE
, NormalPagePriority
);
1943 if (!NT_SUCCESS(Status
)) {
1944 ERR("read_data returned %08x\n", Status
);
1950 Status
= write_data_complete(Vcb
, dr
->new_address
+ (index
* Vcb
->superblock
.sector_size
), data
, rl
* Vcb
->superblock
.sector_size
,
1951 NULL
, newchunk
, FALSE
, 0, NormalPagePriority
);
1952 if (!NT_SUCCESS(Status
)) {
1953 ERR("write_data_complete returned %08x\n", Status
);
1961 } while (runlength
> 0);
1964 runlength
= RtlFindNextForwardRunClear(&bmp
, index
, &index
);
1970 // handle final nocsum run
1971 if (lastoff
< dr
->size
/ Vcb
->superblock
.sector_size
) {
1972 ULONG off
= lastoff
;
1973 ULONG size
= (ULONG
)((dr
->size
/ Vcb
->superblock
.sector_size
) - lastoff
);
1978 if (size
* Vcb
->superblock
.sector_size
> BALANCE_UNIT
)
1979 rl
= BALANCE_UNIT
/ Vcb
->superblock
.sector_size
;
1983 Status
= read_data(Vcb
, dr
->address
+ (off
* Vcb
->superblock
.sector_size
), rl
* Vcb
->superblock
.sector_size
, NULL
, FALSE
, data
,
1984 c
, NULL
, NULL
, 0, FALSE
, NormalPagePriority
);
1985 if (!NT_SUCCESS(Status
)) {
1986 ERR("read_data returned %08x\n", Status
);
1990 Status
= write_data_complete(Vcb
, dr
->new_address
+ (off
* Vcb
->superblock
.sector_size
), data
, rl
* Vcb
->superblock
.sector_size
,
1991 NULL
, newchunk
, FALSE
, 0, NormalPagePriority
);
1992 if (!NT_SUCCESS(Status
)) {
1993 ERR("write_data_complete returned %08x\n", Status
);
2008 Status
= write_metadata_items(Vcb
, &metadata_items
, &items
, NULL
, &rollback
);
2009 if (!NT_SUCCESS(Status
)) {
2010 ERR("write_metadata_items returned %08x\n", Status
);
2015 while (le
!= &items
) {
2016 data_reloc
* dr
= CONTAINING_RECORD(le
, data_reloc
, list_entry
);
2018 Status
= add_data_reloc_extent_item(Vcb
, dr
);
2019 if (!NT_SUCCESS(Status
)) {
2020 ERR("add_data_reloc_extent_item returned %08x\n", Status
);
2027 le
= c
->changed_extents
.Flink
;
2028 while (le
!= &c
->changed_extents
) {
2029 LIST_ENTRY
*le2
, *le3
;
2030 changed_extent
* ce
= CONTAINING_RECORD(le
, changed_extent
, list_entry
);
2035 while (le2
!= &items
) {
2036 data_reloc
* dr
= CONTAINING_RECORD(le2
, data_reloc
, list_entry
);
2038 if (ce
->address
== dr
->address
) {
2039 ce
->address
= dr
->new_address
;
2040 RemoveEntryList(&ce
->list_entry
);
2041 InsertTailList(&dr
->newchunk
->changed_extents
, &ce
->list_entry
);
2051 Status
= STATUS_SUCCESS
;
2053 Vcb
->need_write
= TRUE
;
2056 if (NT_SUCCESS(Status
)) {
2057 // update extents in cache inodes before we flush
2058 le
= Vcb
->chunks
.Flink
;
2059 while (le
!= &Vcb
->chunks
) {
2060 chunk
* c2
= CONTAINING_RECORD(le
, chunk
, list_entry
);
2065 ExAcquireResourceExclusiveLite(c2
->cache
->Header
.Resource
, TRUE
);
2067 le2
= c2
->cache
->extents
.Flink
;
2068 while (le2
!= &c2
->cache
->extents
) {
2069 extent
* ext
= CONTAINING_RECORD(le2
, extent
, list_entry
);
2072 if (ext
->extent_data
.type
== EXTENT_TYPE_REGULAR
|| ext
->extent_data
.type
== EXTENT_TYPE_PREALLOC
) {
2073 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ext
->extent_data
.data
;
2075 if (ed2
->size
> 0 && ed2
->address
>= c
->offset
&& ed2
->address
< c
->offset
+ c
->chunk_item
->size
) {
2076 LIST_ENTRY
* le3
= items
.Flink
;
2077 while (le3
!= &items
) {
2078 data_reloc
* dr
= CONTAINING_RECORD(le3
, data_reloc
, list_entry
);
2080 if (ed2
->address
== dr
->address
) {
2081 ed2
->address
= dr
->new_address
;
2094 ExReleaseResourceLite(c2
->cache
->Header
.Resource
);
2100 Status
= do_write(Vcb
, NULL
);
2101 if (!NT_SUCCESS(Status
))
2102 ERR("do_write returned %08x\n", Status
);
2105 if (NT_SUCCESS(Status
)) {
2106 clear_rollback(&rollback
);
2109 // FIXME - speed this up(?)
2111 acquire_fcb_lock_shared(Vcb
);
2113 le
= Vcb
->all_fcbs
.Flink
;
2114 while (le
!= &Vcb
->all_fcbs
) {
2115 struct _fcb
* fcb
= CONTAINING_RECORD(le
, struct _fcb
, list_entry_all
);
2118 ExAcquireResourceExclusiveLite(fcb
->Header
.Resource
, TRUE
);
2120 le2
= fcb
->extents
.Flink
;
2121 while (le2
!= &fcb
->extents
) {
2122 extent
* ext
= CONTAINING_RECORD(le2
, extent
, list_entry
);
2125 if (ext
->extent_data
.type
== EXTENT_TYPE_REGULAR
|| ext
->extent_data
.type
== EXTENT_TYPE_PREALLOC
) {
2126 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ext
->extent_data
.data
;
2128 if (ed2
->size
> 0 && ed2
->address
>= c
->offset
&& ed2
->address
< c
->offset
+ c
->chunk_item
->size
) {
2129 LIST_ENTRY
* le3
= items
.Flink
;
2130 while (le3
!= &items
) {
2131 data_reloc
* dr
= CONTAINING_RECORD(le3
, data_reloc
, list_entry
);
2133 if (ed2
->address
== dr
->address
) {
2134 ed2
->address
= dr
->new_address
;
2147 ExReleaseResourceLite(fcb
->Header
.Resource
);
2152 release_fcb_lock(Vcb
);
2154 do_rollback(Vcb
, &rollback
);
2158 ExReleaseResourceLite(&Vcb
->tree_lock
);
2163 while (!IsListEmpty(&items
)) {
2164 data_reloc
* dr
= CONTAINING_RECORD(RemoveHeadList(&items
), data_reloc
, list_entry
);
2166 while (!IsListEmpty(&dr
->refs
)) {
2167 data_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&dr
->refs
), data_reloc_ref
, list_entry
);
2175 while (!IsListEmpty(&metadata_items
)) {
2176 metadata_reloc
* mr
= CONTAINING_RECORD(RemoveHeadList(&metadata_items
), metadata_reloc
, list_entry
);
2178 while (!IsListEmpty(&mr
->refs
)) {
2179 metadata_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&mr
->refs
), metadata_reloc_ref
, list_entry
);
2190 static __inline UINT64
get_chunk_dup_type(chunk
* c
) {
2191 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID0
)
2192 return BLOCK_FLAG_RAID0
;
2193 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID1
)
2194 return BLOCK_FLAG_RAID1
;
2195 else if (c
->chunk_item
->type
& BLOCK_FLAG_DUPLICATE
)
2196 return BLOCK_FLAG_DUPLICATE
;
2197 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID10
)
2198 return BLOCK_FLAG_RAID10
;
2199 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
)
2200 return BLOCK_FLAG_RAID5
;
2201 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)
2202 return BLOCK_FLAG_RAID6
;
2204 return BLOCK_FLAG_SINGLE
;
2207 static BOOL
should_balance_chunk(device_extension
* Vcb
, UINT8 sort
, chunk
* c
) {
2208 btrfs_balance_opts
* opts
;
2210 opts
= &Vcb
->balance
.opts
[sort
];
2212 if (!(opts
->flags
& BTRFS_BALANCE_OPTS_ENABLED
))
2215 if (opts
->flags
& BTRFS_BALANCE_OPTS_PROFILES
) {
2216 UINT64 type
= get_chunk_dup_type(c
);
2218 if (!(type
& opts
->profiles
))
2222 if (opts
->flags
& BTRFS_BALANCE_OPTS_DEVID
) {
2224 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
2227 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
2228 if (cis
[i
].dev_id
== opts
->devid
) {
2238 if (opts
->flags
& BTRFS_BALANCE_OPTS_DRANGE
) {
2241 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
2244 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID0
)
2245 factor
= c
->chunk_item
->num_stripes
;
2246 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID10
)
2247 factor
= c
->chunk_item
->num_stripes
/ c
->chunk_item
->sub_stripes
;
2248 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
)
2249 factor
= c
->chunk_item
->num_stripes
- 1;
2250 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)
2251 factor
= c
->chunk_item
->num_stripes
- 2;
2252 else // SINGLE, DUPLICATE, RAID1
2255 physsize
= c
->chunk_item
->size
/ factor
;
2257 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
2258 if (cis
[i
].offset
< opts
->drange_end
&& cis
[i
].offset
+ physsize
>= opts
->drange_start
&&
2259 (!(opts
->flags
& BTRFS_BALANCE_OPTS_DEVID
) || cis
[i
].dev_id
== opts
->devid
)) {
2269 if (opts
->flags
& BTRFS_BALANCE_OPTS_VRANGE
) {
2270 if (c
->offset
+ c
->chunk_item
->size
<= opts
->vrange_start
|| c
->offset
> opts
->vrange_end
)
2274 if (opts
->flags
& BTRFS_BALANCE_OPTS_STRIPES
) {
2275 if (c
->chunk_item
->num_stripes
< opts
->stripes_start
|| c
->chunk_item
->num_stripes
< opts
->stripes_end
)
2279 if (opts
->flags
& BTRFS_BALANCE_OPTS_USAGE
) {
2280 UINT64 usage
= c
->used
* 100 / c
->chunk_item
->size
;
2282 // usage == 0 should mean completely empty, not just that usage rounds to 0%
2283 if (c
->used
> 0 && usage
== 0)
2286 if (usage
< opts
->usage_start
|| usage
> opts
->usage_end
)
2290 if (opts
->flags
& BTRFS_BALANCE_OPTS_CONVERT
&& opts
->flags
& BTRFS_BALANCE_OPTS_SOFT
) {
2291 UINT64 type
= get_chunk_dup_type(c
);
2293 if (type
== opts
->convert
)
2300 static void copy_balance_args(btrfs_balance_opts
* opts
, BALANCE_ARGS
* args
) {
2301 if (opts
->flags
& BTRFS_BALANCE_OPTS_PROFILES
) {
2302 args
->profiles
= opts
->profiles
;
2303 args
->flags
|= BALANCE_ARGS_FLAGS_PROFILES
;
2306 if (opts
->flags
& BTRFS_BALANCE_OPTS_USAGE
) {
2307 if (args
->usage_start
== 0) {
2308 args
->flags
|= BALANCE_ARGS_FLAGS_USAGE_RANGE
;
2309 args
->usage_start
= opts
->usage_start
;
2310 args
->usage_end
= opts
->usage_end
;
2312 args
->flags
|= BALANCE_ARGS_FLAGS_USAGE
;
2313 args
->usage
= opts
->usage_end
;
2317 if (opts
->flags
& BTRFS_BALANCE_OPTS_DEVID
) {
2318 args
->devid
= opts
->devid
;
2319 args
->flags
|= BALANCE_ARGS_FLAGS_DEVID
;
2322 if (opts
->flags
& BTRFS_BALANCE_OPTS_DRANGE
) {
2323 args
->drange_start
= opts
->drange_start
;
2324 args
->drange_end
= opts
->drange_end
;
2325 args
->flags
|= BALANCE_ARGS_FLAGS_DRANGE
;
2328 if (opts
->flags
& BTRFS_BALANCE_OPTS_VRANGE
) {
2329 args
->vrange_start
= opts
->vrange_start
;
2330 args
->vrange_end
= opts
->vrange_end
;
2331 args
->flags
|= BALANCE_ARGS_FLAGS_VRANGE
;
2334 if (opts
->flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
2335 args
->convert
= opts
->convert
;
2336 args
->flags
|= BALANCE_ARGS_FLAGS_CONVERT
;
2338 if (opts
->flags
& BTRFS_BALANCE_OPTS_SOFT
)
2339 args
->flags
|= BALANCE_ARGS_FLAGS_SOFT
;
2342 if (opts
->flags
& BTRFS_BALANCE_OPTS_LIMIT
) {
2343 if (args
->limit_start
== 0) {
2344 args
->flags
|= BALANCE_ARGS_FLAGS_LIMIT_RANGE
;
2345 args
->limit_start
= (UINT32
)opts
->limit_start
;
2346 args
->limit_end
= (UINT32
)opts
->limit_end
;
2348 args
->flags
|= BALANCE_ARGS_FLAGS_LIMIT
;
2349 args
->limit
= opts
->limit_end
;
2353 if (opts
->flags
& BTRFS_BALANCE_OPTS_STRIPES
) {
2354 args
->stripes_start
= opts
->stripes_start
;
2355 args
->stripes_end
= opts
->stripes_end
;
2356 args
->flags
|= BALANCE_ARGS_FLAGS_STRIPES_RANGE
;
2360 static NTSTATUS
add_balance_item(device_extension
* Vcb
) {
2366 searchkey
.obj_id
= BALANCE_ITEM_ID
;
2367 searchkey
.obj_type
= TYPE_TEMP_ITEM
;
2368 searchkey
.offset
= 0;
2370 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, TRUE
);
2372 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, FALSE
, NULL
);
2373 if (!NT_SUCCESS(Status
)) {
2374 ERR("find_item returned %08x\n", Status
);
2378 if (!keycmp(tp
.item
->key
, searchkey
)) {
2379 Status
= delete_tree_item(Vcb
, &tp
);
2380 if (!NT_SUCCESS(Status
)) {
2381 ERR("delete_tree_item returned %08x\n", Status
);
2386 bi
= ExAllocatePoolWithTag(PagedPool
, sizeof(BALANCE_ITEM
), ALLOC_TAG
);
2388 ERR("out of memory\n");
2389 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2393 RtlZeroMemory(bi
, sizeof(BALANCE_ITEM
));
2395 if (Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
2396 bi
->flags
|= BALANCE_FLAGS_DATA
;
2397 copy_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], &bi
->data
);
2400 if (Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
2401 bi
->flags
|= BALANCE_FLAGS_METADATA
;
2402 copy_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], &bi
->metadata
);
2405 if (Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
2406 bi
->flags
|= BALANCE_FLAGS_SYSTEM
;
2407 copy_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
], &bi
->system
);
2410 Status
= insert_tree_item(Vcb
, Vcb
->root_root
, BALANCE_ITEM_ID
, TYPE_TEMP_ITEM
, 0, bi
, sizeof(BALANCE_ITEM
), NULL
, NULL
);
2411 if (!NT_SUCCESS(Status
)) {
2412 ERR("insert_tree_item returned %08x\n", Status
);
2417 Status
= STATUS_SUCCESS
;
2420 if (NT_SUCCESS(Status
)) {
2421 Status
= do_write(Vcb
, NULL
);
2422 if (!NT_SUCCESS(Status
))
2423 ERR("do_write returned %08x\n", Status
);
2428 ExReleaseResourceLite(&Vcb
->tree_lock
);
2433 static NTSTATUS
remove_balance_item(device_extension
* Vcb
) {
2438 searchkey
.obj_id
= BALANCE_ITEM_ID
;
2439 searchkey
.obj_type
= TYPE_TEMP_ITEM
;
2440 searchkey
.offset
= 0;
2442 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, TRUE
);
2444 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, FALSE
, NULL
);
2445 if (!NT_SUCCESS(Status
)) {
2446 ERR("find_item returned %08x\n", Status
);
2450 if (!keycmp(tp
.item
->key
, searchkey
)) {
2451 Status
= delete_tree_item(Vcb
, &tp
);
2452 if (!NT_SUCCESS(Status
)) {
2453 ERR("delete_tree_item returned %08x\n", Status
);
2457 Status
= do_write(Vcb
, NULL
);
2458 if (!NT_SUCCESS(Status
)) {
2459 ERR("do_write returned %08x\n", Status
);
2466 Status
= STATUS_SUCCESS
;
2469 ExReleaseResourceLite(&Vcb
->tree_lock
);
2474 static void load_balance_args(btrfs_balance_opts
* opts
, BALANCE_ARGS
* args
) {
2475 opts
->flags
= BTRFS_BALANCE_OPTS_ENABLED
;
2477 if (args
->flags
& BALANCE_ARGS_FLAGS_PROFILES
) {
2478 opts
->flags
|= BTRFS_BALANCE_OPTS_PROFILES
;
2479 opts
->profiles
= args
->profiles
;
2482 if (args
->flags
& BALANCE_ARGS_FLAGS_USAGE
) {
2483 opts
->flags
|= BTRFS_BALANCE_OPTS_USAGE
;
2485 opts
->usage_start
= 0;
2486 opts
->usage_end
= (UINT8
)args
->usage
;
2487 } else if (args
->flags
& BALANCE_ARGS_FLAGS_USAGE_RANGE
) {
2488 opts
->flags
|= BTRFS_BALANCE_OPTS_USAGE
;
2490 opts
->usage_start
= (UINT8
)args
->usage_start
;
2491 opts
->usage_end
= (UINT8
)args
->usage_end
;
2494 if (args
->flags
& BALANCE_ARGS_FLAGS_DEVID
) {
2495 opts
->flags
|= BTRFS_BALANCE_OPTS_DEVID
;
2496 opts
->devid
= args
->devid
;
2499 if (args
->flags
& BALANCE_ARGS_FLAGS_DRANGE
) {
2500 opts
->flags
|= BTRFS_BALANCE_OPTS_DRANGE
;
2501 opts
->drange_start
= args
->drange_start
;
2502 opts
->drange_end
= args
->drange_end
;
2505 if (args
->flags
& BALANCE_ARGS_FLAGS_VRANGE
) {
2506 opts
->flags
|= BTRFS_BALANCE_OPTS_VRANGE
;
2507 opts
->vrange_start
= args
->vrange_start
;
2508 opts
->vrange_end
= args
->vrange_end
;
2511 if (args
->flags
& BALANCE_ARGS_FLAGS_LIMIT
) {
2512 opts
->flags
|= BTRFS_BALANCE_OPTS_LIMIT
;
2514 opts
->limit_start
= 0;
2515 opts
->limit_end
= args
->limit
;
2516 } else if (args
->flags
& BALANCE_ARGS_FLAGS_LIMIT_RANGE
) {
2517 opts
->flags
|= BTRFS_BALANCE_OPTS_LIMIT
;
2519 opts
->limit_start
= args
->limit_start
;
2520 opts
->limit_end
= args
->limit_end
;
2523 if (args
->flags
& BALANCE_ARGS_FLAGS_STRIPES_RANGE
) {
2524 opts
->flags
|= BTRFS_BALANCE_OPTS_STRIPES
;
2526 opts
->stripes_start
= (UINT16
)args
->stripes_start
;
2527 opts
->stripes_end
= (UINT16
)args
->stripes_end
;
2530 if (args
->flags
& BALANCE_ARGS_FLAGS_CONVERT
) {
2531 opts
->flags
|= BTRFS_BALANCE_OPTS_CONVERT
;
2532 opts
->convert
= args
->convert
;
2534 if (args
->flags
& BALANCE_ARGS_FLAGS_SOFT
)
2535 opts
->flags
|= BTRFS_BALANCE_OPTS_SOFT
;
2539 static NTSTATUS
remove_superblocks(device
* dev
) {
2544 sb
= ExAllocatePoolWithTag(PagedPool
, sizeof(superblock
), ALLOC_TAG
);
2546 ERR("out of memory\n");
2547 return STATUS_INSUFFICIENT_RESOURCES
;
2550 RtlZeroMemory(sb
, sizeof(superblock
));
2552 while (superblock_addrs
[i
] > 0 && dev
->devitem
.num_bytes
>= superblock_addrs
[i
] + sizeof(superblock
)) {
2553 Status
= write_data_phys(dev
->devobj
, superblock_addrs
[i
], sb
, sizeof(superblock
));
2555 if (!NT_SUCCESS(Status
)) {
2565 return STATUS_SUCCESS
;
2568 static NTSTATUS
finish_removing_device(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, device
* dev
) {
2573 volume_device_extension
* vde
;
2575 if (Vcb
->need_write
) {
2576 Status
= do_write(Vcb
, NULL
);
2578 if (!NT_SUCCESS(Status
))
2579 ERR("do_write returned %08x\n", Status
);
2581 Status
= STATUS_SUCCESS
;
2585 if (!NT_SUCCESS(Status
))
2588 // remove entry in chunk tree
2590 searchkey
.obj_id
= 1;
2591 searchkey
.obj_type
= TYPE_DEV_ITEM
;
2592 searchkey
.offset
= dev
->devitem
.dev_id
;
2594 Status
= find_item(Vcb
, Vcb
->chunk_root
, &tp
, &searchkey
, FALSE
, NULL
);
2595 if (!NT_SUCCESS(Status
)) {
2596 ERR("find_item returned %08x\n", Status
);
2600 if (!keycmp(searchkey
, tp
.item
->key
)) {
2601 Status
= delete_tree_item(Vcb
, &tp
);
2603 if (!NT_SUCCESS(Status
)) {
2604 ERR("delete_tree_item returned %08x\n", Status
);
2609 // remove stats entry in device tree
2611 searchkey
.obj_id
= 0;
2612 searchkey
.obj_type
= TYPE_DEV_STATS
;
2613 searchkey
.offset
= dev
->devitem
.dev_id
;
2615 Status
= find_item(Vcb
, Vcb
->dev_root
, &tp
, &searchkey
, FALSE
, NULL
);
2616 if (!NT_SUCCESS(Status
)) {
2617 ERR("find_item returned %08x\n", Status
);
2621 if (!keycmp(searchkey
, tp
.item
->key
)) {
2622 Status
= delete_tree_item(Vcb
, &tp
);
2624 if (!NT_SUCCESS(Status
)) {
2625 ERR("delete_tree_item returned %08x\n", Status
);
2630 // update superblock
2632 Vcb
->superblock
.num_devices
--;
2633 Vcb
->superblock
.total_bytes
-= dev
->devitem
.num_bytes
;
2634 Vcb
->devices_loaded
--;
2636 RemoveEntryList(&dev
->list_entry
);
2640 Status
= do_write(Vcb
, NULL
);
2641 if (!NT_SUCCESS(Status
))
2642 ERR("do_write returned %08x\n", Status
);
2646 if (!NT_SUCCESS(Status
))
2649 if (!dev
->readonly
&& dev
->devobj
) {
2650 Status
= remove_superblocks(dev
);
2651 if (!NT_SUCCESS(Status
))
2652 WARN("remove_superblocks returned %08x\n", Status
);
2655 // remove entry in volume list
2660 pdo_device_extension
* pdode
= vde
->pdode
;
2662 ExAcquireResourceExclusiveLite(&pdode
->child_lock
, TRUE
);
2664 le
= pdode
->children
.Flink
;
2665 while (le
!= &pdode
->children
) {
2666 volume_child
* vc
= CONTAINING_RECORD(le
, volume_child
, list_entry
);
2668 if (RtlCompareMemory(&dev
->devitem
.device_uuid
, &vc
->uuid
, sizeof(BTRFS_UUID
)) == sizeof(BTRFS_UUID
)) {
2669 PFILE_OBJECT FileObject
;
2670 PDEVICE_OBJECT mountmgr
;
2671 UNICODE_STRING mmdevpath
;
2673 pdode
->children_loaded
--;
2675 if (vc
->had_drive_letter
) { // re-add entry to mountmgr
2676 RtlInitUnicodeString(&mmdevpath
, MOUNTMGR_DEVICE_NAME
);
2677 Status
= IoGetDeviceObjectPointer(&mmdevpath
, FILE_READ_ATTRIBUTES
, &FileObject
, &mountmgr
);
2678 if (!NT_SUCCESS(Status
))
2679 ERR("IoGetDeviceObjectPointer returned %08x\n", Status
);
2683 Status
= dev_ioctl(dev
->devobj
, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME
, NULL
, 0, &mdn
, sizeof(MOUNTDEV_NAME
), TRUE
, NULL
);
2684 if (!NT_SUCCESS(Status
) && Status
!= STATUS_BUFFER_OVERFLOW
)
2685 ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status
);
2687 MOUNTDEV_NAME
* mdn2
;
2688 ULONG mdnsize
= (ULONG
)offsetof(MOUNTDEV_NAME
, Name
[0]) + mdn
.NameLength
;
2690 mdn2
= ExAllocatePoolWithTag(PagedPool
, mdnsize
, ALLOC_TAG
);
2692 ERR("out of memory\n");
2694 Status
= dev_ioctl(dev
->devobj
, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME
, NULL
, 0, mdn2
, mdnsize
, TRUE
, NULL
);
2695 if (!NT_SUCCESS(Status
))
2696 ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status
);
2698 UNICODE_STRING name
;
2700 name
.Buffer
= mdn2
->Name
;
2701 name
.Length
= name
.MaximumLength
= mdn2
->NameLength
;
2703 Status
= mountmgr_add_drive_letter(mountmgr
, &name
);
2704 if (!NT_SUCCESS(Status
))
2705 WARN("mountmgr_add_drive_letter returned %08x\n", Status
);
2712 ObDereferenceObject(FileObject
);
2716 ExFreePool(vc
->pnp_name
.Buffer
);
2717 RemoveEntryList(&vc
->list_entry
);
2720 ObDereferenceObject(vc
->fileobj
);
2728 if (pdode
->children_loaded
> 0 && vde
->device
->Characteristics
& FILE_REMOVABLE_MEDIA
) {
2729 vde
->device
->Characteristics
&= ~FILE_REMOVABLE_MEDIA
;
2731 le
= pdode
->children
.Flink
;
2732 while (le
!= &pdode
->children
) {
2733 volume_child
* vc
= CONTAINING_RECORD(le
, volume_child
, list_entry
);
2735 if (vc
->devobj
->Characteristics
& FILE_REMOVABLE_MEDIA
) {
2736 vde
->device
->Characteristics
|= FILE_REMOVABLE_MEDIA
;
2744 pdode
->num_children
= Vcb
->superblock
.num_devices
;
2746 ExReleaseResourceLite(&pdode
->child_lock
);
2750 if (dev
->trim
&& !dev
->readonly
&& !Vcb
->options
.no_trim
)
2751 trim_whole_device(dev
);
2754 while (!IsListEmpty(&dev
->space
)) {
2755 LIST_ENTRY
* le2
= RemoveHeadList(&dev
->space
);
2756 space
* s
= CONTAINING_RECORD(le2
, space
, list_entry
);
2766 le
= Vcb
->devices
.Flink
;
2767 while (le
!= &Vcb
->devices
) {
2768 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
2779 FsRtlNotifyVolumeEvent(Vcb
->root_file
, FSRTL_VOLUME_CHANGE_SIZE
);
2781 return STATUS_SUCCESS
;
// trim_unalloc_space: issue a TRIM for the parts of `dev` that no DEV_EXTENT item covers.
//
// Visible flow: look up the first (dev_id, TYPE_DEV_EXTENT, 0) key in Vcb->dev_root, step
// through the items with find_next_item, and hand every gap between `lastoff` and the next
// extent to add_trim_entry_avoid_sb. The entries found on dev->trim_list are then packed
// into a single DEVICE_MANAGE_DATA_SET_ATTRIBUTES buffer and sent to dev->devobj with
// IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES. A failing ioctl is only logged with WARN.
// The function returns nothing; dev->num_trim_entries is reset to 0 on entry and at the end.
//
// NOTE(review): this extract is missing several lines (local declarations, loop framing,
// error exits, frees) - confirm against the full file before relying on exact control flow.
2784 static void trim_unalloc_space(_Requires_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, device
* dev
) {
2785 DEVICE_MANAGE_DATA_SET_ATTRIBUTES
* dmdsa
;
2786 DEVICE_DATA_SET_RANGE
* ranges
;
2792 UINT64 lastoff
= 0x100000; // don't TRIM the first megabyte, in case someone has been daft enough to install GRUB there
2795 dev
->num_trim_entries
= 0;
2797 searchkey
.obj_id
= dev
->devitem
.dev_id
;
2798 searchkey
.obj_type
= TYPE_DEV_EXTENT
;
2799 searchkey
.offset
= 0;
2801 Status
= find_item(Vcb
, Vcb
->dev_root
, &tp
, &searchkey
, FALSE
, NULL
);
2802 if (!NT_SUCCESS(Status
)) {
2803 ERR("find_item returned %08x\n", Status
);
2808 traverse_ptr next_tp
;
// Only DEV_EXTENT items belonging to this device contribute; undersized items are reported.
2810 if (tp
.item
->key
.obj_id
== dev
->devitem
.dev_id
&& tp
.item
->key
.obj_type
== TYPE_DEV_EXTENT
) {
2811 if (tp
.item
->size
>= sizeof(DEV_EXTENT
)) {
2812 DEV_EXTENT
* de
= (DEV_EXTENT
*)tp
.item
->data
;
// Gap between the end of the previous extent and the start of this one.
2814 if (tp
.item
->key
.offset
> lastoff
)
2815 add_trim_entry_avoid_sb(Vcb
, dev
, lastoff
, tp
.item
->key
.offset
- lastoff
);
2817 lastoff
= tp
.item
->key
.offset
+ de
->length
;
2819 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, sizeof(DEV_EXTENT
));
2824 b
= find_next_item(Vcb
, &tp
, &next_tp
, FALSE
, NULL
);
// Tests whether the current key has moved past this device's DEV_EXTENT range.
2828 if (tp
.item
->key
.obj_id
> searchkey
.obj_id
|| (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
> searchkey
.obj_type
))
// Tail gap: from the end of the last extent up to the device size.
2833 if (lastoff
< dev
->devitem
.num_bytes
)
2834 add_trim_entry_avoid_sb(Vcb
, dev
, lastoff
, dev
->devitem
.num_bytes
- lastoff
);
2836 if (dev
->num_trim_entries
== 0)
// Buffer layout: header rounded up to a multiple of sizeof(UINT64), followed by one range per entry.
2839 datalen
= (ULONG
)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES
), sizeof(UINT64
)) + (dev
->num_trim_entries
* sizeof(DEVICE_DATA_SET_RANGE
));
2841 dmdsa
= ExAllocatePoolWithTag(PagedPool
, datalen
, ALLOC_TAG
);
2843 ERR("out of memory\n");
2847 dmdsa
->Size
= sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES
);
2848 dmdsa
->Action
= DeviceDsmAction_Trim
;
2849 dmdsa
->Flags
= DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED
;
2850 dmdsa
->ParameterBlockOffset
= 0;
2851 dmdsa
->ParameterBlockLength
= 0;
2852 dmdsa
->DataSetRangesOffset
= (ULONG
)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES
), sizeof(UINT64
));
2853 dmdsa
->DataSetRangesLength
= dev
->num_trim_entries
* sizeof(DEVICE_DATA_SET_RANGE
);
2855 ranges
= (DEVICE_DATA_SET_RANGE
*)((UINT8
*)dmdsa
+ dmdsa
->DataSetRangesOffset
);
// Copy each queued entry (address, size) from dev->trim_list into the range array.
2858 le
= dev
->trim_list
.Flink
;
2859 while (le
!= &dev
->trim_list
) {
2860 space
* s
= CONTAINING_RECORD(le
, space
, list_entry
);
2862 ranges
[i
].StartingOffset
= s
->address
;
2863 ranges
[i
].LengthInBytes
= s
->size
;
2869 Status
= dev_ioctl(dev
->devobj
, IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES
, dmdsa
, datalen
, NULL
, 0, TRUE
, NULL
);
2870 if (!NT_SUCCESS(Status
))
2871 WARN("IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES returned %08x\n", Status
);
// Drain dev->trim_list so the next run starts from an empty list.
2876 while (!IsListEmpty(&dev
->trim_list
)) {
2877 space
* s
= CONTAINING_RECORD(RemoveHeadList(&dev
->trim_list
), space
, list_entry
);
2881 dev
->num_trim_entries
= 0;
// try_consolidation: attempt to free up room so that a chunk of type `flags` can be allocated.
//
// Visible flow: under tree_lock and chunk_lock (both shared), scan Vcb->chunks for the
// least-used, non-readonly data chunk whose balance_num differs from the current balance
// run; relocate its contents with balance_data_chunk; mark it as handled for this run;
// commit with do_write; then call alloc_chunk(Vcb, flags, ...) with chunk_lock held
// exclusively. Returns STATUS_SUCCESS early when Vcb->balance.stopping is set; failures of
// balance_data_chunk are also stored in Vcb->balance.status.
//
// NOTE(review): declarations, the retry loop framing, the handling of *newchunk and several
// return statements are not visible in this extract - confirm against the full file.
2884 static NTSTATUS
try_consolidation(device_extension
* Vcb
, UINT64 flags
, chunk
** newchunk
) {
2890 // FIXME - allow with metadata chunks?
2895 ExAcquireResourceSharedLite(&Vcb
->tree_lock
, TRUE
);
2897 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, TRUE
);
2899 // choose the least-used chunk we haven't looked at yet
2900 le
= Vcb
->chunks
.Flink
;
2901 while (le
!= &Vcb
->chunks
) {
2902 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
2904 // FIXME - skip full-size chunks over e.g. 90% full?
2905 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
&& !c
->readonly
&& c
->balance_num
!= Vcb
->balance
.balance_num
&& (!rc
|| c
->used
< rc
->used
))
2911 ExReleaseResourceLite(&Vcb
->chunk_lock
);
2914 ExReleaseResourceLite(&Vcb
->tree_lock
);
// If the chosen chunk is already queued for this balance run, take it off that queue.
2918 if (rc
->list_entry_balance
.Flink
) {
2919 RemoveEntryList(&rc
->list_entry_balance
);
2920 Vcb
->balance
.chunks_left
--;
2923 rc
->list_entry_balance
.Flink
= (LIST_ENTRY
*)1; // so it doesn't get dropped
2926 ExReleaseResourceLite(&Vcb
->tree_lock
);
2931 Status
= balance_data_chunk(Vcb
, rc
, &changed
);
2932 if (!NT_SUCCESS(Status
)) {
2933 ERR("balance_data_chunk returned %08x\n", Status
);
2934 Vcb
->balance
.status
= Status
;
2935 rc
->list_entry_balance
.Flink
= NULL
;
// Wait on the balance event (pause_balance clears it; resume_balance and stop_balance set it).
2940 KeWaitForSingleObject(&Vcb
->balance
.event
, Executive
, KernelMode
, FALSE
, NULL
);
2943 Vcb
->balance
.stopping
= TRUE
;
2945 if (Vcb
->balance
.stopping
)
2946 return STATUS_SUCCESS
;
2949 rc
->list_entry_balance
.Flink
= NULL
;
// Stamp the chunk with the current run number so the selection loop above skips it next time.
2952 rc
->space_changed
= TRUE
;
2953 rc
->balance_num
= Vcb
->balance
.balance_num
;
2955 Status
= do_write(Vcb
, NULL
);
2956 if (!NT_SUCCESS(Status
)) {
2957 ERR("do_write returned %08x\n", Status
);
2962 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, TRUE
);
2964 Status
= alloc_chunk(Vcb
, flags
, &rc
, TRUE
);
2966 ExReleaseResourceLite(&Vcb
->chunk_lock
);
2968 if (NT_SUCCESS(Status
)) {
2972 ERR("alloc_chunk returned %08x\n", Status
);
// regenerate_space_list: rebuild dev->space (the device's free-space list) from scratch.
//
// Empties dev->space, adds one free range from the 1 MB mark to devitem.num_bytes, then
// walks Vcb->chunks and, for every stripe whose dev_id matches this device, subtracts
// [stripe offset, stripe offset + stripe_size) from the list. stripe_size is the chunk
// size divided by a per-profile factor. The visible code only ever returns STATUS_SUCCESS.
//
// NOTE(review): local declarations, the default `factor` for SINGLE/DUP/RAID1, and the
// list-advance statements are not visible in this extract.
2977 static NTSTATUS
regenerate_space_list(device_extension
* Vcb
, device
* dev
) {
2980 while (!IsListEmpty(&dev
->space
)) {
2981 space
* s
= CONTAINING_RECORD(RemoveHeadList(&dev
->space
), space
, list_entry
);
2986 // The Linux driver doesn't like to allocate chunks within the first megabyte of a device.
2988 space_list_add2(&dev
->space
, NULL
, 0x100000, dev
->devitem
.num_bytes
- 0x100000, NULL
, NULL
);
2990 le
= Vcb
->chunks
.Flink
;
2991 while (le
!= &Vcb
->chunks
) {
2993 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
// The stripe array is addressed as the memory immediately following the CHUNK_ITEM.
2994 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
2996 for (n
= 0; n
< c
->chunk_item
->num_stripes
; n
++) {
2997 UINT64 stripe_size
= 0;
2999 if (cis
[n
].dev_id
== dev
->devitem
.dev_id
) {
3000 if (stripe_size
== 0) {
// factor = number of stripes the chunk size is divided by for this profile.
3003 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID0
)
3004 factor
= c
->chunk_item
->num_stripes
;
3005 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID10
)
3006 factor
= c
->chunk_item
->num_stripes
/ c
->chunk_item
->sub_stripes
;
3007 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
)
3008 factor
= c
->chunk_item
->num_stripes
- 1;
3009 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)
3010 factor
= c
->chunk_item
->num_stripes
- 2;
3011 else // SINGLE, DUP, RAID1
3014 stripe_size
= c
->chunk_item
->size
/ factor
;
3017 space_list_subtract2(&dev
->space
, NULL
, cis
[n
].offset
, stripe_size
, NULL
, NULL
);
3024 return STATUS_SUCCESS
;
// balance_thread: system-thread entry point that performs one balance run.
//
// `context` is the device_extension* of the volume (see the cast below). Visible phases:
//   1. bump Vcb->balance.balance_num, clear `stopping`, initialise the `finished` event;
//   2. for each enabled option set with CONVERT, save the current allocation flags and
//      switch the volume's flags to the target profile;
//   3. on a writable volume, record the balance on disk (add_balance_item) unless this
//      is a device removal or shrink, and flush pending writes;
//   4. build the list of chunks to process, loading each chunk's cache;
//   5. if some chunk type has no untouched chunk left, pre-allocate a chunk of that type,
//      falling back to try_consolidation on STATUS_DISK_FULL;
//   6. relocate data chunks, then metadata/system chunks;
//   7. finish a device removal or shrink, remove the balance item, TRIM unallocated space;
//   8. close the thread handle, clear Vcb->balance.thread and signal `finished`.
// Errors are reported through Vcb->balance.status.
//
// NOTE(review): two alternative signatures appear below without their preprocessor guards,
// and many control-flow lines (braces, gotos/returns, loop advances) are not visible in
// this extract - confirm details against the full file.
3027 _Function_class_(KSTART_ROUTINE
)
3029 void balance_thread(void* context
) {
3031 void NTAPI
balance_thread(void* context
) {
3033 device_extension
* Vcb
= (device_extension
*)context
;
3036 UINT64 num_chunks
[3], okay_metadata_chunks
= 0, okay_data_chunks
= 0, okay_system_chunks
= 0;
3037 UINT64 old_data_flags
= 0, old_metadata_flags
= 0, old_system_flags
= 0;
3040 Vcb
->balance
.balance_num
++;
3042 Vcb
->balance
.stopping
= FALSE
;
3043 KeInitializeEvent(&Vcb
->balance
.finished
, NotificationEvent
, FALSE
);
// Converting: remember the current flags and switch to the target profile
// (BLOCK_FLAG_SINGLE is stored as "no profile bit").
3045 if (Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
&& Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
3046 old_data_flags
= Vcb
->data_flags
;
3047 Vcb
->data_flags
= BLOCK_FLAG_DATA
| (Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].convert
== BLOCK_FLAG_SINGLE
? 0 : Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].convert
);
3049 FsRtlNotifyVolumeEvent(Vcb
->root_file
, FSRTL_VOLUME_CHANGE_SIZE
);
3052 if (Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
&& Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
3053 old_metadata_flags
= Vcb
->metadata_flags
;
3054 Vcb
->metadata_flags
= BLOCK_FLAG_METADATA
| (Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].convert
== BLOCK_FLAG_SINGLE
? 0 : Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].convert
);
3057 if (Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].flags
& BTRFS_BALANCE_OPTS_ENABLED
&& Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
3058 old_system_flags
= Vcb
->system_flags
;
3059 Vcb
->system_flags
= BLOCK_FLAG_SYSTEM
| (Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].convert
== BLOCK_FLAG_SINGLE
? 0 : Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].convert
);
// With the MIXED_GROUPS incompat flag set, the enabled option set (data first,
// otherwise metadata) is copied over the other one.
3062 if (Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS
) {
3063 if (Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
)
3064 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], &Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], sizeof(btrfs_balance_opts
));
3065 else if (Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
)
3066 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], &Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], sizeof(btrfs_balance_opts
));
3069 num_chunks
[0] = num_chunks
[1] = num_chunks
[2] = 0;
3070 Vcb
->balance
.total_chunks
= Vcb
->balance
.chunks_left
= 0;
3072 InitializeListHead(&chunks
);
3074 // FIXME - what are we supposed to do with limit_start?
// A device removal or shrink is not recorded on disk as a balance item.
3076 if (!Vcb
->readonly
) {
3077 if (!Vcb
->balance
.removing
&& !Vcb
->balance
.shrinking
) {
3078 Status
= add_balance_item(Vcb
);
3079 if (!NT_SUCCESS(Status
)) {
3080 ERR("add_balance_item returned %08x\n", Status
);
3081 Vcb
->balance
.status
= Status
;
3085 if (Vcb
->need_write
) {
3086 Status
= do_write(Vcb
, NULL
);
3090 if (!NT_SUCCESS(Status
)) {
3091 ERR("do_write returned %08x\n", Status
);
3092 Vcb
->balance
.status
= Status
;
// Wait on the balance event (pause_balance clears it; resume_balance and stop_balance set it).
3099 KeWaitForSingleObject(&Vcb
->balance
.event
, Executive
, KernelMode
, FALSE
, NULL
);
3101 if (Vcb
->balance
.stopping
)
// Build the work list: classify each chunk by type and queue those that pass the
// limit check and should_balance_chunk; the rest are counted as "okay" per type.
3104 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, TRUE
);
3106 le
= Vcb
->chunks
.Flink
;
3107 while (le
!= &Vcb
->chunks
) {
3108 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
3111 ExAcquireResourceExclusiveLite(&c
->lock
, TRUE
);
3113 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
)
3114 sort
= BALANCE_OPTS_DATA
;
3115 else if (c
->chunk_item
->type
& BLOCK_FLAG_METADATA
)
3116 sort
= BALANCE_OPTS_METADATA
;
3117 else if (c
->chunk_item
->type
& BLOCK_FLAG_SYSTEM
)
3118 sort
= BALANCE_OPTS_SYSTEM
;
3120 ERR("unexpected chunk type %llx\n", c
->chunk_item
->type
);
3121 ExReleaseResourceLite(&c
->lock
);
3125 if ((!(Vcb
->balance
.opts
[sort
].flags
& BTRFS_BALANCE_OPTS_LIMIT
) || num_chunks
[sort
] < Vcb
->balance
.opts
[sort
].limit_end
) &&
3126 should_balance_chunk(Vcb
, sort
, c
)) {
3127 InsertTailList(&chunks
, &c
->list_entry_balance
);
3130 Vcb
->balance
.total_chunks
++;
3131 Vcb
->balance
.chunks_left
++;
3132 } else if (sort
== BALANCE_OPTS_METADATA
)
3133 okay_metadata_chunks
++;
3134 else if (sort
== BALANCE_OPTS_DATA
)
3136 else if (sort
== BALANCE_OPTS_SYSTEM
)
3137 okay_system_chunks
++;
3139 if (!c
->cache_loaded
) {
3140 Status
= load_cache_chunk(Vcb
, c
, NULL
);
3142 if (!NT_SUCCESS(Status
)) {
3143 ERR("load_cache_chunk returned %08x\n", Status
);
3144 Vcb
->balance
.status
= Status
;
3145 ExReleaseResourceLite(&c
->lock
);
3146 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3151 ExReleaseResourceLite(&c
->lock
);
3156 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3158 // If we're doing a full balance, try and allocate a new chunk now, before we mess things up
3159 if (okay_metadata_chunks
== 0 || okay_data_chunks
== 0 || okay_system_chunks
== 0) {
3160 BOOL consolidated
= FALSE
;
// Same pattern for metadata, data and system below: allocate a chunk and stamp it with
// the current run number; STATUS_DISK_FULL is tolerated until a consolidation has been
// done (`consolidated`), in which case try_consolidation is used instead.
3163 if (okay_metadata_chunks
== 0) {
3164 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, TRUE
);
3166 Status
= alloc_chunk(Vcb
, Vcb
->metadata_flags
, &c
, TRUE
);
3167 if (NT_SUCCESS(Status
))
3168 c
->balance_num
= Vcb
->balance
.balance_num
;
3169 else if (Status
!= STATUS_DISK_FULL
|| consolidated
) {
3170 ERR("alloc_chunk returned %08x\n", Status
);
3171 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3172 Vcb
->balance
.status
= Status
;
3176 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3178 if (Status
== STATUS_DISK_FULL
) {
3179 Status
= try_consolidation(Vcb
, Vcb
->metadata_flags
, &c
);
3180 if (!NT_SUCCESS(Status
)) {
3181 ERR("try_consolidation returned %08x\n", Status
);
3182 Vcb
->balance
.status
= Status
;
3185 c
->balance_num
= Vcb
->balance
.balance_num
;
3187 consolidated
= TRUE
;
3189 if (Vcb
->balance
.stopping
)
3194 if (okay_data_chunks
== 0) {
3195 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, TRUE
);
3197 Status
= alloc_chunk(Vcb
, Vcb
->data_flags
, &c
, TRUE
);
3198 if (NT_SUCCESS(Status
))
3199 c
->balance_num
= Vcb
->balance
.balance_num
;
3200 else if (Status
!= STATUS_DISK_FULL
|| consolidated
) {
3201 ERR("alloc_chunk returned %08x\n", Status
);
3202 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3203 Vcb
->balance
.status
= Status
;
3207 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3209 if (Status
== STATUS_DISK_FULL
) {
3210 Status
= try_consolidation(Vcb
, Vcb
->data_flags
, &c
);
3211 if (!NT_SUCCESS(Status
)) {
3212 ERR("try_consolidation returned %08x\n", Status
);
3213 Vcb
->balance
.status
= Status
;
3216 c
->balance_num
= Vcb
->balance
.balance_num
;
3218 consolidated
= TRUE
;
3220 if (Vcb
->balance
.stopping
)
3225 if (okay_system_chunks
== 0) {
3226 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, TRUE
);
3228 Status
= alloc_chunk(Vcb
, Vcb
->system_flags
, &c
, TRUE
);
3229 if (NT_SUCCESS(Status
))
3230 c
->balance_num
= Vcb
->balance
.balance_num
;
3231 else if (Status
!= STATUS_DISK_FULL
|| consolidated
) {
3232 ERR("alloc_chunk returned %08x\n", Status
);
3233 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3234 Vcb
->balance
.status
= Status
;
3238 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3240 if (Status
== STATUS_DISK_FULL
) {
3241 Status
= try_consolidation(Vcb
, Vcb
->system_flags
, &c
);
3242 if (!NT_SUCCESS(Status
)) {
3243 ERR("try_consolidation returned %08x\n", Status
);
3244 Vcb
->balance
.status
= Status
;
3247 c
->balance_num
= Vcb
->balance
.balance_num
;
3249 consolidated
= TRUE
;
3251 if (Vcb
->balance
.stopping
)
3257 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, TRUE
);
3260 while (le
!= &chunks
) {
3261 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry_balance
);
3268 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3270 // do data chunks before metadata
3272 while (le
!= &chunks
) {
3273 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry_balance
);
// Successor is captured up front because the entry may be unlinked further down.
3274 LIST_ENTRY
* le2
= le
->Flink
;
3276 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
) {
3282 Status
= balance_data_chunk(Vcb
, c
, &changed
);
3283 if (!NT_SUCCESS(Status
)) {
3284 ERR("balance_data_chunk returned %08x\n", Status
);
3285 Vcb
->balance
.status
= Status
;
3289 KeWaitForSingleObject(&Vcb
->balance
.event
, Executive
, KernelMode
, FALSE
, NULL
);
3292 Vcb
->balance
.stopping
= TRUE
;
3294 if (Vcb
->balance
.stopping
)
3299 c
->space_changed
= TRUE
;
3302 if (Vcb
->balance
.stopping
)
// A data chunk stays queued for the metadata pass only if it also carries the
// METADATA flag and metadata balancing is enabled.
3305 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
&&
3306 (!(Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) || !(c
->chunk_item
->type
& BLOCK_FLAG_METADATA
))) {
3307 RemoveEntryList(&c
->list_entry_balance
);
3308 c
->list_entry_balance
.Flink
= NULL
;
3310 Vcb
->balance
.chunks_left
--;
3316 // do metadata chunks
3317 while (!IsListEmpty(&chunks
)) {
3321 le
= RemoveHeadList(&chunks
);
3322 c
= CONTAINING_RECORD(le
, chunk
, list_entry_balance
);
3324 if (c
->chunk_item
->type
& BLOCK_FLAG_METADATA
|| c
->chunk_item
->type
& BLOCK_FLAG_SYSTEM
) {
3326 Status
= balance_metadata_chunk(Vcb
, c
, &changed
);
3327 if (!NT_SUCCESS(Status
)) {
3328 ERR("balance_metadata_chunk returned %08x\n", Status
);
3329 Vcb
->balance
.status
= Status
;
3333 KeWaitForSingleObject(&Vcb
->balance
.event
, Executive
, KernelMode
, FALSE
, NULL
);
3336 Vcb
->balance
.stopping
= TRUE
;
3338 if (Vcb
->balance
.stopping
)
3343 c
->space_changed
= TRUE
;
3346 if (Vcb
->balance
.stopping
)
3349 c
->list_entry_balance
.Flink
= NULL
;
3351 Vcb
->balance
.chunks_left
--;
// Wind-down on a writable volume: unlink leftover queue entries and put back saved flags.
// NOTE(review): the exact nesting of the following statements is not visible here.
3355 if (!Vcb
->readonly
) {
3356 if (Vcb
->balance
.stopping
|| !NT_SUCCESS(Vcb
->balance
.status
)) {
3358 while (le
!= &chunks
) {
3359 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry_balance
);
3363 c
->list_entry_balance
.Flink
= NULL
;
3366 if (old_data_flags
!= 0)
3367 Vcb
->data_flags
= old_data_flags
;
3369 if (old_metadata_flags
!= 0)
3370 Vcb
->metadata_flags
= old_metadata_flags
;
3372 if (old_system_flags
!= 0)
3373 Vcb
->system_flags
= old_system_flags
;
// Device removal: find the device named by opts[0].devid and, once no chunks are left,
// complete the removal under the exclusive tree lock.
3376 if (Vcb
->balance
.removing
) {
3379 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, TRUE
);
3381 le
= Vcb
->devices
.Flink
;
3382 while (le
!= &Vcb
->devices
) {
3383 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
3385 if (dev2
->devitem
.dev_id
== Vcb
->balance
.opts
[0].devid
) {
3394 if (Vcb
->balance
.chunks_left
== 0) {
3395 Status
= finish_removing_device(Vcb
, dev
);
3397 if (!NT_SUCCESS(Status
)) {
3398 ERR("finish_removing_device returned %08x\n", Status
);
3405 ExReleaseResourceLite(&Vcb
->tree_lock
);
// Device shrink: the new device size is taken from opts[0].drange_start.
3406 } else if (Vcb
->balance
.shrinking
) {
3409 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, TRUE
);
3411 le
= Vcb
->devices
.Flink
;
3412 while (le
!= &Vcb
->devices
) {
3413 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
3415 if (dev2
->devitem
.dev_id
== Vcb
->balance
.opts
[0].devid
) {
3424 ERR("could not find device %llx\n", Vcb
->balance
.opts
[0].devid
);
3425 Vcb
->balance
.status
= STATUS_INTERNAL_ERROR
;
3428 if (Vcb
->balance
.stopping
|| !NT_SUCCESS(Vcb
->balance
.status
)) {
3430 Status
= regenerate_space_list(Vcb
, dev
);
3431 if (!NT_SUCCESS(Status
))
3432 WARN("regenerate_space_list returned %08x\n", Status
);
3437 old_size
= dev
->devitem
.num_bytes
;
3438 dev
->devitem
.num_bytes
= Vcb
->balance
.opts
[0].drange_start
;
// If the dev item cannot be updated, the old size is restored and the free-space list rebuilt.
3440 Status
= update_dev_item(Vcb
, dev
, NULL
);
3441 if (!NT_SUCCESS(Status
)) {
3442 ERR("update_dev_item returned %08x\n", Status
);
3443 dev
->devitem
.num_bytes
= old_size
;
3444 Vcb
->balance
.status
= Status
;
3446 Status
= regenerate_space_list(Vcb
, dev
);
3447 if (!NT_SUCCESS(Status
))
3448 WARN("regenerate_space_list returned %08x\n", Status
);
3450 Vcb
->superblock
.total_bytes
-= old_size
- dev
->devitem
.num_bytes
;
3452 Status
= do_write(Vcb
, NULL
);
3453 if (!NT_SUCCESS(Status
))
3454 ERR("do_write returned %08x\n", Status
);
3460 ExReleaseResourceLite(&Vcb
->tree_lock
);
3462 if (!Vcb
->balance
.stopping
&& NT_SUCCESS(Vcb
->balance
.status
))
3463 FsRtlNotifyVolumeEvent(Vcb
->root_file
, FSRTL_VOLUME_CHANGE_SIZE
);
3465 Status
= remove_balance_item(Vcb
);
3466 if (!NT_SUCCESS(Status
)) {
3467 ERR("remove_balance_item returned %08x\n", Status
);
// TRIM unallocated space on every writable, TRIM-capable device that has a device object.
3472 if (Vcb
->trim
&& !Vcb
->options
.no_trim
) {
3473 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, TRUE
);
3475 le
= Vcb
->devices
.Flink
;
3476 while (le
!= &Vcb
->devices
) {
3477 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
3479 if (dev2
->devobj
&& !dev2
->readonly
&& dev2
->trim
)
3480 trim_unalloc_space(Vcb
, dev2
);
3485 ExReleaseResourceLite(&Vcb
->tree_lock
);
// Clearing balance.thread is what start/query/pause/resume/stop test to see whether a
// balance is running; `finished` is signalled last.
3489 ZwClose(Vcb
->balance
.thread
);
3490 Vcb
->balance
.thread
= NULL
;
3492 KeSetEvent(&Vcb
->balance
.finished
, 0, FALSE
);
3495 NTSTATUS
start_balance(device_extension
* Vcb
, void* data
, ULONG length
, KPROCESSOR_MODE processor_mode
) {
3497 btrfs_start_balance
* bsb
= (btrfs_start_balance
*)data
;
3500 if (length
< sizeof(btrfs_start_balance
) || !data
)
3501 return STATUS_INVALID_PARAMETER
;
3503 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3504 return STATUS_PRIVILEGE_NOT_HELD
;
3507 WARN("cannot start balance while locked\n");
3508 return STATUS_DEVICE_NOT_READY
;
3511 if (Vcb
->scrub
.thread
) {
3512 WARN("cannot start balance while scrub running\n");
3513 return STATUS_DEVICE_NOT_READY
;
3516 if (Vcb
->balance
.thread
) {
3517 WARN("balance already running\n");
3518 return STATUS_DEVICE_NOT_READY
;
3522 return STATUS_MEDIA_WRITE_PROTECTED
;
3524 if (!(bsb
->opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) &&
3525 !(bsb
->opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) &&
3526 !(bsb
->opts
[BALANCE_OPTS_SYSTEM
].flags
& BTRFS_BALANCE_OPTS_ENABLED
))
3527 return STATUS_SUCCESS
;
3529 for (i
= 0; i
< 3; i
++) {
3530 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
3531 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_PROFILES
) {
3532 bsb
->opts
[i
].profiles
&= BLOCK_FLAG_RAID0
| BLOCK_FLAG_RAID1
| BLOCK_FLAG_DUPLICATE
| BLOCK_FLAG_RAID10
|
3533 BLOCK_FLAG_RAID5
| BLOCK_FLAG_RAID6
| BLOCK_FLAG_SINGLE
;
3535 if (bsb
->opts
[i
].profiles
== 0)
3536 return STATUS_INVALID_PARAMETER
;
3539 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_DEVID
) {
3540 if (bsb
->opts
[i
].devid
== 0)
3541 return STATUS_INVALID_PARAMETER
;
3544 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_DRANGE
) {
3545 if (bsb
->opts
[i
].drange_start
> bsb
->opts
[i
].drange_end
)
3546 return STATUS_INVALID_PARAMETER
;
3549 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_VRANGE
) {
3550 if (bsb
->opts
[i
].vrange_start
> bsb
->opts
[i
].vrange_end
)
3551 return STATUS_INVALID_PARAMETER
;
3554 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_LIMIT
) {
3555 bsb
->opts
[i
].limit_start
= max(1, bsb
->opts
[i
].limit_start
);
3556 bsb
->opts
[i
].limit_end
= max(1, bsb
->opts
[i
].limit_end
);
3558 if (bsb
->opts
[i
].limit_start
> bsb
->opts
[i
].limit_end
)
3559 return STATUS_INVALID_PARAMETER
;
3562 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_STRIPES
) {
3563 bsb
->opts
[i
].stripes_start
= max(1, bsb
->opts
[i
].stripes_start
);
3564 bsb
->opts
[i
].stripes_end
= max(1, bsb
->opts
[i
].stripes_end
);
3566 if (bsb
->opts
[i
].stripes_start
> bsb
->opts
[i
].stripes_end
)
3567 return STATUS_INVALID_PARAMETER
;
3570 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_USAGE
) {
3571 bsb
->opts
[i
].usage_start
= min(100, bsb
->opts
[i
].stripes_start
);
3572 bsb
->opts
[i
].usage_end
= min(100, bsb
->opts
[i
].stripes_end
);
3574 if (bsb
->opts
[i
].stripes_start
> bsb
->opts
[i
].stripes_end
)
3575 return STATUS_INVALID_PARAMETER
;
3578 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
3579 if (bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID0
&& bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID1
&&
3580 bsb
->opts
[i
].convert
!= BLOCK_FLAG_DUPLICATE
&& bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID10
&&
3581 bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID5
&& bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID6
&&
3582 bsb
->opts
[i
].convert
!= BLOCK_FLAG_SINGLE
)
3583 return STATUS_INVALID_PARAMETER
;
3588 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], &bsb
->opts
[BALANCE_OPTS_DATA
], sizeof(btrfs_balance_opts
));
3589 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], &bsb
->opts
[BALANCE_OPTS_METADATA
], sizeof(btrfs_balance_opts
));
3590 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
], &bsb
->opts
[BALANCE_OPTS_SYSTEM
], sizeof(btrfs_balance_opts
));
3592 Vcb
->balance
.paused
= FALSE
;
3593 Vcb
->balance
.removing
= FALSE
;
3594 Vcb
->balance
.shrinking
= FALSE
;
3595 Vcb
->balance
.status
= STATUS_SUCCESS
;
3596 KeInitializeEvent(&Vcb
->balance
.event
, NotificationEvent
, !Vcb
->balance
.paused
);
3598 Status
= PsCreateSystemThread(&Vcb
->balance
.thread
, 0, NULL
, NULL
, NULL
, balance_thread
, Vcb
);
3599 if (!NT_SUCCESS(Status
)) {
3600 ERR("PsCreateSystemThread returned %08x\n", Status
);
3604 return STATUS_SUCCESS
;
3607 NTSTATUS
look_for_balance_item(_Requires_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
) {
3614 searchkey
.obj_id
= BALANCE_ITEM_ID
;
3615 searchkey
.obj_type
= TYPE_TEMP_ITEM
;
3616 searchkey
.offset
= 0;
3618 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, FALSE
, NULL
);
3619 if (!NT_SUCCESS(Status
)) {
3620 ERR("find_item returned %08x\n", Status
);
3624 if (keycmp(tp
.item
->key
, searchkey
)) {
3625 TRACE("no balance item found\n");
3626 return STATUS_NOT_FOUND
;
3629 if (tp
.item
->size
< sizeof(BALANCE_ITEM
)) {
3630 WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
,
3631 tp
.item
->size
, sizeof(BALANCE_ITEM
));
3632 return STATUS_INTERNAL_ERROR
;
3635 bi
= (BALANCE_ITEM
*)tp
.item
->data
;
3637 if (bi
->flags
& BALANCE_FLAGS_DATA
)
3638 load_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], &bi
->data
);
3640 if (bi
->flags
& BALANCE_FLAGS_METADATA
)
3641 load_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], &bi
->metadata
);
3643 if (bi
->flags
& BALANCE_FLAGS_SYSTEM
)
3644 load_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
], &bi
->system
);
3646 // do the heuristics that Linux driver does
3648 for (i
= 0; i
< 3; i
++) {
3649 if (Vcb
->balance
.opts
[i
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
3650 // if converting, don't redo chunks already done
3652 if (Vcb
->balance
.opts
[i
].flags
& BTRFS_BALANCE_OPTS_CONVERT
)
3653 Vcb
->balance
.opts
[i
].flags
|= BTRFS_BALANCE_OPTS_SOFT
;
3655 // don't balance chunks more than 90% filled - presumably these
3656 // have already been done
3658 if (!(Vcb
->balance
.opts
[i
].flags
& BTRFS_BALANCE_OPTS_USAGE
) &&
3659 !(Vcb
->balance
.opts
[i
].flags
& BTRFS_BALANCE_OPTS_CONVERT
)
3661 Vcb
->balance
.opts
[i
].flags
|= BTRFS_BALANCE_OPTS_USAGE
;
3662 Vcb
->balance
.opts
[i
].usage_start
= 0;
3663 Vcb
->balance
.opts
[i
].usage_end
= 90;
3668 if (Vcb
->readonly
|| Vcb
->options
.skip_balance
)
3669 Vcb
->balance
.paused
= TRUE
;
3671 Vcb
->balance
.paused
= FALSE
;
3673 Vcb
->balance
.removing
= FALSE
;
3674 Vcb
->balance
.shrinking
= FALSE
;
3675 Vcb
->balance
.status
= STATUS_SUCCESS
;
3676 KeInitializeEvent(&Vcb
->balance
.event
, NotificationEvent
, !Vcb
->balance
.paused
);
3678 Status
= PsCreateSystemThread(&Vcb
->balance
.thread
, 0, NULL
, NULL
, NULL
, balance_thread
, Vcb
);
3679 if (!NT_SUCCESS(Status
)) {
3680 ERR("PsCreateSystemThread returned %08x\n", Status
);
3684 return STATUS_SUCCESS
;
3687 NTSTATUS
query_balance(device_extension
* Vcb
, void* data
, ULONG length
) {
3688 btrfs_query_balance
* bqb
= (btrfs_query_balance
*)data
;
3690 if (length
< sizeof(btrfs_query_balance
) || !data
)
3691 return STATUS_INVALID_PARAMETER
;
3693 if (!Vcb
->balance
.thread
) {
3694 bqb
->status
= BTRFS_BALANCE_STOPPED
;
3696 if (!NT_SUCCESS(Vcb
->balance
.status
)) {
3697 bqb
->status
|= BTRFS_BALANCE_ERROR
;
3698 bqb
->error
= Vcb
->balance
.status
;
3701 return STATUS_SUCCESS
;
3704 bqb
->status
= Vcb
->balance
.paused
? BTRFS_BALANCE_PAUSED
: BTRFS_BALANCE_RUNNING
;
3706 if (Vcb
->balance
.removing
)
3707 bqb
->status
|= BTRFS_BALANCE_REMOVAL
;
3709 if (Vcb
->balance
.shrinking
)
3710 bqb
->status
|= BTRFS_BALANCE_SHRINKING
;
3712 if (!NT_SUCCESS(Vcb
->balance
.status
))
3713 bqb
->status
|= BTRFS_BALANCE_ERROR
;
3715 bqb
->chunks_left
= Vcb
->balance
.chunks_left
;
3716 bqb
->total_chunks
= Vcb
->balance
.total_chunks
;
3717 bqb
->error
= Vcb
->balance
.status
;
3718 RtlCopyMemory(&bqb
->data_opts
, &Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], sizeof(btrfs_balance_opts
));
3719 RtlCopyMemory(&bqb
->metadata_opts
, &Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], sizeof(btrfs_balance_opts
));
3720 RtlCopyMemory(&bqb
->system_opts
, &Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
], sizeof(btrfs_balance_opts
));
3722 return STATUS_SUCCESS
;
3725 NTSTATUS
pause_balance(device_extension
* Vcb
, KPROCESSOR_MODE processor_mode
) {
3726 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3727 return STATUS_PRIVILEGE_NOT_HELD
;
3729 if (!Vcb
->balance
.thread
)
3730 return STATUS_DEVICE_NOT_READY
;
3732 if (Vcb
->balance
.paused
)
3733 return STATUS_DEVICE_NOT_READY
;
3735 Vcb
->balance
.paused
= TRUE
;
3736 KeClearEvent(&Vcb
->balance
.event
);
3738 return STATUS_SUCCESS
;
3741 NTSTATUS
resume_balance(device_extension
* Vcb
, KPROCESSOR_MODE processor_mode
) {
3742 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3743 return STATUS_PRIVILEGE_NOT_HELD
;
3745 if (!Vcb
->balance
.thread
)
3746 return STATUS_DEVICE_NOT_READY
;
3748 if (!Vcb
->balance
.paused
)
3749 return STATUS_DEVICE_NOT_READY
;
3752 return STATUS_MEDIA_WRITE_PROTECTED
;
3754 Vcb
->balance
.paused
= FALSE
;
3755 KeSetEvent(&Vcb
->balance
.event
, 0, FALSE
);
3757 return STATUS_SUCCESS
;
3760 NTSTATUS
stop_balance(device_extension
* Vcb
, KPROCESSOR_MODE processor_mode
) {
3761 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3762 return STATUS_PRIVILEGE_NOT_HELD
;
3764 if (!Vcb
->balance
.thread
)
3765 return STATUS_DEVICE_NOT_READY
;
3767 Vcb
->balance
.paused
= FALSE
;
3768 Vcb
->balance
.stopping
= TRUE
;
3769 Vcb
->balance
.status
= STATUS_SUCCESS
;
3770 KeSetEvent(&Vcb
->balance
.event
, 0, FALSE
);
3772 return STATUS_SUCCESS
;
/* Starts a balance whose options select a single device by id, with the
 * balance marked as "removing".
 *
 * Vcb            - volume the device belongs to.
 * data           - caller buffer; its first UINT64 is read as the device id.
 * length         - size of data; must be at least sizeof(UINT64).
 * processor_mode - processor mode handed to SeSinglePrivilegeCheck.
 *
 * Return values visible in this function:
 *   STATUS_PRIVILEGE_NOT_HELD    - caller lacks SE_MANAGE_VOLUME_PRIVILEGE.
 *   STATUS_INVALID_PARAMETER     - length is too small, or the device is the
 *                                  last non-readonly device.
 *   STATUS_MEDIA_WRITE_PROTECTED - the volume is read-only.
 *   STATUS_NOT_FOUND             - reported together with the
 *                                  "device %llx not found" warning.
 *   STATUS_CANNOT_DELETE         - too few writable devices would remain for
 *                                  a RAID profile flagged on the volume.
 *   STATUS_DEVICE_NOT_READY      - a balance thread already exists.
 *   STATUS_SUCCESS               - final return once the thread is created.
 *
 * NOTE(review): the text of this function as seen here appears to be missing
 * lines (the declarations of devid/le/dev/Status/i, the bodies of the two
 * ifs inside the device loop, several closing braces, and whatever follows
 * the PsCreateSystemThread error message). Confirm against the complete
 * file before changing any code here. */
3775 NTSTATUS
remove_device(device_extension
* Vcb
, void* data
, ULONG length
, KPROCESSOR_MODE processor_mode
) {
3781 UINT64 num_rw_devices
;
3783 TRACE("(%p, %p, %x)\n", Vcb
, data
, length
);
// Only callers holding SE_MANAGE_VOLUME_PRIVILEGE may proceed.
3785 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3786 return STATUS_PRIVILEGE_NOT_HELD
;
// The buffer has to be large enough to hold the UINT64 device id read below.
3788 if (length
< sizeof(UINT64
))
3789 return STATUS_INVALID_PARAMETER
;
3791 devid
= *(UINT64
*)data
;
// tree_lock is taken shared for the checks below; every early return in that
// region releases it first.
3793 ExAcquireResourceSharedLite(&Vcb
->tree_lock
, TRUE
);
3795 if (Vcb
->readonly
) {
3796 ExReleaseResourceLite(&Vcb
->tree_lock
);
3797 return STATUS_MEDIA_WRITE_PROTECTED
;
// Walk Vcb->devices, comparing each entry's dev_id with devid and testing its
// readonly flag.
3802 le
= Vcb
->devices
.Flink
;
3803 while (le
!= &Vcb
->devices
) {
3804 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
3806 if (dev2
->devitem
.dev_id
== devid
)
3809 if (!dev2
->readonly
)
3816 ExReleaseResourceLite(&Vcb
->tree_lock
);
3817 WARN("device %llx not found\n", devid
);
3818 return STATUS_NOT_FOUND
;
// For a non-readonly device, refuse when num_rw_devices is at the threshold
// checked for a RAID profile flagged in data_flags, metadata_flags or
// system_flags (4 for RAID10/RAID6, 3 for RAID5, 2 for RAID0/RAID1), or when
// it is the only non-readonly device.
3821 if (!dev
->readonly
) {
3822 if (num_rw_devices
== 1) {
3823 ExReleaseResourceLite(&Vcb
->tree_lock
);
3824 WARN("not removing last non-readonly device\n");
3825 return STATUS_INVALID_PARAMETER
;
3828 if (num_rw_devices
== 4 &&
3829 ((Vcb
->data_flags
& BLOCK_FLAG_RAID10
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID10
|| Vcb
->system_flags
& BLOCK_FLAG_RAID10
) ||
3830 (Vcb
->data_flags
& BLOCK_FLAG_RAID6
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID6
|| Vcb
->system_flags
& BLOCK_FLAG_RAID6
))
3832 ExReleaseResourceLite(&Vcb
->tree_lock
);
3833 ERR("would not be enough devices to satisfy RAID requirement (RAID6/10)\n");
3834 return STATUS_CANNOT_DELETE
;
3837 if (num_rw_devices
== 3 && (Vcb
->data_flags
& BLOCK_FLAG_RAID5
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID5
|| Vcb
->system_flags
& BLOCK_FLAG_RAID5
)) {
3838 ExReleaseResourceLite(&Vcb
->tree_lock
);
3839 ERR("would not be enough devices to satisfy RAID requirement (RAID5)\n");
3840 return STATUS_CANNOT_DELETE
;
3843 if (num_rw_devices
== 2 &&
3844 ((Vcb
->data_flags
& BLOCK_FLAG_RAID0
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID0
|| Vcb
->system_flags
& BLOCK_FLAG_RAID0
) ||
3845 (Vcb
->data_flags
& BLOCK_FLAG_RAID1
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID1
|| Vcb
->system_flags
& BLOCK_FLAG_RAID1
))
3847 ExReleaseResourceLite(&Vcb
->tree_lock
);
3848 ERR("would not be enough devices to satisfy RAID requirement (RAID0/1)\n");
3849 return STATUS_CANNOT_DELETE
;
// The lock is dropped before the balance state is examined and modified.
3853 ExReleaseResourceLite(&Vcb
->tree_lock
);
3855 if (Vcb
->balance
.thread
) {
3856 WARN("balance already running\n");
3857 return STATUS_DEVICE_NOT_READY
;
// Reset all three balance option entries, then enable each one with a device
// id filter set to devid.
3862 RtlZeroMemory(Vcb
->balance
.opts
, sizeof(btrfs_balance_opts
) * 3);
3864 for (i
= 0; i
< 3; i
++) {
3865 Vcb
->balance
.opts
[i
].flags
= BTRFS_BALANCE_OPTS_ENABLED
| BTRFS_BALANCE_OPTS_DEVID
;
3866 Vcb
->balance
.opts
[i
].devid
= devid
;
3869 Vcb
->balance
.paused
= FALSE
;
3870 Vcb
->balance
.removing
= TRUE
;
3871 Vcb
->balance
.shrinking
= FALSE
;
3872 Vcb
->balance
.status
= STATUS_SUCCESS
;
// paused was set to FALSE just above, so the event's initial state is TRUE
// (signalled).
3873 KeInitializeEvent(&Vcb
->balance
.event
, NotificationEvent
, !Vcb
->balance
.paused
);
// Create the system thread that runs balance_thread with Vcb as its context.
// NOTE(review): what is returned after the error message on failure is not
// visible in this text - confirm.
3875 Status
= PsCreateSystemThread(&Vcb
->balance
.thread
, 0, NULL
, NULL
, NULL
, balance_thread
, Vcb
);
3876 if (!NT_SUCCESS(Status
)) {
3877 ERR("PsCreateSystemThread returned %08x\n", Status
);
3882 return STATUS_SUCCESS
;