1 /* Copyright (c) Mark Harmstone 2016-17
3 * This file is part of WinBtrfs.
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
18 #include "btrfs_drv.h"
19 #include "btrfsioctl.h"
31 LIST_ENTRY list_entry
;
43 metadata_reloc
* parent
;
45 LIST_ENTRY list_entry
;
55 LIST_ENTRY list_entry
;
67 metadata_reloc
* parent
;
68 LIST_ENTRY list_entry
;
// Compile-time constants for this file. The DSM flag value is supplied here
// for builds where _MSC_VER is not defined.
71 #ifndef _MSC_VER // not in mingw yet
72 #define DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED 0x80000000
// 0x100000 bytes = 1 MiB
75 #define BALANCE_UNIT 0x100000 // only read 1 MB at a time
/* add_metadata_reloc: build a metadata_reloc record for the extent item that
 * tp points at and append it to the items list.
 *
 * What the visible code does, in order:
 *  - allocates the record, stores the item's key.obj_id as its address and
 *    keeps a pointer to the item's EXTENT_ITEM data (no copy is made here);
 *  - removes the item from the tree with delete_tree_item;
 *  - reduces the owning chunk's used count by one node_size and passes the
 *    range to space_list_add, under the chunk lock;
 *  - turns every inline TREE_BLOCK_REF / SHARED_BLOCK_REF into a
 *    metadata_reloc_ref queued on mr->refs;
 *  - if inline_rc is still below ei->refcount, scans the following items with
 *    the same obj_id for standalone TREE_BLOCK_REF / SHARED_BLOCK_REF keys,
 *    records them and deletes them too.
 *
 * Returns STATUS_SUCCESS at the end; the visible error paths return
 * STATUS_INSUFFICIENT_RESOURCES next to "out of memory" and
 * STATUS_INTERNAL_ERROR next to the malformed-section messages.
 *
 * NOTE(review): the uses of skinny and mr2 are not visible in this listing;
 * presumably skinny selects whether an EXTENT_ITEM2 is skipped and mr2
 * receives the new record - confirm against the full source. */
77 static NTSTATUS
add_metadata_reloc(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, LIST_ENTRY
* items
, traverse_ptr
* tp
,
78 bool skinny
, metadata_reloc
** mr2
, chunk
* c
, LIST_ENTRY
* rollback
) {
// allocate the relocation record from paged pool
86 mr
= ExAllocatePoolWithTag(PagedPool
, sizeof(metadata_reloc
), ALLOC_TAG
);
88 ERR("out of memory\n");
89 return STATUS_INSUFFICIENT_RESOURCES
;
// the key's obj_id is the address of the tree block being relocated
92 mr
->address
= tp
->item
->key
.obj_id
;
// points into the tree item's data rather than copying it
94 mr
->ei
= (EXTENT_ITEM
*)tp
->item
->data
;
96 InitializeListHead(&mr
->refs
);
// remove the old extent item from the tree
98 Status
= delete_tree_item(Vcb
, tp
);
99 if (!NT_SUCCESS(Status
)) {
100 ERR("delete_tree_item returned %08lx\n", Status
);
// look the chunk up from the block's address
// NOTE(review): presumably only done when the caller passed no chunk - guard not visible here
106 c
= get_chunk_from_address(Vcb
, tp
->item
->key
.obj_id
);
// hand the node's space back to the chunk while holding the chunk lock
109 acquire_chunk_lock(c
, Vcb
);
111 c
->used
-= Vcb
->superblock
.node_size
;
113 space_list_add(c
, tp
->item
->key
.obj_id
, Vcb
->superblock
.node_size
, rollback
);
115 release_chunk_lock(c
, Vcb
);
// the inline references follow the fixed EXTENT_ITEM header
118 ei
= (EXTENT_ITEM
*)tp
->item
->data
;
121 len
= tp
->item
->size
- sizeof(EXTENT_ITEM
);
122 ptr
= (uint8_t*)tp
->item
->data
+ sizeof(EXTENT_ITEM
);
// step over an EXTENT_ITEM2
// NOTE(review): presumably only for non-skinny items - guard not visible here
124 len
-= sizeof(EXTENT_ITEM2
);
125 ptr
+= sizeof(EXTENT_ITEM2
);
// each inline section is one type byte followed by a payload whose size depends on the type
129 uint8_t secttype
= *ptr
;
130 uint16_t sectlen
= secttype
== TYPE_TREE_BLOCK_REF
? sizeof(TREE_BLOCK_REF
) : (secttype
== TYPE_SHARED_BLOCK_REF
? sizeof(SHARED_BLOCK_REF
) : 0);
131 metadata_reloc_ref
* ref
;
136 ERR("(%I64x,%x,%I64x): %x bytes left, expecting at least %x\n", tp
->item
->key
.obj_id
, tp
->item
->key
.obj_type
, tp
->item
->key
.offset
, len
, sectlen
);
137 return STATUS_INTERNAL_ERROR
;
141 ERR("(%I64x,%x,%I64x): unrecognized extent type %x\n", tp
->item
->key
.obj_id
, tp
->item
->key
.obj_type
, tp
->item
->key
.offset
, secttype
);
142 return STATUS_INTERNAL_ERROR
;
145 ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(metadata_reloc_ref
), ALLOC_TAG
);
147 ERR("out of memory\n");
148 return STATUS_INSUFFICIENT_RESOURCES
;
// copy the payload, which starts one byte past the type
151 if (secttype
== TYPE_TREE_BLOCK_REF
) {
152 ref
->type
= TYPE_TREE_BLOCK_REF
;
153 RtlCopyMemory(&ref
->tbr
, ptr
+ sizeof(uint8_t), sizeof(TREE_BLOCK_REF
));
155 } else if (secttype
== TYPE_SHARED_BLOCK_REF
) {
156 ref
->type
= TYPE_SHARED_BLOCK_REF
;
157 RtlCopyMemory(&ref
->sbr
, ptr
+ sizeof(uint8_t), sizeof(SHARED_BLOCK_REF
));
160 ERR("unexpected tree type %x\n", secttype
);
162 return STATUS_INTERNAL_ERROR
;
167 InsertTailList(&mr
->refs
, &ref
->list_entry
);
// advance past the type byte and its payload
170 ptr
+= sizeof(uint8_t) + sectlen
;
173 if (inline_rc
< ei
->refcount
) { // look for non-inline entries
174 traverse_ptr tp2
= *tp
, next_tp
;
// walk the items that follow in the tree
176 while (find_next_item(Vcb
, &tp2
, &next_tp
, false, NULL
)) {
// only items keyed on the same address are considered
179 if (tp2
.item
->key
.obj_id
== tp
->item
->key
.obj_id
) {
180 if (tp2
.item
->key
.obj_type
== TYPE_TREE_BLOCK_REF
) {
181 metadata_reloc_ref
* ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(metadata_reloc_ref
), ALLOC_TAG
);
183 ERR("out of memory\n");
184 return STATUS_INSUFFICIENT_RESOURCES
;
// for a standalone ref the value is carried in the key's offset
187 ref
->type
= TYPE_TREE_BLOCK_REF
;
188 ref
->tbr
.offset
= tp2
.item
->key
.offset
;
191 InsertTailList(&mr
->refs
, &ref
->list_entry
);
193 Status
= delete_tree_item(Vcb
, &tp2
);
194 if (!NT_SUCCESS(Status
)) {
195 ERR("delete_tree_item returned %08lx\n", Status
);
198 } else if (tp2
.item
->key
.obj_type
== TYPE_SHARED_BLOCK_REF
) {
199 metadata_reloc_ref
* ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(metadata_reloc_ref
), ALLOC_TAG
);
201 ERR("out of memory\n");
202 return STATUS_INSUFFICIENT_RESOURCES
;
205 ref
->type
= TYPE_SHARED_BLOCK_REF
;
206 ref
->sbr
.offset
= tp2
.item
->key
.offset
;
209 InsertTailList(&mr
->refs
, &ref
->list_entry
);
211 Status
= delete_tree_item(Vcb
, &tp2
);
212 if (!NT_SUCCESS(Status
)) {
213 ERR("delete_tree_item returned %08lx\n", Status
);
// queue the finished record on the caller's list
222 InsertTailList(items
, &mr
->list_entry
);
227 return STATUS_SUCCESS
;
/* add_metadata_reloc_parent: make sure the tree block at `address` has a
 * metadata_reloc entry on the items list.
 *
 * The list is searched first; when an entry with a matching address exists
 * the function returns STATUS_SUCCESS without touching the extent tree.
 * Otherwise the extent root is searched for the block's METADATA_ITEM or
 * EXTENT_ITEM, the item is validated, and add_metadata_reloc is called on it
 * (with a NULL chunk argument).
 *
 * Visible error returns: STATUS_INTERNAL_ERROR when the EXTENT_ITEM lacks
 * EXTENT_ITEM_TREE_BLOCK or no valid item is found.
 *
 * NOTE(review): the assignment to *mr2 on the "already present" path is not
 * visible in this listing - confirm against the full source. */
230 static NTSTATUS
add_metadata_reloc_parent(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, LIST_ENTRY
* items
,
231 uint64_t address
, metadata_reloc
** mr2
, LIST_ENTRY
* rollback
) {
// is this block already queued for relocation?
239 while (le
!= items
) {
240 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
242 if (mr
->address
== address
) {
244 return STATUS_SUCCESS
;
// not queued yet: search the extent root with the largest possible offset for this address
250 searchkey
.obj_id
= address
;
251 searchkey
.obj_type
= TYPE_METADATA_ITEM
;
252 searchkey
.offset
= 0xffffffffffffffff;
254 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, NULL
);
255 if (!NT_SUCCESS(Status
)) {
256 ERR("find_item returned %08lx\n", Status
);
// accept a METADATA_ITEM for this address that is at least EXTENT_ITEM-sized ...
260 if (tp
.item
->key
.obj_id
== address
&& tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
&& tp
.item
->size
>= sizeof(EXTENT_ITEM
))
// ... or an EXTENT_ITEM whose key offset equals the node size
262 else if (tp
.item
->key
.obj_id
== address
&& tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
&& tp
.item
->key
.offset
== Vcb
->superblock
.node_size
&&
263 tp
.item
->size
>= sizeof(EXTENT_ITEM
)) {
264 EXTENT_ITEM
* ei
= (EXTENT_ITEM
*)tp
.item
->data
;
// the EXTENT_ITEM must be flagged as a tree block
266 if (!(ei
->flags
& EXTENT_ITEM_TREE_BLOCK
)) {
267 ERR("EXTENT_ITEM for %I64x found, but tree flag not set\n", address
);
268 return STATUS_INTERNAL_ERROR
;
271 ERR("could not find valid EXTENT_ITEM for address %I64x\n", address
);
272 return STATUS_INTERNAL_ERROR
;
// create the record; NULL is passed for the chunk argument
275 Status
= add_metadata_reloc(Vcb
, items
, &tp
, skinny
, mr2
, NULL
, rollback
);
276 if (!NT_SUCCESS(Status
)) {
277 ERR("add_metadata_reloc returned %08lx\n", Status
);
281 return STATUS_SUCCESS
;
/* sort_metadata_reloc_refs: reorder mr->refs in place by insertion into a
 * temporary list.
 *
 * Each ref is given a hash (tbr.offset for TREE_BLOCK_REF, the parent's
 * new_address for SHARED_BLOCK_REF) and placed in front of the first entry
 * that has a larger type, or the same type and a smaller hash. The visible
 * comparison therefore yields ascending type and, within a type, descending
 * hash. The temporary list is finally spliced back onto mr->refs. */
284 static void sort_metadata_reloc_refs(metadata_reloc
* mr
) {
285 LIST_ENTRY newlist
, *le
;
// head's Flink and Blink coincide only for an empty or single-entry list
287 if (mr
->refs
.Flink
== mr
->refs
.Blink
) // 0 or 1 items
292 InitializeListHead(&newlist
);
// drain the original list one entry at a time
294 while (!IsListEmpty(&mr
->refs
)) {
295 metadata_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&mr
->refs
), metadata_reloc_ref
, list_entry
);
296 bool inserted
= false;
// the sort key depends on the ref type
298 if (ref
->type
== TYPE_TREE_BLOCK_REF
)
299 ref
->hash
= ref
->tbr
.offset
;
300 else if (ref
->type
== TYPE_SHARED_BLOCK_REF
)
301 ref
->hash
= ref
->parent
->new_address
;
// find the first already-sorted entry this ref must precede
304 while (le
!= &newlist
) {
305 metadata_reloc_ref
* ref2
= CONTAINING_RECORD(le
, metadata_reloc_ref
, list_entry
);
307 if (ref
->type
< ref2
->type
|| (ref
->type
== ref2
->type
&& ref
->hash
> ref2
->hash
)) {
// inserting at the head of le->Blink places ref immediately before ref2
308 InsertHeadList(le
->Blink
, &ref
->list_entry
);
317 InsertTailList(&newlist
, &ref
->list_entry
);
// splice the sorted chain back under mr->refs by rewriting the four end links
320 newlist
.Flink
->Blink
= &mr
->refs
;
321 newlist
.Blink
->Flink
= &mr
->refs
;
322 mr
->refs
.Flink
= newlist
.Flink
;
323 mr
->refs
.Blink
= newlist
.Blink
;
/* add_metadata_reloc_extent_item: insert the extent-tree items describing a
 * relocated tree block at mr->new_address and re-point references that named
 * the old address.
 *
 * Visible steps:
 *  1. Size the item: EXTENT_ITEM, plus EXTENT_ITEM2 when the skinny-metadata
 *     incompat flag is clear, plus as many sorted refs as fit; the first ref
 *     that would push the item past node_size >> 2 is remembered as
 *     first_noninline.
 *  2. Build the item (generation and flags are taken from mr->ei) and insert
 *     it as METADATA_ITEM (skinny) or EXTENT_ITEM (non-skinny).
 *  3. Insert the refs from first_noninline onward as standalone
 *     TREE_BLOCK_REF / SHARED_BLOCK_REF items.
 *  4. When the shared-backref condition holds, swap each child's (internal
 *     node) or each data extent's (leaf) shared ref from mr->address to
 *     mr->new_address, and patch matching entries on the chunk's
 *     changed_extents list.
 *
 * Returns STATUS_SUCCESS at the end; STATUS_INSUFFICIENT_RESOURCES appears
 * next to the "out of memory" message. */
326 static NTSTATUS
add_metadata_reloc_extent_item(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, metadata_reloc
* mr
) {
331 bool all_inline
= true;
332 metadata_reloc_ref
* first_noninline
= NULL
;
// fixed part of the item; non-skinny filesystems also carry an EXTENT_ITEM2
336 inline_len
= sizeof(EXTENT_ITEM
);
337 if (!(Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA
))
338 inline_len
+= sizeof(EXTENT_ITEM2
);
// order the refs with sort_metadata_reloc_refs before laying them out
340 sort_metadata_reloc_refs(mr
);
// first pass: work out how many refs fit inline
343 while (le
!= &mr
->refs
) {
344 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le
, metadata_reloc_ref
, list_entry
);
349 if (ref
->type
== TYPE_TREE_BLOCK_REF
)
350 extlen
+= sizeof(TREE_BLOCK_REF
);
351 else if (ref
->type
== TYPE_SHARED_BLOCK_REF
)
352 extlen
+= sizeof(SHARED_BLOCK_REF
);
// the "+ 1" is the type byte; the item may not exceed a quarter of a node
355 if ((ULONG
)(inline_len
+ 1 + extlen
) > (Vcb
->superblock
.node_size
>> 2)) {
357 first_noninline
= ref
;
359 inline_len
+= extlen
+ 1;
// allocate the item buffer at its final size
365 ei
= ExAllocatePoolWithTag(PagedPool
, inline_len
, ALLOC_TAG
);
367 ERR("out of memory\n");
368 return STATUS_INSUFFICIENT_RESOURCES
;
// generation and flags are carried over from the old extent item
372 ei
->generation
= mr
->ei
->generation
;
373 ei
->flags
= mr
->ei
->flags
;
// the variable-length part begins right after the EXTENT_ITEM header
374 ptr
= (uint8_t*)&ei
[1];
376 if (!(Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA
)) {
377 EXTENT_ITEM2
* ei2
= (EXTENT_ITEM2
*)ptr
;
// the first key is read from the bytes immediately after the tree header in mr->data
379 ei2
->firstitem
= *(KEY
*)&mr
->data
[1];
380 ei2
->level
= mr
->data
->level
;
382 ptr
+= sizeof(EXTENT_ITEM2
);
// second pass: write the inline refs, up to first_noninline
386 while (le
!= &mr
->refs
) {
387 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le
, metadata_reloc_ref
, list_entry
);
389 if (ref
== first_noninline
)
395 if (ref
->type
== TYPE_TREE_BLOCK_REF
) {
396 TREE_BLOCK_REF
* tbr
= (TREE_BLOCK_REF
*)ptr
;
398 tbr
->offset
= ref
->tbr
.offset
;
400 ptr
+= sizeof(TREE_BLOCK_REF
);
401 } else if (ref
->type
== TYPE_SHARED_BLOCK_REF
) {
402 SHARED_BLOCK_REF
* sbr
= (SHARED_BLOCK_REF
*)ptr
;
// a shared ref names the parent's new location
404 sbr
->offset
= ref
->parent
->new_address
;
406 ptr
+= sizeof(SHARED_BLOCK_REF
);
// skinny: METADATA_ITEM keyed with the level as offset; otherwise EXTENT_ITEM keyed with node_size
412 if (Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA
)
413 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, mr
->new_address
, TYPE_METADATA_ITEM
, mr
->data
->level
, ei
, inline_len
, NULL
, NULL
);
415 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, mr
->new_address
, TYPE_EXTENT_ITEM
, Vcb
->superblock
.node_size
, ei
, inline_len
, NULL
, NULL
);
417 if (!NT_SUCCESS(Status
)) {
418 ERR("insert_tree_item returned %08lx\n", Status
);
// refs from first_noninline onward become standalone items with no data
424 le
= &first_noninline
->list_entry
;
426 while (le
!= &mr
->refs
) {
427 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le
, metadata_reloc_ref
, list_entry
);
429 if (ref
->type
== TYPE_TREE_BLOCK_REF
) {
430 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, mr
->new_address
, TYPE_TREE_BLOCK_REF
, ref
->tbr
.offset
, NULL
, 0, NULL
, NULL
);
431 if (!NT_SUCCESS(Status
)) {
432 ERR("insert_tree_item returned %08lx\n", Status
);
435 } else if (ref
->type
== TYPE_SHARED_BLOCK_REF
) {
436 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, mr
->new_address
, TYPE_SHARED_BLOCK_REF
, ref
->parent
->new_address
, NULL
, 0, NULL
, NULL
);
437 if (!NT_SUCCESS(Status
)) {
438 ERR("insert_tree_item returned %08lx\n", Status
);
// shared backrefs in use: the extent item or the header says so, or the header lacks the mixed-backref flag
447 if (ei
->flags
& EXTENT_ITEM_SHARED_BACKREFS
|| mr
->data
->flags
& HEADER_FLAG_SHARED_BACKREF
|| !(mr
->data
->flags
& HEADER_FLAG_MIXED_BACKREF
)) {
// internal node: handle every child pointer
448 if (mr
->data
->level
> 0) {
450 internal_node
* in
= (internal_node
*)&mr
->data
[1];
452 for (i
= 0; i
< mr
->data
->num_items
; i
++) {
453 uint64_t sbrrc
= find_extent_shared_tree_refcount(Vcb
, in
[i
].address
, mr
->address
, NULL
);
456 SHARED_BLOCK_REF sbr
;
// add a shared ref naming the new address ...
458 sbr
.offset
= mr
->new_address
;
460 Status
= increase_extent_refcount(Vcb
, in
[i
].address
, Vcb
->superblock
.node_size
, TYPE_SHARED_BLOCK_REF
, &sbr
, NULL
, 0, NULL
);
461 if (!NT_SUCCESS(Status
)) {
462 ERR("increase_extent_refcount returned %08lx\n", Status
);
// ... then drop the one naming the old address
466 sbr
.offset
= mr
->address
;
468 Status
= decrease_extent_refcount(Vcb
, in
[i
].address
, Vcb
->superblock
.node_size
, TYPE_SHARED_BLOCK_REF
, &sbr
, NULL
, 0,
469 sbr
.offset
, false, NULL
);
470 if (!NT_SUCCESS(Status
)) {
471 ERR("decrease_extent_refcount returned %08lx\n", Status
);
// leaf: handle every EXTENT_DATA item large enough to hold an EXTENT_DATA2
478 leaf_node
* ln
= (leaf_node
*)&mr
->data
[1];
480 for (i
= 0; i
< mr
->data
->num_items
; i
++) {
481 if (ln
[i
].key
.obj_type
== TYPE_EXTENT_DATA
&& ln
[i
].size
>= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
)) {
// item offsets are applied relative to the end of the tree header
482 EXTENT_DATA
* ed
= (EXTENT_DATA
*)((uint8_t*)mr
->data
+ sizeof(tree_header
) + ln
[i
].offset
);
484 if (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) {
485 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
487 if (ed2
->size
> 0) { // not sparse
488 uint32_t sdrrc
= find_extent_shared_data_refcount(Vcb
, ed2
->address
, mr
->address
, NULL
);
// add a shared data ref naming the new leaf address ...
494 sdr
.offset
= mr
->new_address
;
497 Status
= increase_extent_refcount(Vcb
, ed2
->address
, ed2
->size
, TYPE_SHARED_DATA_REF
, &sdr
, NULL
, 0, NULL
);
498 if (!NT_SUCCESS(Status
)) {
499 ERR("increase_extent_refcount returned %08lx\n", Status
);
// ... then drop the one naming the old leaf address
503 sdr
.offset
= mr
->address
;
505 Status
= decrease_extent_refcount(Vcb
, ed2
->address
, ed2
->size
, TYPE_SHARED_DATA_REF
, &sdr
, NULL
, 0,
506 sdr
.offset
, false, NULL
);
507 if (!NT_SUCCESS(Status
)) {
508 ERR("decrease_extent_refcount returned %08lx\n", Status
);
512 c
= get_chunk_from_address(Vcb
, ed2
->address
);
515 // check changed_extents
// the list is walked with the chunk's changed_extents_lock held exclusively
517 ExAcquireResourceExclusiveLite(&c
->changed_extents_lock
, true);
519 le
= c
->changed_extents
.Flink
;
521 while (le
!= &c
->changed_extents
) {
522 changed_extent
* ce
= CONTAINING_RECORD(le
, changed_extent
, list_entry
);
524 if (ce
->address
== ed2
->address
) {
// re-point shared data refs on the refs list that still name the old leaf
527 le2
= ce
->refs
.Flink
;
528 while (le2
!= &ce
->refs
) {
529 changed_extent_ref
* cer
= CONTAINING_RECORD(le2
, changed_extent_ref
, list_entry
);
531 if (cer
->type
== TYPE_SHARED_DATA_REF
&& cer
->sdr
.offset
== mr
->address
) {
532 cer
->sdr
.offset
= mr
->new_address
;
// same treatment for the old_refs list
539 le2
= ce
->old_refs
.Flink
;
540 while (le2
!= &ce
->old_refs
) {
541 changed_extent_ref
* cer
= CONTAINING_RECORD(le2
, changed_extent_ref
, list_entry
);
543 if (cer
->type
== TYPE_SHARED_DATA_REF
&& cer
->sdr
.offset
== mr
->address
) {
544 cer
->sdr
.offset
= mr
->new_address
;
557 ExReleaseResourceLite(&c
->changed_extents_lock
);
567 return STATUS_SUCCESS
;
/* write_metadata_items: relocate every tree block queued on `items`.
 *
 * Phases visible in this listing:
 *  1. For each queued block: allocate a node_size buffer, read the block from
 *     mr->address, optionally patch leaf EXTENT_DATA2 addresses from the
 *     data_items list, track the highest level seen, and make sure the parent
 *     of every ref is queued as well (add_metadata_reloc_parent).
 *  2. For each queued block: search the Vcb->trees_hash chain for an
 *     in-memory tree whose header address equals mr->address.
 *  3. Level by level, from 0 to max_level: choose block-group flags, find a
 *     new address (current newchunk, then any suitable chunk, then a freshly
 *     allocated chunk), rewrite the pointers held by parents and roots,
 *     re-hash the in-memory tree under its new address, checksum the block
 *     and queue a tree_write in ascending address order.
 *  4. Issue the queued writes with do_tree_writes, then insert the new extent
 *     items via add_metadata_reloc_extent_item.
 *  5. Drain the tree_writes list.
 *
 * data_items may be NULL; the leaf patching is guarded by `data_items &&`.
 * `c` is only used, as far as is visible, as a hint passed to read_data when
 * the address lies inside that chunk.
 *
 * NOTE(review): many guards, declarations and error-path jumps are missing
 * from this listing; the phase summary above covers only what is shown. */
570 static NTSTATUS
write_metadata_items(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, LIST_ENTRY
* items
,
571 LIST_ENTRY
* data_items
, chunk
* c
, LIST_ENTRY
* rollback
) {
572 LIST_ENTRY tree_writes
, *le
;
575 uint8_t level
, max_level
= 0;
576 chunk
* newchunk
= NULL
;
578 InitializeListHead(&tree_writes
);
// phase 1: load each block and queue its parents
581 while (le
!= items
) {
582 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
// buffer for one whole tree node
586 mr
->data
= ExAllocatePoolWithTag(PagedPool
, Vcb
->superblock
.node_size
, ALLOC_TAG
);
588 ERR("out of memory\n");
589 return STATUS_INSUFFICIENT_RESOURCES
;
// c is passed on only when the address falls inside it; otherwise NULL is passed
592 Status
= read_data(Vcb
, mr
->address
, Vcb
->superblock
.node_size
, NULL
, true, (uint8_t*)mr
->data
,
593 c
&& mr
->address
>= c
->offset
&& mr
->address
< c
->offset
+ c
->chunk_item
->size
? c
: NULL
, &pc
, NULL
, 0, false, NormalPagePriority
);
594 if (!NT_SUCCESS(Status
)) {
595 ERR("read_data returned %08lx\n", Status
);
// pc is the chunk pointer filled in by read_data; test whether it is a SYSTEM chunk
599 if (pc
->chunk_item
->type
& BLOCK_FLAG_SYSTEM
)
// leaves: redirect file extents that point at data which is itself being relocated
602 if (data_items
&& mr
->data
->level
== 0) {
603 le2
= data_items
->Flink
;
604 while (le2
!= data_items
) {
605 data_reloc
* dr
= CONTAINING_RECORD(le2
, data_reloc
, list_entry
);
606 leaf_node
* ln
= (leaf_node
*)&mr
->data
[1];
609 for (i
= 0; i
< mr
->data
->num_items
; i
++) {
610 if (ln
[i
].key
.obj_type
== TYPE_EXTENT_DATA
&& ln
[i
].size
>= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
)) {
611 EXTENT_DATA
* ed
= (EXTENT_DATA
*)((uint8_t*)mr
->data
+ sizeof(tree_header
) + ln
[i
].offset
);
613 if (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) {
614 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
616 if (ed2
->address
== dr
->address
)
617 ed2
->address
= dr
->new_address
;
// remember the highest level so phase 3 knows where to stop
626 if (mr
->data
->level
> max_level
)
627 max_level
= mr
->data
->level
;
// resolve the parent block of every reference
629 le2
= mr
->refs
.Flink
;
630 while (le2
!= &mr
->refs
) {
631 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le2
, metadata_reloc_ref
, list_entry
);
633 if (ref
->type
== TYPE_TREE_BLOCK_REF
) {
// the block's first key is used to locate it within its owning tree
639 firstitem
= (KEY
*)&mr
->data
[1];
// tbr.offset is matched against root ids
641 le3
= Vcb
->roots
.Flink
;
642 while (le3
!= &Vcb
->roots
) {
643 root
* r2
= CONTAINING_RECORD(le3
, root
, list_entry
);
645 if (r2
->id
== ref
->tbr
.offset
) {
654 ERR("could not find subvol with id %I64x\n", ref
->tbr
.offset
);
655 return STATUS_INTERNAL_ERROR
;
// descend only as far as the level directly above this block
658 Status
= find_item_to_level(Vcb
, r
, &tp
, firstitem
, false, mr
->data
->level
+ 1, NULL
);
659 if (!NT_SUCCESS(Status
) && Status
!= STATUS_NOT_FOUND
) {
660 ERR("find_item_to_level returned %08lx\n", Status
);
665 while (t
&& t
->header
.level
< mr
->data
->level
+ 1) {
// queue the tree found above as this ref's parent
674 Status
= add_metadata_reloc_parent(Vcb
, items
, t
->header
.address
, &mr2
, rollback
);
675 if (!NT_SUCCESS(Status
)) {
676 ERR("add_metadata_reloc_parent returned %08lx\n", Status
);
682 } else if (ref
->type
== TYPE_SHARED_BLOCK_REF
) {
// a shared ref names its parent block directly in sbr.offset
685 Status
= add_metadata_reloc_parent(Vcb
, items
, ref
->sbr
.offset
, &mr2
, rollback
);
686 if (!NT_SUCCESS(Status
)) {
687 ERR("add_metadata_reloc_parent returned %08lx\n", Status
);
// phase 2: look for an in-memory tree at each block's old address
701 while (le
!= items
) {
702 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
// the top 8 bits of the crc32c of the address index trees_ptrs
708 hash
= calc_crc32c(0xffffffff, (uint8_t*)&mr
->address
, sizeof(uint64_t));
710 le2
= Vcb
->trees_ptrs
[hash
>> 24];
713 while (le2
!= &Vcb
->trees_hash
) {
714 tree
* t
= CONTAINING_RECORD(le2
, tree
, list_entry_hash
);
716 if (t
->header
.address
== mr
->address
) {
719 } else if (t
->hash
> hash
)
// phase 3: relocate level by level, starting with level 0
729 for (level
= 0; level
<= max_level
; level
++) {
731 while (le
!= items
) {
732 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
734 if (mr
->data
->level
== level
) {
// pick the block-group flags the destination chunk must have
742 flags
= Vcb
->system_flags
;
743 else if (Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS
)
744 flags
= Vcb
->data_flags
;
746 flags
= Vcb
->metadata_flags
;
// first choice: the chunk remembered in newchunk
749 acquire_chunk_lock(newchunk
, Vcb
);
751 if (newchunk
->chunk_item
->type
== flags
&& find_metadata_address_in_chunk(Vcb
, newchunk
, &mr
->new_address
)) {
752 newchunk
->used
+= Vcb
->superblock
.node_size
;
753 space_list_subtract(newchunk
, false, mr
->new_address
, Vcb
->superblock
.node_size
, rollback
);
757 release_chunk_lock(newchunk
, Vcb
);
// second choice: another writable, non-relocating chunk of the right type
761 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
763 le2
= Vcb
->chunks
.Flink
;
764 while (le2
!= &Vcb
->chunks
) {
765 chunk
* c2
= CONTAINING_RECORD(le2
, chunk
, list_entry
);
767 if (!c2
->readonly
&& !c2
->reloc
&& c2
!= newchunk
&& c2
->chunk_item
->type
== flags
) {
768 acquire_chunk_lock(c2
, Vcb
);
// only consider chunks with at least one node's worth of unused space
770 if ((c2
->chunk_item
->size
- c2
->used
) >= Vcb
->superblock
.node_size
) {
771 if (find_metadata_address_in_chunk(Vcb
, c2
, &mr
->new_address
)) {
772 c2
->used
+= Vcb
->superblock
.node_size
;
773 space_list_subtract(c2
, false, mr
->new_address
, Vcb
->superblock
.node_size
, rollback
);
774 release_chunk_lock(c2
, Vcb
);
781 release_chunk_lock(c2
, Vcb
);
787 // allocate new chunk if necessary
789 Status
= alloc_chunk(Vcb
, flags
, &newchunk
, false);
791 if (!NT_SUCCESS(Status
)) {
792 ERR("alloc_chunk returned %08lx\n", Status
);
793 ExReleaseResourceLite(&Vcb
->chunk_lock
);
797 acquire_chunk_lock(newchunk
, Vcb
);
// stamp the new chunk with the current balance number
799 newchunk
->balance_num
= Vcb
->balance
.balance_num
;
801 if (!find_metadata_address_in_chunk(Vcb
, newchunk
, &mr
->new_address
)) {
802 release_chunk_lock(newchunk
, Vcb
);
803 ExReleaseResourceLite(&Vcb
->chunk_lock
);
804 ERR("could not find address in new chunk\n");
805 Status
= STATUS_DISK_FULL
;
808 newchunk
->used
+= Vcb
->superblock
.node_size
;
809 space_list_subtract(newchunk
, false, mr
->new_address
, Vcb
->superblock
.node_size
, rollback
);
812 release_chunk_lock(newchunk
, Vcb
);
815 ExReleaseResourceLite(&Vcb
->chunk_lock
);
// update everything that points at this block
819 le2
= mr
->refs
.Flink
;
820 while (le2
!= &mr
->refs
) {
821 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le2
, metadata_reloc_ref
, list_entry
);
// patch the child pointer inside the parent's buffered node
825 internal_node
* in
= (internal_node
*)&ref
->parent
->data
[1];
827 for (i
= 0; i
< ref
->parent
->data
->num_items
; i
++) {
828 if (in
[i
].address
== mr
->address
) {
829 in
[i
].address
= mr
->new_address
;
// if the parent also has an in-memory tree, patch its item list as well
834 if (ref
->parent
->t
) {
837 le3
= ref
->parent
->t
->itemlist
.Flink
;
838 while (le3
!= &ref
->parent
->t
->itemlist
) {
839 tree_data
* td
= CONTAINING_RECORD(le3
, tree_data
, list_entry
);
841 if (!td
->inserted
&& td
->treeholder
.address
== mr
->address
)
842 td
->treeholder
.address
= mr
->new_address
;
// a top ref has no parent node: the owning root itself must be updated
847 } else if (ref
->top
&& ref
->type
== TYPE_TREE_BLOCK_REF
) {
853 le3
= Vcb
->roots
.Flink
;
854 while (le3
!= &Vcb
->roots
) {
855 root
* r2
= CONTAINING_RECORD(le3
, root
, list_entry
);
857 if (r2
->id
== ref
->tbr
.offset
) {
866 r
->treeholder
.address
= mr
->new_address
;
// the root tree and chunk tree addresses are stored in the superblock
868 if (r
== Vcb
->root_root
)
869 Vcb
->superblock
.root_tree_addr
= mr
->new_address
;
870 else if (r
== Vcb
->chunk_root
)
871 Vcb
->superblock
.chunk_tree_addr
= mr
->new_address
;
872 else if (r
->root_item
.block_number
== mr
->address
) {
// any other root: replace its ROOT_ITEM in the root tree
876 r
->root_item
.block_number
= mr
->new_address
;
878 searchkey
.obj_id
= r
->id
;
879 searchkey
.obj_type
= TYPE_ROOT_ITEM
;
880 searchkey
.offset
= 0xffffffffffffffff;
882 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, NULL
);
883 if (!NT_SUCCESS(Status
)) {
884 ERR("find_item returned %08lx\n", Status
);
888 if (tp
.item
->key
.obj_id
!= searchkey
.obj_id
|| tp
.item
->key
.obj_type
!= searchkey
.obj_type
) {
889 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey
.obj_id
);
890 Status
= STATUS_INTERNAL_ERROR
;
894 ri
= ExAllocatePoolWithTag(PagedPool
, sizeof(ROOT_ITEM
), ALLOC_TAG
);
896 ERR("out of memory\n");
897 Status
= STATUS_INSUFFICIENT_RESOURCES
;
// copy the updated in-memory ROOT_ITEM, then swap it in under the old key
901 RtlCopyMemory(ri
, &r
->root_item
, sizeof(ROOT_ITEM
));
903 Status
= delete_tree_item(Vcb
, &tp
);
904 if (!NT_SUCCESS(Status
)) {
905 ERR("delete_tree_item returned %08lx\n", Status
);
909 Status
= insert_tree_item(Vcb
, Vcb
->root_root
, tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, ri
, sizeof(ROOT_ITEM
), NULL
, NULL
);
910 if (!NT_SUCCESS(Status
)) {
911 ERR("insert_tree_item returned %08lx\n", Status
);
// the buffered header now carries the new address
921 mr
->data
->address
= mr
->new_address
;
930 // check if tree loaded more than once
931 if (t3
->list_entry
.Flink
!= &Vcb
->trees_hash
) {
932 tree
* nt
= CONTAINING_RECORD(t3
->list_entry_hash
.Flink
, tree
, list_entry_hash
);
934 if (nt
->header
.address
== t3
->header
.address
)
938 t3
->header
.address
= mr
->new_address
;
// if the bucket pointer referred to t3, move it to t3's successor or clear it
942 if (Vcb
->trees_ptrs
[h
] == &t3
->list_entry_hash
) {
943 if (t3
->list_entry_hash
.Flink
== &Vcb
->trees_hash
)
944 Vcb
->trees_ptrs
[h
] = NULL
;
946 tree
* t2
= CONTAINING_RECORD(t3
->list_entry_hash
.Flink
, tree
, list_entry_hash
);
948 if (t2
->hash
>> 24 == h
)
949 Vcb
->trees_ptrs
[h
] = &t2
->list_entry_hash
;
951 Vcb
->trees_ptrs
[h
] = NULL
;
955 RemoveEntryList(&t3
->list_entry_hash
);
// recompute the hash from the new address
957 t3
->hash
= calc_crc32c(0xffffffff, (uint8_t*)&t3
->header
.address
, sizeof(uint64_t));
// choose where to start scanning for the insertion point
960 if (!Vcb
->trees_ptrs
[h
]) {
963 le2
= Vcb
->trees_hash
.Flink
;
968 if (Vcb
->trees_ptrs
[h2
]) {
969 le2
= Vcb
->trees_ptrs
[h2
];
977 le2
= Vcb
->trees_ptrs
[h
];
// insert before the first entry whose hash is not smaller
980 while (le2
!= &Vcb
->trees_hash
) {
981 tree
* t2
= CONTAINING_RECORD(le2
, tree
, list_entry_hash
);
983 if (t2
->hash
>= t3
->hash
) {
984 InsertHeadList(le2
->Blink
, &t3
->list_entry_hash
);
993 InsertTailList(&Vcb
->trees_hash
, &t3
->list_entry_hash
);
// t3 becomes the bucket head if the bucket was empty or t3 now precedes the old head
995 if (!Vcb
->trees_ptrs
[h
] || t3
->list_entry_hash
.Flink
== Vcb
->trees_ptrs
[h
])
996 Vcb
->trees_ptrs
[h
] = &t3
->list_entry_hash
;
// apply the same data-extent redirection to the in-memory leaf items
998 if (data_items
&& level
== 0) {
999 le2
= data_items
->Flink
;
1001 while (le2
!= data_items
) {
1002 data_reloc
* dr
= CONTAINING_RECORD(le2
, data_reloc
, list_entry
);
1003 LIST_ENTRY
* le3
= t3
->itemlist
.Flink
;
1005 while (le3
!= &t3
->itemlist
) {
1006 tree_data
* td
= CONTAINING_RECORD(le3
, tree_data
, list_entry
);
1008 if (!td
->inserted
&& td
->key
.obj_type
== TYPE_EXTENT_DATA
&& td
->size
>= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
)) {
1009 EXTENT_DATA
* ed
= (EXTENT_DATA
*)td
->data
;
1011 if (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) {
1012 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
1014 if (ed2
->address
== dr
->address
)
1015 ed2
->address
= dr
->new_address
;
// the buffer was modified above (header address at least), so checksum it now
1029 calc_tree_checksum(Vcb
, mr
->data
);
1031 tw
= ExAllocatePoolWithTag(PagedPool
, sizeof(tree_write
), ALLOC_TAG
);
1033 ERR("out of memory\n");
1034 Status
= STATUS_INSUFFICIENT_RESOURCES
;
// the write refers to mr->data directly; allocated is set to false
1038 tw
->address
= mr
->new_address
;
1039 tw
->length
= Vcb
->superblock
.node_size
;
1040 tw
->data
= (uint8_t*)mr
->data
;
1041 tw
->allocated
= false;
// keep tree_writes in ascending address order
1043 if (IsListEmpty(&tree_writes
))
1044 InsertTailList(&tree_writes
, &tw
->list_entry
);
1046 bool inserted
= false;
1048 le2
= tree_writes
.Flink
;
1049 while (le2
!= &tree_writes
) {
1050 tree_write
* tw2
= CONTAINING_RECORD(le2
, tree_write
, list_entry
);
1052 if (tw2
->address
> tw
->address
) {
1053 InsertHeadList(le2
->Blink
, &tw
->list_entry
);
1062 InsertTailList(&tree_writes
, &tw
->list_entry
);
// phase 4: write the relocated blocks out
1070 Status
= do_tree_writes(Vcb
, &tree_writes
, true);
1071 if (!NT_SUCCESS(Status
)) {
1072 ERR("do_tree_writes returned %08lx\n", Status
);
// then insert the extent items for the new locations
1077 while (le
!= items
) {
1078 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
1080 Status
= add_metadata_reloc_extent_item(Vcb
, mr
);
1081 if (!NT_SUCCESS(Status
)) {
1082 ERR("add_metadata_reloc_extent_item returned %08lx\n", Status
);
1089 Status
= STATUS_SUCCESS
;
// phase 5: drain the write list
// NOTE(review): the condition guarding this ExFreePool is not visible here; tw->data aliases mr->data for the writes queued above - confirm
1092 while (!IsListEmpty(&tree_writes
)) {
1093 tree_write
* tw
= CONTAINING_RECORD(RemoveHeadList(&tree_writes
), tree_write
, list_entry
);
1096 ExFreePool(tw
->data
);
/* balance_metadata_chunk: relocate one batch of tree blocks out of chunk c.
 *
 * With Vcb->tree_lock held exclusively, the extent root is scanned from
 * c->offset; items keyed inside the chunk that describe tree blocks are
 * passed to add_metadata_reloc. The batch is then handed to
 * write_metadata_items and, if Status is a success code, committed with
 * do_write. clear_rollback runs when Status is a success code; do_rollback
 * is the other visible outcome. The tree lock is released before the
 * per-block records are freed.
 *
 * NOTE(review): how *changed is set is not visible in this listing. The
 * "only do 64 at a time" comment indicates the batch limit; the increment of
 * `loaded` is likewise not shown. */
1104 static NTSTATUS
balance_metadata_chunk(device_extension
* Vcb
, chunk
* c
, bool* changed
) {
1109 LIST_ENTRY items
, rollback
;
1110 uint32_t loaded
= 0;
1112 TRACE("chunk %I64x\n", c
->offset
);
1114 InitializeListHead(&rollback
);
1115 InitializeListHead(&items
);
1117 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
// start the scan at the chunk's first address
1119 searchkey
.obj_id
= c
->offset
;
1120 searchkey
.obj_type
= TYPE_METADATA_ITEM
;
1121 searchkey
.offset
= 0xffffffffffffffff;
1123 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, NULL
);
1124 if (!NT_SUCCESS(Status
)) {
1125 ERR("find_item returned %08lx\n", Status
);
1130 traverse_ptr next_tp
;
// test for having walked past the end of the chunk
1132 if (tp
.item
->key
.obj_id
>= c
->offset
+ c
->chunk_item
->size
)
// only EXTENT_ITEM / METADATA_ITEM keys at or after the chunk start are candidates
1135 if (tp
.item
->key
.obj_id
>= c
->offset
&& (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
|| tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
)) {
1136 bool tree
= false, skinny
= false;
1138 if (tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
&& tp
.item
->size
>= sizeof(EXTENT_ITEM
)) {
// an EXTENT_ITEM qualifies only if its offset equals node_size; the tree-block flag is checked below
1141 } else if (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
&& tp
.item
->key
.offset
== Vcb
->superblock
.node_size
&&
1142 tp
.item
->size
>= sizeof(EXTENT_ITEM
)) {
1143 EXTENT_ITEM
* ei
= (EXTENT_ITEM
*)tp
.item
->data
;
1145 if (ei
->flags
& EXTENT_ITEM_TREE_BLOCK
)
// queue the block; the chunk is passed along so it need not be looked up again
1150 Status
= add_metadata_reloc(Vcb
, &items
, &tp
, skinny
, NULL
, c
, &rollback
);
1152 if (!NT_SUCCESS(Status
)) {
1153 ERR("add_metadata_reloc returned %08lx\n", Status
);
1159 if (loaded
>= 64) // only do 64 at a time
1164 b
= find_next_item(Vcb
, &tp
, &next_tp
, false, NULL
);
// nothing queued
1170 if (IsListEmpty(&items
)) {
1172 Status
= STATUS_SUCCESS
;
// relocate the batch; no data_items list is supplied (NULL)
1177 Status
= write_metadata_items(Vcb
, &items
, NULL
, c
, &rollback
);
1178 if (!NT_SUCCESS(Status
)) {
1179 ERR("write_metadata_items returned %08lx\n", Status
);
1183 Status
= STATUS_SUCCESS
;
1185 Vcb
->need_write
= true;
// commit
1188 if (NT_SUCCESS(Status
)) {
1189 Status
= do_write(Vcb
, NULL
);
1190 if (!NT_SUCCESS(Status
))
1191 ERR("do_write returned %08lx\n", Status
);
// clear the rollback list when Status is a success code; do_rollback is the alternative
1194 if (NT_SUCCESS(Status
))
1195 clear_rollback(&rollback
);
1197 do_rollback(Vcb
, &rollback
);
1201 ExReleaseResourceLite(&Vcb
->tree_lock
);
// drain the batch: each record's refs list is emptied and its node buffer freed
1203 while (!IsListEmpty(&items
)) {
1204 metadata_reloc
* mr
= CONTAINING_RECORD(RemoveHeadList(&items
), metadata_reloc
, list_entry
);
1206 while (!IsListEmpty(&mr
->refs
)) {
1207 metadata_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&mr
->refs
), metadata_reloc_ref
, list_entry
);
1213 ExFreePool(mr
->data
);
/* data_reloc_add_tree_edr: for one EXTENT_DATA_REF of a data extent being
 * relocated, find the leaves that hold the matching EXTENT_DATA items and
 * record them on dr->refs.
 *
 * The root whose id equals edr->root is looked up, then that tree is walked
 * over the EXTENT_DATA items of inode edr->objid. An item matches when its
 * EXTENT_DATA2 has dr's address and size and (key offset - ed2->offset)
 * equals edr->offset. For a match in a leaf other than the one recorded in
 * last_tree, a data_reloc_ref holding a copy of edr is allocated, the leaf is
 * queued via add_metadata_reloc_parent and the ref is appended to dr->refs.
 *
 * Visible returns: STATUS_SUCCESS at the end; STATUS_INTERNAL_ERROR when the
 * subvolume or the inode's EXTENT_DATA cannot be found;
 * STATUS_INSUFFICIENT_RESOURCES next to "out of memory".
 *
 * NOTE(review): what happens for a repeated match in the same leaf (the
 * branch taken when `ref && last_tree == ...` holds) is not visible here. */
1221 static NTSTATUS
data_reloc_add_tree_edr(_Requires_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, LIST_ENTRY
* metadata_items
,
1222 data_reloc
* dr
, EXTENT_DATA_REF
* edr
, LIST_ENTRY
* rollback
) {
1229 uint64_t last_tree
= 0;
// NOTE(review): ref has no initializer on this line and is tested below before
// any visible assignment - confirm it cannot be read uninitialized
1230 data_reloc_ref
* ref
;
// locate the root named by the reference
1232 le
= Vcb
->roots
.Flink
;
1233 while (le
!= &Vcb
->roots
) {
1234 root
* r2
= CONTAINING_RECORD(le
, root
, list_entry
);
1236 if (r2
->id
== edr
->root
) {
1245 ERR("could not find subvol %I64x\n", edr
->root
);
1246 return STATUS_INTERNAL_ERROR
;
// start at the inode's first possible EXTENT_DATA key
1249 searchkey
.obj_id
= edr
->objid
;
1250 searchkey
.obj_type
= TYPE_EXTENT_DATA
;
1251 searchkey
.offset
= 0;
1253 Status
= find_item(Vcb
, r
, &tp
, &searchkey
, false, NULL
);
1254 if (!NT_SUCCESS(Status
)) {
1255 ERR("find_item returned %08lx\n", Status
);
// if the item found sorts before the search key, step forward one item
1259 if (tp
.item
->key
.obj_id
< searchkey
.obj_id
|| (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
< searchkey
.obj_type
)) {
1262 if (find_next_item(Vcb
, &tp
, &tp2
, false, NULL
))
1265 ERR("could not find EXTENT_DATA for inode %I64x in root %I64x\n", searchkey
.obj_id
, r
->id
);
1266 return STATUS_INTERNAL_ERROR
;
// visit every EXTENT_DATA item of this inode
1272 while (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
== searchkey
.obj_type
) {
1275 if (tp
.item
->size
>= sizeof(EXTENT_DATA
)) {
1276 EXTENT_DATA
* ed
= (EXTENT_DATA
*)tp
.item
->data
;
// only regular/prealloc extents big enough to carry an EXTENT_DATA2 are examined
1278 if ((ed
->type
== EXTENT_TYPE_PREALLOC
|| ed
->type
== EXTENT_TYPE_REGULAR
) && tp
.item
->size
>= offsetof(EXTENT_DATA
, data
[0]) + sizeof(EXTENT_DATA2
)) {
1279 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
// match on address, size and the offset recorded in the reference
1281 if (ed2
->address
== dr
->address
&& ed2
->size
== dr
->size
&& tp
.item
->key
.offset
- ed2
->offset
== edr
->offset
) {
// same leaf as the previous match?
1282 if (ref
&& last_tree
== tp
.tree
->header
.address
)
1285 ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(data_reloc_ref
), ALLOC_TAG
);
1287 ERR("out of memory\n");
1288 return STATUS_INSUFFICIENT_RESOURCES
;
1291 ref
->type
= TYPE_EXTENT_DATA_REF
;
1292 RtlCopyMemory(&ref
->edr
, edr
, sizeof(EXTENT_DATA_REF
));
// the leaf holding this item has to be relocated (rewritten) as well
1295 Status
= add_metadata_reloc_parent(Vcb
, metadata_items
, tp
.tree
->header
.address
, &mr
, rollback
);
1296 if (!NT_SUCCESS(Status
)) {
1297 ERR("add_metadata_reloc_parent returned %08lx\n", Status
);
1302 last_tree
= tp
.tree
->header
.address
;
1305 InsertTailList(&dr
->refs
, &ref
->list_entry
);
// advance to the next item
1311 if (find_next_item(Vcb
, &tp
, &tp2
, false, NULL
))
1317 return STATUS_SUCCESS
;
1320 static NTSTATUS
add_data_reloc(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, LIST_ENTRY
* items
, LIST_ENTRY
* metadata_items
,
1321 traverse_ptr
* tp
, chunk
* c
, LIST_ENTRY
* rollback
) {
1329 dr
= ExAllocatePoolWithTag(PagedPool
, sizeof(data_reloc
), ALLOC_TAG
);
1331 ERR("out of memory\n");
1332 return STATUS_INSUFFICIENT_RESOURCES
;
1335 dr
->address
= tp
->item
->key
.obj_id
;
1336 dr
->size
= tp
->item
->key
.offset
;
1337 dr
->ei
= (EXTENT_ITEM
*)tp
->item
->data
;
1338 InitializeListHead(&dr
->refs
);
1340 Status
= delete_tree_item(Vcb
, tp
);
1341 if (!NT_SUCCESS(Status
)) {
1342 ERR("delete_tree_item returned %08lx\n", Status
);
1347 c
= get_chunk_from_address(Vcb
, tp
->item
->key
.obj_id
);
1350 acquire_chunk_lock(c
, Vcb
);
1352 c
->used
-= tp
->item
->key
.offset
;
1354 space_list_add(c
, tp
->item
->key
.obj_id
, tp
->item
->key
.offset
, rollback
);
1356 release_chunk_lock(c
, Vcb
);
1359 ei
= (EXTENT_ITEM
*)tp
->item
->data
;
1362 len
= tp
->item
->size
- sizeof(EXTENT_ITEM
);
1363 ptr
= (uint8_t*)tp
->item
->data
+ sizeof(EXTENT_ITEM
);
1366 uint8_t secttype
= *ptr
;
1367 uint16_t sectlen
= secttype
== TYPE_EXTENT_DATA_REF
? sizeof(EXTENT_DATA_REF
) : (secttype
== TYPE_SHARED_DATA_REF
? sizeof(SHARED_DATA_REF
) : 0);
1371 if (sectlen
> len
) {
1372 ERR("(%I64x,%x,%I64x): %x bytes left, expecting at least %x\n", tp
->item
->key
.obj_id
, tp
->item
->key
.obj_type
, tp
->item
->key
.offset
, len
, sectlen
);
1373 return STATUS_INTERNAL_ERROR
;
1377 ERR("(%I64x,%x,%I64x): unrecognized extent type %x\n", tp
->item
->key
.obj_id
, tp
->item
->key
.obj_type
, tp
->item
->key
.offset
, secttype
);
1378 return STATUS_INTERNAL_ERROR
;
1381 if (secttype
== TYPE_EXTENT_DATA_REF
) {
1382 EXTENT_DATA_REF
* edr
= (EXTENT_DATA_REF
*)(ptr
+ sizeof(uint8_t));
1384 inline_rc
+= edr
->count
;
1386 Status
= data_reloc_add_tree_edr(Vcb
, metadata_items
, dr
, edr
, rollback
);
1387 if (!NT_SUCCESS(Status
)) {
1388 ERR("data_reloc_add_tree_edr returned %08lx\n", Status
);
1391 } else if (secttype
== TYPE_SHARED_DATA_REF
) {
1393 data_reloc_ref
* ref
;
1395 ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(data_reloc_ref
), ALLOC_TAG
);
1397 ERR("out of memory\n");
1398 return STATUS_INSUFFICIENT_RESOURCES
;
1401 ref
->type
= TYPE_SHARED_DATA_REF
;
1402 RtlCopyMemory(&ref
->sdr
, ptr
+ sizeof(uint8_t), sizeof(SHARED_DATA_REF
));
1403 inline_rc
+= ref
->sdr
.count
;
1405 Status
= add_metadata_reloc_parent(Vcb
, metadata_items
, ref
->sdr
.offset
, &mr
, rollback
);
1406 if (!NT_SUCCESS(Status
)) {
1407 ERR("add_metadata_reloc_parent returned %08lx\n", Status
);
1414 InsertTailList(&dr
->refs
, &ref
->list_entry
);
1416 ERR("unexpected tree type %x\n", secttype
);
1417 return STATUS_INTERNAL_ERROR
;
1422 ptr
+= sizeof(uint8_t) + sectlen
;
1425 if (inline_rc
< ei
->refcount
) { // look for non-inline entries
1426 traverse_ptr tp2
= *tp
, next_tp
;
1428 while (find_next_item(Vcb
, &tp2
, &next_tp
, false, NULL
)) {
1431 if (tp2
.item
->key
.obj_id
== tp
->item
->key
.obj_id
) {
1432 if (tp2
.item
->key
.obj_type
== TYPE_EXTENT_DATA_REF
&& tp2
.item
->size
>= sizeof(EXTENT_DATA_REF
)) {
1433 Status
= data_reloc_add_tree_edr(Vcb
, metadata_items
, dr
, (EXTENT_DATA_REF
*)tp2
.item
->data
, rollback
);
1434 if (!NT_SUCCESS(Status
)) {
1435 ERR("data_reloc_add_tree_edr returned %08lx\n", Status
);
1439 Status
= delete_tree_item(Vcb
, &tp2
);
1440 if (!NT_SUCCESS(Status
)) {
1441 ERR("delete_tree_item returned %08lx\n", Status
);
1444 } else if (tp2
.item
->key
.obj_type
== TYPE_SHARED_DATA_REF
&& tp2
.item
->size
>= sizeof(uint32_t)) {
1446 data_reloc_ref
* ref
;
1448 ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(data_reloc_ref
), ALLOC_TAG
);
1450 ERR("out of memory\n");
1451 return STATUS_INSUFFICIENT_RESOURCES
;
1454 ref
->type
= TYPE_SHARED_DATA_REF
;
1455 ref
->sdr
.offset
= tp2
.item
->key
.offset
;
1456 ref
->sdr
.count
= *((uint32_t*)tp2
.item
->data
);
1458 Status
= add_metadata_reloc_parent(Vcb
, metadata_items
, ref
->sdr
.offset
, &mr
, rollback
);
1459 if (!NT_SUCCESS(Status
)) {
1460 ERR("add_metadata_reloc_parent returned %08lx\n", Status
);
1466 InsertTailList(&dr
->refs
, &ref
->list_entry
);
1468 Status
= delete_tree_item(Vcb
, &tp2
);
1469 if (!NT_SUCCESS(Status
)) {
1470 ERR("delete_tree_item returned %08lx\n", Status
);
1479 InsertTailList(items
, &dr
->list_entry
);
1481 return STATUS_SUCCESS
;
1484 static void sort_data_reloc_refs(data_reloc
* dr
) {
1485 LIST_ENTRY newlist
, *le
;
1487 if (IsListEmpty(&dr
->refs
))
1492 InitializeListHead(&newlist
);
1494 while (!IsListEmpty(&dr
->refs
)) {
1495 data_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&dr
->refs
), data_reloc_ref
, list_entry
);
1496 bool inserted
= false;
1498 if (ref
->type
== TYPE_EXTENT_DATA_REF
)
1499 ref
->hash
= get_extent_data_ref_hash2(ref
->edr
.root
, ref
->edr
.objid
, ref
->edr
.offset
);
1500 else if (ref
->type
== TYPE_SHARED_DATA_REF
)
1501 ref
->hash
= ref
->parent
->new_address
;
1504 while (le
!= &newlist
) {
1505 data_reloc_ref
* ref2
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1507 if (ref
->type
< ref2
->type
|| (ref
->type
== ref2
->type
&& ref
->hash
> ref2
->hash
)) {
1508 InsertHeadList(le
->Blink
, &ref
->list_entry
);
1517 InsertTailList(&newlist
, &ref
->list_entry
);
1521 while (le
!= &newlist
) {
1522 data_reloc_ref
* ref
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1524 if (le
->Flink
!= &newlist
) {
1525 data_reloc_ref
* ref2
= CONTAINING_RECORD(le
->Flink
, data_reloc_ref
, list_entry
);
1527 if (ref
->type
== TYPE_EXTENT_DATA_REF
&& ref2
->type
== TYPE_EXTENT_DATA_REF
&& ref
->edr
.root
== ref2
->edr
.root
&&
1528 ref
->edr
.objid
== ref2
->edr
.objid
&& ref
->edr
.offset
== ref2
->edr
.offset
) {
1529 RemoveEntryList(&ref2
->list_entry
);
1530 ref
->edr
.count
+= ref2
->edr
.count
;
1539 newlist
.Flink
->Blink
= &dr
->refs
;
1540 newlist
.Blink
->Flink
= &dr
->refs
;
1541 dr
->refs
.Flink
= newlist
.Flink
;
1542 dr
->refs
.Blink
= newlist
.Blink
;
1545 static NTSTATUS
add_data_reloc_extent_item(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, data_reloc
* dr
) {
1549 uint16_t inline_len
;
1550 bool all_inline
= true;
1551 data_reloc_ref
* first_noninline
= NULL
;
1555 inline_len
= sizeof(EXTENT_ITEM
);
1557 sort_data_reloc_refs(dr
);
1559 le
= dr
->refs
.Flink
;
1560 while (le
!= &dr
->refs
) {
1561 data_reloc_ref
* ref
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1562 uint16_t extlen
= 0;
1564 if (ref
->type
== TYPE_EXTENT_DATA_REF
) {
1565 extlen
+= sizeof(EXTENT_DATA_REF
);
1566 rc
+= ref
->edr
.count
;
1567 } else if (ref
->type
== TYPE_SHARED_DATA_REF
) {
1568 extlen
+= sizeof(SHARED_DATA_REF
);
1573 if ((ULONG
)(inline_len
+ 1 + extlen
) > (Vcb
->superblock
.node_size
>> 2)) {
1575 first_noninline
= ref
;
1577 inline_len
+= extlen
+ 1;
1583 ei
= ExAllocatePoolWithTag(PagedPool
, inline_len
, ALLOC_TAG
);
1585 ERR("out of memory\n");
1586 return STATUS_INSUFFICIENT_RESOURCES
;
1590 ei
->generation
= dr
->ei
->generation
;
1591 ei
->flags
= dr
->ei
->flags
;
1592 ptr
= (uint8_t*)&ei
[1];
1594 le
= dr
->refs
.Flink
;
1595 while (le
!= &dr
->refs
) {
1596 data_reloc_ref
* ref
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1598 if (ref
== first_noninline
)
1604 if (ref
->type
== TYPE_EXTENT_DATA_REF
) {
1605 EXTENT_DATA_REF
* edr
= (EXTENT_DATA_REF
*)ptr
;
1607 RtlCopyMemory(edr
, &ref
->edr
, sizeof(EXTENT_DATA_REF
));
1609 ptr
+= sizeof(EXTENT_DATA_REF
);
1610 } else if (ref
->type
== TYPE_SHARED_DATA_REF
) {
1611 SHARED_DATA_REF
* sdr
= (SHARED_DATA_REF
*)ptr
;
1613 sdr
->offset
= ref
->parent
->new_address
;
1614 sdr
->count
= ref
->sdr
.count
;
1616 ptr
+= sizeof(SHARED_DATA_REF
);
1622 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, dr
->new_address
, TYPE_EXTENT_ITEM
, dr
->size
, ei
, inline_len
, NULL
, NULL
);
1623 if (!NT_SUCCESS(Status
)) {
1624 ERR("insert_tree_item returned %08lx\n", Status
);
1629 le
= &first_noninline
->list_entry
;
1631 while (le
!= &dr
->refs
) {
1632 data_reloc_ref
* ref
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1634 if (ref
->type
== TYPE_EXTENT_DATA_REF
) {
1635 EXTENT_DATA_REF
* edr
;
1637 edr
= ExAllocatePoolWithTag(PagedPool
, sizeof(EXTENT_DATA_REF
), ALLOC_TAG
);
1639 ERR("out of memory\n");
1640 return STATUS_INSUFFICIENT_RESOURCES
;
1643 RtlCopyMemory(edr
, &ref
->edr
, sizeof(EXTENT_DATA_REF
));
1645 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, dr
->new_address
, TYPE_EXTENT_DATA_REF
, ref
->hash
, edr
, sizeof(EXTENT_DATA_REF
), NULL
, NULL
);
1646 if (!NT_SUCCESS(Status
)) {
1647 ERR("insert_tree_item returned %08lx\n", Status
);
1650 } else if (ref
->type
== TYPE_SHARED_DATA_REF
) {
1653 sdr
= ExAllocatePoolWithTag(PagedPool
, sizeof(uint32_t), ALLOC_TAG
);
1655 ERR("out of memory\n");
1656 return STATUS_INSUFFICIENT_RESOURCES
;
1659 *sdr
= ref
->sdr
.count
;
1661 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, dr
->new_address
, TYPE_SHARED_DATA_REF
, ref
->parent
->new_address
, sdr
, sizeof(uint32_t), NULL
, NULL
);
1662 if (!NT_SUCCESS(Status
)) {
1663 ERR("insert_tree_item returned %08lx\n", Status
);
1672 return STATUS_SUCCESS
;
// Relocates a batch of data extents out of chunk c.
//
// Under the exclusive tree lock this walks the extent tree from the chunk's
// start address, queueing data extents with add_data_reloc until a size or
// count limit is reached. For each queued extent it finds a new address
// (in newchunk, in another suitable data chunk, or in a freshly allocated
// chunk), copies the data across in pieces of at most BALANCE_UNIT bytes, and
// carries over any checksums. It then rewrites the affected metadata and
// extent items, repoints changed_extent records and cached extent addresses
// at the new locations, and flushes with do_write. The rollback list is
// cleared when the final status is a success and replayed with do_rollback
// otherwise. The relocation lists are freed before returning.
1675 static NTSTATUS
balance_data_chunk(device_extension
* Vcb
, chunk
* c
, bool* changed
) {
1680 LIST_ENTRY items
, metadata_items
, rollback
, *le
;
// running totals used to limit how much is relocated in one call
1681 uint64_t loaded
= 0, num_loaded
= 0;
1682 chunk
* newchunk
= NULL
;
1683 uint8_t* data
= NULL
;
1685 TRACE("chunk %I64x\n", c
->offset
);
1687 InitializeListHead(&rollback
);
1688 InitializeListHead(&items
);
1689 InitializeListHead(&metadata_items
);
// held until the ExReleaseResourceLite(&Vcb->tree_lock) call near the end
1691 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
// start the extent-tree scan at the chunk's first address
1693 searchkey
.obj_id
= c
->offset
;
1694 searchkey
.obj_type
= TYPE_EXTENT_ITEM
;
1695 searchkey
.offset
= 0xffffffffffffffff;
1697 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, NULL
);
1698 if (!NT_SUCCESS(Status
)) {
1699 ERR("find_item returned %08lx\n", Status
);
1704 traverse_ptr next_tp
;
// items at or beyond the end of the chunk are outside the range of interest
1706 if (tp
.item
->key
.obj_id
>= c
->offset
+ c
->chunk_item
->size
)
1709 if (tp
.item
->key
.obj_id
>= c
->offset
&& tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
) {
// extent items flagged EXTENT_ITEM_TREE_BLOCK describe metadata, not data
1712 if (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
&& tp
.item
->size
>= sizeof(EXTENT_ITEM
)) {
1713 EXTENT_ITEM
* ei
= (EXTENT_ITEM
*)tp
.item
->data
;
1715 if (ei
->flags
& EXTENT_ITEM_TREE_BLOCK
)
1720 Status
= add_data_reloc(Vcb
, &items
, &metadata_items
, &tp
, c
, &rollback
);
1722 if (!NT_SUCCESS(Status
)) {
1723 ERR("add_data_reloc returned %08lx\n", Status
);
1727 loaded
+= tp
.item
->key
.offset
;
1730 if (loaded
>= 0x1000000 || num_loaded
>= 100) // only do so much at a time, so we don't block too obnoxiously
1735 b
= find_next_item(Vcb
, &tp
, &next_tp
, false, NULL
);
// nothing queued for relocation
1741 if (IsListEmpty(&items
)) {
1743 Status
= STATUS_SUCCESS
;
// bounce buffer for the copy loops below; reads never exceed BALANCE_UNIT bytes
1748 data
= ExAllocatePoolWithTag(PagedPool
, BALANCE_UNIT
, ALLOC_TAG
);
1750 ERR("out of memory\n");
1751 Status
= STATUS_INSUFFICIENT_RESOURCES
;
// relocate each queued extent in turn
1756 while (le
!= &items
) {
1757 data_reloc
* dr
= CONTAINING_RECORD(le
, data_reloc
, list_entry
);
1763 ULONG bmplen
, runlength
, index
, lastoff
;
// try to place the extent in newchunk first
1766 acquire_chunk_lock(newchunk
, Vcb
);
1768 if (find_data_address_in_chunk(Vcb
, newchunk
, dr
->size
, &dr
->new_address
)) {
1769 newchunk
->used
+= dr
->size
;
1770 space_list_subtract(newchunk
, false, dr
->new_address
, dr
->size
, &rollback
);
1774 release_chunk_lock(newchunk
, Vcb
);
// search the other chunks for a writable, non-relocating data chunk with room
1778 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
1780 le2
= Vcb
->chunks
.Flink
;
1781 while (le2
!= &Vcb
->chunks
) {
1782 chunk
* c2
= CONTAINING_RECORD(le2
, chunk
, list_entry
);
1784 if (!c2
->readonly
&& !c2
->reloc
&& c2
!= newchunk
&& c2
->chunk_item
->type
== Vcb
->data_flags
) {
1785 acquire_chunk_lock(c2
, Vcb
);
1787 if ((c2
->chunk_item
->size
- c2
->used
) >= dr
->size
) {
1788 if (find_data_address_in_chunk(Vcb
, c2
, dr
->size
, &dr
->new_address
)) {
1789 c2
->used
+= dr
->size
;
1790 space_list_subtract(c2
, false, dr
->new_address
, dr
->size
, &rollback
);
1791 release_chunk_lock(c2
, Vcb
);
1798 release_chunk_lock(c2
, Vcb
);
1804 // allocate new chunk if necessary
1806 Status
= alloc_chunk(Vcb
, Vcb
->data_flags
, &newchunk
, false);
1808 if (!NT_SUCCESS(Status
)) {
1809 ERR("alloc_chunk returned %08lx\n", Status
);
1810 ExReleaseResourceLite(&Vcb
->chunk_lock
);
1814 acquire_chunk_lock(newchunk
, Vcb
);
// tag the new chunk with the current balance number
1816 newchunk
->balance_num
= Vcb
->balance
.balance_num
;
1818 if (!find_data_address_in_chunk(Vcb
, newchunk
, dr
->size
, &dr
->new_address
)) {
1819 release_chunk_lock(newchunk
, Vcb
);
1820 ExReleaseResourceLite(&Vcb
->chunk_lock
);
1821 ERR("could not find address in new chunk\n");
1822 Status
= STATUS_DISK_FULL
;
1825 newchunk
->used
+= dr
->size
;
1826 space_list_subtract(newchunk
, false, dr
->new_address
, dr
->size
, &rollback
);
1829 release_chunk_lock(newchunk
, Vcb
);
1832 ExReleaseResourceLite(&Vcb
->chunk_lock
);
1835 dr
->newchunk
= newchunk
;
// one bitmap bit per sector of the extent
1837 bmplen
= (ULONG
)(dr
->size
/ Vcb
->superblock
.sector_size
);
1839 bmparr
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)sector_align(bmplen
+ 1, sizeof(ULONG
)), ALLOC_TAG
);
1841 ERR("out of memory\n");
1842 Status
= STATUS_INSUFFICIENT_RESOURCES
;
// room for one checksum of Vcb->csum_size bytes per sector
1846 csum
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)(dr
->size
* Vcb
->csum_size
/ Vcb
->superblock
.sector_size
), ALLOC_TAG
);
1848 ERR("out of memory\n");
1850 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1854 RtlInitializeBitMap(&bmp
, bmparr
, bmplen
);
1855 RtlSetAllBits(&bmp
); // 1 = no csum, 0 = csum
// look up the checksum items covering the extent's old address range
1857 searchkey
.obj_id
= EXTENT_CSUM_ID
;
1858 searchkey
.obj_type
= TYPE_EXTENT_CSUM
;
1859 searchkey
.offset
= dr
->address
;
1861 Status
= find_item(Vcb
, Vcb
->checksum_root
, &tp
, &searchkey
, false, NULL
);
1862 if (!NT_SUCCESS(Status
) && Status
!= STATUS_NOT_FOUND
) {
1863 ERR("find_item returned %08lx\n", Status
);
1869 if (Status
!= STATUS_NOT_FOUND
) {
1871 traverse_ptr next_tp
;
1873 if (tp
.item
->key
.obj_type
== TYPE_EXTENT_CSUM
) {
1874 if (tp
.item
->key
.offset
>= dr
->address
+ dr
->size
)
1876 else if (tp
.item
->size
>= Vcb
->csum_size
&& tp
.item
->key
.offset
+ (tp
.item
->size
* Vcb
->superblock
.sector_size
/ Vcb
->csum_size
) >= dr
->address
) {
// [cs, ce) is the overlap between this checksum item and the extent
1877 uint64_t cs
= max(dr
->address
, tp
.item
->key
.offset
);
1878 uint64_t ce
= min(dr
->address
+ dr
->size
, tp
.item
->key
.offset
+ (tp
.item
->size
* Vcb
->superblock
.sector_size
/ Vcb
->csum_size
));
1880 RtlCopyMemory((uint8_t*)csum
+ ((cs
- dr
->address
) * Vcb
->csum_size
/ Vcb
->superblock
.sector_size
),
1881 tp
.item
->data
+ ((cs
- tp
.item
->key
.offset
) * Vcb
->csum_size
/ Vcb
->superblock
.sector_size
),
1882 (ULONG
)((ce
- cs
) * Vcb
->csum_size
/ Vcb
->superblock
.sector_size
));
// clear the bits for sectors whose checksum was just copied
1884 RtlClearBits(&bmp
, (ULONG
)((cs
- dr
->address
) / Vcb
->superblock
.sector_size
), (ULONG
)((ce
- cs
) / Vcb
->superblock
.sector_size
));
1886 if (ce
== dr
->address
+ dr
->size
)
1891 if (find_next_item(Vcb
, &tp
, &next_tp
, false, NULL
))
// walk the runs of clear bits, i.e. sectors that have checksums
1899 runlength
= RtlFindFirstRunClear(&bmp
, &index
);
1901 while (runlength
!= 0) {
1902 if (index
>= bmplen
)
// clamp a run that reaches the end of the bitmap
1905 if (index
+ runlength
>= bmplen
) {
1906 runlength
= bmplen
- index
;
// sectors between lastoff and this run have no checksum
1912 if (index
> lastoff
) {
1913 ULONG off
= lastoff
;
1914 ULONG size
= index
- lastoff
;
1916 // handle no csum run
1920 if (size
* Vcb
->superblock
.sector_size
> BALANCE_UNIT
)
1921 rl
= BALANCE_UNIT
/ Vcb
->superblock
.sector_size
;
// read from the old address with no checksum buffer (NULL), write to the new one
1925 Status
= read_data(Vcb
, dr
->address
+ (off
* Vcb
->superblock
.sector_size
), rl
* Vcb
->superblock
.sector_size
, NULL
, false, data
,
1926 c
, NULL
, NULL
, 0, false, NormalPagePriority
);
1927 if (!NT_SUCCESS(Status
)) {
1928 ERR("read_data returned %08lx\n", Status
);
1934 Status
= write_data_complete(Vcb
, dr
->new_address
+ (off
* Vcb
->superblock
.sector_size
), data
, rl
* Vcb
->superblock
.sector_size
,
1935 NULL
, newchunk
, false, 0, NormalPagePriority
);
1936 if (!NT_SUCCESS(Status
)) {
1937 ERR("write_data_complete returned %08lx\n", Status
);
// checksums are added for the new address; the old address gets a call with
// NULL checksum data (presumably removing the old entries - confirm against add_checksum_entry)
1948 add_checksum_entry(Vcb
, dr
->new_address
+ (index
* Vcb
->superblock
.sector_size
), runlength
, (uint8_t*)csum
+ (index
* Vcb
->csum_size
), NULL
);
1949 add_checksum_entry(Vcb
, dr
->address
+ (index
* Vcb
->superblock
.sector_size
), runlength
, NULL
, NULL
);
1955 if (runlength
* Vcb
->superblock
.sector_size
> BALANCE_UNIT
)
1956 rl
= BALANCE_UNIT
/ Vcb
->superblock
.sector_size
;
// this read passes the matching slice of csum to read_data
1960 Status
= read_data(Vcb
, dr
->address
+ (index
* Vcb
->superblock
.sector_size
), rl
* Vcb
->superblock
.sector_size
,
1961 (uint8_t*)csum
+ (index
* Vcb
->csum_size
), false, data
, c
, NULL
, NULL
, 0, false, NormalPagePriority
);
1962 if (!NT_SUCCESS(Status
)) {
1963 ERR("read_data returned %08lx\n", Status
);
1969 Status
= write_data_complete(Vcb
, dr
->new_address
+ (index
* Vcb
->superblock
.sector_size
), data
, rl
* Vcb
->superblock
.sector_size
,
1970 NULL
, newchunk
, false, 0, NormalPagePriority
);
1971 if (!NT_SUCCESS(Status
)) {
1972 ERR("write_data_complete returned %08lx\n", Status
);
1980 } while (runlength
> 0);
1983 runlength
= RtlFindNextForwardRunClear(&bmp
, index
, &index
);
1989 // handle final nocsum run
1990 if (lastoff
< dr
->size
/ Vcb
->superblock
.sector_size
) {
1991 ULONG off
= lastoff
;
1992 ULONG size
= (ULONG
)((dr
->size
/ Vcb
->superblock
.sector_size
) - lastoff
);
1997 if (size
* Vcb
->superblock
.sector_size
> BALANCE_UNIT
)
1998 rl
= BALANCE_UNIT
/ Vcb
->superblock
.sector_size
;
2002 Status
= read_data(Vcb
, dr
->address
+ (off
* Vcb
->superblock
.sector_size
), rl
* Vcb
->superblock
.sector_size
, NULL
, false, data
,
2003 c
, NULL
, NULL
, 0, false, NormalPagePriority
);
2004 if (!NT_SUCCESS(Status
)) {
2005 ERR("read_data returned %08lx\n", Status
);
2009 Status
= write_data_complete(Vcb
, dr
->new_address
+ (off
* Vcb
->superblock
.sector_size
), data
, rl
* Vcb
->superblock
.sector_size
,
2010 NULL
, newchunk
, false, 0, NormalPagePriority
);
2011 if (!NT_SUCCESS(Status
)) {
2012 ERR("write_data_complete returned %08lx\n", Status
);
// rewrite the tree blocks that reference the moved extents
2027 Status
= write_metadata_items(Vcb
, &metadata_items
, &items
, NULL
, &rollback
);
2028 if (!NT_SUCCESS(Status
)) {
2029 ERR("write_metadata_items returned %08lx\n", Status
);
// insert extent items for every extent at its new address
2034 while (le
!= &items
) {
2035 data_reloc
* dr
= CONTAINING_RECORD(le
, data_reloc
, list_entry
);
2037 Status
= add_data_reloc_extent_item(Vcb
, dr
);
2038 if (!NT_SUCCESS(Status
)) {
2039 ERR("add_data_reloc_extent_item returned %08lx\n", Status
);
// move changed_extent records for relocated extents onto their new chunk's list
2046 le
= c
->changed_extents
.Flink
;
2047 while (le
!= &c
->changed_extents
) {
2048 LIST_ENTRY
*le2
, *le3
;
2049 changed_extent
* ce
= CONTAINING_RECORD(le
, changed_extent
, list_entry
);
2054 while (le2
!= &items
) {
2055 data_reloc
* dr
= CONTAINING_RECORD(le2
, data_reloc
, list_entry
);
2057 if (ce
->address
== dr
->address
) {
2058 ce
->address
= dr
->new_address
;
2059 RemoveEntryList(&ce
->list_entry
);
2060 InsertTailList(&dr
->newchunk
->changed_extents
, &ce
->list_entry
);
2070 Status
= STATUS_SUCCESS
;
2072 Vcb
->need_write
= true;
2075 if (NT_SUCCESS(Status
)) {
2076 // update extents in cache inodes before we flush
2077 le
= Vcb
->chunks
.Flink
;
2078 while (le
!= &Vcb
->chunks
) {
2079 chunk
* c2
= CONTAINING_RECORD(le
, chunk
, list_entry
);
2084 ExAcquireResourceExclusiveLite(c2
->cache
->Header
.Resource
, true);
2086 le2
= c2
->cache
->extents
.Flink
;
2087 while (le2
!= &c2
->cache
->extents
) {
2088 extent
* ext
= CONTAINING_RECORD(le2
, extent
, list_entry
);
2091 if (ext
->extent_data
.type
== EXTENT_TYPE_REGULAR
|| ext
->extent_data
.type
== EXTENT_TYPE_PREALLOC
) {
2092 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ext
->extent_data
.data
;
// only extents whose address lies inside chunk c can have moved
2094 if (ed2
->size
> 0 && ed2
->address
>= c
->offset
&& ed2
->address
< c
->offset
+ c
->chunk_item
->size
) {
2095 LIST_ENTRY
* le3
= items
.Flink
;
2096 while (le3
!= &items
) {
2097 data_reloc
* dr
= CONTAINING_RECORD(le3
, data_reloc
, list_entry
);
2099 if (ed2
->address
== dr
->address
) {
2100 ed2
->address
= dr
->new_address
;
2113 ExReleaseResourceLite(c2
->cache
->Header
.Resource
);
2119 Status
= do_write(Vcb
, NULL
);
2120 if (!NT_SUCCESS(Status
))
2121 ERR("do_write returned %08lx\n", Status
);
// success path: drop the rollback list and repoint every open fcb's extents
2124 if (NT_SUCCESS(Status
)) {
2125 clear_rollback(&rollback
);
2128 // FIXME - speed this up(?)
2130 le
= Vcb
->all_fcbs
.Flink
;
2131 while (le
!= &Vcb
->all_fcbs
) {
2132 struct _fcb
* fcb
= CONTAINING_RECORD(le
, struct _fcb
, list_entry_all
);
2135 ExAcquireResourceExclusiveLite(fcb
->Header
.Resource
, true);
2137 le2
= fcb
->extents
.Flink
;
2138 while (le2
!= &fcb
->extents
) {
2139 extent
* ext
= CONTAINING_RECORD(le2
, extent
, list_entry
);
2142 if (ext
->extent_data
.type
== EXTENT_TYPE_REGULAR
|| ext
->extent_data
.type
== EXTENT_TYPE_PREALLOC
) {
2143 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ext
->extent_data
.data
;
2145 if (ed2
->size
> 0 && ed2
->address
>= c
->offset
&& ed2
->address
< c
->offset
+ c
->chunk_item
->size
) {
2146 LIST_ENTRY
* le3
= items
.Flink
;
2147 while (le3
!= &items
) {
2148 data_reloc
* dr
= CONTAINING_RECORD(le3
, data_reloc
, list_entry
);
2150 if (ed2
->address
== dr
->address
) {
2151 ed2
->address
= dr
->new_address
;
2164 ExReleaseResourceLite(fcb
->Header
.Resource
);
// failure path: replay the rollback list
2169 do_rollback(Vcb
, &rollback
);
2173 ExReleaseResourceLite(&Vcb
->tree_lock
);
// free the data relocation list and each entry's refs
2178 while (!IsListEmpty(&items
)) {
2179 data_reloc
* dr
= CONTAINING_RECORD(RemoveHeadList(&items
), data_reloc
, list_entry
);
2181 while (!IsListEmpty(&dr
->refs
)) {
2182 data_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&dr
->refs
), data_reloc_ref
, list_entry
);
// free the metadata relocation list, each entry's refs and its data buffer
2190 while (!IsListEmpty(&metadata_items
)) {
2191 metadata_reloc
* mr
= CONTAINING_RECORD(RemoveHeadList(&metadata_items
), metadata_reloc
, list_entry
);
2193 while (!IsListEmpty(&mr
->refs
)) {
2194 metadata_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&mr
->refs
), metadata_reloc_ref
, list_entry
);
2200 ExFreePool(mr
->data
);
2208 static __inline
uint64_t get_chunk_dup_type(chunk
* c
) {
2209 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID0
)
2210 return BLOCK_FLAG_RAID0
;
2211 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID1
)
2212 return BLOCK_FLAG_RAID1
;
2213 else if (c
->chunk_item
->type
& BLOCK_FLAG_DUPLICATE
)
2214 return BLOCK_FLAG_DUPLICATE
;
2215 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID10
)
2216 return BLOCK_FLAG_RAID10
;
2217 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
)
2218 return BLOCK_FLAG_RAID5
;
2219 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)
2220 return BLOCK_FLAG_RAID6
;
2221 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID1C3
)
2222 return BLOCK_FLAG_RAID1C3
;
2223 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID1C4
)
2224 return BLOCK_FLAG_RAID1C4
;
2226 return BLOCK_FLAG_SINGLE
;
2229 static bool should_balance_chunk(device_extension
* Vcb
, uint8_t sort
, chunk
* c
) {
2230 btrfs_balance_opts
* opts
;
2232 opts
= &Vcb
->balance
.opts
[sort
];
2234 if (!(opts
->flags
& BTRFS_BALANCE_OPTS_ENABLED
))
2237 if (opts
->flags
& BTRFS_BALANCE_OPTS_PROFILES
) {
2238 uint64_t type
= get_chunk_dup_type(c
);
2240 if (!(type
& opts
->profiles
))
2244 if (opts
->flags
& BTRFS_BALANCE_OPTS_DEVID
) {
2246 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
2249 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
2250 if (cis
[i
].dev_id
== opts
->devid
) {
2260 if (opts
->flags
& BTRFS_BALANCE_OPTS_DRANGE
) {
2263 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
2266 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID0
)
2267 factor
= c
->chunk_item
->num_stripes
;
2268 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID10
)
2269 factor
= c
->chunk_item
->num_stripes
/ c
->chunk_item
->sub_stripes
;
2270 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
)
2271 factor
= c
->chunk_item
->num_stripes
- 1;
2272 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)
2273 factor
= c
->chunk_item
->num_stripes
- 2;
2274 else // SINGLE, DUPLICATE, RAID1, RAID1C3, RAID1C4
2277 physsize
= c
->chunk_item
->size
/ factor
;
2279 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
2280 if (cis
[i
].offset
< opts
->drange_end
&& cis
[i
].offset
+ physsize
>= opts
->drange_start
&&
2281 (!(opts
->flags
& BTRFS_BALANCE_OPTS_DEVID
) || cis
[i
].dev_id
== opts
->devid
)) {
2291 if (opts
->flags
& BTRFS_BALANCE_OPTS_VRANGE
) {
2292 if (c
->offset
+ c
->chunk_item
->size
<= opts
->vrange_start
|| c
->offset
> opts
->vrange_end
)
2296 if (opts
->flags
& BTRFS_BALANCE_OPTS_STRIPES
) {
2297 if (c
->chunk_item
->num_stripes
< opts
->stripes_start
|| c
->chunk_item
->num_stripes
< opts
->stripes_end
)
2301 if (opts
->flags
& BTRFS_BALANCE_OPTS_USAGE
) {
2302 uint64_t usage
= c
->used
* 100 / c
->chunk_item
->size
;
2304 // usage == 0 should mean completely empty, not just that usage rounds to 0%
2305 if (c
->used
> 0 && usage
== 0)
2308 if (usage
< opts
->usage_start
|| usage
> opts
->usage_end
)
2312 if (opts
->flags
& BTRFS_BALANCE_OPTS_CONVERT
&& opts
->flags
& BTRFS_BALANCE_OPTS_SOFT
) {
2313 uint64_t type
= get_chunk_dup_type(c
);
2315 if (type
== opts
->convert
)
2322 static void copy_balance_args(btrfs_balance_opts
* opts
, BALANCE_ARGS
* args
) {
2323 if (opts
->flags
& BTRFS_BALANCE_OPTS_PROFILES
) {
2324 args
->profiles
= opts
->profiles
;
2325 args
->flags
|= BALANCE_ARGS_FLAGS_PROFILES
;
2328 if (opts
->flags
& BTRFS_BALANCE_OPTS_USAGE
) {
2329 if (args
->usage_start
== 0) {
2330 args
->flags
|= BALANCE_ARGS_FLAGS_USAGE_RANGE
;
2331 args
->usage_start
= opts
->usage_start
;
2332 args
->usage_end
= opts
->usage_end
;
2334 args
->flags
|= BALANCE_ARGS_FLAGS_USAGE
;
2335 args
->usage
= opts
->usage_end
;
2339 if (opts
->flags
& BTRFS_BALANCE_OPTS_DEVID
) {
2340 args
->devid
= opts
->devid
;
2341 args
->flags
|= BALANCE_ARGS_FLAGS_DEVID
;
2344 if (opts
->flags
& BTRFS_BALANCE_OPTS_DRANGE
) {
2345 args
->drange_start
= opts
->drange_start
;
2346 args
->drange_end
= opts
->drange_end
;
2347 args
->flags
|= BALANCE_ARGS_FLAGS_DRANGE
;
2350 if (opts
->flags
& BTRFS_BALANCE_OPTS_VRANGE
) {
2351 args
->vrange_start
= opts
->vrange_start
;
2352 args
->vrange_end
= opts
->vrange_end
;
2353 args
->flags
|= BALANCE_ARGS_FLAGS_VRANGE
;
2356 if (opts
->flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
2357 args
->convert
= opts
->convert
;
2358 args
->flags
|= BALANCE_ARGS_FLAGS_CONVERT
;
2360 if (opts
->flags
& BTRFS_BALANCE_OPTS_SOFT
)
2361 args
->flags
|= BALANCE_ARGS_FLAGS_SOFT
;
2364 if (opts
->flags
& BTRFS_BALANCE_OPTS_LIMIT
) {
2365 if (args
->limit_start
== 0) {
2366 args
->flags
|= BALANCE_ARGS_FLAGS_LIMIT_RANGE
;
2367 args
->limit_start
= (uint32_t)opts
->limit_start
;
2368 args
->limit_end
= (uint32_t)opts
->limit_end
;
2370 args
->flags
|= BALANCE_ARGS_FLAGS_LIMIT
;
2371 args
->limit
= opts
->limit_end
;
2375 if (opts
->flags
& BTRFS_BALANCE_OPTS_STRIPES
) {
2376 args
->stripes_start
= opts
->stripes_start
;
2377 args
->stripes_end
= opts
->stripes_end
;
2378 args
->flags
|= BALANCE_ARGS_FLAGS_STRIPES_RANGE
;
2382 static NTSTATUS
add_balance_item(device_extension
* Vcb
) {
2388 searchkey
.obj_id
= BALANCE_ITEM_ID
;
2389 searchkey
.obj_type
= TYPE_TEMP_ITEM
;
2390 searchkey
.offset
= 0;
2392 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
2394 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, NULL
);
2395 if (!NT_SUCCESS(Status
)) {
2396 ERR("find_item returned %08lx\n", Status
);
2400 if (!keycmp(tp
.item
->key
, searchkey
)) {
2401 Status
= delete_tree_item(Vcb
, &tp
);
2402 if (!NT_SUCCESS(Status
)) {
2403 ERR("delete_tree_item returned %08lx\n", Status
);
2408 bi
= ExAllocatePoolWithTag(PagedPool
, sizeof(BALANCE_ITEM
), ALLOC_TAG
);
2410 ERR("out of memory\n");
2411 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2415 RtlZeroMemory(bi
, sizeof(BALANCE_ITEM
));
2417 if (Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
2418 bi
->flags
|= BALANCE_FLAGS_DATA
;
2419 copy_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], &bi
->data
);
2422 if (Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
2423 bi
->flags
|= BALANCE_FLAGS_METADATA
;
2424 copy_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], &bi
->metadata
);
2427 if (Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
2428 bi
->flags
|= BALANCE_FLAGS_SYSTEM
;
2429 copy_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
], &bi
->system
);
2432 Status
= insert_tree_item(Vcb
, Vcb
->root_root
, BALANCE_ITEM_ID
, TYPE_TEMP_ITEM
, 0, bi
, sizeof(BALANCE_ITEM
), NULL
, NULL
);
2433 if (!NT_SUCCESS(Status
)) {
2434 ERR("insert_tree_item returned %08lx\n", Status
);
2439 Status
= STATUS_SUCCESS
;
2442 if (NT_SUCCESS(Status
)) {
2443 Status
= do_write(Vcb
, NULL
);
2444 if (!NT_SUCCESS(Status
))
2445 ERR("do_write returned %08lx\n", Status
);
2450 ExReleaseResourceLite(&Vcb
->tree_lock
);
2455 static NTSTATUS
remove_balance_item(device_extension
* Vcb
) {
2460 searchkey
.obj_id
= BALANCE_ITEM_ID
;
2461 searchkey
.obj_type
= TYPE_TEMP_ITEM
;
2462 searchkey
.offset
= 0;
2464 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
2466 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, NULL
);
2467 if (!NT_SUCCESS(Status
)) {
2468 ERR("find_item returned %08lx\n", Status
);
2472 if (!keycmp(tp
.item
->key
, searchkey
)) {
2473 Status
= delete_tree_item(Vcb
, &tp
);
2474 if (!NT_SUCCESS(Status
)) {
2475 ERR("delete_tree_item returned %08lx\n", Status
);
2479 Status
= do_write(Vcb
, NULL
);
2480 if (!NT_SUCCESS(Status
)) {
2481 ERR("do_write returned %08lx\n", Status
);
2488 Status
= STATUS_SUCCESS
;
2491 ExReleaseResourceLite(&Vcb
->tree_lock
);
2496 static void load_balance_args(btrfs_balance_opts
* opts
, BALANCE_ARGS
* args
) {
2497 opts
->flags
= BTRFS_BALANCE_OPTS_ENABLED
;
2499 if (args
->flags
& BALANCE_ARGS_FLAGS_PROFILES
) {
2500 opts
->flags
|= BTRFS_BALANCE_OPTS_PROFILES
;
2501 opts
->profiles
= args
->profiles
;
2504 if (args
->flags
& BALANCE_ARGS_FLAGS_USAGE
) {
2505 opts
->flags
|= BTRFS_BALANCE_OPTS_USAGE
;
2507 opts
->usage_start
= 0;
2508 opts
->usage_end
= (uint8_t)args
->usage
;
2509 } else if (args
->flags
& BALANCE_ARGS_FLAGS_USAGE_RANGE
) {
2510 opts
->flags
|= BTRFS_BALANCE_OPTS_USAGE
;
2512 opts
->usage_start
= (uint8_t)args
->usage_start
;
2513 opts
->usage_end
= (uint8_t)args
->usage_end
;
2516 if (args
->flags
& BALANCE_ARGS_FLAGS_DEVID
) {
2517 opts
->flags
|= BTRFS_BALANCE_OPTS_DEVID
;
2518 opts
->devid
= args
->devid
;
2521 if (args
->flags
& BALANCE_ARGS_FLAGS_DRANGE
) {
2522 opts
->flags
|= BTRFS_BALANCE_OPTS_DRANGE
;
2523 opts
->drange_start
= args
->drange_start
;
2524 opts
->drange_end
= args
->drange_end
;
2527 if (args
->flags
& BALANCE_ARGS_FLAGS_VRANGE
) {
2528 opts
->flags
|= BTRFS_BALANCE_OPTS_VRANGE
;
2529 opts
->vrange_start
= args
->vrange_start
;
2530 opts
->vrange_end
= args
->vrange_end
;
2533 if (args
->flags
& BALANCE_ARGS_FLAGS_LIMIT
) {
2534 opts
->flags
|= BTRFS_BALANCE_OPTS_LIMIT
;
2536 opts
->limit_start
= 0;
2537 opts
->limit_end
= args
->limit
;
2538 } else if (args
->flags
& BALANCE_ARGS_FLAGS_LIMIT_RANGE
) {
2539 opts
->flags
|= BTRFS_BALANCE_OPTS_LIMIT
;
2541 opts
->limit_start
= args
->limit_start
;
2542 opts
->limit_end
= args
->limit_end
;
2545 if (args
->flags
& BALANCE_ARGS_FLAGS_STRIPES_RANGE
) {
2546 opts
->flags
|= BTRFS_BALANCE_OPTS_STRIPES
;
2548 opts
->stripes_start
= (uint16_t)args
->stripes_start
;
2549 opts
->stripes_end
= (uint16_t)args
->stripes_end
;
2552 if (args
->flags
& BALANCE_ARGS_FLAGS_CONVERT
) {
2553 opts
->flags
|= BTRFS_BALANCE_OPTS_CONVERT
;
2554 opts
->convert
= args
->convert
;
2556 if (args
->flags
& BALANCE_ARGS_FLAGS_SOFT
)
2557 opts
->flags
|= BTRFS_BALANCE_OPTS_SOFT
;
2561 static NTSTATUS
remove_superblocks(device
* dev
) {
2566 sb
= ExAllocatePoolWithTag(PagedPool
, sizeof(superblock
), ALLOC_TAG
);
2568 ERR("out of memory\n");
2569 return STATUS_INSUFFICIENT_RESOURCES
;
2572 RtlZeroMemory(sb
, sizeof(superblock
));
2574 while (superblock_addrs
[i
] > 0 && dev
->devitem
.num_bytes
>= superblock_addrs
[i
] + sizeof(superblock
)) {
2575 Status
= write_data_phys(dev
->devobj
, dev
->fileobj
, superblock_addrs
[i
], sb
, sizeof(superblock
));
2577 if (!NT_SUCCESS(Status
)) {
2587 return STATUS_SUCCESS
;
2590 static NTSTATUS
finish_removing_device(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, device
* dev
) {
2595 volume_device_extension
* vde
;
2597 if (Vcb
->need_write
) {
2598 Status
= do_write(Vcb
, NULL
);
2600 if (!NT_SUCCESS(Status
))
2601 ERR("do_write returned %08lx\n", Status
);
2603 Status
= STATUS_SUCCESS
;
2607 if (!NT_SUCCESS(Status
))
2610 // remove entry in chunk tree
2612 searchkey
.obj_id
= 1;
2613 searchkey
.obj_type
= TYPE_DEV_ITEM
;
2614 searchkey
.offset
= dev
->devitem
.dev_id
;
2616 Status
= find_item(Vcb
, Vcb
->chunk_root
, &tp
, &searchkey
, false, NULL
);
2617 if (!NT_SUCCESS(Status
)) {
2618 ERR("find_item returned %08lx\n", Status
);
2622 if (!keycmp(searchkey
, tp
.item
->key
)) {
2623 Status
= delete_tree_item(Vcb
, &tp
);
2625 if (!NT_SUCCESS(Status
)) {
2626 ERR("delete_tree_item returned %08lx\n", Status
);
2631 // remove stats entry in device tree
2633 searchkey
.obj_id
= 0;
2634 searchkey
.obj_type
= TYPE_DEV_STATS
;
2635 searchkey
.offset
= dev
->devitem
.dev_id
;
2637 Status
= find_item(Vcb
, Vcb
->dev_root
, &tp
, &searchkey
, false, NULL
);
2638 if (!NT_SUCCESS(Status
)) {
2639 ERR("find_item returned %08lx\n", Status
);
2643 if (!keycmp(searchkey
, tp
.item
->key
)) {
2644 Status
= delete_tree_item(Vcb
, &tp
);
2646 if (!NT_SUCCESS(Status
)) {
2647 ERR("delete_tree_item returned %08lx\n", Status
);
2652 // update superblock
2654 Vcb
->superblock
.num_devices
--;
2655 Vcb
->superblock
.total_bytes
-= dev
->devitem
.num_bytes
;
2656 Vcb
->devices_loaded
--;
2658 RemoveEntryList(&dev
->list_entry
);
2662 Status
= do_write(Vcb
, NULL
);
2663 if (!NT_SUCCESS(Status
))
2664 ERR("do_write returned %08lx\n", Status
);
2668 if (!NT_SUCCESS(Status
))
2671 if (!dev
->readonly
&& dev
->devobj
) {
2672 Status
= remove_superblocks(dev
);
2673 if (!NT_SUCCESS(Status
))
2674 WARN("remove_superblocks returned %08lx\n", Status
);
2677 // remove entry in volume list
2682 pdo_device_extension
* pdode
= vde
->pdode
;
2684 ExAcquireResourceExclusiveLite(&pdode
->child_lock
, true);
2686 le
= pdode
->children
.Flink
;
2687 while (le
!= &pdode
->children
) {
2688 volume_child
* vc
= CONTAINING_RECORD(le
, volume_child
, list_entry
);
2690 if (RtlCompareMemory(&dev
->devitem
.device_uuid
, &vc
->uuid
, sizeof(BTRFS_UUID
)) == sizeof(BTRFS_UUID
)) {
2691 PFILE_OBJECT FileObject
;
2692 PDEVICE_OBJECT mountmgr
;
2693 UNICODE_STRING mmdevpath
;
2695 pdode
->children_loaded
--;
2697 if (vc
->had_drive_letter
) { // re-add entry to mountmgr
2698 RtlInitUnicodeString(&mmdevpath
, MOUNTMGR_DEVICE_NAME
);
2699 Status
= IoGetDeviceObjectPointer(&mmdevpath
, FILE_READ_ATTRIBUTES
, &FileObject
, &mountmgr
);
2700 if (!NT_SUCCESS(Status
))
2701 ERR("IoGetDeviceObjectPointer returned %08lx\n", Status
);
2705 Status
= dev_ioctl(dev
->devobj
, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME
, NULL
, 0, &mdn
, sizeof(MOUNTDEV_NAME
), true, NULL
);
2706 if (!NT_SUCCESS(Status
) && Status
!= STATUS_BUFFER_OVERFLOW
)
2707 ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08lx\n", Status
);
2709 MOUNTDEV_NAME
* mdn2
;
2710 ULONG mdnsize
= (ULONG
)offsetof(MOUNTDEV_NAME
, Name
[0]) + mdn
.NameLength
;
2712 mdn2
= ExAllocatePoolWithTag(PagedPool
, mdnsize
, ALLOC_TAG
);
2714 ERR("out of memory\n");
2716 Status
= dev_ioctl(dev
->devobj
, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME
, NULL
, 0, mdn2
, mdnsize
, true, NULL
);
2717 if (!NT_SUCCESS(Status
))
2718 ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08lx\n", Status
);
2720 UNICODE_STRING name
;
2722 name
.Buffer
= mdn2
->Name
;
2723 name
.Length
= name
.MaximumLength
= mdn2
->NameLength
;
2725 Status
= mountmgr_add_drive_letter(mountmgr
, &name
);
2726 if (!NT_SUCCESS(Status
))
2727 WARN("mountmgr_add_drive_letter returned %08lx\n", Status
);
2734 ObDereferenceObject(FileObject
);
2738 ExFreePool(vc
->pnp_name
.Buffer
);
2739 RemoveEntryList(&vc
->list_entry
);
2742 ObDereferenceObject(vc
->fileobj
);
2750 if (pdode
->children_loaded
> 0 && vde
->device
->Characteristics
& FILE_REMOVABLE_MEDIA
) {
2751 vde
->device
->Characteristics
&= ~FILE_REMOVABLE_MEDIA
;
2753 le
= pdode
->children
.Flink
;
2754 while (le
!= &pdode
->children
) {
2755 volume_child
* vc
= CONTAINING_RECORD(le
, volume_child
, list_entry
);
2757 if (vc
->devobj
->Characteristics
& FILE_REMOVABLE_MEDIA
) {
2758 vde
->device
->Characteristics
|= FILE_REMOVABLE_MEDIA
;
2766 pdode
->num_children
= Vcb
->superblock
.num_devices
;
2768 ExReleaseResourceLite(&pdode
->child_lock
);
2772 if (dev
->trim
&& !dev
->readonly
&& !Vcb
->options
.no_trim
)
2773 trim_whole_device(dev
);
2776 while (!IsListEmpty(&dev
->space
)) {
2777 LIST_ENTRY
* le2
= RemoveHeadList(&dev
->space
);
2778 space
* s
= CONTAINING_RECORD(le2
, space
, list_entry
);
2788 le
= Vcb
->devices
.Flink
;
2789 while (le
!= &Vcb
->devices
) {
2790 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
2801 FsRtlNotifyVolumeEvent(Vcb
->root_file
, FSRTL_VOLUME_CHANGE_SIZE
);
2803 return STATUS_SUCCESS
;
2806 static void trim_unalloc_space(_Requires_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, device
* dev
) {
2807 DEVICE_MANAGE_DATA_SET_ATTRIBUTES
* dmdsa
;
2808 DEVICE_DATA_SET_RANGE
* ranges
;
2814 uint64_t lastoff
= 0x100000; // don't TRIM the first megabyte, in case someone has been daft enough to install GRUB there
2817 dev
->num_trim_entries
= 0;
2819 searchkey
.obj_id
= dev
->devitem
.dev_id
;
2820 searchkey
.obj_type
= TYPE_DEV_EXTENT
;
2821 searchkey
.offset
= 0;
2823 Status
= find_item(Vcb
, Vcb
->dev_root
, &tp
, &searchkey
, false, NULL
);
2824 if (!NT_SUCCESS(Status
)) {
2825 ERR("find_item returned %08lx\n", Status
);
2830 traverse_ptr next_tp
;
2832 if (tp
.item
->key
.obj_id
== dev
->devitem
.dev_id
&& tp
.item
->key
.obj_type
== TYPE_DEV_EXTENT
) {
2833 if (tp
.item
->size
>= sizeof(DEV_EXTENT
)) {
2834 DEV_EXTENT
* de
= (DEV_EXTENT
*)tp
.item
->data
;
2836 if (tp
.item
->key
.offset
> lastoff
)
2837 add_trim_entry_avoid_sb(Vcb
, dev
, lastoff
, tp
.item
->key
.offset
- lastoff
);
2839 lastoff
= tp
.item
->key
.offset
+ de
->length
;
2841 ERR("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, sizeof(DEV_EXTENT
));
2846 b
= find_next_item(Vcb
, &tp
, &next_tp
, false, NULL
);
2850 if (tp
.item
->key
.obj_id
> searchkey
.obj_id
|| (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
> searchkey
.obj_type
))
2855 if (lastoff
< dev
->devitem
.num_bytes
)
2856 add_trim_entry_avoid_sb(Vcb
, dev
, lastoff
, dev
->devitem
.num_bytes
- lastoff
);
2858 if (dev
->num_trim_entries
== 0)
2861 datalen
= (ULONG
)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES
), sizeof(uint64_t)) + (dev
->num_trim_entries
* sizeof(DEVICE_DATA_SET_RANGE
));
2863 dmdsa
= ExAllocatePoolWithTag(PagedPool
, datalen
, ALLOC_TAG
);
2865 ERR("out of memory\n");
2869 dmdsa
->Size
= sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES
);
2870 dmdsa
->Action
= DeviceDsmAction_Trim
;
2871 dmdsa
->Flags
= DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED
;
2872 dmdsa
->ParameterBlockOffset
= 0;
2873 dmdsa
->ParameterBlockLength
= 0;
2874 dmdsa
->DataSetRangesOffset
= (ULONG
)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES
), sizeof(uint64_t));
2875 dmdsa
->DataSetRangesLength
= dev
->num_trim_entries
* sizeof(DEVICE_DATA_SET_RANGE
);
2877 ranges
= (DEVICE_DATA_SET_RANGE
*)((uint8_t*)dmdsa
+ dmdsa
->DataSetRangesOffset
);
2880 le
= dev
->trim_list
.Flink
;
2881 while (le
!= &dev
->trim_list
) {
2882 space
* s
= CONTAINING_RECORD(le
, space
, list_entry
);
2884 ranges
[i
].StartingOffset
= s
->address
;
2885 ranges
[i
].LengthInBytes
= s
->size
;
2891 Status
= dev_ioctl(dev
->devobj
, IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES
, dmdsa
, datalen
, NULL
, 0, true, NULL
);
2892 if (!NT_SUCCESS(Status
))
2893 WARN("IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES returned %08lx\n", Status
);
2898 while (!IsListEmpty(&dev
->trim_list
)) {
2899 space
* s
= CONTAINING_RECORD(RemoveHeadList(&dev
->trim_list
), space
, list_entry
);
2903 dev
->num_trim_entries
= 0;
2906 static NTSTATUS
try_consolidation(device_extension
* Vcb
, uint64_t flags
, chunk
** newchunk
) {
2912 // FIXME - allow with metadata chunks?
2917 ExAcquireResourceSharedLite(&Vcb
->tree_lock
, true);
2919 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, true);
2921 // choose the least-used chunk we haven't looked at yet
2922 le
= Vcb
->chunks
.Flink
;
2923 while (le
!= &Vcb
->chunks
) {
2924 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
2926 // FIXME - skip full-size chunks over e.g. 90% full?
2927 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
&& !c
->readonly
&& c
->balance_num
!= Vcb
->balance
.balance_num
&& (!rc
|| c
->used
< rc
->used
))
2933 ExReleaseResourceLite(&Vcb
->chunk_lock
);
2936 ExReleaseResourceLite(&Vcb
->tree_lock
);
2940 if (rc
->list_entry_balance
.Flink
) {
2941 RemoveEntryList(&rc
->list_entry_balance
);
2942 Vcb
->balance
.chunks_left
--;
2945 rc
->list_entry_balance
.Flink
= (LIST_ENTRY
*)1; // so it doesn't get dropped
2948 ExReleaseResourceLite(&Vcb
->tree_lock
);
2953 Status
= balance_data_chunk(Vcb
, rc
, &changed
);
2954 if (!NT_SUCCESS(Status
)) {
2955 ERR("balance_data_chunk returned %08lx\n", Status
);
2956 Vcb
->balance
.status
= Status
;
2957 rc
->list_entry_balance
.Flink
= NULL
;
2962 KeWaitForSingleObject(&Vcb
->balance
.event
, Executive
, KernelMode
, false, NULL
);
2965 Vcb
->balance
.stopping
= true;
2967 if (Vcb
->balance
.stopping
)
2968 return STATUS_SUCCESS
;
2971 rc
->list_entry_balance
.Flink
= NULL
;
2974 rc
->space_changed
= true;
2975 rc
->balance_num
= Vcb
->balance
.balance_num
;
2977 Status
= do_write(Vcb
, NULL
);
2978 if (!NT_SUCCESS(Status
)) {
2979 ERR("do_write returned %08lx\n", Status
);
2986 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
2988 Status
= alloc_chunk(Vcb
, flags
, &rc
, true);
2990 ExReleaseResourceLite(&Vcb
->chunk_lock
);
2992 if (NT_SUCCESS(Status
)) {
2996 ERR("alloc_chunk returned %08lx\n", Status
);
3001 static NTSTATUS
regenerate_space_list(device_extension
* Vcb
, device
* dev
) {
3004 while (!IsListEmpty(&dev
->space
)) {
3005 space
* s
= CONTAINING_RECORD(RemoveHeadList(&dev
->space
), space
, list_entry
);
3010 // The Linux driver doesn't like to allocate chunks within the first megabyte of a device.
3012 space_list_add2(&dev
->space
, NULL
, 0x100000, dev
->devitem
.num_bytes
- 0x100000, NULL
, NULL
);
3014 le
= Vcb
->chunks
.Flink
;
3015 while (le
!= &Vcb
->chunks
) {
3017 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
3018 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
3020 for (n
= 0; n
< c
->chunk_item
->num_stripes
; n
++) {
3021 uint64_t stripe_size
= 0;
3023 if (cis
[n
].dev_id
== dev
->devitem
.dev_id
) {
3024 if (stripe_size
== 0) {
3027 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID0
)
3028 factor
= c
->chunk_item
->num_stripes
;
3029 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID10
)
3030 factor
= c
->chunk_item
->num_stripes
/ c
->chunk_item
->sub_stripes
;
3031 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
)
3032 factor
= c
->chunk_item
->num_stripes
- 1;
3033 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)
3034 factor
= c
->chunk_item
->num_stripes
- 2;
3035 else // SINGLE, DUP, RAID1, RAID1C3, RAID1C4
3038 stripe_size
= c
->chunk_item
->size
/ factor
;
3041 space_list_subtract2(&dev
->space
, NULL
, cis
[n
].offset
, stripe_size
, NULL
, NULL
);
3048 return STATUS_SUCCESS
;
3051 _Function_class_(KSTART_ROUTINE
)
3052 void __stdcall
balance_thread(void* context
) {
3053 device_extension
* Vcb
= (device_extension
*)context
;
3056 uint64_t num_chunks
[3], okay_metadata_chunks
= 0, okay_data_chunks
= 0, okay_system_chunks
= 0;
3057 uint64_t old_data_flags
= 0, old_metadata_flags
= 0, old_system_flags
= 0;
3060 Vcb
->balance
.balance_num
++;
3062 Vcb
->balance
.stopping
= false;
3063 KeInitializeEvent(&Vcb
->balance
.finished
, NotificationEvent
, false);
3065 if (Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
&& Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
3066 old_data_flags
= Vcb
->data_flags
;
3067 Vcb
->data_flags
= BLOCK_FLAG_DATA
| (Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].convert
== BLOCK_FLAG_SINGLE
? 0 : Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].convert
);
3069 FsRtlNotifyVolumeEvent(Vcb
->root_file
, FSRTL_VOLUME_CHANGE_SIZE
);
3072 if (Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
&& Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
3073 old_metadata_flags
= Vcb
->metadata_flags
;
3074 Vcb
->metadata_flags
= BLOCK_FLAG_METADATA
| (Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].convert
== BLOCK_FLAG_SINGLE
? 0 : Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].convert
);
3077 if (Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].flags
& BTRFS_BALANCE_OPTS_ENABLED
&& Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
3078 old_system_flags
= Vcb
->system_flags
;
3079 Vcb
->system_flags
= BLOCK_FLAG_SYSTEM
| (Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].convert
== BLOCK_FLAG_SINGLE
? 0 : Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].convert
);
3082 if (Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS
) {
3083 if (Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
)
3084 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], &Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], sizeof(btrfs_balance_opts
));
3085 else if (Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
)
3086 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], &Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], sizeof(btrfs_balance_opts
));
3089 num_chunks
[0] = num_chunks
[1] = num_chunks
[2] = 0;
3090 Vcb
->balance
.total_chunks
= Vcb
->balance
.chunks_left
= 0;
3092 InitializeListHead(&chunks
);
3094 // FIXME - what are we supposed to do with limit_start?
3096 if (!Vcb
->readonly
) {
3097 if (!Vcb
->balance
.removing
&& !Vcb
->balance
.shrinking
) {
3098 Status
= add_balance_item(Vcb
);
3099 if (!NT_SUCCESS(Status
)) {
3100 ERR("add_balance_item returned %08lx\n", Status
);
3101 Vcb
->balance
.status
= Status
;
3105 if (Vcb
->need_write
) {
3106 Status
= do_write(Vcb
, NULL
);
3110 if (!NT_SUCCESS(Status
)) {
3111 ERR("do_write returned %08lx\n", Status
);
3112 Vcb
->balance
.status
= Status
;
3119 KeWaitForSingleObject(&Vcb
->balance
.event
, Executive
, KernelMode
, false, NULL
);
3121 if (Vcb
->balance
.stopping
)
3124 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, true);
3126 le
= Vcb
->chunks
.Flink
;
3127 while (le
!= &Vcb
->chunks
) {
3128 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
3131 acquire_chunk_lock(c
, Vcb
);
3133 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
)
3134 sort
= BALANCE_OPTS_DATA
;
3135 else if (c
->chunk_item
->type
& BLOCK_FLAG_METADATA
)
3136 sort
= BALANCE_OPTS_METADATA
;
3137 else if (c
->chunk_item
->type
& BLOCK_FLAG_SYSTEM
)
3138 sort
= BALANCE_OPTS_SYSTEM
;
3140 ERR("unexpected chunk type %I64x\n", c
->chunk_item
->type
);
3141 release_chunk_lock(c
, Vcb
);
3145 if ((!(Vcb
->balance
.opts
[sort
].flags
& BTRFS_BALANCE_OPTS_LIMIT
) || num_chunks
[sort
] < Vcb
->balance
.opts
[sort
].limit_end
) &&
3146 should_balance_chunk(Vcb
, sort
, c
)) {
3147 InsertTailList(&chunks
, &c
->list_entry_balance
);
3150 Vcb
->balance
.total_chunks
++;
3151 Vcb
->balance
.chunks_left
++;
3152 } else if (sort
== BALANCE_OPTS_METADATA
)
3153 okay_metadata_chunks
++;
3154 else if (sort
== BALANCE_OPTS_DATA
)
3156 else if (sort
== BALANCE_OPTS_SYSTEM
)
3157 okay_system_chunks
++;
3159 if (!c
->cache_loaded
) {
3160 Status
= load_cache_chunk(Vcb
, c
, NULL
);
3162 if (!NT_SUCCESS(Status
)) {
3163 ERR("load_cache_chunk returned %08lx\n", Status
);
3164 Vcb
->balance
.status
= Status
;
3165 release_chunk_lock(c
, Vcb
);
3166 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3171 release_chunk_lock(c
, Vcb
);
3176 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3178 // If we're doing a full balance, try and allocate a new chunk now, before we mess things up
3179 if (okay_metadata_chunks
== 0 || okay_data_chunks
== 0 || okay_system_chunks
== 0) {
3180 bool consolidated
= false;
3183 if (okay_metadata_chunks
== 0) {
3184 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
3186 Status
= alloc_chunk(Vcb
, Vcb
->metadata_flags
, &c
, true);
3187 if (NT_SUCCESS(Status
))
3188 c
->balance_num
= Vcb
->balance
.balance_num
;
3189 else if (Status
!= STATUS_DISK_FULL
|| consolidated
) {
3190 ERR("alloc_chunk returned %08lx\n", Status
);
3191 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3192 Vcb
->balance
.status
= Status
;
3196 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3198 if (Status
== STATUS_DISK_FULL
) {
3199 Status
= try_consolidation(Vcb
, Vcb
->metadata_flags
, &c
);
3200 if (!NT_SUCCESS(Status
)) {
3201 ERR("try_consolidation returned %08lx\n", Status
);
3202 Vcb
->balance
.status
= Status
;
3205 c
->balance_num
= Vcb
->balance
.balance_num
;
3207 consolidated
= true;
3209 if (Vcb
->balance
.stopping
)
3214 if (okay_data_chunks
== 0) {
3215 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
3217 Status
= alloc_chunk(Vcb
, Vcb
->data_flags
, &c
, true);
3218 if (NT_SUCCESS(Status
))
3219 c
->balance_num
= Vcb
->balance
.balance_num
;
3220 else if (Status
!= STATUS_DISK_FULL
|| consolidated
) {
3221 ERR("alloc_chunk returned %08lx\n", Status
);
3222 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3223 Vcb
->balance
.status
= Status
;
3227 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3229 if (Status
== STATUS_DISK_FULL
) {
3230 Status
= try_consolidation(Vcb
, Vcb
->data_flags
, &c
);
3231 if (!NT_SUCCESS(Status
)) {
3232 ERR("try_consolidation returned %08lx\n", Status
);
3233 Vcb
->balance
.status
= Status
;
3236 c
->balance_num
= Vcb
->balance
.balance_num
;
3238 consolidated
= true;
3240 if (Vcb
->balance
.stopping
)
3245 if (okay_system_chunks
== 0) {
3246 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
3248 Status
= alloc_chunk(Vcb
, Vcb
->system_flags
, &c
, true);
3249 if (NT_SUCCESS(Status
))
3250 c
->balance_num
= Vcb
->balance
.balance_num
;
3251 else if (Status
!= STATUS_DISK_FULL
|| consolidated
) {
3252 ERR("alloc_chunk returned %08lx\n", Status
);
3253 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3254 Vcb
->balance
.status
= Status
;
3258 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3260 if (Status
== STATUS_DISK_FULL
) {
3261 Status
= try_consolidation(Vcb
, Vcb
->system_flags
, &c
);
3262 if (!NT_SUCCESS(Status
)) {
3263 ERR("try_consolidation returned %08lx\n", Status
);
3264 Vcb
->balance
.status
= Status
;
3267 c
->balance_num
= Vcb
->balance
.balance_num
;
3269 consolidated
= true;
3271 if (Vcb
->balance
.stopping
)
3277 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, true);
3280 while (le
!= &chunks
) {
3281 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry_balance
);
3288 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3290 // do data chunks before metadata
3292 while (le
!= &chunks
) {
3293 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry_balance
);
3294 LIST_ENTRY
* le2
= le
->Flink
;
3296 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
) {
3302 Status
= balance_data_chunk(Vcb
, c
, &changed
);
3303 if (!NT_SUCCESS(Status
)) {
3304 ERR("balance_data_chunk returned %08lx\n", Status
);
3305 Vcb
->balance
.status
= Status
;
3309 KeWaitForSingleObject(&Vcb
->balance
.event
, Executive
, KernelMode
, false, NULL
);
3312 Vcb
->balance
.stopping
= true;
3314 if (Vcb
->balance
.stopping
)
3319 c
->space_changed
= true;
3322 if (Vcb
->balance
.stopping
)
3325 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
&&
3326 (!(Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) || !(c
->chunk_item
->type
& BLOCK_FLAG_METADATA
))) {
3327 RemoveEntryList(&c
->list_entry_balance
);
3328 c
->list_entry_balance
.Flink
= NULL
;
3330 Vcb
->balance
.chunks_left
--;
3336 // do metadata chunks
3337 while (!IsListEmpty(&chunks
)) {
3341 le
= RemoveHeadList(&chunks
);
3342 c
= CONTAINING_RECORD(le
, chunk
, list_entry_balance
);
3344 if (c
->chunk_item
->type
& BLOCK_FLAG_METADATA
|| c
->chunk_item
->type
& BLOCK_FLAG_SYSTEM
) {
3346 Status
= balance_metadata_chunk(Vcb
, c
, &changed
);
3347 if (!NT_SUCCESS(Status
)) {
3348 ERR("balance_metadata_chunk returned %08lx\n", Status
);
3349 Vcb
->balance
.status
= Status
;
3353 KeWaitForSingleObject(&Vcb
->balance
.event
, Executive
, KernelMode
, false, NULL
);
3356 Vcb
->balance
.stopping
= true;
3358 if (Vcb
->balance
.stopping
)
3363 c
->space_changed
= true;
3366 if (Vcb
->balance
.stopping
)
3369 c
->list_entry_balance
.Flink
= NULL
;
3371 Vcb
->balance
.chunks_left
--;
3375 if (!Vcb
->readonly
) {
3376 if (Vcb
->balance
.stopping
|| !NT_SUCCESS(Vcb
->balance
.status
)) {
3378 while (le
!= &chunks
) {
3379 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry_balance
);
3383 c
->list_entry_balance
.Flink
= NULL
;
3386 if (old_data_flags
!= 0)
3387 Vcb
->data_flags
= old_data_flags
;
3389 if (old_metadata_flags
!= 0)
3390 Vcb
->metadata_flags
= old_metadata_flags
;
3392 if (old_system_flags
!= 0)
3393 Vcb
->system_flags
= old_system_flags
;
3396 if (Vcb
->balance
.removing
) {
3399 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
3401 le
= Vcb
->devices
.Flink
;
3402 while (le
!= &Vcb
->devices
) {
3403 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
3405 if (dev2
->devitem
.dev_id
== Vcb
->balance
.opts
[0].devid
) {
3414 if (Vcb
->balance
.chunks_left
== 0) {
3415 Status
= finish_removing_device(Vcb
, dev
);
3417 if (!NT_SUCCESS(Status
)) {
3418 ERR("finish_removing_device returned %08lx\n", Status
);
3425 ExReleaseResourceLite(&Vcb
->tree_lock
);
3426 } else if (Vcb
->balance
.shrinking
) {
3429 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
3431 le
= Vcb
->devices
.Flink
;
3432 while (le
!= &Vcb
->devices
) {
3433 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
3435 if (dev2
->devitem
.dev_id
== Vcb
->balance
.opts
[0].devid
) {
3444 ERR("could not find device %I64x\n", Vcb
->balance
.opts
[0].devid
);
3445 Vcb
->balance
.status
= STATUS_INTERNAL_ERROR
;
3448 if (Vcb
->balance
.stopping
|| !NT_SUCCESS(Vcb
->balance
.status
)) {
3450 Status
= regenerate_space_list(Vcb
, dev
);
3451 if (!NT_SUCCESS(Status
))
3452 WARN("regenerate_space_list returned %08lx\n", Status
);
3457 old_size
= dev
->devitem
.num_bytes
;
3458 dev
->devitem
.num_bytes
= Vcb
->balance
.opts
[0].drange_start
;
3460 Status
= update_dev_item(Vcb
, dev
, NULL
);
3461 if (!NT_SUCCESS(Status
)) {
3462 ERR("update_dev_item returned %08lx\n", Status
);
3463 dev
->devitem
.num_bytes
= old_size
;
3464 Vcb
->balance
.status
= Status
;
3466 Status
= regenerate_space_list(Vcb
, dev
);
3467 if (!NT_SUCCESS(Status
))
3468 WARN("regenerate_space_list returned %08lx\n", Status
);
3470 Vcb
->superblock
.total_bytes
-= old_size
- dev
->devitem
.num_bytes
;
3472 Status
= do_write(Vcb
, NULL
);
3473 if (!NT_SUCCESS(Status
))
3474 ERR("do_write returned %08lx\n", Status
);
3480 ExReleaseResourceLite(&Vcb
->tree_lock
);
3482 if (!Vcb
->balance
.stopping
&& NT_SUCCESS(Vcb
->balance
.status
))
3483 FsRtlNotifyVolumeEvent(Vcb
->root_file
, FSRTL_VOLUME_CHANGE_SIZE
);
3485 Status
= remove_balance_item(Vcb
);
3486 if (!NT_SUCCESS(Status
)) {
3487 ERR("remove_balance_item returned %08lx\n", Status
);
3492 if (Vcb
->trim
&& !Vcb
->options
.no_trim
) {
3493 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
3495 le
= Vcb
->devices
.Flink
;
3496 while (le
!= &Vcb
->devices
) {
3497 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
3499 if (dev2
->devobj
&& !dev2
->readonly
&& dev2
->trim
)
3500 trim_unalloc_space(Vcb
, dev2
);
3505 ExReleaseResourceLite(&Vcb
->tree_lock
);
3509 ZwClose(Vcb
->balance
.thread
);
3510 Vcb
->balance
.thread
= NULL
;
3512 KeSetEvent(&Vcb
->balance
.finished
, 0, false);
3515 NTSTATUS
start_balance(device_extension
* Vcb
, void* data
, ULONG length
, KPROCESSOR_MODE processor_mode
) {
3517 btrfs_start_balance
* bsb
= (btrfs_start_balance
*)data
;
3518 OBJECT_ATTRIBUTES oa
;
3521 if (length
< sizeof(btrfs_start_balance
) || !data
)
3522 return STATUS_INVALID_PARAMETER
;
3524 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3525 return STATUS_PRIVILEGE_NOT_HELD
;
3528 WARN("cannot start balance while locked\n");
3529 return STATUS_DEVICE_NOT_READY
;
3532 if (Vcb
->scrub
.thread
) {
3533 WARN("cannot start balance while scrub running\n");
3534 return STATUS_DEVICE_NOT_READY
;
3537 if (Vcb
->balance
.thread
) {
3538 WARN("balance already running\n");
3539 return STATUS_DEVICE_NOT_READY
;
3543 return STATUS_MEDIA_WRITE_PROTECTED
;
3545 if (!(bsb
->opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) &&
3546 !(bsb
->opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) &&
3547 !(bsb
->opts
[BALANCE_OPTS_SYSTEM
].flags
& BTRFS_BALANCE_OPTS_ENABLED
))
3548 return STATUS_SUCCESS
;
3550 for (i
= 0; i
< 3; i
++) {
3551 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
3552 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_PROFILES
) {
3553 bsb
->opts
[i
].profiles
&= BLOCK_FLAG_RAID0
| BLOCK_FLAG_RAID1
| BLOCK_FLAG_DUPLICATE
| BLOCK_FLAG_RAID10
|
3554 BLOCK_FLAG_RAID5
| BLOCK_FLAG_RAID6
| BLOCK_FLAG_SINGLE
| BLOCK_FLAG_RAID1C3
|
3557 if (bsb
->opts
[i
].profiles
== 0)
3558 return STATUS_INVALID_PARAMETER
;
3561 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_DEVID
) {
3562 if (bsb
->opts
[i
].devid
== 0)
3563 return STATUS_INVALID_PARAMETER
;
3566 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_DRANGE
) {
3567 if (bsb
->opts
[i
].drange_start
> bsb
->opts
[i
].drange_end
)
3568 return STATUS_INVALID_PARAMETER
;
3571 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_VRANGE
) {
3572 if (bsb
->opts
[i
].vrange_start
> bsb
->opts
[i
].vrange_end
)
3573 return STATUS_INVALID_PARAMETER
;
3576 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_LIMIT
) {
3577 bsb
->opts
[i
].limit_start
= max(1, bsb
->opts
[i
].limit_start
);
3578 bsb
->opts
[i
].limit_end
= max(1, bsb
->opts
[i
].limit_end
);
3580 if (bsb
->opts
[i
].limit_start
> bsb
->opts
[i
].limit_end
)
3581 return STATUS_INVALID_PARAMETER
;
3584 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_STRIPES
) {
3585 bsb
->opts
[i
].stripes_start
= max(1, bsb
->opts
[i
].stripes_start
);
3586 bsb
->opts
[i
].stripes_end
= max(1, bsb
->opts
[i
].stripes_end
);
3588 if (bsb
->opts
[i
].stripes_start
> bsb
->opts
[i
].stripes_end
)
3589 return STATUS_INVALID_PARAMETER
;
3592 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_USAGE
) {
3593 bsb
->opts
[i
].usage_start
= min(100, bsb
->opts
[i
].stripes_start
);
3594 bsb
->opts
[i
].usage_end
= min(100, bsb
->opts
[i
].stripes_end
);
3596 if (bsb
->opts
[i
].stripes_start
> bsb
->opts
[i
].stripes_end
)
3597 return STATUS_INVALID_PARAMETER
;
3600 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
3601 if (bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID0
&& bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID1
&&
3602 bsb
->opts
[i
].convert
!= BLOCK_FLAG_DUPLICATE
&& bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID10
&&
3603 bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID5
&& bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID6
&&
3604 bsb
->opts
[i
].convert
!= BLOCK_FLAG_SINGLE
&& bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID1C3
&&
3605 bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID1C4
)
3606 return STATUS_INVALID_PARAMETER
;
3611 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], &bsb
->opts
[BALANCE_OPTS_DATA
], sizeof(btrfs_balance_opts
));
3612 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], &bsb
->opts
[BALANCE_OPTS_METADATA
], sizeof(btrfs_balance_opts
));
3613 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
], &bsb
->opts
[BALANCE_OPTS_SYSTEM
], sizeof(btrfs_balance_opts
));
3615 Vcb
->balance
.paused
= false;
3616 Vcb
->balance
.removing
= false;
3617 Vcb
->balance
.shrinking
= false;
3618 Vcb
->balance
.status
= STATUS_SUCCESS
;
3619 KeInitializeEvent(&Vcb
->balance
.event
, NotificationEvent
, !Vcb
->balance
.paused
);
3621 InitializeObjectAttributes(&oa
, NULL
, OBJ_KERNEL_HANDLE
, NULL
, NULL
);
3623 Status
= PsCreateSystemThread(&Vcb
->balance
.thread
, 0, &oa
, NULL
, NULL
, balance_thread
, Vcb
);
3624 if (!NT_SUCCESS(Status
)) {
3625 ERR("PsCreateSystemThread returned %08lx\n", Status
);
3629 return STATUS_SUCCESS
;
3632 NTSTATUS
look_for_balance_item(_Requires_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
) {
3637 OBJECT_ATTRIBUTES oa
;
3640 searchkey
.obj_id
= BALANCE_ITEM_ID
;
3641 searchkey
.obj_type
= TYPE_TEMP_ITEM
;
3642 searchkey
.offset
= 0;
3644 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, NULL
);
3645 if (!NT_SUCCESS(Status
)) {
3646 ERR("find_item returned %08lx\n", Status
);
3650 if (keycmp(tp
.item
->key
, searchkey
)) {
3651 TRACE("no balance item found\n");
3652 return STATUS_NOT_FOUND
;
3655 if (tp
.item
->size
< sizeof(BALANCE_ITEM
)) {
3656 WARN("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
,
3657 tp
.item
->size
, sizeof(BALANCE_ITEM
));
3658 return STATUS_INTERNAL_ERROR
;
3661 bi
= (BALANCE_ITEM
*)tp
.item
->data
;
3663 if (bi
->flags
& BALANCE_FLAGS_DATA
)
3664 load_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], &bi
->data
);
3666 if (bi
->flags
& BALANCE_FLAGS_METADATA
)
3667 load_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], &bi
->metadata
);
3669 if (bi
->flags
& BALANCE_FLAGS_SYSTEM
)
3670 load_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
], &bi
->system
);
3672 // do the heuristics that Linux driver does
3674 for (i
= 0; i
< 3; i
++) {
3675 if (Vcb
->balance
.opts
[i
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
3676 // if converting, don't redo chunks already done
3678 if (Vcb
->balance
.opts
[i
].flags
& BTRFS_BALANCE_OPTS_CONVERT
)
3679 Vcb
->balance
.opts
[i
].flags
|= BTRFS_BALANCE_OPTS_SOFT
;
3681 // don't balance chunks more than 90% filled - presumably these
3682 // have already been done
3684 if (!(Vcb
->balance
.opts
[i
].flags
& BTRFS_BALANCE_OPTS_USAGE
) &&
3685 !(Vcb
->balance
.opts
[i
].flags
& BTRFS_BALANCE_OPTS_CONVERT
)
3687 Vcb
->balance
.opts
[i
].flags
|= BTRFS_BALANCE_OPTS_USAGE
;
3688 Vcb
->balance
.opts
[i
].usage_start
= 0;
3689 Vcb
->balance
.opts
[i
].usage_end
= 90;
3694 if (Vcb
->readonly
|| Vcb
->options
.skip_balance
)
3695 Vcb
->balance
.paused
= true;
3697 Vcb
->balance
.paused
= false;
3699 Vcb
->balance
.removing
= false;
3700 Vcb
->balance
.shrinking
= false;
3701 Vcb
->balance
.status
= STATUS_SUCCESS
;
3702 KeInitializeEvent(&Vcb
->balance
.event
, NotificationEvent
, !Vcb
->balance
.paused
);
3704 InitializeObjectAttributes(&oa
, NULL
, OBJ_KERNEL_HANDLE
, NULL
, NULL
);
3706 Status
= PsCreateSystemThread(&Vcb
->balance
.thread
, 0, &oa
, NULL
, NULL
, balance_thread
, Vcb
);
3707 if (!NT_SUCCESS(Status
)) {
3708 ERR("PsCreateSystemThread returned %08lx\n", Status
);
3712 return STATUS_SUCCESS
;
3715 NTSTATUS
query_balance(device_extension
* Vcb
, void* data
, ULONG length
) {
3716 btrfs_query_balance
* bqb
= (btrfs_query_balance
*)data
;
3718 if (length
< sizeof(btrfs_query_balance
) || !data
)
3719 return STATUS_INVALID_PARAMETER
;
3721 if (!Vcb
->balance
.thread
) {
3722 bqb
->status
= BTRFS_BALANCE_STOPPED
;
3724 if (!NT_SUCCESS(Vcb
->balance
.status
)) {
3725 bqb
->status
|= BTRFS_BALANCE_ERROR
;
3726 bqb
->error
= Vcb
->balance
.status
;
3729 return STATUS_SUCCESS
;
3732 bqb
->status
= Vcb
->balance
.paused
? BTRFS_BALANCE_PAUSED
: BTRFS_BALANCE_RUNNING
;
3734 if (Vcb
->balance
.removing
)
3735 bqb
->status
|= BTRFS_BALANCE_REMOVAL
;
3737 if (Vcb
->balance
.shrinking
)
3738 bqb
->status
|= BTRFS_BALANCE_SHRINKING
;
3740 if (!NT_SUCCESS(Vcb
->balance
.status
))
3741 bqb
->status
|= BTRFS_BALANCE_ERROR
;
3743 bqb
->chunks_left
= Vcb
->balance
.chunks_left
;
3744 bqb
->total_chunks
= Vcb
->balance
.total_chunks
;
3745 bqb
->error
= Vcb
->balance
.status
;
3746 RtlCopyMemory(&bqb
->data_opts
, &Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], sizeof(btrfs_balance_opts
));
3747 RtlCopyMemory(&bqb
->metadata_opts
, &Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], sizeof(btrfs_balance_opts
));
3748 RtlCopyMemory(&bqb
->system_opts
, &Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
], sizeof(btrfs_balance_opts
));
3750 return STATUS_SUCCESS
;
3753 NTSTATUS
pause_balance(device_extension
* Vcb
, KPROCESSOR_MODE processor_mode
) {
3754 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3755 return STATUS_PRIVILEGE_NOT_HELD
;
3757 if (!Vcb
->balance
.thread
)
3758 return STATUS_DEVICE_NOT_READY
;
3760 if (Vcb
->balance
.paused
)
3761 return STATUS_DEVICE_NOT_READY
;
3763 Vcb
->balance
.paused
= true;
3764 KeClearEvent(&Vcb
->balance
.event
);
3766 return STATUS_SUCCESS
;
3769 NTSTATUS
resume_balance(device_extension
* Vcb
, KPROCESSOR_MODE processor_mode
) {
3770 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3771 return STATUS_PRIVILEGE_NOT_HELD
;
3773 if (!Vcb
->balance
.thread
)
3774 return STATUS_DEVICE_NOT_READY
;
3776 if (!Vcb
->balance
.paused
)
3777 return STATUS_DEVICE_NOT_READY
;
3780 return STATUS_MEDIA_WRITE_PROTECTED
;
3782 Vcb
->balance
.paused
= false;
3783 KeSetEvent(&Vcb
->balance
.event
, 0, false);
3785 return STATUS_SUCCESS
;
3788 NTSTATUS
stop_balance(device_extension
* Vcb
, KPROCESSOR_MODE processor_mode
) {
3789 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3790 return STATUS_PRIVILEGE_NOT_HELD
;
3792 if (!Vcb
->balance
.thread
)
3793 return STATUS_DEVICE_NOT_READY
;
3795 Vcb
->balance
.paused
= false;
3796 Vcb
->balance
.stopping
= true;
3797 Vcb
->balance
.status
= STATUS_SUCCESS
;
3798 KeSetEvent(&Vcb
->balance
.event
, 0, false);
3800 return STATUS_SUCCESS
;
/* Begins removal of a device from the volume by starting a balance that is
 * filtered to that device's ID.
 *
 * Vcb            - volume whose device list and balance state are used
 * data           - buffer whose first 8 bytes are read as the uint64_t device ID
 * length         - size of data in bytes; must be at least sizeof(uint64_t)
 * processor_mode - mode handed to SeSinglePrivilegeCheck
 *
 * Returns visible in this function:
 *   STATUS_PRIVILEGE_NOT_HELD    - caller lacks SE_MANAGE_VOLUME_PRIVILEGE
 *   STATUS_INVALID_PARAMETER     - buffer too short, or num_rw_devices == 1
 *   STATUS_MEDIA_WRITE_PROTECTED - Vcb->readonly is set
 *   STATUS_NOT_FOUND             - device ID not found (per the warning logged)
 *   STATUS_CANNOT_DELETE         - too few devices would remain for a RAID
 *                                  profile in use
 *   STATUS_DEVICE_NOT_READY      - a balance thread already exists
 *   STATUS_SUCCESS               - final return */
3803 NTSTATUS
remove_device(device_extension
* Vcb
, void* data
, ULONG length
, KPROCESSOR_MODE processor_mode
) {
3809 uint64_t num_rw_devices
;
3810 OBJECT_ATTRIBUTES oa
;
3812 TRACE("(%p, %p, %lx)\n", Vcb
, data
, length
);
// Caller must hold SE_MANAGE_VOLUME_PRIVILEGE.
3814 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3815 return STATUS_PRIVILEGE_NOT_HELD
;
// The buffer must be large enough to hold the device ID read below.
3817 if (length
< sizeof(uint64_t))
3818 return STATUS_INVALID_PARAMETER
;
3820 devid
= *(uint64_t*)data
;
// tree_lock is taken shared here and released on every return below, and
// again before the balance state is touched.
3822 ExAcquireResourceSharedLite(&Vcb
->tree_lock
, true);
3824 if (Vcb
->readonly
) {
3825 ExReleaseResourceLite(&Vcb
->tree_lock
);
3826 return STATUS_MEDIA_WRITE_PROTECTED
;
// Walk Vcb->devices, comparing each entry's dev_id with devid and testing its
// readonly flag.
// NOTE(review): presumably this records the matching device in dev and counts
// non-readonly devices in num_rw_devices - confirm.
3831 le
= Vcb
->devices
.Flink
;
3832 while (le
!= &Vcb
->devices
) {
3833 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
3835 if (dev2
->devitem
.dev_id
== devid
)
3838 if (!dev2
->readonly
)
3845 ExReleaseResourceLite(&Vcb
->tree_lock
);
3846 WARN("device %I64x not found\n", devid
);
3847 return STATUS_NOT_FOUND
;
// NOTE(review): the device-count checks below appear to be scoped to a target
// device that is not read-only - confirm.
3850 if (!dev
->readonly
) {
3851 if (num_rw_devices
== 1) {
3852 ExReleaseResourceLite(&Vcb
->tree_lock
);
3853 WARN("not removing last non-readonly device\n");
3854 return STATUS_INVALID_PARAMETER
;
// With exactly 4 devices counted: refuse if data, metadata or system chunks use
// RAID10, RAID6 or RAID1C4.
3857 if (num_rw_devices
== 4 &&
3858 ((Vcb
->data_flags
& BLOCK_FLAG_RAID10
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID10
|| Vcb
->system_flags
& BLOCK_FLAG_RAID10
) ||
3859 (Vcb
->data_flags
& BLOCK_FLAG_RAID6
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID6
|| Vcb
->system_flags
& BLOCK_FLAG_RAID6
) ||
3860 (Vcb
->data_flags
& BLOCK_FLAG_RAID1C4
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID1C4
|| Vcb
->system_flags
& BLOCK_FLAG_RAID1C4
)
3863 ExReleaseResourceLite(&Vcb
->tree_lock
);
3864 ERR("would not be enough devices to satisfy RAID requirement (RAID6/10/1C4)\n");
3865 return STATUS_CANNOT_DELETE
;
// With exactly 3 devices counted: refuse if any of the three flag sets uses
// RAID5 or RAID1C3.
3868 if (num_rw_devices
== 3 &&
3869 ((Vcb
->data_flags
& BLOCK_FLAG_RAID5
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID5
|| Vcb
->system_flags
& BLOCK_FLAG_RAID5
) ||
3870 (Vcb
->data_flags
& BLOCK_FLAG_RAID1C3
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID1C3
|| Vcb
->system_flags
& BLOCK_FLAG_RAID1C3
))
3872 ExReleaseResourceLite(&Vcb
->tree_lock
);
3873 ERR("would not be enough devices to satisfy RAID requirement (RAID5/1C3)\n");
3874 return STATUS_CANNOT_DELETE
;
// With exactly 2 devices counted: refuse if any of the three flag sets uses
// RAID0 or RAID1.
3877 if (num_rw_devices
== 2 &&
3878 ((Vcb
->data_flags
& BLOCK_FLAG_RAID0
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID0
|| Vcb
->system_flags
& BLOCK_FLAG_RAID0
) ||
3879 (Vcb
->data_flags
& BLOCK_FLAG_RAID1
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID1
|| Vcb
->system_flags
& BLOCK_FLAG_RAID1
))
3881 ExReleaseResourceLite(&Vcb
->tree_lock
);
3882 ERR("would not be enough devices to satisfy RAID requirement (RAID0/1)\n");
3883 return STATUS_CANNOT_DELETE
;
// Validation is finished; the lock is dropped before the balance state is used.
3887 ExReleaseResourceLite(&Vcb
->tree_lock
);
// Refuse when a balance thread already exists.
3889 if (Vcb
->balance
.thread
) {
3890 WARN("balance already running\n");
3891 return STATUS_DEVICE_NOT_READY
;
// Reset all three balance option sets, then enable each one with a devid
// filter for the device being removed.
3896 RtlZeroMemory(Vcb
->balance
.opts
, sizeof(btrfs_balance_opts
) * 3);
3898 for (i
= 0; i
< 3; i
++) {
3899 Vcb
->balance
.opts
[i
].flags
= BTRFS_BALANCE_OPTS_ENABLED
| BTRFS_BALANCE_OPTS_DEVID
;
3900 Vcb
->balance
.opts
[i
].devid
= devid
;
// Initial balance state: not paused, flagged as a removal, not a shrink.
3903 Vcb
->balance
.paused
= false;
3904 Vcb
->balance
.removing
= true;
3905 Vcb
->balance
.shrinking
= false;
3906 Vcb
->balance
.status
= STATUS_SUCCESS
;
// paused was set to false just above, so the event's initial state is signalled.
3907 KeInitializeEvent(&Vcb
->balance
.event
, NotificationEvent
, !Vcb
->balance
.paused
);
3909 InitializeObjectAttributes(&oa
, NULL
, OBJ_KERNEL_HANDLE
, NULL
, NULL
);
// Start balance_thread with Vcb as its context; the thread handle is stored in
// Vcb->balance.thread.
3911 Status
= PsCreateSystemThread(&Vcb
->balance
.thread
, 0, &oa
, NULL
, NULL
, balance_thread
, Vcb
);
3912 if (!NT_SUCCESS(Status
)) {
3913 ERR("PsCreateSystemThread returned %08lx\n", Status
);
3918 return STATUS_SUCCESS
;