1 /* Copyright (c) Mark Harmstone 2016-17
3 * This file is part of WinBtrfs.
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
18 #include "btrfs_drv.h"
19 #include "btrfsioctl.h"
30 LIST_ENTRY list_entry
;
42 metadata_reloc
* parent
;
44 LIST_ENTRY list_entry
;
54 LIST_ENTRY list_entry
;
66 metadata_reloc
* parent
;
67 LIST_ENTRY list_entry
;
// Local fallback for the DSM trim flag, guarded by _MSC_VER not being defined.
70 #ifndef _MSC_VER // not in mingw yet
71 #define DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED 0x80000000
// Size in bytes of one balance read (0x100000 = 1 MB).
74 #define BALANCE_UNIT 0x100000 // only read 1 MB at a time
/* Creates a metadata_reloc record for the extent item that tp points at.
 *
 * Visible steps: allocate the record, remember the item's address and
 * EXTENT_ITEM data, delete the item from the tree, return node_size bytes to
 * the owning chunk's free space, collect the inline TREE_BLOCK_REF /
 * SHARED_BLOCK_REF entries into mr->refs, then (when the inline count is
 * below ei->refcount) collect and delete the separate backref items that
 * share the same obj_id. On success the record is appended to items.
 *
 * Vcb      - volume; caller holds tree_lock exclusively (per the annotation).
 * items    - list that receives the new record.
 * tp       - traverse pointer at the extent item to relocate.
 * skinny   - NOTE(review): presumably selects whether an EXTENT_ITEM2 follows
 *            the EXTENT_ITEM; the guarding condition is not visible here.
 * mr2      - NOTE(review): presumably an optional out-pointer for the record;
 *            its store is not visible here.
 * c        - chunk containing the extent.
 * rollback - rollback list handed to space_list_add.
 *
 * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES on allocation
 * failure, or STATUS_INTERNAL_ERROR on a malformed extent item.
 *
 * NOTE(review): the error returns after mr is allocated do not visibly free
 * mr or the refs already queued on mr->refs, and mr is only linked into
 * items at the very end - confirm whether this leaks on failure.
 */
76 static NTSTATUS
add_metadata_reloc(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, LIST_ENTRY
* items
, traverse_ptr
* tp
,
77 bool skinny
, metadata_reloc
** mr2
, chunk
* c
, LIST_ENTRY
* rollback
) {
85 mr
= ExAllocatePoolWithTag(PagedPool
, sizeof(metadata_reloc
), ALLOC_TAG
);
87 ERR("out of memory\n");
88 return STATUS_INSUFFICIENT_RESOURCES
;
// the extent's logical address is the obj_id of its key
91 mr
->address
= tp
->item
->key
.obj_id
;
93 mr
->ei
= (EXTENT_ITEM
*)tp
->item
->data
;
95 InitializeListHead(&mr
->refs
);
// the old extent item is removed here; mr->ei still points at its data
97 Status
= delete_tree_item(Vcb
, tp
);
98 if (!NT_SUCCESS(Status
)) {
99 ERR("delete_tree_item returned %08x\n", Status
);
// NOTE(review): presumably only looked up when the caller passed no chunk - the condition is not visible here
105 c
= get_chunk_from_address(Vcb
, tp
->item
->key
.obj_id
);
// under the chunk lock, give the node-sized range back to the chunk
108 acquire_chunk_lock(c
, Vcb
);
110 c
->used
-= Vcb
->superblock
.node_size
;
112 space_list_add(c
, tp
->item
->key
.obj_id
, Vcb
->superblock
.node_size
, rollback
);
114 release_chunk_lock(c
, Vcb
);
117 ei
= (EXTENT_ITEM
*)tp
->item
->data
;
// len / ptr describe the bytes that follow the fixed EXTENT_ITEM header
120 len
= tp
->item
->size
- sizeof(EXTENT_ITEM
);
121 ptr
= (uint8_t*)tp
->item
->data
+ sizeof(EXTENT_ITEM
);
// step over an EXTENT_ITEM2 as well
123 len
-= sizeof(EXTENT_ITEM2
);
124 ptr
+= sizeof(EXTENT_ITEM2
);
// each inline entry is a one-byte type followed by a type-specific payload
128 uint8_t secttype
= *ptr
;
129 uint16_t sectlen
= secttype
== TYPE_TREE_BLOCK_REF
? sizeof(TREE_BLOCK_REF
) : (secttype
== TYPE_SHARED_BLOCK_REF
? sizeof(SHARED_BLOCK_REF
) : 0);
130 metadata_reloc_ref
* ref
;
135 ERR("(%I64x,%x,%I64x): %x bytes left, expecting at least %x\n", tp
->item
->key
.obj_id
, tp
->item
->key
.obj_type
, tp
->item
->key
.offset
, len
, sectlen
);
136 return STATUS_INTERNAL_ERROR
;
140 ERR("(%I64x,%x,%I64x): unrecognized extent type %x\n", tp
->item
->key
.obj_id
, tp
->item
->key
.obj_type
, tp
->item
->key
.offset
, secttype
);
141 return STATUS_INTERNAL_ERROR
;
144 ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(metadata_reloc_ref
), ALLOC_TAG
);
146 ERR("out of memory\n");
147 return STATUS_INSUFFICIENT_RESOURCES
;
// copy the payload that follows the type byte into the matching member
150 if (secttype
== TYPE_TREE_BLOCK_REF
) {
151 ref
->type
= TYPE_TREE_BLOCK_REF
;
152 RtlCopyMemory(&ref
->tbr
, ptr
+ sizeof(uint8_t), sizeof(TREE_BLOCK_REF
));
154 } else if (secttype
== TYPE_SHARED_BLOCK_REF
) {
155 ref
->type
= TYPE_SHARED_BLOCK_REF
;
156 RtlCopyMemory(&ref
->sbr
, ptr
+ sizeof(uint8_t), sizeof(SHARED_BLOCK_REF
));
159 ERR("unexpected tree type %x\n", secttype
);
161 return STATUS_INTERNAL_ERROR
;
166 InsertTailList(&mr
->refs
, &ref
->list_entry
);
// advance past the type byte and the payload
169 ptr
+= sizeof(uint8_t) + sectlen
;
172 if (inline_rc
< ei
->refcount
) { // look for non-inline entries
173 traverse_ptr tp2
= *tp
, next_tp
;
175 while (find_next_item(Vcb
, &tp2
, &next_tp
, false, NULL
)) {
// only items with the same obj_id as the extent are considered
178 if (tp2
.item
->key
.obj_id
== tp
->item
->key
.obj_id
) {
179 if (tp2
.item
->key
.obj_type
== TYPE_TREE_BLOCK_REF
) {
180 metadata_reloc_ref
* ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(metadata_reloc_ref
), ALLOC_TAG
);
182 ERR("out of memory\n");
183 return STATUS_INSUFFICIENT_RESOURCES
;
// for a stand-alone backref item the value is carried in the key offset
186 ref
->type
= TYPE_TREE_BLOCK_REF
;
187 ref
->tbr
.offset
= tp2
.item
->key
.offset
;
190 InsertTailList(&mr
->refs
, &ref
->list_entry
);
192 Status
= delete_tree_item(Vcb
, &tp2
);
193 if (!NT_SUCCESS(Status
)) {
194 ERR("delete_tree_item returned %08x\n", Status
);
197 } else if (tp2
.item
->key
.obj_type
== TYPE_SHARED_BLOCK_REF
) {
198 metadata_reloc_ref
* ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(metadata_reloc_ref
), ALLOC_TAG
);
200 ERR("out of memory\n");
201 return STATUS_INSUFFICIENT_RESOURCES
;
204 ref
->type
= TYPE_SHARED_BLOCK_REF
;
205 ref
->sbr
.offset
= tp2
.item
->key
.offset
;
208 InsertTailList(&mr
->refs
, &ref
->list_entry
);
210 Status
= delete_tree_item(Vcb
, &tp2
);
211 if (!NT_SUCCESS(Status
)) {
212 ERR("delete_tree_item returned %08x\n", Status
);
// the finished record joins the caller's list
221 InsertTailList(items
, &mr
->list_entry
);
226 return STATUS_SUCCESS
;
/* Ensures a metadata_reloc record exists for the tree block at address.
 *
 * First scans items for a record whose address matches and, if one is found,
 * returns STATUS_SUCCESS. Otherwise searches the extent root with the key
 * (address, TYPE_METADATA_ITEM, 0xffffffffffffffff), accepts either a
 * METADATA_ITEM or a node_size-long EXTENT_ITEM that carries
 * EXTENT_ITEM_TREE_BLOCK, and passes the item to add_metadata_reloc.
 *
 * Vcb      - volume; caller holds tree_lock exclusively (per the annotation).
 * items    - list of metadata_reloc records, searched and possibly extended.
 * address  - logical address of the parent tree block.
 * mr2      - forwarded to add_metadata_reloc.
 *            NOTE(review): presumably also set on the early-return path; that
 *            store is not visible here.
 * rollback - forwarded to add_metadata_reloc.
 *
 * Returns STATUS_SUCCESS, or STATUS_INTERNAL_ERROR when no usable extent item
 * is found or its tree flag is missing.
 */
229 static NTSTATUS
add_metadata_reloc_parent(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, LIST_ENTRY
* items
,
230 uint64_t address
, metadata_reloc
** mr2
, LIST_ENTRY
* rollback
) {
// already queued for relocation?
238 while (le
!= items
) {
239 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
241 if (mr
->address
== address
) {
243 return STATUS_SUCCESS
;
// not queued yet: look the block up in the extent tree
249 searchkey
.obj_id
= address
;
250 searchkey
.obj_type
= TYPE_METADATA_ITEM
;
251 searchkey
.offset
= 0xffffffffffffffff;
253 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, NULL
);
254 if (!NT_SUCCESS(Status
)) {
255 ERR("find_item returned %08x\n", Status
);
// case 1: a METADATA_ITEM for this address
259 if (tp
.item
->key
.obj_id
== address
&& tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
&& tp
.item
->size
>= sizeof(EXTENT_ITEM
))
// case 2: an EXTENT_ITEM whose key offset equals node_size
261 else if (tp
.item
->key
.obj_id
== address
&& tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
&& tp
.item
->key
.offset
== Vcb
->superblock
.node_size
&&
262 tp
.item
->size
>= sizeof(EXTENT_ITEM
)) {
263 EXTENT_ITEM
* ei
= (EXTENT_ITEM
*)tp
.item
->data
;
// an EXTENT_ITEM without the tree-block flag is rejected
265 if (!(ei
->flags
& EXTENT_ITEM_TREE_BLOCK
)) {
266 ERR("EXTENT_ITEM for %I64x found, but tree flag not set\n", address
);
267 return STATUS_INTERNAL_ERROR
;
270 ERR("could not find valid EXTENT_ITEM for address %I64x\n", address
);
271 return STATUS_INTERNAL_ERROR
;
// no chunk is supplied (NULL) on this path
274 Status
= add_metadata_reloc(Vcb
, items
, &tp
, skinny
, mr2
, NULL
, rollback
);
275 if (!NT_SUCCESS(Status
)) {
276 ERR("add_metadata_reloc returned %08x\n", Status
);
280 return STATUS_SUCCESS
;
/* Re-orders mr->refs in place.
 *
 * Every ref is popped from mr->refs, given a hash (tbr.offset for a
 * TYPE_TREE_BLOCK_REF, parent->new_address for a TYPE_SHARED_BLOCK_REF) and
 * inserted into a temporary list in front of the first entry ref2 for which
 * ref has the smaller type, or the same type and a larger hash. A ref with no
 * such entry goes to the tail. The temporary list is then spliced back so
 * that mr->refs heads it.
 *
 * mr - relocation record whose refs list is sorted.
 */
283 static void sort_metadata_reloc_refs(metadata_reloc
* mr
) {
284 LIST_ENTRY newlist
, *le
;
286 if (mr
->refs
.Flink
== mr
->refs
.Blink
) // 0 or 1 items
291 InitializeListHead(&newlist
);
// insertion sort: drain mr->refs into newlist one entry at a time
293 while (!IsListEmpty(&mr
->refs
)) {
294 metadata_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&mr
->refs
), metadata_reloc_ref
, list_entry
);
295 bool inserted
= false;
// the sort key depends on the ref type
297 if (ref
->type
== TYPE_TREE_BLOCK_REF
)
298 ref
->hash
= ref
->tbr
.offset
;
299 else if (ref
->type
== TYPE_SHARED_BLOCK_REF
)
300 ref
->hash
= ref
->parent
->new_address
;
303 while (le
!= &newlist
) {
304 metadata_reloc_ref
* ref2
= CONTAINING_RECORD(le
, metadata_reloc_ref
, list_entry
);
// InsertHeadList on le->Blink places ref immediately before le
306 if (ref
->type
< ref2
->type
|| (ref
->type
== ref2
->type
&& ref
->hash
> ref2
->hash
)) {
307 InsertHeadList(le
->Blink
, &ref
->list_entry
);
316 InsertTailList(&newlist
, &ref
->list_entry
);
// splice: repoint the first/last entries and the mr->refs head at each other
319 newlist
.Flink
->Blink
= &mr
->refs
;
320 newlist
.Blink
->Flink
= &mr
->refs
;
321 mr
->refs
.Flink
= newlist
.Flink
;
322 mr
->refs
.Blink
= newlist
.Blink
;
/* Writes the extent-tree entries for a relocated tree block at mr->new_address.
 *
 * Visible steps:
 *  1. Sort mr->refs and work out how many refs fit inline: the running
 *     inline length is compared against node_size >> 2, and the first ref
 *     that does not fit is remembered in first_noninline.
 *  2. Allocate and fill the new extent item (generation and flags copied from
 *     mr->ei; an EXTENT_ITEM2 is added when the skinny-metadata incompat flag
 *     is clear) followed by the inline refs, and insert it as a METADATA_ITEM
 *     (skinny) or EXTENT_ITEM.
 *  3. Insert the remaining refs, starting at first_noninline, as separate
 *     TREE_BLOCK_REF / SHARED_BLOCK_REF items.
 *  4. When shared backrefs are in use for this block, move the shared refs
 *     held on its children (internal node) or on its data extents (leaf) from
 *     mr->address to mr->new_address, and patch matching entries in the
 *     chunk's changed_extents lists.
 *
 * Vcb - volume; caller holds tree_lock exclusively (per the annotation).
 * mr  - relocation record; mr->data holds the node contents.
 *
 * Returns STATUS_SUCCESS or STATUS_INSUFFICIENT_RESOURCES; other failure
 * statuses are logged but their return statements are not visible here.
 */
325 static NTSTATUS
add_metadata_reloc_extent_item(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, metadata_reloc
* mr
) {
330 bool all_inline
= true;
331 metadata_reloc_ref
* first_noninline
= NULL
;
// base size: EXTENT_ITEM, plus EXTENT_ITEM2 on non-skinny filesystems
335 inline_len
= sizeof(EXTENT_ITEM
);
336 if (!(Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA
))
337 inline_len
+= sizeof(EXTENT_ITEM2
);
339 sort_metadata_reloc_refs(mr
);
// pass 1: decide how many refs are stored inline
342 while (le
!= &mr
->refs
) {
343 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le
, metadata_reloc_ref
, list_entry
);
348 if (ref
->type
== TYPE_TREE_BLOCK_REF
)
349 extlen
+= sizeof(TREE_BLOCK_REF
);
350 else if (ref
->type
== TYPE_SHARED_BLOCK_REF
)
351 extlen
+= sizeof(SHARED_BLOCK_REF
);
// the "+ 1" accounts for the type byte that precedes each inline ref
354 if ((ULONG
)(inline_len
+ 1 + extlen
) > (Vcb
->superblock
.node_size
>> 2)) {
356 first_noninline
= ref
;
358 inline_len
+= extlen
+ 1;
364 ei
= ExAllocatePoolWithTag(PagedPool
, inline_len
, ALLOC_TAG
);
366 ERR("out of memory\n");
367 return STATUS_INSUFFICIENT_RESOURCES
;
371 ei
->generation
= mr
->ei
->generation
;
372 ei
->flags
= mr
->ei
->flags
;
// ptr starts immediately after the EXTENT_ITEM header
373 ptr
= (uint8_t*)&ei
[1];
375 if (!(Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA
)) {
376 EXTENT_ITEM2
* ei2
= (EXTENT_ITEM2
*)ptr
;
// first key and level are taken from the node data just past its header
378 ei2
->firstitem
= *(KEY
*)&mr
->data
[1];
379 ei2
->level
= mr
->data
->level
;
381 ptr
+= sizeof(EXTENT_ITEM2
);
// pass 2: emit the inline refs, stopping at first_noninline
385 while (le
!= &mr
->refs
) {
386 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le
, metadata_reloc_ref
, list_entry
);
388 if (ref
== first_noninline
)
394 if (ref
->type
== TYPE_TREE_BLOCK_REF
) {
395 TREE_BLOCK_REF
* tbr
= (TREE_BLOCK_REF
*)ptr
;
397 tbr
->offset
= ref
->tbr
.offset
;
399 ptr
+= sizeof(TREE_BLOCK_REF
);
400 } else if (ref
->type
== TYPE_SHARED_BLOCK_REF
) {
401 SHARED_BLOCK_REF
* sbr
= (SHARED_BLOCK_REF
*)ptr
;
// a shared ref names the parent block's new location
403 sbr
->offset
= ref
->parent
->new_address
;
405 ptr
+= sizeof(SHARED_BLOCK_REF
);
// skinny: METADATA_ITEM keyed by level; otherwise EXTENT_ITEM keyed by node_size
411 if (Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA
)
412 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, mr
->new_address
, TYPE_METADATA_ITEM
, mr
->data
->level
, ei
, inline_len
, NULL
, NULL
);
414 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, mr
->new_address
, TYPE_EXTENT_ITEM
, Vcb
->superblock
.node_size
, ei
, inline_len
, NULL
, NULL
);
416 if (!NT_SUCCESS(Status
)) {
417 ERR("insert_tree_item returned %08x\n", Status
);
// pass 3: refs that did not fit inline become stand-alone, data-less items
423 le
= &first_noninline
->list_entry
;
425 while (le
!= &mr
->refs
) {
426 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le
, metadata_reloc_ref
, list_entry
);
428 if (ref
->type
== TYPE_TREE_BLOCK_REF
) {
429 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, mr
->new_address
, TYPE_TREE_BLOCK_REF
, ref
->tbr
.offset
, NULL
, 0, NULL
, NULL
);
430 if (!NT_SUCCESS(Status
)) {
431 ERR("insert_tree_item returned %08x\n", Status
);
434 } else if (ref
->type
== TYPE_SHARED_BLOCK_REF
) {
435 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, mr
->new_address
, TYPE_SHARED_BLOCK_REF
, ref
->parent
->new_address
, NULL
, 0, NULL
, NULL
);
436 if (!NT_SUCCESS(Status
)) {
437 ERR("insert_tree_item returned %08x\n", Status
);
// shared backrefs held by this block's children / data must follow the move
446 if (ei
->flags
& EXTENT_ITEM_SHARED_BACKREFS
|| mr
->data
->flags
& HEADER_FLAG_SHARED_BACKREF
|| !(mr
->data
->flags
& HEADER_FLAG_MIXED_BACKREF
)) {
// internal node: its entries point at child tree blocks
447 if (mr
->data
->level
> 0) {
449 internal_node
* in
= (internal_node
*)&mr
->data
[1];
451 for (i
= 0; i
< mr
->data
->num_items
; i
++) {
452 uint64_t sbrrc
= find_extent_shared_tree_refcount(Vcb
, in
[i
].address
, mr
->address
, NULL
);
455 SHARED_BLOCK_REF sbr
;
// add a ref from the new address...
457 sbr
.offset
= mr
->new_address
;
459 Status
= increase_extent_refcount(Vcb
, in
[i
].address
, Vcb
->superblock
.node_size
, TYPE_SHARED_BLOCK_REF
, &sbr
, NULL
, 0, NULL
);
460 if (!NT_SUCCESS(Status
)) {
461 ERR("increase_extent_refcount returned %08x\n", Status
);
// ...then drop the ref from the old address
465 sbr
.offset
= mr
->address
;
467 Status
= decrease_extent_refcount(Vcb
, in
[i
].address
, Vcb
->superblock
.node_size
, TYPE_SHARED_BLOCK_REF
, &sbr
, NULL
, 0,
468 sbr
.offset
, false, NULL
);
469 if (!NT_SUCCESS(Status
)) {
470 ERR("decrease_extent_refcount returned %08x\n", Status
);
// leaf: look at its EXTENT_DATA items
477 leaf_node
* ln
= (leaf_node
*)&mr
->data
[1];
479 for (i
= 0; i
< mr
->data
->num_items
; i
++) {
480 if (ln
[i
].key
.obj_type
== TYPE_EXTENT_DATA
&& ln
[i
].size
>= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
)) {
// item payloads are addressed relative to the end of the tree header
481 EXTENT_DATA
* ed
= (EXTENT_DATA
*)((uint8_t*)mr
->data
+ sizeof(tree_header
) + ln
[i
].offset
);
483 if (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) {
484 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
486 if (ed2
->size
> 0) { // not sparse
487 uint32_t sdrrc
= find_extent_shared_data_refcount(Vcb
, ed2
->address
, mr
->address
, NULL
);
493 sdr
.offset
= mr
->new_address
;
496 Status
= increase_extent_refcount(Vcb
, ed2
->address
, ed2
->size
, TYPE_SHARED_DATA_REF
, &sdr
, NULL
, 0, NULL
);
497 if (!NT_SUCCESS(Status
)) {
498 ERR("increase_extent_refcount returned %08x\n", Status
);
502 sdr
.offset
= mr
->address
;
504 Status
= decrease_extent_refcount(Vcb
, ed2
->address
, ed2
->size
, TYPE_SHARED_DATA_REF
, &sdr
, NULL
, 0,
505 sdr
.offset
, false, NULL
);
506 if (!NT_SUCCESS(Status
)) {
507 ERR("decrease_extent_refcount returned %08x\n", Status
);
511 c
= get_chunk_from_address(Vcb
, ed2
->address
);
514 // check changed_extents
516 ExAcquireResourceExclusiveLite(&c
->changed_extents_lock
, true);
518 le
= c
->changed_extents
.Flink
;
520 while (le
!= &c
->changed_extents
) {
521 changed_extent
* ce
= CONTAINING_RECORD(le
, changed_extent
, list_entry
);
523 if (ce
->address
== ed2
->address
) {
// retarget pending shared data refs in both the refs and old_refs lists
526 le2
= ce
->refs
.Flink
;
527 while (le2
!= &ce
->refs
) {
528 changed_extent_ref
* cer
= CONTAINING_RECORD(le2
, changed_extent_ref
, list_entry
);
530 if (cer
->type
== TYPE_SHARED_DATA_REF
&& cer
->sdr
.offset
== mr
->address
) {
531 cer
->sdr
.offset
= mr
->new_address
;
538 le2
= ce
->old_refs
.Flink
;
539 while (le2
!= &ce
->old_refs
) {
540 changed_extent_ref
* cer
= CONTAINING_RECORD(le2
, changed_extent_ref
, list_entry
);
542 if (cer
->type
== TYPE_SHARED_DATA_REF
&& cer
->sdr
.offset
== mr
->address
) {
543 cer
->sdr
.offset
= mr
->new_address
;
556 ExReleaseResourceLite(&c
->changed_extents_lock
);
566 return STATUS_SUCCESS
;
/* Relocates every tree block queued in items and writes it to its new address.
 *
 * Visible phases:
 *  1. For each record: allocate a node_size buffer, read the node from
 *     mr->address, patch data-extent addresses in level-0 nodes from
 *     data_items, track the highest level seen, and make sure every parent
 *     named by the record's backrefs is queued too (add_metadata_reloc_parent).
 *  2. For each record: look for an already-loaded tree with the same address
 *     in Vcb->trees_hash, starting from the trees_ptrs bucket for the crc32c
 *     of the address.
 *  3. Level by level (0..max_level): choose block-group flags, find a new
 *     address (cached newchunk, then any suitable chunk, then alloc_chunk),
 *     rewrite the pointers held by parents, roots and the superblock, re-hash
 *     the loaded tree under its new address, recompute the node checksum and
 *     queue a tree_write kept sorted by address.
 *  4. do_tree_writes, then add_metadata_reloc_extent_item for every record,
 *     then drain tree_writes.
 *
 * Vcb        - volume; caller holds tree_lock exclusively (per the annotation).
 * items      - metadata_reloc records; may grow while parents are discovered.
 * data_items - optional list of data_reloc records (NULL-checked before use).
 * c          - chunk being balanced; handed to read_data only when mr->address
 *              lies inside it.
 * rollback   - rollback list for space-list changes.
 *
 * Returns an NTSTATUS; STATUS_INSUFFICIENT_RESOURCES, STATUS_INTERNAL_ERROR
 * and STATUS_DISK_FULL are among the failure values assigned here.
 *
 * Fix: the "tree loaded more than once" guard now tests
 * list_entry_hash.Flink. Vcb->trees_hash is the head of the list threaded
 * through list_entry_hash, so testing list_entry.Flink against it could not
 * detect the end of the hash list and let the list head be reinterpreted as
 * a tree.
 */
569 static NTSTATUS
write_metadata_items(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, LIST_ENTRY
* items
,
570 LIST_ENTRY
* data_items
, chunk
* c
, LIST_ENTRY
* rollback
) {
571 LIST_ENTRY tree_writes
, *le
;
574 uint8_t level
, max_level
= 0;
575 chunk
* newchunk
= NULL
;
577 InitializeListHead(&tree_writes
);
// phase 1: load each node and queue its parents
580 while (le
!= items
) {
581 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
585 mr
->data
= ExAllocatePoolWithTag(PagedPool
, Vcb
->superblock
.node_size
, ALLOC_TAG
);
587 ERR("out of memory\n");
588 return STATUS_INSUFFICIENT_RESOURCES
;
// pass c only when the node actually lies inside that chunk
591 Status
= read_data(Vcb
, mr
->address
, Vcb
->superblock
.node_size
, NULL
, true, (uint8_t*)mr
->data
,
592 c
&& mr
->address
>= c
->offset
&& mr
->address
< c
->offset
+ c
->chunk_item
->size
? c
: NULL
, &pc
, NULL
, 0, false, NormalPagePriority
);
593 if (!NT_SUCCESS(Status
)) {
594 ERR("read_data returned %08x\n", Status
);
598 if (pc
->chunk_item
->type
& BLOCK_FLAG_SYSTEM
)
// leaves: point EXTENT_DATA items at relocated data extents
601 if (data_items
&& mr
->data
->level
== 0) {
602 le2
= data_items
->Flink
;
603 while (le2
!= data_items
) {
604 data_reloc
* dr
= CONTAINING_RECORD(le2
, data_reloc
, list_entry
);
605 leaf_node
* ln
= (leaf_node
*)&mr
->data
[1];
608 for (i
= 0; i
< mr
->data
->num_items
; i
++) {
609 if (ln
[i
].key
.obj_type
== TYPE_EXTENT_DATA
&& ln
[i
].size
>= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
)) {
610 EXTENT_DATA
* ed
= (EXTENT_DATA
*)((uint8_t*)mr
->data
+ sizeof(tree_header
) + ln
[i
].offset
);
612 if (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) {
613 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
615 if (ed2
->address
== dr
->address
)
616 ed2
->address
= dr
->new_address
;
625 if (mr
->data
->level
> max_level
)
626 max_level
= mr
->data
->level
;
// resolve the parent of every backref
628 le2
= mr
->refs
.Flink
;
629 while (le2
!= &mr
->refs
) {
630 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le2
, metadata_reloc_ref
, list_entry
);
632 if (ref
->type
== TYPE_TREE_BLOCK_REF
) {
// the node's first key is used to locate it within the owning root
638 firstitem
= (KEY
*)&mr
->data
[1];
640 le3
= Vcb
->roots
.Flink
;
641 while (le3
!= &Vcb
->roots
) {
642 root
* r2
= CONTAINING_RECORD(le3
, root
, list_entry
);
644 if (r2
->id
== ref
->tbr
.offset
) {
653 ERR("could not find subvol with id %I64x\n", ref
->tbr
.offset
);
654 return STATUS_INTERNAL_ERROR
;
657 Status
= find_item_to_level(Vcb
, r
, &tp
, firstitem
, false, mr
->data
->level
+ 1, NULL
);
658 if (!NT_SUCCESS(Status
) && Status
!= STATUS_NOT_FOUND
) {
659 ERR("find_item_to_level returned %08x\n", Status
);
664 while (t
&& t
->header
.level
< mr
->data
->level
+ 1) {
673 Status
= add_metadata_reloc_parent(Vcb
, items
, t
->header
.address
, &mr2
, rollback
);
674 if (!NT_SUCCESS(Status
)) {
675 ERR("add_metadata_reloc_parent returned %08x\n", Status
);
681 } else if (ref
->type
== TYPE_SHARED_BLOCK_REF
) {
// a shared ref names its parent block directly
684 Status
= add_metadata_reloc_parent(Vcb
, items
, ref
->sbr
.offset
, &mr2
, rollback
);
685 if (!NT_SUCCESS(Status
)) {
686 ERR("add_metadata_reloc_parent returned %08x\n", Status
);
// phase 2: match records against trees already loaded in memory
700 while (le
!= items
) {
701 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
707 hash
= calc_crc32c(0xffffffff, (uint8_t*)&mr
->address
, sizeof(uint64_t));
// trees_ptrs is indexed by the top byte of the hash
709 le2
= Vcb
->trees_ptrs
[hash
>> 24];
712 while (le2
!= &Vcb
->trees_hash
) {
713 tree
* t
= CONTAINING_RECORD(le2
, tree
, list_entry_hash
);
715 if (t
->header
.address
== mr
->address
) {
718 } else if (t
->hash
> hash
)
// phase 3: relocate bottom-up, one level at a time
728 for (level
= 0; level
<= max_level
; level
++) {
730 while (le
!= items
) {
731 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
733 if (mr
->data
->level
== level
) {
741 flags
= Vcb
->system_flags
;
742 else if (Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS
)
743 flags
= Vcb
->data_flags
;
745 flags
= Vcb
->metadata_flags
;
// first choice: the chunk remembered from an earlier allocation
748 acquire_chunk_lock(newchunk
, Vcb
);
750 if (newchunk
->chunk_item
->type
== flags
&& find_metadata_address_in_chunk(Vcb
, newchunk
, &mr
->new_address
)) {
751 newchunk
->used
+= Vcb
->superblock
.node_size
;
752 space_list_subtract(newchunk
, false, mr
->new_address
, Vcb
->superblock
.node_size
, rollback
);
756 release_chunk_lock(newchunk
, Vcb
);
// second choice: any writable, non-relocating chunk of the right type
760 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
762 le2
= Vcb
->chunks
.Flink
;
763 while (le2
!= &Vcb
->chunks
) {
764 chunk
* c2
= CONTAINING_RECORD(le2
, chunk
, list_entry
);
766 if (!c2
->readonly
&& !c2
->reloc
&& c2
!= newchunk
&& c2
->chunk_item
->type
== flags
) {
767 acquire_chunk_lock(c2
, Vcb
);
769 if ((c2
->chunk_item
->size
- c2
->used
) >= Vcb
->superblock
.node_size
) {
770 if (find_metadata_address_in_chunk(Vcb
, c2
, &mr
->new_address
)) {
771 c2
->used
+= Vcb
->superblock
.node_size
;
772 space_list_subtract(c2
, false, mr
->new_address
, Vcb
->superblock
.node_size
, rollback
);
773 release_chunk_lock(c2
, Vcb
);
780 release_chunk_lock(c2
, Vcb
);
786 // allocate new chunk if necessary
788 Status
= alloc_chunk(Vcb
, flags
, &newchunk
, false);
790 if (!NT_SUCCESS(Status
)) {
791 ERR("alloc_chunk returned %08x\n", Status
);
792 ExReleaseResourceLite(&Vcb
->chunk_lock
);
796 acquire_chunk_lock(newchunk
, Vcb
);
798 newchunk
->balance_num
= Vcb
->balance
.balance_num
;
800 if (!find_metadata_address_in_chunk(Vcb
, newchunk
, &mr
->new_address
)) {
801 release_chunk_lock(newchunk
, Vcb
);
802 ExReleaseResourceLite(&Vcb
->chunk_lock
);
803 ERR("could not find address in new chunk\n");
804 Status
= STATUS_DISK_FULL
;
807 newchunk
->used
+= Vcb
->superblock
.node_size
;
808 space_list_subtract(newchunk
, false, mr
->new_address
, Vcb
->superblock
.node_size
, rollback
);
811 release_chunk_lock(newchunk
, Vcb
);
814 ExReleaseResourceLite(&Vcb
->chunk_lock
);
// update everything that points at the old address
818 le2
= mr
->refs
.Flink
;
819 while (le2
!= &mr
->refs
) {
820 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le2
, metadata_reloc_ref
, list_entry
);
// the parent's on-disk image
824 internal_node
* in
= (internal_node
*)&ref
->parent
->data
[1];
826 for (i
= 0; i
< ref
->parent
->data
->num_items
; i
++) {
827 if (in
[i
].address
== mr
->address
) {
828 in
[i
].address
= mr
->new_address
;
// the parent's in-memory tree, if it is loaded
833 if (ref
->parent
->t
) {
836 le3
= ref
->parent
->t
->itemlist
.Flink
;
837 while (le3
!= &ref
->parent
->t
->itemlist
) {
838 tree_data
* td
= CONTAINING_RECORD(le3
, tree_data
, list_entry
);
840 if (!td
->inserted
&& td
->treeholder
.address
== mr
->address
)
841 td
->treeholder
.address
= mr
->new_address
;
// a top-level block: the owning root (and possibly the superblock) points at it
846 } else if (ref
->top
&& ref
->type
== TYPE_TREE_BLOCK_REF
) {
852 le3
= Vcb
->roots
.Flink
;
853 while (le3
!= &Vcb
->roots
) {
854 root
* r2
= CONTAINING_RECORD(le3
, root
, list_entry
);
856 if (r2
->id
== ref
->tbr
.offset
) {
865 r
->treeholder
.address
= mr
->new_address
;
867 if (r
== Vcb
->root_root
)
868 Vcb
->superblock
.root_tree_addr
= mr
->new_address
;
869 else if (r
== Vcb
->chunk_root
)
870 Vcb
->superblock
.chunk_tree_addr
= mr
->new_address
;
871 else if (r
->root_item
.block_number
== mr
->address
) {
// any other root: rewrite its ROOT_ITEM in the root tree
875 r
->root_item
.block_number
= mr
->new_address
;
877 searchkey
.obj_id
= r
->id
;
878 searchkey
.obj_type
= TYPE_ROOT_ITEM
;
879 searchkey
.offset
= 0xffffffffffffffff;
881 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, NULL
);
882 if (!NT_SUCCESS(Status
)) {
883 ERR("find_item returned %08x\n", Status
);
887 if (tp
.item
->key
.obj_id
!= searchkey
.obj_id
|| tp
.item
->key
.obj_type
!= searchkey
.obj_type
) {
888 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey
.obj_id
);
889 Status
= STATUS_INTERNAL_ERROR
;
893 ri
= ExAllocatePoolWithTag(PagedPool
, sizeof(ROOT_ITEM
), ALLOC_TAG
);
895 ERR("out of memory\n");
896 Status
= STATUS_INSUFFICIENT_RESOURCES
;
900 RtlCopyMemory(ri
, &r
->root_item
, sizeof(ROOT_ITEM
));
// replace the item: delete the old one, insert the copy under the same key
902 Status
= delete_tree_item(Vcb
, &tp
);
903 if (!NT_SUCCESS(Status
)) {
904 ERR("delete_tree_item returned %08x\n", Status
);
908 Status
= insert_tree_item(Vcb
, Vcb
->root_root
, tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, ri
, sizeof(ROOT_ITEM
), NULL
, NULL
);
909 if (!NT_SUCCESS(Status
)) {
910 ERR("insert_tree_item returned %08x\n", Status
);
// the node header records its own address
920 mr
->data
->address
= mr
->new_address
;
929 // check if tree loaded more than once
// the neighbour is only a tree if the hash-list link is not the list head
930 if (t3
->list_entry_hash
.Flink
!= &Vcb
->trees_hash
) {
931 tree
* nt
= CONTAINING_RECORD(t3
->list_entry_hash
.Flink
, tree
, list_entry_hash
);
933 if (nt
->header
.address
== t3
->header
.address
)
937 t3
->header
.address
= mr
->new_address
;
// if the bucket pointer refers to t3, hand it to the next tree in the same bucket (or clear it)
941 if (Vcb
->trees_ptrs
[h
] == &t3
->list_entry_hash
) {
942 if (t3
->list_entry_hash
.Flink
== &Vcb
->trees_hash
)
943 Vcb
->trees_ptrs
[h
] = NULL
;
945 tree
* t2
= CONTAINING_RECORD(t3
->list_entry_hash
.Flink
, tree
, list_entry_hash
);
947 if (t2
->hash
>> 24 == h
)
948 Vcb
->trees_ptrs
[h
] = &t2
->list_entry_hash
;
950 Vcb
->trees_ptrs
[h
] = NULL
;
954 RemoveEntryList(&t3
->list_entry_hash
);
// the hash is derived from the (new) address
956 t3
->hash
= calc_crc32c(0xffffffff, (uint8_t*)&t3
->header
.address
, sizeof(uint64_t));
// pick a starting point for the sorted re-insert
959 if (!Vcb
->trees_ptrs
[h
]) {
962 le2
= Vcb
->trees_hash
.Flink
;
967 if (Vcb
->trees_ptrs
[h2
]) {
968 le2
= Vcb
->trees_ptrs
[h2
];
976 le2
= Vcb
->trees_ptrs
[h
];
979 while (le2
!= &Vcb
->trees_hash
) {
980 tree
* t2
= CONTAINING_RECORD(le2
, tree
, list_entry_hash
);
982 if (t2
->hash
>= t3
->hash
) {
983 InsertHeadList(le2
->Blink
, &t3
->list_entry_hash
);
992 InsertTailList(&Vcb
->trees_hash
, &t3
->list_entry_hash
);
994 if (!Vcb
->trees_ptrs
[h
] || t3
->list_entry_hash
.Flink
== Vcb
->trees_ptrs
[h
])
995 Vcb
->trees_ptrs
[h
] = &t3
->list_entry_hash
;
// loaded leaves: patch relocated data-extent addresses in memory too
997 if (data_items
&& level
== 0) {
998 le2
= data_items
->Flink
;
1000 while (le2
!= data_items
) {
1001 data_reloc
* dr
= CONTAINING_RECORD(le2
, data_reloc
, list_entry
);
1002 LIST_ENTRY
* le3
= t3
->itemlist
.Flink
;
1004 while (le3
!= &t3
->itemlist
) {
1005 tree_data
* td
= CONTAINING_RECORD(le3
, tree_data
, list_entry
);
1007 if (!td
->inserted
&& td
->key
.obj_type
== TYPE_EXTENT_DATA
&& td
->size
>= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
)) {
1008 EXTENT_DATA
* ed
= (EXTENT_DATA
*)td
->data
;
1010 if (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) {
1011 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
1013 if (ed2
->address
== dr
->address
)
1014 ed2
->address
= dr
->new_address
;
// checksum covers the node from fs_uuid onward and is stored in its first 32 bits
1028 *((uint32_t*)mr
->data
) = ~calc_crc32c(0xffffffff, (uint8_t*)&mr
->data
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(mr
->data
->csum
));
1030 tw
= ExAllocatePoolWithTag(PagedPool
, sizeof(tree_write
), ALLOC_TAG
);
1032 ERR("out of memory\n");
1033 Status
= STATUS_INSUFFICIENT_RESOURCES
;
// the write borrows mr->data
1037 tw
->address
= mr
->new_address
;
1038 tw
->length
= Vcb
->superblock
.node_size
;
1039 tw
->data
= (uint8_t*)mr
->data
;
1040 tw
->allocated
= false;
// keep tree_writes ordered by ascending address
1042 if (IsListEmpty(&tree_writes
))
1043 InsertTailList(&tree_writes
, &tw
->list_entry
);
1045 bool inserted
= false;
1047 le2
= tree_writes
.Flink
;
1048 while (le2
!= &tree_writes
) {
1049 tree_write
* tw2
= CONTAINING_RECORD(le2
, tree_write
, list_entry
);
1051 if (tw2
->address
> tw
->address
) {
1052 InsertHeadList(le2
->Blink
, &tw
->list_entry
);
1061 InsertTailList(&tree_writes
, &tw
->list_entry
);
// phase 4: flush the nodes, then create their new extent items
1069 Status
= do_tree_writes(Vcb
, &tree_writes
, true);
1070 if (!NT_SUCCESS(Status
)) {
1071 ERR("do_tree_writes returned %08x\n", Status
);
1076 while (le
!= items
) {
1077 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
1079 Status
= add_metadata_reloc_extent_item(Vcb
, mr
);
1080 if (!NT_SUCCESS(Status
)) {
1081 ERR("add_metadata_reloc_extent_item returned %08x\n", Status
);
1088 Status
= STATUS_SUCCESS
;
// cleanup: drain the write list
1091 while (!IsListEmpty(&tree_writes
)) {
1092 tree_write
* tw
= CONTAINING_RECORD(RemoveHeadList(&tree_writes
), tree_write
, list_entry
);
1095 ExFreePool(tw
->data
);
/* Relocates a batch of tree blocks out of chunk c.
 *
 * With tree_lock held exclusively, walks the extent root starting at the key
 * (c->offset, TYPE_METADATA_ITEM, 0xffffffffffffffff), queues tree-block
 * extents that lie inside the chunk via add_metadata_reloc, and stops after
 * a batch limit (the visible check is loaded >= 64). A non-empty batch is
 * handed to write_metadata_items and followed by do_write; the rollback list
 * is then passed to clear_rollback or do_rollback (the choice between them
 * is guarded by NT_SUCCESS(Status)). The lock is released and every queued
 * record and its refs are drained.
 *
 * Vcb     - volume.
 * c       - chunk being balanced.
 * changed - NOTE(review): presumably reports whether anything was relocated;
 *           its store is not visible here.
 *
 * Returns an NTSTATUS; STATUS_SUCCESS is assigned on the visible success
 * paths.
 */
1103 static NTSTATUS
balance_metadata_chunk(device_extension
* Vcb
, chunk
* c
, bool* changed
) {
1108 LIST_ENTRY items
, rollback
;
1109 uint32_t loaded
= 0;
1111 TRACE("chunk %I64x\n", c
->offset
);
1113 InitializeListHead(&rollback
);
1114 InitializeListHead(&items
);
1116 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
// start the scan at the first extent-tree key for this chunk's offset
1118 searchkey
.obj_id
= c
->offset
;
1119 searchkey
.obj_type
= TYPE_METADATA_ITEM
;
1120 searchkey
.offset
= 0xffffffffffffffff;
1122 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, NULL
);
1123 if (!NT_SUCCESS(Status
)) {
1124 ERR("find_item returned %08x\n", Status
);
1129 traverse_ptr next_tp
;
// past the end of the chunk
1131 if (tp
.item
->key
.obj_id
>= c
->offset
+ c
->chunk_item
->size
)
1134 if (tp
.item
->key
.obj_id
>= c
->offset
&& (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
|| tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
)) {
1135 bool tree
= false, skinny
= false;
// classify the item: METADATA_ITEM, or a node_size EXTENT_ITEM with the tree-block flag
1137 if (tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
&& tp
.item
->size
>= sizeof(EXTENT_ITEM
)) {
1140 } else if (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
&& tp
.item
->key
.offset
== Vcb
->superblock
.node_size
&&
1141 tp
.item
->size
>= sizeof(EXTENT_ITEM
)) {
1142 EXTENT_ITEM
* ei
= (EXTENT_ITEM
*)tp
.item
->data
;
1144 if (ei
->flags
& EXTENT_ITEM_TREE_BLOCK
)
1149 Status
= add_metadata_reloc(Vcb
, &items
, &tp
, skinny
, NULL
, c
, &rollback
);
1151 if (!NT_SUCCESS(Status
)) {
1152 ERR("add_metadata_reloc returned %08x\n", Status
);
1158 if (loaded
>= 64) // only do 64 at a time
1163 b
= find_next_item(Vcb
, &tp
, &next_tp
, false, NULL
);
// nothing queued
1169 if (IsListEmpty(&items
)) {
1171 Status
= STATUS_SUCCESS
;
// no data_items are supplied (NULL) for a metadata chunk
1176 Status
= write_metadata_items(Vcb
, &items
, NULL
, c
, &rollback
);
1177 if (!NT_SUCCESS(Status
)) {
1178 ERR("write_metadata_items returned %08x\n", Status
);
1182 Status
= STATUS_SUCCESS
;
1184 Vcb
->need_write
= true;
1187 if (NT_SUCCESS(Status
)) {
1188 Status
= do_write(Vcb
, NULL
);
1189 if (!NT_SUCCESS(Status
))
1190 ERR("do_write returned %08x\n", Status
);
// the rollback list is either cleared or rolled back
1193 if (NT_SUCCESS(Status
))
1194 clear_rollback(&rollback
);
1196 do_rollback(Vcb
, &rollback
);
1200 ExReleaseResourceLite(&Vcb
->tree_lock
);
// drain the queue: each record, its refs and its node buffer
1202 while (!IsListEmpty(&items
)) {
1203 metadata_reloc
* mr
= CONTAINING_RECORD(RemoveHeadList(&items
), metadata_reloc
, list_entry
);
1205 while (!IsListEmpty(&mr
->refs
)) {
1206 metadata_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&mr
->refs
), metadata_reloc_ref
, list_entry
);
1212 ExFreePool(mr
->data
);
/* Resolves one EXTENT_DATA_REF of a data extent into per-leaf references.
 *
 * Finds the root whose id equals edr->root, then walks that root's
 * EXTENT_DATA items for inode edr->objid. Every item whose EXTENT_DATA2
 * matches dr->address and dr->size, and whose (key offset - ed2->offset)
 * equals edr->offset, is attributed to the leaf that holds it. A new
 * data_reloc_ref is allocated when the leaf differs from the previous match,
 * its leaf is queued through add_metadata_reloc_parent, and the ref is
 * appended to dr->refs.
 *
 * Vcb            - volume; caller holds tree_lock (per the annotation).
 * metadata_items - list of metadata_reloc records that receives the leaves.
 * dr             - data relocation record being populated.
 * edr            - the extent data reference to resolve.
 * rollback       - forwarded to add_metadata_reloc_parent.
 *
 * Returns STATUS_SUCCESS, STATUS_INTERNAL_ERROR when the root or the first
 * EXTENT_DATA item cannot be found, or STATUS_INSUFFICIENT_RESOURCES.
 *
 * Fix: the "could not find subvol" message now logs edr->root, the value the
 * lookup actually compared against, instead of edr->count.
 */
1220 static NTSTATUS
data_reloc_add_tree_edr(_Requires_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, LIST_ENTRY
* metadata_items
,
1221 data_reloc
* dr
, EXTENT_DATA_REF
* edr
, LIST_ENTRY
* rollback
) {
1228 uint64_t last_tree
= 0;
1229 data_reloc_ref
* ref
;
// locate the root (subvolume) the reference belongs to
1231 le
= Vcb
->roots
.Flink
;
1232 while (le
!= &Vcb
->roots
) {
1233 root
* r2
= CONTAINING_RECORD(le
, root
, list_entry
);
1235 if (r2
->id
== edr
->root
) {
// report the root id that was searched for
1244 ERR("could not find subvol %I64x\n", edr
->root
);
1245 return STATUS_INTERNAL_ERROR
;
// first EXTENT_DATA key of the inode
1248 searchkey
.obj_id
= edr
->objid
;
1249 searchkey
.obj_type
= TYPE_EXTENT_DATA
;
1250 searchkey
.offset
= 0;
1252 Status
= find_item(Vcb
, r
, &tp
, &searchkey
, false, NULL
);
1253 if (!NT_SUCCESS(Status
)) {
1254 ERR("find_item returned %08x\n", Status
);
// find_item may land on the item before the key; step forward once
1258 if (tp
.item
->key
.obj_id
< searchkey
.obj_id
|| (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
< searchkey
.obj_type
)) {
1261 if (find_next_item(Vcb
, &tp
, &tp2
, false, NULL
))
1264 ERR("could not find EXTENT_DATA for inode %I64x in root %I64x\n", searchkey
.obj_id
, r
->id
);
1265 return STATUS_INTERNAL_ERROR
;
// visit every EXTENT_DATA item of this inode
1271 while (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
== searchkey
.obj_type
) {
1274 if (tp
.item
->size
>= sizeof(EXTENT_DATA
)) {
1275 EXTENT_DATA
* ed
= (EXTENT_DATA
*)tp
.item
->data
;
1277 if ((ed
->type
== EXTENT_TYPE_PREALLOC
|| ed
->type
== EXTENT_TYPE_REGULAR
) && tp
.item
->size
>= offsetof(EXTENT_DATA
, data
[0]) + sizeof(EXTENT_DATA2
)) {
1278 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
// same extent, same size, and same file offset once the in-extent offset is removed
1280 if (ed2
->address
== dr
->address
&& ed2
->size
== dr
->size
&& tp
.item
->key
.offset
- ed2
->offset
== edr
->offset
) {
// consecutive matches in the same leaf share one ref
1281 if (ref
&& last_tree
== tp
.tree
->header
.address
)
1284 ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(data_reloc_ref
), ALLOC_TAG
);
1286 ERR("out of memory\n");
1287 return STATUS_INSUFFICIENT_RESOURCES
;
1290 ref
->type
= TYPE_EXTENT_DATA_REF
;
1291 RtlCopyMemory(&ref
->edr
, edr
, sizeof(EXTENT_DATA_REF
));
// the leaf holding the item must be relocated (rewritten) as well
1294 Status
= add_metadata_reloc_parent(Vcb
, metadata_items
, tp
.tree
->header
.address
, &mr
, rollback
);
1295 if (!NT_SUCCESS(Status
)) {
1296 ERR("add_metadata_reloc_parent returned %08x\n", Status
);
1301 last_tree
= tp
.tree
->header
.address
;
1304 InsertTailList(&dr
->refs
, &ref
->list_entry
);
1310 if (find_next_item(Vcb
, &tp
, &tp2
, false, NULL
))
1316 return STATUS_SUCCESS
;
/* add_data_reloc: records the data extent described by the tree item at `tp`
 * as a data_reloc and appends it to `items`.
 *
 * Steps visible in this listing:
 *   - allocate a data_reloc and copy address (key.obj_id), size (key.offset)
 *     and the EXTENT_ITEM pointer out of the tree item;
 *   - delete the tree item;
 *   - look up the owning chunk with get_chunk_from_address and, under the
 *     chunk lock, subtract the extent length from c->used and hand the range
 *     to space_list_add;
 *   - walk the inline references that follow the EXTENT_ITEM header:
 *     EXTENT_DATA_REF entries go through data_reloc_add_tree_edr,
 *     SHARED_DATA_REF entries are copied into a new data_reloc_ref and
 *     resolved with add_metadata_reloc_parent;
 *   - when the inline count is below ei->refcount, scan the following tree
 *     items with the same obj_id for non-inline refs, deleting each one
 *     after it has been recorded.
 *
 * Returns STATUS_SUCCESS at the end of the visible path;
 * STATUS_INSUFFICIENT_RESOURCES and STATUS_INTERNAL_ERROR are returned from
 * the failure branches shown below.
 *
 * NOTE(review): this listing omits a number of short source lines (null
 * checks, braces, bare returns), so what each error branch frees cannot be
 * confirmed from here. */
1319 static NTSTATUS
add_data_reloc(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, LIST_ENTRY
* items
, LIST_ENTRY
* metadata_items
,
1320 traverse_ptr
* tp
, chunk
* c
, LIST_ENTRY
* rollback
) {
1328 dr
= ExAllocatePoolWithTag(PagedPool
, sizeof(data_reloc
), ALLOC_TAG
);
1330 ERR("out of memory\n");
1331 return STATUS_INSUFFICIENT_RESOURCES
;
// snapshot the extent's identity before the tree item is deleted
1334 dr
->address
= tp
->item
->key
.obj_id
;
1335 dr
->size
= tp
->item
->key
.offset
;
1336 dr
->ei
= (EXTENT_ITEM
*)tp
->item
->data
;
1337 InitializeListHead(&dr
->refs
);
1339 Status
= delete_tree_item(Vcb
, tp
);
1340 if (!NT_SUCCESS(Status
)) {
1341 ERR("delete_tree_item returned %08x\n", Status
);
// return the extent's bytes to the chunk that holds its address
1346 c
= get_chunk_from_address(Vcb
, tp
->item
->key
.obj_id
);
1349 acquire_chunk_lock(c
, Vcb
);
1351 c
->used
-= tp
->item
->key
.offset
;
1353 space_list_add(c
, tp
->item
->key
.obj_id
, tp
->item
->key
.offset
, rollback
);
1355 release_chunk_lock(c
, Vcb
);
// inline references start immediately after the EXTENT_ITEM header
1358 ei
= (EXTENT_ITEM
*)tp
->item
->data
;
1361 len
= tp
->item
->size
- sizeof(EXTENT_ITEM
);
1362 ptr
= (uint8_t*)tp
->item
->data
+ sizeof(EXTENT_ITEM
);
1365 uint8_t secttype
= *ptr
;
1366 uint16_t sectlen
= secttype
== TYPE_EXTENT_DATA_REF
? sizeof(EXTENT_DATA_REF
) : (secttype
== TYPE_SHARED_DATA_REF
? sizeof(SHARED_DATA_REF
) : 0);
1370 if (sectlen
> len
) {
1371 ERR("(%I64x,%x,%I64x): %x bytes left, expecting at least %x\n", tp
->item
->key
.obj_id
, tp
->item
->key
.obj_type
, tp
->item
->key
.offset
, len
, sectlen
);
1372 return STATUS_INTERNAL_ERROR
;
1376 ERR("(%I64x,%x,%I64x): unrecognized extent type %x\n", tp
->item
->key
.obj_id
, tp
->item
->key
.obj_type
, tp
->item
->key
.offset
, secttype
);
1377 return STATUS_INTERNAL_ERROR
;
1380 if (secttype
== TYPE_EXTENT_DATA_REF
) {
1381 EXTENT_DATA_REF
* edr
= (EXTENT_DATA_REF
*)(ptr
+ sizeof(uint8_t));
1383 inline_rc
+= edr
->count
;
1385 Status
= data_reloc_add_tree_edr(Vcb
, metadata_items
, dr
, edr
, rollback
);
1386 if (!NT_SUCCESS(Status
)) {
1387 ERR("data_reloc_add_tree_edr returned %08x\n", Status
);
1390 } else if (secttype
== TYPE_SHARED_DATA_REF
) {
1392 data_reloc_ref
* ref
;
1394 ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(data_reloc_ref
), ALLOC_TAG
);
1396 ERR("out of memory\n");
1397 return STATUS_INSUFFICIENT_RESOURCES
;
1400 ref
->type
= TYPE_SHARED_DATA_REF
;
1401 RtlCopyMemory(&ref
->sdr
, ptr
+ sizeof(uint8_t), sizeof(SHARED_DATA_REF
));
1402 inline_rc
+= ref
->sdr
.count
;
1404 Status
= add_metadata_reloc_parent(Vcb
, metadata_items
, ref
->sdr
.offset
, &mr
, rollback
);
1405 if (!NT_SUCCESS(Status
)) {
1406 ERR("add_metadata_reloc_parent returned %08x\n", Status
);
1413 InsertTailList(&dr
->refs
, &ref
->list_entry
);
1415 ERR("unexpected tree type %x\n", secttype
);
1416 return STATUS_INTERNAL_ERROR
;
// step over the one-byte type tag plus the reference payload
1421 ptr
+= sizeof(uint8_t) + sectlen
;
1424 if (inline_rc
< ei
->refcount
) { // look for non-inline entries
1425 traverse_ptr tp2
= *tp
, next_tp
;
1427 while (find_next_item(Vcb
, &tp2
, &next_tp
, false, NULL
)) {
1430 if (tp2
.item
->key
.obj_id
== tp
->item
->key
.obj_id
) {
1431 if (tp2
.item
->key
.obj_type
== TYPE_EXTENT_DATA_REF
&& tp2
.item
->size
>= sizeof(EXTENT_DATA_REF
)) {
1432 Status
= data_reloc_add_tree_edr(Vcb
, metadata_items
, dr
, (EXTENT_DATA_REF
*)tp2
.item
->data
, rollback
);
1433 if (!NT_SUCCESS(Status
)) {
1434 ERR("data_reloc_add_tree_edr returned %08x\n", Status
);
1438 Status
= delete_tree_item(Vcb
, &tp2
);
1439 if (!NT_SUCCESS(Status
)) {
1440 ERR("delete_tree_item returned %08x\n", Status
);
1443 } else if (tp2
.item
->key
.obj_type
== TYPE_SHARED_DATA_REF
&& tp2
.item
->size
>= sizeof(uint32_t)) {
1445 data_reloc_ref
* ref
;
1447 ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(data_reloc_ref
), ALLOC_TAG
);
1449 ERR("out of memory\n");
1450 return STATUS_INSUFFICIENT_RESOURCES
;
// for a stand-alone shared ref the parent address is the key offset and the item body is the 32-bit count
1453 ref
->type
= TYPE_SHARED_DATA_REF
;
1454 ref
->sdr
.offset
= tp2
.item
->key
.offset
;
1455 ref
->sdr
.count
= *((uint32_t*)tp2
.item
->data
);
1457 Status
= add_metadata_reloc_parent(Vcb
, metadata_items
, ref
->sdr
.offset
, &mr
, rollback
);
1458 if (!NT_SUCCESS(Status
)) {
1459 ERR("add_metadata_reloc_parent returned %08x\n", Status
);
1465 InsertTailList(&dr
->refs
, &ref
->list_entry
);
1467 Status
= delete_tree_item(Vcb
, &tp2
);
1468 if (!NT_SUCCESS(Status
)) {
1469 ERR("delete_tree_item returned %08x\n", Status
);
// hand the completed record to the caller's list
1478 InsertTailList(items
, &dr
->list_entry
);
1480 return STATUS_SUCCESS
;
1483 static void sort_data_reloc_refs(data_reloc
* dr
) {
1484 LIST_ENTRY newlist
, *le
;
1486 if (IsListEmpty(&dr
->refs
))
1491 InitializeListHead(&newlist
);
1493 while (!IsListEmpty(&dr
->refs
)) {
1494 data_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&dr
->refs
), data_reloc_ref
, list_entry
);
1495 bool inserted
= false;
1497 if (ref
->type
== TYPE_EXTENT_DATA_REF
)
1498 ref
->hash
= get_extent_data_ref_hash2(ref
->edr
.root
, ref
->edr
.objid
, ref
->edr
.offset
);
1499 else if (ref
->type
== TYPE_SHARED_DATA_REF
)
1500 ref
->hash
= ref
->parent
->new_address
;
1503 while (le
!= &newlist
) {
1504 data_reloc_ref
* ref2
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1506 if (ref
->type
< ref2
->type
|| (ref
->type
== ref2
->type
&& ref
->hash
> ref2
->hash
)) {
1507 InsertHeadList(le
->Blink
, &ref
->list_entry
);
1516 InsertTailList(&newlist
, &ref
->list_entry
);
1520 while (le
!= &newlist
) {
1521 data_reloc_ref
* ref
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1523 if (le
->Flink
!= &newlist
) {
1524 data_reloc_ref
* ref2
= CONTAINING_RECORD(le
->Flink
, data_reloc_ref
, list_entry
);
1526 if (ref
->type
== TYPE_EXTENT_DATA_REF
&& ref2
->type
== TYPE_EXTENT_DATA_REF
&& ref
->edr
.root
== ref2
->edr
.root
&&
1527 ref
->edr
.objid
== ref2
->edr
.objid
&& ref
->edr
.offset
== ref2
->edr
.offset
) {
1528 RemoveEntryList(&ref2
->list_entry
);
1529 ref
->edr
.count
+= ref2
->edr
.count
;
1538 newlist
.Flink
->Blink
= &dr
->refs
;
1539 newlist
.Blink
->Flink
= &dr
->refs
;
1540 dr
->refs
.Flink
= newlist
.Flink
;
1541 dr
->refs
.Blink
= newlist
.Blink
;
/* add_data_reloc_extent_item: writes the extent-tree entries for a relocated
 * data extent at dr->new_address.
 *
 * Steps visible in this listing:
 *   - sort_data_reloc_refs(dr);
 *   - first pass over dr->refs: add up the inline size, keeping refs inline
 *     while EXTENT_ITEM + (1 + payload) per ref stays within a quarter of
 *     the node size, and remember the first ref that does not fit;
 *   - allocate the EXTENT_ITEM, copy generation and flags from the old item,
 *     and pack the inline refs after the header (shared refs are written
 *     with the parent's new address);
 *   - insert the EXTENT_ITEM into the extent root, keyed by
 *     (new_address, TYPE_EXTENT_ITEM, size);
 *   - insert every remaining ref as its own item: EXTENT_DATA_REF keyed by
 *     ref->hash, SHARED_DATA_REF keyed by the parent's new address with a
 *     32-bit count as payload.
 *
 * NOTE(review): the extra byte counted per inline ref is presumably the type
 * tag; the line that writes it, and the refcount assignment, are not visible
 * in this listing - confirm against the full source. */
1544 static NTSTATUS
add_data_reloc_extent_item(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, data_reloc
* dr
) {
1548 uint16_t inline_len
;
1549 bool all_inline
= true;
1550 data_reloc_ref
* first_noninline
= NULL
;
1554 inline_len
= sizeof(EXTENT_ITEM
);
1556 sort_data_reloc_refs(dr
);
// pass 1: decide how many refs fit inline
1558 le
= dr
->refs
.Flink
;
1559 while (le
!= &dr
->refs
) {
1560 data_reloc_ref
* ref
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1561 uint16_t extlen
= 0;
1563 if (ref
->type
== TYPE_EXTENT_DATA_REF
) {
1564 extlen
+= sizeof(EXTENT_DATA_REF
);
1565 rc
+= ref
->edr
.count
;
1566 } else if (ref
->type
== TYPE_SHARED_DATA_REF
) {
1567 extlen
+= sizeof(SHARED_DATA_REF
);
// inline area is capped at node_size / 4
1572 if ((ULONG
)(inline_len
+ 1 + extlen
) > (Vcb
->superblock
.node_size
>> 2)) {
1574 first_noninline
= ref
;
1576 inline_len
+= extlen
+ 1;
1582 ei
= ExAllocatePoolWithTag(PagedPool
, inline_len
, ALLOC_TAG
);
1584 ERR("out of memory\n");
1585 return STATUS_INSUFFICIENT_RESOURCES
;
1589 ei
->generation
= dr
->ei
->generation
;
1590 ei
->flags
= dr
->ei
->flags
;
1591 ptr
= (uint8_t*)&ei
[1];
// pass 2: pack the inline refs behind the EXTENT_ITEM header
1593 le
= dr
->refs
.Flink
;
1594 while (le
!= &dr
->refs
) {
1595 data_reloc_ref
* ref
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1597 if (ref
== first_noninline
)
1603 if (ref
->type
== TYPE_EXTENT_DATA_REF
) {
1604 EXTENT_DATA_REF
* edr
= (EXTENT_DATA_REF
*)ptr
;
1606 RtlCopyMemory(edr
, &ref
->edr
, sizeof(EXTENT_DATA_REF
));
1608 ptr
+= sizeof(EXTENT_DATA_REF
);
1609 } else if (ref
->type
== TYPE_SHARED_DATA_REF
) {
1610 SHARED_DATA_REF
* sdr
= (SHARED_DATA_REF
*)ptr
;
1612 sdr
->offset
= ref
->parent
->new_address
;
1613 sdr
->count
= ref
->sdr
.count
;
1615 ptr
+= sizeof(SHARED_DATA_REF
);
1621 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, dr
->new_address
, TYPE_EXTENT_ITEM
, dr
->size
, ei
, inline_len
, NULL
, NULL
);
1622 if (!NT_SUCCESS(Status
)) {
1623 ERR("insert_tree_item returned %08x\n", Status
);
// pass 3: refs that did not fit inline become separate tree items
1628 le
= &first_noninline
->list_entry
;
1630 while (le
!= &dr
->refs
) {
1631 data_reloc_ref
* ref
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1633 if (ref
->type
== TYPE_EXTENT_DATA_REF
) {
1634 EXTENT_DATA_REF
* edr
;
1636 edr
= ExAllocatePoolWithTag(PagedPool
, sizeof(EXTENT_DATA_REF
), ALLOC_TAG
);
1638 ERR("out of memory\n");
1639 return STATUS_INSUFFICIENT_RESOURCES
;
1642 RtlCopyMemory(edr
, &ref
->edr
, sizeof(EXTENT_DATA_REF
));
1644 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, dr
->new_address
, TYPE_EXTENT_DATA_REF
, ref
->hash
, edr
, sizeof(EXTENT_DATA_REF
), NULL
, NULL
);
1645 if (!NT_SUCCESS(Status
)) {
1646 ERR("insert_tree_item returned %08x\n", Status
);
1649 } else if (ref
->type
== TYPE_SHARED_DATA_REF
) {
1652 sdr
= ExAllocatePoolWithTag(PagedPool
, sizeof(uint32_t), ALLOC_TAG
);
1654 ERR("out of memory\n");
1655 return STATUS_INSUFFICIENT_RESOURCES
;
1658 *sdr
= ref
->sdr
.count
;
1660 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, dr
->new_address
, TYPE_SHARED_DATA_REF
, ref
->parent
->new_address
, sdr
, sizeof(uint32_t), NULL
, NULL
);
1661 if (!NT_SUCCESS(Status
)) {
1662 ERR("insert_tree_item returned %08x\n", Status
);
1671 return STATUS_SUCCESS
;
/* balance_data_chunk: moves one batch of data extents out of chunk `c`.
 *
 * Flow visible in this listing:
 *   1. Take Vcb->tree_lock exclusively and walk the extent tree from
 *      c->offset, passing EXTENT_ITEMs to add_data_reloc; the batch stops
 *      once 0x1000000 bytes or 100 extents have been gathered. The
 *      EXTENT_ITEM_TREE_BLOCK flag is tested first (presumably to skip
 *      metadata - the branch body is not visible here).
 *   2. For every gathered extent, find a destination address: first in the
 *      current `newchunk`, then in any other chunk that is not read-only,
 *      not being relocated and has type Vcb->data_flags, and finally in a
 *      chunk freshly created by alloc_chunk.
 *   3. Load the extent's checksums from the checksum root into `csum`,
 *      clearing the matching bits in a bitmap (1 = no csum, 0 = csum).
 *   4. Copy the data with read_data / write_data_complete in pieces of at
 *      most BALANCE_UNIT bytes, handling runs without checksums separately
 *      from runs with checksums, and re-register checksums at the new
 *      address with add_checksum_entry.
 *   5. Call write_metadata_items, then add_data_reloc_extent_item for each
 *      extent, and move matching changed_extents entries to the new chunk.
 *   6. Rewrite EXTENT_DATA2 addresses in the chunks' cache inodes, call
 *      do_write, and on success clear the rollback list and apply the same
 *      address rewrite to every FCB in Vcb->all_fcbs; do_rollback is called
 *      on the other path.
 *   7. Release the tree lock and drain the `items` and `metadata_items`
 *      lists.
 *
 * NOTE(review): this listing omits short source lines (goto/labels, braces,
 * loop increments, frees), so the exact cleanup performed on each error
 * path cannot be confirmed from here. */
1674 static NTSTATUS
balance_data_chunk(device_extension
* Vcb
, chunk
* c
, bool* changed
) {
1679 LIST_ENTRY items
, metadata_items
, rollback
, *le
;
1680 uint64_t loaded
= 0, num_loaded
= 0;
1681 chunk
* newchunk
= NULL
;
1682 uint8_t* data
= NULL
;
1684 TRACE("chunk %I64x\n", c
->offset
);
1686 InitializeListHead(&rollback
);
1687 InitializeListHead(&items
);
1688 InitializeListHead(&metadata_items
);
1690 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
// step 1: gather extents that live inside this chunk
1692 searchkey
.obj_id
= c
->offset
;
1693 searchkey
.obj_type
= TYPE_EXTENT_ITEM
;
1694 searchkey
.offset
= 0xffffffffffffffff;
1696 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, NULL
);
1697 if (!NT_SUCCESS(Status
)) {
1698 ERR("find_item returned %08x\n", Status
);
1703 traverse_ptr next_tp
;
1705 if (tp
.item
->key
.obj_id
>= c
->offset
+ c
->chunk_item
->size
)
1708 if (tp
.item
->key
.obj_id
>= c
->offset
&& tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
) {
1711 if (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
&& tp
.item
->size
>= sizeof(EXTENT_ITEM
)) {
1712 EXTENT_ITEM
* ei
= (EXTENT_ITEM
*)tp
.item
->data
;
1714 if (ei
->flags
& EXTENT_ITEM_TREE_BLOCK
)
1719 Status
= add_data_reloc(Vcb
, &items
, &metadata_items
, &tp
, c
, &rollback
);
1721 if (!NT_SUCCESS(Status
)) {
1722 ERR("add_data_reloc returned %08x\n", Status
);
1726 loaded
+= tp
.item
->key
.offset
;
1729 if (loaded
>= 0x1000000 || num_loaded
>= 100) // only do so much at a time, so we don't block too obnoxiously
1734 b
= find_next_item(Vcb
, &tp
, &next_tp
, false, NULL
);
1740 if (IsListEmpty(&items
)) {
1742 Status
= STATUS_SUCCESS
;
// bounce buffer shared by every read/write pair below
1747 data
= ExAllocatePoolWithTag(PagedPool
, BALANCE_UNIT
, ALLOC_TAG
);
1749 ERR("out of memory\n");
1750 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1755 while (le
!= &items
) {
1756 data_reloc
* dr
= CONTAINING_RECORD(le
, data_reloc
, list_entry
);
1762 ULONG bmplen
, runlength
, index
, lastoff
;
// step 2a: try the chunk used for the previous extent
1765 acquire_chunk_lock(newchunk
, Vcb
);
1767 if (find_data_address_in_chunk(Vcb
, newchunk
, dr
->size
, &dr
->new_address
)) {
1768 newchunk
->used
+= dr
->size
;
1769 space_list_subtract(newchunk
, false, dr
->new_address
, dr
->size
, &rollback
);
1773 release_chunk_lock(newchunk
, Vcb
);
// step 2b: search the other data chunks
1777 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
1779 le2
= Vcb
->chunks
.Flink
;
1780 while (le2
!= &Vcb
->chunks
) {
1781 chunk
* c2
= CONTAINING_RECORD(le2
, chunk
, list_entry
);
1783 if (!c2
->readonly
&& !c2
->reloc
&& c2
!= newchunk
&& c2
->chunk_item
->type
== Vcb
->data_flags
) {
1784 acquire_chunk_lock(c2
, Vcb
);
1786 if ((c2
->chunk_item
->size
- c2
->used
) >= dr
->size
) {
1787 if (find_data_address_in_chunk(Vcb
, c2
, dr
->size
, &dr
->new_address
)) {
1788 c2
->used
+= dr
->size
;
1789 space_list_subtract(c2
, false, dr
->new_address
, dr
->size
, &rollback
);
1790 release_chunk_lock(c2
, Vcb
);
1797 release_chunk_lock(c2
, Vcb
);
1803 // allocate new chunk if necessary
1805 Status
= alloc_chunk(Vcb
, Vcb
->data_flags
, &newchunk
, false);
1807 if (!NT_SUCCESS(Status
)) {
1808 ERR("alloc_chunk returned %08x\n", Status
);
1809 ExReleaseResourceLite(&Vcb
->chunk_lock
);
1813 acquire_chunk_lock(newchunk
, Vcb
);
1815 newchunk
->balance_num
= Vcb
->balance
.balance_num
;
1817 if (!find_data_address_in_chunk(Vcb
, newchunk
, dr
->size
, &dr
->new_address
)) {
1818 release_chunk_lock(newchunk
, Vcb
);
1819 ExReleaseResourceLite(&Vcb
->chunk_lock
);
1820 ERR("could not find address in new chunk\n");
1821 Status
= STATUS_DISK_FULL
;
1824 newchunk
->used
+= dr
->size
;
1825 space_list_subtract(newchunk
, false, dr
->new_address
, dr
->size
, &rollback
);
1828 release_chunk_lock(newchunk
, Vcb
);
1831 ExReleaseResourceLite(&Vcb
->chunk_lock
);
1834 dr
->newchunk
= newchunk
;
// step 3: one bitmap bit and one 32-bit checksum slot per sector of the extent
1836 bmplen
= (ULONG
)(dr
->size
/ Vcb
->superblock
.sector_size
);
1838 bmparr
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)sector_align(bmplen
+ 1, sizeof(ULONG
)), ALLOC_TAG
);
1840 ERR("out of memory\n");
1841 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1845 csum
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)(dr
->size
* sizeof(uint32_t) / Vcb
->superblock
.sector_size
), ALLOC_TAG
);
1847 ERR("out of memory\n");
1849 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1853 RtlInitializeBitMap(&bmp
, bmparr
, bmplen
);
1854 RtlSetAllBits(&bmp
); // 1 = no csum, 0 = csum
1856 searchkey
.obj_id
= EXTENT_CSUM_ID
;
1857 searchkey
.obj_type
= TYPE_EXTENT_CSUM
;
1858 searchkey
.offset
= dr
->address
;
1860 Status
= find_item(Vcb
, Vcb
->checksum_root
, &tp
, &searchkey
, false, NULL
);
1861 if (!NT_SUCCESS(Status
) && Status
!= STATUS_NOT_FOUND
) {
1862 ERR("find_item returned %08x\n", Status
);
1868 if (Status
!= STATUS_NOT_FOUND
) {
1870 traverse_ptr next_tp
;
1872 if (tp
.item
->key
.obj_type
== TYPE_EXTENT_CSUM
) {
1873 if (tp
.item
->key
.offset
>= dr
->address
+ dr
->size
)
1875 else if (tp
.item
->size
>= sizeof(uint32_t) && tp
.item
->key
.offset
+ (tp
.item
->size
* Vcb
->superblock
.sector_size
/ sizeof(uint32_t)) >= dr
->address
) {
// [cs, ce) is the overlap between this checksum item and the extent
1876 uint64_t cs
= max(dr
->address
, tp
.item
->key
.offset
);
1877 uint64_t ce
= min(dr
->address
+ dr
->size
, tp
.item
->key
.offset
+ (tp
.item
->size
* Vcb
->superblock
.sector_size
/ sizeof(uint32_t)));
1879 RtlCopyMemory(csum
+ ((cs
- dr
->address
) / Vcb
->superblock
.sector_size
),
1880 tp
.item
->data
+ ((cs
- tp
.item
->key
.offset
) * sizeof(uint32_t) / Vcb
->superblock
.sector_size
),
1881 (ULONG
)((ce
- cs
) * sizeof(uint32_t) / Vcb
->superblock
.sector_size
));
1883 RtlClearBits(&bmp
, (ULONG
)((cs
- dr
->address
) / Vcb
->superblock
.sector_size
), (ULONG
)((ce
- cs
) / Vcb
->superblock
.sector_size
));
1885 if (ce
== dr
->address
+ dr
->size
)
1890 if (find_next_item(Vcb
, &tp
, &next_tp
, false, NULL
))
// step 4: copy the extent, one clear (checksummed) run at a time
1898 runlength
= RtlFindFirstRunClear(&bmp
, &index
);
1900 while (runlength
!= 0) {
1901 if (index
>= bmplen
)
1904 if (index
+ runlength
>= bmplen
) {
1905 runlength
= bmplen
- index
;
1911 if (index
> lastoff
) {
1912 ULONG off
= lastoff
;
1913 ULONG size
= index
- lastoff
;
1915 // handle no csum run
1919 if (size
* Vcb
->superblock
.sector_size
> BALANCE_UNIT
)
1920 rl
= BALANCE_UNIT
/ Vcb
->superblock
.sector_size
;
1924 Status
= read_data(Vcb
, dr
->address
+ (off
* Vcb
->superblock
.sector_size
), rl
* Vcb
->superblock
.sector_size
, NULL
, false, data
,
1925 c
, NULL
, NULL
, 0, false, NormalPagePriority
);
1926 if (!NT_SUCCESS(Status
)) {
1927 ERR("read_data returned %08x\n", Status
);
1933 Status
= write_data_complete(Vcb
, dr
->new_address
+ (off
* Vcb
->superblock
.sector_size
), data
, rl
* Vcb
->superblock
.sector_size
,
1934 NULL
, newchunk
, false, 0, NormalPagePriority
);
1935 if (!NT_SUCCESS(Status
)) {
1936 ERR("write_data_complete returned %08x\n", Status
);
// register the checksums at the new address; the second call passes NULL checksum data for the old address
1947 add_checksum_entry(Vcb
, dr
->new_address
+ (index
* Vcb
->superblock
.sector_size
), runlength
, &csum
[index
], NULL
);
1948 add_checksum_entry(Vcb
, dr
->address
+ (index
* Vcb
->superblock
.sector_size
), runlength
, NULL
, NULL
);
1954 if (runlength
* Vcb
->superblock
.sector_size
> BALANCE_UNIT
)
1955 rl
= BALANCE_UNIT
/ Vcb
->superblock
.sector_size
;
1959 Status
= read_data(Vcb
, dr
->address
+ (index
* Vcb
->superblock
.sector_size
), rl
* Vcb
->superblock
.sector_size
, &csum
[index
], false, data
,
1960 c
, NULL
, NULL
, 0, false, NormalPagePriority
);
1961 if (!NT_SUCCESS(Status
)) {
1962 ERR("read_data returned %08x\n", Status
);
1968 Status
= write_data_complete(Vcb
, dr
->new_address
+ (index
* Vcb
->superblock
.sector_size
), data
, rl
* Vcb
->superblock
.sector_size
,
1969 NULL
, newchunk
, false, 0, NormalPagePriority
);
1970 if (!NT_SUCCESS(Status
)) {
1971 ERR("write_data_complete returned %08x\n", Status
);
1979 } while (runlength
> 0);
1982 runlength
= RtlFindNextForwardRunClear(&bmp
, index
, &index
);
1988 // handle final nocsum run
1989 if (lastoff
< dr
->size
/ Vcb
->superblock
.sector_size
) {
1990 ULONG off
= lastoff
;
1991 ULONG size
= (ULONG
)((dr
->size
/ Vcb
->superblock
.sector_size
) - lastoff
);
1996 if (size
* Vcb
->superblock
.sector_size
> BALANCE_UNIT
)
1997 rl
= BALANCE_UNIT
/ Vcb
->superblock
.sector_size
;
2001 Status
= read_data(Vcb
, dr
->address
+ (off
* Vcb
->superblock
.sector_size
), rl
* Vcb
->superblock
.sector_size
, NULL
, false, data
,
2002 c
, NULL
, NULL
, 0, false, NormalPagePriority
);
2003 if (!NT_SUCCESS(Status
)) {
2004 ERR("read_data returned %08x\n", Status
);
2008 Status
= write_data_complete(Vcb
, dr
->new_address
+ (off
* Vcb
->superblock
.sector_size
), data
, rl
* Vcb
->superblock
.sector_size
,
2009 NULL
, newchunk
, false, 0, NormalPagePriority
);
2010 if (!NT_SUCCESS(Status
)) {
2011 ERR("write_data_complete returned %08x\n", Status
);
// step 5: rewrite metadata that referenced the moved extents, then their extent items
2026 Status
= write_metadata_items(Vcb
, &metadata_items
, &items
, NULL
, &rollback
);
2027 if (!NT_SUCCESS(Status
)) {
2028 ERR("write_metadata_items returned %08x\n", Status
);
2033 while (le
!= &items
) {
2034 data_reloc
* dr
= CONTAINING_RECORD(le
, data_reloc
, list_entry
);
2036 Status
= add_data_reloc_extent_item(Vcb
, dr
);
2037 if (!NT_SUCCESS(Status
)) {
2038 ERR("add_data_reloc_extent_item returned %08x\n", Status
);
// pending changed_extents for moved extents follow them to the destination chunk
2045 le
= c
->changed_extents
.Flink
;
2046 while (le
!= &c
->changed_extents
) {
2047 LIST_ENTRY
*le2
, *le3
;
2048 changed_extent
* ce
= CONTAINING_RECORD(le
, changed_extent
, list_entry
);
2053 while (le2
!= &items
) {
2054 data_reloc
* dr
= CONTAINING_RECORD(le2
, data_reloc
, list_entry
);
2056 if (ce
->address
== dr
->address
) {
2057 ce
->address
= dr
->new_address
;
2058 RemoveEntryList(&ce
->list_entry
);
2059 InsertTailList(&dr
->newchunk
->changed_extents
, &ce
->list_entry
);
2069 Status
= STATUS_SUCCESS
;
2071 Vcb
->need_write
= true;
2074 if (NT_SUCCESS(Status
)) {
2075 // update extents in cache inodes before we flush
2076 le
= Vcb
->chunks
.Flink
;
2077 while (le
!= &Vcb
->chunks
) {
2078 chunk
* c2
= CONTAINING_RECORD(le
, chunk
, list_entry
);
2083 ExAcquireResourceExclusiveLite(c2
->cache
->Header
.Resource
, true);
2085 le2
= c2
->cache
->extents
.Flink
;
2086 while (le2
!= &c2
->cache
->extents
) {
2087 extent
* ext
= CONTAINING_RECORD(le2
, extent
, list_entry
);
2090 if (ext
->extent_data
.type
== EXTENT_TYPE_REGULAR
|| ext
->extent_data
.type
== EXTENT_TYPE_PREALLOC
) {
2091 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ext
->extent_data
.data
;
2093 if (ed2
->size
> 0 && ed2
->address
>= c
->offset
&& ed2
->address
< c
->offset
+ c
->chunk_item
->size
) {
2094 LIST_ENTRY
* le3
= items
.Flink
;
2095 while (le3
!= &items
) {
2096 data_reloc
* dr
= CONTAINING_RECORD(le3
, data_reloc
, list_entry
);
2098 if (ed2
->address
== dr
->address
) {
2099 ed2
->address
= dr
->new_address
;
2112 ExReleaseResourceLite(c2
->cache
->Header
.Resource
);
2118 Status
= do_write(Vcb
, NULL
);
2119 if (!NT_SUCCESS(Status
))
2120 ERR("do_write returned %08x\n", Status
);
2123 if (NT_SUCCESS(Status
)) {
2124 clear_rollback(&rollback
);
2127 // FIXME - speed this up(?)
2129 le
= Vcb
->all_fcbs
.Flink
;
2130 while (le
!= &Vcb
->all_fcbs
) {
2131 struct _fcb
* fcb
= CONTAINING_RECORD(le
, struct _fcb
, list_entry_all
);
2134 ExAcquireResourceExclusiveLite(fcb
->Header
.Resource
, true);
2136 le2
= fcb
->extents
.Flink
;
2137 while (le2
!= &fcb
->extents
) {
2138 extent
* ext
= CONTAINING_RECORD(le2
, extent
, list_entry
);
2141 if (ext
->extent_data
.type
== EXTENT_TYPE_REGULAR
|| ext
->extent_data
.type
== EXTENT_TYPE_PREALLOC
) {
2142 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ext
->extent_data
.data
;
2144 if (ed2
->size
> 0 && ed2
->address
>= c
->offset
&& ed2
->address
< c
->offset
+ c
->chunk_item
->size
) {
2145 LIST_ENTRY
* le3
= items
.Flink
;
2146 while (le3
!= &items
) {
2147 data_reloc
* dr
= CONTAINING_RECORD(le3
, data_reloc
, list_entry
);
2149 if (ed2
->address
== dr
->address
) {
2150 ed2
->address
= dr
->new_address
;
2163 ExReleaseResourceLite(fcb
->Header
.Resource
);
2168 do_rollback(Vcb
, &rollback
);
2172 ExReleaseResourceLite(&Vcb
->tree_lock
);
// step 7: tear down the work lists
2177 while (!IsListEmpty(&items
)) {
2178 data_reloc
* dr
= CONTAINING_RECORD(RemoveHeadList(&items
), data_reloc
, list_entry
);
2180 while (!IsListEmpty(&dr
->refs
)) {
2181 data_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&dr
->refs
), data_reloc_ref
, list_entry
);
2189 while (!IsListEmpty(&metadata_items
)) {
2190 metadata_reloc
* mr
= CONTAINING_RECORD(RemoveHeadList(&metadata_items
), metadata_reloc
, list_entry
);
2192 while (!IsListEmpty(&mr
->refs
)) {
2193 metadata_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&mr
->refs
), metadata_reloc_ref
, list_entry
);
2199 ExFreePool(mr
->data
);
2207 static __inline
uint64_t get_chunk_dup_type(chunk
* c
) {
2208 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID0
)
2209 return BLOCK_FLAG_RAID0
;
2210 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID1
)
2211 return BLOCK_FLAG_RAID1
;
2212 else if (c
->chunk_item
->type
& BLOCK_FLAG_DUPLICATE
)
2213 return BLOCK_FLAG_DUPLICATE
;
2214 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID10
)
2215 return BLOCK_FLAG_RAID10
;
2216 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
)
2217 return BLOCK_FLAG_RAID5
;
2218 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)
2219 return BLOCK_FLAG_RAID6
;
2221 return BLOCK_FLAG_SINGLE
;
2224 static bool should_balance_chunk(device_extension
* Vcb
, uint8_t sort
, chunk
* c
) {
2225 btrfs_balance_opts
* opts
;
2227 opts
= &Vcb
->balance
.opts
[sort
];
2229 if (!(opts
->flags
& BTRFS_BALANCE_OPTS_ENABLED
))
2232 if (opts
->flags
& BTRFS_BALANCE_OPTS_PROFILES
) {
2233 uint64_t type
= get_chunk_dup_type(c
);
2235 if (!(type
& opts
->profiles
))
2239 if (opts
->flags
& BTRFS_BALANCE_OPTS_DEVID
) {
2241 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
2244 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
2245 if (cis
[i
].dev_id
== opts
->devid
) {
2255 if (opts
->flags
& BTRFS_BALANCE_OPTS_DRANGE
) {
2258 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
2261 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID0
)
2262 factor
= c
->chunk_item
->num_stripes
;
2263 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID10
)
2264 factor
= c
->chunk_item
->num_stripes
/ c
->chunk_item
->sub_stripes
;
2265 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
)
2266 factor
= c
->chunk_item
->num_stripes
- 1;
2267 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)
2268 factor
= c
->chunk_item
->num_stripes
- 2;
2269 else // SINGLE, DUPLICATE, RAID1
2272 physsize
= c
->chunk_item
->size
/ factor
;
2274 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
2275 if (cis
[i
].offset
< opts
->drange_end
&& cis
[i
].offset
+ physsize
>= opts
->drange_start
&&
2276 (!(opts
->flags
& BTRFS_BALANCE_OPTS_DEVID
) || cis
[i
].dev_id
== opts
->devid
)) {
2286 if (opts
->flags
& BTRFS_BALANCE_OPTS_VRANGE
) {
2287 if (c
->offset
+ c
->chunk_item
->size
<= opts
->vrange_start
|| c
->offset
> opts
->vrange_end
)
2291 if (opts
->flags
& BTRFS_BALANCE_OPTS_STRIPES
) {
2292 if (c
->chunk_item
->num_stripes
< opts
->stripes_start
|| c
->chunk_item
->num_stripes
< opts
->stripes_end
)
2296 if (opts
->flags
& BTRFS_BALANCE_OPTS_USAGE
) {
2297 uint64_t usage
= c
->used
* 100 / c
->chunk_item
->size
;
2299 // usage == 0 should mean completely empty, not just that usage rounds to 0%
2300 if (c
->used
> 0 && usage
== 0)
2303 if (usage
< opts
->usage_start
|| usage
> opts
->usage_end
)
2307 if (opts
->flags
& BTRFS_BALANCE_OPTS_CONVERT
&& opts
->flags
& BTRFS_BALANCE_OPTS_SOFT
) {
2308 uint64_t type
= get_chunk_dup_type(c
);
2310 if (type
== opts
->convert
)
2317 static void copy_balance_args(btrfs_balance_opts
* opts
, BALANCE_ARGS
* args
) {
2318 if (opts
->flags
& BTRFS_BALANCE_OPTS_PROFILES
) {
2319 args
->profiles
= opts
->profiles
;
2320 args
->flags
|= BALANCE_ARGS_FLAGS_PROFILES
;
2323 if (opts
->flags
& BTRFS_BALANCE_OPTS_USAGE
) {
2324 if (args
->usage_start
== 0) {
2325 args
->flags
|= BALANCE_ARGS_FLAGS_USAGE_RANGE
;
2326 args
->usage_start
= opts
->usage_start
;
2327 args
->usage_end
= opts
->usage_end
;
2329 args
->flags
|= BALANCE_ARGS_FLAGS_USAGE
;
2330 args
->usage
= opts
->usage_end
;
2334 if (opts
->flags
& BTRFS_BALANCE_OPTS_DEVID
) {
2335 args
->devid
= opts
->devid
;
2336 args
->flags
|= BALANCE_ARGS_FLAGS_DEVID
;
2339 if (opts
->flags
& BTRFS_BALANCE_OPTS_DRANGE
) {
2340 args
->drange_start
= opts
->drange_start
;
2341 args
->drange_end
= opts
->drange_end
;
2342 args
->flags
|= BALANCE_ARGS_FLAGS_DRANGE
;
2345 if (opts
->flags
& BTRFS_BALANCE_OPTS_VRANGE
) {
2346 args
->vrange_start
= opts
->vrange_start
;
2347 args
->vrange_end
= opts
->vrange_end
;
2348 args
->flags
|= BALANCE_ARGS_FLAGS_VRANGE
;
2351 if (opts
->flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
2352 args
->convert
= opts
->convert
;
2353 args
->flags
|= BALANCE_ARGS_FLAGS_CONVERT
;
2355 if (opts
->flags
& BTRFS_BALANCE_OPTS_SOFT
)
2356 args
->flags
|= BALANCE_ARGS_FLAGS_SOFT
;
2359 if (opts
->flags
& BTRFS_BALANCE_OPTS_LIMIT
) {
2360 if (args
->limit_start
== 0) {
2361 args
->flags
|= BALANCE_ARGS_FLAGS_LIMIT_RANGE
;
2362 args
->limit_start
= (uint32_t)opts
->limit_start
;
2363 args
->limit_end
= (uint32_t)opts
->limit_end
;
2365 args
->flags
|= BALANCE_ARGS_FLAGS_LIMIT
;
2366 args
->limit
= opts
->limit_end
;
2370 if (opts
->flags
& BTRFS_BALANCE_OPTS_STRIPES
) {
2371 args
->stripes_start
= opts
->stripes_start
;
2372 args
->stripes_end
= opts
->stripes_end
;
2373 args
->flags
|= BALANCE_ARGS_FLAGS_STRIPES_RANGE
;
2377 static NTSTATUS
add_balance_item(device_extension
* Vcb
) {
2383 searchkey
.obj_id
= BALANCE_ITEM_ID
;
2384 searchkey
.obj_type
= TYPE_TEMP_ITEM
;
2385 searchkey
.offset
= 0;
2387 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
2389 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, NULL
);
2390 if (!NT_SUCCESS(Status
)) {
2391 ERR("find_item returned %08x\n", Status
);
2395 if (!keycmp(tp
.item
->key
, searchkey
)) {
2396 Status
= delete_tree_item(Vcb
, &tp
);
2397 if (!NT_SUCCESS(Status
)) {
2398 ERR("delete_tree_item returned %08x\n", Status
);
2403 bi
= ExAllocatePoolWithTag(PagedPool
, sizeof(BALANCE_ITEM
), ALLOC_TAG
);
2405 ERR("out of memory\n");
2406 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2410 RtlZeroMemory(bi
, sizeof(BALANCE_ITEM
));
2412 if (Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
2413 bi
->flags
|= BALANCE_FLAGS_DATA
;
2414 copy_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], &bi
->data
);
2417 if (Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
2418 bi
->flags
|= BALANCE_FLAGS_METADATA
;
2419 copy_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], &bi
->metadata
);
2422 if (Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
2423 bi
->flags
|= BALANCE_FLAGS_SYSTEM
;
2424 copy_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
], &bi
->system
);
2427 Status
= insert_tree_item(Vcb
, Vcb
->root_root
, BALANCE_ITEM_ID
, TYPE_TEMP_ITEM
, 0, bi
, sizeof(BALANCE_ITEM
), NULL
, NULL
);
2428 if (!NT_SUCCESS(Status
)) {
2429 ERR("insert_tree_item returned %08x\n", Status
);
2434 Status
= STATUS_SUCCESS
;
2437 if (NT_SUCCESS(Status
)) {
2438 Status
= do_write(Vcb
, NULL
);
2439 if (!NT_SUCCESS(Status
))
2440 ERR("do_write returned %08x\n", Status
);
2445 ExReleaseResourceLite(&Vcb
->tree_lock
);
2450 static NTSTATUS
remove_balance_item(device_extension
* Vcb
) {
2455 searchkey
.obj_id
= BALANCE_ITEM_ID
;
2456 searchkey
.obj_type
= TYPE_TEMP_ITEM
;
2457 searchkey
.offset
= 0;
2459 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
2461 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, NULL
);
2462 if (!NT_SUCCESS(Status
)) {
2463 ERR("find_item returned %08x\n", Status
);
2467 if (!keycmp(tp
.item
->key
, searchkey
)) {
2468 Status
= delete_tree_item(Vcb
, &tp
);
2469 if (!NT_SUCCESS(Status
)) {
2470 ERR("delete_tree_item returned %08x\n", Status
);
2474 Status
= do_write(Vcb
, NULL
);
2475 if (!NT_SUCCESS(Status
)) {
2476 ERR("do_write returned %08x\n", Status
);
2483 Status
= STATUS_SUCCESS
;
2486 ExReleaseResourceLite(&Vcb
->tree_lock
);
2491 static void load_balance_args(btrfs_balance_opts
* opts
, BALANCE_ARGS
* args
) {
2492 opts
->flags
= BTRFS_BALANCE_OPTS_ENABLED
;
2494 if (args
->flags
& BALANCE_ARGS_FLAGS_PROFILES
) {
2495 opts
->flags
|= BTRFS_BALANCE_OPTS_PROFILES
;
2496 opts
->profiles
= args
->profiles
;
2499 if (args
->flags
& BALANCE_ARGS_FLAGS_USAGE
) {
2500 opts
->flags
|= BTRFS_BALANCE_OPTS_USAGE
;
2502 opts
->usage_start
= 0;
2503 opts
->usage_end
= (uint8_t)args
->usage
;
2504 } else if (args
->flags
& BALANCE_ARGS_FLAGS_USAGE_RANGE
) {
2505 opts
->flags
|= BTRFS_BALANCE_OPTS_USAGE
;
2507 opts
->usage_start
= (uint8_t)args
->usage_start
;
2508 opts
->usage_end
= (uint8_t)args
->usage_end
;
2511 if (args
->flags
& BALANCE_ARGS_FLAGS_DEVID
) {
2512 opts
->flags
|= BTRFS_BALANCE_OPTS_DEVID
;
2513 opts
->devid
= args
->devid
;
2516 if (args
->flags
& BALANCE_ARGS_FLAGS_DRANGE
) {
2517 opts
->flags
|= BTRFS_BALANCE_OPTS_DRANGE
;
2518 opts
->drange_start
= args
->drange_start
;
2519 opts
->drange_end
= args
->drange_end
;
2522 if (args
->flags
& BALANCE_ARGS_FLAGS_VRANGE
) {
2523 opts
->flags
|= BTRFS_BALANCE_OPTS_VRANGE
;
2524 opts
->vrange_start
= args
->vrange_start
;
2525 opts
->vrange_end
= args
->vrange_end
;
2528 if (args
->flags
& BALANCE_ARGS_FLAGS_LIMIT
) {
2529 opts
->flags
|= BTRFS_BALANCE_OPTS_LIMIT
;
2531 opts
->limit_start
= 0;
2532 opts
->limit_end
= args
->limit
;
2533 } else if (args
->flags
& BALANCE_ARGS_FLAGS_LIMIT_RANGE
) {
2534 opts
->flags
|= BTRFS_BALANCE_OPTS_LIMIT
;
2536 opts
->limit_start
= args
->limit_start
;
2537 opts
->limit_end
= args
->limit_end
;
2540 if (args
->flags
& BALANCE_ARGS_FLAGS_STRIPES_RANGE
) {
2541 opts
->flags
|= BTRFS_BALANCE_OPTS_STRIPES
;
2543 opts
->stripes_start
= (uint16_t)args
->stripes_start
;
2544 opts
->stripes_end
= (uint16_t)args
->stripes_end
;
2547 if (args
->flags
& BALANCE_ARGS_FLAGS_CONVERT
) {
2548 opts
->flags
|= BTRFS_BALANCE_OPTS_CONVERT
;
2549 opts
->convert
= args
->convert
;
2551 if (args
->flags
& BALANCE_ARGS_FLAGS_SOFT
)
2552 opts
->flags
|= BTRFS_BALANCE_OPTS_SOFT
;
2556 static NTSTATUS
remove_superblocks(device
* dev
) {
2561 sb
= ExAllocatePoolWithTag(PagedPool
, sizeof(superblock
), ALLOC_TAG
);
2563 ERR("out of memory\n");
2564 return STATUS_INSUFFICIENT_RESOURCES
;
2567 RtlZeroMemory(sb
, sizeof(superblock
));
2569 while (superblock_addrs
[i
] > 0 && dev
->devitem
.num_bytes
>= superblock_addrs
[i
] + sizeof(superblock
)) {
2570 Status
= write_data_phys(dev
->devobj
, dev
->fileobj
, superblock_addrs
[i
], sb
, sizeof(superblock
));
2572 if (!NT_SUCCESS(Status
)) {
2582 return STATUS_SUCCESS
;
2585 static NTSTATUS
finish_removing_device(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, device
* dev
) {
2590 volume_device_extension
* vde
;
2592 if (Vcb
->need_write
) {
2593 Status
= do_write(Vcb
, NULL
);
2595 if (!NT_SUCCESS(Status
))
2596 ERR("do_write returned %08x\n", Status
);
2598 Status
= STATUS_SUCCESS
;
2602 if (!NT_SUCCESS(Status
))
2605 // remove entry in chunk tree
2607 searchkey
.obj_id
= 1;
2608 searchkey
.obj_type
= TYPE_DEV_ITEM
;
2609 searchkey
.offset
= dev
->devitem
.dev_id
;
2611 Status
= find_item(Vcb
, Vcb
->chunk_root
, &tp
, &searchkey
, false, NULL
);
2612 if (!NT_SUCCESS(Status
)) {
2613 ERR("find_item returned %08x\n", Status
);
2617 if (!keycmp(searchkey
, tp
.item
->key
)) {
2618 Status
= delete_tree_item(Vcb
, &tp
);
2620 if (!NT_SUCCESS(Status
)) {
2621 ERR("delete_tree_item returned %08x\n", Status
);
2626 // remove stats entry in device tree
2628 searchkey
.obj_id
= 0;
2629 searchkey
.obj_type
= TYPE_DEV_STATS
;
2630 searchkey
.offset
= dev
->devitem
.dev_id
;
2632 Status
= find_item(Vcb
, Vcb
->dev_root
, &tp
, &searchkey
, false, NULL
);
2633 if (!NT_SUCCESS(Status
)) {
2634 ERR("find_item returned %08x\n", Status
);
2638 if (!keycmp(searchkey
, tp
.item
->key
)) {
2639 Status
= delete_tree_item(Vcb
, &tp
);
2641 if (!NT_SUCCESS(Status
)) {
2642 ERR("delete_tree_item returned %08x\n", Status
);
2647 // update superblock
2649 Vcb
->superblock
.num_devices
--;
2650 Vcb
->superblock
.total_bytes
-= dev
->devitem
.num_bytes
;
2651 Vcb
->devices_loaded
--;
2653 RemoveEntryList(&dev
->list_entry
);
2657 Status
= do_write(Vcb
, NULL
);
2658 if (!NT_SUCCESS(Status
))
2659 ERR("do_write returned %08x\n", Status
);
2663 if (!NT_SUCCESS(Status
))
2666 if (!dev
->readonly
&& dev
->devobj
) {
2667 Status
= remove_superblocks(dev
);
2668 if (!NT_SUCCESS(Status
))
2669 WARN("remove_superblocks returned %08x\n", Status
);
2672 // remove entry in volume list
2677 pdo_device_extension
* pdode
= vde
->pdode
;
2679 ExAcquireResourceExclusiveLite(&pdode
->child_lock
, true);
2681 le
= pdode
->children
.Flink
;
2682 while (le
!= &pdode
->children
) {
2683 volume_child
* vc
= CONTAINING_RECORD(le
, volume_child
, list_entry
);
2685 if (RtlCompareMemory(&dev
->devitem
.device_uuid
, &vc
->uuid
, sizeof(BTRFS_UUID
)) == sizeof(BTRFS_UUID
)) {
2686 PFILE_OBJECT FileObject
;
2687 PDEVICE_OBJECT mountmgr
;
2688 UNICODE_STRING mmdevpath
;
2690 pdode
->children_loaded
--;
2692 if (vc
->had_drive_letter
) { // re-add entry to mountmgr
2693 RtlInitUnicodeString(&mmdevpath
, MOUNTMGR_DEVICE_NAME
);
2694 Status
= IoGetDeviceObjectPointer(&mmdevpath
, FILE_READ_ATTRIBUTES
, &FileObject
, &mountmgr
);
2695 if (!NT_SUCCESS(Status
))
2696 ERR("IoGetDeviceObjectPointer returned %08x\n", Status
);
2700 Status
= dev_ioctl(dev
->devobj
, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME
, NULL
, 0, &mdn
, sizeof(MOUNTDEV_NAME
), true, NULL
);
2701 if (!NT_SUCCESS(Status
) && Status
!= STATUS_BUFFER_OVERFLOW
)
2702 ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status
);
2704 MOUNTDEV_NAME
* mdn2
;
2705 ULONG mdnsize
= (ULONG
)offsetof(MOUNTDEV_NAME
, Name
[0]) + mdn
.NameLength
;
2707 mdn2
= ExAllocatePoolWithTag(PagedPool
, mdnsize
, ALLOC_TAG
);
2709 ERR("out of memory\n");
2711 Status
= dev_ioctl(dev
->devobj
, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME
, NULL
, 0, mdn2
, mdnsize
, true, NULL
);
2712 if (!NT_SUCCESS(Status
))
2713 ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status
);
2715 UNICODE_STRING name
;
2717 name
.Buffer
= mdn2
->Name
;
2718 name
.Length
= name
.MaximumLength
= mdn2
->NameLength
;
2720 Status
= mountmgr_add_drive_letter(mountmgr
, &name
);
2721 if (!NT_SUCCESS(Status
))
2722 WARN("mountmgr_add_drive_letter returned %08x\n", Status
);
2729 ObDereferenceObject(FileObject
);
2733 ExFreePool(vc
->pnp_name
.Buffer
);
2734 RemoveEntryList(&vc
->list_entry
);
2737 ObDereferenceObject(vc
->fileobj
);
2745 if (pdode
->children_loaded
> 0 && vde
->device
->Characteristics
& FILE_REMOVABLE_MEDIA
) {
2746 vde
->device
->Characteristics
&= ~FILE_REMOVABLE_MEDIA
;
2748 le
= pdode
->children
.Flink
;
2749 while (le
!= &pdode
->children
) {
2750 volume_child
* vc
= CONTAINING_RECORD(le
, volume_child
, list_entry
);
2752 if (vc
->devobj
->Characteristics
& FILE_REMOVABLE_MEDIA
) {
2753 vde
->device
->Characteristics
|= FILE_REMOVABLE_MEDIA
;
2761 pdode
->num_children
= Vcb
->superblock
.num_devices
;
2763 ExReleaseResourceLite(&pdode
->child_lock
);
2767 if (dev
->trim
&& !dev
->readonly
&& !Vcb
->options
.no_trim
)
2768 trim_whole_device(dev
);
2771 while (!IsListEmpty(&dev
->space
)) {
2772 LIST_ENTRY
* le2
= RemoveHeadList(&dev
->space
);
2773 space
* s
= CONTAINING_RECORD(le2
, space
, list_entry
);
2783 le
= Vcb
->devices
.Flink
;
2784 while (le
!= &Vcb
->devices
) {
2785 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
2796 FsRtlNotifyVolumeEvent(Vcb
->root_file
, FSRTL_VOLUME_CHANGE_SIZE
);
2798 return STATUS_SUCCESS
;
/* trim_unalloc_space: issue a TRIM for the ranges of `dev` that no DEV_EXTENT covers.
 *
 * Visible behaviour:
 *  - resets dev->num_trim_entries, then looks up (dev_id, TYPE_DEV_EXTENT, 0) in Vcb->dev_root;
 *  - for each DEV_EXTENT item of this device, passes the gap between `lastoff` and the
 *    item's offset to add_trim_entry_avoid_sb(), then moves `lastoff` past the extent;
 *  - passes the tail up to devitem.num_bytes the same way;
 *  - builds a DEVICE_MANAGE_DATA_SET_ATTRIBUTES buffer with one DEVICE_DATA_SET_RANGE
 *    per entry of dev->trim_list and sends it with IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES
 *    (a failure of the ioctl is only logged with WARN);
 *  - finally drains dev->trim_list and zeroes dev->num_trim_entries.
 *
 * The annotation on Vcb requires the caller to hold tree_lock.
 *
 * NOTE(review): several lines of this function are not visible in this listing (the
 * declarations of searchkey/tp/Status/b/datalen/i/le, the loop and branch delimiters,
 * and whatever frees the list entries and dmdsa) - confirm against the full file.
 */
2801 static void trim_unalloc_space(_Requires_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, device
* dev
) {
2802 DEVICE_MANAGE_DATA_SET_ATTRIBUTES
* dmdsa
;
2803 DEVICE_DATA_SET_RANGE
* ranges
;
2809 uint64_t lastoff
= 0x100000; // don't TRIM the first megabyte, in case someone has been daft enough to install GRUB there
2812 dev
->num_trim_entries
= 0;
// start at the first DEV_EXTENT item belonging to this device
2814 searchkey
.obj_id
= dev
->devitem
.dev_id
;
2815 searchkey
.obj_type
= TYPE_DEV_EXTENT
;
2816 searchkey
.offset
= 0;
2818 Status
= find_item(Vcb
, Vcb
->dev_root
, &tp
, &searchkey
, false, NULL
);
2819 if (!NT_SUCCESS(Status
)) {
2820 ERR("find_item returned %08x\n", Status
);
2825 traverse_ptr next_tp
;
2827 if (tp
.item
->key
.obj_id
== dev
->devitem
.dev_id
&& tp
.item
->key
.obj_type
== TYPE_DEV_EXTENT
) {
2828 if (tp
.item
->size
>= sizeof(DEV_EXTENT
)) {
2829 DEV_EXTENT
* de
= (DEV_EXTENT
*)tp
.item
->data
;
2831 if (tp
.item
->key
.offset
> lastoff
)
2832 add_trim_entry_avoid_sb(Vcb
, dev
, lastoff
, tp
.item
->key
.offset
- lastoff
);
2834 lastoff
= tp
.item
->key
.offset
+ de
->length
;
2836 ERR("(%I64x,%x,%I64x) was %u bytes, expected %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, sizeof(DEV_EXTENT
));
2841 b
= find_next_item(Vcb
, &tp
, &next_tp
, false, NULL
);
2845 if (tp
.item
->key
.obj_id
> searchkey
.obj_id
|| (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
> searchkey
.obj_type
))
// everything between the last extent and the end of the device is unallocated too
2850 if (lastoff
< dev
->devitem
.num_bytes
)
2851 add_trim_entry_avoid_sb(Vcb
, dev
, lastoff
, dev
->devitem
.num_bytes
- lastoff
);
2853 if (dev
->num_trim_entries
== 0)
// header rounded up to a multiple of sizeof(uint64_t), followed by one range per trim entry
2856 datalen
= (ULONG
)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES
), sizeof(uint64_t)) + (dev
->num_trim_entries
* sizeof(DEVICE_DATA_SET_RANGE
));
2858 dmdsa
= ExAllocatePoolWithTag(PagedPool
, datalen
, ALLOC_TAG
);
2860 ERR("out of memory\n");
2864 dmdsa
->Size
= sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES
);
2865 dmdsa
->Action
= DeviceDsmAction_Trim
;
2866 dmdsa
->Flags
= DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED
;
2867 dmdsa
->ParameterBlockOffset
= 0;
2868 dmdsa
->ParameterBlockLength
= 0;
2869 dmdsa
->DataSetRangesOffset
= (ULONG
)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES
), sizeof(uint64_t));
2870 dmdsa
->DataSetRangesLength
= dev
->num_trim_entries
* sizeof(DEVICE_DATA_SET_RANGE
);
2872 ranges
= (DEVICE_DATA_SET_RANGE
*)((uint8_t*)dmdsa
+ dmdsa
->DataSetRangesOffset
);
// copy each queued trim entry into the range array
2875 le
= dev
->trim_list
.Flink
;
2876 while (le
!= &dev
->trim_list
) {
2877 space
* s
= CONTAINING_RECORD(le
, space
, list_entry
);
2879 ranges
[i
].StartingOffset
= s
->address
;
2880 ranges
[i
].LengthInBytes
= s
->size
;
2886 Status
= dev_ioctl(dev
->devobj
, IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES
, dmdsa
, datalen
, NULL
, 0, true, NULL
);
2887 if (!NT_SUCCESS(Status
))
2888 WARN("IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES returned %08x\n", Status
);
// empty the per-device trim list again
2893 while (!IsListEmpty(&dev
->trim_list
)) {
2894 space
* s
= CONTAINING_RECORD(RemoveHeadList(&dev
->trim_list
), space
, list_entry
);
2898 dev
->num_trim_entries
= 0;
/* try_consolidation: try to make room for a new chunk by relocating the contents of
 * existing data chunks, then retry the allocation.
 *
 * Visible behaviour:
 *  - under tree_lock and chunk_lock (both shared), scans Vcb->chunks for a candidate `rc`:
 *    a BLOCK_FLAG_DATA chunk that is not readonly, whose balance_num differs from the
 *    current Vcb->balance.balance_num, preferring the smallest `used`;
 *  - if the candidate is already queued (list_entry_balance.Flink set) it is unlinked and
 *    chunks_left is decremented; Flink is then set to the sentinel value 1
 *    ("so it doesn't get dropped");
 *  - calls balance_data_chunk() on it, waiting on Vcb->balance.event around the call
 *    and honouring Vcb->balance.stopping;
 *  - marks the chunk as done for this run (space_changed, balance_num) and flushes with do_write();
 *  - finally calls alloc_chunk(Vcb, flags, &rc, true) under the exclusive chunk_lock.
 *
 * `flags` are the allocation flags for the chunk wanted by the caller.
 * `newchunk` presumably receives the chunk allocated at the end - the assignment is
 * not visible in this listing, confirm against the full file.
 *
 * NOTE(review): the declarations, the loop/retry structure and all goto/return lines
 * between the visible statements are missing from this listing.
 */
2901 static NTSTATUS
try_consolidation(device_extension
* Vcb
, uint64_t flags
, chunk
** newchunk
) {
2907 // FIXME - allow with metadata chunks?
2912 ExAcquireResourceSharedLite(&Vcb
->tree_lock
, true);
2914 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, true);
2916 // choose the least-used chunk we haven't looked at yet
2917 le
= Vcb
->chunks
.Flink
;
2918 while (le
!= &Vcb
->chunks
) {
2919 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
2921 // FIXME - skip full-size chunks over e.g. 90% full?
2922 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
&& !c
->readonly
&& c
->balance_num
!= Vcb
->balance
.balance_num
&& (!rc
|| c
->used
< rc
->used
))
2928 ExReleaseResourceLite(&Vcb
->chunk_lock
);
2931 ExReleaseResourceLite(&Vcb
->tree_lock
);
// the candidate may already be on the balance thread's work list; take it off
2935 if (rc
->list_entry_balance
.Flink
) {
2936 RemoveEntryList(&rc
->list_entry_balance
);
2937 Vcb
->balance
.chunks_left
--;
2940 rc
->list_entry_balance
.Flink
= (LIST_ENTRY
*)1; // so it doesn't get dropped
2943 ExReleaseResourceLite(&Vcb
->tree_lock
);
2948 Status
= balance_data_chunk(Vcb
, rc
, &changed
);
2949 if (!NT_SUCCESS(Status
)) {
2950 ERR("balance_data_chunk returned %08x\n", Status
);
2951 Vcb
->balance
.status
= Status
;
2952 rc
->list_entry_balance
.Flink
= NULL
;
2957 KeWaitForSingleObject(&Vcb
->balance
.event
, Executive
, KernelMode
, false, NULL
);
2960 Vcb
->balance
.stopping
= true;
2962 if (Vcb
->balance
.stopping
)
2963 return STATUS_SUCCESS
;
2966 rc
->list_entry_balance
.Flink
= NULL
;
// record that this chunk has been handled in the current balance run
2969 rc
->space_changed
= true;
2970 rc
->balance_num
= Vcb
->balance
.balance_num
;
2972 Status
= do_write(Vcb
, NULL
);
2973 if (!NT_SUCCESS(Status
)) {
2974 ERR("do_write returned %08x\n", Status
);
// retry the allocation that originally failed
2981 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
2983 Status
= alloc_chunk(Vcb
, flags
, &rc
, true);
2985 ExReleaseResourceLite(&Vcb
->chunk_lock
);
2987 if (NT_SUCCESS(Status
)) {
2991 ERR("alloc_chunk returned %08x\n", Status
);
2996 static NTSTATUS
regenerate_space_list(device_extension
* Vcb
, device
* dev
) {
2999 while (!IsListEmpty(&dev
->space
)) {
3000 space
* s
= CONTAINING_RECORD(RemoveHeadList(&dev
->space
), space
, list_entry
);
3005 // The Linux driver doesn't like to allocate chunks within the first megabyte of a device.
3007 space_list_add2(&dev
->space
, NULL
, 0x100000, dev
->devitem
.num_bytes
- 0x100000, NULL
, NULL
);
3009 le
= Vcb
->chunks
.Flink
;
3010 while (le
!= &Vcb
->chunks
) {
3012 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
3013 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
3015 for (n
= 0; n
< c
->chunk_item
->num_stripes
; n
++) {
3016 uint64_t stripe_size
= 0;
3018 if (cis
[n
].dev_id
== dev
->devitem
.dev_id
) {
3019 if (stripe_size
== 0) {
3022 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID0
)
3023 factor
= c
->chunk_item
->num_stripes
;
3024 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID10
)
3025 factor
= c
->chunk_item
->num_stripes
/ c
->chunk_item
->sub_stripes
;
3026 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
)
3027 factor
= c
->chunk_item
->num_stripes
- 1;
3028 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)
3029 factor
= c
->chunk_item
->num_stripes
- 2;
3030 else // SINGLE, DUP, RAID1
3033 stripe_size
= c
->chunk_item
->size
/ factor
;
3036 space_list_subtract2(&dev
->space
, NULL
, cis
[n
].offset
, stripe_size
, NULL
, NULL
);
3043 return STATUS_SUCCESS
;
/* balance_thread: system-thread entry point that carries out a balance, device removal
 * or device shrink. `context` is the device_extension of the volume.
 *
 * Outline of the visible statements:
 *  1. bump Vcb->balance.balance_num, clear `stopping`, initialise the `finished` event;
 *  2. for each of data/metadata/system: if that balance type is enabled with CONVERT,
 *     remember the current allocation flags and switch Vcb->*_flags to the target profile;
 *  3. on mixed-block-group volumes copy the enabled option set between data and metadata;
 *  4. unless the volume is readonly: add_balance_item() for a plain balance, otherwise
 *     do_write() if there are pending changes;
 *  5. wait on Vcb->balance.event (the pause gate) and check `stopping`;
 *  6. walk Vcb->chunks under chunk_lock: classify each chunk, queue those accepted by
 *     should_balance_chunk() (subject to the LIMIT option) on the local `chunks` list,
 *     count the ones left alone per type, and load their free-space cache if needed;
 *  7. if some type has no untouched chunk, allocate a fresh chunk of that type first,
 *     falling back to try_consolidation() on STATUS_DISK_FULL;
 *  8. relocate data chunks with balance_data_chunk(), then metadata and system chunks
 *     with balance_metadata_chunk(), re-checking the pause event and `stopping`;
 *  9. afterwards (volume not readonly): restore the saved flags if the run stopped or
 *     failed; for a removal call finish_removing_device() once chunks_left is 0; for a
 *     shrink set the device size to opts[0].drange_start via update_dev_item(); otherwise
 *     remove_balance_item(); then TRIM unallocated space on each eligible device;
 * 10. close the thread handle, clear Vcb->balance.thread and signal `finished`.
 *
 * Errors are reported through Vcb->balance.status rather than a return value.
 *
 * NOTE(review): this listing has lost many lines (declarations such as chunks/le/Status/
 * sort/changed/dev/old_size, every goto/break, `else` keywords and closing braces), so
 * the exact nesting of the steps above cannot be confirmed from here.
 */
3046 _Function_class_(KSTART_ROUTINE
)
3047 void __stdcall
balance_thread(void* context
) {
3048 device_extension
* Vcb
= (device_extension
*)context
;
3051 uint64_t num_chunks
[3], okay_metadata_chunks
= 0, okay_data_chunks
= 0, okay_system_chunks
= 0;
3052 uint64_t old_data_flags
= 0, old_metadata_flags
= 0, old_system_flags
= 0;
3055 Vcb
->balance
.balance_num
++;
3057 Vcb
->balance
.stopping
= false;
3058 KeInitializeEvent(&Vcb
->balance
.finished
, NotificationEvent
, false);
3060 if (Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
&& Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
3061 old_data_flags
= Vcb
->data_flags
;
3062 Vcb
->data_flags
= BLOCK_FLAG_DATA
| (Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].convert
== BLOCK_FLAG_SINGLE
? 0 : Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].convert
);
3064 FsRtlNotifyVolumeEvent(Vcb
->root_file
, FSRTL_VOLUME_CHANGE_SIZE
);
3067 if (Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
&& Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
3068 old_metadata_flags
= Vcb
->metadata_flags
;
3069 Vcb
->metadata_flags
= BLOCK_FLAG_METADATA
| (Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].convert
== BLOCK_FLAG_SINGLE
? 0 : Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].convert
);
3072 if (Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].flags
& BTRFS_BALANCE_OPTS_ENABLED
&& Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
3073 old_system_flags
= Vcb
->system_flags
;
3074 Vcb
->system_flags
= BLOCK_FLAG_SYSTEM
| (Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].convert
== BLOCK_FLAG_SINGLE
? 0 : Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].convert
);
// with mixed block groups data and metadata share chunks, so they share one option set
3077 if (Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS
) {
3078 if (Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
)
3079 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], &Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], sizeof(btrfs_balance_opts
));
3080 else if (Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
)
3081 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], &Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], sizeof(btrfs_balance_opts
));
3084 num_chunks
[0] = num_chunks
[1] = num_chunks
[2] = 0;
3085 Vcb
->balance
.total_chunks
= Vcb
->balance
.chunks_left
= 0;
3087 InitializeListHead(&chunks
);
3089 // FIXME - what are we supposed to do with limit_start?
3091 if (!Vcb
->readonly
) {
3092 if (!Vcb
->balance
.removing
&& !Vcb
->balance
.shrinking
) {
3093 Status
= add_balance_item(Vcb
);
3094 if (!NT_SUCCESS(Status
)) {
3095 ERR("add_balance_item returned %08x\n", Status
);
3096 Vcb
->balance
.status
= Status
;
3100 if (Vcb
->need_write
) {
3101 Status
= do_write(Vcb
, NULL
);
3105 if (!NT_SUCCESS(Status
)) {
3106 ERR("do_write returned %08x\n", Status
);
3107 Vcb
->balance
.status
= Status
;
3114 KeWaitForSingleObject(&Vcb
->balance
.event
, Executive
, KernelMode
, false, NULL
);
3116 if (Vcb
->balance
.stopping
)
3119 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, true);
3121 le
= Vcb
->chunks
.Flink
;
3122 while (le
!= &Vcb
->chunks
) {
3123 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
3126 acquire_chunk_lock(c
, Vcb
);
3128 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
)
3129 sort
= BALANCE_OPTS_DATA
;
3130 else if (c
->chunk_item
->type
& BLOCK_FLAG_METADATA
)
3131 sort
= BALANCE_OPTS_METADATA
;
3132 else if (c
->chunk_item
->type
& BLOCK_FLAG_SYSTEM
)
3133 sort
= BALANCE_OPTS_SYSTEM
;
3135 ERR("unexpected chunk type %I64x\n", c
->chunk_item
->type
);
3136 release_chunk_lock(c
, Vcb
);
3140 if ((!(Vcb
->balance
.opts
[sort
].flags
& BTRFS_BALANCE_OPTS_LIMIT
) || num_chunks
[sort
] < Vcb
->balance
.opts
[sort
].limit_end
) &&
3141 should_balance_chunk(Vcb
, sort
, c
)) {
3142 InsertTailList(&chunks
, &c
->list_entry_balance
);
3145 Vcb
->balance
.total_chunks
++;
3146 Vcb
->balance
.chunks_left
++;
3147 } else if (sort
== BALANCE_OPTS_METADATA
)
3148 okay_metadata_chunks
++;
3149 else if (sort
== BALANCE_OPTS_DATA
)
3151 else if (sort
== BALANCE_OPTS_SYSTEM
)
3152 okay_system_chunks
++;
3154 if (!c
->cache_loaded
) {
3155 Status
= load_cache_chunk(Vcb
, c
, NULL
);
3157 if (!NT_SUCCESS(Status
)) {
3158 ERR("load_cache_chunk returned %08x\n", Status
);
3159 Vcb
->balance
.status
= Status
;
3160 release_chunk_lock(c
, Vcb
);
3161 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3166 release_chunk_lock(c
, Vcb
);
3171 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3173 // If we're doing a full balance, try and allocate a new chunk now, before we mess things up
3174 if (okay_metadata_chunks
== 0 || okay_data_chunks
== 0 || okay_system_chunks
== 0) {
3175 bool consolidated
= false;
3178 if (okay_metadata_chunks
== 0) {
3179 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
3181 Status
= alloc_chunk(Vcb
, Vcb
->metadata_flags
, &c
, true);
3182 if (NT_SUCCESS(Status
))
3183 c
->balance_num
= Vcb
->balance
.balance_num
;
3184 else if (Status
!= STATUS_DISK_FULL
|| consolidated
) {
3185 ERR("alloc_chunk returned %08x\n", Status
);
3186 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3187 Vcb
->balance
.status
= Status
;
3191 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3193 if (Status
== STATUS_DISK_FULL
) {
3194 Status
= try_consolidation(Vcb
, Vcb
->metadata_flags
, &c
);
3195 if (!NT_SUCCESS(Status
)) {
3196 ERR("try_consolidation returned %08x\n", Status
);
3197 Vcb
->balance
.status
= Status
;
3200 c
->balance_num
= Vcb
->balance
.balance_num
;
3202 consolidated
= true;
3204 if (Vcb
->balance
.stopping
)
3209 if (okay_data_chunks
== 0) {
3210 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
3212 Status
= alloc_chunk(Vcb
, Vcb
->data_flags
, &c
, true);
3213 if (NT_SUCCESS(Status
))
3214 c
->balance_num
= Vcb
->balance
.balance_num
;
3215 else if (Status
!= STATUS_DISK_FULL
|| consolidated
) {
3216 ERR("alloc_chunk returned %08x\n", Status
);
3217 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3218 Vcb
->balance
.status
= Status
;
3222 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3224 if (Status
== STATUS_DISK_FULL
) {
3225 Status
= try_consolidation(Vcb
, Vcb
->data_flags
, &c
);
3226 if (!NT_SUCCESS(Status
)) {
3227 ERR("try_consolidation returned %08x\n", Status
);
3228 Vcb
->balance
.status
= Status
;
3231 c
->balance_num
= Vcb
->balance
.balance_num
;
3233 consolidated
= true;
3235 if (Vcb
->balance
.stopping
)
3240 if (okay_system_chunks
== 0) {
3241 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
3243 Status
= alloc_chunk(Vcb
, Vcb
->system_flags
, &c
, true);
3244 if (NT_SUCCESS(Status
))
3245 c
->balance_num
= Vcb
->balance
.balance_num
;
3246 else if (Status
!= STATUS_DISK_FULL
|| consolidated
) {
3247 ERR("alloc_chunk returned %08x\n", Status
);
3248 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3249 Vcb
->balance
.status
= Status
;
3253 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3255 if (Status
== STATUS_DISK_FULL
) {
3256 Status
= try_consolidation(Vcb
, Vcb
->system_flags
, &c
);
3257 if (!NT_SUCCESS(Status
)) {
3258 ERR("try_consolidation returned %08x\n", Status
);
3259 Vcb
->balance
.status
= Status
;
3262 c
->balance_num
= Vcb
->balance
.balance_num
;
3264 consolidated
= true;
3266 if (Vcb
->balance
.stopping
)
3272 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, true);
3275 while (le
!= &chunks
) {
3276 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry_balance
);
3283 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3285 // do data chunks before metadata
3287 while (le
!= &chunks
) {
3288 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry_balance
);
3289 LIST_ENTRY
* le2
= le
->Flink
;
3291 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
) {
3297 Status
= balance_data_chunk(Vcb
, c
, &changed
);
3298 if (!NT_SUCCESS(Status
)) {
3299 ERR("balance_data_chunk returned %08x\n", Status
);
3300 Vcb
->balance
.status
= Status
;
3304 KeWaitForSingleObject(&Vcb
->balance
.event
, Executive
, KernelMode
, false, NULL
);
3307 Vcb
->balance
.stopping
= true;
3309 if (Vcb
->balance
.stopping
)
3314 c
->space_changed
= true;
3317 if (Vcb
->balance
.stopping
)
3320 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
&&
3321 (!(Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) || !(c
->chunk_item
->type
& BLOCK_FLAG_METADATA
))) {
3322 RemoveEntryList(&c
->list_entry_balance
);
3323 c
->list_entry_balance
.Flink
= NULL
;
3325 Vcb
->balance
.chunks_left
--;
3331 // do metadata chunks
3332 while (!IsListEmpty(&chunks
)) {
3336 le
= RemoveHeadList(&chunks
);
3337 c
= CONTAINING_RECORD(le
, chunk
, list_entry_balance
);
3339 if (c
->chunk_item
->type
& BLOCK_FLAG_METADATA
|| c
->chunk_item
->type
& BLOCK_FLAG_SYSTEM
) {
3341 Status
= balance_metadata_chunk(Vcb
, c
, &changed
);
3342 if (!NT_SUCCESS(Status
)) {
3343 ERR("balance_metadata_chunk returned %08x\n", Status
);
3344 Vcb
->balance
.status
= Status
;
3348 KeWaitForSingleObject(&Vcb
->balance
.event
, Executive
, KernelMode
, false, NULL
);
3351 Vcb
->balance
.stopping
= true;
3353 if (Vcb
->balance
.stopping
)
3358 c
->space_changed
= true;
3361 if (Vcb
->balance
.stopping
)
3364 c
->list_entry_balance
.Flink
= NULL
;
3366 Vcb
->balance
.chunks_left
--;
3370 if (!Vcb
->readonly
) {
3371 if (Vcb
->balance
.stopping
|| !NT_SUCCESS(Vcb
->balance
.status
)) {
3373 while (le
!= &chunks
) {
3374 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry_balance
);
3378 c
->list_entry_balance
.Flink
= NULL
;
3381 if (old_data_flags
!= 0)
3382 Vcb
->data_flags
= old_data_flags
;
3384 if (old_metadata_flags
!= 0)
3385 Vcb
->metadata_flags
= old_metadata_flags
;
3387 if (old_system_flags
!= 0)
3388 Vcb
->system_flags
= old_system_flags
;
3391 if (Vcb
->balance
.removing
) {
3394 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
3396 le
= Vcb
->devices
.Flink
;
3397 while (le
!= &Vcb
->devices
) {
3398 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
3400 if (dev2
->devitem
.dev_id
== Vcb
->balance
.opts
[0].devid
) {
3409 if (Vcb
->balance
.chunks_left
== 0) {
3410 Status
= finish_removing_device(Vcb
, dev
);
3412 if (!NT_SUCCESS(Status
)) {
3413 ERR("finish_removing_device returned %08x\n", Status
);
3420 ExReleaseResourceLite(&Vcb
->tree_lock
);
3421 } else if (Vcb
->balance
.shrinking
) {
3424 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
3426 le
= Vcb
->devices
.Flink
;
3427 while (le
!= &Vcb
->devices
) {
3428 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
3430 if (dev2
->devitem
.dev_id
== Vcb
->balance
.opts
[0].devid
) {
3439 ERR("could not find device %I64x\n", Vcb
->balance
.opts
[0].devid
);
3440 Vcb
->balance
.status
= STATUS_INTERNAL_ERROR
;
3443 if (Vcb
->balance
.stopping
|| !NT_SUCCESS(Vcb
->balance
.status
)) {
3445 Status
= regenerate_space_list(Vcb
, dev
);
3446 if (!NT_SUCCESS(Status
))
3447 WARN("regenerate_space_list returned %08x\n", Status
);
3452 old_size
= dev
->devitem
.num_bytes
;
3453 dev
->devitem
.num_bytes
= Vcb
->balance
.opts
[0].drange_start
;
3455 Status
= update_dev_item(Vcb
, dev
, NULL
);
3456 if (!NT_SUCCESS(Status
)) {
3457 ERR("update_dev_item returned %08x\n", Status
);
3458 dev
->devitem
.num_bytes
= old_size
;
3459 Vcb
->balance
.status
= Status
;
3461 Status
= regenerate_space_list(Vcb
, dev
);
3462 if (!NT_SUCCESS(Status
))
3463 WARN("regenerate_space_list returned %08x\n", Status
);
3465 Vcb
->superblock
.total_bytes
-= old_size
- dev
->devitem
.num_bytes
;
3467 Status
= do_write(Vcb
, NULL
);
3468 if (!NT_SUCCESS(Status
))
3469 ERR("do_write returned %08x\n", Status
);
3475 ExReleaseResourceLite(&Vcb
->tree_lock
);
3477 if (!Vcb
->balance
.stopping
&& NT_SUCCESS(Vcb
->balance
.status
))
3478 FsRtlNotifyVolumeEvent(Vcb
->root_file
, FSRTL_VOLUME_CHANGE_SIZE
);
3480 Status
= remove_balance_item(Vcb
);
3481 if (!NT_SUCCESS(Status
)) {
3482 ERR("remove_balance_item returned %08x\n", Status
);
3487 if (Vcb
->trim
&& !Vcb
->options
.no_trim
) {
3488 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
3490 le
= Vcb
->devices
.Flink
;
3491 while (le
!= &Vcb
->devices
) {
3492 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
3494 if (dev2
->devobj
&& !dev2
->readonly
&& dev2
->trim
)
3495 trim_unalloc_space(Vcb
, dev2
);
3500 ExReleaseResourceLite(&Vcb
->tree_lock
);
3504 ZwClose(Vcb
->balance
.thread
);
3505 Vcb
->balance
.thread
= NULL
;
3507 KeSetEvent(&Vcb
->balance
.finished
, 0, false);
3510 NTSTATUS
start_balance(device_extension
* Vcb
, void* data
, ULONG length
, KPROCESSOR_MODE processor_mode
) {
3512 btrfs_start_balance
* bsb
= (btrfs_start_balance
*)data
;
3513 OBJECT_ATTRIBUTES oa
;
3516 if (length
< sizeof(btrfs_start_balance
) || !data
)
3517 return STATUS_INVALID_PARAMETER
;
3519 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3520 return STATUS_PRIVILEGE_NOT_HELD
;
3523 WARN("cannot start balance while locked\n");
3524 return STATUS_DEVICE_NOT_READY
;
3527 if (Vcb
->scrub
.thread
) {
3528 WARN("cannot start balance while scrub running\n");
3529 return STATUS_DEVICE_NOT_READY
;
3532 if (Vcb
->balance
.thread
) {
3533 WARN("balance already running\n");
3534 return STATUS_DEVICE_NOT_READY
;
3538 return STATUS_MEDIA_WRITE_PROTECTED
;
3540 if (!(bsb
->opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) &&
3541 !(bsb
->opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) &&
3542 !(bsb
->opts
[BALANCE_OPTS_SYSTEM
].flags
& BTRFS_BALANCE_OPTS_ENABLED
))
3543 return STATUS_SUCCESS
;
3545 for (i
= 0; i
< 3; i
++) {
3546 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
3547 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_PROFILES
) {
3548 bsb
->opts
[i
].profiles
&= BLOCK_FLAG_RAID0
| BLOCK_FLAG_RAID1
| BLOCK_FLAG_DUPLICATE
| BLOCK_FLAG_RAID10
|
3549 BLOCK_FLAG_RAID5
| BLOCK_FLAG_RAID6
| BLOCK_FLAG_SINGLE
;
3551 if (bsb
->opts
[i
].profiles
== 0)
3552 return STATUS_INVALID_PARAMETER
;
3555 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_DEVID
) {
3556 if (bsb
->opts
[i
].devid
== 0)
3557 return STATUS_INVALID_PARAMETER
;
3560 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_DRANGE
) {
3561 if (bsb
->opts
[i
].drange_start
> bsb
->opts
[i
].drange_end
)
3562 return STATUS_INVALID_PARAMETER
;
3565 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_VRANGE
) {
3566 if (bsb
->opts
[i
].vrange_start
> bsb
->opts
[i
].vrange_end
)
3567 return STATUS_INVALID_PARAMETER
;
3570 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_LIMIT
) {
3571 bsb
->opts
[i
].limit_start
= max(1, bsb
->opts
[i
].limit_start
);
3572 bsb
->opts
[i
].limit_end
= max(1, bsb
->opts
[i
].limit_end
);
3574 if (bsb
->opts
[i
].limit_start
> bsb
->opts
[i
].limit_end
)
3575 return STATUS_INVALID_PARAMETER
;
3578 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_STRIPES
) {
3579 bsb
->opts
[i
].stripes_start
= max(1, bsb
->opts
[i
].stripes_start
);
3580 bsb
->opts
[i
].stripes_end
= max(1, bsb
->opts
[i
].stripes_end
);
3582 if (bsb
->opts
[i
].stripes_start
> bsb
->opts
[i
].stripes_end
)
3583 return STATUS_INVALID_PARAMETER
;
3586 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_USAGE
) {
3587 bsb
->opts
[i
].usage_start
= min(100, bsb
->opts
[i
].stripes_start
);
3588 bsb
->opts
[i
].usage_end
= min(100, bsb
->opts
[i
].stripes_end
);
3590 if (bsb
->opts
[i
].stripes_start
> bsb
->opts
[i
].stripes_end
)
3591 return STATUS_INVALID_PARAMETER
;
3594 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
3595 if (bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID0
&& bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID1
&&
3596 bsb
->opts
[i
].convert
!= BLOCK_FLAG_DUPLICATE
&& bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID10
&&
3597 bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID5
&& bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID6
&&
3598 bsb
->opts
[i
].convert
!= BLOCK_FLAG_SINGLE
)
3599 return STATUS_INVALID_PARAMETER
;
3604 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], &bsb
->opts
[BALANCE_OPTS_DATA
], sizeof(btrfs_balance_opts
));
3605 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], &bsb
->opts
[BALANCE_OPTS_METADATA
], sizeof(btrfs_balance_opts
));
3606 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
], &bsb
->opts
[BALANCE_OPTS_SYSTEM
], sizeof(btrfs_balance_opts
));
3608 Vcb
->balance
.paused
= false;
3609 Vcb
->balance
.removing
= false;
3610 Vcb
->balance
.shrinking
= false;
3611 Vcb
->balance
.status
= STATUS_SUCCESS
;
3612 KeInitializeEvent(&Vcb
->balance
.event
, NotificationEvent
, !Vcb
->balance
.paused
);
3614 InitializeObjectAttributes(&oa
, NULL
, OBJ_KERNEL_HANDLE
, NULL
, NULL
);
3616 Status
= PsCreateSystemThread(&Vcb
->balance
.thread
, 0, &oa
, NULL
, NULL
, balance_thread
, Vcb
);
3617 if (!NT_SUCCESS(Status
)) {
3618 ERR("PsCreateSystemThread returned %08x\n", Status
);
3622 return STATUS_SUCCESS
;
3625 NTSTATUS
look_for_balance_item(_Requires_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
) {
3630 OBJECT_ATTRIBUTES oa
;
3633 searchkey
.obj_id
= BALANCE_ITEM_ID
;
3634 searchkey
.obj_type
= TYPE_TEMP_ITEM
;
3635 searchkey
.offset
= 0;
3637 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, NULL
);
3638 if (!NT_SUCCESS(Status
)) {
3639 ERR("find_item returned %08x\n", Status
);
3643 if (keycmp(tp
.item
->key
, searchkey
)) {
3644 TRACE("no balance item found\n");
3645 return STATUS_NOT_FOUND
;
3648 if (tp
.item
->size
< sizeof(BALANCE_ITEM
)) {
3649 WARN("(%I64x,%x,%I64x) was %u bytes, expected %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
,
3650 tp
.item
->size
, sizeof(BALANCE_ITEM
));
3651 return STATUS_INTERNAL_ERROR
;
3654 bi
= (BALANCE_ITEM
*)tp
.item
->data
;
3656 if (bi
->flags
& BALANCE_FLAGS_DATA
)
3657 load_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], &bi
->data
);
3659 if (bi
->flags
& BALANCE_FLAGS_METADATA
)
3660 load_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], &bi
->metadata
);
3662 if (bi
->flags
& BALANCE_FLAGS_SYSTEM
)
3663 load_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
], &bi
->system
);
3665 // do the heuristics that Linux driver does
3667 for (i
= 0; i
< 3; i
++) {
3668 if (Vcb
->balance
.opts
[i
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
3669 // if converting, don't redo chunks already done
3671 if (Vcb
->balance
.opts
[i
].flags
& BTRFS_BALANCE_OPTS_CONVERT
)
3672 Vcb
->balance
.opts
[i
].flags
|= BTRFS_BALANCE_OPTS_SOFT
;
3674 // don't balance chunks more than 90% filled - presumably these
3675 // have already been done
3677 if (!(Vcb
->balance
.opts
[i
].flags
& BTRFS_BALANCE_OPTS_USAGE
) &&
3678 !(Vcb
->balance
.opts
[i
].flags
& BTRFS_BALANCE_OPTS_CONVERT
)
3680 Vcb
->balance
.opts
[i
].flags
|= BTRFS_BALANCE_OPTS_USAGE
;
3681 Vcb
->balance
.opts
[i
].usage_start
= 0;
3682 Vcb
->balance
.opts
[i
].usage_end
= 90;
3687 if (Vcb
->readonly
|| Vcb
->options
.skip_balance
)
3688 Vcb
->balance
.paused
= true;
3690 Vcb
->balance
.paused
= false;
3692 Vcb
->balance
.removing
= false;
3693 Vcb
->balance
.shrinking
= false;
3694 Vcb
->balance
.status
= STATUS_SUCCESS
;
3695 KeInitializeEvent(&Vcb
->balance
.event
, NotificationEvent
, !Vcb
->balance
.paused
);
3697 InitializeObjectAttributes(&oa
, NULL
, OBJ_KERNEL_HANDLE
, NULL
, NULL
);
3699 Status
= PsCreateSystemThread(&Vcb
->balance
.thread
, 0, &oa
, NULL
, NULL
, balance_thread
, Vcb
);
3700 if (!NT_SUCCESS(Status
)) {
3701 ERR("PsCreateSystemThread returned %08x\n", Status
);
3705 return STATUS_SUCCESS
;
3708 NTSTATUS
query_balance(device_extension
* Vcb
, void* data
, ULONG length
) {
3709 btrfs_query_balance
* bqb
= (btrfs_query_balance
*)data
;
3711 if (length
< sizeof(btrfs_query_balance
) || !data
)
3712 return STATUS_INVALID_PARAMETER
;
3714 if (!Vcb
->balance
.thread
) {
3715 bqb
->status
= BTRFS_BALANCE_STOPPED
;
3717 if (!NT_SUCCESS(Vcb
->balance
.status
)) {
3718 bqb
->status
|= BTRFS_BALANCE_ERROR
;
3719 bqb
->error
= Vcb
->balance
.status
;
3722 return STATUS_SUCCESS
;
3725 bqb
->status
= Vcb
->balance
.paused
? BTRFS_BALANCE_PAUSED
: BTRFS_BALANCE_RUNNING
;
3727 if (Vcb
->balance
.removing
)
3728 bqb
->status
|= BTRFS_BALANCE_REMOVAL
;
3730 if (Vcb
->balance
.shrinking
)
3731 bqb
->status
|= BTRFS_BALANCE_SHRINKING
;
3733 if (!NT_SUCCESS(Vcb
->balance
.status
))
3734 bqb
->status
|= BTRFS_BALANCE_ERROR
;
3736 bqb
->chunks_left
= Vcb
->balance
.chunks_left
;
3737 bqb
->total_chunks
= Vcb
->balance
.total_chunks
;
3738 bqb
->error
= Vcb
->balance
.status
;
3739 RtlCopyMemory(&bqb
->data_opts
, &Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], sizeof(btrfs_balance_opts
));
3740 RtlCopyMemory(&bqb
->metadata_opts
, &Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], sizeof(btrfs_balance_opts
));
3741 RtlCopyMemory(&bqb
->system_opts
, &Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
], sizeof(btrfs_balance_opts
));
3743 return STATUS_SUCCESS
;
3746 NTSTATUS
pause_balance(device_extension
* Vcb
, KPROCESSOR_MODE processor_mode
) {
3747 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3748 return STATUS_PRIVILEGE_NOT_HELD
;
3750 if (!Vcb
->balance
.thread
)
3751 return STATUS_DEVICE_NOT_READY
;
3753 if (Vcb
->balance
.paused
)
3754 return STATUS_DEVICE_NOT_READY
;
3756 Vcb
->balance
.paused
= true;
3757 KeClearEvent(&Vcb
->balance
.event
);
3759 return STATUS_SUCCESS
;
3762 NTSTATUS
resume_balance(device_extension
* Vcb
, KPROCESSOR_MODE processor_mode
) {
3763 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3764 return STATUS_PRIVILEGE_NOT_HELD
;
3766 if (!Vcb
->balance
.thread
)
3767 return STATUS_DEVICE_NOT_READY
;
3769 if (!Vcb
->balance
.paused
)
3770 return STATUS_DEVICE_NOT_READY
;
3773 return STATUS_MEDIA_WRITE_PROTECTED
;
3775 Vcb
->balance
.paused
= false;
3776 KeSetEvent(&Vcb
->balance
.event
, 0, false);
3778 return STATUS_SUCCESS
;
3781 NTSTATUS
stop_balance(device_extension
* Vcb
, KPROCESSOR_MODE processor_mode
) {
3782 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3783 return STATUS_PRIVILEGE_NOT_HELD
;
3785 if (!Vcb
->balance
.thread
)
3786 return STATUS_DEVICE_NOT_READY
;
3788 Vcb
->balance
.paused
= false;
3789 Vcb
->balance
.stopping
= true;
3790 Vcb
->balance
.status
= STATUS_SUCCESS
;
3791 KeSetEvent(&Vcb
->balance
.event
, 0, false);
3793 return STATUS_SUCCESS
;
3796 NTSTATUS
remove_device(device_extension
* Vcb
, void* data
, ULONG length
, KPROCESSOR_MODE processor_mode
) {
3802 uint64_t num_rw_devices
;
3803 OBJECT_ATTRIBUTES oa
;
3805 TRACE("(%p, %p, %x)\n", Vcb
, data
, length
);
3807 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3808 return STATUS_PRIVILEGE_NOT_HELD
;
3810 if (length
< sizeof(uint64_t))
3811 return STATUS_INVALID_PARAMETER
;
3813 devid
= *(uint64_t*)data
;
3815 ExAcquireResourceSharedLite(&Vcb
->tree_lock
, true);
3817 if (Vcb
->readonly
) {
3818 ExReleaseResourceLite(&Vcb
->tree_lock
);
3819 return STATUS_MEDIA_WRITE_PROTECTED
;
3824 le
= Vcb
->devices
.Flink
;
3825 while (le
!= &Vcb
->devices
) {
3826 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
3828 if (dev2
->devitem
.dev_id
== devid
)
3831 if (!dev2
->readonly
)
3838 ExReleaseResourceLite(&Vcb
->tree_lock
);
3839 WARN("device %I64x not found\n", devid
);
3840 return STATUS_NOT_FOUND
;
3843 if (!dev
->readonly
) {
3844 if (num_rw_devices
== 1) {
3845 ExReleaseResourceLite(&Vcb
->tree_lock
);
3846 WARN("not removing last non-readonly device\n");
3847 return STATUS_INVALID_PARAMETER
;
3850 if (num_rw_devices
== 4 &&
3851 ((Vcb
->data_flags
& BLOCK_FLAG_RAID10
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID10
|| Vcb
->system_flags
& BLOCK_FLAG_RAID10
) ||
3852 (Vcb
->data_flags
& BLOCK_FLAG_RAID6
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID6
|| Vcb
->system_flags
& BLOCK_FLAG_RAID6
))
3854 ExReleaseResourceLite(&Vcb
->tree_lock
);
3855 ERR("would not be enough devices to satisfy RAID requirement (RAID6/10)\n");
3856 return STATUS_CANNOT_DELETE
;
3859 if (num_rw_devices
== 3 && (Vcb
->data_flags
& BLOCK_FLAG_RAID5
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID5
|| Vcb
->system_flags
& BLOCK_FLAG_RAID5
)) {
3860 ExReleaseResourceLite(&Vcb
->tree_lock
);
3861 ERR("would not be enough devices to satisfy RAID requirement (RAID5)\n");
3862 return STATUS_CANNOT_DELETE
;
3865 if (num_rw_devices
== 2 &&
3866 ((Vcb
->data_flags
& BLOCK_FLAG_RAID0
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID0
|| Vcb
->system_flags
& BLOCK_FLAG_RAID0
) ||
3867 (Vcb
->data_flags
& BLOCK_FLAG_RAID1
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID1
|| Vcb
->system_flags
& BLOCK_FLAG_RAID1
))
3869 ExReleaseResourceLite(&Vcb
->tree_lock
);
3870 ERR("would not be enough devices to satisfy RAID requirement (RAID0/1)\n");
3871 return STATUS_CANNOT_DELETE
;
3875 ExReleaseResourceLite(&Vcb
->tree_lock
);
3877 if (Vcb
->balance
.thread
) {
3878 WARN("balance already running\n");
3879 return STATUS_DEVICE_NOT_READY
;
3884 RtlZeroMemory(Vcb
->balance
.opts
, sizeof(btrfs_balance_opts
) * 3);
3886 for (i
= 0; i
< 3; i
++) {
3887 Vcb
->balance
.opts
[i
].flags
= BTRFS_BALANCE_OPTS_ENABLED
| BTRFS_BALANCE_OPTS_DEVID
;
3888 Vcb
->balance
.opts
[i
].devid
= devid
;
3891 Vcb
->balance
.paused
= false;
3892 Vcb
->balance
.removing
= true;
3893 Vcb
->balance
.shrinking
= false;
3894 Vcb
->balance
.status
= STATUS_SUCCESS
;
3895 KeInitializeEvent(&Vcb
->balance
.event
, NotificationEvent
, !Vcb
->balance
.paused
);
3897 InitializeObjectAttributes(&oa
, NULL
, OBJ_KERNEL_HANDLE
, NULL
, NULL
);
3899 Status
= PsCreateSystemThread(&Vcb
->balance
.thread
, 0, &oa
, NULL
, NULL
, balance_thread
, Vcb
);
3900 if (!NT_SUCCESS(Status
)) {
3901 ERR("PsCreateSystemThread returned %08x\n", Status
);
3906 return STATUS_SUCCESS
;