1 /* Copyright (c) Mark Harmstone 2016-17
3 * This file is part of WinBtrfs.
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
18 #include "btrfs_drv.h"
19 #include "btrfsioctl.h"
31 LIST_ENTRY list_entry
;
43 metadata_reloc
* parent
;
45 LIST_ENTRY list_entry
;
55 LIST_ENTRY list_entry
;
67 metadata_reloc
* parent
;
68 LIST_ENTRY list_entry
;
71 #define BALANCE_UNIT 0x100000 // only read 1 MB at a time
73 static NTSTATUS
add_metadata_reloc(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, LIST_ENTRY
* items
, traverse_ptr
* tp
,
74 bool skinny
, metadata_reloc
** mr2
, chunk
* c
, LIST_ENTRY
* rollback
) {
82 mr
= ExAllocatePoolWithTag(PagedPool
, sizeof(metadata_reloc
), ALLOC_TAG
);
84 ERR("out of memory\n");
85 return STATUS_INSUFFICIENT_RESOURCES
;
88 mr
->address
= tp
->item
->key
.obj_id
;
90 mr
->ei
= (EXTENT_ITEM
*)tp
->item
->data
;
92 InitializeListHead(&mr
->refs
);
94 Status
= delete_tree_item(Vcb
, tp
);
95 if (!NT_SUCCESS(Status
)) {
96 ERR("delete_tree_item returned %08lx\n", Status
);
102 c
= get_chunk_from_address(Vcb
, tp
->item
->key
.obj_id
);
105 acquire_chunk_lock(c
, Vcb
);
107 c
->used
-= Vcb
->superblock
.node_size
;
109 space_list_add(c
, tp
->item
->key
.obj_id
, Vcb
->superblock
.node_size
, rollback
);
111 release_chunk_lock(c
, Vcb
);
114 ei
= (EXTENT_ITEM
*)tp
->item
->data
;
117 len
= tp
->item
->size
- sizeof(EXTENT_ITEM
);
118 ptr
= (uint8_t*)tp
->item
->data
+ sizeof(EXTENT_ITEM
);
120 len
-= sizeof(EXTENT_ITEM2
);
121 ptr
+= sizeof(EXTENT_ITEM2
);
125 uint8_t secttype
= *ptr
;
126 uint16_t sectlen
= secttype
== TYPE_TREE_BLOCK_REF
? sizeof(TREE_BLOCK_REF
) : (secttype
== TYPE_SHARED_BLOCK_REF
? sizeof(SHARED_BLOCK_REF
) : 0);
127 metadata_reloc_ref
* ref
;
132 ERR("(%I64x,%x,%I64x): %x bytes left, expecting at least %x\n", tp
->item
->key
.obj_id
, tp
->item
->key
.obj_type
, tp
->item
->key
.offset
, len
, sectlen
);
133 return STATUS_INTERNAL_ERROR
;
137 ERR("(%I64x,%x,%I64x): unrecognized extent type %x\n", tp
->item
->key
.obj_id
, tp
->item
->key
.obj_type
, tp
->item
->key
.offset
, secttype
);
138 return STATUS_INTERNAL_ERROR
;
141 ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(metadata_reloc_ref
), ALLOC_TAG
);
143 ERR("out of memory\n");
144 return STATUS_INSUFFICIENT_RESOURCES
;
147 if (secttype
== TYPE_TREE_BLOCK_REF
) {
148 ref
->type
= TYPE_TREE_BLOCK_REF
;
149 RtlCopyMemory(&ref
->tbr
, ptr
+ sizeof(uint8_t), sizeof(TREE_BLOCK_REF
));
151 } else if (secttype
== TYPE_SHARED_BLOCK_REF
) {
152 ref
->type
= TYPE_SHARED_BLOCK_REF
;
153 RtlCopyMemory(&ref
->sbr
, ptr
+ sizeof(uint8_t), sizeof(SHARED_BLOCK_REF
));
156 ERR("unexpected tree type %x\n", secttype
);
158 return STATUS_INTERNAL_ERROR
;
163 InsertTailList(&mr
->refs
, &ref
->list_entry
);
166 ptr
+= sizeof(uint8_t) + sectlen
;
169 if (inline_rc
< ei
->refcount
) { // look for non-inline entries
170 traverse_ptr tp2
= *tp
, next_tp
;
172 while (find_next_item(Vcb
, &tp2
, &next_tp
, false, NULL
)) {
175 if (tp2
.item
->key
.obj_id
== tp
->item
->key
.obj_id
) {
176 if (tp2
.item
->key
.obj_type
== TYPE_TREE_BLOCK_REF
) {
177 metadata_reloc_ref
* ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(metadata_reloc_ref
), ALLOC_TAG
);
179 ERR("out of memory\n");
180 return STATUS_INSUFFICIENT_RESOURCES
;
183 ref
->type
= TYPE_TREE_BLOCK_REF
;
184 ref
->tbr
.offset
= tp2
.item
->key
.offset
;
187 InsertTailList(&mr
->refs
, &ref
->list_entry
);
189 Status
= delete_tree_item(Vcb
, &tp2
);
190 if (!NT_SUCCESS(Status
)) {
191 ERR("delete_tree_item returned %08lx\n", Status
);
194 } else if (tp2
.item
->key
.obj_type
== TYPE_SHARED_BLOCK_REF
) {
195 metadata_reloc_ref
* ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(metadata_reloc_ref
), ALLOC_TAG
);
197 ERR("out of memory\n");
198 return STATUS_INSUFFICIENT_RESOURCES
;
201 ref
->type
= TYPE_SHARED_BLOCK_REF
;
202 ref
->sbr
.offset
= tp2
.item
->key
.offset
;
205 InsertTailList(&mr
->refs
, &ref
->list_entry
);
207 Status
= delete_tree_item(Vcb
, &tp2
);
208 if (!NT_SUCCESS(Status
)) {
209 ERR("delete_tree_item returned %08lx\n", Status
);
218 InsertTailList(items
, &mr
->list_entry
);
223 return STATUS_SUCCESS
;
226 static NTSTATUS
add_metadata_reloc_parent(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, LIST_ENTRY
* items
,
227 uint64_t address
, metadata_reloc
** mr2
, LIST_ENTRY
* rollback
) {
235 while (le
!= items
) {
236 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
238 if (mr
->address
== address
) {
240 return STATUS_SUCCESS
;
246 searchkey
.obj_id
= address
;
247 searchkey
.obj_type
= TYPE_METADATA_ITEM
;
248 searchkey
.offset
= 0xffffffffffffffff;
250 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, NULL
);
251 if (!NT_SUCCESS(Status
)) {
252 ERR("find_item returned %08lx\n", Status
);
256 if (tp
.item
->key
.obj_id
== address
&& tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
&& tp
.item
->size
>= sizeof(EXTENT_ITEM
))
258 else if (tp
.item
->key
.obj_id
== address
&& tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
&& tp
.item
->key
.offset
== Vcb
->superblock
.node_size
&&
259 tp
.item
->size
>= sizeof(EXTENT_ITEM
)) {
260 EXTENT_ITEM
* ei
= (EXTENT_ITEM
*)tp
.item
->data
;
262 if (!(ei
->flags
& EXTENT_ITEM_TREE_BLOCK
)) {
263 ERR("EXTENT_ITEM for %I64x found, but tree flag not set\n", address
);
264 return STATUS_INTERNAL_ERROR
;
267 ERR("could not find valid EXTENT_ITEM for address %I64x\n", address
);
268 return STATUS_INTERNAL_ERROR
;
271 Status
= add_metadata_reloc(Vcb
, items
, &tp
, skinny
, mr2
, NULL
, rollback
);
272 if (!NT_SUCCESS(Status
)) {
273 ERR("add_metadata_reloc returned %08lx\n", Status
);
277 return STATUS_SUCCESS
;
280 static void sort_metadata_reloc_refs(metadata_reloc
* mr
) {
281 LIST_ENTRY newlist
, *le
;
283 if (mr
->refs
.Flink
== mr
->refs
.Blink
) // 0 or 1 items
288 InitializeListHead(&newlist
);
290 while (!IsListEmpty(&mr
->refs
)) {
291 metadata_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&mr
->refs
), metadata_reloc_ref
, list_entry
);
292 bool inserted
= false;
294 if (ref
->type
== TYPE_TREE_BLOCK_REF
)
295 ref
->hash
= ref
->tbr
.offset
;
296 else if (ref
->type
== TYPE_SHARED_BLOCK_REF
)
297 ref
->hash
= ref
->parent
->new_address
;
300 while (le
!= &newlist
) {
301 metadata_reloc_ref
* ref2
= CONTAINING_RECORD(le
, metadata_reloc_ref
, list_entry
);
303 if (ref
->type
< ref2
->type
|| (ref
->type
== ref2
->type
&& ref
->hash
> ref2
->hash
)) {
304 InsertHeadList(le
->Blink
, &ref
->list_entry
);
313 InsertTailList(&newlist
, &ref
->list_entry
);
316 newlist
.Flink
->Blink
= &mr
->refs
;
317 newlist
.Blink
->Flink
= &mr
->refs
;
318 mr
->refs
.Flink
= newlist
.Flink
;
319 mr
->refs
.Blink
= newlist
.Blink
;
322 static NTSTATUS
add_metadata_reloc_extent_item(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, metadata_reloc
* mr
) {
327 bool all_inline
= true;
328 metadata_reloc_ref
* first_noninline
= NULL
;
332 inline_len
= sizeof(EXTENT_ITEM
);
333 if (!(Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA
))
334 inline_len
+= sizeof(EXTENT_ITEM2
);
336 sort_metadata_reloc_refs(mr
);
339 while (le
!= &mr
->refs
) {
340 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le
, metadata_reloc_ref
, list_entry
);
345 if (ref
->type
== TYPE_TREE_BLOCK_REF
)
346 extlen
+= sizeof(TREE_BLOCK_REF
);
347 else if (ref
->type
== TYPE_SHARED_BLOCK_REF
)
348 extlen
+= sizeof(SHARED_BLOCK_REF
);
351 if ((ULONG
)(inline_len
+ 1 + extlen
) > (Vcb
->superblock
.node_size
>> 2)) {
353 first_noninline
= ref
;
355 inline_len
+= extlen
+ 1;
361 ei
= ExAllocatePoolWithTag(PagedPool
, inline_len
, ALLOC_TAG
);
363 ERR("out of memory\n");
364 return STATUS_INSUFFICIENT_RESOURCES
;
368 ei
->generation
= mr
->ei
->generation
;
369 ei
->flags
= mr
->ei
->flags
;
370 ptr
= (uint8_t*)&ei
[1];
372 if (!(Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA
)) {
373 EXTENT_ITEM2
* ei2
= (EXTENT_ITEM2
*)ptr
;
375 ei2
->firstitem
= *(KEY
*)&mr
->data
[1];
376 ei2
->level
= mr
->data
->level
;
378 ptr
+= sizeof(EXTENT_ITEM2
);
382 while (le
!= &mr
->refs
) {
383 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le
, metadata_reloc_ref
, list_entry
);
385 if (ref
== first_noninline
)
391 if (ref
->type
== TYPE_TREE_BLOCK_REF
) {
392 TREE_BLOCK_REF
* tbr
= (TREE_BLOCK_REF
*)ptr
;
394 tbr
->offset
= ref
->tbr
.offset
;
396 ptr
+= sizeof(TREE_BLOCK_REF
);
397 } else if (ref
->type
== TYPE_SHARED_BLOCK_REF
) {
398 SHARED_BLOCK_REF
* sbr
= (SHARED_BLOCK_REF
*)ptr
;
400 sbr
->offset
= ref
->parent
->new_address
;
402 ptr
+= sizeof(SHARED_BLOCK_REF
);
408 if (Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA
)
409 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, mr
->new_address
, TYPE_METADATA_ITEM
, mr
->data
->level
, ei
, inline_len
, NULL
, NULL
);
411 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, mr
->new_address
, TYPE_EXTENT_ITEM
, Vcb
->superblock
.node_size
, ei
, inline_len
, NULL
, NULL
);
413 if (!NT_SUCCESS(Status
)) {
414 ERR("insert_tree_item returned %08lx\n", Status
);
420 le
= &first_noninline
->list_entry
;
422 while (le
!= &mr
->refs
) {
423 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le
, metadata_reloc_ref
, list_entry
);
425 if (ref
->type
== TYPE_TREE_BLOCK_REF
) {
426 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, mr
->new_address
, TYPE_TREE_BLOCK_REF
, ref
->tbr
.offset
, NULL
, 0, NULL
, NULL
);
427 if (!NT_SUCCESS(Status
)) {
428 ERR("insert_tree_item returned %08lx\n", Status
);
431 } else if (ref
->type
== TYPE_SHARED_BLOCK_REF
) {
432 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, mr
->new_address
, TYPE_SHARED_BLOCK_REF
, ref
->parent
->new_address
, NULL
, 0, NULL
, NULL
);
433 if (!NT_SUCCESS(Status
)) {
434 ERR("insert_tree_item returned %08lx\n", Status
);
443 if (ei
->flags
& EXTENT_ITEM_SHARED_BACKREFS
|| mr
->data
->flags
& HEADER_FLAG_SHARED_BACKREF
|| !(mr
->data
->flags
& HEADER_FLAG_MIXED_BACKREF
)) {
444 if (mr
->data
->level
> 0) {
446 internal_node
* in
= (internal_node
*)&mr
->data
[1];
448 for (i
= 0; i
< mr
->data
->num_items
; i
++) {
449 uint64_t sbrrc
= find_extent_shared_tree_refcount(Vcb
, in
[i
].address
, mr
->address
, NULL
);
452 SHARED_BLOCK_REF sbr
;
454 sbr
.offset
= mr
->new_address
;
456 Status
= increase_extent_refcount(Vcb
, in
[i
].address
, Vcb
->superblock
.node_size
, TYPE_SHARED_BLOCK_REF
, &sbr
, NULL
, 0, NULL
);
457 if (!NT_SUCCESS(Status
)) {
458 ERR("increase_extent_refcount returned %08lx\n", Status
);
462 sbr
.offset
= mr
->address
;
464 Status
= decrease_extent_refcount(Vcb
, in
[i
].address
, Vcb
->superblock
.node_size
, TYPE_SHARED_BLOCK_REF
, &sbr
, NULL
, 0,
465 sbr
.offset
, false, NULL
);
466 if (!NT_SUCCESS(Status
)) {
467 ERR("decrease_extent_refcount returned %08lx\n", Status
);
474 leaf_node
* ln
= (leaf_node
*)&mr
->data
[1];
476 for (i
= 0; i
< mr
->data
->num_items
; i
++) {
477 if (ln
[i
].key
.obj_type
== TYPE_EXTENT_DATA
&& ln
[i
].size
>= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
)) {
478 EXTENT_DATA
* ed
= (EXTENT_DATA
*)((uint8_t*)mr
->data
+ sizeof(tree_header
) + ln
[i
].offset
);
480 if (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) {
481 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
483 if (ed2
->size
> 0) { // not sparse
484 uint32_t sdrrc
= find_extent_shared_data_refcount(Vcb
, ed2
->address
, mr
->address
, NULL
);
490 sdr
.offset
= mr
->new_address
;
493 Status
= increase_extent_refcount(Vcb
, ed2
->address
, ed2
->size
, TYPE_SHARED_DATA_REF
, &sdr
, NULL
, 0, NULL
);
494 if (!NT_SUCCESS(Status
)) {
495 ERR("increase_extent_refcount returned %08lx\n", Status
);
499 sdr
.offset
= mr
->address
;
501 Status
= decrease_extent_refcount(Vcb
, ed2
->address
, ed2
->size
, TYPE_SHARED_DATA_REF
, &sdr
, NULL
, 0,
502 sdr
.offset
, false, NULL
);
503 if (!NT_SUCCESS(Status
)) {
504 ERR("decrease_extent_refcount returned %08lx\n", Status
);
508 c
= get_chunk_from_address(Vcb
, ed2
->address
);
511 // check changed_extents
513 ExAcquireResourceExclusiveLite(&c
->changed_extents_lock
, true);
515 le
= c
->changed_extents
.Flink
;
517 while (le
!= &c
->changed_extents
) {
518 changed_extent
* ce
= CONTAINING_RECORD(le
, changed_extent
, list_entry
);
520 if (ce
->address
== ed2
->address
) {
523 le2
= ce
->refs
.Flink
;
524 while (le2
!= &ce
->refs
) {
525 changed_extent_ref
* cer
= CONTAINING_RECORD(le2
, changed_extent_ref
, list_entry
);
527 if (cer
->type
== TYPE_SHARED_DATA_REF
&& cer
->sdr
.offset
== mr
->address
) {
528 cer
->sdr
.offset
= mr
->new_address
;
535 le2
= ce
->old_refs
.Flink
;
536 while (le2
!= &ce
->old_refs
) {
537 changed_extent_ref
* cer
= CONTAINING_RECORD(le2
, changed_extent_ref
, list_entry
);
539 if (cer
->type
== TYPE_SHARED_DATA_REF
&& cer
->sdr
.offset
== mr
->address
) {
540 cer
->sdr
.offset
= mr
->new_address
;
553 ExReleaseResourceLite(&c
->changed_extents_lock
);
563 return STATUS_SUCCESS
;
566 static NTSTATUS
write_metadata_items(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, LIST_ENTRY
* items
,
567 LIST_ENTRY
* data_items
, chunk
* c
, LIST_ENTRY
* rollback
) {
568 LIST_ENTRY tree_writes
, *le
;
571 uint8_t level
, max_level
= 0;
572 chunk
* newchunk
= NULL
;
574 InitializeListHead(&tree_writes
);
577 while (le
!= items
) {
578 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
582 mr
->data
= ExAllocatePoolWithTag(PagedPool
, Vcb
->superblock
.node_size
, ALLOC_TAG
);
584 ERR("out of memory\n");
585 return STATUS_INSUFFICIENT_RESOURCES
;
588 Status
= read_data(Vcb
, mr
->address
, Vcb
->superblock
.node_size
, NULL
, true, (uint8_t*)mr
->data
,
589 c
&& mr
->address
>= c
->offset
&& mr
->address
< c
->offset
+ c
->chunk_item
->size
? c
: NULL
, &pc
, NULL
, 0, false, NormalPagePriority
);
590 if (!NT_SUCCESS(Status
)) {
591 ERR("read_data returned %08lx\n", Status
);
595 if (pc
->chunk_item
->type
& BLOCK_FLAG_SYSTEM
)
598 if (data_items
&& mr
->data
->level
== 0) {
599 le2
= data_items
->Flink
;
600 while (le2
!= data_items
) {
601 data_reloc
* dr
= CONTAINING_RECORD(le2
, data_reloc
, list_entry
);
602 leaf_node
* ln
= (leaf_node
*)&mr
->data
[1];
605 for (i
= 0; i
< mr
->data
->num_items
; i
++) {
606 if (ln
[i
].key
.obj_type
== TYPE_EXTENT_DATA
&& ln
[i
].size
>= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
)) {
607 EXTENT_DATA
* ed
= (EXTENT_DATA
*)((uint8_t*)mr
->data
+ sizeof(tree_header
) + ln
[i
].offset
);
609 if (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) {
610 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
612 if (ed2
->address
== dr
->address
)
613 ed2
->address
= dr
->new_address
;
622 if (mr
->data
->level
> max_level
)
623 max_level
= mr
->data
->level
;
625 le2
= mr
->refs
.Flink
;
626 while (le2
!= &mr
->refs
) {
627 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le2
, metadata_reloc_ref
, list_entry
);
629 if (ref
->type
== TYPE_TREE_BLOCK_REF
) {
635 firstitem
= (KEY
*)&mr
->data
[1];
637 le3
= Vcb
->roots
.Flink
;
638 while (le3
!= &Vcb
->roots
) {
639 root
* r2
= CONTAINING_RECORD(le3
, root
, list_entry
);
641 if (r2
->id
== ref
->tbr
.offset
) {
650 ERR("could not find subvol with id %I64x\n", ref
->tbr
.offset
);
651 return STATUS_INTERNAL_ERROR
;
654 Status
= find_item_to_level(Vcb
, r
, &tp
, firstitem
, false, mr
->data
->level
+ 1, NULL
);
655 if (!NT_SUCCESS(Status
) && Status
!= STATUS_NOT_FOUND
) {
656 ERR("find_item_to_level returned %08lx\n", Status
);
661 while (t
&& t
->header
.level
< mr
->data
->level
+ 1) {
670 Status
= add_metadata_reloc_parent(Vcb
, items
, t
->header
.address
, &mr2
, rollback
);
671 if (!NT_SUCCESS(Status
)) {
672 ERR("add_metadata_reloc_parent returned %08lx\n", Status
);
678 } else if (ref
->type
== TYPE_SHARED_BLOCK_REF
) {
681 Status
= add_metadata_reloc_parent(Vcb
, items
, ref
->sbr
.offset
, &mr2
, rollback
);
682 if (!NT_SUCCESS(Status
)) {
683 ERR("add_metadata_reloc_parent returned %08lx\n", Status
);
697 while (le
!= items
) {
698 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
704 hash
= calc_crc32c(0xffffffff, (uint8_t*)&mr
->address
, sizeof(uint64_t));
706 le2
= Vcb
->trees_ptrs
[hash
>> 24];
709 while (le2
!= &Vcb
->trees_hash
) {
710 tree
* t
= CONTAINING_RECORD(le2
, tree
, list_entry_hash
);
712 if (t
->header
.address
== mr
->address
) {
715 } else if (t
->hash
> hash
)
725 for (level
= 0; level
<= max_level
; level
++) {
727 while (le
!= items
) {
728 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
730 if (mr
->data
->level
== level
) {
738 flags
= Vcb
->system_flags
;
739 else if (Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS
)
740 flags
= Vcb
->data_flags
;
742 flags
= Vcb
->metadata_flags
;
745 acquire_chunk_lock(newchunk
, Vcb
);
747 if (newchunk
->chunk_item
->type
== flags
&& find_metadata_address_in_chunk(Vcb
, newchunk
, &mr
->new_address
)) {
748 newchunk
->used
+= Vcb
->superblock
.node_size
;
749 space_list_subtract(newchunk
, mr
->new_address
, Vcb
->superblock
.node_size
, rollback
);
753 release_chunk_lock(newchunk
, Vcb
);
757 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
759 le2
= Vcb
->chunks
.Flink
;
760 while (le2
!= &Vcb
->chunks
) {
761 chunk
* c2
= CONTAINING_RECORD(le2
, chunk
, list_entry
);
763 if (!c2
->readonly
&& !c2
->reloc
&& c2
!= newchunk
&& c2
->chunk_item
->type
== flags
) {
764 acquire_chunk_lock(c2
, Vcb
);
766 if ((c2
->chunk_item
->size
- c2
->used
) >= Vcb
->superblock
.node_size
) {
767 if (find_metadata_address_in_chunk(Vcb
, c2
, &mr
->new_address
)) {
768 c2
->used
+= Vcb
->superblock
.node_size
;
769 space_list_subtract(c2
, mr
->new_address
, Vcb
->superblock
.node_size
, rollback
);
770 release_chunk_lock(c2
, Vcb
);
777 release_chunk_lock(c2
, Vcb
);
783 // allocate new chunk if necessary
785 Status
= alloc_chunk(Vcb
, flags
, &newchunk
, false);
787 if (!NT_SUCCESS(Status
)) {
788 ERR("alloc_chunk returned %08lx\n", Status
);
789 ExReleaseResourceLite(&Vcb
->chunk_lock
);
793 acquire_chunk_lock(newchunk
, Vcb
);
795 newchunk
->balance_num
= Vcb
->balance
.balance_num
;
797 if (!find_metadata_address_in_chunk(Vcb
, newchunk
, &mr
->new_address
)) {
798 release_chunk_lock(newchunk
, Vcb
);
799 ExReleaseResourceLite(&Vcb
->chunk_lock
);
800 ERR("could not find address in new chunk\n");
801 Status
= STATUS_DISK_FULL
;
804 newchunk
->used
+= Vcb
->superblock
.node_size
;
805 space_list_subtract(newchunk
, mr
->new_address
, Vcb
->superblock
.node_size
, rollback
);
808 release_chunk_lock(newchunk
, Vcb
);
811 ExReleaseResourceLite(&Vcb
->chunk_lock
);
815 le2
= mr
->refs
.Flink
;
816 while (le2
!= &mr
->refs
) {
817 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le2
, metadata_reloc_ref
, list_entry
);
821 internal_node
* in
= (internal_node
*)&ref
->parent
->data
[1];
823 for (i
= 0; i
< ref
->parent
->data
->num_items
; i
++) {
824 if (in
[i
].address
== mr
->address
) {
825 in
[i
].address
= mr
->new_address
;
830 if (ref
->parent
->t
) {
833 le3
= ref
->parent
->t
->itemlist
.Flink
;
834 while (le3
!= &ref
->parent
->t
->itemlist
) {
835 tree_data
* td
= CONTAINING_RECORD(le3
, tree_data
, list_entry
);
837 if (!td
->inserted
&& td
->treeholder
.address
== mr
->address
)
838 td
->treeholder
.address
= mr
->new_address
;
843 } else if (ref
->top
&& ref
->type
== TYPE_TREE_BLOCK_REF
) {
849 le3
= Vcb
->roots
.Flink
;
850 while (le3
!= &Vcb
->roots
) {
851 root
* r2
= CONTAINING_RECORD(le3
, root
, list_entry
);
853 if (r2
->id
== ref
->tbr
.offset
) {
862 r
->treeholder
.address
= mr
->new_address
;
864 if (r
== Vcb
->root_root
)
865 Vcb
->superblock
.root_tree_addr
= mr
->new_address
;
866 else if (r
== Vcb
->chunk_root
)
867 Vcb
->superblock
.chunk_tree_addr
= mr
->new_address
;
868 else if (r
->root_item
.block_number
== mr
->address
) {
872 r
->root_item
.block_number
= mr
->new_address
;
874 searchkey
.obj_id
= r
->id
;
875 searchkey
.obj_type
= TYPE_ROOT_ITEM
;
876 searchkey
.offset
= 0xffffffffffffffff;
878 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, NULL
);
879 if (!NT_SUCCESS(Status
)) {
880 ERR("find_item returned %08lx\n", Status
);
884 if (tp
.item
->key
.obj_id
!= searchkey
.obj_id
|| tp
.item
->key
.obj_type
!= searchkey
.obj_type
) {
885 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey
.obj_id
);
886 Status
= STATUS_INTERNAL_ERROR
;
890 ri
= ExAllocatePoolWithTag(PagedPool
, sizeof(ROOT_ITEM
), ALLOC_TAG
);
892 ERR("out of memory\n");
893 Status
= STATUS_INSUFFICIENT_RESOURCES
;
897 RtlCopyMemory(ri
, &r
->root_item
, sizeof(ROOT_ITEM
));
899 Status
= delete_tree_item(Vcb
, &tp
);
900 if (!NT_SUCCESS(Status
)) {
901 ERR("delete_tree_item returned %08lx\n", Status
);
905 Status
= insert_tree_item(Vcb
, Vcb
->root_root
, tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, ri
, sizeof(ROOT_ITEM
), NULL
, NULL
);
906 if (!NT_SUCCESS(Status
)) {
907 ERR("insert_tree_item returned %08lx\n", Status
);
917 mr
->data
->address
= mr
->new_address
;
926 // check if tree loaded more than once
927 if (t3
->list_entry
.Flink
!= &Vcb
->trees_hash
) {
928 tree
* nt
= CONTAINING_RECORD(t3
->list_entry_hash
.Flink
, tree
, list_entry_hash
);
930 if (nt
->header
.address
== t3
->header
.address
)
934 t3
->header
.address
= mr
->new_address
;
938 if (Vcb
->trees_ptrs
[h
] == &t3
->list_entry_hash
) {
939 if (t3
->list_entry_hash
.Flink
== &Vcb
->trees_hash
)
940 Vcb
->trees_ptrs
[h
] = NULL
;
942 tree
* t2
= CONTAINING_RECORD(t3
->list_entry_hash
.Flink
, tree
, list_entry_hash
);
944 if (t2
->hash
>> 24 == h
)
945 Vcb
->trees_ptrs
[h
] = &t2
->list_entry_hash
;
947 Vcb
->trees_ptrs
[h
] = NULL
;
951 RemoveEntryList(&t3
->list_entry_hash
);
953 t3
->hash
= calc_crc32c(0xffffffff, (uint8_t*)&t3
->header
.address
, sizeof(uint64_t));
956 if (!Vcb
->trees_ptrs
[h
]) {
959 le2
= Vcb
->trees_hash
.Flink
;
964 if (Vcb
->trees_ptrs
[h2
]) {
965 le2
= Vcb
->trees_ptrs
[h2
];
973 le2
= Vcb
->trees_ptrs
[h
];
976 while (le2
!= &Vcb
->trees_hash
) {
977 tree
* t2
= CONTAINING_RECORD(le2
, tree
, list_entry_hash
);
979 if (t2
->hash
>= t3
->hash
) {
980 InsertHeadList(le2
->Blink
, &t3
->list_entry_hash
);
989 InsertTailList(&Vcb
->trees_hash
, &t3
->list_entry_hash
);
991 if (!Vcb
->trees_ptrs
[h
] || t3
->list_entry_hash
.Flink
== Vcb
->trees_ptrs
[h
])
992 Vcb
->trees_ptrs
[h
] = &t3
->list_entry_hash
;
994 if (data_items
&& level
== 0) {
995 le2
= data_items
->Flink
;
997 while (le2
!= data_items
) {
998 data_reloc
* dr
= CONTAINING_RECORD(le2
, data_reloc
, list_entry
);
999 LIST_ENTRY
* le3
= t3
->itemlist
.Flink
;
1001 while (le3
!= &t3
->itemlist
) {
1002 tree_data
* td
= CONTAINING_RECORD(le3
, tree_data
, list_entry
);
1004 if (!td
->inserted
&& td
->key
.obj_type
== TYPE_EXTENT_DATA
&& td
->size
>= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
)) {
1005 EXTENT_DATA
* ed
= (EXTENT_DATA
*)td
->data
;
1007 if (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) {
1008 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
1010 if (ed2
->address
== dr
->address
)
1011 ed2
->address
= dr
->new_address
;
1025 calc_tree_checksum(Vcb
, mr
->data
);
1027 tw
= ExAllocatePoolWithTag(PagedPool
, sizeof(tree_write
), ALLOC_TAG
);
1029 ERR("out of memory\n");
1030 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1034 tw
->address
= mr
->new_address
;
1035 tw
->length
= Vcb
->superblock
.node_size
;
1036 tw
->data
= (uint8_t*)mr
->data
;
1037 tw
->allocated
= false;
1039 if (IsListEmpty(&tree_writes
))
1040 InsertTailList(&tree_writes
, &tw
->list_entry
);
1042 bool inserted
= false;
1044 le2
= tree_writes
.Flink
;
1045 while (le2
!= &tree_writes
) {
1046 tree_write
* tw2
= CONTAINING_RECORD(le2
, tree_write
, list_entry
);
1048 if (tw2
->address
> tw
->address
) {
1049 InsertHeadList(le2
->Blink
, &tw
->list_entry
);
1058 InsertTailList(&tree_writes
, &tw
->list_entry
);
1066 Status
= do_tree_writes(Vcb
, &tree_writes
, true);
1067 if (!NT_SUCCESS(Status
)) {
1068 ERR("do_tree_writes returned %08lx\n", Status
);
1073 while (le
!= items
) {
1074 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
1076 Status
= add_metadata_reloc_extent_item(Vcb
, mr
);
1077 if (!NT_SUCCESS(Status
)) {
1078 ERR("add_metadata_reloc_extent_item returned %08lx\n", Status
);
1085 Status
= STATUS_SUCCESS
;
1088 while (!IsListEmpty(&tree_writes
)) {
1089 tree_write
* tw
= CONTAINING_RECORD(RemoveHeadList(&tree_writes
), tree_write
, list_entry
);
1092 ExFreePool(tw
->data
);
1100 static NTSTATUS
balance_metadata_chunk(device_extension
* Vcb
, chunk
* c
, bool* changed
) {
1105 LIST_ENTRY items
, rollback
;
1106 uint32_t loaded
= 0;
1108 TRACE("chunk %I64x\n", c
->offset
);
1110 InitializeListHead(&rollback
);
1111 InitializeListHead(&items
);
1113 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
1115 searchkey
.obj_id
= c
->offset
;
1116 searchkey
.obj_type
= TYPE_METADATA_ITEM
;
1117 searchkey
.offset
= 0xffffffffffffffff;
1119 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, NULL
);
1120 if (!NT_SUCCESS(Status
)) {
1121 ERR("find_item returned %08lx\n", Status
);
1126 traverse_ptr next_tp
;
1128 if (tp
.item
->key
.obj_id
>= c
->offset
+ c
->chunk_item
->size
)
1131 if (tp
.item
->key
.obj_id
>= c
->offset
&& (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
|| tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
)) {
1132 bool tree
= false, skinny
= false;
1134 if (tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
&& tp
.item
->size
>= sizeof(EXTENT_ITEM
)) {
1137 } else if (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
&& tp
.item
->key
.offset
== Vcb
->superblock
.node_size
&&
1138 tp
.item
->size
>= sizeof(EXTENT_ITEM
)) {
1139 EXTENT_ITEM
* ei
= (EXTENT_ITEM
*)tp
.item
->data
;
1141 if (ei
->flags
& EXTENT_ITEM_TREE_BLOCK
)
1146 Status
= add_metadata_reloc(Vcb
, &items
, &tp
, skinny
, NULL
, c
, &rollback
);
1148 if (!NT_SUCCESS(Status
)) {
1149 ERR("add_metadata_reloc returned %08lx\n", Status
);
1155 if (loaded
>= 64) // only do 64 at a time
1160 b
= find_next_item(Vcb
, &tp
, &next_tp
, false, NULL
);
1166 if (IsListEmpty(&items
)) {
1168 Status
= STATUS_SUCCESS
;
1173 Status
= write_metadata_items(Vcb
, &items
, NULL
, c
, &rollback
);
1174 if (!NT_SUCCESS(Status
)) {
1175 ERR("write_metadata_items returned %08lx\n", Status
);
1179 Status
= STATUS_SUCCESS
;
1181 Vcb
->need_write
= true;
1184 if (NT_SUCCESS(Status
)) {
1185 Status
= do_write(Vcb
, NULL
);
1186 if (!NT_SUCCESS(Status
))
1187 ERR("do_write returned %08lx\n", Status
);
1190 if (NT_SUCCESS(Status
))
1191 clear_rollback(&rollback
);
1193 do_rollback(Vcb
, &rollback
);
1197 ExReleaseResourceLite(&Vcb
->tree_lock
);
1199 while (!IsListEmpty(&items
)) {
1200 metadata_reloc
* mr
= CONTAINING_RECORD(RemoveHeadList(&items
), metadata_reloc
, list_entry
);
1202 while (!IsListEmpty(&mr
->refs
)) {
1203 metadata_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&mr
->refs
), metadata_reloc_ref
, list_entry
);
1209 ExFreePool(mr
->data
);
1217 static NTSTATUS
data_reloc_add_tree_edr(_Requires_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, LIST_ENTRY
* metadata_items
,
1218 data_reloc
* dr
, EXTENT_DATA_REF
* edr
, LIST_ENTRY
* rollback
) {
1225 uint64_t last_tree
= 0;
1226 data_reloc_ref
* ref
;
1228 le
= Vcb
->roots
.Flink
;
1229 while (le
!= &Vcb
->roots
) {
1230 root
* r2
= CONTAINING_RECORD(le
, root
, list_entry
);
1232 if (r2
->id
== edr
->root
) {
1241 ERR("could not find subvol %I64x\n", edr
->root
);
1242 return STATUS_INTERNAL_ERROR
;
1245 searchkey
.obj_id
= edr
->objid
;
1246 searchkey
.obj_type
= TYPE_EXTENT_DATA
;
1247 searchkey
.offset
= 0;
1249 Status
= find_item(Vcb
, r
, &tp
, &searchkey
, false, NULL
);
1250 if (!NT_SUCCESS(Status
)) {
1251 ERR("find_item returned %08lx\n", Status
);
1255 if (tp
.item
->key
.obj_id
< searchkey
.obj_id
|| (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
< searchkey
.obj_type
)) {
1258 if (find_next_item(Vcb
, &tp
, &tp2
, false, NULL
))
1261 ERR("could not find EXTENT_DATA for inode %I64x in root %I64x\n", searchkey
.obj_id
, r
->id
);
1262 return STATUS_INTERNAL_ERROR
;
1268 while (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
== searchkey
.obj_type
) {
1271 if (tp
.item
->size
>= sizeof(EXTENT_DATA
)) {
1272 EXTENT_DATA
* ed
= (EXTENT_DATA
*)tp
.item
->data
;
1274 if ((ed
->type
== EXTENT_TYPE_PREALLOC
|| ed
->type
== EXTENT_TYPE_REGULAR
) && tp
.item
->size
>= offsetof(EXTENT_DATA
, data
[0]) + sizeof(EXTENT_DATA2
)) {
1275 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
1277 if (ed2
->address
== dr
->address
&& ed2
->size
== dr
->size
&& tp
.item
->key
.offset
- ed2
->offset
== edr
->offset
) {
1278 if (ref
&& last_tree
== tp
.tree
->header
.address
)
1281 ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(data_reloc_ref
), ALLOC_TAG
);
1283 ERR("out of memory\n");
1284 return STATUS_INSUFFICIENT_RESOURCES
;
1287 ref
->type
= TYPE_EXTENT_DATA_REF
;
1288 RtlCopyMemory(&ref
->edr
, edr
, sizeof(EXTENT_DATA_REF
));
1291 Status
= add_metadata_reloc_parent(Vcb
, metadata_items
, tp
.tree
->header
.address
, &mr
, rollback
);
1292 if (!NT_SUCCESS(Status
)) {
1293 ERR("add_metadata_reloc_parent returned %08lx\n", Status
);
1298 last_tree
= tp
.tree
->header
.address
;
1301 InsertTailList(&dr
->refs
, &ref
->list_entry
);
1307 if (find_next_item(Vcb
, &tp
, &tp2
, false, NULL
))
1313 return STATUS_SUCCESS
;
1316 static NTSTATUS
add_data_reloc(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, LIST_ENTRY
* items
, LIST_ENTRY
* metadata_items
,
1317 traverse_ptr
* tp
, chunk
* c
, LIST_ENTRY
* rollback
) {
1325 dr
= ExAllocatePoolWithTag(PagedPool
, sizeof(data_reloc
), ALLOC_TAG
);
1327 ERR("out of memory\n");
1328 return STATUS_INSUFFICIENT_RESOURCES
;
1331 dr
->address
= tp
->item
->key
.obj_id
;
1332 dr
->size
= tp
->item
->key
.offset
;
1333 dr
->ei
= (EXTENT_ITEM
*)tp
->item
->data
;
1334 InitializeListHead(&dr
->refs
);
1336 Status
= delete_tree_item(Vcb
, tp
);
1337 if (!NT_SUCCESS(Status
)) {
1338 ERR("delete_tree_item returned %08lx\n", Status
);
1343 c
= get_chunk_from_address(Vcb
, tp
->item
->key
.obj_id
);
1346 acquire_chunk_lock(c
, Vcb
);
1348 c
->used
-= tp
->item
->key
.offset
;
1350 space_list_add(c
, tp
->item
->key
.obj_id
, tp
->item
->key
.offset
, rollback
);
1352 release_chunk_lock(c
, Vcb
);
1355 ei
= (EXTENT_ITEM
*)tp
->item
->data
;
1358 len
= tp
->item
->size
- sizeof(EXTENT_ITEM
);
1359 ptr
= (uint8_t*)tp
->item
->data
+ sizeof(EXTENT_ITEM
);
1362 uint8_t secttype
= *ptr
;
1363 uint16_t sectlen
= secttype
== TYPE_EXTENT_DATA_REF
? sizeof(EXTENT_DATA_REF
) : (secttype
== TYPE_SHARED_DATA_REF
? sizeof(SHARED_DATA_REF
) : 0);
1367 if (sectlen
> len
) {
1368 ERR("(%I64x,%x,%I64x): %x bytes left, expecting at least %x\n", tp
->item
->key
.obj_id
, tp
->item
->key
.obj_type
, tp
->item
->key
.offset
, len
, sectlen
);
1369 return STATUS_INTERNAL_ERROR
;
1373 ERR("(%I64x,%x,%I64x): unrecognized extent type %x\n", tp
->item
->key
.obj_id
, tp
->item
->key
.obj_type
, tp
->item
->key
.offset
, secttype
);
1374 return STATUS_INTERNAL_ERROR
;
1377 if (secttype
== TYPE_EXTENT_DATA_REF
) {
1378 EXTENT_DATA_REF
* edr
= (EXTENT_DATA_REF
*)(ptr
+ sizeof(uint8_t));
1380 inline_rc
+= edr
->count
;
1382 Status
= data_reloc_add_tree_edr(Vcb
, metadata_items
, dr
, edr
, rollback
);
1383 if (!NT_SUCCESS(Status
)) {
1384 ERR("data_reloc_add_tree_edr returned %08lx\n", Status
);
1387 } else if (secttype
== TYPE_SHARED_DATA_REF
) {
1389 data_reloc_ref
* ref
;
1391 ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(data_reloc_ref
), ALLOC_TAG
);
1393 ERR("out of memory\n");
1394 return STATUS_INSUFFICIENT_RESOURCES
;
1397 ref
->type
= TYPE_SHARED_DATA_REF
;
1398 RtlCopyMemory(&ref
->sdr
, ptr
+ sizeof(uint8_t), sizeof(SHARED_DATA_REF
));
1399 inline_rc
+= ref
->sdr
.count
;
1401 Status
= add_metadata_reloc_parent(Vcb
, metadata_items
, ref
->sdr
.offset
, &mr
, rollback
);
1402 if (!NT_SUCCESS(Status
)) {
1403 ERR("add_metadata_reloc_parent returned %08lx\n", Status
);
1410 InsertTailList(&dr
->refs
, &ref
->list_entry
);
1412 ERR("unexpected tree type %x\n", secttype
);
1413 return STATUS_INTERNAL_ERROR
;
1418 ptr
+= sizeof(uint8_t) + sectlen
;
1421 if (inline_rc
< ei
->refcount
) { // look for non-inline entries
1422 traverse_ptr tp2
= *tp
, next_tp
;
1424 while (find_next_item(Vcb
, &tp2
, &next_tp
, false, NULL
)) {
1427 if (tp2
.item
->key
.obj_id
== tp
->item
->key
.obj_id
) {
1428 if (tp2
.item
->key
.obj_type
== TYPE_EXTENT_DATA_REF
&& tp2
.item
->size
>= sizeof(EXTENT_DATA_REF
)) {
1429 Status
= data_reloc_add_tree_edr(Vcb
, metadata_items
, dr
, (EXTENT_DATA_REF
*)tp2
.item
->data
, rollback
);
1430 if (!NT_SUCCESS(Status
)) {
1431 ERR("data_reloc_add_tree_edr returned %08lx\n", Status
);
1435 Status
= delete_tree_item(Vcb
, &tp2
);
1436 if (!NT_SUCCESS(Status
)) {
1437 ERR("delete_tree_item returned %08lx\n", Status
);
1440 } else if (tp2
.item
->key
.obj_type
== TYPE_SHARED_DATA_REF
&& tp2
.item
->size
>= sizeof(uint32_t)) {
1442 data_reloc_ref
* ref
;
1444 ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(data_reloc_ref
), ALLOC_TAG
);
1446 ERR("out of memory\n");
1447 return STATUS_INSUFFICIENT_RESOURCES
;
1450 ref
->type
= TYPE_SHARED_DATA_REF
;
1451 ref
->sdr
.offset
= tp2
.item
->key
.offset
;
1452 ref
->sdr
.count
= *((uint32_t*)tp2
.item
->data
);
1454 Status
= add_metadata_reloc_parent(Vcb
, metadata_items
, ref
->sdr
.offset
, &mr
, rollback
);
1455 if (!NT_SUCCESS(Status
)) {
1456 ERR("add_metadata_reloc_parent returned %08lx\n", Status
);
1462 InsertTailList(&dr
->refs
, &ref
->list_entry
);
1464 Status
= delete_tree_item(Vcb
, &tp2
);
1465 if (!NT_SUCCESS(Status
)) {
1466 ERR("delete_tree_item returned %08lx\n", Status
);
1475 InsertTailList(items
, &dr
->list_entry
);
1477 return STATUS_SUCCESS
;
1480 static void sort_data_reloc_refs(data_reloc
* dr
) {
1481 LIST_ENTRY newlist
, *le
;
1483 if (IsListEmpty(&dr
->refs
))
1488 InitializeListHead(&newlist
);
1490 while (!IsListEmpty(&dr
->refs
)) {
1491 data_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&dr
->refs
), data_reloc_ref
, list_entry
);
1492 bool inserted
= false;
1494 if (ref
->type
== TYPE_EXTENT_DATA_REF
)
1495 ref
->hash
= get_extent_data_ref_hash2(ref
->edr
.root
, ref
->edr
.objid
, ref
->edr
.offset
);
1496 else if (ref
->type
== TYPE_SHARED_DATA_REF
)
1497 ref
->hash
= ref
->parent
->new_address
;
1500 while (le
!= &newlist
) {
1501 data_reloc_ref
* ref2
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1503 if (ref
->type
< ref2
->type
|| (ref
->type
== ref2
->type
&& ref
->hash
> ref2
->hash
)) {
1504 InsertHeadList(le
->Blink
, &ref
->list_entry
);
1513 InsertTailList(&newlist
, &ref
->list_entry
);
1517 while (le
!= &newlist
) {
1518 data_reloc_ref
* ref
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1520 if (le
->Flink
!= &newlist
) {
1521 data_reloc_ref
* ref2
= CONTAINING_RECORD(le
->Flink
, data_reloc_ref
, list_entry
);
1523 if (ref
->type
== TYPE_EXTENT_DATA_REF
&& ref2
->type
== TYPE_EXTENT_DATA_REF
&& ref
->edr
.root
== ref2
->edr
.root
&&
1524 ref
->edr
.objid
== ref2
->edr
.objid
&& ref
->edr
.offset
== ref2
->edr
.offset
) {
1525 RemoveEntryList(&ref2
->list_entry
);
1526 ref
->edr
.count
+= ref2
->edr
.count
;
1535 newlist
.Flink
->Blink
= &dr
->refs
;
1536 newlist
.Blink
->Flink
= &dr
->refs
;
1537 dr
->refs
.Flink
= newlist
.Flink
;
1538 dr
->refs
.Blink
= newlist
.Blink
;
1541 static NTSTATUS
add_data_reloc_extent_item(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, data_reloc
* dr
) {
1545 uint16_t inline_len
;
1546 bool all_inline
= true;
1547 data_reloc_ref
* first_noninline
= NULL
;
1551 inline_len
= sizeof(EXTENT_ITEM
);
1553 sort_data_reloc_refs(dr
);
1555 le
= dr
->refs
.Flink
;
1556 while (le
!= &dr
->refs
) {
1557 data_reloc_ref
* ref
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1558 uint16_t extlen
= 0;
1560 if (ref
->type
== TYPE_EXTENT_DATA_REF
) {
1561 extlen
+= sizeof(EXTENT_DATA_REF
);
1562 rc
+= ref
->edr
.count
;
1563 } else if (ref
->type
== TYPE_SHARED_DATA_REF
) {
1564 extlen
+= sizeof(SHARED_DATA_REF
);
1569 if ((ULONG
)(inline_len
+ 1 + extlen
) > (Vcb
->superblock
.node_size
>> 2)) {
1571 first_noninline
= ref
;
1573 inline_len
+= extlen
+ 1;
1579 ei
= ExAllocatePoolWithTag(PagedPool
, inline_len
, ALLOC_TAG
);
1581 ERR("out of memory\n");
1582 return STATUS_INSUFFICIENT_RESOURCES
;
1586 ei
->generation
= dr
->ei
->generation
;
1587 ei
->flags
= dr
->ei
->flags
;
1588 ptr
= (uint8_t*)&ei
[1];
1590 le
= dr
->refs
.Flink
;
1591 while (le
!= &dr
->refs
) {
1592 data_reloc_ref
* ref
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1594 if (ref
== first_noninline
)
1600 if (ref
->type
== TYPE_EXTENT_DATA_REF
) {
1601 EXTENT_DATA_REF
* edr
= (EXTENT_DATA_REF
*)ptr
;
1603 RtlCopyMemory(edr
, &ref
->edr
, sizeof(EXTENT_DATA_REF
));
1605 ptr
+= sizeof(EXTENT_DATA_REF
);
1606 } else if (ref
->type
== TYPE_SHARED_DATA_REF
) {
1607 SHARED_DATA_REF
* sdr
= (SHARED_DATA_REF
*)ptr
;
1609 sdr
->offset
= ref
->parent
->new_address
;
1610 sdr
->count
= ref
->sdr
.count
;
1612 ptr
+= sizeof(SHARED_DATA_REF
);
1618 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, dr
->new_address
, TYPE_EXTENT_ITEM
, dr
->size
, ei
, inline_len
, NULL
, NULL
);
1619 if (!NT_SUCCESS(Status
)) {
1620 ERR("insert_tree_item returned %08lx\n", Status
);
1625 le
= &first_noninline
->list_entry
;
1627 while (le
!= &dr
->refs
) {
1628 data_reloc_ref
* ref
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1630 if (ref
->type
== TYPE_EXTENT_DATA_REF
) {
1631 EXTENT_DATA_REF
* edr
;
1633 edr
= ExAllocatePoolWithTag(PagedPool
, sizeof(EXTENT_DATA_REF
), ALLOC_TAG
);
1635 ERR("out of memory\n");
1636 return STATUS_INSUFFICIENT_RESOURCES
;
1639 RtlCopyMemory(edr
, &ref
->edr
, sizeof(EXTENT_DATA_REF
));
1641 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, dr
->new_address
, TYPE_EXTENT_DATA_REF
, ref
->hash
, edr
, sizeof(EXTENT_DATA_REF
), NULL
, NULL
);
1642 if (!NT_SUCCESS(Status
)) {
1643 ERR("insert_tree_item returned %08lx\n", Status
);
1646 } else if (ref
->type
== TYPE_SHARED_DATA_REF
) {
1649 sdr
= ExAllocatePoolWithTag(PagedPool
, sizeof(uint32_t), ALLOC_TAG
);
1651 ERR("out of memory\n");
1652 return STATUS_INSUFFICIENT_RESOURCES
;
1655 *sdr
= ref
->sdr
.count
;
1657 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, dr
->new_address
, TYPE_SHARED_DATA_REF
, ref
->parent
->new_address
, sdr
, sizeof(uint32_t), NULL
, NULL
);
1658 if (!NT_SUCCESS(Status
)) {
1659 ERR("insert_tree_item returned %08lx\n", Status
);
1668 return STATUS_SUCCESS
;
/*
 * Relocates a batch of data extents out of chunk c.
 *
 * With the tree lock held exclusively, the visible code:
 *   - walks the extent tree from c->offset and queues each non-tree-block
 *     EXTENT_ITEM through add_data_reloc, stopping once 0x1000000 bytes or
 *     100 extents have been gathered;
 *   - picks a new address for every queued extent, in the current destination
 *     chunk, another suitable data chunk, or a chunk obtained from alloc_chunk;
 *   - copies the data across in pieces of at most BALANCE_UNIT bytes using
 *     read_data / write_data_complete, and moves the checksum entries;
 *   - writes the updated metadata and new extent items, re-points changed
 *     extents and cached extent records at the new addresses, and calls do_write;
 *   - clears the rollback list on success or runs do_rollback on failure, then
 *     frees the relocation lists.
 *
 * Vcb     - volume being balanced.
 * c       - chunk whose data extents are being moved.
 * changed - output flag. NOTE(review): the statements that set it are not
 *           visible in this listing - confirm against the full source.
 *
 * Returns an NTSTATUS. NOTE(review): the return statement itself is not
 * visible in this listing.
 */
1671 static NTSTATUS
balance_data_chunk(device_extension
* Vcb
, chunk
* c
, bool* changed
) {
1676 LIST_ENTRY items
, metadata_items
, rollback
, *le
;
1677 uint64_t loaded
= 0, num_loaded
= 0;
1678 chunk
* newchunk
= NULL
;
1679 uint8_t* data
= NULL
;
1681 TRACE("chunk %I64x\n", c
->offset
);
1683 InitializeListHead(&rollback
);
1684 InitializeListHead(&items
);
1685 InitializeListHead(&metadata_items
);
1687 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
/* Search the extent tree for extent items keyed on the chunk's start address. */
1689 searchkey
.obj_id
= c
->offset
;
1690 searchkey
.obj_type
= TYPE_EXTENT_ITEM
;
1691 searchkey
.offset
= 0xffffffffffffffff;
1693 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, NULL
);
1694 if (!NT_SUCCESS(Status
)) {
1695 ERR("find_item returned %08lx\n", Status
);
1700 traverse_ptr next_tp
;
1702 if (tp
.item
->key
.obj_id
>= c
->offset
+ c
->chunk_item
->size
)
1705 if (tp
.item
->key
.obj_id
>= c
->offset
&& tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
) {
1708 if (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
&& tp
.item
->size
>= sizeof(EXTENT_ITEM
)) {
1709 EXTENT_ITEM
* ei
= (EXTENT_ITEM
*)tp
.item
->data
;
1711 if (ei
->flags
& EXTENT_ITEM_TREE_BLOCK
)
1716 Status
= add_data_reloc(Vcb
, &items
, &metadata_items
, &tp
, c
, &rollback
);
1718 if (!NT_SUCCESS(Status
)) {
1719 ERR("add_data_reloc returned %08lx\n", Status
);
1723 loaded
+= tp
.item
->key
.offset
;
1726 if (loaded
>= 0x1000000 || num_loaded
>= 100) // only do so much at a time, so we don't block too obnoxiously
1731 b
= find_next_item(Vcb
, &tp
, &next_tp
, false, NULL
);
1737 if (IsListEmpty(&items
)) {
1739 Status
= STATUS_SUCCESS
;
/* Bounce buffer of BALANCE_UNIT bytes reused for every read/write pass below. */
1744 data
= ExAllocatePoolWithTag(PagedPool
, BALANCE_UNIT
, ALLOC_TAG
);
1746 ERR("out of memory\n");
1747 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1752 while (le
!= &items
) {
1753 data_reloc
* dr
= CONTAINING_RECORD(le
, data_reloc
, list_entry
);
1759 ULONG bmplen
, runlength
, index
, lastoff
;
/* Try to place the extent in the destination chunk already in use. */
1762 acquire_chunk_lock(newchunk
, Vcb
);
1764 if (find_data_address_in_chunk(Vcb
, newchunk
, dr
->size
, &dr
->new_address
)) {
1765 newchunk
->used
+= dr
->size
;
1766 space_list_subtract(newchunk
, dr
->new_address
, dr
->size
, &rollback
);
1770 release_chunk_lock(newchunk
, Vcb
);
/* Scan the volume's chunks for a writable, non-relocating data chunk with room. */
1774 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
1776 le2
= Vcb
->chunks
.Flink
;
1777 while (le2
!= &Vcb
->chunks
) {
1778 chunk
* c2
= CONTAINING_RECORD(le2
, chunk
, list_entry
);
1780 if (!c2
->readonly
&& !c2
->reloc
&& c2
!= newchunk
&& c2
->chunk_item
->type
== Vcb
->data_flags
) {
1781 acquire_chunk_lock(c2
, Vcb
);
1783 if ((c2
->chunk_item
->size
- c2
->used
) >= dr
->size
) {
1784 if (find_data_address_in_chunk(Vcb
, c2
, dr
->size
, &dr
->new_address
)) {
1785 c2
->used
+= dr
->size
;
1786 space_list_subtract(c2
, dr
->new_address
, dr
->size
, &rollback
);
1787 release_chunk_lock(c2
, Vcb
);
1794 release_chunk_lock(c2
, Vcb
);
1800 // allocate new chunk if necessary
1802 Status
= alloc_chunk(Vcb
, Vcb
->data_flags
, &newchunk
, false);
1804 if (!NT_SUCCESS(Status
)) {
1805 ERR("alloc_chunk returned %08lx\n", Status
);
1806 ExReleaseResourceLite(&Vcb
->chunk_lock
);
1810 acquire_chunk_lock(newchunk
, Vcb
);
1812 newchunk
->balance_num
= Vcb
->balance
.balance_num
;
1814 if (!find_data_address_in_chunk(Vcb
, newchunk
, dr
->size
, &dr
->new_address
)) {
1815 release_chunk_lock(newchunk
, Vcb
);
1816 ExReleaseResourceLite(&Vcb
->chunk_lock
);
1817 ERR("could not find address in new chunk\n");
1818 Status
= STATUS_DISK_FULL
;
1821 newchunk
->used
+= dr
->size
;
1822 space_list_subtract(newchunk
, dr
->new_address
, dr
->size
, &rollback
);
1825 release_chunk_lock(newchunk
, Vcb
);
1828 ExReleaseResourceLite(&Vcb
->chunk_lock
);
1831 dr
->newchunk
= newchunk
;
/* One bitmap bit per sector of the extent, plus a checksum buffer sized for the whole extent. */
1833 bmplen
= (ULONG
)(dr
->size
>> Vcb
->sector_shift
);
1835 bmparr
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)sector_align(bmplen
+ 1, sizeof(ULONG
)), ALLOC_TAG
);
1837 ERR("out of memory\n");
1838 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1842 csum
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)((dr
->size
* Vcb
->csum_size
) >> Vcb
->sector_shift
), ALLOC_TAG
);
1844 ERR("out of memory\n");
1846 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1850 RtlInitializeBitMap(&bmp
, bmparr
, bmplen
);
1851 RtlSetAllBits(&bmp
); // 1 = no csum, 0 = csum
/* Load the existing checksums covering the extent's old address range. */
1853 searchkey
.obj_id
= EXTENT_CSUM_ID
;
1854 searchkey
.obj_type
= TYPE_EXTENT_CSUM
;
1855 searchkey
.offset
= dr
->address
;
1857 Status
= find_item(Vcb
, Vcb
->checksum_root
, &tp
, &searchkey
, false, NULL
);
1858 if (!NT_SUCCESS(Status
) && Status
!= STATUS_NOT_FOUND
) {
1859 ERR("find_item returned %08lx\n", Status
);
1865 if (Status
!= STATUS_NOT_FOUND
) {
1867 traverse_ptr next_tp
;
1869 if (tp
.item
->key
.obj_type
== TYPE_EXTENT_CSUM
) {
1870 if (tp
.item
->key
.offset
>= dr
->address
+ dr
->size
)
1872 else if (tp
.item
->size
>= Vcb
->csum_size
&& tp
.item
->key
.offset
+ (((unsigned int)tp
.item
->size
<< Vcb
->sector_shift
) / Vcb
->csum_size
) >= dr
->address
) {
1873 uint64_t cs
= max(dr
->address
, tp
.item
->key
.offset
);
1874 uint64_t ce
= min(dr
->address
+ dr
->size
, tp
.item
->key
.offset
+ (((unsigned int)tp
.item
->size
<< Vcb
->sector_shift
) / Vcb
->csum_size
));
1876 RtlCopyMemory((uint8_t*)csum
+ (((cs
- dr
->address
) * Vcb
->csum_size
) >> Vcb
->sector_shift
),
1877 tp
.item
->data
+ (((cs
- tp
.item
->key
.offset
) * Vcb
->csum_size
) >> Vcb
->sector_shift
),
1878 (ULONG
)(((ce
- cs
) * Vcb
->csum_size
) >> Vcb
->sector_shift
));
1880 RtlClearBits(&bmp
, (ULONG
)((cs
- dr
->address
) >> Vcb
->sector_shift
), (ULONG
)((ce
- cs
) >> Vcb
->sector_shift
));
1882 if (ce
== dr
->address
+ dr
->size
)
1887 if (find_next_item(Vcb
, &tp
, &next_tp
, false, NULL
))
/* Walk the runs of clear bits (sectors that have checksums); gaps between runs are copied without checksums. */
1895 runlength
= RtlFindFirstRunClear(&bmp
, &index
);
1897 while (runlength
!= 0) {
1898 if (index
>= bmplen
)
1901 if (index
+ runlength
>= bmplen
) {
1902 runlength
= bmplen
- index
;
1908 if (index
> lastoff
) {
1909 ULONG off
= lastoff
;
1910 ULONG size
= index
- lastoff
;
1912 // handle no csum run
1916 if (size
<< Vcb
->sector_shift
> BALANCE_UNIT
)
1917 rl
= BALANCE_UNIT
>> Vcb
->sector_shift
;
1921 Status
= read_data(Vcb
, dr
->address
+ (off
<< Vcb
->sector_shift
), rl
<< Vcb
->sector_shift
, NULL
, false, data
,
1922 c
, NULL
, NULL
, 0, false, NormalPagePriority
);
1923 if (!NT_SUCCESS(Status
)) {
1924 ERR("read_data returned %08lx\n", Status
);
1930 Status
= write_data_complete(Vcb
, dr
->new_address
+ (off
<< Vcb
->sector_shift
), data
, rl
<< Vcb
->sector_shift
,
1931 NULL
, newchunk
, false, 0, NormalPagePriority
);
1932 if (!NT_SUCCESS(Status
)) {
1933 ERR("write_data_complete returned %08lx\n", Status
);
/* Add the run's checksums at the new address; the second call passes NULL checksum data for the old address. */
1944 add_checksum_entry(Vcb
, dr
->new_address
+ (index
<< Vcb
->sector_shift
), runlength
, (uint8_t*)csum
+ (index
* Vcb
->csum_size
), NULL
);
1945 add_checksum_entry(Vcb
, dr
->address
+ (index
<< Vcb
->sector_shift
), runlength
, NULL
, NULL
);
1951 if (runlength
<< Vcb
->sector_shift
> BALANCE_UNIT
)
1952 rl
= BALANCE_UNIT
>> Vcb
->sector_shift
;
1956 Status
= read_data(Vcb
, dr
->address
+ (index
<< Vcb
->sector_shift
), rl
<< Vcb
->sector_shift
,
1957 (uint8_t*)csum
+ (index
* Vcb
->csum_size
), false, data
, c
, NULL
, NULL
, 0, false, NormalPagePriority
);
1958 if (!NT_SUCCESS(Status
)) {
1959 ERR("read_data returned %08lx\n", Status
);
1965 Status
= write_data_complete(Vcb
, dr
->new_address
+ (index
<< Vcb
->sector_shift
), data
, rl
<< Vcb
->sector_shift
,
1966 NULL
, newchunk
, false, 0, NormalPagePriority
);
1967 if (!NT_SUCCESS(Status
)) {
1968 ERR("write_data_complete returned %08lx\n", Status
);
1976 } while (runlength
> 0);
1979 runlength
= RtlFindNextForwardRunClear(&bmp
, index
, &index
);
1985 // handle final nocsum run
1986 if (lastoff
< dr
->size
>> Vcb
->sector_shift
) {
1987 ULONG off
= lastoff
;
1988 ULONG size
= (ULONG
)((dr
->size
>> Vcb
->sector_shift
) - lastoff
);
1993 if (size
<< Vcb
->sector_shift
> BALANCE_UNIT
)
1994 rl
= BALANCE_UNIT
>> Vcb
->sector_shift
;
1998 Status
= read_data(Vcb
, dr
->address
+ (off
<< Vcb
->sector_shift
), rl
<< Vcb
->sector_shift
, NULL
, false, data
,
1999 c
, NULL
, NULL
, 0, false, NormalPagePriority
);
2000 if (!NT_SUCCESS(Status
)) {
2001 ERR("read_data returned %08lx\n", Status
);
2005 Status
= write_data_complete(Vcb
, dr
->new_address
+ (off
<< Vcb
->sector_shift
), data
, rl
<< Vcb
->sector_shift
,
2006 NULL
, newchunk
, false, 0, NormalPagePriority
);
2007 if (!NT_SUCCESS(Status
)) {
2008 ERR("write_data_complete returned %08lx\n", Status
);
/* Data has been copied: write out the affected metadata, then the new extent items. */
2023 Status
= write_metadata_items(Vcb
, &metadata_items
, &items
, NULL
, &rollback
);
2024 if (!NT_SUCCESS(Status
)) {
2025 ERR("write_metadata_items returned %08lx\n", Status
);
2030 while (le
!= &items
) {
2031 data_reloc
* dr
= CONTAINING_RECORD(le
, data_reloc
, list_entry
);
2033 Status
= add_data_reloc_extent_item(Vcb
, dr
);
2034 if (!NT_SUCCESS(Status
)) {
2035 ERR("add_data_reloc_extent_item returned %08lx\n", Status
);
/* Move changed_extent records for relocated extents onto the destination chunk's list, with the new address. */
2042 le
= c
->changed_extents
.Flink
;
2043 while (le
!= &c
->changed_extents
) {
2044 LIST_ENTRY
*le2
, *le3
;
2045 changed_extent
* ce
= CONTAINING_RECORD(le
, changed_extent
, list_entry
);
2050 while (le2
!= &items
) {
2051 data_reloc
* dr
= CONTAINING_RECORD(le2
, data_reloc
, list_entry
);
2053 if (ce
->address
== dr
->address
) {
2054 ce
->address
= dr
->new_address
;
2055 RemoveEntryList(&ce
->list_entry
);
2056 InsertTailList(&dr
->newchunk
->changed_extents
, &ce
->list_entry
);
2066 Status
= STATUS_SUCCESS
;
2068 Vcb
->need_write
= true;
2071 if (NT_SUCCESS(Status
)) {
2072 // update extents in cache inodes before we flush
2073 le
= Vcb
->chunks
.Flink
;
2074 while (le
!= &Vcb
->chunks
) {
2075 chunk
* c2
= CONTAINING_RECORD(le
, chunk
, list_entry
);
2080 ExAcquireResourceExclusiveLite(c2
->cache
->Header
.Resource
, true);
2082 le2
= c2
->cache
->extents
.Flink
;
2083 while (le2
!= &c2
->cache
->extents
) {
2084 extent
* ext
= CONTAINING_RECORD(le2
, extent
, list_entry
);
2087 if (ext
->extent_data
.type
== EXTENT_TYPE_REGULAR
|| ext
->extent_data
.type
== EXTENT_TYPE_PREALLOC
) {
2088 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ext
->extent_data
.data
;
2090 if (ed2
->size
> 0 && ed2
->address
>= c
->offset
&& ed2
->address
< c
->offset
+ c
->chunk_item
->size
) {
2091 LIST_ENTRY
* le3
= items
.Flink
;
2092 while (le3
!= &items
) {
2093 data_reloc
* dr
= CONTAINING_RECORD(le3
, data_reloc
, list_entry
);
2095 if (ed2
->address
== dr
->address
) {
2096 ed2
->address
= dr
->new_address
;
2109 ExReleaseResourceLite(c2
->cache
->Header
.Resource
);
2115 Status
= do_write(Vcb
, NULL
);
2116 if (!NT_SUCCESS(Status
))
2117 ERR("do_write returned %08lx\n", Status
);
2120 if (NT_SUCCESS(Status
)) {
2121 clear_rollback(&rollback
);
2124 // FIXME - speed this up(?)
/* Re-point the in-memory extent records of every fcb on Vcb->all_fcbs at the new addresses. */
2126 le
= Vcb
->all_fcbs
.Flink
;
2127 while (le
!= &Vcb
->all_fcbs
) {
2128 struct _fcb
* fcb
= CONTAINING_RECORD(le
, struct _fcb
, list_entry_all
);
2131 ExAcquireResourceExclusiveLite(fcb
->Header
.Resource
, true);
2133 le2
= fcb
->extents
.Flink
;
2134 while (le2
!= &fcb
->extents
) {
2135 extent
* ext
= CONTAINING_RECORD(le2
, extent
, list_entry
);
2138 if (ext
->extent_data
.type
== EXTENT_TYPE_REGULAR
|| ext
->extent_data
.type
== EXTENT_TYPE_PREALLOC
) {
2139 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ext
->extent_data
.data
;
2141 if (ed2
->size
> 0 && ed2
->address
>= c
->offset
&& ed2
->address
< c
->offset
+ c
->chunk_item
->size
) {
2142 LIST_ENTRY
* le3
= items
.Flink
;
2143 while (le3
!= &items
) {
2144 data_reloc
* dr
= CONTAINING_RECORD(le3
, data_reloc
, list_entry
);
2146 if (ed2
->address
== dr
->address
) {
2147 ed2
->address
= dr
->new_address
;
2160 ExReleaseResourceLite(fcb
->Header
.Resource
);
2165 do_rollback(Vcb
, &rollback
);
2169 ExReleaseResourceLite(&Vcb
->tree_lock
);
/* Tear down the relocation bookkeeping: drain each data_reloc and metadata_reloc together with its refs list. */
2174 while (!IsListEmpty(&items
)) {
2175 data_reloc
* dr
= CONTAINING_RECORD(RemoveHeadList(&items
), data_reloc
, list_entry
);
2177 while (!IsListEmpty(&dr
->refs
)) {
2178 data_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&dr
->refs
), data_reloc_ref
, list_entry
);
2186 while (!IsListEmpty(&metadata_items
)) {
2187 metadata_reloc
* mr
= CONTAINING_RECORD(RemoveHeadList(&metadata_items
), metadata_reloc
, list_entry
);
2189 while (!IsListEmpty(&mr
->refs
)) {
2190 metadata_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&mr
->refs
), metadata_reloc_ref
, list_entry
);
2196 ExFreePool(mr
->data
);
2204 static __inline
uint64_t get_chunk_dup_type(chunk
* c
) {
2205 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID0
)
2206 return BLOCK_FLAG_RAID0
;
2207 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID1
)
2208 return BLOCK_FLAG_RAID1
;
2209 else if (c
->chunk_item
->type
& BLOCK_FLAG_DUPLICATE
)
2210 return BLOCK_FLAG_DUPLICATE
;
2211 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID10
)
2212 return BLOCK_FLAG_RAID10
;
2213 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
)
2214 return BLOCK_FLAG_RAID5
;
2215 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)
2216 return BLOCK_FLAG_RAID6
;
2217 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID1C3
)
2218 return BLOCK_FLAG_RAID1C3
;
2219 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID1C4
)
2220 return BLOCK_FLAG_RAID1C4
;
2222 return BLOCK_FLAG_SINGLE
;
2225 static bool should_balance_chunk(device_extension
* Vcb
, uint8_t sort
, chunk
* c
) {
2226 btrfs_balance_opts
* opts
;
2228 opts
= &Vcb
->balance
.opts
[sort
];
2230 if (!(opts
->flags
& BTRFS_BALANCE_OPTS_ENABLED
))
2233 if (opts
->flags
& BTRFS_BALANCE_OPTS_PROFILES
) {
2234 uint64_t type
= get_chunk_dup_type(c
);
2236 if (!(type
& opts
->profiles
))
2240 if (opts
->flags
& BTRFS_BALANCE_OPTS_DEVID
) {
2242 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
2245 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
2246 if (cis
[i
].dev_id
== opts
->devid
) {
2256 if (opts
->flags
& BTRFS_BALANCE_OPTS_DRANGE
) {
2259 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
2262 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID0
)
2263 factor
= c
->chunk_item
->num_stripes
;
2264 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID10
)
2265 factor
= c
->chunk_item
->num_stripes
/ c
->chunk_item
->sub_stripes
;
2266 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
)
2267 factor
= c
->chunk_item
->num_stripes
- 1;
2268 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)
2269 factor
= c
->chunk_item
->num_stripes
- 2;
2270 else // SINGLE, DUPLICATE, RAID1, RAID1C3, RAID1C4
2273 physsize
= c
->chunk_item
->size
/ factor
;
2275 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
2276 if (cis
[i
].offset
< opts
->drange_end
&& cis
[i
].offset
+ physsize
>= opts
->drange_start
&&
2277 (!(opts
->flags
& BTRFS_BALANCE_OPTS_DEVID
) || cis
[i
].dev_id
== opts
->devid
)) {
2287 if (opts
->flags
& BTRFS_BALANCE_OPTS_VRANGE
) {
2288 if (c
->offset
+ c
->chunk_item
->size
<= opts
->vrange_start
|| c
->offset
> opts
->vrange_end
)
2292 if (opts
->flags
& BTRFS_BALANCE_OPTS_STRIPES
) {
2293 if (c
->chunk_item
->num_stripes
< opts
->stripes_start
|| c
->chunk_item
->num_stripes
< opts
->stripes_end
)
2297 if (opts
->flags
& BTRFS_BALANCE_OPTS_USAGE
) {
2298 uint64_t usage
= c
->used
* 100 / c
->chunk_item
->size
;
2300 // usage == 0 should mean completely empty, not just that usage rounds to 0%
2301 if (c
->used
> 0 && usage
== 0)
2304 if (usage
< opts
->usage_start
|| usage
> opts
->usage_end
)
2308 if (opts
->flags
& BTRFS_BALANCE_OPTS_CONVERT
&& opts
->flags
& BTRFS_BALANCE_OPTS_SOFT
) {
2309 uint64_t type
= get_chunk_dup_type(c
);
2311 if (type
== opts
->convert
)
2318 static void copy_balance_args(btrfs_balance_opts
* opts
, BALANCE_ARGS
* args
) {
2319 if (opts
->flags
& BTRFS_BALANCE_OPTS_PROFILES
) {
2320 args
->profiles
= opts
->profiles
;
2321 args
->flags
|= BALANCE_ARGS_FLAGS_PROFILES
;
2324 if (opts
->flags
& BTRFS_BALANCE_OPTS_USAGE
) {
2325 if (args
->usage_start
== 0) {
2326 args
->flags
|= BALANCE_ARGS_FLAGS_USAGE_RANGE
;
2327 args
->usage_start
= opts
->usage_start
;
2328 args
->usage_end
= opts
->usage_end
;
2330 args
->flags
|= BALANCE_ARGS_FLAGS_USAGE
;
2331 args
->usage
= opts
->usage_end
;
2335 if (opts
->flags
& BTRFS_BALANCE_OPTS_DEVID
) {
2336 args
->devid
= opts
->devid
;
2337 args
->flags
|= BALANCE_ARGS_FLAGS_DEVID
;
2340 if (opts
->flags
& BTRFS_BALANCE_OPTS_DRANGE
) {
2341 args
->drange_start
= opts
->drange_start
;
2342 args
->drange_end
= opts
->drange_end
;
2343 args
->flags
|= BALANCE_ARGS_FLAGS_DRANGE
;
2346 if (opts
->flags
& BTRFS_BALANCE_OPTS_VRANGE
) {
2347 args
->vrange_start
= opts
->vrange_start
;
2348 args
->vrange_end
= opts
->vrange_end
;
2349 args
->flags
|= BALANCE_ARGS_FLAGS_VRANGE
;
2352 if (opts
->flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
2353 args
->convert
= opts
->convert
;
2354 args
->flags
|= BALANCE_ARGS_FLAGS_CONVERT
;
2356 if (opts
->flags
& BTRFS_BALANCE_OPTS_SOFT
)
2357 args
->flags
|= BALANCE_ARGS_FLAGS_SOFT
;
2360 if (opts
->flags
& BTRFS_BALANCE_OPTS_LIMIT
) {
2361 if (args
->limit_start
== 0) {
2362 args
->flags
|= BALANCE_ARGS_FLAGS_LIMIT_RANGE
;
2363 args
->limit_start
= (uint32_t)opts
->limit_start
;
2364 args
->limit_end
= (uint32_t)opts
->limit_end
;
2366 args
->flags
|= BALANCE_ARGS_FLAGS_LIMIT
;
2367 args
->limit
= opts
->limit_end
;
2371 if (opts
->flags
& BTRFS_BALANCE_OPTS_STRIPES
) {
2372 args
->stripes_start
= opts
->stripes_start
;
2373 args
->stripes_end
= opts
->stripes_end
;
2374 args
->flags
|= BALANCE_ARGS_FLAGS_STRIPES_RANGE
;
2378 static NTSTATUS
add_balance_item(device_extension
* Vcb
) {
2384 searchkey
.obj_id
= BALANCE_ITEM_ID
;
2385 searchkey
.obj_type
= TYPE_TEMP_ITEM
;
2386 searchkey
.offset
= 0;
2388 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
2390 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, NULL
);
2391 if (!NT_SUCCESS(Status
)) {
2392 ERR("find_item returned %08lx\n", Status
);
2396 if (!keycmp(tp
.item
->key
, searchkey
)) {
2397 Status
= delete_tree_item(Vcb
, &tp
);
2398 if (!NT_SUCCESS(Status
)) {
2399 ERR("delete_tree_item returned %08lx\n", Status
);
2404 bi
= ExAllocatePoolWithTag(PagedPool
, sizeof(BALANCE_ITEM
), ALLOC_TAG
);
2406 ERR("out of memory\n");
2407 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2411 RtlZeroMemory(bi
, sizeof(BALANCE_ITEM
));
2413 if (Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
2414 bi
->flags
|= BALANCE_FLAGS_DATA
;
2415 copy_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], &bi
->data
);
2418 if (Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
2419 bi
->flags
|= BALANCE_FLAGS_METADATA
;
2420 copy_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], &bi
->metadata
);
2423 if (Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
2424 bi
->flags
|= BALANCE_FLAGS_SYSTEM
;
2425 copy_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
], &bi
->system
);
2428 Status
= insert_tree_item(Vcb
, Vcb
->root_root
, BALANCE_ITEM_ID
, TYPE_TEMP_ITEM
, 0, bi
, sizeof(BALANCE_ITEM
), NULL
, NULL
);
2429 if (!NT_SUCCESS(Status
)) {
2430 ERR("insert_tree_item returned %08lx\n", Status
);
2435 Status
= STATUS_SUCCESS
;
2438 if (NT_SUCCESS(Status
)) {
2439 Status
= do_write(Vcb
, NULL
);
2440 if (!NT_SUCCESS(Status
))
2441 ERR("do_write returned %08lx\n", Status
);
2446 ExReleaseResourceLite(&Vcb
->tree_lock
);
2451 static NTSTATUS
remove_balance_item(device_extension
* Vcb
) {
2456 searchkey
.obj_id
= BALANCE_ITEM_ID
;
2457 searchkey
.obj_type
= TYPE_TEMP_ITEM
;
2458 searchkey
.offset
= 0;
2460 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
2462 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, NULL
);
2463 if (!NT_SUCCESS(Status
)) {
2464 ERR("find_item returned %08lx\n", Status
);
2468 if (!keycmp(tp
.item
->key
, searchkey
)) {
2469 Status
= delete_tree_item(Vcb
, &tp
);
2470 if (!NT_SUCCESS(Status
)) {
2471 ERR("delete_tree_item returned %08lx\n", Status
);
2475 Status
= do_write(Vcb
, NULL
);
2476 if (!NT_SUCCESS(Status
)) {
2477 ERR("do_write returned %08lx\n", Status
);
2484 Status
= STATUS_SUCCESS
;
2487 ExReleaseResourceLite(&Vcb
->tree_lock
);
2492 static void load_balance_args(btrfs_balance_opts
* opts
, BALANCE_ARGS
* args
) {
2493 opts
->flags
= BTRFS_BALANCE_OPTS_ENABLED
;
2495 if (args
->flags
& BALANCE_ARGS_FLAGS_PROFILES
) {
2496 opts
->flags
|= BTRFS_BALANCE_OPTS_PROFILES
;
2497 opts
->profiles
= args
->profiles
;
2500 if (args
->flags
& BALANCE_ARGS_FLAGS_USAGE
) {
2501 opts
->flags
|= BTRFS_BALANCE_OPTS_USAGE
;
2503 opts
->usage_start
= 0;
2504 opts
->usage_end
= (uint8_t)args
->usage
;
2505 } else if (args
->flags
& BALANCE_ARGS_FLAGS_USAGE_RANGE
) {
2506 opts
->flags
|= BTRFS_BALANCE_OPTS_USAGE
;
2508 opts
->usage_start
= (uint8_t)args
->usage_start
;
2509 opts
->usage_end
= (uint8_t)args
->usage_end
;
2512 if (args
->flags
& BALANCE_ARGS_FLAGS_DEVID
) {
2513 opts
->flags
|= BTRFS_BALANCE_OPTS_DEVID
;
2514 opts
->devid
= args
->devid
;
2517 if (args
->flags
& BALANCE_ARGS_FLAGS_DRANGE
) {
2518 opts
->flags
|= BTRFS_BALANCE_OPTS_DRANGE
;
2519 opts
->drange_start
= args
->drange_start
;
2520 opts
->drange_end
= args
->drange_end
;
2523 if (args
->flags
& BALANCE_ARGS_FLAGS_VRANGE
) {
2524 opts
->flags
|= BTRFS_BALANCE_OPTS_VRANGE
;
2525 opts
->vrange_start
= args
->vrange_start
;
2526 opts
->vrange_end
= args
->vrange_end
;
2529 if (args
->flags
& BALANCE_ARGS_FLAGS_LIMIT
) {
2530 opts
->flags
|= BTRFS_BALANCE_OPTS_LIMIT
;
2532 opts
->limit_start
= 0;
2533 opts
->limit_end
= args
->limit
;
2534 } else if (args
->flags
& BALANCE_ARGS_FLAGS_LIMIT_RANGE
) {
2535 opts
->flags
|= BTRFS_BALANCE_OPTS_LIMIT
;
2537 opts
->limit_start
= args
->limit_start
;
2538 opts
->limit_end
= args
->limit_end
;
2541 if (args
->flags
& BALANCE_ARGS_FLAGS_STRIPES_RANGE
) {
2542 opts
->flags
|= BTRFS_BALANCE_OPTS_STRIPES
;
2544 opts
->stripes_start
= (uint16_t)args
->stripes_start
;
2545 opts
->stripes_end
= (uint16_t)args
->stripes_end
;
2548 if (args
->flags
& BALANCE_ARGS_FLAGS_CONVERT
) {
2549 opts
->flags
|= BTRFS_BALANCE_OPTS_CONVERT
;
2550 opts
->convert
= args
->convert
;
2552 if (args
->flags
& BALANCE_ARGS_FLAGS_SOFT
)
2553 opts
->flags
|= BTRFS_BALANCE_OPTS_SOFT
;
2557 static NTSTATUS
remove_superblocks(device
* dev
) {
2562 sb
= ExAllocatePoolWithTag(PagedPool
, sizeof(superblock
), ALLOC_TAG
);
2564 ERR("out of memory\n");
2565 return STATUS_INSUFFICIENT_RESOURCES
;
2568 RtlZeroMemory(sb
, sizeof(superblock
));
2570 while (superblock_addrs
[i
] > 0 && dev
->devitem
.num_bytes
>= superblock_addrs
[i
] + sizeof(superblock
)) {
2571 Status
= write_data_phys(dev
->devobj
, dev
->fileobj
, superblock_addrs
[i
], sb
, sizeof(superblock
));
2573 if (!NT_SUCCESS(Status
)) {
2583 return STATUS_SUCCESS
;
2586 static NTSTATUS
finish_removing_device(_Requires_exclusive_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, device
* dev
) {
2591 volume_device_extension
* vde
;
2593 if (Vcb
->need_write
) {
2594 Status
= do_write(Vcb
, NULL
);
2596 if (!NT_SUCCESS(Status
))
2597 ERR("do_write returned %08lx\n", Status
);
2599 Status
= STATUS_SUCCESS
;
2603 if (!NT_SUCCESS(Status
))
2606 // remove entry in chunk tree
2608 searchkey
.obj_id
= 1;
2609 searchkey
.obj_type
= TYPE_DEV_ITEM
;
2610 searchkey
.offset
= dev
->devitem
.dev_id
;
2612 Status
= find_item(Vcb
, Vcb
->chunk_root
, &tp
, &searchkey
, false, NULL
);
2613 if (!NT_SUCCESS(Status
)) {
2614 ERR("find_item returned %08lx\n", Status
);
2618 if (!keycmp(searchkey
, tp
.item
->key
)) {
2619 Status
= delete_tree_item(Vcb
, &tp
);
2621 if (!NT_SUCCESS(Status
)) {
2622 ERR("delete_tree_item returned %08lx\n", Status
);
2627 // remove stats entry in device tree
2629 searchkey
.obj_id
= 0;
2630 searchkey
.obj_type
= TYPE_DEV_STATS
;
2631 searchkey
.offset
= dev
->devitem
.dev_id
;
2633 Status
= find_item(Vcb
, Vcb
->dev_root
, &tp
, &searchkey
, false, NULL
);
2634 if (!NT_SUCCESS(Status
)) {
2635 ERR("find_item returned %08lx\n", Status
);
2639 if (!keycmp(searchkey
, tp
.item
->key
)) {
2640 Status
= delete_tree_item(Vcb
, &tp
);
2642 if (!NT_SUCCESS(Status
)) {
2643 ERR("delete_tree_item returned %08lx\n", Status
);
2648 // update superblock
2650 Vcb
->superblock
.num_devices
--;
2651 Vcb
->superblock
.total_bytes
-= dev
->devitem
.num_bytes
;
2652 Vcb
->devices_loaded
--;
2654 RemoveEntryList(&dev
->list_entry
);
2658 Status
= do_write(Vcb
, NULL
);
2659 if (!NT_SUCCESS(Status
))
2660 ERR("do_write returned %08lx\n", Status
);
2664 if (!NT_SUCCESS(Status
))
2667 if (!dev
->readonly
&& dev
->devobj
) {
2668 Status
= remove_superblocks(dev
);
2669 if (!NT_SUCCESS(Status
))
2670 WARN("remove_superblocks returned %08lx\n", Status
);
2673 // remove entry in volume list
2678 pdo_device_extension
* pdode
= vde
->pdode
;
2680 ExAcquireResourceExclusiveLite(&pdode
->child_lock
, true);
2682 le
= pdode
->children
.Flink
;
2683 while (le
!= &pdode
->children
) {
2684 volume_child
* vc
= CONTAINING_RECORD(le
, volume_child
, list_entry
);
2686 if (RtlCompareMemory(&dev
->devitem
.device_uuid
, &vc
->uuid
, sizeof(BTRFS_UUID
)) == sizeof(BTRFS_UUID
)) {
2687 PFILE_OBJECT FileObject
;
2688 PDEVICE_OBJECT mountmgr
;
2689 UNICODE_STRING mmdevpath
;
2691 pdode
->children_loaded
--;
2693 if (vc
->had_drive_letter
) { // re-add entry to mountmgr
2694 RtlInitUnicodeString(&mmdevpath
, MOUNTMGR_DEVICE_NAME
);
2695 Status
= IoGetDeviceObjectPointer(&mmdevpath
, FILE_READ_ATTRIBUTES
, &FileObject
, &mountmgr
);
2696 if (!NT_SUCCESS(Status
))
2697 ERR("IoGetDeviceObjectPointer returned %08lx\n", Status
);
2701 Status
= dev_ioctl(dev
->devobj
, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME
, NULL
, 0, &mdn
, sizeof(MOUNTDEV_NAME
), true, NULL
);
2702 if (!NT_SUCCESS(Status
) && Status
!= STATUS_BUFFER_OVERFLOW
)
2703 ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08lx\n", Status
);
2705 MOUNTDEV_NAME
* mdn2
;
2706 ULONG mdnsize
= (ULONG
)offsetof(MOUNTDEV_NAME
, Name
[0]) + mdn
.NameLength
;
2708 mdn2
= ExAllocatePoolWithTag(PagedPool
, mdnsize
, ALLOC_TAG
);
2710 ERR("out of memory\n");
2712 Status
= dev_ioctl(dev
->devobj
, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME
, NULL
, 0, mdn2
, mdnsize
, true, NULL
);
2713 if (!NT_SUCCESS(Status
))
2714 ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08lx\n", Status
);
2716 UNICODE_STRING name
;
2718 name
.Buffer
= mdn2
->Name
;
2719 name
.Length
= name
.MaximumLength
= mdn2
->NameLength
;
2721 Status
= mountmgr_add_drive_letter(mountmgr
, &name
);
2722 if (!NT_SUCCESS(Status
))
2723 WARN("mountmgr_add_drive_letter returned %08lx\n", Status
);
2730 ObDereferenceObject(FileObject
);
2734 ExFreePool(vc
->pnp_name
.Buffer
);
2735 RemoveEntryList(&vc
->list_entry
);
2738 ObDereferenceObject(vc
->fileobj
);
2746 if (pdode
->children_loaded
> 0 && vde
->device
->Characteristics
& FILE_REMOVABLE_MEDIA
) {
2747 vde
->device
->Characteristics
&= ~FILE_REMOVABLE_MEDIA
;
2749 le
= pdode
->children
.Flink
;
2750 while (le
!= &pdode
->children
) {
2751 volume_child
* vc
= CONTAINING_RECORD(le
, volume_child
, list_entry
);
2753 if (vc
->devobj
->Characteristics
& FILE_REMOVABLE_MEDIA
) {
2754 vde
->device
->Characteristics
|= FILE_REMOVABLE_MEDIA
;
2762 pdode
->num_children
= Vcb
->superblock
.num_devices
;
2764 ExReleaseResourceLite(&pdode
->child_lock
);
2768 if (dev
->trim
&& !dev
->readonly
&& !Vcb
->options
.no_trim
)
2769 trim_whole_device(dev
);
2772 while (!IsListEmpty(&dev
->space
)) {
2773 LIST_ENTRY
* le2
= RemoveHeadList(&dev
->space
);
2774 space
* s
= CONTAINING_RECORD(le2
, space
, list_entry
);
2784 le
= Vcb
->devices
.Flink
;
2785 while (le
!= &Vcb
->devices
) {
2786 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
2797 FsRtlNotifyVolumeEvent(Vcb
->root_file
, FSRTL_VOLUME_CHANGE_SIZE
);
2799 return STATUS_SUCCESS
;
/* Sends a TRIM request for the unallocated space on a device.
 *
 * Walks the TYPE_DEV_EXTENT items for dev in the device tree, queues a trim
 * entry for each gap between consecutive extents (and for the gap between the
 * last extent and the end of the device), then passes all queued ranges to the
 * device in one IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES call.
 *
 * Vcb - volume; the annotation requires tree_lock to be held by the caller
 * dev - device to trim; its trim_list and num_trim_entries are used as scratch
 *
 * A failure of the ioctl is only logged as a warning. */
2802 static void trim_unalloc_space(_Requires_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
, device
* dev
) {
2803 DEVICE_MANAGE_DATA_SET_ATTRIBUTES
* dmdsa
;
2804 DEVICE_DATA_SET_RANGE
* ranges
;
2810 uint64_t lastoff
= 0x100000; // don't TRIM the first megabyte, in case someone has been daft enough to install GRUB there
2813 dev
->num_trim_entries
= 0;
// start the search at the lowest possible DEV_EXTENT key for this device
2815 searchkey
.obj_id
= dev
->devitem
.dev_id
;
2816 searchkey
.obj_type
= TYPE_DEV_EXTENT
;
2817 searchkey
.offset
= 0;
2819 Status
= find_item(Vcb
, Vcb
->dev_root
, &tp
, &searchkey
, false, NULL
);
2820 if (!NT_SUCCESS(Status
)) {
2821 ERR("find_item returned %08lx\n", Status
);
2826 traverse_ptr next_tp
;
// lastoff is the end of the previous extent: anything between it and the
// start of this extent is unallocated
2828 if (tp
.item
->key
.obj_id
== dev
->devitem
.dev_id
&& tp
.item
->key
.obj_type
== TYPE_DEV_EXTENT
) {
2829 if (tp
.item
->size
>= sizeof(DEV_EXTENT
)) {
2830 DEV_EXTENT
* de
= (DEV_EXTENT
*)tp
.item
->data
;
2832 if (tp
.item
->key
.offset
> lastoff
)
2833 add_trim_entry_avoid_sb(Vcb
, dev
, lastoff
, tp
.item
->key
.offset
- lastoff
);
2835 lastoff
= tp
.item
->key
.offset
+ de
->length
;
2837 ERR("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, sizeof(DEV_EXTENT
));
2842 b
= find_next_item(Vcb
, &tp
, &next_tp
, false, NULL
);
// test for a key that sorts after this device's DEV_EXTENT items
2846 if (tp
.item
->key
.obj_id
> searchkey
.obj_id
|| (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
> searchkey
.obj_type
))
// trailing gap between the last extent and the end of the device
2851 if (lastoff
< dev
->devitem
.num_bytes
)
2852 add_trim_entry_avoid_sb(Vcb
, dev
, lastoff
, dev
->devitem
.num_bytes
- lastoff
);
2854 if (dev
->num_trim_entries
== 0)
// request buffer: the header, padded to a multiple of 8 bytes, followed by
// one DEVICE_DATA_SET_RANGE per queued trim entry
2857 datalen
= (ULONG
)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES
), sizeof(uint64_t)) + (dev
->num_trim_entries
* sizeof(DEVICE_DATA_SET_RANGE
));
2859 dmdsa
= ExAllocatePoolWithTag(PagedPool
, datalen
, ALLOC_TAG
);
2861 ERR("out of memory\n");
2865 dmdsa
->Size
= sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES
);
2866 dmdsa
->Action
= DeviceDsmAction_Trim
;
2867 dmdsa
->Flags
= DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED
;
2868 dmdsa
->ParameterBlockOffset
= 0;
2869 dmdsa
->ParameterBlockLength
= 0;
2870 dmdsa
->DataSetRangesOffset
= (ULONG
)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES
), sizeof(uint64_t));
2871 dmdsa
->DataSetRangesLength
= dev
->num_trim_entries
* sizeof(DEVICE_DATA_SET_RANGE
);
2873 ranges
= (DEVICE_DATA_SET_RANGE
*)((uint8_t*)dmdsa
+ dmdsa
->DataSetRangesOffset
);
// copy the queued trim entries into the range array
2876 le
= dev
->trim_list
.Flink
;
2877 while (le
!= &dev
->trim_list
) {
2878 space
* s
= CONTAINING_RECORD(le
, space
, list_entry
);
2880 ranges
[i
].StartingOffset
= s
->address
;
2881 ranges
[i
].LengthInBytes
= s
->size
;
// the TRIM itself is best-effort: a failure is only logged
2887 Status
= dev_ioctl(dev
->devobj
, IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES
, dmdsa
, datalen
, NULL
, 0, true, NULL
);
2888 if (!NT_SUCCESS(Status
))
2889 WARN("IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES returned %08lx\n", Status
);
// empty the trim list again so the device is left with no queued entries
2894 while (!IsListEmpty(&dev
->trim_list
)) {
2895 space
* s
= CONTAINING_RECORD(RemoveHeadList(&dev
->trim_list
), space
, list_entry
);
2899 dev
->num_trim_entries
= 0;
/* Tries to make room for a new chunk by relocating existing data chunks.
 *
 * Picks the least-used writable data chunk that has not yet been handled in
 * the current balance run, relocates it with balance_data_chunk, flushes with
 * do_write, and finally attempts alloc_chunk with the requested flags.
 *
 * Vcb      - volume being balanced
 * flags    - chunk type flags passed to alloc_chunk
 * newchunk - out parameter; presumably receives the chunk returned by
 *            alloc_chunk (the assignment is not visible here - TODO confirm)
 *
 * Returns STATUS_SUCCESS early if the balance is being stopped; errors from
 * balance_data_chunk, do_write and alloc_chunk are logged. */
2902 static NTSTATUS
try_consolidation(device_extension
* Vcb
, uint64_t flags
, chunk
** newchunk
) {
2908 // FIXME - allow with metadata chunks?
2913 ExAcquireResourceSharedLite(&Vcb
->tree_lock
, true);
2915 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, true);
2917 // choose the least-used chunk we haven't looked at yet
2918 le
= Vcb
->chunks
.Flink
;
2919 while (le
!= &Vcb
->chunks
) {
2920 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
2922 // FIXME - skip full-size chunks over e.g. 90% full?
// a chunk whose balance_num equals the current run's number is skipped
2923 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
&& !c
->readonly
&& c
->balance_num
!= Vcb
->balance
.balance_num
&& (!rc
|| c
->used
< rc
->used
))
2929 ExReleaseResourceLite(&Vcb
->chunk_lock
);
2932 ExReleaseResourceLite(&Vcb
->tree_lock
);
// if the chosen chunk was already queued on a balance list, take it off and
// adjust the progress counter
2936 if (rc
->list_entry_balance
.Flink
) {
2937 RemoveEntryList(&rc
->list_entry_balance
);
2938 Vcb
->balance
.chunks_left
--;
2941 rc
->list_entry_balance
.Flink
= (LIST_ENTRY
*)1; // so it doesn't get dropped
2944 ExReleaseResourceLite(&Vcb
->tree_lock
);
2949 Status
= balance_data_chunk(Vcb
, rc
, &changed
);
2950 if (!NT_SUCCESS(Status
)) {
2951 ERR("balance_data_chunk returned %08lx\n", Status
);
2952 Vcb
->balance
.status
= Status
;
2953 rc
->list_entry_balance
.Flink
= NULL
;
// blocks while the balance is paused (pause_balance clears this event)
2958 KeWaitForSingleObject(&Vcb
->balance
.event
, Executive
, KernelMode
, false, NULL
);
2961 Vcb
->balance
.stopping
= true;
2963 if (Vcb
->balance
.stopping
)
2964 return STATUS_SUCCESS
;
// relocation finished: clear the marker and stamp the chunk with this run's
// number so it is not picked again
2967 rc
->list_entry_balance
.Flink
= NULL
;
2970 rc
->space_changed
= true;
2971 rc
->balance_num
= Vcb
->balance
.balance_num
;
2973 Status
= do_write(Vcb
, NULL
);
2974 if (!NT_SUCCESS(Status
)) {
2975 ERR("do_write returned %08lx\n", Status
);
// now try the allocation that the caller originally wanted
2982 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
2984 Status
= alloc_chunk(Vcb
, flags
, &rc
, true);
2986 ExReleaseResourceLite(&Vcb
->chunk_lock
);
2988 if (NT_SUCCESS(Status
)) {
2992 ERR("alloc_chunk returned %08lx\n", Status
);
2997 static NTSTATUS
regenerate_space_list(device_extension
* Vcb
, device
* dev
) {
3000 while (!IsListEmpty(&dev
->space
)) {
3001 space
* s
= CONTAINING_RECORD(RemoveHeadList(&dev
->space
), space
, list_entry
);
3006 // The Linux driver doesn't like to allocate chunks within the first megabyte of a device.
3008 space_list_add2(&dev
->space
, NULL
, 0x100000, dev
->devitem
.num_bytes
- 0x100000, NULL
, NULL
);
3010 le
= Vcb
->chunks
.Flink
;
3011 while (le
!= &Vcb
->chunks
) {
3013 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
3014 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
3016 for (n
= 0; n
< c
->chunk_item
->num_stripes
; n
++) {
3017 uint64_t stripe_size
= 0;
3019 if (cis
[n
].dev_id
== dev
->devitem
.dev_id
) {
3020 if (stripe_size
== 0) {
3023 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID0
)
3024 factor
= c
->chunk_item
->num_stripes
;
3025 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID10
)
3026 factor
= c
->chunk_item
->num_stripes
/ c
->chunk_item
->sub_stripes
;
3027 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
)
3028 factor
= c
->chunk_item
->num_stripes
- 1;
3029 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)
3030 factor
= c
->chunk_item
->num_stripes
- 2;
3031 else // SINGLE, DUP, RAID1, RAID1C3, RAID1C4
3034 stripe_size
= c
->chunk_item
->size
/ factor
;
3037 space_list_subtract2(&dev
->space
, NULL
, cis
[n
].offset
, stripe_size
, NULL
, NULL
);
3044 return STATUS_SUCCESS
;
/* Entry point of the balance worker thread.
 *
 * context is the device_extension of the volume. The options to apply are
 * read from Vcb->balance.opts, and Vcb->balance.removing / .shrinking select
 * the device-removal and device-shrink variants.
 *
 * In outline: switch the allocation flags if a conversion was requested,
 * record the balance on disk, collect the chunks to process, try to allocate
 * fresh chunks up front, relocate data chunks and then metadata/system
 * chunks, and finally clean up. On exit the thread handle is closed and
 * Vcb->balance.finished is signalled. Failures are reported through
 * Vcb->balance.status. */
3047 _Function_class_(KSTART_ROUTINE
)
3048 void __stdcall
balance_thread(void* context
) {
3049 device_extension
* Vcb
= (device_extension
*)context
;
3052 uint64_t num_chunks
[3], okay_metadata_chunks
= 0, okay_data_chunks
= 0, okay_system_chunks
= 0;
3053 uint64_t old_data_flags
= 0, old_metadata_flags
= 0, old_system_flags
= 0;
// each run gets its own number; chunks are stamped with it further down
3056 Vcb
->balance
.balance_num
++;
3058 Vcb
->balance
.stopping
= false;
3059 KeInitializeEvent(&Vcb
->balance
.finished
, NotificationEvent
, false);
// When converting, new chunks must be allocated with the target profile, so
// swap the volume's allocation flags now and remember the previous values.
// BLOCK_FLAG_SINGLE is mapped to "no profile bits".
3061 if (Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
&& Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
3062 old_data_flags
= Vcb
->data_flags
;
3063 Vcb
->data_flags
= BLOCK_FLAG_DATA
| (Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].convert
== BLOCK_FLAG_SINGLE
? 0 : Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].convert
);
3065 FsRtlNotifyVolumeEvent(Vcb
->root_file
, FSRTL_VOLUME_CHANGE_SIZE
);
3068 if (Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
&& Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
3069 old_metadata_flags
= Vcb
->metadata_flags
;
3070 Vcb
->metadata_flags
= BLOCK_FLAG_METADATA
| (Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].convert
== BLOCK_FLAG_SINGLE
? 0 : Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].convert
);
3073 if (Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].flags
& BTRFS_BALANCE_OPTS_ENABLED
&& Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
3074 old_system_flags
= Vcb
->system_flags
;
3075 Vcb
->system_flags
= BLOCK_FLAG_SYSTEM
| (Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].convert
== BLOCK_FLAG_SINGLE
? 0 : Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].convert
);
// with mixed block groups, whichever of the data / metadata option sets is
// enabled is copied over the other so that both are identical
3078 if (Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS
) {
3079 if (Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
)
3080 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], &Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], sizeof(btrfs_balance_opts
));
3081 else if (Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
)
3082 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], &Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], sizeof(btrfs_balance_opts
));
3085 num_chunks
[0] = num_chunks
[1] = num_chunks
[2] = 0;
3086 Vcb
->balance
.total_chunks
= Vcb
->balance
.chunks_left
= 0;
3088 InitializeListHead(&chunks
);
3090 // FIXME - what are we supposed to do with limit_start?
3092 if (!Vcb
->readonly
) {
// a balance item is only written for an ordinary balance, not for device
// removal or shrinking
3093 if (!Vcb
->balance
.removing
&& !Vcb
->balance
.shrinking
) {
3094 Status
= add_balance_item(Vcb
);
3095 if (!NT_SUCCESS(Status
)) {
3096 ERR("add_balance_item returned %08lx\n", Status
);
3097 Vcb
->balance
.status
= Status
;
3101 if (Vcb
->need_write
) {
3102 Status
= do_write(Vcb
, NULL
);
3106 if (!NT_SUCCESS(Status
)) {
3107 ERR("do_write returned %08lx\n", Status
);
3108 Vcb
->balance
.status
= Status
;
// blocks here while the balance is paused (pause_balance clears this event,
// resume_balance and stop_balance set it)
3115 KeWaitForSingleObject(&Vcb
->balance
.event
, Executive
, KernelMode
, false, NULL
);
3117 if (Vcb
->balance
.stopping
)
// build the list of chunks to process; chunks that are left alone are counted
// per type in the okay_* counters
3120 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, true);
3122 le
= Vcb
->chunks
.Flink
;
3123 while (le
!= &Vcb
->chunks
) {
3124 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
3127 acquire_chunk_lock(c
, Vcb
);
// sort selects which of the three option sets applies to this chunk
3129 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
)
3130 sort
= BALANCE_OPTS_DATA
;
3131 else if (c
->chunk_item
->type
& BLOCK_FLAG_METADATA
)
3132 sort
= BALANCE_OPTS_METADATA
;
3133 else if (c
->chunk_item
->type
& BLOCK_FLAG_SYSTEM
)
3134 sort
= BALANCE_OPTS_SYSTEM
;
3136 ERR("unexpected chunk type %I64x\n", c
->chunk_item
->type
);
3137 release_chunk_lock(c
, Vcb
);
// honour the per-type chunk limit (limit_end) if one was requested, then ask
// should_balance_chunk whether the remaining filters accept the chunk
3141 if ((!(Vcb
->balance
.opts
[sort
].flags
& BTRFS_BALANCE_OPTS_LIMIT
) || num_chunks
[sort
] < Vcb
->balance
.opts
[sort
].limit_end
) &&
3142 should_balance_chunk(Vcb
, sort
, c
)) {
3143 InsertTailList(&chunks
, &c
->list_entry_balance
);
3146 Vcb
->balance
.total_chunks
++;
3147 Vcb
->balance
.chunks_left
++;
3148 } else if (sort
== BALANCE_OPTS_METADATA
)
3149 okay_metadata_chunks
++;
3150 else if (sort
== BALANCE_OPTS_DATA
)
3152 else if (sort
== BALANCE_OPTS_SYSTEM
)
3153 okay_system_chunks
++;
// make sure the chunk's cache has been loaded before it is worked on
3155 if (!c
->cache_loaded
) {
3156 Status
= load_cache_chunk(Vcb
, c
, NULL
);
3158 if (!NT_SUCCESS(Status
)) {
3159 ERR("load_cache_chunk returned %08lx\n", Status
);
3160 Vcb
->balance
.status
= Status
;
3161 release_chunk_lock(c
, Vcb
);
3162 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3167 release_chunk_lock(c
, Vcb
);
3172 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3174 // If we're doing a full balance, try and allocate a new chunk now, before we mess things up
3175 if (okay_metadata_chunks
== 0 || okay_data_chunks
== 0 || okay_system_chunks
== 0) {
3176 bool consolidated
= false;
// The same sequence is repeated for metadata, data and system: try
// alloc_chunk; on STATUS_DISK_FULL fall back to try_consolidation, but only
// while no consolidation has been done yet in this run.
3179 if (okay_metadata_chunks
== 0) {
3180 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
3182 Status
= alloc_chunk(Vcb
, Vcb
->metadata_flags
, &c
, true);
3183 if (NT_SUCCESS(Status
))
3184 c
->balance_num
= Vcb
->balance
.balance_num
;
3185 else if (Status
!= STATUS_DISK_FULL
|| consolidated
) {
3186 ERR("alloc_chunk returned %08lx\n", Status
);
3187 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3188 Vcb
->balance
.status
= Status
;
3192 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3194 if (Status
== STATUS_DISK_FULL
) {
3195 Status
= try_consolidation(Vcb
, Vcb
->metadata_flags
, &c
);
3196 if (!NT_SUCCESS(Status
)) {
3197 ERR("try_consolidation returned %08lx\n", Status
);
3198 Vcb
->balance
.status
= Status
;
// NOTE(review): try_consolidation also returns STATUS_SUCCESS when the balance
// is being stopped; confirm that c is valid on that path before it is used here
3201 c
->balance_num
= Vcb
->balance
.balance_num
;
3203 consolidated
= true;
3205 if (Vcb
->balance
.stopping
)
3210 if (okay_data_chunks
== 0) {
3211 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
3213 Status
= alloc_chunk(Vcb
, Vcb
->data_flags
, &c
, true);
3214 if (NT_SUCCESS(Status
))
3215 c
->balance_num
= Vcb
->balance
.balance_num
;
3216 else if (Status
!= STATUS_DISK_FULL
|| consolidated
) {
3217 ERR("alloc_chunk returned %08lx\n", Status
);
3218 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3219 Vcb
->balance
.status
= Status
;
3223 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3225 if (Status
== STATUS_DISK_FULL
) {
3226 Status
= try_consolidation(Vcb
, Vcb
->data_flags
, &c
);
3227 if (!NT_SUCCESS(Status
)) {
3228 ERR("try_consolidation returned %08lx\n", Status
);
3229 Vcb
->balance
.status
= Status
;
3232 c
->balance_num
= Vcb
->balance
.balance_num
;
3234 consolidated
= true;
3236 if (Vcb
->balance
.stopping
)
3241 if (okay_system_chunks
== 0) {
3242 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
3244 Status
= alloc_chunk(Vcb
, Vcb
->system_flags
, &c
, true);
3245 if (NT_SUCCESS(Status
))
3246 c
->balance_num
= Vcb
->balance
.balance_num
;
3247 else if (Status
!= STATUS_DISK_FULL
|| consolidated
) {
3248 ERR("alloc_chunk returned %08lx\n", Status
);
3249 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3250 Vcb
->balance
.status
= Status
;
3254 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3256 if (Status
== STATUS_DISK_FULL
) {
3257 Status
= try_consolidation(Vcb
, Vcb
->system_flags
, &c
);
3258 if (!NT_SUCCESS(Status
)) {
3259 ERR("try_consolidation returned %08lx\n", Status
);
3260 Vcb
->balance
.status
= Status
;
3263 c
->balance_num
= Vcb
->balance
.balance_num
;
3265 consolidated
= true;
3267 if (Vcb
->balance
.stopping
)
// walk the collected chunks once under the chunk lock
3273 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, true);
3276 while (le
!= &chunks
) {
3277 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry_balance
);
3284 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3286 // do data chunks before metadata
3288 while (le
!= &chunks
) {
3289 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry_balance
);
// remember the successor now, because c may be unlinked from the list below
3290 LIST_ENTRY
* le2
= le
->Flink
;
3292 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
) {
3298 Status
= balance_data_chunk(Vcb
, c
, &changed
);
3299 if (!NT_SUCCESS(Status
)) {
3300 ERR("balance_data_chunk returned %08lx\n", Status
);
3301 Vcb
->balance
.status
= Status
;
// pause point between relocation passes
3305 KeWaitForSingleObject(&Vcb
->balance
.event
, Executive
, KernelMode
, false, NULL
);
3308 Vcb
->balance
.stopping
= true;
3310 if (Vcb
->balance
.stopping
)
3315 c
->space_changed
= true;
3318 if (Vcb
->balance
.stopping
)
// A data chunk is finished at this point unless it is also a metadata chunk
// and metadata balancing is enabled; in that case it stays on the list for
// the metadata pass.
3321 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
&&
3322 (!(Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) || !(c
->chunk_item
->type
& BLOCK_FLAG_METADATA
))) {
3323 RemoveEntryList(&c
->list_entry_balance
);
3324 c
->list_entry_balance
.Flink
= NULL
;
3326 Vcb
->balance
.chunks_left
--;
3332 // do metadata chunks
3333 while (!IsListEmpty(&chunks
)) {
3337 le
= RemoveHeadList(&chunks
);
3338 c
= CONTAINING_RECORD(le
, chunk
, list_entry_balance
);
3340 if (c
->chunk_item
->type
& BLOCK_FLAG_METADATA
|| c
->chunk_item
->type
& BLOCK_FLAG_SYSTEM
) {
3342 Status
= balance_metadata_chunk(Vcb
, c
, &changed
);
3343 if (!NT_SUCCESS(Status
)) {
3344 ERR("balance_metadata_chunk returned %08lx\n", Status
);
3345 Vcb
->balance
.status
= Status
;
3349 KeWaitForSingleObject(&Vcb
->balance
.event
, Executive
, KernelMode
, false, NULL
);
3352 Vcb
->balance
.stopping
= true;
3354 if (Vcb
->balance
.stopping
)
3359 c
->space_changed
= true;
3362 if (Vcb
->balance
.stopping
)
3365 c
->list_entry_balance
.Flink
= NULL
;
3367 Vcb
->balance
.chunks_left
--;
// clean-up; everything inside this condition is skipped on a read-only volume
3371 if (!Vcb
->readonly
) {
// stopped or failed: detach the chunks still on the list and put back the
// allocation flags that were saved before a conversion
3372 if (Vcb
->balance
.stopping
|| !NT_SUCCESS(Vcb
->balance
.status
)) {
3374 while (le
!= &chunks
) {
3375 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry_balance
);
3379 c
->list_entry_balance
.Flink
= NULL
;
3382 if (old_data_flags
!= 0)
3383 Vcb
->data_flags
= old_data_flags
;
3385 if (old_metadata_flags
!= 0)
3386 Vcb
->metadata_flags
= old_metadata_flags
;
3388 if (old_system_flags
!= 0)
3389 Vcb
->system_flags
= old_system_flags
;
// device removal: the id of the device is held in opts[0].devid
3392 if (Vcb
->balance
.removing
) {
3395 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
3397 le
= Vcb
->devices
.Flink
;
3398 while (le
!= &Vcb
->devices
) {
3399 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
3401 if (dev2
->devitem
.dev_id
== Vcb
->balance
.opts
[0].devid
) {
// the removal is only completed once no chunks are left to move
3410 if (Vcb
->balance
.chunks_left
== 0) {
3411 Status
= finish_removing_device(Vcb
, dev
);
3413 if (!NT_SUCCESS(Status
)) {
3414 ERR("finish_removing_device returned %08lx\n", Status
);
3421 ExReleaseResourceLite(&Vcb
->tree_lock
);
// device shrink: again the device id is in opts[0].devid
3422 } else if (Vcb
->balance
.shrinking
) {
3425 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
3427 le
= Vcb
->devices
.Flink
;
3428 while (le
!= &Vcb
->devices
) {
3429 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
3431 if (dev2
->devitem
.dev_id
== Vcb
->balance
.opts
[0].devid
) {
3440 ERR("could not find device %I64x\n", Vcb
->balance
.opts
[0].devid
);
3441 Vcb
->balance
.status
= STATUS_INTERNAL_ERROR
;
// shrink abandoned: rebuild the device's free-space list
3444 if (Vcb
->balance
.stopping
|| !NT_SUCCESS(Vcb
->balance
.status
)) {
3446 Status
= regenerate_space_list(Vcb
, dev
);
3447 if (!NT_SUCCESS(Status
))
3448 WARN("regenerate_space_list returned %08lx\n", Status
);
// shrink completed: the new device size is opts[0].drange_start
3453 old_size
= dev
->devitem
.num_bytes
;
3454 dev
->devitem
.num_bytes
= Vcb
->balance
.opts
[0].drange_start
;
3456 Status
= update_dev_item(Vcb
, dev
, NULL
);
3457 if (!NT_SUCCESS(Status
)) {
3458 ERR("update_dev_item returned %08lx\n", Status
);
// roll the size back and rebuild the free-space list to match
3459 dev
->devitem
.num_bytes
= old_size
;
3460 Vcb
->balance
.status
= Status
;
3462 Status
= regenerate_space_list(Vcb
, dev
);
3463 if (!NT_SUCCESS(Status
))
3464 WARN("regenerate_space_list returned %08lx\n", Status
);
3466 Vcb
->superblock
.total_bytes
-= old_size
- dev
->devitem
.num_bytes
;
3468 Status
= do_write(Vcb
, NULL
);
3469 if (!NT_SUCCESS(Status
))
3470 ERR("do_write returned %08lx\n", Status
);
3476 ExReleaseResourceLite(&Vcb
->tree_lock
);
3478 if (!Vcb
->balance
.stopping
&& NT_SUCCESS(Vcb
->balance
.status
))
3479 FsRtlNotifyVolumeEvent(Vcb
->root_file
, FSRTL_VOLUME_CHANGE_SIZE
);
3481 Status
= remove_balance_item(Vcb
);
3482 if (!NT_SUCCESS(Status
)) {
3483 ERR("remove_balance_item returned %08lx\n", Status
);
// TRIM the unallocated space of every writable device that supports it
3488 if (Vcb
->trim
&& !Vcb
->options
.no_trim
) {
3489 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
3491 le
= Vcb
->devices
.Flink
;
3492 while (le
!= &Vcb
->devices
) {
3493 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
3495 if (dev2
->devobj
&& !dev2
->readonly
&& dev2
->trim
)
3496 trim_unalloc_space(Vcb
, dev2
);
3501 ExReleaseResourceLite(&Vcb
->tree_lock
);
// close our own thread handle, mark the balance as no longer running and
// signal the finished event
3505 ZwClose(Vcb
->balance
.thread
);
3506 Vcb
->balance
.thread
= NULL
;
3508 KeSetEvent(&Vcb
->balance
.finished
, 0, false);
3511 NTSTATUS
start_balance(device_extension
* Vcb
, void* data
, ULONG length
, KPROCESSOR_MODE processor_mode
) {
3513 btrfs_start_balance
* bsb
= (btrfs_start_balance
*)data
;
3514 OBJECT_ATTRIBUTES oa
;
3517 if (length
< sizeof(btrfs_start_balance
) || !data
)
3518 return STATUS_INVALID_PARAMETER
;
3520 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3521 return STATUS_PRIVILEGE_NOT_HELD
;
3524 WARN("cannot start balance while locked\n");
3525 return STATUS_DEVICE_NOT_READY
;
3528 if (Vcb
->scrub
.thread
) {
3529 WARN("cannot start balance while scrub running\n");
3530 return STATUS_DEVICE_NOT_READY
;
3533 if (Vcb
->balance
.thread
) {
3534 WARN("balance already running\n");
3535 return STATUS_DEVICE_NOT_READY
;
3539 return STATUS_MEDIA_WRITE_PROTECTED
;
3541 if (!(bsb
->opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) &&
3542 !(bsb
->opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) &&
3543 !(bsb
->opts
[BALANCE_OPTS_SYSTEM
].flags
& BTRFS_BALANCE_OPTS_ENABLED
))
3544 return STATUS_SUCCESS
;
3546 for (i
= 0; i
< 3; i
++) {
3547 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
3548 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_PROFILES
) {
3549 bsb
->opts
[i
].profiles
&= BLOCK_FLAG_RAID0
| BLOCK_FLAG_RAID1
| BLOCK_FLAG_DUPLICATE
| BLOCK_FLAG_RAID10
|
3550 BLOCK_FLAG_RAID5
| BLOCK_FLAG_RAID6
| BLOCK_FLAG_SINGLE
| BLOCK_FLAG_RAID1C3
|
3553 if (bsb
->opts
[i
].profiles
== 0)
3554 return STATUS_INVALID_PARAMETER
;
3557 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_DEVID
) {
3558 if (bsb
->opts
[i
].devid
== 0)
3559 return STATUS_INVALID_PARAMETER
;
3562 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_DRANGE
) {
3563 if (bsb
->opts
[i
].drange_start
> bsb
->opts
[i
].drange_end
)
3564 return STATUS_INVALID_PARAMETER
;
3567 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_VRANGE
) {
3568 if (bsb
->opts
[i
].vrange_start
> bsb
->opts
[i
].vrange_end
)
3569 return STATUS_INVALID_PARAMETER
;
3572 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_LIMIT
) {
3573 bsb
->opts
[i
].limit_start
= max(1, bsb
->opts
[i
].limit_start
);
3574 bsb
->opts
[i
].limit_end
= max(1, bsb
->opts
[i
].limit_end
);
3576 if (bsb
->opts
[i
].limit_start
> bsb
->opts
[i
].limit_end
)
3577 return STATUS_INVALID_PARAMETER
;
3580 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_STRIPES
) {
3581 bsb
->opts
[i
].stripes_start
= max(1, bsb
->opts
[i
].stripes_start
);
3582 bsb
->opts
[i
].stripes_end
= max(1, bsb
->opts
[i
].stripes_end
);
3584 if (bsb
->opts
[i
].stripes_start
> bsb
->opts
[i
].stripes_end
)
3585 return STATUS_INVALID_PARAMETER
;
3588 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_USAGE
) {
3589 bsb
->opts
[i
].usage_start
= min(100, bsb
->opts
[i
].stripes_start
);
3590 bsb
->opts
[i
].usage_end
= min(100, bsb
->opts
[i
].stripes_end
);
3592 if (bsb
->opts
[i
].stripes_start
> bsb
->opts
[i
].stripes_end
)
3593 return STATUS_INVALID_PARAMETER
;
3596 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
3597 if (bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID0
&& bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID1
&&
3598 bsb
->opts
[i
].convert
!= BLOCK_FLAG_DUPLICATE
&& bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID10
&&
3599 bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID5
&& bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID6
&&
3600 bsb
->opts
[i
].convert
!= BLOCK_FLAG_SINGLE
&& bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID1C3
&&
3601 bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID1C4
)
3602 return STATUS_INVALID_PARAMETER
;
3607 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], &bsb
->opts
[BALANCE_OPTS_DATA
], sizeof(btrfs_balance_opts
));
3608 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], &bsb
->opts
[BALANCE_OPTS_METADATA
], sizeof(btrfs_balance_opts
));
3609 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
], &bsb
->opts
[BALANCE_OPTS_SYSTEM
], sizeof(btrfs_balance_opts
));
3611 Vcb
->balance
.paused
= false;
3612 Vcb
->balance
.removing
= false;
3613 Vcb
->balance
.shrinking
= false;
3614 Vcb
->balance
.status
= STATUS_SUCCESS
;
3615 KeInitializeEvent(&Vcb
->balance
.event
, NotificationEvent
, !Vcb
->balance
.paused
);
3617 InitializeObjectAttributes(&oa
, NULL
, OBJ_KERNEL_HANDLE
, NULL
, NULL
);
3619 Status
= PsCreateSystemThread(&Vcb
->balance
.thread
, 0, &oa
, NULL
, NULL
, balance_thread
, Vcb
);
3620 if (!NT_SUCCESS(Status
)) {
3621 ERR("PsCreateSystemThread returned %08lx\n", Status
);
3625 return STATUS_SUCCESS
;
3628 NTSTATUS
look_for_balance_item(_Requires_lock_held_(_Curr_
->tree_lock
) device_extension
* Vcb
) {
3633 OBJECT_ATTRIBUTES oa
;
3636 searchkey
.obj_id
= BALANCE_ITEM_ID
;
3637 searchkey
.obj_type
= TYPE_TEMP_ITEM
;
3638 searchkey
.offset
= 0;
3640 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, NULL
);
3641 if (!NT_SUCCESS(Status
)) {
3642 ERR("find_item returned %08lx\n", Status
);
3646 if (keycmp(tp
.item
->key
, searchkey
)) {
3647 TRACE("no balance item found\n");
3648 return STATUS_NOT_FOUND
;
3651 if (tp
.item
->size
< sizeof(BALANCE_ITEM
)) {
3652 WARN("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
,
3653 tp
.item
->size
, sizeof(BALANCE_ITEM
));
3654 return STATUS_INTERNAL_ERROR
;
3657 bi
= (BALANCE_ITEM
*)tp
.item
->data
;
3659 if (bi
->flags
& BALANCE_FLAGS_DATA
)
3660 load_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], &bi
->data
);
3662 if (bi
->flags
& BALANCE_FLAGS_METADATA
)
3663 load_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], &bi
->metadata
);
3665 if (bi
->flags
& BALANCE_FLAGS_SYSTEM
)
3666 load_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
], &bi
->system
);
3668 // do the heuristics that Linux driver does
3670 for (i
= 0; i
< 3; i
++) {
3671 if (Vcb
->balance
.opts
[i
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
3672 // if converting, don't redo chunks already done
3674 if (Vcb
->balance
.opts
[i
].flags
& BTRFS_BALANCE_OPTS_CONVERT
)
3675 Vcb
->balance
.opts
[i
].flags
|= BTRFS_BALANCE_OPTS_SOFT
;
3677 // don't balance chunks more than 90% filled - presumably these
3678 // have already been done
3680 if (!(Vcb
->balance
.opts
[i
].flags
& BTRFS_BALANCE_OPTS_USAGE
) &&
3681 !(Vcb
->balance
.opts
[i
].flags
& BTRFS_BALANCE_OPTS_CONVERT
)
3683 Vcb
->balance
.opts
[i
].flags
|= BTRFS_BALANCE_OPTS_USAGE
;
3684 Vcb
->balance
.opts
[i
].usage_start
= 0;
3685 Vcb
->balance
.opts
[i
].usage_end
= 90;
3690 if (Vcb
->readonly
|| Vcb
->options
.skip_balance
)
3691 Vcb
->balance
.paused
= true;
3693 Vcb
->balance
.paused
= false;
3695 Vcb
->balance
.removing
= false;
3696 Vcb
->balance
.shrinking
= false;
3697 Vcb
->balance
.status
= STATUS_SUCCESS
;
3698 KeInitializeEvent(&Vcb
->balance
.event
, NotificationEvent
, !Vcb
->balance
.paused
);
3700 InitializeObjectAttributes(&oa
, NULL
, OBJ_KERNEL_HANDLE
, NULL
, NULL
);
3702 Status
= PsCreateSystemThread(&Vcb
->balance
.thread
, 0, &oa
, NULL
, NULL
, balance_thread
, Vcb
);
3703 if (!NT_SUCCESS(Status
)) {
3704 ERR("PsCreateSystemThread returned %08lx\n", Status
);
3708 return STATUS_SUCCESS
;
3711 NTSTATUS
query_balance(device_extension
* Vcb
, void* data
, ULONG length
) {
3712 btrfs_query_balance
* bqb
= (btrfs_query_balance
*)data
;
3714 if (length
< sizeof(btrfs_query_balance
) || !data
)
3715 return STATUS_INVALID_PARAMETER
;
3717 if (!Vcb
->balance
.thread
) {
3718 bqb
->status
= BTRFS_BALANCE_STOPPED
;
3720 if (!NT_SUCCESS(Vcb
->balance
.status
)) {
3721 bqb
->status
|= BTRFS_BALANCE_ERROR
;
3722 bqb
->error
= Vcb
->balance
.status
;
3725 return STATUS_SUCCESS
;
3728 bqb
->status
= Vcb
->balance
.paused
? BTRFS_BALANCE_PAUSED
: BTRFS_BALANCE_RUNNING
;
3730 if (Vcb
->balance
.removing
)
3731 bqb
->status
|= BTRFS_BALANCE_REMOVAL
;
3733 if (Vcb
->balance
.shrinking
)
3734 bqb
->status
|= BTRFS_BALANCE_SHRINKING
;
3736 if (!NT_SUCCESS(Vcb
->balance
.status
))
3737 bqb
->status
|= BTRFS_BALANCE_ERROR
;
3739 bqb
->chunks_left
= Vcb
->balance
.chunks_left
;
3740 bqb
->total_chunks
= Vcb
->balance
.total_chunks
;
3741 bqb
->error
= Vcb
->balance
.status
;
3742 RtlCopyMemory(&bqb
->data_opts
, &Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], sizeof(btrfs_balance_opts
));
3743 RtlCopyMemory(&bqb
->metadata_opts
, &Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], sizeof(btrfs_balance_opts
));
3744 RtlCopyMemory(&bqb
->system_opts
, &Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
], sizeof(btrfs_balance_opts
));
3746 return STATUS_SUCCESS
;
3749 NTSTATUS
pause_balance(device_extension
* Vcb
, KPROCESSOR_MODE processor_mode
) {
3750 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3751 return STATUS_PRIVILEGE_NOT_HELD
;
3753 if (!Vcb
->balance
.thread
)
3754 return STATUS_DEVICE_NOT_READY
;
3756 if (Vcb
->balance
.paused
)
3757 return STATUS_DEVICE_NOT_READY
;
3759 Vcb
->balance
.paused
= true;
3760 KeClearEvent(&Vcb
->balance
.event
);
3762 return STATUS_SUCCESS
;
3765 NTSTATUS
resume_balance(device_extension
* Vcb
, KPROCESSOR_MODE processor_mode
) {
3766 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3767 return STATUS_PRIVILEGE_NOT_HELD
;
3769 if (!Vcb
->balance
.thread
)
3770 return STATUS_DEVICE_NOT_READY
;
3772 if (!Vcb
->balance
.paused
)
3773 return STATUS_DEVICE_NOT_READY
;
3776 return STATUS_MEDIA_WRITE_PROTECTED
;
3778 Vcb
->balance
.paused
= false;
3779 KeSetEvent(&Vcb
->balance
.event
, 0, false);
3781 return STATUS_SUCCESS
;
3784 NTSTATUS
stop_balance(device_extension
* Vcb
, KPROCESSOR_MODE processor_mode
) {
3785 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3786 return STATUS_PRIVILEGE_NOT_HELD
;
3788 if (!Vcb
->balance
.thread
)
3789 return STATUS_DEVICE_NOT_READY
;
3791 Vcb
->balance
.paused
= false;
3792 Vcb
->balance
.stopping
= true;
3793 Vcb
->balance
.status
= STATUS_SUCCESS
;
3794 KeSetEvent(&Vcb
->balance
.event
, 0, false);
3796 return STATUS_SUCCESS
;
3799 NTSTATUS
remove_device(device_extension
* Vcb
, void* data
, ULONG length
, KPROCESSOR_MODE processor_mode
) {
3805 uint64_t num_rw_devices
;
3806 OBJECT_ATTRIBUTES oa
;
3808 TRACE("(%p, %p, %lx)\n", Vcb
, data
, length
);
3810 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3811 return STATUS_PRIVILEGE_NOT_HELD
;
3813 if (length
< sizeof(uint64_t))
3814 return STATUS_INVALID_PARAMETER
;
3816 devid
= *(uint64_t*)data
;
3818 ExAcquireResourceSharedLite(&Vcb
->tree_lock
, true);
3820 if (Vcb
->readonly
) {
3821 ExReleaseResourceLite(&Vcb
->tree_lock
);
3822 return STATUS_MEDIA_WRITE_PROTECTED
;
3827 le
= Vcb
->devices
.Flink
;
3828 while (le
!= &Vcb
->devices
) {
3829 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
3831 if (dev2
->devitem
.dev_id
== devid
)
3834 if (!dev2
->readonly
)
3841 ExReleaseResourceLite(&Vcb
->tree_lock
);
3842 WARN("device %I64x not found\n", devid
);
3843 return STATUS_NOT_FOUND
;
3846 if (!dev
->readonly
) {
3847 if (num_rw_devices
== 1) {
3848 ExReleaseResourceLite(&Vcb
->tree_lock
);
3849 WARN("not removing last non-readonly device\n");
3850 return STATUS_INVALID_PARAMETER
;
3853 if (num_rw_devices
== 4 &&
3854 ((Vcb
->data_flags
& BLOCK_FLAG_RAID10
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID10
|| Vcb
->system_flags
& BLOCK_FLAG_RAID10
) ||
3855 (Vcb
->data_flags
& BLOCK_FLAG_RAID6
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID6
|| Vcb
->system_flags
& BLOCK_FLAG_RAID6
) ||
3856 (Vcb
->data_flags
& BLOCK_FLAG_RAID1C4
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID1C4
|| Vcb
->system_flags
& BLOCK_FLAG_RAID1C4
)
3859 ExReleaseResourceLite(&Vcb
->tree_lock
);
3860 ERR("would not be enough devices to satisfy RAID requirement (RAID6/10/1C4)\n");
3861 return STATUS_CANNOT_DELETE
;
3864 if (num_rw_devices
== 3 &&
3865 ((Vcb
->data_flags
& BLOCK_FLAG_RAID5
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID5
|| Vcb
->system_flags
& BLOCK_FLAG_RAID5
) ||
3866 (Vcb
->data_flags
& BLOCK_FLAG_RAID1C3
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID1C3
|| Vcb
->system_flags
& BLOCK_FLAG_RAID1C3
))
3868 ExReleaseResourceLite(&Vcb
->tree_lock
);
3869 ERR("would not be enough devices to satisfy RAID requirement (RAID5/1C3)\n");
3870 return STATUS_CANNOT_DELETE
;
3873 if (num_rw_devices
== 2 &&
3874 ((Vcb
->data_flags
& BLOCK_FLAG_RAID0
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID0
|| Vcb
->system_flags
& BLOCK_FLAG_RAID0
) ||
3875 (Vcb
->data_flags
& BLOCK_FLAG_RAID1
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID1
|| Vcb
->system_flags
& BLOCK_FLAG_RAID1
))
3877 ExReleaseResourceLite(&Vcb
->tree_lock
);
3878 ERR("would not be enough devices to satisfy RAID requirement (RAID0/1)\n");
3879 return STATUS_CANNOT_DELETE
;
3883 ExReleaseResourceLite(&Vcb
->tree_lock
);
3885 if (Vcb
->balance
.thread
) {
3886 WARN("balance already running\n");
3887 return STATUS_DEVICE_NOT_READY
;
3892 RtlZeroMemory(Vcb
->balance
.opts
, sizeof(btrfs_balance_opts
) * 3);
3894 for (i
= 0; i
< 3; i
++) {
3895 Vcb
->balance
.opts
[i
].flags
= BTRFS_BALANCE_OPTS_ENABLED
| BTRFS_BALANCE_OPTS_DEVID
;
3896 Vcb
->balance
.opts
[i
].devid
= devid
;
3899 Vcb
->balance
.paused
= false;
3900 Vcb
->balance
.removing
= true;
3901 Vcb
->balance
.shrinking
= false;
3902 Vcb
->balance
.status
= STATUS_SUCCESS
;
3903 KeInitializeEvent(&Vcb
->balance
.event
, NotificationEvent
, !Vcb
->balance
.paused
);
3905 InitializeObjectAttributes(&oa
, NULL
, OBJ_KERNEL_HANDLE
, NULL
, NULL
);
3907 Status
= PsCreateSystemThread(&Vcb
->balance
.thread
, 0, &oa
, NULL
, NULL
, balance_thread
, Vcb
);
3908 if (!NT_SUCCESS(Status
)) {
3909 ERR("PsCreateSystemThread returned %08lx\n", Status
);
3914 return STATUS_SUCCESS
;