1 /* Copyright (c) Mark Harmstone 2016
3 * This file is part of WinBtrfs.
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
18 #include "btrfs_drv.h"
19 #include "btrfsioctl.h"
29 LIST_ENTRY list_entry
;
40 metadata_reloc
* parent
;
42 LIST_ENTRY list_entry
;
52 LIST_ENTRY list_entry
;
63 metadata_reloc
* parent
;
64 LIST_ENTRY list_entry
;
67 extern LIST_ENTRY volumes
;
68 extern ERESOURCE volumes_lock
;
/* add_metadata_reloc
 *
 * Queues the tree-block extent described by the extent item at *tp for
 * relocation.  From the statements visible here it:
 *   - allocates a metadata_reloc (mr) from paged pool and records the extent
 *     address (tp->item->key.obj_id) and a pointer to the item's EXTENT_ITEM;
 *   - deletes the extent item with delete_tree_item(), logged to rollback;
 *   - holding c->lock, reduces the chunk's usage by node_size and adds the
 *     range to the chunk's space list;
 *   - copies every inline TREE_BLOCK_REF / SHARED_BLOCK_REF section into a
 *     newly allocated metadata_reloc_ref appended to mr->refs;
 *   - when inline_rc < ei->refcount, walks the following items that share the
 *     same obj_id, copies TREE_BLOCK_REF / SHARED_BLOCK_REF items into
 *     mr->refs and deletes them;
 *   - appends mr to the items list.
 *
 * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES when a pool
 * allocation fails, or STATUS_INTERNAL_ERROR for a malformed extent item.
 *
 * NOTE(review): a number of lines of this function are not visible in this
 * view (local declarations, NULL checks, loop headers, closing braces, and
 * whatever is done with skinny and mr2).  The comments here describe only the
 * statements that are visible - confirm against the complete source. */
70 static NTSTATUS
add_metadata_reloc(device_extension
* Vcb
, LIST_ENTRY
* items
, traverse_ptr
* tp
, BOOL skinny
, metadata_reloc
** mr2
, chunk
* c
, LIST_ENTRY
* rollback
) {
77 mr
= ExAllocatePoolWithTag(PagedPool
, sizeof(metadata_reloc
), ALLOC_TAG
);
79 ERR("out of memory\n");
80 return STATUS_INSUFFICIENT_RESOURCES
;
83 mr
->address
= tp
->item
->key
.obj_id
;
85 mr
->ei
= (EXTENT_ITEM
*)tp
->item
->data
;
87 InitializeListHead(&mr
->refs
);
/* remove the old extent item; tp->item is still read by the code below */
89 delete_tree_item(Vcb
, tp
, rollback
);
/* presumably only done when the caller passed c == NULL - the guard is not visible here */
92 c
= get_chunk_from_address(Vcb
, tp
->item
->key
.obj_id
);
/* hand the block's node_size bytes back to the chunk while holding its lock */
95 ExAcquireResourceExclusiveLite(&c
->lock
, TRUE
);
97 decrease_chunk_usage(c
, Vcb
->superblock
.node_size
);
99 space_list_add(Vcb
, c
, TRUE
, tp
->item
->key
.obj_id
, Vcb
->superblock
.node_size
, rollback
);
101 ExReleaseResourceLite(&c
->lock
);
104 ei
= (EXTENT_ITEM
*)tp
->item
->data
;
/* len / ptr describe the bytes that follow the fixed EXTENT_ITEM header */
107 len
= tp
->item
->size
- sizeof(EXTENT_ITEM
);
108 ptr
= (UINT8
*)tp
->item
->data
+ sizeof(EXTENT_ITEM
);
/* presumably guarded by !skinny (guard not visible): step over the EXTENT_ITEM2 */
110 len
-= sizeof(EXTENT_ITEM2
);
111 ptr
+= sizeof(EXTENT_ITEM2
);
/* each inline section is one type byte followed by the backref payload */
115 UINT8 secttype
= *ptr
;
116 ULONG sectlen
= secttype
== TYPE_TREE_BLOCK_REF
? sizeof(TREE_BLOCK_REF
) : (secttype
== TYPE_SHARED_BLOCK_REF
? sizeof(SHARED_BLOCK_REF
) : 0);
117 metadata_reloc_ref
* ref
;
122 ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp
->item
->key
.obj_id
, tp
->item
->key
.obj_type
, tp
->item
->key
.offset
, len
, sectlen
);
123 return STATUS_INTERNAL_ERROR
;
127 ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp
->item
->key
.obj_id
, tp
->item
->key
.obj_type
, tp
->item
->key
.offset
, secttype
);
128 return STATUS_INTERNAL_ERROR
;
131 ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(metadata_reloc_ref
), ALLOC_TAG
);
133 ERR("out of memory\n");
134 return STATUS_INSUFFICIENT_RESOURCES
;
137 if (secttype
== TYPE_TREE_BLOCK_REF
) {
138 ref
->type
= TYPE_TREE_BLOCK_REF
;
139 RtlCopyMemory(&ref
->tbr
, ptr
+ sizeof(UINT8
), sizeof(TREE_BLOCK_REF
));
141 } else if (secttype
== TYPE_SHARED_BLOCK_REF
) {
142 ref
->type
= TYPE_SHARED_BLOCK_REF
;
143 RtlCopyMemory(&ref
->sbr
, ptr
+ sizeof(UINT8
), sizeof(SHARED_BLOCK_REF
));
146 ERR("unexpected tree type %x\n", secttype
);
148 return STATUS_INTERNAL_ERROR
;
153 InsertTailList(&mr
->refs
, &ref
->list_entry
);
/* advance past the type byte and the payload just consumed */
156 ptr
+= sizeof(UINT8
) + sectlen
;
159 if (inline_rc
< ei
->refcount
) { // look for non-inline entries
160 traverse_ptr tp2
= *tp
, next_tp
;
162 while (find_next_item(Vcb
, &tp2
, &next_tp
, FALSE
, NULL
)) {
/* only items keyed on the same extent address are backrefs of this extent */
165 if (tp2
.item
->key
.obj_id
== tp
->item
->key
.obj_id
) {
166 if (tp2
.item
->key
.obj_type
== TYPE_TREE_BLOCK_REF
&& tp2
.item
->size
>= sizeof(TREE_BLOCK_REF
)) {
167 metadata_reloc_ref
* ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(metadata_reloc_ref
), ALLOC_TAG
);
169 ERR("out of memory\n");
170 return STATUS_INSUFFICIENT_RESOURCES
;
173 ref
->type
= TYPE_TREE_BLOCK_REF
;
174 RtlCopyMemory(&ref
->tbr
, tp2
.item
->data
, sizeof(TREE_BLOCK_REF
));
177 InsertTailList(&mr
->refs
, &ref
->list_entry
);
179 delete_tree_item(Vcb
, &tp2
, rollback
);
180 } else if (tp2
.item
->key
.obj_type
== TYPE_SHARED_BLOCK_REF
&& tp2
.item
->size
>= sizeof(SHARED_BLOCK_REF
)) {
181 metadata_reloc_ref
* ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(metadata_reloc_ref
), ALLOC_TAG
);
183 ERR("out of memory\n");
184 return STATUS_INSUFFICIENT_RESOURCES
;
187 ref
->type
= TYPE_SHARED_BLOCK_REF
;
188 RtlCopyMemory(&ref
->sbr
, tp2
.item
->data
, sizeof(SHARED_BLOCK_REF
));
191 InsertTailList(&mr
->refs
, &ref
->list_entry
);
193 delete_tree_item(Vcb
, &tp2
, rollback
);
/* the fully populated relocation record joins the caller's list */
200 InsertTailList(items
, &mr
->list_entry
);
205 return STATUS_SUCCESS
;
/* add_metadata_reloc_parent
 *
 * Makes sure the tree block at `address` has a metadata_reloc in `items`.
 * Visible behaviour:
 *   - walks `items`; if an entry with mr->address == address exists the
 *     function returns STATUS_SUCCESS straight away;
 *   - otherwise searches Vcb->extent_root for (address, TYPE_METADATA_ITEM,
 *     0xffffffffffffffff) and accepts either a METADATA_ITEM for that address
 *     or an EXTENT_ITEM whose offset equals node_size and whose flags include
 *     EXTENT_ITEM_TREE_BLOCK;
 *   - passes the found item to add_metadata_reloc() with a NULL chunk.
 *
 * Returns STATUS_SUCCESS, STATUS_INTERNAL_ERROR when no valid tree-block
 * extent item is found, or (presumably) the failing status of find_item() /
 * add_metadata_reloc() - the return statements after those ERR calls are not
 * visible in this view.
 *
 * NOTE(review): local declarations, the assignments to *mr2 and to skinny,
 * and several closing braces are not visible here; confirm against the
 * complete source. */
208 static NTSTATUS
add_metadata_reloc_parent(device_extension
* Vcb
, LIST_ENTRY
* items
, UINT64 address
, metadata_reloc
** mr2
, LIST_ENTRY
* rollback
) {
/* reuse an existing relocation record for this address if there is one */
216 while (le
!= items
) {
217 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
219 if (mr
->address
== address
) {
221 return STATUS_SUCCESS
;
/* not queued yet: look the block's extent item up in the extent tree */
227 searchkey
.obj_id
= address
;
228 searchkey
.obj_type
= TYPE_METADATA_ITEM
;
229 searchkey
.offset
= 0xffffffffffffffff;
231 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, FALSE
, NULL
);
232 if (!NT_SUCCESS(Status
)) {
233 ERR("find_item returned %08x\n", Status
);
/* the body of this first branch is not visible; presumably it marks the item as skinny */
237 if (tp
.item
->key
.obj_id
== address
&& tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
&& tp
.item
->size
>= sizeof(EXTENT_ITEM
))
239 else if (tp
.item
->key
.obj_id
== address
&& tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
&& tp
.item
->key
.offset
== Vcb
->superblock
.node_size
&&
240 tp
.item
->size
>= sizeof(EXTENT_ITEM
)) {
241 EXTENT_ITEM
* ei
= (EXTENT_ITEM
*)tp
.item
->data
;
/* a node_size EXTENT_ITEM only counts if it is flagged as a tree block */
243 if (!(ei
->flags
& EXTENT_ITEM_TREE_BLOCK
)) {
244 ERR("EXTENT_ITEM for %llx found, but tree flag not set\n", address
);
245 return STATUS_INTERNAL_ERROR
;
248 ERR("could not find valid EXTENT_ITEM for address %llx\n", address
);
249 return STATUS_INTERNAL_ERROR
;
/* chunk argument is NULL here, so add_metadata_reloc has to find the chunk itself */
252 Status
= add_metadata_reloc(Vcb
, items
, &tp
, skinny
, mr2
, NULL
, rollback
);
253 if (!NT_SUCCESS(Status
)) {
254 ERR("add_metadata_reloc returned %08x\n", Status
);
258 return STATUS_SUCCESS
;
/* add_metadata_reloc_extent_item
 *
 * Writes the extent-tree entries for a relocated tree block at
 * mr->new_address.  Visible behaviour:
 *   - sizes the new item: sizeof(EXTENT_ITEM), plus sizeof(EXTENT_ITEM2) when
 *     the skinny-metadata incompat flag is clear, plus one type byte and the
 *     payload for each ref until the total would exceed node_size / 4; the
 *     ref that would overflow is remembered as first_noninline;
 *   - allocates the item, copies generation and flags from mr->ei, fills in
 *     EXTENT_ITEM2 (first key and level) on non-skinny volumes, then writes
 *     the inline refs; SHARED_BLOCK_REF offsets are taken from
 *     ref->parent->new_address;
 *   - inserts the item as TYPE_METADATA_ITEM (offset = level) on skinny
 *     volumes, or TYPE_EXTENT_ITEM (offset = node_size) otherwise;
 *   - inserts the remaining refs, starting at first_noninline, as separate
 *     TREE_BLOCK_REF / SHARED_BLOCK_REF items;
 *   - when shared backrefs are in use (see the flag test below), moves the
 *     children's shared refs from mr->address to mr->new_address: for
 *     internal nodes via SHARED_BLOCK_REF on each child, for leaves via
 *     SHARED_DATA_REF on each non-sparse EXTENT_DATA extent, also patching
 *     matching entries in the chunk's changed_extents lists.
 *
 * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES on allocation
 * failure, STATUS_INTERNAL_ERROR when insert_tree_item() fails.
 *
 * NOTE(review): many lines are not visible in this view (declarations, the
 * refcount bookkeeping, type-byte writes, loop advances, the trailing
 * arguments of increase_extent_refcount(), closing braces).  The comments
 * here cover only what is visible - confirm against the complete source. */
261 static NTSTATUS
add_metadata_reloc_extent_item(device_extension
* Vcb
, metadata_reloc
* mr
, LIST_ENTRY
* rollback
) {
265 BOOL all_inline
= TRUE
;
266 metadata_reloc_ref
* first_noninline
= NULL
;
270 inline_len
= sizeof(EXTENT_ITEM
);
271 if (!(Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA
))
272 inline_len
+= sizeof(EXTENT_ITEM2
);
/* first pass: decide how many refs fit inline */
275 while (le
!= &mr
->refs
) {
276 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le
, metadata_reloc_ref
, list_entry
);
281 if (ref
->type
== TYPE_TREE_BLOCK_REF
)
282 extlen
+= sizeof(TREE_BLOCK_REF
);
283 else if (ref
->type
== TYPE_SHARED_BLOCK_REF
)
284 extlen
+= sizeof(SHARED_BLOCK_REF
);
/* the +1 is the type byte; the item is capped at a quarter of a node */
287 if (inline_len
+ 1 + extlen
> Vcb
->superblock
.node_size
/ 4) {
289 first_noninline
= ref
;
291 inline_len
+= extlen
+ 1;
297 ei
= ExAllocatePoolWithTag(PagedPool
, inline_len
, ALLOC_TAG
);
299 ERR("out of memory\n");
300 return STATUS_INSUFFICIENT_RESOURCES
;
304 ei
->generation
= mr
->ei
->generation
;
305 ei
->flags
= mr
->ei
->flags
;
/* ptr starts immediately after the fixed EXTENT_ITEM header */
306 ptr
= (UINT8
*)&ei
[1];
308 if (!(Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA
)) {
309 EXTENT_ITEM2
* ei2
= (EXTENT_ITEM2
*)ptr
;
/* the first key is read from directly after the tree header in the block data */
311 ei2
->firstitem
= *(KEY
*)&mr
->data
[1];
312 ei2
->level
= mr
->data
->level
;
314 ptr
+= sizeof(EXTENT_ITEM2
);
/* second pass: emit the inline refs, stopping at first_noninline */
318 while (le
!= &mr
->refs
) {
319 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le
, metadata_reloc_ref
, list_entry
);
321 if (ref
== first_noninline
)
327 if (ref
->type
== TYPE_TREE_BLOCK_REF
) {
328 TREE_BLOCK_REF
* tbr
= (TREE_BLOCK_REF
*)ptr
;
330 tbr
->offset
= ref
->tbr
.offset
;
332 ptr
+= sizeof(TREE_BLOCK_REF
);
333 } else if (ref
->type
== TYPE_SHARED_BLOCK_REF
) {
334 SHARED_BLOCK_REF
* sbr
= (SHARED_BLOCK_REF
*)ptr
;
/* a shared ref names the parent block, which has itself been relocated */
336 sbr
->offset
= ref
->parent
->new_address
;
338 ptr
+= sizeof(SHARED_BLOCK_REF
);
/* skinny volumes key the item by level, others by node size */
344 if (Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA
) {
345 if (!insert_tree_item(Vcb
, Vcb
->extent_root
, mr
->new_address
, TYPE_METADATA_ITEM
, mr
->data
->level
, ei
, inline_len
, NULL
, NULL
, rollback
)) {
346 ERR("insert_tree_item failed\n");
347 return STATUS_INTERNAL_ERROR
;
350 if (!insert_tree_item(Vcb
, Vcb
->extent_root
, mr
->new_address
, TYPE_EXTENT_ITEM
, Vcb
->superblock
.node_size
, ei
, inline_len
, NULL
, NULL
, rollback
)) {
351 ERR("insert_tree_item failed\n");
352 return STATUS_INTERNAL_ERROR
;
/* refs that did not fit inline become separate items; presumably guarded by !all_inline (not visible) */
357 le
= &first_noninline
->list_entry
;
359 while (le
!= &mr
->refs
) {
360 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le
, metadata_reloc_ref
, list_entry
);
362 if (ref
->type
== TYPE_TREE_BLOCK_REF
) {
365 tbr
= ExAllocatePoolWithTag(PagedPool
, sizeof(TREE_BLOCK_REF
), ALLOC_TAG
);
367 ERR("out of memory\n");
368 return STATUS_INSUFFICIENT_RESOURCES
;
371 tbr
->offset
= ref
->tbr
.offset
;
373 if (!insert_tree_item(Vcb
, Vcb
->extent_root
, mr
->new_address
, TYPE_TREE_BLOCK_REF
, tbr
->offset
, tbr
, sizeof(TREE_BLOCK_REF
), NULL
, NULL
, rollback
)) {
374 ERR("insert_tree_item failed\n");
375 return STATUS_INTERNAL_ERROR
;
377 } else if (ref
->type
== TYPE_SHARED_BLOCK_REF
) {
378 SHARED_BLOCK_REF
* sbr
;
380 sbr
= ExAllocatePoolWithTag(PagedPool
, sizeof(SHARED_BLOCK_REF
), ALLOC_TAG
);
382 ERR("out of memory\n");
383 return STATUS_INSUFFICIENT_RESOURCES
;
386 sbr
->offset
= ref
->parent
->new_address
;
388 if (!insert_tree_item(Vcb
, Vcb
->extent_root
, mr
->new_address
, TYPE_SHARED_BLOCK_REF
, sbr
->offset
, sbr
, sizeof(SHARED_BLOCK_REF
), NULL
, NULL
, rollback
)) {
389 ERR("insert_tree_item failed\n");
390 return STATUS_INTERNAL_ERROR
;
/* children may hold shared backrefs naming this block's old address; repoint them */
398 if (ei
->flags
& EXTENT_ITEM_SHARED_BACKREFS
|| mr
->data
->flags
& HEADER_FLAG_SHARED_BACKREF
|| !(mr
->data
->flags
& HEADER_FLAG_MIXED_BACKREF
)) {
399 if (mr
->data
->level
> 0) {
401 internal_node
* in
= (internal_node
*)&mr
->data
[1];
403 for (i
= 0; i
< mr
->data
->num_items
; i
++) {
404 UINT64 sbrrc
= find_extent_shared_tree_refcount(Vcb
, in
[i
].address
, mr
->address
, NULL
);
408 SHARED_BLOCK_REF sbr
;
/* add a ref from the new address first, then drop the one from the old address */
410 sbr
.offset
= mr
->new_address
;
412 Status
= increase_extent_refcount(Vcb
, in
[i
].address
, Vcb
->superblock
.node_size
, TYPE_SHARED_BLOCK_REF
, &sbr
, NULL
, 0,
414 if (!NT_SUCCESS(Status
)) {
415 ERR("increase_extent_refcount returned %08x\n", Status
);
419 sbr
.offset
= mr
->address
;
421 Status
= decrease_extent_refcount(Vcb
, in
[i
].address
, Vcb
->superblock
.node_size
, TYPE_SHARED_BLOCK_REF
, &sbr
, NULL
, 0,
422 sbr
.offset
, FALSE
, NULL
, rollback
);
423 if (!NT_SUCCESS(Status
)) {
424 ERR("decrease_extent_refcount returned %08x\n", Status
);
/* leaf: the shared refs to fix up belong to the data extents it references */
431 leaf_node
* ln
= (leaf_node
*)&mr
->data
[1];
433 for (i
= 0; i
< mr
->data
->num_items
; i
++) {
434 if (ln
[i
].key
.obj_type
== TYPE_EXTENT_DATA
&& ln
[i
].size
>= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
)) {
435 EXTENT_DATA
* ed
= (EXTENT_DATA
*)((UINT8
*)mr
->data
+ sizeof(tree_header
) + ln
[i
].offset
);
437 if (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) {
438 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
440 if (ed2
->size
> 0) { // not sparse
441 UINT64 sdrrc
= find_extent_shared_data_refcount(Vcb
, ed2
->address
, mr
->address
, NULL
);
448 sdr
.offset
= mr
->new_address
;
451 Status
= increase_extent_refcount(Vcb
, ed2
->address
, ed2
->size
, TYPE_SHARED_DATA_REF
, &sdr
, NULL
, 0,
453 if (!NT_SUCCESS(Status
)) {
454 ERR("increase_extent_refcount returned %08x\n", Status
);
458 sdr
.offset
= mr
->address
;
460 Status
= decrease_extent_refcount(Vcb
, ed2
->address
, ed2
->size
, TYPE_SHARED_DATA_REF
, &sdr
, NULL
, 0,
461 sdr
.offset
, FALSE
, NULL
, rollback
);
462 if (!NT_SUCCESS(Status
)) {
463 ERR("decrease_extent_refcount returned %08x\n", Status
);
467 c
= get_chunk_from_address(Vcb
, ed2
->address
);
470 // check changed_extents
472 ExAcquireResourceExclusiveLite(&c
->changed_extents_lock
, TRUE
);
474 le
= c
->changed_extents
.Flink
;
476 while (le
!= &c
->changed_extents
) {
477 changed_extent
* ce
= CONTAINING_RECORD(le
, changed_extent
, list_entry
);
479 if (ce
->address
== ed2
->address
) {
/* repoint pending shared data refs in both the current and the old ref lists */
482 le2
= ce
->refs
.Flink
;
483 while (le2
!= &ce
->refs
) {
484 changed_extent_ref
* cer
= CONTAINING_RECORD(le2
, changed_extent_ref
, list_entry
);
486 if (cer
->type
== TYPE_SHARED_DATA_REF
&& cer
->sdr
.offset
== mr
->address
) {
487 cer
->sdr
.offset
= mr
->new_address
;
494 le2
= ce
->old_refs
.Flink
;
495 while (le2
!= &ce
->old_refs
) {
496 changed_extent_ref
* cer
= CONTAINING_RECORD(le2
, changed_extent_ref
, list_entry
);
498 if (cer
->type
== TYPE_SHARED_DATA_REF
&& cer
->sdr
.offset
== mr
->address
) {
499 cer
->sdr
.offset
= mr
->new_address
;
512 ExReleaseResourceLite(&c
->changed_extents_lock
);
522 return STATUS_SUCCESS
;
/* write_metadata_items
 *
 * Relocates every tree block queued in `items` and writes the moved blocks
 * out.  Visible phases:
 *   1. For each metadata_reloc: allocate a node_size buffer and read the
 *      block with read_data(); when data_items is given and the block is a
 *      leaf, rewrite EXTENT_DATA2 addresses that match a data_reloc; track
 *      the highest level seen; then queue the parent of every backref via
 *      add_metadata_reloc_parent() (for TREE_BLOCK_REFs the parent is found
 *      by walking the owning root with find_item_to_level()).
 *   2. For each metadata_reloc: hash mr->address and look for an already
 *      loaded tree with that address in Vcb->trees_hash.
 *   3. Level by level, from 0 up to max_level: choose a new address (current
 *      newchunk, then any suitable existing chunk, then a freshly allocated
 *      chunk), patch the pointer held by each parent (block data, loaded
 *      tree items, or the root / superblock for top-level blocks), re-key
 *      the loaded tree in the hash list, recompute the block checksum, and
 *      queue a tree_write kept sorted by address.
 *   4. Call add_metadata_reloc_extent_item() for every entry.
 *   5. Flush with do_tree_writes() and free the tree_write list.
 *
 * Returns an NTSTATUS; STATUS_INSUFFICIENT_RESOURCES, STATUS_INTERNAL_ERROR
 * and STATUS_DISK_FULL are assigned or returned in the visible code.
 *
 * Fix in this revision: the "tree loaded more than once" check compared
 * t3->list_entry.Flink with &Vcb->trees_hash.  The hash list is linked
 * through list_entry_hash, so that test could never detect the end of the
 * list and `nt` could be computed from the list head itself.  It now tests
 * list_entry_hash.Flink, matching every other walk of trees_hash here.
 *
 * NOTE(review): many lines of this function are not visible in this view
 * (declarations, NULL checks, goto/break statements, closing braces, the
 * final return); only the visible statements are described. */
525 static NTSTATUS
write_metadata_items(device_extension
* Vcb
, LIST_ENTRY
* items
, LIST_ENTRY
* data_items
, chunk
* c
, LIST_ENTRY
* rollback
) {
526 LIST_ENTRY tree_writes
, *le
;
529 UINT8 level
, max_level
= 0;
530 chunk
* newchunk
= NULL
;
532 InitializeListHead(&tree_writes
);
/* phase 1: load each block and queue its parents */
535 while (le
!= items
) {
536 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
540 // ERR("address %llx\n", mr->address);
542 mr
->data
= ExAllocatePoolWithTag(PagedPool
, Vcb
->superblock
.node_size
, ALLOC_TAG
);
544 ERR("out of memory\n");
545 return STATUS_INSUFFICIENT_RESOURCES
;
/* pass c as a hint only when the address lies inside that chunk */
548 Status
= read_data(Vcb
, mr
->address
, Vcb
->superblock
.node_size
, NULL
, TRUE
, (UINT8
*)mr
->data
,
549 c
&& mr
->address
>= c
->offset
&& mr
->address
< c
->offset
+ c
->chunk_item
->size
? c
: NULL
, &pc
, NULL
, FALSE
);
550 if (!NT_SUCCESS(Status
)) {
551 ERR("read_data returned %08x\n", Status
);
555 if (pc
->chunk_item
->type
& BLOCK_FLAG_SYSTEM
)
/* leaves may reference data extents that are being moved in the same pass */
558 if (data_items
&& mr
->data
->level
== 0) {
559 LIST_ENTRY
* le2
= data_items
->Flink
;
560 while (le2
!= data_items
) {
561 data_reloc
* dr
= CONTAINING_RECORD(le2
, data_reloc
, list_entry
);
562 leaf_node
* ln
= (leaf_node
*)&mr
->data
[1];
565 for (i
= 0; i
< mr
->data
->num_items
; i
++) {
566 if (ln
[i
].key
.obj_type
== TYPE_EXTENT_DATA
&& ln
[i
].size
>= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
)) {
567 EXTENT_DATA
* ed
= (EXTENT_DATA
*)((UINT8
*)mr
->data
+ sizeof(tree_header
) + ln
[i
].offset
);
569 if (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) {
570 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
572 if (ed2
->address
== dr
->address
)
573 ed2
->address
= dr
->new_address
;
582 if (mr
->data
->level
> max_level
)
583 max_level
= mr
->data
->level
;
585 le2
= mr
->refs
.Flink
;
586 while (le2
!= &mr
->refs
) {
587 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le2
, metadata_reloc_ref
, list_entry
);
589 if (ref
->type
== TYPE_TREE_BLOCK_REF
) {
/* a tree ref names only the owning root: locate the parent by descending that root */
595 firstitem
= (KEY
*)&mr
->data
[1];
597 le3
= Vcb
->roots
.Flink
;
598 while (le3
!= &Vcb
->roots
) {
599 root
* r2
= CONTAINING_RECORD(le3
, root
, list_entry
);
601 if (r2
->id
== ref
->tbr
.offset
) {
610 ERR("could not find subvol with id %llx\n", ref
->tbr
.offset
);
611 return STATUS_INTERNAL_ERROR
;
614 Status
= find_item_to_level(Vcb
, r
, &tp
, firstitem
, FALSE
, mr
->data
->level
+ 1, NULL
);
615 if (!NT_SUCCESS(Status
) && Status
!= STATUS_NOT_FOUND
) {
616 ERR("find_item_to_level returned %08x\n", Status
);
621 while (t
&& t
->header
.level
< mr
->data
->level
+ 1) {
630 Status
= add_metadata_reloc_parent(Vcb
, items
, t
->header
.address
, &mr2
, rollback
);
631 if (!NT_SUCCESS(Status
)) {
632 ERR("add_metadata_reloc_parent returned %08x\n", Status
);
638 } else if (ref
->type
== TYPE_SHARED_BLOCK_REF
) {
/* a shared ref carries the parent's address directly */
641 Status
= add_metadata_reloc_parent(Vcb
, items
, ref
->sbr
.offset
, &mr2
, rollback
);
642 if (!NT_SUCCESS(Status
)) {
643 ERR("add_metadata_reloc_parent returned %08x\n", Status
);
/* phase 2: match each block with an already loaded tree, if any */
657 while (le
!= items
) {
658 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
664 hash
= calc_crc32c(0xffffffff, (UINT8
*)&mr
->address
, sizeof(UINT64
));
666 le2
= Vcb
->trees_ptrs
[hash
>> 24];
669 while (le2
!= &Vcb
->trees_hash
) {
670 tree
* t
= CONTAINING_RECORD(le2
, tree
, list_entry_hash
);
672 if (t
->header
.address
== mr
->address
) {
675 } else if (t
->hash
> hash
)
/* phase 3: relocate level by level, lowest level first */
685 for (level
= 0; level
<= max_level
; level
++) {
687 while (le
!= items
) {
688 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
690 if (mr
->data
->level
== level
) {
698 flags
= Vcb
->system_flags
;
699 else if (Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS
)
700 flags
= Vcb
->data_flags
;
702 flags
= Vcb
->metadata_flags
;
/* first choice: the chunk that served the previous allocation */
705 ExAcquireResourceExclusiveLite(&newchunk
->lock
, TRUE
);
707 if (newchunk
->chunk_item
->type
== flags
&& find_metadata_address_in_chunk(Vcb
, newchunk
, &mr
->new_address
)) {
708 increase_chunk_usage(newchunk
, Vcb
->superblock
.node_size
);
709 space_list_subtract(Vcb
, newchunk
, FALSE
, mr
->new_address
, Vcb
->superblock
.node_size
, rollback
);
713 ExReleaseResourceLite(&newchunk
->lock
);
/* second choice: any writable, non-relocating chunk of the right type with room */
717 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, TRUE
);
719 le2
= Vcb
->chunks
.Flink
;
720 while (le2
!= &Vcb
->chunks
) {
721 chunk
* c2
= CONTAINING_RECORD(le2
, chunk
, list_entry
);
723 if (!c2
->readonly
&& !c2
->reloc
&& c2
!= newchunk
&& c2
->chunk_item
->type
== flags
) {
724 ExAcquireResourceExclusiveLite(&c2
->lock
, TRUE
);
726 if ((c2
->chunk_item
->size
- c2
->used
) >= Vcb
->superblock
.node_size
) {
727 if (find_metadata_address_in_chunk(Vcb
, c2
, &mr
->new_address
)) {
728 increase_chunk_usage(c2
, Vcb
->superblock
.node_size
);
729 space_list_subtract(Vcb
, c2
, FALSE
, mr
->new_address
, Vcb
->superblock
.node_size
, rollback
);
730 ExReleaseResourceLite(&c2
->lock
);
737 ExReleaseResourceLite(&c2
->lock
);
743 // allocate new chunk if necessary
745 newchunk
= alloc_chunk(Vcb
, flags
);
748 ERR("could not allocate new chunk\n");
749 ExReleaseResourceLite(&Vcb
->chunk_lock
);
750 Status
= STATUS_DISK_FULL
;
754 ExAcquireResourceExclusiveLite(&newchunk
->lock
, TRUE
);
756 if (!find_metadata_address_in_chunk(Vcb
, newchunk
, &mr
->new_address
)) {
757 ExReleaseResourceLite(&newchunk
->lock
);
758 ERR("could not find address in new chunk\n");
759 Status
= STATUS_DISK_FULL
;
762 increase_chunk_usage(newchunk
, Vcb
->superblock
.node_size
);
763 space_list_subtract(Vcb
, newchunk
, FALSE
, mr
->new_address
, Vcb
->superblock
.node_size
, rollback
);
766 ExReleaseResourceLite(&newchunk
->lock
);
769 ExReleaseResourceLite(&Vcb
->chunk_lock
);
/* repoint whatever referenced the old address */
773 le2
= mr
->refs
.Flink
;
774 while (le2
!= &mr
->refs
) {
775 metadata_reloc_ref
* ref
= CONTAINING_RECORD(le2
, metadata_reloc_ref
, list_entry
);
779 internal_node
* in
= (internal_node
*)&ref
->parent
->data
[1];
781 for (i
= 0; i
< ref
->parent
->data
->num_items
; i
++) {
782 if (in
[i
].address
== mr
->address
) {
783 in
[i
].address
= mr
->new_address
;
/* keep the parent's in-memory tree, if loaded, in step with its block data */
788 if (ref
->parent
->t
) {
791 le3
= ref
->parent
->t
->itemlist
.Flink
;
792 while (le3
!= &ref
->parent
->t
->itemlist
) {
793 tree_data
* td
= CONTAINING_RECORD(le3
, tree_data
, list_entry
);
795 if (!td
->inserted
&& td
->treeholder
.address
== mr
->address
)
796 td
->treeholder
.address
= mr
->new_address
;
801 } else if (ref
->top
&& ref
->type
== TYPE_TREE_BLOCK_REF
) {
/* top of a tree: the pointer lives in the root (and possibly the superblock or ROOT_ITEM) */
807 le3
= Vcb
->roots
.Flink
;
808 while (le3
!= &Vcb
->roots
) {
809 root
* r2
= CONTAINING_RECORD(le3
, root
, list_entry
);
811 if (r2
->id
== ref
->tbr
.offset
) {
820 r
->treeholder
.address
= mr
->new_address
;
822 if (r
== Vcb
->root_root
)
823 Vcb
->superblock
.root_tree_addr
= mr
->new_address
;
824 else if (r
== Vcb
->chunk_root
)
825 Vcb
->superblock
.chunk_tree_addr
= mr
->new_address
;
826 else if (r
->root_item
.block_number
== mr
->address
) {
830 r
->root_item
.block_number
= mr
->new_address
;
832 searchkey
.obj_id
= r
->id
;
833 searchkey
.obj_type
= TYPE_ROOT_ITEM
;
834 searchkey
.offset
= 0xffffffffffffffff;
836 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, FALSE
, NULL
);
837 if (!NT_SUCCESS(Status
)) {
838 ERR("find_item returned %08x\n", Status
);
842 if (tp
.item
->key
.obj_id
!= searchkey
.obj_id
|| tp
.item
->key
.obj_type
!= searchkey
.obj_type
) {
843 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey
.obj_id
);
844 Status
= STATUS_INTERNAL_ERROR
;
848 ri
= ExAllocatePoolWithTag(PagedPool
, sizeof(ROOT_ITEM
), ALLOC_TAG
);
850 ERR("out of memory\n");
851 Status
= STATUS_INSUFFICIENT_RESOURCES
;
/* replace the ROOT_ITEM with a copy carrying the new block number */
855 RtlCopyMemory(ri
, &r
->root_item
, sizeof(ROOT_ITEM
));
857 delete_tree_item(Vcb
, &tp
, rollback
);
859 if (!insert_tree_item(Vcb
, Vcb
->root_root
, tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, ri
, sizeof(ROOT_ITEM
), NULL
, NULL
, rollback
)) {
860 ERR("insert_tree_item failed\n");
861 Status
= STATUS_INTERNAL_ERROR
;
871 mr
->data
->address
= mr
->new_address
;
880 // check if tree loaded more than once
/* fixed: the hash list is linked through list_entry_hash, not list_entry */
881 if (t3
->list_entry_hash
.Flink
!= &Vcb
->trees_hash
) {
882 tree
* nt
= CONTAINING_RECORD(t3
->list_entry_hash
.Flink
, tree
, list_entry_hash
);
884 if (nt
->header
.address
== t3
->header
.address
)
888 t3
->header
.address
= mr
->new_address
;
/* the address changed, so the tree has to move to its new slot in the hash list */
892 if (Vcb
->trees_ptrs
[h
] == &t3
->list_entry_hash
) {
893 if (t3
->list_entry_hash
.Flink
== &Vcb
->trees_hash
)
894 Vcb
->trees_ptrs
[h
] = NULL
;
896 tree
* t2
= CONTAINING_RECORD(t3
->list_entry_hash
.Flink
, tree
, list_entry_hash
);
898 if (t2
->hash
>> 24 == h
)
899 Vcb
->trees_ptrs
[h
] = &t2
->list_entry_hash
;
901 Vcb
->trees_ptrs
[h
] = NULL
;
905 RemoveEntryList(&t3
->list_entry_hash
);
907 t3
->hash
= calc_crc32c(0xffffffff, (UINT8
*)&t3
->header
.address
, sizeof(UINT64
));
910 if (!Vcb
->trees_ptrs
[h
]) {
913 le2
= Vcb
->trees_hash
.Flink
;
918 if (Vcb
->trees_ptrs
[h2
]) {
919 le2
= Vcb
->trees_ptrs
[h2
];
927 le2
= Vcb
->trees_ptrs
[h
];
/* insert in ascending hash order */
930 while (le2
!= &Vcb
->trees_hash
) {
931 tree
* t2
= CONTAINING_RECORD(le2
, tree
, list_entry_hash
);
933 if (t2
->hash
>= t3
->hash
) {
934 InsertHeadList(le2
->Blink
, &t3
->list_entry_hash
);
943 InsertTailList(&Vcb
->trees_hash
, &t3
->list_entry_hash
);
945 if (!Vcb
->trees_ptrs
[h
] || t3
->list_entry_hash
.Flink
== Vcb
->trees_ptrs
[h
])
946 Vcb
->trees_ptrs
[h
] = &t3
->list_entry_hash
;
/* loaded leaves need the same data-extent address rewrite as the raw block */
948 if (data_items
&& level
== 0) {
949 le2
= data_items
->Flink
;
951 while (le2
!= data_items
) {
952 data_reloc
* dr
= CONTAINING_RECORD(le2
, data_reloc
, list_entry
);
953 LIST_ENTRY
* le3
= t3
->itemlist
.Flink
;
955 while (le3
!= &t3
->itemlist
) {
956 tree_data
* td
= CONTAINING_RECORD(le3
, tree_data
, list_entry
);
958 if (!td
->inserted
&& td
->key
.obj_type
== TYPE_EXTENT_DATA
&& td
->size
>= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
)) {
959 EXTENT_DATA
* ed
= (EXTENT_DATA
*)td
->data
;
961 if (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) {
962 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
964 if (ed2
->address
== dr
->address
)
965 ed2
->address
= dr
->new_address
;
/* checksum covers everything after the csum field and is stored at the start of the block */
979 *((UINT32
*)mr
->data
) = ~calc_crc32c(0xffffffff, (UINT8
*)&mr
->data
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(mr
->data
->csum
));
981 tw
= ExAllocatePoolWithTag(PagedPool
, sizeof(tree_write
), ALLOC_TAG
);
983 ERR("out of memory\n");
984 Status
= STATUS_INSUFFICIENT_RESOURCES
;
988 tw
->address
= mr
->new_address
;
989 tw
->length
= Vcb
->superblock
.node_size
;
990 tw
->data
= (UINT8
*)mr
->data
;
/* keep tree_writes sorted by ascending address */
993 if (IsListEmpty(&tree_writes
))
994 InsertTailList(&tree_writes
, &tw
->list_entry
);
996 BOOL inserted
= FALSE
;
998 le2
= tree_writes
.Flink
;
999 while (le2
!= &tree_writes
) {
1000 tree_write
* tw2
= CONTAINING_RECORD(le2
, tree_write
, list_entry
);
1002 if (tw2
->address
> tw
->address
) {
1003 InsertHeadList(le2
->Blink
, &tw
->list_entry
);
1012 InsertTailList(&tree_writes
, &tw
->list_entry
);
/* phase 4: create the extent items for the new locations */
1021 while (le
!= items
) {
1022 metadata_reloc
* mr
= CONTAINING_RECORD(le
, metadata_reloc
, list_entry
);
1024 Status
= add_metadata_reloc_extent_item(Vcb
, mr
, rollback
);
1025 if (!NT_SUCCESS(Status
)) {
1026 ERR("add_metadata_reloc_extent_item returned %08x\n", Status
);
/* phase 5: write the relocated blocks out */
1033 Status
= do_tree_writes(Vcb
, &tree_writes
, NULL
);
1034 if (!NT_SUCCESS(Status
)) {
1035 ERR("do_tree_writes returned %08x\n", Status
);
1039 Status
= STATUS_SUCCESS
;
1042 while (!IsListEmpty(&tree_writes
)) {
1043 tree_write
* tw
= CONTAINING_RECORD(RemoveHeadList(&tree_writes
), tree_write
, list_entry
);
/* balance_metadata_chunk
 *
 * Moves tree blocks out of chunk c.  Visible behaviour:
 *   - takes Vcb->tree_lock exclusively for the whole operation;
 *   - searches Vcb->extent_root from (c->offset, TYPE_METADATA_ITEM,
 *     0xffffffffffffffff) and steps through the following items with
 *     find_next_item(), testing each key against
 *     c->offset + c->chunk_item->size;
 *   - for METADATA_ITEMs, and for EXTENT_ITEMs whose offset equals node_size
 *     and whose flags include EXTENT_ITEM_TREE_BLOCK, calls
 *     add_metadata_reloc() to queue the block, in batches ("only do 64 at a
 *     time");
 *   - if nothing was queued, sets Status to STATUS_SUCCESS; otherwise calls
 *     write_metadata_items() and sets Vcb->need_write;
 *   - clears the rollback list on success or replays it with do_rollback()
 *     on failure, releases the tree lock, then drains the items list and
 *     each entry's refs list.
 *
 * NOTE(review): declarations, the writes to *changed, the goto/break
 * statements, the pool frees in the cleanup loops and the final return are
 * not visible in this view; confirm against the complete source. */
1050 static NTSTATUS
balance_metadata_chunk(device_extension
* Vcb
, chunk
* c
, BOOL
* changed
) {
1055 LIST_ENTRY items
, rollback
;
1058 TRACE("chunk %llx\n", c
->offset
);
1060 InitializeListHead(&rollback
);
1061 InitializeListHead(&items
);
1063 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, TRUE
);
/* start scanning the extent tree at the beginning of the chunk */
1065 searchkey
.obj_id
= c
->offset
;
1066 searchkey
.obj_type
= TYPE_METADATA_ITEM
;
1067 searchkey
.offset
= 0xffffffffffffffff;
1069 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, FALSE
, NULL
);
1070 if (!NT_SUCCESS(Status
)) {
1071 ERR("find_item returned %08x\n", Status
);
1076 traverse_ptr next_tp
;
/* keys at or beyond the end of the chunk are out of range */
1078 if (tp
.item
->key
.obj_id
>= c
->offset
+ c
->chunk_item
->size
)
1081 if (tp
.item
->key
.obj_id
>= c
->offset
&& (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
|| tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
)) {
1082 BOOL tree
= FALSE
, skinny
= FALSE
;
1084 if (tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
&& tp
.item
->size
>= sizeof(EXTENT_ITEM
)) {
1087 } else if (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
&& tp
.item
->key
.offset
== Vcb
->superblock
.node_size
&&
1088 tp
.item
->size
>= sizeof(EXTENT_ITEM
)) {
1089 EXTENT_ITEM
* ei
= (EXTENT_ITEM
*)tp
.item
->data
;
1091 if (ei
->flags
& EXTENT_ITEM_TREE_BLOCK
)
1096 Status
= add_metadata_reloc(Vcb
, &items
, &tp
, skinny
, NULL
, c
, &rollback
);
1098 if (!NT_SUCCESS(Status
)) {
1099 ERR("add_metadata_reloc returned %08x\n", Status
);
1105 if (loaded
>= 64) // only do 64 at a time
1110 b
= find_next_item(Vcb
, &tp
, &next_tp
, FALSE
, NULL
);
/* nothing queued: there is nothing to write */
1116 if (IsListEmpty(&items
)) {
1118 Status
= STATUS_SUCCESS
;
1123 Status
= write_metadata_items(Vcb
, &items
, NULL
, c
, &rollback
);
1124 if (!NT_SUCCESS(Status
)) {
1125 ERR("write_metadata_items returned %08x\n", Status
);
1129 Status
= STATUS_SUCCESS
;
1131 Vcb
->need_write
= TRUE
;
/* commit or undo the tree changes made through the rollback list */
1134 if (NT_SUCCESS(Status
))
1135 clear_rollback(Vcb
, &rollback
);
1137 do_rollback(Vcb
, &rollback
);
1139 ExReleaseResourceLite(&Vcb
->tree_lock
);
/* tear down the relocation records and their ref lists */
1141 while (!IsListEmpty(&items
)) {
1142 metadata_reloc
* mr
= CONTAINING_RECORD(RemoveHeadList(&items
), metadata_reloc
, list_entry
);
1144 while (!IsListEmpty(&mr
->refs
)) {
1145 metadata_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&mr
->refs
), metadata_reloc_ref
, list_entry
);
/* add_data_reloc
 *
 * Queues the data extent described by the extent item at *tp for relocation.
 * Visible behaviour:
 *   - allocates a data_reloc (dr) and records the extent address
 *     (key.obj_id), size (key.offset) and a pointer to its EXTENT_ITEM;
 *   - deletes the extent item, and under c->lock reduces the chunk's usage
 *     by the extent size and adds the range to the chunk's space list;
 *   - copies each inline EXTENT_DATA_REF / SHARED_DATA_REF section into a
 *     data_reloc_ref on dr->refs, adding its count to inline_rc;
 *   - for an EXTENT_DATA_REF, finds the root whose id equals edr.root, looks
 *     up (edr.objid, TYPE_EXTENT_DATA, edr.offset) in it and queues the leaf
 *     holding that item through add_metadata_reloc_parent(); for a
 *     SHARED_DATA_REF the leaf address is sdr.offset;
 *   - when inline_rc < ei->refcount, does the same for the separate
 *     EXTENT_DATA_REF / SHARED_DATA_REF items that share the extent's
 *     obj_id, deleting each one after copying it;
 *   - appends dr to items.
 *
 * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES on allocation
 * failure, or STATUS_INTERNAL_ERROR for a malformed extent item, an unknown
 * subvolume, or a missing EXTENT_DATA item.
 *
 * Fixes in this revision:
 *   - both "could not find subvol" messages printed ref->edr.count although
 *     the failed lookup is keyed on ref->edr.root; they now print the root;
 *   - the non-inline EXTENT_DATA_REF path tested !keycmp(), i.e. it reported
 *     "could not find" exactly when the keys matched.  It now uses the same
 *     test as the inline path (error when keycmp() is non-zero).
 *
 * NOTE(review): declarations, NULL checks, pool frees on the error paths,
 * loop headers and closing braces are not visible in this view; only the
 * visible statements are described. */
1156 static NTSTATUS
add_data_reloc(device_extension
* Vcb
, LIST_ENTRY
* items
, LIST_ENTRY
* metadata_items
, traverse_ptr
* tp
, chunk
* c
, LIST_ENTRY
* rollback
) {
1163 dr
= ExAllocatePoolWithTag(PagedPool
, sizeof(data_reloc
), ALLOC_TAG
);
1165 ERR("out of memory\n");
1166 return STATUS_INSUFFICIENT_RESOURCES
;
/* for a data extent the key is (address, EXTENT_ITEM, length) */
1169 dr
->address
= tp
->item
->key
.obj_id
;
1170 dr
->size
= tp
->item
->key
.offset
;
1171 dr
->ei
= (EXTENT_ITEM
*)tp
->item
->data
;
1172 InitializeListHead(&dr
->refs
);
1174 delete_tree_item(Vcb
, tp
, rollback
);
/* presumably only done when the caller passed c == NULL - the guard is not visible here */
1177 c
= get_chunk_from_address(Vcb
, tp
->item
->key
.obj_id
);
/* hand the extent's bytes back to the chunk while holding its lock */
1180 ExAcquireResourceExclusiveLite(&c
->lock
, TRUE
);
1182 decrease_chunk_usage(c
, tp
->item
->key
.offset
);
1184 space_list_add(Vcb
, c
, TRUE
, tp
->item
->key
.obj_id
, tp
->item
->key
.offset
, rollback
);
1186 ExReleaseResourceLite(&c
->lock
);
1189 ei
= (EXTENT_ITEM
*)tp
->item
->data
;
1192 len
= tp
->item
->size
- sizeof(EXTENT_ITEM
);
1193 ptr
= (UINT8
*)tp
->item
->data
+ sizeof(EXTENT_ITEM
);
/* each inline section is one type byte followed by the backref payload */
1196 UINT8 secttype
= *ptr
;
1197 ULONG sectlen
= secttype
== TYPE_EXTENT_DATA_REF
? sizeof(EXTENT_DATA_REF
) : (secttype
== TYPE_SHARED_DATA_REF
? sizeof(SHARED_DATA_REF
) : 0);
1198 data_reloc_ref
* ref
;
1204 if (sectlen
> len
) {
1205 ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp
->item
->key
.obj_id
, tp
->item
->key
.obj_type
, tp
->item
->key
.offset
, len
, sectlen
);
1206 return STATUS_INTERNAL_ERROR
;
1210 ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp
->item
->key
.obj_id
, tp
->item
->key
.obj_type
, tp
->item
->key
.offset
, secttype
);
1211 return STATUS_INTERNAL_ERROR
;
1214 ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(data_reloc_ref
), ALLOC_TAG
);
1216 ERR("out of memory\n");
1217 return STATUS_INSUFFICIENT_RESOURCES
;
1220 if (secttype
== TYPE_EXTENT_DATA_REF
) {
1226 ref
->type
= TYPE_EXTENT_DATA_REF
;
1227 RtlCopyMemory(&ref
->edr
, ptr
+ sizeof(UINT8
), sizeof(EXTENT_DATA_REF
));
1228 inline_rc
+= ref
->edr
.count
;
/* find the subvolume that owns the referencing file extent */
1230 le
= Vcb
->roots
.Flink
;
1231 while (le
!= &Vcb
->roots
) {
1232 root
* r2
= CONTAINING_RECORD(le
, root
, list_entry
);
1234 if (r2
->id
== ref
->edr
.root
) {
/* fixed: report the root id that was searched for, not the ref count */
1243 ERR("could not find subvol %llx\n", ref
->edr
.root
);
1245 return STATUS_INTERNAL_ERROR
;
1248 searchkey
.obj_id
= ref
->edr
.objid
;
1249 searchkey
.obj_type
= TYPE_EXTENT_DATA
;
1250 searchkey
.offset
= ref
->edr
.offset
;
1252 Status
= find_item(Vcb
, r
, &tp3
, &searchkey
, FALSE
, NULL
);
1253 if (!NT_SUCCESS(Status
)) {
1254 ERR("find_item returned %08x\n", Status
);
/* non-zero keycmp means the item found is not the one searched for */
1259 if (keycmp(tp3
.item
->key
, searchkey
)) {
1260 ERR("could not find (%llx,%x,%llx) in root %llx\n", searchkey
.obj_id
, searchkey
.obj_type
, searchkey
.offset
, r
->id
);
1262 return STATUS_INTERNAL_ERROR
;
/* the leaf holding the EXTENT_DATA item must be rewritten, so queue it */
1265 Status
= add_metadata_reloc_parent(Vcb
, metadata_items
, tp3
.tree
->header
.address
, &mr
, rollback
);
1266 if (!NT_SUCCESS(Status
)) {
1267 ERR("add_metadata_reloc_parent returned %08x\n", Status
);
1273 } else if (secttype
== TYPE_SHARED_DATA_REF
) {
1274 ref
->type
= TYPE_SHARED_DATA_REF
;
1275 RtlCopyMemory(&ref
->sdr
, ptr
+ sizeof(UINT8
), sizeof(SHARED_DATA_REF
));
1276 inline_rc
+= ref
->sdr
.count
;
/* a shared data ref names the referencing leaf by address */
1278 Status
= add_metadata_reloc_parent(Vcb
, metadata_items
, ref
->sdr
.offset
, &mr
, rollback
);
1279 if (!NT_SUCCESS(Status
)) {
1280 ERR("add_metadata_reloc_parent returned %08x\n", Status
);
1287 ERR("unexpected tree type %x\n", secttype
);
1289 return STATUS_INTERNAL_ERROR
;
1292 InsertTailList(&dr
->refs
, &ref
->list_entry
);
1295 ptr
+= sizeof(UINT8
) + sectlen
;
1298 if (inline_rc
< ei
->refcount
) { // look for non-inline entries
1299 traverse_ptr tp2
= *tp
, next_tp
;
1301 while (find_next_item(Vcb
, &tp2
, &next_tp
, FALSE
, NULL
)) {
/* only items keyed on the same extent address are backrefs of this extent */
1307 if (tp2
.item
->key
.obj_id
== tp
->item
->key
.obj_id
) {
1308 if (tp2
.item
->key
.obj_type
== TYPE_EXTENT_DATA_REF
&& tp2
.item
->size
>= sizeof(EXTENT_DATA_REF
)) {
1309 data_reloc_ref
* ref
;
1315 ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(data_reloc_ref
), ALLOC_TAG
);
1317 ERR("out of memory\n");
1318 return STATUS_INSUFFICIENT_RESOURCES
;
1321 ref
->type
= TYPE_EXTENT_DATA_REF
;
1322 RtlCopyMemory(&ref
->edr
, tp2
.item
->data
, sizeof(EXTENT_DATA_REF
));
1324 le
= Vcb
->roots
.Flink
;
1325 while (le
!= &Vcb
->roots
) {
1326 root
* r2
= CONTAINING_RECORD(le
, root
, list_entry
);
1328 if (r2
->id
== ref
->edr
.root
) {
/* fixed: report the root id that was searched for, not the ref count */
1337 ERR("could not find subvol %llx\n", ref
->edr
.root
);
1339 return STATUS_INTERNAL_ERROR
;
1342 searchkey
.obj_id
= ref
->edr
.objid
;
1343 searchkey
.obj_type
= TYPE_EXTENT_DATA
;
1344 searchkey
.offset
= ref
->edr
.offset
;
1346 Status
= find_item(Vcb
, r
, &tp3
, &searchkey
, FALSE
, NULL
);
1347 if (!NT_SUCCESS(Status
)) {
1348 ERR("find_item returned %08x\n", Status
);
/* fixed: was !keycmp(), which failed exactly when the item had been found */
1353 if (keycmp(tp3
.item
->key
, searchkey
)) {
1354 ERR("could not find (%llx,%x,%llx) in root %llx\n", searchkey
.obj_id
, searchkey
.obj_type
, searchkey
.offset
, r
->id
);
1356 return STATUS_INTERNAL_ERROR
;
1359 Status
= add_metadata_reloc_parent(Vcb
, metadata_items
, tp3
.tree
->header
.address
, &mr
, rollback
);
1360 if (!NT_SUCCESS(Status
)) {
1361 ERR("add_metadata_reloc_parent returned %08x\n", Status
);
1367 InsertTailList(&dr
->refs
, &ref
->list_entry
);
1369 delete_tree_item(Vcb
, &tp2
, rollback
);
1370 } else if (tp2
.item
->key
.obj_type
== TYPE_SHARED_DATA_REF
&& tp2
.item
->size
>= sizeof(SHARED_DATA_REF
)) {
1371 data_reloc_ref
* ref
= ExAllocatePoolWithTag(PagedPool
, sizeof(data_reloc_ref
), ALLOC_TAG
);
1373 ERR("out of memory\n");
1374 return STATUS_INSUFFICIENT_RESOURCES
;
1377 ref
->type
= TYPE_SHARED_DATA_REF
;
1378 RtlCopyMemory(&ref
->sdr
, tp2
.item
->data
, sizeof(SHARED_DATA_REF
));
1380 Status
= add_metadata_reloc_parent(Vcb
, metadata_items
, ref
->sdr
.offset
, &mr
, rollback
);
1381 if (!NT_SUCCESS(Status
)) {
1382 ERR("add_metadata_reloc_parent returned %08x\n", Status
);
1388 InsertTailList(&dr
->refs
, &ref
->list_entry
);
1390 delete_tree_item(Vcb
, &tp2
, rollback
);
/* the fully populated relocation record joins the caller's list */
1397 InsertTailList(items
, &dr
->list_entry
);
1399 return STATUS_SUCCESS
;
/* Writes the extent-tree entries describing a relocated data extent.
 *
 * Measures the refs on dr->refs, allocates an EXTENT_ITEM of inline_len
 * bytes, copies generation and flags from dr->ei, packs refs after the
 * header, and inserts the result into Vcb->extent_root with key
 * (dr->new_address, TYPE_EXTENT_ITEM, dr->size). Refs from first_noninline
 * onwards are then inserted as separate TYPE_EXTENT_DATA_REF /
 * TYPE_SHARED_DATA_REF items keyed on dr->new_address.
 *
 * Vcb      - volume; supplies superblock.node_size and extent_root
 * dr       - relocation record (refs, ei, size, new_address)
 * rollback - list handed to insert_tree_item
 *
 * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES when an allocation
 * fails, or STATUS_INTERNAL_ERROR when insert_tree_item fails. */
1402 static NTSTATUS
add_data_reloc_extent_item(device_extension
* Vcb
, data_reloc
* dr
, LIST_ENTRY
* rollback
) {
1406 BOOL all_inline
= TRUE
;
1407 data_reloc_ref
* first_noninline
= NULL
;
// the inline size starts at the fixed EXTENT_ITEM header
1411 inline_len
= sizeof(EXTENT_ITEM
);
// first pass: measure each ref and find where inline storage must stop
1413 le
= dr
->refs
.Flink
;
1414 while (le
!= &dr
->refs
) {
1415 data_reloc_ref
* ref
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1420 if (ref
->type
== TYPE_EXTENT_DATA_REF
)
1421 extlen
+= sizeof(EXTENT_DATA_REF
);
1422 else if (ref
->type
== TYPE_SHARED_DATA_REF
)
1423 extlen
+= sizeof(SHARED_DATA_REF
);
// a ref that would take the item past a quarter of a node becomes the first non-inline ref
1426 if (inline_len
+ 1 + extlen
> Vcb
->superblock
.node_size
/ 4) {
1428 first_noninline
= ref
;
// the extra 1 is presumably the one-byte ref type preceding each payload - confirm
1430 inline_len
+= extlen
+ 1;
1436 ei
= ExAllocatePoolWithTag(PagedPool
, inline_len
, ALLOC_TAG
);
1438 ERR("out of memory\n");
1439 return STATUS_INSUFFICIENT_RESOURCES
;
// generation and flags are carried over from the original extent item
1443 ei
->generation
= dr
->ei
->generation
;
1444 ei
->flags
= dr
->ei
->flags
;
// inline refs are written immediately after the EXTENT_ITEM header
1445 ptr
= (UINT8
*)&ei
[1];
// second pass: emit refs up to first_noninline
1447 le
= dr
->refs
.Flink
;
1448 while (le
!= &dr
->refs
) {
1449 data_reloc_ref
* ref
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1451 if (ref
== first_noninline
)
1457 if (ref
->type
== TYPE_EXTENT_DATA_REF
) {
1458 EXTENT_DATA_REF
* edr
= (EXTENT_DATA_REF
*)ptr
;
1460 RtlCopyMemory(edr
, &ref
->edr
, sizeof(EXTENT_DATA_REF
));
1462 ptr
+= sizeof(EXTENT_DATA_REF
);
1463 } else if (ref
->type
== TYPE_SHARED_DATA_REF
) {
1464 SHARED_DATA_REF
* sdr
= (SHARED_DATA_REF
*)ptr
;
// shared refs are rewritten to use the parent's new_address
1466 sdr
->offset
= ref
->parent
->new_address
;
1467 sdr
->count
= ref
->sdr
.count
;
1469 ptr
+= sizeof(SHARED_DATA_REF
);
// the extent item is keyed on the new address, with the extent size as the key offset
1475 if (!insert_tree_item(Vcb
, Vcb
->extent_root
, dr
->new_address
, TYPE_EXTENT_ITEM
, dr
->size
, ei
, inline_len
, NULL
, NULL
, rollback
)) {
1476 ERR("insert_tree_item failed\n");
1477 return STATUS_INTERNAL_ERROR
;
// remaining refs, starting at first_noninline, become separate tree items
1481 le
= &first_noninline
->list_entry
;
1483 while (le
!= &dr
->refs
) {
1484 data_reloc_ref
* ref
= CONTAINING_RECORD(le
, data_reloc_ref
, list_entry
);
1486 if (ref
->type
== TYPE_EXTENT_DATA_REF
) {
1487 EXTENT_DATA_REF
* edr
;
1490 edr
= ExAllocatePoolWithTag(PagedPool
, sizeof(EXTENT_DATA_REF
), ALLOC_TAG
);
1492 ERR("out of memory\n");
1493 return STATUS_INSUFFICIENT_RESOURCES
;
1496 RtlCopyMemory(edr
, &ref
->edr
, sizeof(EXTENT_DATA_REF
));
// the key offset of an EXTENT_DATA_REF item is the hash of (root, objid, offset)
1498 off
= get_extent_data_ref_hash2(ref
->edr
.root
, ref
->edr
.objid
, ref
->edr
.offset
);
1500 if (!insert_tree_item(Vcb
, Vcb
->extent_root
, dr
->new_address
, TYPE_EXTENT_DATA_REF
, off
, edr
, sizeof(EXTENT_DATA_REF
), NULL
, NULL
, rollback
)) {
1501 ERR("insert_tree_item failed\n");
1502 return STATUS_INTERNAL_ERROR
;
1504 } else if (ref
->type
== TYPE_SHARED_DATA_REF
) {
1505 SHARED_DATA_REF
* sdr
;
1507 sdr
= ExAllocatePoolWithTag(PagedPool
, sizeof(SHARED_DATA_REF
), ALLOC_TAG
);
1509 ERR("out of memory\n");
1510 return STATUS_INSUFFICIENT_RESOURCES
;
1513 sdr
->offset
= ref
->parent
->new_address
;
1514 sdr
->count
= ref
->sdr
.count
;
// a SHARED_DATA_REF item uses the parent's new address as its key offset
1516 if (!insert_tree_item(Vcb
, Vcb
->extent_root
, dr
->new_address
, TYPE_SHARED_DATA_REF
, sdr
->offset
, sdr
, sizeof(SHARED_DATA_REF
), NULL
, NULL
, rollback
)) {
1517 ERR("insert_tree_item failed\n");
1518 return STATUS_INTERNAL_ERROR
;
1526 return STATUS_SUCCESS
;
/* Relocates a batch of data extents out of chunk c.
 *
 * Under Vcb->tree_lock (exclusive) this walks the extent tree from the
 * chunk's start address, queues EXTENT_ITEMs via add_data_reloc, picks a new
 * address for each queued extent, copies its checksums and data there,
 * rewrites the affected metadata and extent items, and finally updates the
 * in-memory changed_extent and fcb extent records to the new addresses.
 *
 * Vcb     - volume being balanced
 * c       - source chunk
 * changed - out parameter; NOTE(review): its assignment is not among the
 *           statements shown here - confirm what it reports
 *
 * Returns an NTSTATUS; failures seen here include
 * STATUS_INSUFFICIENT_RESOURCES and STATUS_DISK_FULL. */
1529 static NTSTATUS
balance_data_chunk(device_extension
* Vcb
, chunk
* c
, BOOL
* changed
) {
1534 LIST_ENTRY items
, metadata_items
, rollback
, *le
;
1535 UINT64 loaded
= 0, num_loaded
= 0;
1536 chunk
* newchunk
= NULL
;
1539 TRACE("chunk %llx\n", c
->offset
);
1541 InitializeListHead(&rollback
);
1542 InitializeListHead(&items
);
1543 InitializeListHead(&metadata_items
);
// the tree lock is held exclusively until the release near the end of the function
1545 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, TRUE
);
// search the extent tree using the chunk's start address as the object id
1547 searchkey
.obj_id
= c
->offset
;
1548 searchkey
.obj_type
= TYPE_EXTENT_ITEM
;
1549 searchkey
.offset
= 0xffffffffffffffff;
1551 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, FALSE
, NULL
);
1552 if (!NT_SUCCESS(Status
)) {
1553 ERR("find_item returned %08x\n", Status
);
1558 traverse_ptr next_tp
;
// items at or beyond the end of the chunk are out of range
1560 if (tp
.item
->key
.obj_id
>= c
->offset
+ c
->chunk_item
->size
)
1563 if (tp
.item
->key
.obj_id
>= c
->offset
&& tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
) {
1566 if (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
&& tp
.item
->size
>= sizeof(EXTENT_ITEM
)) {
1567 EXTENT_ITEM
* ei
= (EXTENT_ITEM
*)tp
.item
->data
;
// extents flagged as tree blocks are treated differently from data extents here
1569 if (ei
->flags
& EXTENT_ITEM_TREE_BLOCK
)
1574 Status
= add_data_reloc(Vcb
, &items
, &metadata_items
, &tp
, c
, &rollback
);
1576 if (!NT_SUCCESS(Status
)) {
1577 ERR("add_data_reloc returned %08x\n", Status
);
// the key offset of an EXTENT_ITEM is added to the running byte total
1581 loaded
+= tp
.item
->key
.offset
;
1584 if (loaded
>= 0x1000000 || num_loaded
>= 100) // only do so much at a time, so we don't block too obnoxiously
1589 b
= find_next_item(Vcb
, &tp
, &next_tp
, FALSE
, NULL
);
// nothing was queued
1595 if (IsListEmpty(&items
)) {
1597 Status
= STATUS_SUCCESS
;
// 0x100000-byte buffer used below to copy extent data piece by piece
1602 data
= ExAllocatePoolWithTag(PagedPool
, 0x100000, ALLOC_TAG
);
1604 ERR("out of memory\n");
1605 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1610 while (le
!= &items
) {
1611 data_reloc
* dr
= CONTAINING_RECORD(le
, data_reloc
, list_entry
);
// try the destination chunk already in use
1618 ExAcquireResourceExclusiveLite(&newchunk
->lock
, TRUE
);
1620 if (find_data_address_in_chunk(Vcb
, newchunk
, dr
->size
, &dr
->new_address
)) {
1621 increase_chunk_usage(newchunk
, dr
->size
);
1622 space_list_subtract(Vcb
, newchunk
, FALSE
, dr
->new_address
, dr
->size
, &rollback
);
1626 ExReleaseResourceLite(&newchunk
->lock
);
// scan the existing chunks for a writable, non-relocating data chunk with enough free space
1630 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, TRUE
);
1632 le2
= Vcb
->chunks
.Flink
;
1633 while (le2
!= &Vcb
->chunks
) {
1634 chunk
* c2
= CONTAINING_RECORD(le2
, chunk
, list_entry
);
1636 if (!c2
->readonly
&& !c2
->reloc
&& c2
!= newchunk
&& c2
->chunk_item
->type
== Vcb
->data_flags
) {
1637 ExAcquireResourceExclusiveLite(&c2
->lock
, TRUE
);
1639 if ((c2
->chunk_item
->size
- c2
->used
) >= dr
->size
) {
1640 if (find_data_address_in_chunk(Vcb
, c2
, dr
->size
, &dr
->new_address
)) {
1641 increase_chunk_usage(c2
, dr
->size
);
1642 space_list_subtract(Vcb
, c2
, FALSE
, dr
->new_address
, dr
->size
, &rollback
);
1643 ExReleaseResourceLite(&c2
->lock
);
1650 ExReleaseResourceLite(&c2
->lock
);
1656 // allocate new chunk if necessary
1658 newchunk
= alloc_chunk(Vcb
, Vcb
->data_flags
);
1661 ERR("could not allocate new chunk\n");
1662 ExReleaseResourceLite(&Vcb
->chunk_lock
);
1663 Status
= STATUS_DISK_FULL
;
1667 ExAcquireResourceExclusiveLite(&newchunk
->lock
, TRUE
);
1669 if (!find_data_address_in_chunk(Vcb
, newchunk
, dr
->size
, &dr
->new_address
)) {
1670 ExReleaseResourceLite(&newchunk
->lock
);
1671 ERR("could not find address in new chunk\n");
1672 Status
= STATUS_DISK_FULL
;
1675 increase_chunk_usage(newchunk
, dr
->size
);
1676 space_list_subtract(Vcb
, newchunk
, FALSE
, dr
->new_address
, dr
->size
, &rollback
);
1679 ExReleaseResourceLite(&newchunk
->lock
);
1682 ExReleaseResourceLite(&Vcb
->chunk_lock
);
// remember the destination chunk for the changed_extents fix-up further down
1685 dr
->newchunk
= newchunk
;
// one UINT32 checksum per sector of the extent
1687 csum
= ExAllocatePoolWithTag(PagedPool
, dr
->size
* sizeof(UINT32
) / Vcb
->superblock
.sector_size
, ALLOC_TAG
);
1689 ERR("out of memory\n");
1690 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1694 Status
= load_csum(Vcb
, csum
, dr
->address
, dr
->size
/ Vcb
->superblock
.sector_size
, NULL
);
// on success the checksums are added at the new address; the second call passes NULL checksums for the old address
1696 if (NT_SUCCESS(Status
)) {
1697 add_checksum_entry(Vcb
, dr
->new_address
, dr
->size
/ Vcb
->superblock
.sector_size
, csum
, NULL
, &rollback
);
1698 add_checksum_entry(Vcb
, dr
->address
, dr
->size
/ Vcb
->superblock
.sector_size
, NULL
, NULL
, &rollback
);
// copy the extent's data in pieces of at most 0x100000 bytes
1705 while (off
< dr
->size
) {
1706 ULONG ds
= min(dr
->size
- off
, 0x100000);
1708 Status
= read_data(Vcb
, dr
->address
+ off
, ds
, NULL
, FALSE
, data
, c
, NULL
, NULL
, FALSE
);
1709 if (!NT_SUCCESS(Status
)) {
1710 ERR("read_data returned %08x\n", Status
);
1714 Status
= write_data_complete(Vcb
, dr
->new_address
+ off
, data
, ds
, NULL
, newchunk
);
1715 if (!NT_SUCCESS(Status
)) {
1716 ERR("write_data_complete returned %08x\n", Status
);
1729 Status
= write_metadata_items(Vcb
, &metadata_items
, &items
, NULL
, &rollback
);
1730 if (!NT_SUCCESS(Status
)) {
1731 ERR("write_metadata_items returned %08x\n", Status
);
// insert extent-tree entries for every relocated extent
1736 while (le
!= &items
) {
1737 data_reloc
* dr
= CONTAINING_RECORD(le
, data_reloc
, list_entry
);
1739 Status
= add_data_reloc_extent_item(Vcb
, dr
, &rollback
);
1740 if (!NT_SUCCESS(Status
)) {
1741 ERR("add_data_reloc_extent_item returned %08x\n", Status
);
// move changed_extent records whose address matches a relocated extent onto the destination chunk's list
1748 le
= c
->changed_extents
.Flink
;
1749 while (le
!= &c
->changed_extents
) {
1750 LIST_ENTRY
*le2
, *le3
;
1751 changed_extent
* ce
= CONTAINING_RECORD(le
, changed_extent
, list_entry
);
1756 while (le2
!= &items
) {
1757 data_reloc
* dr
= CONTAINING_RECORD(le2
, data_reloc
, list_entry
);
1759 if (ce
->address
== dr
->address
) {
1760 ce
->address
= dr
->new_address
;
1761 RemoveEntryList(&ce
->list_entry
);
1762 InsertTailList(&dr
->newchunk
->changed_extents
, &ce
->list_entry
);
1773 // FIXME - speed this up
// update the in-memory extent records of every fcb that refers to a relocated address
1775 ExAcquireResourceSharedLite(&Vcb
->fcb_lock
, TRUE
);
1777 le
= Vcb
->all_fcbs
.Flink
;
1778 while (le
!= &Vcb
->all_fcbs
) {
1779 struct _fcb
* fcb
= CONTAINING_RECORD(le
, struct _fcb
, list_entry_all
);
1782 ExAcquireResourceExclusiveLite(fcb
->Header
.Resource
, TRUE
);
1784 le2
= fcb
->extents
.Flink
;
1785 while (le2
!= &fcb
->extents
) {
1786 extent
* ext
= CONTAINING_RECORD(le2
, extent
, list_entry
);
1789 if (ext
->data
->type
== EXTENT_TYPE_REGULAR
|| ext
->data
->type
== EXTENT_TYPE_PREALLOC
) {
1790 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ext
->data
->data
;
// only extents with non-zero size whose address lies inside this chunk are candidates
1792 if (ed2
->size
> 0 && ed2
->address
>= c
->offset
&& ed2
->address
< c
->offset
+ c
->chunk_item
->size
) {
1793 LIST_ENTRY
* le3
= items
.Flink
;
1794 while (le3
!= &items
) {
1795 data_reloc
* dr
= CONTAINING_RECORD(le3
, data_reloc
, list_entry
);
1797 if (ed2
->address
== dr
->address
) {
1798 ed2
->address
= dr
->new_address
;
1811 ExReleaseResourceLite(fcb
->Header
.Resource
);
1816 ExReleaseResourceLite(&Vcb
->fcb_lock
);
1818 Status
= STATUS_SUCCESS
;
1820 Vcb
->need_write
= TRUE
;
// clear_rollback runs on success; do_rollback is presumably the failure branch - confirm
1823 if (NT_SUCCESS(Status
))
1824 clear_rollback(Vcb
, &rollback
);
1826 do_rollback(Vcb
, &rollback
);
1828 ExReleaseResourceLite(&Vcb
->tree_lock
);
// drain the data relocation list and each entry's ref list
1833 while (!IsListEmpty(&items
)) {
1834 data_reloc
* dr
= CONTAINING_RECORD(RemoveHeadList(&items
), data_reloc
, list_entry
);
1836 while (!IsListEmpty(&dr
->refs
)) {
1837 data_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&dr
->refs
), data_reloc_ref
, list_entry
);
// drain the metadata relocation list and each entry's ref list
1845 while (!IsListEmpty(&metadata_items
)) {
1846 metadata_reloc
* mr
= CONTAINING_RECORD(RemoveHeadList(&metadata_items
), metadata_reloc
, list_entry
);
1848 while (!IsListEmpty(&mr
->refs
)) {
1849 metadata_reloc_ref
* ref
= CONTAINING_RECORD(RemoveHeadList(&mr
->refs
), metadata_reloc_ref
, list_entry
);
1860 static __inline UINT64
get_chunk_dup_type(chunk
* c
) {
1861 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID0
)
1862 return BLOCK_FLAG_RAID0
;
1863 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID1
)
1864 return BLOCK_FLAG_RAID1
;
1865 else if (c
->chunk_item
->type
& BLOCK_FLAG_DUPLICATE
)
1866 return BLOCK_FLAG_DUPLICATE
;
1867 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID10
)
1868 return BLOCK_FLAG_RAID10
;
1869 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
)
1870 return BLOCK_FLAG_RAID5
;
1871 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)
1872 return BLOCK_FLAG_RAID6
;
1874 return BLOCK_FLAG_SINGLE
;
1877 static BOOL
should_balance_chunk(device_extension
* Vcb
, UINT8 sort
, chunk
* c
) {
1878 btrfs_balance_opts
* opts
;
1880 opts
= &Vcb
->balance
.opts
[sort
];
1882 if (!(opts
->flags
& BTRFS_BALANCE_OPTS_ENABLED
))
1885 if (opts
->flags
& BTRFS_BALANCE_OPTS_PROFILES
) {
1886 UINT64 type
= get_chunk_dup_type(c
);
1888 if (!(type
& opts
->profiles
))
1892 if (opts
->flags
& BTRFS_BALANCE_OPTS_DEVID
) {
1894 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
1897 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
1898 if (cis
[i
].dev_id
== opts
->devid
) {
1908 if (opts
->flags
& BTRFS_BALANCE_OPTS_DRANGE
) {
1911 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
1914 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID0
)
1915 factor
= c
->chunk_item
->num_stripes
;
1916 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID10
)
1917 factor
= c
->chunk_item
->num_stripes
/ c
->chunk_item
->sub_stripes
;
1918 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
)
1919 factor
= c
->chunk_item
->num_stripes
- 1;
1920 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)
1921 factor
= c
->chunk_item
->num_stripes
- 2;
1922 else // SINGLE, DUPLICATE, RAID1
1925 physsize
= c
->chunk_item
->size
/ factor
;
1927 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
1928 if (cis
[i
].offset
>= opts
->drange_start
&& cis
[i
].offset
+ physsize
< opts
->drange_end
) {
1938 if (opts
->flags
& BTRFS_BALANCE_OPTS_VRANGE
) {
1939 if (c
->offset
+ c
->chunk_item
->size
<= opts
->vrange_start
|| c
->offset
> opts
->vrange_end
)
1943 if (opts
->flags
& BTRFS_BALANCE_OPTS_STRIPES
) {
1944 if (c
->chunk_item
->num_stripes
< opts
->stripes_start
|| c
->chunk_item
->num_stripes
< opts
->stripes_end
)
1948 if (opts
->flags
& BTRFS_BALANCE_OPTS_USAGE
) {
1949 UINT64 usage
= c
->used
* 100 / c
->chunk_item
->size
;
1951 // usage == 0 should mean completely empty, not just that usage rounds to 0%
1952 if (c
->used
> 0 && usage
== 0)
1955 if (usage
< opts
->usage_start
|| usage
> opts
->usage_end
)
1959 if (opts
->flags
& BTRFS_BALANCE_OPTS_CONVERT
&& opts
->flags
& BTRFS_BALANCE_OPTS_SOFT
) {
1960 UINT64 type
= get_chunk_dup_type(c
);
1962 if (type
== opts
->convert
)
1969 static void copy_balance_args(btrfs_balance_opts
* opts
, BALANCE_ARGS
* args
) {
1970 if (opts
->flags
& BTRFS_BALANCE_OPTS_PROFILES
) {
1971 args
->profiles
= opts
->profiles
;
1972 args
->flags
|= BALANCE_ARGS_FLAGS_PROFILES
;
1975 if (opts
->flags
& BTRFS_BALANCE_OPTS_USAGE
) {
1976 if (args
->usage_start
== 0) {
1977 args
->flags
|= BALANCE_ARGS_FLAGS_USAGE_RANGE
;
1978 args
->usage_start
= opts
->usage_start
;
1979 args
->usage_end
= opts
->usage_end
;
1981 args
->flags
|= BALANCE_ARGS_FLAGS_USAGE
;
1982 args
->usage
= opts
->usage_end
;
1986 if (opts
->flags
& BTRFS_BALANCE_OPTS_DEVID
) {
1987 args
->devid
= opts
->devid
;
1988 args
->flags
|= BALANCE_ARGS_FLAGS_DEVID
;
1991 if (opts
->flags
& BTRFS_BALANCE_OPTS_DRANGE
) {
1992 args
->drange_start
= opts
->drange_start
;
1993 args
->drange_end
= opts
->drange_end
;
1994 args
->flags
|= BALANCE_ARGS_FLAGS_DRANGE
;
1997 if (opts
->flags
& BTRFS_BALANCE_OPTS_VRANGE
) {
1998 args
->vrange_start
= opts
->vrange_start
;
1999 args
->vrange_end
= opts
->vrange_end
;
2000 args
->flags
|= BALANCE_ARGS_FLAGS_VRANGE
;
2003 if (opts
->flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
2004 args
->convert
= opts
->convert
;
2005 args
->flags
|= BALANCE_ARGS_FLAGS_CONVERT
;
2007 if (opts
->flags
& BTRFS_BALANCE_OPTS_SOFT
)
2008 args
->flags
|= BALANCE_ARGS_FLAGS_SOFT
;
2011 if (opts
->flags
& BTRFS_BALANCE_OPTS_LIMIT
) {
2012 if (args
->limit_start
== 0) {
2013 args
->flags
|= BALANCE_ARGS_FLAGS_LIMIT_RANGE
;
2014 args
->limit_start
= opts
->limit_start
;
2015 args
->limit_end
= opts
->limit_end
;
2017 args
->flags
|= BALANCE_ARGS_FLAGS_LIMIT
;
2018 args
->limit
= opts
->limit_end
;
2022 if (opts
->flags
& BTRFS_BALANCE_OPTS_STRIPES
) {
2023 args
->stripes_start
= opts
->stripes_start
;
2024 args
->stripes_end
= opts
->stripes_end
;
2025 args
->flags
|= BALANCE_ARGS_FLAGS_STRIPES_RANGE
;
/* Stores the volume's current balance options as a BALANCE_ITEM in the
 * root tree.
 *
 * Under Vcb->tree_lock (exclusive): looks up the key
 * (BALANCE_ITEM_ID, TYPE_TEMP_ITEM, 0), deletes the item if it exists,
 * builds a zeroed BALANCE_ITEM, fills in the flags and arguments of every
 * enabled option set (data, metadata, system), and inserts it. On success
 * do_write is called and the rollback list cleared.
 *
 * Failure statuses seen here are STATUS_INSUFFICIENT_RESOURCES and
 * STATUS_INTERNAL_ERROR, plus whatever find_item returns. */
2029 static NTSTATUS
add_balance_item(device_extension
* Vcb
) {
2030 LIST_ENTRY rollback
;
2036 InitializeListHead(&rollback
);
2038 searchkey
.obj_id
= BALANCE_ITEM_ID
;
2039 searchkey
.obj_type
= TYPE_TEMP_ITEM
;
2040 searchkey
.offset
= 0;
2042 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, TRUE
);
2044 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, FALSE
, NULL
);
2045 if (!NT_SUCCESS(Status
)) {
2046 ERR("find_item returned %08x\n", Status
);
// keycmp returns 0 on an exact match: drop the existing balance item before inserting a new one
2050 if (!keycmp(tp
.item
->key
, searchkey
))
2051 delete_tree_item(Vcb
, &tp
, &rollback
);
2053 bi
= ExAllocatePoolWithTag(PagedPool
, sizeof(BALANCE_ITEM
), ALLOC_TAG
);
2055 ERR("out of memory\n");
2056 Status
= STATUS_INSUFFICIENT_RESOURCES
;
// start from an all-zero item; copy_balance_args only writes the fields of enabled filters
2060 RtlZeroMemory(bi
, sizeof(BALANCE_ITEM
));
2062 if (Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
2063 bi
->flags
|= BALANCE_FLAGS_DATA
;
2064 copy_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], &bi
->data
);
2067 if (Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
2068 bi
->flags
|= BALANCE_FLAGS_METADATA
;
2069 copy_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], &bi
->metadata
);
2072 if (Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
2073 bi
->flags
|= BALANCE_FLAGS_SYSTEM
;
2074 copy_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
], &bi
->system
);
2077 if (!insert_tree_item(Vcb
, Vcb
->root_root
, BALANCE_ITEM_ID
, TYPE_TEMP_ITEM
, 0, bi
, sizeof(BALANCE_ITEM
), NULL
, NULL
, &rollback
)) {
2078 ERR("insert_tree_item failed\n");
2079 Status
= STATUS_INTERNAL_ERROR
;
2083 Status
= STATUS_SUCCESS
;
// success: write the change out and clear the rollback list; do_rollback is presumably the failure branch - confirm
2086 if (NT_SUCCESS(Status
)) {
2087 do_write(Vcb
, NULL
, &rollback
);
2090 clear_rollback(Vcb
, &rollback
);
2092 do_rollback(Vcb
, &rollback
);
2094 ExReleaseResourceLite(&Vcb
->tree_lock
);
/* Removes the stored balance item from the root tree.
 *
 * Under Vcb->tree_lock (exclusive): looks up the key
 * (BALANCE_ITEM_ID, TYPE_TEMP_ITEM, 0) and, if an item with exactly that
 * key exists, deletes it and calls do_write. */
2099 static NTSTATUS
remove_balance_item(device_extension
* Vcb
) {
2100 LIST_ENTRY rollback
;
2105 InitializeListHead(&rollback
);
2107 searchkey
.obj_id
= BALANCE_ITEM_ID
;
2108 searchkey
.obj_type
= TYPE_TEMP_ITEM
;
2109 searchkey
.offset
= 0;
2111 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, TRUE
);
2113 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, FALSE
, NULL
);
2114 if (!NT_SUCCESS(Status
)) {
2115 ERR("find_item returned %08x\n", Status
);
// only delete and write when the item found has exactly the balance key
2119 if (!keycmp(tp
.item
->key
, searchkey
)) {
2120 delete_tree_item(Vcb
, &tp
, &rollback
);
2122 do_write(Vcb
, NULL
, &rollback
);
2126 Status
= STATUS_SUCCESS
;
// clear_rollback runs on success; do_rollback is presumably the failure branch - confirm
2129 if (NT_SUCCESS(Status
))
2130 clear_rollback(Vcb
, &rollback
);
2132 do_rollback(Vcb
, &rollback
);
2134 ExReleaseResourceLite(&Vcb
->tree_lock
);
2139 static void load_balance_args(btrfs_balance_opts
* opts
, BALANCE_ARGS
* args
) {
2140 opts
->flags
= BTRFS_BALANCE_OPTS_ENABLED
;
2142 if (args
->flags
& BALANCE_ARGS_FLAGS_PROFILES
) {
2143 opts
->flags
|= BTRFS_BALANCE_OPTS_PROFILES
;
2144 opts
->profiles
= args
->profiles
;
2147 if (args
->flags
& BALANCE_ARGS_FLAGS_USAGE
) {
2148 opts
->flags
|= BTRFS_BALANCE_OPTS_USAGE
;
2150 opts
->usage_start
= 0;
2151 opts
->usage_end
= args
->usage
;
2152 } else if (args
->flags
& BALANCE_ARGS_FLAGS_USAGE_RANGE
) {
2153 opts
->flags
|= BTRFS_BALANCE_OPTS_USAGE
;
2155 opts
->usage_start
= args
->usage_start
;
2156 opts
->usage_end
= args
->usage_end
;
2159 if (args
->flags
& BALANCE_ARGS_FLAGS_DEVID
) {
2160 opts
->flags
|= BTRFS_BALANCE_OPTS_DEVID
;
2161 opts
->devid
= args
->devid
;
2164 if (args
->flags
& BALANCE_ARGS_FLAGS_DRANGE
) {
2165 opts
->flags
|= BTRFS_BALANCE_OPTS_DRANGE
;
2166 opts
->drange_start
= args
->drange_start
;
2167 opts
->drange_end
= args
->drange_end
;
2170 if (args
->flags
& BALANCE_ARGS_FLAGS_VRANGE
) {
2171 opts
->flags
|= BTRFS_BALANCE_OPTS_VRANGE
;
2172 opts
->vrange_start
= args
->vrange_start
;
2173 opts
->vrange_end
= args
->vrange_end
;
2176 if (args
->flags
& BALANCE_ARGS_FLAGS_LIMIT
) {
2177 opts
->flags
|= BTRFS_BALANCE_OPTS_LIMIT
;
2179 opts
->limit_start
= 0;
2180 opts
->limit_end
= args
->limit
;
2181 } else if (args
->flags
& BALANCE_ARGS_FLAGS_LIMIT_RANGE
) {
2182 opts
->flags
|= BTRFS_BALANCE_OPTS_LIMIT
;
2184 opts
->limit_start
= args
->limit_start
;
2185 opts
->limit_end
= args
->limit_end
;
2188 if (args
->flags
& BALANCE_ARGS_FLAGS_STRIPES_RANGE
) {
2189 opts
->flags
|= BTRFS_BALANCE_OPTS_STRIPES
;
2191 opts
->stripes_start
= args
->stripes_start
;
2192 opts
->stripes_end
= args
->stripes_end
;
2195 if (args
->flags
& BALANCE_ARGS_FLAGS_CONVERT
) {
2196 opts
->flags
|= BTRFS_BALANCE_OPTS_CONVERT
;
2197 opts
->convert
= args
->convert
;
2199 if (args
->flags
& BALANCE_ARGS_FLAGS_SOFT
)
2200 opts
->flags
|= BTRFS_BALANCE_OPTS_SOFT
;
/* Overwrites the superblock locations on dev with zeroes.
 *
 * Allocates a zero-filled buffer of sizeof(superblock) and writes it with
 * write_data_phys to successive entries of superblock_addrs.
 *
 * Returns STATUS_INSUFFICIENT_RESOURCES if the buffer cannot be allocated,
 * otherwise STATUS_SUCCESS on the path shown here. */
2204 static NTSTATUS
remove_superblocks(device
* dev
) {
2209 sb
= ExAllocatePoolWithTag(PagedPool
, sizeof(superblock
), ALLOC_TAG
);
2211 ERR("out of memory\n");
2212 return STATUS_INSUFFICIENT_RESOURCES
;
2215 RtlZeroMemory(sb
, sizeof(superblock
));
// stop at a zero entry in superblock_addrs or at a location that does not fit within the device length
2217 while (superblock_addrs
[i
] > 0 && dev
->length
>= superblock_addrs
[i
] + sizeof(superblock
)) {
2218 Status
= write_data_phys(dev
->devobj
, superblock_addrs
[i
], sb
, sizeof(superblock
));
2220 if (!NT_SUCCESS(Status
)) {
2230 return STATUS_SUCCESS
;
/* Moves the mount manager's mount points from dev to the volume's first device.
 *
 * Steps, as marked by the section comments below:
 *   1. query the device name of dev (the old device);
 *   2. query the device name of first_device(Vcb) (the new device);
 *   3. query and delete the mount points registered for the old name;
 *   4. re-create each deleted mount point that has a symbolic link name,
 *      this time pointing at the new device name;
 *   5. hand the old device name back to the mount manager via add_volume.
 *
 * Vcb      - volume
 * dev      - device being replaced
 * mountmgr - mount manager device object that receives the IOCTLs
 * part0    - NOTE(review): not referenced in the statements shown here */
2233 static NTSTATUS
replace_mount_dev(device_extension
* Vcb
, device
* dev
, PDEVICE_OBJECT mountmgr
, BOOL part0
) {
2235 MOUNTDEV_NAME mdn
, *mdn2
= NULL
, *mdn3
= NULL
;
2236 ULONG mdnsize
, mmpsize
;
2237 MOUNTMGR_MOUNT_POINT
* mmp
= NULL
;
2238 MOUNTMGR_MOUNT_POINTS mmps
, *mmps2
= NULL
;
2242 // get old device name
// first call uses the fixed-size header only; STATUS_BUFFER_OVERFLOW is tolerated and mdn.NameLength sizes the real buffer
2244 Status
= dev_ioctl(dev
->devobj
, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME
, NULL
, 0, &mdn
, sizeof(MOUNTDEV_NAME
), TRUE
, NULL
);
2245 if (!NT_SUCCESS(Status
) && Status
!= STATUS_BUFFER_OVERFLOW
) {
2246 ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status
);
2250 mdnsize
= offsetof(MOUNTDEV_NAME
, Name
[0]) + mdn
.NameLength
;
2252 mdn2
= ExAllocatePoolWithTag(PagedPool
, mdnsize
, ALLOC_TAG
);
2254 ERR("out of memory\n");
2255 return STATUS_INSUFFICIENT_RESOURCES
;
2258 Status
= dev_ioctl(dev
->devobj
, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME
, NULL
, 0, mdn2
, mdnsize
, TRUE
, NULL
);
2259 if (!NT_SUCCESS(Status
)) {
2260 ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status
);
2264 // get new device name
// same two-step query, this time against the volume's first device
2266 Status
= dev_ioctl(first_device(Vcb
)->devobj
, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME
, NULL
, 0, &mdn
, sizeof(MOUNTDEV_NAME
), TRUE
, NULL
);
2267 if (!NT_SUCCESS(Status
) && Status
!= STATUS_BUFFER_OVERFLOW
) {
2268 ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status
);
2272 mdnsize
= offsetof(MOUNTDEV_NAME
, Name
[0]) + mdn
.NameLength
;
2274 mdn3
= ExAllocatePoolWithTag(PagedPool
, mdnsize
, ALLOC_TAG
);
2276 ERR("out of memory\n");
2277 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2281 Status
= dev_ioctl(first_device(Vcb
)->devobj
, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME
, NULL
, 0, mdn3
, mdnsize
, TRUE
, NULL
);
2282 if (!NT_SUCCESS(Status
)) {
2283 ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status
);
2287 // query and delete existing mount points
// the query input is a MOUNTMGR_MOUNT_POINT header followed directly by the old device name
2289 mmpsize
= sizeof(MOUNTMGR_MOUNT_POINT
) + mdn2
->NameLength
;
2291 mmp
= ExAllocatePoolWithTag(PagedPool
, mmpsize
, ALLOC_TAG
);
2293 ERR("out of memory\n");
2294 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2298 RtlZeroMemory(mmp
, sizeof(MOUNTMGR_MOUNT_POINT
));
2299 mmp
->DeviceNameOffset
= sizeof(MOUNTMGR_MOUNT_POINT
);
2300 mmp
->DeviceNameLength
= mdn2
->NameLength
;
2301 RtlCopyMemory(&mmp
[1], mdn2
->Name
, mdn2
->NameLength
);
// NOTE(review): the output buffer is the stack variable mmps, but the output length passed is mmpsize
// (sizeof(MOUNTMGR_MOUNT_POINT) + name length), not sizeof(MOUNTMGR_MOUNT_POINTS) - this looks like it can
// let the callee write past mmps; confirm and pass the size of mmps instead
2303 Status
= dev_ioctl(mountmgr
, IOCTL_MOUNTMGR_QUERY_POINTS
, mmp
, mmpsize
, &mmps
, mmpsize
, TRUE
, NULL
);
2304 if (!NT_SUCCESS(Status
) && Status
!= STATUS_BUFFER_OVERFLOW
) {
2305 ERR("IOCTL_MOUNTMGR_QUERY_POINTS returned %08x\n", Status
);
// mmps.Size from the query sizes the buffer that receives the deleted mount points
2309 mmps2
= ExAllocatePoolWithTag(PagedPool
, mmps
.Size
, ALLOC_TAG
);
2311 ERR("out of memory\n");
2312 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2316 Status
= dev_ioctl(mountmgr
, IOCTL_MOUNTMGR_DELETE_POINTS
, mmp
, mmpsize
, mmps2
, mmps
.Size
, TRUE
, NULL
);
2317 if (!NT_SUCCESS(Status
) && Status
!= STATUS_BUFFER_OVERFLOW
) {
2318 ERR("IOCTL_MOUNTMGR_DELETE_POINTS returned %08x\n", Status
);
2322 // re-create mount points
2324 for (i
= 0; i
< mmps2
->NumberOfMountPoints
; i
++) {
// entries without a symbolic link name are skipped
2325 if (mmps2
->MountPoints
[i
].SymbolicLinkNameOffset
!= 0) {
2327 MOUNTMGR_CREATE_POINT_INPUT
* mcpi
;
// layout: header, then the symbolic link name, then the new device name
2329 mcpilen
= sizeof(MOUNTMGR_CREATE_POINT_INPUT
) + mmps2
->MountPoints
[i
].SymbolicLinkNameLength
+ mdn3
->NameLength
;
2331 mcpi
= ExAllocatePoolWithTag(PagedPool
, mcpilen
, ALLOC_TAG
);
2333 ERR("out of memory\n");
2334 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2338 mcpi
->SymbolicLinkNameOffset
= sizeof(MOUNTMGR_CREATE_POINT_INPUT
);
2339 mcpi
->SymbolicLinkNameLength
= mmps2
->MountPoints
[i
].SymbolicLinkNameLength
;
2340 mcpi
->DeviceNameOffset
= mcpi
->SymbolicLinkNameOffset
+ mcpi
->SymbolicLinkNameLength
;
2341 mcpi
->DeviceNameLength
= mdn3
->NameLength
;
2343 RtlCopyMemory((UINT8
*)mcpi
+ mcpi
->SymbolicLinkNameOffset
, (UINT8
*)mmps2
+ mmps2
->MountPoints
[i
].SymbolicLinkNameOffset
,
2344 mcpi
->SymbolicLinkNameLength
);
2345 RtlCopyMemory((UINT8
*)mcpi
+ mcpi
->DeviceNameOffset
, mdn3
->Name
, mdn3
->NameLength
);
2347 Status
= dev_ioctl(mountmgr
, IOCTL_MOUNTMGR_CREATE_POINT
, mcpi
, mcpilen
, NULL
, 0, TRUE
, NULL
);
2348 if (!NT_SUCCESS(Status
)) {
2349 ERR("IOCTL_MOUNTMGR_CREATE_POINT returned %08x\n", Status
);
2358 Status
= STATUS_SUCCESS
;
2361 // re-add old device back to mountmgr
// us wraps the old device name buffer in place; no copy is made
2364 us
.Buffer
= mdn2
->Name
;
2365 us
.Length
= us
.MaximumLength
= mdn2
->NameLength
;
2367 add_volume(mountmgr
, &us
);
/* Completes the removal of dev from the volume once it is being dropped.
 *
 * Visible steps: flush pending writes; delete the device's DEV_ITEM from
 * the chunk tree and its DEV_STATS item from the device tree; adjust the
 * superblock's device count and total size; unlink dev from its list and
 * write the change; wipe the device's superblocks unless it is read-only;
 * remove the matching entry from the global volumes list; where dev was
 * the first device, point the VPB's RealDevice at the new first device
 * and call replace_mount_dev; finally drop the reference on dev->devobj
 * and drain dev->space.
 *
 * Returns STATUS_SUCCESS on the path shown here. */
2386 static NTSTATUS
finish_removing_device(device_extension
* Vcb
, device
* dev
) {
2390 LIST_ENTRY rollback
, *le
;
2391 BOOL first_dev
, part0
= FALSE
;
2393 InitializeListHead(&rollback
);
2395 if (Vcb
->need_write
)
2396 do_write(Vcb
, NULL
, &rollback
);
2400 clear_rollback(Vcb
, &rollback
);
2402 // remove entry in chunk tree
2404 searchkey
.obj_id
= 1;
2405 searchkey
.obj_type
= TYPE_DEV_ITEM
;
2406 searchkey
.offset
= dev
->devitem
.dev_id
;
2408 Status
= find_item(Vcb
, Vcb
->chunk_root
, &tp
, &searchkey
, FALSE
, NULL
);
2409 if (!NT_SUCCESS(Status
)) {
2410 ERR("find_item returned %08x\n", Status
);
// delete only on an exact key match (keycmp returns 0)
2414 if (!keycmp(searchkey
, tp
.item
->key
))
2415 delete_tree_item(Vcb
, &tp
, &rollback
);
2417 // remove stats entry in device tree
2419 searchkey
.obj_id
= 0;
2420 searchkey
.obj_type
= TYPE_DEV_STATS
;
2421 searchkey
.offset
= dev
->devitem
.dev_id
;
2423 Status
= find_item(Vcb
, Vcb
->dev_root
, &tp
, &searchkey
, FALSE
, NULL
);
2424 if (!NT_SUCCESS(Status
)) {
2425 ERR("find_item returned %08x\n", Status
);
2429 if (!keycmp(searchkey
, tp
.item
->key
))
2430 delete_tree_item(Vcb
, &tp
, &rollback
);
2432 // update superblock
2434 Vcb
->superblock
.num_devices
--;
2435 Vcb
->superblock
.total_bytes
-= dev
->devitem
.num_bytes
;
2436 Vcb
->devices_loaded
--;
// must be evaluated before dev is unlinked from the list on the next statement
2438 first_dev
= first_device(Vcb
) == dev
;
2440 RemoveEntryList(&dev
->list_entry
);
2444 do_write(Vcb
, NULL
, &rollback
);
2448 clear_rollback(Vcb
, &rollback
);
// a failure to wipe the superblocks is only logged as a warning
2450 if (!dev
->readonly
) {
2451 Status
= remove_superblocks(dev
);
2452 if (!NT_SUCCESS(Status
))
2453 WARN("remove_superblocks returned %08x\n", Status
);
2456 // remove entry in volume list
2458 ExAcquireResourceExclusiveLite(&volumes_lock
, TRUE
);
2461 while (le
!= &volumes
) {
2462 volume
* v
= CONTAINING_RECORD(le
, volume
, list_entry
);
// match on both the filesystem UUID and the device UUID
2464 if (RtlCompareMemory(&Vcb
->superblock
.uuid
, &v
->fsuuid
, sizeof(BTRFS_UUID
)) == sizeof(BTRFS_UUID
) &&
2465 RtlCompareMemory(&dev
->devitem
.device_uuid
, &v
->devuuid
, sizeof(BTRFS_UUID
)) == sizeof(BTRFS_UUID
)) {
2466 PFILE_OBJECT FileObject
;
2467 PDEVICE_OBJECT mountmgr
;
2468 UNICODE_STRING mmdevpath
;
2470 RemoveEntryList(&v
->list_entry
);
2472 // re-add entry to mountmgr
2474 if (!first_dev
&& v
->part_num
!= 0) {
2475 RtlInitUnicodeString(&mmdevpath
, MOUNTMGR_DEVICE_NAME
);
2476 Status
= IoGetDeviceObjectPointer(&mmdevpath
, FILE_READ_ATTRIBUTES
, &FileObject
, &mountmgr
);
2477 if (!NT_SUCCESS(Status
))
2478 ERR("IoGetDeviceObjectPointer returned %08x\n", Status
);
2480 add_volume(mountmgr
, &v
->devpath
);
2481 ObDereferenceObject(FileObject
);
// remembered for the replace_mount_dev call below
2485 part0
= v
->part_num
== 0 ? TRUE
: FALSE
;
2487 if (v
->devpath
.Buffer
)
2488 ExFreePool(v
->devpath
.Buffer
);
2497 ExReleaseResourceLite(&volumes_lock
);
2500 PDEVICE_OBJECT DeviceObject
, olddev
;
2501 device
* newfirstdev
;
2502 PFILE_OBJECT FileObject
;
2503 UNICODE_STRING mmdevpath
;
2504 PDEVICE_OBJECT mountmgr
;
2506 DeviceObject
= Vcb
->Vpb
->DeviceObject
;
2508 olddev
= DeviceObject
->Vpb
->RealDevice
;
2509 newfirstdev
= first_device(Vcb
);
// take a reference on the new real device before releasing the old one
2511 ObReferenceObject(newfirstdev
->devobj
);
2512 DeviceObject
->Vpb
->RealDevice
= newfirstdev
->devobj
;
2513 ObDereferenceObject(olddev
);
2515 RtlInitUnicodeString(&mmdevpath
, MOUNTMGR_DEVICE_NAME
);
2516 Status
= IoGetDeviceObjectPointer(&mmdevpath
, FILE_READ_ATTRIBUTES
, &FileObject
, &mountmgr
);
2517 if (!NT_SUCCESS(Status
))
2518 ERR("IoGetDeviceObjectPointer returned %08x\n", Status
);
2520 Status
= replace_mount_dev(Vcb
, dev
, mountmgr
, part0
);
2521 if (!NT_SUCCESS(Status
))
2522 ERR("replace_mount_dev returned %08x\n", Status
);
2524 ObDereferenceObject(FileObject
);
2531 ObDereferenceObject(dev
->devobj
);
// drain the device's space list
2533 while (!IsListEmpty(&dev
->space
)) {
2534 LIST_ENTRY
* le2
= RemoveHeadList(&dev
->space
);
2535 space
* s
= CONTAINING_RECORD(le2
, space
, list_entry
);
2542 return STATUS_SUCCESS
;
2546 static void balance_thread(void* context
) {
2548 static void NTAPI
balance_thread(void* context
) {
2550 device_extension
* Vcb
= (device_extension
*)context
;
2553 UINT64 num_chunks
[3];
2556 Vcb
->balance
.stopping
= FALSE
;
2557 Vcb
->balance
.cancelling
= FALSE
;
2558 KeInitializeEvent(&Vcb
->balance
.finished
, NotificationEvent
, FALSE
);
2560 if (Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
&& Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_CONVERT
)
2561 Vcb
->data_flags
= BLOCK_FLAG_DATA
| (Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].convert
== BLOCK_FLAG_SINGLE
? 0 : Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].convert
);
2563 if (Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
&& Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_CONVERT
)
2564 Vcb
->metadata_flags
= BLOCK_FLAG_METADATA
| (Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].convert
== BLOCK_FLAG_SINGLE
? 0 : Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].convert
);
2566 if (Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].flags
& BTRFS_BALANCE_OPTS_ENABLED
&& Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].flags
& BTRFS_BALANCE_OPTS_CONVERT
)
2567 Vcb
->system_flags
= BLOCK_FLAG_SYSTEM
| (Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].convert
== BLOCK_FLAG_SINGLE
? 0 : Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
].convert
);
2569 if (Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS
) {
2570 if (Vcb
->balance
.opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
)
2571 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], &Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], sizeof(btrfs_balance_opts
));
2572 else if (Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
)
2573 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], &Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], sizeof(btrfs_balance_opts
));
2576 // FIXME - what are we supposed to do with limit_start?
2578 if (!Vcb
->readonly
) {
2579 if (!Vcb
->balance
.removing
) {
2580 Status
= add_balance_item(Vcb
);
2581 if (!NT_SUCCESS(Status
)) {
2582 ERR("add_balance_item returned %08x\n", Status
);
2586 if (Vcb
->need_write
) {
2587 LIST_ENTRY rollback
;
2589 InitializeListHead(&rollback
);
2590 do_write(Vcb
, NULL
, &rollback
);
2593 clear_rollback(Vcb
, &rollback
);
2598 num_chunks
[0] = num_chunks
[1] = num_chunks
[2] = 0;
2599 Vcb
->balance
.total_chunks
= 0;
2601 InitializeListHead(&chunks
);
2603 KeWaitForSingleObject(&Vcb
->balance
.event
, Executive
, KernelMode
, FALSE
, NULL
);
2605 if (Vcb
->balance
.stopping
)
2608 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, TRUE
);
2610 le
= Vcb
->chunks
.Flink
;
2611 while (le
!= &Vcb
->chunks
) {
2612 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
2615 ExAcquireResourceExclusiveLite(&c
->lock
, TRUE
);
2617 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
)
2618 sort
= BALANCE_OPTS_DATA
;
2619 else if (c
->chunk_item
->type
& BLOCK_FLAG_METADATA
)
2620 sort
= BALANCE_OPTS_METADATA
;
2621 else if (c
->chunk_item
->type
& BLOCK_FLAG_SYSTEM
)
2622 sort
= BALANCE_OPTS_SYSTEM
;
2624 ERR("unexpected chunk type %llx\n", c
->chunk_item
->type
);
2625 ExReleaseResourceLite(&c
->lock
);
2629 if ((!(Vcb
->balance
.opts
[sort
].flags
& BTRFS_BALANCE_OPTS_LIMIT
) || num_chunks
[sort
] < Vcb
->balance
.opts
[sort
].limit_end
) &&
2630 should_balance_chunk(Vcb
, sort
, c
)) {
2633 InsertTailList(&chunks
, &c
->list_entry_balance
);
2636 Vcb
->balance
.total_chunks
++;
2639 ExReleaseResourceLite(&c
->lock
);
2644 ExReleaseResourceLite(&Vcb
->chunk_lock
);
2646 Vcb
->balance
.chunks_left
= Vcb
->balance
.total_chunks
;
2648 // do data chunks before metadata
2650 while (le
!= &chunks
) {
2651 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry_balance
);
2652 LIST_ENTRY
* le2
= le
->Flink
;
2654 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
) {
2661 FsRtlEnterFileSystem();
2663 Status
= balance_data_chunk(Vcb
, c
, &changed
);
2665 FsRtlExitFileSystem();
2667 if (!NT_SUCCESS(Status
)) {
2668 ERR("balance_data_chunk returned %08x\n", Status
);
2669 Vcb
->balance
.status
= Status
;
2673 KeWaitForSingleObject(&Vcb
->balance
.event
, Executive
, KernelMode
, FALSE
, NULL
);
2675 if (Vcb
->balance
.stopping
)
2679 if (!c
->list_entry_changed
.Flink
)
2680 InsertTailList(&Vcb
->chunks_changed
, &c
->list_entry_changed
);
2683 if (Vcb
->balance
.stopping
) {
2684 while (le
!= &chunks
) {
2685 c
= CONTAINING_RECORD(le
, chunk
, list_entry_balance
);
2693 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
&&
2694 (!(Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) || !(c
->chunk_item
->type
& BLOCK_FLAG_METADATA
))) {
2695 RemoveEntryList(&c
->list_entry_balance
);
2696 c
->list_entry_balance
.Flink
= NULL
;
2698 Vcb
->balance
.chunks_left
--;
2704 // do metadata chunks
2705 while (!IsListEmpty(&chunks
)) {
2710 le
= RemoveHeadList(&chunks
);
2711 c
= CONTAINING_RECORD(le
, chunk
, list_entry_balance
);
2713 if (c
->chunk_item
->type
& BLOCK_FLAG_METADATA
|| c
->chunk_item
->type
& BLOCK_FLAG_SYSTEM
) {
2715 FsRtlEnterFileSystem();
2717 Status
= balance_metadata_chunk(Vcb
, c
, &changed
);
2719 FsRtlExitFileSystem();
2721 if (!NT_SUCCESS(Status
)) {
2722 ERR("balance_metadata_chunk returned %08x\n", Status
);
2723 Vcb
->balance
.status
= Status
;
2727 KeWaitForSingleObject(&Vcb
->balance
.event
, Executive
, KernelMode
, FALSE
, NULL
);
2729 if (Vcb
->balance
.stopping
)
2733 if (!c
->list_entry_changed
.Flink
)
2734 InsertTailList(&Vcb
->chunks_changed
, &c
->list_entry_changed
);
2737 if (Vcb
->balance
.stopping
) {
2738 while (le
!= &chunks
) {
2739 c
= CONTAINING_RECORD(le
, chunk
, list_entry_balance
);
2743 c
->list_entry_balance
.Flink
= NULL
;
2748 c
->list_entry_balance
.Flink
= NULL
;
2750 Vcb
->balance
.chunks_left
--;
2754 if (!Vcb
->readonly
) {
2755 if (!Vcb
->balance
.removing
) {
2756 FsRtlEnterFileSystem();
2757 Status
= remove_balance_item(Vcb
);
2758 FsRtlExitFileSystem();
2760 if (!NT_SUCCESS(Status
)) {
2761 ERR("remove_balance_item returned %08x\n", Status
);
2767 FsRtlEnterFileSystem();
2768 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, TRUE
);
2770 le
= Vcb
->devices
.Flink
;
2771 while (le
!= &Vcb
->devices
) {
2772 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
2774 if (dev2
->devitem
.dev_id
== Vcb
->balance
.opts
[0].devid
) {
2783 if (Vcb
->balance
.chunks_left
== 0) {
2784 Status
= finish_removing_device(Vcb
, dev
);
2786 if (!NT_SUCCESS(Status
)) {
2787 ERR("finish_removing_device returned %08x\n", Status
);
2794 ExReleaseResourceLite(&Vcb
->tree_lock
);
2795 FsRtlExitFileSystem();
2799 ZwClose(Vcb
->balance
.thread
);
2800 Vcb
->balance
.thread
= NULL
;
2802 KeSetEvent(&Vcb
->balance
.finished
, 0, FALSE
);
2805 NTSTATUS
start_balance(device_extension
* Vcb
, void* data
, ULONG length
, KPROCESSOR_MODE processor_mode
) {
2807 btrfs_start_balance
* bsb
= (btrfs_start_balance
*)data
;
2810 if (length
< sizeof(btrfs_start_balance
) || !data
)
2811 return STATUS_INVALID_PARAMETER
;
2813 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
2814 return STATUS_PRIVILEGE_NOT_HELD
;
2816 if (Vcb
->balance
.thread
) {
2817 WARN("balance already running\n");
2818 return STATUS_DEVICE_NOT_READY
;
2822 return STATUS_MEDIA_WRITE_PROTECTED
;
2824 if (!(bsb
->opts
[BALANCE_OPTS_DATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) &&
2825 !(bsb
->opts
[BALANCE_OPTS_METADATA
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) &&
2826 !(bsb
->opts
[BALANCE_OPTS_SYSTEM
].flags
& BTRFS_BALANCE_OPTS_ENABLED
))
2827 return STATUS_SUCCESS
;
2829 for (i
= 0; i
< 3; i
++) {
2830 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
2831 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_PROFILES
) {
2832 bsb
->opts
[i
].profiles
&= BLOCK_FLAG_RAID0
| BLOCK_FLAG_RAID1
| BLOCK_FLAG_DUPLICATE
| BLOCK_FLAG_RAID10
|
2833 BLOCK_FLAG_RAID5
| BLOCK_FLAG_RAID6
| BLOCK_FLAG_SINGLE
;
2835 if (bsb
->opts
[i
].profiles
== 0)
2836 return STATUS_INVALID_PARAMETER
;
2839 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_DEVID
) {
2840 if (bsb
->opts
[i
].devid
== 0)
2841 return STATUS_INVALID_PARAMETER
;
2844 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_DRANGE
) {
2845 if (bsb
->opts
[i
].drange_start
> bsb
->opts
[i
].drange_end
)
2846 return STATUS_INVALID_PARAMETER
;
2849 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_VRANGE
) {
2850 if (bsb
->opts
[i
].vrange_start
> bsb
->opts
[i
].vrange_end
)
2851 return STATUS_INVALID_PARAMETER
;
2854 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_LIMIT
) {
2855 bsb
->opts
[i
].limit_start
= max(1, bsb
->opts
[i
].limit_start
);
2856 bsb
->opts
[i
].limit_end
= max(1, bsb
->opts
[i
].limit_end
);
2858 if (bsb
->opts
[i
].limit_start
> bsb
->opts
[i
].limit_end
)
2859 return STATUS_INVALID_PARAMETER
;
2862 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_STRIPES
) {
2863 bsb
->opts
[i
].stripes_start
= max(1, bsb
->opts
[i
].stripes_start
);
2864 bsb
->opts
[i
].stripes_end
= max(1, bsb
->opts
[i
].stripes_end
);
2866 if (bsb
->opts
[i
].stripes_start
> bsb
->opts
[i
].stripes_end
)
2867 return STATUS_INVALID_PARAMETER
;
2870 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_USAGE
) {
2871 bsb
->opts
[i
].usage_start
= min(100, bsb
->opts
[i
].stripes_start
);
2872 bsb
->opts
[i
].usage_end
= min(100, bsb
->opts
[i
].stripes_end
);
2874 if (bsb
->opts
[i
].stripes_start
> bsb
->opts
[i
].stripes_end
)
2875 return STATUS_INVALID_PARAMETER
;
2878 if (bsb
->opts
[i
].flags
& BTRFS_BALANCE_OPTS_CONVERT
) {
2879 if (bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID0
&& bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID1
&&
2880 bsb
->opts
[i
].convert
!= BLOCK_FLAG_DUPLICATE
&& bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID10
&&
2881 bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID5
&& bsb
->opts
[i
].convert
!= BLOCK_FLAG_RAID6
&&
2882 bsb
->opts
[i
].convert
!= BLOCK_FLAG_SINGLE
)
2883 return STATUS_INVALID_PARAMETER
;
2888 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], &bsb
->opts
[BALANCE_OPTS_DATA
], sizeof(btrfs_balance_opts
));
2889 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], &bsb
->opts
[BALANCE_OPTS_METADATA
], sizeof(btrfs_balance_opts
));
2890 RtlCopyMemory(&Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
], &bsb
->opts
[BALANCE_OPTS_SYSTEM
], sizeof(btrfs_balance_opts
));
2892 Vcb
->balance
.paused
= FALSE
;
2893 Vcb
->balance
.removing
= FALSE
;
2894 Vcb
->balance
.status
= STATUS_SUCCESS
;
2895 KeInitializeEvent(&Vcb
->balance
.event
, NotificationEvent
, !Vcb
->balance
.paused
);
2897 Status
= PsCreateSystemThread(&Vcb
->balance
.thread
, 0, NULL
, NULL
, NULL
, balance_thread
, Vcb
);
2898 if (!NT_SUCCESS(Status
)) {
2899 ERR("PsCreateSystemThread returned %08x\n", Status
);
2903 return STATUS_SUCCESS
;
// Looks in the root tree for a stored balance item and, if one is found,
// loads its arguments into Vcb->balance.opts and creates the balance thread.
//
// Returns STATUS_NOT_FOUND when the item found does not match the search key,
// STATUS_INTERNAL_ERROR when the item is smaller than a BALANCE_ITEM, and
// STATUS_SUCCESS at the end of the function.
// NOTE(review): several lines of this function (local declarations, the
// statements that follow the ERR calls, closing braces) are not visible in
// this view - confirm the error-path return values against the full file.
2906 NTSTATUS
look_for_balance_item(device_extension
* Vcb
) {
// NOTE(review): rollback is initialised below but no other use of it is
// visible in this function - confirm whether it is still needed.
2907 LIST_ENTRY rollback
;
2914 InitializeListHead(&rollback
);
// key of the balance item: (BALANCE_ITEM_ID, TYPE_TEMP_ITEM, 0)
2916 searchkey
.obj_id
= BALANCE_ITEM_ID
;
2917 searchkey
.obj_type
= TYPE_TEMP_ITEM
;
2918 searchkey
.offset
= 0;
// search Vcb->root_root for that key
2920 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, FALSE
, NULL
);
2921 if (!NT_SUCCESS(Status
)) {
2922 ERR("find_item returned %08x\n", Status
);
// a non-zero keycmp means the item returned is not the balance item
2926 if (keycmp(tp
.item
->key
, searchkey
)) {
2927 TRACE("no balance item found\n");
2928 return STATUS_NOT_FOUND
;
// reject an item too short to be read as a BALANCE_ITEM
2931 if (tp
.item
->size
< sizeof(BALANCE_ITEM
)) {
// NOTE(review): sizeof() is printed with %u here - confirm this matches the
// argument width on 64-bit builds.
2932 WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
,
2933 tp
.item
->size
, sizeof(BALANCE_ITEM
));
2934 return STATUS_INTERNAL_ERROR
;
2937 bi
= (BALANCE_ITEM
*)tp
.item
->data
;
// load the stored arguments for each chunk type flagged in the item
2939 if (bi
->flags
& BALANCE_FLAGS_DATA
)
2940 load_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], &bi
->data
);
2942 if (bi
->flags
& BALANCE_FLAGS_METADATA
)
2943 load_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], &bi
->metadata
);
2945 if (bi
->flags
& BALANCE_FLAGS_SYSTEM
)
2946 load_balance_args(&Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
], &bi
->system
);
2948 // do the heuristics that Linux driver does
// applied to each of the three option sets that is enabled
2950 for (i
= 0; i
< 3; i
++) {
2951 if (Vcb
->balance
.opts
[i
].flags
& BTRFS_BALANCE_OPTS_ENABLED
) {
2952 // if converting, don't redo chunks already done
2954 if (Vcb
->balance
.opts
[i
].flags
& BTRFS_BALANCE_OPTS_CONVERT
)
2955 Vcb
->balance
.opts
[i
].flags
|= BTRFS_BALANCE_OPTS_SOFT
;
2957 // don't balance chunks more than 90% filled - presumably these
2958 // have already been done
// only when neither a usage filter nor a conversion was requested:
// add a usage filter covering 0 to 90
2960 if (!(Vcb
->balance
.opts
[i
].flags
& BTRFS_BALANCE_OPTS_USAGE
) &&
2961 !(Vcb
->balance
.opts
[i
].flags
& BTRFS_BALANCE_OPTS_CONVERT
)
2963 Vcb
->balance
.opts
[i
].flags
|= BTRFS_BALANCE_OPTS_USAGE
;
2964 Vcb
->balance
.opts
[i
].usage_start
= 0;
2965 Vcb
->balance
.opts
[i
].usage_end
= 90;
// start paused on a read-only volume or when the skip_balance option is set
2970 if (Vcb
->readonly
|| Vcb
->options
.skip_balance
)
2971 Vcb
->balance
.paused
= TRUE
;
// NOTE(review): presumably the else branch of the test above - the else
// keyword is not visible in this view; confirm.
2973 Vcb
->balance
.paused
= FALSE
;
2975 Vcb
->balance
.removing
= FALSE
;
2976 Vcb
->balance
.status
= STATUS_SUCCESS
;
// initial state of the event is the inverse of the paused flag
2977 KeInitializeEvent(&Vcb
->balance
.event
, NotificationEvent
, !Vcb
->balance
.paused
);
// create the thread that runs balance_thread with Vcb as its context
2979 Status
= PsCreateSystemThread(&Vcb
->balance
.thread
, 0, NULL
, NULL
, NULL
, balance_thread
, Vcb
);
2980 if (!NT_SUCCESS(Status
)) {
2981 ERR("PsCreateSystemThread returned %08x\n", Status
);
2985 return STATUS_SUCCESS
;
2988 NTSTATUS
query_balance(device_extension
* Vcb
, void* data
, ULONG length
) {
2989 btrfs_query_balance
* bqb
= (btrfs_query_balance
*)data
;
2991 if (length
< sizeof(btrfs_query_balance
) || !data
)
2992 return STATUS_INVALID_PARAMETER
;
2994 if (!Vcb
->balance
.thread
) {
2995 bqb
->status
= BTRFS_BALANCE_STOPPED
;
2997 if (!NT_SUCCESS(Vcb
->balance
.status
)) {
2998 bqb
->status
|= BTRFS_BALANCE_ERROR
;
2999 bqb
->error
= Vcb
->balance
.status
;
3002 return STATUS_SUCCESS
;
3005 bqb
->status
= Vcb
->balance
.paused
? BTRFS_BALANCE_PAUSED
: BTRFS_BALANCE_RUNNING
;
3007 if (Vcb
->balance
.removing
)
3008 bqb
->status
|= BTRFS_BALANCE_REMOVAL
;
3010 if (!NT_SUCCESS(Vcb
->balance
.status
))
3011 bqb
->status
|= BTRFS_BALANCE_ERROR
;
3013 bqb
->chunks_left
= Vcb
->balance
.chunks_left
;
3014 bqb
->total_chunks
= Vcb
->balance
.total_chunks
;
3015 bqb
->error
= Vcb
->balance
.status
;
3016 RtlCopyMemory(&bqb
->data_opts
, &Vcb
->balance
.opts
[BALANCE_OPTS_DATA
], sizeof(btrfs_balance_opts
));
3017 RtlCopyMemory(&bqb
->metadata_opts
, &Vcb
->balance
.opts
[BALANCE_OPTS_METADATA
], sizeof(btrfs_balance_opts
));
3018 RtlCopyMemory(&bqb
->system_opts
, &Vcb
->balance
.opts
[BALANCE_OPTS_SYSTEM
], sizeof(btrfs_balance_opts
));
3020 return STATUS_SUCCESS
;
3023 NTSTATUS
pause_balance(device_extension
* Vcb
, KPROCESSOR_MODE processor_mode
) {
3024 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3025 return STATUS_PRIVILEGE_NOT_HELD
;
3027 if (!Vcb
->balance
.thread
)
3028 return STATUS_DEVICE_NOT_READY
;
3030 if (Vcb
->balance
.paused
)
3031 return STATUS_DEVICE_NOT_READY
;
3033 Vcb
->balance
.paused
= TRUE
;
3034 KeClearEvent(&Vcb
->balance
.event
);
3036 return STATUS_SUCCESS
;
// Resumes a paused balance: clears the paused flag and sets the balance event.
//
// Returns STATUS_PRIVILEGE_NOT_HELD if the caller lacks
// SE_MANAGE_VOLUME_PRIVILEGE, STATUS_DEVICE_NOT_READY if there is no balance
// thread or the balance is not paused, and STATUS_SUCCESS after the event
// has been set.
3039 NTSTATUS
resume_balance(device_extension
* Vcb
, KPROCESSOR_MODE processor_mode
) {
3040 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3041 return STATUS_PRIVILEGE_NOT_HELD
;
// there must be a balance thread ...
3043 if (!Vcb
->balance
.thread
)
3044 return STATUS_DEVICE_NOT_READY
;
// ... and it must currently be paused
3046 if (!Vcb
->balance
.paused
)
3047 return STATUS_DEVICE_NOT_READY
;
// NOTE(review): the condition guarding this return is not visible in this
// view - presumably a read-only volume check; confirm against the full file.
3050 return STATUS_MEDIA_WRITE_PROTECTED
;
3052 Vcb
->balance
.paused
= FALSE
;
// signal the event that was cleared when the balance was paused
3053 KeSetEvent(&Vcb
->balance
.event
, 0, FALSE
);
3055 return STATUS_SUCCESS
;
3058 NTSTATUS
stop_balance(device_extension
* Vcb
, KPROCESSOR_MODE processor_mode
) {
3059 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3060 return STATUS_PRIVILEGE_NOT_HELD
;
3062 if (!Vcb
->balance
.thread
)
3063 return STATUS_DEVICE_NOT_READY
;
3065 Vcb
->balance
.paused
= FALSE
;
3066 Vcb
->balance
.stopping
= TRUE
;
3067 Vcb
->balance
.cancelling
= TRUE
;
3068 Vcb
->balance
.status
= STATUS_SUCCESS
;
3069 KeSetEvent(&Vcb
->balance
.event
, 0, FALSE
);
3071 return STATUS_SUCCESS
;
// Starts removal of a device from the volume by launching a balance that is
// restricted to that device's ID and has the removing flag set.
//
// data/length must hold the UINT64 ID of the device to remove.
//
// Returns STATUS_PRIVILEGE_NOT_HELD, STATUS_INVALID_PARAMETER (buffer too
// short, or the device is the last non-readonly one),
// STATUS_MEDIA_WRITE_PROTECTED (read-only volume), STATUS_NOT_FOUND (no such
// device), STATUS_CANNOT_DELETE (too few devices would remain for a RAID
// profile in use), STATUS_DEVICE_NOT_READY (balance thread already exists),
// or STATUS_SUCCESS at the end of the function.
// NOTE(review): some lines of this function (local declarations, the body of
// the device loop, closing braces) are not visible in this view - confirm
// the details below against the full file.
3074 NTSTATUS
remove_device(device_extension
* Vcb
, void* data
, ULONG length
, KPROCESSOR_MODE processor_mode
) {
3080 UINT64 num_rw_devices
;
3082 TRACE("(%p, %p, %x)\n", Vcb
, data
, length
);
3084 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3085 return STATUS_PRIVILEGE_NOT_HELD
;
// the buffer must be large enough to hold the device ID
3087 if (length
< sizeof(UINT64
))
3088 return STATUS_INVALID_PARAMETER
;
// NOTE(review): unlike start_balance and query_balance, no NULL check on data
// is visible before this dereference - confirm.
3090 devid
= *(UINT64
*)data
;
// tree_lock is held shared while the device list and flags are examined
3092 ExAcquireResourceSharedLite(&Vcb
->tree_lock
, TRUE
);
3094 if (Vcb
->readonly
) {
3095 ExReleaseResourceLite(&Vcb
->tree_lock
);
3096 return STATUS_MEDIA_WRITE_PROTECTED
;
// walk the device list looking for devid
// NOTE(review): presumably this loop also counts non-readonly devices into
// num_rw_devices - the statements doing so are not visible here; confirm.
3101 le
= Vcb
->devices
.Flink
;
3102 while (le
!= &Vcb
->devices
) {
3103 device
* dev2
= CONTAINING_RECORD(le
, device
, list_entry
);
3105 if (dev2
->devitem
.dev_id
== devid
)
3108 if (!dev2
->readonly
)
// device-not-found path
3115 ExReleaseResourceLite(&Vcb
->tree_lock
);
3116 WARN("device %llx not found\n", devid
);
3117 return STATUS_NOT_FOUND
;
// the checks below apply only when the device being removed is writable
3120 if (!dev
->readonly
) {
3121 if (num_rw_devices
== 1) {
3122 ExReleaseResourceLite(&Vcb
->tree_lock
);
3123 WARN("not removing last non-readonly device\n");
3124 return STATUS_INVALID_PARAMETER
;
// refuse when exactly 4 writable devices exist and RAID10 or RAID6 is in use
3127 if (num_rw_devices
== 4 &&
3128 ((Vcb
->data_flags
& BLOCK_FLAG_RAID10
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID10
|| Vcb
->system_flags
& BLOCK_FLAG_RAID10
) ||
3129 (Vcb
->data_flags
& BLOCK_FLAG_RAID6
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID6
|| Vcb
->system_flags
& BLOCK_FLAG_RAID6
))
3131 ExReleaseResourceLite(&Vcb
->tree_lock
);
3132 ERR("would not be enough devices to satisfy RAID requirement (RAID6/10)\n");
3133 return STATUS_CANNOT_DELETE
;
// refuse when exactly 3 writable devices exist and RAID5 is in use
3136 if (num_rw_devices
== 3 && (Vcb
->data_flags
& BLOCK_FLAG_RAID5
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID5
|| Vcb
->system_flags
& BLOCK_FLAG_RAID5
)) {
3137 ExReleaseResourceLite(&Vcb
->tree_lock
);
3138 ERR("would not be enough devices to satisfy RAID requirement (RAID5)\n");
3139 return STATUS_CANNOT_DELETE
;
// refuse when exactly 2 writable devices exist and RAID0 or RAID1 is in use
3142 if (num_rw_devices
== 2 &&
3143 ((Vcb
->data_flags
& BLOCK_FLAG_RAID0
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID0
|| Vcb
->system_flags
& BLOCK_FLAG_RAID0
) ||
3144 (Vcb
->data_flags
& BLOCK_FLAG_RAID1
|| Vcb
->metadata_flags
& BLOCK_FLAG_RAID1
|| Vcb
->system_flags
& BLOCK_FLAG_RAID1
))
3146 ExReleaseResourceLite(&Vcb
->tree_lock
);
3147 ERR("would not be enough devices to satisfy RAID requirement (RAID0/1)\n");
3148 return STATUS_CANNOT_DELETE
;
// NOTE(review): tree_lock is released before the balance.thread test below,
// so that test and the thread creation do not run under this lock - confirm
// that callers serialise these requests.
3152 ExReleaseResourceLite(&Vcb
->tree_lock
);
3154 if (Vcb
->balance
.thread
) {
3155 WARN("balance already running\n");
3156 return STATUS_DEVICE_NOT_READY
;
// reset all three option sets, then restrict each of them to this device
3161 RtlZeroMemory(Vcb
->balance
.opts
, sizeof(btrfs_balance_opts
) * 3);
3163 for (i
= 0; i
< 3; i
++) {
3164 Vcb
->balance
.opts
[i
].flags
= BTRFS_BALANCE_OPTS_ENABLED
| BTRFS_BALANCE_OPTS_DEVID
;
3165 Vcb
->balance
.opts
[i
].devid
= devid
;
3168 Vcb
->balance
.paused
= FALSE
;
// mark this balance as a device removal
3169 Vcb
->balance
.removing
= TRUE
;
// initial state of the event is the inverse of the paused flag
3170 KeInitializeEvent(&Vcb
->balance
.event
, NotificationEvent
, !Vcb
->balance
.paused
);
// create the thread that runs balance_thread with Vcb as its context
3172 Status
= PsCreateSystemThread(&Vcb
->balance
.thread
, 0, NULL
, NULL
, NULL
, balance_thread
, Vcb
);
3173 if (!NT_SUCCESS(Status
)) {
3174 ERR("PsCreateSystemThread returned %08x\n", Status
);
3179 return STATUS_SUCCESS
;