1 /* Copyright (c) Mark Harmstone 2016-17
3 * This file is part of WinBtrfs.
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
18 #include "btrfs_drv.h"
23 #define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - sizeof(leaf_node))
25 // #define DEBUG_WRITE_LOOPS
42 } EXTENT_ITEM_SKINNY_METADATA
;
44 static NTSTATUS
create_chunk(device_extension
* Vcb
, chunk
* c
, PIRP Irp
);
45 static NTSTATUS
update_tree_extents(device_extension
* Vcb
, tree
* t
, PIRP Irp
, LIST_ENTRY
* rollback
);
47 #ifndef _MSC_VER // not in mingw yet
48 #define DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED 0x80000000
51 _Function_class_(IO_COMPLETION_ROUTINE
)
52 static NTSTATUS __stdcall
write_completion(PDEVICE_OBJECT DeviceObject
, PIRP Irp
, PVOID conptr
) {
53 write_context
* context
= conptr
;
57 context
->iosb
= Irp
->IoStatus
;
58 KeSetEvent(&context
->Event
, 0, false);
60 return STATUS_MORE_PROCESSING_REQUIRED
;
63 NTSTATUS
write_data_phys(_In_ PDEVICE_OBJECT device
, _In_ PFILE_OBJECT fileobj
, _In_
uint64_t address
,
64 _In_reads_bytes_(length
) void* data
, _In_
uint32_t length
) {
68 PIO_STACK_LOCATION IrpSp
;
69 write_context context
;
71 TRACE("(%p, %I64x, %p, %x)\n", device
, address
, data
, length
);
73 RtlZeroMemory(&context
, sizeof(write_context
));
75 KeInitializeEvent(&context
.Event
, NotificationEvent
, false);
77 offset
.QuadPart
= address
;
79 Irp
= IoAllocateIrp(device
->StackSize
, false);
82 ERR("IoAllocateIrp failed\n");
83 return STATUS_INSUFFICIENT_RESOURCES
;
86 IrpSp
= IoGetNextIrpStackLocation(Irp
);
87 IrpSp
->MajorFunction
= IRP_MJ_WRITE
;
88 IrpSp
->FileObject
= fileobj
;
90 if (device
->Flags
& DO_BUFFERED_IO
) {
91 Irp
->AssociatedIrp
.SystemBuffer
= data
;
93 Irp
->Flags
= IRP_BUFFERED_IO
;
94 } else if (device
->Flags
& DO_DIRECT_IO
) {
95 Irp
->MdlAddress
= IoAllocateMdl(data
, length
, false, false, NULL
);
96 if (!Irp
->MdlAddress
) {
97 DbgPrint("IoAllocateMdl failed\n");
98 Status
= STATUS_INSUFFICIENT_RESOURCES
;
102 Status
= STATUS_SUCCESS
;
105 MmProbeAndLockPages(Irp
->MdlAddress
, KernelMode
, IoReadAccess
);
106 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
107 Status
= _SEH2_GetExceptionCode();
110 if (!NT_SUCCESS(Status
)) {
111 ERR("MmProbeAndLockPages threw exception %08x\n", Status
);
112 IoFreeMdl(Irp
->MdlAddress
);
116 Irp
->UserBuffer
= data
;
119 IrpSp
->Parameters
.Write
.Length
= length
;
120 IrpSp
->Parameters
.Write
.ByteOffset
= offset
;
122 Irp
->UserIosb
= &context
.iosb
;
124 Irp
->UserEvent
= &context
.Event
;
126 IoSetCompletionRoutine(Irp
, write_completion
, &context
, true, true, true);
128 Status
= IoCallDriver(device
, Irp
);
130 if (Status
== STATUS_PENDING
) {
131 KeWaitForSingleObject(&context
.Event
, Executive
, KernelMode
, false, NULL
);
132 Status
= context
.iosb
.Status
;
135 if (!NT_SUCCESS(Status
)) {
136 ERR("IoCallDriver returned %08x\n", Status
);
139 if (device
->Flags
& DO_DIRECT_IO
) {
140 MmUnlockPages(Irp
->MdlAddress
);
141 IoFreeMdl(Irp
->MdlAddress
);
150 static void add_trim_entry(device
* dev
, uint64_t address
, uint64_t size
) {
151 space
* s
= ExAllocatePoolWithTag(PagedPool
, sizeof(space
), ALLOC_TAG
);
153 ERR("out of memory\n");
157 s
->address
= address
;
159 dev
->num_trim_entries
++;
161 InsertTailList(&dev
->trim_list
, &s
->list_entry
);
164 static void clean_space_cache_chunk(device_extension
* Vcb
, chunk
* c
) {
167 if (Vcb
->trim
&& !Vcb
->options
.no_trim
) {
168 if (c
->chunk_item
->type
& BLOCK_FLAG_DUPLICATE
)
169 type
= BLOCK_FLAG_DUPLICATE
;
170 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID0
)
171 type
= BLOCK_FLAG_RAID0
;
172 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID1
)
173 type
= BLOCK_FLAG_DUPLICATE
;
174 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID10
)
175 type
= BLOCK_FLAG_RAID10
;
176 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
)
177 type
= BLOCK_FLAG_RAID5
;
178 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)
179 type
= BLOCK_FLAG_RAID6
;
181 type
= BLOCK_FLAG_DUPLICATE
;
184 while (!IsListEmpty(&c
->deleting
)) {
185 space
* s
= CONTAINING_RECORD(c
->deleting
.Flink
, space
, list_entry
);
187 if (Vcb
->trim
&& !Vcb
->options
.no_trim
&& (!Vcb
->options
.no_barrier
|| !(c
->chunk_item
->type
& BLOCK_FLAG_METADATA
))) {
188 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
190 if (type
== BLOCK_FLAG_DUPLICATE
) {
193 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
194 if (c
->devices
[i
] && c
->devices
[i
]->devobj
&& !c
->devices
[i
]->readonly
&& c
->devices
[i
]->trim
)
195 add_trim_entry(c
->devices
[i
], s
->address
- c
->offset
+ cis
[i
].offset
, s
->size
);
197 } else if (type
== BLOCK_FLAG_RAID0
) {
198 uint64_t startoff
, endoff
;
199 uint16_t startoffstripe
, endoffstripe
, i
;
201 get_raid0_offset(s
->address
- c
->offset
, c
->chunk_item
->stripe_length
, c
->chunk_item
->num_stripes
, &startoff
, &startoffstripe
);
202 get_raid0_offset(s
->address
- c
->offset
+ s
->size
- 1, c
->chunk_item
->stripe_length
, c
->chunk_item
->num_stripes
, &endoff
, &endoffstripe
);
204 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
205 if (c
->devices
[i
] && c
->devices
[i
]->devobj
&& !c
->devices
[i
]->readonly
&& c
->devices
[i
]->trim
) {
206 uint64_t stripestart
, stripeend
;
208 if (startoffstripe
> i
)
209 stripestart
= startoff
- (startoff
% c
->chunk_item
->stripe_length
) + c
->chunk_item
->stripe_length
;
210 else if (startoffstripe
== i
)
211 stripestart
= startoff
;
213 stripestart
= startoff
- (startoff
% c
->chunk_item
->stripe_length
);
215 if (endoffstripe
> i
)
216 stripeend
= endoff
- (endoff
% c
->chunk_item
->stripe_length
) + c
->chunk_item
->stripe_length
;
217 else if (endoffstripe
== i
)
218 stripeend
= endoff
+ 1;
220 stripeend
= endoff
- (endoff
% c
->chunk_item
->stripe_length
);
222 if (stripestart
!= stripeend
)
223 add_trim_entry(c
->devices
[i
], stripestart
+ cis
[i
].offset
, stripeend
- stripestart
);
226 } else if (type
== BLOCK_FLAG_RAID10
) {
227 uint64_t startoff
, endoff
;
228 uint16_t sub_stripes
, startoffstripe
, endoffstripe
, i
;
230 sub_stripes
= max(1, c
->chunk_item
->sub_stripes
);
232 get_raid0_offset(s
->address
- c
->offset
, c
->chunk_item
->stripe_length
, c
->chunk_item
->num_stripes
/ sub_stripes
, &startoff
, &startoffstripe
);
233 get_raid0_offset(s
->address
- c
->offset
+ s
->size
- 1, c
->chunk_item
->stripe_length
, c
->chunk_item
->num_stripes
/ sub_stripes
, &endoff
, &endoffstripe
);
235 startoffstripe
*= sub_stripes
;
236 endoffstripe
*= sub_stripes
;
238 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
+= sub_stripes
) {
240 uint64_t stripestart
, stripeend
;
242 if (startoffstripe
> i
)
243 stripestart
= startoff
- (startoff
% c
->chunk_item
->stripe_length
) + c
->chunk_item
->stripe_length
;
244 else if (startoffstripe
== i
)
245 stripestart
= startoff
;
247 stripestart
= startoff
- (startoff
% c
->chunk_item
->stripe_length
);
249 if (endoffstripe
> i
)
250 stripeend
= endoff
- (endoff
% c
->chunk_item
->stripe_length
) + c
->chunk_item
->stripe_length
;
251 else if (endoffstripe
== i
)
252 stripeend
= endoff
+ 1;
254 stripeend
= endoff
- (endoff
% c
->chunk_item
->stripe_length
);
256 if (stripestart
!= stripeend
) {
257 for (j
= 0; j
< sub_stripes
; j
++) {
258 if (c
->devices
[i
+j
] && c
->devices
[i
+j
]->devobj
&& !c
->devices
[i
+j
]->readonly
&& c
->devices
[i
+j
]->trim
)
259 add_trim_entry(c
->devices
[i
+j
], stripestart
+ cis
[i
+j
].offset
, stripeend
- stripestart
);
264 // FIXME - RAID5(?), RAID6(?)
267 RemoveEntryList(&s
->list_entry
);
273 DEVICE_MANAGE_DATA_SET_ATTRIBUTES
* dmdsa
;
274 ATA_PASS_THROUGH_EX apte
;
276 IO_STATUS_BLOCK iosb
;
277 #ifdef DEBUG_TRIM_EMULATION
281 } ioctl_context_stripe
;
286 ioctl_context_stripe
* stripes
;
289 _Function_class_(IO_COMPLETION_ROUTINE
)
290 static NTSTATUS __stdcall
ioctl_completion(PDEVICE_OBJECT DeviceObject
, PIRP Irp
, PVOID conptr
) {
291 ioctl_context
* context
= (ioctl_context
*)conptr
;
292 LONG left2
= InterlockedDecrement(&context
->left
);
294 UNUSED(DeviceObject
);
298 KeSetEvent(&context
->Event
, 0, false);
300 return STATUS_MORE_PROCESSING_REQUIRED
;
303 #ifdef DEBUG_TRIM_EMULATION
304 static void trim_emulation(device
* dev
) {
306 ioctl_context context
;
307 unsigned int i
= 0, count
= 0;
309 le
= dev
->trim_list
.Flink
;
310 while (le
!= &dev
->trim_list
) {
315 context
.left
= count
;
317 KeInitializeEvent(&context
.Event
, NotificationEvent
, false);
319 context
.stripes
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(ioctl_context_stripe
) * context
.left
, ALLOC_TAG
);
320 if (!context
.stripes
) {
321 ERR("out of memory\n");
325 RtlZeroMemory(context
.stripes
, sizeof(ioctl_context_stripe
) * context
.left
);
328 le
= dev
->trim_list
.Flink
;
329 while (le
!= &dev
->trim_list
) {
330 ioctl_context_stripe
* stripe
= &context
.stripes
[i
];
331 space
* s
= CONTAINING_RECORD(le
, space
, list_entry
);
333 WARN("(%I64x, %I64x)\n", s
->address
, s
->size
);
335 stripe
->Irp
= IoAllocateIrp(dev
->devobj
->StackSize
, false);
338 ERR("IoAllocateIrp failed\n");
340 PIO_STACK_LOCATION IrpSp
= IoGetNextIrpStackLocation(stripe
->Irp
);
341 IrpSp
->MajorFunction
= IRP_MJ_WRITE
;
342 IrpSp
->FileObject
= dev
->fileobj
;
344 stripe
->buf
= ExAllocatePoolWithTag(NonPagedPool
, (uint32_t)s
->size
, ALLOC_TAG
);
347 ERR("out of memory\n");
349 RtlZeroMemory(stripe
->buf
, (uint32_t)s
->size
); // FIXME - randomize instead?
351 stripe
->mdl
= IoAllocateMdl(stripe
->buf
, (uint32_t)s
->size
, false, false, NULL
);
354 ERR("IoAllocateMdl failed\n");
356 MmBuildMdlForNonPagedPool(stripe
->mdl
);
358 stripe
->Irp
->MdlAddress
= stripe
->mdl
;
360 IrpSp
->Parameters
.Write
.ByteOffset
.QuadPart
= s
->address
;
361 IrpSp
->Parameters
.Write
.Length
= s
->size
;
363 stripe
->Irp
->UserIosb
= &stripe
->iosb
;
365 IoSetCompletionRoutine(stripe
->Irp
, ioctl_completion
, &context
, true, true, true);
367 IoCallDriver(dev
->devobj
, stripe
->Irp
);
377 KeWaitForSingleObject(&context
.Event
, Executive
, KernelMode
, false, NULL
);
379 for (i
= 0; i
< count
; i
++) {
380 ioctl_context_stripe
* stripe
= &context
.stripes
[i
];
383 IoFreeMdl(stripe
->mdl
);
386 ExFreePool(stripe
->buf
);
389 ExFreePool(context
.stripes
);
393 static void clean_space_cache(device_extension
* Vcb
) {
396 #ifndef DEBUG_TRIM_EMULATION
400 TRACE("(%p)\n", Vcb
);
402 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, true);
404 le
= Vcb
->chunks
.Flink
;
405 while (le
!= &Vcb
->chunks
) {
406 c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
408 if (c
->space_changed
) {
409 acquire_chunk_lock(c
, Vcb
);
411 if (c
->space_changed
)
412 clean_space_cache_chunk(Vcb
, c
);
414 c
->space_changed
= false;
416 release_chunk_lock(c
, Vcb
);
422 ExReleaseResourceLite(&Vcb
->chunk_lock
);
424 if (Vcb
->trim
&& !Vcb
->options
.no_trim
) {
425 #ifndef DEBUG_TRIM_EMULATION
426 ioctl_context context
;
431 le
= Vcb
->devices
.Flink
;
432 while (le
!= &Vcb
->devices
) {
433 device
* dev
= CONTAINING_RECORD(le
, device
, list_entry
);
435 if (dev
->devobj
&& !dev
->readonly
&& dev
->trim
&& dev
->num_trim_entries
> 0)
441 if (context
.left
== 0)
444 total_num
= context
.left
;
447 KeInitializeEvent(&context
.Event
, NotificationEvent
, false);
449 context
.stripes
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(ioctl_context_stripe
) * context
.left
, ALLOC_TAG
);
450 if (!context
.stripes
) {
451 ERR("out of memory\n");
455 RtlZeroMemory(context
.stripes
, sizeof(ioctl_context_stripe
) * context
.left
);
458 le
= Vcb
->devices
.Flink
;
459 while (le
!= &Vcb
->devices
) {
460 device
* dev
= CONTAINING_RECORD(le
, device
, list_entry
);
462 if (dev
->devobj
&& !dev
->readonly
&& dev
->trim
&& dev
->num_trim_entries
> 0) {
463 #ifdef DEBUG_TRIM_EMULATION
467 ioctl_context_stripe
* stripe
= &context
.stripes
[num
];
468 DEVICE_DATA_SET_RANGE
* ranges
;
469 ULONG datalen
= (ULONG
)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES
), sizeof(uint64_t)) + (dev
->num_trim_entries
* sizeof(DEVICE_DATA_SET_RANGE
)), i
;
470 PIO_STACK_LOCATION IrpSp
;
472 stripe
->dmdsa
= ExAllocatePoolWithTag(PagedPool
, datalen
, ALLOC_TAG
);
473 if (!stripe
->dmdsa
) {
474 ERR("out of memory\n");
478 stripe
->dmdsa
->Size
= sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES
);
479 stripe
->dmdsa
->Action
= DeviceDsmAction_Trim
;
480 stripe
->dmdsa
->Flags
= DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED
;
481 stripe
->dmdsa
->ParameterBlockOffset
= 0;
482 stripe
->dmdsa
->ParameterBlockLength
= 0;
483 stripe
->dmdsa
->DataSetRangesOffset
= (ULONG
)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES
), sizeof(uint64_t));
484 stripe
->dmdsa
->DataSetRangesLength
= dev
->num_trim_entries
* sizeof(DEVICE_DATA_SET_RANGE
);
486 ranges
= (DEVICE_DATA_SET_RANGE
*)((uint8_t*)stripe
->dmdsa
+ stripe
->dmdsa
->DataSetRangesOffset
);
490 le2
= dev
->trim_list
.Flink
;
491 while (le2
!= &dev
->trim_list
) {
492 space
* s
= CONTAINING_RECORD(le2
, space
, list_entry
);
494 ranges
[i
].StartingOffset
= s
->address
;
495 ranges
[i
].LengthInBytes
= s
->size
;
501 stripe
->Irp
= IoAllocateIrp(dev
->devobj
->StackSize
, false);
504 ERR("IoAllocateIrp failed\n");
508 IrpSp
= IoGetNextIrpStackLocation(stripe
->Irp
);
509 IrpSp
->MajorFunction
= IRP_MJ_DEVICE_CONTROL
;
510 IrpSp
->FileObject
= dev
->fileobj
;
512 IrpSp
->Parameters
.DeviceIoControl
.IoControlCode
= IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES
;
513 IrpSp
->Parameters
.DeviceIoControl
.InputBufferLength
= datalen
;
514 IrpSp
->Parameters
.DeviceIoControl
.OutputBufferLength
= 0;
516 stripe
->Irp
->AssociatedIrp
.SystemBuffer
= stripe
->dmdsa
;
517 stripe
->Irp
->Flags
|= IRP_BUFFERED_IO
;
518 stripe
->Irp
->UserBuffer
= NULL
;
519 stripe
->Irp
->UserIosb
= &stripe
->iosb
;
521 IoSetCompletionRoutine(stripe
->Irp
, ioctl_completion
, &context
, true, true, true);
523 IoCallDriver(dev
->devobj
, stripe
->Irp
);
527 while (!IsListEmpty(&dev
->trim_list
)) {
528 space
* s
= CONTAINING_RECORD(RemoveHeadList(&dev
->trim_list
), space
, list_entry
);
532 dev
->num_trim_entries
= 0;
534 #ifndef DEBUG_TRIM_EMULATION
542 #ifndef DEBUG_TRIM_EMULATION
543 KeWaitForSingleObject(&context
.Event
, Executive
, KernelMode
, false, NULL
);
545 for (num
= 0; num
< total_num
; num
++) {
546 if (context
.stripes
[num
].dmdsa
)
547 ExFreePool(context
.stripes
[num
].dmdsa
);
550 ExFreePool(context
.stripes
);
555 static bool trees_consistent(device_extension
* Vcb
) {
556 ULONG maxsize
= Vcb
->superblock
.node_size
- sizeof(tree_header
);
559 le
= Vcb
->trees
.Flink
;
560 while (le
!= &Vcb
->trees
) {
561 tree
* t
= CONTAINING_RECORD(le
, tree
, list_entry
);
564 if (t
->header
.num_items
== 0 && t
->parent
) {
565 #ifdef DEBUG_WRITE_LOOPS
566 ERR("empty tree found, looping again\n");
571 if (t
->size
> maxsize
) {
572 #ifdef DEBUG_WRITE_LOOPS
573 ERR("overlarge tree found (%u > %u), looping again\n", t
->size
, maxsize
);
578 if (!t
->has_new_address
) {
579 #ifdef DEBUG_WRITE_LOOPS
580 ERR("tree found without new address, looping again\n");
592 static NTSTATUS
add_parents(device_extension
* Vcb
, PIRP Irp
) {
596 for (level
= 0; level
<= 255; level
++) {
597 bool nothing_found
= true;
599 TRACE("level = %u\n", level
);
601 le
= Vcb
->trees
.Flink
;
602 while (le
!= &Vcb
->trees
) {
603 tree
* t
= CONTAINING_RECORD(le
, tree
, list_entry
);
605 if (t
->write
&& t
->header
.level
== level
) {
606 TRACE("tree %p: root = %I64x, level = %x, parent = %p\n", t
, t
->header
.tree_id
, t
->header
.level
, t
->parent
);
608 nothing_found
= false;
611 if (!t
->parent
->write
)
612 TRACE("adding tree %p (level %x)\n", t
->parent
, t
->header
.level
);
614 t
->parent
->write
= true;
615 } else if (t
->root
!= Vcb
->root_root
&& t
->root
!= Vcb
->chunk_root
) {
623 searchkey
.obj_id
= t
->root
->id
;
624 searchkey
.obj_type
= TYPE_ROOT_ITEM
;
625 searchkey
.offset
= 0xffffffffffffffff;
627 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, Irp
);
628 if (!NT_SUCCESS(Status
)) {
629 ERR("error - find_item returned %08x\n", Status
);
633 if (tp
.item
->key
.obj_id
!= searchkey
.obj_id
|| tp
.item
->key
.obj_type
!= searchkey
.obj_type
) {
634 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey
.obj_id
);
635 return STATUS_INTERNAL_ERROR
;
638 if (tp
.item
->size
< sizeof(ROOT_ITEM
)) { // if not full length, delete and create new entry
639 ROOT_ITEM
* ri
= ExAllocatePoolWithTag(PagedPool
, sizeof(ROOT_ITEM
), ALLOC_TAG
);
642 ERR("out of memory\n");
643 return STATUS_INSUFFICIENT_RESOURCES
;
646 RtlCopyMemory(ri
, &t
->root
->root_item
, sizeof(ROOT_ITEM
));
648 Status
= delete_tree_item(Vcb
, &tp
);
649 if (!NT_SUCCESS(Status
)) {
650 ERR("delete_tree_item returned %08x\n", Status
);
655 Status
= insert_tree_item(Vcb
, Vcb
->root_root
, tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, ri
, sizeof(ROOT_ITEM
), NULL
, Irp
);
656 if (!NT_SUCCESS(Status
)) {
657 ERR("insert_tree_item returned %08x\n", Status
);
683 return STATUS_SUCCESS
;
686 static void add_parents_to_cache(tree
* t
) {
693 static bool insert_tree_extent_skinny(device_extension
* Vcb
, uint8_t level
, uint64_t root_id
, chunk
* c
, uint64_t address
, PIRP Irp
, LIST_ENTRY
* rollback
) {
695 EXTENT_ITEM_SKINNY_METADATA
* eism
;
696 traverse_ptr insert_tp
;
698 eism
= ExAllocatePoolWithTag(PagedPool
, sizeof(EXTENT_ITEM_SKINNY_METADATA
), ALLOC_TAG
);
700 ERR("out of memory\n");
704 eism
->ei
.refcount
= 1;
705 eism
->ei
.generation
= Vcb
->superblock
.generation
;
706 eism
->ei
.flags
= EXTENT_ITEM_TREE_BLOCK
;
707 eism
->type
= TYPE_TREE_BLOCK_REF
;
708 eism
->tbr
.offset
= root_id
;
710 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, address
, TYPE_METADATA_ITEM
, level
, eism
, sizeof(EXTENT_ITEM_SKINNY_METADATA
), &insert_tp
, Irp
);
711 if (!NT_SUCCESS(Status
)) {
712 ERR("insert_tree_item returned %08x\n", Status
);
717 acquire_chunk_lock(c
, Vcb
);
719 space_list_subtract(c
, false, address
, Vcb
->superblock
.node_size
, rollback
);
721 release_chunk_lock(c
, Vcb
);
723 add_parents_to_cache(insert_tp
.tree
);
728 bool find_metadata_address_in_chunk(device_extension
* Vcb
, chunk
* c
, uint64_t* address
) {
732 TRACE("(%p, %I64x, %p)\n", Vcb
, c
->offset
, address
);
734 if (Vcb
->superblock
.node_size
> c
->chunk_item
->size
- c
->used
)
737 if (!c
->cache_loaded
) {
738 NTSTATUS Status
= load_cache_chunk(Vcb
, c
, NULL
);
740 if (!NT_SUCCESS(Status
)) {
741 ERR("load_cache_chunk returned %08x\n", Status
);
746 if (IsListEmpty(&c
->space_size
))
749 if (!c
->last_alloc_set
) {
750 s
= CONTAINING_RECORD(c
->space
.Blink
, space
, list_entry
);
752 c
->last_alloc
= s
->address
;
753 c
->last_alloc_set
= true;
755 if (s
->size
>= Vcb
->superblock
.node_size
) {
756 *address
= s
->address
;
757 c
->last_alloc
+= Vcb
->superblock
.node_size
;
763 while (le
!= &c
->space
) {
764 s
= CONTAINING_RECORD(le
, space
, list_entry
);
766 if (s
->address
<= c
->last_alloc
&& s
->address
+ s
->size
>= c
->last_alloc
+ Vcb
->superblock
.node_size
) {
767 *address
= c
->last_alloc
;
768 c
->last_alloc
+= Vcb
->superblock
.node_size
;
775 le
= c
->space_size
.Flink
;
776 while (le
!= &c
->space_size
) {
777 s
= CONTAINING_RECORD(le
, space
, list_entry_size
);
779 if (s
->size
== Vcb
->superblock
.node_size
) {
780 *address
= s
->address
;
781 c
->last_alloc
= s
->address
+ Vcb
->superblock
.node_size
;
783 } else if (s
->size
< Vcb
->superblock
.node_size
) {
784 if (le
== c
->space_size
.Flink
)
787 s
= CONTAINING_RECORD(le
->Blink
, space
, list_entry_size
);
789 *address
= s
->address
;
790 c
->last_alloc
= s
->address
+ Vcb
->superblock
.node_size
;
798 s
= CONTAINING_RECORD(c
->space_size
.Blink
, space
, list_entry_size
);
800 if (s
->size
> Vcb
->superblock
.node_size
) {
801 *address
= s
->address
;
802 c
->last_alloc
= s
->address
+ Vcb
->superblock
.node_size
;
809 static bool insert_tree_extent(device_extension
* Vcb
, uint8_t level
, uint64_t root_id
, chunk
* c
, uint64_t* new_address
, PIRP Irp
, LIST_ENTRY
* rollback
) {
812 EXTENT_ITEM_TREE2
* eit2
;
813 traverse_ptr insert_tp
;
815 TRACE("(%p, %x, %I64x, %p, %p, %p, %p)\n", Vcb
, level
, root_id
, c
, new_address
, rollback
);
817 if (!find_metadata_address_in_chunk(Vcb
, c
, &address
))
820 if (Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA
) {
821 bool b
= insert_tree_extent_skinny(Vcb
, level
, root_id
, c
, address
, Irp
, rollback
);
824 *new_address
= address
;
829 eit2
= ExAllocatePoolWithTag(PagedPool
, sizeof(EXTENT_ITEM_TREE2
), ALLOC_TAG
);
831 ERR("out of memory\n");
835 eit2
->eit
.extent_item
.refcount
= 1;
836 eit2
->eit
.extent_item
.generation
= Vcb
->superblock
.generation
;
837 eit2
->eit
.extent_item
.flags
= EXTENT_ITEM_TREE_BLOCK
;
838 eit2
->eit
.level
= level
;
839 eit2
->type
= TYPE_TREE_BLOCK_REF
;
840 eit2
->tbr
.offset
= root_id
;
842 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, address
, TYPE_EXTENT_ITEM
, Vcb
->superblock
.node_size
, eit2
, sizeof(EXTENT_ITEM_TREE2
), &insert_tp
, Irp
);
843 if (!NT_SUCCESS(Status
)) {
844 ERR("insert_tree_item returned %08x\n", Status
);
849 acquire_chunk_lock(c
, Vcb
);
851 space_list_subtract(c
, false, address
, Vcb
->superblock
.node_size
, rollback
);
853 release_chunk_lock(c
, Vcb
);
855 add_parents_to_cache(insert_tp
.tree
);
857 *new_address
= address
;
862 NTSTATUS
get_tree_new_address(device_extension
* Vcb
, tree
* t
, PIRP Irp
, LIST_ENTRY
* rollback
) {
864 chunk
*origchunk
= NULL
, *c
;
866 uint64_t flags
, addr
;
868 if (t
->root
->id
== BTRFS_ROOT_CHUNK
)
869 flags
= Vcb
->system_flags
;
871 flags
= Vcb
->metadata_flags
;
873 if (t
->has_address
) {
874 origchunk
= get_chunk_from_address(Vcb
, t
->header
.address
);
876 if (origchunk
&& !origchunk
->readonly
&& !origchunk
->reloc
&& origchunk
->chunk_item
->type
== flags
&&
877 insert_tree_extent(Vcb
, t
->header
.level
, t
->root
->id
, origchunk
, &addr
, Irp
, rollback
)) {
878 t
->new_address
= addr
;
879 t
->has_new_address
= true;
880 return STATUS_SUCCESS
;
884 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
886 le
= Vcb
->chunks
.Flink
;
887 while (le
!= &Vcb
->chunks
) {
888 c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
890 if (!c
->readonly
&& !c
->reloc
) {
891 acquire_chunk_lock(c
, Vcb
);
893 if (c
!= origchunk
&& c
->chunk_item
->type
== flags
&& (c
->chunk_item
->size
- c
->used
) >= Vcb
->superblock
.node_size
) {
894 if (insert_tree_extent(Vcb
, t
->header
.level
, t
->root
->id
, c
, &addr
, Irp
, rollback
)) {
895 release_chunk_lock(c
, Vcb
);
896 ExReleaseResourceLite(&Vcb
->chunk_lock
);
897 t
->new_address
= addr
;
898 t
->has_new_address
= true;
899 return STATUS_SUCCESS
;
903 release_chunk_lock(c
, Vcb
);
909 // allocate new chunk if necessary
911 Status
= alloc_chunk(Vcb
, flags
, &c
, false);
913 if (!NT_SUCCESS(Status
)) {
914 ERR("alloc_chunk returned %08x\n", Status
);
915 ExReleaseResourceLite(&Vcb
->chunk_lock
);
919 acquire_chunk_lock(c
, Vcb
);
921 if ((c
->chunk_item
->size
- c
->used
) >= Vcb
->superblock
.node_size
) {
922 if (insert_tree_extent(Vcb
, t
->header
.level
, t
->root
->id
, c
, &addr
, Irp
, rollback
)) {
923 release_chunk_lock(c
, Vcb
);
924 ExReleaseResourceLite(&Vcb
->chunk_lock
);
925 t
->new_address
= addr
;
926 t
->has_new_address
= true;
927 return STATUS_SUCCESS
;
931 release_chunk_lock(c
, Vcb
);
933 ExReleaseResourceLite(&Vcb
->chunk_lock
);
935 ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb
->superblock
.node_size
);
937 return STATUS_DISK_FULL
;
940 static NTSTATUS
reduce_tree_extent(device_extension
* Vcb
, uint64_t address
, tree
* t
, uint64_t parent_root
, uint8_t level
, PIRP Irp
, LIST_ENTRY
* rollback
) {
944 TRACE("(%p, %I64x, %p)\n", Vcb
, address
, t
);
946 rc
= get_extent_refcount(Vcb
, address
, Vcb
->superblock
.node_size
, Irp
);
948 ERR("error - refcount for extent %I64x was 0\n", address
);
949 return STATUS_INTERNAL_ERROR
;
955 root
= t
->header
.tree_id
;
957 Status
= decrease_extent_refcount_tree(Vcb
, address
, Vcb
->superblock
.node_size
, root
, level
, Irp
);
958 if (!NT_SUCCESS(Status
)) {
959 ERR("decrease_extent_refcount_tree returned %08x\n", Status
);
964 chunk
* c
= get_chunk_from_address(Vcb
, address
);
967 acquire_chunk_lock(c
, Vcb
);
969 if (!c
->cache_loaded
) {
970 Status
= load_cache_chunk(Vcb
, c
, NULL
);
972 if (!NT_SUCCESS(Status
)) {
973 ERR("load_cache_chunk returned %08x\n", Status
);
974 release_chunk_lock(c
, Vcb
);
979 c
->used
-= Vcb
->superblock
.node_size
;
981 space_list_add(c
, address
, Vcb
->superblock
.node_size
, rollback
);
983 release_chunk_lock(c
, Vcb
);
985 ERR("could not find chunk for address %I64x\n", address
);
988 return STATUS_SUCCESS
;
991 static NTSTATUS
add_changed_extent_ref_edr(changed_extent
* ce
, EXTENT_DATA_REF
* edr
, bool old
) {
992 LIST_ENTRY
*le2
, *list
;
993 changed_extent_ref
* cer
;
995 list
= old
? &ce
->old_refs
: &ce
->refs
;
998 while (le2
!= list
) {
999 cer
= CONTAINING_RECORD(le2
, changed_extent_ref
, list_entry
);
1001 if (cer
->type
== TYPE_EXTENT_DATA_REF
&& cer
->edr
.root
== edr
->root
&& cer
->edr
.objid
== edr
->objid
&& cer
->edr
.offset
== edr
->offset
) {
1002 cer
->edr
.count
+= edr
->count
;
1009 cer
= ExAllocatePoolWithTag(PagedPool
, sizeof(changed_extent_ref
), ALLOC_TAG
);
1011 ERR("out of memory\n");
1012 return STATUS_INSUFFICIENT_RESOURCES
;
1015 cer
->type
= TYPE_EXTENT_DATA_REF
;
1016 RtlCopyMemory(&cer
->edr
, edr
, sizeof(EXTENT_DATA_REF
));
1017 InsertTailList(list
, &cer
->list_entry
);
1021 ce
->old_count
+= edr
->count
;
1023 ce
->count
+= edr
->count
;
1025 return STATUS_SUCCESS
;
1028 static NTSTATUS
add_changed_extent_ref_sdr(changed_extent
* ce
, SHARED_DATA_REF
* sdr
, bool old
) {
1029 LIST_ENTRY
*le2
, *list
;
1030 changed_extent_ref
* cer
;
1032 list
= old
? &ce
->old_refs
: &ce
->refs
;
1035 while (le2
!= list
) {
1036 cer
= CONTAINING_RECORD(le2
, changed_extent_ref
, list_entry
);
1038 if (cer
->type
== TYPE_SHARED_DATA_REF
&& cer
->sdr
.offset
== sdr
->offset
) {
1039 cer
->sdr
.count
+= sdr
->count
;
1046 cer
= ExAllocatePoolWithTag(PagedPool
, sizeof(changed_extent_ref
), ALLOC_TAG
);
1048 ERR("out of memory\n");
1049 return STATUS_INSUFFICIENT_RESOURCES
;
1052 cer
->type
= TYPE_SHARED_DATA_REF
;
1053 RtlCopyMemory(&cer
->sdr
, sdr
, sizeof(SHARED_DATA_REF
));
1054 InsertTailList(list
, &cer
->list_entry
);
1058 ce
->old_count
+= sdr
->count
;
1060 ce
->count
+= sdr
->count
;
1062 return STATUS_SUCCESS
;
1065 static bool shared_tree_is_unique(device_extension
* Vcb
, tree
* t
, PIRP Irp
, LIST_ENTRY
* rollback
) {
1070 if (!t
->updated_extents
&& t
->has_address
) {
1071 Status
= update_tree_extents(Vcb
, t
, Irp
, rollback
);
1072 if (!NT_SUCCESS(Status
)) {
1073 ERR("update_tree_extents returned %08x\n", Status
);
1078 searchkey
.obj_id
= t
->header
.address
;
1079 searchkey
.obj_type
= Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA
? TYPE_METADATA_ITEM
: TYPE_EXTENT_ITEM
;
1080 searchkey
.offset
= 0xffffffffffffffff;
1082 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, Irp
);
1083 if (!NT_SUCCESS(Status
)) {
1084 ERR("error - find_item returned %08x\n", Status
);
1088 if (tp
.item
->key
.obj_id
== t
->header
.address
&& (tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
|| tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
))
1094 static NTSTATUS
update_tree_extents(device_extension
* Vcb
, tree
* t
, PIRP Irp
, LIST_ENTRY
* rollback
) {
1096 uint64_t rc
= get_extent_refcount(Vcb
, t
->header
.address
, Vcb
->superblock
.node_size
, Irp
);
1097 uint64_t flags
= get_extent_flags(Vcb
, t
->header
.address
, Irp
);
1100 ERR("refcount for extent %I64x was 0\n", t
->header
.address
);
1101 return STATUS_INTERNAL_ERROR
;
1104 if (flags
& EXTENT_ITEM_SHARED_BACKREFS
|| t
->header
.flags
& HEADER_FLAG_SHARED_BACKREF
|| !(t
->header
.flags
& HEADER_FLAG_MIXED_BACKREF
)) {
1106 bool unique
= rc
> 1 ? false : (t
->parent
? shared_tree_is_unique(Vcb
, t
->parent
, Irp
, rollback
) : false);
1108 if (t
->header
.level
== 0) {
1111 le
= t
->itemlist
.Flink
;
1112 while (le
!= &t
->itemlist
) {
1113 tree_data
* td
= CONTAINING_RECORD(le
, tree_data
, list_entry
);
1115 if (!td
->inserted
&& td
->key
.obj_type
== TYPE_EXTENT_DATA
&& td
->size
>= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
)) {
1116 EXTENT_DATA
* ed
= (EXTENT_DATA
*)td
->data
;
1118 if (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) {
1119 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
1121 if (ed2
->size
> 0) {
1122 EXTENT_DATA_REF edr
;
1123 changed_extent
* ce
= NULL
;
1124 chunk
* c
= get_chunk_from_address(Vcb
, ed2
->address
);
1129 le2
= c
->changed_extents
.Flink
;
1130 while (le2
!= &c
->changed_extents
) {
1131 changed_extent
* ce2
= CONTAINING_RECORD(le2
, changed_extent
, list_entry
);
1133 if (ce2
->address
== ed2
->address
) {
1142 edr
.root
= t
->root
->id
;
1143 edr
.objid
= td
->key
.obj_id
;
1144 edr
.offset
= td
->key
.offset
- ed2
->offset
;
1148 Status
= add_changed_extent_ref_edr(ce
, &edr
, true);
1149 if (!NT_SUCCESS(Status
)) {
1150 ERR("add_changed_extent_ref_edr returned %08x\n", Status
);
1154 Status
= add_changed_extent_ref_edr(ce
, &edr
, false);
1155 if (!NT_SUCCESS(Status
)) {
1156 ERR("add_changed_extent_ref_edr returned %08x\n", Status
);
1161 Status
= increase_extent_refcount(Vcb
, ed2
->address
, ed2
->size
, TYPE_EXTENT_DATA_REF
, &edr
, NULL
, 0, Irp
);
1162 if (!NT_SUCCESS(Status
)) {
1163 ERR("increase_extent_refcount returned %08x\n", Status
);
1167 if ((flags
& EXTENT_ITEM_SHARED_BACKREFS
&& unique
) || !(t
->header
.flags
& HEADER_FLAG_MIXED_BACKREF
)) {
1168 uint64_t sdrrc
= find_extent_shared_data_refcount(Vcb
, ed2
->address
, t
->header
.address
, Irp
);
1171 SHARED_DATA_REF sdr
;
1173 sdr
.offset
= t
->header
.address
;
1176 Status
= decrease_extent_refcount(Vcb
, ed2
->address
, ed2
->size
, TYPE_SHARED_DATA_REF
, &sdr
, NULL
, 0,
1177 t
->header
.address
, ce
? ce
->superseded
: false, Irp
);
1178 if (!NT_SUCCESS(Status
)) {
1179 ERR("decrease_extent_refcount returned %08x\n", Status
);
1186 le2
= ce
->refs
.Flink
;
1187 while (le2
!= &ce
->refs
) {
1188 changed_extent_ref
* cer
= CONTAINING_RECORD(le2
, changed_extent_ref
, list_entry
);
1190 if (cer
->type
== TYPE_SHARED_DATA_REF
&& cer
->sdr
.offset
== sdr
.offset
) {
1199 le2
= ce
->old_refs
.Flink
;
1200 while (le2
!= &ce
->old_refs
) {
1201 changed_extent_ref
* cer
= CONTAINING_RECORD(le2
, changed_extent_ref
, list_entry
);
1203 if (cer
->type
== TYPE_SHARED_DATA_REF
&& cer
->sdr
.offset
== sdr
.offset
) {
1206 if (cer
->sdr
.count
> 1)
1209 RemoveEntryList(&cer
->list_entry
);
1222 // FIXME - clear shared flag if unique?
1232 le
= t
->itemlist
.Flink
;
1233 while (le
!= &t
->itemlist
) {
1234 tree_data
* td
= CONTAINING_RECORD(le
, tree_data
, list_entry
);
1236 if (!td
->inserted
) {
1237 tbr
.offset
= t
->root
->id
;
1239 Status
= increase_extent_refcount(Vcb
, td
->treeholder
.address
, Vcb
->superblock
.node_size
, TYPE_TREE_BLOCK_REF
,
1240 &tbr
, &td
->key
, t
->header
.level
- 1, Irp
);
1241 if (!NT_SUCCESS(Status
)) {
1242 ERR("increase_extent_refcount returned %08x\n", Status
);
1246 if (unique
|| !(t
->header
.flags
& HEADER_FLAG_MIXED_BACKREF
)) {
1247 uint64_t sbrrc
= find_extent_shared_tree_refcount(Vcb
, td
->treeholder
.address
, t
->header
.address
, Irp
);
1250 SHARED_BLOCK_REF sbr
;
1252 sbr
.offset
= t
->header
.address
;
1254 Status
= decrease_extent_refcount(Vcb
, td
->treeholder
.address
, Vcb
->superblock
.node_size
, TYPE_SHARED_BLOCK_REF
, &sbr
, NULL
, 0,
1255 t
->header
.address
, false, Irp
);
1256 if (!NT_SUCCESS(Status
)) {
1257 ERR("decrease_extent_refcount returned %08x\n", Status
);
1263 // FIXME - clear shared flag if unique?
1271 uint64_t sbrrc
= find_extent_shared_tree_refcount(Vcb
, t
->header
.address
, t
->parent
->header
.address
, Irp
);
1274 SHARED_BLOCK_REF sbr
;
1276 sbr
.offset
= t
->parent
->header
.address
;
1278 Status
= decrease_extent_refcount(Vcb
, t
->header
.address
, Vcb
->superblock
.node_size
, TYPE_SHARED_BLOCK_REF
, &sbr
, NULL
, 0,
1279 t
->parent
->header
.address
, false, Irp
);
1280 if (!NT_SUCCESS(Status
)) {
1281 ERR("decrease_extent_refcount returned %08x\n", Status
);
1288 tbr
.offset
= t
->parent
->header
.tree_id
;
1290 tbr
.offset
= t
->header
.tree_id
;
1292 Status
= increase_extent_refcount(Vcb
, t
->header
.address
, Vcb
->superblock
.node_size
, TYPE_TREE_BLOCK_REF
, &tbr
,
1293 t
->parent
? &t
->paritem
->key
: NULL
, t
->header
.level
, Irp
);
1294 if (!NT_SUCCESS(Status
)) {
1295 ERR("increase_extent_refcount returned %08x\n", Status
);
1299 // FIXME - clear shared flag if unique?
1301 t
->header
.flags
&= ~HEADER_FLAG_SHARED_BACKREF
;
1304 if (rc
> 1 || t
->header
.tree_id
== t
->root
->id
) {
1305 Status
= reduce_tree_extent(Vcb
, t
->header
.address
, t
, t
->parent
? t
->parent
->header
.tree_id
: t
->header
.tree_id
, t
->header
.level
, Irp
, rollback
);
1307 if (!NT_SUCCESS(Status
)) {
1308 ERR("reduce_tree_extent returned %08x\n", Status
);
1313 t
->has_address
= false;
1315 if ((rc
> 1 || t
->header
.tree_id
!= t
->root
->id
) && !(flags
& EXTENT_ITEM_SHARED_BACKREFS
)) {
1316 if (t
->header
.tree_id
== t
->root
->id
) {
1317 flags
|= EXTENT_ITEM_SHARED_BACKREFS
;
1318 update_extent_flags(Vcb
, t
->header
.address
, flags
, Irp
);
1321 if (t
->header
.level
> 0) {
1324 le
= t
->itemlist
.Flink
;
1325 while (le
!= &t
->itemlist
) {
1326 tree_data
* td
= CONTAINING_RECORD(le
, tree_data
, list_entry
);
1328 if (!td
->inserted
) {
1329 if (t
->header
.tree_id
== t
->root
->id
) {
1330 SHARED_BLOCK_REF sbr
;
1332 sbr
.offset
= t
->header
.address
;
1334 Status
= increase_extent_refcount(Vcb
, td
->treeholder
.address
, Vcb
->superblock
.node_size
, TYPE_SHARED_BLOCK_REF
, &sbr
, &td
->key
, t
->header
.level
- 1, Irp
);
1338 tbr
.offset
= t
->root
->id
;
1340 Status
= increase_extent_refcount(Vcb
, td
->treeholder
.address
, Vcb
->superblock
.node_size
, TYPE_TREE_BLOCK_REF
, &tbr
, &td
->key
, t
->header
.level
- 1, Irp
);
1343 if (!NT_SUCCESS(Status
)) {
1344 ERR("increase_extent_refcount returned %08x\n", Status
);
1354 le
= t
->itemlist
.Flink
;
1355 while (le
!= &t
->itemlist
) {
1356 tree_data
* td
= CONTAINING_RECORD(le
, tree_data
, list_entry
);
1358 if (!td
->inserted
&& td
->key
.obj_type
== TYPE_EXTENT_DATA
&& td
->size
>= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
)) {
1359 EXTENT_DATA
* ed
= (EXTENT_DATA
*)td
->data
;
1361 if (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) {
1362 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
1364 if (ed2
->size
> 0) {
1365 changed_extent
* ce
= NULL
;
1366 chunk
* c
= get_chunk_from_address(Vcb
, ed2
->address
);
1371 le2
= c
->changed_extents
.Flink
;
1372 while (le2
!= &c
->changed_extents
) {
1373 changed_extent
* ce2
= CONTAINING_RECORD(le2
, changed_extent
, list_entry
);
1375 if (ce2
->address
== ed2
->address
) {
1384 if (t
->header
.tree_id
== t
->root
->id
) {
1385 SHARED_DATA_REF sdr
;
1387 sdr
.offset
= t
->header
.address
;
1391 Status
= add_changed_extent_ref_sdr(ce
, &sdr
, true);
1392 if (!NT_SUCCESS(Status
)) {
1393 ERR("add_changed_extent_ref_edr returned %08x\n", Status
);
1397 Status
= add_changed_extent_ref_sdr(ce
, &sdr
, false);
1398 if (!NT_SUCCESS(Status
)) {
1399 ERR("add_changed_extent_ref_edr returned %08x\n", Status
);
1404 Status
= increase_extent_refcount(Vcb
, ed2
->address
, ed2
->size
, TYPE_SHARED_DATA_REF
, &sdr
, NULL
, 0, Irp
);
1406 EXTENT_DATA_REF edr
;
1408 edr
.root
= t
->root
->id
;
1409 edr
.objid
= td
->key
.obj_id
;
1410 edr
.offset
= td
->key
.offset
- ed2
->offset
;
1414 Status
= add_changed_extent_ref_edr(ce
, &edr
, true);
1415 if (!NT_SUCCESS(Status
)) {
1416 ERR("add_changed_extent_ref_edr returned %08x\n", Status
);
1420 Status
= add_changed_extent_ref_edr(ce
, &edr
, false);
1421 if (!NT_SUCCESS(Status
)) {
1422 ERR("add_changed_extent_ref_edr returned %08x\n", Status
);
1427 Status
= increase_extent_refcount(Vcb
, ed2
->address
, ed2
->size
, TYPE_EXTENT_DATA_REF
, &edr
, NULL
, 0, Irp
);
1430 if (!NT_SUCCESS(Status
)) {
1431 ERR("increase_extent_refcount returned %08x\n", Status
);
1443 t
->updated_extents
= true;
1444 t
->header
.tree_id
= t
->root
->id
;
1446 return STATUS_SUCCESS
;
/* allocate_tree_extents
 *
 * Walks Vcb->trees and, for every tree flagged for writing that has no new
 * address yet, obtains one through get_tree_new_address() and adds node_size
 * to the `used` counter of the chunk the new address falls in. While doing so
 * it tracks the highest header.level seen. A second walk calls
 * update_tree_extents() on written trees that already have an address, have
 * not had their extents updated, and whose header.level equals `level`.
 *
 * Vcb      - volume whose tree list is processed.
 * Irp      - passed through to get_tree_new_address / update_tree_extents.
 * rollback - passed through to the same two helpers.
 *
 * Returns STATUS_SUCCESS, or STATUS_INTERNAL_ERROR when no chunk is found for
 * a freshly allocated address.
 *
 * NOTE(review): this listing omits lines (local declarations, closing braces,
 * error returns, list advances and whatever steps `level`), so the comments
 * below describe only the statements that are visible. */
1449 static NTSTATUS
allocate_tree_extents(device_extension
* Vcb
, PIRP Irp
, LIST_ENTRY
* rollback
) {
1452 bool changed
= false;
1453 uint8_t max_level
= 0, level
;
1455 TRACE("(%p)\n", Vcb
);
// First walk: give each written tree without a new address a new location.
1457 le
= Vcb
->trees
.Flink
;
1458 while (le
!= &Vcb
->trees
) {
1459 tree
* t
= CONTAINING_RECORD(le
, tree
, list_entry
);
1461 if (t
->write
&& !t
->has_new_address
) {
// The tree already has an address: make sure the cache of the chunk holding
// it is loaded. cache_loaded is tested again once the chunk lock is held.
1464 if (t
->has_address
) {
1465 c
= get_chunk_from_address(Vcb
, t
->header
.address
);
1468 if (!c
->cache_loaded
) {
1469 acquire_chunk_lock(c
, Vcb
);
1471 if (!c
->cache_loaded
) {
1472 Status
= load_cache_chunk(Vcb
, c
, NULL
);
1474 if (!NT_SUCCESS(Status
)) {
1475 ERR("load_cache_chunk returned %08x\n", Status
);
1476 release_chunk_lock(c
, Vcb
);
1481 release_chunk_lock(c
, Vcb
);
1486 Status
= get_tree_new_address(Vcb
, t
, Irp
, rollback
);
1487 if (!NT_SUCCESS(Status
)) {
1488 ERR("get_tree_new_address returned %08x\n", Status
);
1492 TRACE("allocated extent %I64x\n", t
->new_address
);
// Account for the new node in the chunk that contains the new address.
1494 c
= get_chunk_from_address(Vcb
, t
->new_address
);
1497 c
->used
+= Vcb
->superblock
.node_size
;
1499 ERR("could not find chunk for address %I64x\n", t
->new_address
);
1500 return STATUS_INTERNAL_ERROR
;
// Remember the highest tree level encountered.
1505 if (t
->header
.level
> max_level
)
1506 max_level
= t
->header
.level
;
// NOTE(review): the condition guarding this early return is not visible here
// (presumably "nothing changed") - confirm against the full file.
1513 return STATUS_SUCCESS
;
// Second walk: update extent references for written trees at level `level`.
1517 le
= Vcb
->trees
.Flink
;
1518 while (le
!= &Vcb
->trees
) {
1519 tree
* t
= CONTAINING_RECORD(le
, tree
, list_entry
);
1521 if (t
->write
&& !t
->updated_extents
&& t
->has_address
&& t
->header
.level
== level
) {
1522 Status
= update_tree_extents(Vcb
, t
, Irp
, rollback
);
1523 if (!NT_SUCCESS(Status
)) {
1524 ERR("update_tree_extents returned %08x\n", Status
);
1538 return STATUS_SUCCESS
;
/* update_root_root
 *
 * For every written tree without a parent (i.e. a root node), refreshes the
 * owning root's bookkeeping with the tree's new address. For roots other
 * than root_root and chunk_root, the ROOT_ITEM stored in root_root is looked
 * up, the in-memory root_item gets the new block number, level and
 * generation, and the result is copied over the item's data. The root's
 * treeholder address/generation are updated as well.
 *
 * Afterwards, unless no_cache is set or the superblock has
 * BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE in compat_ro_flags,
 * update_chunk_caches() is run with Vcb->chunk_lock held shared.
 *
 * Returns STATUS_SUCCESS, or STATUS_INTERNAL_ERROR when the ROOT_ITEM for a
 * tree cannot be found.
 *
 * NOTE(review): this listing omits lines (declarations, closing braces,
 * error returns, list advance); nesting is inferred only from what is shown. */
1541 static NTSTATUS
update_root_root(device_extension
* Vcb
, bool no_cache
, PIRP Irp
, LIST_ENTRY
* rollback
) {
1545 TRACE("(%p)\n", Vcb
);
1547 le
= Vcb
->trees
.Flink
;
1548 while (le
!= &Vcb
->trees
) {
1549 tree
* t
= CONTAINING_RECORD(le
, tree
, list_entry
);
1551 if (t
->write
&& !t
->parent
) {
1552 if (t
->root
!= Vcb
->root_root
&& t
->root
!= Vcb
->chunk_root
) {
// Search with the maximum offset; the result is then checked for matching
// obj_id and obj_type only.
1556 searchkey
.obj_id
= t
->root
->id
;
1557 searchkey
.obj_type
= TYPE_ROOT_ITEM
;
1558 searchkey
.offset
= 0xffffffffffffffff;
1560 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, Irp
);
1561 if (!NT_SUCCESS(Status
)) {
1562 ERR("error - find_item returned %08x\n", Status
);
1566 if (tp
.item
->key
.obj_id
!= searchkey
.obj_id
|| tp
.item
->key
.obj_type
!= searchkey
.obj_type
) {
1567 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey
.obj_id
);
1568 return STATUS_INTERNAL_ERROR
;
1571 TRACE("updating the address for root %I64x to %I64x\n", searchkey
.obj_id
, t
->new_address
);
// Both generation fields are set to the superblock's current generation.
1573 t
->root
->root_item
.block_number
= t
->new_address
;
1574 t
->root
->root_item
.root_level
= t
->header
.level
;
1575 t
->root
->root_item
.generation
= Vcb
->superblock
.generation
;
1576 t
->root
->root_item
.generation2
= Vcb
->superblock
.generation
;
1578 // item is guaranteed to be at least sizeof(ROOT_ITEM), due to add_parents
1580 RtlCopyMemory(tp
.item
->data
, &t
->root
->root_item
, sizeof(ROOT_ITEM
));
1583 t
->root
->treeholder
.address
= t
->new_address
;
1584 t
->root
->treeholder
.generation
= Vcb
->superblock
.generation
;
// Chunk caches are refreshed under the shared chunk lock.
1590 if (!no_cache
&& !(Vcb
->superblock
.compat_ro_flags
& BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE
)) {
1591 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, true);
1592 Status
= update_chunk_caches(Vcb
, Irp
, rollback
);
1593 ExReleaseResourceLite(&Vcb
->chunk_lock
);
1595 if (!NT_SUCCESS(Status
)) {
1596 ERR("update_chunk_caches returned %08x\n", Status
);
1601 return STATUS_SUCCESS
;
/* do_tree_writes
 *
 * Issues the writes queued on the tree_writes list and waits for them.
 * Steps visible in this listing:
 *   1. An entry whose address equals the previous entry's address + length
 *      is merged into the previous entry (a new NonPagedPool buffer receives
 *      both payloads) and is unlinked from the list.
 *   2. An array of num_bits write_data_context is allocated and write_data()
 *      prepares one context per list entry (num_bits is computed in lines
 *      not shown - presumably the number of entries).
 *   3. Every stripe whose status is not WriteDataStatus_Ignore is sent with
 *      IoCallDriver; contexts flagged need_wait are then waited on.
 *   4. Failed stripes are reported through log_device_error and their status
 *      becomes the function's Status; the stripes are then freed.
 *   5. The partial stripes of a chunk are flushed with flush_partial_stripe
 *      while holding that chunk's partial_stripes_lock exclusively.
 *
 * Vcb         - volume being written.
 * tree_writes - list of tree_write entries (address, data, length).
 * no_free     - when false, the payload of an entry merged away in step 1 is
 *               released with ExFreePool.
 *
 * Returns STATUS_SUCCESS at the end of the visible path and
 * STATUS_INSUFFICIENT_RESOURCES on allocation failure.
 *
 * NOTE(review): this listing omits lines (declarations, braces, error
 * returns, list advances, and the conditions around steps 1 and 5 that
 * presumably involve `raid56`); confirm against the full file. */
1604 NTSTATUS
do_tree_writes(device_extension
* Vcb
, LIST_ENTRY
* tree_writes
, bool no_free
) {
1610 write_data_context
* wtc
;
1612 bool raid56
= false;
1614 // merge together runs
1616 le
= tree_writes
->Flink
;
1617 while (le
!= tree_writes
) {
1618 tw
= CONTAINING_RECORD(le
, tree_write
, list_entry
);
// Look the chunk up again only when the address falls outside the chunk
// remembered from the previous entry.
1620 if (!c
|| tw
->address
< c
->offset
|| tw
->address
>= c
->offset
+ c
->chunk_item
->size
)
1621 c
= get_chunk_from_address(Vcb
, tw
->address
);
1623 tree_write
* tw2
= CONTAINING_RECORD(le
->Blink
, tree_write
, list_entry
);
// Contiguous with the previous entry: concatenate both payloads.
1625 if (tw
->address
== tw2
->address
+ tw2
->length
) {
1626 uint8_t* data
= ExAllocatePoolWithTag(NonPagedPool
, tw2
->length
+ tw
->length
, ALLOC_TAG
);
1629 ERR("out of memory\n");
1630 return STATUS_INSUFFICIENT_RESOURCES
;
1633 RtlCopyMemory(data
, tw2
->data
, tw2
->length
);
1634 RtlCopyMemory(&data
[tw2
->length
], tw
->data
, tw
->length
);
1637 ExFreePool(tw2
->data
);
1640 tw2
->length
+= tw
->length
;
1642 if (!no_free
) // FIXME - what if we allocated this just now?
1643 ExFreePool(tw
->data
);
1645 RemoveEntryList(&tw
->list_entry
);
// Continue with the entry that now follows the merged one.
1648 le
= tw2
->list_entry
.Flink
;
1655 if (c
->chunk_item
->type
& (BLOCK_FLAG_RAID5
| BLOCK_FLAG_RAID6
))
1663 le
= tree_writes
->Flink
;
1664 while (le
!= tree_writes
) {
1665 tw
= CONTAINING_RECORD(le
, tree_write
, list_entry
);
1672 wtc
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(write_data_context
) * num_bits
, ALLOC_TAG
);
1674 ERR("out of memory\n");
1675 return STATUS_INSUFFICIENT_RESOURCES
;
// Prepare one write context per list entry.
1678 le
= tree_writes
->Flink
;
1680 while (le
!= tree_writes
) {
1681 tw
= CONTAINING_RECORD(le
, tree_write
, list_entry
);
1683 TRACE("address: %I64x, size: %x\n", tw
->address
, tw
->length
);
1685 KeInitializeEvent(&wtc
[bit_num
].Event
, NotificationEvent
, false);
1686 InitializeListHead(&wtc
[bit_num
].stripes
);
1687 wtc
[bit_num
].need_wait
= false;
1688 wtc
[bit_num
].stripes_left
= 0;
1689 wtc
[bit_num
].parity1
= wtc
[bit_num
].parity2
= wtc
[bit_num
].scratch
= NULL
;
1690 wtc
[bit_num
].mdl
= wtc
[bit_num
].parity1_mdl
= wtc
[bit_num
].parity2_mdl
= NULL
;
1692 Status
= write_data(Vcb
, tw
->address
, tw
->data
, tw
->length
, &wtc
[bit_num
], NULL
, NULL
, false, 0, HighPagePriority
);
1693 if (!NT_SUCCESS(Status
)) {
1694 ERR("write_data returned %08x\n", Status
);
// On failure the stripes of every context are released.
1696 for (i
= 0; i
< num_bits
; i
++) {
1697 free_write_data_stripes(&wtc
[i
]);
// Send the IRPs of all contexts that have at least one stripe.
1709 for (i
= 0; i
< num_bits
; i
++) {
1710 if (wtc
[i
].stripes
.Flink
!= &wtc
[i
].stripes
) {
1711 // launch writes and wait
1712 le
= wtc
[i
].stripes
.Flink
;
1713 while (le
!= &wtc
[i
].stripes
) {
1714 write_data_stripe
* stripe
= CONTAINING_RECORD(le
, write_data_stripe
, list_entry
);
1716 if (stripe
->status
!= WriteDataStatus_Ignore
) {
1717 wtc
[i
].need_wait
= true;
1718 IoCallDriver(stripe
->device
->devobj
, stripe
->Irp
);
// Wait only on the contexts for which an IRP was actually sent.
1726 for (i
= 0; i
< num_bits
; i
++) {
1727 if (wtc
[i
].need_wait
)
1728 KeWaitForSingleObject(&wtc
[i
].Event
, Executive
, KernelMode
, false, NULL
);
// Collect results: a failed stripe sets Status and is logged per device.
1731 for (i
= 0; i
< num_bits
; i
++) {
1732 le
= wtc
[i
].stripes
.Flink
;
1733 while (le
!= &wtc
[i
].stripes
) {
1734 write_data_stripe
* stripe
= CONTAINING_RECORD(le
, write_data_stripe
, list_entry
);
1736 if (stripe
->status
!= WriteDataStatus_Ignore
&& !NT_SUCCESS(stripe
->iosb
.Status
)) {
1737 Status
= stripe
->iosb
.Status
;
1738 log_device_error(Vcb
, stripe
->device
, BTRFS_DEV_STAT_WRITE_ERRORS
);
1745 free_write_data_stripes(&wtc
[i
]);
// Flush the partial stripes of the chunks touched by the writes.
1753 le
= tree_writes
->Flink
;
1754 while (le
!= tree_writes
) {
1755 tw
= CONTAINING_RECORD(le
, tree_write
, list_entry
);
1760 ExAcquireResourceExclusiveLite(&c
->partial_stripes_lock
, true);
1762 while (!IsListEmpty(&c
->partial_stripes
)) {
1763 partial_stripe
* ps
= CONTAINING_RECORD(RemoveHeadList(&c
->partial_stripes
), partial_stripe
, list_entry
);
1765 Status
= flush_partial_stripe(Vcb
, c
, ps
);
1768 ExFreePool(ps
->bmparr
);
1772 if (!NT_SUCCESS(Status
)) {
1773 ERR("flush_partial_stripe returned %08x\n", Status
);
1774 ExReleaseResourceLite(&c
->partial_stripes_lock
);
1779 ExReleaseResourceLite(&c
->partial_stripes_lock
);
1786 return STATUS_SUCCESS
;
/* write_trees
 *
 * Serializes the trees flagged for writing and hands them to
 * do_tree_writes().
 *
 * Visible phases:
 *   1. For level 0 upwards, each written tree at that level must already
 *      have a new address (otherwise STATUS_INTERNAL_ERROR). Its first key
 *      is taken from the item list and stored, with the new address and the
 *      current generation, in t->paritem. When the SKINNY_METADATA incompat
 *      flag is not set, the tree's EXTENT_ITEM_TREE in the extent root also
 *      gets its firstitem updated.
 *   2. Each tree is then stamped (address, generation, tree_id,
 *      MIXED_BACKREF flag, fs_uuid) and written into a freshly allocated
 *      node_size buffer: leaf_node entries plus payloads for level 0,
 *      internal_node entries otherwise. A CRC32C is stored in the first
 *      four bytes.
 *   3. A tree_write describing the buffer is inserted into a local list kept
 *      in ascending address order; the list is passed to do_tree_writes()
 *      and finally emptied.
 *
 * With DEBUG_PARANOID defined, additional consistency checks (key order,
 * item count, size) are compiled in.
 *
 * NOTE(review): this listing omits lines (declarations, braces, gotos or
 * returns, list advances, index increments and some conditions); the
 * comments below only describe statements that are visible. */
1789 static NTSTATUS
write_trees(device_extension
* Vcb
, PIRP Irp
) {
1791 uint8_t *data
, *body
;
1795 LIST_ENTRY tree_writes
;
1798 TRACE("(%p)\n", Vcb
);
1800 InitializeListHead(&tree_writes
);
// Phase 1: level by level, starting at the leaves (level 0).
1802 for (level
= 0; level
<= 255; level
++) {
1803 bool nothing_found
= true;
1805 TRACE("level = %u\n", level
);
1807 le
= Vcb
->trees
.Flink
;
1808 while (le
!= &Vcb
->trees
) {
1809 tree
* t
= CONTAINING_RECORD(le
, tree
, list_entry
);
1811 if (t
->write
&& t
->header
.level
== level
) {
1812 KEY firstitem
, searchkey
;
1816 if (!t
->has_new_address
) {
1817 ERR("error - tried to write tree with no new address\n");
1818 return STATUS_INTERNAL_ERROR
;
// Pick the tree's first key from its item list (the selection condition
// is not visible in this listing).
1821 le2
= t
->itemlist
.Flink
;
1822 while (le2
!= &t
->itemlist
) {
1823 tree_data
* td
= CONTAINING_RECORD(le2
, tree_data
, list_entry
);
1825 firstitem
= td
->key
;
// Record first key, new address and generation in t->paritem
// (presumably the parent's item referring to this tree - confirm).
1832 t
->paritem
->key
= firstitem
;
1833 t
->paritem
->treeholder
.address
= t
->new_address
;
1834 t
->paritem
->treeholder
.generation
= Vcb
->superblock
.generation
;
// Without skinny metadata the extent item carries the first key too.
1837 if (!(Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA
)) {
1838 EXTENT_ITEM_TREE
* eit
;
1840 searchkey
.obj_id
= t
->new_address
;
1841 searchkey
.obj_type
= TYPE_EXTENT_ITEM
;
1842 searchkey
.offset
= Vcb
->superblock
.node_size
;
1844 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, Irp
);
1845 if (!NT_SUCCESS(Status
)) {
1846 ERR("error - find_item returned %08x\n", Status
);
1850 if (keycmp(searchkey
, tp
.item
->key
)) {
1851 ERR("could not find %I64x,%x,%I64x in extent_root (found %I64x,%x,%I64x instead)\n", searchkey
.obj_id
, searchkey
.obj_type
, searchkey
.offset
, tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
);
1852 return STATUS_INTERNAL_ERROR
;
1855 if (tp
.item
->size
< sizeof(EXTENT_ITEM_TREE
)) {
// NOTE(review): the last argument is a sizeof result (size_t) formatted
// with %u - confirm this is intended on 64-bit builds.
1856 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, sizeof(EXTENT_ITEM_TREE
));
1857 return STATUS_INTERNAL_ERROR
;
1860 eit
= (EXTENT_ITEM_TREE
*)tp
.item
->data
;
1861 eit
->firstitem
= firstitem
;
1864 nothing_found
= false;
1874 TRACE("allocated tree extents\n");
// Phase 2: serialize the trees.
1876 le
= Vcb
->trees
.Flink
;
1877 while (le
!= &Vcb
->trees
) {
1878 tree
* t
= CONTAINING_RECORD(le
, tree
, list_entry
);
1880 #ifdef DEBUG_PARANOID
1881 uint32_t num_items
= 0, size
= 0;
1886 #ifdef DEBUG_PARANOID
// Paranoid checks: keys must be strictly ascending, and the recomputed
// item count and size must match the tree's bookkeeping.
1890 le2
= t
->itemlist
.Flink
;
1891 while (le2
!= &t
->itemlist
) {
1892 tree_data
* td
= CONTAINING_RECORD(le2
, tree_data
, list_entry
);
1897 if (keycmp(td
->key
, lastkey
) == 0) {
1898 ERR("(%I64x,%x,%I64x): duplicate key\n", td
->key
.obj_id
, td
->key
.obj_type
, td
->key
.offset
);
1900 } else if (keycmp(td
->key
, lastkey
) == -1) {
1901 ERR("(%I64x,%x,%I64x): key out of order\n", td
->key
.obj_id
, td
->key
.obj_type
, td
->key
.offset
);
1909 if (t
->header
.level
== 0)
1915 if (t
->header
.level
== 0)
1916 size
+= num_items
* sizeof(leaf_node
);
1918 size
+= num_items
* sizeof(internal_node
);
1920 if (num_items
!= t
->header
.num_items
) {
1921 ERR("tree %I64x, level %x: num_items was %x, expected %x\n", t
->root
->id
, t
->header
.level
, num_items
, t
->header
.num_items
);
1925 if (size
!= t
->size
) {
1926 ERR("tree %I64x, level %x: size was %x, expected %x\n", t
->root
->id
, t
->header
.level
, size
, t
->size
);
1930 if (t
->header
.num_items
== 0 && t
->parent
) {
1931 ERR("tree %I64x, level %x: tried to write empty tree with parent\n", t
->root
->id
, t
->header
.level
);
1935 if (t
->size
> Vcb
->superblock
.node_size
- sizeof(tree_header
)) {
1936 ERR("tree %I64x, level %x: tried to write overlarge tree (%x > %x)\n", t
->root
->id
, t
->header
.level
, t
->size
, Vcb
->superblock
.node_size
- sizeof(tree_header
));
1941 ERR("tree %p\n", t
);
1942 le2
= t
->itemlist
.Flink
;
1943 while (le2
!= &t
->itemlist
) {
1944 tree_data
* td
= CONTAINING_RECORD(le2
, tree_data
, list_entry
);
1946 ERR("%I64x,%x,%I64x inserted=%u\n", td
->key
.obj_id
, td
->key
.obj_type
, td
->key
.offset
, td
->inserted
);
// Stamp the header with the new location and the current generation.
1953 t
->header
.address
= t
->new_address
;
1954 t
->header
.generation
= Vcb
->superblock
.generation
;
1955 t
->header
.tree_id
= t
->root
->id
;
1956 t
->header
.flags
|= HEADER_FLAG_MIXED_BACKREF
;
1957 t
->header
.fs_uuid
= Vcb
->superblock
.uuid
;
1958 t
->has_address
= true;
1960 data
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.node_size
, ALLOC_TAG
);
1962 ERR("out of memory\n");
1963 Status
= STATUS_INSUFFICIENT_RESOURCES
;
// Buffer layout: tree_header first, then the zero-filled body.
1967 body
= data
+ sizeof(tree_header
);
1969 RtlCopyMemory(data
, &t
->header
, sizeof(tree_header
));
1970 RtlZeroMemory(body
, Vcb
->superblock
.node_size
- sizeof(tree_header
));
// Leaf: leaf_node entries fill the body from its start while the item
// payloads are packed backwards from the end of the block; each stored
// offset is relative to the start of the body.
1972 if (t
->header
.level
== 0) {
1973 leaf_node
* itemptr
= (leaf_node
*)body
;
1975 uint8_t* dataptr
= data
+ Vcb
->superblock
.node_size
;
1977 le2
= t
->itemlist
.Flink
;
1978 while (le2
!= &t
->itemlist
) {
1979 tree_data
* td
= CONTAINING_RECORD(le2
, tree_data
, list_entry
);
1981 dataptr
= dataptr
- td
->size
;
1983 itemptr
[i
].key
= td
->key
;
1984 itemptr
[i
].offset
= (uint32_t)((uint8_t*)dataptr
- (uint8_t*)body
);
1985 itemptr
[i
].size
= td
->size
;
1989 RtlCopyMemory(dataptr
, td
->data
, td
->size
);
// Internal node: one internal_node (key, child address, child generation)
// per item.
1995 internal_node
* itemptr
= (internal_node
*)body
;
1998 le2
= t
->itemlist
.Flink
;
1999 while (le2
!= &t
->itemlist
) {
2000 tree_data
* td
= CONTAINING_RECORD(le2
, tree_data
, list_entry
);
2002 itemptr
[i
].key
= td
->key
;
2003 itemptr
[i
].address
= td
->treeholder
.address
;
2004 itemptr
[i
].generation
= td
->treeholder
.generation
;
// The checksum is computed from fs_uuid onwards (node_size minus the
// size of the csum field) and stored in the first four bytes of the block.
2012 crc32
= calc_crc32c(0xffffffff, (uint8_t*)&((tree_header
*)data
)->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(((tree_header
*)data
)->csum
));
2014 *((uint32_t*)data
) = crc32
;
2015 TRACE("setting crc32 to %08x\n", crc32
);
2017 tw
= ExAllocatePoolWithTag(PagedPool
, sizeof(tree_write
), ALLOC_TAG
);
2019 ERR("out of memory\n");
2021 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2025 tw
->address
= t
->new_address
;
2026 tw
->length
= Vcb
->superblock
.node_size
;
// Keep tree_writes ordered by ascending address: insert before the first
// entry with a larger address, otherwise append.
2029 if (IsListEmpty(&tree_writes
))
2030 InsertTailList(&tree_writes
, &tw
->list_entry
);
2032 bool inserted
= false;
2034 le2
= tree_writes
.Flink
;
2035 while (le2
!= &tree_writes
) {
2036 tree_write
* tw2
= CONTAINING_RECORD(le2
, tree_write
, list_entry
);
2038 if (tw2
->address
> tw
->address
) {
2039 InsertHeadList(le2
->Blink
, &tw
->list_entry
);
2048 InsertTailList(&tree_writes
, &tw
->list_entry
);
// Phase 3: perform the queued writes (no_free = false).
2055 Status
= do_tree_writes(Vcb
, &tree_writes
, false);
2056 if (!NT_SUCCESS(Status
)) {
2057 ERR("do_tree_writes returned %08x\n", Status
);
2061 Status
= STATUS_SUCCESS
;
// Drain the local list, releasing each entry's payload.
2064 while (!IsListEmpty(&tree_writes
)) {
2065 le
= RemoveHeadList(&tree_writes
);
2066 tw
= CONTAINING_RECORD(le
, tree_write
, list_entry
);
2069 ExFreePool(tw
->data
);
2077 static void update_backup_superblock(device_extension
* Vcb
, superblock_backup
* sb
, PIRP Irp
) {
2081 RtlZeroMemory(sb
, sizeof(superblock_backup
));
2083 sb
->root_tree_addr
= Vcb
->superblock
.root_tree_addr
;
2084 sb
->root_tree_generation
= Vcb
->superblock
.generation
;
2085 sb
->root_level
= Vcb
->superblock
.root_level
;
2087 sb
->chunk_tree_addr
= Vcb
->superblock
.chunk_tree_addr
;
2088 sb
->chunk_tree_generation
= Vcb
->superblock
.chunk_root_generation
;
2089 sb
->chunk_root_level
= Vcb
->superblock
.chunk_root_level
;
2091 searchkey
.obj_id
= BTRFS_ROOT_EXTENT
;
2092 searchkey
.obj_type
= TYPE_ROOT_ITEM
;
2093 searchkey
.offset
= 0xffffffffffffffff;
2095 if (NT_SUCCESS(find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, Irp
))) {
2096 if (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
== searchkey
.obj_type
&& tp
.item
->size
>= sizeof(ROOT_ITEM
)) {
2097 ROOT_ITEM
* ri
= (ROOT_ITEM
*)tp
.item
->data
;
2099 sb
->extent_tree_addr
= ri
->block_number
;
2100 sb
->extent_tree_generation
= ri
->generation
;
2101 sb
->extent_root_level
= ri
->root_level
;
2105 searchkey
.obj_id
= BTRFS_ROOT_FSTREE
;
2107 if (NT_SUCCESS(find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, Irp
))) {
2108 if (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
== searchkey
.obj_type
&& tp
.item
->size
>= sizeof(ROOT_ITEM
)) {
2109 ROOT_ITEM
* ri
= (ROOT_ITEM
*)tp
.item
->data
;
2111 sb
->fs_tree_addr
= ri
->block_number
;
2112 sb
->fs_tree_generation
= ri
->generation
;
2113 sb
->fs_root_level
= ri
->root_level
;
2117 searchkey
.obj_id
= BTRFS_ROOT_DEVTREE
;
2119 if (NT_SUCCESS(find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, Irp
))) {
2120 if (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
== searchkey
.obj_type
&& tp
.item
->size
>= sizeof(ROOT_ITEM
)) {
2121 ROOT_ITEM
* ri
= (ROOT_ITEM
*)tp
.item
->data
;
2123 sb
->dev_root_addr
= ri
->block_number
;
2124 sb
->dev_root_generation
= ri
->generation
;
2125 sb
->dev_root_level
= ri
->root_level
;
2129 searchkey
.obj_id
= BTRFS_ROOT_CHECKSUM
;
2131 if (NT_SUCCESS(find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, Irp
))) {
2132 if (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
== searchkey
.obj_type
&& tp
.item
->size
>= sizeof(ROOT_ITEM
)) {
2133 ROOT_ITEM
* ri
= (ROOT_ITEM
*)tp
.item
->data
;
2135 sb
->csum_root_addr
= ri
->block_number
;
2136 sb
->csum_root_generation
= ri
->generation
;
2137 sb
->csum_root_level
= ri
->root_level
;
2141 sb
->total_bytes
= Vcb
->superblock
.total_bytes
;
2142 sb
->bytes_used
= Vcb
->superblock
.bytes_used
;
2143 sb
->num_devices
= Vcb
->superblock
.num_devices
;
2153 LIST_ENTRY list_entry
;
2154 } write_superblocks_stripe
;
/* State shared by all superblock writes issued in one batch.
 * NOTE(review): the member declarations are not visible in this listing;
 * code in this file accesses the members Event, stripes and left. */
2156 typedef struct _write_superblocks_context
{
2160 } write_superblocks_context
;
2162 _Function_class_(IO_COMPLETION_ROUTINE
)
2163 static NTSTATUS __stdcall
write_superblock_completion(PDEVICE_OBJECT DeviceObject
, PIRP Irp
, PVOID conptr
) {
2164 write_superblocks_stripe
* stripe
= conptr
;
2165 write_superblocks_context
* context
= stripe
->context
;
2167 UNUSED(DeviceObject
);
2169 stripe
->Status
= Irp
->IoStatus
.Status
;
2171 if (InterlockedDecrement(&context
->left
) == 0)
2172 KeSetEvent(&context
->Event
, 0, false);
2174 return STATUS_MORE_PROCESSING_REQUIRED
;
/* write_superblock
 *
 * Prepares (but does not send) one write IRP per superblock copy on `device`.
 * The loop covers every entry of superblock_addrs[] that is non-zero and
 * lies, together with sizeof(superblock), within the device's num_bytes.
 *
 * For each copy a sector-aligned NonPagedPool buffer is filled from
 * Vcb->superblock, patched with the device's DEV_ITEM and the copy's own
 * physical address, and checksummed. A write_superblocks_stripe holding the
 * buffer, the IRP and (for direct I/O) an MDL is appended to
 * context->stripes, with write_superblock_completion as completion routine.
 *
 * Returns STATUS_SUCCESS at the end of the visible path and
 * STATUS_INSUFFICIENT_RESOURCES when an allocation fails.
 *
 * NOTE(review): this listing omits lines (declarations, NULL checks,
 * clean-up on failure, the increment of `i`, braces); the comments describe
 * only what is visible. */
2177 static NTSTATUS
write_superblock(device_extension
* Vcb
, device
* device
, write_superblocks_context
* context
) {
2180 // All the documentation says that the Linux driver only writes one superblock
2181 // if it thinks a disk is an SSD, but this doesn't seem to be the case!
2183 while (superblock_addrs
[i
] > 0 && device
->devitem
.num_bytes
>= superblock_addrs
[i
] + sizeof(superblock
)) {
// The buffer length is sizeof(superblock) rounded up by sector_align().
2184 ULONG sblen
= (ULONG
)sector_align(sizeof(superblock
), Vcb
->superblock
.sector_size
);
2187 write_superblocks_stripe
* stripe
;
2188 PIO_STACK_LOCATION IrpSp
;
2190 sb
= ExAllocatePoolWithTag(NonPagedPool
, sblen
, ALLOC_TAG
);
2192 ERR("out of memory\n");
2193 return STATUS_INSUFFICIENT_RESOURCES
;
// Start from the in-memory superblock and zero any padding after it.
2196 RtlCopyMemory(sb
, &Vcb
->superblock
, sizeof(superblock
));
2198 if (sblen
> sizeof(superblock
))
2199 RtlZeroMemory((uint8_t*)sb
+ sizeof(superblock
), sblen
- sizeof(superblock
));
// Per-copy fields: this device's DEV_ITEM and the copy's physical address.
2201 RtlCopyMemory(&sb
->dev_item
, &device
->devitem
, sizeof(DEV_ITEM
));
2202 sb
->sb_phys_addr
= superblock_addrs
[i
];
// The CRC32C starts at the uuid field and spans sizeof(superblock) minus
// the checksum field; its complement is stored in checksum.
2204 crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&sb
->uuid
, (ULONG
)sizeof(superblock
) - sizeof(sb
->checksum
));
2205 RtlCopyMemory(&sb
->checksum
, &crc32
, sizeof(uint32_t));
2207 stripe
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(write_superblocks_stripe
), ALLOC_TAG
);
2209 ERR("out of memory\n");
2211 return STATUS_INSUFFICIENT_RESOURCES
;
2214 stripe
->buf
= (uint8_t*)sb
;
2216 stripe
->Irp
= IoAllocateIrp(device
->devobj
->StackSize
, false);
2218 ERR("IoAllocateIrp failed\n");
2221 return STATUS_INSUFFICIENT_RESOURCES
;
// Describe the request as a write-through IRP_MJ_WRITE.
2224 IrpSp
= IoGetNextIrpStackLocation(stripe
->Irp
);
2225 IrpSp
->MajorFunction
= IRP_MJ_WRITE
;
2226 IrpSp
->FileObject
= device
->fileobj
;
2229 IrpSp
->Flags
|= SL_WRITE_THROUGH
;
// Attach the buffer according to the device object's I/O flags: system
// buffer for DO_BUFFERED_IO, MDL for DO_DIRECT_IO, UserBuffer otherwise.
2231 if (device
->devobj
->Flags
& DO_BUFFERED_IO
) {
2232 stripe
->Irp
->AssociatedIrp
.SystemBuffer
= sb
;
2235 stripe
->Irp
->Flags
= IRP_BUFFERED_IO
;
2236 } else if (device
->devobj
->Flags
& DO_DIRECT_IO
) {
2237 stripe
->mdl
= IoAllocateMdl(sb
, sblen
, false, false, NULL
);
2239 ERR("IoAllocateMdl failed\n");
2240 IoFreeIrp(stripe
->Irp
);
2243 return STATUS_INSUFFICIENT_RESOURCES
;
2246 stripe
->Irp
->MdlAddress
= stripe
->mdl
;
2248 MmBuildMdlForNonPagedPool(stripe
->mdl
);
2250 stripe
->Irp
->UserBuffer
= sb
;
2254 IrpSp
->Parameters
.Write
.Length
= sblen
;
2255 IrpSp
->Parameters
.Write
.ByteOffset
.QuadPart
= superblock_addrs
[i
];
2257 IoSetCompletionRoutine(stripe
->Irp
, write_superblock_completion
, stripe
, true, true, true);
// Queue the stripe; the caller sends the IRPs later.
2259 stripe
->context
= context
;
2260 stripe
->device
= device
;
2261 InsertTailList(&context
->stripes
, &stripe
->list_entry
);
// NOTE(review): the condition guarding this message is not visible here.
2269 ERR("no superblocks written!\n");
2271 return STATUS_SUCCESS
;
/* write_superblocks
 *
 * Brings the in-memory superblock up to date and writes it to the devices.
 *
 * Visible steps:
 *   1. For written root nodes of root_root and chunk_root, the superblock's
 *      tree address and level fields are set from the tree's new address
 *      and header (plus chunk_root_generation for the chunk root).
 *   2. The backup array is shifted down by one slot and the last slot is
 *      refilled through update_backup_superblock().
 *   3. write_superblock() queues IRPs for each device that has a devobj and
 *      is not read-only.
 *   4. If nothing was queued, Status becomes STATUS_INTERNAL_ERROR;
 *      otherwise every queued IRP is sent with IoCallDriver and the
 *      context's Event is waited on.
 *   5. A failed stripe is logged through log_device_error and its status is
 *      taken as the result.
 *   6. All stripes are drained, releasing MDL, IRP and buffer.
 *
 * NOTE(review): this listing omits lines (declarations, braces, gotos,
 * list advances, the initialisation of context.left and the NULL check
 * around the MDL clean-up); confirm those against the full file. */
2274 static NTSTATUS
write_superblocks(device_extension
* Vcb
, PIRP Irp
) {
2278 write_superblocks_context context
;
2280 TRACE("(%p)\n", Vcb
);
// Step 1: pick up the new locations of the root tree and chunk tree roots.
2282 le
= Vcb
->trees
.Flink
;
2283 while (le
!= &Vcb
->trees
) {
2284 tree
* t
= CONTAINING_RECORD(le
, tree
, list_entry
);
2286 if (t
->write
&& !t
->parent
) {
2287 if (t
->root
== Vcb
->root_root
) {
2288 Vcb
->superblock
.root_tree_addr
= t
->new_address
;
2289 Vcb
->superblock
.root_level
= t
->header
.level
;
2290 } else if (t
->root
== Vcb
->chunk_root
) {
2291 Vcb
->superblock
.chunk_tree_addr
= t
->new_address
;
2292 Vcb
->superblock
.chunk_root_generation
= t
->header
.generation
;
2293 Vcb
->superblock
.chunk_root_level
= t
->header
.level
;
// Step 2: drop the first backup slot by shifting the others down, then
// rebuild the last one.
2300 for (i
= 0; i
< BTRFS_NUM_BACKUP_ROOTS
- 1; i
++) {
2301 RtlCopyMemory(&Vcb
->superblock
.backup
[i
], &Vcb
->superblock
.backup
[i
+1], sizeof(superblock_backup
));
2304 update_backup_superblock(Vcb
, &Vcb
->superblock
.backup
[BTRFS_NUM_BACKUP_ROOTS
- 1], Irp
);
2306 KeInitializeEvent(&context
.Event
, NotificationEvent
, false);
2307 InitializeListHead(&context
.stripes
);
// Step 3: queue superblock writes for every writable device.
2310 le
= Vcb
->devices
.Flink
;
2311 while (le
!= &Vcb
->devices
) {
2312 device
* dev
= CONTAINING_RECORD(le
, device
, list_entry
);
2314 if (dev
->devobj
&& !dev
->readonly
) {
2315 Status
= write_superblock(Vcb
, dev
, &context
);
2316 if (!NT_SUCCESS(Status
)) {
2317 ERR("write_superblock returned %08x\n", Status
);
2325 if (IsListEmpty(&context
.stripes
)) {
2326 ERR("error - not writing any superblocks\n");
2327 Status
= STATUS_INTERNAL_ERROR
;
// Step 4: send all queued IRPs, then wait for the completion event.
2331 le
= context
.stripes
.Flink
;
2332 while (le
!= &context
.stripes
) {
2333 write_superblocks_stripe
* stripe
= CONTAINING_RECORD(le
, write_superblocks_stripe
, list_entry
);
2335 IoCallDriver(stripe
->device
->devobj
, stripe
->Irp
);
2340 KeWaitForSingleObject(&context
.Event
, Executive
, KernelMode
, false, NULL
);
// Step 5: inspect the per-stripe results.
2342 le
= context
.stripes
.Flink
;
2343 while (le
!= &context
.stripes
) {
2344 write_superblocks_stripe
* stripe
= CONTAINING_RECORD(le
, write_superblocks_stripe
, list_entry
);
2346 if (!NT_SUCCESS(stripe
->Status
)) {
2347 ERR("device %I64x returned %08x\n", stripe
->device
->devitem
.dev_id
, stripe
->Status
);
2348 log_device_error(Vcb
, stripe
->device
, BTRFS_DEV_STAT_WRITE_ERRORS
);
2349 Status
= stripe
->Status
;
2356 Status
= STATUS_SUCCESS
;
// Step 6: release everything attached to the stripes.
2359 while (!IsListEmpty(&context
.stripes
)) {
2360 write_superblocks_stripe
* stripe
= CONTAINING_RECORD(RemoveHeadList(&context
.stripes
), write_superblocks_stripe
, list_entry
);
2363 if (stripe
->mdl
->MdlFlags
& MDL_PAGES_LOCKED
)
2364 MmUnlockPages(stripe
->mdl
);
2366 IoFreeMdl(stripe
->mdl
);
2370 IoFreeIrp(stripe
->Irp
);
2373 ExFreePool(stripe
->buf
);
/* flush_changed_extent
 *
 * Applies the reference changes recorded in a changed_extent (`ce`) to the
 * extent tree and then unlinks `ce` from its list.
 *
 * Visible behaviour:
 *   - If both ce->count and ce->old_count are 0, the refs and old_refs lists
 *     are simply drained.
 *   - First pass over ce->refs: for a TYPE_EXTENT_DATA_REF whose count
 *     exceeds the count of the matching old ref, the difference is added
 *     through increase_extent_refcount_data(). For a TYPE_SHARED_DATA_REF
 *     the old ref with the same offset is unlinked.
 *   - Second pass over ce->refs: the matching old EXTENT_DATA_REF is
 *     unlinked, and if the count dropped, the difference is removed through
 *     decrease_extent_refcount_data(). When ce->size differs from
 *     ce->old_size and old_count > 0, the EXTENT_ITEM is re-keyed from
 *     old_size to size by inserting a copy of its data and deleting the old
 *     item. Each processed ref is then unlinked.
 *   - If ce->count is 0 and the extent is not superseded, its size is
 *     subtracted from c->used and the range is passed to space_list_add().
 *
 * In both passes the extent size handed to the refcount helpers is old_size
 * when old_count > 0, and size otherwise.
 *
 * Returns STATUS_SUCCESS, STATUS_INTERNAL_ERROR when the old EXTENT_ITEM is
 * not found, or STATUS_INSUFFICIENT_RESOURCES when the copy cannot be
 * allocated.
 *
 * NOTE(review): this listing omits lines (declarations, braces, frees,
 * breaks, list advances, error returns); exact nesting of the statements
 * below should be confirmed against the full file. */
2381 static NTSTATUS
flush_changed_extent(device_extension
* Vcb
, chunk
* c
, changed_extent
* ce
, PIRP Irp
, LIST_ENTRY
* rollback
) {
2382 LIST_ENTRY
*le
, *le2
;
// Nothing referenced before or after: just empty both ref lists.
2386 if (ce
->count
== 0 && ce
->old_count
== 0) {
2387 while (!IsListEmpty(&ce
->refs
)) {
2388 changed_extent_ref
* cer
= CONTAINING_RECORD(RemoveHeadList(&ce
->refs
), changed_extent_ref
, list_entry
);
2392 while (!IsListEmpty(&ce
->old_refs
)) {
2393 changed_extent_ref
* cer
= CONTAINING_RECORD(RemoveHeadList(&ce
->old_refs
), changed_extent_ref
, list_entry
);
// First pass: handle references whose count went up.
2400 le
= ce
->refs
.Flink
;
2401 while (le
!= &ce
->refs
) {
2402 changed_extent_ref
* cer
= CONTAINING_RECORD(le
, changed_extent_ref
, list_entry
);
2403 uint32_t old_count
= 0;
2405 if (cer
->type
== TYPE_EXTENT_DATA_REF
) {
// Find the previous count of the same (root, objid, offset) reference.
2406 le2
= ce
->old_refs
.Flink
;
2407 while (le2
!= &ce
->old_refs
) {
2408 changed_extent_ref
* cer2
= CONTAINING_RECORD(le2
, changed_extent_ref
, list_entry
);
2410 if (cer2
->type
== TYPE_EXTENT_DATA_REF
&& cer2
->edr
.root
== cer
->edr
.root
&& cer2
->edr
.objid
== cer
->edr
.objid
&& cer2
->edr
.offset
== cer
->edr
.offset
) {
2411 old_count
= cer2
->edr
.count
;
2418 old_size
= ce
->old_count
> 0 ? ce
->old_size
: ce
->size
;
2420 if (cer
->edr
.count
> old_count
) {
2421 Status
= increase_extent_refcount_data(Vcb
, ce
->address
, old_size
, cer
->edr
.root
, cer
->edr
.objid
, cer
->edr
.offset
, cer
->edr
.count
- old_count
, Irp
);
2423 if (!NT_SUCCESS(Status
)) {
2424 ERR("increase_extent_refcount_data returned %08x\n", Status
);
2428 } else if (cer
->type
== TYPE_SHARED_DATA_REF
) {
// Shared data refs: drop the old entry with the same offset.
2429 le2
= ce
->old_refs
.Flink
;
2430 while (le2
!= &ce
->old_refs
) {
2431 changed_extent_ref
* cer2
= CONTAINING_RECORD(le2
, changed_extent_ref
, list_entry
);
2433 if (cer2
->type
== TYPE_SHARED_DATA_REF
&& cer2
->sdr
.offset
== cer
->sdr
.offset
) {
2434 RemoveEntryList(&cer2
->list_entry
);
// Second pass: handle references whose count went down. The next list
// entry is saved in le3 before the current one is unlinked.
2446 le
= ce
->refs
.Flink
;
2447 while (le
!= &ce
->refs
) {
2448 changed_extent_ref
* cer
= CONTAINING_RECORD(le
, changed_extent_ref
, list_entry
);
2449 LIST_ENTRY
* le3
= le
->Flink
;
2450 uint32_t old_count
= 0;
2452 if (cer
->type
== TYPE_EXTENT_DATA_REF
) {
2453 le2
= ce
->old_refs
.Flink
;
2454 while (le2
!= &ce
->old_refs
) {
2455 changed_extent_ref
* cer2
= CONTAINING_RECORD(le2
, changed_extent_ref
, list_entry
);
2457 if (cer2
->type
== TYPE_EXTENT_DATA_REF
&& cer2
->edr
.root
== cer
->edr
.root
&& cer2
->edr
.objid
== cer
->edr
.objid
&& cer2
->edr
.offset
== cer
->edr
.offset
) {
2458 old_count
= cer2
->edr
.count
;
2460 RemoveEntryList(&cer2
->list_entry
);
2468 old_size
= ce
->old_count
> 0 ? ce
->old_size
: ce
->size
;
2470 if (cer
->edr
.count
< old_count
) {
2471 Status
= decrease_extent_refcount_data(Vcb
, ce
->address
, old_size
, cer
->edr
.root
, cer
->edr
.objid
, cer
->edr
.offset
,
2472 old_count
- cer
->edr
.count
, ce
->superseded
, Irp
);
2474 if (!NT_SUCCESS(Status
)) {
2475 ERR("decrease_extent_refcount_data returned %08x\n", Status
);
// The extent was resized: move its EXTENT_ITEM from the old_size key to
// the new size key, carrying the item data over unchanged.
2480 if (ce
->size
!= ce
->old_size
&& ce
->old_count
> 0) {
2485 searchkey
.obj_id
= ce
->address
;
2486 searchkey
.obj_type
= TYPE_EXTENT_ITEM
;
2487 searchkey
.offset
= ce
->old_size
;
2489 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, Irp
);
2490 if (!NT_SUCCESS(Status
)) {
2491 ERR("error - find_item returned %08x\n", Status
);
2495 if (keycmp(searchkey
, tp
.item
->key
)) {
2496 ERR("could not find (%I64x,%x,%I64x) in extent tree\n", searchkey
.obj_id
, searchkey
.obj_type
, searchkey
.offset
);
2497 return STATUS_INTERNAL_ERROR
;
2500 if (tp
.item
->size
> 0) {
2501 data
= ExAllocatePoolWithTag(PagedPool
, tp
.item
->size
, ALLOC_TAG
);
2504 ERR("out of memory\n");
2505 return STATUS_INSUFFICIENT_RESOURCES
;
2508 RtlCopyMemory(data
, tp
.item
->data
, tp
.item
->size
);
2512 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, ce
->address
, TYPE_EXTENT_ITEM
, ce
->size
, data
, tp
.item
->size
, NULL
, Irp
);
2513 if (!NT_SUCCESS(Status
)) {
2514 ERR("insert_tree_item returned %08x\n", Status
);
2515 if (data
) ExFreePool(data
);
2519 Status
= delete_tree_item(Vcb
, &tp
);
2520 if (!NT_SUCCESS(Status
)) {
2521 ERR("delete_tree_item returned %08x\n", Status
);
2527 RemoveEntryList(&cer
->list_entry
);
2533 #ifdef DEBUG_PARANOID
2534 if (!IsListEmpty(&ce
->old_refs
))
2535 WARN("old_refs not empty\n");
// No references remain and the extent was not superseded: return its
// space to the chunk.
2539 if (ce
->count
== 0 && !ce
->superseded
) {
2540 c
->used
-= ce
->size
;
2541 space_list_add(c
, ce
->address
, ce
->size
, rollback
);
2544 RemoveEntryList(&ce
->list_entry
);
2547 return STATUS_SUCCESS
;
// Rewrites the checksum-tree (Vcb->checksum_root) entries covering a run of sectors.
//
// Vcb     - volume whose checksum tree is updated
// address - start address of the run; used as the key offset of TYPE_EXTENT_CSUM items
// length  - number of sectors in the run (it is multiplied by superblock.sector_size to
//           get the byte span, and by sizeof(uint32_t) to size the csum copy)
// csum    - one uint32_t checksum per sector, or NULL to mark the run as deleted
// Irp     - passed through to the tree helpers
//
// The function returns void; failures are reported through ERR().
2550 void add_checksum_entry(device_extension
* Vcb
, uint64_t address
, ULONG length
, uint32_t* csum
, PIRP Irp
) {
2552 traverse_ptr tp
, next_tp
;
2554 uint64_t startaddr
, endaddr
;
2556 uint32_t* checksums
;
2559 ULONG runlength
, index
;
2561 TRACE("(%p, %I64x, %x, %p, %p)\n", Vcb
, address
, length
, csum
, Irp
);
// Look up the checksum item at the start address.
2563 searchkey
.obj_id
= EXTENT_CSUM_ID
;
2564 searchkey
.obj_type
= TYPE_EXTENT_CSUM
;
2565 searchkey
.offset
= address
;
2567 // FIXME - create checksum_root if it doesn't exist at all
2569 Status
= find_item(Vcb
, Vcb
->checksum_root
, &tp
, &searchkey
, false, Irp
);
2570 if (Status
== STATUS_NOT_FOUND
) { // tree is completely empty
2571 if (csum
) { // not deleted
2572 ULONG length2
= length
;
2573 uint64_t off
= address
;
2574 uint32_t* data
= csum
;
// Nothing to merge with: write the new checksums directly, in pieces of at most
// MAX_CSUM_SIZE bytes per item.
2577 uint16_t il
= (uint16_t)min(length2
, MAX_CSUM_SIZE
/ sizeof(uint32_t));
// insert_tree_item is handed its own copy of the checksums rather than the caller's buffer.
2579 checksums
= ExAllocatePoolWithTag(PagedPool
, il
* sizeof(uint32_t), ALLOC_TAG
);
2581 ERR("out of memory\n");
2585 RtlCopyMemory(checksums
, data
, il
* sizeof(uint32_t));
2587 Status
= insert_tree_item(Vcb
, Vcb
->checksum_root
, EXTENT_CSUM_ID
, TYPE_EXTENT_CSUM
, off
, checksums
,
2588 il
* sizeof(uint32_t), NULL
, Irp
);
2589 if (!NT_SUCCESS(Status
)) {
2590 ERR("insert_tree_item returned %08x\n", Status
);
2591 ExFreePool(checksums
);
// The next item's key offset advances by the bytes of disk covered by the il checksums.
2598 off
+= il
* Vcb
->superblock
.sector_size
;
2601 } while (length2
> 0);
2603 } else if (!NT_SUCCESS(Status
)) {
2604 ERR("find_item returned %08x\n", Status
);
2609 // FIXME - check entry is TYPE_EXTENT_CSUM?
// If the item found starts before `address` and its checksums reach `address`, the range
// to rebuild starts at that item's offset; the other visible assignment uses `address`.
2611 if (tp
.item
->key
.offset
< address
&& tp
.item
->key
.offset
+ (tp
.item
->size
* Vcb
->superblock
.sector_size
/ sizeof(uint32_t)) >= address
)
2612 startaddr
= tp
.item
->key
.offset
;
2614 startaddr
= address
;
// Repeat the lookup at the end of the run to see whether an existing item extends past it.
2616 searchkey
.obj_id
= EXTENT_CSUM_ID
;
2617 searchkey
.obj_type
= TYPE_EXTENT_CSUM
;
2618 searchkey
.offset
= address
+ (length
* Vcb
->superblock
.sector_size
);
2620 Status
= find_item(Vcb
, Vcb
->checksum_root
, &tp
, &searchkey
, false, Irp
);
2621 if (!NT_SUCCESS(Status
)) {
2622 ERR("find_item returned %08x\n", Status
);
// tplen = number of checksums (sectors) held by the item found.
2626 tplen
= tp
.item
->size
/ sizeof(uint32_t);
2628 if (tp
.item
->key
.offset
+ (tplen
* Vcb
->superblock
.sector_size
) >= address
+ (length
* Vcb
->superblock
.sector_size
))
2629 endaddr
= tp
.item
->key
.offset
+ (tplen
* Vcb
->superblock
.sector_size
);
2631 endaddr
= address
+ (length
* Vcb
->superblock
.sector_size
);
2633 TRACE("cs starts at %I64x (%x sectors)\n", address
, length
);
2634 TRACE("startaddr = %I64x\n", startaddr
);
2635 TRACE("endaddr = %I64x\n", endaddr
);
// len = number of sectors in the rebuilt range [startaddr, endaddr).
2637 len
= (ULONG
)((endaddr
- startaddr
) / Vcb
->superblock
.sector_size
);
// Scratch array with one checksum slot per sector of the range.
2639 checksums
= ExAllocatePoolWithTag(PagedPool
, sizeof(uint32_t) * len
, ALLOC_TAG
);
2641 ERR("out of memory\n");
// Bitmap with one bit per sector of the range; every bit starts out set.
2645 bmparr
= ExAllocatePoolWithTag(PagedPool
, sizeof(ULONG
) * ((len
/8)+1), ALLOC_TAG
);
2647 ERR("out of memory\n");
2648 ExFreePool(checksums
);
2652 RtlInitializeBitMap(&bmp
, bmparr
, len
);
2653 RtlSetAllBits(&bmp
);
2655 searchkey
.obj_id
= EXTENT_CSUM_ID
;
2656 searchkey
.obj_type
= TYPE_EXTENT_CSUM
;
2657 searchkey
.offset
= address
;
2659 Status
= find_item(Vcb
, Vcb
->checksum_root
, &tp
, &searchkey
, false, Irp
);
2660 if (!NT_SUCCESS(Status
)) {
2661 ERR("find_item returned %08x\n", Status
);
2662 ExFreePool(checksums
);
2667 // set bit = free space, cleared bit = allocated sector
// Walk the existing items that start inside the range: copy their checksums into the
// scratch array (clamped to the end of the range), clear the matching bits, and delete
// the item from the tree.
2669 while (tp
.item
->key
.offset
< endaddr
) {
2670 if (tp
.item
->key
.offset
>= startaddr
) {
2671 if (tp
.item
->size
> 0) {
2672 ULONG itemlen
= (ULONG
)min((len
- (tp
.item
->key
.offset
- startaddr
) / Vcb
->superblock
.sector_size
) * sizeof(uint32_t), tp
.item
->size
);
2674 RtlCopyMemory(&checksums
[(tp
.item
->key
.offset
- startaddr
) / Vcb
->superblock
.sector_size
], tp
.item
->data
, itemlen
);
2675 RtlClearBits(&bmp
, (ULONG
)((tp
.item
->key
.offset
- startaddr
) / Vcb
->superblock
.sector_size
), itemlen
/ sizeof(uint32_t));
2678 Status
= delete_tree_item(Vcb
, &tp
);
2679 if (!NT_SUCCESS(Status
)) {
2680 ERR("delete_tree_item returned %08x\n", Status
);
2681 ExFreePool(checksums
);
2687 if (find_next_item(Vcb
, &tp
, &next_tp
, false, Irp
)) {
// Apply the caller's change on top of the existing data: a NULL csum sets the run's bits
// (no checksum), otherwise the new checksums are copied in and the run's bits are cleared.
2693 if (!csum
) { // deleted
2694 RtlSetBits(&bmp
, (ULONG
)((address
- startaddr
) / Vcb
->superblock
.sector_size
), length
);
2696 RtlCopyMemory(&checksums
[(address
- startaddr
) / Vcb
->superblock
.sector_size
], csum
, length
* sizeof(uint32_t));
2697 RtlClearBits(&bmp
, (ULONG
)((address
- startaddr
) / Vcb
->superblock
.sector_size
), length
);
// Write every run of clear bits back to the tree as one or more items.
2700 runlength
= RtlFindFirstRunClear(&bmp
, &index
);
2702 while (runlength
!= 0) {
// Clamp a run that reaches or passes the end of the range.
2706 if (index
+ runlength
>= len
) {
2707 runlength
= len
- index
;
// rl = checksums in this item, capped so the item is at most MAX_CSUM_SIZE bytes.
2718 if (runlength
* sizeof(uint32_t) > MAX_CSUM_SIZE
)
2719 rl
= MAX_CSUM_SIZE
/ sizeof(uint32_t);
2721 rl
= (uint16_t)runlength
;
2723 data
= ExAllocatePoolWithTag(PagedPool
, sizeof(uint32_t) * rl
, ALLOC_TAG
);
2725 ERR("out of memory\n");
2727 ExFreePool(checksums
);
2731 RtlCopyMemory(data
, &checksums
[index
], sizeof(uint32_t) * rl
);
// Key offset of the new item: start of the range plus the byte offset of sector `index`.
2733 off
= startaddr
+ UInt32x32To64(index
, Vcb
->superblock
.sector_size
);
2735 Status
= insert_tree_item(Vcb
, Vcb
->checksum_root
, EXTENT_CSUM_ID
, TYPE_EXTENT_CSUM
, off
, data
, sizeof(uint32_t) * rl
, NULL
, Irp
);
2736 if (!NT_SUCCESS(Status
)) {
2737 ERR("insert_tree_item returned %08x\n", Status
);
2740 ExFreePool(checksums
);
2746 } while (runlength
> 0);
2748 runlength
= RtlFindNextForwardRunClear(&bmp
, index
, &index
);
2752 ExFreePool(checksums
);
// Walks every chunk in Vcb->chunks and brings its on-disk accounting up to date:
// flushes the chunk's changed extents, flushes and releases its old cache file, and,
// when c->used differs from c->oldused, rewrites the chunk's BLOCK_GROUP_ITEM in the
// extent root and adjusts superblock.bytes_used.
//
// Vcb->chunk_lock is held shared for the whole walk and each chunk's own lock is held
// while that chunk is processed; the visible error paths release the chunk lock.
// Status is set to STATUS_SUCCESS once all chunks have been processed.
2756 static NTSTATUS
update_chunk_usage(device_extension
* Vcb
, PIRP Irp
, LIST_ENTRY
* rollback
) {
2757 LIST_ENTRY
*le
= Vcb
->chunks
.Flink
, *le2
;
2761 BLOCK_GROUP_ITEM
* bgi
;
2764 TRACE("(%p)\n", Vcb
);
2766 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, true);
2768 while (le
!= &Vcb
->chunks
) {
2769 c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
2771 acquire_chunk_lock(c
, Vcb
);
// The chunk's cache is loaded first if the chunk has pending extent changes or its usage
// has changed.
2773 if (!c
->cache_loaded
&& (!IsListEmpty(&c
->changed_extents
) || c
->used
!= c
->oldused
)) {
2774 Status
= load_cache_chunk(Vcb
, c
, NULL
);
2776 if (!NT_SUCCESS(Status
)) {
2777 ERR("load_cache_chunk returned %08x\n", Status
);
2778 release_chunk_lock(c
, Vcb
);
// Flush each changed extent; the successor is captured in le3 before the call.
2783 le2
= c
->changed_extents
.Flink
;
2784 while (le2
!= &c
->changed_extents
) {
2785 LIST_ENTRY
* le3
= le2
->Flink
;
2786 changed_extent
* ce
= CONTAINING_RECORD(le2
, changed_extent
, list_entry
);
2788 Status
= flush_changed_extent(Vcb
, c
, ce
, Irp
, rollback
);
2789 if (!NT_SUCCESS(Status
)) {
2790 ERR("flush_changed_extent returned %08x\n", Status
);
2791 release_chunk_lock(c
, Vcb
);
2798 // This is usually done by update_chunks, but we have to check again in case any new chunks
2799 // have been allocated since.
2801 Status
= create_chunk(Vcb
, c
, Irp
);
2802 if (!NT_SUCCESS(Status
)) {
2803 ERR("create_chunk returned %08x\n", Status
);
2804 release_chunk_lock(c
, Vcb
);
// A dirty old cache file is flushed through a local batch list, which is committed
// immediately (or cleared if flush_fcb fails).
2810 if (c
->old_cache
->dirty
) {
2811 LIST_ENTRY batchlist
;
2813 InitializeListHead(&batchlist
);
2815 Status
= flush_fcb(c
->old_cache
, false, &batchlist
, Irp
);
2816 if (!NT_SUCCESS(Status
)) {
2817 ERR("flush_fcb returned %08x\n", Status
);
2818 release_chunk_lock(c
, Vcb
);
2819 clear_batch_list(Vcb
, &batchlist
);
2823 Status
= commit_batch_list(Vcb
, &batchlist
, Irp
);
2824 if (!NT_SUCCESS(Status
)) {
2825 ERR("commit_batch_list returned %08x\n", Status
);
2826 release_chunk_lock(c
, Vcb
);
// Release this chunk's use of the old cache fcb, reap it once its refcount is zero, and
// clear the pointer.
2831 free_fcb(c
->old_cache
);
2833 if (c
->old_cache
->refcount
== 0)
2834 reap_fcb(c
->old_cache
);
2836 c
->old_cache
= NULL
;
// Usage changed: replace the BLOCK_GROUP_ITEM keyed (chunk offset, TYPE_BLOCK_GROUP_ITEM,
// chunk size) with a copy whose `used` field is c->used.
2839 if (c
->used
!= c
->oldused
) {
2841 uint64_t old_phys_used
, phys_used
;
2843 searchkey
.obj_id
= c
->offset
;
2844 searchkey
.obj_type
= TYPE_BLOCK_GROUP_ITEM
;
2845 searchkey
.offset
= c
->chunk_item
->size
;
2847 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, Irp
);
2848 if (!NT_SUCCESS(Status
)) {
2849 ERR("error - find_item returned %08x\n", Status
);
2850 release_chunk_lock(c
, Vcb
);
// A non-zero keycmp result means the item found is not the one searched for.
2854 if (keycmp(searchkey
, tp
.item
->key
)) {
2855 ERR("could not find (%I64x,%x,%I64x) in extent_root\n", searchkey
.obj_id
, searchkey
.obj_type
, searchkey
.offset
);
2856 Status
= STATUS_INTERNAL_ERROR
;
2857 release_chunk_lock(c
, Vcb
);
2861 if (tp
.item
->size
< sizeof(BLOCK_GROUP_ITEM
)) {
2862 ERR("(%I64x,%x,%I64x) was %u bytes, expected %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, sizeof(BLOCK_GROUP_ITEM
));
2863 Status
= STATUS_INTERNAL_ERROR
;
2864 release_chunk_lock(c
, Vcb
);
// The replacement is built in a fresh allocation of the same size as the existing item.
2868 bgi
= ExAllocatePoolWithTag(PagedPool
, tp
.item
->size
, ALLOC_TAG
);
2870 ERR("out of memory\n");
2871 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2872 release_chunk_lock(c
, Vcb
);
2876 RtlCopyMemory(bgi
, tp
.item
->data
, tp
.item
->size
);
2877 bgi
->used
= c
->used
;
// Paranoid builds treat a usage value with the top bit set as a negative (underflowed) value.
2879 #ifdef DEBUG_PARANOID
2880 if (bgi
->used
& 0x8000000000000000) {
2881 ERR("refusing to write BLOCK_GROUP_ITEM with negative usage value (%I64x)", bgi
->used
);
2886 TRACE("adjusting usage of chunk %I64x to %I64x\n", c
->offset
, c
->used
);
2888 Status
= delete_tree_item(Vcb
, &tp
);
2889 if (!NT_SUCCESS(Status
)) {
2890 ERR("delete_tree_item returned %08x\n", Status
);
2892 release_chunk_lock(c
, Vcb
);
2896 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, searchkey
.obj_id
, searchkey
.obj_type
, searchkey
.offset
, bgi
, tp
.item
->size
, NULL
, Irp
);
2897 if (!NT_SUCCESS(Status
)) {
2898 ERR("insert_tree_item returned %08x\n", Status
);
2900 release_chunk_lock(c
, Vcb
);
// superblock.bytes_used is adjusted by the difference between the values
// chunk_estimate_phys_size returns for the new and old usage.
// NOTE(review): both a declaring and a plain-assignment form of these two statements
// appear below - presumably alternatives under a preprocessor conditional; confirm.
2905 uint64_t old_phys_used
= chunk_estimate_phys_size(Vcb
, c
, c
->oldused
);
2906 uint64_t phys_used
= chunk_estimate_phys_size(Vcb
, c
, c
->used
);
2908 old_phys_used
= chunk_estimate_phys_size(Vcb
, c
, c
->oldused
);
2909 phys_used
= chunk_estimate_phys_size(Vcb
, c
, c
->used
);
// Guard against underflow: the delta is applied only if the result stays above zero,
// and the other visible assignment sets bytes_used to 0.
2912 if (Vcb
->superblock
.bytes_used
+ phys_used
> old_phys_used
)
2913 Vcb
->superblock
.bytes_used
+= phys_used
- old_phys_used
;
2915 Vcb
->superblock
.bytes_used
= 0;
2917 c
->oldused
= c
->used
;
2920 release_chunk_lock(c
, Vcb
);
2925 Status
= STATUS_SUCCESS
;
2928 ExReleaseResourceLite(&Vcb
->chunk_lock
);
// Begins a walk of t->itemlist from its head, viewing each entry as a tree_data.
// NOTE(review): presumably this stores the first entry's key in *key (split_tree_at
// calls it to fill td->key) - the assignment is not visible here; confirm.
2933 static void get_first_item(tree
* t
, KEY
* key
) {
2936 le
= t
->itemlist
.Flink
;
2937 while (le
!= &t
->itemlist
) {
2938 tree_data
* td
= CONTAINING_RECORD(le
, tree_data
, list_entry
);
// Splits tree t in two at newfirstitem: t keeps the first `numitems` items (`size`
// bytes) and a newly allocated tree nt takes newfirstitem and everything after it.
//
// nt is linked into the structure either by adding an entry after t's entry in the
// existing parent, or, in the "adding new tree parent" path, by creating a new
// two-entry tree pt one level up and installing it as the root's top tree.
//
// Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES when an allocation fails, or
// STATUS_INTERNAL_ERROR when a new parent would be needed above level 255.
2945 static NTSTATUS
split_tree_at(device_extension
* Vcb
, tree
* t
, tree_data
* newfirstitem
, uint32_t numitems
, uint32_t size
) {
2948 tree_data
* oldlastitem
;
2950 TRACE("splitting tree in %I64x at (%I64x,%x,%I64x)\n", t
->root
->id
, newfirstitem
->key
.obj_id
, newfirstitem
->key
.obj_type
, newfirstitem
->key
.offset
);
2952 nt
= ExAllocatePoolWithTag(PagedPool
, sizeof(tree
), ALLOC_TAG
);
2954 ERR("out of memory\n");
2955 return STATUS_INSUFFICIENT_RESOURCES
;
// Trees above level 0 get a nonpaged part holding a fast mutex.
2958 if (t
->header
.level
> 0) {
2959 nt
->nonpaged
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(tree_nonpaged
), ALLOC_TAG
);
2960 if (!nt
->nonpaged
) {
2961 ERR("out of memory\n");
2963 return STATUS_INSUFFICIENT_RESOURCES
;
2966 ExInitializeFastMutex(&nt
->nonpaged
->mutex
);
2968 nt
->nonpaged
= NULL
;
// The new tree starts from a copy of t's header, with no address yet, the current
// generation, and the item count t is giving up.
2970 RtlCopyMemory(&nt
->header
, &t
->header
, sizeof(tree_header
));
2971 nt
->header
.address
= 0;
2972 nt
->header
.generation
= Vcb
->superblock
.generation
;
2973 nt
->header
.num_items
= t
->header
.num_items
- numitems
;
2974 nt
->header
.flags
= HEADER_FLAG_MIXED_BACKREF
| HEADER_FLAG_WRITTEN
;
2976 nt
->has_address
= false;
2978 nt
->parent
= t
->parent
;
2980 #ifdef DEBUG_PARANOID
2981 if (nt
->parent
&& nt
->parent
->header
.level
<= nt
->header
.level
) int3
;
2985 nt
->new_address
= 0;
2986 nt
->has_new_address
= false;
2987 nt
->updated_extents
= false;
2988 nt
->uniqueness_determined
= true;
2989 nt
->is_unique
= true;
2990 nt
->list_entry_hash
.Flink
= NULL
;
2992 InitializeListHead(&nt
->itemlist
);
// Cut the item list in place: [newfirstitem .. old tail] becomes nt's list and t's list
// now ends at the entry that preceded newfirstitem.
2994 oldlastitem
= CONTAINING_RECORD(newfirstitem
->list_entry
.Blink
, tree_data
, list_entry
);
2996 nt
->itemlist
.Flink
= &newfirstitem
->list_entry
;
2997 nt
->itemlist
.Blink
= t
->itemlist
.Blink
;
2998 nt
->itemlist
.Flink
->Blink
= &nt
->itemlist
;
2999 nt
->itemlist
.Blink
->Flink
= &nt
->itemlist
;
3001 t
->itemlist
.Blink
= &oldlastitem
->list_entry
;
3002 t
->itemlist
.Blink
->Flink
= &t
->itemlist
;
// `size` is what t keeps, so nt receives the remainder.
3004 nt
->size
= t
->size
- size
;
3006 t
->header
.num_items
= numitems
;
3009 InsertTailList(&Vcb
->trees
, &nt
->list_entry
);
// Internal node: child trees that are loaded must now point at nt as their parent.
3011 if (nt
->header
.level
> 0) {
3012 LIST_ENTRY
* le
= nt
->itemlist
.Flink
;
3014 while (le
!= &nt
->itemlist
) {
3015 tree_data
* td2
= CONTAINING_RECORD(le
, tree_data
, list_entry
);
3017 if (td2
->treeholder
.tree
) {
3018 td2
->treeholder
.tree
->parent
= nt
;
3019 #ifdef DEBUG_PARANOID
3020 if (td2
->treeholder
.tree
->parent
&& td2
->treeholder
.tree
->parent
->header
.level
<= td2
->treeholder
.tree
->header
.level
) int3
;
// Second walk over nt's items: each item not yet marked `inserted` that has data gets
// its data copied into a fresh allocation and is then marked inserted.
3027 LIST_ENTRY
* le
= nt
->itemlist
.Flink
;
3029 while (le
!= &nt
->itemlist
) {
3030 tree_data
* td2
= CONTAINING_RECORD(le
, tree_data
, list_entry
);
3032 if (!td2
->inserted
&& td2
->data
) {
3033 uint8_t* data
= ExAllocatePoolWithTag(PagedPool
, td2
->size
, ALLOC_TAG
);
3036 ERR("out of memory\n");
3037 return STATUS_INSUFFICIENT_RESOURCES
;
3040 RtlCopyMemory(data
, td2
->data
, td2
->size
);
3042 td2
->inserted
= true;
// Link nt into the existing parent: a new entry keyed by newfirstitem's key is placed
// directly after t's own parent entry, and the parent grows by one internal_node.
3050 td
= ExAllocateFromPagedLookasideList(&Vcb
->tree_data_lookaside
);
3052 ERR("out of memory\n");
3053 return STATUS_INSUFFICIENT_RESOURCES
;
3056 td
->key
= newfirstitem
->key
;
3058 InsertHeadList(&t
->paritem
->list_entry
, &td
->list_entry
);
3061 td
->inserted
= true;
3062 td
->treeholder
.tree
= nt
;
3065 nt
->parent
->header
.num_items
++;
3066 nt
->parent
->size
+= sizeof(internal_node
);
// New-parent path: build a tree pt one level above nt containing two entries, one for
// t and one for nt.
3071 TRACE("adding new tree parent\n");
3073 if (nt
->header
.level
== 255) {
3074 ERR("cannot add parent to tree at level 255\n");
3075 return STATUS_INTERNAL_ERROR
;
3078 pt
= ExAllocatePoolWithTag(PagedPool
, sizeof(tree
), ALLOC_TAG
);
3080 ERR("out of memory\n");
3081 return STATUS_INSUFFICIENT_RESOURCES
;
3084 pt
->nonpaged
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(tree_nonpaged
), ALLOC_TAG
);
3085 if (!pt
->nonpaged
) {
3086 ERR("out of memory\n");
3088 return STATUS_INSUFFICIENT_RESOURCES
;
3091 ExInitializeFastMutex(&pt
->nonpaged
->mutex
);
3093 RtlCopyMemory(&pt
->header
, &nt
->header
, sizeof(tree_header
));
3094 pt
->header
.address
= 0;
3095 pt
->header
.num_items
= 2;
3096 pt
->header
.level
= nt
->header
.level
+ 1;
3097 pt
->header
.flags
= HEADER_FLAG_MIXED_BACKREF
| HEADER_FLAG_WRITTEN
;
3099 pt
->has_address
= false;
3104 pt
->new_address
= 0;
3105 pt
->has_new_address
= false;
3106 pt
->updated_extents
= false;
3107 pt
->size
= pt
->header
.num_items
* sizeof(internal_node
);
3108 pt
->uniqueness_determined
= true;
3109 pt
->is_unique
= true;
3110 pt
->list_entry_hash
.Flink
= NULL
;
3112 InitializeListHead(&pt
->itemlist
);
3114 InsertTailList(&Vcb
->trees
, &pt
->list_entry
);
// First entry of pt refers to t; its key is filled in by get_first_item(t, ...).
3116 td
= ExAllocateFromPagedLookasideList(&Vcb
->tree_data_lookaside
);
3118 ERR("out of memory\n");
3119 return STATUS_INSUFFICIENT_RESOURCES
;
3122 get_first_item(t
, &td
->key
);
3124 td
->inserted
= false;
3125 td
->treeholder
.address
= 0;
3126 td
->treeholder
.generation
= Vcb
->superblock
.generation
;
3127 td
->treeholder
.tree
= t
;
3128 InsertTailList(&pt
->itemlist
, &td
->list_entry
);
// Second entry of pt refers to nt, keyed by the first item moved into it.
3131 td
= ExAllocateFromPagedLookasideList(&Vcb
->tree_data_lookaside
);
3133 ERR("out of memory\n");
3134 return STATUS_INSUFFICIENT_RESOURCES
;
3137 td
->key
= newfirstitem
->key
;
3139 td
->inserted
= false;
3140 td
->treeholder
.address
= 0;
3141 td
->treeholder
.generation
= Vcb
->superblock
.generation
;
3142 td
->treeholder
.tree
= nt
;
3143 InsertTailList(&pt
->itemlist
, &td
->list_entry
);
// pt becomes the top tree of the root.
3148 t
->root
->treeholder
.tree
= pt
;
3153 #ifdef DEBUG_PARANOID
3154 if (t
->parent
&& t
->parent
->header
.level
<= t
->header
.level
) int3
;
3155 if (nt
->parent
&& nt
->parent
->header
.level
<= nt
->header
.level
) int3
;
// bytes_used grows by one node_size here.
3159 t
->root
->root_item
.bytes_used
+= Vcb
->superblock
.node_size
;
3161 return STATUS_SUCCESS
;
// Scans the items of tree t in order, accumulating their on-disk size, and calls
// split_tree_at() at the first item that would push the total past the space a node
// has after its header (node_size - sizeof(tree_header)).
//
// Returns the result of split_tree_at(), STATUS_INTERNAL_ERROR if an item is too large
// to fit in a node by itself, or STATUS_SUCCESS if the walk ends without a split.
3164 static NTSTATUS
split_tree(device_extension
* Vcb
, tree
* t
) {
3166 uint32_t size
, ds
, numitems
;
3171 // FIXME - naïve implementation: maximizes number of filled trees
3173 le
= t
->itemlist
.Flink
;
3174 while (le
!= &t
->itemlist
) {
3175 tree_data
* td
= CONTAINING_RECORD(le
, tree_data
, list_entry
);
// ds = space this item occupies: a leaf_node plus its data at level 0, otherwise one
// internal_node.
3178 if (t
->header
.level
== 0)
3179 ds
= sizeof(leaf_node
) + td
->size
;
3181 ds
= sizeof(internal_node
);
// An item that does not fit even as the first item of a node cannot be placed by splitting.
3183 if (numitems
== 0 && ds
> Vcb
->superblock
.node_size
- sizeof(tree_header
)) {
3184 ERR("(%I64x,%x,%I64x) in tree %I64x is too large (%x > %x)\n",
3185 td
->key
.obj_id
, td
->key
.obj_type
, td
->key
.offset
, t
->root
->id
,
3186 ds
, Vcb
->superblock
.node_size
- sizeof(tree_header
));
3187 return STATUS_INTERNAL_ERROR
;
3190 // FIXME - move back if previous item was deleted item with same key
// td is the first item that no longer fits; it becomes the first item of the new tree.
3191 if (size
+ ds
> Vcb
->superblock
.node_size
- sizeof(tree_header
))
3192 return split_tree_at(Vcb
, t
, td
, numitems
, size
);
3201 return STATUS_SUCCESS
;
// Reports whether tree t is unique, caching the answer in the tree.
//
// A previously determined answer (t->uniqueness_determined) is returned straight from
// t->is_unique. Otherwise the parent is checked recursively and, for a tree that has an
// on-disk address, its extent item is looked up in the extent root and examined:
// item type and size, refcount, and the type of the first inline reference.
// NOTE(review): the statements executed when each check fails, and the final return,
// are not visible in this view; confirm which checks yield "not unique".
3204 bool is_tree_unique(device_extension
* Vcb
, tree
* t
, PIRP Irp
) {
3212 if (t
->uniqueness_determined
)
3213 return t
->is_unique
;
3215 if (t
->parent
&& !is_tree_unique(Vcb
, t
->parent
, Irp
))
3218 if (t
->has_address
) {
// Search with the maximum offset; the checks below then verify that the item found
// belongs to this tree's address.
3219 searchkey
.obj_id
= t
->header
.address
;
3220 searchkey
.obj_type
= Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA
? TYPE_METADATA_ITEM
: TYPE_EXTENT_ITEM
;
3221 searchkey
.offset
= 0xffffffffffffffff;
3223 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, Irp
);
3224 if (!NT_SUCCESS(Status
)) {
3225 ERR("error - find_item returned %08x\n", Status
);
// The item must be for this address and be either a METADATA_ITEM or an EXTENT_ITEM.
3229 if (tp
.item
->key
.obj_id
!= t
->header
.address
|| (tp
.item
->key
.obj_type
!= TYPE_METADATA_ITEM
&& tp
.item
->key
.obj_type
!= TYPE_EXTENT_ITEM
))
// Extent items in the old EXTENT_ITEM_V0 layout are singled out by their exact size.
3232 if (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
&& tp
.item
->size
== sizeof(EXTENT_ITEM_V0
))
3235 if (tp
.item
->size
< sizeof(EXTENT_ITEM
))
3238 ei
= (EXTENT_ITEM
*)tp
.item
->data
;
3240 if (ei
->refcount
> 1)
// Locate the first inline reference's type byte: for an EXTENT_ITEM flagged as a tree
// block it follows an EXTENT_ITEM2, otherwise it follows the EXTENT_ITEM directly.
3243 if (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
&& ei
->flags
& EXTENT_ITEM_TREE_BLOCK
) {
3246 if (tp
.item
->size
< sizeof(EXTENT_ITEM
) + sizeof(EXTENT_ITEM2
))
3249 ei2
= (EXTENT_ITEM2
*)&ei
[1];
3250 type
= (uint8_t*)&ei2
[1];
3252 type
= (uint8_t*)&ei
[1];
// The type byte must lie inside the item and be TYPE_TREE_BLOCK_REF.
3254 if (type
>= tp
.item
->data
+ tp
.item
->size
|| *type
!= TYPE_TREE_BLOCK_REF
)
3262 t
->uniqueness_determined
= true;
// Tries to combine tree t with the next tree under the same parent.
//
// The next sibling is found by walking the parent's item list forward from t->paritem
// and is loaded if necessary. If that sibling is not unique the function returns
// STATUS_SUCCESS without changing anything. Otherwise:
//   - if both trees together fit in one node, every item of the sibling is appended to
//     t, the sibling's extent is released via reduce_tree_extent(), its parent entry is
//     marked ignored and freed, *done_deletions is set and the sibling tree is freed;
//   - if not, items are moved from the front of the sibling into t until t reaches the
//     average of the two sizes, and the sibling's parent key is updated to its new
//     first item.
// NOTE(review): `done` is presumably set when t was changed (do_splits loops on it);
// the assignments are not visible in this view - confirm.
//
// Returns STATUS_SUCCESS, or STATUS_INSUFFICIENT_RESOURCES when copying item data fails.
3267 static NTSTATUS
try_tree_amalgamate(device_extension
* Vcb
, tree
* t
, bool* done
, bool* done_deletions
, PIRP Irp
, LIST_ENTRY
* rollback
) {
3269 tree_data
* nextparitem
= NULL
;
3271 tree
*next_tree
, *par
;
3275 TRACE("trying to amalgamate tree in root %I64x, level %x (size %u)\n", t
->root
->id
, t
->header
.level
, t
->size
);
3277 // FIXME - doesn't capture everything, as it doesn't ascend
3278 le
= t
->paritem
->list_entry
.Flink
;
3279 while (le
!= &t
->parent
->itemlist
) {
3280 tree_data
* td
= CONTAINING_RECORD(le
, tree_data
, list_entry
);
3291 return STATUS_SUCCESS
;
3293 TRACE("nextparitem: key = %I64x,%x,%I64x\n", nextparitem
->key
.obj_id
, nextparitem
->key
.obj_type
, nextparitem
->key
.offset
);
// Load the sibling tree if it is not in memory yet.
3295 if (!nextparitem
->treeholder
.tree
) {
3296 Status
= do_load_tree(Vcb
, &nextparitem
->treeholder
, t
->root
, t
->parent
, nextparitem
, NULL
);
3297 if (!NT_SUCCESS(Status
)) {
3298 ERR("do_load_tree returned %08x\n", Status
);
// A sibling that is not unique is left alone.
3303 if (!is_tree_unique(Vcb
, nextparitem
->treeholder
.tree
, Irp
))
3304 return STATUS_SUCCESS
;
3306 next_tree
= nextparitem
->treeholder
.tree
;
3308 if (!next_tree
->updated_extents
&& next_tree
->has_address
) {
3309 Status
= update_tree_extents(Vcb
, next_tree
, Irp
, rollback
);
3310 if (!NT_SUCCESS(Status
)) {
3311 ERR("update_tree_extents returned %08x\n", Status
);
3316 if (t
->size
+ next_tree
->size
<= Vcb
->superblock
.node_size
- sizeof(tree_header
)) {
3317 // merge two trees into one
3319 t
->header
.num_items
+= next_tree
->header
.num_items
;
3320 t
->size
+= next_tree
->size
;
// Internal node: re-parent the sibling's loaded children to t and mark its items inserted.
3322 if (next_tree
->header
.level
> 0) {
3323 le
= next_tree
->itemlist
.Flink
;
3325 while (le
!= &next_tree
->itemlist
) {
3326 tree_data
* td2
= CONTAINING_RECORD(le
, tree_data
, list_entry
);
3328 if (td2
->treeholder
.tree
) {
3329 td2
->treeholder
.tree
->parent
= t
;
3330 #ifdef DEBUG_PARANOID
3331 if (td2
->treeholder
.tree
->parent
&& td2
->treeholder
.tree
->parent
->header
.level
<= td2
->treeholder
.tree
->header
.level
) int3
;
3335 td2
->inserted
= true;
// Second walk over the sibling's items: each item not yet marked `inserted` that has
// data gets its data copied into a fresh allocation and is then marked inserted.
3339 le
= next_tree
->itemlist
.Flink
;
3341 while (le
!= &next_tree
->itemlist
) {
3342 tree_data
* td2
= CONTAINING_RECORD(le
, tree_data
, list_entry
);
3344 if (!td2
->inserted
&& td2
->data
) {
3345 uint8_t* data
= ExAllocatePoolWithTag(PagedPool
, td2
->size
, ALLOC_TAG
);
3348 ERR("out of memory\n");
3349 return STATUS_INSUFFICIENT_RESOURCES
;
3352 RtlCopyMemory(data
, td2
->data
, td2
->size
);
3354 td2
->inserted
= true;
// Splice the sibling's whole item list onto the tail of t's list, then leave the
// sibling with an empty list.
3361 t
->itemlist
.Blink
->Flink
= next_tree
->itemlist
.Flink
;
3362 t
->itemlist
.Blink
->Flink
->Blink
= t
->itemlist
.Blink
;
3363 t
->itemlist
.Blink
= next_tree
->itemlist
.Blink
;
3364 t
->itemlist
.Blink
->Flink
= &t
->itemlist
;
3366 next_tree
->itemlist
.Flink
= next_tree
->itemlist
.Blink
= &next_tree
->itemlist
;
3368 next_tree
->header
.num_items
= 0;
3369 next_tree
->size
= 0;
// Release the sibling's extent: the newly allocated one if it has one, otherwise the
// one it was loaded from.
3371 if (next_tree
->has_new_address
) { // delete associated EXTENT_ITEM
3372 Status
= reduce_tree_extent(Vcb
, next_tree
->new_address
, next_tree
, next_tree
->parent
->header
.tree_id
, next_tree
->header
.level
, Irp
, rollback
);
3374 if (!NT_SUCCESS(Status
)) {
3375 ERR("reduce_tree_extent returned %08x\n", Status
);
3378 } else if (next_tree
->has_address
) {
3379 Status
= reduce_tree_extent(Vcb
, next_tree
->header
.address
, next_tree
, next_tree
->parent
->header
.tree_id
, next_tree
->header
.level
, Irp
, rollback
);
3381 if (!NT_SUCCESS(Status
)) {
3382 ERR("reduce_tree_extent returned %08x\n", Status
);
// Drop the sibling's entry from the parent's accounting (once only).
3387 if (!nextparitem
->ignore
) {
3388 nextparitem
->ignore
= true;
3389 next_tree
->parent
->header
.num_items
--;
3390 next_tree
->parent
->size
-= sizeof(internal_node
);
3392 *done_deletions
= true;
3395 par
= next_tree
->parent
;
3401 RemoveEntryList(&nextparitem
->list_entry
);
3402 ExFreePool(next_tree
->paritem
);
3403 next_tree
->paritem
= NULL
;
3405 next_tree
->root
->root_item
.bytes_used
-= Vcb
->superblock
.node_size
;
3407 free_tree(next_tree
);
3411 // rebalance by moving items from second tree into first
3412 ULONG avg_size
= (t
->size
+ next_tree
->size
) / 2;
3413 KEY firstitem
= {0, 0, 0};
3414 bool changed
= false;
3416 TRACE("attempting rebalance\n");
// Move items from the head of the sibling while t is below the average size and the
// sibling would still keep at least one item.
3418 le
= next_tree
->itemlist
.Flink
;
3419 while (le
!= &next_tree
->itemlist
&& t
->size
< avg_size
&& next_tree
->header
.num_items
> 1) {
3420 tree_data
* td
= CONTAINING_RECORD(le
, tree_data
, list_entry
);
3424 if (next_tree
->header
.level
== 0)
3425 size
= sizeof(leaf_node
) + td
->size
;
3427 size
= sizeof(internal_node
);
// Only move the item if t stays strictly below the node's capacity afterwards.
3431 if (t
->size
+ size
< Vcb
->superblock
.node_size
- sizeof(tree_header
)) {
3432 RemoveEntryList(&td
->list_entry
);
3433 InsertTailList(&t
->itemlist
, &td
->list_entry
);
3435 if (next_tree
->header
.level
> 0 && td
->treeholder
.tree
) {
3436 td
->treeholder
.tree
->parent
= t
;
3437 #ifdef DEBUG_PARANOID
3438 if (td
->treeholder
.tree
->parent
&& td
->treeholder
.tree
->parent
->header
.level
<= td
->treeholder
.tree
->header
.level
) int3
;
3440 } else if (next_tree
->header
.level
== 0 && !td
->inserted
&& td
->size
> 0) {
3441 uint8_t* data
= ExAllocatePoolWithTag(PagedPool
, td
->size
, ALLOC_TAG
);
3444 ERR("out of memory\n");
3445 return STATUS_INSUFFICIENT_RESOURCES
;
3448 RtlCopyMemory(data
, td
->data
, td
->size
);
3452 td
->inserted
= true;
3455 next_tree
->size
-= size
;
3457 next_tree
->header
.num_items
--;
3458 t
->header
.num_items
++;
// The moved item is gone from the sibling's list, so continue from the list's new head.
3465 le
= next_tree
->itemlist
.Flink
;
// Find the key the sibling now starts with.
3468 le
= next_tree
->itemlist
.Flink
;
3469 while (le
!= &next_tree
->itemlist
) {
3470 tree_data
* td
= CONTAINING_RECORD(le
, tree_data
, list_entry
);
3473 firstitem
= td
->key
;
3480 // FIXME - once ascension is working, make this work with parent's parent, etc.
3481 if (next_tree
->paritem
)
3482 next_tree
->paritem
->key
= firstitem
;
3494 return STATUS_SUCCESS
;
// Updates the extent-tree record of the tree block at `address` after the level of
// tree t has changed to `level`.
//
// With skinny metadata enabled, the METADATA_ITEM keyed (address, TYPE_METADATA_ITEM,
// t->header.level) is looked up first; if present it is deleted and re-inserted with
// `level` as the key offset. Otherwise the EXTENT_ITEM for the address is looked up,
// copied, deleted and re-inserted under the same key.
// NOTE(review): the statement that writes `level` into the copied EXTENT_ITEM_TREE is
// not visible in this view; confirm.
//
// Returns STATUS_SUCCESS once an item has been rewritten, STATUS_INSUFFICIENT_RESOURCES
// on allocation failure, or STATUS_INTERNAL_ERROR when the item is too small or no
// matching item exists.
3497 static NTSTATUS
update_extent_level(device_extension
* Vcb
, uint64_t address
, tree
* t
, uint8_t level
, PIRP Irp
) {
3502 if (Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA
) {
// The search uses the tree's current level (t->header.level) as the key offset.
3503 searchkey
.obj_id
= address
;
3504 searchkey
.obj_type
= TYPE_METADATA_ITEM
;
3505 searchkey
.offset
= t
->header
.level
;
3507 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, Irp
);
3508 if (!NT_SUCCESS(Status
)) {
3509 ERR("error - find_item returned %08x\n", Status
);
// keycmp() == 0 means the exact key was found.
3513 if (!keycmp(tp
.item
->key
, searchkey
)) {
3514 EXTENT_ITEM_SKINNY_METADATA
* eism
;
3516 if (tp
.item
->size
> 0) {
3517 eism
= ExAllocatePoolWithTag(PagedPool
, tp
.item
->size
, ALLOC_TAG
);
3520 ERR("out of memory\n");
3521 return STATUS_INSUFFICIENT_RESOURCES
;
3524 RtlCopyMemory(eism
, tp
.item
->data
, tp
.item
->size
);
3528 Status
= delete_tree_item(Vcb
, &tp
);
3529 if (!NT_SUCCESS(Status
)) {
3530 ERR("delete_tree_item returned %08x\n", Status
);
3531 if (eism
) ExFreePool(eism
);
// Re-insert the same data with the new level as the key offset.
3535 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, address
, TYPE_METADATA_ITEM
, level
, eism
, tp
.item
->size
, NULL
, Irp
);
3536 if (!NT_SUCCESS(Status
)) {
3537 ERR("insert_tree_item returned %08x\n", Status
);
3538 if (eism
) ExFreePool(eism
);
3542 return STATUS_SUCCESS
;
// Look for an EXTENT_ITEM for the address; the offset is not known in advance, so the
// maximum is used and only obj_id and obj_type are compared afterwards.
3546 searchkey
.obj_id
= address
;
3547 searchkey
.obj_type
= TYPE_EXTENT_ITEM
;
3548 searchkey
.offset
= 0xffffffffffffffff;
3550 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, Irp
);
3551 if (!NT_SUCCESS(Status
)) {
3552 ERR("error - find_item returned %08x\n", Status
);
3556 if (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
== searchkey
.obj_type
) {
3557 EXTENT_ITEM_TREE
* eit
;
3559 if (tp
.item
->size
< sizeof(EXTENT_ITEM_TREE
)) {
3560 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, sizeof(EXTENT_ITEM_TREE
));
3561 return STATUS_INTERNAL_ERROR
;
3564 eit
= ExAllocatePoolWithTag(PagedPool
, tp
.item
->size
, ALLOC_TAG
);
3567 ERR("out of memory\n");
3568 return STATUS_INSUFFICIENT_RESOURCES
;
3571 RtlCopyMemory(eit
, tp
.item
->data
, tp
.item
->size
);
3573 Status
= delete_tree_item(Vcb
, &tp
);
3574 if (!NT_SUCCESS(Status
)) {
3575 ERR("delete_tree_item returned %08x\n", Status
);
// The key is unchanged for EXTENT_ITEMs; only the item data is replaced.
3582 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, eit
, tp
.item
->size
, NULL
, Irp
);
3583 if (!NT_SUCCESS(Status
)) {
3584 ERR("insert_tree_item returned %08x\n", Status
);
3589 return STATUS_SUCCESS
;
3592 ERR("could not find EXTENT_ITEM for address %I64x\n", address
);
3594 return STATUS_INTERNAL_ERROR
;
// Calls update_tree_extents() on tree t after first doing the same, recursively, for
// each ancestor that has an address but has not had its extents updated yet - so
// parents are processed before their children.
//
// Returns STATUS_SUCCESS when the update of t succeeds; a failure of
// update_tree_extents() is logged with ERR().
3597 static NTSTATUS
update_tree_extents_recursive(device_extension
* Vcb
, tree
* t
, PIRP Irp
, LIST_ENTRY
* rollback
) {
// Recurse upwards first, but only into parents that still need the update.
3600 if (t
->parent
&& !t
->parent
->updated_extents
&& t
->parent
->has_address
) {
3601 Status
= update_tree_extents_recursive(Vcb
, t
->parent
, Irp
, rollback
);
3602 if (!NT_SUCCESS(Status
))
3606 Status
= update_tree_extents(Vcb
, t
, Irp
, rollback
);
3607 if (!NT_SUCCESS(Status
)) {
3608 ERR("update_tree_extents returned %08x\n", Status
);
3612 return STATUS_SUCCESS
;
// Restructures the in-memory trees marked for writing (t->write) so that each fits in
// a node, in three passes over Vcb->trees:
//   1. level by level from 0 upwards: trees with no items are deleted or reset to
//      level 0, and trees larger than a node's capacity are split with split_tree();
//   2. level by level up to max_level: trees smaller than half a node's capacity (and
//      not in the free-space root) are merged or rebalanced with their next sibling via
//      try_tree_amalgamate();
//   3. if anything was deleted, from max_level downwards: a parentless tree holding a
//      single item is removed and its only child becomes the root's top tree.
//
// Returns STATUS_SUCCESS at the end of the visible code; failures of the helpers are
// logged with ERR().
3615 static NTSTATUS
do_splits(device_extension
* Vcb
, PIRP Irp
, LIST_ENTRY
* rollback
) {
3616 ULONG level
, max_level
;
3618 bool empty
, done_deletions
= false;
3622 TRACE("(%p)\n", Vcb
);
// Pass 1: handle empty and oversized trees, one level at a time.
3626 for (level
= 0; level
<= 255; level
++) {
3627 LIST_ENTRY
*le
, *nextle
;
3631 TRACE("doing level %u\n", level
);
3633 le
= Vcb
->trees
.Flink
;
3635 while (le
!= &Vcb
->trees
) {
3636 t
= CONTAINING_RECORD(le
, tree
, list_entry
);
3640 if (t
->write
&& t
->header
.level
== level
) {
3643 if (t
->header
.num_items
== 0) {
3645 done_deletions
= true;
3647 TRACE("deleting tree in root %I64x\n", t
->root
->id
);
3649 t
->root
->root_item
.bytes_used
-= Vcb
->superblock
.node_size
;
// Release the tree's extent: the newly allocated one if present, otherwise the one it
// was loaded from.
3651 if (t
->has_new_address
) { // delete associated EXTENT_ITEM
3652 Status
= reduce_tree_extent(Vcb
, t
->new_address
, t
, t
->parent
->header
.tree_id
, t
->header
.level
, Irp
, rollback
);
3654 if (!NT_SUCCESS(Status
)) {
3655 ERR("reduce_tree_extent returned %08x\n", Status
);
3659 t
->has_new_address
= false;
3660 } else if (t
->has_address
) {
3661 Status
= reduce_tree_extent(Vcb
,t
->header
.address
, t
, t
->parent
->header
.tree_id
, t
->header
.level
, Irp
, rollback
);
3663 if (!NT_SUCCESS(Status
)) {
3664 ERR("reduce_tree_extent returned %08x\n", Status
);
3668 t
->has_address
= false;
// Remove the tree's entry from its parent's accounting (once only) and free the entry.
3671 if (!t
->paritem
->ignore
) {
3672 t
->paritem
->ignore
= true;
3673 t
->parent
->header
.num_items
--;
3674 t
->parent
->size
-= sizeof(internal_node
);
3677 RemoveEntryList(&t
->paritem
->list_entry
);
3678 ExFreePool(t
->paritem
);
// This branch resets a non-leaf tree to level 0, first updating the level recorded for
// its newly allocated extent, if it has one.
3682 } else if (t
->header
.level
!= 0) {
3683 if (t
->has_new_address
) {
3684 Status
= update_extent_level(Vcb
, t
->new_address
, t
, 0, Irp
);
3686 if (!NT_SUCCESS(Status
)) {
3687 ERR("update_extent_level returned %08x\n", Status
);
3692 t
->header
.level
= 0;
3694 } else if (t
->size
> Vcb
->superblock
.node_size
- sizeof(tree_header
)) {
3695 TRACE("splitting overlarge tree (%x > %x)\n", t
->size
, Vcb
->superblock
.node_size
- sizeof(tree_header
));
// Extents of the tree (and of its ancestors) are updated before the split.
3697 if (!t
->updated_extents
&& t
->has_address
) {
3698 Status
= update_tree_extents_recursive(Vcb
, t
, Irp
, rollback
);
3699 if (!NT_SUCCESS(Status
)) {
3700 ERR("update_tree_extents_recursive returned %08x\n", Status
);
3705 Status
= split_tree(Vcb
, t
);
3707 if (!NT_SUCCESS(Status
)) {
3708 ERR("split_tree returned %08x\n", Status
);
3720 TRACE("nothing found for level %u\n", level
);
// Pass 2: trees below half of a node's usable space are candidates for amalgamation.
3725 min_size
= (Vcb
->superblock
.node_size
- sizeof(tree_header
)) / 2;
3727 for (level
= 0; level
<= max_level
; level
++) {
3730 le
= Vcb
->trees
.Flink
;
3732 while (le
!= &Vcb
->trees
) {
3733 t
= CONTAINING_RECORD(le
, tree
, list_entry
);
3735 if (t
->write
&& t
->header
.level
== level
&& t
->header
.num_items
> 0 && t
->parent
&& t
->size
< min_size
&&
3736 t
->root
->id
!= BTRFS_ROOT_FREE_SPACE
&& is_tree_unique(Vcb
, t
, Irp
)) {
// Keep amalgamating while progress is reported and the tree is still undersized.
3740 Status
= try_tree_amalgamate(Vcb
, t
, &done
, &done_deletions
, Irp
, rollback
);
3741 if (!NT_SUCCESS(Status
)) {
3742 ERR("try_tree_amalgamate returned %08x\n", Status
);
3745 } while (done
&& t
->size
< min_size
);
3752 // simplify trees if top tree only has one entry
3754 if (done_deletions
) {
3755 for (level
= max_level
; level
> 0; level
--) {
3756 LIST_ENTRY
*le
, *nextle
;
3758 le
= Vcb
->trees
.Flink
;
3759 while (le
!= &Vcb
->trees
) {
3761 t
= CONTAINING_RECORD(le
, tree
, list_entry
);
3763 if (t
->write
&& t
->header
.level
== level
) {
// A top-level tree (no parent) with exactly one item is redundant.
3764 if (!t
->parent
&& t
->header
.num_items
== 1) {
3765 LIST_ENTRY
* le2
= t
->itemlist
.Flink
;
3766 tree_data
* td
= NULL
;
3767 tree
* child_tree
= NULL
;
3769 while (le2
!= &t
->itemlist
) {
3770 td
= CONTAINING_RECORD(le2
, tree_data
, list_entry
);
3776 TRACE("deleting top-level tree in root %I64x with one item\n", t
->root
->id
);
3778 if (t
->has_new_address
) { // delete associated EXTENT_ITEM
3779 Status
= reduce_tree_extent(Vcb
, t
->new_address
, t
, t
->header
.tree_id
, t
->header
.level
, Irp
, rollback
);
3781 if (!NT_SUCCESS(Status
)) {
3782 ERR("reduce_tree_extent returned %08x\n", Status
);
3786 t
->has_new_address
= false;
3787 } else if (t
->has_address
) {
3788 Status
= reduce_tree_extent(Vcb
,t
->header
.address
, t
, t
->header
.tree_id
, t
->header
.level
, Irp
, rollback
);
3790 if (!NT_SUCCESS(Status
)) {
3791 ERR("reduce_tree_extent returned %08x\n", Status
);
3795 t
->has_address
= false;
// If the child is not in memory, a find_item() call for the lowest possible key is made
// on the root before td->treeholder.tree is read.
3798 if (!td
->treeholder
.tree
) { // load first item if not already loaded
3799 KEY searchkey
= {0,0,0};
3802 Status
= find_item(Vcb
, t
->root
, &tp
, &searchkey
, false, Irp
);
3803 if (!NT_SUCCESS(Status
)) {
3804 ERR("error - find_item returned %08x\n", Status
);
// Detach the child from the removed tree and promote it to the root's top tree.
3809 child_tree
= td
->treeholder
.tree
;
3812 child_tree
->parent
= NULL
;
3813 child_tree
->paritem
= NULL
;
3816 t
->root
->root_item
.bytes_used
-= Vcb
->superblock
.node_size
;
3821 child_tree
->root
->treeholder
.tree
= child_tree
;
3830 return STATUS_SUCCESS
;
/* remove_root_extents: walks the tree referenced by th, recursing into the
 * treeholder of every item, and calls reduce_tree_extent for the node at
 * th->address.
 *
 *   Vcb      - volume; supplies superblock.node_size for the node buffer
 *   r        - root being removed; its id is used when there is no parent
 *   th       - holder of the node to process (address, generation, tree)
 *   level    - level passed on to reduce_tree_extent for this node
 *   parent   - tree containing th, or NULL for the top node
 *   Irp      - forwarded to read_data / update_tree_extents / reduce_tree_extent
 *   rollback - forwarded to the extent helpers
 *
 * Returns STATUS_SUCCESS at the end of the visible path and
 * STATUS_INSUFFICIENT_RESOURCES after the "out of memory" message.
 * NOTE(review): the guards around the load and the recursion, and the error
 * returns after each ERR(), are not visible in this listing. */
3833 static NTSTATUS
remove_root_extents(device_extension
* Vcb
, root
* r
, tree_holder
* th
, uint8_t level
, tree
* parent
, PIRP Irp
, LIST_ENTRY
* rollback
) {
// Scratch buffer of one node (superblock.node_size bytes) from paged pool.
3840 buf
= ExAllocatePoolWithTag(PagedPool
, Vcb
->superblock
.node_size
, ALLOC_TAG
);
3842 ERR("out of memory\n");
3843 return STATUS_INSUFFICIENT_RESOURCES
;
// Presumably reads the node at th->address into buf, checked against th->generation.
3846 Status
= read_data(Vcb
, th
->address
, Vcb
->superblock
.node_size
, NULL
, true, buf
, NULL
,
3847 &c
, Irp
, th
->generation
, false, NormalPagePriority
);
3848 if (!NT_SUCCESS(Status
)) {
3849 ERR("read_data returned 0x%08x\n", Status
);
3854 Status
= load_tree(Vcb
, th
->address
, buf
, r
, &th
->tree
);
// NOTE(review): the statement controlled by this test is not visible;
// presumably buf is freed when the loaded tree did not keep it - confirm.
3856 if (!th
->tree
|| th
->tree
->buf
!= buf
)
3859 if (!NT_SUCCESS(Status
)) {
3860 ERR("load_tree(%I64x) returned %08x\n", th
->address
, Status
);
3866 LIST_ENTRY
* le
= th
->tree
->itemlist
.Flink
;
3868 while (le
!= &th
->tree
->itemlist
) {
3869 tree_data
* td
= CONTAINING_RECORD(le
, tree_data
, list_entry
);
// Recurse into the child holder one level below this tree, with this tree as parent.
3872 Status
= remove_root_extents(Vcb
, r
, &td
->treeholder
, th
->tree
->header
.level
- 1, th
->tree
, Irp
, rollback
);
3874 if (!NT_SUCCESS(Status
)) {
3875 ERR("remove_root_extents returned %08x\n", Status
);
// A loaded tree that still has an address but has not had its extents updated
// goes through update_tree_extents first.
3884 if (th
->tree
&& !th
->tree
->updated_extents
&& th
->tree
->has_address
) {
3885 Status
= update_tree_extents(Vcb
, th
->tree
, Irp
, rollback
);
3886 if (!NT_SUCCESS(Status
)) {
3887 ERR("update_tree_extents returned %08x\n", Status
);
// The tree id handed to reduce_tree_extent comes from the parent's header when
// a parent exists, otherwise from the root's id.
3892 if (!th
->tree
|| th
->tree
->has_address
) {
3893 Status
= reduce_tree_extent(Vcb
, th
->address
, NULL
, parent
? parent
->header
.tree_id
: r
->id
, level
, Irp
, rollback
);
3895 if (!NT_SUCCESS(Status
)) {
3896 ERR("reduce_tree_extent(%I64x) returned %08x\n", th
->address
, Status
);
3901 return STATUS_SUCCESS
;
/* drop_root: removes a root from the volume's metadata.
 *
 * Visible steps, in order:
 *   1. remove_root_extents on r->treeholder, starting at root_item.root_level.
 *   2. If Vcb->uuid_root is set: delete the TYPE_SUBVOL_UUID entry keyed by the
 *      two 64-bit halves of root_item.uuid, and, when root_item.rtransid > 0,
 *      remove r->id from the TYPE_SUBVOL_REC_UUID entry keyed by
 *      root_item.received_uuid.
 *   3. Delete the TYPE_ROOT_ITEM entry for r->id from Vcb->root_root.
 *   4. free_trees_root(Vcb, r).
 *
 * Returns STATUS_SUCCESS at the end of the visible path and
 * STATUS_INSUFFICIENT_RESOURCES after the "out of memory" message.
 * NOTE(review): the statements following each ERR()/WARN() and the else
 * branches are not visible in this listing. */
3904 static NTSTATUS
drop_root(device_extension
* Vcb
, root
* r
, PIRP Irp
, LIST_ENTRY
* rollback
) {
3909 Status
= remove_root_extents(Vcb
, r
, &r
->treeholder
, r
->root_item
.root_level
, NULL
, Irp
, rollback
);
3910 if (!NT_SUCCESS(Status
)) {
3911 ERR("remove_root_extents returned %08x\n", Status
);
3915 // remove entries in uuid root (tree 9)
3916 if (Vcb
->uuid_root
) {
// Key layout: obj_id = first 8 bytes of the uuid, offset = next 8 bytes.
3917 RtlCopyMemory(&searchkey
.obj_id
, &r
->root_item
.uuid
.uuid
[0], sizeof(uint64_t));
3918 searchkey
.obj_type
= TYPE_SUBVOL_UUID
;
3919 RtlCopyMemory(&searchkey
.offset
, &r
->root_item
.uuid
.uuid
[sizeof(uint64_t)], sizeof(uint64_t));
// An all-zero uuid is not looked up.
3921 if (searchkey
.obj_id
!= 0 || searchkey
.offset
!= 0) {
3922 Status
= find_item(Vcb
, Vcb
->uuid_root
, &tp
, &searchkey
, false, Irp
);
3923 if (!NT_SUCCESS(Status
)) {
3924 WARN("find_item returned %08x\n", Status
);
3926 if (!keycmp(tp
.item
->key
, searchkey
)) {
3927 Status
= delete_tree_item(Vcb
, &tp
);
3928 if (!NT_SUCCESS(Status
)) {
3929 ERR("delete_tree_item returned %08x\n", Status
);
3933 WARN("could not find (%I64x,%x,%I64x) in uuid tree\n", searchkey
.obj_id
, searchkey
.obj_type
, searchkey
.offset
);
// Same key layout, built from received_uuid, for roots with a non-zero rtransid.
3937 if (r
->root_item
.rtransid
> 0) {
3938 RtlCopyMemory(&searchkey
.obj_id
, &r
->root_item
.received_uuid
.uuid
[0], sizeof(uint64_t));
3939 searchkey
.obj_type
= TYPE_SUBVOL_REC_UUID
;
3940 RtlCopyMemory(&searchkey
.offset
, &r
->root_item
.received_uuid
.uuid
[sizeof(uint64_t)], sizeof(uint64_t));
3942 Status
= find_item(Vcb
, Vcb
->uuid_root
, &tp
, &searchkey
, false, Irp
);
3943 if (!NT_SUCCESS(Status
))
3944 WARN("find_item returned %08x\n", Status
);
3946 if (!keycmp(tp
.item
->key
, searchkey
)) {
// Item holding exactly one 64-bit id.
// NOTE(review): how `id` is tested before the delete is not visible here.
3947 if (tp
.item
->size
== sizeof(uint64_t)) {
3948 uint64_t* id
= (uint64_t*)tp
.item
->data
;
3951 Status
= delete_tree_item(Vcb
, &tp
);
3952 if (!NT_SUCCESS(Status
)) {
3953 ERR("delete_tree_item returned %08x\n", Status
);
// Item holding several ids: find r->id, then replace the item with a copy
// that is one uint64_t shorter and omits that entry.
3957 } else if (tp
.item
->size
> sizeof(uint64_t)) {
3959 uint64_t* ids
= (uint64_t*)tp
.item
->data
;
3961 for (i
= 0; i
< tp
.item
->size
/ sizeof(uint64_t); i
++) {
3962 if (ids
[i
] == r
->id
) {
3965 ne
= ExAllocatePoolWithTag(PagedPool
, tp
.item
->size
- sizeof(uint64_t), ALLOC_TAG
);
3967 ERR("out of memory\n");
3968 return STATUS_INSUFFICIENT_RESOURCES
;
// Copy the i entries before the match ...
3972 RtlCopyMemory(ne
, ids
, sizeof(uint64_t) * i
);
// ... and, unless the match was the last entry, the entries after it.
3974 if ((i
+ 1) * sizeof(uint64_t) < tp
.item
->size
)
3975 RtlCopyMemory(&ne
[i
], &ids
[i
+ 1], tp
.item
->size
- ((i
+ 1) * sizeof(uint64_t)));
3977 Status
= delete_tree_item(Vcb
, &tp
);
3978 if (!NT_SUCCESS(Status
)) {
3979 ERR("delete_tree_item returned %08x\n", Status
);
// Re-insert under the same key with the shortened array.
3984 Status
= insert_tree_item(Vcb
, Vcb
->uuid_root
, tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
,
3985 ne
, tp
.item
->size
- sizeof(uint64_t), NULL
, Irp
);
3986 if (!NT_SUCCESS(Status
)) {
3987 ERR("insert_tree_item returned %08x\n", Status
);
3997 WARN("could not find (%I64x,%x,%I64x) in uuid tree\n", searchkey
.obj_id
, searchkey
.obj_type
, searchkey
.offset
);
// Look up the ROOT_ITEM for this root; the search uses the maximum offset and
// the match below compares obj_id and obj_type only.
4004 searchkey
.obj_id
= r
->id
;
4005 searchkey
.obj_type
= TYPE_ROOT_ITEM
;
4006 searchkey
.offset
= 0xffffffffffffffff;
4008 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, Irp
);
4009 if (!NT_SUCCESS(Status
)) {
4010 ERR("find_item returned %08x\n", Status
);
4014 if (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
== searchkey
.obj_type
) {
4015 Status
= delete_tree_item(Vcb
, &tp
);
4017 if (!NT_SUCCESS(Status
)) {
4018 ERR("delete_tree_item returned %08x\n", Status
);
4022 WARN("could not find (%I64x,%x,%I64x) in root_root\n", searchkey
.obj_id
, searchkey
.obj_type
, searchkey
.offset
);
4024 // delete items in tree cache
4026 free_trees_root(Vcb
, r
);
4028 return STATUS_SUCCESS
;
/* drop_roots: walks the Vcb->drop_roots list and calls drop_root() on every
 * root found there, logging the root id and status when a call fails.
 * Returns STATUS_SUCCESS at the end of the visible path.
 * NOTE(review): le2 is declared but its use (presumably holding the next
 * entry while the current one is processed) is not visible in this listing,
 * nor is what happens after the ERR(). */
4031 static NTSTATUS
drop_roots(device_extension
* Vcb
, PIRP Irp
, LIST_ENTRY
* rollback
) {
4032 LIST_ENTRY
*le
= Vcb
->drop_roots
.Flink
, *le2
;
4035 while (le
!= &Vcb
->drop_roots
) {
4036 root
* r
= CONTAINING_RECORD(le
, root
, list_entry
);
4040 Status
= drop_root(Vcb
, r
, Irp
, rollback
);
4041 if (!NT_SUCCESS(Status
)) {
4042 ERR("drop_root(%I64x) returned %08x\n", r
->id
, Status
);
4049 return STATUS_SUCCESS
;
/* update_dev_item: replaces the DEV_ITEM stored in Vcb->chunk_root for
 * `device` with a fresh copy of device->devitem.
 *
 * The existing item is looked up at (1, TYPE_DEV_ITEM, devitem.dev_id); if
 * the item found does not have exactly that key the function returns
 * STATUS_INTERNAL_ERROR. Otherwise the item is deleted and a pool-allocated
 * copy of device->devitem is inserted under the same key.
 *
 * Returns STATUS_SUCCESS at the end of the visible path and
 * STATUS_INSUFFICIENT_RESOURCES after the "out of memory" message.
 * NOTE(review): the returns after the other ERR() calls are not visible in
 * this listing. */
4052 NTSTATUS
update_dev_item(device_extension
* Vcb
, device
* device
, PIRP Irp
) {
4058 searchkey
.obj_id
= 1;
4059 searchkey
.obj_type
= TYPE_DEV_ITEM
;
4060 searchkey
.offset
= device
->devitem
.dev_id
;
4062 Status
= find_item(Vcb
, Vcb
->chunk_root
, &tp
, &searchkey
, false, Irp
);
4063 if (!NT_SUCCESS(Status
)) {
4064 ERR("error - find_item returned %08x\n", Status
);
// A non-zero keycmp means the item found is not the one searched for.
4068 if (keycmp(tp
.item
->key
, searchkey
)) {
4069 ERR("error - could not find DEV_ITEM for device %I64x\n", device
->devitem
.dev_id
);
4070 return STATUS_INTERNAL_ERROR
;
4073 Status
= delete_tree_item(Vcb
, &tp
);
4074 if (!NT_SUCCESS(Status
)) {
4075 ERR("delete_tree_item returned %08x\n", Status
);
// The replacement item gets its own pool copy of the in-memory DEV_ITEM.
4079 di
= ExAllocatePoolWithTag(PagedPool
, sizeof(DEV_ITEM
), ALLOC_TAG
);
4081 ERR("out of memory\n");
4082 return STATUS_INSUFFICIENT_RESOURCES
;
4085 RtlCopyMemory(di
, &device
->devitem
, sizeof(DEV_ITEM
));
4087 Status
= insert_tree_item(Vcb
, Vcb
->chunk_root
, 1, TYPE_DEV_ITEM
, device
->devitem
.dev_id
, di
, sizeof(DEV_ITEM
), NULL
, Irp
);
4088 if (!NT_SUCCESS(Status
)) {
4089 ERR("insert_tree_item returned %08x\n", Status
);
4094 return STATUS_SUCCESS
;
/* regen_bootstrap: rewrites Vcb->superblock.sys_chunk_array from the
 * Vcb->sys_chunks list. For each list entry the KEY is copied to position i
 * of the array and then the entry's data (sc2->size bytes) is copied to
 * position i.
 * NOTE(review): the initialisation and advancing of i between the two copies
 * and the list advance are not visible in this listing. */
4097 static void regen_bootstrap(device_extension
* Vcb
) {
4103 le
= Vcb
->sys_chunks
.Flink
;
4104 while (le
!= &Vcb
->sys_chunks
) {
4105 sc2
= CONTAINING_RECORD(le
, sys_chunk
, list_entry
);
4107 TRACE("%I64x,%x,%I64x\n", sc2
->key
.obj_id
, sc2
->key
.obj_type
, sc2
->key
.offset
);
// Key first ...
4109 RtlCopyMemory(&Vcb
->superblock
.sys_chunk_array
[i
], &sc2
->key
, sizeof(KEY
));
// ... then the item payload.
4112 RtlCopyMemory(&Vcb
->superblock
.sys_chunk_array
[i
], sc2
->data
, sc2
->size
);
/* add_to_bootstrap: adds an item to the Vcb->sys_chunks list and regenerates
 * the superblock's sys_chunk_array.
 *
 *   obj_id, obj_type, offset - key of the new entry
 *   data, size               - payload; copied into a new pool allocation
 *
 * Fails with STATUS_INTERNAL_ERROR when superblock.n + sizeof(KEY) + size
 * would exceed SYS_CHUNK_ARRAY_SIZE, and with STATUS_INSUFFICIENT_RESOURCES
 * after either "out of memory" message. On success superblock.n grows by
 * sizeof(KEY) + size and STATUS_SUCCESS is returned.
 * NOTE(review): the assignment of sc->size is not visible in this listing;
 * the allocation and copy below rely on it. */
4119 static NTSTATUS
add_to_bootstrap(device_extension
* Vcb
, uint64_t obj_id
, uint8_t obj_type
, uint64_t offset
, void* data
, uint16_t size
) {
4123 if (Vcb
->superblock
.n
+ sizeof(KEY
) + size
> SYS_CHUNK_ARRAY_SIZE
) {
4124 ERR("error - bootstrap is full\n");
4125 return STATUS_INTERNAL_ERROR
;
4128 sc
= ExAllocatePoolWithTag(PagedPool
, sizeof(sys_chunk
), ALLOC_TAG
);
4130 ERR("out of memory\n");
4131 return STATUS_INSUFFICIENT_RESOURCES
;
4134 sc
->key
.obj_id
= obj_id
;
4135 sc
->key
.obj_type
= obj_type
;
4136 sc
->key
.offset
= offset
;
4138 sc
->data
= ExAllocatePoolWithTag(PagedPool
, sc
->size
, ALLOC_TAG
);
4140 ERR("out of memory\n");
4142 return STATUS_INSUFFICIENT_RESOURCES
;
4145 RtlCopyMemory(sc
->data
, data
, sc
->size
);
// Search for the first existing entry whose key compares greater than the new key.
4147 le
= Vcb
->sys_chunks
.Flink
;
4148 while (le
!= &Vcb
->sys_chunks
) {
4149 sys_chunk
* sc2
= CONTAINING_RECORD(le
, sys_chunk
, list_entry
);
4151 if (keycmp(sc2
->key
, sc
->key
) == 1)
// InsertTailList(le, ...) links the new entry immediately before le.
4156 InsertTailList(le
, &sc
->list_entry
);
4158 Vcb
->superblock
.n
+= sizeof(KEY
) + size
;
4160 regen_bootstrap(Vcb
);
4162 return STATUS_SUCCESS
;
/* create_chunk: writes the tree items describing chunk c.
 *
 * Visible steps, in order:
 *   1. Copy c->chunk_item (c->size bytes) and insert it into Vcb->chunk_root
 *      at (0x100, TYPE_CHUNK_ITEM, c->offset).
 *   2. If the chunk type has BLOCK_FLAG_SYSTEM, add it to the bootstrap
 *      array as well.
 *   3. Insert a BLOCK_GROUP_ITEM into Vcb->extent_root at
 *      (c->offset, TYPE_BLOCK_GROUP_ITEM, chunk_item->size).
 *   4. For each stripe, insert a DEV_EXTENT into Vcb->dev_root keyed by the
 *      stripe's device id and offset, then call update_dev_item for that
 *      device.
 *   5. Set c->oldused = c->used and add chunk_estimate_phys_size() to
 *      superblock.bytes_used.
 *
 * Returns STATUS_SUCCESS at the end of the visible path and
 * STATUS_INSUFFICIENT_RESOURCES after an "out of memory" message.
 * NOTE(review): the returns/cleanup after the other ERR() calls are not
 * visible in this listing. */
4165 static NTSTATUS
create_chunk(device_extension
* Vcb
, chunk
* c
, PIRP Irp
) {
4167 CHUNK_ITEM_STRIPE
* cis
;
4168 BLOCK_GROUP_ITEM
* bgi
;
4172 ci
= ExAllocatePoolWithTag(PagedPool
, c
->size
, ALLOC_TAG
);
4174 ERR("out of memory\n");
4175 return STATUS_INSUFFICIENT_RESOURCES
;
4178 RtlCopyMemory(ci
, c
->chunk_item
, c
->size
);
4180 Status
= insert_tree_item(Vcb
, Vcb
->chunk_root
, 0x100, TYPE_CHUNK_ITEM
, c
->offset
, ci
, c
->size
, NULL
, Irp
);
4181 if (!NT_SUCCESS(Status
)) {
4182 ERR("insert_tree_item failed\n");
4187 if (c
->chunk_item
->type
& BLOCK_FLAG_SYSTEM
) {
4188 Status
= add_to_bootstrap(Vcb
, 0x100, TYPE_CHUNK_ITEM
, c
->offset
, ci
, c
->size
);
4189 if (!NT_SUCCESS(Status
)) {
4190 ERR("add_to_bootstrap returned %08x\n", Status
);
4195 // add BLOCK_GROUP_ITEM to tree 2
4197 bgi
= ExAllocatePoolWithTag(PagedPool
, sizeof(BLOCK_GROUP_ITEM
), ALLOC_TAG
);
4199 ERR("out of memory\n");
4200 return STATUS_INSUFFICIENT_RESOURCES
;
4203 bgi
->used
= c
->used
;
4204 bgi
->chunk_tree
= 0x100;
4205 bgi
->flags
= c
->chunk_item
->type
;
4207 Status
= insert_tree_item(Vcb
, Vcb
->extent_root
, c
->offset
, TYPE_BLOCK_GROUP_ITEM
, c
->chunk_item
->size
, bgi
, sizeof(BLOCK_GROUP_ITEM
), NULL
, Irp
);
4208 if (!NT_SUCCESS(Status
)) {
4209 ERR("insert_tree_item failed\n");
// factor is the divisor applied to the chunk size to get each DEV_EXTENT's
// length below. NOTE(review): the value used in the final else branch is not
// visible in this listing.
4214 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID0
)
4215 factor
= c
->chunk_item
->num_stripes
;
4216 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID10
)
4217 factor
= c
->chunk_item
->num_stripes
/ c
->chunk_item
->sub_stripes
;
4218 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
)
4219 factor
= c
->chunk_item
->num_stripes
- 1;
4220 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)
4221 factor
= c
->chunk_item
->num_stripes
- 2;
4222 else // SINGLE, DUPLICATE, RAID1
4225 // add DEV_EXTENTs to tree 4
// The stripe array starts immediately after the CHUNK_ITEM header.
4227 cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
4229 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
4232 de
= ExAllocatePoolWithTag(PagedPool
, sizeof(DEV_EXTENT
), ALLOC_TAG
);
4234 ERR("out of memory\n");
4235 return STATUS_INSUFFICIENT_RESOURCES
;
4238 de
->chunktree
= Vcb
->chunk_root
->id
;
4240 de
->address
= c
->offset
;
4241 de
->length
= c
->chunk_item
->size
/ factor
;
4242 de
->chunktree_uuid
= Vcb
->chunk_root
->treeholder
.tree
->header
.chunk_tree_uuid
;
4244 Status
= insert_tree_item(Vcb
, Vcb
->dev_root
, c
->devices
[i
]->devitem
.dev_id
, TYPE_DEV_EXTENT
, cis
[i
].offset
, de
, sizeof(DEV_EXTENT
), NULL
, Irp
);
4245 if (!NT_SUCCESS(Status
)) {
4246 ERR("insert_tree_item returned %08x\n", Status
);
4251 // FIXME - no point in calling this twice for the same device
4252 Status
= update_dev_item(Vcb
, c
->devices
[i
], Irp
);
4253 if (!NT_SUCCESS(Status
)) {
4254 ERR("update_dev_item returned %08x\n", Status
);
4260 c
->oldused
= c
->used
;
4262 Vcb
->superblock
.bytes_used
+= chunk_estimate_phys_size(Vcb
, c
, c
->used
);
4264 return STATUS_SUCCESS
;
/* remove_from_bootstrap: searches Vcb->sys_chunks for the entry whose key
 * equals (obj_id, obj_type, offset). When found, the entry is unlinked,
 * superblock.n is reduced by sizeof(KEY) plus the entry's size, the entry's
 * data is freed and the superblock bootstrap array is regenerated.
 * NOTE(review): freeing of the sys_chunk itself and the exit from the loop
 * are not visible in this listing. */
4267 static void remove_from_bootstrap(device_extension
* Vcb
, uint64_t obj_id
, uint8_t obj_type
, uint64_t offset
) {
4271 le
= Vcb
->sys_chunks
.Flink
;
4272 while (le
!= &Vcb
->sys_chunks
) {
4273 sc2
= CONTAINING_RECORD(le
, sys_chunk
, list_entry
);
4275 if (sc2
->key
.obj_id
== obj_id
&& sc2
->key
.obj_type
== obj_type
&& sc2
->key
.offset
== offset
) {
4276 RemoveEntryList(&sc2
->list_entry
);
4278 Vcb
->superblock
.n
-= sizeof(KEY
) + sc2
->size
;
4280 ExFreePool(sc2
->data
);
4282 regen_bootstrap(Vcb
);
/* set_xattr: queues a Batch_SetXattr operation that stores an extended
 * attribute for `inode` in `subvol`.
 *
 *   batchlist     - batch list the operation is appended to
 *   name, namelen - attribute name (not NUL-terminated; length given)
 *   crc32         - used as the key offset of the TYPE_XATTR_ITEM
 *   data, datalen - attribute value
 *
 * A DIR_ITEM of offsetof(DIR_ITEM, name[0]) + namelen + datalen bytes is
 * allocated, with the name followed directly by the value in xa->name.
 * Returns STATUS_SUCCESS at the end of the visible path and
 * STATUS_INSUFFICIENT_RESOURCES after the "out of memory" message.
 * NOTE(review): several field assignments and the failure path after
 * insert_tree_item_batch are not visible in this listing. */
4290 static NTSTATUS
set_xattr(device_extension
* Vcb
, LIST_ENTRY
* batchlist
, root
* subvol
, uint64_t inode
, char* name
, uint16_t namelen
,
4291 uint32_t crc32
, uint8_t* data
, uint16_t datalen
) {
4296 TRACE("(%p, %I64x, %I64x, %.*s, %08x, %p, %u)\n", Vcb
, subvol
->id
, inode
, namelen
, name
, crc32
, data
, datalen
);
4298 xasize
= (uint16_t)offsetof(DIR_ITEM
, name
[0]) + namelen
+ datalen
;
4300 xa
= ExAllocatePoolWithTag(PagedPool
, xasize
, ALLOC_TAG
);
4302 ERR("out of memory\n");
4303 return STATUS_INSUFFICIENT_RESOURCES
;
4307 xa
->key
.obj_type
= 0;
4309 xa
->transid
= Vcb
->superblock
.generation
;
4312 xa
->type
= BTRFS_TYPE_EA
;
// Name first, value immediately after it.
4313 RtlCopyMemory(xa
->name
, name
, namelen
);
4314 RtlCopyMemory(xa
->name
+ namelen
, data
, datalen
);
4316 Status
= insert_tree_item_batch(batchlist
, Vcb
, subvol
, inode
, TYPE_XATTR_ITEM
, crc32
, xa
, xasize
, Batch_SetXattr
);
4317 if (!NT_SUCCESS(Status
)) {
4318 ERR("insert_tree_item_batch returned %08x\n", Status
);
4323 return STATUS_SUCCESS
;
/* delete_xattr: queues a Batch_DeleteXattr operation for the extended
 * attribute `name` of `inode` in `subvol`.
 *
 *   batchlist     - batch list the operation is appended to
 *   name, namelen - attribute name (not NUL-terminated; length given)
 *   crc32         - used as the key offset of the TYPE_XATTR_ITEM
 *
 * A DIR_ITEM of offsetof(DIR_ITEM, name[0]) + namelen bytes carrying only the
 * name is allocated and handed to insert_tree_item_batch.
 * Returns STATUS_SUCCESS at the end of the visible path and
 * STATUS_INSUFFICIENT_RESOURCES after the "out of memory" message.
 * NOTE(review): several field assignments and the failure path after
 * insert_tree_item_batch are not visible in this listing. */
4326 static NTSTATUS
delete_xattr(device_extension
* Vcb
, LIST_ENTRY
* batchlist
, root
* subvol
, uint64_t inode
, char* name
,
4327 uint16_t namelen
, uint32_t crc32
) {
4332 TRACE("(%p, %I64x, %I64x, %.*s, %08x)\n", Vcb
, subvol
->id
, inode
, namelen
, name
, crc32
);
4334 xasize
= (uint16_t)offsetof(DIR_ITEM
, name
[0]) + namelen
;
4336 xa
= ExAllocatePoolWithTag(PagedPool
, xasize
, ALLOC_TAG
);
4338 ERR("out of memory\n");
4339 return STATUS_INSUFFICIENT_RESOURCES
;
4343 xa
->key
.obj_type
= 0;
4345 xa
->transid
= Vcb
->superblock
.generation
;
4348 xa
->type
= BTRFS_TYPE_EA
;
4349 RtlCopyMemory(xa
->name
, name
, namelen
);
4351 Status
= insert_tree_item_batch(batchlist
, Vcb
, subvol
, inode
, TYPE_XATTR_ITEM
, crc32
, xa
, xasize
, Batch_DeleteXattr
);
4352 if (!NT_SUCCESS(Status
)) {
4353 ERR("insert_tree_item_batch returned %08x\n", Status
);
4358 return STATUS_SUCCESS
;
/* insert_sparse_extent: queues a Batch_Insert of a TYPE_EXTENT_DATA item at
 * file offset `start` covering `length` bytes for fcb's inode.
 *
 * The item is an EXTENT_DATA header followed by an EXTENT_DATA2, with no
 * compression, encryption or encoding, type EXTENT_TYPE_REGULAR, and both
 * decoded_size and num_bytes set to `length`.
 * Returns STATUS_SUCCESS at the end of the visible path and
 * STATUS_INSUFFICIENT_RESOURCES after the "out of memory" message.
 * NOTE(review): the assignments to the other EXTENT_DATA2 fields are not
 * visible in this listing (presumably zero, given the function name -
 * confirm), nor is the failure path after insert_tree_item_batch. */
4361 static NTSTATUS
insert_sparse_extent(fcb
* fcb
, LIST_ENTRY
* batchlist
, uint64_t start
, uint64_t length
) {
4366 TRACE("((%I64x, %I64x), %I64x, %I64x)\n", fcb
->subvol
->id
, fcb
->inode
, start
, length
);
// EXTENT_DATA's trailing data member accounts for the "- 1".
4368 ed
= ExAllocatePoolWithTag(PagedPool
, sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
), ALLOC_TAG
);
4370 ERR("out of memory\n");
4371 return STATUS_INSUFFICIENT_RESOURCES
;
4374 ed
->generation
= fcb
->Vcb
->superblock
.generation
;
4375 ed
->decoded_size
= length
;
4376 ed
->compression
= BTRFS_COMPRESSION_NONE
;
4377 ed
->encryption
= BTRFS_ENCRYPTION_NONE
;
4378 ed
->encoding
= BTRFS_ENCODING_NONE
;
4379 ed
->type
= EXTENT_TYPE_REGULAR
;
4381 ed2
= (EXTENT_DATA2
*)ed
->data
;
4385 ed2
->num_bytes
= length
;
4387 Status
= insert_tree_item_batch(batchlist
, fcb
->Vcb
, fcb
->subvol
, fcb
->inode
, TYPE_EXTENT_DATA
, start
, ed
, sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
), Batch_Insert
);
4388 if (!NT_SUCCESS(Status
)) {
4389 ERR("insert_tree_item_batch returned %08x\n", Status
);
4394 return STATUS_SUCCESS
;
/* insert_tree_item_batch: records a pending tree operation in `batchlist`.
 *
 *   batchlist            - list of batch_root entries
 *   Vcb                  - volume; owns the batch_item lookaside list
 *   r                    - root the operation applies to
 *   objid/objtype/offset - key of the item
 *   data, datalen        - optional payload; per the annotation on `data`
 *                          the memory is aliased when the call succeeds
 *   operation            - which batch_operation to perform
 *
 * A batch_root is looked up in batchlist and, if none is used, a new one is
 * allocated and appended. A batch_item is then taken from the lookaside list
 * and inserted into the batch_root's item list so that it follows the last
 * existing item whose key is lower, or whose key is equal and whose operation
 * value is not greater than the new one.
 *
 * Returns STATUS_SUCCESS, or STATUS_INSUFFICIENT_RESOURCES after either
 * "out of memory" message.
 * NOTE(review): the test that matches an existing batch_root, the assignments
 * of br->r and bi->data, and the backwards list step are not visible in this
 * listing. */
4398 #pragma warning(push)
4399 #pragma warning(suppress: 28194)
4401 NTSTATUS
insert_tree_item_batch(LIST_ENTRY
* batchlist
, device_extension
* Vcb
, root
* r
, uint64_t objid
, uint8_t objtype
, uint64_t offset
,
4402 _In_opt_
_When_(return >= 0, __drv_aliasesMem
) void* data
, uint16_t datalen
, enum batch_operation operation
) {
4404 batch_root
* br
= NULL
;
4407 le
= batchlist
->Flink
;
4408 while (le
!= batchlist
) {
4409 batch_root
* br2
= CONTAINING_RECORD(le
, batch_root
, list_entry
);
4420 br
= ExAllocatePoolWithTag(PagedPool
, sizeof(batch_root
), ALLOC_TAG
);
4422 ERR("out of memory\n");
4423 return STATUS_INSUFFICIENT_RESOURCES
;
4427 InitializeListHead(&br
->items
);
4428 InsertTailList(batchlist
, &br
->list_entry
);
4431 bi
= ExAllocateFromPagedLookasideList(&Vcb
->batch_item_lookaside
);
4433 ERR("out of memory\n");
4434 return STATUS_INSUFFICIENT_RESOURCES
;
4437 bi
->key
.obj_id
= objid
;
4438 bi
->key
.obj_type
= objtype
;
4439 bi
->key
.offset
= offset
;
4441 bi
->datalen
= datalen
;
4442 bi
->operation
= operation
;
// Scan starts at the tail (Blink) of the item list.
4444 le
= br
->items
.Blink
;
4445 while (le
!= &br
->items
) {
4446 batch_item
* bi2
= CONTAINING_RECORD(le
, batch_item
, list_entry
);
4447 int cmp
= keycmp(bi2
->key
, bi
->key
);
// InsertHeadList on bi2's own entry links the new item directly after bi2.
4449 if (cmp
== -1 || (cmp
== 0 && bi
->operation
>= bi2
->operation
)) {
4450 InsertHeadList(&bi2
->list_entry
, &bi
->list_entry
);
4451 return STATUS_SUCCESS
;
// No suitable predecessor found: the new item becomes the first in the list.
4457 InsertHeadList(&br
->items
, &bi
->list_entry
);
4459 return STATUS_SUCCESS
;
4462 #pragma warning(pop)
// NOTE(review): these two members appear to belong to the extent_range
// structure used by rationalize_extents (which reads er->skip_start and walks
// entries through list_entry); the rest of the declaration is not visible in
// this listing.
4471 uint64_t skip_start
;
4473 LIST_ENTRY list_entry
;
/* rationalize_extents: tidies the on-disk extents referenced by fcb's extent
 * list. Only extents that are REGULAR or PREALLOC, uncompressed, marked
 * unique and with a non-zero ed2->size are considered.
 *
 * Visible phases, in order:
 *   1. Build a temporary list of extent_range records, one per distinct
 *      extent address, tracking the smallest unused prefix (skip_start) and
 *      suffix (skip_end) over all file extents that refer to that address.
 *   2. Resolve the chunk for every range via get_chunk_from_address.
 *   3. For ranges with an unused prefix or suffix: move the references of the
 *      matching file extents to the trimmed extent, queue a checksum entry
 *      with NULL data for the trimmed sectors (unless the inode has
 *      NODATASUM), and, under the chunk lock, reduce chunk->used and hand
 *      the trimmed bytes to space_list_add.
 *   4. Merge ranges that are adjacent in the same chunk, up to
 *      MAX_EXTENT_SIZE.
 *   5. Repoint file extents that fall inside a changed range at that range.
 *   6. Empty the temporary list.
 *
 * NOTE(review): many statements (loop advances, gotos/returns after ERR(),
 * the increments of num_extents, the setting of `truncating`/`changed`, and
 * the frees) are not visible in this listing; the inline notes below describe
 * only what is shown. */
4476 static void rationalize_extents(fcb
* fcb
, PIRP Irp
) {
4478 LIST_ENTRY extent_ranges
;
4480 bool changed
= false, truncating
= false;
4481 uint32_t num_extents
= 0;
4483 InitializeListHead(&extent_ranges
);
// Phase 1: collect one extent_range per extent address.
4485 le
= fcb
->extents
.Flink
;
4486 while (le
!= &fcb
->extents
) {
4487 extent
* ext
= CONTAINING_RECORD(le
, extent
, list_entry
);
4489 if ((ext
->extent_data
.type
== EXTENT_TYPE_REGULAR
|| ext
->extent_data
.type
== EXTENT_TYPE_PREALLOC
) && ext
->extent_data
.compression
== BTRFS_COMPRESSION_NONE
&& ext
->unique
) {
4490 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ext
->extent_data
.data
;
4492 if (ed2
->size
!= 0) {
4495 le2
= extent_ranges
.Flink
;
4496 while (le2
!= &extent_ranges
) {
4497 extent_range
* er2
= CONTAINING_RECORD(le2
, extent_range
, list_entry
);
// Address already known: keep the smallest unused prefix and suffix seen so far.
4499 if (er2
->address
== ed2
->address
) {
4500 er2
->skip_start
= min(er2
->skip_start
, ed2
->offset
);
4501 er2
->skip_end
= min(er2
->skip_end
, ed2
->size
- ed2
->offset
- ed2
->num_bytes
);
4503 } else if (er2
->address
> ed2
->address
)
4509 er
= ExAllocatePoolWithTag(PagedPool
, sizeof(extent_range
), ALLOC_TAG
); // FIXME - should be from lookaside?
4511 ERR("out of memory\n");
4515 er
->address
= ed2
->address
;
4516 er
->length
= ed2
->size
;
// File offset that corresponds to the first byte of the on-disk extent.
4517 er
->offset
= ext
->offset
- ed2
->offset
;
4518 er
->changed
= false;
4520 er
->skip_start
= ed2
->offset
;
4521 er
->skip_end
= ed2
->size
- ed2
->offset
- ed2
->num_bytes
;
4523 if (er
->skip_start
!= 0 || er
->skip_end
!= 0)
// Links the new range immediately before le2.
4526 InsertHeadList(le2
->Blink
, &er
->list_entry
);
4535 if (num_extents
== 0 || (num_extents
== 1 && !truncating
))
// Phase 2: look up the chunk of each range, and share the result with later
// ranges whose address lies inside the same chunk.
4538 le
= extent_ranges
.Flink
;
4539 while (le
!= &extent_ranges
) {
4540 er
= CONTAINING_RECORD(le
, extent_range
, list_entry
);
4545 er
->chunk
= get_chunk_from_address(fcb
->Vcb
, er
->address
);
4548 ERR("get_chunk_from_address(%I64x) failed\n", er
->address
);
4553 while (le2
!= &extent_ranges
) {
4554 extent_range
* er2
= CONTAINING_RECORD(le2
, extent_range
, list_entry
);
4556 if (!er2
->chunk
&& er2
->address
>= er
->chunk
->offset
&& er2
->address
< er
->chunk
->offset
+ er
->chunk
->chunk_item
->size
)
4557 er2
->chunk
= er
->chunk
;
4567 // truncate beginning or end of extent if unused
4569 le
= extent_ranges
.Flink
;
4570 while (le
!= &extent_ranges
) {
4571 er
= CONTAINING_RECORD(le
, extent_range
, list_entry
);
// Unused prefix: every file extent on this address is re-referenced against
// an extent that starts skip_start bytes later.
4573 if (er
->skip_start
> 0) {
4574 LIST_ENTRY
* le2
= fcb
->extents
.Flink
;
4575 while (le2
!= &fcb
->extents
) {
4576 extent
* ext
= CONTAINING_RECORD(le2
, extent
, list_entry
);
4578 if ((ext
->extent_data
.type
== EXTENT_TYPE_REGULAR
|| ext
->extent_data
.type
== EXTENT_TYPE_PREALLOC
) && ext
->extent_data
.compression
== BTRFS_COMPRESSION_NONE
&& ext
->unique
) {
4579 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ext
->extent_data
.data
;
4581 if (ed2
->size
!= 0 && ed2
->address
== er
->address
) {
// Drop one reference on the old extent description ...
4584 Status
= update_changed_extent_ref(fcb
->Vcb
, er
->chunk
, ed2
->address
, ed2
->size
, fcb
->subvol
->id
, fcb
->inode
, ext
->offset
- ed2
->offset
,
4585 -1, fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
, true, Irp
);
4586 if (!NT_SUCCESS(Status
)) {
4587 ERR("update_changed_extent_ref returned %08x\n", Status
);
4591 ext
->extent_data
.decoded_size
-= er
->skip_start
;
4592 ed2
->size
-= er
->skip_start
;
4593 ed2
->address
+= er
->skip_start
;
4594 ed2
->offset
-= er
->skip_start
;
// ... and add one on the shifted, shortened description.
4596 add_changed_extent_ref(er
->chunk
, ed2
->address
, ed2
->size
, fcb
->subvol
->id
, fcb
->inode
, ext
->offset
- ed2
->offset
,
4597 1, fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
);
// Checksum entry with NULL data for the trimmed sectors (skipped for NODATASUM inodes).
4604 if (!(fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
))
4605 add_checksum_entry(fcb
->Vcb
, er
->address
, (ULONG
)(er
->skip_start
/ fcb
->Vcb
->superblock
.sector_size
), NULL
, NULL
);
// Space accounting is done with the chunk lock held.
4607 acquire_chunk_lock(er
->chunk
, fcb
->Vcb
);
4609 if (!er
->chunk
->cache_loaded
) {
4610 NTSTATUS Status
= load_cache_chunk(fcb
->Vcb
, er
->chunk
, NULL
);
4612 if (!NT_SUCCESS(Status
)) {
4613 ERR("load_cache_chunk returned %08x\n", Status
);
4614 release_chunk_lock(er
->chunk
, fcb
->Vcb
);
4619 er
->chunk
->used
-= er
->skip_start
;
4621 space_list_add(er
->chunk
, er
->address
, er
->skip_start
, NULL
);
4623 release_chunk_lock(er
->chunk
, fcb
->Vcb
);
// The range itself now starts after the trimmed prefix.
4625 er
->address
+= er
->skip_start
;
4626 er
->length
-= er
->skip_start
;
// Unused suffix: same procedure, but only the size shrinks; address and
// offset of the file extents are unchanged.
4629 if (er
->skip_end
> 0) {
4630 LIST_ENTRY
* le2
= fcb
->extents
.Flink
;
4631 while (le2
!= &fcb
->extents
) {
4632 extent
* ext
= CONTAINING_RECORD(le2
, extent
, list_entry
);
4634 if ((ext
->extent_data
.type
== EXTENT_TYPE_REGULAR
|| ext
->extent_data
.type
== EXTENT_TYPE_PREALLOC
) && ext
->extent_data
.compression
== BTRFS_COMPRESSION_NONE
&& ext
->unique
) {
4635 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ext
->extent_data
.data
;
4637 if (ed2
->size
!= 0 && ed2
->address
== er
->address
) {
4640 Status
= update_changed_extent_ref(fcb
->Vcb
, er
->chunk
, ed2
->address
, ed2
->size
, fcb
->subvol
->id
, fcb
->inode
, ext
->offset
- ed2
->offset
,
4641 -1, fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
, true, Irp
);
4642 if (!NT_SUCCESS(Status
)) {
4643 ERR("update_changed_extent_ref returned %08x\n", Status
);
4647 ext
->extent_data
.decoded_size
-= er
->skip_end
;
4648 ed2
->size
-= er
->skip_end
;
4650 add_changed_extent_ref(er
->chunk
, ed2
->address
, ed2
->size
, fcb
->subvol
->id
, fcb
->inode
, ext
->offset
- ed2
->offset
,
4651 1, fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
);
// The trimmed region is the last skip_end bytes of the range.
4658 if (!(fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
))
4659 add_checksum_entry(fcb
->Vcb
, er
->address
+ er
->length
- er
->skip_end
, (ULONG
)(er
->skip_end
/ fcb
->Vcb
->superblock
.sector_size
), NULL
, NULL
);
4661 acquire_chunk_lock(er
->chunk
, fcb
->Vcb
);
4663 if (!er
->chunk
->cache_loaded
) {
4664 NTSTATUS Status
= load_cache_chunk(fcb
->Vcb
, er
->chunk
, NULL
);
4666 if (!NT_SUCCESS(Status
)) {
4667 ERR("load_cache_chunk returned %08x\n", Status
);
4668 release_chunk_lock(er
->chunk
, fcb
->Vcb
);
4673 er
->chunk
->used
-= er
->skip_end
;
4675 space_list_add(er
->chunk
, er
->address
+ er
->length
- er
->skip_end
, er
->skip_end
, NULL
);
4677 release_chunk_lock(er
->chunk
, fcb
->Vcb
);
4679 er
->length
-= er
->skip_end
;
4686 if (num_extents
< 2)
4689 // merge together adjacent extents
4690 le
= extent_ranges
.Flink
;
4691 while (le
!= &extent_ranges
) {
4692 er
= CONTAINING_RECORD(le
, extent_range
, list_entry
);
4694 if (le
->Flink
!= &extent_ranges
&& er
->length
< MAX_EXTENT_SIZE
) {
4695 extent_range
* er2
= CONTAINING_RECORD(le
->Flink
, extent_range
, list_entry
);
// Merge only within one chunk, when the next range starts exactly where this
// one ends on disk, its file offset does not precede the end of this one, and
// the combined length stays within MAX_EXTENT_SIZE.
4697 if (er
->chunk
== er2
->chunk
) {
4698 if (er2
->address
== er
->address
+ er
->length
&& er2
->offset
>= er
->offset
+ er
->length
) {
4699 if (er
->length
+ er2
->length
<= MAX_EXTENT_SIZE
) {
4700 er
->length
+= er2
->length
;
4703 RemoveEntryList(&er2
->list_entry
);
// Phase 5: repoint file extents lying wholly inside a changed range at the
// range's address and length, moving the extent references accordingly.
4719 le
= fcb
->extents
.Flink
;
4720 while (le
!= &fcb
->extents
) {
4721 extent
* ext
= CONTAINING_RECORD(le
, extent
, list_entry
);
4723 if ((ext
->extent_data
.type
== EXTENT_TYPE_REGULAR
|| ext
->extent_data
.type
== EXTENT_TYPE_PREALLOC
) && ext
->extent_data
.compression
== BTRFS_COMPRESSION_NONE
&& ext
->unique
) {
4724 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ext
->extent_data
.data
;
4726 if (ed2
->size
!= 0) {
4729 le2
= extent_ranges
.Flink
;
4730 while (le2
!= &extent_ranges
) {
4731 extent_range
* er2
= CONTAINING_RECORD(le2
, extent_range
, list_entry
);
4733 if (ed2
->address
>= er2
->address
&& ed2
->address
+ ed2
->size
<= er2
->address
+ er2
->length
&& er2
->changed
) {
4736 Status
= update_changed_extent_ref(fcb
->Vcb
, er2
->chunk
, ed2
->address
, ed2
->size
, fcb
->subvol
->id
, fcb
->inode
, ext
->offset
- ed2
->offset
,
4737 -1, fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
, true, Irp
);
4738 if (!NT_SUCCESS(Status
)) {
4739 ERR("update_changed_extent_ref returned %08x\n", Status
);
// The offset grows by the distance between the old extent start and the
// range start, so it is now relative to the range.
4743 ed2
->offset
+= ed2
->address
- er2
->address
;
4744 ed2
->address
= er2
->address
;
4745 ed2
->size
= er2
->length
;
4746 ext
->extent_data
.decoded_size
= ed2
->size
;
4748 add_changed_extent_ref(er2
->chunk
, ed2
->address
, ed2
->size
, fcb
->subvol
->id
, fcb
->inode
, ext
->offset
- ed2
->offset
,
4749 1, fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
);
// Phase 6: drain the temporary range list.
4763 while (!IsListEmpty(&extent_ranges
)) {
4764 le
= RemoveHeadList(&extent_ranges
);
4765 er
= CONTAINING_RECORD(le
, extent_range
, list_entry
);
4771 NTSTATUS
flush_fcb(fcb
* fcb
, bool cache
, LIST_ENTRY
* batchlist
, PIRP Irp
) {
4777 #ifdef DEBUG_PARANOID
4778 uint64_t old_size
= 0;
4779 bool extents_changed
;
4784 Status
= delete_xattr(fcb
->Vcb
, batchlist
, fcb
->subvol
, fcb
->inode
, fcb
->adsxattr
.Buffer
, fcb
->adsxattr
.Length
, fcb
->adshash
);
4785 if (!NT_SUCCESS(Status
)) {
4786 ERR("delete_xattr returned %08x\n", Status
);
4790 Status
= set_xattr(fcb
->Vcb
, batchlist
, fcb
->subvol
, fcb
->inode
, fcb
->adsxattr
.Buffer
, fcb
->adsxattr
.Length
,
4791 fcb
->adshash
, (uint8_t*)fcb
->adsdata
.Buffer
, fcb
->adsdata
.Length
);
4792 if (!NT_SUCCESS(Status
)) {
4793 ERR("set_xattr returned %08x\n", Status
);
4798 Status
= STATUS_SUCCESS
;
4803 Status
= insert_tree_item_batch(batchlist
, fcb
->Vcb
, fcb
->subvol
, fcb
->inode
, TYPE_INODE_ITEM
, 0xffffffffffffffff, NULL
, 0, Batch_DeleteInode
);
4804 if (!NT_SUCCESS(Status
)) {
4805 ERR("insert_tree_item_batch returned %08x\n", Status
);
4809 if (fcb
->marked_as_orphan
) {
4810 Status
= insert_tree_item_batch(batchlist
, fcb
->Vcb
, fcb
->subvol
, BTRFS_ORPHAN_INODE_OBJID
, TYPE_ORPHAN_INODE
,
4811 fcb
->inode
, NULL
, 0, Batch_Delete
);
4812 if (!NT_SUCCESS(Status
)) {
4813 ERR("insert_tree_item_batch returned %08x\n", Status
);
4818 Status
= STATUS_SUCCESS
;
4822 #ifdef DEBUG_PARANOID
4823 extents_changed
= fcb
->extents_changed
;
4826 if (fcb
->extents_changed
) {
4828 bool prealloc
= false, extents_inline
= false;
4831 // delete ignored extent items
4832 le
= fcb
->extents
.Flink
;
4833 while (le
!= &fcb
->extents
) {
4834 LIST_ENTRY
* le2
= le
->Flink
;
4835 extent
* ext
= CONTAINING_RECORD(le
, extent
, list_entry
);
4838 RemoveEntryList(&ext
->list_entry
);
4841 ExFreePool(ext
->csum
);
4849 le
= fcb
->extents
.Flink
;
4850 while (le
!= &fcb
->extents
) {
4851 extent
* ext
= CONTAINING_RECORD(le
, extent
, list_entry
);
4853 if (ext
->inserted
&& ext
->csum
&& ext
->extent_data
.type
== EXTENT_TYPE_REGULAR
) {
4854 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ext
->extent_data
.data
;
4856 if (ed2
->size
> 0) { // not sparse
4857 if (ext
->extent_data
.compression
== BTRFS_COMPRESSION_NONE
)
4858 add_checksum_entry(fcb
->Vcb
, ed2
->address
+ ed2
->offset
, (ULONG
)(ed2
->num_bytes
/ fcb
->Vcb
->superblock
.sector_size
), ext
->csum
, Irp
);
4860 add_checksum_entry(fcb
->Vcb
, ed2
->address
, (ULONG
)(ed2
->size
/ fcb
->Vcb
->superblock
.sector_size
), ext
->csum
, Irp
);
4867 if (!IsListEmpty(&fcb
->extents
)) {
4868 rationalize_extents(fcb
, Irp
);
4870 // merge together adjacent EXTENT_DATAs pointing to same extent
4872 le
= fcb
->extents
.Flink
;
4873 while (le
!= &fcb
->extents
) {
4874 LIST_ENTRY
* le2
= le
->Flink
;
4875 extent
* ext
= CONTAINING_RECORD(le
, extent
, list_entry
);
4877 if ((ext
->extent_data
.type
== EXTENT_TYPE_REGULAR
|| ext
->extent_data
.type
== EXTENT_TYPE_PREALLOC
) && le
->Flink
!= &fcb
->extents
) {
4878 extent
* nextext
= CONTAINING_RECORD(le
->Flink
, extent
, list_entry
);
4880 if (ext
->extent_data
.type
== nextext
->extent_data
.type
) {
4881 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ext
->extent_data
.data
;
4882 EXTENT_DATA2
* ned2
= (EXTENT_DATA2
*)nextext
->extent_data
.data
;
4884 if (ed2
->size
!= 0 && ed2
->address
== ned2
->address
&& ed2
->size
== ned2
->size
&&
4885 nextext
->offset
== ext
->offset
+ ed2
->num_bytes
&& ned2
->offset
== ed2
->offset
+ ed2
->num_bytes
) {
4888 if (ext
->extent_data
.compression
== BTRFS_COMPRESSION_NONE
&& ext
->csum
) {
4889 ULONG len
= (ULONG
)((ed2
->num_bytes
+ ned2
->num_bytes
) / fcb
->Vcb
->superblock
.sector_size
);
4892 csum
= ExAllocatePoolWithTag(NonPagedPool
, len
* sizeof(uint32_t), ALLOC_TAG
);
4894 ERR("out of memory\n");
4895 Status
= STATUS_INSUFFICIENT_RESOURCES
;
4899 RtlCopyMemory(csum
, ext
->csum
, (ULONG
)(ed2
->num_bytes
* sizeof(uint32_t) / fcb
->Vcb
->superblock
.sector_size
));
4900 RtlCopyMemory(&csum
[ed2
->num_bytes
/ fcb
->Vcb
->superblock
.sector_size
], nextext
->csum
,
4901 (ULONG
)(ned2
->num_bytes
* sizeof(uint32_t) / fcb
->Vcb
->superblock
.sector_size
));
4903 ExFreePool(ext
->csum
);
4907 ext
->extent_data
.generation
= fcb
->Vcb
->superblock
.generation
;
4908 ed2
->num_bytes
+= ned2
->num_bytes
;
4910 RemoveEntryList(&nextext
->list_entry
);
4913 ExFreePool(nextext
->csum
);
4915 ExFreePool(nextext
);
4917 c
= get_chunk_from_address(fcb
->Vcb
, ed2
->address
);
4920 ERR("get_chunk_from_address(%I64x) failed\n", ed2
->address
);
4922 Status
= update_changed_extent_ref(fcb
->Vcb
, c
, ed2
->address
, ed2
->size
, fcb
->subvol
->id
, fcb
->inode
, ext
->offset
- ed2
->offset
, -1,
4923 fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
, false, Irp
);
4924 if (!NT_SUCCESS(Status
)) {
4925 ERR("update_changed_extent_ref returned %08x\n", Status
);
4939 if (!fcb
->created
) {
4940 // delete existing EXTENT_DATA items
4942 Status
= insert_tree_item_batch(batchlist
, fcb
->Vcb
, fcb
->subvol
, fcb
->inode
, TYPE_EXTENT_DATA
, 0, NULL
, 0, Batch_DeleteExtentData
);
4943 if (!NT_SUCCESS(Status
)) {
4944 ERR("insert_tree_item_batch returned %08x\n", Status
);
4949 // add new EXTENT_DATAs
4953 le
= fcb
->extents
.Flink
;
4954 while (le
!= &fcb
->extents
) {
4955 extent
* ext
= CONTAINING_RECORD(le
, extent
, list_entry
);
4958 ext
->inserted
= false;
4960 if (!(fcb
->Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_NO_HOLES
) && ext
->offset
> last_end
) {
4961 Status
= insert_sparse_extent(fcb
, batchlist
, last_end
, ext
->offset
- last_end
);
4962 if (!NT_SUCCESS(Status
)) {
4963 ERR("insert_sparse_extent returned %08x\n", Status
);
4968 ed
= ExAllocatePoolWithTag(PagedPool
, ext
->datalen
, ALLOC_TAG
);
4970 ERR("out of memory\n");
4971 Status
= STATUS_INSUFFICIENT_RESOURCES
;
4975 RtlCopyMemory(ed
, &ext
->extent_data
, ext
->datalen
);
4977 Status
= insert_tree_item_batch(batchlist
, fcb
->Vcb
, fcb
->subvol
, fcb
->inode
, TYPE_EXTENT_DATA
, ext
->offset
,
4978 ed
, ext
->datalen
, Batch_Insert
);
4979 if (!NT_SUCCESS(Status
)) {
4980 ERR("insert_tree_item_batch returned %08x\n", Status
);
4984 if (ed
->type
== EXTENT_TYPE_PREALLOC
)
4987 if (ed
->type
== EXTENT_TYPE_INLINE
)
4988 extents_inline
= true;
4990 if (!(fcb
->Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_NO_HOLES
)) {
4991 if (ed
->type
== EXTENT_TYPE_INLINE
)
4992 last_end
= ext
->offset
+ ed
->decoded_size
;
4994 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
4996 last_end
= ext
->offset
+ ed2
->num_bytes
;
5003 if (!(fcb
->Vcb
->superblock
.incompat_flags
& BTRFS_INCOMPAT_FLAGS_NO_HOLES
) && !extents_inline
&&
5004 sector_align(fcb
->inode_item
.st_size
, fcb
->Vcb
->superblock
.sector_size
) > last_end
) {
5005 Status
= insert_sparse_extent(fcb
, batchlist
, last_end
, sector_align(fcb
->inode_item
.st_size
, fcb
->Vcb
->superblock
.sector_size
) - last_end
);
5006 if (!NT_SUCCESS(Status
)) {
5007 ERR("insert_sparse_extent returned %08x\n", Status
);
5012 // update prealloc flag in INODE_ITEM
5015 fcb
->inode_item
.flags
&= ~BTRFS_INODE_PREALLOC
;
5017 fcb
->inode_item
.flags
|= BTRFS_INODE_PREALLOC
;
5019 fcb
->inode_item_changed
= true;
5021 fcb
->extents_changed
= false;
5024 if ((!fcb
->created
&& fcb
->inode_item_changed
) || cache
) {
5025 searchkey
.obj_id
= fcb
->inode
;
5026 searchkey
.obj_type
= TYPE_INODE_ITEM
;
5027 searchkey
.offset
= 0xffffffffffffffff;
5029 Status
= find_item(fcb
->Vcb
, fcb
->subvol
, &tp
, &searchkey
, false, Irp
);
5030 if (!NT_SUCCESS(Status
)) {
5031 ERR("error - find_item returned %08x\n", Status
);
5035 if (tp
.item
->key
.obj_id
!= searchkey
.obj_id
|| tp
.item
->key
.obj_type
!= searchkey
.obj_type
) {
5037 ii
= ExAllocatePoolWithTag(PagedPool
, sizeof(INODE_ITEM
), ALLOC_TAG
);
5039 ERR("out of memory\n");
5040 Status
= STATUS_INSUFFICIENT_RESOURCES
;
5044 RtlCopyMemory(ii
, &fcb
->inode_item
, sizeof(INODE_ITEM
));
5046 Status
= insert_tree_item(fcb
->Vcb
, fcb
->subvol
, fcb
->inode
, TYPE_INODE_ITEM
, 0, ii
, sizeof(INODE_ITEM
), NULL
, Irp
);
5047 if (!NT_SUCCESS(Status
)) {
5048 ERR("insert_tree_item returned %08x\n", Status
);
5054 ERR("could not find INODE_ITEM for inode %I64x in subvol %I64x\n", fcb
->inode
, fcb
->subvol
->id
);
5055 Status
= STATUS_INTERNAL_ERROR
;
5059 #ifdef DEBUG_PARANOID
5060 INODE_ITEM
* ii2
= (INODE_ITEM
*)tp
.item
->data
;
5062 old_size
= ii2
->st_size
;
5065 ii_offset
= tp
.item
->key
.offset
;
5069 Status
= delete_tree_item(fcb
->Vcb
, &tp
);
5070 if (!NT_SUCCESS(Status
)) {
5071 ERR("delete_tree_item returned %08x\n", Status
);
5075 searchkey
.obj_id
= fcb
->inode
;
5076 searchkey
.obj_type
= TYPE_INODE_ITEM
;
5077 searchkey
.offset
= ii_offset
;
5079 Status
= find_item(fcb
->Vcb
, fcb
->subvol
, &tp
, &searchkey
, false, Irp
);
5080 if (!NT_SUCCESS(Status
)) {
5081 ERR("error - find_item returned %08x\n", Status
);
5085 if (keycmp(tp
.item
->key
, searchkey
)) {
5086 ERR("could not find INODE_ITEM for inode %I64x in subvol %I64x\n", fcb
->inode
, fcb
->subvol
->id
);
5087 Status
= STATUS_INTERNAL_ERROR
;
5090 RtlCopyMemory(tp
.item
->data
, &fcb
->inode_item
, min(tp
.item
->size
, sizeof(INODE_ITEM
)));
5093 #ifdef DEBUG_PARANOID
5094 if (!extents_changed
&& fcb
->type
!= BTRFS_TYPE_DIRECTORY
&& old_size
!= fcb
->inode_item
.st_size
) {
5095 ERR("error - size has changed but extents not marked as changed\n");
5102 fcb
->created
= false;
5104 if (!cache
&& fcb
->inode_item_changed
) {
5105 ii
= ExAllocatePoolWithTag(PagedPool
, sizeof(INODE_ITEM
), ALLOC_TAG
);
5107 ERR("out of memory\n");
5108 Status
= STATUS_INSUFFICIENT_RESOURCES
;
5112 RtlCopyMemory(ii
, &fcb
->inode_item
, sizeof(INODE_ITEM
));
5114 Status
= insert_tree_item_batch(batchlist
, fcb
->Vcb
, fcb
->subvol
, fcb
->inode
, TYPE_INODE_ITEM
, ii_offset
, ii
, sizeof(INODE_ITEM
),
5116 if (!NT_SUCCESS(Status
)) {
5117 ERR("insert_tree_item_batch returned %08x\n", Status
);
5121 fcb
->inode_item_changed
= false;
5124 if (fcb
->sd_dirty
) {
5125 if (!fcb
->sd_deleted
) {
5126 Status
= set_xattr(fcb
->Vcb
, batchlist
, fcb
->subvol
, fcb
->inode
, EA_NTACL
, sizeof(EA_NTACL
) - 1,
5127 EA_NTACL_HASH
, (uint8_t*)fcb
->sd
, (uint16_t)RtlLengthSecurityDescriptor(fcb
->sd
));
5128 if (!NT_SUCCESS(Status
)) {
5129 ERR("set_xattr returned %08x\n", Status
);
5133 Status
= delete_xattr(fcb
->Vcb
, batchlist
, fcb
->subvol
, fcb
->inode
, EA_NTACL
, sizeof(EA_NTACL
) - 1, EA_NTACL_HASH
);
5134 if (!NT_SUCCESS(Status
)) {
5135 ERR("delete_xattr returned %08x\n", Status
);
5140 fcb
->sd_deleted
= false;
5141 fcb
->sd_dirty
= false;
5144 if (fcb
->atts_changed
) {
5145 if (!fcb
->atts_deleted
) {
5146 uint8_t val
[16], *val2
;
5147 ULONG atts
= fcb
->atts
;
5149 TRACE("inserting new DOSATTRIB xattr\n");
5151 if (fcb
->inode
== SUBVOL_ROOT_INODE
)
5152 atts
&= ~FILE_ATTRIBUTE_READONLY
;
5154 val2
= &val
[sizeof(val
) - 1];
5157 uint8_t c
= atts
% 16;
5158 *val2
= c
<= 9 ? (c
+ '0') : (c
- 0xa + 'a');
5162 } while (atts
!= 0);
5168 Status
= set_xattr(fcb
->Vcb
, batchlist
, fcb
->subvol
, fcb
->inode
, EA_DOSATTRIB
, sizeof(EA_DOSATTRIB
) - 1,
5169 EA_DOSATTRIB_HASH
, val2
, (uint16_t)(val
+ sizeof(val
) - val2
));
5170 if (!NT_SUCCESS(Status
)) {
5171 ERR("set_xattr returned %08x\n", Status
);
5175 Status
= delete_xattr(fcb
->Vcb
, batchlist
, fcb
->subvol
, fcb
->inode
, EA_DOSATTRIB
, sizeof(EA_DOSATTRIB
) - 1, EA_DOSATTRIB_HASH
);
5176 if (!NT_SUCCESS(Status
)) {
5177 ERR("delete_xattr returned %08x\n", Status
);
5182 fcb
->atts_changed
= false;
5183 fcb
->atts_deleted
= false;
5186 if (fcb
->reparse_xattr_changed
) {
5187 if (fcb
->reparse_xattr
.Buffer
&& fcb
->reparse_xattr
.Length
> 0) {
5188 Status
= set_xattr(fcb
->Vcb
, batchlist
, fcb
->subvol
, fcb
->inode
, EA_REPARSE
, sizeof(EA_REPARSE
) - 1,
5189 EA_REPARSE_HASH
, (uint8_t*)fcb
->reparse_xattr
.Buffer
, (uint16_t)fcb
->reparse_xattr
.Length
);
5190 if (!NT_SUCCESS(Status
)) {
5191 ERR("set_xattr returned %08x\n", Status
);
5195 Status
= delete_xattr(fcb
->Vcb
, batchlist
, fcb
->subvol
, fcb
->inode
, EA_REPARSE
, sizeof(EA_REPARSE
) - 1, EA_REPARSE_HASH
);
5196 if (!NT_SUCCESS(Status
)) {
5197 ERR("delete_xattr returned %08x\n", Status
);
5202 fcb
->reparse_xattr_changed
= false;
5205 if (fcb
->ea_changed
) {
5206 if (fcb
->ea_xattr
.Buffer
&& fcb
->ea_xattr
.Length
> 0) {
5207 Status
= set_xattr(fcb
->Vcb
, batchlist
, fcb
->subvol
, fcb
->inode
, EA_EA
, sizeof(EA_EA
) - 1,
5208 EA_EA_HASH
, (uint8_t*)fcb
->ea_xattr
.Buffer
, (uint16_t)fcb
->ea_xattr
.Length
);
5209 if (!NT_SUCCESS(Status
)) {
5210 ERR("set_xattr returned %08x\n", Status
);
5214 Status
= delete_xattr(fcb
->Vcb
, batchlist
, fcb
->subvol
, fcb
->inode
, EA_EA
, sizeof(EA_EA
) - 1, EA_EA_HASH
);
5215 if (!NT_SUCCESS(Status
)) {
5216 ERR("delete_xattr returned %08x\n", Status
);
5221 fcb
->ea_changed
= false;
5224 if (fcb
->prop_compression_changed
) {
5225 if (fcb
->prop_compression
== PropCompression_None
) {
5226 Status
= delete_xattr(fcb
->Vcb
, batchlist
, fcb
->subvol
, fcb
->inode
, EA_PROP_COMPRESSION
, sizeof(EA_PROP_COMPRESSION
) - 1, EA_PROP_COMPRESSION_HASH
);
5227 if (!NT_SUCCESS(Status
)) {
5228 ERR("delete_xattr returned %08x\n", Status
);
5231 } else if (fcb
->prop_compression
== PropCompression_Zlib
) {
5232 static const char zlib
[] = "zlib";
5234 Status
= set_xattr(fcb
->Vcb
, batchlist
, fcb
->subvol
, fcb
->inode
, EA_PROP_COMPRESSION
, sizeof(EA_PROP_COMPRESSION
) - 1,
5235 EA_PROP_COMPRESSION_HASH
, (uint8_t*)zlib
, sizeof(zlib
) - 1);
5236 if (!NT_SUCCESS(Status
)) {
5237 ERR("set_xattr returned %08x\n", Status
);
5240 } else if (fcb
->prop_compression
== PropCompression_LZO
) {
5241 static const char lzo
[] = "lzo";
5243 Status
= set_xattr(fcb
->Vcb
, batchlist
, fcb
->subvol
, fcb
->inode
, EA_PROP_COMPRESSION
, sizeof(EA_PROP_COMPRESSION
) - 1,
5244 EA_PROP_COMPRESSION_HASH
, (uint8_t*)lzo
, sizeof(lzo
) - 1);
5245 if (!NT_SUCCESS(Status
)) {
5246 ERR("set_xattr returned %08x\n", Status
);
5249 } else if (fcb
->prop_compression
== PropCompression_ZSTD
) {
5250 static const char zstd
[] = "zstd";
5252 Status
= set_xattr(fcb
->Vcb
, batchlist
, fcb
->subvol
, fcb
->inode
, EA_PROP_COMPRESSION
, sizeof(EA_PROP_COMPRESSION
) - 1,
5253 EA_PROP_COMPRESSION_HASH
, (uint8_t*)zstd
, sizeof(zstd
) - 1);
5254 if (!NT_SUCCESS(Status
)) {
5255 ERR("set_xattr returned %08x\n", Status
);
5260 fcb
->prop_compression_changed
= false;
5263 if (fcb
->xattrs_changed
) {
5266 le
= fcb
->xattrs
.Flink
;
5267 while (le
!= &fcb
->xattrs
) {
5268 xattr
* xa
= CONTAINING_RECORD(le
, xattr
, list_entry
);
5269 LIST_ENTRY
* le2
= le
->Flink
;
5272 uint32_t hash
= calc_crc32c(0xfffffffe, (uint8_t*)xa
->data
, xa
->namelen
);
5274 if (xa
->valuelen
== 0) {
5275 Status
= delete_xattr(fcb
->Vcb
, batchlist
, fcb
->subvol
, fcb
->inode
, xa
->data
, xa
->namelen
, hash
);
5276 if (!NT_SUCCESS(Status
)) {
5277 ERR("delete_xattr returned %08x\n", Status
);
5281 RemoveEntryList(&xa
->list_entry
);
5284 Status
= set_xattr(fcb
->Vcb
, batchlist
, fcb
->subvol
, fcb
->inode
, xa
->data
, xa
->namelen
,
5285 hash
, (uint8_t*)&xa
->data
[xa
->namelen
], xa
->valuelen
);
5286 if (!NT_SUCCESS(Status
)) {
5287 ERR("set_xattr returned %08x\n", Status
);
5298 fcb
->xattrs_changed
= false;
5301 if ((fcb
->case_sensitive_set
&& !fcb
->case_sensitive
)) {
5302 Status
= delete_xattr(fcb
->Vcb
, batchlist
, fcb
->subvol
, fcb
->inode
, EA_CASE_SENSITIVE
,
5303 sizeof(EA_CASE_SENSITIVE
) - 1, EA_CASE_SENSITIVE_HASH
);
5304 if (!NT_SUCCESS(Status
)) {
5305 ERR("delete_xattr returned %08x\n", Status
);
5309 fcb
->case_sensitive_set
= false;
5310 } else if ((!fcb
->case_sensitive_set
&& fcb
->case_sensitive
)) {
5311 Status
= set_xattr(fcb
->Vcb
, batchlist
, fcb
->subvol
, fcb
->inode
, EA_CASE_SENSITIVE
,
5312 sizeof(EA_CASE_SENSITIVE
) - 1, EA_CASE_SENSITIVE_HASH
, (uint8_t*)"1", 1);
5313 if (!NT_SUCCESS(Status
)) {
5314 ERR("set_xattr returned %08x\n", Status
);
5318 fcb
->case_sensitive_set
= true;
5321 if (fcb
->inode_item
.st_nlink
== 0 && !fcb
->marked_as_orphan
) { // mark as orphan
5322 Status
= insert_tree_item_batch(batchlist
, fcb
->Vcb
, fcb
->subvol
, BTRFS_ORPHAN_INODE_OBJID
, TYPE_ORPHAN_INODE
,
5323 fcb
->inode
, NULL
, 0, Batch_Insert
);
5324 if (!NT_SUCCESS(Status
)) {
5325 ERR("insert_tree_item_batch returned %08x\n", Status
);
5329 fcb
->marked_as_orphan
= true;
5332 Status
= STATUS_SUCCESS
;
5340 if (!ExIsResourceAcquiredExclusiveLite(&fcb
->Vcb
->dirty_fcbs_lock
)) {
5341 ExAcquireResourceExclusiveLite(&fcb
->Vcb
->dirty_fcbs_lock
, true);
5345 RemoveEntryList(&fcb
->list_entry_dirty
);
5348 ExReleaseResourceLite(&fcb
->Vcb
->dirty_fcbs_lock
);
5354 void add_trim_entry_avoid_sb(device_extension
* Vcb
, device
* dev
, uint64_t address
, uint64_t size
) {
5356 ULONG sblen
= (ULONG
)sector_align(sizeof(superblock
), Vcb
->superblock
.sector_size
);
5359 while (superblock_addrs
[i
] != 0) {
5360 if (superblock_addrs
[i
] + sblen
>= address
&& superblock_addrs
[i
] < address
+ size
) {
5361 if (superblock_addrs
[i
] > address
)
5362 add_trim_entry(dev
, address
, superblock_addrs
[i
] - address
);
5364 if (size
<= superblock_addrs
[i
] + sblen
- address
)
5367 size
-= superblock_addrs
[i
] + sblen
- address
;
5368 address
= superblock_addrs
[i
] + sblen
;
5369 } else if (superblock_addrs
[i
] > address
+ size
)
5375 add_trim_entry(dev
, address
, size
);
5378 static NTSTATUS
drop_chunk(device_extension
* Vcb
, chunk
* c
, LIST_ENTRY
* batchlist
, PIRP Irp
, LIST_ENTRY
* rollback
) {
5386 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];;
5388 TRACE("dropping chunk %I64x\n", c
->offset
);
5390 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID0
)
5391 factor
= c
->chunk_item
->num_stripes
;
5392 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID10
)
5393 factor
= c
->chunk_item
->num_stripes
/ c
->chunk_item
->sub_stripes
;
5394 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
)
5395 factor
= c
->chunk_item
->num_stripes
- 1;
5396 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)
5397 factor
= c
->chunk_item
->num_stripes
- 2;
5398 else // SINGLE, DUPLICATE, RAID1
5402 if (Vcb
->trim
&& !Vcb
->options
.no_trim
) {
5403 uint64_t len
= c
->chunk_item
->size
/ factor
;
5405 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
5406 if (c
->devices
[i
] && c
->devices
[i
]->devobj
&& !c
->devices
[i
]->readonly
&& c
->devices
[i
]->trim
)
5407 add_trim_entry_avoid_sb(Vcb
, c
->devices
[i
], cis
[i
].offset
, len
);
5412 Status
= load_stored_free_space_cache(Vcb
, c
, true, Irp
);
5414 if (!NT_SUCCESS(Status
) && Status
!= STATUS_NOT_FOUND
)
5415 WARN("load_stored_free_space_cache returned %08x\n", Status
);
5418 // remove free space cache
5420 c
->cache
->deleted
= true;
5422 Status
= excise_extents(Vcb
, c
->cache
, 0, c
->cache
->inode_item
.st_size
, Irp
, rollback
);
5423 if (!NT_SUCCESS(Status
)) {
5424 ERR("excise_extents returned %08x\n", Status
);
5428 Status
= flush_fcb(c
->cache
, true, batchlist
, Irp
);
5432 if (c
->cache
->refcount
== 0)
5435 if (!NT_SUCCESS(Status
)) {
5436 ERR("flush_fcb returned %08x\n", Status
);
5440 searchkey
.obj_id
= FREE_SPACE_CACHE_ID
;
5441 searchkey
.obj_type
= 0;
5442 searchkey
.offset
= c
->offset
;
5444 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, Irp
);
5445 if (!NT_SUCCESS(Status
)) {
5446 ERR("error - find_item returned %08x\n", Status
);
5450 if (!keycmp(tp
.item
->key
, searchkey
)) {
5451 Status
= delete_tree_item(Vcb
, &tp
);
5452 if (!NT_SUCCESS(Status
)) {
5453 ERR("delete_tree_item returned %08x\n", Status
);
5459 if (Vcb
->space_root
) {
5460 Status
= insert_tree_item_batch(batchlist
, Vcb
, Vcb
->space_root
, c
->offset
, TYPE_FREE_SPACE_INFO
, c
->chunk_item
->size
,
5461 NULL
, 0, Batch_DeleteFreeSpace
);
5462 if (!NT_SUCCESS(Status
)) {
5463 ERR("insert_tree_item_batch returned %08x\n", Status
);
5468 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
5470 // remove DEV_EXTENTs from tree 4
5471 searchkey
.obj_id
= cis
[i
].dev_id
;
5472 searchkey
.obj_type
= TYPE_DEV_EXTENT
;
5473 searchkey
.offset
= cis
[i
].offset
;
5475 Status
= find_item(Vcb
, Vcb
->dev_root
, &tp
, &searchkey
, false, Irp
);
5476 if (!NT_SUCCESS(Status
)) {
5477 ERR("error - find_item returned %08x\n", Status
);
5481 if (!keycmp(tp
.item
->key
, searchkey
)) {
5482 Status
= delete_tree_item(Vcb
, &tp
);
5483 if (!NT_SUCCESS(Status
)) {
5484 ERR("delete_tree_item returned %08x\n", Status
);
5488 if (tp
.item
->size
>= sizeof(DEV_EXTENT
)) {
5489 DEV_EXTENT
* de
= (DEV_EXTENT
*)tp
.item
->data
;
5491 c
->devices
[i
]->devitem
.bytes_used
-= de
->length
;
5493 if (Vcb
->balance
.thread
&& Vcb
->balance
.shrinking
&& Vcb
->balance
.opts
[0].devid
== c
->devices
[i
]->devitem
.dev_id
) {
5494 if (cis
[i
].offset
< Vcb
->balance
.opts
[0].drange_start
&& cis
[i
].offset
+ de
->length
> Vcb
->balance
.opts
[0].drange_start
)
5495 space_list_add2(&c
->devices
[i
]->space
, NULL
, cis
[i
].offset
, Vcb
->balance
.opts
[0].drange_start
- cis
[i
].offset
, NULL
, rollback
);
5497 space_list_add2(&c
->devices
[i
]->space
, NULL
, cis
[i
].offset
, de
->length
, NULL
, rollback
);
5500 WARN("could not find (%I64x,%x,%I64x) in dev tree\n", searchkey
.obj_id
, searchkey
.obj_type
, searchkey
.offset
);
5502 uint64_t len
= c
->chunk_item
->size
/ factor
;
5504 c
->devices
[i
]->devitem
.bytes_used
-= len
;
5506 if (Vcb
->balance
.thread
&& Vcb
->balance
.shrinking
&& Vcb
->balance
.opts
[0].devid
== c
->devices
[i
]->devitem
.dev_id
) {
5507 if (cis
[i
].offset
< Vcb
->balance
.opts
[0].drange_start
&& cis
[i
].offset
+ len
> Vcb
->balance
.opts
[0].drange_start
)
5508 space_list_add2(&c
->devices
[i
]->space
, NULL
, cis
[i
].offset
, Vcb
->balance
.opts
[0].drange_start
- cis
[i
].offset
, NULL
, rollback
);
5510 space_list_add2(&c
->devices
[i
]->space
, NULL
, cis
[i
].offset
, len
, NULL
, rollback
);
5514 // modify DEV_ITEMs in chunk tree
5515 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
5516 if (c
->devices
[i
]) {
5520 searchkey
.obj_id
= 1;
5521 searchkey
.obj_type
= TYPE_DEV_ITEM
;
5522 searchkey
.offset
= c
->devices
[i
]->devitem
.dev_id
;
5524 Status
= find_item(Vcb
, Vcb
->chunk_root
, &tp
, &searchkey
, false, Irp
);
5525 if (!NT_SUCCESS(Status
)) {
5526 ERR("error - find_item returned %08x\n", Status
);
5530 if (!keycmp(tp
.item
->key
, searchkey
)) {
5531 Status
= delete_tree_item(Vcb
, &tp
);
5532 if (!NT_SUCCESS(Status
)) {
5533 ERR("delete_tree_item returned %08x\n", Status
);
5537 di
= ExAllocatePoolWithTag(PagedPool
, sizeof(DEV_ITEM
), ALLOC_TAG
);
5539 ERR("out of memory\n");
5540 return STATUS_INSUFFICIENT_RESOURCES
;
5543 RtlCopyMemory(di
, &c
->devices
[i
]->devitem
, sizeof(DEV_ITEM
));
5545 Status
= insert_tree_item(Vcb
, Vcb
->chunk_root
, 1, TYPE_DEV_ITEM
, c
->devices
[i
]->devitem
.dev_id
, di
, sizeof(DEV_ITEM
), NULL
, Irp
);
5546 if (!NT_SUCCESS(Status
)) {
5547 ERR("insert_tree_item returned %08x\n", Status
);
5552 for (j
= i
+ 1; j
< c
->chunk_item
->num_stripes
; j
++) {
5553 if (c
->devices
[j
] == c
->devices
[i
])
5554 c
->devices
[j
] = NULL
;
5560 // remove CHUNK_ITEM from chunk tree
5561 searchkey
.obj_id
= 0x100;
5562 searchkey
.obj_type
= TYPE_CHUNK_ITEM
;
5563 searchkey
.offset
= c
->offset
;
5565 Status
= find_item(Vcb
, Vcb
->chunk_root
, &tp
, &searchkey
, false, Irp
);
5566 if (!NT_SUCCESS(Status
)) {
5567 ERR("error - find_item returned %08x\n", Status
);
5571 if (!keycmp(tp
.item
->key
, searchkey
)) {
5572 Status
= delete_tree_item(Vcb
, &tp
);
5574 if (!NT_SUCCESS(Status
)) {
5575 ERR("delete_tree_item returned %08x\n", Status
);
5579 WARN("could not find CHUNK_ITEM for chunk %I64x\n", c
->offset
);
5581 // remove BLOCK_GROUP_ITEM from extent tree
5582 searchkey
.obj_id
= c
->offset
;
5583 searchkey
.obj_type
= TYPE_BLOCK_GROUP_ITEM
;
5584 searchkey
.offset
= 0xffffffffffffffff;
5586 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, Irp
);
5587 if (!NT_SUCCESS(Status
)) {
5588 ERR("error - find_item returned %08x\n", Status
);
5592 if (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
== searchkey
.obj_type
) {
5593 Status
= delete_tree_item(Vcb
, &tp
);
5595 if (!NT_SUCCESS(Status
)) {
5596 ERR("delete_tree_item returned %08x\n", Status
);
5600 WARN("could not find BLOCK_GROUP_ITEM for chunk %I64x\n", c
->offset
);
5603 if (c
->chunk_item
->type
& BLOCK_FLAG_SYSTEM
)
5604 remove_from_bootstrap(Vcb
, 0x100, TYPE_CHUNK_ITEM
, c
->offset
);
5606 RemoveEntryList(&c
->list_entry
);
5608 // clear raid56 incompat flag if dropping last RAID5/6 chunk
5610 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
|| c
->chunk_item
->type
& BLOCK_FLAG_RAID6
) {
5612 bool clear_flag
= true;
5614 le
= Vcb
->chunks
.Flink
;
5615 while (le
!= &Vcb
->chunks
) {
5616 chunk
* c2
= CONTAINING_RECORD(le
, chunk
, list_entry
);
5618 if (c2
->chunk_item
->type
& BLOCK_FLAG_RAID5
|| c2
->chunk_item
->type
& BLOCK_FLAG_RAID6
) {
5627 Vcb
->superblock
.incompat_flags
&= ~BTRFS_INCOMPAT_FLAGS_RAID56
;
5631 uint64_t phys_used
= chunk_estimate_phys_size(Vcb
, c
, c
->oldused
);
5633 phys_used
= chunk_estimate_phys_size(Vcb
, c
, c
->oldused
);
5636 if (phys_used
< Vcb
->superblock
.bytes_used
)
5637 Vcb
->superblock
.bytes_used
-= phys_used
;
5639 Vcb
->superblock
.bytes_used
= 0;
5641 ExFreePool(c
->chunk_item
);
5642 ExFreePool(c
->devices
);
5644 while (!IsListEmpty(&c
->space
)) {
5645 space
* s
= CONTAINING_RECORD(c
->space
.Flink
, space
, list_entry
);
5647 RemoveEntryList(&s
->list_entry
);
5651 while (!IsListEmpty(&c
->deleting
)) {
5652 space
* s
= CONTAINING_RECORD(c
->deleting
.Flink
, space
, list_entry
);
5654 RemoveEntryList(&s
->list_entry
);
5658 release_chunk_lock(c
, Vcb
);
5660 ExDeleteResourceLite(&c
->partial_stripes_lock
);
5661 ExDeleteResourceLite(&c
->range_locks_lock
);
5662 ExDeleteResourceLite(&c
->lock
);
5663 ExDeleteResourceLite(&c
->changed_extents_lock
);
5667 return STATUS_SUCCESS
;
5670 static NTSTATUS
partial_stripe_read(device_extension
* Vcb
, chunk
* c
, partial_stripe
* ps
, uint64_t startoff
, uint16_t parity
, ULONG offset
, ULONG len
) {
5672 ULONG sl
= (ULONG
)(c
->chunk_item
->stripe_length
/ Vcb
->superblock
.sector_size
);
5673 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
5676 ULONG readlen
= min(offset
+ len
, offset
+ (sl
- (offset
% sl
))) - offset
;
5679 stripe
= (parity
+ (offset
/ sl
) + 1) % c
->chunk_item
->num_stripes
;
5681 if (c
->devices
[stripe
]->devobj
) {
5682 Status
= sync_read_phys(c
->devices
[stripe
]->devobj
, c
->devices
[stripe
]->fileobj
, cis
[stripe
].offset
+ startoff
+ ((offset
% sl
) * Vcb
->superblock
.sector_size
),
5683 readlen
* Vcb
->superblock
.sector_size
, ps
->data
+ (offset
* Vcb
->superblock
.sector_size
), false);
5684 if (!NT_SUCCESS(Status
)) {
5685 ERR("sync_read_phys returned %08x\n", Status
);
5688 } else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
) {
5692 scratch
= ExAllocatePoolWithTag(NonPagedPool
, readlen
* Vcb
->superblock
.sector_size
, ALLOC_TAG
);
5694 ERR("out of memory\n");
5695 return STATUS_INSUFFICIENT_RESOURCES
;
5698 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
5700 if (!c
->devices
[i
]->devobj
) {
5701 ExFreePool(scratch
);
5702 return STATUS_UNEXPECTED_IO_ERROR
;
5705 if (i
== 0 || (stripe
== 0 && i
== 1)) {
5706 Status
= sync_read_phys(c
->devices
[i
]->devobj
, c
->devices
[i
]->fileobj
, cis
[i
].offset
+ startoff
+ ((offset
% sl
) * Vcb
->superblock
.sector_size
),
5707 readlen
* Vcb
->superblock
.sector_size
, ps
->data
+ (offset
* Vcb
->superblock
.sector_size
), false);
5708 if (!NT_SUCCESS(Status
)) {
5709 ERR("sync_read_phys returned %08x\n", Status
);
5710 ExFreePool(scratch
);
5714 Status
= sync_read_phys(c
->devices
[i
]->devobj
, c
->devices
[i
]->fileobj
, cis
[i
].offset
+ startoff
+ ((offset
% sl
) * Vcb
->superblock
.sector_size
),
5715 readlen
* Vcb
->superblock
.sector_size
, scratch
, false);
5716 if (!NT_SUCCESS(Status
)) {
5717 ERR("sync_read_phys returned %08x\n", Status
);
5718 ExFreePool(scratch
);
5722 do_xor(ps
->data
+ (offset
* Vcb
->superblock
.sector_size
), scratch
, readlen
* Vcb
->superblock
.sector_size
);
5727 ExFreePool(scratch
);
5730 uint16_t k
, i
, logstripe
, error_stripe
, num_errors
= 0;
5732 scratch
= ExAllocatePoolWithTag(NonPagedPool
, (c
->chunk_item
->num_stripes
+ 2) * readlen
* Vcb
->superblock
.sector_size
, ALLOC_TAG
);
5734 ERR("out of memory\n");
5735 return STATUS_INSUFFICIENT_RESOURCES
;
5738 i
= (parity
+ 1) % c
->chunk_item
->num_stripes
;
5739 for (k
= 0; k
< c
->chunk_item
->num_stripes
; k
++) {
5741 if (c
->devices
[i
]->devobj
) {
5742 Status
= sync_read_phys(c
->devices
[i
]->devobj
, c
->devices
[i
]->fileobj
, cis
[i
].offset
+ startoff
+ ((offset
% sl
) * Vcb
->superblock
.sector_size
),
5743 readlen
* Vcb
->superblock
.sector_size
, scratch
+ (k
* readlen
* Vcb
->superblock
.sector_size
), false);
5744 if (!NT_SUCCESS(Status
)) {
5745 ERR("sync_read_phys returned %08x\n", Status
);
5754 if (num_errors
> 1) {
5755 ExFreePool(scratch
);
5756 return STATUS_UNEXPECTED_IO_ERROR
;
5761 i
= (i
+ 1) % c
->chunk_item
->num_stripes
;
5764 if (num_errors
== 0 || error_stripe
== c
->chunk_item
->num_stripes
- 1) {
5765 for (k
= 0; k
< c
->chunk_item
->num_stripes
- 1; k
++) {
5766 if (k
!= logstripe
) {
5767 if (k
== 0 || (k
== 1 && logstripe
== 0)) {
5768 RtlCopyMemory(ps
->data
+ (offset
* Vcb
->superblock
.sector_size
), scratch
+ (k
* readlen
* Vcb
->superblock
.sector_size
),
5769 readlen
* Vcb
->superblock
.sector_size
);
5771 do_xor(ps
->data
+ (offset
* Vcb
->superblock
.sector_size
), scratch
+ (k
* readlen
* Vcb
->superblock
.sector_size
),
5772 readlen
* Vcb
->superblock
.sector_size
);
5777 raid6_recover2(scratch
, c
->chunk_item
->num_stripes
, readlen
* Vcb
->superblock
.sector_size
, logstripe
,
5778 error_stripe
, scratch
+ (c
->chunk_item
->num_stripes
* readlen
* Vcb
->superblock
.sector_size
));
5780 RtlCopyMemory(ps
->data
+ (offset
* Vcb
->superblock
.sector_size
), scratch
+ (c
->chunk_item
->num_stripes
* readlen
* Vcb
->superblock
.sector_size
),
5781 readlen
* Vcb
->superblock
.sector_size
);
5784 ExFreePool(scratch
);
5791 return STATUS_SUCCESS
;
5794 NTSTATUS
flush_partial_stripe(device_extension
* Vcb
, chunk
* c
, partial_stripe
* ps
) {
5796 uint16_t parity2
, stripe
, startoffstripe
;
5799 ULONG runlength
, index
, last1
;
5800 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
5802 uint16_t k
, num_data_stripes
= c
->chunk_item
->num_stripes
- (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
? 1 : 2);
5803 uint64_t ps_length
= num_data_stripes
* c
->chunk_item
->stripe_length
;
5804 ULONG stripe_length
= (ULONG
)c
->chunk_item
->stripe_length
;
5806 // FIXME - do writes asynchronously?
5808 get_raid0_offset(ps
->address
- c
->offset
, stripe_length
, num_data_stripes
, &startoff
, &startoffstripe
);
5810 parity2
= (((ps
->address
- c
->offset
) / ps_length
) + c
->chunk_item
->num_stripes
- 1) % c
->chunk_item
->num_stripes
;
5812 // read data (or reconstruct if degraded)
5814 runlength
= RtlFindFirstRunClear(&ps
->bmp
, &index
);
5817 while (runlength
!= 0) {
5818 if (index
>= ps
->bmplen
)
5821 if (index
+ runlength
>= ps
->bmplen
) {
5822 runlength
= ps
->bmplen
- index
;
5828 if (index
> last1
) {
5829 Status
= partial_stripe_read(Vcb
, c
, ps
, startoff
, parity2
, last1
, index
- last1
);
5830 if (!NT_SUCCESS(Status
)) {
5831 ERR("partial_stripe_read returned %08x\n", Status
);
5836 last1
= index
+ runlength
;
5838 runlength
= RtlFindNextForwardRunClear(&ps
->bmp
, index
+ runlength
, &index
);
5841 if (last1
< ps_length
/ Vcb
->superblock
.sector_size
) {
5842 Status
= partial_stripe_read(Vcb
, c
, ps
, startoff
, parity2
, last1
, (ULONG
)((ps_length
/ Vcb
->superblock
.sector_size
) - last1
));
5843 if (!NT_SUCCESS(Status
)) {
5844 ERR("partial_stripe_read returned %08x\n", Status
);
5849 // set unallocated data to 0
5850 le
= c
->space
.Flink
;
5851 while (le
!= &c
->space
) {
5852 space
* s
= CONTAINING_RECORD(le
, space
, list_entry
);
5854 if (s
->address
+ s
->size
> ps
->address
&& s
->address
< ps
->address
+ ps_length
) {
5855 uint64_t start
= max(ps
->address
, s
->address
);
5856 uint64_t end
= min(ps
->address
+ ps_length
, s
->address
+ s
->size
);
5858 RtlZeroMemory(ps
->data
+ start
- ps
->address
, (ULONG
)(end
- start
));
5859 } else if (s
->address
>= ps
->address
+ ps_length
)
5865 le
= c
->deleting
.Flink
;
5866 while (le
!= &c
->deleting
) {
5867 space
* s
= CONTAINING_RECORD(le
, space
, list_entry
);
5869 if (s
->address
+ s
->size
> ps
->address
&& s
->address
< ps
->address
+ ps_length
) {
5870 uint64_t start
= max(ps
->address
, s
->address
);
5871 uint64_t end
= min(ps
->address
+ ps_length
, s
->address
+ s
->size
);
5873 RtlZeroMemory(ps
->data
+ start
- ps
->address
, (ULONG
)(end
- start
));
5874 } else if (s
->address
>= ps
->address
+ ps_length
)
5880 stripe
= (parity2
+ 1) % c
->chunk_item
->num_stripes
;
5883 for (k
= 0; k
< num_data_stripes
; k
++) {
5884 if (c
->devices
[stripe
]->devobj
) {
5885 Status
= write_data_phys(c
->devices
[stripe
]->devobj
, c
->devices
[stripe
]->fileobj
, cis
[stripe
].offset
+ startoff
, data
, stripe_length
);
5886 if (!NT_SUCCESS(Status
)) {
5887 ERR("write_data_phys returned %08x\n", Status
);
5892 data
+= stripe_length
;
5893 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
5897 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
) {
5898 if (c
->devices
[parity2
]->devobj
) {
5901 for (i
= 1; i
< c
->chunk_item
->num_stripes
- 1; i
++) {
5902 do_xor(ps
->data
, ps
->data
+ (i
* stripe_length
), stripe_length
);
5905 Status
= write_data_phys(c
->devices
[parity2
]->devobj
, c
->devices
[parity2
]->fileobj
, cis
[parity2
].offset
+ startoff
, ps
->data
, stripe_length
);
5906 if (!NT_SUCCESS(Status
)) {
5907 ERR("write_data_phys returned %08x\n", Status
);
5912 uint16_t parity1
= (parity2
+ c
->chunk_item
->num_stripes
- 1) % c
->chunk_item
->num_stripes
;
5914 if (c
->devices
[parity1
]->devobj
|| c
->devices
[parity2
]->devobj
) {
5918 scratch
= ExAllocatePoolWithTag(NonPagedPool
, stripe_length
* 2, ALLOC_TAG
);
5920 ERR("out of memory\n");
5921 return STATUS_INSUFFICIENT_RESOURCES
;
5924 i
= c
->chunk_item
->num_stripes
- 3;
5927 if (i
== c
->chunk_item
->num_stripes
- 3) {
5928 RtlCopyMemory(scratch
, ps
->data
+ (i
* stripe_length
), stripe_length
);
5929 RtlCopyMemory(scratch
+ stripe_length
, ps
->data
+ (i
* stripe_length
), stripe_length
);
5931 do_xor(scratch
, ps
->data
+ (i
* stripe_length
), stripe_length
);
5933 galois_double(scratch
+ stripe_length
, stripe_length
);
5934 do_xor(scratch
+ stripe_length
, ps
->data
+ (i
* stripe_length
), stripe_length
);
5943 if (c
->devices
[parity1
]->devobj
) {
5944 Status
= write_data_phys(c
->devices
[parity1
]->devobj
, c
->devices
[parity1
]->fileobj
, cis
[parity1
].offset
+ startoff
, scratch
, stripe_length
);
5945 if (!NT_SUCCESS(Status
)) {
5946 ERR("write_data_phys returned %08x\n", Status
);
5947 ExFreePool(scratch
);
5952 if (c
->devices
[parity2
]->devobj
) {
5953 Status
= write_data_phys(c
->devices
[parity2
]->devobj
, c
->devices
[parity2
]->fileobj
, cis
[parity2
].offset
+ startoff
,
5954 scratch
+ stripe_length
, stripe_length
);
5955 if (!NT_SUCCESS(Status
)) {
5956 ERR("write_data_phys returned %08x\n", Status
);
5957 ExFreePool(scratch
);
5962 ExFreePool(scratch
);
5966 return STATUS_SUCCESS
;
5969 static NTSTATUS
update_chunks(device_extension
* Vcb
, LIST_ENTRY
* batchlist
, PIRP Irp
, LIST_ENTRY
* rollback
) {
5970 LIST_ENTRY
*le
, *le2
;
5972 uint64_t used_minus_cache
;
5974 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
5976 // FIXME - do tree chunks before data chunks
5978 le
= Vcb
->chunks
.Flink
;
5979 while (le
!= &Vcb
->chunks
) {
5980 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
5985 acquire_chunk_lock(c
, Vcb
);
5987 // flush partial stripes
5988 if (!Vcb
->readonly
&& (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
|| c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)) {
5989 ExAcquireResourceExclusiveLite(&c
->partial_stripes_lock
, true);
5991 while (!IsListEmpty(&c
->partial_stripes
)) {
5992 partial_stripe
* ps
= CONTAINING_RECORD(RemoveHeadList(&c
->partial_stripes
), partial_stripe
, list_entry
);
5994 Status
= flush_partial_stripe(Vcb
, c
, ps
);
5997 ExFreePool(ps
->bmparr
);
6001 if (!NT_SUCCESS(Status
)) {
6002 ERR("flush_partial_stripe returned %08x\n", Status
);
6003 ExReleaseResourceLite(&c
->partial_stripes_lock
);
6004 release_chunk_lock(c
, Vcb
);
6005 ExReleaseResourceLite(&Vcb
->chunk_lock
);
6010 ExReleaseResourceLite(&c
->partial_stripes_lock
);
6013 if (c
->list_entry_balance
.Flink
) {
6014 release_chunk_lock(c
, Vcb
);
6019 if (c
->space_changed
|| c
->created
) {
6020 bool created
= c
->created
;
6022 used_minus_cache
= c
->used
;
6024 // subtract self-hosted cache
6025 if (used_minus_cache
> 0 && c
->chunk_item
->type
& BLOCK_FLAG_DATA
&& c
->cache
&& c
->cache
->inode_item
.st_size
== c
->used
) {
6028 le3
= c
->cache
->extents
.Flink
;
6029 while (le3
!= &c
->cache
->extents
) {
6030 extent
* ext
= CONTAINING_RECORD(le3
, extent
, list_entry
);
6031 EXTENT_DATA
* ed
= &ext
->extent_data
;
6034 if (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) {
6035 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
6037 if (ed2
->size
!= 0 && ed2
->address
>= c
->offset
&& ed2
->address
+ ed2
->size
<= c
->offset
+ c
->chunk_item
->size
)
6038 used_minus_cache
-= ed2
->size
;
6046 if (used_minus_cache
== 0) {
6047 Status
= drop_chunk(Vcb
, c
, batchlist
, Irp
, rollback
);
6048 if (!NT_SUCCESS(Status
)) {
6049 ERR("drop_chunk returned %08x\n", Status
);
6050 release_chunk_lock(c
, Vcb
);
6051 ExReleaseResourceLite(&Vcb
->chunk_lock
);
6055 // c is now freed, so avoid releasing non-existent lock
6058 } else if (c
->created
) {
6059 Status
= create_chunk(Vcb
, c
, Irp
);
6060 if (!NT_SUCCESS(Status
)) {
6061 ERR("create_chunk returned %08x\n", Status
);
6062 release_chunk_lock(c
, Vcb
);
6063 ExReleaseResourceLite(&Vcb
->chunk_lock
);
6068 if (used_minus_cache
> 0 || created
)
6069 release_chunk_lock(c
, Vcb
);
6071 release_chunk_lock(c
, Vcb
);
6077 ExReleaseResourceLite(&Vcb
->chunk_lock
);
6079 return STATUS_SUCCESS
;
// delete_root_ref: removes one name entry from the ROOT_REF item that links
// subvolume `subvolid` into parent subvolume `parsubvolid`.
//
//   Vcb         - volume; the item is looked up in Vcb->root_root
//   subvolid    - becomes the key offset of the ROOT_REF item
//   parsubvolid - becomes the key object id of the ROOT_REF item
//   parinode    - the entry must have rr->dir equal to this
//   utf8        - the entry must carry exactly this name (length and bytes)
//   Irp         - passed through to find_item / insert_tree_item
//
// Return values visible in this listing: STATUS_INTERNAL_ERROR when the item is
// shorter than a ROOT_REF, STATUS_INSUFFICIENT_RESOURCES when the replacement
// buffer cannot be allocated, STATUS_NOT_FOUND after the "could not find
// ROOT_REF entry" warning, and STATUS_SUCCESS at the end.
// NOTE(review): this listing omits some original lines (declarations, braces,
// the remainder of several error branches), so what those branches return is
// not shown here - confirm against the full file.
6082 static NTSTATUS
delete_root_ref(device_extension
* Vcb
, uint64_t subvolid
, uint64_t parsubvolid
, uint64_t parinode
, PANSI_STRING utf8
, PIRP Irp
) {
// Key of the item to find: (parent subvolume, TYPE_ROOT_REF, child subvolume).
6087 searchkey
.obj_id
= parsubvolid
;
6088 searchkey
.obj_type
= TYPE_ROOT_REF
;
6089 searchkey
.offset
= subvolid
;
6091 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, Irp
);
6092 if (!NT_SUCCESS(Status
)) {
6093 ERR("error - find_item returned %08x\n", Status
);
// Only act when find_item landed on the requested key (keycmp presumably
// returns zero for equal keys - TODO confirm).
6097 if (!keycmp(searchkey
, tp
.item
->key
)) {
// The item must hold at least one ROOT_REF header.
6098 if (tp
.item
->size
< sizeof(ROOT_REF
)) {
6099 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, sizeof(ROOT_REF
));
6100 return STATUS_INTERNAL_ERROR
;
// Begin at the first entry in the item; len starts as the full item size.
6105 rr
= (ROOT_REF
*)tp
.item
->data
;
6106 len
= tp
.item
->size
;
// Reject an entry whose header or name would not fit in the len bytes available.
6111 if (len
< sizeof(ROOT_REF
) || len
< offsetof(ROOT_REF
, name
[0]) + rr
->n
) {
6112 ERR("(%I64x,%x,%I64x) was truncated\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
);
// Size of this entry: fixed header plus rr->n bytes of name.
6116 itemlen
= (uint16_t)offsetof(ROOT_REF
, name
[0]) + rr
->n
;
// An entry matches when it belongs to directory parinode and its name is
// byte-for-byte equal to utf8.
6118 if (rr
->dir
== parinode
&& rr
->n
== utf8
->Length
&& RtlCompareMemory(rr
->name
, utf8
->Buffer
, rr
->n
) == rr
->n
) {
// Size the item will have once the matching entry is removed.
6119 uint16_t newlen
= tp
.item
->size
- itemlen
;
// The existing tree item is deleted first; a shortened copy is inserted below.
// NOTE(review): the condition selecting between the "deleting" and "modifying"
// paths is not shown in this listing.
6121 Status
= delete_tree_item(Vcb
, &tp
);
6122 if (!NT_SUCCESS(Status
)) {
6123 ERR("delete_tree_item returned %08x\n", Status
);
6128 TRACE("deleting (%I64x,%x,%I64x)\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
);
// Replacement payload; rroff is the write position for the bytes that follow
// the removed entry.
6130 uint8_t *newrr
= ExAllocatePoolWithTag(PagedPool
, newlen
, ALLOC_TAG
), *rroff
;
6133 ERR("out of memory\n");
6134 return STATUS_INSUFFICIENT_RESOURCES
;
6137 TRACE("modifying (%I64x,%x,%I64x)\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
);
// Copy everything that precedes the matching entry, if there is anything.
6139 if ((uint8_t*)rr
> tp
.item
->data
) {
6140 RtlCopyMemory(newrr
, tp
.item
->data
, (uint8_t*)rr
- tp
.item
->data
);
6141 rroff
= newrr
+ ((uint8_t*)rr
- tp
.item
->data
);
// Copy everything that follows the matching entry's name, if there is anything.
6146 if ((uint8_t*)&rr
->name
[rr
->n
] < tp
.item
->data
+ tp
.item
->size
)
6147 RtlCopyMemory(rroff
, &rr
->name
[rr
->n
], tp
.item
->size
- ((uint8_t*)&rr
->name
[rr
->n
] - tp
.item
->data
));
// Re-insert under the same key with the shortened payload.
6149 Status
= insert_tree_item(Vcb
, Vcb
->root_root
, tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, newrr
, newlen
, NULL
, Irp
);
6150 if (!NT_SUCCESS(Status
)) {
6151 ERR("insert_tree_item returned %08x\n", Status
);
// No match: if bytes remain beyond this entry, step to the next packed entry,
// which starts immediately after this entry's name.
6160 if (len
> itemlen
) {
6162 rr
= (ROOT_REF
*)&rr
->name
[rr
->n
];
// The warning text indicates this path is taken when no ROOT_REF item exists
// for the key (presumably the else of the keycmp test above - the brace
// structure is not shown here).
6168 WARN("could not find ROOT_REF entry for subvol %I64x in %I64x\n", searchkey
.offset
, searchkey
.obj_id
);
6169 return STATUS_NOT_FOUND
;
6172 return STATUS_SUCCESS
;
// add_root_ref: records ROOT_REF entry `rr` under the key
// (parsubvolid, TYPE_ROOT_REF, subvolid) in Vcb->root_root.
//
//   Vcb         - volume; the item lives in Vcb->root_root
//   subvolid    - becomes the key offset
//   parsubvolid - becomes the key object id
//   rr          - caller-allocated entry; annotated __drv_aliasesMem and, when
//                 no item exists yet, handed directly to insert_tree_item
//   Irp         - optional, passed through to find_item / insert_tree_item
//
// If an item with that key already exists, its payload and `rr` are
// concatenated into a new buffer that replaces the old item; otherwise `rr`
// itself is inserted. Returns STATUS_INSUFFICIENT_RESOURCES when the combined
// buffer cannot be allocated and STATUS_SUCCESS at the end; the remaining
// error returns are on lines omitted from this listing.
//
// The pragmas around the function save/restore the warning state and suppress
// warning 28194 for it.
6176 #pragma warning(push)
6177 #pragma warning(suppress: 28194)
6179 static NTSTATUS
add_root_ref(_In_ device_extension
* Vcb
, _In_
uint64_t subvolid
, _In_
uint64_t parsubvolid
, _In_ __drv_aliasesMem ROOT_REF
* rr
, _In_opt_ PIRP Irp
) {
6184 searchkey
.obj_id
= parsubvolid
;
6185 searchkey
.obj_type
= TYPE_ROOT_REF
;
6186 searchkey
.offset
= subvolid
;
6188 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, Irp
);
6189 if (!NT_SUCCESS(Status
)) {
6190 ERR("error - find_item returned %08x\n", Status
);
// An item with this key already exists: build old payload + new entry.
6194 if (!keycmp(searchkey
, tp
.item
->key
)) {
// NOTE(review): the sum is stored in a uint16_t; confirm the combined size
// cannot exceed 65535 (or the leaf capacity) before this point.
6195 uint16_t rrsize
= tp
.item
->size
+ (uint16_t)offsetof(ROOT_REF
, name
[0]) + rr
->n
;
6198 rr2
= ExAllocatePoolWithTag(PagedPool
, rrsize
, ALLOC_TAG
);
6200 ERR("out of memory\n");
6201 return STATUS_INSUFFICIENT_RESOURCES
;
// Existing entries first ...
6204 if (tp
.item
->size
> 0)
6205 RtlCopyMemory(rr2
, tp
.item
->data
, tp
.item
->size
);
// ... then the new entry (header plus rr->n name bytes) appended after them.
6207 RtlCopyMemory(rr2
+ tp
.item
->size
, rr
, offsetof(ROOT_REF
, name
[0]) + rr
->n
);
// Replace the old item with the combined one under the same key.
6210 Status
= delete_tree_item(Vcb
, &tp
);
6211 if (!NT_SUCCESS(Status
)) {
6212 ERR("delete_tree_item returned %08x\n", Status
);
6217 Status
= insert_tree_item(Vcb
, Vcb
->root_root
, searchkey
.obj_id
, searchkey
.obj_type
, searchkey
.offset
, rr2
, rrsize
, NULL
, Irp
);
6218 if (!NT_SUCCESS(Status
)) {
6219 ERR("insert_tree_item returned %08x\n", Status
);
// No existing item (presumably the else branch - braces are not shown in this
// listing): insert the caller's buffer directly as the whole payload.
6224 Status
= insert_tree_item(Vcb
, Vcb
->root_root
, searchkey
.obj_id
, searchkey
.obj_type
, searchkey
.offset
, rr
, (uint16_t)offsetof(ROOT_REF
, name
[0]) + rr
->n
, NULL
, Irp
);
6225 if (!NT_SUCCESS(Status
)) {
6226 ERR("insert_tree_item returned %08x\n", Status
);
6232 return STATUS_SUCCESS
;
6235 #pragma warning(pop)
// update_root_backref: makes the ROOT_BACKREF item
// (subvolid, TYPE_ROOT_BACKREF, parsubvolid) carry a copy of the payload of
// the ROOT_REF item (parsubvolid, TYPE_ROOT_REF, subvolid).
//
//   Vcb         - volume; both items live in Vcb->root_root
//   subvolid    - child subvolume id
//   parsubvolid - parent subvolume id
//   Irp         - passed through to find_item / insert_tree_item
//
// Returns STATUS_INSUFFICIENT_RESOURCES when the copy buffer cannot be
// allocated and STATUS_SUCCESS at the end; other error returns are on lines
// omitted from this listing.
6238 static NTSTATUS
update_root_backref(device_extension
* Vcb
, uint64_t subvolid
, uint64_t parsubvolid
, PIRP Irp
) {
// Step 1: look up the forward reference.
6245 searchkey
.obj_id
= parsubvolid
;
6246 searchkey
.obj_type
= TYPE_ROOT_REF
;
6247 searchkey
.offset
= subvolid
;
6249 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, Irp
);
6250 if (!NT_SUCCESS(Status
)) {
6251 ERR("error - find_item returned %08x\n", Status
);
// If it exists and is non-empty, take a private copy of its payload.
6255 if (!keycmp(tp
.item
->key
, searchkey
) && tp
.item
->size
> 0) {
6256 datalen
= tp
.item
->size
;
6258 data
= ExAllocatePoolWithTag(PagedPool
, datalen
, ALLOC_TAG
);
6260 ERR("out of memory\n");
6261 return STATUS_INSUFFICIENT_RESOURCES
;
6264 RtlCopyMemory(data
, tp
.item
->data
, datalen
);
// Step 2: look up the backward reference (key fields swapped, different type).
6270 searchkey
.obj_id
= subvolid
;
6271 searchkey
.obj_type
= TYPE_ROOT_BACKREF
;
6272 searchkey
.offset
= parsubvolid
;
6274 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, Irp
);
6275 if (!NT_SUCCESS(Status
)) {
6276 ERR("error - find_item returned %08x\n", Status
);
// Drop any existing ROOT_BACKREF item with that key.
6284 if (!keycmp(tp
.item
->key
, searchkey
)) {
6285 Status
= delete_tree_item(Vcb
, &tp
);
6286 if (!NT_SUCCESS(Status
)) {
6287 ERR("delete_tree_item returned %08x\n", Status
);
// Insert the copied ROOT_REF payload as the new ROOT_BACKREF.
// NOTE(review): the guard around this insert (e.g. for the case where no
// ROOT_REF data was copied) is not shown in this listing.
6297 Status
= insert_tree_item(Vcb
, Vcb
->root_root
, subvolid
, TYPE_ROOT_BACKREF
, parsubvolid
, data
, datalen
, NULL
, Irp
);
6298 if (!NT_SUCCESS(Status
)) {
6299 ERR("insert_tree_item returned %08x\n", Status
);
6305 return STATUS_SUCCESS
;
// add_root_item_to_cache: locates the ROOT_ITEM of tree `root` in
// Vcb->root_root, upgrades it to a full-size ROOT_ITEM if the stored one is
// shorter, and sets the `write` flag on the tree that find_item returned.
//
//   Vcb  - volume
//   root - object id of the tree whose ROOT_ITEM is wanted
//   Irp  - passed through to find_item / insert_tree_item
//
// Returns STATUS_INTERNAL_ERROR when no ROOT_ITEM is found for `root`,
// STATUS_INSUFFICIENT_RESOURCES on allocation failure, STATUS_SUCCESS at the
// end; other error returns are on lines omitted from this listing.
6308 static NTSTATUS
add_root_item_to_cache(device_extension
* Vcb
, uint64_t root
, PIRP Irp
) {
// Search with the maximum offset; only obj_id and obj_type of the result are
// checked below, so any offset is accepted.
6313 searchkey
.obj_id
= root
;
6314 searchkey
.obj_type
= TYPE_ROOT_ITEM
;
6315 searchkey
.offset
= 0xffffffffffffffff;
6317 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, Irp
);
6318 if (!NT_SUCCESS(Status
)) {
6319 ERR("error - find_item returned %08x\n", Status
);
6323 if (tp
.item
->key
.obj_id
!= searchkey
.obj_id
|| tp
.item
->key
.obj_type
!= searchkey
.obj_type
) {
6324 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey
.obj_id
);
6325 return STATUS_INTERNAL_ERROR
;
6328 if (tp
.item
->size
< sizeof(ROOT_ITEM
)) { // if not full length, create new entry with new bits zeroed
6329 ROOT_ITEM
* ri
= ExAllocatePoolWithTag(PagedPool
, sizeof(ROOT_ITEM
), ALLOC_TAG
);
6331 ERR("out of memory\n");
6332 return STATUS_INSUFFICIENT_RESOURCES
;
// Keep the bytes that exist on disk ...
6335 if (tp
.item
->size
> 0)
6336 RtlCopyMemory(ri
, tp
.item
->data
, tp
.item
->size
);
// ... and zero the tail that the short on-disk item lacks.
6338 RtlZeroMemory(((uint8_t*)ri
) + tp
.item
->size
, sizeof(ROOT_ITEM
) - tp
.item
->size
);
// Swap the short item for the full-size one, keeping the offset it was found at.
6340 Status
= delete_tree_item(Vcb
, &tp
);
6341 if (!NT_SUCCESS(Status
)) {
6342 ERR("delete_tree_item returned %08x\n", Status
);
6347 Status
= insert_tree_item(Vcb
, Vcb
->root_root
, searchkey
.obj_id
, searchkey
.obj_type
, tp
.item
->key
.offset
, ri
, sizeof(ROOT_ITEM
), NULL
, Irp
);
6348 if (!NT_SUCCESS(Status
)) {
6349 ERR("insert_tree_item returned %08x\n", Status
);
// Flag the tree holding the item (presumably so it is rewritten on the next
// flush - TODO confirm what consumers of `write` do).
6354 tp
.tree
->write
= true;
6357 return STATUS_SUCCESS
;
// flush_fileref: queues the tree changes needed to bring the on-disk
// directory entries for a dirty file reference up to date, then clears
// fileref->dirty.
//
//   fileref   - file reference to flush
//   batchlist - list that insert_tree_item_batch() queues operations onto
//   Irp       - passed to the ROOT_REF helpers (add_root_ref, delete_root_ref,
//               update_root_backref)
//
// Three cases are handled, chosen by fileref->created / fileref->deleted:
//   * created: add DIR_INDEX + DIR_ITEM, plus an INODE_REF (same subvolume)
//              or a ROOT_REF/ROOT_BACKREF pair (entry is a subvolume);
//   * deleted: remove the same set of items, using the old name/index;
//   * otherwise ("rename or change type"): remove the old items and add new ones.
// Filerefs that were created and deleted in the same interval, and filerefs
// whose fcb has `ads` set, are only marked clean.
//
// Returns STATUS_INSUFFICIENT_RESOURCES on allocation failure and
// STATUS_SUCCESS at the end; the error returns after failed helper calls are
// on lines omitted from this listing.
6360 static NTSTATUS
flush_fileref(file_ref
* fileref
, LIST_ENTRY
* batchlist
, PIRP Irp
) {
6363 // if fileref created and then immediately deleted, do nothing
6364 if (fileref
->created
&& fileref
->deleted
) {
6365 fileref
->dirty
= false;
6366 return STATUS_SUCCESS
;
// fcb->ads set: nothing is queued for this fileref here.
6369 if (fileref
->fcb
->ads
) {
6370 fileref
->dirty
= false;
6371 return STATUS_SUCCESS
;
// ---- Case 1: newly created entry ----
6374 if (fileref
->created
) {
// The DIR_ITEM key offset is the crc32c of the name, seeded with 0xfffffffe.
6379 crc32
= calc_crc32c(0xfffffffe, (uint8_t*)fileref
->dc
->utf8
.Buffer
, fileref
->dc
->utf8
.Length
);
6381 disize
= (uint16_t)(offsetof(DIR_ITEM
, name
[0]) + fileref
->dc
->utf8
.Length
);
6382 di
= ExAllocatePoolWithTag(PagedPool
, disize
, ALLOC_TAG
);
6384 ERR("out of memory\n");
6385 return STATUS_INSUFFICIENT_RESOURCES
;
// The entry's key points at the inode when the child is in the parent's
// subvolume, or at the child subvolume's ROOT_ITEM otherwise.
6388 if (fileref
->parent
->fcb
->subvol
== fileref
->fcb
->subvol
) {
6389 di
->key
.obj_id
= fileref
->fcb
->inode
;
6390 di
->key
.obj_type
= TYPE_INODE_ITEM
;
6392 } else { // subvolume
6393 di
->key
.obj_id
= fileref
->fcb
->subvol
->id
;
6394 di
->key
.obj_type
= TYPE_ROOT_ITEM
;
6395 di
->key
.offset
= 0xffffffffffffffff;
6398 di
->transid
= fileref
->fcb
->Vcb
->superblock
.generation
;
6400 di
->n
= (uint16_t)fileref
->dc
->utf8
.Length
;
6401 di
->type
= fileref
->fcb
->type
;
6402 RtlCopyMemory(di
->name
, fileref
->dc
->utf8
.Buffer
, fileref
->dc
->utf8
.Length
);
// Second copy: di goes to the DIR_INDEX insert and di2 to the DIR_ITEM insert below.
6404 di2
= ExAllocatePoolWithTag(PagedPool
, disize
, ALLOC_TAG
);
// NOTE(review): the log call and the return carry adjacent original line
// numbers (6406/6407), so `di` does not appear to be released on this
// failure path - looks like a leak; confirm against the full source.
6406 ERR("out of memory\n");
6407 return STATUS_INSUFFICIENT_RESOURCES
;
6410 RtlCopyMemory(di2
, di
, disize
);
// DIR_INDEX: keyed by the entry's index within the parent directory.
6412 Status
= insert_tree_item_batch(batchlist
, fileref
->fcb
->Vcb
, fileref
->parent
->fcb
->subvol
, fileref
->parent
->fcb
->inode
, TYPE_DIR_INDEX
,
6413 fileref
->dc
->index
, di
, disize
, Batch_Insert
);
6414 if (!NT_SUCCESS(Status
)) {
6415 ERR("insert_tree_item_batch returned %08x\n", Status
);
// DIR_ITEM: keyed by the name hash.
6419 Status
= insert_tree_item_batch(batchlist
, fileref
->fcb
->Vcb
, fileref
->parent
->fcb
->subvol
, fileref
->parent
->fcb
->inode
, TYPE_DIR_ITEM
, crc32
,
6420 di2
, disize
, Batch_DirItem
);
6421 if (!NT_SUCCESS(Status
)) {
6422 ERR("insert_tree_item_batch returned %08x\n", Status
);
// Same subvolume: add an INODE_REF on the child, keyed by the parent inode.
6426 if (fileref
->parent
->fcb
->subvol
== fileref
->fcb
->subvol
) {
6429 ir
= ExAllocatePoolWithTag(PagedPool
, sizeof(INODE_REF
) - 1 + fileref
->dc
->utf8
.Length
, ALLOC_TAG
);
6431 ERR("out of memory\n");
6432 return STATUS_INSUFFICIENT_RESOURCES
;
6435 ir
->index
= fileref
->dc
->index
;
6436 ir
->n
= fileref
->dc
->utf8
.Length
;
6437 RtlCopyMemory(ir
->name
, fileref
->dc
->utf8
.Buffer
, ir
->n
);
6439 Status
= insert_tree_item_batch(batchlist
, fileref
->fcb
->Vcb
, fileref
->fcb
->subvol
, fileref
->fcb
->inode
, TYPE_INODE_REF
, fileref
->parent
->fcb
->inode
,
6440 ir
, sizeof(INODE_REF
) - 1 + ir
->n
, Batch_InodeRef
);
6441 if (!NT_SUCCESS(Status
)) {
6442 ERR("insert_tree_item_batch returned %08x\n", Status
);
// Different subvolume (and not the volume's dummy_fcb): add a ROOT_REF and
// refresh the matching ROOT_BACKREF instead.
6445 } else if (fileref
->fcb
!= fileref
->fcb
->Vcb
->dummy_fcb
) {
6449 rrlen
= sizeof(ROOT_REF
) - 1 + fileref
->dc
->utf8
.Length
;
6451 rr
= ExAllocatePoolWithTag(PagedPool
, rrlen
, ALLOC_TAG
);
6453 ERR("out of memory\n");
6454 return STATUS_INSUFFICIENT_RESOURCES
;
6457 rr
->dir
= fileref
->parent
->fcb
->inode
;
6458 rr
->index
= fileref
->dc
->index
;
6459 rr
->n
= fileref
->dc
->utf8
.Length
;
6460 RtlCopyMemory(rr
->name
, fileref
->dc
->utf8
.Buffer
, fileref
->dc
->utf8
.Length
);
6462 Status
= add_root_ref(fileref
->fcb
->Vcb
, fileref
->fcb
->subvol
->id
, fileref
->parent
->fcb
->subvol
->id
, rr
, Irp
);
6463 if (!NT_SUCCESS(Status
)) {
6464 ERR("add_root_ref returned %08x\n", Status
);
6468 Status
= update_root_backref(fileref
->fcb
->Vcb
, fileref
->fcb
->subvol
->id
, fileref
->parent
->fcb
->subvol
->id
, Irp
);
6469 if (!NT_SUCCESS(Status
)) {
6470 ERR("update_root_backref returned %08x\n", Status
);
6475 fileref
->created
= false;
// ---- Case 2: entry deleted ----
6476 } else if (fileref
->deleted
) {
// Deletion works from the saved old name (and, below, fileref->oldindex).
6481 name
= &fileref
->oldutf8
;
6483 crc32
= calc_crc32c(0xfffffffe, (uint8_t*)name
->Buffer
, name
->Length
);
6485 TRACE("deleting %.*S\n", file_desc_fileref(fileref
));
// Only n and name are filled in visibly; this DIR_ITEM just identifies which
// name to remove from the hashed item.
6487 di
= ExAllocatePoolWithTag(PagedPool
, sizeof(DIR_ITEM
) - 1 + name
->Length
, ALLOC_TAG
);
6489 ERR("out of memory\n");
6490 return STATUS_INSUFFICIENT_RESOURCES
;
6494 di
->n
= name
->Length
;
6495 RtlCopyMemory(di
->name
, name
->Buffer
, name
->Length
);
6497 // delete DIR_ITEM (0x54)
6499 Status
= insert_tree_item_batch(batchlist
, fileref
->fcb
->Vcb
, fileref
->parent
->fcb
->subvol
, fileref
->parent
->fcb
->inode
, TYPE_DIR_ITEM
,
6500 crc32
, di
, sizeof(DIR_ITEM
) - 1 + name
->Length
, Batch_DeleteDirItem
);
6501 if (!NT_SUCCESS(Status
)) {
6502 ERR("insert_tree_item_batch returned %08x\n", Status
);
6506 if (fileref
->parent
->fcb
->subvol
== fileref
->fcb
->subvol
) {
6509 // delete INODE_REF (0xc)
6511 ir
= ExAllocatePoolWithTag(PagedPool
, sizeof(INODE_REF
) - 1 + name
->Length
, ALLOC_TAG
);
6513 ERR("out of memory\n");
6514 return STATUS_INSUFFICIENT_RESOURCES
;
6517 ir
->index
= fileref
->oldindex
;
6518 ir
->n
= name
->Length
;
6519 RtlCopyMemory(ir
->name
, name
->Buffer
, name
->Length
);
6521 Status
= insert_tree_item_batch(batchlist
, fileref
->fcb
->Vcb
, fileref
->parent
->fcb
->subvol
, fileref
->fcb
->inode
, TYPE_INODE_REF
,
6522 fileref
->parent
->fcb
->inode
, ir
, sizeof(INODE_REF
) - 1 + name
->Length
, Batch_DeleteInodeRef
);
6523 if (!NT_SUCCESS(Status
)) {
6524 ERR("insert_tree_item_batch returned %08x\n", Status
);
6527 } else if (fileref
->fcb
!= fileref
->fcb
->Vcb
->dummy_fcb
) { // subvolume
6528 Status
= delete_root_ref(fileref
->fcb
->Vcb
, fileref
->fcb
->subvol
->id
, fileref
->parent
->fcb
->subvol
->id
, fileref
->parent
->fcb
->inode
, name
, Irp
);
6529 if (!NT_SUCCESS(Status
)) {
6530 ERR("delete_root_ref returned %08x\n", Status
);
6534 Status
= update_root_backref(fileref
->fcb
->Vcb
, fileref
->fcb
->subvol
->id
, fileref
->parent
->fcb
->subvol
->id
, Irp
);
6535 if (!NT_SUCCESS(Status
)) {
6536 ERR("update_root_backref returned %08x\n", Status
);
6541 // delete DIR_INDEX (0x60)
6543 Status
= insert_tree_item_batch(batchlist
, fileref
->fcb
->Vcb
, fileref
->parent
->fcb
->subvol
, fileref
->parent
->fcb
->inode
, TYPE_DIR_INDEX
,
6544 fileref
->oldindex
, NULL
, 0, Batch_Delete
);
6545 if (!NT_SUCCESS(Status
)) {
6546 ERR("insert_tree_item_batch returned %08x\n", Status
);
// The saved old name is released here once the deletions have been queued.
6550 if (fileref
->oldutf8
.Buffer
) {
6551 ExFreePool(fileref
->oldutf8
.Buffer
);
6552 fileref
->oldutf8
.Buffer
= NULL
;
// ---- Case 3: neither created nor deleted ----
6554 } else { // rename or change type
// Old name: the saved one if present, otherwise the current name (type change only).
6555 PANSI_STRING oldutf8
= fileref
->oldutf8
.Buffer
? &fileref
->oldutf8
: &fileref
->dc
->utf8
;
6556 uint32_t crc32
, oldcrc32
;
6558 DIR_ITEM
*olddi
, *di
, *di2
;
6560 crc32
= calc_crc32c(0xfffffffe, (uint8_t*)fileref
->dc
->utf8
.Buffer
, fileref
->dc
->utf8
.Length
);
// NOTE(review): the statement taken when there is no saved old name is on a
// line omitted from this listing; the hash of the saved name is computed below.
6562 if (!fileref
->oldutf8
.Buffer
)
6565 oldcrc32
= calc_crc32c(0xfffffffe, (uint8_t*)fileref
->oldutf8
.Buffer
, fileref
->oldutf8
.Length
);
6567 olddi
= ExAllocatePoolWithTag(PagedPool
, sizeof(DIR_ITEM
) - 1 + oldutf8
->Length
, ALLOC_TAG
);
6569 ERR("out of memory\n");
6570 return STATUS_INSUFFICIENT_RESOURCES
;
6574 olddi
->n
= (uint16_t)oldutf8
->Length
;
6575 RtlCopyMemory(olddi
->name
, oldutf8
->Buffer
, oldutf8
->Length
);
6577 // delete DIR_ITEM (0x54)
6579 Status
= insert_tree_item_batch(batchlist
, fileref
->fcb
->Vcb
, fileref
->parent
->fcb
->subvol
, fileref
->parent
->fcb
->inode
, TYPE_DIR_ITEM
,
6580 oldcrc32
, olddi
, sizeof(DIR_ITEM
) - 1 + oldutf8
->Length
, Batch_DeleteDirItem
);
6581 if (!NT_SUCCESS(Status
)) {
6582 ERR("insert_tree_item_batch returned %08x\n", Status
);
6587 // add DIR_ITEM (0x54)
6589 disize
= (uint16_t)(offsetof(DIR_ITEM
, name
[0]) + fileref
->dc
->utf8
.Length
);
6590 di
= ExAllocatePoolWithTag(PagedPool
, disize
, ALLOC_TAG
);
6592 ERR("out of memory\n");
6593 return STATUS_INSUFFICIENT_RESOURCES
;
6596 di2
= ExAllocatePoolWithTag(PagedPool
, disize
, ALLOC_TAG
);
6598 ERR("out of memory\n");
6600 return STATUS_INSUFFICIENT_RESOURCES
;
// Key of the new entry. NOTE(review): the condition guarding this first
// assignment is on a line omitted from this listing.
6604 di
->key
= fileref
->dc
->key
;
6605 else if (fileref
->parent
->fcb
->subvol
== fileref
->fcb
->subvol
) {
6606 di
->key
.obj_id
= fileref
->fcb
->inode
;
6607 di
->key
.obj_type
= TYPE_INODE_ITEM
;
6609 } else { // subvolume
6610 di
->key
.obj_id
= fileref
->fcb
->subvol
->id
;
6611 di
->key
.obj_type
= TYPE_ROOT_ITEM
;
6612 di
->key
.offset
= 0xffffffffffffffff;
6615 di
->transid
= fileref
->fcb
->Vcb
->superblock
.generation
;
6617 di
->n
= (uint16_t)fileref
->dc
->utf8
.Length
;
6618 di
->type
= fileref
->fcb
->type
;
6619 RtlCopyMemory(di
->name
, fileref
->dc
->utf8
.Buffer
, fileref
->dc
->utf8
.Length
);
// di2 is the copy used for the DIR_INDEX insert further down.
6621 RtlCopyMemory(di2
, di
, disize
);
6623 Status
= insert_tree_item_batch(batchlist
, fileref
->fcb
->Vcb
, fileref
->parent
->fcb
->subvol
, fileref
->parent
->fcb
->inode
, TYPE_DIR_ITEM
, crc32
,
6624 di
, disize
, Batch_DirItem
);
6625 if (!NT_SUCCESS(Status
)) {
6626 ERR("insert_tree_item_batch returned %08x\n", Status
);
6632 if (fileref
->parent
->fcb
->subvol
== fileref
->fcb
->subvol
) {
6633 INODE_REF
*ir
, *ir2
;
6635 // delete INODE_REF (0xc)
6637 ir
= ExAllocatePoolWithTag(PagedPool
, sizeof(INODE_REF
) - 1 + oldutf8
->Length
, ALLOC_TAG
);
6639 ERR("out of memory\n");
6641 return STATUS_INSUFFICIENT_RESOURCES
;
6644 ir
->index
= fileref
->dc
->index
;
6645 ir
->n
= oldutf8
->Length
;
6646 RtlCopyMemory(ir
->name
, oldutf8
->Buffer
, ir
->n
);
6648 Status
= insert_tree_item_batch(batchlist
, fileref
->fcb
->Vcb
, fileref
->fcb
->subvol
, fileref
->fcb
->inode
, TYPE_INODE_REF
, fileref
->parent
->fcb
->inode
,
6649 ir
, sizeof(INODE_REF
) - 1 + ir
->n
, Batch_DeleteInodeRef
);
6650 if (!NT_SUCCESS(Status
)) {
6651 ERR("insert_tree_item_batch returned %08x\n", Status
);
6657 // add INODE_REF (0xc)
6659 ir2
= ExAllocatePoolWithTag(PagedPool
, sizeof(INODE_REF
) - 1 + fileref
->dc
->utf8
.Length
, ALLOC_TAG
);
6661 ERR("out of memory\n");
6663 return STATUS_INSUFFICIENT_RESOURCES
;
6666 ir2
->index
= fileref
->dc
->index
;
6667 ir2
->n
= fileref
->dc
->utf8
.Length
;
6668 RtlCopyMemory(ir2
->name
, fileref
->dc
->utf8
.Buffer
, ir2
->n
);
6670 Status
= insert_tree_item_batch(batchlist
, fileref
->fcb
->Vcb
, fileref
->fcb
->subvol
, fileref
->fcb
->inode
, TYPE_INODE_REF
, fileref
->parent
->fcb
->inode
,
6671 ir2
, sizeof(INODE_REF
) - 1 + ir2
->n
, Batch_InodeRef
);
6672 if (!NT_SUCCESS(Status
)) {
6673 ERR("insert_tree_item_batch returned %08x\n", Status
);
6678 } else if (fileref
->fcb
!= fileref
->fcb
->Vcb
->dummy_fcb
) { // subvolume
// Subvolume entry: swap the old ROOT_REF name for the new one, then refresh
// the ROOT_BACKREF from the result.
6682 Status
= delete_root_ref(fileref
->fcb
->Vcb
, fileref
->fcb
->subvol
->id
, fileref
->parent
->fcb
->subvol
->id
, fileref
->parent
->fcb
->inode
, oldutf8
, Irp
);
6683 if (!NT_SUCCESS(Status
)) {
6684 ERR("delete_root_ref returned %08x\n", Status
);
6689 rrlen
= sizeof(ROOT_REF
) - 1 + fileref
->dc
->utf8
.Length
;
6691 rr
= ExAllocatePoolWithTag(PagedPool
, rrlen
, ALLOC_TAG
);
6693 ERR("out of memory\n");
6695 return STATUS_INSUFFICIENT_RESOURCES
;
6698 rr
->dir
= fileref
->parent
->fcb
->inode
;
6699 rr
->index
= fileref
->dc
->index
;
6700 rr
->n
= fileref
->dc
->utf8
.Length
;
6701 RtlCopyMemory(rr
->name
, fileref
->dc
->utf8
.Buffer
, fileref
->dc
->utf8
.Length
);
6703 Status
= add_root_ref(fileref
->fcb
->Vcb
, fileref
->fcb
->subvol
->id
, fileref
->parent
->fcb
->subvol
->id
, rr
, Irp
);
6704 if (!NT_SUCCESS(Status
)) {
6705 ERR("add_root_ref returned %08x\n", Status
);
6710 Status
= update_root_backref(fileref
->fcb
->Vcb
, fileref
->fcb
->subvol
->id
, fileref
->parent
->fcb
->subvol
->id
, Irp
);
6711 if (!NT_SUCCESS(Status
)) {
6712 ERR("update_root_backref returned %08x\n", Status
);
6718 // delete DIR_INDEX (0x60)
6720 Status
= insert_tree_item_batch(batchlist
, fileref
->fcb
->Vcb
, fileref
->parent
->fcb
->subvol
, fileref
->parent
->fcb
->inode
, TYPE_DIR_INDEX
,
6721 fileref
->dc
->index
, NULL
, 0, Batch_Delete
);
6722 if (!NT_SUCCESS(Status
)) {
6723 ERR("insert_tree_item_batch returned %08x\n", Status
);
6728 // add DIR_INDEX (0x60)
6730 Status
= insert_tree_item_batch(batchlist
, fileref
->fcb
->Vcb
, fileref
->parent
->fcb
->subvol
, fileref
->parent
->fcb
->inode
, TYPE_DIR_INDEX
,
6731 fileref
->dc
->index
, di2
, disize
, Batch_Insert
);
6732 if (!NT_SUCCESS(Status
)) {
6733 ERR("insert_tree_item_batch returned %08x\n", Status
);
// The saved old name is released here once the changes have been queued.
6738 if (fileref
->oldutf8
.Buffer
) {
6739 ExFreePool(fileref
->oldutf8
.Buffer
);
6740 fileref
->oldutf8
.Buffer
= NULL
;
// All cases end here: the fileref is now clean.
6744 fileref
->dirty
= false;
6746 return STATUS_SUCCESS
;
// flush_disk_caches: sends an ATA FLUSH CACHE command
// (IOCTL_ATA_PASS_THROUGH carrying IDE_COMMAND_FLUSH_CACHE) to every device
// of the volume that has a device object, is not read-only and has can_flush
// set, then waits on context.Event before freeing the per-device bookkeeping.
//
//   Vcb - volume whose Vcb->devices list is walked
//
// Returns nothing; allocation failures are only logged.
// NOTE(review): no IoFreeIrp call is visible in this listing, and what happens
// after "IoAllocateIrp failed" is on omitted lines - confirm that the IRPs
// are released and that the wait below cannot stall when an IRP could not be
// allocated.
6749 static void flush_disk_caches(device_extension
* Vcb
) {
6751 ioctl_context context
;
// First pass over the devices. The statement executed for each eligible
// device is not shown here; context.left is tested right afterwards, so this
// pass presumably counts them.
6756 le
= Vcb
->devices
.Flink
;
6758 while (le
!= &Vcb
->devices
) {
6759 device
* dev
= CONTAINING_RECORD(le
, device
, list_entry
);
6761 if (dev
->devobj
&& !dev
->readonly
&& dev
->can_flush
)
6767 if (context
.left
== 0)
6772 KeInitializeEvent(&context
.Event
, NotificationEvent
, false);
// One zero-initialised ioctl_context_stripe per pending request, taken from
// non-paged pool.
6774 context
.stripes
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(ioctl_context_stripe
) * context
.left
, ALLOC_TAG
);
6775 if (!context
.stripes
) {
6776 ERR("out of memory\n");
6780 RtlZeroMemory(context
.stripes
, sizeof(ioctl_context_stripe
) * context
.left
);
// Second pass: build and send one pass-through IRP per eligible device.
6782 le
= Vcb
->devices
.Flink
;
6784 while (le
!= &Vcb
->devices
) {
6785 device
* dev
= CONTAINING_RECORD(le
, device
, list_entry
);
6787 if (dev
->devobj
&& !dev
->readonly
&& dev
->can_flush
) {
6788 PIO_STACK_LOCATION IrpSp
;
6789 ioctl_context_stripe
* stripe
= &context
.stripes
[num
];
// ATA pass-through block: command register (task file byte 6) = FLUSH CACHE,
// TimeOutValue of 5.
6791 RtlZeroMemory(&stripe
->apte
, sizeof(ATA_PASS_THROUGH_EX
));
6793 stripe
->apte
.Length
= sizeof(ATA_PASS_THROUGH_EX
);
6794 stripe
->apte
.TimeOutValue
= 5;
6795 stripe
->apte
.CurrentTaskFile
[6] = IDE_COMMAND_FLUSH_CACHE
;
6797 stripe
->Irp
= IoAllocateIrp(dev
->devobj
->StackSize
, false);
6800 ERR("IoAllocateIrp failed\n");
// Fill in the next stack location as a buffered device-control request whose
// input and output buffer are both the pass-through block.
6804 IrpSp
= IoGetNextIrpStackLocation(stripe
->Irp
);
6805 IrpSp
->MajorFunction
= IRP_MJ_DEVICE_CONTROL
;
6806 IrpSp
->FileObject
= dev
->fileobj
;
6808 IrpSp
->Parameters
.DeviceIoControl
.IoControlCode
= IOCTL_ATA_PASS_THROUGH
;
6809 IrpSp
->Parameters
.DeviceIoControl
.InputBufferLength
= sizeof(ATA_PASS_THROUGH_EX
);
6810 IrpSp
->Parameters
.DeviceIoControl
.OutputBufferLength
= sizeof(ATA_PASS_THROUGH_EX
);
6812 stripe
->Irp
->AssociatedIrp
.SystemBuffer
= &stripe
->apte
;
6813 stripe
->Irp
->Flags
|= IRP_BUFFERED_IO
| IRP_INPUT_OPERATION
;
6814 stripe
->Irp
->UserBuffer
= &stripe
->apte
;
6815 stripe
->Irp
->UserIosb
= &stripe
->iosb
;
// ioctl_completion is invoked on success, error and cancel, with the shared
// context as its argument.
6817 IoSetCompletionRoutine(stripe
->Irp
, ioctl_completion
, &context
, true, true, true);
6819 IoCallDriver(dev
->devobj
, stripe
->Irp
);
// Block until context.Event is signalled (presumably by ioctl_completion once
// every request has finished - that routine is defined outside this chunk).
6828 KeWaitForSingleObject(&context
.Event
, Executive
, KernelMode
, false, NULL
);
6830 ExFreePool(context
.stripes
);
// flush_changed_dev_stats: rewrites the DEV_STATS item of `dev` in
// Vcb->dev_root with the current contents of dev->stats.
//
//   Vcb - volume; the item lives in Vcb->dev_root
//   dev - device whose statistics are written; dev->devitem.dev_id is the key offset
//   Irp - passed through to find_item / insert_tree_item
//
// Returns STATUS_INSUFFICIENT_RESOURCES on allocation failure and
// STATUS_SUCCESS at the end; error returns after failed helper calls are on
// lines omitted from this listing.
6833 static NTSTATUS
flush_changed_dev_stats(device_extension
* Vcb
, device
* dev
, PIRP Irp
) {
// Key: (0, TYPE_DEV_STATS, device id).
6840 searchkey
.obj_id
= 0;
6841 searchkey
.obj_type
= TYPE_DEV_STATS
;
6842 searchkey
.offset
= dev
->devitem
.dev_id
;
6844 Status
= find_item(Vcb
, Vcb
->dev_root
, &tp
, &searchkey
, false, Irp
);
6845 if (!NT_SUCCESS(Status
)) {
6846 ERR("find_item returned %08x\n", Status
);
// Remove the existing item, if there is one, before inserting the new copy.
6850 if (!keycmp(tp
.item
->key
, searchkey
)) {
6851 Status
= delete_tree_item(Vcb
, &tp
);
6852 if (!NT_SUCCESS(Status
)) {
6853 ERR("delete_tree_item returned %08x\n", Status
);
// The payload is five 64-bit counters copied from dev->stats.
6858 statslen
= sizeof(uint64_t) * 5;
6859 stats
= ExAllocatePoolWithTag(PagedPool
, statslen
, ALLOC_TAG
);
6861 ERR("out of memory\n");
6862 return STATUS_INSUFFICIENT_RESOURCES
;
6865 RtlCopyMemory(stats
, dev
->stats
, statslen
);
6867 Status
= insert_tree_item(Vcb
, Vcb
->dev_root
, 0, TYPE_DEV_STATS
, dev
->devitem
.dev_id
, stats
, statslen
, NULL
, Irp
);
6868 if (!NT_SUCCESS(Status
)) {
6869 ERR("insert_tree_item returned %08x\n", Status
);
6874 return STATUS_SUCCESS
;
// flush_subvol: writes a root's in-memory state back to the metadata trees.
//
//   Vcb - volume
//   r   - root (subvolume) to flush
//   Irp - passed through to find_item / insert_tree_item / create_root
//
// Two things are visible in this listing:
//   1. For any root other than root_root and chunk_root, the ROOT_ITEM stored
//      in Vcb->root_root is replaced by a copy of r->root_item.
//   2. An entry keyed by r->root_item.received_uuid (TYPE_SUBVOL_REC_UUID) is
//      added to the UUID tree, creating that tree first if Vcb->uuid_root is
//      NULL, after which r->received is cleared.
//      NOTE(review): the condition guarding part 2 is on lines omitted here.
//
// Returns STATUS_INTERNAL_ERROR when the ROOT_ITEM cannot be found,
// STATUS_INSUFFICIENT_RESOURCES on allocation failure, STATUS_SUCCESS at the
// end; other error returns are on omitted lines.
6877 static NTSTATUS
flush_subvol(device_extension
* Vcb
, root
* r
, PIRP Irp
) {
// ---- Part 1: rewrite the ROOT_ITEM ----
6880 if (r
!= Vcb
->root_root
&& r
!= Vcb
->chunk_root
) {
6885 searchkey
.obj_id
= r
->id
;
6886 searchkey
.obj_type
= TYPE_ROOT_ITEM
;
6887 searchkey
.offset
= 0xffffffffffffffff;
6889 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, Irp
);
6890 if (!NT_SUCCESS(Status
)) {
6891 ERR("error - find_item returned %08x\n", Status
);
// Only obj_id and obj_type have to match; the offset found is reused below.
6895 if (tp
.item
->key
.obj_id
!= searchkey
.obj_id
|| tp
.item
->key
.obj_type
!= searchkey
.obj_type
) {
6896 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey
.obj_id
);
6897 return STATUS_INTERNAL_ERROR
;
// Heap copy of the in-memory ROOT_ITEM, handed to insert_tree_item below.
6900 ri
= ExAllocatePoolWithTag(PagedPool
, sizeof(ROOT_ITEM
), ALLOC_TAG
);
6902 ERR("out of memory\n");
6903 return STATUS_INSUFFICIENT_RESOURCES
;
6906 RtlCopyMemory(ri
, &r
->root_item
, sizeof(ROOT_ITEM
));
// NOTE(review): confirm that ri is released if delete_tree_item fails; the
// original line numbering leaves little room for a free on that path.
6908 Status
= delete_tree_item(Vcb
, &tp
);
6909 if (!NT_SUCCESS(Status
)) {
6910 ERR("delete_tree_item returned %08x\n", Status
);
6914 Status
= insert_tree_item(Vcb
, Vcb
->root_root
, tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, ri
, sizeof(ROOT_ITEM
), NULL
, Irp
);
6915 if (!NT_SUCCESS(Status
)) {
6916 ERR("insert_tree_item returned %08x\n", Status
);
// ---- Part 2: received-UUID entry in the UUID tree ----
// Create the UUID tree on demand.
6925 if (!Vcb
->uuid_root
) {
6928 TRACE("uuid root doesn't exist, creating it\n");
6930 Status
= create_root(Vcb
, BTRFS_ROOT_UUID
, &uuid_root
, false, 0, Irp
);
6932 if (!NT_SUCCESS(Status
)) {
6933 ERR("create_root returned %08x\n", Status
);
6937 Vcb
->uuid_root
= uuid_root
;
// The 16-byte received UUID is split across the key: first 8 bytes into
// obj_id, last 8 bytes into offset.
6940 RtlCopyMemory(&searchkey
.obj_id
, &r
->root_item
.received_uuid
, sizeof(uint64_t));
6941 searchkey
.obj_type
= TYPE_SUBVOL_REC_UUID
;
6942 RtlCopyMemory(&searchkey
.offset
, &r
->root_item
.received_uuid
.uuid
[sizeof(uint64_t)], sizeof(uint64_t));
6944 Status
= find_item(Vcb
, Vcb
->uuid_root
, &tp
, &searchkey
, false, Irp
);
6945 if (!NT_SUCCESS(Status
)) {
6946 ERR("find_item returned %08x\n", Status
);
// An item for this UUID already exists: append r->id to its list of ids,
// but only if the enlarged item still fits within one leaf node.
6950 if (!keycmp(tp
.item
->key
, searchkey
)) {
6951 if (tp
.item
->size
+ sizeof(uint64_t) <= Vcb
->superblock
.node_size
- sizeof(tree_header
) - sizeof(leaf_node
)) {
6954 ids
= ExAllocatePoolWithTag(PagedPool
, tp
.item
->size
+ sizeof(uint64_t), ALLOC_TAG
);
6956 ERR("out of memory\n");
6957 return STATUS_INSUFFICIENT_RESOURCES
;
6960 RtlCopyMemory(ids
, tp
.item
->data
, tp
.item
->size
);
6961 RtlCopyMemory((uint8_t*)ids
+ tp
.item
->size
, &r
->id
, sizeof(uint64_t));
6963 Status
= delete_tree_item(Vcb
, &tp
);
6964 if (!NT_SUCCESS(Status
)) {
6965 ERR("delete_tree_item returned %08x\n", Status
);
6970 Status
= insert_tree_item(Vcb
, Vcb
->uuid_root
, searchkey
.obj_id
, searchkey
.obj_type
, searchkey
.offset
, ids
, tp
.item
->size
+ sizeof(uint64_t), NULL
, Irp
);
6971 if (!NT_SUCCESS(Status
)) {
6972 ERR("insert_tree_item returned %08x\n", Status
);
// No existing item (presumably the else branch): insert a fresh one holding a
// single 64-bit value. The line that fills *root_num is omitted from this
// listing - presumably r->id.
6980 root_num
= ExAllocatePoolWithTag(PagedPool
, sizeof(uint64_t), ALLOC_TAG
);
6982 ERR("out of memory\n");
6983 return STATUS_INSUFFICIENT_RESOURCES
;
6988 Status
= insert_tree_item(Vcb
, Vcb
->uuid_root
, searchkey
.obj_id
, searchkey
.obj_type
, searchkey
.offset
, root_num
, sizeof(uint64_t), NULL
, Irp
);
6989 if (!NT_SUCCESS(Status
)) {
6990 ERR("insert_tree_item returned %08x\n", Status
);
6991 ExFreePool(root_num
);
// The received-UUID entry has been handled; clear the flag.
6996 r
->received
= false;
7001 return STATUS_SUCCESS
;
// test_not_full: checks that enough room remains for metadata.
//
//   Vcb - volume to examine
//
// Computes a reserve from the bytes used by the extent, root and (if present)
// checksum trees, clamped to the range [0x1000000, 0x20000000]. It then
// estimates how much space new metadata chunks could still obtain from the
// writable devices (could_alloc), using a calculation that depends on the
// RAID profile in Vcb->metadata_flags. If that is not enough, the unused space
// in existing writable metadata chunks is added chunk by chunk.
//
// Returns STATUS_SUCCESS as soon as the reserve is covered, STATUS_DISK_FULL
// otherwise.
// NOTE(review): the initialisation of could_alloc and free_space, the bodies
// of the per-profile "largest N free spaces" selection, and the list-advance
// statements are on lines omitted from this listing.
7004 static NTSTATUS
test_not_full(device_extension
* Vcb
) {
7005 uint64_t reserve
, could_alloc
, free_space
;
7008 // This function ensures we drop into readonly mode if we're about to leave very little
7009 // space for metadata - this is similar to the "global reserve" of the Linux driver.
7010 // Otherwise we might completely fill our space, at which point due to COW we can't
7011 // delete anything in order to fix this.
7013 reserve
= Vcb
->extent_root
->root_item
.bytes_used
;
7014 reserve
+= Vcb
->root_root
->root_item
.bytes_used
;
7015 if (Vcb
->checksum_root
) reserve
+= Vcb
->checksum_root
->root_item
.bytes_used
;
7017 reserve
= max(reserve
, 0x1000000); // 16 M
7018 reserve
= min(reserve
, 0x20000000); // 512 M
7020 // Find out how much space would be available for new metadata chunks
// RAID5: tracks three per-device free-space values (s1..s3); result is s3 * 2.
7024 if (Vcb
->metadata_flags
& BLOCK_FLAG_RAID5
) {
7025 uint64_t s1
= 0, s2
= 0, s3
= 0;
7027 le
= Vcb
->devices
.Flink
;
7028 while (le
!= &Vcb
->devices
) {
7029 device
* dev
= CONTAINING_RECORD(le
, device
, list_entry
);
7031 if (!dev
->readonly
) {
7032 uint64_t space
= dev
->devitem
.num_bytes
- dev
->devitem
.bytes_used
;
7038 } else if (space
>= s2
) {
7041 } else if (space
>= s3
)
7048 could_alloc
= s3
* 2;
// RAID10 / RAID6: tracks four values (s1..s4); result is s4 * 2.
7049 } else if (Vcb
->metadata_flags
& (BLOCK_FLAG_RAID10
| BLOCK_FLAG_RAID6
)) {
7050 uint64_t s1
= 0, s2
= 0, s3
= 0, s4
= 0;
7052 le
= Vcb
->devices
.Flink
;
7053 while (le
!= &Vcb
->devices
) {
7054 device
* dev
= CONTAINING_RECORD(le
, device
, list_entry
);
7056 if (!dev
->readonly
) {
7057 uint64_t space
= dev
->devitem
.num_bytes
- dev
->devitem
.bytes_used
;
7064 } else if (space
>= s2
) {
7068 } else if (space
>= s3
) {
7071 } else if (space
>= s4
)
7078 could_alloc
= s4
* 2;
// RAID0 / RAID1: tracks two values (s1, s2). The RAID1 result is assigned on
// an omitted line; the value shown below is s2 * 2.
7079 } else if (Vcb
->metadata_flags
& (BLOCK_FLAG_RAID0
| BLOCK_FLAG_RAID1
)) {
7080 uint64_t s1
= 0, s2
= 0;
7082 le
= Vcb
->devices
.Flink
;
7083 while (le
!= &Vcb
->devices
) {
7084 device
* dev
= CONTAINING_RECORD(le
, device
, list_entry
);
7086 if (!dev
->readonly
) {
7087 uint64_t space
= dev
->devitem
.num_bytes
- dev
->devitem
.bytes_used
;
7092 } else if (space
>= s2
)
7099 if (Vcb
->metadata_flags
& BLOCK_FLAG_RAID1
)
7102 could_alloc
= s2
* 2;
// DUP: both copies live on one device, so only half of a device's free space
// counts; take the best device.
7103 } else if (Vcb
->metadata_flags
& BLOCK_FLAG_DUPLICATE
) {
7104 le
= Vcb
->devices
.Flink
;
7105 while (le
!= &Vcb
->devices
) {
7106 device
* dev
= CONTAINING_RECORD(le
, device
, list_entry
);
7108 if (!dev
->readonly
) {
7109 uint64_t space
= (dev
->devitem
.num_bytes
- dev
->devitem
.bytes_used
) / 2;
7111 could_alloc
= max(could_alloc
, space
);
// Remaining case (presumably the final else - single profile): the largest
// free space on any writable device.
7117 le
= Vcb
->devices
.Flink
;
7118 while (le
!= &Vcb
->devices
) {
7119 device
* dev
= CONTAINING_RECORD(le
, device
, list_entry
);
7121 if (!dev
->readonly
) {
7122 uint64_t space
= dev
->devitem
.num_bytes
- dev
->devitem
.bytes_used
;
7124 could_alloc
= max(could_alloc
, space
);
// Enough unallocated device space alone: done.
7131 if (could_alloc
>= reserve
)
7132 return STATUS_SUCCESS
;
// Otherwise add the unused space inside existing metadata chunks that are
// neither being relocated nor read-only, stopping as soon as the total suffices.
7136 le
= Vcb
->chunks
.Flink
;
7137 while (le
!= &Vcb
->chunks
) {
7138 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
7140 if (!c
->reloc
&& !c
->readonly
&& c
->chunk_item
->type
& BLOCK_FLAG_METADATA
) {
7141 free_space
+= c
->chunk_item
->size
- c
->used
;
7143 if (free_space
+ could_alloc
>= reserve
)
7144 return STATUS_SUCCESS
;
7150 return STATUS_DISK_FULL
;
// check_for_orphans_root: processes the orphan-inode records of one root.
//
//   Vcb - volume
//   r   - root (subvolume) whose tree is scanned
//   Irp - passed through to the tree and fcb helpers
//
// Walks the items with object id BTRFS_ORPHAN_INODE_OBJID and type
// TYPE_ORPHAN_INODE in `r`. For each one, the inode named by the key offset
// is opened; if its link count is zero its extents are excised (non-directory
// with non-zero size only), it is marked deleted and dirty. The orphan item
// itself is then deleted.
//
// Uses a local rollback list: clear_rollback() and do_rollback() are both
// called near the end - which one runs on which path is determined by lines
// omitted from this listing (presumably success vs. failure).
7153 static NTSTATUS
check_for_orphans_root(device_extension
* Vcb
, root
* r
, PIRP Irp
) {
7157 LIST_ENTRY rollback
;
7159 TRACE("(%p, %p)\n", Vcb
, r
);
7161 InitializeListHead(&rollback
);
// Start the scan at the lowest possible orphan key.
7163 searchkey
.obj_id
= BTRFS_ORPHAN_INODE_OBJID
;
7164 searchkey
.obj_type
= TYPE_ORPHAN_INODE
;
7165 searchkey
.offset
= 0;
7167 Status
= find_item(Vcb
, r
, &tp
, &searchkey
, false, Irp
);
7168 if (!NT_SUCCESS(Status
)) {
7169 ERR("find_item returned %08x\n", Status
);
7174 traverse_ptr next_tp
;
// Past the orphan key range: the statement taken here is on an omitted line
// (presumably it ends the scan).
7176 if (tp
.item
->key
.obj_id
> searchkey
.obj_id
|| (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
> searchkey
.obj_type
))
// An orphan record: its key offset is the inode number.
7179 if (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
== searchkey
.obj_type
) {
7182 TRACE("removing orphaned inode %I64x\n", tp
.item
->key
.offset
);
// A failure to open the inode is logged; no return statement is visible on
// that path in this listing.
7184 Status
= open_fcb(Vcb
, r
, tp
.item
->key
.offset
, 0, NULL
, false, NULL
, &fcb
, PagedPool
, Irp
);
7185 if (!NT_SUCCESS(Status
))
7186 ERR("open_fcb returned %08x\n", Status
);
// Only inodes with no remaining links are removed.
7188 if (fcb
->inode_item
.st_nlink
== 0) {
// Release the data extents of non-directories, rounded up to a whole sector.
7189 if (fcb
->type
!= BTRFS_TYPE_DIRECTORY
&& fcb
->inode_item
.st_size
> 0) {
7190 Status
= excise_extents(Vcb
, fcb
, 0, sector_align(fcb
->inode_item
.st_size
, Vcb
->superblock
.sector_size
), Irp
, &rollback
);
7191 if (!NT_SUCCESS(Status
)) {
7192 ERR("excise_extents returned %08x\n", Status
);
7197 fcb
->deleted
= true;
7199 mark_fcb_dirty(fcb
);
// Remove the orphan record itself.
7204 Status
= delete_tree_item(Vcb
, &tp
);
7205 if (!NT_SUCCESS(Status
)) {
7206 ERR("delete_tree_item returned %08x\n", Status
);
// Advance to the next item; what happens with next_tp is on omitted lines.
7212 if (find_next_item(Vcb
, &tp
, &next_tp
, false, Irp
))
7218 Status
= STATUS_SUCCESS
;
7220 clear_rollback(&rollback
);
7223 do_rollback(Vcb
, &rollback
);
// check_for_orphans: runs check_for_orphans_root() once for every subvolume
// that owns at least one dirty file reference.
//
//   Vcb - volume whose Vcb->dirty_filerefs list is walked
//   Irp - passed through to check_for_orphans_root
//
// Each subvolume's checked_for_orphans flag is set after a visible call to
// check_for_orphans_root, so it is not scanned again. Returns STATUS_SUCCESS
// when the list is empty or after the walk; the return taken when
// check_for_orphans_root fails is on a line omitted from this listing.
7228 static NTSTATUS
check_for_orphans(device_extension
* Vcb
, PIRP Irp
) {
// Nothing dirty, nothing to check.
7232 if (IsListEmpty(&Vcb
->dirty_filerefs
))
7233 return STATUS_SUCCESS
;
7235 le
= Vcb
->dirty_filerefs
.Flink
;
7236 while (le
!= &Vcb
->dirty_filerefs
) {
7237 file_ref
* fr
= CONTAINING_RECORD(le
, file_ref
, list_entry_dirty
);
// Scan each subvolume at most once.
7239 if (!fr
->fcb
->subvol
->checked_for_orphans
) {
7240 Status
= check_for_orphans_root(Vcb
, fr
->fcb
->subvol
, Irp
);
7241 if (!NT_SUCCESS(Status
)) {
7242 ERR("check_for_orphans_root returned %08x\n", Status
);
7246 fr
->fcb
->subvol
->checked_for_orphans
= true;
7252 return STATUS_SUCCESS
;
7255 static NTSTATUS
do_write2(device_extension
* Vcb
, PIRP Irp
, LIST_ENTRY
* rollback
) {
7257 LIST_ENTRY
*le
, batchlist
;
7258 bool cache_changed
= false;
7259 volume_device_extension
* vde
;
7260 bool no_cache
= false;
7261 #ifdef DEBUG_FLUSH_TIMES
7262 uint64_t filerefs
= 0, fcbs
= 0;
7263 LARGE_INTEGER freq
, time1
, time2
;
7265 #ifdef DEBUG_WRITE_LOOPS
7269 TRACE("(%p)\n", Vcb
);
7271 InitializeListHead(&batchlist
);
7273 #ifdef DEBUG_FLUSH_TIMES
7274 time1
= KeQueryPerformanceCounter(&freq
);
7277 Status
= check_for_orphans(Vcb
, Irp
);
7278 if (!NT_SUCCESS(Status
)) {
7279 ERR("check_for_orphans returned %08x\n", Status
);
7283 ExAcquireResourceExclusiveLite(&Vcb
->dirty_filerefs_lock
, true);
7285 while (!IsListEmpty(&Vcb
->dirty_filerefs
)) {
7286 file_ref
* fr
= CONTAINING_RECORD(RemoveHeadList(&Vcb
->dirty_filerefs
), file_ref
, list_entry_dirty
);
7288 flush_fileref(fr
, &batchlist
, Irp
);
7291 #ifdef DEBUG_FLUSH_TIMES
7296 ExReleaseResourceLite(&Vcb
->dirty_filerefs_lock
);
7298 Status
= commit_batch_list(Vcb
, &batchlist
, Irp
);
7299 if (!NT_SUCCESS(Status
)) {
7300 ERR("commit_batch_list returned %08x\n", Status
);
7304 #ifdef DEBUG_FLUSH_TIMES
7305 time2
= KeQueryPerformanceCounter(NULL
);
7307 ERR("flushed %I64u filerefs in %I64u (freq = %I64u)\n", filerefs
, time2
.QuadPart
- time1
.QuadPart
, freq
.QuadPart
);
7309 time1
= KeQueryPerformanceCounter(&freq
);
7312 // We process deleted streams first, so we don't run over our xattr
7313 // limit unless we absolutely have to.
7314 // We also process deleted normal files, to avoid any problems
7315 // caused by inode collisions.
7317 ExAcquireResourceExclusiveLite(&Vcb
->dirty_fcbs_lock
, true);
7319 le
= Vcb
->dirty_fcbs
.Flink
;
7320 while (le
!= &Vcb
->dirty_fcbs
) {
7321 fcb
* fcb
= CONTAINING_RECORD(le
, struct _fcb
, list_entry_dirty
);
7322 LIST_ENTRY
* le2
= le
->Flink
;
7325 ExAcquireResourceExclusiveLite(fcb
->Header
.Resource
, true);
7326 Status
= flush_fcb(fcb
, false, &batchlist
, Irp
);
7327 ExReleaseResourceLite(fcb
->Header
.Resource
);
7331 if (!NT_SUCCESS(Status
)) {
7332 ERR("flush_fcb returned %08x\n", Status
);
7333 clear_batch_list(Vcb
, &batchlist
);
7334 ExReleaseResourceLite(&Vcb
->dirty_fcbs_lock
);
7338 #ifdef DEBUG_FLUSH_TIMES
7346 Status
= commit_batch_list(Vcb
, &batchlist
, Irp
);
7347 if (!NT_SUCCESS(Status
)) {
7348 ERR("commit_batch_list returned %08x\n", Status
);
7349 ExReleaseResourceLite(&Vcb
->dirty_fcbs_lock
);
7353 le
= Vcb
->dirty_fcbs
.Flink
;
7354 while (le
!= &Vcb
->dirty_fcbs
) {
7355 fcb
* fcb
= CONTAINING_RECORD(le
, struct _fcb
, list_entry_dirty
);
7356 LIST_ENTRY
* le2
= le
->Flink
;
7358 if (fcb
->subvol
!= Vcb
->root_root
) {
7359 ExAcquireResourceExclusiveLite(fcb
->Header
.Resource
, true);
7360 Status
= flush_fcb(fcb
, false, &batchlist
, Irp
);
7361 ExReleaseResourceLite(fcb
->Header
.Resource
);
7364 if (!NT_SUCCESS(Status
)) {
7365 ERR("flush_fcb returned %08x\n", Status
);
7366 ExReleaseResourceLite(&Vcb
->dirty_fcbs_lock
);
7370 #ifdef DEBUG_FLUSH_TIMES
7378 ExReleaseResourceLite(&Vcb
->dirty_fcbs_lock
);
7380 Status
= commit_batch_list(Vcb
, &batchlist
, Irp
);
7381 if (!NT_SUCCESS(Status
)) {
7382 ERR("commit_batch_list returned %08x\n", Status
);
7386 #ifdef DEBUG_FLUSH_TIMES
7387 time2
= KeQueryPerformanceCounter(NULL
);
7389 ERR("flushed %I64u fcbs in %I64u (freq = %I64u)\n", filerefs
, time2
.QuadPart
- time1
.QuadPart
, freq
.QuadPart
);
7392 // no need to get dirty_subvols_lock here, as we have tree_lock exclusively
7393 while (!IsListEmpty(&Vcb
->dirty_subvols
)) {
7394 root
* r
= CONTAINING_RECORD(RemoveHeadList(&Vcb
->dirty_subvols
), root
, list_entry_dirty
);
7396 Status
= flush_subvol(Vcb
, r
, Irp
);
7397 if (!NT_SUCCESS(Status
)) {
7398 ERR("flush_subvol returned %08x\n", Status
);
7403 if (!IsListEmpty(&Vcb
->drop_roots
)) {
7404 Status
= drop_roots(Vcb
, Irp
, rollback
);
7406 if (!NT_SUCCESS(Status
)) {
7407 ERR("drop_roots returned %08x\n", Status
);
7412 Status
= update_chunks(Vcb
, &batchlist
, Irp
, rollback
);
7414 if (!NT_SUCCESS(Status
)) {
7415 ERR("update_chunks returned %08x\n", Status
);
7419 Status
= commit_batch_list(Vcb
, &batchlist
, Irp
);
7421 // If only changing superblock, e.g. changing label, we still need to rewrite
7422 // the root tree so the generations match, otherwise you won't be able to mount on Linux.
7423 if (!Vcb
->root_root
->treeholder
.tree
|| !Vcb
->root_root
->treeholder
.tree
->write
) {
7428 searchkey
.obj_id
= 0;
7429 searchkey
.obj_type
= 0;
7430 searchkey
.offset
= 0;
7432 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, Irp
);
7433 if (!NT_SUCCESS(Status
)) {
7434 ERR("error - find_item returned %08x\n", Status
);
7438 Vcb
->root_root
->treeholder
.tree
->write
= true;
7441 // make sure we always update the extent tree
7442 Status
= add_root_item_to_cache(Vcb
, BTRFS_ROOT_EXTENT
, Irp
);
7443 if (!NT_SUCCESS(Status
)) {
7444 ERR("add_root_item_to_cache returned %08x\n", Status
);
7448 if (Vcb
->stats_changed
) {
7449 le
= Vcb
->devices
.Flink
;
7450 while (le
!= &Vcb
->devices
) {
7451 device
* dev
= CONTAINING_RECORD(le
, device
, list_entry
);
7453 if (dev
->stats_changed
) {
7454 Status
= flush_changed_dev_stats(Vcb
, dev
, Irp
);
7455 if (!NT_SUCCESS(Status
)) {
7456 ERR("flush_changed_dev_stats returned %08x\n", Status
);
7459 dev
->stats_changed
= false;
7465 Vcb
->stats_changed
= false;
7469 Status
= add_parents(Vcb
, Irp
);
7470 if (!NT_SUCCESS(Status
)) {
7471 ERR("add_parents returned %08x\n", Status
);
7475 Status
= allocate_tree_extents(Vcb
, Irp
, rollback
);
7476 if (!NT_SUCCESS(Status
)) {
7477 ERR("allocate_tree_extents returned %08x\n", Status
);
7481 Status
= do_splits(Vcb
, Irp
, rollback
);
7482 if (!NT_SUCCESS(Status
)) {
7483 ERR("do_splits returned %08x\n", Status
);
7487 Status
= update_chunk_usage(Vcb
, Irp
, rollback
);
7488 if (!NT_SUCCESS(Status
)) {
7489 ERR("update_chunk_usage returned %08x\n", Status
);
7493 if (!(Vcb
->superblock
.compat_ro_flags
& BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE
)) {
7495 Status
= allocate_cache(Vcb
, &cache_changed
, Irp
, rollback
);
7496 if (!NT_SUCCESS(Status
)) {
7497 WARN("allocate_cache returned %08x\n", Status
);
7499 cache_changed
= false;
7503 Status
= update_chunk_caches_tree(Vcb
, Irp
);
7504 if (!NT_SUCCESS(Status
)) {
7505 ERR("update_chunk_caches_tree returned %08x\n", Status
);
7510 #ifdef DEBUG_WRITE_LOOPS
7514 ERR("cache has changed, looping again\n");
7516 } while (cache_changed
|| !trees_consistent(Vcb
));
7518 #ifdef DEBUG_WRITE_LOOPS
7519 ERR("%u loops\n", loops
);
7522 TRACE("trees consistent\n");
7524 Status
= update_root_root(Vcb
, no_cache
, Irp
, rollback
);
7525 if (!NT_SUCCESS(Status
)) {
7526 ERR("update_root_root returned %08x\n", Status
);
7530 Status
= write_trees(Vcb
, Irp
);
7531 if (!NT_SUCCESS(Status
)) {
7532 ERR("write_trees returned %08x\n", Status
);
7536 Status
= test_not_full(Vcb
);
7537 if (!NT_SUCCESS(Status
)) {
7538 ERR("test_not_full returned %08x\n", Status
);
7542 #ifdef DEBUG_PARANOID
7543 le
= Vcb
->trees
.Flink
;
7544 while (le
!= &Vcb
->trees
) {
7545 tree
* t
= CONTAINING_RECORD(le
, tree
, list_entry
);
7549 searchkey
.obj_id
= t
->header
.address
;
7550 searchkey
.obj_type
= TYPE_METADATA_ITEM
;
7551 searchkey
.offset
= 0xffffffffffffffff;
7553 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, Irp
);
7554 if (!NT_SUCCESS(Status
)) {
7555 ERR("error - find_item returned %08x\n", Status
);
7559 if (tp
.item
->key
.obj_id
!= searchkey
.obj_id
|| tp
.item
->key
.obj_type
!= searchkey
.obj_type
) {
7560 searchkey
.obj_id
= t
->header
.address
;
7561 searchkey
.obj_type
= TYPE_EXTENT_ITEM
;
7562 searchkey
.offset
= 0xffffffffffffffff;
7564 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, Irp
);
7565 if (!NT_SUCCESS(Status
)) {
7566 ERR("error - find_item returned %08x\n", Status
);
7570 if (tp
.item
->key
.obj_id
!= searchkey
.obj_id
|| tp
.item
->key
.obj_type
!= searchkey
.obj_type
) {
7571 ERR("error - could not find entry in extent tree for tree at %I64x\n", t
->header
.address
);
7572 Status
= STATUS_INTERNAL_ERROR
;
7581 Vcb
->superblock
.cache_generation
= Vcb
->superblock
.generation
;
7583 if (!Vcb
->options
.no_barrier
)
7584 flush_disk_caches(Vcb
);
7586 Status
= write_superblocks(Vcb
, Irp
);
7587 if (!NT_SUCCESS(Status
)) {
7588 ERR("write_superblocks returned %08x\n", Status
);
7595 pdo_device_extension
* pdode
= vde
->pdode
;
7597 ExAcquireResourceSharedLite(&pdode
->child_lock
, true);
7599 le
= pdode
->children
.Flink
;
7601 while (le
!= &pdode
->children
) {
7602 volume_child
* vc
= CONTAINING_RECORD(le
, volume_child
, list_entry
);
7604 vc
->generation
= Vcb
->superblock
.generation
;
7608 ExReleaseResourceLite(&pdode
->child_lock
);
7611 clean_space_cache(Vcb
);
7613 le
= Vcb
->chunks
.Flink
;
7614 while (le
!= &Vcb
->chunks
) {
7615 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
7618 c
->space_changed
= false;
7623 Vcb
->superblock
.generation
++;
7625 Status
= STATUS_SUCCESS
;
7627 le
= Vcb
->trees
.Flink
;
7628 while (le
!= &Vcb
->trees
) {
7629 tree
* t
= CONTAINING_RECORD(le
, tree
, list_entry
);
7636 Vcb
->need_write
= false;
7638 while (!IsListEmpty(&Vcb
->drop_roots
)) {
7639 root
* r
= CONTAINING_RECORD(RemoveHeadList(&Vcb
->drop_roots
), root
, list_entry
);
7641 ExDeleteResourceLite(&r
->nonpaged
->load_tree_lock
);
7642 ExFreePool(r
->nonpaged
);
7647 TRACE("do_write returning %08x\n", Status
);
7652 NTSTATUS
do_write(device_extension
* Vcb
, PIRP Irp
) {
7653 LIST_ENTRY rollback
;
7656 InitializeListHead(&rollback
);
7658 Status
= do_write2(Vcb
, Irp
, &rollback
);
7660 if (!NT_SUCCESS(Status
)) {
7661 ERR("do_write2 returned %08x, dropping into readonly mode\n", Status
);
7662 Vcb
->readonly
= true;
7663 FsRtlNotifyVolumeEvent(Vcb
->root_file
, FSRTL_VOLUME_FORCED_CLOSED
);
7664 do_rollback(Vcb
, &rollback
);
7666 clear_rollback(&rollback
);
7671 static void do_flush(device_extension
* Vcb
) {
7674 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
7676 if (Vcb
->need_write
&& !Vcb
->readonly
)
7677 Status
= do_write(Vcb
, NULL
);
7679 Status
= STATUS_SUCCESS
;
7683 if (!NT_SUCCESS(Status
))
7684 ERR("do_write returned %08x\n", Status
);
7686 ExReleaseResourceLite(&Vcb
->tree_lock
);
7689 _Function_class_(KSTART_ROUTINE
)
7690 void __stdcall
flush_thread(void* context
) {
7691 DEVICE_OBJECT
* devobj
= context
;
7692 device_extension
* Vcb
= devobj
->DeviceExtension
;
7693 LARGE_INTEGER due_time
;
7695 ObReferenceObject(devobj
);
7697 KeInitializeTimer(&Vcb
->flush_thread_timer
);
7699 due_time
.QuadPart
= (uint64_t)Vcb
->options
.flush_interval
* -10000000;
7701 KeSetTimer(&Vcb
->flush_thread_timer
, due_time
, NULL
);
7704 KeWaitForSingleObject(&Vcb
->flush_thread_timer
, Executive
, KernelMode
, false, NULL
);
7706 if (!(devobj
->Vpb
->Flags
& VPB_MOUNTED
) || Vcb
->removing
)
7712 KeSetTimer(&Vcb
->flush_thread_timer
, due_time
, NULL
);
7715 ObDereferenceObject(devobj
);
7716 KeCancelTimer(&Vcb
->flush_thread_timer
);
7718 KeSetEvent(&Vcb
->flush_thread_finished
, 0, false);
7720 PsTerminateSystemThread(STATUS_SUCCESS
);