[BTRFS] Upgrade to 1.4
[reactos.git] / drivers / filesystems / btrfs / flushthread.c
1 /* Copyright (c) Mark Harmstone 2016-17
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19 #include <ata.h>
20 #include <ntddscsi.h>
21 #include <ntddstor.h>
22
/* Number of bytes left in a 4096-byte block once one tree_header and one
 * leaf_node descriptor have been subtracted. */
#define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - sizeof(leaf_node))

/* Uncomment to make trees_consistent() log why it asks for another pass. */
// #define DEBUG_WRITE_LOOPS
26
27 typedef struct {
28 KEVENT Event;
29 IO_STATUS_BLOCK iosb;
30 } write_context;
31
32 typedef struct {
33 EXTENT_ITEM_TREE eit;
34 uint8_t type;
35 TREE_BLOCK_REF tbr;
36 } EXTENT_ITEM_TREE2;
37
38 typedef struct {
39 EXTENT_ITEM ei;
40 uint8_t type;
41 TREE_BLOCK_REF tbr;
42 } EXTENT_ITEM_SKINNY_METADATA;
43
44 static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp);
45 static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback);
46
/* Some SDK headers (the original guard singled out mingw) do not provide this
 * flag. Test for the macro itself rather than for a particular compiler, so
 * that headers which do define it are never overridden. */
#ifndef DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED
#define DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED 0x80000000
#endif
50
51 _Function_class_(IO_COMPLETION_ROUTINE)
52 static NTSTATUS __stdcall write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
53 write_context* context = conptr;
54
55 UNUSED(DeviceObject);
56
57 context->iosb = Irp->IoStatus;
58 KeSetEvent(&context->Event, 0, false);
59
60 return STATUS_MORE_PROCESSING_REQUIRED;
61 }
62
63 NTSTATUS write_data_phys(_In_ PDEVICE_OBJECT device, _In_ PFILE_OBJECT fileobj, _In_ uint64_t address,
64 _In_reads_bytes_(length) void* data, _In_ uint32_t length) {
65 NTSTATUS Status;
66 LARGE_INTEGER offset;
67 PIRP Irp;
68 PIO_STACK_LOCATION IrpSp;
69 write_context context;
70
71 TRACE("(%p, %I64x, %p, %x)\n", device, address, data, length);
72
73 RtlZeroMemory(&context, sizeof(write_context));
74
75 KeInitializeEvent(&context.Event, NotificationEvent, false);
76
77 offset.QuadPart = address;
78
79 Irp = IoAllocateIrp(device->StackSize, false);
80
81 if (!Irp) {
82 ERR("IoAllocateIrp failed\n");
83 return STATUS_INSUFFICIENT_RESOURCES;
84 }
85
86 IrpSp = IoGetNextIrpStackLocation(Irp);
87 IrpSp->MajorFunction = IRP_MJ_WRITE;
88 IrpSp->FileObject = fileobj;
89
90 if (device->Flags & DO_BUFFERED_IO) {
91 Irp->AssociatedIrp.SystemBuffer = data;
92
93 Irp->Flags = IRP_BUFFERED_IO;
94 } else if (device->Flags & DO_DIRECT_IO) {
95 Irp->MdlAddress = IoAllocateMdl(data, length, false, false, NULL);
96 if (!Irp->MdlAddress) {
97 DbgPrint("IoAllocateMdl failed\n");
98 Status = STATUS_INSUFFICIENT_RESOURCES;
99 goto exit;
100 }
101
102 Status = STATUS_SUCCESS;
103
104 _SEH2_TRY {
105 MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoReadAccess);
106 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
107 Status = _SEH2_GetExceptionCode();
108 } _SEH2_END;
109
110 if (!NT_SUCCESS(Status)) {
111 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
112 IoFreeMdl(Irp->MdlAddress);
113 goto exit;
114 }
115 } else {
116 Irp->UserBuffer = data;
117 }
118
119 IrpSp->Parameters.Write.Length = length;
120 IrpSp->Parameters.Write.ByteOffset = offset;
121
122 Irp->UserIosb = &context.iosb;
123
124 Irp->UserEvent = &context.Event;
125
126 IoSetCompletionRoutine(Irp, write_completion, &context, true, true, true);
127
128 Status = IoCallDriver(device, Irp);
129
130 if (Status == STATUS_PENDING) {
131 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
132 Status = context.iosb.Status;
133 }
134
135 if (!NT_SUCCESS(Status)) {
136 ERR("IoCallDriver returned %08x\n", Status);
137 }
138
139 if (device->Flags & DO_DIRECT_IO) {
140 MmUnlockPages(Irp->MdlAddress);
141 IoFreeMdl(Irp->MdlAddress);
142 }
143
144 exit:
145 IoFreeIrp(Irp);
146
147 return Status;
148 }
149
150 static void add_trim_entry(device* dev, uint64_t address, uint64_t size) {
151 space* s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
152 if (!s) {
153 ERR("out of memory\n");
154 return;
155 }
156
157 s->address = address;
158 s->size = size;
159 dev->num_trim_entries++;
160
161 InsertTailList(&dev->trim_list, &s->list_entry);
162 }
163
164 static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) {
165 ULONG type;
166
167 if (Vcb->trim && !Vcb->options.no_trim) {
168 if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE)
169 type = BLOCK_FLAG_DUPLICATE;
170 else if (c->chunk_item->type & BLOCK_FLAG_RAID0)
171 type = BLOCK_FLAG_RAID0;
172 else if (c->chunk_item->type & BLOCK_FLAG_RAID1)
173 type = BLOCK_FLAG_DUPLICATE;
174 else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
175 type = BLOCK_FLAG_RAID10;
176 else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
177 type = BLOCK_FLAG_RAID5;
178 else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
179 type = BLOCK_FLAG_RAID6;
180 else // SINGLE
181 type = BLOCK_FLAG_DUPLICATE;
182 }
183
184 while (!IsListEmpty(&c->deleting)) {
185 space* s = CONTAINING_RECORD(c->deleting.Flink, space, list_entry);
186
187 if (Vcb->trim && !Vcb->options.no_trim && (!Vcb->options.no_barrier || !(c->chunk_item->type & BLOCK_FLAG_METADATA))) {
188 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
189
190 if (type == BLOCK_FLAG_DUPLICATE) {
191 uint16_t i;
192
193 for (i = 0; i < c->chunk_item->num_stripes; i++) {
194 if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim)
195 add_trim_entry(c->devices[i], s->address - c->offset + cis[i].offset, s->size);
196 }
197 } else if (type == BLOCK_FLAG_RAID0) {
198 uint64_t startoff, endoff;
199 uint16_t startoffstripe, endoffstripe, i;
200
201 get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
202 get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
203
204 for (i = 0; i < c->chunk_item->num_stripes; i++) {
205 if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim) {
206 uint64_t stripestart, stripeend;
207
208 if (startoffstripe > i)
209 stripestart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
210 else if (startoffstripe == i)
211 stripestart = startoff;
212 else
213 stripestart = startoff - (startoff % c->chunk_item->stripe_length);
214
215 if (endoffstripe > i)
216 stripeend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
217 else if (endoffstripe == i)
218 stripeend = endoff + 1;
219 else
220 stripeend = endoff - (endoff % c->chunk_item->stripe_length);
221
222 if (stripestart != stripeend)
223 add_trim_entry(c->devices[i], stripestart + cis[i].offset, stripeend - stripestart);
224 }
225 }
226 } else if (type == BLOCK_FLAG_RAID10) {
227 uint64_t startoff, endoff;
228 uint16_t sub_stripes, startoffstripe, endoffstripe, i;
229
230 sub_stripes = max(1, c->chunk_item->sub_stripes);
231
232 get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe);
233 get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe);
234
235 startoffstripe *= sub_stripes;
236 endoffstripe *= sub_stripes;
237
238 for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) {
239 ULONG j;
240 uint64_t stripestart, stripeend;
241
242 if (startoffstripe > i)
243 stripestart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
244 else if (startoffstripe == i)
245 stripestart = startoff;
246 else
247 stripestart = startoff - (startoff % c->chunk_item->stripe_length);
248
249 if (endoffstripe > i)
250 stripeend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
251 else if (endoffstripe == i)
252 stripeend = endoff + 1;
253 else
254 stripeend = endoff - (endoff % c->chunk_item->stripe_length);
255
256 if (stripestart != stripeend) {
257 for (j = 0; j < sub_stripes; j++) {
258 if (c->devices[i+j] && c->devices[i+j]->devobj && !c->devices[i+j]->readonly && c->devices[i+j]->trim)
259 add_trim_entry(c->devices[i+j], stripestart + cis[i+j].offset, stripeend - stripestart);
260 }
261 }
262 }
263 }
264 // FIXME - RAID5(?), RAID6(?)
265 }
266
267 RemoveEntryList(&s->list_entry);
268 ExFreePool(s);
269 }
270 }
271
272 typedef struct {
273 DEVICE_MANAGE_DATA_SET_ATTRIBUTES* dmdsa;
274 ATA_PASS_THROUGH_EX apte;
275 PIRP Irp;
276 IO_STATUS_BLOCK iosb;
277 #ifdef DEBUG_TRIM_EMULATION
278 PMDL mdl;
279 void* buf;
280 #endif
281 } ioctl_context_stripe;
282
283 typedef struct {
284 KEVENT Event;
285 LONG left;
286 ioctl_context_stripe* stripes;
287 } ioctl_context;
288
289 _Function_class_(IO_COMPLETION_ROUTINE)
290 static NTSTATUS __stdcall ioctl_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
291 ioctl_context* context = (ioctl_context*)conptr;
292 LONG left2 = InterlockedDecrement(&context->left);
293
294 UNUSED(DeviceObject);
295 UNUSED(Irp);
296
297 if (left2 == 0)
298 KeSetEvent(&context->Event, 0, false);
299
300 return STATUS_MORE_PROCESSING_REQUIRED;
301 }
302
#ifdef DEBUG_TRIM_EMULATION
/* Debug replacement for trimming: overwrites every range on dev->trim_list
 * with zeroes, issuing one write IRP per range and waiting for all of them.
 *
 * The list itself is left untouched. A range whose request could not be set
 * up is skipped after logging an error. */
static void trim_emulation(device* dev) {
    LIST_ENTRY* le;
    ioctl_context context;
    unsigned int i = 0, count = 0;

    le = dev->trim_list.Flink;
    while (le != &dev->trim_list) {
        count++;
        le = le->Flink;
    }

    // nothing to send, so nothing would ever signal the event
    if (count == 0)
        return;

    context.left = count;

    KeInitializeEvent(&context.Event, NotificationEvent, false);

    context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * count, ALLOC_TAG);
    if (!context.stripes) {
        ERR("out of memory\n");
        return;
    }

    RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * count);

    i = 0;
    le = dev->trim_list.Flink;
    while (le != &dev->trim_list) {
        ioctl_context_stripe* stripe = &context.stripes[i];
        space* s = CONTAINING_RECORD(le, space, list_entry);
        bool sent = false;

        WARN("(%I64x, %I64x)\n", s->address, s->size);

        stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, false);

        if (!stripe->Irp) {
            ERR("IoAllocateIrp failed\n");
        } else {
            PIO_STACK_LOCATION IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
            IrpSp->MajorFunction = IRP_MJ_WRITE;
            IrpSp->FileObject = dev->fileobj;

            stripe->buf = ExAllocatePoolWithTag(NonPagedPool, (uint32_t)s->size, ALLOC_TAG);

            if (!stripe->buf) {
                ERR("out of memory\n");
            } else {
                RtlZeroMemory(stripe->buf, (uint32_t)s->size); // FIXME - randomize instead?

                stripe->mdl = IoAllocateMdl(stripe->buf, (uint32_t)s->size, false, false, NULL);

                if (!stripe->mdl) {
                    ERR("IoAllocateMdl failed\n");
                } else {
                    MmBuildMdlForNonPagedPool(stripe->mdl);

                    stripe->Irp->MdlAddress = stripe->mdl;

                    IrpSp->Parameters.Write.ByteOffset.QuadPart = s->address;
                    IrpSp->Parameters.Write.Length = (uint32_t)s->size; // same truncation as the buffer size

                    stripe->Irp->UserIosb = &stripe->iosb;

                    IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, true, true, true);

                    IoCallDriver(dev->devobj, stripe->Irp);
                    sent = true;
                }
            }
        }

        // A request that was never sent will never complete, so account for
        // it here; otherwise the wait below would block forever.
        if (!sent && InterlockedDecrement(&context.left) == 0)
            KeSetEvent(&context.Event, 0, false);

        i++;

        le = le->Flink;
    }

    KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);

    for (i = 0; i < count; i++) {
        ioctl_context_stripe* stripe = &context.stripes[i];

        if (stripe->mdl)
            IoFreeMdl(stripe->mdl);

        if (stripe->buf)
            ExFreePool(stripe->buf);

        // the completion routine keeps ownership of the IRP with us
        if (stripe->Irp)
            IoFreeIrp(stripe->Irp);
    }

    ExFreePool(context.stripes);
}
#endif
392
393 static void clean_space_cache(device_extension* Vcb) {
394 LIST_ENTRY* le;
395 chunk* c;
396 #ifndef DEBUG_TRIM_EMULATION
397 ULONG num;
398 #endif
399
400 TRACE("(%p)\n", Vcb);
401
402 ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
403
404 le = Vcb->chunks.Flink;
405 while (le != &Vcb->chunks) {
406 c = CONTAINING_RECORD(le, chunk, list_entry);
407
408 if (c->space_changed) {
409 acquire_chunk_lock(c, Vcb);
410
411 if (c->space_changed)
412 clean_space_cache_chunk(Vcb, c);
413
414 c->space_changed = false;
415
416 release_chunk_lock(c, Vcb);
417 }
418
419 le = le->Flink;
420 }
421
422 ExReleaseResourceLite(&Vcb->chunk_lock);
423
424 if (Vcb->trim && !Vcb->options.no_trim) {
425 #ifndef DEBUG_TRIM_EMULATION
426 ioctl_context context;
427 ULONG total_num;
428
429 context.left = 0;
430
431 le = Vcb->devices.Flink;
432 while (le != &Vcb->devices) {
433 device* dev = CONTAINING_RECORD(le, device, list_entry);
434
435 if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0)
436 context.left++;
437
438 le = le->Flink;
439 }
440
441 if (context.left == 0)
442 return;
443
444 total_num = context.left;
445 num = 0;
446
447 KeInitializeEvent(&context.Event, NotificationEvent, false);
448
449 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG);
450 if (!context.stripes) {
451 ERR("out of memory\n");
452 return;
453 }
454
455 RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left);
456 #endif
457
458 le = Vcb->devices.Flink;
459 while (le != &Vcb->devices) {
460 device* dev = CONTAINING_RECORD(le, device, list_entry);
461
462 if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0) {
463 #ifdef DEBUG_TRIM_EMULATION
464 trim_emulation(dev);
465 #else
466 LIST_ENTRY* le2;
467 ioctl_context_stripe* stripe = &context.stripes[num];
468 DEVICE_DATA_SET_RANGE* ranges;
469 ULONG datalen = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(uint64_t)) + (dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE)), i;
470 PIO_STACK_LOCATION IrpSp;
471
472 stripe->dmdsa = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG);
473 if (!stripe->dmdsa) {
474 ERR("out of memory\n");
475 goto nextdev;
476 }
477
478 stripe->dmdsa->Size = sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES);
479 stripe->dmdsa->Action = DeviceDsmAction_Trim;
480 stripe->dmdsa->Flags = DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED;
481 stripe->dmdsa->ParameterBlockOffset = 0;
482 stripe->dmdsa->ParameterBlockLength = 0;
483 stripe->dmdsa->DataSetRangesOffset = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(uint64_t));
484 stripe->dmdsa->DataSetRangesLength = dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE);
485
486 ranges = (DEVICE_DATA_SET_RANGE*)((uint8_t*)stripe->dmdsa + stripe->dmdsa->DataSetRangesOffset);
487
488 i = 0;
489
490 le2 = dev->trim_list.Flink;
491 while (le2 != &dev->trim_list) {
492 space* s = CONTAINING_RECORD(le2, space, list_entry);
493
494 ranges[i].StartingOffset = s->address;
495 ranges[i].LengthInBytes = s->size;
496 i++;
497
498 le2 = le2->Flink;
499 }
500
501 stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, false);
502
503 if (!stripe->Irp) {
504 ERR("IoAllocateIrp failed\n");
505 goto nextdev;
506 }
507
508 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
509 IrpSp->MajorFunction = IRP_MJ_DEVICE_CONTROL;
510 IrpSp->FileObject = dev->fileobj;
511
512 IrpSp->Parameters.DeviceIoControl.IoControlCode = IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES;
513 IrpSp->Parameters.DeviceIoControl.InputBufferLength = datalen;
514 IrpSp->Parameters.DeviceIoControl.OutputBufferLength = 0;
515
516 stripe->Irp->AssociatedIrp.SystemBuffer = stripe->dmdsa;
517 stripe->Irp->Flags |= IRP_BUFFERED_IO;
518 stripe->Irp->UserBuffer = NULL;
519 stripe->Irp->UserIosb = &stripe->iosb;
520
521 IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, true, true, true);
522
523 IoCallDriver(dev->devobj, stripe->Irp);
524
525 nextdev:
526 #endif
527 while (!IsListEmpty(&dev->trim_list)) {
528 space* s = CONTAINING_RECORD(RemoveHeadList(&dev->trim_list), space, list_entry);
529 ExFreePool(s);
530 }
531
532 dev->num_trim_entries = 0;
533
534 #ifndef DEBUG_TRIM_EMULATION
535 num++;
536 #endif
537 }
538
539 le = le->Flink;
540 }
541
542 #ifndef DEBUG_TRIM_EMULATION
543 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
544
545 for (num = 0; num < total_num; num++) {
546 if (context.stripes[num].dmdsa)
547 ExFreePool(context.stripes[num].dmdsa);
548 }
549
550 ExFreePool(context.stripes);
551 #endif
552 }
553 }
554
555 static bool trees_consistent(device_extension* Vcb) {
556 ULONG maxsize = Vcb->superblock.node_size - sizeof(tree_header);
557 LIST_ENTRY* le;
558
559 le = Vcb->trees.Flink;
560 while (le != &Vcb->trees) {
561 tree* t = CONTAINING_RECORD(le, tree, list_entry);
562
563 if (t->write) {
564 if (t->header.num_items == 0 && t->parent) {
565 #ifdef DEBUG_WRITE_LOOPS
566 ERR("empty tree found, looping again\n");
567 #endif
568 return false;
569 }
570
571 if (t->size > maxsize) {
572 #ifdef DEBUG_WRITE_LOOPS
573 ERR("overlarge tree found (%u > %u), looping again\n", t->size, maxsize);
574 #endif
575 return false;
576 }
577
578 if (!t->has_new_address) {
579 #ifdef DEBUG_WRITE_LOOPS
580 ERR("tree found without new address, looping again\n");
581 #endif
582 return false;
583 }
584 }
585
586 le = le->Flink;
587 }
588
589 return true;
590 }
591
592 static NTSTATUS add_parents(device_extension* Vcb, PIRP Irp) {
593 ULONG level;
594 LIST_ENTRY* le;
595
596 for (level = 0; level <= 255; level++) {
597 bool nothing_found = true;
598
599 TRACE("level = %u\n", level);
600
601 le = Vcb->trees.Flink;
602 while (le != &Vcb->trees) {
603 tree* t = CONTAINING_RECORD(le, tree, list_entry);
604
605 if (t->write && t->header.level == level) {
606 TRACE("tree %p: root = %I64x, level = %x, parent = %p\n", t, t->header.tree_id, t->header.level, t->parent);
607
608 nothing_found = false;
609
610 if (t->parent) {
611 if (!t->parent->write)
612 TRACE("adding tree %p (level %x)\n", t->parent, t->header.level);
613
614 t->parent->write = true;
615 } else if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
616 KEY searchkey;
617 traverse_ptr tp;
618 NTSTATUS Status;
619 #ifdef __REACTOS__
620 tree* t2;
621 #endif
622
623 searchkey.obj_id = t->root->id;
624 searchkey.obj_type = TYPE_ROOT_ITEM;
625 searchkey.offset = 0xffffffffffffffff;
626
627 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
628 if (!NT_SUCCESS(Status)) {
629 ERR("error - find_item returned %08x\n", Status);
630 return Status;
631 }
632
633 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
634 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id);
635 return STATUS_INTERNAL_ERROR;
636 }
637
638 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry
639 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
640
641 if (!ri) {
642 ERR("out of memory\n");
643 return STATUS_INSUFFICIENT_RESOURCES;
644 }
645
646 RtlCopyMemory(ri, &t->root->root_item, sizeof(ROOT_ITEM));
647
648 Status = delete_tree_item(Vcb, &tp);
649 if (!NT_SUCCESS(Status)) {
650 ERR("delete_tree_item returned %08x\n", Status);
651 ExFreePool(ri);
652 return Status;
653 }
654
655 Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp);
656 if (!NT_SUCCESS(Status)) {
657 ERR("insert_tree_item returned %08x\n", Status);
658 ExFreePool(ri);
659 return Status;
660 }
661 }
662
663 #ifndef __REACTOS__
664 tree* t2 = tp.tree;
665 #else
666 t2 = tp.tree;
667 #endif
668 while (t2) {
669 t2->write = true;
670
671 t2 = t2->parent;
672 }
673 }
674 }
675
676 le = le->Flink;
677 }
678
679 if (nothing_found)
680 break;
681 }
682
683 return STATUS_SUCCESS;
684 }
685
686 static void add_parents_to_cache(tree* t) {
687 while (t->parent) {
688 t = t->parent;
689 t->write = true;
690 }
691 }
692
693 static bool insert_tree_extent_skinny(device_extension* Vcb, uint8_t level, uint64_t root_id, chunk* c, uint64_t address, PIRP Irp, LIST_ENTRY* rollback) {
694 NTSTATUS Status;
695 EXTENT_ITEM_SKINNY_METADATA* eism;
696 traverse_ptr insert_tp;
697
698 eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
699 if (!eism) {
700 ERR("out of memory\n");
701 return false;
702 }
703
704 eism->ei.refcount = 1;
705 eism->ei.generation = Vcb->superblock.generation;
706 eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
707 eism->type = TYPE_TREE_BLOCK_REF;
708 eism->tbr.offset = root_id;
709
710 Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, Irp);
711 if (!NT_SUCCESS(Status)) {
712 ERR("insert_tree_item returned %08x\n", Status);
713 ExFreePool(eism);
714 return false;
715 }
716
717 acquire_chunk_lock(c, Vcb);
718
719 space_list_subtract(c, false, address, Vcb->superblock.node_size, rollback);
720
721 release_chunk_lock(c, Vcb);
722
723 add_parents_to_cache(insert_tp.tree);
724
725 return true;
726 }
727
728 bool find_metadata_address_in_chunk(device_extension* Vcb, chunk* c, uint64_t* address) {
729 LIST_ENTRY* le;
730 space* s;
731
732 TRACE("(%p, %I64x, %p)\n", Vcb, c->offset, address);
733
734 if (Vcb->superblock.node_size > c->chunk_item->size - c->used)
735 return false;
736
737 if (!c->cache_loaded) {
738 NTSTATUS Status = load_cache_chunk(Vcb, c, NULL);
739
740 if (!NT_SUCCESS(Status)) {
741 ERR("load_cache_chunk returned %08x\n", Status);
742 return false;
743 }
744 }
745
746 if (IsListEmpty(&c->space_size))
747 return false;
748
749 if (!c->last_alloc_set) {
750 s = CONTAINING_RECORD(c->space.Blink, space, list_entry);
751
752 c->last_alloc = s->address;
753 c->last_alloc_set = true;
754
755 if (s->size >= Vcb->superblock.node_size) {
756 *address = s->address;
757 c->last_alloc += Vcb->superblock.node_size;
758 return true;
759 }
760 }
761
762 le = c->space.Flink;
763 while (le != &c->space) {
764 s = CONTAINING_RECORD(le, space, list_entry);
765
766 if (s->address <= c->last_alloc && s->address + s->size >= c->last_alloc + Vcb->superblock.node_size) {
767 *address = c->last_alloc;
768 c->last_alloc += Vcb->superblock.node_size;
769 return true;
770 }
771
772 le = le->Flink;
773 }
774
775 le = c->space_size.Flink;
776 while (le != &c->space_size) {
777 s = CONTAINING_RECORD(le, space, list_entry_size);
778
779 if (s->size == Vcb->superblock.node_size) {
780 *address = s->address;
781 c->last_alloc = s->address + Vcb->superblock.node_size;
782 return true;
783 } else if (s->size < Vcb->superblock.node_size) {
784 if (le == c->space_size.Flink)
785 return false;
786
787 s = CONTAINING_RECORD(le->Blink, space, list_entry_size);
788
789 *address = s->address;
790 c->last_alloc = s->address + Vcb->superblock.node_size;
791
792 return true;
793 }
794
795 le = le->Flink;
796 }
797
798 s = CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size);
799
800 if (s->size > Vcb->superblock.node_size) {
801 *address = s->address;
802 c->last_alloc = s->address + Vcb->superblock.node_size;
803 return true;
804 }
805
806 return false;
807 }
808
809 static bool insert_tree_extent(device_extension* Vcb, uint8_t level, uint64_t root_id, chunk* c, uint64_t* new_address, PIRP Irp, LIST_ENTRY* rollback) {
810 NTSTATUS Status;
811 uint64_t address;
812 EXTENT_ITEM_TREE2* eit2;
813 traverse_ptr insert_tp;
814
815 TRACE("(%p, %x, %I64x, %p, %p, %p, %p)\n", Vcb, level, root_id, c, new_address, rollback);
816
817 if (!find_metadata_address_in_chunk(Vcb, c, &address))
818 return false;
819
820 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
821 bool b = insert_tree_extent_skinny(Vcb, level, root_id, c, address, Irp, rollback);
822
823 if (b)
824 *new_address = address;
825
826 return b;
827 }
828
829 eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
830 if (!eit2) {
831 ERR("out of memory\n");
832 return false;
833 }
834
835 eit2->eit.extent_item.refcount = 1;
836 eit2->eit.extent_item.generation = Vcb->superblock.generation;
837 eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
838 eit2->eit.level = level;
839 eit2->type = TYPE_TREE_BLOCK_REF;
840 eit2->tbr.offset = root_id;
841
842 Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, Irp);
843 if (!NT_SUCCESS(Status)) {
844 ERR("insert_tree_item returned %08x\n", Status);
845 ExFreePool(eit2);
846 return false;
847 }
848
849 acquire_chunk_lock(c, Vcb);
850
851 space_list_subtract(c, false, address, Vcb->superblock.node_size, rollback);
852
853 release_chunk_lock(c, Vcb);
854
855 add_parents_to_cache(insert_tp.tree);
856
857 *new_address = address;
858
859 return true;
860 }
861
862 NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
863 NTSTATUS Status;
864 chunk *origchunk = NULL, *c;
865 LIST_ENTRY* le;
866 uint64_t flags, addr;
867
868 if (t->root->id == BTRFS_ROOT_CHUNK)
869 flags = Vcb->system_flags;
870 else
871 flags = Vcb->metadata_flags;
872
873 if (t->has_address) {
874 origchunk = get_chunk_from_address(Vcb, t->header.address);
875
876 if (origchunk && !origchunk->readonly && !origchunk->reloc && origchunk->chunk_item->type == flags &&
877 insert_tree_extent(Vcb, t->header.level, t->root->id, origchunk, &addr, Irp, rollback)) {
878 t->new_address = addr;
879 t->has_new_address = true;
880 return STATUS_SUCCESS;
881 }
882 }
883
884 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true);
885
886 le = Vcb->chunks.Flink;
887 while (le != &Vcb->chunks) {
888 c = CONTAINING_RECORD(le, chunk, list_entry);
889
890 if (!c->readonly && !c->reloc) {
891 acquire_chunk_lock(c, Vcb);
892
893 if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
894 if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) {
895 release_chunk_lock(c, Vcb);
896 ExReleaseResourceLite(&Vcb->chunk_lock);
897 t->new_address = addr;
898 t->has_new_address = true;
899 return STATUS_SUCCESS;
900 }
901 }
902
903 release_chunk_lock(c, Vcb);
904 }
905
906 le = le->Flink;
907 }
908
909 // allocate new chunk if necessary
910
911 Status = alloc_chunk(Vcb, flags, &c, false);
912
913 if (!NT_SUCCESS(Status)) {
914 ERR("alloc_chunk returned %08x\n", Status);
915 ExReleaseResourceLite(&Vcb->chunk_lock);
916 return Status;
917 }
918
919 acquire_chunk_lock(c, Vcb);
920
921 if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
922 if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) {
923 release_chunk_lock(c, Vcb);
924 ExReleaseResourceLite(&Vcb->chunk_lock);
925 t->new_address = addr;
926 t->has_new_address = true;
927 return STATUS_SUCCESS;
928 }
929 }
930
931 release_chunk_lock(c, Vcb);
932
933 ExReleaseResourceLite(&Vcb->chunk_lock);
934
935 ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size);
936
937 return STATUS_DISK_FULL;
938 }
939
940 static NTSTATUS reduce_tree_extent(device_extension* Vcb, uint64_t address, tree* t, uint64_t parent_root, uint8_t level, PIRP Irp, LIST_ENTRY* rollback) {
941 NTSTATUS Status;
942 uint64_t rc, root;
943
944 TRACE("(%p, %I64x, %p)\n", Vcb, address, t);
945
946 rc = get_extent_refcount(Vcb, address, Vcb->superblock.node_size, Irp);
947 if (rc == 0) {
948 ERR("error - refcount for extent %I64x was 0\n", address);
949 return STATUS_INTERNAL_ERROR;
950 }
951
952 if (!t || t->parent)
953 root = parent_root;
954 else
955 root = t->header.tree_id;
956
957 Status = decrease_extent_refcount_tree(Vcb, address, Vcb->superblock.node_size, root, level, Irp);
958 if (!NT_SUCCESS(Status)) {
959 ERR("decrease_extent_refcount_tree returned %08x\n", Status);
960 return Status;
961 }
962
963 if (rc == 1) {
964 chunk* c = get_chunk_from_address(Vcb, address);
965
966 if (c) {
967 acquire_chunk_lock(c, Vcb);
968
969 if (!c->cache_loaded) {
970 Status = load_cache_chunk(Vcb, c, NULL);
971
972 if (!NT_SUCCESS(Status)) {
973 ERR("load_cache_chunk returned %08x\n", Status);
974 release_chunk_lock(c, Vcb);
975 return Status;
976 }
977 }
978
979 c->used -= Vcb->superblock.node_size;
980
981 space_list_add(c, address, Vcb->superblock.node_size, rollback);
982
983 release_chunk_lock(c, Vcb);
984 } else
985 ERR("could not find chunk for address %I64x\n", address);
986 }
987
988 return STATUS_SUCCESS;
989 }
990
991 static NTSTATUS add_changed_extent_ref_edr(changed_extent* ce, EXTENT_DATA_REF* edr, bool old) {
992 LIST_ENTRY *le2, *list;
993 changed_extent_ref* cer;
994
995 list = old ? &ce->old_refs : &ce->refs;
996
997 le2 = list->Flink;
998 while (le2 != list) {
999 cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1000
1001 if (cer->type == TYPE_EXTENT_DATA_REF && cer->edr.root == edr->root && cer->edr.objid == edr->objid && cer->edr.offset == edr->offset) {
1002 cer->edr.count += edr->count;
1003 goto end;
1004 }
1005
1006 le2 = le2->Flink;
1007 }
1008
1009 cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
1010 if (!cer) {
1011 ERR("out of memory\n");
1012 return STATUS_INSUFFICIENT_RESOURCES;
1013 }
1014
1015 cer->type = TYPE_EXTENT_DATA_REF;
1016 RtlCopyMemory(&cer->edr, edr, sizeof(EXTENT_DATA_REF));
1017 InsertTailList(list, &cer->list_entry);
1018
1019 end:
1020 if (old)
1021 ce->old_count += edr->count;
1022 else
1023 ce->count += edr->count;
1024
1025 return STATUS_SUCCESS;
1026 }
1027
1028 static NTSTATUS add_changed_extent_ref_sdr(changed_extent* ce, SHARED_DATA_REF* sdr, bool old) {
1029 LIST_ENTRY *le2, *list;
1030 changed_extent_ref* cer;
1031
1032 list = old ? &ce->old_refs : &ce->refs;
1033
1034 le2 = list->Flink;
1035 while (le2 != list) {
1036 cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1037
1038 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr->offset) {
1039 cer->sdr.count += sdr->count;
1040 goto end;
1041 }
1042
1043 le2 = le2->Flink;
1044 }
1045
1046 cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
1047 if (!cer) {
1048 ERR("out of memory\n");
1049 return STATUS_INSUFFICIENT_RESOURCES;
1050 }
1051
1052 cer->type = TYPE_SHARED_DATA_REF;
1053 RtlCopyMemory(&cer->sdr, sdr, sizeof(SHARED_DATA_REF));
1054 InsertTailList(list, &cer->list_entry);
1055
1056 end:
1057 if (old)
1058 ce->old_count += sdr->count;
1059 else
1060 ce->count += sdr->count;
1061
1062 return STATUS_SUCCESS;
1063 }
1064
1065 static bool shared_tree_is_unique(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
1066 KEY searchkey;
1067 traverse_ptr tp;
1068 NTSTATUS Status;
1069
1070 if (!t->updated_extents && t->has_address) {
1071 Status = update_tree_extents(Vcb, t, Irp, rollback);
1072 if (!NT_SUCCESS(Status)) {
1073 ERR("update_tree_extents returned %08x\n", Status);
1074 return false;
1075 }
1076 }
1077
1078 searchkey.obj_id = t->header.address;
1079 searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
1080 searchkey.offset = 0xffffffffffffffff;
1081
1082 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
1083 if (!NT_SUCCESS(Status)) {
1084 ERR("error - find_item returned %08x\n", Status);
1085 return false;
1086 }
1087
1088 if (tp.item->key.obj_id == t->header.address && (tp.item->key.obj_type == TYPE_METADATA_ITEM || tp.item->key.obj_type == TYPE_EXTENT_ITEM))
1089 return false;
1090 else
1091 return true;
1092 }
1093
1094 static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
1095 NTSTATUS Status;
1096 uint64_t rc = get_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, Irp);
1097 uint64_t flags = get_extent_flags(Vcb, t->header.address, Irp);
1098
1099 if (rc == 0) {
1100 ERR("refcount for extent %I64x was 0\n", t->header.address);
1101 return STATUS_INTERNAL_ERROR;
1102 }
1103
1104 if (flags & EXTENT_ITEM_SHARED_BACKREFS || t->header.flags & HEADER_FLAG_SHARED_BACKREF || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1105 TREE_BLOCK_REF tbr;
1106 bool unique = rc > 1 ? false : (t->parent ? shared_tree_is_unique(Vcb, t->parent, Irp, rollback) : false);
1107
1108 if (t->header.level == 0) {
1109 LIST_ENTRY* le;
1110
1111 le = t->itemlist.Flink;
1112 while (le != &t->itemlist) {
1113 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1114
1115 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1116 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1117
1118 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1119 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1120
1121 if (ed2->size > 0) {
1122 EXTENT_DATA_REF edr;
1123 changed_extent* ce = NULL;
1124 chunk* c = get_chunk_from_address(Vcb, ed2->address);
1125
1126 if (c) {
1127 LIST_ENTRY* le2;
1128
1129 le2 = c->changed_extents.Flink;
1130 while (le2 != &c->changed_extents) {
1131 changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry);
1132
1133 if (ce2->address == ed2->address) {
1134 ce = ce2;
1135 break;
1136 }
1137
1138 le2 = le2->Flink;
1139 }
1140 }
1141
1142 edr.root = t->root->id;
1143 edr.objid = td->key.obj_id;
1144 edr.offset = td->key.offset - ed2->offset;
1145 edr.count = 1;
1146
1147 if (ce) {
1148 Status = add_changed_extent_ref_edr(ce, &edr, true);
1149 if (!NT_SUCCESS(Status)) {
1150 ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1151 return Status;
1152 }
1153
1154 Status = add_changed_extent_ref_edr(ce, &edr, false);
1155 if (!NT_SUCCESS(Status)) {
1156 ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1157 return Status;
1158 }
1159 }
1160
1161 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp);
1162 if (!NT_SUCCESS(Status)) {
1163 ERR("increase_extent_refcount returned %08x\n", Status);
1164 return Status;
1165 }
1166
1167 if ((flags & EXTENT_ITEM_SHARED_BACKREFS && unique) || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1168 uint64_t sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, t->header.address, Irp);
1169
1170 if (sdrrc > 0) {
1171 SHARED_DATA_REF sdr;
1172
1173 sdr.offset = t->header.address;
1174 sdr.count = 1;
1175
1176 Status = decrease_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0,
1177 t->header.address, ce ? ce->superseded : false, Irp);
1178 if (!NT_SUCCESS(Status)) {
1179 ERR("decrease_extent_refcount returned %08x\n", Status);
1180 return Status;
1181 }
1182
1183 if (ce) {
1184 LIST_ENTRY* le2;
1185
1186 le2 = ce->refs.Flink;
1187 while (le2 != &ce->refs) {
1188 changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1189
1190 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) {
1191 ce->count--;
1192 cer->sdr.count--;
1193 break;
1194 }
1195
1196 le2 = le2->Flink;
1197 }
1198
1199 le2 = ce->old_refs.Flink;
1200 while (le2 != &ce->old_refs) {
1201 changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1202
1203 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) {
1204 ce->old_count--;
1205
1206 if (cer->sdr.count > 1)
1207 cer->sdr.count--;
1208 else {
1209 RemoveEntryList(&cer->list_entry);
1210 ExFreePool(cer);
1211 }
1212
1213 break;
1214 }
1215
1216 le2 = le2->Flink;
1217 }
1218 }
1219 }
1220 }
1221
1222 // FIXME - clear shared flag if unique?
1223 }
1224 }
1225 }
1226
1227 le = le->Flink;
1228 }
1229 } else {
1230 LIST_ENTRY* le;
1231
1232 le = t->itemlist.Flink;
1233 while (le != &t->itemlist) {
1234 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1235
1236 if (!td->inserted) {
1237 tbr.offset = t->root->id;
1238
1239 Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF,
1240 &tbr, &td->key, t->header.level - 1, Irp);
1241 if (!NT_SUCCESS(Status)) {
1242 ERR("increase_extent_refcount returned %08x\n", Status);
1243 return Status;
1244 }
1245
1246 if (unique || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1247 uint64_t sbrrc = find_extent_shared_tree_refcount(Vcb, td->treeholder.address, t->header.address, Irp);
1248
1249 if (sbrrc > 0) {
1250 SHARED_BLOCK_REF sbr;
1251
1252 sbr.offset = t->header.address;
1253
1254 Status = decrease_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
1255 t->header.address, false, Irp);
1256 if (!NT_SUCCESS(Status)) {
1257 ERR("decrease_extent_refcount returned %08x\n", Status);
1258 return Status;
1259 }
1260 }
1261 }
1262
1263 // FIXME - clear shared flag if unique?
1264 }
1265
1266 le = le->Flink;
1267 }
1268 }
1269
1270 if (unique) {
1271 uint64_t sbrrc = find_extent_shared_tree_refcount(Vcb, t->header.address, t->parent->header.address, Irp);
1272
1273 if (sbrrc == 1) {
1274 SHARED_BLOCK_REF sbr;
1275
1276 sbr.offset = t->parent->header.address;
1277
1278 Status = decrease_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
1279 t->parent->header.address, false, Irp);
1280 if (!NT_SUCCESS(Status)) {
1281 ERR("decrease_extent_refcount returned %08x\n", Status);
1282 return Status;
1283 }
1284 }
1285 }
1286
1287 if (t->parent)
1288 tbr.offset = t->parent->header.tree_id;
1289 else
1290 tbr.offset = t->header.tree_id;
1291
1292 Status = increase_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr,
1293 t->parent ? &t->paritem->key : NULL, t->header.level, Irp);
1294 if (!NT_SUCCESS(Status)) {
1295 ERR("increase_extent_refcount returned %08x\n", Status);
1296 return Status;
1297 }
1298
1299 // FIXME - clear shared flag if unique?
1300
1301 t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
1302 }
1303
1304 if (rc > 1 || t->header.tree_id == t->root->id) {
1305 Status = reduce_tree_extent(Vcb, t->header.address, t, t->parent ? t->parent->header.tree_id : t->header.tree_id, t->header.level, Irp, rollback);
1306
1307 if (!NT_SUCCESS(Status)) {
1308 ERR("reduce_tree_extent returned %08x\n", Status);
1309 return Status;
1310 }
1311 }
1312
1313 t->has_address = false;
1314
1315 if ((rc > 1 || t->header.tree_id != t->root->id) && !(flags & EXTENT_ITEM_SHARED_BACKREFS)) {
1316 if (t->header.tree_id == t->root->id) {
1317 flags |= EXTENT_ITEM_SHARED_BACKREFS;
1318 update_extent_flags(Vcb, t->header.address, flags, Irp);
1319 }
1320
1321 if (t->header.level > 0) {
1322 LIST_ENTRY* le;
1323
1324 le = t->itemlist.Flink;
1325 while (le != &t->itemlist) {
1326 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1327
1328 if (!td->inserted) {
1329 if (t->header.tree_id == t->root->id) {
1330 SHARED_BLOCK_REF sbr;
1331
1332 sbr.offset = t->header.address;
1333
1334 Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, &td->key, t->header.level - 1, Irp);
1335 } else {
1336 TREE_BLOCK_REF tbr;
1337
1338 tbr.offset = t->root->id;
1339
1340 Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr, &td->key, t->header.level - 1, Irp);
1341 }
1342
1343 if (!NT_SUCCESS(Status)) {
1344 ERR("increase_extent_refcount returned %08x\n", Status);
1345 return Status;
1346 }
1347 }
1348
1349 le = le->Flink;
1350 }
1351 } else {
1352 LIST_ENTRY* le;
1353
1354 le = t->itemlist.Flink;
1355 while (le != &t->itemlist) {
1356 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1357
1358 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1359 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1360
1361 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1362 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1363
1364 if (ed2->size > 0) {
1365 changed_extent* ce = NULL;
1366 chunk* c = get_chunk_from_address(Vcb, ed2->address);
1367
1368 if (c) {
1369 LIST_ENTRY* le2;
1370
1371 le2 = c->changed_extents.Flink;
1372 while (le2 != &c->changed_extents) {
1373 changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry);
1374
1375 if (ce2->address == ed2->address) {
1376 ce = ce2;
1377 break;
1378 }
1379
1380 le2 = le2->Flink;
1381 }
1382 }
1383
1384 if (t->header.tree_id == t->root->id) {
1385 SHARED_DATA_REF sdr;
1386
1387 sdr.offset = t->header.address;
1388 sdr.count = 1;
1389
1390 if (ce) {
1391 Status = add_changed_extent_ref_sdr(ce, &sdr, true);
1392 if (!NT_SUCCESS(Status)) {
1393 ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1394 return Status;
1395 }
1396
1397 Status = add_changed_extent_ref_sdr(ce, &sdr, false);
1398 if (!NT_SUCCESS(Status)) {
1399 ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1400 return Status;
1401 }
1402 }
1403
1404 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, Irp);
1405 } else {
1406 EXTENT_DATA_REF edr;
1407
1408 edr.root = t->root->id;
1409 edr.objid = td->key.obj_id;
1410 edr.offset = td->key.offset - ed2->offset;
1411 edr.count = 1;
1412
1413 if (ce) {
1414 Status = add_changed_extent_ref_edr(ce, &edr, true);
1415 if (!NT_SUCCESS(Status)) {
1416 ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1417 return Status;
1418 }
1419
1420 Status = add_changed_extent_ref_edr(ce, &edr, false);
1421 if (!NT_SUCCESS(Status)) {
1422 ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1423 return Status;
1424 }
1425 }
1426
1427 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp);
1428 }
1429
1430 if (!NT_SUCCESS(Status)) {
1431 ERR("increase_extent_refcount returned %08x\n", Status);
1432 return Status;
1433 }
1434 }
1435 }
1436 }
1437
1438 le = le->Flink;
1439 }
1440 }
1441 }
1442
1443 t->updated_extents = true;
1444 t->header.tree_id = t->root->id;
1445
1446 return STATUS_SUCCESS;
1447 }
1448
1449 static NTSTATUS allocate_tree_extents(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
1450 LIST_ENTRY* le;
1451 NTSTATUS Status;
1452 bool changed = false;
1453 uint8_t max_level = 0, level;
1454
1455 TRACE("(%p)\n", Vcb);
1456
1457 le = Vcb->trees.Flink;
1458 while (le != &Vcb->trees) {
1459 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1460
1461 if (t->write && !t->has_new_address) {
1462 chunk* c;
1463
1464 if (t->has_address) {
1465 c = get_chunk_from_address(Vcb, t->header.address);
1466
1467 if (c) {
1468 if (!c->cache_loaded) {
1469 acquire_chunk_lock(c, Vcb);
1470
1471 if (!c->cache_loaded) {
1472 Status = load_cache_chunk(Vcb, c, NULL);
1473
1474 if (!NT_SUCCESS(Status)) {
1475 ERR("load_cache_chunk returned %08x\n", Status);
1476 release_chunk_lock(c, Vcb);
1477 return Status;
1478 }
1479 }
1480
1481 release_chunk_lock(c, Vcb);
1482 }
1483 }
1484 }
1485
1486 Status = get_tree_new_address(Vcb, t, Irp, rollback);
1487 if (!NT_SUCCESS(Status)) {
1488 ERR("get_tree_new_address returned %08x\n", Status);
1489 return Status;
1490 }
1491
1492 TRACE("allocated extent %I64x\n", t->new_address);
1493
1494 c = get_chunk_from_address(Vcb, t->new_address);
1495
1496 if (c)
1497 c->used += Vcb->superblock.node_size;
1498 else {
1499 ERR("could not find chunk for address %I64x\n", t->new_address);
1500 return STATUS_INTERNAL_ERROR;
1501 }
1502
1503 changed = true;
1504
1505 if (t->header.level > max_level)
1506 max_level = t->header.level;
1507 }
1508
1509 le = le->Flink;
1510 }
1511
1512 if (!changed)
1513 return STATUS_SUCCESS;
1514
1515 level = max_level;
1516 do {
1517 le = Vcb->trees.Flink;
1518 while (le != &Vcb->trees) {
1519 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1520
1521 if (t->write && !t->updated_extents && t->has_address && t->header.level == level) {
1522 Status = update_tree_extents(Vcb, t, Irp, rollback);
1523 if (!NT_SUCCESS(Status)) {
1524 ERR("update_tree_extents returned %08x\n", Status);
1525 return Status;
1526 }
1527 }
1528
1529 le = le->Flink;
1530 }
1531
1532 if (level == 0)
1533 break;
1534
1535 level--;
1536 } while (true);
1537
1538 return STATUS_SUCCESS;
1539 }
1540
1541 static NTSTATUS update_root_root(device_extension* Vcb, bool no_cache, PIRP Irp, LIST_ENTRY* rollback) {
1542 LIST_ENTRY* le;
1543 NTSTATUS Status;
1544
1545 TRACE("(%p)\n", Vcb);
1546
1547 le = Vcb->trees.Flink;
1548 while (le != &Vcb->trees) {
1549 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1550
1551 if (t->write && !t->parent) {
1552 if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
1553 KEY searchkey;
1554 traverse_ptr tp;
1555
1556 searchkey.obj_id = t->root->id;
1557 searchkey.obj_type = TYPE_ROOT_ITEM;
1558 searchkey.offset = 0xffffffffffffffff;
1559
1560 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
1561 if (!NT_SUCCESS(Status)) {
1562 ERR("error - find_item returned %08x\n", Status);
1563 return Status;
1564 }
1565
1566 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1567 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id);
1568 return STATUS_INTERNAL_ERROR;
1569 }
1570
1571 TRACE("updating the address for root %I64x to %I64x\n", searchkey.obj_id, t->new_address);
1572
1573 t->root->root_item.block_number = t->new_address;
1574 t->root->root_item.root_level = t->header.level;
1575 t->root->root_item.generation = Vcb->superblock.generation;
1576 t->root->root_item.generation2 = Vcb->superblock.generation;
1577
1578 // item is guaranteed to be at least sizeof(ROOT_ITEM), due to add_parents
1579
1580 RtlCopyMemory(tp.item->data, &t->root->root_item, sizeof(ROOT_ITEM));
1581 }
1582
1583 t->root->treeholder.address = t->new_address;
1584 t->root->treeholder.generation = Vcb->superblock.generation;
1585 }
1586
1587 le = le->Flink;
1588 }
1589
1590 if (!no_cache && !(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE)) {
1591 ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
1592 Status = update_chunk_caches(Vcb, Irp, rollback);
1593 ExReleaseResourceLite(&Vcb->chunk_lock);
1594
1595 if (!NT_SUCCESS(Status)) {
1596 ERR("update_chunk_caches returned %08x\n", Status);
1597 return Status;
1598 }
1599 }
1600
1601 return STATUS_SUCCESS;
1602 }
1603
1604 NTSTATUS do_tree_writes(device_extension* Vcb, LIST_ENTRY* tree_writes, bool no_free) {
1605 chunk* c;
1606 LIST_ENTRY* le;
1607 tree_write* tw;
1608 NTSTATUS Status;
1609 ULONG i, num_bits;
1610 write_data_context* wtc;
1611 ULONG bit_num = 0;
1612 bool raid56 = false;
1613
1614 // merge together runs
1615 c = NULL;
1616 le = tree_writes->Flink;
1617 while (le != tree_writes) {
1618 tw = CONTAINING_RECORD(le, tree_write, list_entry);
1619
1620 if (!c || tw->address < c->offset || tw->address >= c->offset + c->chunk_item->size)
1621 c = get_chunk_from_address(Vcb, tw->address);
1622 else {
1623 tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry);
1624
1625 if (tw->address == tw2->address + tw2->length) {
1626 uint8_t* data = ExAllocatePoolWithTag(NonPagedPool, tw2->length + tw->length, ALLOC_TAG);
1627
1628 if (!data) {
1629 ERR("out of memory\n");
1630 return STATUS_INSUFFICIENT_RESOURCES;
1631 }
1632
1633 RtlCopyMemory(data, tw2->data, tw2->length);
1634 RtlCopyMemory(&data[tw2->length], tw->data, tw->length);
1635
1636 if (!no_free)
1637 ExFreePool(tw2->data);
1638
1639 tw2->data = data;
1640 tw2->length += tw->length;
1641
1642 if (!no_free) // FIXME - what if we allocated this just now?
1643 ExFreePool(tw->data);
1644
1645 RemoveEntryList(&tw->list_entry);
1646 ExFreePool(tw);
1647
1648 le = tw2->list_entry.Flink;
1649 continue;
1650 }
1651 }
1652
1653 tw->c = c;
1654
1655 if (c->chunk_item->type & (BLOCK_FLAG_RAID5 | BLOCK_FLAG_RAID6))
1656 raid56 = true;
1657
1658 le = le->Flink;
1659 }
1660
1661 num_bits = 0;
1662
1663 le = tree_writes->Flink;
1664 while (le != tree_writes) {
1665 tw = CONTAINING_RECORD(le, tree_write, list_entry);
1666
1667 num_bits++;
1668
1669 le = le->Flink;
1670 }
1671
1672 wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context) * num_bits, ALLOC_TAG);
1673 if (!wtc) {
1674 ERR("out of memory\n");
1675 return STATUS_INSUFFICIENT_RESOURCES;
1676 }
1677
1678 le = tree_writes->Flink;
1679
1680 while (le != tree_writes) {
1681 tw = CONTAINING_RECORD(le, tree_write, list_entry);
1682
1683 TRACE("address: %I64x, size: %x\n", tw->address, tw->length);
1684
1685 KeInitializeEvent(&wtc[bit_num].Event, NotificationEvent, false);
1686 InitializeListHead(&wtc[bit_num].stripes);
1687 wtc[bit_num].need_wait = false;
1688 wtc[bit_num].stripes_left = 0;
1689 wtc[bit_num].parity1 = wtc[bit_num].parity2 = wtc[bit_num].scratch = NULL;
1690 wtc[bit_num].mdl = wtc[bit_num].parity1_mdl = wtc[bit_num].parity2_mdl = NULL;
1691
1692 Status = write_data(Vcb, tw->address, tw->data, tw->length, &wtc[bit_num], NULL, NULL, false, 0, HighPagePriority);
1693 if (!NT_SUCCESS(Status)) {
1694 ERR("write_data returned %08x\n", Status);
1695
1696 for (i = 0; i < num_bits; i++) {
1697 free_write_data_stripes(&wtc[i]);
1698 }
1699 ExFreePool(wtc);
1700
1701 return Status;
1702 }
1703
1704 bit_num++;
1705
1706 le = le->Flink;
1707 }
1708
1709 for (i = 0; i < num_bits; i++) {
1710 if (wtc[i].stripes.Flink != &wtc[i].stripes) {
1711 // launch writes and wait
1712 le = wtc[i].stripes.Flink;
1713 while (le != &wtc[i].stripes) {
1714 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
1715
1716 if (stripe->status != WriteDataStatus_Ignore) {
1717 wtc[i].need_wait = true;
1718 IoCallDriver(stripe->device->devobj, stripe->Irp);
1719 }
1720
1721 le = le->Flink;
1722 }
1723 }
1724 }
1725
1726 for (i = 0; i < num_bits; i++) {
1727 if (wtc[i].need_wait)
1728 KeWaitForSingleObject(&wtc[i].Event, Executive, KernelMode, false, NULL);
1729 }
1730
1731 for (i = 0; i < num_bits; i++) {
1732 le = wtc[i].stripes.Flink;
1733 while (le != &wtc[i].stripes) {
1734 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
1735
1736 if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
1737 Status = stripe->iosb.Status;
1738 log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS);
1739 break;
1740 }
1741
1742 le = le->Flink;
1743 }
1744
1745 free_write_data_stripes(&wtc[i]);
1746 }
1747
1748 ExFreePool(wtc);
1749
1750 if (raid56) {
1751 c = NULL;
1752
1753 le = tree_writes->Flink;
1754 while (le != tree_writes) {
1755 tw = CONTAINING_RECORD(le, tree_write, list_entry);
1756
1757 if (tw->c != c) {
1758 c = tw->c;
1759
1760 ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true);
1761
1762 while (!IsListEmpty(&c->partial_stripes)) {
1763 partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry);
1764
1765 Status = flush_partial_stripe(Vcb, c, ps);
1766
1767 if (ps->bmparr)
1768 ExFreePool(ps->bmparr);
1769
1770 ExFreePool(ps);
1771
1772 if (!NT_SUCCESS(Status)) {
1773 ERR("flush_partial_stripe returned %08x\n", Status);
1774 ExReleaseResourceLite(&c->partial_stripes_lock);
1775 return Status;
1776 }
1777 }
1778
1779 ExReleaseResourceLite(&c->partial_stripes_lock);
1780 }
1781
1782 le = le->Flink;
1783 }
1784 }
1785
1786 return STATUS_SUCCESS;
1787 }
1788
1789 static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) {
1790 ULONG level;
1791 uint8_t *data, *body;
1792 uint32_t crc32;
1793 NTSTATUS Status;
1794 LIST_ENTRY* le;
1795 LIST_ENTRY tree_writes;
1796 tree_write* tw;
1797
1798 TRACE("(%p)\n", Vcb);
1799
1800 InitializeListHead(&tree_writes);
1801
1802 for (level = 0; level <= 255; level++) {
1803 bool nothing_found = true;
1804
1805 TRACE("level = %u\n", level);
1806
1807 le = Vcb->trees.Flink;
1808 while (le != &Vcb->trees) {
1809 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1810
1811 if (t->write && t->header.level == level) {
1812 KEY firstitem, searchkey;
1813 LIST_ENTRY* le2;
1814 traverse_ptr tp;
1815
1816 if (!t->has_new_address) {
1817 ERR("error - tried to write tree with no new address\n");
1818 return STATUS_INTERNAL_ERROR;
1819 }
1820
1821 le2 = t->itemlist.Flink;
1822 while (le2 != &t->itemlist) {
1823 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1824 if (!td->ignore) {
1825 firstitem = td->key;
1826 break;
1827 }
1828 le2 = le2->Flink;
1829 }
1830
1831 if (t->parent) {
1832 t->paritem->key = firstitem;
1833 t->paritem->treeholder.address = t->new_address;
1834 t->paritem->treeholder.generation = Vcb->superblock.generation;
1835 }
1836
1837 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
1838 EXTENT_ITEM_TREE* eit;
1839
1840 searchkey.obj_id = t->new_address;
1841 searchkey.obj_type = TYPE_EXTENT_ITEM;
1842 searchkey.offset = Vcb->superblock.node_size;
1843
1844 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
1845 if (!NT_SUCCESS(Status)) {
1846 ERR("error - find_item returned %08x\n", Status);
1847 return Status;
1848 }
1849
1850 if (keycmp(searchkey, tp.item->key)) {
1851 ERR("could not find %I64x,%x,%I64x in extent_root (found %I64x,%x,%I64x instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
1852 return STATUS_INTERNAL_ERROR;
1853 }
1854
1855 if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
1856 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
1857 return STATUS_INTERNAL_ERROR;
1858 }
1859
1860 eit = (EXTENT_ITEM_TREE*)tp.item->data;
1861 eit->firstitem = firstitem;
1862 }
1863
1864 nothing_found = false;
1865 }
1866
1867 le = le->Flink;
1868 }
1869
1870 if (nothing_found)
1871 break;
1872 }
1873
1874 TRACE("allocated tree extents\n");
1875
1876 le = Vcb->trees.Flink;
1877 while (le != &Vcb->trees) {
1878 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1879 LIST_ENTRY* le2;
1880 #ifdef DEBUG_PARANOID
1881 uint32_t num_items = 0, size = 0;
1882 bool crash = false;
1883 #endif
1884
1885 if (t->write) {
1886 #ifdef DEBUG_PARANOID
1887 bool first = true;
1888 KEY lastkey;
1889
1890 le2 = t->itemlist.Flink;
1891 while (le2 != &t->itemlist) {
1892 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1893 if (!td->ignore) {
1894 num_items++;
1895
1896 if (!first) {
1897 if (keycmp(td->key, lastkey) == 0) {
1898 ERR("(%I64x,%x,%I64x): duplicate key\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1899 crash = true;
1900 } else if (keycmp(td->key, lastkey) == -1) {
1901 ERR("(%I64x,%x,%I64x): key out of order\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1902 crash = true;
1903 }
1904 } else
1905 first = false;
1906
1907 lastkey = td->key;
1908
1909 if (t->header.level == 0)
1910 size += td->size;
1911 }
1912 le2 = le2->Flink;
1913 }
1914
1915 if (t->header.level == 0)
1916 size += num_items * sizeof(leaf_node);
1917 else
1918 size += num_items * sizeof(internal_node);
1919
1920 if (num_items != t->header.num_items) {
1921 ERR("tree %I64x, level %x: num_items was %x, expected %x\n", t->root->id, t->header.level, num_items, t->header.num_items);
1922 crash = true;
1923 }
1924
1925 if (size != t->size) {
1926 ERR("tree %I64x, level %x: size was %x, expected %x\n", t->root->id, t->header.level, size, t->size);
1927 crash = true;
1928 }
1929
1930 if (t->header.num_items == 0 && t->parent) {
1931 ERR("tree %I64x, level %x: tried to write empty tree with parent\n", t->root->id, t->header.level);
1932 crash = true;
1933 }
1934
1935 if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
1936 ERR("tree %I64x, level %x: tried to write overlarge tree (%x > %x)\n", t->root->id, t->header.level, t->size, Vcb->superblock.node_size - sizeof(tree_header));
1937 crash = true;
1938 }
1939
1940 if (crash) {
1941 ERR("tree %p\n", t);
1942 le2 = t->itemlist.Flink;
1943 while (le2 != &t->itemlist) {
1944 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1945 if (!td->ignore) {
1946 ERR("%I64x,%x,%I64x inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted);
1947 }
1948 le2 = le2->Flink;
1949 }
1950 int3;
1951 }
1952 #endif
1953 t->header.address = t->new_address;
1954 t->header.generation = Vcb->superblock.generation;
1955 t->header.tree_id = t->root->id;
1956 t->header.flags |= HEADER_FLAG_MIXED_BACKREF;
1957 t->header.fs_uuid = Vcb->superblock.uuid;
1958 t->has_address = true;
1959
1960 data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
1961 if (!data) {
1962 ERR("out of memory\n");
1963 Status = STATUS_INSUFFICIENT_RESOURCES;
1964 goto end;
1965 }
1966
1967 body = data + sizeof(tree_header);
1968
1969 RtlCopyMemory(data, &t->header, sizeof(tree_header));
1970 RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header));
1971
1972 if (t->header.level == 0) {
1973 leaf_node* itemptr = (leaf_node*)body;
1974 int i = 0;
1975 uint8_t* dataptr = data + Vcb->superblock.node_size;
1976
1977 le2 = t->itemlist.Flink;
1978 while (le2 != &t->itemlist) {
1979 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1980 if (!td->ignore) {
1981 dataptr = dataptr - td->size;
1982
1983 itemptr[i].key = td->key;
1984 itemptr[i].offset = (uint32_t)((uint8_t*)dataptr - (uint8_t*)body);
1985 itemptr[i].size = td->size;
1986 i++;
1987
1988 if (td->size > 0)
1989 RtlCopyMemory(dataptr, td->data, td->size);
1990 }
1991
1992 le2 = le2->Flink;
1993 }
1994 } else {
1995 internal_node* itemptr = (internal_node*)body;
1996 int i = 0;
1997
1998 le2 = t->itemlist.Flink;
1999 while (le2 != &t->itemlist) {
2000 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2001 if (!td->ignore) {
2002 itemptr[i].key = td->key;
2003 itemptr[i].address = td->treeholder.address;
2004 itemptr[i].generation = td->treeholder.generation;
2005 i++;
2006 }
2007
2008 le2 = le2->Flink;
2009 }
2010 }
2011
2012 crc32 = calc_crc32c(0xffffffff, (uint8_t*)&((tree_header*)data)->fs_uuid, Vcb->superblock.node_size - sizeof(((tree_header*)data)->csum));
2013 crc32 = ~crc32;
2014 *((uint32_t*)data) = crc32;
2015 TRACE("setting crc32 to %08x\n", crc32);
2016
2017 tw = ExAllocatePoolWithTag(PagedPool, sizeof(tree_write), ALLOC_TAG);
2018 if (!tw) {
2019 ERR("out of memory\n");
2020 ExFreePool(data);
2021 Status = STATUS_INSUFFICIENT_RESOURCES;
2022 goto end;
2023 }
2024
2025 tw->address = t->new_address;
2026 tw->length = Vcb->superblock.node_size;
2027 tw->data = data;
2028
2029 if (IsListEmpty(&tree_writes))
2030 InsertTailList(&tree_writes, &tw->list_entry);
2031 else {
2032 bool inserted = false;
2033
2034 le2 = tree_writes.Flink;
2035 while (le2 != &tree_writes) {
2036 tree_write* tw2 = CONTAINING_RECORD(le2, tree_write, list_entry);
2037
2038 if (tw2->address > tw->address) {
2039 InsertHeadList(le2->Blink, &tw->list_entry);
2040 inserted = true;
2041 break;
2042 }
2043
2044 le2 = le2->Flink;
2045 }
2046
2047 if (!inserted)
2048 InsertTailList(&tree_writes, &tw->list_entry);
2049 }
2050 }
2051
2052 le = le->Flink;
2053 }
2054
2055 Status = do_tree_writes(Vcb, &tree_writes, false);
2056 if (!NT_SUCCESS(Status)) {
2057 ERR("do_tree_writes returned %08x\n", Status);
2058 goto end;
2059 }
2060
2061 Status = STATUS_SUCCESS;
2062
2063 end:
2064 while (!IsListEmpty(&tree_writes)) {
2065 le = RemoveHeadList(&tree_writes);
2066 tw = CONTAINING_RECORD(le, tree_write, list_entry);
2067
2068 if (tw->data)
2069 ExFreePool(tw->data);
2070
2071 ExFreePool(tw);
2072 }
2073
2074 return Status;
2075 }
2076
2077 static void update_backup_superblock(device_extension* Vcb, superblock_backup* sb, PIRP Irp) {
2078 KEY searchkey;
2079 traverse_ptr tp;
2080
2081 RtlZeroMemory(sb, sizeof(superblock_backup));
2082
2083 sb->root_tree_addr = Vcb->superblock.root_tree_addr;
2084 sb->root_tree_generation = Vcb->superblock.generation;
2085 sb->root_level = Vcb->superblock.root_level;
2086
2087 sb->chunk_tree_addr = Vcb->superblock.chunk_tree_addr;
2088 sb->chunk_tree_generation = Vcb->superblock.chunk_root_generation;
2089 sb->chunk_root_level = Vcb->superblock.chunk_root_level;
2090
2091 searchkey.obj_id = BTRFS_ROOT_EXTENT;
2092 searchkey.obj_type = TYPE_ROOT_ITEM;
2093 searchkey.offset = 0xffffffffffffffff;
2094
2095 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) {
2096 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2097 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2098
2099 sb->extent_tree_addr = ri->block_number;
2100 sb->extent_tree_generation = ri->generation;
2101 sb->extent_root_level = ri->root_level;
2102 }
2103 }
2104
2105 searchkey.obj_id = BTRFS_ROOT_FSTREE;
2106
2107 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) {
2108 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2109 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2110
2111 sb->fs_tree_addr = ri->block_number;
2112 sb->fs_tree_generation = ri->generation;
2113 sb->fs_root_level = ri->root_level;
2114 }
2115 }
2116
2117 searchkey.obj_id = BTRFS_ROOT_DEVTREE;
2118
2119 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) {
2120 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2121 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2122
2123 sb->dev_root_addr = ri->block_number;
2124 sb->dev_root_generation = ri->generation;
2125 sb->dev_root_level = ri->root_level;
2126 }
2127 }
2128
2129 searchkey.obj_id = BTRFS_ROOT_CHECKSUM;
2130
2131 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) {
2132 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2133 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2134
2135 sb->csum_root_addr = ri->block_number;
2136 sb->csum_root_generation = ri->generation;
2137 sb->csum_root_level = ri->root_level;
2138 }
2139 }
2140
2141 sb->total_bytes = Vcb->superblock.total_bytes;
2142 sb->bytes_used = Vcb->superblock.bytes_used;
2143 sb->num_devices = Vcb->superblock.num_devices;
2144 }
2145
// State for one superblock write IRP; passed to write_superblock_completion as its context pointer.
2146 typedef struct {
2147 void* context; // read back as a write_superblocks_context* by write_superblock_completion
2148 uint8_t* buf; // write_superblock stores the superblock copy being written here
2149 PMDL mdl; // MDL over buf when the device uses direct I/O, NULL otherwise
2150 device* device; // presumably the device being written to - assignment not visible here, confirm
2151 NTSTATUS Status; // filled in from Irp->IoStatus.Status by the completion routine
2152 PIRP Irp; // IRP allocated by write_superblock for this write
2153 LIST_ENTRY list_entry; // presumably links into write_superblocks_context.stripes - confirm
2154 } write_superblocks_stripe;
2155
// Shared state for a batch of superblock writes.
2156 typedef struct _write_superblocks_context {
2157 KEVENT Event; // set by write_superblock_completion once left reaches zero
2158 LIST_ENTRY stripes; // list head; presumably of write_superblocks_stripe entries - confirm
2159 LONG left; // decremented with InterlockedDecrement by each completion
2160 } write_superblocks_context;
2161
2162 _Function_class_(IO_COMPLETION_ROUTINE)
2163 static NTSTATUS __stdcall write_superblock_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
2164 write_superblocks_stripe* stripe = conptr;
2165 write_superblocks_context* context = stripe->context;
2166
2167 UNUSED(DeviceObject);
2168
2169 stripe->Status = Irp->IoStatus.Status;
2170
2171 if (InterlockedDecrement(&context->left) == 0)
2172 KeSetEvent(&context->Event, 0, false);
2173
2174 return STATUS_MORE_PROCESSING_REQUIRED;
2175 }
2176
2177 static NTSTATUS write_superblock(device_extension* Vcb, device* device, write_superblocks_context* context) {
2178 unsigned int i = 0;
2179
2180 // All the documentation says that the Linux driver only writes one superblock
2181 // if it thinks a disk is an SSD, but this doesn't seem to be the case!
2182
2183 while (superblock_addrs[i] > 0 && device->devitem.num_bytes >= superblock_addrs[i] + sizeof(superblock)) {
2184 ULONG sblen = (ULONG)sector_align(sizeof(superblock), Vcb->superblock.sector_size);
2185 superblock* sb;
2186 uint32_t crc32;
2187 write_superblocks_stripe* stripe;
2188 PIO_STACK_LOCATION IrpSp;
2189
2190 sb = ExAllocatePoolWithTag(NonPagedPool, sblen, ALLOC_TAG);
2191 if (!sb) {
2192 ERR("out of memory\n");
2193 return STATUS_INSUFFICIENT_RESOURCES;
2194 }
2195
2196 RtlCopyMemory(sb, &Vcb->superblock, sizeof(superblock));
2197
2198 if (sblen > sizeof(superblock))
2199 RtlZeroMemory((uint8_t*)sb + sizeof(superblock), sblen - sizeof(superblock));
2200
2201 RtlCopyMemory(&sb->dev_item, &device->devitem, sizeof(DEV_ITEM));
2202 sb->sb_phys_addr = superblock_addrs[i];
2203
2204 crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum));
2205 RtlCopyMemory(&sb->checksum, &crc32, sizeof(uint32_t));
2206
2207 stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_superblocks_stripe), ALLOC_TAG);
2208 if (!stripe) {
2209 ERR("out of memory\n");
2210 ExFreePool(sb);
2211 return STATUS_INSUFFICIENT_RESOURCES;
2212 }
2213
2214 stripe->buf = (uint8_t*)sb;
2215
2216 stripe->Irp = IoAllocateIrp(device->devobj->StackSize, false);
2217 if (!stripe->Irp) {
2218 ERR("IoAllocateIrp failed\n");
2219 ExFreePool(stripe);
2220 ExFreePool(sb);
2221 return STATUS_INSUFFICIENT_RESOURCES;
2222 }
2223
2224 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
2225 IrpSp->MajorFunction = IRP_MJ_WRITE;
2226 IrpSp->FileObject = device->fileobj;
2227
2228 if (i == 0)
2229 IrpSp->Flags |= SL_WRITE_THROUGH;
2230
2231 if (device->devobj->Flags & DO_BUFFERED_IO) {
2232 stripe->Irp->AssociatedIrp.SystemBuffer = sb;
2233 stripe->mdl = NULL;
2234
2235 stripe->Irp->Flags = IRP_BUFFERED_IO;
2236 } else if (device->devobj->Flags & DO_DIRECT_IO) {
2237 stripe->mdl = IoAllocateMdl(sb, sblen, false, false, NULL);
2238 if (!stripe->mdl) {
2239 ERR("IoAllocateMdl failed\n");
2240 IoFreeIrp(stripe->Irp);
2241 ExFreePool(stripe);
2242 ExFreePool(sb);
2243 return STATUS_INSUFFICIENT_RESOURCES;
2244 }
2245
2246 stripe->Irp->MdlAddress = stripe->mdl;
2247
2248 MmBuildMdlForNonPagedPool(stripe->mdl);
2249 } else {
2250 stripe->Irp->UserBuffer = sb;
2251 stripe->mdl = NULL;
2252 }
2253
2254 IrpSp->Parameters.Write.Length = sblen;
2255 IrpSp->Parameters.Write.ByteOffset.QuadPart = superblock_addrs[i];
2256
2257 IoSetCompletionRoutine(stripe->Irp, write_superblock_completion, stripe, true, true, true);
2258
2259 stripe->context = context;
2260 stripe->device = device;
2261 InsertTailList(&context->stripes, &stripe->list_entry);
2262
2263 context->left++;
2264
2265 i++;
2266 }
2267
2268 if (i == 0)
2269 ERR("no superblocks written!\n");
2270
2271 return STATUS_SUCCESS;
2272 }
2273
2274 static NTSTATUS write_superblocks(device_extension* Vcb, PIRP Irp) {
2275 uint64_t i;
2276 NTSTATUS Status;
2277 LIST_ENTRY* le;
2278 write_superblocks_context context;
2279
2280 TRACE("(%p)\n", Vcb);
2281
2282 le = Vcb->trees.Flink;
2283 while (le != &Vcb->trees) {
2284 tree* t = CONTAINING_RECORD(le, tree, list_entry);
2285
2286 if (t->write && !t->parent) {
2287 if (t->root == Vcb->root_root) {
2288 Vcb->superblock.root_tree_addr = t->new_address;
2289 Vcb->superblock.root_level = t->header.level;
2290 } else if (t->root == Vcb->chunk_root) {
2291 Vcb->superblock.chunk_tree_addr = t->new_address;
2292 Vcb->superblock.chunk_root_generation = t->header.generation;
2293 Vcb->superblock.chunk_root_level = t->header.level;
2294 }
2295 }
2296
2297 le = le->Flink;
2298 }
2299
2300 for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS - 1; i++) {
2301 RtlCopyMemory(&Vcb->superblock.backup[i], &Vcb->superblock.backup[i+1], sizeof(superblock_backup));
2302 }
2303
2304 update_backup_superblock(Vcb, &Vcb->superblock.backup[BTRFS_NUM_BACKUP_ROOTS - 1], Irp);
2305
2306 KeInitializeEvent(&context.Event, NotificationEvent, false);
2307 InitializeListHead(&context.stripes);
2308 context.left = 0;
2309
2310 le = Vcb->devices.Flink;
2311 while (le != &Vcb->devices) {
2312 device* dev = CONTAINING_RECORD(le, device, list_entry);
2313
2314 if (dev->devobj && !dev->readonly) {
2315 Status = write_superblock(Vcb, dev, &context);
2316 if (!NT_SUCCESS(Status)) {
2317 ERR("write_superblock returned %08x\n", Status);
2318 goto end;
2319 }
2320 }
2321
2322 le = le->Flink;
2323 }
2324
2325 if (IsListEmpty(&context.stripes)) {
2326 ERR("error - not writing any superblocks\n");
2327 Status = STATUS_INTERNAL_ERROR;
2328 goto end;
2329 }
2330
2331 le = context.stripes.Flink;
2332 while (le != &context.stripes) {
2333 write_superblocks_stripe* stripe = CONTAINING_RECORD(le, write_superblocks_stripe, list_entry);
2334
2335 IoCallDriver(stripe->device->devobj, stripe->Irp);
2336
2337 le = le->Flink;
2338 }
2339
2340 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
2341
2342 le = context.stripes.Flink;
2343 while (le != &context.stripes) {
2344 write_superblocks_stripe* stripe = CONTAINING_RECORD(le, write_superblocks_stripe, list_entry);
2345
2346 if (!NT_SUCCESS(stripe->Status)) {
2347 ERR("device %I64x returned %08x\n", stripe->device->devitem.dev_id, stripe->Status);
2348 log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS);
2349 Status = stripe->Status;
2350 goto end;
2351 }
2352
2353 le = le->Flink;
2354 }
2355
2356 Status = STATUS_SUCCESS;
2357
2358 end:
2359 while (!IsListEmpty(&context.stripes)) {
2360 write_superblocks_stripe* stripe = CONTAINING_RECORD(RemoveHeadList(&context.stripes), write_superblocks_stripe, list_entry);
2361
2362 if (stripe->mdl) {
2363 if (stripe->mdl->MdlFlags & MDL_PAGES_LOCKED)
2364 MmUnlockPages(stripe->mdl);
2365
2366 IoFreeMdl(stripe->mdl);
2367 }
2368
2369 if (stripe->Irp)
2370 IoFreeIrp(stripe->Irp);
2371
2372 if (stripe->buf)
2373 ExFreePool(stripe->buf);
2374
2375 ExFreePool(stripe);
2376 }
2377
2378 return Status;
2379 }
2380
2381 static NTSTATUS flush_changed_extent(device_extension* Vcb, chunk* c, changed_extent* ce, PIRP Irp, LIST_ENTRY* rollback) {
2382 LIST_ENTRY *le, *le2;
2383 NTSTATUS Status;
2384 uint64_t old_size;
2385
2386 if (ce->count == 0 && ce->old_count == 0) {
2387 while (!IsListEmpty(&ce->refs)) {
2388 changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->refs), changed_extent_ref, list_entry);
2389 ExFreePool(cer);
2390 }
2391
2392 while (!IsListEmpty(&ce->old_refs)) {
2393 changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->old_refs), changed_extent_ref, list_entry);
2394 ExFreePool(cer);
2395 }
2396
2397 goto end;
2398 }
2399
2400 le = ce->refs.Flink;
2401 while (le != &ce->refs) {
2402 changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
2403 uint32_t old_count = 0;
2404
2405 if (cer->type == TYPE_EXTENT_DATA_REF) {
2406 le2 = ce->old_refs.Flink;
2407 while (le2 != &ce->old_refs) {
2408 changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
2409
2410 if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
2411 old_count = cer2->edr.count;
2412 break;
2413 }
2414
2415 le2 = le2->Flink;
2416 }
2417
2418 old_size = ce->old_count > 0 ? ce->old_size : ce->size;
2419
2420 if (cer->edr.count > old_count) {
2421 Status = increase_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset, cer->edr.count - old_count, Irp);
2422
2423 if (!NT_SUCCESS(Status)) {
2424 ERR("increase_extent_refcount_data returned %08x\n", Status);
2425 return Status;
2426 }
2427 }
2428 } else if (cer->type == TYPE_SHARED_DATA_REF) {
2429 le2 = ce->old_refs.Flink;
2430 while (le2 != &ce->old_refs) {
2431 changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
2432
2433 if (cer2->type == TYPE_SHARED_DATA_REF && cer2->sdr.offset == cer->sdr.offset) {
2434 RemoveEntryList(&cer2->list_entry);
2435 ExFreePool(cer2);
2436 break;
2437 }
2438
2439 le2 = le2->Flink;
2440 }
2441 }
2442
2443 le = le->Flink;
2444 }
2445
2446 le = ce->refs.Flink;
2447 while (le != &ce->refs) {
2448 changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
2449 LIST_ENTRY* le3 = le->Flink;
2450 uint32_t old_count = 0;
2451
2452 if (cer->type == TYPE_EXTENT_DATA_REF) {
2453 le2 = ce->old_refs.Flink;
2454 while (le2 != &ce->old_refs) {
2455 changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
2456
2457 if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
2458 old_count = cer2->edr.count;
2459
2460 RemoveEntryList(&cer2->list_entry);
2461 ExFreePool(cer2);
2462 break;
2463 }
2464
2465 le2 = le2->Flink;
2466 }
2467
2468 old_size = ce->old_count > 0 ? ce->old_size : ce->size;
2469
2470 if (cer->edr.count < old_count) {
2471 Status = decrease_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset,
2472 old_count - cer->edr.count, ce->superseded, Irp);
2473
2474 if (!NT_SUCCESS(Status)) {
2475 ERR("decrease_extent_refcount_data returned %08x\n", Status);
2476 return Status;
2477 }
2478 }
2479
2480 if (ce->size != ce->old_size && ce->old_count > 0) {
2481 KEY searchkey;
2482 traverse_ptr tp;
2483 void* data;
2484
2485 searchkey.obj_id = ce->address;
2486 searchkey.obj_type = TYPE_EXTENT_ITEM;
2487 searchkey.offset = ce->old_size;
2488
2489 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
2490 if (!NT_SUCCESS(Status)) {
2491 ERR("error - find_item returned %08x\n", Status);
2492 return Status;
2493 }
2494
2495 if (keycmp(searchkey, tp.item->key)) {
2496 ERR("could not find (%I64x,%x,%I64x) in extent tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
2497 return STATUS_INTERNAL_ERROR;
2498 }
2499
2500 if (tp.item->size > 0) {
2501 data = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
2502
2503 if (!data) {
2504 ERR("out of memory\n");
2505 return STATUS_INSUFFICIENT_RESOURCES;
2506 }
2507
2508 RtlCopyMemory(data, tp.item->data, tp.item->size);
2509 } else
2510 data = NULL;
2511
2512 Status = insert_tree_item(Vcb, Vcb->extent_root, ce->address, TYPE_EXTENT_ITEM, ce->size, data, tp.item->size, NULL, Irp);
2513 if (!NT_SUCCESS(Status)) {
2514 ERR("insert_tree_item returned %08x\n", Status);
2515 if (data) ExFreePool(data);
2516 return Status;
2517 }
2518
2519 Status = delete_tree_item(Vcb, &tp);
2520 if (!NT_SUCCESS(Status)) {
2521 ERR("delete_tree_item returned %08x\n", Status);
2522 return Status;
2523 }
2524 }
2525 }
2526
2527 RemoveEntryList(&cer->list_entry);
2528 ExFreePool(cer);
2529
2530 le = le3;
2531 }
2532
2533 #ifdef DEBUG_PARANOID
2534 if (!IsListEmpty(&ce->old_refs))
2535 WARN("old_refs not empty\n");
2536 #endif
2537
2538 end:
2539 if (ce->count == 0 && !ce->superseded) {
2540 c->used -= ce->size;
2541 space_list_add(c, ce->address, ce->size, rollback);
2542 }
2543
2544 RemoveEntryList(&ce->list_entry);
2545 ExFreePool(ce);
2546
2547 return STATUS_SUCCESS;
2548 }
2549
2550 void add_checksum_entry(device_extension* Vcb, uint64_t address, ULONG length, uint32_t* csum, PIRP Irp) {
2551 KEY searchkey;
2552 traverse_ptr tp, next_tp;
2553 NTSTATUS Status;
2554 uint64_t startaddr, endaddr;
2555 ULONG len;
2556 uint32_t* checksums;
2557 RTL_BITMAP bmp;
2558 ULONG* bmparr;
2559 ULONG runlength, index;
2560
2561 TRACE("(%p, %I64x, %x, %p, %p)\n", Vcb, address, length, csum, Irp);
2562
2563 searchkey.obj_id = EXTENT_CSUM_ID;
2564 searchkey.obj_type = TYPE_EXTENT_CSUM;
2565 searchkey.offset = address;
2566
2567 // FIXME - create checksum_root if it doesn't exist at all
2568
2569 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp);
2570 if (Status == STATUS_NOT_FOUND) { // tree is completely empty
2571 if (csum) { // not deleted
2572 ULONG length2 = length;
2573 uint64_t off = address;
2574 uint32_t* data = csum;
2575
2576 do {
2577 uint16_t il = (uint16_t)min(length2, MAX_CSUM_SIZE / sizeof(uint32_t));
2578
2579 checksums = ExAllocatePoolWithTag(PagedPool, il * sizeof(uint32_t), ALLOC_TAG);
2580 if (!checksums) {
2581 ERR("out of memory\n");
2582 return;
2583 }
2584
2585 RtlCopyMemory(checksums, data, il * sizeof(uint32_t));
2586
2587 Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, checksums,
2588 il * sizeof(uint32_t), NULL, Irp);
2589 if (!NT_SUCCESS(Status)) {
2590 ERR("insert_tree_item returned %08x\n", Status);
2591 ExFreePool(checksums);
2592 return;
2593 }
2594
2595 length2 -= il;
2596
2597 if (length2 > 0) {
2598 off += il * Vcb->superblock.sector_size;
2599 data += il;
2600 }
2601 } while (length2 > 0);
2602 }
2603 } else if (!NT_SUCCESS(Status)) {
2604 ERR("find_item returned %08x\n", Status);
2605 return;
2606 } else {
2607 uint32_t tplen;
2608
2609 // FIXME - check entry is TYPE_EXTENT_CSUM?
2610
2611 if (tp.item->key.offset < address && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(uint32_t)) >= address)
2612 startaddr = tp.item->key.offset;
2613 else
2614 startaddr = address;
2615
2616 searchkey.obj_id = EXTENT_CSUM_ID;
2617 searchkey.obj_type = TYPE_EXTENT_CSUM;
2618 searchkey.offset = address + (length * Vcb->superblock.sector_size);
2619
2620 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp);
2621 if (!NT_SUCCESS(Status)) {
2622 ERR("find_item returned %08x\n", Status);
2623 return;
2624 }
2625
2626 tplen = tp.item->size / sizeof(uint32_t);
2627
2628 if (tp.item->key.offset + (tplen * Vcb->superblock.sector_size) >= address + (length * Vcb->superblock.sector_size))
2629 endaddr = tp.item->key.offset + (tplen * Vcb->superblock.sector_size);
2630 else
2631 endaddr = address + (length * Vcb->superblock.sector_size);
2632
2633 TRACE("cs starts at %I64x (%x sectors)\n", address, length);
2634 TRACE("startaddr = %I64x\n", startaddr);
2635 TRACE("endaddr = %I64x\n", endaddr);
2636
2637 len = (ULONG)((endaddr - startaddr) / Vcb->superblock.sector_size);
2638
2639 checksums = ExAllocatePoolWithTag(PagedPool, sizeof(uint32_t) * len, ALLOC_TAG);
2640 if (!checksums) {
2641 ERR("out of memory\n");
2642 return;
2643 }
2644
2645 bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG);
2646 if (!bmparr) {
2647 ERR("out of memory\n");
2648 ExFreePool(checksums);
2649 return;
2650 }
2651
2652 RtlInitializeBitMap(&bmp, bmparr, len);
2653 RtlSetAllBits(&bmp);
2654
2655 searchkey.obj_id = EXTENT_CSUM_ID;
2656 searchkey.obj_type = TYPE_EXTENT_CSUM;
2657 searchkey.offset = address;
2658
2659 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp);
2660 if (!NT_SUCCESS(Status)) {
2661 ERR("find_item returned %08x\n", Status);
2662 ExFreePool(checksums);
2663 ExFreePool(bmparr);
2664 return;
2665 }
2666
2667 // set bit = free space, cleared bit = allocated sector
2668
2669 while (tp.item->key.offset < endaddr) {
2670 if (tp.item->key.offset >= startaddr) {
2671 if (tp.item->size > 0) {
2672 ULONG itemlen = (ULONG)min((len - (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size) * sizeof(uint32_t), tp.item->size);
2673
2674 RtlCopyMemory(&checksums[(tp.item->key.offset - startaddr) / Vcb->superblock.sector_size], tp.item->data, itemlen);
2675 RtlClearBits(&bmp, (ULONG)((tp.item->key.offset - startaddr) / Vcb->superblock.sector_size), itemlen / sizeof(uint32_t));
2676 }
2677
2678 Status = delete_tree_item(Vcb, &tp);
2679 if (!NT_SUCCESS(Status)) {
2680 ERR("delete_tree_item returned %08x\n", Status);
2681 ExFreePool(checksums);
2682 ExFreePool(bmparr);
2683 return;
2684 }
2685 }
2686
2687 if (find_next_item(Vcb, &tp, &next_tp, false, Irp)) {
2688 tp = next_tp;
2689 } else
2690 break;
2691 }
2692
2693 if (!csum) { // deleted
2694 RtlSetBits(&bmp, (ULONG)((address - startaddr) / Vcb->superblock.sector_size), length);
2695 } else {
2696 RtlCopyMemory(&checksums[(address - startaddr) / Vcb->superblock.sector_size], csum, length * sizeof(uint32_t));
2697 RtlClearBits(&bmp, (ULONG)((address - startaddr) / Vcb->superblock.sector_size), length);
2698 }
2699
2700 runlength = RtlFindFirstRunClear(&bmp, &index);
2701
2702 while (runlength != 0) {
2703 if (index >= len)
2704 break;
2705
2706 if (index + runlength >= len) {
2707 runlength = len - index;
2708
2709 if (runlength == 0)
2710 break;
2711 }
2712
2713 do {
2714 uint16_t rl;
2715 uint64_t off;
2716 uint32_t* data;
2717
2718 if (runlength * sizeof(uint32_t) > MAX_CSUM_SIZE)
2719 rl = MAX_CSUM_SIZE / sizeof(uint32_t);
2720 else
2721 rl = (uint16_t)runlength;
2722
2723 data = ExAllocatePoolWithTag(PagedPool, sizeof(uint32_t) * rl, ALLOC_TAG);
2724 if (!data) {
2725 ERR("out of memory\n");
2726 ExFreePool(bmparr);
2727 ExFreePool(checksums);
2728 return;
2729 }
2730
2731 RtlCopyMemory(data, &checksums[index], sizeof(uint32_t) * rl);
2732
2733 off = startaddr + UInt32x32To64(index, Vcb->superblock.sector_size);
2734
2735 Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, data, sizeof(uint32_t) * rl, NULL, Irp);
2736 if (!NT_SUCCESS(Status)) {
2737 ERR("insert_tree_item returned %08x\n", Status);
2738 ExFreePool(data);
2739 ExFreePool(bmparr);
2740 ExFreePool(checksums);
2741 return;
2742 }
2743
2744 runlength -= rl;
2745 index += rl;
2746 } while (runlength > 0);
2747
2748 runlength = RtlFindNextForwardRunClear(&bmp, index, &index);
2749 }
2750
2751 ExFreePool(bmparr);
2752 ExFreePool(checksums);
2753 }
2754 }
2755
2756 static NTSTATUS update_chunk_usage(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
2757 LIST_ENTRY *le = Vcb->chunks.Flink, *le2;
2758 chunk* c;
2759 KEY searchkey;
2760 traverse_ptr tp;
2761 BLOCK_GROUP_ITEM* bgi;
2762 NTSTATUS Status;
2763
2764 TRACE("(%p)\n", Vcb);
2765
2766 ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
2767
2768 while (le != &Vcb->chunks) {
2769 c = CONTAINING_RECORD(le, chunk, list_entry);
2770
2771 acquire_chunk_lock(c, Vcb);
2772
2773 if (!c->cache_loaded && (!IsListEmpty(&c->changed_extents) || c->used != c->oldused)) {
2774 Status = load_cache_chunk(Vcb, c, NULL);
2775
2776 if (!NT_SUCCESS(Status)) {
2777 ERR("load_cache_chunk returned %08x\n", Status);
2778 release_chunk_lock(c, Vcb);
2779 goto end;
2780 }
2781 }
2782
2783 le2 = c->changed_extents.Flink;
2784 while (le2 != &c->changed_extents) {
2785 LIST_ENTRY* le3 = le2->Flink;
2786 changed_extent* ce = CONTAINING_RECORD(le2, changed_extent, list_entry);
2787
2788 Status = flush_changed_extent(Vcb, c, ce, Irp, rollback);
2789 if (!NT_SUCCESS(Status)) {
2790 ERR("flush_changed_extent returned %08x\n", Status);
2791 release_chunk_lock(c, Vcb);
2792 goto end;
2793 }
2794
2795 le2 = le3;
2796 }
2797
2798 // This is usually done by update_chunks, but we have to check again in case any new chunks
2799 // have been allocated since.
2800 if (c->created) {
2801 Status = create_chunk(Vcb, c, Irp);
2802 if (!NT_SUCCESS(Status)) {
2803 ERR("create_chunk returned %08x\n", Status);
2804 release_chunk_lock(c, Vcb);
2805 goto end;
2806 }
2807 }
2808
2809 if (c->old_cache) {
2810 if (c->old_cache->dirty) {
2811 LIST_ENTRY batchlist;
2812
2813 InitializeListHead(&batchlist);
2814
2815 Status = flush_fcb(c->old_cache, false, &batchlist, Irp);
2816 if (!NT_SUCCESS(Status)) {
2817 ERR("flush_fcb returned %08x\n", Status);
2818 release_chunk_lock(c, Vcb);
2819 clear_batch_list(Vcb, &batchlist);
2820 goto end;
2821 }
2822
2823 Status = commit_batch_list(Vcb, &batchlist, Irp);
2824 if (!NT_SUCCESS(Status)) {
2825 ERR("commit_batch_list returned %08x\n", Status);
2826 release_chunk_lock(c, Vcb);
2827 goto end;
2828 }
2829 }
2830
2831 free_fcb(c->old_cache);
2832
2833 if (c->old_cache->refcount == 0)
2834 reap_fcb(c->old_cache);
2835
2836 c->old_cache = NULL;
2837 }
2838
2839 if (c->used != c->oldused) {
2840 #ifdef __REACTOS__
2841 uint64_t old_phys_used, phys_used;
2842 #endif
2843 searchkey.obj_id = c->offset;
2844 searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
2845 searchkey.offset = c->chunk_item->size;
2846
2847 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
2848 if (!NT_SUCCESS(Status)) {
2849 ERR("error - find_item returned %08x\n", Status);
2850 release_chunk_lock(c, Vcb);
2851 goto end;
2852 }
2853
2854 if (keycmp(searchkey, tp.item->key)) {
2855 ERR("could not find (%I64x,%x,%I64x) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
2856 Status = STATUS_INTERNAL_ERROR;
2857 release_chunk_lock(c, Vcb);
2858 goto end;
2859 }
2860
2861 if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
2862 ERR("(%I64x,%x,%I64x) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
2863 Status = STATUS_INTERNAL_ERROR;
2864 release_chunk_lock(c, Vcb);
2865 goto end;
2866 }
2867
2868 bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
2869 if (!bgi) {
2870 ERR("out of memory\n");
2871 Status = STATUS_INSUFFICIENT_RESOURCES;
2872 release_chunk_lock(c, Vcb);
2873 goto end;
2874 }
2875
2876 RtlCopyMemory(bgi, tp.item->data, tp.item->size);
2877 bgi->used = c->used;
2878
2879 #ifdef DEBUG_PARANOID
2880 if (bgi->used & 0x8000000000000000) {
2881 ERR("refusing to write BLOCK_GROUP_ITEM with negative usage value (%I64x)", bgi->used);
2882 int3;
2883 }
2884 #endif
2885
2886 TRACE("adjusting usage of chunk %I64x to %I64x\n", c->offset, c->used);
2887
2888 Status = delete_tree_item(Vcb, &tp);
2889 if (!NT_SUCCESS(Status)) {
2890 ERR("delete_tree_item returned %08x\n", Status);
2891 ExFreePool(bgi);
2892 release_chunk_lock(c, Vcb);
2893 goto end;
2894 }
2895
2896 Status = insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, Irp);
2897 if (!NT_SUCCESS(Status)) {
2898 ERR("insert_tree_item returned %08x\n", Status);
2899 ExFreePool(bgi);
2900 release_chunk_lock(c, Vcb);
2901 goto end;
2902 }
2903
2904 #ifndef __REACTOS__
2905 uint64_t old_phys_used = chunk_estimate_phys_size(Vcb, c, c->oldused);
2906 uint64_t phys_used = chunk_estimate_phys_size(Vcb, c, c->used);
2907 #else
2908 old_phys_used = chunk_estimate_phys_size(Vcb, c, c->oldused);
2909 phys_used = chunk_estimate_phys_size(Vcb, c, c->used);
2910 #endif
2911
2912 if (Vcb->superblock.bytes_used + phys_used > old_phys_used)
2913 Vcb->superblock.bytes_used += phys_used - old_phys_used;
2914 else
2915 Vcb->superblock.bytes_used = 0;
2916
2917 c->oldused = c->used;
2918 }
2919
2920 release_chunk_lock(c, Vcb);
2921
2922 le = le->Flink;
2923 }
2924
2925 Status = STATUS_SUCCESS;
2926
2927 end:
2928 ExReleaseResourceLite(&Vcb->chunk_lock);
2929
2930 return Status;
2931 }
2932
2933 static void get_first_item(tree* t, KEY* key) {
2934 LIST_ENTRY* le;
2935
2936 le = t->itemlist.Flink;
2937 while (le != &t->itemlist) {
2938 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2939
2940 *key = td->key;
2941 return;
2942 }
2943 }
2944
2945 static NTSTATUS split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, uint32_t numitems, uint32_t size) {
2946 tree *nt, *pt;
2947 tree_data* td;
2948 tree_data* oldlastitem;
2949
2950 TRACE("splitting tree in %I64x at (%I64x,%x,%I64x)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);
2951
2952 nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2953 if (!nt) {
2954 ERR("out of memory\n");
2955 return STATUS_INSUFFICIENT_RESOURCES;
2956 }
2957
2958 if (t->header.level > 0) {
2959 nt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
2960 if (!nt->nonpaged) {
2961 ERR("out of memory\n");
2962 ExFreePool(nt);
2963 return STATUS_INSUFFICIENT_RESOURCES;
2964 }
2965
2966 ExInitializeFastMutex(&nt->nonpaged->mutex);
2967 } else
2968 nt->nonpaged = NULL;
2969
2970 RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header));
2971 nt->header.address = 0;
2972 nt->header.generation = Vcb->superblock.generation;
2973 nt->header.num_items = t->header.num_items - numitems;
2974 nt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
2975
2976 nt->has_address = false;
2977 nt->Vcb = Vcb;
2978 nt->parent = t->parent;
2979
2980 #ifdef DEBUG_PARANOID
2981 if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
2982 #endif
2983
2984 nt->root = t->root;
2985 nt->new_address = 0;
2986 nt->has_new_address = false;
2987 nt->updated_extents = false;
2988 nt->uniqueness_determined = true;
2989 nt->is_unique = true;
2990 nt->list_entry_hash.Flink = NULL;
2991 nt->buf = NULL;
2992 InitializeListHead(&nt->itemlist);
2993
2994 oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry);
2995
2996 nt->itemlist.Flink = &newfirstitem->list_entry;
2997 nt->itemlist.Blink = t->itemlist.Blink;
2998 nt->itemlist.Flink->Blink = &nt->itemlist;
2999 nt->itemlist.Blink->Flink = &nt->itemlist;
3000
3001 t->itemlist.Blink = &oldlastitem->list_entry;
3002 t->itemlist.Blink->Flink = &t->itemlist;
3003
3004 nt->size = t->size - size;
3005 t->size = size;
3006 t->header.num_items = numitems;
3007 nt->write = true;
3008
3009 InsertTailList(&Vcb->trees, &nt->list_entry);
3010
3011 if (nt->header.level > 0) {
3012 LIST_ENTRY* le = nt->itemlist.Flink;
3013
3014 while (le != &nt->itemlist) {
3015 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
3016
3017 if (td2->treeholder.tree) {
3018 td2->treeholder.tree->parent = nt;
3019 #ifdef DEBUG_PARANOID
3020 if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
3021 #endif
3022 }
3023
3024 le = le->Flink;
3025 }
3026 } else {
3027 LIST_ENTRY* le = nt->itemlist.Flink;
3028
3029 while (le != &nt->itemlist) {
3030 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
3031
3032 if (!td2->inserted && td2->data) {
3033 uint8_t* data = ExAllocatePoolWithTag(PagedPool, td2->size, ALLOC_TAG);
3034
3035 if (!data) {
3036 ERR("out of memory\n");
3037 return STATUS_INSUFFICIENT_RESOURCES;
3038 }
3039
3040 RtlCopyMemory(data, td2->data, td2->size);
3041 td2->data = data;
3042 td2->inserted = true;
3043 }
3044
3045 le = le->Flink;
3046 }
3047 }
3048
3049 if (nt->parent) {
3050 td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
3051 if (!td) {
3052 ERR("out of memory\n");
3053 return STATUS_INSUFFICIENT_RESOURCES;
3054 }
3055
3056 td->key = newfirstitem->key;
3057
3058 InsertHeadList(&t->paritem->list_entry, &td->list_entry);
3059
3060 td->ignore = false;
3061 td->inserted = true;
3062 td->treeholder.tree = nt;
3063 nt->paritem = td;
3064
3065 nt->parent->header.num_items++;
3066 nt->parent->size += sizeof(internal_node);
3067
3068 goto end;
3069 }
3070
3071 TRACE("adding new tree parent\n");
3072
3073 if (nt->header.level == 255) {
3074 ERR("cannot add parent to tree at level 255\n");
3075 return STATUS_INTERNAL_ERROR;
3076 }
3077
3078 pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
3079 if (!pt) {
3080 ERR("out of memory\n");
3081 return STATUS_INSUFFICIENT_RESOURCES;
3082 }
3083
3084 pt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
3085 if (!pt->nonpaged) {
3086 ERR("out of memory\n");
3087 ExFreePool(pt);
3088 return STATUS_INSUFFICIENT_RESOURCES;
3089 }
3090
3091 ExInitializeFastMutex(&pt->nonpaged->mutex);
3092
3093 RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
3094 pt->header.address = 0;
3095 pt->header.num_items = 2;
3096 pt->header.level = nt->header.level + 1;
3097 pt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
3098
3099 pt->has_address = false;
3100 pt->Vcb = Vcb;
3101 pt->parent = NULL;
3102 pt->paritem = NULL;
3103 pt->root = t->root;
3104 pt->new_address = 0;
3105 pt->has_new_address = false;
3106 pt->updated_extents = false;
3107 pt->size = pt->header.num_items * sizeof(internal_node);
3108 pt->uniqueness_determined = true;
3109 pt->is_unique = true;
3110 pt->list_entry_hash.Flink = NULL;
3111 pt->buf = NULL;
3112 InitializeListHead(&pt->itemlist);
3113
3114 InsertTailList(&Vcb->trees, &pt->list_entry);
3115
3116 td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
3117 if (!td) {
3118 ERR("out of memory\n");
3119 return STATUS_INSUFFICIENT_RESOURCES;
3120 }
3121
3122 get_first_item(t, &td->key);
3123 td->ignore = false;
3124 td->inserted = false;
3125 td->treeholder.address = 0;
3126 td->treeholder.generation = Vcb->superblock.generation;
3127 td->treeholder.tree = t;
3128 InsertTailList(&pt->itemlist, &td->list_entry);
3129 t->paritem = td;
3130
3131 td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
3132 if (!td) {
3133 ERR("out of memory\n");
3134 return STATUS_INSUFFICIENT_RESOURCES;
3135 }
3136
3137 td->key = newfirstitem->key;
3138 td->ignore = false;
3139 td->inserted = false;
3140 td->treeholder.address = 0;
3141 td->treeholder.generation = Vcb->superblock.generation;
3142 td->treeholder.tree = nt;
3143 InsertTailList(&pt->itemlist, &td->list_entry);
3144 nt->paritem = td;
3145
3146 pt->write = true;
3147
3148 t->root->treeholder.tree = pt;
3149
3150 t->parent = pt;
3151 nt->parent = pt;
3152
3153 #ifdef DEBUG_PARANOID
3154 if (t->parent && t->parent->header.level <= t->header.level) int3;
3155 if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
3156 #endif
3157
3158 end:
3159 t->root->root_item.bytes_used += Vcb->superblock.node_size;
3160
3161 return STATUS_SUCCESS;
3162 }
3163
3164 static NTSTATUS split_tree(device_extension* Vcb, tree* t) {
3165 LIST_ENTRY* le;
3166 uint32_t size, ds, numitems;
3167
3168 size = 0;
3169 numitems = 0;
3170
3171 // FIXME - naïve implementation: maximizes number of filled trees
3172
3173 le = t->itemlist.Flink;
3174 while (le != &t->itemlist) {
3175 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3176
3177 if (!td->ignore) {
3178 if (t->header.level == 0)
3179 ds = sizeof(leaf_node) + td->size;
3180 else
3181 ds = sizeof(internal_node);
3182
3183 if (numitems == 0 && ds > Vcb->superblock.node_size - sizeof(tree_header)) {
3184 ERR("(%I64x,%x,%I64x) in tree %I64x is too large (%x > %x)\n",
3185 td->key.obj_id, td->key.obj_type, td->key.offset, t->root->id,
3186 ds, Vcb->superblock.node_size - sizeof(tree_header));
3187 return STATUS_INTERNAL_ERROR;
3188 }
3189
3190 // FIXME - move back if previous item was deleted item with same key
3191 if (size + ds > Vcb->superblock.node_size - sizeof(tree_header))
3192 return split_tree_at(Vcb, t, td, numitems, size);
3193
3194 size += ds;
3195 numitems++;
3196 }
3197
3198 le = le->Flink;
3199 }
3200
3201 return STATUS_SUCCESS;
3202 }
3203
3204 bool is_tree_unique(device_extension* Vcb, tree* t, PIRP Irp) {
3205 KEY searchkey;
3206 traverse_ptr tp;
3207 NTSTATUS Status;
3208 bool ret = false;
3209 EXTENT_ITEM* ei;
3210 uint8_t* type;
3211
3212 if (t->uniqueness_determined)
3213 return t->is_unique;
3214
3215 if (t->parent && !is_tree_unique(Vcb, t->parent, Irp))
3216 goto end;
3217
3218 if (t->has_address) {
3219 searchkey.obj_id = t->header.address;
3220 searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
3221 searchkey.offset = 0xffffffffffffffff;
3222
3223 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
3224 if (!NT_SUCCESS(Status)) {
3225 ERR("error - find_item returned %08x\n", Status);
3226 goto end;
3227 }
3228
3229 if (tp.item->key.obj_id != t->header.address || (tp.item->key.obj_type != TYPE_METADATA_ITEM && tp.item->key.obj_type != TYPE_EXTENT_ITEM))
3230 goto end;
3231
3232 if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size == sizeof(EXTENT_ITEM_V0))
3233 goto end;
3234
3235 if (tp.item->size < sizeof(EXTENT_ITEM))
3236 goto end;
3237
3238 ei = (EXTENT_ITEM*)tp.item->data;
3239
3240 if (ei->refcount > 1)
3241 goto end;
3242
3243 if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) {
3244 EXTENT_ITEM2* ei2;
3245
3246 if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2))
3247 goto end;
3248
3249 ei2 = (EXTENT_ITEM2*)&ei[1];
3250 type = (uint8_t*)&ei2[1];
3251 } else
3252 type = (uint8_t*)&ei[1];
3253
3254 if (type >= tp.item->data + tp.item->size || *type != TYPE_TREE_BLOCK_REF)
3255 goto end;
3256 }
3257
3258 ret = true;
3259
3260 end:
3261 t->is_unique = ret;
3262 t->uniqueness_determined = true;
3263
3264 return ret;
3265 }
3266
// Tries to enlarge tree t using the next non-ignored sibling listed after it in
// the same parent.
//
// If both trees fit into one node, the sibling's items are spliced onto the end
// of t, the sibling's extent is released and the sibling is freed. Otherwise
// items are moved from the front of the sibling into t until t reaches the
// average of the two sizes, the sibling is down to one item, or the next item
// would not fit.
//
// *done is set to true if the sibling was absorbed or at least one item was moved,
// and to false otherwise. *done_deletions is only ever set to true here (when the
// sibling's parent item gets marked as ignored); it is never cleared.
//
// Returns STATUS_SUCCESS when there is nothing to do (no following sibling, or the
// sibling is not unique) as well as on success; otherwise the failing status from
// do_load_tree, update_tree_extents or reduce_tree_extent, or
// STATUS_INSUFFICIENT_RESOURCES if an item's data could not be duplicated.
// NOTE(review): on those error returns t and next_tree may already have been
// partially modified - presumably the caller discards the cached trees; confirm.
static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, bool* done, bool* done_deletions, PIRP Irp, LIST_ENTRY* rollback) {
    LIST_ENTRY* le;
    tree_data* nextparitem = NULL;
    NTSTATUS Status;
    tree *next_tree, *par;

    *done = false;

    TRACE("trying to amalgamate tree in root %I64x, level %x (size %u)\n", t->root->id, t->header.level, t->size);

    // FIXME - doesn't capture everything, as it doesn't ascend
    // find the first non-ignored entry after t's own entry in the parent's item list
    le = t->paritem->list_entry.Flink;
    while (le != &t->parent->itemlist) {
        tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);

        if (!td->ignore) {
            nextparitem = td;
            break;
        }

        le = le->Flink;
    }

    if (!nextparitem)
        return STATUS_SUCCESS;

    TRACE("nextparitem: key = %I64x,%x,%I64x\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset);

    // make sure the sibling tree is loaded
    if (!nextparitem->treeholder.tree) {
        Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, NULL);
        if (!NT_SUCCESS(Status)) {
            ERR("do_load_tree returned %08x\n", Status);
            return Status;
        }
    }

    // a sibling that is not unique is left alone
    if (!is_tree_unique(Vcb, nextparitem->treeholder.tree, Irp))
        return STATUS_SUCCESS;

    next_tree = nextparitem->treeholder.tree;

    if (!next_tree->updated_extents && next_tree->has_address) {
        Status = update_tree_extents(Vcb, next_tree, Irp, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("update_tree_extents returned %08x\n", Status);
            return Status;
        }
    }

    if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) {
        // merge two trees into one

        t->header.num_items += next_tree->header.num_items;
        t->size += next_tree->size;

        if (next_tree->header.level > 0) {
            // internal node: reparent any loaded children to t and mark every entry as inserted
            le = next_tree->itemlist.Flink;

            while (le != &next_tree->itemlist) {
                tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);

                if (td2->treeholder.tree) {
                    td2->treeholder.tree->parent = t;
#ifdef DEBUG_PARANOID
                    if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
#endif
                }

                td2->inserted = true;
                le = le->Flink;
            }
        } else {
            // leaf: give each not-yet-inserted item that has data its own pool copy of that data
            // NOTE(review): presumably because the original data points into next_tree's
            // buffer, which goes away when next_tree is freed below - confirm
            le = next_tree->itemlist.Flink;

            while (le != &next_tree->itemlist) {
                tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);

                if (!td2->inserted && td2->data) {
                    uint8_t* data = ExAllocatePoolWithTag(PagedPool, td2->size, ALLOC_TAG);

                    if (!data) {
                        ERR("out of memory\n");
                        return STATUS_INSUFFICIENT_RESOURCES;
                    }

                    RtlCopyMemory(data, td2->data, td2->size);
                    td2->data = data;
                    td2->inserted = true;
                }

                le = le->Flink;
            }
        }

        // splice next_tree's whole item list onto the tail of t's item list
        t->itemlist.Blink->Flink = next_tree->itemlist.Flink;
        t->itemlist.Blink->Flink->Blink = t->itemlist.Blink;
        t->itemlist.Blink = next_tree->itemlist.Blink;
        t->itemlist.Blink->Flink = &t->itemlist;

        // leave next_tree with an empty list
        next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist;

        next_tree->header.num_items = 0;
        next_tree->size = 0;

        if (next_tree->has_new_address) { // delete associated EXTENT_ITEM
            Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback);

            if (!NT_SUCCESS(Status)) {
                ERR("reduce_tree_extent returned %08x\n", Status);
                return Status;
            }
        } else if (next_tree->has_address) {
            Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback);

            if (!NT_SUCCESS(Status)) {
                ERR("reduce_tree_extent returned %08x\n", Status);
                return Status;
            }
        }

        // take the sibling's entry out of the parent's accounting
        if (!nextparitem->ignore) {
            nextparitem->ignore = true;
            next_tree->parent->header.num_items--;
            next_tree->parent->size -= sizeof(internal_node);

            *done_deletions = true;
        }

        // flag every ancestor as needing to be written
        par = next_tree->parent;
        while (par) {
            par->write = true;
            par = par->parent;
        }

        RemoveEntryList(&nextparitem->list_entry);
        ExFreePool(next_tree->paritem);
        next_tree->paritem = NULL;

        next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size;

        free_tree(next_tree);

        *done = true;
    } else {
        // rebalance by moving items from second tree into first
        ULONG avg_size = (t->size + next_tree->size) / 2;
        KEY firstitem = {0, 0, 0};
        bool changed = false;

        TRACE("attempting rebalance\n");

        // always take the current head of next_tree's list: each moved item is
        // unlinked, so the head is re-read at the bottom of the loop
        le = next_tree->itemlist.Flink;
        while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) {
            tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
            ULONG size;

            // ignored entries count as taking up no space
            if (!td->ignore) {
                if (next_tree->header.level == 0)
                    size = sizeof(leaf_node) + td->size;
                else
                    size = sizeof(internal_node);
            } else
                size = 0;

            if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) {
                RemoveEntryList(&td->list_entry);
                InsertTailList(&t->itemlist, &td->list_entry);

                if (next_tree->header.level > 0 && td->treeholder.tree) {
                    td->treeholder.tree->parent = t;
#ifdef DEBUG_PARANOID
                    if (td->treeholder.tree->parent && td->treeholder.tree->parent->header.level <= td->treeholder.tree->header.level) int3;
#endif
                } else if (next_tree->header.level == 0 && !td->inserted && td->size > 0) {
                    // leaf item not yet marked inserted: give it its own copy of the data
                    uint8_t* data = ExAllocatePoolWithTag(PagedPool, td->size, ALLOC_TAG);

                    if (!data) {
                        ERR("out of memory\n");
                        return STATUS_INSUFFICIENT_RESOURCES;
                    }

                    RtlCopyMemory(data, td->data, td->size);
                    td->data = data;
                }

                td->inserted = true;

                if (!td->ignore) {
                    next_tree->size -= size;
                    t->size += size;
                    next_tree->header.num_items--;
                    t->header.num_items++;
                }

                changed = true;
            } else
                break;

            le = next_tree->itemlist.Flink;
        }

        // the key of the first remaining non-ignored item becomes next_tree's key in its parent
        le = next_tree->itemlist.Flink;
        while (le != &next_tree->itemlist) {
            tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);

            if (!td->ignore) {
                firstitem = td->key;
                break;
            }

            le = le->Flink;
        }

        // FIXME - once ascension is working, make this work with parent's parent, etc.
        if (next_tree->paritem)
            next_tree->paritem->key = firstitem;

        // flag next_tree and every ancestor as needing to be written
        par = next_tree;
        while (par) {
            par->write = true;
            par = par->parent;
        }

        if (changed)
            *done = true;
    }

    return STATUS_SUCCESS;
}
3496
3497 static NTSTATUS update_extent_level(device_extension* Vcb, uint64_t address, tree* t, uint8_t level, PIRP Irp) {
3498 KEY searchkey;
3499 traverse_ptr tp;
3500 NTSTATUS Status;
3501
3502 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
3503 searchkey.obj_id = address;
3504 searchkey.obj_type = TYPE_METADATA_ITEM;
3505 searchkey.offset = t->header.level;
3506
3507 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
3508 if (!NT_SUCCESS(Status)) {
3509 ERR("error - find_item returned %08x\n", Status);
3510 return Status;
3511 }
3512
3513 if (!keycmp(tp.item->key, searchkey)) {
3514 EXTENT_ITEM_SKINNY_METADATA* eism;
3515
3516 if (tp.item->size > 0) {
3517 eism = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
3518
3519 if (!eism) {
3520 ERR("out of memory\n");
3521 return STATUS_INSUFFICIENT_RESOURCES;
3522 }
3523
3524 RtlCopyMemory(eism, tp.item->data, tp.item->size);
3525 } else
3526 eism = NULL;
3527
3528 Status = delete_tree_item(Vcb, &tp);
3529 if (!NT_SUCCESS(Status)) {
3530 ERR("delete_tree_item returned %08x\n", Status);
3531 if (eism) ExFreePool(eism);
3532 return Status;
3533 }
3534
3535 Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, tp.item->size, NULL, Irp);
3536 if (!NT_SUCCESS(Status)) {
3537 ERR("insert_tree_item returned %08x\n", Status);
3538 if (eism) ExFreePool(eism);
3539 return Status;
3540 }
3541
3542 return STATUS_SUCCESS;
3543 }
3544 }
3545
3546 searchkey.obj_id = address;
3547 searchkey.obj_type = TYPE_EXTENT_ITEM;
3548 searchkey.offset = 0xffffffffffffffff;
3549
3550 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
3551 if (!NT_SUCCESS(Status)) {
3552 ERR("error - find_item returned %08x\n", Status);
3553 return Status;
3554 }
3555
3556 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
3557 EXTENT_ITEM_TREE* eit;
3558
3559 if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
3560 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
3561 return STATUS_INTERNAL_ERROR;
3562 }
3563
3564 eit = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
3565
3566 if (!eit) {
3567 ERR("out of memory\n");
3568 return STATUS_INSUFFICIENT_RESOURCES;
3569 }
3570
3571 RtlCopyMemory(eit, tp.item->data, tp.item->size);
3572
3573 Status = delete_tree_item(Vcb, &tp);
3574 if (!NT_SUCCESS(Status)) {
3575 ERR("delete_tree_item returned %08x\n", Status);
3576 ExFreePool(eit);
3577 return Status;
3578 }
3579
3580 eit->level = level;
3581
3582 Status = insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, eit, tp.item->size, NULL, Irp);
3583 if (!NT_SUCCESS(Status)) {
3584 ERR("insert_tree_item returned %08x\n", Status);
3585 ExFreePool(eit);
3586 return Status;
3587 }
3588
3589 return STATUS_SUCCESS;
3590 }
3591
3592 ERR("could not find EXTENT_ITEM for address %I64x\n", address);
3593
3594 return STATUS_INTERNAL_ERROR;
3595 }
3596
3597 static NTSTATUS update_tree_extents_recursive(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
3598 NTSTATUS Status;
3599
3600 if (t->parent && !t->parent->updated_extents && t->parent->has_address) {
3601 Status = update_tree_extents_recursive(Vcb, t->parent, Irp, rollback);
3602 if (!NT_SUCCESS(Status))
3603 return Status;
3604 }
3605
3606 Status = update_tree_extents(Vcb, t, Irp, rollback);
3607 if (!NT_SUCCESS(Status)) {
3608 ERR("update_tree_extents returned %08x\n", Status);
3609 return Status;
3610 }
3611
3612 return STATUS_SUCCESS;
3613 }
3614
// Brings the cached trees marked for writing back to valid node sizes. It works
// in three passes over Vcb->trees:
//
//  1. Level by level from 0 upwards: an empty tree with a parent is deleted, an
//     empty tree without a parent has its level reset to 0, and a tree larger than
//     a node can hold is split. The pass stops at the first level that has no tree
//     marked for writing.
//  2. For each level visited, trees smaller than half the usable node size are
//     amalgamated with their following sibling (see try_tree_amalgamate).
//  3. If anything was deleted, top-level trees left with a single item are removed
//     and their only child becomes the root's tree.
//
// Returns STATUS_SUCCESS, or the first failing status from a helper.
static NTSTATUS do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
    ULONG level, max_level;
    uint32_t min_size;
    bool empty, done_deletions = false;
    NTSTATUS Status;
    tree* t;

    TRACE("(%p)\n", Vcb);

    max_level = 0;

    // pass 1: delete empty trees and split oversized ones, lowest level first
    for (level = 0; level <= 255; level++) {
        LIST_ENTRY *le, *nextle;

        empty = true;

        TRACE("doing level %u\n", level);

        le = Vcb->trees.Flink;

        while (le != &Vcb->trees) {
            t = CONTAINING_RECORD(le, tree, list_entry);

            // fetched up front, as t may be freed below
            nextle = le->Flink;

            if (t->write && t->header.level == level) {
                empty = false;

                if (t->header.num_items == 0) {
                    if (t->parent) {
                        done_deletions = true;

                        TRACE("deleting tree in root %I64x\n", t->root->id);

                        t->root->root_item.bytes_used -= Vcb->superblock.node_size;

                        if (t->has_new_address) { // delete associated EXTENT_ITEM
                            Status = reduce_tree_extent(Vcb, t->new_address, t, t->parent->header.tree_id, t->header.level, Irp, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("reduce_tree_extent returned %08x\n", Status);
                                return Status;
                            }

                            t->has_new_address = false;
                        } else if (t->has_address) {
                            Status = reduce_tree_extent(Vcb,t->header.address, t, t->parent->header.tree_id, t->header.level, Irp, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("reduce_tree_extent returned %08x\n", Status);
                                return Status;
                            }

                            t->has_address = false;
                        }

                        // drop this tree's entry from its parent's accounting
                        if (!t->paritem->ignore) {
                            t->paritem->ignore = true;
                            t->parent->header.num_items--;
                            t->parent->size -= sizeof(internal_node);
                        }

                        RemoveEntryList(&t->paritem->list_entry);
                        ExFreePool(t->paritem);
                        t->paritem = NULL;

                        free_tree(t);
                    } else if (t->header.level != 0) {
                        // empty tree with no parent: keep it, but reset its level to 0
                        if (t->has_new_address) {
                            Status = update_extent_level(Vcb, t->new_address, t, 0, Irp);

                            if (!NT_SUCCESS(Status)) {
                                ERR("update_extent_level returned %08x\n", Status);
                                return Status;
                            }
                        }

                        t->header.level = 0;
                    }
                } else if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
                    TRACE("splitting overlarge tree (%x > %x)\n", t->size, Vcb->superblock.node_size - sizeof(tree_header));

                    if (!t->updated_extents && t->has_address) {
                        Status = update_tree_extents_recursive(Vcb, t, Irp, rollback);
                        if (!NT_SUCCESS(Status)) {
                            ERR("update_tree_extents_recursive returned %08x\n", Status);
                            return Status;
                        }
                    }

                    Status = split_tree(Vcb, t);

                    if (!NT_SUCCESS(Status)) {
                        ERR("split_tree returned %08x\n", Status);
                        return Status;
                    }
                }
            }

            le = nextle;
        }

        if (!empty) {
            max_level = level;
        } else {
            TRACE("nothing found for level %u\n", level);
            break;
        }
    }

    // trees smaller than half of the usable node size are candidates for amalgamation
    min_size = (Vcb->superblock.node_size - sizeof(tree_header)) / 2;

    // pass 2: amalgamate undersized trees; trees in the BTRFS_ROOT_FREE_SPACE root
    // and trees that are not unique are skipped
    for (level = 0; level <= max_level; level++) {
        LIST_ENTRY* le;

        le = Vcb->trees.Flink;

        while (le != &Vcb->trees) {
            t = CONTAINING_RECORD(le, tree, list_entry);

            if (t->write && t->header.level == level && t->header.num_items > 0 && t->parent && t->size < min_size &&
                t->root->id != BTRFS_ROOT_FREE_SPACE && is_tree_unique(Vcb, t, Irp)) {
                bool done;

                // keep going while progress is made and t is still undersized
                do {
                    Status = try_tree_amalgamate(Vcb, t, &done, &done_deletions, Irp, rollback);
                    if (!NT_SUCCESS(Status)) {
                        ERR("try_tree_amalgamate returned %08x\n", Status);
                        return Status;
                    }
                } while (done && t->size < min_size);
            }

            le = le->Flink;
        }
    }

    // simplify trees if top tree only has one entry

    if (done_deletions) {
        for (level = max_level; level > 0; level--) {
            LIST_ENTRY *le, *nextle;

            le = Vcb->trees.Flink;
            while (le != &Vcb->trees) {
                // fetched up front, as t may be freed below
                nextle = le->Flink;
                t = CONTAINING_RECORD(le, tree, list_entry);

                if (t->write && t->header.level == level) {
                    if (!t->parent && t->header.num_items == 1) {
                        LIST_ENTRY* le2 = t->itemlist.Flink;
                        tree_data* td = NULL;
                        tree* child_tree = NULL;

                        // locate the one non-ignored entry
                        // NOTE(review): td stays NULL if the list is empty, and is
                        // dereferenced below - relies on num_items == 1 being accurate
                        while (le2 != &t->itemlist) {
                            td = CONTAINING_RECORD(le2, tree_data, list_entry);
                            if (!td->ignore)
                                break;
                            le2 = le2->Flink;
                        }

                        TRACE("deleting top-level tree in root %I64x with one item\n", t->root->id);

                        if (t->has_new_address) { // delete associated EXTENT_ITEM
                            Status = reduce_tree_extent(Vcb, t->new_address, t, t->header.tree_id, t->header.level, Irp, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("reduce_tree_extent returned %08x\n", Status);
                                return Status;
                            }

                            t->has_new_address = false;
                        } else if (t->has_address) {
                            Status = reduce_tree_extent(Vcb,t->header.address, t, t->header.tree_id, t->header.level, Irp, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("reduce_tree_extent returned %08x\n", Status);
                                return Status;
                            }

                            t->has_address = false;
                        }

                        if (!td->treeholder.tree) { // load first item if not already loaded
                            // the search result itself is not used; presumably the lookup is
                            // done for its side effect of loading the child tree - confirm
                            KEY searchkey = {0,0,0};
                            traverse_ptr tp;

                            Status = find_item(Vcb, t->root, &tp, &searchkey, false, Irp);
                            if (!NT_SUCCESS(Status)) {
                                ERR("error - find_item returned %08x\n", Status);
                                return Status;
                            }
                        }

                        child_tree = td->treeholder.tree;

                        // detach the child from t before t is freed
                        if (child_tree) {
                            child_tree->parent = NULL;
                            child_tree->paritem = NULL;
                        }

                        t->root->root_item.bytes_used -= Vcb->superblock.node_size;

                        free_tree(t);

                        // the child takes over as the root's top-level tree
                        if (child_tree)
                            child_tree->root->treeholder.tree = child_tree;
                    }
                }

                le = nextle;
            }
        }
    }

    return STATUS_SUCCESS;
}
3832
3833 static NTSTATUS remove_root_extents(device_extension* Vcb, root* r, tree_holder* th, uint8_t level, tree* parent, PIRP Irp, LIST_ENTRY* rollback) {
3834 NTSTATUS Status;
3835
3836 if (!th->tree) {
3837 uint8_t* buf;
3838 chunk* c;
3839
3840 buf = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
3841 if (!buf) {
3842 ERR("out of memory\n");
3843 return STATUS_INSUFFICIENT_RESOURCES;
3844 }
3845
3846 Status = read_data(Vcb, th->address, Vcb->superblock.node_size, NULL, true, buf, NULL,
3847 &c, Irp, th->generation, false, NormalPagePriority);
3848 if (!NT_SUCCESS(Status)) {
3849 ERR("read_data returned 0x%08x\n", Status);
3850 ExFreePool(buf);
3851 return Status;
3852 }
3853
3854 Status = load_tree(Vcb, th->address, buf, r, &th->tree);
3855
3856 if (!th->tree || th->tree->buf != buf)
3857 ExFreePool(buf);
3858
3859 if (!NT_SUCCESS(Status)) {
3860 ERR("load_tree(%I64x) returned %08x\n", th->address, Status);
3861 return Status;
3862 }
3863 }
3864
3865 if (level > 0) {
3866 LIST_ENTRY* le = th->tree->itemlist.Flink;
3867
3868 while (le != &th->tree->itemlist) {
3869 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3870
3871 if (!td->ignore) {
3872 Status = remove_root_extents(Vcb, r, &td->treeholder, th->tree->header.level - 1, th->tree, Irp, rollback);
3873
3874 if (!NT_SUCCESS(Status)) {
3875 ERR("remove_root_extents returned %08x\n", Status);
3876 return Status;
3877 }
3878 }
3879
3880 le = le->Flink;
3881 }
3882 }
3883
3884 if (th->tree && !th->tree->updated_extents && th->tree->has_address) {
3885 Status = update_tree_extents(Vcb, th->tree, Irp, rollback);
3886 if (!NT_SUCCESS(Status)) {
3887 ERR("update_tree_extents returned %08x\n", Status);
3888 return Status;
3889 }
3890 }
3891
3892 if (!th->tree || th->tree->has_address) {
3893 Status = reduce_tree_extent(Vcb, th->address, NULL, parent ? parent->header.tree_id : r->id, level, Irp, rollback);
3894
3895 if (!NT_SUCCESS(Status)) {
3896 ERR("reduce_tree_extent(%I64x) returned %08x\n", th->address, Status);
3897 return Status;
3898 }
3899 }
3900
3901 return STATUS_SUCCESS;
3902 }
3903
// Removes root r from the filesystem metadata:
//  - releases the extents of all of its tree nodes (remove_root_extents),
//  - removes its entries from the UUID tree, if the volume has one,
//  - deletes its ROOT_ITEM from the root tree,
//  - frees its cached trees (free_trees_root).
//
// Failed or fruitless lookups in the UUID tree only produce warnings; failures of
// delete_tree_item / insert_tree_item, and of the ROOT_ITEM lookup, are returned
// to the caller. Returns STATUS_SUCCESS otherwise.
static NTSTATUS drop_root(device_extension* Vcb, root* r, PIRP Irp, LIST_ENTRY* rollback) {
    NTSTATUS Status;
    KEY searchkey;
    traverse_ptr tp;

    Status = remove_root_extents(Vcb, r, &r->treeholder, r->root_item.root_level, NULL, Irp, rollback);
    if (!NT_SUCCESS(Status)) {
        ERR("remove_root_extents returned %08x\n", Status);
        return Status;
    }

    // remove entries in uuid root (tree 9)
    if (Vcb->uuid_root) {
        // the 16-byte UUID is split across the key: first 8 bytes in obj_id, last 8 in offset
        RtlCopyMemory(&searchkey.obj_id, &r->root_item.uuid.uuid[0], sizeof(uint64_t));
        searchkey.obj_type = TYPE_SUBVOL_UUID;
        RtlCopyMemory(&searchkey.offset, &r->root_item.uuid.uuid[sizeof(uint64_t)], sizeof(uint64_t));

        // an all-zero UUID is not looked up
        if (searchkey.obj_id != 0 || searchkey.offset != 0) {
            Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, false, Irp);
            if (!NT_SUCCESS(Status)) {
                WARN("find_item returned %08x\n", Status);
            } else {
                if (!keycmp(tp.item->key, searchkey)) {
                    Status = delete_tree_item(Vcb, &tp);
                    if (!NT_SUCCESS(Status)) {
                        ERR("delete_tree_item returned %08x\n", Status);
                        return Status;
                    }
                } else
                    WARN("could not find (%I64x,%x,%I64x) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
            }
        }

        // if rtransid is set, also remove this root from the entry for its received UUID
        if (r->root_item.rtransid > 0) {
            RtlCopyMemory(&searchkey.obj_id, &r->root_item.received_uuid.uuid[0], sizeof(uint64_t));
            searchkey.obj_type = TYPE_SUBVOL_REC_UUID;
            RtlCopyMemory(&searchkey.offset, &r->root_item.received_uuid.uuid[sizeof(uint64_t)], sizeof(uint64_t));

            Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, false, Irp);
            if (!NT_SUCCESS(Status))
                WARN("find_item returned %08x\n", Status);
            else {
                if (!keycmp(tp.item->key, searchkey)) {
                    if (tp.item->size == sizeof(uint64_t)) {
                        // the item holds a single root id: delete it if it is ours
                        uint64_t* id = (uint64_t*)tp.item->data;

                        if (*id == r->id) {
                            Status = delete_tree_item(Vcb, &tp);
                            if (!NT_SUCCESS(Status)) {
                                ERR("delete_tree_item returned %08x\n", Status);
                                return Status;
                            }
                        }
                    } else if (tp.item->size > sizeof(uint64_t)) {
                        // the item holds an array of root ids: rewrite it without ours
                        ULONG i;
                        uint64_t* ids = (uint64_t*)tp.item->data;

                        for (i = 0; i < tp.item->size / sizeof(uint64_t); i++) {
                            if (ids[i] == r->id) {
                                uint64_t* ne;

                                ne = ExAllocatePoolWithTag(PagedPool, tp.item->size - sizeof(uint64_t), ALLOC_TAG);
                                if (!ne) {
                                    ERR("out of memory\n");
                                    return STATUS_INSUFFICIENT_RESOURCES;
                                }

                                // copy the ids before position i...
                                if (i > 0)
                                    RtlCopyMemory(ne, ids, sizeof(uint64_t) * i);

                                // ...and whatever follows it
                                if ((i + 1) * sizeof(uint64_t) < tp.item->size)
                                    RtlCopyMemory(&ne[i], &ids[i + 1], tp.item->size - ((i + 1) * sizeof(uint64_t)));

                                Status = delete_tree_item(Vcb, &tp);
                                if (!NT_SUCCESS(Status)) {
                                    ERR("delete_tree_item returned %08x\n", Status);
                                    ExFreePool(ne);
                                    return Status;
                                }

                                Status = insert_tree_item(Vcb, Vcb->uuid_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
                                                          ne, tp.item->size - sizeof(uint64_t), NULL, Irp);
                                if (!NT_SUCCESS(Status)) {
                                    ERR("insert_tree_item returned %08x\n", Status);
                                    ExFreePool(ne);
                                    return Status;
                                }

                                // only the first matching id is removed
                                break;
                            }
                        }
                    }
                } else
                    WARN("could not find (%I64x,%x,%I64x) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
            }
        }
    }

    // delete ROOT_ITEM

    searchkey.obj_id = r->id;
    searchkey.obj_type = TYPE_ROOT_ITEM;
    searchkey.offset = 0xffffffffffffffff;

    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
    if (!NT_SUCCESS(Status)) {
        ERR("find_item returned %08x\n", Status);
        return Status;
    }

    // the key offset is not compared - only the object id and type have to match
    if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
        Status = delete_tree_item(Vcb, &tp);

        if (!NT_SUCCESS(Status)) {
            ERR("delete_tree_item returned %08x\n", Status);
            return Status;
        }
    } else
        WARN("could not find (%I64x,%x,%I64x) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);

    // delete items in tree cache

    free_trees_root(Vcb, r);

    return STATUS_SUCCESS;
}
4030
4031 static NTSTATUS drop_roots(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
4032 LIST_ENTRY *le = Vcb->drop_roots.Flink, *le2;
4033 NTSTATUS Status;
4034
4035 while (le != &Vcb->drop_roots) {
4036 root* r = CONTAINING_RECORD(le, root, list_entry);
4037
4038 le2 = le->Flink;
4039
4040 Status = drop_root(Vcb, r, Irp, rollback);
4041 if (!NT_SUCCESS(Status)) {
4042 ERR("drop_root(%I64x) returned %08x\n", r->id, Status);
4043 return Status;
4044 }
4045
4046 le = le2;
4047 }
4048
4049 return STATUS_SUCCESS;
4050 }
4051
4052 NTSTATUS update_dev_item(device_extension* Vcb, device* device, PIRP Irp) {
4053 KEY searchkey;
4054 traverse_ptr tp;
4055 DEV_ITEM* di;
4056 NTSTATUS Status;
4057
4058 searchkey.obj_id = 1;
4059 searchkey.obj_type = TYPE_DEV_ITEM;
4060 searchkey.offset = device->devitem.dev_id;
4061
4062 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, false, Irp);
4063 if (!NT_SUCCESS(Status)) {
4064 ERR("error - find_item returned %08x\n", Status);
4065 return Status;
4066 }
4067
4068 if (keycmp(tp.item->key, searchkey)) {
4069 ERR("error - could not find DEV_ITEM for device %I64x\n", device->devitem.dev_id);
4070 return STATUS_INTERNAL_ERROR;
4071 }
4072
4073 Status = delete_tree_item(Vcb, &tp);
4074 if (!NT_SUCCESS(Status)) {
4075 ERR("delete_tree_item returned %08x\n", Status);
4076 return Status;
4077 }
4078
4079 di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
4080 if (!di) {
4081 ERR("out of memory\n");
4082 return STATUS_INSUFFICIENT_RESOURCES;
4083 }
4084
4085 RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM));
4086
4087 Status = insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp);
4088 if (!NT_SUCCESS(Status)) {
4089 ERR("insert_tree_item returned %08x\n", Status);
4090 ExFreePool(di);
4091 return Status;
4092 }
4093
4094 return STATUS_SUCCESS;
4095 }
4096
4097 static void regen_bootstrap(device_extension* Vcb) {
4098 sys_chunk* sc2;
4099 USHORT i = 0;
4100 LIST_ENTRY* le;
4101
4102 i = 0;
4103 le = Vcb->sys_chunks.Flink;
4104 while (le != &Vcb->sys_chunks) {
4105 sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
4106
4107 TRACE("%I64x,%x,%I64x\n", sc2->key.obj_id, sc2->key.obj_type, sc2->key.offset);
4108
4109 RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], &sc2->key, sizeof(KEY));
4110 i += sizeof(KEY);
4111
4112 RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], sc2->data, sc2->size);
4113 i += sc2->size;
4114
4115 le = le->Flink;
4116 }
4117 }
4118
4119 static NTSTATUS add_to_bootstrap(device_extension* Vcb, uint64_t obj_id, uint8_t obj_type, uint64_t offset, void* data, uint16_t size) {
4120 sys_chunk* sc;
4121 LIST_ENTRY* le;
4122
4123 if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) {
4124 ERR("error - bootstrap is full\n");
4125 return STATUS_INTERNAL_ERROR;
4126 }
4127
4128 sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG);
4129 if (!sc) {
4130 ERR("out of memory\n");
4131 return STATUS_INSUFFICIENT_RESOURCES;
4132 }
4133
4134 sc->key.obj_id = obj_id;
4135 sc->key.obj_type = obj_type;
4136 sc->key.offset = offset;
4137 sc->size = size;
4138 sc->data = ExAllocatePoolWithTag(PagedPool, sc->size, ALLOC_TAG);
4139 if (!sc->data) {
4140 ERR("out of memory\n");
4141 ExFreePool(sc);
4142 return STATUS_INSUFFICIENT_RESOURCES;
4143 }
4144
4145 RtlCopyMemory(sc->data, data, sc->size);
4146
4147 le = Vcb->sys_chunks.Flink;
4148 while (le != &Vcb->sys_chunks) {
4149 sys_chunk* sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
4150
4151 if (keycmp(sc2->key, sc->key) == 1)
4152 break;
4153
4154 le = le->Flink;
4155 }
4156 InsertTailList(le, &sc->list_entry);
4157
4158 Vcb->superblock.n += sizeof(KEY) + size;
4159
4160 regen_bootstrap(Vcb);
4161
4162 return STATUS_SUCCESS;
4163 }
4164
4165 static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp) {
4166 CHUNK_ITEM* ci;
4167 CHUNK_ITEM_STRIPE* cis;
4168 BLOCK_GROUP_ITEM* bgi;
4169 uint16_t i, factor;
4170 NTSTATUS Status;
4171
4172 ci = ExAllocatePoolWithTag(PagedPool, c->size, ALLOC_TAG);
4173 if (!ci) {
4174 ERR("out of memory\n");
4175 return STATUS_INSUFFICIENT_RESOURCES;
4176 }
4177
4178 RtlCopyMemory(ci, c->chunk_item, c->size);
4179
4180 Status = insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size, NULL, Irp);
4181 if (!NT_SUCCESS(Status)) {
4182 ERR("insert_tree_item failed\n");
4183 ExFreePool(ci);
4184 return Status;
4185 }
4186
4187 if (c->chunk_item->type & BLOCK_FLAG_SYSTEM) {
4188 Status = add_to_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size);
4189 if (!NT_SUCCESS(Status)) {
4190 ERR("add_to_bootstrap returned %08x\n", Status);
4191 return Status;
4192 }
4193 }
4194
4195 // add BLOCK_GROUP_ITEM to tree 2
4196
4197 bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG);
4198 if (!bgi) {
4199 ERR("out of memory\n");
4200 return STATUS_INSUFFICIENT_RESOURCES;
4201 }
4202
4203 bgi->used = c->used;
4204 bgi->chunk_tree = 0x100;
4205 bgi->flags = c->chunk_item->type;
4206
4207 Status = insert_tree_item(Vcb, Vcb->extent_root, c->offset, TYPE_BLOCK_GROUP_ITEM, c->chunk_item->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, Irp);
4208 if (!NT_SUCCESS(Status)) {
4209 ERR("insert_tree_item failed\n");
4210 ExFreePool(bgi);
4211 return Status;
4212 }
4213
4214 if (c->chunk_item->type & BLOCK_FLAG_RAID0)
4215 factor = c->chunk_item->num_stripes;
4216 else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
4217 factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
4218 else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
4219 factor = c->chunk_item->num_stripes - 1;
4220 else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
4221 factor = c->chunk_item->num_stripes - 2;
4222 else // SINGLE, DUPLICATE, RAID1
4223 factor = 1;
4224
4225 // add DEV_EXTENTs to tree 4
4226
4227 cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
4228
4229 for (i = 0; i < c->chunk_item->num_stripes; i++) {
4230 DEV_EXTENT* de;
4231
4232 de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG);
4233 if (!de) {
4234 ERR("out of memory\n");
4235 return STATUS_INSUFFICIENT_RESOURCES;
4236 }
4237
4238 de->chunktree = Vcb->chunk_root->id;
4239 de->objid = 0x100;
4240 de->address = c->offset;
4241 de->length = c->chunk_item->size / factor;
4242 de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid;
4243
4244 Status = insert_tree_item(Vcb, Vcb->dev_root, c->devices[i]->devitem.dev_id, TYPE_DEV_EXTENT, cis[i].offset, de, sizeof(DEV_EXTENT), NULL, Irp);
4245 if (!NT_SUCCESS(Status)) {
4246 ERR("insert_tree_item returned %08x\n", Status);
4247 ExFreePool(de);
4248 return Status;
4249 }
4250
4251 // FIXME - no point in calling this twice for the same device
4252 Status = update_dev_item(Vcb, c->devices[i], Irp);
4253 if (!NT_SUCCESS(Status)) {
4254 ERR("update_dev_item returned %08x\n", Status);
4255 return Status;
4256 }
4257 }
4258
4259 c->created = false;
4260 c->oldused = c->used;
4261
4262 Vcb->superblock.bytes_used += chunk_estimate_phys_size(Vcb, c, c->used);
4263
4264 return STATUS_SUCCESS;
4265 }
4266
4267 static void remove_from_bootstrap(device_extension* Vcb, uint64_t obj_id, uint8_t obj_type, uint64_t offset) {
4268 sys_chunk* sc2;
4269 LIST_ENTRY* le;
4270
4271 le = Vcb->sys_chunks.Flink;
4272 while (le != &Vcb->sys_chunks) {
4273 sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
4274
4275 if (sc2->key.obj_id == obj_id && sc2->key.obj_type == obj_type && sc2->key.offset == offset) {
4276 RemoveEntryList(&sc2->list_entry);
4277
4278 Vcb->superblock.n -= sizeof(KEY) + sc2->size;
4279
4280 ExFreePool(sc2->data);
4281 ExFreePool(sc2);
4282 regen_bootstrap(Vcb);
4283 return;
4284 }
4285
4286 le = le->Flink;
4287 }
4288 }
4289
4290 static NTSTATUS set_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, uint64_t inode, char* name, uint16_t namelen,
4291 uint32_t crc32, uint8_t* data, uint16_t datalen) {
4292 NTSTATUS Status;
4293 uint16_t xasize;
4294 DIR_ITEM* xa;
4295
4296 TRACE("(%p, %I64x, %I64x, %.*s, %08x, %p, %u)\n", Vcb, subvol->id, inode, namelen, name, crc32, data, datalen);
4297
4298 xasize = (uint16_t)offsetof(DIR_ITEM, name[0]) + namelen + datalen;
4299
4300 xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG);
4301 if (!xa) {
4302 ERR("out of memory\n");
4303 return STATUS_INSUFFICIENT_RESOURCES;
4304 }
4305
4306 xa->key.obj_id = 0;
4307 xa->key.obj_type = 0;
4308 xa->key.offset = 0;
4309 xa->transid = Vcb->superblock.generation;
4310 xa->m = datalen;
4311 xa->n = namelen;
4312 xa->type = BTRFS_TYPE_EA;
4313 RtlCopyMemory(xa->name, name, namelen);
4314 RtlCopyMemory(xa->name + namelen, data, datalen);
4315
4316 Status = insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_SetXattr);
4317 if (!NT_SUCCESS(Status)) {
4318 ERR("insert_tree_item_batch returned %08x\n", Status);
4319 ExFreePool(xa);
4320 return Status;
4321 }
4322
4323 return STATUS_SUCCESS;
4324 }
4325
4326 static NTSTATUS delete_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, uint64_t inode, char* name,
4327 uint16_t namelen, uint32_t crc32) {
4328 NTSTATUS Status;
4329 uint16_t xasize;
4330 DIR_ITEM* xa;
4331
4332 TRACE("(%p, %I64x, %I64x, %.*s, %08x)\n", Vcb, subvol->id, inode, namelen, name, crc32);
4333
4334 xasize = (uint16_t)offsetof(DIR_ITEM, name[0]) + namelen;
4335
4336 xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG);
4337 if (!xa) {
4338 ERR("out of memory\n");
4339 return STATUS_INSUFFICIENT_RESOURCES;
4340 }
4341
4342 xa->key.obj_id = 0;
4343 xa->key.obj_type = 0;
4344 xa->key.offset = 0;
4345 xa->transid = Vcb->superblock.generation;
4346 xa->m = 0;
4347 xa->n = namelen;
4348 xa->type = BTRFS_TYPE_EA;
4349 RtlCopyMemory(xa->name, name, namelen);
4350
4351 Status = insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_DeleteXattr);
4352 if (!NT_SUCCESS(Status)) {
4353 ERR("insert_tree_item_batch returned %08x\n", Status);
4354 ExFreePool(xa);
4355 return Status;
4356 }
4357
4358 return STATUS_SUCCESS;
4359 }
4360
4361 static NTSTATUS insert_sparse_extent(fcb* fcb, LIST_ENTRY* batchlist, uint64_t start, uint64_t length) {
4362 NTSTATUS Status;
4363 EXTENT_DATA* ed;
4364 EXTENT_DATA2* ed2;
4365
4366 TRACE("((%I64x, %I64x), %I64x, %I64x)\n", fcb->subvol->id, fcb->inode, start, length);
4367
4368 ed = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
4369 if (!ed) {
4370 ERR("out of memory\n");
4371 return STATUS_INSUFFICIENT_RESOURCES;
4372 }
4373
4374 ed->generation = fcb->Vcb->superblock.generation;
4375 ed->decoded_size = length;
4376 ed->compression = BTRFS_COMPRESSION_NONE;
4377 ed->encryption = BTRFS_ENCRYPTION_NONE;
4378 ed->encoding = BTRFS_ENCODING_NONE;
4379 ed->type = EXTENT_TYPE_REGULAR;
4380
4381 ed2 = (EXTENT_DATA2*)ed->data;
4382 ed2->address = 0;
4383 ed2->size = 0;
4384 ed2->offset = 0;
4385 ed2->num_bytes = length;
4386
4387 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, start, ed, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), Batch_Insert);
4388 if (!NT_SUCCESS(Status)) {
4389 ERR("insert_tree_item_batch returned %08x\n", Status);
4390 ExFreePool(ed);
4391 return Status;
4392 }
4393
4394 return STATUS_SUCCESS;
4395 }
4396
4397 #ifdef _MSC_VER
4398 #pragma warning(push)
4399 #pragma warning(suppress: 28194)
4400 #endif
4401 NTSTATUS insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, root* r, uint64_t objid, uint8_t objtype, uint64_t offset,
4402 _In_opt_ _When_(return >= 0, __drv_aliasesMem) void* data, uint16_t datalen, enum batch_operation operation) {
4403 LIST_ENTRY* le;
4404 batch_root* br = NULL;
4405 batch_item* bi;
4406
4407 le = batchlist->Flink;
4408 while (le != batchlist) {
4409 batch_root* br2 = CONTAINING_RECORD(le, batch_root, list_entry);
4410
4411 if (br2->r == r) {
4412 br = br2;
4413 break;
4414 }
4415
4416 le = le->Flink;
4417 }
4418
4419 if (!br) {
4420 br = ExAllocatePoolWithTag(PagedPool, sizeof(batch_root), ALLOC_TAG);
4421 if (!br) {
4422 ERR("out of memory\n");
4423 return STATUS_INSUFFICIENT_RESOURCES;
4424 }
4425
4426 br->r = r;
4427 InitializeListHead(&br->items);
4428 InsertTailList(batchlist, &br->list_entry);
4429 }
4430
4431 bi = ExAllocateFromPagedLookasideList(&Vcb->batch_item_lookaside);
4432 if (!bi) {
4433 ERR("out of memory\n");
4434 return STATUS_INSUFFICIENT_RESOURCES;
4435 }
4436
4437 bi->key.obj_id = objid;
4438 bi->key.obj_type = objtype;
4439 bi->key.offset = offset;
4440 bi->data = data;
4441 bi->datalen = datalen;
4442 bi->operation = operation;
4443
4444 le = br->items.Blink;
4445 while (le != &br->items) {
4446 batch_item* bi2 = CONTAINING_RECORD(le, batch_item, list_entry);
4447 int cmp = keycmp(bi2->key, bi->key);
4448
4449 if (cmp == -1 || (cmp == 0 && bi->operation >= bi2->operation)) {
4450 InsertHeadList(&bi2->list_entry, &bi->list_entry);
4451 return STATUS_SUCCESS;
4452 }
4453
4454 le = le->Blink;
4455 }
4456
4457 InsertHeadList(&br->items, &bi->list_entry);
4458
4459 return STATUS_SUCCESS;
4460 }
4461 #ifdef _MSC_VER
4462 #pragma warning(pop)
4463 #endif
4464
// Working record used by rationalize_extents: one entry per distinct on-disk
// extent address referenced by an FCB's uncompressed, unique extents.
typedef struct {
    uint64_t address;       // start address of the on-disk extent (from EXTENT_DATA2.address)
    uint64_t length;        // length of the on-disk extent (from EXTENT_DATA2.size)
    uint64_t offset;        // file offset the start of the extent corresponds to (ext->offset - ed2->offset)
    bool changed;           // set when a following range has been merged into this one
    chunk* chunk;           // chunk containing address, as returned by get_chunk_from_address
    uint64_t skip_start;    // smallest ed2->offset of any reference, i.e. bytes at the start no reference uses
    uint64_t skip_end;      // smallest unused tail of any reference, i.e. bytes at the end no reference uses
    LIST_ENTRY list_entry;  // link in the address-ordered list built by rationalize_extents
} extent_range;
4475
4476 static void rationalize_extents(fcb* fcb, PIRP Irp) {
4477 LIST_ENTRY* le;
4478 LIST_ENTRY extent_ranges;
4479 extent_range* er;
4480 bool changed = false, truncating = false;
4481 uint32_t num_extents = 0;
4482
4483 InitializeListHead(&extent_ranges);
4484
4485 le = fcb->extents.Flink;
4486 while (le != &fcb->extents) {
4487 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4488
4489 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
4490 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4491
4492 if (ed2->size != 0) {
4493 LIST_ENTRY* le2;
4494
4495 le2 = extent_ranges.Flink;
4496 while (le2 != &extent_ranges) {
4497 extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
4498
4499 if (er2->address == ed2->address) {
4500 er2->skip_start = min(er2->skip_start, ed2->offset);
4501 er2->skip_end = min(er2->skip_end, ed2->size - ed2->offset - ed2->num_bytes);
4502 goto cont;
4503 } else if (er2->address > ed2->address)
4504 break;
4505
4506 le2 = le2->Flink;
4507 }
4508
4509 er = ExAllocatePoolWithTag(PagedPool, sizeof(extent_range), ALLOC_TAG); // FIXME - should be from lookaside?
4510 if (!er) {
4511 ERR("out of memory\n");
4512 goto end;
4513 }
4514
4515 er->address = ed2->address;
4516 er->length = ed2->size;
4517 er->offset = ext->offset - ed2->offset;
4518 er->changed = false;
4519 er->chunk = NULL;
4520 er->skip_start = ed2->offset;
4521 er->skip_end = ed2->size - ed2->offset - ed2->num_bytes;
4522
4523 if (er->skip_start != 0 || er->skip_end != 0)
4524 truncating = true;
4525
4526 InsertHeadList(le2->Blink, &er->list_entry);
4527 num_extents++;
4528 }
4529 }
4530
4531 cont:
4532 le = le->Flink;
4533 }
4534
4535 if (num_extents == 0 || (num_extents == 1 && !truncating))
4536 goto end;
4537
4538 le = extent_ranges.Flink;
4539 while (le != &extent_ranges) {
4540 er = CONTAINING_RECORD(le, extent_range, list_entry);
4541
4542 if (!er->chunk) {
4543 LIST_ENTRY* le2;
4544
4545 er->chunk = get_chunk_from_address(fcb->Vcb, er->address);
4546
4547 if (!er->chunk) {
4548 ERR("get_chunk_from_address(%I64x) failed\n", er->address);
4549 goto end;
4550 }
4551
4552 le2 = le->Flink;
4553 while (le2 != &extent_ranges) {
4554 extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
4555
4556 if (!er2->chunk && er2->address >= er->chunk->offset && er2->address < er->chunk->offset + er->chunk->chunk_item->size)
4557 er2->chunk = er->chunk;
4558
4559 le2 = le2->Flink;
4560 }
4561 }
4562
4563 le = le->Flink;
4564 }
4565
4566 if (truncating) {
4567 // truncate beginning or end of extent if unused
4568
4569 le = extent_ranges.Flink;
4570 while (le != &extent_ranges) {
4571 er = CONTAINING_RECORD(le, extent_range, list_entry);
4572
4573 if (er->skip_start > 0) {
4574 LIST_ENTRY* le2 = fcb->extents.Flink;
4575 while (le2 != &fcb->extents) {
4576 extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
4577
4578 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
4579 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4580
4581 if (ed2->size != 0 && ed2->address == er->address) {
4582 NTSTATUS Status;
4583
4584 Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4585 -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, true, Irp);
4586 if (!NT_SUCCESS(Status)) {
4587 ERR("update_changed_extent_ref returned %08x\n", Status);
4588 goto end;
4589 }
4590
4591 ext->extent_data.decoded_size -= er->skip_start;
4592 ed2->size -= er->skip_start;
4593 ed2->address += er->skip_start;
4594 ed2->offset -= er->skip_start;
4595
4596 add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4597 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
4598 }
4599 }
4600
4601 le2 = le2->Flink;
4602 }
4603
4604 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM))
4605 add_checksum_entry(fcb->Vcb, er->address, (ULONG)(er->skip_start / fcb->Vcb->superblock.sector_size), NULL, NULL);
4606
4607 acquire_chunk_lock(er->chunk, fcb->Vcb);
4608
4609 if (!er->chunk->cache_loaded) {
4610 NTSTATUS Status = load_cache_chunk(fcb->Vcb, er->chunk, NULL);
4611
4612 if (!NT_SUCCESS(Status)) {
4613 ERR("load_cache_chunk returned %08x\n", Status);
4614 release_chunk_lock(er->chunk, fcb->Vcb);
4615 goto end;
4616 }
4617 }
4618
4619 er->chunk->used -= er->skip_start;
4620
4621 space_list_add(er->chunk, er->address, er->skip_start, NULL);
4622
4623 release_chunk_lock(er->chunk, fcb->Vcb);
4624
4625 er->address += er->skip_start;
4626 er->length -= er->skip_start;
4627 }
4628
4629 if (er->skip_end > 0) {
4630 LIST_ENTRY* le2 = fcb->extents.Flink;
4631 while (le2 != &fcb->extents) {
4632 extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
4633
4634 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
4635 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4636
4637 if (ed2->size != 0 && ed2->address == er->address) {
4638 NTSTATUS Status;
4639
4640 Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4641 -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, true, Irp);
4642 if (!NT_SUCCESS(Status)) {
4643 ERR("update_changed_extent_ref returned %08x\n", Status);
4644 goto end;
4645 }
4646
4647 ext->extent_data.decoded_size -= er->skip_end;
4648 ed2->size -= er->skip_end;
4649
4650 add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4651 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
4652 }
4653 }
4654
4655 le2 = le2->Flink;
4656 }
4657
4658 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM))
4659 add_checksum_entry(fcb->Vcb, er->address + er->length - er->skip_end, (ULONG)(er->skip_end / fcb->Vcb->superblock.sector_size), NULL, NULL);
4660
4661 acquire_chunk_lock(er->chunk, fcb->Vcb);
4662
4663 if (!er->chunk->cache_loaded) {
4664 NTSTATUS Status = load_cache_chunk(fcb->Vcb, er->chunk, NULL);
4665
4666 if (!NT_SUCCESS(Status)) {
4667 ERR("load_cache_chunk returned %08x\n", Status);
4668 release_chunk_lock(er->chunk, fcb->Vcb);
4669 goto end;
4670 }
4671 }
4672
4673 er->chunk->used -= er->skip_end;
4674
4675 space_list_add(er->chunk, er->address + er->length - er->skip_end, er->skip_end, NULL);
4676
4677 release_chunk_lock(er->chunk, fcb->Vcb);
4678
4679 er->length -= er->skip_end;
4680 }
4681
4682 le = le->Flink;
4683 }
4684 }
4685
4686 if (num_extents < 2)
4687 goto end;
4688
4689 // merge together adjacent extents
4690 le = extent_ranges.Flink;
4691 while (le != &extent_ranges) {
4692 er = CONTAINING_RECORD(le, extent_range, list_entry);
4693
4694 if (le->Flink != &extent_ranges && er->length < MAX_EXTENT_SIZE) {
4695 extent_range* er2 = CONTAINING_RECORD(le->Flink, extent_range, list_entry);
4696
4697 if (er->chunk == er2->chunk) {
4698 if (er2->address == er->address + er->length && er2->offset >= er->offset + er->length) {
4699 if (er->length + er2->length <= MAX_EXTENT_SIZE) {
4700 er->length += er2->length;
4701 er->changed = true;
4702
4703 RemoveEntryList(&er2->list_entry);
4704 ExFreePool(er2);
4705
4706 changed = true;
4707 continue;
4708 }
4709 }
4710 }
4711 }
4712
4713 le = le->Flink;
4714 }
4715
4716 if (!changed)
4717 goto end;
4718
4719 le = fcb->extents.Flink;
4720 while (le != &fcb->extents) {
4721 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4722
4723 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
4724 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4725
4726 if (ed2->size != 0) {
4727 LIST_ENTRY* le2;
4728
4729 le2 = extent_ranges.Flink;
4730 while (le2 != &extent_ranges) {
4731 extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
4732
4733 if (ed2->address >= er2->address && ed2->address + ed2->size <= er2->address + er2->length && er2->changed) {
4734 NTSTATUS Status;
4735
4736 Status = update_changed_extent_ref(fcb->Vcb, er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4737 -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, true, Irp);
4738 if (!NT_SUCCESS(Status)) {
4739 ERR("update_changed_extent_ref returned %08x\n", Status);
4740 goto end;
4741 }
4742
4743 ed2->offset += ed2->address - er2->address;
4744 ed2->address = er2->address;
4745 ed2->size = er2->length;
4746 ext->extent_data.decoded_size = ed2->size;
4747
4748 add_changed_extent_ref(er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4749 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
4750
4751 break;
4752 }
4753
4754 le2 = le2->Flink;
4755 }
4756 }
4757 }
4758
4759 le = le->Flink;
4760 }
4761
4762 end:
4763 while (!IsListEmpty(&extent_ranges)) {
4764 le = RemoveHeadList(&extent_ranges);
4765 er = CONTAINING_RECORD(le, extent_range, list_entry);
4766
4767 ExFreePool(er);
4768 }
4769 }
4770
4771 NTSTATUS flush_fcb(fcb* fcb, bool cache, LIST_ENTRY* batchlist, PIRP Irp) {
4772 traverse_ptr tp;
4773 KEY searchkey;
4774 NTSTATUS Status;
4775 INODE_ITEM* ii;
4776 uint64_t ii_offset;
4777 #ifdef DEBUG_PARANOID
4778 uint64_t old_size = 0;
4779 bool extents_changed;
4780 #endif
4781
4782 if (fcb->ads) {
4783 if (fcb->deleted) {
4784 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adsxattr.Length, fcb->adshash);
4785 if (!NT_SUCCESS(Status)) {
4786 ERR("delete_xattr returned %08x\n", Status);
4787 goto end;
4788 }
4789 } else {
4790 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adsxattr.Length,
4791 fcb->adshash, (uint8_t*)fcb->adsdata.Buffer, fcb->adsdata.Length);
4792 if (!NT_SUCCESS(Status)) {
4793 ERR("set_xattr returned %08x\n", Status);
4794 goto end;
4795 }
4796 }
4797
4798 Status = STATUS_SUCCESS;
4799 goto end;
4800 }
4801
4802 if (fcb->deleted) {
4803 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0xffffffffffffffff, NULL, 0, Batch_DeleteInode);
4804 if (!NT_SUCCESS(Status)) {
4805 ERR("insert_tree_item_batch returned %08x\n", Status);
4806 goto end;
4807 }
4808
4809 if (fcb->marked_as_orphan) {
4810 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, BTRFS_ORPHAN_INODE_OBJID, TYPE_ORPHAN_INODE,
4811 fcb->inode, NULL, 0, Batch_Delete);
4812 if (!NT_SUCCESS(Status)) {
4813 ERR("insert_tree_item_batch returned %08x\n", Status);
4814 goto end;
4815 }
4816 }
4817
4818 Status = STATUS_SUCCESS;
4819 goto end;
4820 }
4821
4822 #ifdef DEBUG_PARANOID
4823 extents_changed = fcb->extents_changed;
4824 #endif
4825
4826 if (fcb->extents_changed) {
4827 LIST_ENTRY* le;
4828 bool prealloc = false, extents_inline = false;
4829 uint64_t last_end;
4830
4831 // delete ignored extent items
4832 le = fcb->extents.Flink;
4833 while (le != &fcb->extents) {
4834 LIST_ENTRY* le2 = le->Flink;
4835 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4836
4837 if (ext->ignore) {
4838 RemoveEntryList(&ext->list_entry);
4839
4840 if (ext->csum)
4841 ExFreePool(ext->csum);
4842
4843 ExFreePool(ext);
4844 }
4845
4846 le = le2;
4847 }
4848
4849 le = fcb->extents.Flink;
4850 while (le != &fcb->extents) {
4851 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4852
4853 if (ext->inserted && ext->csum && ext->extent_data.type == EXTENT_TYPE_REGULAR) {
4854 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4855
4856 if (ed2->size > 0) { // not sparse
4857 if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE)
4858 add_checksum_entry(fcb->Vcb, ed2->address + ed2->offset, (ULONG)(ed2->num_bytes / fcb->Vcb->superblock.sector_size), ext->csum, Irp);
4859 else
4860 add_checksum_entry(fcb->Vcb, ed2->address, (ULONG)(ed2->size / fcb->Vcb->superblock.sector_size), ext->csum, Irp);
4861 }
4862 }
4863
4864 le = le->Flink;
4865 }
4866
4867 if (!IsListEmpty(&fcb->extents)) {
4868 rationalize_extents(fcb, Irp);
4869
4870 // merge together adjacent EXTENT_DATAs pointing to same extent
4871
4872 le = fcb->extents.Flink;
4873 while (le != &fcb->extents) {
4874 LIST_ENTRY* le2 = le->Flink;
4875 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4876
4877 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && le->Flink != &fcb->extents) {
4878 extent* nextext = CONTAINING_RECORD(le->Flink, extent, list_entry);
4879
4880 if (ext->extent_data.type == nextext->extent_data.type) {
4881 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4882 EXTENT_DATA2* ned2 = (EXTENT_DATA2*)nextext->extent_data.data;
4883
4884 if (ed2->size != 0 && ed2->address == ned2->address && ed2->size == ned2->size &&
4885 nextext->offset == ext->offset + ed2->num_bytes && ned2->offset == ed2->offset + ed2->num_bytes) {
4886 chunk* c;
4887
4888 if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->csum) {
4889 ULONG len = (ULONG)((ed2->num_bytes + ned2->num_bytes) / fcb->Vcb->superblock.sector_size);
4890 uint32_t* csum;
4891
4892 csum = ExAllocatePoolWithTag(NonPagedPool, len * sizeof(uint32_t), ALLOC_TAG);
4893 if (!csum) {
4894 ERR("out of memory\n");
4895 Status = STATUS_INSUFFICIENT_RESOURCES;
4896 goto end;
4897 }
4898
4899 RtlCopyMemory(csum, ext->csum, (ULONG)(ed2->num_bytes * sizeof(uint32_t) / fcb->Vcb->superblock.sector_size));
4900 RtlCopyMemory(&csum[ed2->num_bytes / fcb->Vcb->superblock.sector_size], nextext->csum,
4901 (ULONG)(ned2->num_bytes * sizeof(uint32_t) / fcb->Vcb->superblock.sector_size));
4902
4903 ExFreePool(ext->csum);
4904 ext->csum = csum;
4905 }
4906
4907 ext->extent_data.generation = fcb->Vcb->superblock.generation;
4908 ed2->num_bytes += ned2->num_bytes;
4909
4910 RemoveEntryList(&nextext->list_entry);
4911
4912 if (nextext->csum)
4913 ExFreePool(nextext->csum);
4914
4915 ExFreePool(nextext);
4916
4917 c = get_chunk_from_address(fcb->Vcb, ed2->address);
4918
4919 if (!c) {
4920 ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
4921 } else {
4922 Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1,
4923 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
4924 if (!NT_SUCCESS(Status)) {
4925 ERR("update_changed_extent_ref returned %08x\n", Status);
4926 goto end;
4927 }
4928 }
4929
4930 le2 = le;
4931 }
4932 }
4933 }
4934
4935 le = le2;
4936 }
4937 }
4938
4939 if (!fcb->created) {
4940 // delete existing EXTENT_DATA items
4941
4942 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, 0, NULL, 0, Batch_DeleteExtentData);
4943 if (!NT_SUCCESS(Status)) {
4944 ERR("insert_tree_item_batch returned %08x\n", Status);
4945 goto end;
4946 }
4947 }
4948
4949 // add new EXTENT_DATAs
4950
4951 last_end = 0;
4952
4953 le = fcb->extents.Flink;
4954 while (le != &fcb->extents) {
4955 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4956 EXTENT_DATA* ed;
4957
4958 ext->inserted = false;
4959
4960 if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && ext->offset > last_end) {
4961 Status = insert_sparse_extent(fcb, batchlist, last_end, ext->offset - last_end);
4962 if (!NT_SUCCESS(Status)) {
4963 ERR("insert_sparse_extent returned %08x\n", Status);
4964 goto end;
4965 }
4966 }
4967
4968 ed = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG);
4969 if (!ed) {
4970 ERR("out of memory\n");
4971 Status = STATUS_INSUFFICIENT_RESOURCES;
4972 goto end;
4973 }
4974
4975 RtlCopyMemory(ed, &ext->extent_data, ext->datalen);
4976
4977 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, ext->offset,
4978 ed, ext->datalen, Batch_Insert);
4979 if (!NT_SUCCESS(Status)) {
4980 ERR("insert_tree_item_batch returned %08x\n", Status);
4981 goto end;
4982 }
4983
4984 if (ed->type == EXTENT_TYPE_PREALLOC)
4985 prealloc = true;
4986
4987 if (ed->type == EXTENT_TYPE_INLINE)
4988 extents_inline = true;
4989
4990 if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES)) {
4991 if (ed->type == EXTENT_TYPE_INLINE)
4992 last_end = ext->offset + ed->decoded_size;
4993 else {
4994 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
4995
4996 last_end = ext->offset + ed2->num_bytes;
4997 }
4998 }
4999
5000 le = le->Flink;
5001 }
5002
5003 if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && !extents_inline &&
5004 sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) > last_end) {
5005 Status = insert_sparse_extent(fcb, batchlist, last_end, sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) - last_end);
5006 if (!NT_SUCCESS(Status)) {
5007 ERR("insert_sparse_extent returned %08x\n", Status);
5008 goto end;
5009 }
5010 }
5011
5012 // update prealloc flag in INODE_ITEM
5013
5014 if (!prealloc)
5015 fcb->inode_item.flags &= ~BTRFS_INODE_PREALLOC;
5016 else
5017 fcb->inode_item.flags |= BTRFS_INODE_PREALLOC;
5018
5019 fcb->inode_item_changed = true;
5020
5021 fcb->extents_changed = false;
5022 }
5023
5024 if ((!fcb->created && fcb->inode_item_changed) || cache) {
5025 searchkey.obj_id = fcb->inode;
5026 searchkey.obj_type = TYPE_INODE_ITEM;
5027 searchkey.offset = 0xffffffffffffffff;
5028
5029 Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, false, Irp);
5030 if (!NT_SUCCESS(Status)) {
5031 ERR("error - find_item returned %08x\n", Status);
5032 goto end;
5033 }
5034
5035 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
5036 if (cache) {
5037 ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
5038 if (!ii) {
5039 ERR("out of memory\n");
5040 Status = STATUS_INSUFFICIENT_RESOURCES;
5041 goto end;
5042 }
5043
5044 RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM));
5045
5046 Status = insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp);
5047 if (!NT_SUCCESS(Status)) {
5048 ERR("insert_tree_item returned %08x\n", Status);
5049 goto end;
5050 }
5051
5052 ii_offset = 0;
5053 } else {
5054 ERR("could not find INODE_ITEM for inode %I64x in subvol %I64x\n", fcb->inode, fcb->subvol->id);
5055 Status = STATUS_INTERNAL_ERROR;
5056 goto end;
5057 }
5058 } else {
5059 #ifdef DEBUG_PARANOID
5060 INODE_ITEM* ii2 = (INODE_ITEM*)tp.item->data;
5061
5062 old_size = ii2->st_size;
5063 #endif
5064
5065 ii_offset = tp.item->key.offset;
5066 }
5067
5068 if (!cache) {
5069 Status = delete_tree_item(fcb->Vcb, &tp);
5070 if (!NT_SUCCESS(Status)) {
5071 ERR("delete_tree_item returned %08x\n", Status);
5072 goto end;
5073 }
5074 } else {
5075 searchkey.obj_id = fcb->inode;
5076 searchkey.obj_type = TYPE_INODE_ITEM;
5077 searchkey.offset = ii_offset;
5078
5079 Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, false, Irp);
5080 if (!NT_SUCCESS(Status)) {
5081 ERR("error - find_item returned %08x\n", Status);
5082 goto end;
5083 }
5084
5085 if (keycmp(tp.item->key, searchkey)) {
5086 ERR("could not find INODE_ITEM for inode %I64x in subvol %I64x\n", fcb->inode, fcb->subvol->id);
5087 Status = STATUS_INTERNAL_ERROR;
5088 goto end;
5089 } else
5090 RtlCopyMemory(tp.item->data, &fcb->inode_item, min(tp.item->size, sizeof(INODE_ITEM)));
5091 }
5092
5093 #ifdef DEBUG_PARANOID
5094 if (!extents_changed && fcb->type != BTRFS_TYPE_DIRECTORY && old_size != fcb->inode_item.st_size) {
5095 ERR("error - size has changed but extents not marked as changed\n");
5096 int3;
5097 }
5098 #endif
5099 } else
5100 ii_offset = 0;
5101
5102 fcb->created = false;
5103
5104 if (!cache && fcb->inode_item_changed) {
5105 ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
5106 if (!ii) {
5107 ERR("out of memory\n");
5108 Status = STATUS_INSUFFICIENT_RESOURCES;
5109 goto end;
5110 }
5111
5112 RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM));
5113
5114 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, ii_offset, ii, sizeof(INODE_ITEM),
5115 Batch_Insert);
5116 if (!NT_SUCCESS(Status)) {
5117 ERR("insert_tree_item_batch returned %08x\n", Status);
5118 goto end;
5119 }
5120
5121 fcb->inode_item_changed = false;
5122 }
5123
5124 if (fcb->sd_dirty) {
5125 if (!fcb->sd_deleted) {
5126 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, sizeof(EA_NTACL) - 1,
5127 EA_NTACL_HASH, (uint8_t*)fcb->sd, (uint16_t)RtlLengthSecurityDescriptor(fcb->sd));
5128 if (!NT_SUCCESS(Status)) {
5129 ERR("set_xattr returned %08x\n", Status);
5130 goto end;
5131 }
5132 } else {
5133 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, sizeof(EA_NTACL) - 1, EA_NTACL_HASH);
5134 if (!NT_SUCCESS(Status)) {
5135 ERR("delete_xattr returned %08x\n", Status);
5136 goto end;
5137 }
5138 }
5139
5140 fcb->sd_deleted = false;
5141 fcb->sd_dirty = false;
5142 }
5143
5144 if (fcb->atts_changed) {
5145 if (!fcb->atts_deleted) {
5146 uint8_t val[16], *val2;
5147 ULONG atts = fcb->atts;
5148
5149 TRACE("inserting new DOSATTRIB xattr\n");
5150
5151 if (fcb->inode == SUBVOL_ROOT_INODE)
5152 atts &= ~FILE_ATTRIBUTE_READONLY;
5153
5154 val2 = &val[sizeof(val) - 1];
5155
5156 do {
5157 uint8_t c = atts % 16;
5158 *val2 = c <= 9 ? (c + '0') : (c - 0xa + 'a');
5159
5160 val2--;
5161 atts >>= 4;
5162 } while (atts != 0);
5163
5164 *val2 = 'x';
5165 val2--;
5166 *val2 = '0';
5167
5168 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, sizeof(EA_DOSATTRIB) - 1,
5169 EA_DOSATTRIB_HASH, val2, (uint16_t)(val + sizeof(val) - val2));
5170 if (!NT_SUCCESS(Status)) {
5171 ERR("set_xattr returned %08x\n", Status);
5172 goto end;
5173 }
5174 } else {
5175 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, sizeof(EA_DOSATTRIB) - 1, EA_DOSATTRIB_HASH);
5176 if (!NT_SUCCESS(Status)) {
5177 ERR("delete_xattr returned %08x\n", Status);
5178 goto end;
5179 }
5180 }
5181
5182 fcb->atts_changed = false;
5183 fcb->atts_deleted = false;
5184 }
5185
5186 if (fcb->reparse_xattr_changed) {
5187 if (fcb->reparse_xattr.Buffer && fcb->reparse_xattr.Length > 0) {
5188 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, sizeof(EA_REPARSE) - 1,
5189 EA_REPARSE_HASH, (uint8_t*)fcb->reparse_xattr.Buffer, (uint16_t)fcb->reparse_xattr.Length);
5190 if (!NT_SUCCESS(Status)) {
5191 ERR("set_xattr returned %08x\n", Status);
5192 goto end;
5193 }
5194 } else {
5195 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, sizeof(EA_REPARSE) - 1, EA_REPARSE_HASH);
5196 if (!NT_SUCCESS(Status)) {
5197 ERR("delete_xattr returned %08x\n", Status);
5198 goto end;
5199 }
5200 }
5201
5202 fcb->reparse_xattr_changed = false;
5203 }
5204
5205 if (fcb->ea_changed) {
5206 if (fcb->ea_xattr.Buffer && fcb->ea_xattr.Length > 0) {
5207 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_EA, sizeof(EA_EA) - 1,
5208 EA_EA_HASH, (uint8_t*)fcb->ea_xattr.Buffer, (uint16_t)fcb->ea_xattr.Length);
5209 if (!NT_SUCCESS(Status)) {
5210 ERR("set_xattr returned %08x\n", Status);
5211 goto end;
5212 }
5213 } else {
5214 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_EA, sizeof(EA_EA) - 1, EA_EA_HASH);
5215 if (!NT_SUCCESS(Status)) {
5216 ERR("delete_xattr returned %08x\n", Status);
5217 goto end;
5218 }
5219 }
5220
5221 fcb->ea_changed = false;
5222 }
5223
5224 if (fcb->prop_compression_changed) {
5225 if (fcb->prop_compression == PropCompression_None) {
5226 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1, EA_PROP_COMPRESSION_HASH);
5227 if (!NT_SUCCESS(Status)) {
5228 ERR("delete_xattr returned %08x\n", Status);
5229 goto end;
5230 }
5231 } else if (fcb->prop_compression == PropCompression_Zlib) {
5232 static const char zlib[] = "zlib";
5233
5234 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1,
5235 EA_PROP_COMPRESSION_HASH, (uint8_t*)zlib, sizeof(zlib) - 1);
5236 if (!NT_SUCCESS(Status)) {
5237 ERR("set_xattr returned %08x\n", Status);
5238 goto end;
5239 }
5240 } else if (fcb->prop_compression == PropCompression_LZO) {
5241 static const char lzo[] = "lzo";
5242
5243 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1,
5244 EA_PROP_COMPRESSION_HASH, (uint8_t*)lzo, sizeof(lzo) - 1);
5245 if (!NT_SUCCESS(Status)) {
5246 ERR("set_xattr returned %08x\n", Status);
5247 goto end;
5248 }
5249 } else if (fcb->prop_compression == PropCompression_ZSTD) {
5250 static const char zstd[] = "zstd";
5251
5252 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1,
5253 EA_PROP_COMPRESSION_HASH, (uint8_t*)zstd, sizeof(zstd) - 1);
5254 if (!NT_SUCCESS(Status)) {
5255 ERR("set_xattr returned %08x\n", Status);
5256 goto end;
5257 }
5258 }
5259
5260 fcb->prop_compression_changed = false;
5261 }
5262
5263 if (fcb->xattrs_changed) {
5264 LIST_ENTRY* le;
5265
5266 le = fcb->xattrs.Flink;
5267 while (le != &fcb->xattrs) {
5268 xattr* xa = CONTAINING_RECORD(le, xattr, list_entry);
5269 LIST_ENTRY* le2 = le->Flink;
5270
5271 if (xa->dirty) {
5272 uint32_t hash = calc_crc32c(0xfffffffe, (uint8_t*)xa->data, xa->namelen);
5273
5274 if (xa->valuelen == 0) {
5275 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, xa->data, xa->namelen, hash);
5276 if (!NT_SUCCESS(Status)) {
5277 ERR("delete_xattr returned %08x\n", Status);
5278 goto end;
5279 }
5280
5281 RemoveEntryList(&xa->list_entry);
5282 ExFreePool(xa);
5283 } else {
5284 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, xa->data, xa->namelen,
5285 hash, (uint8_t*)&xa->data[xa->namelen], xa->valuelen);
5286 if (!NT_SUCCESS(Status)) {
5287 ERR("set_xattr returned %08x\n", Status);
5288 goto end;
5289 }
5290
5291 xa->dirty = false;
5292 }
5293 }
5294
5295 le = le2;
5296 }
5297
5298 fcb->xattrs_changed = false;
5299 }
5300
5301 if ((fcb->case_sensitive_set && !fcb->case_sensitive)) {
5302 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_CASE_SENSITIVE,
5303 sizeof(EA_CASE_SENSITIVE) - 1, EA_CASE_SENSITIVE_HASH);
5304 if (!NT_SUCCESS(Status)) {
5305 ERR("delete_xattr returned %08x\n", Status);
5306 goto end;
5307 }
5308
5309 fcb->case_sensitive_set = false;
5310 } else if ((!fcb->case_sensitive_set && fcb->case_sensitive)) {
5311 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_CASE_SENSITIVE,
5312 sizeof(EA_CASE_SENSITIVE) - 1, EA_CASE_SENSITIVE_HASH, (uint8_t*)"1", 1);
5313 if (!NT_SUCCESS(Status)) {
5314 ERR("set_xattr returned %08x\n", Status);
5315 goto end;
5316 }
5317
5318 fcb->case_sensitive_set = true;
5319 }
5320
5321 if (fcb->inode_item.st_nlink == 0 && !fcb->marked_as_orphan) { // mark as orphan
5322 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, BTRFS_ORPHAN_INODE_OBJID, TYPE_ORPHAN_INODE,
5323 fcb->inode, NULL, 0, Batch_Insert);
5324 if (!NT_SUCCESS(Status)) {
5325 ERR("insert_tree_item_batch returned %08x\n", Status);
5326 goto end;
5327 }
5328
5329 fcb->marked_as_orphan = true;
5330 }
5331
5332 Status = STATUS_SUCCESS;
5333
5334 end:
5335 if (fcb->dirty) {
5336 bool lock = false;
5337
5338 fcb->dirty = false;
5339
5340 if (!ExIsResourceAcquiredExclusiveLite(&fcb->Vcb->dirty_fcbs_lock)) {
5341 ExAcquireResourceExclusiveLite(&fcb->Vcb->dirty_fcbs_lock, true);
5342 lock = true;
5343 }
5344
5345 RemoveEntryList(&fcb->list_entry_dirty);
5346
5347 if (lock)
5348 ExReleaseResourceLite(&fcb->Vcb->dirty_fcbs_lock);
5349 }
5350
5351 return Status;
5352 }
5353
5354 void add_trim_entry_avoid_sb(device_extension* Vcb, device* dev, uint64_t address, uint64_t size) {
5355 int i;
5356 ULONG sblen = (ULONG)sector_align(sizeof(superblock), Vcb->superblock.sector_size);
5357
5358 i = 0;
5359 while (superblock_addrs[i] != 0) {
5360 if (superblock_addrs[i] + sblen >= address && superblock_addrs[i] < address + size) {
5361 if (superblock_addrs[i] > address)
5362 add_trim_entry(dev, address, superblock_addrs[i] - address);
5363
5364 if (size <= superblock_addrs[i] + sblen - address)
5365 return;
5366
5367 size -= superblock_addrs[i] + sblen - address;
5368 address = superblock_addrs[i] + sblen;
5369 } else if (superblock_addrs[i] > address + size)
5370 break;
5371
5372 i++;
5373 }
5374
5375 add_trim_entry(dev, address, size);
5376 }
5377
5378 static NTSTATUS drop_chunk(device_extension* Vcb, chunk* c, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
5379 NTSTATUS Status;
5380 KEY searchkey;
5381 traverse_ptr tp;
5382 uint64_t i, factor;
5383 #ifdef __REACTOS__
5384 uint64_t phys_used;
5385 #endif
5386 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];;
5387
5388 TRACE("dropping chunk %I64x\n", c->offset);
5389
5390 if (c->chunk_item->type & BLOCK_FLAG_RAID0)
5391 factor = c->chunk_item->num_stripes;
5392 else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
5393 factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
5394 else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
5395 factor = c->chunk_item->num_stripes - 1;
5396 else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
5397 factor = c->chunk_item->num_stripes - 2;
5398 else // SINGLE, DUPLICATE, RAID1
5399 factor = 1;
5400
5401 // do TRIM
5402 if (Vcb->trim && !Vcb->options.no_trim) {
5403 uint64_t len = c->chunk_item->size / factor;
5404
5405 for (i = 0; i < c->chunk_item->num_stripes; i++) {
5406 if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim)
5407 add_trim_entry_avoid_sb(Vcb, c->devices[i], cis[i].offset, len);
5408 }
5409 }
5410
5411 if (!c->cache) {
5412 Status = load_stored_free_space_cache(Vcb, c, true, Irp);
5413
5414 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND)
5415 WARN("load_stored_free_space_cache returned %08x\n", Status);
5416 }
5417
5418 // remove free space cache
5419 if (c->cache) {
5420 c->cache->deleted = true;
5421
5422 Status = excise_extents(Vcb, c->cache, 0, c->cache->inode_item.st_size, Irp, rollback);
5423 if (!NT_SUCCESS(Status)) {
5424 ERR("excise_extents returned %08x\n", Status);
5425 return Status;
5426 }
5427
5428 Status = flush_fcb(c->cache, true, batchlist, Irp);
5429
5430 free_fcb(c->cache);
5431
5432 if (c->cache->refcount == 0)
5433 reap_fcb(c->cache);
5434
5435 if (!NT_SUCCESS(Status)) {
5436 ERR("flush_fcb returned %08x\n", Status);
5437 return Status;
5438 }
5439
5440 searchkey.obj_id = FREE_SPACE_CACHE_ID;
5441 searchkey.obj_type = 0;
5442 searchkey.offset = c->offset;
5443
5444 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
5445 if (!NT_SUCCESS(Status)) {
5446 ERR("error - find_item returned %08x\n", Status);
5447 return Status;
5448 }
5449
5450 if (!keycmp(tp.item->key, searchkey)) {
5451 Status = delete_tree_item(Vcb, &tp);
5452 if (!NT_SUCCESS(Status)) {
5453 ERR("delete_tree_item returned %08x\n", Status);
5454 return Status;
5455 }
5456 }
5457 }
5458
5459 if (Vcb->space_root) {
5460 Status = insert_tree_item_batch(batchlist, Vcb, Vcb->space_root, c->offset, TYPE_FREE_SPACE_INFO, c->chunk_item->size,
5461 NULL, 0, Batch_DeleteFreeSpace);
5462 if (!NT_SUCCESS(Status)) {
5463 ERR("insert_tree_item_batch returned %08x\n", Status);
5464 return Status;
5465 }
5466 }
5467
5468 for (i = 0; i < c->chunk_item->num_stripes; i++) {
5469 if (!c->created) {
5470 // remove DEV_EXTENTs from tree 4
5471 searchkey.obj_id = cis[i].dev_id;
5472 searchkey.obj_type = TYPE_DEV_EXTENT;
5473 searchkey.offset = cis[i].offset;
5474
5475 Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, false, Irp);
5476 if (!NT_SUCCESS(Status)) {
5477 ERR("error - find_item returned %08x\n", Status);
5478 return Status;
5479 }
5480
5481 if (!keycmp(tp.item->key, searchkey)) {
5482 Status = delete_tree_item(Vcb, &tp);
5483 if (!NT_SUCCESS(Status)) {
5484 ERR("delete_tree_item returned %08x\n", Status);
5485 return Status;
5486 }
5487
5488 if (tp.item->size >= sizeof(DEV_EXTENT)) {
5489 DEV_EXTENT* de = (DEV_EXTENT*)tp.item->data;
5490
5491 c->devices[i]->devitem.bytes_used -= de->length;
5492
5493 if (Vcb->balance.thread && Vcb->balance.shrinking && Vcb->balance.opts[0].devid == c->devices[i]->devitem.dev_id) {
5494 if (cis[i].offset < Vcb->balance.opts[0].drange_start && cis[i].offset + de->length > Vcb->balance.opts[0].drange_start)
5495 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, Vcb->balance.opts[0].drange_start - cis[i].offset, NULL, rollback);
5496 } else
5497 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, de->length, NULL, rollback);
5498 }
5499 } else
5500 WARN("could not find (%I64x,%x,%I64x) in dev tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
5501 } else {
5502 uint64_t len = c->chunk_item->size / factor;
5503
5504 c->devices[i]->devitem.bytes_used -= len;
5505
5506 if (Vcb->balance.thread && Vcb->balance.shrinking && Vcb->balance.opts[0].devid == c->devices[i]->devitem.dev_id) {
5507 if (cis[i].offset < Vcb->balance.opts[0].drange_start && cis[i].offset + len > Vcb->balance.opts[0].drange_start)
5508 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, Vcb->balance.opts[0].drange_start - cis[i].offset, NULL, rollback);
5509 } else
5510 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, len, NULL, rollback);
5511 }
5512 }
5513
5514 // modify DEV_ITEMs in chunk tree
5515 for (i = 0; i < c->chunk_item->num_stripes; i++) {
5516 if (c->devices[i]) {
5517 uint64_t j;
5518 DEV_ITEM* di;
5519
5520 searchkey.obj_id = 1;
5521 searchkey.obj_type = TYPE_DEV_ITEM;
5522 searchkey.offset = c->devices[i]->devitem.dev_id;
5523
5524 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, false, Irp);
5525 if (!NT_SUCCESS(Status)) {
5526 ERR("error - find_item returned %08x\n", Status);
5527 return Status;
5528 }
5529
5530 if (!keycmp(tp.item->key, searchkey)) {
5531 Status = delete_tree_item(Vcb, &tp);
5532 if (!NT_SUCCESS(Status)) {
5533 ERR("delete_tree_item returned %08x\n", Status);
5534 return Status;
5535 }
5536
5537 di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
5538 if (!di) {
5539 ERR("out of memory\n");
5540 return STATUS_INSUFFICIENT_RESOURCES;
5541 }
5542
5543 RtlCopyMemory(di, &c->devices[i]->devitem, sizeof(DEV_ITEM));
5544
5545 Status = insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, c->devices[i]->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp);
5546 if (!NT_SUCCESS(Status)) {
5547 ERR("insert_tree_item returned %08x\n", Status);
5548 return Status;
5549 }
5550 }
5551
5552 for (j = i + 1; j < c->chunk_item->num_stripes; j++) {
5553 if (c->devices[j] == c->devices[i])
5554 c->devices[j] = NULL;
5555 }
5556 }
5557 }
5558
5559 if (!c->created) {
5560 // remove CHUNK_ITEM from chunk tree
5561 searchkey.obj_id = 0x100;
5562 searchkey.obj_type = TYPE_CHUNK_ITEM;
5563 searchkey.offset = c->offset;
5564
5565 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, false, Irp);
5566 if (!NT_SUCCESS(Status)) {
5567 ERR("error - find_item returned %08x\n", Status);
5568 return Status;
5569 }
5570
5571 if (!keycmp(tp.item->key, searchkey)) {
5572 Status = delete_tree_item(Vcb, &tp);
5573
5574 if (!NT_SUCCESS(Status)) {
5575 ERR("delete_tree_item returned %08x\n", Status);
5576 return Status;
5577 }
5578 } else
5579 WARN("could not find CHUNK_ITEM for chunk %I64x\n", c->offset);
5580
5581 // remove BLOCK_GROUP_ITEM from extent tree
5582 searchkey.obj_id = c->offset;
5583 searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
5584 searchkey.offset = 0xffffffffffffffff;
5585
5586 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
5587 if (!NT_SUCCESS(Status)) {
5588 ERR("error - find_item returned %08x\n", Status);
5589 return Status;
5590 }
5591
5592 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
5593 Status = delete_tree_item(Vcb, &tp);
5594
5595 if (!NT_SUCCESS(Status)) {
5596 ERR("delete_tree_item returned %08x\n", Status);
5597 return Status;
5598 }
5599 } else
5600 WARN("could not find BLOCK_GROUP_ITEM for chunk %I64x\n", c->offset);
5601 }
5602
5603 if (c->chunk_item->type & BLOCK_FLAG_SYSTEM)
5604 remove_from_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset);
5605
5606 RemoveEntryList(&c->list_entry);
5607
5608 // clear raid56 incompat flag if dropping last RAID5/6 chunk
5609
5610 if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) {
5611 LIST_ENTRY* le;
5612 bool clear_flag = true;
5613
5614 le = Vcb->chunks.Flink;
5615 while (le != &Vcb->chunks) {
5616 chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry);
5617
5618 if (c2->chunk_item->type & BLOCK_FLAG_RAID5 || c2->chunk_item->type & BLOCK_FLAG_RAID6) {
5619 clear_flag = false;
5620 break;
5621 }
5622
5623 le = le->Flink;
5624 }
5625
5626 if (clear_flag)
5627 Vcb->superblock.incompat_flags &= ~BTRFS_INCOMPAT_FLAGS_RAID56;
5628 }
5629
5630 #ifndef __REACTOS__
5631 uint64_t phys_used = chunk_estimate_phys_size(Vcb, c, c->oldused);
5632 #else
5633 phys_used = chunk_estimate_phys_size(Vcb, c, c->oldused);
5634 #endif
5635
5636 if (phys_used < Vcb->superblock.bytes_used)
5637 Vcb->superblock.bytes_used -= phys_used;
5638 else
5639 Vcb->superblock.bytes_used = 0;
5640
5641 ExFreePool(c->chunk_item);
5642 ExFreePool(c->devices);
5643
5644 while (!IsListEmpty(&c->space)) {
5645 space* s = CONTAINING_RECORD(c->space.Flink, space, list_entry);
5646
5647 RemoveEntryList(&s->list_entry);
5648 ExFreePool(s);
5649 }
5650
5651 while (!IsListEmpty(&c->deleting)) {
5652 space* s = CONTAINING_RECORD(c->deleting.Flink, space, list_entry);
5653
5654 RemoveEntryList(&s->list_entry);
5655 ExFreePool(s);
5656 }
5657
5658 release_chunk_lock(c, Vcb);
5659
5660 ExDeleteResourceLite(&c->partial_stripes_lock);
5661 ExDeleteResourceLite(&c->range_locks_lock);
5662 ExDeleteResourceLite(&c->lock);
5663 ExDeleteResourceLite(&c->changed_extents_lock);
5664
5665 ExFreePool(c);
5666
5667 return STATUS_SUCCESS;
5668 }
5669
5670 static NTSTATUS partial_stripe_read(device_extension* Vcb, chunk* c, partial_stripe* ps, uint64_t startoff, uint16_t parity, ULONG offset, ULONG len) {
5671 NTSTATUS Status;
5672 ULONG sl = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size);
5673 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
5674
5675 while (len > 0) {
5676 ULONG readlen = min(offset + len, offset + (sl - (offset % sl))) - offset;
5677 uint16_t stripe;
5678
5679 stripe = (parity + (offset / sl) + 1) % c->chunk_item->num_stripes;
5680
5681 if (c->devices[stripe]->devobj) {
5682 Status = sync_read_phys(c->devices[stripe]->devobj, c->devices[stripe]->fileobj, cis[stripe].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size),
5683 readlen * Vcb->superblock.sector_size, ps->data + (offset * Vcb->superblock.sector_size), false);
5684 if (!NT_SUCCESS(Status)) {
5685 ERR("sync_read_phys returned %08x\n", Status);
5686 return Status;
5687 }
5688 } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
5689 uint16_t i;
5690 uint8_t* scratch;
5691
5692 scratch = ExAllocatePoolWithTag(NonPagedPool, readlen * Vcb->superblock.sector_size, ALLOC_TAG);
5693 if (!scratch) {
5694 ERR("out of memory\n");
5695 return STATUS_INSUFFICIENT_RESOURCES;
5696 }
5697
5698 for (i = 0; i < c->chunk_item->num_stripes; i++) {
5699 if (i != stripe) {
5700 if (!c->devices[i]->devobj) {
5701 ExFreePool(scratch);
5702 return STATUS_UNEXPECTED_IO_ERROR;
5703 }
5704
5705 if (i == 0 || (stripe == 0 && i == 1)) {
5706 Status = sync_read_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size),
5707 readlen * Vcb->superblock.sector_size, ps->data + (offset * Vcb->superblock.sector_size), false);
5708 if (!NT_SUCCESS(Status)) {
5709 ERR("sync_read_phys returned %08x\n", Status);
5710 ExFreePool(scratch);
5711 return Status;
5712 }
5713 } else {
5714 Status = sync_read_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size),
5715 readlen * Vcb->superblock.sector_size, scratch, false);
5716 if (!NT_SUCCESS(Status)) {
5717 ERR("sync_read_phys returned %08x\n", Status);
5718 ExFreePool(scratch);
5719 return Status;
5720 }
5721
5722 do_xor(ps->data + (offset * Vcb->superblock.sector_size), scratch, readlen * Vcb->superblock.sector_size);
5723 }
5724 }
5725 }
5726
5727 ExFreePool(scratch);
5728 } else {
5729 uint8_t* scratch;
5730 uint16_t k, i, logstripe, error_stripe, num_errors = 0;
5731
5732 scratch = ExAllocatePoolWithTag(NonPagedPool, (c->chunk_item->num_stripes + 2) * readlen * Vcb->superblock.sector_size, ALLOC_TAG);
5733 if (!scratch) {
5734 ERR("out of memory\n");
5735 return STATUS_INSUFFICIENT_RESOURCES;
5736 }
5737
5738 i = (parity + 1) % c->chunk_item->num_stripes;
5739 for (k = 0; k < c->chunk_item->num_stripes; k++) {
5740 if (i != stripe) {
5741 if (c->devices[i]->devobj) {
5742 Status = sync_read_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size),
5743 readlen * Vcb->superblock.sector_size, scratch + (k * readlen * Vcb->superblock.sector_size), false);
5744 if (!NT_SUCCESS(Status)) {
5745 ERR("sync_read_phys returned %08x\n", Status);
5746 num_errors++;
5747 error_stripe = k;
5748 }
5749 } else {
5750 num_errors++;
5751 error_stripe = k;
5752 }
5753
5754 if (num_errors > 1) {
5755 ExFreePool(scratch);
5756 return STATUS_UNEXPECTED_IO_ERROR;
5757 }
5758 } else
5759 logstripe = k;
5760
5761 i = (i + 1) % c->chunk_item->num_stripes;
5762 }
5763
5764 if (num_errors == 0 || error_stripe == c->chunk_item->num_stripes - 1) {
5765 for (k = 0; k < c->chunk_item->num_stripes - 1; k++) {
5766 if (k != logstripe) {
5767 if (k == 0 || (k == 1 && logstripe == 0)) {
5768 RtlCopyMemory(ps->data + (offset * Vcb->superblock.sector_size), scratch + (k * readlen * Vcb->superblock.sector_size),
5769 readlen * Vcb->superblock.sector_size);
5770 } else {
5771 do_xor(ps->data + (offset * Vcb->superblock.sector_size), scratch + (k * readlen * Vcb->superblock.sector_size),
5772 readlen * Vcb->superblock.sector_size);
5773 }
5774 }
5775 }
5776 } else {
5777 raid6_recover2(scratch, c->chunk_item->num_stripes, readlen * Vcb->superblock.sector_size, logstripe,
5778 error_stripe, scratch + (c->chunk_item->num_stripes * readlen * Vcb->superblock.sector_size));
5779
5780 RtlCopyMemory(ps->data + (offset * Vcb->superblock.sector_size), scratch + (c->chunk_item->num_stripes * readlen * Vcb->superblock.sector_size),
5781 readlen * Vcb->superblock.sector_size);
5782 }
5783
5784 ExFreePool(scratch);
5785 }
5786
5787 offset += readlen;
5788 len -= readlen;
5789 }
5790
5791 return STATUS_SUCCESS;
5792 }
5793
5794 NTSTATUS flush_partial_stripe(device_extension* Vcb, chunk* c, partial_stripe* ps) {
5795 NTSTATUS Status;
5796 uint16_t parity2, stripe, startoffstripe;
5797 uint8_t* data;
5798 uint64_t startoff;
5799 ULONG runlength, index, last1;
5800 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
5801 LIST_ENTRY* le;
5802 uint16_t k, num_data_stripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
5803 uint64_t ps_length = num_data_stripes * c->chunk_item->stripe_length;
5804 ULONG stripe_length = (ULONG)c->chunk_item->stripe_length;
5805
5806 // FIXME - do writes asynchronously?
5807
5808 get_raid0_offset(ps->address - c->offset, stripe_length, num_data_stripes, &startoff, &startoffstripe);
5809
5810 parity2 = (((ps->address - c->offset) / ps_length) + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
5811
5812 // read data (or reconstruct if degraded)
5813
5814 runlength = RtlFindFirstRunClear(&ps->bmp, &index);
5815 last1 = 0;
5816
5817 while (runlength != 0) {
5818 if (index >= ps->bmplen)
5819 break;
5820
5821 if (index + runlength >= ps->bmplen) {
5822 runlength = ps->bmplen - index;
5823
5824 if (runlength == 0)
5825 break;
5826 }
5827
5828 if (index > last1) {
5829 Status = partial_stripe_read(Vcb, c, ps, startoff, parity2, last1, index - last1);
5830 if (!NT_SUCCESS(Status)) {
5831 ERR("partial_stripe_read returned %08x\n", Status);
5832 return Status;
5833 }
5834 }
5835
5836 last1 = index + runlength;
5837
5838 runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
5839 }
5840
5841 if (last1 < ps_length / Vcb->superblock.sector_size) {
5842 Status = partial_stripe_read(Vcb, c, ps, startoff, parity2, last1, (ULONG)((ps_length / Vcb->superblock.sector_size) - last1));
5843 if (!NT_SUCCESS(Status)) {
5844 ERR("partial_stripe_read returned %08x\n", Status);
5845 return Status;
5846 }
5847 }
5848
5849 // set unallocated data to 0
5850 le = c->space.Flink;
5851 while (le != &c->space) {
5852 space* s = CONTAINING_RECORD(le, space, list_entry);
5853
5854 if (s->address + s->size > ps->address && s->address < ps->address + ps_length) {
5855 uint64_t start = max(ps->address, s->address);
5856 uint64_t end = min(ps->address + ps_length, s->address + s->size);
5857
5858 RtlZeroMemory(ps->data + start - ps->address, (ULONG)(end - start));
5859 } else if (s->address >= ps->address + ps_length)
5860 break;
5861
5862 le = le->Flink;
5863 }
5864
5865 le = c->deleting.Flink;
5866 while (le != &c->deleting) {
5867 space* s = CONTAINING_RECORD(le, space, list_entry);
5868
5869 if (s->address + s->size > ps->address && s->address < ps->address + ps_length) {
5870 uint64_t start = max(ps->address, s->address);
5871 uint64_t end = min(ps->address + ps_length, s->address + s->size);
5872
5873 RtlZeroMemory(ps->data + start - ps->address, (ULONG)(end - start));
5874 } else if (s->address >= ps->address + ps_length)
5875 break;
5876
5877 le = le->Flink;
5878 }
5879
5880 stripe = (parity2 + 1) % c->chunk_item->num_stripes;
5881
5882 data = ps->data;
5883 for (k = 0; k < num_data_stripes; k++) {
5884 if (c->devices[stripe]->devobj) {
5885 Status = write_data_phys(c->devices[stripe]->devobj, c->devices[stripe]->fileobj, cis[stripe].offset + startoff, data, stripe_length);
5886 if (!NT_SUCCESS(Status)) {
5887 ERR("write_data_phys returned %08x\n", Status);
5888 return Status;
5889 }
5890 }
5891
5892 data += stripe_length;
5893 stripe = (stripe + 1) % c->chunk_item->num_stripes;
5894 }
5895
5896 // write parity
5897 if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
5898 if (c->devices[parity2]->devobj) {
5899 uint16_t i;
5900
5901 for (i = 1; i < c->chunk_item->num_stripes - 1; i++) {
5902 do_xor(ps->data, ps->data + (i * stripe_length), stripe_length);
5903 }
5904
5905 Status = write_data_phys(c->devices[parity2]->devobj, c->devices[parity2]->fileobj, cis[parity2].offset + startoff, ps->data, stripe_length);
5906 if (!NT_SUCCESS(Status)) {
5907 ERR("write_data_phys returned %08x\n", Status);
5908 return Status;
5909 }
5910 }
5911 } else {
5912 uint16_t parity1 = (parity2 + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
5913
5914 if (c->devices[parity1]->devobj || c->devices[parity2]->devobj) {
5915 uint8_t* scratch;
5916 uint16_t i;
5917
5918 scratch = ExAllocatePoolWithTag(NonPagedPool, stripe_length * 2, ALLOC_TAG);
5919 if (!scratch) {
5920 ERR("out of memory\n");
5921 return STATUS_INSUFFICIENT_RESOURCES;
5922 }
5923
5924 i = c->chunk_item->num_stripes - 3;
5925
5926 while (true) {
5927 if (i == c->chunk_item->num_stripes - 3) {
5928 RtlCopyMemory(scratch, ps->data + (i * stripe_length), stripe_length);
5929 RtlCopyMemory(scratch + stripe_length, ps->data + (i * stripe_length), stripe_length);
5930 } else {
5931 do_xor(scratch, ps->data + (i * stripe_length), stripe_length);
5932
5933 galois_double(scratch + stripe_length, stripe_length);
5934 do_xor(scratch + stripe_length, ps->data + (i * stripe_length), stripe_length);
5935 }
5936
5937 if (i == 0)
5938 break;
5939
5940 i--;
5941 }
5942
5943 if (c->devices[parity1]->devobj) {
5944 Status = write_data_phys(c->devices[parity1]->devobj, c->devices[parity1]->fileobj, cis[parity1].offset + startoff, scratch, stripe_length);
5945 if (!NT_SUCCESS(Status)) {
5946 ERR("write_data_phys returned %08x\n", Status);
5947 ExFreePool(scratch);
5948 return Status;
5949 }
5950 }
5951
5952 if (c->devices[parity2]->devobj) {
5953 Status = write_data_phys(c->devices[parity2]->devobj, c->devices[parity2]->fileobj, cis[parity2].offset + startoff,
5954 scratch + stripe_length, stripe_length);
5955 if (!NT_SUCCESS(Status)) {
5956 ERR("write_data_phys returned %08x\n", Status);
5957 ExFreePool(scratch);
5958 return Status;
5959 }
5960 }
5961
5962 ExFreePool(scratch);
5963 }
5964 }
5965
5966 return STATUS_SUCCESS;
5967 }
5968
5969 static NTSTATUS update_chunks(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
5970 LIST_ENTRY *le, *le2;
5971 NTSTATUS Status;
5972 uint64_t used_minus_cache;
5973
5974 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true);
5975
5976 // FIXME - do tree chunks before data chunks
5977
5978 le = Vcb->chunks.Flink;
5979 while (le != &Vcb->chunks) {
5980 chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
5981
5982 le2 = le->Flink;
5983
5984 if (c->changed) {
5985 acquire_chunk_lock(c, Vcb);
5986
5987 // flush partial stripes
5988 if (!Vcb->readonly && (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)) {
5989 ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true);
5990
5991 while (!IsListEmpty(&c->partial_stripes)) {
5992 partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry);
5993
5994 Status = flush_partial_stripe(Vcb, c, ps);
5995
5996 if (ps->bmparr)
5997 ExFreePool(ps->bmparr);
5998
5999 ExFreePool(ps);
6000
6001 if (!NT_SUCCESS(Status)) {
6002 ERR("flush_partial_stripe returned %08x\n", Status);
6003 ExReleaseResourceLite(&c->partial_stripes_lock);
6004 release_chunk_lock(c, Vcb);
6005 ExReleaseResourceLite(&Vcb->chunk_lock);
6006 return Status;
6007 }
6008 }
6009
6010 ExReleaseResourceLite(&c->partial_stripes_lock);
6011 }
6012
6013 if (c->list_entry_balance.Flink) {
6014 release_chunk_lock(c, Vcb);
6015 le = le2;
6016 continue;
6017 }
6018
6019 if (c->space_changed || c->created) {
6020 bool created = c->created;
6021
6022 used_minus_cache = c->used;
6023
6024 // subtract self-hosted cache
6025 if (used_minus_cache > 0 && c->chunk_item->type & BLOCK_FLAG_DATA && c->cache && c->cache->inode_item.st_size == c->used) {
6026 LIST_ENTRY* le3;
6027
6028 le3 = c->cache->extents.Flink;
6029 while (le3 != &c->cache->extents) {
6030 extent* ext = CONTAINING_RECORD(le3, extent, list_entry);
6031 EXTENT_DATA* ed = &ext->extent_data;
6032
6033 if (!ext->ignore) {
6034 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
6035 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
6036
6037 if (ed2->size != 0 && ed2->address >= c->offset && ed2->address + ed2->size <= c->offset + c->chunk_item->size)
6038 used_minus_cache -= ed2->size;
6039 }
6040 }
6041
6042 le3 = le3->Flink;
6043 }
6044 }
6045
6046 if (used_minus_cache == 0) {
6047 Status = drop_chunk(Vcb, c, batchlist, Irp, rollback);
6048 if (!NT_SUCCESS(Status)) {
6049 ERR("drop_chunk returned %08x\n", Status);
6050 release_chunk_lock(c, Vcb);
6051 ExReleaseResourceLite(&Vcb->chunk_lock);
6052 return Status;
6053 }
6054
6055 // c is now freed, so avoid releasing non-existent lock
6056 le = le2;
6057 continue;
6058 } else if (c->created) {
6059 Status = create_chunk(Vcb, c, Irp);
6060 if (!NT_SUCCESS(Status)) {
6061 ERR("create_chunk returned %08x\n", Status);
6062 release_chunk_lock(c, Vcb);
6063 ExReleaseResourceLite(&Vcb->chunk_lock);
6064 return Status;
6065 }
6066 }
6067
6068 if (used_minus_cache > 0 || created)
6069 release_chunk_lock(c, Vcb);
6070 } else
6071 release_chunk_lock(c, Vcb);
6072 }
6073
6074 le = le2;
6075 }
6076
6077 ExReleaseResourceLite(&Vcb->chunk_lock);
6078
6079 return STATUS_SUCCESS;
6080 }
6081
6082 static NTSTATUS delete_root_ref(device_extension* Vcb, uint64_t subvolid, uint64_t parsubvolid, uint64_t parinode, PANSI_STRING utf8, PIRP Irp) {
6083 KEY searchkey;
6084 traverse_ptr tp;
6085 NTSTATUS Status;
6086
6087 searchkey.obj_id = parsubvolid;
6088 searchkey.obj_type = TYPE_ROOT_REF;
6089 searchkey.offset = subvolid;
6090
6091 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6092 if (!NT_SUCCESS(Status)) {
6093 ERR("error - find_item returned %08x\n", Status);
6094 return Status;
6095 }
6096
6097 if (!keycmp(searchkey, tp.item->key)) {
6098 if (tp.item->size < sizeof(ROOT_REF)) {
6099 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF));
6100 return STATUS_INTERNAL_ERROR;
6101 } else {
6102 ROOT_REF* rr;
6103 ULONG len;
6104
6105 rr = (ROOT_REF*)tp.item->data;
6106 len = tp.item->size;
6107
6108 do {
6109 uint16_t itemlen;
6110
6111 if (len < sizeof(ROOT_REF) || len < offsetof(ROOT_REF, name[0]) + rr->n) {
6112 ERR("(%I64x,%x,%I64x) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
6113 break;
6114 }
6115
6116 itemlen = (uint16_t)offsetof(ROOT_REF, name[0]) + rr->n;
6117
6118 if (rr->dir == parinode && rr->n == utf8->Length && RtlCompareMemory(rr->name, utf8->Buffer, rr->n) == rr->n) {
6119 uint16_t newlen = tp.item->size - itemlen;
6120
6121 Status = delete_tree_item(Vcb, &tp);
6122 if (!NT_SUCCESS(Status)) {
6123 ERR("delete_tree_item returned %08x\n", Status);
6124 return Status;
6125 }
6126
6127 if (newlen == 0) {
6128 TRACE("deleting (%I64x,%x,%I64x)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
6129 } else {
6130 uint8_t *newrr = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *rroff;
6131
6132 if (!newrr) {
6133 ERR("out of memory\n");
6134 return STATUS_INSUFFICIENT_RESOURCES;
6135 }
6136
6137 TRACE("modifying (%I64x,%x,%I64x)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
6138
6139 if ((uint8_t*)rr > tp.item->data) {
6140 RtlCopyMemory(newrr, tp.item->data, (uint8_t*)rr - tp.item->data);
6141 rroff = newrr + ((uint8_t*)rr - tp.item->data);
6142 } else {
6143 rroff = newrr;
6144 }
6145
6146 if ((uint8_t*)&rr->name[rr->n] < tp.item->data + tp.item->size)
6147 RtlCopyMemory(rroff, &rr->name[rr->n], tp.item->size - ((uint8_t*)&rr->name[rr->n] - tp.item->data));
6148
6149 Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newrr, newlen, NULL, Irp);
6150 if (!NT_SUCCESS(Status)) {
6151 ERR("insert_tree_item returned %08x\n", Status);
6152 ExFreePool(newrr);
6153 return Status;
6154 }
6155 }
6156
6157 break;
6158 }
6159
6160 if (len > itemlen) {
6161 len -= itemlen;
6162 rr = (ROOT_REF*)&rr->name[rr->n];
6163 } else
6164 break;
6165 } while (len > 0);
6166 }
6167 } else {
6168 WARN("could not find ROOT_REF entry for subvol %I64x in %I64x\n", searchkey.offset, searchkey.obj_id);
6169 return STATUS_NOT_FOUND;
6170 }
6171
6172 return STATUS_SUCCESS;
6173 }
6174
6175 #ifdef _MSC_VER
6176 #pragma warning(push)
6177 #pragma warning(suppress: 28194)
6178 #endif
6179 static NTSTATUS add_root_ref(_In_ device_extension* Vcb, _In_ uint64_t subvolid, _In_ uint64_t parsubvolid, _In_ __drv_aliasesMem ROOT_REF* rr, _In_opt_ PIRP Irp) {
6180 KEY searchkey;
6181 traverse_ptr tp;
6182 NTSTATUS Status;
6183
6184 searchkey.obj_id = parsubvolid;
6185 searchkey.obj_type = TYPE_ROOT_REF;
6186 searchkey.offset = subvolid;
6187
6188 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6189 if (!NT_SUCCESS(Status)) {
6190 ERR("error - find_item returned %08x\n", Status);
6191 return Status;
6192 }
6193
6194 if (!keycmp(searchkey, tp.item->key)) {
6195 uint16_t rrsize = tp.item->size + (uint16_t)offsetof(ROOT_REF, name[0]) + rr->n;
6196 uint8_t* rr2;
6197
6198 rr2 = ExAllocatePoolWithTag(PagedPool, rrsize, ALLOC_TAG);
6199 if (!rr2) {
6200 ERR("out of memory\n");
6201 return STATUS_INSUFFICIENT_RESOURCES;
6202 }
6203
6204 if (tp.item->size > 0)
6205 RtlCopyMemory(rr2, tp.item->data, tp.item->size);
6206
6207 RtlCopyMemory(rr2 + tp.item->size, rr, offsetof(ROOT_REF, name[0]) + rr->n);
6208 ExFreePool(rr);
6209
6210 Status = delete_tree_item(Vcb, &tp);
6211 if (!NT_SUCCESS(Status)) {
6212 ERR("delete_tree_item returned %08x\n", Status);
6213 ExFreePool(rr2);
6214 return Status;
6215 }
6216
6217 Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr2, rrsize, NULL, Irp);
6218 if (!NT_SUCCESS(Status)) {
6219 ERR("insert_tree_item returned %08x\n", Status);
6220 ExFreePool(rr2);
6221 return Status;
6222 }
6223 } else {
6224 Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr, (uint16_t)offsetof(ROOT_REF, name[0]) + rr->n, NULL, Irp);
6225 if (!NT_SUCCESS(Status)) {
6226 ERR("insert_tree_item returned %08x\n", Status);
6227 ExFreePool(rr);
6228 return Status;
6229 }
6230 }
6231
6232 return STATUS_SUCCESS;
6233 }
6234 #ifdef _MSC_VER
6235 #pragma warning(pop)
6236 #endif
6237
6238 static NTSTATUS update_root_backref(device_extension* Vcb, uint64_t subvolid, uint64_t parsubvolid, PIRP Irp) {
6239 KEY searchkey;
6240 traverse_ptr tp;
6241 uint8_t* data;
6242 uint16_t datalen;
6243 NTSTATUS Status;
6244
6245 searchkey.obj_id = parsubvolid;
6246 searchkey.obj_type = TYPE_ROOT_REF;
6247 searchkey.offset = subvolid;
6248
6249 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6250 if (!NT_SUCCESS(Status)) {
6251 ERR("error - find_item returned %08x\n", Status);
6252 return Status;
6253 }
6254
6255 if (!keycmp(tp.item->key, searchkey) && tp.item->size > 0) {
6256 datalen = tp.item->size;
6257
6258 data = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG);
6259 if (!data) {
6260 ERR("out of memory\n");
6261 return STATUS_INSUFFICIENT_RESOURCES;
6262 }
6263
6264 RtlCopyMemory(data, tp.item->data, datalen);
6265 } else {
6266 datalen = 0;
6267 data = NULL;
6268 }
6269
6270 searchkey.obj_id = subvolid;
6271 searchkey.obj_type = TYPE_ROOT_BACKREF;
6272 searchkey.offset = parsubvolid;
6273
6274 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6275 if (!NT_SUCCESS(Status)) {
6276 ERR("error - find_item returned %08x\n", Status);
6277
6278 if (datalen > 0)
6279 ExFreePool(data);
6280
6281 return Status;
6282 }
6283
6284 if (!keycmp(tp.item->key, searchkey)) {
6285 Status = delete_tree_item(Vcb, &tp);
6286 if (!NT_SUCCESS(Status)) {
6287 ERR("delete_tree_item returned %08x\n", Status);
6288
6289 if (datalen > 0)
6290 ExFreePool(data);
6291
6292 return Status;
6293 }
6294 }
6295
6296 if (datalen > 0) {
6297 Status = insert_tree_item(Vcb, Vcb->root_root, subvolid, TYPE_ROOT_BACKREF, parsubvolid, data, datalen, NULL, Irp);
6298 if (!NT_SUCCESS(Status)) {
6299 ERR("insert_tree_item returned %08x\n", Status);
6300 ExFreePool(data);
6301 return Status;
6302 }
6303 }
6304
6305 return STATUS_SUCCESS;
6306 }
6307
6308 static NTSTATUS add_root_item_to_cache(device_extension* Vcb, uint64_t root, PIRP Irp) {
6309 KEY searchkey;
6310 traverse_ptr tp;
6311 NTSTATUS Status;
6312
6313 searchkey.obj_id = root;
6314 searchkey.obj_type = TYPE_ROOT_ITEM;
6315 searchkey.offset = 0xffffffffffffffff;
6316
6317 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6318 if (!NT_SUCCESS(Status)) {
6319 ERR("error - find_item returned %08x\n", Status);
6320 return Status;
6321 }
6322
6323 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
6324 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id);
6325 return STATUS_INTERNAL_ERROR;
6326 }
6327
6328 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed
6329 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
6330 if (!ri) {
6331 ERR("out of memory\n");
6332 return STATUS_INSUFFICIENT_RESOURCES;
6333 }
6334
6335 if (tp.item->size > 0)
6336 RtlCopyMemory(ri, tp.item->data, tp.item->size);
6337
6338 RtlZeroMemory(((uint8_t*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size);
6339
6340 Status = delete_tree_item(Vcb, &tp);
6341 if (!NT_SUCCESS(Status)) {
6342 ERR("delete_tree_item returned %08x\n", Status);
6343 ExFreePool(ri);
6344 return Status;
6345 }
6346
6347 Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp);
6348 if (!NT_SUCCESS(Status)) {
6349 ERR("insert_tree_item returned %08x\n", Status);
6350 ExFreePool(ri);
6351 return Status;
6352 }
6353 } else {
6354 tp.tree->write = true;
6355 }
6356
6357 return STATUS_SUCCESS;
6358 }
6359
6360 static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp) {
6361 NTSTATUS Status;
6362
6363 // if fileref created and then immediately deleted, do nothing
6364 if (fileref->created && fileref->deleted) {
6365 fileref->dirty = false;
6366 return STATUS_SUCCESS;
6367 }
6368
6369 if (fileref->fcb->ads) {
6370 fileref->dirty = false;
6371 return STATUS_SUCCESS;
6372 }
6373
6374 if (fileref->created) {
6375 uint16_t disize;
6376 DIR_ITEM *di, *di2;
6377 uint32_t crc32;
6378
6379 crc32 = calc_crc32c(0xfffffffe, (uint8_t*)fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6380
6381 disize = (uint16_t)(offsetof(DIR_ITEM, name[0]) + fileref->dc->utf8.Length);
6382 di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6383 if (!di) {
6384 ERR("out of memory\n");
6385 return STATUS_INSUFFICIENT_RESOURCES;
6386 }
6387
6388 if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6389 di->key.obj_id = fileref->fcb->inode;
6390 di->key.obj_type = TYPE_INODE_ITEM;
6391 di->key.offset = 0;
6392 } else { // subvolume
6393 di->key.obj_id = fileref->fcb->subvol->id;
6394 di->key.obj_type = TYPE_ROOT_ITEM;
6395 di->key.offset = 0xffffffffffffffff;
6396 }
6397
6398 di->transid = fileref->fcb->Vcb->superblock.generation;
6399 di->m = 0;
6400 di->n = (uint16_t)fileref->dc->utf8.Length;
6401 di->type = fileref->fcb->type;
6402 RtlCopyMemory(di->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6403
6404 di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6405 if (!di2) {
6406 ERR("out of memory\n");
6407 return STATUS_INSUFFICIENT_RESOURCES;
6408 }
6409
6410 RtlCopyMemory(di2, di, disize);
6411
6412 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6413 fileref->dc->index, di, disize, Batch_Insert);
6414 if (!NT_SUCCESS(Status)) {
6415 ERR("insert_tree_item_batch returned %08x\n", Status);
6416 return Status;
6417 }
6418
6419 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32,
6420 di2, disize, Batch_DirItem);
6421 if (!NT_SUCCESS(Status)) {
6422 ERR("insert_tree_item_batch returned %08x\n", Status);
6423 return Status;
6424 }
6425
6426 if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6427 INODE_REF* ir;
6428
6429 ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->dc->utf8.Length, ALLOC_TAG);
6430 if (!ir) {
6431 ERR("out of memory\n");
6432 return STATUS_INSUFFICIENT_RESOURCES;
6433 }
6434
6435 ir->index = fileref->dc->index;
6436 ir->n = fileref->dc->utf8.Length;
6437 RtlCopyMemory(ir->name, fileref->dc->utf8.Buffer, ir->n);
6438
6439 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode,
6440 ir, sizeof(INODE_REF) - 1 + ir->n, Batch_InodeRef);
6441 if (!NT_SUCCESS(Status)) {
6442 ERR("insert_tree_item_batch returned %08x\n", Status);
6443 return Status;
6444 }
6445 } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) {
6446 ULONG rrlen;
6447 ROOT_REF* rr;
6448
6449 rrlen = sizeof(ROOT_REF) - 1 + fileref->dc->utf8.Length;
6450
6451 rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG);
6452 if (!rr) {
6453 ERR("out of memory\n");
6454 return STATUS_INSUFFICIENT_RESOURCES;
6455 }
6456
6457 rr->dir = fileref->parent->fcb->inode;
6458 rr->index = fileref->dc->index;
6459 rr->n = fileref->dc->utf8.Length;
6460 RtlCopyMemory(rr->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6461
6462 Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp);
6463 if (!NT_SUCCESS(Status)) {
6464 ERR("add_root_ref returned %08x\n", Status);
6465 return Status;
6466 }
6467
6468 Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp);
6469 if (!NT_SUCCESS(Status)) {
6470 ERR("update_root_backref returned %08x\n", Status);
6471 return Status;
6472 }
6473 }
6474
6475 fileref->created = false;
6476 } else if (fileref->deleted) {
6477 uint32_t crc32;
6478 ANSI_STRING* name;
6479 DIR_ITEM* di;
6480
6481 name = &fileref->oldutf8;
6482
6483 crc32 = calc_crc32c(0xfffffffe, (uint8_t*)name->Buffer, name->Length);
6484
6485 TRACE("deleting %.*S\n", file_desc_fileref(fileref));
6486
6487 di = ExAllocatePoolWithTag(PagedPool, sizeof(DIR_ITEM) - 1 + name->Length, ALLOC_TAG);
6488 if (!di) {
6489 ERR("out of memory\n");
6490 return STATUS_INSUFFICIENT_RESOURCES;
6491 }
6492
6493 di->m = 0;
6494 di->n = name->Length;
6495 RtlCopyMemory(di->name, name->Buffer, name->Length);
6496
6497 // delete DIR_ITEM (0x54)
6498
6499 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM,
6500 crc32, di, sizeof(DIR_ITEM) - 1 + name->Length, Batch_DeleteDirItem);
6501 if (!NT_SUCCESS(Status)) {
6502 ERR("insert_tree_item_batch returned %08x\n", Status);
6503 return Status;
6504 }
6505
6506 if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6507 INODE_REF* ir;
6508
6509 // delete INODE_REF (0xc)
6510
6511 ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + name->Length, ALLOC_TAG);
6512 if (!ir) {
6513 ERR("out of memory\n");
6514 return STATUS_INSUFFICIENT_RESOURCES;
6515 }
6516
6517 ir->index = fileref->oldindex;
6518 ir->n = name->Length;
6519 RtlCopyMemory(ir->name, name->Buffer, name->Length);
6520
6521 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF,
6522 fileref->parent->fcb->inode, ir, sizeof(INODE_REF) - 1 + name->Length, Batch_DeleteInodeRef);
6523 if (!NT_SUCCESS(Status)) {
6524 ERR("insert_tree_item_batch returned %08x\n", Status);
6525 return Status;
6526 }
6527 } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { // subvolume
6528 Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, name, Irp);
6529 if (!NT_SUCCESS(Status)) {
6530 ERR("delete_root_ref returned %08x\n", Status);
6531 return Status;
6532 }
6533
6534 Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp);
6535 if (!NT_SUCCESS(Status)) {
6536 ERR("update_root_backref returned %08x\n", Status);
6537 return Status;
6538 }
6539 }
6540
6541 // delete DIR_INDEX (0x60)
6542
6543 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6544 fileref->oldindex, NULL, 0, Batch_Delete);
6545 if (!NT_SUCCESS(Status)) {
6546 ERR("insert_tree_item_batch returned %08x\n", Status);
6547 return Status;
6548 }
6549
6550 if (fileref->oldutf8.Buffer) {
6551 ExFreePool(fileref->oldutf8.Buffer);
6552 fileref->oldutf8.Buffer = NULL;
6553 }
6554 } else { // rename or change type
6555 PANSI_STRING oldutf8 = fileref->oldutf8.Buffer ? &fileref->oldutf8 : &fileref->dc->utf8;
6556 uint32_t crc32, oldcrc32;
6557 uint16_t disize;
6558 DIR_ITEM *olddi, *di, *di2;
6559
6560 crc32 = calc_crc32c(0xfffffffe, (uint8_t*)fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6561
6562 if (!fileref->oldutf8.Buffer)
6563 oldcrc32 = crc32;
6564 else
6565 oldcrc32 = calc_crc32c(0xfffffffe, (uint8_t*)fileref->oldutf8.Buffer, fileref->oldutf8.Length);
6566
6567 olddi = ExAllocatePoolWithTag(PagedPool, sizeof(DIR_ITEM) - 1 + oldutf8->Length, ALLOC_TAG);
6568 if (!olddi) {
6569 ERR("out of memory\n");
6570 return STATUS_INSUFFICIENT_RESOURCES;
6571 }
6572
6573 olddi->m = 0;
6574 olddi->n = (uint16_t)oldutf8->Length;
6575 RtlCopyMemory(olddi->name, oldutf8->Buffer, oldutf8->Length);
6576
6577 // delete DIR_ITEM (0x54)
6578
6579 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM,
6580 oldcrc32, olddi, sizeof(DIR_ITEM) - 1 + oldutf8->Length, Batch_DeleteDirItem);
6581 if (!NT_SUCCESS(Status)) {
6582 ERR("insert_tree_item_batch returned %08x\n", Status);
6583 ExFreePool(olddi);
6584 return Status;
6585 }
6586
6587 // add DIR_ITEM (0x54)
6588
6589 disize = (uint16_t)(offsetof(DIR_ITEM, name[0]) + fileref->dc->utf8.Length);
6590 di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6591 if (!di) {
6592 ERR("out of memory\n");
6593 return STATUS_INSUFFICIENT_RESOURCES;
6594 }
6595
6596 di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6597 if (!di2) {
6598 ERR("out of memory\n");
6599 ExFreePool(di);
6600 return STATUS_INSUFFICIENT_RESOURCES;
6601 }
6602
6603 if (fileref->dc)
6604 di->key = fileref->dc->key;
6605 else if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6606 di->key.obj_id = fileref->fcb->inode;
6607 di->key.obj_type = TYPE_INODE_ITEM;
6608 di->key.offset = 0;
6609 } else { // subvolume
6610 di->key.obj_id = fileref->fcb->subvol->id;
6611 di->key.obj_type = TYPE_ROOT_ITEM;
6612 di->key.offset = 0xffffffffffffffff;
6613 }
6614
6615 di->transid = fileref->fcb->Vcb->superblock.generation;
6616 di->m = 0;
6617 di->n = (uint16_t)fileref->dc->utf8.Length;
6618 di->type = fileref->fcb->type;
6619 RtlCopyMemory(di->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6620
6621 RtlCopyMemory(di2, di, disize);
6622
6623 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32,
6624 di, disize, Batch_DirItem);
6625 if (!NT_SUCCESS(Status)) {
6626 ERR("insert_tree_item_batch returned %08x\n", Status);
6627 ExFreePool(di2);
6628 ExFreePool(di);
6629 return Status;
6630 }
6631
6632 if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6633 INODE_REF *ir, *ir2;
6634
6635 // delete INODE_REF (0xc)
6636
6637 ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + oldutf8->Length, ALLOC_TAG);
6638 if (!ir) {
6639 ERR("out of memory\n");
6640 ExFreePool(di2);
6641 return STATUS_INSUFFICIENT_RESOURCES;
6642 }
6643
6644 ir->index = fileref->dc->index;
6645 ir->n = oldutf8->Length;
6646 RtlCopyMemory(ir->name, oldutf8->Buffer, ir->n);
6647
6648 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode,
6649 ir, sizeof(INODE_REF) - 1 + ir->n, Batch_DeleteInodeRef);
6650 if (!NT_SUCCESS(Status)) {
6651 ERR("insert_tree_item_batch returned %08x\n", Status);
6652 ExFreePool(ir);
6653 ExFreePool(di2);
6654 return Status;
6655 }
6656
6657 // add INODE_REF (0xc)
6658
6659 ir2 = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->dc->utf8.Length, ALLOC_TAG);
6660 if (!ir2) {
6661 ERR("out of memory\n");
6662 ExFreePool(di2);
6663 return STATUS_INSUFFICIENT_RESOURCES;
6664 }
6665
6666 ir2->index = fileref->dc->index;
6667 ir2->n = fileref->dc->utf8.Length;
6668 RtlCopyMemory(ir2->name, fileref->dc->utf8.Buffer, ir2->n);
6669
6670 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode,
6671 ir2, sizeof(INODE_REF) - 1 + ir2->n, Batch_InodeRef);
6672 if (!NT_SUCCESS(Status)) {
6673 ERR("insert_tree_item_batch returned %08x\n", Status);
6674 ExFreePool(ir2);
6675 ExFreePool(di2);
6676 return Status;
6677 }
6678 } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { // subvolume
6679 ULONG rrlen;
6680 ROOT_REF* rr;
6681
6682 Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, oldutf8, Irp);
6683 if (!NT_SUCCESS(Status)) {
6684 ERR("delete_root_ref returned %08x\n", Status);
6685 ExFreePool(di2);
6686 return Status;
6687 }
6688
6689 rrlen = sizeof(ROOT_REF) - 1 + fileref->dc->utf8.Length;
6690
6691 rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG);
6692 if (!rr) {
6693 ERR("out of memory\n");
6694 ExFreePool(di2);
6695 return STATUS_INSUFFICIENT_RESOURCES;
6696 }
6697
6698 rr->dir = fileref->parent->fcb->inode;
6699 rr->index = fileref->dc->index;
6700 rr->n = fileref->dc->utf8.Length;
6701 RtlCopyMemory(rr->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6702
6703 Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp);
6704 if (!NT_SUCCESS(Status)) {
6705 ERR("add_root_ref returned %08x\n", Status);
6706 ExFreePool(di2);
6707 return Status;
6708 }
6709
6710 Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp);
6711 if (!NT_SUCCESS(Status)) {
6712 ERR("update_root_backref returned %08x\n", Status);
6713 ExFreePool(di2);
6714 return Status;
6715 }
6716 }
6717
6718 // delete DIR_INDEX (0x60)
6719
6720 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6721 fileref->dc->index, NULL, 0, Batch_Delete);
6722 if (!NT_SUCCESS(Status)) {
6723 ERR("insert_tree_item_batch returned %08x\n", Status);
6724 ExFreePool(di2);
6725 return Status;
6726 }
6727
6728 // add DIR_INDEX (0x60)
6729
6730 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6731 fileref->dc->index, di2, disize, Batch_Insert);
6732 if (!NT_SUCCESS(Status)) {
6733 ERR("insert_tree_item_batch returned %08x\n", Status);
6734 ExFreePool(di2);
6735 return Status;
6736 }
6737
6738 if (fileref->oldutf8.Buffer) {
6739 ExFreePool(fileref->oldutf8.Buffer);
6740 fileref->oldutf8.Buffer = NULL;
6741 }
6742 }
6743
6744 fileref->dirty = false;
6745
6746 return STATUS_SUCCESS;
6747 }
6748
6749 static void flush_disk_caches(device_extension* Vcb) {
6750 LIST_ENTRY* le;
6751 ioctl_context context;
6752 ULONG num;
6753
6754 context.left = 0;
6755
6756 le = Vcb->devices.Flink;
6757
6758 while (le != &Vcb->devices) {
6759 device* dev = CONTAINING_RECORD(le, device, list_entry);
6760
6761 if (dev->devobj && !dev->readonly && dev->can_flush)
6762 context.left++;
6763
6764 le = le->Flink;
6765 }
6766
6767 if (context.left == 0)
6768 return;
6769
6770 num = 0;
6771
6772 KeInitializeEvent(&context.Event, NotificationEvent, false);
6773
6774 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG);
6775 if (!context.stripes) {
6776 ERR("out of memory\n");
6777 return;
6778 }
6779
6780 RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left);
6781
6782 le = Vcb->devices.Flink;
6783
6784 while (le != &Vcb->devices) {
6785 device* dev = CONTAINING_RECORD(le, device, list_entry);
6786
6787 if (dev->devobj && !dev->readonly && dev->can_flush) {
6788 PIO_STACK_LOCATION IrpSp;
6789 ioctl_context_stripe* stripe = &context.stripes[num];
6790
6791 RtlZeroMemory(&stripe->apte, sizeof(ATA_PASS_THROUGH_EX));
6792
6793 stripe->apte.Length = sizeof(ATA_PASS_THROUGH_EX);
6794 stripe->apte.TimeOutValue = 5;
6795 stripe->apte.CurrentTaskFile[6] = IDE_COMMAND_FLUSH_CACHE;
6796
6797 stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, false);
6798
6799 if (!stripe->Irp) {
6800 ERR("IoAllocateIrp failed\n");
6801 goto nextdev;
6802 }
6803
6804 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
6805 IrpSp->MajorFunction = IRP_MJ_DEVICE_CONTROL;
6806 IrpSp->FileObject = dev->fileobj;
6807
6808 IrpSp->Parameters.DeviceIoControl.IoControlCode = IOCTL_ATA_PASS_THROUGH;
6809 IrpSp->Parameters.DeviceIoControl.InputBufferLength = sizeof(ATA_PASS_THROUGH_EX);
6810 IrpSp->Parameters.DeviceIoControl.OutputBufferLength = sizeof(ATA_PASS_THROUGH_EX);
6811
6812 stripe->Irp->AssociatedIrp.SystemBuffer = &stripe->apte;
6813 stripe->Irp->Flags |= IRP_BUFFERED_IO | IRP_INPUT_OPERATION;
6814 stripe->Irp->UserBuffer = &stripe->apte;
6815 stripe->Irp->UserIosb = &stripe->iosb;
6816
6817 IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, true, true, true);
6818
6819 IoCallDriver(dev->devobj, stripe->Irp);
6820
6821 nextdev:
6822 num++;
6823 }
6824
6825 le = le->Flink;
6826 }
6827
6828 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
6829
6830 ExFreePool(context.stripes);
6831 }
6832
6833 static NTSTATUS flush_changed_dev_stats(device_extension* Vcb, device* dev, PIRP Irp) {
6834 NTSTATUS Status;
6835 KEY searchkey;
6836 traverse_ptr tp;
6837 uint16_t statslen;
6838 uint64_t* stats;
6839
6840 searchkey.obj_id = 0;
6841 searchkey.obj_type = TYPE_DEV_STATS;
6842 searchkey.offset = dev->devitem.dev_id;
6843
6844 Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, false, Irp);
6845 if (!NT_SUCCESS(Status)) {
6846 ERR("find_item returned %08x\n", Status);
6847 return Status;
6848 }
6849
6850 if (!keycmp(tp.item->key, searchkey)) {
6851 Status = delete_tree_item(Vcb, &tp);
6852 if (!NT_SUCCESS(Status)) {
6853 ERR("delete_tree_item returned %08x\n", Status);
6854 return Status;
6855 }
6856 }
6857
6858 statslen = sizeof(uint64_t) * 5;
6859 stats = ExAllocatePoolWithTag(PagedPool, statslen, ALLOC_TAG);
6860 if (!stats) {
6861 ERR("out of memory\n");
6862 return STATUS_INSUFFICIENT_RESOURCES;
6863 }
6864
6865 RtlCopyMemory(stats, dev->stats, statslen);
6866
6867 Status = insert_tree_item(Vcb, Vcb->dev_root, 0, TYPE_DEV_STATS, dev->devitem.dev_id, stats, statslen, NULL, Irp);
6868 if (!NT_SUCCESS(Status)) {
6869 ERR("insert_tree_item returned %08x\n", Status);
6870 ExFreePool(stats);
6871 return Status;
6872 }
6873
6874 return STATUS_SUCCESS;
6875 }
6876
6877 static NTSTATUS flush_subvol(device_extension* Vcb, root* r, PIRP Irp) {
6878 NTSTATUS Status;
6879
6880 if (r != Vcb->root_root && r != Vcb->chunk_root) {
6881 KEY searchkey;
6882 traverse_ptr tp;
6883 ROOT_ITEM* ri;
6884
6885 searchkey.obj_id = r->id;
6886 searchkey.obj_type = TYPE_ROOT_ITEM;
6887 searchkey.offset = 0xffffffffffffffff;
6888
6889 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
6890 if (!NT_SUCCESS(Status)) {
6891 ERR("error - find_item returned %08x\n", Status);
6892 return Status;
6893 }
6894
6895 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
6896 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id);
6897 return STATUS_INTERNAL_ERROR;
6898 }
6899
6900 ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
6901 if (!ri) {
6902 ERR("out of memory\n");
6903 return STATUS_INSUFFICIENT_RESOURCES;
6904 }
6905
6906 RtlCopyMemory(ri, &r->root_item, sizeof(ROOT_ITEM));
6907
6908 Status = delete_tree_item(Vcb, &tp);
6909 if (!NT_SUCCESS(Status)) {
6910 ERR("delete_tree_item returned %08x\n", Status);
6911 return Status;
6912 }
6913
6914 Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp);
6915 if (!NT_SUCCESS(Status)) {
6916 ERR("insert_tree_item returned %08x\n", Status);
6917 return Status;
6918 }
6919 }
6920
6921 if (r->received) {
6922 KEY searchkey;
6923 traverse_ptr tp;
6924
6925 if (!Vcb->uuid_root) {
6926 root* uuid_root;
6927
6928 TRACE("uuid root doesn't exist, creating it\n");
6929
6930 Status = create_root(Vcb, BTRFS_ROOT_UUID, &uuid_root, false, 0, Irp);
6931
6932 if (!NT_SUCCESS(Status)) {
6933 ERR("create_root returned %08x\n", Status);
6934 return Status;
6935 }
6936
6937 Vcb->uuid_root = uuid_root;
6938 }
6939
6940 RtlCopyMemory(&searchkey.obj_id, &r->root_item.received_uuid, sizeof(uint64_t));
6941 searchkey.obj_type = TYPE_SUBVOL_REC_UUID;
6942 RtlCopyMemory(&searchkey.offset, &r->root_item.received_uuid.uuid[sizeof(uint64_t)], sizeof(uint64_t));
6943
6944 Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, false, Irp);
6945 if (!NT_SUCCESS(Status)) {
6946 ERR("find_item returned %08x\n", Status);
6947 return Status;
6948 }
6949
6950 if (!keycmp(tp.item->key, searchkey)) {
6951 if (tp.item->size + sizeof(uint64_t) <= Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node)) {
6952 uint64_t* ids;
6953
6954 ids = ExAllocatePoolWithTag(PagedPool, tp.item->size + sizeof(uint64_t), ALLOC_TAG);
6955 if (!ids) {
6956 ERR("out of memory\n");
6957 return STATUS_INSUFFICIENT_RESOURCES;
6958 }
6959
6960 RtlCopyMemory(ids, tp.item->data, tp.item->size);
6961 RtlCopyMemory((uint8_t*)ids + tp.item->size, &r->id, sizeof(uint64_t));
6962
6963 Status = delete_tree_item(Vcb, &tp);
6964 if (!NT_SUCCESS(Status)) {
6965 ERR("delete_tree_item returned %08x\n", Status);
6966 ExFreePool(ids);
6967 return Status;
6968 }
6969
6970 Status = insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ids, tp.item->size + sizeof(uint64_t), NULL, Irp);
6971 if (!NT_SUCCESS(Status)) {
6972 ERR("insert_tree_item returned %08x\n", Status);
6973 ExFreePool(ids);
6974 return Status;
6975 }
6976 }
6977 } else {
6978 uint64_t* root_num;
6979
6980 root_num = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t), ALLOC_TAG);
6981 if (!root_num) {
6982 ERR("out of memory\n");
6983 return STATUS_INSUFFICIENT_RESOURCES;
6984 }
6985
6986 *root_num = r->id;
6987
6988 Status = insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, root_num, sizeof(uint64_t), NULL, Irp);
6989 if (!NT_SUCCESS(Status)) {
6990 ERR("insert_tree_item returned %08x\n", Status);
6991 ExFreePool(root_num);
6992 return Status;
6993 }
6994 }
6995
6996 r->received = false;
6997 }
6998
6999 r->dirty = false;
7000
7001 return STATUS_SUCCESS;
7002 }
7003
7004 static NTSTATUS test_not_full(device_extension* Vcb) {
7005 uint64_t reserve, could_alloc, free_space;
7006 LIST_ENTRY* le;
7007
7008 // This function ensures we drop into readonly mode if we're about to leave very little
7009 // space for metadata - this is similar to the "global reserve" of the Linux driver.
7010 // Otherwise we might completely fill our space, at which point due to COW we can't
7011 // delete anything in order to fix this.
7012
7013 reserve = Vcb->extent_root->root_item.bytes_used;
7014 reserve += Vcb->root_root->root_item.bytes_used;
7015 if (Vcb->checksum_root) reserve += Vcb->checksum_root->root_item.bytes_used;
7016
7017 reserve = max(reserve, 0x1000000); // 16 M
7018 reserve = min(reserve, 0x20000000); // 512 M
7019
7020 // Find out how much space would be available for new metadata chunks
7021
7022 could_alloc = 0;
7023
7024 if (Vcb->metadata_flags & BLOCK_FLAG_RAID5) {
7025 uint64_t s1 = 0, s2 = 0, s3 = 0;
7026
7027 le = Vcb->devices.Flink;
7028 while (le != &Vcb->devices) {
7029 device* dev = CONTAINING_RECORD(le, device, list_entry);
7030
7031 if (!dev->readonly) {
7032 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used;
7033
7034 if (space >= s1) {
7035 s3 = s2;
7036 s2 = s1;
7037 s1 = space;
7038 } else if (space >= s2) {
7039 s3 = s2;
7040 s2 = space;
7041 } else if (space >= s3)
7042 s3 = space;
7043 }
7044
7045 le = le->Flink;
7046 }
7047
7048 could_alloc = s3 * 2;
7049 } else if (Vcb->metadata_flags & (BLOCK_FLAG_RAID10 | BLOCK_FLAG_RAID6)) {
7050 uint64_t s1 = 0, s2 = 0, s3 = 0, s4 = 0;
7051
7052 le = Vcb->devices.Flink;
7053 while (le != &Vcb->devices) {
7054 device* dev = CONTAINING_RECORD(le, device, list_entry);
7055
7056 if (!dev->readonly) {
7057 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used;
7058
7059 if (space >= s1) {
7060 s4 = s3;
7061 s3 = s2;
7062 s2 = s1;
7063 s1 = space;
7064 } else if (space >= s2) {
7065 s4 = s3;
7066 s3 = s2;
7067 s2 = space;
7068 } else if (space >= s3) {
7069 s4 = s3;
7070 s3 = space;
7071 } else if (space >= s4)
7072 s4 = space;
7073 }
7074
7075 le = le->Flink;
7076 }
7077
7078 could_alloc = s4 * 2;
7079 } else if (Vcb->metadata_flags & (BLOCK_FLAG_RAID0 | BLOCK_FLAG_RAID1)) {
7080 uint64_t s1 = 0, s2 = 0;
7081
7082 le = Vcb->devices.Flink;
7083 while (le != &Vcb->devices) {
7084 device* dev = CONTAINING_RECORD(le, device, list_entry);
7085
7086 if (!dev->readonly) {
7087 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used;
7088
7089 if (space >= s1) {
7090 s2 = s1;
7091 s1 = space;
7092 } else if (space >= s2)
7093 s2 = space;
7094 }
7095
7096 le = le->Flink;
7097 }
7098
7099 if (Vcb->metadata_flags & BLOCK_FLAG_RAID1)
7100 could_alloc = s2;
7101 else // RAID0
7102 could_alloc = s2 * 2;
7103 } else if (Vcb->metadata_flags & BLOCK_FLAG_DUPLICATE) {
7104 le = Vcb->devices.Flink;
7105 while (le != &Vcb->devices) {
7106 device* dev = CONTAINING_RECORD(le, device, list_entry);
7107
7108 if (!dev->readonly) {
7109 uint64_t space = (dev->devitem.num_bytes - dev->devitem.bytes_used) / 2;
7110
7111 could_alloc = max(could_alloc, space);
7112 }
7113
7114 le = le->Flink;
7115 }
7116 } else { // SINGLE
7117 le = Vcb->devices.Flink;
7118 while (le != &Vcb->devices) {
7119 device* dev = CONTAINING_RECORD(le, device, list_entry);
7120
7121 if (!dev->readonly) {
7122 uint64_t space = dev->devitem.num_bytes - dev->devitem.bytes_used;
7123
7124 could_alloc = max(could_alloc, space);
7125 }
7126
7127 le = le->Flink;
7128 }
7129 }
7130
7131 if (could_alloc >= reserve)
7132 return STATUS_SUCCESS;
7133
7134 free_space = 0;
7135
7136 le = Vcb->chunks.Flink;
7137 while (le != &Vcb->chunks) {
7138 chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
7139
7140 if (!c->reloc && !c->readonly && c->chunk_item->type & BLOCK_FLAG_METADATA) {
7141 free_space += c->chunk_item->size - c->used;
7142
7143 if (free_space + could_alloc >= reserve)
7144 return STATUS_SUCCESS;
7145 }
7146
7147 le = le->Flink;
7148 }
7149
7150 return STATUS_DISK_FULL;
7151 }
7152
7153 static NTSTATUS check_for_orphans_root(device_extension* Vcb, root* r, PIRP Irp) {
7154 NTSTATUS Status;
7155 KEY searchkey;
7156 traverse_ptr tp;
7157 LIST_ENTRY rollback;
7158
7159 TRACE("(%p, %p)\n", Vcb, r);
7160
7161 InitializeListHead(&rollback);
7162
7163 searchkey.obj_id = BTRFS_ORPHAN_INODE_OBJID;
7164 searchkey.obj_type = TYPE_ORPHAN_INODE;
7165 searchkey.offset = 0;
7166
7167 Status = find_item(Vcb, r, &tp, &searchkey, false, Irp);
7168 if (!NT_SUCCESS(Status)) {
7169 ERR("find_item returned %08x\n", Status);
7170 return Status;
7171 }
7172
7173 do {
7174 traverse_ptr next_tp;
7175
7176 if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type))
7177 break;
7178
7179 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
7180 fcb* fcb;
7181
7182 TRACE("removing orphaned inode %I64x\n", tp.item->key.offset);
7183
7184 Status = open_fcb(Vcb, r, tp.item->key.offset, 0, NULL, false, NULL, &fcb, PagedPool, Irp);
7185 if (!NT_SUCCESS(Status))
7186 ERR("open_fcb returned %08x\n", Status);
7187 else {
7188 if (fcb->inode_item.st_nlink == 0) {
7189 if (fcb->type != BTRFS_TYPE_DIRECTORY && fcb->inode_item.st_size > 0) {
7190 Status = excise_extents(Vcb, fcb, 0, sector_align(fcb->inode_item.st_size, Vcb->superblock.sector_size), Irp, &rollback);
7191 if (!NT_SUCCESS(Status)) {
7192 ERR("excise_extents returned %08x\n", Status);
7193 goto end;
7194 }
7195 }
7196
7197 fcb->deleted = true;
7198
7199 mark_fcb_dirty(fcb);
7200 }
7201
7202 free_fcb(fcb);
7203
7204 Status = delete_tree_item(Vcb, &tp);
7205 if (!NT_SUCCESS(Status)) {
7206 ERR("delete_tree_item returned %08x\n", Status);
7207 goto end;
7208 }
7209 }
7210 }
7211
7212 if (find_next_item(Vcb, &tp, &next_tp, false, Irp))
7213 tp = next_tp;
7214 else
7215 break;
7216 } while (true);
7217
7218 Status = STATUS_SUCCESS;
7219
7220 clear_rollback(&rollback);
7221
7222 end:
7223 do_rollback(Vcb, &rollback);
7224
7225 return Status;
7226 }
7227
7228 static NTSTATUS check_for_orphans(device_extension* Vcb, PIRP Irp) {
7229 NTSTATUS Status;
7230 LIST_ENTRY* le;
7231
7232 if (IsListEmpty(&Vcb->dirty_filerefs))
7233 return STATUS_SUCCESS;
7234
7235 le = Vcb->dirty_filerefs.Flink;
7236 while (le != &Vcb->dirty_filerefs) {
7237 file_ref* fr = CONTAINING_RECORD(le, file_ref, list_entry_dirty);
7238
7239 if (!fr->fcb->subvol->checked_for_orphans) {
7240 Status = check_for_orphans_root(Vcb, fr->fcb->subvol, Irp);
7241 if (!NT_SUCCESS(Status)) {
7242 ERR("check_for_orphans_root returned %08x\n", Status);
7243 return Status;
7244 }
7245
7246 fr->fcb->subvol->checked_for_orphans = true;
7247 }
7248
7249 le = le->Flink;
7250 }
7251
7252 return STATUS_SUCCESS;
7253 }
7254
7255 static NTSTATUS do_write2(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
7256 NTSTATUS Status;
7257 LIST_ENTRY *le, batchlist;
7258 bool cache_changed = false;
7259 volume_device_extension* vde;
7260 bool no_cache = false;
7261 #ifdef DEBUG_FLUSH_TIMES
7262 uint64_t filerefs = 0, fcbs = 0;
7263 LARGE_INTEGER freq, time1, time2;
7264 #endif
7265 #ifdef DEBUG_WRITE_LOOPS
7266 UINT loops = 0;
7267 #endif
7268
7269 TRACE("(%p)\n", Vcb);
7270
7271 InitializeListHead(&batchlist);
7272
7273 #ifdef DEBUG_FLUSH_TIMES
7274 time1 = KeQueryPerformanceCounter(&freq);
7275 #endif
7276
7277 Status = check_for_orphans(Vcb, Irp);
7278 if (!NT_SUCCESS(Status)) {
7279 ERR("check_for_orphans returned %08x\n", Status);
7280 return Status;
7281 }
7282
7283 ExAcquireResourceExclusiveLite(&Vcb->dirty_filerefs_lock, true);
7284
7285 while (!IsListEmpty(&Vcb->dirty_filerefs)) {
7286 file_ref* fr = CONTAINING_RECORD(RemoveHeadList(&Vcb->dirty_filerefs), file_ref, list_entry_dirty);
7287
7288 flush_fileref(fr, &batchlist, Irp);
7289 free_fileref(fr);
7290
7291 #ifdef DEBUG_FLUSH_TIMES
7292 filerefs++;
7293 #endif
7294 }
7295
7296 ExReleaseResourceLite(&Vcb->dirty_filerefs_lock);
7297
7298 Status = commit_batch_list(Vcb, &batchlist, Irp);
7299 if (!NT_SUCCESS(Status)) {
7300 ERR("commit_batch_list returned %08x\n", Status);
7301 return Status;
7302 }
7303
7304 #ifdef DEBUG_FLUSH_TIMES
7305 time2 = KeQueryPerformanceCounter(NULL);
7306
7307 ERR("flushed %I64u filerefs in %I64u (freq = %I64u)\n", filerefs, time2.QuadPart - time1.QuadPart, freq.QuadPart);
7308
7309 time1 = KeQueryPerformanceCounter(&freq);
7310 #endif
7311
7312 // We process deleted streams first, so we don't run over our xattr
7313 // limit unless we absolutely have to.
7314 // We also process deleted normal files, to avoid any problems
7315 // caused by inode collisions.
7316
7317 ExAcquireResourceExclusiveLite(&Vcb->dirty_fcbs_lock, true);
7318
7319 le = Vcb->dirty_fcbs.Flink;
7320 while (le != &Vcb->dirty_fcbs) {
7321 fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_dirty);
7322 LIST_ENTRY* le2 = le->Flink;
7323
7324 if (fcb->deleted) {
7325 ExAcquireResourceExclusiveLite(fcb->Header.Resource, true);
7326 Status = flush_fcb(fcb, false, &batchlist, Irp);
7327 ExReleaseResourceLite(fcb->Header.Resource);
7328
7329 free_fcb(fcb);
7330
7331 if (!NT_SUCCESS(Status)) {
7332 ERR("flush_fcb returned %08x\n", Status);
7333 clear_batch_list(Vcb, &batchlist);
7334 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
7335 return Status;
7336 }
7337
7338 #ifdef DEBUG_FLUSH_TIMES
7339 fcbs++;
7340 #endif
7341 }
7342
7343 le = le2;
7344 }
7345
7346 Status = commit_batch_list(Vcb, &batchlist, Irp);
7347 if (!NT_SUCCESS(Status)) {
7348 ERR("commit_batch_list returned %08x\n", Status);
7349 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
7350 return Status;
7351 }
7352
7353 le = Vcb->dirty_fcbs.Flink;
7354 while (le != &Vcb->dirty_fcbs) {
7355 fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_dirty);
7356 LIST_ENTRY* le2 = le->Flink;
7357
7358 if (fcb->subvol != Vcb->root_root) {
7359 ExAcquireResourceExclusiveLite(fcb->Header.Resource, true);
7360 Status = flush_fcb(fcb, false, &batchlist, Irp);
7361 ExReleaseResourceLite(fcb->Header.Resource);
7362 free_fcb(fcb);
7363
7364 if (!NT_SUCCESS(Status)) {
7365 ERR("flush_fcb returned %08x\n", Status);
7366 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
7367 return Status;
7368 }
7369
7370 #ifdef DEBUG_FLUSH_TIMES
7371 fcbs++;
7372 #endif
7373 }
7374
7375 le = le2;
7376 }
7377
7378 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
7379
7380 Status = commit_batch_list(Vcb, &batchlist, Irp);
7381 if (!NT_SUCCESS(Status)) {
7382 ERR("commit_batch_list returned %08x\n", Status);
7383 return Status;
7384 }
7385
7386 #ifdef DEBUG_FLUSH_TIMES
7387 time2 = KeQueryPerformanceCounter(NULL);
7388
7389 ERR("flushed %I64u fcbs in %I64u (freq = %I64u)\n", filerefs, time2.QuadPart - time1.QuadPart, freq.QuadPart);
7390 #endif
7391
7392 // no need to get dirty_subvols_lock here, as we have tree_lock exclusively
7393 while (!IsListEmpty(&Vcb->dirty_subvols)) {
7394 root* r = CONTAINING_RECORD(RemoveHeadList(&Vcb->dirty_subvols), root, list_entry_dirty);
7395
7396 Status = flush_subvol(Vcb, r, Irp);
7397 if (!NT_SUCCESS(Status)) {
7398 ERR("flush_subvol returned %08x\n", Status);
7399 return Status;
7400 }
7401 }
7402
7403 if (!IsListEmpty(&Vcb->drop_roots)) {
7404 Status = drop_roots(Vcb, Irp, rollback);
7405
7406 if (!NT_SUCCESS(Status)) {
7407 ERR("drop_roots returned %08x\n", Status);
7408 return Status;
7409 }
7410 }
7411
7412 Status = update_chunks(Vcb, &batchlist, Irp, rollback);
7413
7414 if (!NT_SUCCESS(Status)) {
7415 ERR("update_chunks returned %08x\n", Status);
7416 return Status;
7417 }
7418
7419 Status = commit_batch_list(Vcb, &batchlist, Irp);
7420
7421 // If only changing superblock, e.g. changing label, we still need to rewrite
7422 // the root tree so the generations match, otherwise you won't be able to mount on Linux.
7423 if (!Vcb->root_root->treeholder.tree || !Vcb->root_root->treeholder.tree->write) {
7424 KEY searchkey;
7425
7426 traverse_ptr tp;
7427
7428 searchkey.obj_id = 0;
7429 searchkey.obj_type = 0;
7430 searchkey.offset = 0;
7431
7432 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
7433 if (!NT_SUCCESS(Status)) {
7434 ERR("error - find_item returned %08x\n", Status);
7435 return Status;
7436 }
7437
7438 Vcb->root_root->treeholder.tree->write = true;
7439 }
7440
7441 // make sure we always update the extent tree
7442 Status = add_root_item_to_cache(Vcb, BTRFS_ROOT_EXTENT, Irp);
7443 if (!NT_SUCCESS(Status)) {
7444 ERR("add_root_item_to_cache returned %08x\n", Status);
7445 return Status;
7446 }
7447
7448 if (Vcb->stats_changed) {
7449 le = Vcb->devices.Flink;
7450 while (le != &Vcb->devices) {
7451 device* dev = CONTAINING_RECORD(le, device, list_entry);
7452
7453 if (dev->stats_changed) {
7454 Status = flush_changed_dev_stats(Vcb, dev, Irp);
7455 if (!NT_SUCCESS(Status)) {
7456 ERR("flush_changed_dev_stats returned %08x\n", Status);
7457 return Status;
7458 }
7459 dev->stats_changed = false;
7460 }
7461
7462 le = le->Flink;
7463 }
7464
7465 Vcb->stats_changed = false;
7466 }
7467
7468 do {
7469 Status = add_parents(Vcb, Irp);
7470 if (!NT_SUCCESS(Status)) {
7471 ERR("add_parents returned %08x\n", Status);
7472 goto end;
7473 }
7474
7475 Status = allocate_tree_extents(Vcb, Irp, rollback);
7476 if (!NT_SUCCESS(Status)) {
7477 ERR("allocate_tree_extents returned %08x\n", Status);
7478 goto end;
7479 }
7480
7481 Status = do_splits(Vcb, Irp, rollback);
7482 if (!NT_SUCCESS(Status)) {
7483 ERR("do_splits returned %08x\n", Status);
7484 goto end;
7485 }
7486
7487 Status = update_chunk_usage(Vcb, Irp, rollback);
7488 if (!NT_SUCCESS(Status)) {
7489 ERR("update_chunk_usage returned %08x\n", Status);
7490 goto end;
7491 }
7492
7493 if (!(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE)) {
7494 if (!no_cache) {
7495 Status = allocate_cache(Vcb, &cache_changed, Irp, rollback);
7496 if (!NT_SUCCESS(Status)) {
7497 WARN("allocate_cache returned %08x\n", Status);
7498 no_cache = true;
7499 cache_changed = false;
7500 }
7501 }
7502 } else {
7503 Status = update_chunk_caches_tree(Vcb, Irp);
7504 if (!NT_SUCCESS(Status)) {
7505 ERR("update_chunk_caches_tree returned %08x\n", Status);
7506 goto end;
7507 }
7508 }
7509
7510 #ifdef DEBUG_WRITE_LOOPS
7511 loops++;
7512
7513 if (cache_changed)
7514 ERR("cache has changed, looping again\n");
7515 #endif
7516 } while (cache_changed || !trees_consistent(Vcb));
7517
7518 #ifdef DEBUG_WRITE_LOOPS
7519 ERR("%u loops\n", loops);
7520 #endif
7521
7522 TRACE("trees consistent\n");
7523
7524 Status = update_root_root(Vcb, no_cache, Irp, rollback);
7525 if (!NT_SUCCESS(Status)) {
7526 ERR("update_root_root returned %08x\n", Status);
7527 goto end;
7528 }
7529
7530 Status = write_trees(Vcb, Irp);
7531 if (!NT_SUCCESS(Status)) {
7532 ERR("write_trees returned %08x\n", Status);
7533 goto end;
7534 }
7535
7536 Status = test_not_full(Vcb);
7537 if (!NT_SUCCESS(Status)) {
7538 ERR("test_not_full returned %08x\n", Status);
7539 goto end;
7540 }
7541
7542 #ifdef DEBUG_PARANOID
7543 le = Vcb->trees.Flink;
7544 while (le != &Vcb->trees) {
7545 tree* t = CONTAINING_RECORD(le, tree, list_entry);
7546 KEY searchkey;
7547 traverse_ptr tp;
7548
7549 searchkey.obj_id = t->header.address;
7550 searchkey.obj_type = TYPE_METADATA_ITEM;
7551 searchkey.offset = 0xffffffffffffffff;
7552
7553 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
7554 if (!NT_SUCCESS(Status)) {
7555 ERR("error - find_item returned %08x\n", Status);
7556 goto end;
7557 }
7558
7559 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
7560 searchkey.obj_id = t->header.address;
7561 searchkey.obj_type = TYPE_EXTENT_ITEM;
7562 searchkey.offset = 0xffffffffffffffff;
7563
7564 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
7565 if (!NT_SUCCESS(Status)) {
7566 ERR("error - find_item returned %08x\n", Status);
7567 goto end;
7568 }
7569
7570 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
7571 ERR("error - could not find entry in extent tree for tree at %I64x\n", t->header.address);
7572 Status = STATUS_INTERNAL_ERROR;
7573 goto end;
7574 }
7575 }
7576
7577 le = le->Flink;
7578 }
7579 #endif
7580
7581 Vcb->superblock.cache_generation = Vcb->superblock.generation;
7582
7583 if (!Vcb->options.no_barrier)
7584 flush_disk_caches(Vcb);
7585
7586 Status = write_superblocks(Vcb, Irp);
7587 if (!NT_SUCCESS(Status)) {
7588 ERR("write_superblocks returned %08x\n", Status);
7589 goto end;
7590 }
7591
7592 vde = Vcb->vde;
7593
7594 if (vde) {
7595 pdo_device_extension* pdode = vde->pdode;
7596
7597 ExAcquireResourceSharedLite(&pdode->child_lock, true);
7598
7599 le = pdode->children.Flink;
7600
7601 while (le != &pdode->children) {
7602 volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry);
7603
7604 vc->generation = Vcb->superblock.generation;
7605 le = le->Flink;
7606 }
7607
7608 ExReleaseResourceLite(&pdode->child_lock);
7609 }
7610
7611 clean_space_cache(Vcb);
7612
7613 le = Vcb->chunks.Flink;
7614 while (le != &Vcb->chunks) {
7615 chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
7616
7617 c->changed = false;
7618 c->space_changed = false;
7619
7620 le = le->Flink;
7621 }
7622
7623 Vcb->superblock.generation++;
7624
7625 Status = STATUS_SUCCESS;
7626
7627 le = Vcb->trees.Flink;
7628 while (le != &Vcb->trees) {
7629 tree* t = CONTAINING_RECORD(le, tree, list_entry);
7630
7631 t->write = false;
7632
7633 le = le->Flink;
7634 }
7635
7636 Vcb->need_write = false;
7637
7638 while (!IsListEmpty(&Vcb->drop_roots)) {
7639 root* r = CONTAINING_RECORD(RemoveHeadList(&Vcb->drop_roots), root, list_entry);
7640
7641 ExDeleteResourceLite(&r->nonpaged->load_tree_lock);
7642 ExFreePool(r->nonpaged);
7643 ExFreePool(r);
7644 }
7645
7646 end:
7647 TRACE("do_write returning %08x\n", Status);
7648
7649 return Status;
7650 }
7651
7652 NTSTATUS do_write(device_extension* Vcb, PIRP Irp) {
7653 LIST_ENTRY rollback;
7654 NTSTATUS Status;
7655
7656 InitializeListHead(&rollback);
7657
7658 Status = do_write2(Vcb, Irp, &rollback);
7659
7660 if (!NT_SUCCESS(Status)) {
7661 ERR("do_write2 returned %08x, dropping into readonly mode\n", Status);
7662 Vcb->readonly = true;
7663 FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_FORCED_CLOSED);
7664 do_rollback(Vcb, &rollback);
7665 } else
7666 clear_rollback(&rollback);
7667
7668 return Status;
7669 }
7670
7671 static void do_flush(device_extension* Vcb) {
7672 NTSTATUS Status;
7673
7674 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, true);
7675
7676 if (Vcb->need_write && !Vcb->readonly)
7677 Status = do_write(Vcb, NULL);
7678 else
7679 Status = STATUS_SUCCESS;
7680
7681 free_trees(Vcb);
7682
7683 if (!NT_SUCCESS(Status))
7684 ERR("do_write returned %08x\n", Status);
7685
7686 ExReleaseResourceLite(&Vcb->tree_lock);
7687 }
7688
7689 _Function_class_(KSTART_ROUTINE)
7690 void __stdcall flush_thread(void* context) {
7691 DEVICE_OBJECT* devobj = context;
7692 device_extension* Vcb = devobj->DeviceExtension;
7693 LARGE_INTEGER due_time;
7694
7695 ObReferenceObject(devobj);
7696
7697 KeInitializeTimer(&Vcb->flush_thread_timer);
7698
7699 due_time.QuadPart = (uint64_t)Vcb->options.flush_interval * -10000000;
7700
7701 KeSetTimer(&Vcb->flush_thread_timer, due_time, NULL);
7702
7703 while (true) {
7704 KeWaitForSingleObject(&Vcb->flush_thread_timer, Executive, KernelMode, false, NULL);
7705
7706 if (!(devobj->Vpb->Flags & VPB_MOUNTED) || Vcb->removing)
7707 break;
7708
7709 if (!Vcb->locked)
7710 do_flush(Vcb);
7711
7712 KeSetTimer(&Vcb->flush_thread_timer, due_time, NULL);
7713 }
7714
7715 ObDereferenceObject(devobj);
7716 KeCancelTimer(&Vcb->flush_thread_timer);
7717
7718 KeSetEvent(&Vcb->flush_thread_finished, 0, false);
7719
7720 PsTerminateSystemThread(STATUS_SUCCESS);
7721 }