ceb37c5a2ae31f8316e57ced57c661daeb9ad3b2
[reactos.git] / drivers / filesystems / btrfs / flushthread.c
1 /* Copyright (c) Mark Harmstone 2016-17
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19 #include <ata.h>
20 #include <ntddscsi.h>
21 #include <ntddstor.h>
22
23 #define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - sizeof(leaf_node))
24
25 // #define DEBUG_WRITE_LOOPS
26
27 typedef struct {
28 KEVENT Event;
29 IO_STATUS_BLOCK iosb;
30 } write_context;
31
32 typedef struct {
33 EXTENT_ITEM_TREE eit;
34 UINT8 type;
35 TREE_BLOCK_REF tbr;
36 } EXTENT_ITEM_TREE2;
37
38 typedef struct {
39 EXTENT_ITEM ei;
40 UINT8 type;
41 TREE_BLOCK_REF tbr;
42 } EXTENT_ITEM_SKINNY_METADATA;
43
44 static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp);
45 static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback);
46
47 #ifndef _MSC_VER // not in mingw yet
48 #define DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED 0x80000000
49 #endif
50
51 _Function_class_(IO_COMPLETION_ROUTINE)
52 #ifdef __REACTOS__
53 static NTSTATUS NTAPI write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
54 #else
55 static NTSTATUS write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
56 #endif
57 write_context* context = conptr;
58
59 UNUSED(DeviceObject);
60
61 context->iosb = Irp->IoStatus;
62 KeSetEvent(&context->Event, 0, FALSE);
63
64 return STATUS_MORE_PROCESSING_REQUIRED;
65 }
66
67 NTSTATUS write_data_phys(_In_ PDEVICE_OBJECT device, _In_ UINT64 address, _In_reads_bytes_(length) void* data, _In_ UINT32 length) {
68 NTSTATUS Status;
69 LARGE_INTEGER offset;
70 PIRP Irp;
71 PIO_STACK_LOCATION IrpSp;
72 write_context context;
73
74 TRACE("(%p, %llx, %p, %x)\n", device, address, data, length);
75
76 RtlZeroMemory(&context, sizeof(write_context));
77
78 KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
79
80 offset.QuadPart = address;
81
82 Irp = IoAllocateIrp(device->StackSize, FALSE);
83
84 if (!Irp) {
85 ERR("IoAllocateIrp failed\n");
86 return STATUS_INSUFFICIENT_RESOURCES;
87 }
88
89 IrpSp = IoGetNextIrpStackLocation(Irp);
90 IrpSp->MajorFunction = IRP_MJ_WRITE;
91
92 if (device->Flags & DO_BUFFERED_IO) {
93 Irp->AssociatedIrp.SystemBuffer = data;
94
95 Irp->Flags = IRP_BUFFERED_IO;
96 } else if (device->Flags & DO_DIRECT_IO) {
97 Irp->MdlAddress = IoAllocateMdl(data, length, FALSE, FALSE, NULL);
98 if (!Irp->MdlAddress) {
99 DbgPrint("IoAllocateMdl failed\n");
100 Status = STATUS_INSUFFICIENT_RESOURCES;
101 goto exit;
102 }
103
104 Status = STATUS_SUCCESS;
105
106 _SEH2_TRY {
107 MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoReadAccess);
108 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
109 Status = _SEH2_GetExceptionCode();
110 } _SEH2_END;
111
112 if (!NT_SUCCESS(Status)) {
113 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
114 IoFreeMdl(Irp->MdlAddress);
115 goto exit;
116 }
117 } else {
118 Irp->UserBuffer = data;
119 }
120
121 IrpSp->Parameters.Write.Length = length;
122 IrpSp->Parameters.Write.ByteOffset = offset;
123
124 Irp->UserIosb = &context.iosb;
125
126 Irp->UserEvent = &context.Event;
127
128 IoSetCompletionRoutine(Irp, write_completion, &context, TRUE, TRUE, TRUE);
129
130 Status = IoCallDriver(device, Irp);
131
132 if (Status == STATUS_PENDING) {
133 KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
134 Status = context.iosb.Status;
135 }
136
137 if (!NT_SUCCESS(Status)) {
138 ERR("IoCallDriver returned %08x\n", Status);
139 }
140
141 if (device->Flags & DO_DIRECT_IO) {
142 MmUnlockPages(Irp->MdlAddress);
143 IoFreeMdl(Irp->MdlAddress);
144 }
145
146 exit:
147 IoFreeIrp(Irp);
148
149 return Status;
150 }
151
152 static void add_trim_entry(device* dev, UINT64 address, UINT64 size) {
153 space* s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
154 if (!s) {
155 ERR("out of memory\n");
156 return;
157 }
158
159 s->address = address;
160 s->size = size;
161 dev->num_trim_entries++;
162
163 InsertTailList(&dev->trim_list, &s->list_entry);
164 }
165
166 static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) {
167 ULONG type;
168
169 if (Vcb->trim && !Vcb->options.no_trim) {
170 if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE)
171 type = BLOCK_FLAG_DUPLICATE;
172 else if (c->chunk_item->type & BLOCK_FLAG_RAID0)
173 type = BLOCK_FLAG_RAID0;
174 else if (c->chunk_item->type & BLOCK_FLAG_RAID1)
175 type = BLOCK_FLAG_DUPLICATE;
176 else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
177 type = BLOCK_FLAG_RAID10;
178 else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
179 type = BLOCK_FLAG_RAID5;
180 else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
181 type = BLOCK_FLAG_RAID6;
182 else // SINGLE
183 type = BLOCK_FLAG_DUPLICATE;
184 }
185
186 while (!IsListEmpty(&c->deleting)) {
187 space* s = CONTAINING_RECORD(c->deleting.Flink, space, list_entry);
188
189 if (Vcb->trim && !Vcb->options.no_trim && (!Vcb->options.no_barrier || !(c->chunk_item->type & BLOCK_FLAG_METADATA))) {
190 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
191
192 if (type == BLOCK_FLAG_DUPLICATE) {
193 UINT16 i;
194
195 for (i = 0; i < c->chunk_item->num_stripes; i++) {
196 if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim)
197 add_trim_entry(c->devices[i], s->address - c->offset + cis[i].offset, s->size);
198 }
199 } else if (type == BLOCK_FLAG_RAID0) {
200 UINT64 startoff, endoff;
201 UINT16 startoffstripe, endoffstripe, i;
202
203 get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
204 get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
205
206 for (i = 0; i < c->chunk_item->num_stripes; i++) {
207 if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim) {
208 UINT64 stripestart, stripeend;
209
210 if (startoffstripe > i)
211 stripestart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
212 else if (startoffstripe == i)
213 stripestart = startoff;
214 else
215 stripestart = startoff - (startoff % c->chunk_item->stripe_length);
216
217 if (endoffstripe > i)
218 stripeend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
219 else if (endoffstripe == i)
220 stripeend = endoff + 1;
221 else
222 stripeend = endoff - (endoff % c->chunk_item->stripe_length);
223
224 if (stripestart != stripeend)
225 add_trim_entry(c->devices[i], stripestart + cis[i].offset, stripeend - stripestart);
226 }
227 }
228 } else if (type == BLOCK_FLAG_RAID10) {
229 UINT64 startoff, endoff;
230 UINT16 sub_stripes, startoffstripe, endoffstripe, i;
231
232 sub_stripes = max(1, c->chunk_item->sub_stripes);
233
234 get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe);
235 get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe);
236
237 startoffstripe *= sub_stripes;
238 endoffstripe *= sub_stripes;
239
240 for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) {
241 ULONG j;
242 UINT64 stripestart, stripeend;
243
244 if (startoffstripe > i)
245 stripestart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
246 else if (startoffstripe == i)
247 stripestart = startoff;
248 else
249 stripestart = startoff - (startoff % c->chunk_item->stripe_length);
250
251 if (endoffstripe > i)
252 stripeend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
253 else if (endoffstripe == i)
254 stripeend = endoff + 1;
255 else
256 stripeend = endoff - (endoff % c->chunk_item->stripe_length);
257
258 if (stripestart != stripeend) {
259 for (j = 0; j < sub_stripes; j++) {
260 if (c->devices[i+j] && c->devices[i+j]->devobj && !c->devices[i+j]->readonly && c->devices[i+j]->trim)
261 add_trim_entry(c->devices[i+j], stripestart + cis[i+j].offset, stripeend - stripestart);
262 }
263 }
264 }
265 }
266 // FIXME - RAID5(?), RAID6(?)
267 }
268
269 RemoveEntryList(&s->list_entry);
270 ExFreePool(s);
271 }
272 }
273
274 typedef struct {
275 DEVICE_MANAGE_DATA_SET_ATTRIBUTES* dmdsa;
276 ATA_PASS_THROUGH_EX apte;
277 PIRP Irp;
278 IO_STATUS_BLOCK iosb;
279 } ioctl_context_stripe;
280
281 typedef struct {
282 KEVENT Event;
283 LONG left;
284 ioctl_context_stripe* stripes;
285 } ioctl_context;
286
287 _Function_class_(IO_COMPLETION_ROUTINE)
288 #ifdef __REACTOS__
289 static NTSTATUS NTAPI ioctl_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
290 #else
291 static NTSTATUS ioctl_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
292 #endif
293 ioctl_context* context = (ioctl_context*)conptr;
294 LONG left2 = InterlockedDecrement(&context->left);
295
296 UNUSED(DeviceObject);
297 UNUSED(Irp);
298
299 if (left2 == 0)
300 KeSetEvent(&context->Event, 0, FALSE);
301
302 return STATUS_MORE_PROCESSING_REQUIRED;
303 }
304
305 static void clean_space_cache(device_extension* Vcb) {
306 LIST_ENTRY* le;
307 chunk* c;
308 ULONG num;
309
310 TRACE("(%p)\n", Vcb);
311
312 ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
313
314 le = Vcb->chunks.Flink;
315 while (le != &Vcb->chunks) {
316 c = CONTAINING_RECORD(le, chunk, list_entry);
317
318 if (c->space_changed) {
319 ExAcquireResourceExclusiveLite(&c->lock, TRUE);
320
321 if (c->space_changed)
322 clean_space_cache_chunk(Vcb, c);
323
324 c->space_changed = FALSE;
325
326 ExReleaseResourceLite(&c->lock);
327 }
328
329 le = le->Flink;
330 }
331
332 ExReleaseResourceLite(&Vcb->chunk_lock);
333
334 if (Vcb->trim && !Vcb->options.no_trim) {
335 ioctl_context context;
336 ULONG total_num;
337
338 context.left = 0;
339
340 le = Vcb->devices.Flink;
341 while (le != &Vcb->devices) {
342 device* dev = CONTAINING_RECORD(le, device, list_entry);
343
344 if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0)
345 context.left++;
346
347 le = le->Flink;
348 }
349
350 if (context.left == 0)
351 return;
352
353 total_num = context.left;
354 num = 0;
355
356 KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
357
358 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG);
359 if (!context.stripes) {
360 ERR("out of memory\n");
361 return;
362 }
363
364 RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left);
365
366 le = Vcb->devices.Flink;
367 while (le != &Vcb->devices) {
368 device* dev = CONTAINING_RECORD(le, device, list_entry);
369
370 if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0) {
371 LIST_ENTRY* le2;
372 ioctl_context_stripe* stripe = &context.stripes[num];
373 DEVICE_DATA_SET_RANGE* ranges;
374 ULONG datalen = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(UINT64)) + (dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE)), i;
375 PIO_STACK_LOCATION IrpSp;
376
377 stripe->dmdsa = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG);
378 if (!stripe->dmdsa) {
379 ERR("out of memory\n");
380 goto nextdev;
381 }
382
383 stripe->dmdsa->Size = sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES);
384 stripe->dmdsa->Action = DeviceDsmAction_Trim;
385 stripe->dmdsa->Flags = DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED;
386 stripe->dmdsa->ParameterBlockOffset = 0;
387 stripe->dmdsa->ParameterBlockLength = 0;
388 stripe->dmdsa->DataSetRangesOffset = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(UINT64));
389 stripe->dmdsa->DataSetRangesLength = dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE);
390
391 ranges = (DEVICE_DATA_SET_RANGE*)((UINT8*)stripe->dmdsa + stripe->dmdsa->DataSetRangesOffset);
392
393 i = 0;
394
395 le2 = dev->trim_list.Flink;
396 while (le2 != &dev->trim_list) {
397 space* s = CONTAINING_RECORD(le2, space, list_entry);
398
399 ranges[i].StartingOffset = s->address;
400 ranges[i].LengthInBytes = s->size;
401 i++;
402
403 le2 = le2->Flink;
404 }
405
406 stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, FALSE);
407
408 if (!stripe->Irp) {
409 ERR("IoAllocateIrp failed\n");
410 goto nextdev;
411 }
412
413 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
414 IrpSp->MajorFunction = IRP_MJ_DEVICE_CONTROL;
415
416 IrpSp->Parameters.DeviceIoControl.IoControlCode = IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES;
417 IrpSp->Parameters.DeviceIoControl.InputBufferLength = datalen;
418 IrpSp->Parameters.DeviceIoControl.OutputBufferLength = 0;
419
420 stripe->Irp->AssociatedIrp.SystemBuffer = stripe->dmdsa;
421 stripe->Irp->Flags |= IRP_BUFFERED_IO;
422 stripe->Irp->UserBuffer = NULL;
423 stripe->Irp->UserIosb = &stripe->iosb;
424
425 IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, TRUE, TRUE, TRUE);
426
427 IoCallDriver(dev->devobj, stripe->Irp);
428
429 nextdev:
430 while (!IsListEmpty(&dev->trim_list)) {
431 space* s = CONTAINING_RECORD(RemoveHeadList(&dev->trim_list), space, list_entry);
432 ExFreePool(s);
433 }
434
435 dev->num_trim_entries = 0;
436
437 num++;
438 }
439
440 le = le->Flink;
441 }
442
443 KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
444
445 for (num = 0; num < total_num; num++) {
446 if (context.stripes[num].dmdsa)
447 ExFreePool(context.stripes[num].dmdsa);
448 }
449
450 ExFreePool(context.stripes);
451 }
452 }
453
454 static BOOL trees_consistent(device_extension* Vcb) {
455 ULONG maxsize = Vcb->superblock.node_size - sizeof(tree_header);
456 LIST_ENTRY* le;
457
458 le = Vcb->trees.Flink;
459 while (le != &Vcb->trees) {
460 tree* t = CONTAINING_RECORD(le, tree, list_entry);
461
462 if (t->write) {
463 if (t->header.num_items == 0 && t->parent) {
464 #ifdef DEBUG_WRITE_LOOPS
465 ERR("empty tree found, looping again\n");
466 #endif
467 return FALSE;
468 }
469
470 if (t->size > maxsize) {
471 #ifdef DEBUG_WRITE_LOOPS
472 ERR("overlarge tree found (%u > %u), looping again\n", t->size, maxsize);
473 #endif
474 return FALSE;
475 }
476
477 if (!t->has_new_address) {
478 #ifdef DEBUG_WRITE_LOOPS
479 ERR("tree found without new address, looping again\n");
480 #endif
481 return FALSE;
482 }
483 }
484
485 le = le->Flink;
486 }
487
488 return TRUE;
489 }
490
491 static NTSTATUS add_parents(device_extension* Vcb, PIRP Irp) {
492 ULONG level;
493 LIST_ENTRY* le;
494
495 for (level = 0; level <= 255; level++) {
496 BOOL nothing_found = TRUE;
497
498 TRACE("level = %u\n", level);
499
500 le = Vcb->trees.Flink;
501 while (le != &Vcb->trees) {
502 tree* t = CONTAINING_RECORD(le, tree, list_entry);
503
504 if (t->write && t->header.level == level) {
505 TRACE("tree %p: root = %llx, level = %x, parent = %p\n", t, t->header.tree_id, t->header.level, t->parent);
506
507 nothing_found = FALSE;
508
509 if (t->parent) {
510 if (!t->parent->write)
511 TRACE("adding tree %p (level %x)\n", t->parent, t->header.level);
512
513 t->parent->write = TRUE;
514 } else if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
515 KEY searchkey;
516 traverse_ptr tp;
517 NTSTATUS Status;
518
519 searchkey.obj_id = t->root->id;
520 searchkey.obj_type = TYPE_ROOT_ITEM;
521 searchkey.offset = 0xffffffffffffffff;
522
523 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
524 if (!NT_SUCCESS(Status)) {
525 ERR("error - find_item returned %08x\n", Status);
526 return Status;
527 }
528
529 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
530 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
531 return STATUS_INTERNAL_ERROR;
532 }
533
534 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry
535 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
536
537 if (!ri) {
538 ERR("out of memory\n");
539 return STATUS_INSUFFICIENT_RESOURCES;
540 }
541
542 RtlCopyMemory(ri, &t->root->root_item, sizeof(ROOT_ITEM));
543
544 Status = delete_tree_item(Vcb, &tp);
545 if (!NT_SUCCESS(Status)) {
546 ERR("delete_tree_item returned %08x\n", Status);
547 ExFreePool(ri);
548 return Status;
549 }
550
551 Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp);
552 if (!NT_SUCCESS(Status)) {
553 ERR("insert_tree_item returned %08x\n", Status);
554 ExFreePool(ri);
555 return Status;
556 }
557 }
558 }
559 }
560
561 le = le->Flink;
562 }
563
564 if (nothing_found)
565 break;
566 }
567
568 return STATUS_SUCCESS;
569 }
570
571 static void add_parents_to_cache(tree* t) {
572 while (t->parent) {
573 t = t->parent;
574 t->write = TRUE;
575 }
576 }
577
578 static BOOL insert_tree_extent_skinny(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64 address, PIRP Irp, LIST_ENTRY* rollback) {
579 NTSTATUS Status;
580 EXTENT_ITEM_SKINNY_METADATA* eism;
581 traverse_ptr insert_tp;
582
583 eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
584 if (!eism) {
585 ERR("out of memory\n");
586 return FALSE;
587 }
588
589 eism->ei.refcount = 1;
590 eism->ei.generation = Vcb->superblock.generation;
591 eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
592 eism->type = TYPE_TREE_BLOCK_REF;
593 eism->tbr.offset = root_id;
594
595 Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, Irp);
596 if (!NT_SUCCESS(Status)) {
597 ERR("insert_tree_item returned %08x\n", Status);
598 ExFreePool(eism);
599 return FALSE;
600 }
601
602 ExAcquireResourceExclusiveLite(&c->lock, TRUE);
603
604 space_list_subtract(c, FALSE, address, Vcb->superblock.node_size, rollback);
605
606 ExReleaseResourceLite(&c->lock);
607
608 add_parents_to_cache(insert_tp.tree);
609
610 return TRUE;
611 }
612
613 BOOL find_metadata_address_in_chunk(device_extension* Vcb, chunk* c, UINT64* address) {
614 LIST_ENTRY* le;
615 space* s;
616
617 TRACE("(%p, %llx, %p)\n", Vcb, c->offset, address);
618
619 if (Vcb->superblock.node_size > c->chunk_item->size - c->used)
620 return FALSE;
621
622 if (!c->cache_loaded) {
623 NTSTATUS Status = load_cache_chunk(Vcb, c, NULL);
624
625 if (!NT_SUCCESS(Status)) {
626 ERR("load_cache_chunk returned %08x\n", Status);
627 return FALSE;
628 }
629 }
630
631 if (IsListEmpty(&c->space_size))
632 return FALSE;
633
634 if (!c->last_alloc_set) {
635 s = CONTAINING_RECORD(c->space.Blink, space, list_entry);
636
637 c->last_alloc = s->address;
638 c->last_alloc_set = TRUE;
639
640 if (s->size >= Vcb->superblock.node_size) {
641 *address = s->address;
642 c->last_alloc += Vcb->superblock.node_size;
643 return TRUE;
644 }
645 }
646
647 le = c->space.Flink;
648 while (le != &c->space) {
649 s = CONTAINING_RECORD(le, space, list_entry);
650
651 if (s->address <= c->last_alloc && s->address + s->size >= c->last_alloc + Vcb->superblock.node_size) {
652 *address = c->last_alloc;
653 c->last_alloc += Vcb->superblock.node_size;
654 return TRUE;
655 }
656
657 le = le->Flink;
658 }
659
660 le = c->space_size.Flink;
661 while (le != &c->space_size) {
662 s = CONTAINING_RECORD(le, space, list_entry_size);
663
664 if (s->size == Vcb->superblock.node_size) {
665 *address = s->address;
666 c->last_alloc = s->address + Vcb->superblock.node_size;
667 return TRUE;
668 } else if (s->size < Vcb->superblock.node_size) {
669 if (le == c->space_size.Flink)
670 return FALSE;
671
672 s = CONTAINING_RECORD(le->Blink, space, list_entry_size);
673
674 *address = s->address;
675 c->last_alloc = s->address + Vcb->superblock.node_size;
676
677 return TRUE;
678 }
679
680 le = le->Flink;
681 }
682
683 s = CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size);
684
685 if (s->size > Vcb->superblock.node_size) {
686 *address = s->address;
687 c->last_alloc = s->address + Vcb->superblock.node_size;
688 return TRUE;
689 }
690
691 return FALSE;
692 }
693
694 static BOOL insert_tree_extent(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64* new_address, PIRP Irp, LIST_ENTRY* rollback) {
695 NTSTATUS Status;
696 UINT64 address;
697 EXTENT_ITEM_TREE2* eit2;
698 traverse_ptr insert_tp;
699
700 TRACE("(%p, %x, %llx, %p, %p, %p, %p)\n", Vcb, level, root_id, c, new_address, rollback);
701
702 if (!find_metadata_address_in_chunk(Vcb, c, &address))
703 return FALSE;
704
705 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
706 BOOL b = insert_tree_extent_skinny(Vcb, level, root_id, c, address, Irp, rollback);
707
708 if (b)
709 *new_address = address;
710
711 return b;
712 }
713
714 eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
715 if (!eit2) {
716 ERR("out of memory\n");
717 return FALSE;
718 }
719
720 eit2->eit.extent_item.refcount = 1;
721 eit2->eit.extent_item.generation = Vcb->superblock.generation;
722 eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
723 eit2->eit.level = level;
724 eit2->type = TYPE_TREE_BLOCK_REF;
725 eit2->tbr.offset = root_id;
726
727 Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, Irp);
728 if (!NT_SUCCESS(Status)) {
729 ERR("insert_tree_item returned %08x\n", Status);
730 ExFreePool(eit2);
731 return FALSE;
732 }
733
734 ExAcquireResourceExclusiveLite(&c->lock, TRUE);
735
736 space_list_subtract(c, FALSE, address, Vcb->superblock.node_size, rollback);
737
738 ExReleaseResourceLite(&c->lock);
739
740 add_parents_to_cache(insert_tp.tree);
741
742 *new_address = address;
743
744 return TRUE;
745 }
746
747 NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
748 NTSTATUS Status;
749 chunk *origchunk = NULL, *c;
750 LIST_ENTRY* le;
751 UINT64 flags, addr;
752
753 if (t->root->id == BTRFS_ROOT_CHUNK)
754 flags = Vcb->system_flags;
755 else
756 flags = Vcb->metadata_flags;
757
758 if (t->has_address) {
759 origchunk = get_chunk_from_address(Vcb, t->header.address);
760
761 if (origchunk && !origchunk->readonly && !origchunk->reloc && origchunk->chunk_item->type == flags &&
762 insert_tree_extent(Vcb, t->header.level, t->root->id, origchunk, &addr, Irp, rollback)) {
763 t->new_address = addr;
764 t->has_new_address = TRUE;
765 return STATUS_SUCCESS;
766 }
767 }
768
769 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
770
771 le = Vcb->chunks.Flink;
772 while (le != &Vcb->chunks) {
773 c = CONTAINING_RECORD(le, chunk, list_entry);
774
775 if (!c->readonly && !c->reloc) {
776 ExAcquireResourceExclusiveLite(&c->lock, TRUE);
777
778 if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
779 if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) {
780 ExReleaseResourceLite(&c->lock);
781 ExReleaseResourceLite(&Vcb->chunk_lock);
782 t->new_address = addr;
783 t->has_new_address = TRUE;
784 return STATUS_SUCCESS;
785 }
786 }
787
788 ExReleaseResourceLite(&c->lock);
789 }
790
791 le = le->Flink;
792 }
793
794 // allocate new chunk if necessary
795
796 Status = alloc_chunk(Vcb, flags, &c, FALSE);
797
798 if (!NT_SUCCESS(Status)) {
799 ERR("alloc_chunk returned %08x\n", Status);
800 ExReleaseResourceLite(&Vcb->chunk_lock);
801 return Status;
802 }
803
804 ExAcquireResourceExclusiveLite(&c->lock, TRUE);
805
806 if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
807 if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) {
808 ExReleaseResourceLite(&c->lock);
809 ExReleaseResourceLite(&Vcb->chunk_lock);
810 t->new_address = addr;
811 t->has_new_address = TRUE;
812 return STATUS_SUCCESS;
813 }
814 }
815
816 ExReleaseResourceLite(&c->lock);
817
818 ExReleaseResourceLite(&Vcb->chunk_lock);
819
820 ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size);
821
822 return STATUS_DISK_FULL;
823 }
824
825 static NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* t, UINT64 parent_root, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) {
826 NTSTATUS Status;
827 UINT64 rc, root;
828
829 TRACE("(%p, %llx, %p)\n", Vcb, address, t);
830
831 rc = get_extent_refcount(Vcb, address, Vcb->superblock.node_size, Irp);
832 if (rc == 0) {
833 ERR("error - refcount for extent %llx was 0\n", address);
834 return STATUS_INTERNAL_ERROR;
835 }
836
837 if (!t || t->parent)
838 root = parent_root;
839 else
840 root = t->header.tree_id;
841
842 Status = decrease_extent_refcount_tree(Vcb, address, Vcb->superblock.node_size, root, level, Irp);
843 if (!NT_SUCCESS(Status)) {
844 ERR("decrease_extent_refcount_tree returned %08x\n", Status);
845 return Status;
846 }
847
848 if (rc == 1) {
849 chunk* c = get_chunk_from_address(Vcb, address);
850
851 if (c) {
852 ExAcquireResourceExclusiveLite(&c->lock, TRUE);
853
854 if (!c->cache_loaded) {
855 Status = load_cache_chunk(Vcb, c, NULL);
856
857 if (!NT_SUCCESS(Status)) {
858 ERR("load_cache_chunk returned %08x\n", Status);
859 ExReleaseResourceLite(&c->lock);
860 return Status;
861 }
862 }
863
864 c->used -= Vcb->superblock.node_size;
865
866 space_list_add(c, address, Vcb->superblock.node_size, rollback);
867
868 ExReleaseResourceLite(&c->lock);
869 } else
870 ERR("could not find chunk for address %llx\n", address);
871 }
872
873 return STATUS_SUCCESS;
874 }
875
876 static NTSTATUS add_changed_extent_ref_edr(changed_extent* ce, EXTENT_DATA_REF* edr, BOOL old) {
877 LIST_ENTRY *le2, *list;
878 changed_extent_ref* cer;
879
880 list = old ? &ce->old_refs : &ce->refs;
881
882 le2 = list->Flink;
883 while (le2 != list) {
884 cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
885
886 if (cer->type == TYPE_EXTENT_DATA_REF && cer->edr.root == edr->root && cer->edr.objid == edr->objid && cer->edr.offset == edr->offset) {
887 cer->edr.count += edr->count;
888 goto end;
889 }
890
891 le2 = le2->Flink;
892 }
893
894 cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
895 if (!cer) {
896 ERR("out of memory\n");
897 return STATUS_INSUFFICIENT_RESOURCES;
898 }
899
900 cer->type = TYPE_EXTENT_DATA_REF;
901 RtlCopyMemory(&cer->edr, edr, sizeof(EXTENT_DATA_REF));
902 InsertTailList(list, &cer->list_entry);
903
904 end:
905 if (old)
906 ce->old_count += edr->count;
907 else
908 ce->count += edr->count;
909
910 return STATUS_SUCCESS;
911 }
912
913 static NTSTATUS add_changed_extent_ref_sdr(changed_extent* ce, SHARED_DATA_REF* sdr, BOOL old) {
914 LIST_ENTRY *le2, *list;
915 changed_extent_ref* cer;
916
917 list = old ? &ce->old_refs : &ce->refs;
918
919 le2 = list->Flink;
920 while (le2 != list) {
921 cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
922
923 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr->offset) {
924 cer->sdr.count += sdr->count;
925 goto end;
926 }
927
928 le2 = le2->Flink;
929 }
930
931 cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
932 if (!cer) {
933 ERR("out of memory\n");
934 return STATUS_INSUFFICIENT_RESOURCES;
935 }
936
937 cer->type = TYPE_SHARED_DATA_REF;
938 RtlCopyMemory(&cer->sdr, sdr, sizeof(SHARED_DATA_REF));
939 InsertTailList(list, &cer->list_entry);
940
941 end:
942 if (old)
943 ce->old_count += sdr->count;
944 else
945 ce->count += sdr->count;
946
947 return STATUS_SUCCESS;
948 }
949
950 static BOOL shared_tree_is_unique(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
951 KEY searchkey;
952 traverse_ptr tp;
953 NTSTATUS Status;
954
955 if (!t->updated_extents && t->has_address) {
956 Status = update_tree_extents(Vcb, t, Irp, rollback);
957 if (!NT_SUCCESS(Status)) {
958 ERR("update_tree_extents returned %08x\n", Status);
959 return FALSE;
960 }
961 }
962
963 searchkey.obj_id = t->header.address;
964 searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
965 searchkey.offset = 0xffffffffffffffff;
966
967 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
968 if (!NT_SUCCESS(Status)) {
969 ERR("error - find_item returned %08x\n", Status);
970 return FALSE;
971 }
972
973 if (tp.item->key.obj_id == t->header.address && (tp.item->key.obj_type == TYPE_METADATA_ITEM || tp.item->key.obj_type == TYPE_EXTENT_ITEM))
974 return FALSE;
975 else
976 return TRUE;
977 }
978
979 static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
980 NTSTATUS Status;
981 UINT64 rc = get_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, Irp);
982 UINT64 flags = get_extent_flags(Vcb, t->header.address, Irp);
983
984 if (rc == 0) {
985 ERR("refcount for extent %llx was 0\n", t->header.address);
986 return STATUS_INTERNAL_ERROR;
987 }
988
989 if (flags & EXTENT_ITEM_SHARED_BACKREFS || t->header.flags & HEADER_FLAG_SHARED_BACKREF || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
990 TREE_BLOCK_REF tbr;
991 BOOL unique = rc > 1 ? FALSE : (t->parent ? shared_tree_is_unique(Vcb, t->parent, Irp, rollback) : FALSE);
992
993 if (t->header.level == 0) {
994 LIST_ENTRY* le;
995
996 le = t->itemlist.Flink;
997 while (le != &t->itemlist) {
998 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
999
1000 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1001 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1002
1003 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1004 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1005
1006 if (ed2->size > 0) {
1007 EXTENT_DATA_REF edr;
1008 changed_extent* ce = NULL;
1009 chunk* c = get_chunk_from_address(Vcb, ed2->address);
1010
1011 if (c) {
1012 LIST_ENTRY* le2;
1013
1014 le2 = c->changed_extents.Flink;
1015 while (le2 != &c->changed_extents) {
1016 changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry);
1017
1018 if (ce2->address == ed2->address) {
1019 ce = ce2;
1020 break;
1021 }
1022
1023 le2 = le2->Flink;
1024 }
1025 }
1026
1027 edr.root = t->root->id;
1028 edr.objid = td->key.obj_id;
1029 edr.offset = td->key.offset - ed2->offset;
1030 edr.count = 1;
1031
1032 if (ce) {
1033 Status = add_changed_extent_ref_edr(ce, &edr, TRUE);
1034 if (!NT_SUCCESS(Status)) {
1035 ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1036 return Status;
1037 }
1038
1039 Status = add_changed_extent_ref_edr(ce, &edr, FALSE);
1040 if (!NT_SUCCESS(Status)) {
1041 ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1042 return Status;
1043 }
1044 }
1045
1046 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp);
1047 if (!NT_SUCCESS(Status)) {
1048 ERR("increase_extent_refcount returned %08x\n", Status);
1049 return Status;
1050 }
1051
1052 if ((flags & EXTENT_ITEM_SHARED_BACKREFS && unique) || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1053 UINT64 sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, t->header.address, Irp);
1054
1055 if (sdrrc > 0) {
1056 SHARED_DATA_REF sdr;
1057
1058 sdr.offset = t->header.address;
1059 sdr.count = 1;
1060
1061 Status = decrease_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0,
1062 t->header.address, ce ? ce->superseded : FALSE, Irp);
1063 if (!NT_SUCCESS(Status)) {
1064 ERR("decrease_extent_refcount returned %08x\n", Status);
1065 return Status;
1066 }
1067
1068 if (ce) {
1069 LIST_ENTRY* le2;
1070
1071 le2 = ce->refs.Flink;
1072 while (le2 != &ce->refs) {
1073 changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1074
1075 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) {
1076 ce->count--;
1077 cer->sdr.count--;
1078 break;
1079 }
1080
1081 le2 = le2->Flink;
1082 }
1083
1084 le2 = ce->old_refs.Flink;
1085 while (le2 != &ce->old_refs) {
1086 changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1087
1088 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) {
1089 ce->old_count--;
1090
1091 if (cer->sdr.count > 1)
1092 cer->sdr.count--;
1093 else {
1094 RemoveEntryList(&cer->list_entry);
1095 ExFreePool(cer);
1096 }
1097
1098 break;
1099 }
1100
1101 le2 = le2->Flink;
1102 }
1103 }
1104 }
1105 }
1106
1107 // FIXME - clear shared flag if unique?
1108 }
1109 }
1110 }
1111
1112 le = le->Flink;
1113 }
1114 } else {
1115 LIST_ENTRY* le;
1116
1117 le = t->itemlist.Flink;
1118 while (le != &t->itemlist) {
1119 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1120
1121 if (!td->inserted) {
1122 tbr.offset = t->root->id;
1123
1124 Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF,
1125 &tbr, &td->key, t->header.level - 1, Irp);
1126 if (!NT_SUCCESS(Status)) {
1127 ERR("increase_extent_refcount returned %08x\n", Status);
1128 return Status;
1129 }
1130
1131 if (unique || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1132 UINT64 sbrrc = find_extent_shared_tree_refcount(Vcb, td->treeholder.address, t->header.address, Irp);
1133
1134 if (sbrrc > 0) {
1135 SHARED_BLOCK_REF sbr;
1136
1137 sbr.offset = t->header.address;
1138
1139 Status = decrease_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
1140 t->header.address, FALSE, Irp);
1141 if (!NT_SUCCESS(Status)) {
1142 ERR("decrease_extent_refcount returned %08x\n", Status);
1143 return Status;
1144 }
1145 }
1146 }
1147
1148 // FIXME - clear shared flag if unique?
1149 }
1150
1151 le = le->Flink;
1152 }
1153 }
1154
1155 if (unique) {
1156 UINT64 sbrrc = find_extent_shared_tree_refcount(Vcb, t->header.address, t->parent->header.address, Irp);
1157
1158 if (sbrrc == 1) {
1159 SHARED_BLOCK_REF sbr;
1160
1161 sbr.offset = t->parent->header.address;
1162
1163 Status = decrease_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
1164 t->parent->header.address, FALSE, Irp);
1165 if (!NT_SUCCESS(Status)) {
1166 ERR("decrease_extent_refcount returned %08x\n", Status);
1167 return Status;
1168 }
1169 }
1170 }
1171
1172 if (t->parent)
1173 tbr.offset = t->parent->header.tree_id;
1174 else
1175 tbr.offset = t->header.tree_id;
1176
1177 Status = increase_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr,
1178 t->parent ? &t->paritem->key : NULL, t->header.level, Irp);
1179 if (!NT_SUCCESS(Status)) {
1180 ERR("increase_extent_refcount returned %08x\n", Status);
1181 return Status;
1182 }
1183
1184 // FIXME - clear shared flag if unique?
1185
1186 t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
1187 }
1188
1189 if (rc > 1 || t->header.tree_id == t->root->id) {
1190 Status = reduce_tree_extent(Vcb, t->header.address, t, t->parent ? t->parent->header.tree_id : t->header.tree_id, t->header.level, Irp, rollback);
1191
1192 if (!NT_SUCCESS(Status)) {
1193 ERR("reduce_tree_extent returned %08x\n", Status);
1194 return Status;
1195 }
1196 }
1197
1198 t->has_address = FALSE;
1199
1200 if ((rc > 1 || t->header.tree_id != t->root->id) && !(flags & EXTENT_ITEM_SHARED_BACKREFS)) {
1201 if (t->header.tree_id == t->root->id) {
1202 flags |= EXTENT_ITEM_SHARED_BACKREFS;
1203 update_extent_flags(Vcb, t->header.address, flags, Irp);
1204 }
1205
1206 if (t->header.level > 0) {
1207 LIST_ENTRY* le;
1208
1209 le = t->itemlist.Flink;
1210 while (le != &t->itemlist) {
1211 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1212
1213 if (!td->inserted) {
1214 if (t->header.tree_id == t->root->id) {
1215 SHARED_BLOCK_REF sbr;
1216
1217 sbr.offset = t->header.address;
1218
1219 Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, &td->key, t->header.level - 1, Irp);
1220 } else {
1221 TREE_BLOCK_REF tbr;
1222
1223 tbr.offset = t->root->id;
1224
1225 Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr, &td->key, t->header.level - 1, Irp);
1226 }
1227
1228 if (!NT_SUCCESS(Status)) {
1229 ERR("increase_extent_refcount returned %08x\n", Status);
1230 return Status;
1231 }
1232 }
1233
1234 le = le->Flink;
1235 }
1236 } else {
1237 LIST_ENTRY* le;
1238
1239 le = t->itemlist.Flink;
1240 while (le != &t->itemlist) {
1241 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1242
1243 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1244 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1245
1246 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1247 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1248
1249 if (ed2->size > 0) {
1250 changed_extent* ce = NULL;
1251 chunk* c = get_chunk_from_address(Vcb, ed2->address);
1252
1253 if (c) {
1254 LIST_ENTRY* le2;
1255
1256 le2 = c->changed_extents.Flink;
1257 while (le2 != &c->changed_extents) {
1258 changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry);
1259
1260 if (ce2->address == ed2->address) {
1261 ce = ce2;
1262 break;
1263 }
1264
1265 le2 = le2->Flink;
1266 }
1267 }
1268
1269 if (t->header.tree_id == t->root->id) {
1270 SHARED_DATA_REF sdr;
1271
1272 sdr.offset = t->header.address;
1273 sdr.count = 1;
1274
1275 if (ce) {
1276 Status = add_changed_extent_ref_sdr(ce, &sdr, TRUE);
1277 if (!NT_SUCCESS(Status)) {
1278 ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1279 return Status;
1280 }
1281
1282 Status = add_changed_extent_ref_sdr(ce, &sdr, FALSE);
1283 if (!NT_SUCCESS(Status)) {
1284 ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1285 return Status;
1286 }
1287 }
1288
1289 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, Irp);
1290 } else {
1291 EXTENT_DATA_REF edr;
1292
1293 edr.root = t->root->id;
1294 edr.objid = td->key.obj_id;
1295 edr.offset = td->key.offset - ed2->offset;
1296 edr.count = 1;
1297
1298 if (ce) {
1299 Status = add_changed_extent_ref_edr(ce, &edr, TRUE);
1300 if (!NT_SUCCESS(Status)) {
1301 ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1302 return Status;
1303 }
1304
1305 Status = add_changed_extent_ref_edr(ce, &edr, FALSE);
1306 if (!NT_SUCCESS(Status)) {
1307 ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1308 return Status;
1309 }
1310 }
1311
1312 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp);
1313 }
1314
1315 if (!NT_SUCCESS(Status)) {
1316 ERR("increase_extent_refcount returned %08x\n", Status);
1317 return Status;
1318 }
1319 }
1320 }
1321 }
1322
1323 le = le->Flink;
1324 }
1325 }
1326 }
1327
1328 t->updated_extents = TRUE;
1329 t->header.tree_id = t->root->id;
1330
1331 return STATUS_SUCCESS;
1332 }
1333
1334 static NTSTATUS allocate_tree_extents(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
1335 LIST_ENTRY* le;
1336 NTSTATUS Status;
1337 BOOL changed = FALSE;
1338 UINT8 max_level = 0, level;
1339
1340 TRACE("(%p)\n", Vcb);
1341
1342 le = Vcb->trees.Flink;
1343 while (le != &Vcb->trees) {
1344 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1345
1346 if (t->write && !t->has_new_address) {
1347 chunk* c;
1348
1349 if (t->has_address) {
1350 c = get_chunk_from_address(Vcb, t->header.address);
1351
1352 if (c) {
1353 if (!c->cache_loaded) {
1354 ExAcquireResourceExclusiveLite(&c->lock, TRUE);
1355
1356 if (!c->cache_loaded) {
1357 Status = load_cache_chunk(Vcb, c, NULL);
1358
1359 if (!NT_SUCCESS(Status)) {
1360 ERR("load_cache_chunk returned %08x\n", Status);
1361 ExReleaseResourceLite(&c->lock);
1362 return Status;
1363 }
1364 }
1365
1366 ExReleaseResourceLite(&c->lock);
1367 }
1368 }
1369 }
1370
1371 Status = get_tree_new_address(Vcb, t, Irp, rollback);
1372 if (!NT_SUCCESS(Status)) {
1373 ERR("get_tree_new_address returned %08x\n", Status);
1374 return Status;
1375 }
1376
1377 TRACE("allocated extent %llx\n", t->new_address);
1378
1379 c = get_chunk_from_address(Vcb, t->new_address);
1380
1381 if (c)
1382 c->used += Vcb->superblock.node_size;
1383 else {
1384 ERR("could not find chunk for address %llx\n", t->new_address);
1385 return STATUS_INTERNAL_ERROR;
1386 }
1387
1388 changed = TRUE;
1389
1390 if (t->header.level > max_level)
1391 max_level = t->header.level;
1392 }
1393
1394 le = le->Flink;
1395 }
1396
1397 if (!changed)
1398 return STATUS_SUCCESS;
1399
1400 level = max_level;
1401 do {
1402 le = Vcb->trees.Flink;
1403 while (le != &Vcb->trees) {
1404 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1405
1406 if (t->write && !t->updated_extents && t->has_address && t->header.level == level) {
1407 Status = update_tree_extents(Vcb, t, Irp, rollback);
1408 if (!NT_SUCCESS(Status)) {
1409 ERR("update_tree_extents returned %08x\n", Status);
1410 return Status;
1411 }
1412 }
1413
1414 le = le->Flink;
1415 }
1416
1417 if (level == 0)
1418 break;
1419
1420 level--;
1421 } while (TRUE);
1422
1423 return STATUS_SUCCESS;
1424 }
1425
1426 static NTSTATUS update_root_root(device_extension* Vcb, BOOL no_cache, PIRP Irp, LIST_ENTRY* rollback) {
1427 LIST_ENTRY* le;
1428 NTSTATUS Status;
1429
1430 TRACE("(%p)\n", Vcb);
1431
1432 le = Vcb->trees.Flink;
1433 while (le != &Vcb->trees) {
1434 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1435
1436 if (t->write && !t->parent) {
1437 if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
1438 KEY searchkey;
1439 traverse_ptr tp;
1440
1441 searchkey.obj_id = t->root->id;
1442 searchkey.obj_type = TYPE_ROOT_ITEM;
1443 searchkey.offset = 0xffffffffffffffff;
1444
1445 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
1446 if (!NT_SUCCESS(Status)) {
1447 ERR("error - find_item returned %08x\n", Status);
1448 return Status;
1449 }
1450
1451 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1452 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1453 return STATUS_INTERNAL_ERROR;
1454 }
1455
1456 TRACE("updating the address for root %llx to %llx\n", searchkey.obj_id, t->new_address);
1457
1458 t->root->root_item.block_number = t->new_address;
1459 t->root->root_item.root_level = t->header.level;
1460 t->root->root_item.generation = Vcb->superblock.generation;
1461 t->root->root_item.generation2 = Vcb->superblock.generation;
1462
1463 // item is guaranteed to be at least sizeof(ROOT_ITEM), due to add_parents
1464
1465 RtlCopyMemory(tp.item->data, &t->root->root_item, sizeof(ROOT_ITEM));
1466 }
1467
1468 t->root->treeholder.address = t->new_address;
1469 t->root->treeholder.generation = Vcb->superblock.generation;
1470 }
1471
1472 le = le->Flink;
1473 }
1474
1475 if (!no_cache && !(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE)) {
1476 ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
1477 Status = update_chunk_caches(Vcb, Irp, rollback);
1478 ExReleaseResourceLite(&Vcb->chunk_lock);
1479
1480 if (!NT_SUCCESS(Status)) {
1481 ERR("update_chunk_caches returned %08x\n", Status);
1482 return Status;
1483 }
1484 }
1485
1486 return STATUS_SUCCESS;
1487 }
1488
1489 NTSTATUS do_tree_writes(device_extension* Vcb, LIST_ENTRY* tree_writes, BOOL no_free) {
1490 chunk* c;
1491 LIST_ENTRY* le;
1492 tree_write* tw;
1493 NTSTATUS Status;
1494 ULONG i, num_bits;
1495 write_data_context* wtc;
1496 ULONG bit_num = 0;
1497 BOOL raid56 = FALSE;
1498
1499 // merge together runs
1500 c = NULL;
1501 le = tree_writes->Flink;
1502 while (le != tree_writes) {
1503 tw = CONTAINING_RECORD(le, tree_write, list_entry);
1504
1505 if (!c || tw->address < c->offset || tw->address >= c->offset + c->chunk_item->size)
1506 c = get_chunk_from_address(Vcb, tw->address);
1507 else {
1508 tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry);
1509
1510 if (tw->address == tw2->address + tw2->length) {
1511 UINT8* data = ExAllocatePoolWithTag(NonPagedPool, tw2->length + tw->length, ALLOC_TAG);
1512
1513 if (!data) {
1514 ERR("out of memory\n");
1515 return STATUS_INSUFFICIENT_RESOURCES;
1516 }
1517
1518 RtlCopyMemory(data, tw2->data, tw2->length);
1519 RtlCopyMemory(&data[tw2->length], tw->data, tw->length);
1520
1521 if (!no_free)
1522 ExFreePool(tw2->data);
1523
1524 tw2->data = data;
1525 tw2->length += tw->length;
1526
1527 if (!no_free) // FIXME - what if we allocated this just now?
1528 ExFreePool(tw->data);
1529
1530 RemoveEntryList(&tw->list_entry);
1531 ExFreePool(tw);
1532
1533 le = tw2->list_entry.Flink;
1534 continue;
1535 }
1536 }
1537
1538 tw->c = c;
1539
1540 if (c->chunk_item->type & (BLOCK_FLAG_RAID5 | BLOCK_FLAG_RAID6))
1541 raid56 = TRUE;
1542
1543 le = le->Flink;
1544 }
1545
1546 num_bits = 0;
1547
1548 le = tree_writes->Flink;
1549 while (le != tree_writes) {
1550 tw = CONTAINING_RECORD(le, tree_write, list_entry);
1551
1552 num_bits++;
1553
1554 le = le->Flink;
1555 }
1556
1557 wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context) * num_bits, ALLOC_TAG);
1558 if (!wtc) {
1559 ERR("out of memory\n");
1560 return STATUS_INSUFFICIENT_RESOURCES;
1561 }
1562
1563 le = tree_writes->Flink;
1564
1565 while (le != tree_writes) {
1566 tw = CONTAINING_RECORD(le, tree_write, list_entry);
1567
1568 TRACE("address: %llx, size: %x\n", tw->address, tw->length);
1569
1570 KeInitializeEvent(&wtc[bit_num].Event, NotificationEvent, FALSE);
1571 InitializeListHead(&wtc[bit_num].stripes);
1572 wtc[bit_num].need_wait = FALSE;
1573 wtc[bit_num].stripes_left = 0;
1574 wtc[bit_num].parity1 = wtc[bit_num].parity2 = wtc[bit_num].scratch = NULL;
1575 wtc[bit_num].mdl = wtc[bit_num].parity1_mdl = wtc[bit_num].parity2_mdl = NULL;
1576
1577 Status = write_data(Vcb, tw->address, tw->data, tw->length, &wtc[bit_num], NULL, NULL, FALSE, 0, HighPagePriority);
1578 if (!NT_SUCCESS(Status)) {
1579 ERR("write_data returned %08x\n", Status);
1580
1581 for (i = 0; i < num_bits; i++) {
1582 free_write_data_stripes(&wtc[i]);
1583 }
1584 ExFreePool(wtc);
1585
1586 return Status;
1587 }
1588
1589 bit_num++;
1590
1591 le = le->Flink;
1592 }
1593
1594 for (i = 0; i < num_bits; i++) {
1595 if (wtc[i].stripes.Flink != &wtc[i].stripes) {
1596 // launch writes and wait
1597 le = wtc[i].stripes.Flink;
1598 while (le != &wtc[i].stripes) {
1599 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
1600
1601 if (stripe->status != WriteDataStatus_Ignore) {
1602 wtc[i].need_wait = TRUE;
1603 IoCallDriver(stripe->device->devobj, stripe->Irp);
1604 }
1605
1606 le = le->Flink;
1607 }
1608 }
1609 }
1610
1611 for (i = 0; i < num_bits; i++) {
1612 if (wtc[i].need_wait)
1613 KeWaitForSingleObject(&wtc[i].Event, Executive, KernelMode, FALSE, NULL);
1614 }
1615
1616 for (i = 0; i < num_bits; i++) {
1617 le = wtc[i].stripes.Flink;
1618 while (le != &wtc[i].stripes) {
1619 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
1620
1621 if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
1622 Status = stripe->iosb.Status;
1623 log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS);
1624 break;
1625 }
1626
1627 le = le->Flink;
1628 }
1629
1630 free_write_data_stripes(&wtc[i]);
1631 }
1632
1633 ExFreePool(wtc);
1634
1635 if (raid56) {
1636 c = NULL;
1637
1638 le = tree_writes->Flink;
1639 while (le != tree_writes) {
1640 tw = CONTAINING_RECORD(le, tree_write, list_entry);
1641
1642 if (tw->c != c) {
1643 c = tw->c;
1644
1645 ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, TRUE);
1646
1647 while (!IsListEmpty(&c->partial_stripes)) {
1648 partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry);
1649
1650 Status = flush_partial_stripe(Vcb, c, ps);
1651
1652 if (ps->bmparr)
1653 ExFreePool(ps->bmparr);
1654
1655 ExFreePool(ps);
1656
1657 if (!NT_SUCCESS(Status)) {
1658 ERR("flush_partial_stripe returned %08x\n", Status);
1659 ExReleaseResourceLite(&c->partial_stripes_lock);
1660 return Status;
1661 }
1662 }
1663
1664 ExReleaseResourceLite(&c->partial_stripes_lock);
1665 }
1666
1667 le = le->Flink;
1668 }
1669 }
1670
1671 return STATUS_SUCCESS;
1672 }
1673
1674 static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) {
1675 ULONG level;
1676 UINT8 *data, *body;
1677 UINT32 crc32;
1678 NTSTATUS Status;
1679 LIST_ENTRY* le;
1680 LIST_ENTRY tree_writes;
1681 tree_write* tw;
1682
1683 TRACE("(%p)\n", Vcb);
1684
1685 InitializeListHead(&tree_writes);
1686
1687 for (level = 0; level <= 255; level++) {
1688 BOOL nothing_found = TRUE;
1689
1690 TRACE("level = %u\n", level);
1691
1692 le = Vcb->trees.Flink;
1693 while (le != &Vcb->trees) {
1694 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1695
1696 if (t->write && t->header.level == level) {
1697 KEY firstitem, searchkey;
1698 LIST_ENTRY* le2;
1699 traverse_ptr tp;
1700
1701 if (!t->has_new_address) {
1702 ERR("error - tried to write tree with no new address\n");
1703 return STATUS_INTERNAL_ERROR;
1704 }
1705
1706 le2 = t->itemlist.Flink;
1707 while (le2 != &t->itemlist) {
1708 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1709 if (!td->ignore) {
1710 firstitem = td->key;
1711 break;
1712 }
1713 le2 = le2->Flink;
1714 }
1715
1716 if (t->parent) {
1717 t->paritem->key = firstitem;
1718 t->paritem->treeholder.address = t->new_address;
1719 t->paritem->treeholder.generation = Vcb->superblock.generation;
1720 }
1721
1722 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
1723 EXTENT_ITEM_TREE* eit;
1724
1725 searchkey.obj_id = t->new_address;
1726 searchkey.obj_type = TYPE_EXTENT_ITEM;
1727 searchkey.offset = Vcb->superblock.node_size;
1728
1729 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
1730 if (!NT_SUCCESS(Status)) {
1731 ERR("error - find_item returned %08x\n", Status);
1732 return Status;
1733 }
1734
1735 if (keycmp(searchkey, tp.item->key)) {
1736 ERR("could not find %llx,%x,%llx in extent_root (found %llx,%x,%llx instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
1737 return STATUS_INTERNAL_ERROR;
1738 }
1739
1740 if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
1741 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
1742 return STATUS_INTERNAL_ERROR;
1743 }
1744
1745 eit = (EXTENT_ITEM_TREE*)tp.item->data;
1746 eit->firstitem = firstitem;
1747 }
1748
1749 nothing_found = FALSE;
1750 }
1751
1752 le = le->Flink;
1753 }
1754
1755 if (nothing_found)
1756 break;
1757 }
1758
1759 TRACE("allocated tree extents\n");
1760
1761 le = Vcb->trees.Flink;
1762 while (le != &Vcb->trees) {
1763 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1764 LIST_ENTRY* le2;
1765 #ifdef DEBUG_PARANOID
1766 UINT32 num_items = 0, size = 0;
1767 BOOL crash = FALSE;
1768 #endif
1769
1770 if (t->write) {
1771 #ifdef DEBUG_PARANOID
1772 BOOL first = TRUE;
1773 KEY lastkey;
1774
1775 le2 = t->itemlist.Flink;
1776 while (le2 != &t->itemlist) {
1777 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1778 if (!td->ignore) {
1779 num_items++;
1780
1781 if (!first) {
1782 if (keycmp(td->key, lastkey) == 0) {
1783 ERR("(%llx,%x,%llx): duplicate key\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1784 crash = TRUE;
1785 } else if (keycmp(td->key, lastkey) == -1) {
1786 ERR("(%llx,%x,%llx): key out of order\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1787 crash = TRUE;
1788 }
1789 } else
1790 first = FALSE;
1791
1792 lastkey = td->key;
1793
1794 if (t->header.level == 0)
1795 size += td->size;
1796 }
1797 le2 = le2->Flink;
1798 }
1799
1800 if (t->header.level == 0)
1801 size += num_items * sizeof(leaf_node);
1802 else
1803 size += num_items * sizeof(internal_node);
1804
1805 if (num_items != t->header.num_items) {
1806 ERR("tree %llx, level %x: num_items was %x, expected %x\n", t->root->id, t->header.level, num_items, t->header.num_items);
1807 crash = TRUE;
1808 }
1809
1810 if (size != t->size) {
1811 ERR("tree %llx, level %x: size was %x, expected %x\n", t->root->id, t->header.level, size, t->size);
1812 crash = TRUE;
1813 }
1814
1815 if (t->header.num_items == 0 && t->parent) {
1816 ERR("tree %llx, level %x: tried to write empty tree with parent\n", t->root->id, t->header.level);
1817 crash = TRUE;
1818 }
1819
1820 if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
1821 ERR("tree %llx, level %x: tried to write overlarge tree (%x > %x)\n", t->root->id, t->header.level, t->size, Vcb->superblock.node_size - sizeof(tree_header));
1822 crash = TRUE;
1823 }
1824
1825 if (crash) {
1826 ERR("tree %p\n", t);
1827 le2 = t->itemlist.Flink;
1828 while (le2 != &t->itemlist) {
1829 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1830 if (!td->ignore) {
1831 ERR("%llx,%x,%llx inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted);
1832 }
1833 le2 = le2->Flink;
1834 }
1835 int3;
1836 }
1837 #endif
1838 t->header.address = t->new_address;
1839 t->header.generation = Vcb->superblock.generation;
1840 t->header.tree_id = t->root->id;
1841 t->header.flags |= HEADER_FLAG_MIXED_BACKREF;
1842 t->header.fs_uuid = Vcb->superblock.uuid;
1843 t->has_address = TRUE;
1844
1845 data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
1846 if (!data) {
1847 ERR("out of memory\n");
1848 Status = STATUS_INSUFFICIENT_RESOURCES;
1849 goto end;
1850 }
1851
1852 body = data + sizeof(tree_header);
1853
1854 RtlCopyMemory(data, &t->header, sizeof(tree_header));
1855 RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header));
1856
1857 if (t->header.level == 0) {
1858 leaf_node* itemptr = (leaf_node*)body;
1859 int i = 0;
1860 UINT8* dataptr = data + Vcb->superblock.node_size;
1861
1862 le2 = t->itemlist.Flink;
1863 while (le2 != &t->itemlist) {
1864 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1865 if (!td->ignore) {
1866 dataptr = dataptr - td->size;
1867
1868 itemptr[i].key = td->key;
1869 itemptr[i].offset = (UINT32)((UINT8*)dataptr - (UINT8*)body);
1870 itemptr[i].size = td->size;
1871 i++;
1872
1873 if (td->size > 0)
1874 RtlCopyMemory(dataptr, td->data, td->size);
1875 }
1876
1877 le2 = le2->Flink;
1878 }
1879 } else {
1880 internal_node* itemptr = (internal_node*)body;
1881 int i = 0;
1882
1883 le2 = t->itemlist.Flink;
1884 while (le2 != &t->itemlist) {
1885 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1886 if (!td->ignore) {
1887 itemptr[i].key = td->key;
1888 itemptr[i].address = td->treeholder.address;
1889 itemptr[i].generation = td->treeholder.generation;
1890 i++;
1891 }
1892
1893 le2 = le2->Flink;
1894 }
1895 }
1896
1897 crc32 = calc_crc32c(0xffffffff, (UINT8*)&((tree_header*)data)->fs_uuid, Vcb->superblock.node_size - sizeof(((tree_header*)data)->csum));
1898 crc32 = ~crc32;
1899 *((UINT32*)data) = crc32;
1900 TRACE("setting crc32 to %08x\n", crc32);
1901
1902 tw = ExAllocatePoolWithTag(PagedPool, sizeof(tree_write), ALLOC_TAG);
1903 if (!tw) {
1904 ERR("out of memory\n");
1905 ExFreePool(data);
1906 Status = STATUS_INSUFFICIENT_RESOURCES;
1907 goto end;
1908 }
1909
1910 tw->address = t->new_address;
1911 tw->length = Vcb->superblock.node_size;
1912 tw->data = data;
1913
1914 if (IsListEmpty(&tree_writes))
1915 InsertTailList(&tree_writes, &tw->list_entry);
1916 else {
1917 BOOL inserted = FALSE;
1918
1919 le2 = tree_writes.Flink;
1920 while (le2 != &tree_writes) {
1921 tree_write* tw2 = CONTAINING_RECORD(le2, tree_write, list_entry);
1922
1923 if (tw2->address > tw->address) {
1924 InsertHeadList(le2->Blink, &tw->list_entry);
1925 inserted = TRUE;
1926 break;
1927 }
1928
1929 le2 = le2->Flink;
1930 }
1931
1932 if (!inserted)
1933 InsertTailList(&tree_writes, &tw->list_entry);
1934 }
1935 }
1936
1937 le = le->Flink;
1938 }
1939
1940 Status = do_tree_writes(Vcb, &tree_writes, FALSE);
1941 if (!NT_SUCCESS(Status)) {
1942 ERR("do_tree_writes returned %08x\n", Status);
1943 goto end;
1944 }
1945
1946 Status = STATUS_SUCCESS;
1947
1948 end:
1949 while (!IsListEmpty(&tree_writes)) {
1950 le = RemoveHeadList(&tree_writes);
1951 tw = CONTAINING_RECORD(le, tree_write, list_entry);
1952
1953 if (tw->data)
1954 ExFreePool(tw->data);
1955
1956 ExFreePool(tw);
1957 }
1958
1959 return Status;
1960 }
1961
1962 static void update_backup_superblock(device_extension* Vcb, superblock_backup* sb, PIRP Irp) {
1963 KEY searchkey;
1964 traverse_ptr tp;
1965
1966 RtlZeroMemory(sb, sizeof(superblock_backup));
1967
1968 sb->root_tree_addr = Vcb->superblock.root_tree_addr;
1969 sb->root_tree_generation = Vcb->superblock.generation;
1970 sb->root_level = Vcb->superblock.root_level;
1971
1972 sb->chunk_tree_addr = Vcb->superblock.chunk_tree_addr;
1973 sb->chunk_tree_generation = Vcb->superblock.chunk_root_generation;
1974 sb->chunk_root_level = Vcb->superblock.chunk_root_level;
1975
1976 searchkey.obj_id = BTRFS_ROOT_EXTENT;
1977 searchkey.obj_type = TYPE_ROOT_ITEM;
1978 searchkey.offset = 0xffffffffffffffff;
1979
1980 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp))) {
1981 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
1982 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
1983
1984 sb->extent_tree_addr = ri->block_number;
1985 sb->extent_tree_generation = ri->generation;
1986 sb->extent_root_level = ri->root_level;
1987 }
1988 }
1989
1990 searchkey.obj_id = BTRFS_ROOT_FSTREE;
1991
1992 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp))) {
1993 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
1994 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
1995
1996 sb->fs_tree_addr = ri->block_number;
1997 sb->fs_tree_generation = ri->generation;
1998 sb->fs_root_level = ri->root_level;
1999 }
2000 }
2001
2002 searchkey.obj_id = BTRFS_ROOT_DEVTREE;
2003
2004 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp))) {
2005 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2006 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2007
2008 sb->dev_root_addr = ri->block_number;
2009 sb->dev_root_generation = ri->generation;
2010 sb->dev_root_level = ri->root_level;
2011 }
2012 }
2013
2014 searchkey.obj_id = BTRFS_ROOT_CHECKSUM;
2015
2016 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp))) {
2017 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2018 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2019
2020 sb->csum_root_addr = ri->block_number;
2021 sb->csum_root_generation = ri->generation;
2022 sb->csum_root_level = ri->root_level;
2023 }
2024 }
2025
2026 sb->total_bytes = Vcb->superblock.total_bytes;
2027 sb->bytes_used = Vcb->superblock.bytes_used;
2028 sb->num_devices = Vcb->superblock.num_devices;
2029 }
2030
2031 typedef struct {
2032 void* context;
2033 UINT8* buf;
2034 PMDL mdl;
2035 device* device;
2036 NTSTATUS Status;
2037 PIRP Irp;
2038 LIST_ENTRY list_entry;
2039 } write_superblocks_stripe;
2040
2041 typedef struct _write_superblocks_context {
2042 KEVENT Event;
2043 LIST_ENTRY stripes;
2044 LONG left;
2045 } write_superblocks_context;
2046
2047 _Function_class_(IO_COMPLETION_ROUTINE)
2048 #ifdef __REACTOS__
2049 static NTSTATUS NTAPI write_superblock_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
2050 #else
2051 static NTSTATUS write_superblock_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
2052 #endif
2053 write_superblocks_stripe* stripe = conptr;
2054 write_superblocks_context* context = stripe->context;
2055
2056 UNUSED(DeviceObject);
2057
2058 stripe->Status = Irp->IoStatus.Status;
2059
2060 if (InterlockedDecrement(&context->left) == 0)
2061 KeSetEvent(&context->Event, 0, FALSE);
2062
2063 return STATUS_MORE_PROCESSING_REQUIRED;
2064 }
2065
2066 static NTSTATUS write_superblock(device_extension* Vcb, device* device, write_superblocks_context* context) {
2067 unsigned int i = 0;
2068
2069 // All the documentation says that the Linux driver only writes one superblock
2070 // if it thinks a disk is an SSD, but this doesn't seem to be the case!
2071
2072 while (superblock_addrs[i] > 0 && device->devitem.num_bytes >= superblock_addrs[i] + sizeof(superblock)) {
2073 ULONG sblen = (ULONG)sector_align(sizeof(superblock), Vcb->superblock.sector_size);
2074 superblock* sb;
2075 UINT32 crc32;
2076 write_superblocks_stripe* stripe;
2077 PIO_STACK_LOCATION IrpSp;
2078
2079 sb = ExAllocatePoolWithTag(NonPagedPool, sblen, ALLOC_TAG);
2080 if (!sb) {
2081 ERR("out of memory\n");
2082 return STATUS_INSUFFICIENT_RESOURCES;
2083 }
2084
2085 RtlCopyMemory(sb, &Vcb->superblock, sizeof(superblock));
2086
2087 if (sblen > sizeof(superblock))
2088 RtlZeroMemory((UINT8*)sb + sizeof(superblock), sblen - sizeof(superblock));
2089
2090 RtlCopyMemory(&sb->dev_item, &device->devitem, sizeof(DEV_ITEM));
2091 sb->sb_phys_addr = superblock_addrs[i];
2092
2093 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum));
2094 RtlCopyMemory(&sb->checksum, &crc32, sizeof(UINT32));
2095
2096 stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_superblocks_stripe), ALLOC_TAG);
2097 if (!stripe) {
2098 ERR("out of memory\n");
2099 ExFreePool(sb);
2100 return STATUS_INSUFFICIENT_RESOURCES;
2101 }
2102
2103 stripe->buf = (UINT8*)sb;
2104
2105 stripe->Irp = IoAllocateIrp(device->devobj->StackSize, FALSE);
2106 if (!stripe->Irp) {
2107 ERR("IoAllocateIrp failed\n");
2108 ExFreePool(stripe);
2109 ExFreePool(sb);
2110 return STATUS_INSUFFICIENT_RESOURCES;
2111 }
2112
2113 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
2114 IrpSp->MajorFunction = IRP_MJ_WRITE;
2115
2116 if (i == 0)
2117 IrpSp->Flags |= SL_WRITE_THROUGH;
2118
2119 if (device->devobj->Flags & DO_BUFFERED_IO) {
2120 stripe->Irp->AssociatedIrp.SystemBuffer = sb;
2121 stripe->mdl = NULL;
2122
2123 stripe->Irp->Flags = IRP_BUFFERED_IO;
2124 } else if (device->devobj->Flags & DO_DIRECT_IO) {
2125 stripe->mdl = IoAllocateMdl(sb, sblen, FALSE, FALSE, NULL);
2126 if (!stripe->mdl) {
2127 ERR("IoAllocateMdl failed\n");
2128 IoFreeIrp(stripe->Irp);
2129 ExFreePool(stripe);
2130 ExFreePool(sb);
2131 return STATUS_INSUFFICIENT_RESOURCES;
2132 }
2133
2134 stripe->Irp->MdlAddress = stripe->mdl;
2135
2136 MmBuildMdlForNonPagedPool(stripe->mdl);
2137 } else {
2138 stripe->Irp->UserBuffer = sb;
2139 stripe->mdl = NULL;
2140 }
2141
2142 IrpSp->Parameters.Write.Length = sblen;
2143 IrpSp->Parameters.Write.ByteOffset.QuadPart = superblock_addrs[i];
2144
2145 IoSetCompletionRoutine(stripe->Irp, write_superblock_completion, stripe, TRUE, TRUE, TRUE);
2146
2147 stripe->context = context;
2148 stripe->device = device;
2149 InsertTailList(&context->stripes, &stripe->list_entry);
2150
2151 context->left++;
2152
2153 i++;
2154 }
2155
2156 if (i == 0)
2157 ERR("no superblocks written!\n");
2158
2159 return STATUS_SUCCESS;
2160 }
2161
2162 static NTSTATUS write_superblocks(device_extension* Vcb, PIRP Irp) {
2163 UINT64 i;
2164 NTSTATUS Status;
2165 LIST_ENTRY* le;
2166 write_superblocks_context context;
2167
2168 TRACE("(%p)\n", Vcb);
2169
2170 le = Vcb->trees.Flink;
2171 while (le != &Vcb->trees) {
2172 tree* t = CONTAINING_RECORD(le, tree, list_entry);
2173
2174 if (t->write && !t->parent) {
2175 if (t->root == Vcb->root_root) {
2176 Vcb->superblock.root_tree_addr = t->new_address;
2177 Vcb->superblock.root_level = t->header.level;
2178 } else if (t->root == Vcb->chunk_root) {
2179 Vcb->superblock.chunk_tree_addr = t->new_address;
2180 Vcb->superblock.chunk_root_generation = t->header.generation;
2181 Vcb->superblock.chunk_root_level = t->header.level;
2182 }
2183 }
2184
2185 le = le->Flink;
2186 }
2187
2188 for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS - 1; i++) {
2189 RtlCopyMemory(&Vcb->superblock.backup[i], &Vcb->superblock.backup[i+1], sizeof(superblock_backup));
2190 }
2191
2192 update_backup_superblock(Vcb, &Vcb->superblock.backup[BTRFS_NUM_BACKUP_ROOTS - 1], Irp);
2193
2194 KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
2195 InitializeListHead(&context.stripes);
2196 context.left = 0;
2197
2198 le = Vcb->devices.Flink;
2199 while (le != &Vcb->devices) {
2200 device* dev = CONTAINING_RECORD(le, device, list_entry);
2201
2202 if (dev->devobj && !dev->readonly) {
2203 Status = write_superblock(Vcb, dev, &context);
2204 if (!NT_SUCCESS(Status)) {
2205 ERR("write_superblock returned %08x\n", Status);
2206 goto end;
2207 }
2208 }
2209
2210 le = le->Flink;
2211 }
2212
2213 if (IsListEmpty(&context.stripes)) {
2214 ERR("error - not writing any superblocks\n");
2215 Status = STATUS_INTERNAL_ERROR;
2216 goto end;
2217 }
2218
2219 le = context.stripes.Flink;
2220 while (le != &context.stripes) {
2221 write_superblocks_stripe* stripe = CONTAINING_RECORD(le, write_superblocks_stripe, list_entry);
2222
2223 IoCallDriver(stripe->device->devobj, stripe->Irp);
2224
2225 le = le->Flink;
2226 }
2227
2228 KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
2229
2230 le = context.stripes.Flink;
2231 while (le != &context.stripes) {
2232 write_superblocks_stripe* stripe = CONTAINING_RECORD(le, write_superblocks_stripe, list_entry);
2233
2234 if (!NT_SUCCESS(stripe->Status)) {
2235 ERR("device %llx returned %08x\n", stripe->device->devitem.dev_id, stripe->Status);
2236 log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS);
2237 Status = stripe->Status;
2238 goto end;
2239 }
2240
2241 le = le->Flink;
2242 }
2243
2244 Status = STATUS_SUCCESS;
2245
2246 end:
2247 while (!IsListEmpty(&context.stripes)) {
2248 write_superblocks_stripe* stripe = CONTAINING_RECORD(RemoveHeadList(&context.stripes), write_superblocks_stripe, list_entry);
2249
2250 if (stripe->mdl) {
2251 if (stripe->mdl->MdlFlags & MDL_PAGES_LOCKED)
2252 MmUnlockPages(stripe->mdl);
2253
2254 IoFreeMdl(stripe->mdl);
2255 }
2256
2257 if (stripe->Irp)
2258 IoFreeIrp(stripe->Irp);
2259
2260 if (stripe->buf)
2261 ExFreePool(stripe->buf);
2262
2263 ExFreePool(stripe);
2264 }
2265
2266 return Status;
2267 }
2268
2269 static NTSTATUS flush_changed_extent(device_extension* Vcb, chunk* c, changed_extent* ce, PIRP Irp, LIST_ENTRY* rollback) {
2270 LIST_ENTRY *le, *le2;
2271 NTSTATUS Status;
2272 UINT64 old_size;
2273
2274 if (ce->count == 0 && ce->old_count == 0) {
2275 while (!IsListEmpty(&ce->refs)) {
2276 changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->refs), changed_extent_ref, list_entry);
2277 ExFreePool(cer);
2278 }
2279
2280 while (!IsListEmpty(&ce->old_refs)) {
2281 changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->old_refs), changed_extent_ref, list_entry);
2282 ExFreePool(cer);
2283 }
2284
2285 goto end;
2286 }
2287
2288 le = ce->refs.Flink;
2289 while (le != &ce->refs) {
2290 changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
2291 UINT32 old_count = 0;
2292
2293 if (cer->type == TYPE_EXTENT_DATA_REF) {
2294 le2 = ce->old_refs.Flink;
2295 while (le2 != &ce->old_refs) {
2296 changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
2297
2298 if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
2299 old_count = cer2->edr.count;
2300 break;
2301 }
2302
2303 le2 = le2->Flink;
2304 }
2305
2306 old_size = ce->old_count > 0 ? ce->old_size : ce->size;
2307
2308 if (cer->edr.count > old_count) {
2309 Status = increase_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset, cer->edr.count - old_count, Irp);
2310
2311 if (!NT_SUCCESS(Status)) {
2312 ERR("increase_extent_refcount_data returned %08x\n", Status);
2313 return Status;
2314 }
2315 }
2316 } else if (cer->type == TYPE_SHARED_DATA_REF) {
2317 le2 = ce->old_refs.Flink;
2318 while (le2 != &ce->old_refs) {
2319 changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
2320
2321 if (cer2->type == TYPE_SHARED_DATA_REF && cer2->sdr.offset == cer->sdr.offset) {
2322 RemoveEntryList(&cer2->list_entry);
2323 ExFreePool(cer2);
2324 break;
2325 }
2326
2327 le2 = le2->Flink;
2328 }
2329 }
2330
2331 le = le->Flink;
2332 }
2333
2334 le = ce->refs.Flink;
2335 while (le != &ce->refs) {
2336 changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
2337 LIST_ENTRY* le3 = le->Flink;
2338 UINT32 old_count = 0;
2339
2340 if (cer->type == TYPE_EXTENT_DATA_REF) {
2341 le2 = ce->old_refs.Flink;
2342 while (le2 != &ce->old_refs) {
2343 changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
2344
2345 if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
2346 old_count = cer2->edr.count;
2347
2348 RemoveEntryList(&cer2->list_entry);
2349 ExFreePool(cer2);
2350 break;
2351 }
2352
2353 le2 = le2->Flink;
2354 }
2355
2356 old_size = ce->old_count > 0 ? ce->old_size : ce->size;
2357
2358 if (cer->edr.count < old_count) {
2359 Status = decrease_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset,
2360 old_count - cer->edr.count, ce->superseded, Irp);
2361
2362 if (!NT_SUCCESS(Status)) {
2363 ERR("decrease_extent_refcount_data returned %08x\n", Status);
2364 return Status;
2365 }
2366 }
2367
2368 if (ce->size != ce->old_size && ce->old_count > 0) {
2369 KEY searchkey;
2370 traverse_ptr tp;
2371 void* data;
2372
2373 searchkey.obj_id = ce->address;
2374 searchkey.obj_type = TYPE_EXTENT_ITEM;
2375 searchkey.offset = ce->old_size;
2376
2377 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
2378 if (!NT_SUCCESS(Status)) {
2379 ERR("error - find_item returned %08x\n", Status);
2380 return Status;
2381 }
2382
2383 if (keycmp(searchkey, tp.item->key)) {
2384 ERR("could not find (%llx,%x,%llx) in extent tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
2385 return STATUS_INTERNAL_ERROR;
2386 }
2387
2388 if (tp.item->size > 0) {
2389 data = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
2390
2391 if (!data) {
2392 ERR("out of memory\n");
2393 return STATUS_INSUFFICIENT_RESOURCES;
2394 }
2395
2396 RtlCopyMemory(data, tp.item->data, tp.item->size);
2397 } else
2398 data = NULL;
2399
2400 Status = insert_tree_item(Vcb, Vcb->extent_root, ce->address, TYPE_EXTENT_ITEM, ce->size, data, tp.item->size, NULL, Irp);
2401 if (!NT_SUCCESS(Status)) {
2402 ERR("insert_tree_item returned %08x\n", Status);
2403 if (data) ExFreePool(data);
2404 return Status;
2405 }
2406
2407 Status = delete_tree_item(Vcb, &tp);
2408 if (!NT_SUCCESS(Status)) {
2409 ERR("delete_tree_item returned %08x\n", Status);
2410 return Status;
2411 }
2412 }
2413 }
2414
2415 RemoveEntryList(&cer->list_entry);
2416 ExFreePool(cer);
2417
2418 le = le3;
2419 }
2420
2421 #ifdef DEBUG_PARANOID
2422 if (!IsListEmpty(&ce->old_refs))
2423 WARN("old_refs not empty\n");
2424 #endif
2425
2426 end:
2427 if (ce->count == 0 && !ce->superseded) {
2428 c->used -= ce->size;
2429 space_list_add(c, ce->address, ce->size, rollback);
2430 }
2431
2432 RemoveEntryList(&ce->list_entry);
2433 ExFreePool(ce);
2434
2435 return STATUS_SUCCESS;
2436 }
2437
2438 void add_checksum_entry(device_extension* Vcb, UINT64 address, ULONG length, UINT32* csum, PIRP Irp) {
2439 KEY searchkey;
2440 traverse_ptr tp, next_tp;
2441 NTSTATUS Status;
2442 UINT64 startaddr, endaddr;
2443 ULONG len;
2444 UINT32* checksums;
2445 RTL_BITMAP bmp;
2446 ULONG* bmparr;
2447 ULONG runlength, index;
2448
2449 searchkey.obj_id = EXTENT_CSUM_ID;
2450 searchkey.obj_type = TYPE_EXTENT_CSUM;
2451 searchkey.offset = address;
2452
2453 // FIXME - create checksum_root if it doesn't exist at all
2454
2455 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
2456 if (Status == STATUS_NOT_FOUND) { // tree is completely empty
2457 if (csum) { // not deleted
2458 ULONG length2 = length;
2459 UINT64 off = address;
2460 UINT32* data = csum;
2461
2462 do {
2463 UINT16 il = (UINT16)min(length2, MAX_CSUM_SIZE / sizeof(UINT32));
2464
2465 checksums = ExAllocatePoolWithTag(PagedPool, il * sizeof(UINT32), ALLOC_TAG);
2466 if (!checksums) {
2467 ERR("out of memory\n");
2468 return;
2469 }
2470
2471 RtlCopyMemory(checksums, data, il * sizeof(UINT32));
2472
2473 Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, checksums,
2474 il * sizeof(UINT32), NULL, Irp);
2475 if (!NT_SUCCESS(Status)) {
2476 ERR("insert_tree_item returned %08x\n", Status);
2477 ExFreePool(checksums);
2478 return;
2479 }
2480
2481 length2 -= il;
2482
2483 if (length2 > 0) {
2484 off += il * Vcb->superblock.sector_size;
2485 data += il;
2486 }
2487 } while (length2 > 0);
2488 }
2489 } else if (!NT_SUCCESS(Status)) {
2490 ERR("find_item returned %08x\n", Status);
2491 return;
2492 } else {
2493 UINT32 tplen;
2494
2495 // FIXME - check entry is TYPE_EXTENT_CSUM?
2496
2497 if (tp.item->key.offset < address && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= address)
2498 startaddr = tp.item->key.offset;
2499 else
2500 startaddr = address;
2501
2502 searchkey.obj_id = EXTENT_CSUM_ID;
2503 searchkey.obj_type = TYPE_EXTENT_CSUM;
2504 searchkey.offset = address + (length * Vcb->superblock.sector_size);
2505
2506 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
2507 if (!NT_SUCCESS(Status)) {
2508 ERR("find_item returned %08x\n", Status);
2509 return;
2510 }
2511
2512 tplen = tp.item->size / sizeof(UINT32);
2513
2514 if (tp.item->key.offset + (tplen * Vcb->superblock.sector_size) >= address + (length * Vcb->superblock.sector_size))
2515 endaddr = tp.item->key.offset + (tplen * Vcb->superblock.sector_size);
2516 else
2517 endaddr = address + (length * Vcb->superblock.sector_size);
2518
2519 TRACE("cs starts at %llx (%x sectors)\n", address, length);
2520 TRACE("startaddr = %llx\n", startaddr);
2521 TRACE("endaddr = %llx\n", endaddr);
2522
2523 len = (ULONG)((endaddr - startaddr) / Vcb->superblock.sector_size);
2524
2525 checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * len, ALLOC_TAG);
2526 if (!checksums) {
2527 ERR("out of memory\n");
2528 return;
2529 }
2530
2531 bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG);
2532 if (!bmparr) {
2533 ERR("out of memory\n");
2534 ExFreePool(checksums);
2535 return;
2536 }
2537
2538 RtlInitializeBitMap(&bmp, bmparr, len);
2539 RtlSetAllBits(&bmp);
2540
2541 searchkey.obj_id = EXTENT_CSUM_ID;
2542 searchkey.obj_type = TYPE_EXTENT_CSUM;
2543 searchkey.offset = address;
2544
2545 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
2546 if (!NT_SUCCESS(Status)) {
2547 ERR("find_item returned %08x\n", Status);
2548 ExFreePool(checksums);
2549 ExFreePool(bmparr);
2550 return;
2551 }
2552
2553 // set bit = free space, cleared bit = allocated sector
2554
2555 while (tp.item->key.offset < endaddr) {
2556 if (tp.item->key.offset >= startaddr) {
2557 if (tp.item->size > 0) {
2558 ULONG itemlen = (ULONG)min((len - (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size) * sizeof(UINT32), tp.item->size);
2559
2560 RtlCopyMemory(&checksums[(tp.item->key.offset - startaddr) / Vcb->superblock.sector_size], tp.item->data, itemlen);
2561 RtlClearBits(&bmp, (ULONG)((tp.item->key.offset - startaddr) / Vcb->superblock.sector_size), itemlen / sizeof(UINT32));
2562 }
2563
2564 Status = delete_tree_item(Vcb, &tp);
2565 if (!NT_SUCCESS(Status)) {
2566 ERR("delete_tree_item returned %08x\n", Status);
2567 ExFreePool(checksums);
2568 ExFreePool(bmparr);
2569 return;
2570 }
2571 }
2572
2573 if (find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) {
2574 tp = next_tp;
2575 } else
2576 break;
2577 }
2578
2579 if (!csum) { // deleted
2580 RtlSetBits(&bmp, (ULONG)((address - startaddr) / Vcb->superblock.sector_size), length);
2581 } else {
2582 RtlCopyMemory(&checksums[(address - startaddr) / Vcb->superblock.sector_size], csum, length * sizeof(UINT32));
2583 RtlClearBits(&bmp, (ULONG)((address - startaddr) / Vcb->superblock.sector_size), length);
2584 }
2585
2586 runlength = RtlFindFirstRunClear(&bmp, &index);
2587
2588 while (runlength != 0) {
2589 do {
2590 UINT16 rl;
2591 UINT64 off;
2592 UINT32* data;
2593
2594 if (runlength * sizeof(UINT32) > MAX_CSUM_SIZE)
2595 rl = MAX_CSUM_SIZE / sizeof(UINT32);
2596 else
2597 rl = (UINT16)runlength;
2598
2599 data = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * rl, ALLOC_TAG);
2600 if (!data) {
2601 ERR("out of memory\n");
2602 ExFreePool(bmparr);
2603 ExFreePool(checksums);
2604 return;
2605 }
2606
2607 RtlCopyMemory(data, &checksums[index], sizeof(UINT32) * rl);
2608
2609 off = startaddr + UInt32x32To64(index, Vcb->superblock.sector_size);
2610
2611 Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, data, sizeof(UINT32) * rl, NULL, Irp);
2612 if (!NT_SUCCESS(Status)) {
2613 ERR("insert_tree_item returned %08x\n", Status);
2614 ExFreePool(data);
2615 ExFreePool(bmparr);
2616 ExFreePool(checksums);
2617 return;
2618 }
2619
2620 runlength -= rl;
2621 index += rl;
2622 } while (runlength > 0);
2623
2624 runlength = RtlFindNextForwardRunClear(&bmp, index, &index);
2625 }
2626
2627 ExFreePool(bmparr);
2628 ExFreePool(checksums);
2629 }
2630 }
2631
2632 static NTSTATUS update_chunk_usage(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
2633 LIST_ENTRY *le = Vcb->chunks.Flink, *le2;
2634 chunk* c;
2635 KEY searchkey;
2636 traverse_ptr tp;
2637 BLOCK_GROUP_ITEM* bgi;
2638 NTSTATUS Status;
2639
2640 TRACE("(%p)\n", Vcb);
2641
2642 ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
2643
2644 while (le != &Vcb->chunks) {
2645 c = CONTAINING_RECORD(le, chunk, list_entry);
2646
2647 ExAcquireResourceExclusiveLite(&c->lock, TRUE);
2648
2649 if (!c->cache_loaded && (!IsListEmpty(&c->changed_extents) || c->used != c->oldused)) {
2650 Status = load_cache_chunk(Vcb, c, NULL);
2651
2652 if (!NT_SUCCESS(Status)) {
2653 ERR("load_cache_chunk returned %08x\n", Status);
2654 ExReleaseResourceLite(&c->lock);
2655 goto end;
2656 }
2657 }
2658
2659 le2 = c->changed_extents.Flink;
2660 while (le2 != &c->changed_extents) {
2661 LIST_ENTRY* le3 = le2->Flink;
2662 changed_extent* ce = CONTAINING_RECORD(le2, changed_extent, list_entry);
2663
2664 Status = flush_changed_extent(Vcb, c, ce, Irp, rollback);
2665 if (!NT_SUCCESS(Status)) {
2666 ERR("flush_changed_extent returned %08x\n", Status);
2667 ExReleaseResourceLite(&c->lock);
2668 goto end;
2669 }
2670
2671 le2 = le3;
2672 }
2673
2674 // This is usually done by update_chunks, but we have to check again in case any new chunks
2675 // have been allocated since.
2676 if (c->created) {
2677 Status = create_chunk(Vcb, c, Irp);
2678 if (!NT_SUCCESS(Status)) {
2679 ERR("create_chunk returned %08x\n", Status);
2680 ExReleaseResourceLite(&c->lock);
2681 goto end;
2682 }
2683 }
2684
2685 if (c->old_cache) {
2686 if (c->old_cache->dirty) {
2687 LIST_ENTRY batchlist;
2688
2689 InitializeListHead(&batchlist);
2690
2691 Status = flush_fcb(c->old_cache, FALSE, &batchlist, Irp);
2692 if (!NT_SUCCESS(Status)) {
2693 ERR("flush_fcb returned %08x\n", Status);
2694 ExReleaseResourceLite(&c->lock);
2695 clear_batch_list(Vcb, &batchlist);
2696 goto end;
2697 }
2698
2699 Status = commit_batch_list(Vcb, &batchlist, Irp);
2700 if (!NT_SUCCESS(Status)) {
2701 ERR("commit_batch_list returned %08x\n", Status);
2702 ExReleaseResourceLite(&c->lock);
2703 goto end;
2704 }
2705 }
2706
2707 free_fcb(Vcb, c->old_cache);
2708 c->old_cache = NULL;
2709 }
2710
2711 if (c->used != c->oldused) {
2712 searchkey.obj_id = c->offset;
2713 searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
2714 searchkey.offset = c->chunk_item->size;
2715
2716 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
2717 if (!NT_SUCCESS(Status)) {
2718 ERR("error - find_item returned %08x\n", Status);
2719 ExReleaseResourceLite(&c->lock);
2720 goto end;
2721 }
2722
2723 if (keycmp(searchkey, tp.item->key)) {
2724 ERR("could not find (%llx,%x,%llx) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
2725 Status = STATUS_INTERNAL_ERROR;
2726 ExReleaseResourceLite(&c->lock);
2727 goto end;
2728 }
2729
2730 if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
2731 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
2732 Status = STATUS_INTERNAL_ERROR;
2733 ExReleaseResourceLite(&c->lock);
2734 goto end;
2735 }
2736
2737 bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
2738 if (!bgi) {
2739 ERR("out of memory\n");
2740 Status = STATUS_INSUFFICIENT_RESOURCES;
2741 ExReleaseResourceLite(&c->lock);
2742 goto end;
2743 }
2744
2745 RtlCopyMemory(bgi, tp.item->data, tp.item->size);
2746 bgi->used = c->used;
2747
2748 TRACE("adjusting usage of chunk %llx to %llx\n", c->offset, c->used);
2749
2750 Status = delete_tree_item(Vcb, &tp);
2751 if (!NT_SUCCESS(Status)) {
2752 ERR("delete_tree_item returned %08x\n", Status);
2753 ExFreePool(bgi);
2754 ExReleaseResourceLite(&c->lock);
2755 goto end;
2756 }
2757
2758 Status = insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, Irp);
2759 if (!NT_SUCCESS(Status)) {
2760 ERR("insert_tree_item returned %08x\n", Status);
2761 ExFreePool(bgi);
2762 ExReleaseResourceLite(&c->lock);
2763 goto end;
2764 }
2765
2766 TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
2767
2768 Vcb->superblock.bytes_used += c->used - c->oldused;
2769
2770 TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
2771
2772 c->oldused = c->used;
2773 }
2774
2775 ExReleaseResourceLite(&c->lock);
2776
2777 le = le->Flink;
2778 }
2779
2780 Status = STATUS_SUCCESS;
2781
2782 end:
2783 ExReleaseResourceLite(&Vcb->chunk_lock);
2784
2785 return Status;
2786 }
2787
2788 static void get_first_item(tree* t, KEY* key) {
2789 LIST_ENTRY* le;
2790
2791 le = t->itemlist.Flink;
2792 while (le != &t->itemlist) {
2793 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2794
2795 *key = td->key;
2796 return;
2797 }
2798 }
2799
2800 static NTSTATUS split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, UINT32 numitems, UINT32 size) {
2801 tree *nt, *pt;
2802 tree_data* td;
2803 tree_data* oldlastitem;
2804
2805 TRACE("splitting tree in %llx at (%llx,%x,%llx)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);
2806
2807 nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2808 if (!nt) {
2809 ERR("out of memory\n");
2810 return STATUS_INSUFFICIENT_RESOURCES;
2811 }
2812
2813 RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header));
2814 nt->header.address = 0;
2815 nt->header.generation = Vcb->superblock.generation;
2816 nt->header.num_items = t->header.num_items - numitems;
2817 nt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
2818
2819 nt->has_address = FALSE;
2820 nt->Vcb = Vcb;
2821 nt->parent = t->parent;
2822
2823 #ifdef DEBUG_PARANOID
2824 if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
2825 #endif
2826
2827 nt->root = t->root;
2828 nt->new_address = 0;
2829 nt->has_new_address = FALSE;
2830 nt->updated_extents = FALSE;
2831 nt->uniqueness_determined = TRUE;
2832 nt->is_unique = TRUE;
2833 nt->list_entry_hash.Flink = NULL;
2834 nt->buf = NULL;
2835 InitializeListHead(&nt->itemlist);
2836
2837 oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry);
2838
2839 nt->itemlist.Flink = &newfirstitem->list_entry;
2840 nt->itemlist.Blink = t->itemlist.Blink;
2841 nt->itemlist.Flink->Blink = &nt->itemlist;
2842 nt->itemlist.Blink->Flink = &nt->itemlist;
2843
2844 t->itemlist.Blink = &oldlastitem->list_entry;
2845 t->itemlist.Blink->Flink = &t->itemlist;
2846
2847 nt->size = t->size - size;
2848 t->size = size;
2849 t->header.num_items = numitems;
2850 nt->write = TRUE;
2851
2852 InsertTailList(&Vcb->trees, &nt->list_entry);
2853
2854 if (nt->header.level > 0) {
2855 LIST_ENTRY* le = nt->itemlist.Flink;
2856
2857 while (le != &nt->itemlist) {
2858 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
2859
2860 if (td2->treeholder.tree) {
2861 td2->treeholder.tree->parent = nt;
2862 #ifdef DEBUG_PARANOID
2863 if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
2864 #endif
2865 }
2866
2867 le = le->Flink;
2868 }
2869 } else {
2870 LIST_ENTRY* le = nt->itemlist.Flink;
2871
2872 while (le != &nt->itemlist) {
2873 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
2874
2875 if (!td2->inserted && td2->data) {
2876 UINT8* data = ExAllocatePoolWithTag(PagedPool, td2->size, ALLOC_TAG);
2877
2878 if (!data) {
2879 ERR("out of memory\n");
2880 return STATUS_INSUFFICIENT_RESOURCES;
2881 }
2882
2883 RtlCopyMemory(data, td2->data, td2->size);
2884 td2->data = data;
2885 td2->inserted = TRUE;
2886 }
2887
2888 le = le->Flink;
2889 }
2890 }
2891
2892 if (nt->parent) {
2893 td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
2894 if (!td) {
2895 ERR("out of memory\n");
2896 return STATUS_INSUFFICIENT_RESOURCES;
2897 }
2898
2899 td->key = newfirstitem->key;
2900
2901 InsertHeadList(&t->paritem->list_entry, &td->list_entry);
2902
2903 td->ignore = FALSE;
2904 td->inserted = TRUE;
2905 td->treeholder.tree = nt;
2906 nt->paritem = td;
2907
2908 nt->parent->header.num_items++;
2909 nt->parent->size += sizeof(internal_node);
2910
2911 goto end;
2912 }
2913
2914 TRACE("adding new tree parent\n");
2915
2916 if (nt->header.level == 255) {
2917 ERR("cannot add parent to tree at level 255\n");
2918 return STATUS_INTERNAL_ERROR;
2919 }
2920
2921 pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2922 if (!pt) {
2923 ERR("out of memory\n");
2924 return STATUS_INSUFFICIENT_RESOURCES;
2925 }
2926
2927 RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
2928 pt->header.address = 0;
2929 pt->header.num_items = 2;
2930 pt->header.level = nt->header.level + 1;
2931 pt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
2932
2933 pt->has_address = FALSE;
2934 pt->Vcb = Vcb;
2935 pt->parent = NULL;
2936 pt->paritem = NULL;
2937 pt->root = t->root;
2938 pt->new_address = 0;
2939 pt->has_new_address = FALSE;
2940 pt->updated_extents = FALSE;
2941 pt->size = pt->header.num_items * sizeof(internal_node);
2942 pt->uniqueness_determined = TRUE;
2943 pt->is_unique = TRUE;
2944 pt->list_entry_hash.Flink = NULL;
2945 pt->buf = NULL;
2946 InitializeListHead(&pt->itemlist);
2947
2948 InsertTailList(&Vcb->trees, &pt->list_entry);
2949
2950 td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
2951 if (!td) {
2952 ERR("out of memory\n");
2953 return STATUS_INSUFFICIENT_RESOURCES;
2954 }
2955
2956 get_first_item(t, &td->key);
2957 td->ignore = FALSE;
2958 td->inserted = FALSE;
2959 td->treeholder.address = 0;
2960 td->treeholder.generation = Vcb->superblock.generation;
2961 td->treeholder.tree = t;
2962 InsertTailList(&pt->itemlist, &td->list_entry);
2963 t->paritem = td;
2964
2965 td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
2966 if (!td) {
2967 ERR("out of memory\n");
2968 return STATUS_INSUFFICIENT_RESOURCES;
2969 }
2970
2971 td->key = newfirstitem->key;
2972 td->ignore = FALSE;
2973 td->inserted = FALSE;
2974 td->treeholder.address = 0;
2975 td->treeholder.generation = Vcb->superblock.generation;
2976 td->treeholder.tree = nt;
2977 InsertTailList(&pt->itemlist, &td->list_entry);
2978 nt->paritem = td;
2979
2980 pt->write = TRUE;
2981
2982 t->root->treeholder.tree = pt;
2983
2984 t->parent = pt;
2985 nt->parent = pt;
2986
2987 #ifdef DEBUG_PARANOID
2988 if (t->parent && t->parent->header.level <= t->header.level) int3;
2989 if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
2990 #endif
2991
2992 end:
2993 t->root->root_item.bytes_used += Vcb->superblock.node_size;
2994
2995 return STATUS_SUCCESS;
2996 }
2997
2998 static NTSTATUS split_tree(device_extension* Vcb, tree* t) {
2999 LIST_ENTRY* le;
3000 UINT32 size, ds, numitems;
3001
3002 size = 0;
3003 numitems = 0;
3004
3005 // FIXME - naïve implementation: maximizes number of filled trees
3006
3007 le = t->itemlist.Flink;
3008 while (le != &t->itemlist) {
3009 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3010
3011 if (!td->ignore) {
3012 if (t->header.level == 0)
3013 ds = sizeof(leaf_node) + td->size;
3014 else
3015 ds = sizeof(internal_node);
3016
3017 if (numitems == 0 && ds > Vcb->superblock.node_size - sizeof(tree_header)) {
3018 ERR("(%llx,%x,%llx) in tree %llx is too large (%x > %x)\n",
3019 td->key.obj_id, td->key.obj_type, td->key.offset, t->root->id,
3020 ds, Vcb->superblock.node_size - sizeof(tree_header));
3021 return STATUS_INTERNAL_ERROR;
3022 }
3023
3024 // FIXME - move back if previous item was deleted item with same key
3025 if (size + ds > Vcb->superblock.node_size - sizeof(tree_header))
3026 return split_tree_at(Vcb, t, td, numitems, size);
3027
3028 size += ds;
3029 numitems++;
3030 }
3031
3032 le = le->Flink;
3033 }
3034
3035 return STATUS_SUCCESS;
3036 }
3037
3038 BOOL is_tree_unique(device_extension* Vcb, tree* t, PIRP Irp) {
3039 KEY searchkey;
3040 traverse_ptr tp;
3041 NTSTATUS Status;
3042 BOOL ret = FALSE;
3043 EXTENT_ITEM* ei;
3044 UINT8* type;
3045
3046 if (t->uniqueness_determined)
3047 return t->is_unique;
3048
3049 if (t->parent && !is_tree_unique(Vcb, t->parent, Irp))
3050 goto end;
3051
3052 if (t->has_address) {
3053 searchkey.obj_id = t->header.address;
3054 searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
3055 searchkey.offset = 0xffffffffffffffff;
3056
3057 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
3058 if (!NT_SUCCESS(Status)) {
3059 ERR("error - find_item returned %08x\n", Status);
3060 goto end;
3061 }
3062
3063 if (tp.item->key.obj_id != t->header.address || (tp.item->key.obj_type != TYPE_METADATA_ITEM && tp.item->key.obj_type != TYPE_EXTENT_ITEM))
3064 goto end;
3065
3066 if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size == sizeof(EXTENT_ITEM_V0))
3067 goto end;
3068
3069 if (tp.item->size < sizeof(EXTENT_ITEM))
3070 goto end;
3071
3072 ei = (EXTENT_ITEM*)tp.item->data;
3073
3074 if (ei->refcount > 1)
3075 goto end;
3076
3077 if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) {
3078 EXTENT_ITEM2* ei2;
3079
3080 if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2))
3081 goto end;
3082
3083 ei2 = (EXTENT_ITEM2*)&ei[1];
3084 type = (UINT8*)&ei2[1];
3085 } else
3086 type = (UINT8*)&ei[1];
3087
3088 if (type >= tp.item->data + tp.item->size || *type != TYPE_TREE_BLOCK_REF)
3089 goto end;
3090 }
3091
3092 ret = TRUE;
3093
3094 end:
3095 t->is_unique = ret;
3096 t->uniqueness_determined = TRUE;
3097
3098 return ret;
3099 }
3100
3101 static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, BOOL* done, BOOL* done_deletions, PIRP Irp, LIST_ENTRY* rollback) {
3102 LIST_ENTRY* le;
3103 tree_data* nextparitem = NULL;
3104 NTSTATUS Status;
3105 tree *next_tree, *par;
3106 BOOL loaded;
3107
3108 *done = FALSE;
3109
3110 TRACE("trying to amalgamate tree in root %llx, level %x (size %u)\n", t->root->id, t->header.level, t->size);
3111
3112 // FIXME - doesn't capture everything, as it doesn't ascend
3113 le = t->paritem->list_entry.Flink;
3114 while (le != &t->parent->itemlist) {
3115 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3116
3117 if (!td->ignore) {
3118 nextparitem = td;
3119 break;
3120 }
3121
3122 le = le->Flink;
3123 }
3124
3125 if (!nextparitem)
3126 return STATUS_SUCCESS;
3127
3128 TRACE("nextparitem: key = %llx,%x,%llx\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset);
3129
3130 Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, &loaded, NULL);
3131 if (!NT_SUCCESS(Status)) {
3132 ERR("do_load_tree returned %08x\n", Status);
3133 return Status;
3134 }
3135
3136 if (!is_tree_unique(Vcb, nextparitem->treeholder.tree, Irp))
3137 return STATUS_SUCCESS;
3138
3139 next_tree = nextparitem->treeholder.tree;
3140
3141 if (!next_tree->updated_extents && next_tree->has_address) {
3142 Status = update_tree_extents(Vcb, next_tree, Irp, rollback);
3143 if (!NT_SUCCESS(Status)) {
3144 ERR("update_tree_extents returned %08x\n", Status);
3145 return Status;
3146 }
3147 }
3148
3149 if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) {
3150 // merge two trees into one
3151
3152 t->header.num_items += next_tree->header.num_items;
3153 t->size += next_tree->size;
3154
3155 if (next_tree->header.level > 0) {
3156 le = next_tree->itemlist.Flink;
3157
3158 while (le != &next_tree->itemlist) {
3159 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
3160
3161 if (td2->treeholder.tree) {
3162 td2->treeholder.tree->parent = t;
3163 #ifdef DEBUG_PARANOID
3164 if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
3165 #endif
3166 }
3167
3168 td2->inserted = TRUE;
3169 le = le->Flink;
3170 }
3171 } else {
3172 le = next_tree->itemlist.Flink;
3173
3174 while (le != &next_tree->itemlist) {
3175 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
3176
3177 if (!td2->inserted && td2->data) {
3178 UINT8* data = ExAllocatePoolWithTag(PagedPool, td2->size, ALLOC_TAG);
3179
3180 if (!data) {
3181 ERR("out of memory\n");
3182 return STATUS_INSUFFICIENT_RESOURCES;
3183 }
3184
3185 RtlCopyMemory(data, td2->data, td2->size);
3186 td2->data = data;
3187 td2->inserted = TRUE;
3188 }
3189
3190 le = le->Flink;
3191 }
3192 }
3193
3194 t->itemlist.Blink->Flink = next_tree->itemlist.Flink;
3195 t->itemlist.Blink->Flink->Blink = t->itemlist.Blink;
3196 t->itemlist.Blink = next_tree->itemlist.Blink;
3197 t->itemlist.Blink->Flink = &t->itemlist;
3198
3199 next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist;
3200
3201 next_tree->header.num_items = 0;
3202 next_tree->size = 0;
3203
3204 if (next_tree->has_new_address) { // delete associated EXTENT_ITEM
3205 Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback);
3206
3207 if (!NT_SUCCESS(Status)) {
3208 ERR("reduce_tree_extent returned %08x\n", Status);
3209 return Status;
3210 }
3211 } else if (next_tree->has_address) {
3212 Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback);
3213
3214 if (!NT_SUCCESS(Status)) {
3215 ERR("reduce_tree_extent returned %08x\n", Status);
3216 return Status;
3217 }
3218 }
3219
3220 if (!nextparitem->ignore) {
3221 nextparitem->ignore = TRUE;
3222 next_tree->parent->header.num_items--;
3223 next_tree->parent->size -= sizeof(internal_node);
3224
3225 *done_deletions = TRUE;
3226 }
3227
3228 par = next_tree->parent;
3229 while (par) {
3230 par->write = TRUE;
3231 par = par->parent;
3232 }
3233
3234 RemoveEntryList(&nextparitem->list_entry);
3235 ExFreePool(next_tree->paritem);
3236 next_tree->paritem = NULL;
3237
3238 next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
3239
3240 free_tree(next_tree);
3241
3242 *done = TRUE;
3243 } else {
3244 // rebalance by moving items from second tree into first
3245 ULONG avg_size = (t->size + next_tree->size) / 2;
3246 KEY firstitem = {0, 0, 0};
3247 BOOL changed = FALSE;
3248
3249 TRACE("attempting rebalance\n");
3250
3251 le = next_tree->itemlist.Flink;
3252 while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) {
3253 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3254 ULONG size;
3255
3256 if (!td->ignore) {
3257 if (next_tree->header.level == 0)
3258 size = sizeof(leaf_node) + td->size;
3259 else
3260 size = sizeof(internal_node);
3261 } else
3262 size = 0;
3263
3264 if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) {
3265 RemoveEntryList(&td->list_entry);
3266 InsertTailList(&t->itemlist, &td->list_entry);
3267
3268 if (next_tree->header.level > 0 && td->treeholder.tree) {
3269 td->treeholder.tree->parent = t;
3270 #ifdef DEBUG_PARANOID
3271 if (td->treeholder.tree->parent && td->treeholder.tree->parent->header.level <= td->treeholder.tree->header.level) int3;
3272 #endif
3273 } else if (next_tree->header.level == 0 && !td->inserted && td->size > 0) {
3274 UINT8* data = ExAllocatePoolWithTag(PagedPool, td->size, ALLOC_TAG);
3275
3276 if (!data) {
3277 ERR("out of memory\n");
3278 return STATUS_INSUFFICIENT_RESOURCES;
3279 }
3280
3281 RtlCopyMemory(data, td->data, td->size);
3282 td->data = data;
3283 }
3284
3285 td->inserted = TRUE;
3286
3287 if (!td->ignore) {
3288 next_tree->size -= size;
3289 t->size += size;
3290 next_tree->header.num_items--;
3291 t->header.num_items++;
3292 }
3293
3294 changed = TRUE;
3295 } else
3296 break;
3297
3298 le = next_tree->itemlist.Flink;
3299 }
3300
3301 le = next_tree->itemlist.Flink;
3302 while (le != &next_tree->itemlist) {
3303 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3304
3305 if (!td->ignore) {
3306 firstitem = td->key;
3307 break;
3308 }
3309
3310 le = le->Flink;
3311 }
3312
3313 // FIXME - once ascension is working, make this work with parent's parent, etc.
3314 if (next_tree->paritem)
3315 next_tree->paritem->key = firstitem;
3316
3317 par = next_tree;
3318 while (par) {
3319 par->write = TRUE;
3320 par = par->parent;
3321 }
3322
3323 if (changed)
3324 *done = TRUE;
3325 }
3326
3327 return STATUS_SUCCESS;
3328 }
3329
3330 static NTSTATUS update_extent_level(device_extension* Vcb, UINT64 address, tree* t, UINT8 level, PIRP Irp) {
3331 KEY searchkey;
3332 traverse_ptr tp;
3333 NTSTATUS Status;
3334
3335 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
3336 searchkey.obj_id = address;
3337 searchkey.obj_type = TYPE_METADATA_ITEM;
3338 searchkey.offset = t->header.level;
3339
3340 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
3341 if (!NT_SUCCESS(Status)) {
3342 ERR("error - find_item returned %08x\n", Status);
3343 return Status;
3344 }
3345
3346 if (!keycmp(tp.item->key, searchkey)) {
3347 EXTENT_ITEM_SKINNY_METADATA* eism;
3348
3349 if (tp.item->size > 0) {
3350 eism = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
3351
3352 if (!eism) {
3353 ERR("out of memory\n");
3354 return STATUS_INSUFFICIENT_RESOURCES;
3355 }