[BTRFS]
[reactos.git] / reactos / drivers / filesystems / btrfs / write.c
1 /* Copyright (c) Mark Harmstone 2016
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19
20 #define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - sizeof(leaf_node))
21
22 // BOOL did_split;
23 BOOL chunk_test = FALSE;
24
25 typedef struct {
26 KEVENT Event;
27 IO_STATUS_BLOCK iosb;
28 } write_context;
29
30 typedef struct {
31 EXTENT_ITEM ei;
32 UINT8 type;
33 EXTENT_DATA_REF edr;
34 } EXTENT_ITEM_DATA_REF;
35
36 typedef struct {
37 EXTENT_ITEM_TREE eit;
38 UINT8 type;
39 TREE_BLOCK_REF tbr;
40 } EXTENT_ITEM_TREE2;
41
42 typedef struct {
43 EXTENT_ITEM ei;
44 UINT8 type;
45 TREE_BLOCK_REF tbr;
46 } EXTENT_ITEM_SKINNY_METADATA;
47
48 typedef struct {
49 CHUNK_ITEM ci;
50 CHUNK_ITEM_STRIPE stripes[1];
51 } CHUNK_ITEM2;
52
53 static BOOL extent_item_is_shared(EXTENT_ITEM* ei, ULONG len);
54 static BOOL is_file_prealloc(fcb* fcb, UINT64 start_data, UINT64 end_data);
55
56 static NTSTATUS STDCALL write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
57 write_context* context = conptr;
58
59 context->iosb = Irp->IoStatus;
60 KeSetEvent(&context->Event, 0, FALSE);
61
62 // return STATUS_SUCCESS;
63 return STATUS_MORE_PROCESSING_REQUIRED;
64 }
65
66 static NTSTATUS STDCALL write_data_phys(PDEVICE_OBJECT device, UINT64 address, void* data, UINT32 length) {
67 NTSTATUS Status;
68 LARGE_INTEGER offset;
69 PIRP Irp;
70 PIO_STACK_LOCATION IrpSp;
71 write_context* context = NULL;
72
73 TRACE("(%p, %llx, %p, %x)\n", device, address, data, length);
74
75 context = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_context), ALLOC_TAG);
76 if (!context) {
77 ERR("out of memory\n");
78 return STATUS_INSUFFICIENT_RESOURCES;
79 }
80
81 RtlZeroMemory(context, sizeof(write_context));
82
83 KeInitializeEvent(&context->Event, NotificationEvent, FALSE);
84
85 offset.QuadPart = address;
86
87 // Irp = IoBuildSynchronousFsdRequest(IRP_MJ_WRITE, Vcb->device, data, length, &offset, NULL, &context->iosb);
88
89 Irp = IoAllocateIrp(device->StackSize, FALSE);
90
91 if (!Irp) {
92 ERR("IoAllocateIrp failed\n");
93 Status = STATUS_INTERNAL_ERROR;
94 goto exit2;
95 }
96
97 IrpSp = IoGetNextIrpStackLocation(Irp);
98 IrpSp->MajorFunction = IRP_MJ_WRITE;
99
100 if (device->Flags & DO_BUFFERED_IO) {
101 Irp->AssociatedIrp.SystemBuffer = data;
102
103 Irp->Flags = IRP_BUFFERED_IO;
104 } else if (device->Flags & DO_DIRECT_IO) {
105 Irp->MdlAddress = IoAllocateMdl(data, length, FALSE, FALSE, NULL);
106 if (!Irp->MdlAddress) {
107 DbgPrint("IoAllocateMdl failed\n");
108 goto exit;
109 }
110
111 MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoWriteAccess);
112 } else {
113 Irp->UserBuffer = data;
114 }
115
116 IrpSp->Parameters.Write.Length = length;
117 IrpSp->Parameters.Write.ByteOffset = offset;
118
119 Irp->UserIosb = &context->iosb;
120
121 Irp->UserEvent = &context->Event;
122
123 IoSetCompletionRoutine(Irp, write_completion, context, TRUE, TRUE, TRUE);
124
125 // FIXME - support multiple devices
126 Status = IoCallDriver(device, Irp);
127
128 if (Status == STATUS_PENDING) {
129 KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
130 Status = context->iosb.Status;
131 }
132
133 if (!NT_SUCCESS(Status)) {
134 ERR("IoCallDriver returned %08x\n", Status);
135 }
136
137 if (device->Flags & DO_DIRECT_IO) {
138 MmUnlockPages(Irp->MdlAddress);
139 IoFreeMdl(Irp->MdlAddress);
140 }
141
142 exit:
143 IoFreeIrp(Irp);
144
145 exit2:
146 if (context)
147 ExFreePool(context);
148
149 return Status;
150 }
151
152 static NTSTATUS STDCALL write_superblock(device_extension* Vcb, device* device) {
153 NTSTATUS Status;
154 unsigned int i = 0;
155 UINT32 crc32;
156
157 #ifdef __REACTOS__
158 Status = STATUS_INTERNAL_ERROR;
159 #endif
160
161 // FIXME - work with RAID
162
163 // FIXME - only write one superblock if on SSD (?)
164 while (superblock_addrs[i] > 0 && Vcb->length >= superblock_addrs[i] + sizeof(superblock)) {
165 TRACE("writing superblock %u\n", i);
166
167 Vcb->superblock.sb_phys_addr = superblock_addrs[i];
168 RtlCopyMemory(&Vcb->superblock.dev_item, &device->devitem, sizeof(DEV_ITEM));
169
170 crc32 = calc_crc32c(0xffffffff, (UINT8*)&Vcb->superblock.uuid, (ULONG)sizeof(superblock) - sizeof(Vcb->superblock.checksum));
171 crc32 = ~crc32;
172 TRACE("crc32 is %08x\n", crc32);
173 RtlCopyMemory(&Vcb->superblock.checksum, &crc32, sizeof(UINT32));
174
175 Status = write_data_phys(device->devobj, superblock_addrs[i], &Vcb->superblock, sizeof(superblock));
176
177 if (!NT_SUCCESS(Status))
178 break;
179
180 i++;
181 }
182
183 return Status;
184 }
185
186 static BOOL find_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address) {
187 LIST_ENTRY* le;
188 space *s, *bestfit = NULL;
189
190 TRACE("(%p, %llx, %llx, %p)\n", Vcb, c->offset, length, address);
191
192 le = c->space.Flink;
193 while (le != &c->space) {
194 s = CONTAINING_RECORD(le, space, list_entry);
195
196 if (s->type == SPACE_TYPE_FREE) {
197 if (s->size == length) {
198 *address = s->offset;
199 TRACE("returning exact fit at %llx\n", s->offset);
200 return TRUE;
201 } else if (s->size > length && (!bestfit || bestfit->size > s->size)) {
202 bestfit = s;
203 }
204 }
205
206 le = le->Flink;
207 }
208
209 if (bestfit) {
210 TRACE("returning best fit at %llx\n", bestfit->offset);
211 *address = bestfit->offset;
212 return TRUE;
213 }
214
215 return FALSE;
216 }
217
218 void add_to_space_list(chunk* c, UINT64 offset, UINT64 size, UINT8 type) {
219 LIST_ENTRY *le = c->space.Flink, *nextle, *insbef;
220 space *s, *s2, *s3;
221 #ifdef DEBUG_PARANOID
222 UINT64 lastaddr;
223 #endif
224
225 TRACE("(%p, %llx, %llx, %x)\n", c, offset, size, type);
226
227 #ifdef DEBUG_PARANOID
228 // TESTING
229 le = c->space.Flink;
230 while (le != &c->space) {
231 s = CONTAINING_RECORD(le, space, list_entry);
232
233 TRACE("%llx,%llx,%x\n", s->offset, s->size, s->type);
234
235 le = le->Flink;
236 }
237 #endif
238
239 c->space_changed = TRUE;
240
241 le = c->space.Flink;
242 insbef = &c->space;
243 while (le != &c->space) {
244 s = CONTAINING_RECORD(le, space, list_entry);
245 nextle = le->Flink;
246
247 if (s->offset >= offset + size) {
248 insbef = le;
249 break;
250 }
251
252 if (s->offset >= offset && s->offset + s->size <= offset + size) { // delete entirely
253 if (s->offset + s->size == offset + size) {
254 insbef = s->list_entry.Flink;
255 RemoveEntryList(&s->list_entry);
256 ExFreePool(s);
257 break;
258 }
259
260 RemoveEntryList(&s->list_entry);
261 ExFreePool(s);
262 } else if (s->offset < offset && s->offset + s->size > offset + size) { // split in two
263 s3 = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
264 if (!s3) {
265 ERR("out of memory\n");
266 return;
267 }
268
269 s3->offset = offset + size;
270 s3->size = s->size - size - offset + s->offset;
271 s3->type = s->type;
272 InsertHeadList(&s->list_entry, &s3->list_entry);
273 insbef = &s3->list_entry;
274
275 s->size = offset - s->offset;
276 break;
277 } else if (s->offset + s->size > offset && s->offset + s->size <= offset + size) { // truncate before
278 s->size = offset - s->offset;
279 } else if (s->offset < offset + size && s->offset + s->size > offset + size) { // truncate after
280 s->size -= offset + size - s->offset;
281 s->offset = offset + size;
282
283 insbef = le;
284 break;
285 }
286
287 le = nextle;
288 }
289
290 s2 = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
291 if (!s2) {
292 ERR("out of memory\n");
293 return;
294 }
295
296 s2->offset = offset;
297 s2->size = size;
298 s2->type = type;
299 InsertTailList(insbef, &s2->list_entry);
300
301 // merge entries if same type
302
303 if (s2->list_entry.Blink != &c->space) {
304 s = CONTAINING_RECORD(s2->list_entry.Blink, space, list_entry);
305
306 if (s->type == type) {
307 s->size += s2->size;
308
309 RemoveEntryList(&s2->list_entry);
310 ExFreePool(s2);
311
312 s2 = s;
313 }
314 }
315
316 if (s2->list_entry.Flink != &c->space) {
317 s = CONTAINING_RECORD(s2->list_entry.Flink, space, list_entry);
318
319 if (s->type == type) {
320 s2->size += s->size;
321
322 RemoveEntryList(&s->list_entry);
323 ExFreePool(s);
324 }
325 }
326
327 le = c->space.Flink;
328 while (le != &c->space) {
329 s = CONTAINING_RECORD(le, space, list_entry);
330
331 TRACE("%llx,%llx,%x\n", s->offset, s->size, s->type);
332
333 le = le->Flink;
334 }
335
336 #ifdef DEBUG_PARANOID
337 // TESTING
338 lastaddr = c->offset;
339 le = c->space.Flink;
340 while (le != &c->space) {
341 s = CONTAINING_RECORD(le, space, list_entry);
342
343 if (s->offset != lastaddr) {
344 ERR("inconsistency detected!\n");
345 int3;
346 }
347
348 lastaddr = s->offset + s->size;
349
350 le = le->Flink;
351 }
352
353 if (lastaddr != c->offset + c->chunk_item->size) {
354 ERR("inconsistency detected - space doesn't run all the way to end of chunk\n");
355 int3;
356 }
357 #endif
358 }
359
360 chunk* get_chunk_from_address(device_extension* Vcb, UINT64 address) {
361 LIST_ENTRY* le2;
362 chunk* c;
363
364 le2 = Vcb->chunks.Flink;
365 while (le2 != &Vcb->chunks) {
366 c = CONTAINING_RECORD(le2, chunk, list_entry);
367
368 // TRACE("chunk: %llx, %llx\n", c->offset, c->chunk_item->size);
369
370 if (address >= c->offset && address < c->offset + c->chunk_item->size)
371 return c;
372
373 le2 = le2->Flink;
374 }
375
376 return NULL;
377 }
378
379 typedef struct {
380 disk_hole* dh;
381 device* device;
382 } stripe;
383
384 static void add_provisional_disk_hole(device_extension* Vcb, stripe* s, UINT64 max_stripe_size) {
385 // LIST_ENTRY* le = s->device->disk_holes.Flink;
386 // disk_hole* dh;
387
388 // ERR("old holes:\n");
389 // while (le != &s->device->disk_holes) {
390 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
391 //
392 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
393 //
394 // le = le->Flink;
395 // }
396
397 if (s->dh->size <= max_stripe_size) {
398 s->dh->provisional = TRUE;
399 } else {
400 disk_hole* newdh = ExAllocatePoolWithTag(PagedPool, sizeof(disk_hole), ALLOC_TAG);
401 if (!newdh) {
402 ERR("out of memory\n");
403 return;
404 }
405
406 newdh->address = s->dh->address + max_stripe_size;
407 newdh->size = s->dh->size - max_stripe_size;
408 newdh->provisional = FALSE;
409 InsertTailList(&s->device->disk_holes, &newdh->listentry);
410
411 s->dh->size = max_stripe_size;
412 s->dh->provisional = TRUE;
413 }
414
415 // ERR("new holes:\n");
416 // le = s->device->disk_holes.Flink;
417 // while (le != &s->device->disk_holes) {
418 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
419 //
420 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
421 //
422 // le = le->Flink;
423 // }
424 }
425
426 static UINT64 find_new_chunk_address(device_extension* Vcb, UINT64 size) {
427 KEY searchkey;
428 traverse_ptr tp, next_tp;
429 BOOL b;
430 UINT64 lastaddr;
431 NTSTATUS Status;
432
433 searchkey.obj_id = 0x100;
434 searchkey.obj_type = TYPE_CHUNK_ITEM;
435 searchkey.offset = 0;
436
437 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
438 if (!NT_SUCCESS(Status)) {
439 ERR("error - find_item returned %08x\n", Status);
440 return 0xffffffffffffffff;
441 }
442
443 lastaddr = 0;
444
445 do {
446 if (tp.item->key.obj_type == TYPE_CHUNK_ITEM) {
447 if (tp.item->size < sizeof(CHUNK_ITEM)) {
448 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(CHUNK_ITEM));
449 } else {
450 CHUNK_ITEM* ci = (CHUNK_ITEM*)tp.item->data;
451
452 if (tp.item->key.offset >= lastaddr + size)
453 return lastaddr;
454
455 lastaddr = tp.item->key.offset + ci->size;
456 }
457 }
458
459 b = find_next_item(Vcb, &tp, &next_tp, FALSE);
460 if (b) {
461 tp = next_tp;
462
463 if (tp.item->key.obj_id > searchkey.obj_id || tp.item->key.obj_type > searchkey.obj_type)
464 break;
465 }
466 } while (b);
467
468 return lastaddr;
469 }
470
471 static BOOL increase_dev_item_used(device_extension* Vcb, device* device, UINT64 size, LIST_ENTRY* rollback) {
472 KEY searchkey;
473 traverse_ptr tp;
474 DEV_ITEM* di;
475 NTSTATUS Status;
476
477 searchkey.obj_id = 1;
478 searchkey.obj_type = TYPE_DEV_ITEM;
479 searchkey.offset = device->devitem.dev_id;
480
481 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
482 if (!NT_SUCCESS(Status)) {
483 ERR("error - find_item returned %08x\n", Status);
484 return FALSE;
485 }
486
487 if (keycmp(&tp.item->key, &searchkey)) {
488 ERR("error - could not find DEV_ITEM for device %llx\n", device->devitem.dev_id);
489 return FALSE;
490 }
491
492 delete_tree_item(Vcb, &tp, rollback);
493
494 device->devitem.bytes_used += size;
495
496 di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
497 if (!di) {
498 ERR("out of memory\n");
499 return FALSE;
500 }
501
502 RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM));
503
504 if (!insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, rollback)) {
505 ERR("insert_tree_item failed\n");
506 return FALSE;
507 }
508
509 return TRUE;
510 }
511
512 static void reset_disk_holes(device* device, BOOL commit) {
513 LIST_ENTRY* le = device->disk_holes.Flink;
514 disk_hole* dh;
515
516 // ERR("old holes:\n");
517 // while (le != &device->disk_holes) {
518 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
519 //
520 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
521 //
522 // le = le->Flink;
523 // }
524
525 le = device->disk_holes.Flink;
526 while (le != &device->disk_holes) {
527 LIST_ENTRY* le2 = le->Flink;
528
529 dh = CONTAINING_RECORD(le, disk_hole, listentry);
530
531 if (dh->provisional) {
532 if (commit) {
533 RemoveEntryList(le);
534 ExFreePool(dh);
535 } else {
536 dh->provisional = FALSE;
537 }
538 }
539
540 le = le2;
541 }
542
543 if (!commit) {
544 le = device->disk_holes.Flink;
545 while (le != &device->disk_holes) {
546 LIST_ENTRY* le2 = le->Flink;
547
548 dh = CONTAINING_RECORD(le, disk_hole, listentry);
549
550 while (le2 != &device->disk_holes) {
551 disk_hole* dh2 = CONTAINING_RECORD(le2, disk_hole, listentry);
552
553 if (dh2->address == dh->address + dh->size) {
554 LIST_ENTRY* le3 = le2->Flink;
555 dh->size += dh2->size;
556
557 RemoveEntryList(le2);
558 ExFreePool(dh2);
559
560 le2 = le3;
561 } else
562 break;
563 }
564
565 le = le->Flink;
566 }
567 }
568
569 // ERR("new holes:\n");
570 // le = device->disk_holes.Flink;
571 // while (le != &device->disk_holes) {
572 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
573 //
574 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
575 //
576 // le = le->Flink;
577 // }
578 }
579
580 static NTSTATUS add_to_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset, void* data, ULONG size) {
581 sys_chunk *sc, *sc2;
582 LIST_ENTRY* le;
583 USHORT i;
584
585 if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) {
586 ERR("error - bootstrap is full\n");
587 return STATUS_INTERNAL_ERROR;
588 }
589
590 sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG);
591 if (!sc) {
592 ERR("out of memory\n");
593 return STATUS_INSUFFICIENT_RESOURCES;
594 }
595
596 sc->key.obj_id = obj_id;
597 sc->key.obj_type = obj_type;
598 sc->key.offset = offset;
599 sc->size = size;
600 sc->data = ExAllocatePoolWithTag(PagedPool, sc->size, ALLOC_TAG);
601 if (!sc->data) {
602 ERR("out of memory\n");
603 ExFreePool(sc);
604 return STATUS_INSUFFICIENT_RESOURCES;
605 }
606
607 RtlCopyMemory(sc->data, data, sc->size);
608
609 le = Vcb->sys_chunks.Flink;
610 while (le != &Vcb->sys_chunks) {
611 sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
612
613 if (keycmp(&sc2->key, &sc->key) == 1)
614 break;
615
616 le = le->Flink;
617 }
618 InsertTailList(le, &sc->list_entry);
619
620 Vcb->superblock.n += sizeof(KEY) + size;
621
622 i = 0;
623 le = Vcb->sys_chunks.Flink;
624 while (le != &Vcb->sys_chunks) {
625 sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
626
627 TRACE("%llx,%x,%llx\n", sc2->key.obj_id, sc2->key.obj_type, sc2->key.offset);
628
629 RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], &sc2->key, sizeof(KEY));
630 i += sizeof(KEY);
631
632 RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], sc2->data, sc2->size);
633 i += sc2->size;
634
635 le = le->Flink;
636 }
637
638 return STATUS_SUCCESS;
639 }
640
641 chunk* alloc_chunk(device_extension* Vcb, UINT64 flags, LIST_ENTRY* rollback) {
642 UINT64 max_stripe_size, max_chunk_size, stripe_size;
643 UINT64 total_size = 0, i, j, logaddr;
644 int num_stripes;
645 disk_hole* dh;
646 stripe* stripes;
647 ULONG cisize;
648 CHUNK_ITEM* ci;
649 CHUNK_ITEM_STRIPE* cis;
650 chunk* c = NULL;
651 space* s = NULL;
652 BOOL success = FALSE;
653 BLOCK_GROUP_ITEM* bgi;
654
655 for (i = 0; i < Vcb->superblock.num_devices; i++) {
656 total_size += Vcb->devices[i].devitem.num_bytes;
657 }
658 TRACE("total_size = %llx\n", total_size);
659
660 if (flags & BLOCK_FLAG_DATA) {
661 max_stripe_size = 0x40000000; // 1 GB
662 max_chunk_size = 10 * max_stripe_size;
663 } else if (flags & BLOCK_FLAG_METADATA) {
664 if (total_size > 0xC80000000) // 50 GB
665 max_stripe_size = 0x40000000; // 1 GB
666 else
667 max_stripe_size = 0x10000000; // 256 MB
668
669 max_chunk_size = max_stripe_size;
670 } else if (flags & BLOCK_FLAG_SYSTEM) {
671 max_stripe_size = 0x2000000; // 32 MB
672 max_chunk_size = 2 * max_stripe_size;
673 }
674
675 // FIXME - make sure whole number of sectors?
676 max_chunk_size = min(max_chunk_size, total_size / 10); // cap at 10%
677
678 TRACE("would allocate a new chunk of %llx bytes and stripe %llx\n", max_chunk_size, max_stripe_size);
679
680 if (flags & BLOCK_FLAG_DUPLICATE) {
681 num_stripes = 2;
682 } else if (flags & BLOCK_FLAG_RAID0) {
683 FIXME("RAID0 not yet supported\n");
684 return NULL;
685 } else if (flags & BLOCK_FLAG_RAID1) {
686 FIXME("RAID1 not yet supported\n");
687 return NULL;
688 } else if (flags & BLOCK_FLAG_RAID10) {
689 FIXME("RAID10 not yet supported\n");
690 return NULL;
691 } else if (flags & BLOCK_FLAG_RAID5) {
692 FIXME("RAID5 not yet supported\n");
693 return NULL;
694 } else if (flags & BLOCK_FLAG_RAID6) {
695 FIXME("RAID6 not yet supported\n");
696 return NULL;
697 } else { // SINGLE
698 num_stripes = 1;
699 }
700
701 stripes = ExAllocatePoolWithTag(PagedPool, sizeof(stripe) * num_stripes, ALLOC_TAG);
702 if (!stripes) {
703 ERR("out of memory\n");
704 return NULL;
705 }
706
707 for (i = 0; i < num_stripes; i++) {
708 stripes[i].dh = NULL;
709
710 for (j = 0; j < Vcb->superblock.num_devices; j++) {
711 LIST_ENTRY* le = Vcb->devices[j].disk_holes.Flink;
712
713 while (le != &Vcb->devices[j].disk_holes) {
714 dh = CONTAINING_RECORD(le, disk_hole, listentry);
715
716 if (!dh->provisional) {
717 if (!stripes[i].dh || dh->size > stripes[i].dh->size) {
718 stripes[i].dh = dh;
719 stripes[i].device = &Vcb->devices[j];
720
721 if (stripes[i].dh->size >= max_stripe_size)
722 break;
723 }
724 }
725
726 le = le->Flink;
727 }
728
729 if (stripes[i].dh && stripes[i].dh->size >= max_stripe_size)
730 break;
731 }
732
733 if (stripes[i].dh) {
734 TRACE("good DH: device %llx, address %llx, size %llx\n", stripes[i].device->devitem.dev_id, stripes[i].dh->address, stripes[i].dh->size);
735 } else {
736 TRACE("good DH not found\n");
737 goto end;
738 }
739
740 add_provisional_disk_hole(Vcb, &stripes[i], max_stripe_size);
741 }
742
743 stripe_size = min(stripes[0].dh->size, max_stripe_size);
744 for (i = 1; i < num_stripes; i++) {
745 stripe_size = min(stripe_size, stripes[1].dh->size);
746 }
747 // FIXME - make sure stripe_size aligned properly
748 // FIXME - obey max_chunk_size
749
750 c = ExAllocatePoolWithTag(PagedPool, sizeof(chunk), ALLOC_TAG);
751 if (!c) {
752 ERR("out of memory\n");
753 goto end;
754 }
755
756 // add CHUNK_ITEM to tree 3
757
758 cisize = sizeof(CHUNK_ITEM) + (num_stripes * sizeof(CHUNK_ITEM_STRIPE));
759 ci = ExAllocatePoolWithTag(PagedPool, cisize, ALLOC_TAG);
760 if (!ci) {
761 ERR("out of memory\n");
762 goto end;
763 }
764
765 ci->size = stripe_size; // FIXME for RAID
766 ci->root_id = Vcb->extent_root->id;
767 ci->stripe_length = 0x10000; // FIXME? BTRFS_STRIPE_LEN in kernel
768 ci->type = flags;
769 ci->opt_io_alignment = ci->stripe_length;
770 ci->opt_io_width = ci->stripe_length;
771 ci->sector_size = stripes[0].device->devitem.minimal_io_size;
772 ci->num_stripes = num_stripes;
773 ci->sub_stripes = 1;
774
775 c->devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * num_stripes, ALLOC_TAG);
776 if (!c->devices) {
777 ERR("out of memory\n");
778 ExFreePool(ci);
779 goto end;
780 }
781
782 for (i = 0; i < num_stripes; i++) {
783 if (i == 0)
784 cis = (CHUNK_ITEM_STRIPE*)&ci[1];
785 else
786 cis = &cis[1];
787
788 cis->dev_id = stripes[i].device->devitem.dev_id;
789 cis->offset = stripes[i].dh->address;
790 cis->dev_uuid = stripes[i].device->devitem.device_uuid;
791
792 c->devices[i] = stripes[i].device;
793 }
794
795 logaddr = find_new_chunk_address(Vcb, ci->size);
796 if (logaddr == 0xffffffffffffffff) {
797 ERR("find_new_chunk_address failed\n");
798 ExFreePool(ci);
799 goto end;
800 }
801
802 if (!insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, logaddr, ci, cisize, NULL, rollback)) {
803 ERR("insert_tree_item failed\n");
804 ExFreePool(ci);
805 goto end;
806 }
807
808 if (flags & BLOCK_FLAG_SYSTEM) {
809 NTSTATUS Status = add_to_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, logaddr, ci, cisize);
810 if (!NT_SUCCESS(Status)) {
811 ERR("add_to_bootstrap returned %08x\n", Status);
812 goto end;
813 }
814 }
815
816 Vcb->superblock.chunk_root_generation = Vcb->superblock.generation;
817
818 c->chunk_item = ExAllocatePoolWithTag(PagedPool, cisize, ALLOC_TAG);
819 if (!c->chunk_item) {
820 ERR("out of memory\n");
821 goto end;
822 }
823
824 RtlCopyMemory(c->chunk_item, ci, cisize);
825 c->size = cisize;
826 c->offset = logaddr;
827 c->used = c->oldused = 0;
828 c->space_changed = FALSE;
829 c->cache_size = 0;
830 c->cache_inode = 0;
831 InitializeListHead(&c->space);
832
833 s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
834 if (!s) {
835 ERR("out of memory\n");
836 goto end;
837 }
838
839 s->offset = c->offset;
840 s->size = c->chunk_item->size;
841 s->type = SPACE_TYPE_FREE;
842 InsertTailList(&c->space, &s->list_entry);
843
844 protect_superblocks(Vcb, c);
845
846 // add BLOCK_GROUP_ITEM to tree 2
847
848 bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG);
849 if (!bgi) {
850 ERR("out of memory\n");
851 goto end;
852 }
853
854 bgi->used = 0;
855 bgi->chunk_tree = 0x100;
856 bgi->flags = flags;
857
858 if (!insert_tree_item(Vcb, Vcb->extent_root, logaddr, TYPE_BLOCK_GROUP_ITEM, ci->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, rollback)) {
859 ERR("insert_tree_item failed\n");
860 ExFreePool(bgi);
861 goto end;
862 }
863
864 // add DEV_EXTENTs to tree 4
865
866 for (i = 0; i < num_stripes; i++) {
867 DEV_EXTENT* de;
868
869 de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG);
870 if (!de) {
871 ERR("out of memory\n");
872 goto end;
873 }
874
875 de->chunktree = Vcb->chunk_root->id;
876 de->objid = 0x100;
877 de->address = logaddr;
878 de->length = ci->size;
879 de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid;
880
881 if (!insert_tree_item(Vcb, Vcb->dev_root, stripes[i].device->devitem.dev_id, TYPE_DEV_EXTENT, stripes[i].dh->address, de, sizeof(DEV_EXTENT), NULL, rollback)) {
882 ERR("insert_tree_item failed\n");
883 ExFreePool(de);
884 goto end;
885 }
886
887 if (!increase_dev_item_used(Vcb, stripes[i].device, ci->size, rollback)) {
888 ERR("increase_dev_item_used failed\n");
889 goto end;
890 }
891 }
892
893 for (i = 0; i < num_stripes; i++) {
894 BOOL b = FALSE;
895 for (j = 0; j < i; j++) {
896 if (stripes[j].device == stripes[i].device)
897 b = TRUE;
898 }
899
900 if (!b)
901 reset_disk_holes(stripes[i].device, TRUE);
902 }
903
904 success = TRUE;
905
906 end:
907 ExFreePool(stripes);
908
909 if (!success) {
910 for (i = 0; i < num_stripes; i++) {
911 BOOL b = FALSE;
912 for (j = 0; j < i; j++) {
913 if (stripes[j].device == stripes[i].device)
914 b = TRUE;
915 }
916
917 if (!b)
918 reset_disk_holes(stripes[i].device, FALSE);
919 }
920
921 if (c) ExFreePool(c);
922 if (s) ExFreePool(s);
923 } else
924 InsertTailList(&Vcb->chunks, &c->list_entry);
925
926 return success ? c : NULL;
927 }
928
929 NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, UINT32 length) {
930 KEY searchkey;
931 traverse_ptr tp;
932 CHUNK_ITEM2* ci;
933 NTSTATUS Status;
934 UINT32 i;
935
936 TRACE("(%p, %llx, %p, %x)\n", Vcb, address, data, length);
937
938 // FIXME - use version cached in Vcb
939
940 searchkey.obj_id = 0x100; // fixed?
941 searchkey.obj_type = TYPE_CHUNK_ITEM;
942 searchkey.offset = address;
943
944 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
945 if (!NT_SUCCESS(Status)) {
946 ERR("error - find_item returned %08x\n", Status);
947 return Status;
948 }
949
950 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
951 ERR("error - unexpected item in chunk tree\n");
952 Status = STATUS_INTERNAL_ERROR;
953 goto end;
954 }
955
956 if (tp.item->size < sizeof(CHUNK_ITEM2)) {
957 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(CHUNK_ITEM2));
958 Status = STATUS_INTERNAL_ERROR;
959 goto end;
960 }
961
962 ci = (CHUNK_ITEM2*)tp.item->data;
963
964 if (tp.item->key.offset > address || tp.item->key.offset + ci->ci.size < address) {
965 ERR("error - address %llx was out of chunk bounds\n", address);
966 Status = STATUS_INTERNAL_ERROR;
967 goto end;
968 }
969
970 // FIXME - only do this for chunks marked DUPLICATE?
971 // FIXME - for multiple writes, if PENDING do waits at the end
972 // FIXME - work with RAID
973 for (i = 0; i < ci->ci.num_stripes; i++) {
974 Status = write_data_phys(Vcb->devices[0].devobj, address - tp.item->key.offset + ci->stripes[i].offset, data, length);
975 if (!NT_SUCCESS(Status)) {
976 ERR("error - write_data_phys failed\n");
977 goto end;
978 }
979 }
980
981 end:
982
983 return Status;
984 }
985
986 static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) {
987 LIST_ENTRY *le, *nextle;
988 space *s, *s2;
989
990 // // TESTING
991 // le = c->space.Flink;
992 // while (le != &c->space) {
993 // s = CONTAINING_RECORD(le, space, list_entry);
994 //
995 // TRACE("%x,%x,%x\n", (UINT32)s->offset, (UINT32)s->size, s->type);
996 //
997 // le = le->Flink;
998 // }
999
1000 le = c->space.Flink;
1001 while (le != &c->space) {
1002 s = CONTAINING_RECORD(le, space, list_entry);
1003 nextle = le->Flink;
1004
1005 if (s->type == SPACE_TYPE_DELETING)
1006 s->type = SPACE_TYPE_FREE;
1007 else if (s->type == SPACE_TYPE_WRITING)
1008 s->type = SPACE_TYPE_USED;
1009
1010 if (le->Blink != &c->space) {
1011 s2 = CONTAINING_RECORD(le->Blink, space, list_entry);
1012
1013 if (s2->type == s->type) { // do merge
1014 s2->size += s->size;
1015
1016 RemoveEntryList(&s->list_entry);
1017 ExFreePool(s);
1018 }
1019 }
1020
1021 le = nextle;
1022 }
1023
1024 // le = c->space.Flink;
1025 // while (le != &c->space) {
1026 // s = CONTAINING_RECORD(le, space, list_entry);
1027 //
1028 // TRACE("%x,%x,%x\n", (UINT32)s->offset, (UINT32)s->size, s->type);
1029 //
1030 // le = le->Flink;
1031 // }
1032 }
1033
1034 static void clean_space_cache(device_extension* Vcb) {
1035 LIST_ENTRY* le;
1036 chunk* c;
1037
1038 TRACE("(%p)\n", Vcb);
1039
1040 le = Vcb->chunks.Flink;
1041 while (le != &Vcb->chunks) {
1042 c = CONTAINING_RECORD(le, chunk, list_entry);
1043
1044 if (c->space_changed) {
1045 clean_space_cache_chunk(Vcb, c);
1046 c->space_changed = FALSE;
1047 }
1048
1049 le = le->Flink;
1050 }
1051 }
1052
1053 static BOOL trees_consistent(device_extension* Vcb, LIST_ENTRY* rollback) {
1054 ULONG maxsize = Vcb->superblock.node_size - sizeof(tree_header);
1055 LIST_ENTRY* le;
1056
1057 le = Vcb->trees.Flink;
1058 while (le != &Vcb->trees) {
1059 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1060
1061 if (t->write) {
1062 if (t->header.num_items == 0 && t->parent)
1063 return FALSE;
1064
1065 if (t->size > maxsize)
1066 return FALSE;
1067
1068 if (!t->has_new_address)
1069 return FALSE;
1070 }
1071
1072 le = le->Flink;
1073 }
1074
1075 return TRUE;
1076 }
1077
1078 static NTSTATUS add_parents(device_extension* Vcb, LIST_ENTRY* rollback) {
1079 LIST_ENTRY* le;
1080 NTSTATUS Status;
1081
1082 le = Vcb->trees.Flink;
1083 while (le != &Vcb->trees) {
1084 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1085
1086 if (t->write) {
1087 if (t->parent) {
1088 if (!t->parent->write) {
1089 t->parent->write = TRUE;
1090 Vcb->write_trees++;
1091 }
1092 } else if (t->root != Vcb->chunk_root && t->root != Vcb->root_root) {
1093 KEY searchkey;
1094 traverse_ptr tp;
1095
1096 searchkey.obj_id = t->root->id;
1097 searchkey.obj_type = TYPE_ROOT_ITEM;
1098 searchkey.offset = 0xffffffffffffffff;
1099
1100 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
1101 if (!NT_SUCCESS(Status)) {
1102 ERR("error - find_item returned %08x\n", Status);
1103 return Status;
1104 }
1105
1106 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1107 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1108 return STATUS_INTERNAL_ERROR;
1109 }
1110
1111 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed
1112 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
1113 if (!ri) {
1114 ERR("out of memory\n");
1115 return STATUS_INSUFFICIENT_RESOURCES;
1116 }
1117
1118 if (tp.item->size > 0)
1119 RtlCopyMemory(ri, tp.item->data, tp.item->size);
1120
1121 RtlZeroMemory(((UINT8*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size);
1122
1123 delete_tree_item(Vcb, &tp, rollback);
1124
1125 if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, rollback)) {
1126 ERR("insert_tree_item failed\n");
1127 return STATUS_INTERNAL_ERROR;
1128 }
1129 } else {
1130 if (!tp.tree->write) {
1131 tp.tree->write = TRUE;
1132 Vcb->write_trees++;
1133 }
1134 }
1135 }
1136 }
1137
1138 le = le->Flink;
1139 }
1140
1141 return STATUS_SUCCESS;
1142 }
1143
1144 static void add_parents_to_cache(device_extension* Vcb, tree* t) {
1145 KEY searchkey;
1146 traverse_ptr tp;
1147 NTSTATUS Status;
1148
1149 while (t->parent) {
1150 t = t->parent;
1151
1152 if (!t->write) {
1153 t->write = TRUE;
1154 Vcb->write_trees++;
1155 }
1156 }
1157
1158 if (t->root == Vcb->root_root || t->root == Vcb->chunk_root)
1159 return;
1160
1161 searchkey.obj_id = t->root->id;
1162 searchkey.obj_type = TYPE_ROOT_ITEM;
1163 searchkey.offset = 0xffffffffffffffff;
1164
1165 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
1166 if (!NT_SUCCESS(Status)) {
1167 ERR("error - find_item returned %08x\n", Status);
1168 return;
1169 }
1170
1171 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1172 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1173 return;
1174 }
1175
1176 if (!tp.tree->write) {
1177 tp.tree->write = TRUE;
1178 Vcb->write_trees++;
1179 }
1180 }
1181
1182 static BOOL insert_tree_extent_skinny(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64 address, LIST_ENTRY* rollback) {
1183 EXTENT_ITEM_SKINNY_METADATA* eism;
1184 traverse_ptr insert_tp;
1185
1186 eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
1187 if (!eism) {
1188 ERR("out of memory\n");
1189 return FALSE;
1190 }
1191
1192 eism->ei.refcount = 1;
1193 eism->ei.generation = Vcb->superblock.generation;
1194 eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
1195 eism->type = TYPE_TREE_BLOCK_REF;
1196 eism->tbr.offset = root_id;
1197
1198 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, rollback)) {
1199 ERR("insert_tree_item failed\n");
1200 ExFreePool(eism);
1201 return FALSE;
1202 }
1203
1204 add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_WRITING);
1205
1206 add_parents_to_cache(Vcb, insert_tp.tree);
1207
1208 return TRUE;
1209 }
1210
1211 static BOOL insert_tree_extent(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64* new_address, LIST_ENTRY* rollback) {
1212 UINT64 address;
1213 EXTENT_ITEM_TREE2* eit2;
1214 traverse_ptr insert_tp;
1215
1216 TRACE("(%p, %x, %llx, %p, %p, %p, %p)\n", Vcb, level, root_id, c, new_address, rollback);
1217
1218 if (!find_address_in_chunk(Vcb, c, Vcb->superblock.node_size, &address))
1219 return FALSE;
1220
1221 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
1222 BOOL b = insert_tree_extent_skinny(Vcb, level, root_id, c, address, rollback);
1223
1224 if (b)
1225 *new_address = address;
1226
1227 return b;
1228 }
1229
1230 eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
1231 if (!eit2) {
1232 ERR("out of memory\n");
1233 return FALSE;
1234 }
1235
1236 eit2->eit.extent_item.refcount = 1;
1237 eit2->eit.extent_item.generation = Vcb->superblock.generation;
1238 eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
1239 // eit2->eit.firstitem = wt->firstitem;
1240 eit2->eit.level = level;
1241 eit2->type = TYPE_TREE_BLOCK_REF;
1242 eit2->tbr.offset = root_id;
1243
1244 // #ifdef DEBUG_PARANOID
1245 // if (wt->firstitem.obj_type == 0xcc) { // TESTING
1246 // ERR("error - firstitem not set (wt = %p, tree = %p, address = %x)\n", wt, wt->tree, (UINT32)address);
1247 // ERR("num_items = %u, level = %u, root = %x, delete = %u\n", wt->tree->header.num_items, wt->tree->header.level, (UINT32)wt->tree->root->id, wt->delete);
1248 // int3;
1249 // }
1250 // #endif
1251
1252 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, rollback)) {
1253 ERR("insert_tree_item failed\n");
1254 ExFreePool(eit2);
1255 return FALSE;
1256 }
1257
1258 add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_WRITING);
1259
1260 add_parents_to_cache(Vcb, insert_tp.tree);
1261
1262 *new_address = address;
1263
1264 return TRUE;
1265 }
1266
1267 NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, LIST_ENTRY* rollback) {
1268 chunk *origchunk = NULL, *c;
1269 LIST_ENTRY* le;
1270 UINT64 flags = t->flags, addr;
1271
1272 if (flags == 0)
1273 flags = (t->root->id == BTRFS_ROOT_CHUNK ? BLOCK_FLAG_SYSTEM : BLOCK_FLAG_METADATA) | BLOCK_FLAG_DUPLICATE;
1274
1275 // TRACE("flags = %x\n", (UINT32)wt->flags);
1276
1277 // if (!chunk_test) { // TESTING
1278 // if ((c = alloc_chunk(Vcb, flags))) {
1279 // if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
1280 // if (insert_tree_extent(Vcb, t, c)) {
1281 // chunk_test = TRUE;
1282 // return STATUS_SUCCESS;
1283 // }
1284 // }
1285 // }
1286 // }
1287
1288 if (t->has_address) {
1289 origchunk = get_chunk_from_address(Vcb, t->header.address);
1290
1291 if (insert_tree_extent(Vcb, t->header.level, t->header.tree_id, origchunk, &addr, rollback)) {
1292 t->new_address = addr;
1293 t->has_new_address = TRUE;
1294 return STATUS_SUCCESS;
1295 }
1296 }
1297
1298 le = Vcb->chunks.Flink;
1299 while (le != &Vcb->chunks) {
1300 c = CONTAINING_RECORD(le, chunk, list_entry);
1301
1302 if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
1303 if (insert_tree_extent(Vcb, t->header.level, t->header.tree_id, c, &addr, rollback)) {
1304 t->new_address = addr;
1305 t->has_new_address = TRUE;
1306 return STATUS_SUCCESS;
1307 }
1308 }
1309
1310 le = le->Flink;
1311 }
1312
1313 // allocate new chunk if necessary
1314 if ((c = alloc_chunk(Vcb, flags, rollback))) {
1315 if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
1316 if (insert_tree_extent(Vcb, t->header.level, t->header.tree_id, c, &addr, rollback)) {
1317 t->new_address = addr;
1318 t->has_new_address = TRUE;
1319 return STATUS_SUCCESS;
1320 }
1321 }
1322 }
1323
1324 ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size);
1325
1326 return STATUS_DISK_FULL;
1327 }
1328
1329 static BOOL reduce_tree_extent_skinny(device_extension* Vcb, UINT64 address, tree* t, LIST_ENTRY* rollback) {
1330 KEY searchkey;
1331 traverse_ptr tp;
1332 chunk* c;
1333 EXTENT_ITEM_SKINNY_METADATA* eism;
1334 NTSTATUS Status;
1335
1336 searchkey.obj_id = address;
1337 searchkey.obj_type = TYPE_METADATA_ITEM;
1338 searchkey.offset = 0xffffffffffffffff;
1339
1340 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
1341 if (!NT_SUCCESS(Status)) {
1342 ERR("error - find_item returned %08x\n", Status);
1343 return FALSE;
1344 }
1345
1346 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1347 TRACE("could not find %llx,%x,%llx in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1348 return FALSE;
1349 }
1350
1351 if (tp.item->size < sizeof(EXTENT_ITEM_SKINNY_METADATA)) {
1352 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_SKINNY_METADATA));
1353 return FALSE;
1354 }
1355
1356 delete_tree_item(Vcb, &tp, rollback);
1357
1358 eism = (EXTENT_ITEM_SKINNY_METADATA*)tp.item->data;
1359 if (t && t->header.level == 0 && eism->ei.flags & EXTENT_ITEM_SHARED_BACKREFS && eism->type == TYPE_TREE_BLOCK_REF) {
1360 // convert shared data extents
1361
1362 LIST_ENTRY* le = t->itemlist.Flink;
1363 while (le != &t->itemlist) {
1364 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1365
1366 TRACE("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1367
1368 if (!td->ignore && !td->inserted) {
1369 if (td->key.obj_type == TYPE_EXTENT_DATA) {
1370 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1371
1372 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1373 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1374
1375 if (ed2->address != 0) {
1376 TRACE("trying to convert shared data extent %llx,%llx\n", ed2->address, ed2->size);
1377 convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
1378 }
1379 }
1380 }
1381 }
1382
1383 le = le->Flink;
1384 }
1385
1386 t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
1387 }
1388
1389 c = get_chunk_from_address(Vcb, address);
1390
1391 if (c) {
1392 decrease_chunk_usage(c, Vcb->superblock.node_size);
1393
1394 add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_DELETING);
1395 } else
1396 ERR("could not find chunk for address %llx\n", address);
1397
1398 return TRUE;
1399 }
1400
1401 // TESTING
1402 // static void check_tree_num_items(tree* t) {
1403 // LIST_ENTRY* le2;
1404 // UINT32 ni;
1405 //
1406 // le2 = t->itemlist.Flink;
1407 // ni = 0;
1408 // while (le2 != &t->itemlist) {
1409 // tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1410 // if (!td->ignore)
1411 // ni++;
1412 // le2 = le2->Flink;
1413 // }
1414 //
1415 // if (t->header.num_items != ni) {
1416 // ERR("tree %p not okay: num_items was %x, expecting %x\n", t, ni, t->header.num_items);
1417 // int3;
1418 // } else {
1419 // ERR("tree %p okay\n", t);
1420 // }
1421 // }
1422 //
1423 // static void check_trees_num_items(LIST_ENTRY* tc) {
1424 // LIST_ENTRY* le = tc->Flink;
1425 // while (le != tc) {
1426 // tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1427 //
1428 // check_tree_num_items(tc2->tree);
1429 //
1430 // le = le->Flink;
1431 // }
1432 // }
1433
1434 static void convert_old_tree_extent(device_extension* Vcb, tree_data* td, tree* t, LIST_ENTRY* rollback) {
1435 KEY searchkey;
1436 traverse_ptr tp, tp2, insert_tp;
1437 EXTENT_REF_V0* erv0;
1438 NTSTATUS Status;
1439
1440 TRACE("(%p, %p, %p)\n", Vcb, td, t);
1441
1442 searchkey.obj_id = td->treeholder.address;
1443 searchkey.obj_type = TYPE_EXTENT_REF_V0;
1444 searchkey.offset = 0xffffffffffffffff;
1445
1446 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
1447 if (!NT_SUCCESS(Status)) {
1448 ERR("error - find_item returned %08x\n", Status);
1449 return;
1450 }
1451
1452 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1453 TRACE("could not find EXTENT_REF_V0 for %llx\n", searchkey.obj_id);
1454 return;
1455 }
1456
1457 searchkey.obj_id = td->treeholder.address;
1458 searchkey.obj_type = TYPE_EXTENT_ITEM;
1459 searchkey.offset = Vcb->superblock.node_size;
1460
1461 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
1462 if (!NT_SUCCESS(Status)) {
1463 ERR("error - find_item returned %08x\n", Status);
1464 return;
1465 }
1466
1467 if (keycmp(&searchkey, &tp2.item->key)) {
1468 ERR("could not find %llx,%x,%llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1469 return;
1470 }
1471
1472 if (tp.item->size < sizeof(EXTENT_REF_V0)) {
1473 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_REF_V0));
1474 return;
1475 }
1476
1477 erv0 = (EXTENT_REF_V0*)tp.item->data;
1478
1479 delete_tree_item(Vcb, &tp, rollback);
1480 delete_tree_item(Vcb, &tp2, rollback);
1481
1482 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
1483 EXTENT_ITEM_SKINNY_METADATA* eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
1484
1485 if (!eism) {
1486 ERR("out of memory\n");
1487 return;
1488 }
1489
1490 eism->ei.refcount = 1;
1491 eism->ei.generation = erv0->gen;
1492 eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
1493 eism->type = TYPE_TREE_BLOCK_REF;
1494 eism->tbr.offset = t->header.tree_id;
1495
1496 if (!insert_tree_item(Vcb, Vcb->extent_root, td->treeholder.address, TYPE_METADATA_ITEM, t->header.level -1, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, rollback)) {
1497 ERR("insert_tree_item failed\n");
1498 return;
1499 }
1500 } else {
1501 EXTENT_ITEM_TREE2* eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
1502
1503 if (!eit2) {
1504 ERR("out of memory\n");
1505 return;
1506 }
1507
1508 eit2->eit.extent_item.refcount = 1;
1509 eit2->eit.extent_item.generation = erv0->gen;
1510 eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
1511 eit2->eit.firstitem = td->key;
1512 eit2->eit.level = t->header.level - 1;
1513 eit2->type = TYPE_TREE_BLOCK_REF;
1514 eit2->tbr.offset = t->header.tree_id;
1515
1516 if (!insert_tree_item(Vcb, Vcb->extent_root, td->treeholder.address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, rollback)) {
1517 ERR("insert_tree_item failed\n");
1518 return;
1519 }
1520 }
1521
1522 add_parents_to_cache(Vcb, insert_tp.tree);
1523 add_parents_to_cache(Vcb, tp.tree);
1524 add_parents_to_cache(Vcb, tp2.tree);
1525 }
1526
1527 static NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* t, LIST_ENTRY* rollback) {
1528 KEY searchkey;
1529 traverse_ptr tp;
1530 EXTENT_ITEM* ei;
1531 EXTENT_ITEM_V0* eiv0;
1532 chunk* c;
1533 NTSTATUS Status;
1534
1535 // FIXME - deal with refcounts > 1
1536
1537 TRACE("(%p, %llx, %p)\n", Vcb, address, t);
1538
1539 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
1540 if (reduce_tree_extent_skinny(Vcb, address, t, rollback)) {
1541 return STATUS_SUCCESS;
1542 }
1543 }
1544
1545 searchkey.obj_id = address;
1546 searchkey.obj_type = TYPE_EXTENT_ITEM;
1547 searchkey.offset = Vcb->superblock.node_size;
1548
1549 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
1550 if (!NT_SUCCESS(Status)) {
1551 ERR("error - find_item returned %08x\n", Status);
1552 return Status;
1553 }
1554
1555 if (keycmp(&tp.item->key, &searchkey)) {
1556 ERR("could not find %llx,%x,%llx in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1557 int3;
1558 return STATUS_INTERNAL_ERROR;
1559 }
1560
1561 if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
1562 eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
1563
1564 if (eiv0->refcount > 1) {
1565 FIXME("FIXME - cannot deal with refcounts larger than 1 at present (eiv0->refcount == %llx)\n", eiv0->refcount);
1566 return STATUS_INTERNAL_ERROR;
1567 }
1568 } else {
1569 if (tp.item->size < sizeof(EXTENT_ITEM)) {
1570 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
1571 return STATUS_INTERNAL_ERROR;
1572 }
1573
1574 ei = (EXTENT_ITEM*)tp.item->data;
1575
1576 if (ei->refcount > 1) {
1577 FIXME("FIXME - cannot deal with refcounts larger than 1 at present (ei->refcount == %llx)\n", ei->refcount);
1578 return STATUS_INTERNAL_ERROR;
1579 }
1580
1581 if (t && t->header.level == 0 && ei->flags & EXTENT_ITEM_SHARED_BACKREFS) {
1582 // convert shared data extents
1583
1584 LIST_ENTRY* le = t->itemlist.Flink;
1585 while (le != &t->itemlist) {
1586 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1587
1588 TRACE("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1589
1590 if (!td->ignore && !td->inserted) {
1591 if (td->key.obj_type == TYPE_EXTENT_DATA) {
1592 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1593
1594 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1595 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1596
1597 if (ed2->address != 0) {
1598 TRACE("trying to convert shared data extent %llx,%llx\n", ed2->address, ed2->size);
1599 convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
1600 }
1601 }
1602 }
1603 }
1604
1605 le = le->Flink;
1606 }
1607
1608 t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
1609 }
1610 }
1611
1612 delete_tree_item(Vcb, &tp, rollback);
1613
1614 // if EXTENT_ITEM_V0, delete corresponding B4 item
1615 if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
1616 traverse_ptr tp2;
1617
1618 searchkey.obj_id = address;
1619 searchkey.obj_type = TYPE_EXTENT_REF_V0;
1620 searchkey.offset = 0xffffffffffffffff;
1621
1622 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
1623 if (!NT_SUCCESS(Status)) {
1624 ERR("error - find_item returned %08x\n", Status);
1625 return Status;
1626 }
1627
1628 if (tp2.item->key.obj_id == searchkey.obj_id && tp2.item->key.obj_type == searchkey.obj_type) {
1629 delete_tree_item(Vcb, &tp2, rollback);
1630 }
1631 }
1632
1633 if (t && !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1634 LIST_ENTRY* le;
1635
1636 // when writing old internal trees, convert related extents
1637
1638 le = t->itemlist.Flink;
1639 while (le != &t->itemlist) {
1640 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1641
1642 // ERR("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1643
1644 if (!td->ignore && !td->inserted) {
1645 if (t->header.level > 0) {
1646 convert_old_tree_extent(Vcb, td, t, rollback);
1647 } else if (td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA)) {
1648 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1649
1650 if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1651 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1652
1653 if (ed2->address != 0) {
1654 TRACE("trying to convert old data extent %llx,%llx\n", ed2->address, ed2->size);
1655 convert_old_data_extent(Vcb, ed2->address, ed2->size, rollback);
1656 }
1657 }
1658 }
1659 }
1660
1661 le = le->Flink;
1662 }
1663 }
1664
1665 c = get_chunk_from_address(Vcb, address);
1666
1667 if (c) {
1668 decrease_chunk_usage(c, tp.item->key.offset);
1669
1670 add_to_space_list(c, address, tp.item->key.offset, SPACE_TYPE_DELETING);
1671 } else
1672 ERR("could not find chunk for address %llx\n", address);
1673
1674 return STATUS_SUCCESS;
1675 }
1676
1677 static NTSTATUS allocate_tree_extents(device_extension* Vcb, LIST_ENTRY* rollback) {
1678 LIST_ENTRY* le;
1679 NTSTATUS Status;
1680
1681 TRACE("(%p)\n", Vcb);
1682
1683 le = Vcb->trees.Flink;
1684 while (le != &Vcb->trees) {
1685 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1686
1687 if (t->write && !t->has_new_address) {
1688 chunk* c;
1689
1690 Status = get_tree_new_address(Vcb, t, rollback);
1691 if (!NT_SUCCESS(Status)) {
1692 ERR("get_tree_new_address returned %08x\n", Status);
1693 return Status;
1694 }
1695
1696 TRACE("allocated extent %llx\n", t->new_address);
1697
1698 if (t->has_address) {
1699 Status = reduce_tree_extent(Vcb, t->header.address, t, rollback);
1700
1701 if (!NT_SUCCESS(Status)) {
1702 ERR("reduce_tree_extent returned %08x\n", Status);
1703 return Status;
1704 }
1705 }
1706
1707 c = get_chunk_from_address(Vcb, t->new_address);
1708
1709 if (c) {
1710 increase_chunk_usage(c, Vcb->superblock.node_size);
1711 } else {
1712 ERR("could not find chunk for address %llx\n", t->new_address);
1713 return STATUS_INTERNAL_ERROR;
1714 }
1715 }
1716
1717 le = le->Flink;
1718 }
1719
1720 return STATUS_SUCCESS;
1721 }
1722
1723 static NTSTATUS update_root_root(device_extension* Vcb, LIST_ENTRY* rollback) {
1724 LIST_ENTRY* le;
1725 NTSTATUS Status;
1726
1727 TRACE("(%p)\n", Vcb);
1728
1729 le = Vcb->trees.Flink;
1730 while (le != &Vcb->trees) {
1731 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1732
1733 if (t->write && !t->parent) {
1734 if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
1735 KEY searchkey;
1736 traverse_ptr tp;
1737
1738 searchkey.obj_id = t->root->id;
1739 searchkey.obj_type = TYPE_ROOT_ITEM;
1740 searchkey.offset = 0xffffffffffffffff;
1741
1742 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
1743 if (!NT_SUCCESS(Status)) {
1744 ERR("error - find_item returned %08x\n", Status);
1745 return Status;
1746 }
1747
1748 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1749 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1750 return STATUS_INTERNAL_ERROR;
1751 }
1752
1753 TRACE("updating the address for root %llx to %llx\n", searchkey.obj_id, t->new_address);
1754
1755 t->root->root_item.block_number = t->new_address;
1756 t->root->root_item.root_level = t->header.level;
1757 t->root->root_item.generation = Vcb->superblock.generation;
1758 t->root->root_item.generation2 = Vcb->superblock.generation;
1759
1760 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry
1761 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
1762
1763 if (!ri) {
1764 ERR("out of memory\n");
1765 return STATUS_INSUFFICIENT_RESOURCES;
1766 }
1767
1768 RtlCopyMemory(ri, &t->root->root_item, sizeof(ROOT_ITEM));
1769
1770 delete_tree_item(Vcb, &tp, rollback);
1771
1772 if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, 0, ri, sizeof(ROOT_ITEM), NULL, rollback)) {
1773 ERR("insert_tree_item failed\n");
1774 return STATUS_INTERNAL_ERROR;
1775 }
1776 } else
1777 RtlCopyMemory(tp.item->data, &t->root->root_item, sizeof(ROOT_ITEM));
1778 }
1779
1780 t->root->treeholder.address = t->new_address;
1781 }
1782
1783 le = le->Flink;
1784 }
1785
1786 Status = update_chunk_caches(Vcb, rollback);
1787 if (!NT_SUCCESS(Status)) {
1788 ERR("update_chunk_caches returned %08x\n", Status);
1789 return Status;
1790 }
1791
1792 return STATUS_SUCCESS;
1793 }
1794
1795 static NTSTATUS STDCALL write_tree_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
1796 write_tree_stripe* stripe = conptr;
1797 write_tree_context* context = (write_tree_context*)stripe->context;
1798 LIST_ENTRY* le;
1799 BOOL complete;
1800
1801 if (stripe->status == WriteTreeStatus_Cancelling) {
1802 stripe->status = WriteTreeStatus_Cancelled;
1803 goto end;
1804 }
1805
1806 stripe->iosb = Irp->IoStatus;
1807
1808 if (NT_SUCCESS(Irp->IoStatus.Status)) {
1809 stripe->status = WriteTreeStatus_Success;
1810 } else {
1811 le = context->stripes.Flink;
1812
1813 stripe->status = WriteTreeStatus_Error;
1814
1815 while (le != &context->stripes) {
1816 write_tree_stripe* s2 = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
1817
1818 if (s2->status == WriteTreeStatus_Pending) {
1819 s2->status = WriteTreeStatus_Cancelling;
1820 IoCancelIrp(s2->Irp);
1821 }
1822
1823 le = le->Flink;
1824 }
1825 }
1826
1827 end:
1828 le = context->stripes.Flink;
1829 complete = TRUE;
1830
1831 while (le != &context->stripes) {
1832 write_tree_stripe* s2 = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
1833
1834 if (s2->status == WriteTreeStatus_Pending || s2->status == WriteTreeStatus_Cancelling) {
1835 complete = FALSE;
1836 break;
1837 }
1838
1839 le = le->Flink;
1840 }
1841
1842 if (complete)
1843 KeSetEvent(&context->Event, 0, FALSE);
1844
1845 return STATUS_MORE_PROCESSING_REQUIRED;
1846 }
1847
1848 NTSTATUS write_tree(device_extension* Vcb, UINT64 addr, UINT8* data, write_tree_context* wtc) {
1849 chunk* c;
1850 CHUNK_ITEM_STRIPE* cis;
1851 write_tree_stripe* stripe;
1852 UINT64 i;
1853
1854 c = get_chunk_from_address(Vcb, addr);
1855
1856 if (!c) {
1857 ERR("get_chunk_from_address failed\n");
1858 return STATUS_INTERNAL_ERROR;
1859 }
1860
1861 cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1862
1863 // FIXME - make this work with RAID
1864
1865 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1866 PIO_STACK_LOCATION IrpSp;
1867
1868 // FIXME - handle missing devices
1869
1870 stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_tree_stripe), ALLOC_TAG);
1871 if (!stripe) {
1872 ERR("out of memory\n");
1873 return STATUS_INSUFFICIENT_RESOURCES;
1874 }
1875
1876 stripe->context = (struct write_tree_context*)wtc;
1877 stripe->buf = data;
1878 stripe->device = c->devices[i];
1879 RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK));
1880 stripe->status = WriteTreeStatus_Pending;
1881
1882 stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, FALSE);
1883
1884 if (!stripe->Irp) {
1885 ERR("IoAllocateIrp failed\n");
1886 return STATUS_INTERNAL_ERROR;
1887 }
1888
1889 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
1890 IrpSp->MajorFunction = IRP_MJ_WRITE;
1891
1892 if (stripe->device->devobj->Flags & DO_BUFFERED_IO) {
1893 stripe->Irp->AssociatedIrp.SystemBuffer = data;
1894
1895 stripe->Irp->Flags = IRP_BUFFERED_IO;
1896 } else if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
1897 stripe->Irp->MdlAddress = IoAllocateMdl(data, Vcb->superblock.node_size, FALSE, FALSE, NULL);
1898 if (!stripe->Irp->MdlAddress) {
1899 ERR("IoAllocateMdl failed\n");
1900 return STATUS_INTERNAL_ERROR;
1901 }
1902
1903 MmProbeAndLockPages(stripe->Irp->MdlAddress, KernelMode, IoWriteAccess);
1904 } else {
1905 stripe->Irp->UserBuffer = data;
1906 }
1907
1908 IrpSp->Parameters.Write.Length = Vcb->superblock.node_size;
1909 IrpSp->Parameters.Write.ByteOffset.QuadPart = addr - c->offset + cis[i].offset;
1910
1911 stripe->Irp->UserIosb = &stripe->iosb;
1912
1913 IoSetCompletionRoutine(stripe->Irp, write_tree_completion, stripe, TRUE, TRUE, TRUE);
1914
1915 InsertTailList(&wtc->stripes, &stripe->list_entry);
1916 }
1917
1918 return STATUS_SUCCESS;
1919 }
1920
1921 void free_write_tree_stripes(write_tree_context* wtc) {
1922 LIST_ENTRY *le, *le2, *nextle;
1923
1924 le = wtc->stripes.Flink;
1925 while (le != &wtc->stripes) {
1926 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
1927
1928 if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
1929 MmUnlockPages(stripe->Irp->MdlAddress);
1930 IoFreeMdl(stripe->Irp->MdlAddress);
1931 }
1932
1933 le = le->Flink;
1934 }
1935
1936 le = wtc->stripes.Flink;
1937 while (le != &wtc->stripes) {
1938 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
1939
1940 nextle = le->Flink;
1941
1942 if (stripe->buf) {
1943 ExFreePool(stripe->buf);
1944
1945 le2 = le->Flink;
1946 while (le2 != &wtc->stripes) {
1947 write_tree_stripe* s2 = CONTAINING_RECORD(le2, write_tree_stripe, list_entry);
1948
1949 if (s2->buf == stripe->buf)
1950 s2->buf = NULL;
1951
1952 le2 = le2->Flink;
1953 }
1954
1955 }
1956
1957 ExFreePool(stripe);
1958
1959 le = nextle;
1960 }
1961 }
1962
1963 static NTSTATUS write_trees(device_extension* Vcb) {
1964 UINT8 level;
1965 UINT8 *data, *body;
1966 UINT32 crc32;
1967 NTSTATUS Status;
1968 LIST_ENTRY* le;
1969 write_tree_context* wtc;
1970
1971 TRACE("(%p)\n", Vcb);
1972
1973 for (level = 0; level <= 255; level++) {
1974 BOOL nothing_found = TRUE;
1975
1976 TRACE("level = %u\n", level);
1977
1978 le = Vcb->trees.Flink;
1979 while (le != &Vcb->trees) {
1980 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1981
1982 if (t->write && t->header.level == level) {
1983 KEY firstitem, searchkey;
1984 LIST_ENTRY* le2;
1985 traverse_ptr tp;
1986 EXTENT_ITEM_TREE* eit;
1987
1988 if (!t->has_new_address) {
1989 ERR("error - tried to write tree with no new address\n");
1990 int3;
1991 }
1992
1993 le2 = t->itemlist.Flink;
1994 while (le2 != &t->itemlist) {
1995 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1996 if (!td->ignore) {
1997 firstitem = td->key;
1998 break;
1999 }
2000 le2 = le2->Flink;
2001 }
2002
2003 if (t->parent) {
2004 t->paritem->key = firstitem;
2005 t->paritem->treeholder.address = t->new_address;
2006 t->paritem->treeholder.generation = Vcb->superblock.generation;
2007 }
2008
2009 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
2010 searchkey.obj_id = t->new_address;
2011 searchkey.obj_type = TYPE_EXTENT_ITEM;
2012 searchkey.offset = Vcb->superblock.node_size;
2013
2014 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2015 if (!NT_SUCCESS(Status)) {
2016 ERR("error - find_item returned %08x\n", Status);
2017 return Status;
2018 }
2019
2020 if (keycmp(&searchkey, &tp.item->key)) {
2021 // traverse_ptr next_tp;
2022 // BOOL b;
2023 // tree_data* paritem;
2024
2025 ERR("could not find %llx,%x,%llx in extent_root (found %llx,%x,%llx instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
2026
2027 // searchkey.obj_id = 0;
2028 // searchkey.obj_type = 0;
2029 // searchkey.offset = 0;
2030 //
2031 // find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2032 //
2033 // paritem = NULL;
2034 // do {
2035 // if (tp.tree->paritem != paritem) {
2036 // paritem = tp.tree->paritem;
2037 // ERR("paritem: %llx,%x,%llx\n", paritem->key.obj_id, paritem->key.obj_type, paritem->key.offset);
2038 // }
2039 //
2040 // ERR("%llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
2041 //
2042 // b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
2043 // if (b) {
2044 // free_traverse_ptr(&tp);
2045 // tp = next_tp;
2046 // }
2047 // } while (b);
2048 //
2049 // free_traverse_ptr(&tp);
2050
2051 return STATUS_INTERNAL_ERROR;
2052 }
2053
2054 if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
2055 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
2056 return STATUS_INTERNAL_ERROR;
2057 }
2058
2059 eit = (EXTENT_ITEM_TREE*)tp.item->data;
2060 eit->firstitem = firstitem;
2061 }
2062
2063 nothing_found = FALSE;
2064 }
2065
2066 le = le->Flink;
2067 }
2068
2069 if (nothing_found)
2070 break;
2071 }
2072
2073 TRACE("allocated tree extents\n");
2074
2075 wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_tree_context), ALLOC_TAG);
2076 if (!wtc) {
2077 ERR("out of memory\n");
2078 return STATUS_INSUFFICIENT_RESOURCES;
2079 }
2080
2081 KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE);
2082 InitializeListHead(&wtc->stripes);
2083
2084 le = Vcb->trees.Flink;
2085 while (le != &Vcb->trees) {
2086 tree* t = CONTAINING_RECORD(le, tree, list_entry);
2087 #ifdef DEBUG_PARANOID
2088 UINT32 num_items = 0, size = 0;
2089 LIST_ENTRY* le2;
2090 BOOL crash = FALSE;
2091 #endif
2092
2093 if (t->write) {
2094 #ifdef DEBUG_PARANOID
2095 le2 = t->itemlist.Flink;
2096 while (le2 != &t->itemlist) {
2097 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2098 if (!td->ignore) {
2099 num_items++;
2100
2101 if (t->header.level == 0)
2102 size += td->size;
2103 }
2104 le2 = le2->Flink;
2105 }
2106
2107 if (t->header.level == 0)
2108 size += num_items * sizeof(leaf_node);
2109 else
2110 size += num_items * sizeof(internal_node);
2111
2112 if (num_items != t->header.num_items) {
2113 ERR("tree %llx, level %x: num_items was %x, expected %x\n", t->root->id, t->header.level, num_items, t->header.num_items);
2114 crash = TRUE;
2115 }
2116
2117 if (size != t->size) {
2118 ERR("tree %llx, level %x: size was %x, expected %x\n", t->root->id, t->header.level, size, t->size);
2119 crash = TRUE;
2120 }
2121
2122 if (t->header.num_items == 0 && t->parent) {
2123 ERR("tree %llx, level %x: tried to write empty tree with parent\n", t->root->id, t->header.level);
2124 crash = TRUE;
2125 }
2126
2127 if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
2128 ERR("tree %llx, level %x: tried to write overlarge tree (%x > %x)\n", t->root->id, t->header.level, t->size, Vcb->superblock.node_size - sizeof(tree_header));
2129 crash = TRUE;
2130 }
2131
2132 if (crash) {
2133 ERR("tree %p\n", t);
2134 le2 = t->itemlist.Flink;
2135 while (le2 != &t->itemlist) {
2136 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2137 if (!td->ignore) {
2138 ERR("%llx,%x,%llx inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted);
2139 }
2140 le2 = le2->Flink;
2141 }
2142 int3;
2143 }
2144 #endif
2145 t->header.address = t->new_address;
2146 t->header.generation = Vcb->superblock.generation;
2147 t->header.flags |= HEADER_FLAG_MIXED_BACKREF;
2148 t->has_address = TRUE;
2149
2150 data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
2151 if (!data) {
2152 ERR("out of memory\n");
2153 Status = STATUS_INSUFFICIENT_RESOURCES;
2154 goto end;
2155 }
2156
2157 body = data + sizeof(tree_header);
2158
2159 RtlCopyMemory(data, &t->header, sizeof(tree_header));
2160 RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header));
2161
2162 if (t->header.level == 0) {
2163 leaf_node* itemptr = (leaf_node*)body;
2164 int i = 0;
2165 LIST_ENTRY* le2;
2166 UINT8* dataptr = data + Vcb->superblock.node_size;
2167
2168 le2 = t->itemlist.Flink;
2169 while (le2 != &t->itemlist) {
2170 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2171 if (!td->ignore) {
2172 dataptr = dataptr - td->size;
2173
2174 itemptr[i].key = td->key;
2175 itemptr[i].offset = (UINT8*)dataptr - (UINT8*)body;
2176 itemptr[i].size = td->size;
2177 i++;
2178
2179 if (td->size > 0)
2180 RtlCopyMemory(dataptr, td->data, td->size);
2181 }
2182
2183 le2 = le2->Flink;
2184 }
2185 } else {
2186 internal_node* itemptr = (internal_node*)body;
2187 int i = 0;
2188 LIST_ENTRY* le2;
2189
2190 le2 = t->itemlist.Flink;
2191 while (le2 != &t->itemlist) {
2192 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2193 if (!td->ignore) {
2194 itemptr[i].key = td->key;
2195 itemptr[i].address = td->treeholder.address;
2196 itemptr[i].generation = td->treeholder.generation;
2197 i++;
2198 }
2199
2200 le2 = le2->Flink;
2201 }
2202 }
2203
2204 crc32 = calc_crc32c(0xffffffff, (UINT8*)&((tree_header*)data)->fs_uuid, Vcb->superblock.node_size - sizeof(((tree_header*)data)->csum));
2205 crc32 = ~crc32;
2206 *((UINT32*)data) = crc32;
2207 TRACE("setting crc32 to %08x\n", crc32);
2208
2209 Status = write_tree(Vcb, t->new_address, data, wtc);
2210 if (!NT_SUCCESS(Status)) {
2211 ERR("write_tree returned %08x\n", Status);
2212 goto end;
2213 }
2214 }
2215
2216 le = le->Flink;
2217 }
2218
2219 Status = STATUS_SUCCESS;
2220
2221 if (wtc->stripes.Flink != &wtc->stripes) {
2222 // launch writes and wait
2223 le = wtc->stripes.Flink;
2224 while (le != &wtc->stripes) {
2225 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
2226
2227 IoCallDriver(stripe->device->devobj, stripe->Irp);
2228
2229 le = le->Flink;
2230 }
2231
2232 KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL);
2233
2234 le = wtc->stripes.Flink;
2235 while (le != &wtc->stripes) {
2236 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
2237
2238 if (!NT_SUCCESS(stripe->iosb.Status)) {
2239 Status = stripe->iosb.Status;
2240 break;
2241 }
2242
2243 le = le->Flink;
2244 }
2245
2246 free_write_tree_stripes(wtc);
2247 }
2248
2249 end:
2250 ExFreePool(wtc);
2251
2252 return Status;
2253 }
2254
2255 static void update_backup_superblock(device_extension* Vcb, superblock_backup* sb) {
2256 KEY searchkey;
2257 traverse_ptr tp;
2258
2259 RtlZeroMemory(sb, sizeof(superblock_backup));
2260
2261 sb->root_tree_addr = Vcb->superblock.root_tree_addr;
2262 sb->root_tree_generation = Vcb->superblock.generation;
2263 sb->root_level = Vcb->superblock.root_level;
2264
2265 sb->chunk_tree_addr = Vcb->superblock.chunk_tree_addr;
2266 sb->chunk_tree_generation = Vcb->superblock.chunk_root_generation;
2267 sb->chunk_root_level = Vcb->superblock.chunk_root_level;
2268
2269 searchkey.obj_id = BTRFS_ROOT_EXTENT;
2270 searchkey.obj_type = TYPE_ROOT_ITEM;
2271 searchkey.offset = 0xffffffffffffffff;
2272
2273 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE))) {
2274 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2275 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2276
2277 sb->extent_tree_addr = ri->block_number;
2278 sb->extent_tree_generation = ri->generation;
2279 sb->extent_root_level = ri->root_level;
2280 }
2281 }
2282
2283 searchkey.obj_id = BTRFS_ROOT_FSTREE;
2284
2285 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE))) {
2286 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2287 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2288
2289 sb->fs_tree_addr = ri->block_number;
2290 sb->fs_tree_generation = ri->generation;
2291 sb->fs_root_level = ri->root_level;
2292 }
2293 }
2294
2295 searchkey.obj_id = BTRFS_ROOT_DEVTREE;
2296
2297 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE))) {
2298 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2299 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2300
2301 sb->dev_root_addr = ri->block_number;
2302 sb->dev_root_generation = ri->generation;
2303 sb->dev_root_level = ri->root_level;
2304 }
2305 }
2306
2307 searchkey.obj_id = BTRFS_ROOT_CHECKSUM;
2308
2309 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE))) {
2310 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2311 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2312
2313 sb->csum_root_addr = ri->block_number;
2314 sb->csum_root_generation = ri->generation;
2315 sb->csum_root_level = ri->root_level;
2316 }
2317 }
2318
2319 sb->total_bytes = Vcb->superblock.total_bytes;
2320 sb->bytes_used = Vcb->superblock.bytes_used;
2321 sb->num_devices = Vcb->superblock.num_devices;
2322 }
2323
2324 static NTSTATUS write_superblocks(device_extension* Vcb) {
2325 UINT64 i;
2326 NTSTATUS Status;
2327 LIST_ENTRY* le;
2328
2329 TRACE("(%p)\n", Vcb);
2330
2331 le = Vcb->trees.Flink;
2332 while (le != &Vcb->trees) {
2333 tree* t = CONTAINING_RECORD(le, tree, list_entry);
2334
2335 if (t->write && !t->parent) {
2336 if (t->root == Vcb->root_root) {
2337 Vcb->superblock.root_tree_addr = t->new_address;
2338 Vcb->superblock.root_level = t->header.level;
2339 } else if (t->root == Vcb->chunk_root) {
2340 Vcb->superblock.chunk_tree_addr = t->new_address;
2341 Vcb->superblock.chunk_root_generation = t->header.generation;
2342 Vcb->superblock.chunk_root_level = t->header.level;
2343 }
2344 }
2345
2346 le = le->Flink;
2347 }
2348
2349 for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS - 1; i++) {
2350 RtlCopyMemory(&Vcb->superblock.backup[i], &Vcb->superblock.backup[i+1], sizeof(superblock_backup));
2351 }
2352
2353 update_backup_superblock(Vcb, &Vcb->superblock.backup[BTRFS_NUM_BACKUP_ROOTS - 1]);
2354
2355 for (i = 0; i < Vcb->superblock.num_devices; i++) {
2356 if (Vcb->devices[i].devobj) {
2357 Status = write_superblock(Vcb, &Vcb->devices[i]);
2358 if (!NT_SUCCESS(Status)) {
2359 ERR("write_superblock returned %08x\n", Status);
2360 return Status;
2361 }
2362 }
2363 }
2364
2365 return STATUS_SUCCESS;
2366 }
2367
2368 static NTSTATUS update_chunk_usage(device_extension* Vcb, LIST_ENTRY* rollback) {
2369 LIST_ENTRY* le = Vcb->chunks.Flink;
2370 chunk* c;
2371 KEY searchkey;
2372 traverse_ptr tp;
2373 BLOCK_GROUP_ITEM* bgi;
2374 NTSTATUS Status;
2375
2376 TRACE("(%p)\n", Vcb);
2377
2378 while (le != &Vcb->chunks) {
2379 c = CONTAINING_RECORD(le, chunk, list_entry);
2380
2381 if (c->used != c->oldused) {
2382 searchkey.obj_id = c->offset;
2383 searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
2384 searchkey.offset = c->chunk_item->size;
2385
2386 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2387 if (!NT_SUCCESS(Status)) {
2388 ERR("error - find_item returned %08x\n", Status);
2389 return Status;
2390 }
2391
2392 if (keycmp(&searchkey, &tp.item->key)) {
2393 ERR("could not find (%llx,%x,%llx) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
2394 int3;
2395 return STATUS_INTERNAL_ERROR;
2396 }
2397
2398 if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
2399 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
2400 return STATUS_INTERNAL_ERROR;
2401 }
2402
2403 bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
2404 if (!bgi) {
2405 ERR("out of memory\n");
2406 return STATUS_INSUFFICIENT_RESOURCES;
2407 }
2408
2409 RtlCopyMemory(bgi, tp.item->data, tp.item->size);
2410 bgi->used = c->used;
2411
2412 TRACE("adjusting usage of chunk %llx to %llx\n", c->offset, c->used);
2413
2414 delete_tree_item(Vcb, &tp, rollback);
2415
2416 if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, rollback)) {
2417 ERR("insert_tree_item failed\n");
2418 ExFreePool(bgi);
2419 return STATUS_INTERNAL_ERROR;
2420 }
2421
2422 TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
2423 TRACE("chunk_item type = %llx\n", c->chunk_item->type);
2424
2425 if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
2426 FIXME("RAID0 not yet supported\n");
2427 ExFreePool(bgi);
2428 return STATUS_INTERNAL_ERROR;
2429 } else if (c->chunk_item->type & BLOCK_FLAG_RAID1) {
2430 FIXME("RAID1 not yet supported\n");
2431 ExFreePool(bgi);
2432 return STATUS_INTERNAL_ERROR;
2433 } else if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE) {
2434 Vcb->superblock.bytes_used = Vcb->superblock.bytes_used + (2 * (c->used - c->oldused));
2435 } else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
2436 FIXME("RAID10 not yet supported\n");
2437 ExFreePool(bgi);
2438 return STATUS_INTERNAL_ERROR;
2439 } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
2440 FIXME("RAID5 not yet supported\n");
2441 ExFreePool(bgi);
2442 return STATUS_INTERNAL_ERROR;
2443 } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2444 FIXME("RAID6 not yet supported\n");
2445 ExFreePool(bgi);
2446 return STATUS_INTERNAL_ERROR;
2447 } else { // SINGLE
2448 Vcb->superblock.bytes_used = Vcb->superblock.bytes_used + c->used - c->oldused;
2449 }
2450
2451 TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
2452
2453 c->oldused = c->used;
2454 }
2455
2456 le = le->Flink;
2457 }
2458
2459 return STATUS_SUCCESS;
2460 }
2461
2462 static void get_first_item(tree* t, KEY* key) {
2463 LIST_ENTRY* le;
2464
2465 le = t->itemlist.Flink;
2466 while (le != &t->itemlist) {
2467 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2468
2469 *key = td->key;
2470 return;
2471 }
2472 }
2473
2474 static NTSTATUS STDCALL split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, UINT32 numitems, UINT32 size) {
2475 tree *nt, *pt;
2476 tree_data* td;
2477 tree_data* oldlastitem;
2478 // write_tree* wt2;
2479 // // tree_data *firsttd, *lasttd;
2480 // // LIST_ENTRY* le;
2481 // #ifdef DEBUG_PARANOID
2482 // KEY lastkey1, lastkey2;
2483 // traverse_ptr tp, next_tp;
2484 // ULONG numitems1, numitems2;
2485 // #endif
2486
2487 TRACE("splitting tree in %llx at (%llx,%x,%llx)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);
2488
2489 // #ifdef DEBUG_PARANOID
2490 // lastkey1.obj_id = 0xffffffffffffffff;
2491 // lastkey1.obj_type = 0xff;
2492 // lastkey1.offset = 0xffffffffffffffff;
2493 //
2494 // if (!find_item(Vcb, t->root, &tp, &lastkey1, NULL, FALSE))
2495 // ERR("error - find_item failed\n");
2496 // else {
2497 // lastkey1 = tp.item->key;
2498 // numitems1 = 0;
2499 // while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
2500 // free_traverse_ptr(&tp);
2501 // tp = next_tp;
2502 // numitems1++;
2503 // }
2504 // free_traverse_ptr(&tp);
2505 // }
2506 // #endif
2507
2508 nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2509 if (!nt) {
2510 ERR("out of memory\n");
2511 return STATUS_INSUFFICIENT_RESOURCES;
2512 }
2513
2514 RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header));
2515 nt->header.address = 0;
2516 nt->header.generation = Vcb->superblock.generation;
2517 nt->header.num_items = t->header.num_items - numitems;
2518 nt->header.flags = HEADER_FLAG_MIXED_BACKREF;
2519
2520 nt->has_address = FALSE;
2521 nt->Vcb = Vcb;
2522 nt->parent = t->parent;
2523 nt->root = t->root;
2524 // nt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
2525 nt->new_address = 0;
2526 nt->has_new_address = FALSE;
2527 nt->flags = t->flags;
2528 InitializeListHead(&nt->itemlist);
2529
2530 // ExInitializeResourceLite(&nt->nonpaged->load_tree_lock);
2531
2532 oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry);
2533
2534 // // firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
2535 // // lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
2536 // //
2537 // // TRACE("old tree in %x was from (%x,%x,%x) to (%x,%x,%x)\n",
2538 // // (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
2539 // // (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
2540 // //
2541 // // le = wt->tree->itemlist.Flink;
2542 // // while (le != &wt->tree->itemlist) {
2543 // // td = CONTAINING_RECORD(le, tree_data, list_entry);
2544 // // TRACE("old tree item was (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
2545 // // le = le->Flink;
2546 // // }
2547
2548 nt->itemlist.Flink = &newfirstitem->list_entry;
2549 nt->itemlist.Blink = t->itemlist.Blink;
2550 nt->itemlist.Flink->Blink = &nt->itemlist;
2551 nt->itemlist.Blink->Flink = &nt->itemlist;
2552
2553 t->itemlist.Blink = &oldlastitem->list_entry;
2554 t->itemlist.Blink->Flink = &t->itemlist;
2555
2556 // // le = wt->tree->itemlist.Flink;
2557 // // while (le != &wt->tree->itemlist) {
2558 // // td = CONTAINING_RECORD(le, tree_data, list_entry);
2559 // // TRACE("old tree item now (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
2560 // // le = le->Flink;
2561 // // }
2562 // //
2563 // // firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
2564 // // lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
2565 // //
2566 // // TRACE("old tree in %x is now from (%x,%x,%x) to (%x,%x,%x)\n",
2567 // // (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
2568 // // (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
2569
2570 nt->size = t->size - size;
2571 t->size = size;
2572 t->header.num_items = numitems;
2573 nt->write = TRUE;
2574 Vcb->write_trees++;
2575
2576 InterlockedIncrement(&Vcb->open_trees);
2577 InsertTailList(&Vcb->trees, &nt->list_entry);
2578
2579 // // // TESTING
2580 // // td = wt->tree->items;
2581 // // while (td) {
2582 // // if (!td->ignore) {
2583 // // TRACE("old tree item: (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
2584 // // }
2585 // // td = td->next;
2586 // // }
2587
2588 // // oldlastitem->next = NULL;
2589 // // wt->tree->lastitem = oldlastitem;
2590
2591 // // TRACE("last item is now (%x,%x,%x)\n", (UINT32)oldlastitem->key.obj_id, oldlastitem->key.obj_type, (UINT32)oldlastitem->key.offset);
2592
2593 if (nt->header.level > 0) {
2594 LIST_ENTRY* le = nt->itemlist.Flink;
2595
2596 while (le != &nt->itemlist) {
2597 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
2598
2599 if (td2->treeholder.tree)
2600 td2->treeholder.tree->parent = nt;
2601
2602 le = le->Flink;
2603 }
2604 }
2605
2606 if (nt->parent) {
2607 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
2608 if (!td) {
2609 ERR("out of memory\n");
2610 return STATUS_INSUFFICIENT_RESOURCES;
2611 }
2612
2613 td->key = newfirstitem->key;
2614
2615 InsertHeadList(&t->paritem->list_entry, &td->list_entry);
2616
2617 td->ignore = FALSE;
2618 td->inserted = TRUE;
2619 td->treeholder.tree = nt;
2620 init_tree_holder(&td->treeholder);
2621 // td->treeholder.nonpaged->status = tree_holder_loaded;
2622 nt->paritem = td;
2623
2624 nt->parent->header.num_items++;
2625 nt->parent->size += sizeof(internal_node);
2626
2627 goto end;
2628 }
2629
2630 TRACE("adding new tree parent\n");
2631
2632 if (nt->header.level == 255) {
2633 ERR("cannot add parent to tree at level 255\n");
2634 return STATUS_INTERNAL_ERROR;
2635 }
2636
2637 pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2638 if (!pt) {
2639 ERR("out of memory\n");
2640 return STATUS_INSUFFICIENT_RESOURCES;
2641 }
2642
2643 RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
2644 pt->header.address = 0;
2645 pt->header.num_items = 2;
2646 pt->header.level = nt->header.level + 1;
2647 pt->header.flags = HEADER_FLAG_MIXED_BACKREF;
2648
2649 pt->has_address = FALSE;
2650 pt->Vcb = Vcb;
2651 pt->parent = NULL;
2652 pt->paritem = NULL;
2653 pt->root = t->root;
2654 pt->new_address = 0;
2655 pt->has_new_address = FALSE;
2656 // pt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
2657 pt->size = pt->header.num_items * sizeof(internal_node);
2658 pt->flags = t->flags;
2659 InitializeListHead(&pt->itemlist);
2660
2661 // ExInitializeResourceLite(&pt->nonpaged->load_tree_lock);
2662
2663 InterlockedIncrement(&Vcb->open_trees);
2664 InsertTailList(&Vcb->trees, &pt->list_entry);
2665
2666 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
2667 if (!td) {
2668 ERR("out of memory\n");
2669 return STATUS_INSUFFICIENT_RESOURCES;
2670 }
2671
2672 get_first_item(t, &td->key);
2673 td->ignore = FALSE;
2674 td->inserted = FALSE;
2675 td->treeholder.address = 0;
2676 td->treeholder.generation = Vcb->superblock.generation;
2677 td->treeholder.tree = t;
2678 init_tree_holder(&td->treeholder);
2679 // td->treeholder.nonpaged->status = tree_holder_loaded;
2680 InsertTailList(&pt->itemlist, &td->list_entry);
2681 t->paritem = td;
2682
2683 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
2684 if (!td) {
2685 ERR("out of memory\n");
2686 return STATUS_INSUFFICIENT_RESOURCES;
2687 }
2688
2689 td->key = newfirstitem->key;
2690 td->ignore = FALSE;
2691 td->inserted = FALSE;
2692 td->treeholder.address = 0;
2693 td->treeholder.generation = Vcb->superblock.generation;
2694 td->treeholder.tree = nt;
2695 init_tree_holder(&td->treeholder);
2696 // td->treeholder.nonpaged->status = tree_holder_loaded;
2697 InsertTailList(&pt->itemlist, &td->list_entry);
2698 nt->paritem = td;
2699
2700 pt->write = TRUE;
2701 Vcb->write_trees++;
2702
2703 t->root->treeholder.tree = pt;
2704
2705 t->parent = pt;
2706 nt->parent = pt;
2707
2708 end:
2709 t->root->root_item.bytes_used += Vcb->superblock.node_size;
2710
2711 // #ifdef DEBUG_PARANOID
2712 // lastkey2.obj_id = 0xffffffffffffffff;
2713 // lastkey2.obj_type = 0xff;
2714 // lastkey2.offset = 0xffffffffffffffff;
2715 //
2716 // if (!find_item(Vcb, wt->tree->root, &tp, &lastkey2, NULL, FALSE))
2717 // ERR("error - find_item failed\n");
2718 // else {
2719 // lastkey2 = tp.item->key;
2720 //
2721 // numitems2 = 0;
2722 // while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
2723 // free_traverse_ptr(&tp);
2724 // tp = next_tp;
2725 // numitems2++;
2726 // }
2727 // free_traverse_ptr(&tp);
2728 // }
2729 //
2730 // ERR("lastkey1 = %llx,%x,%llx\n", lastkey1.obj_id, lastkey1.obj_type, lastkey1.offset);
2731 // ERR("lastkey2 = %llx,%x,%llx\n", lastkey2.obj_id, lastkey2.obj_type, lastkey2.offset);
2732 // ERR("numitems1 = %u\n", numitems1);
2733 // ERR("numitems2 = %u\n", numitems2);
2734 // #endif
2735
2736 return STATUS_SUCCESS;
2737 }
2738
2739 static NTSTATUS STDCALL split_tree(device_extension* Vcb, tree* t) {
2740 LIST_ENTRY* le;
2741 UINT32 size, ds, numitems;
2742
2743 size = 0;
2744 numitems = 0;
2745
2746 // FIXME - naïve implementation: maximizes number of filled trees
2747
2748 le = t->itemlist.Flink;
2749 while (le != &t->itemlist) {
2750 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2751
2752 if (!td->ignore) {
2753 if (t->header.level == 0)
2754 ds = sizeof(leaf_node) + td->size;
2755 else
2756 ds = sizeof(internal_node);
2757
2758 // FIXME - move back if previous item was deleted item with same key
2759 if (size + ds > Vcb->superblock.node_size - sizeof(tree_header))
2760 return split_tree_at(Vcb, t, td, numitems, size);
2761
2762 size += ds;
2763 numitems++;
2764 }
2765
2766 le = le->Flink;
2767 }
2768
2769 return STATUS_SUCCESS;
2770 }
2771
2772 static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, LIST_ENTRY* rollback) {
2773 LIST_ENTRY* le;
2774 tree_data* nextparitem = NULL;
2775 NTSTATUS Status;
2776 tree *next_tree, *par;
2777 BOOL loaded;
2778
2779 TRACE("trying to amalgamate tree in root %llx, level %x (size %u)\n", t->root->id, t->header.level, t->size);
2780
2781 // FIXME - doesn't capture everything, as it doesn't ascend
2782 // FIXME - write proper function and put it in treefuncs.c
2783 le = t->paritem->list_entry.Flink;
2784 while (le != &t->parent->itemlist) {
2785 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2786
2787 if (!td->ignore) {
2788 nextparitem = td;
2789 break;
2790 }
2791
2792 le = le->Flink;
2793 }
2794
2795 if (!nextparitem)
2796 return STATUS_SUCCESS;
2797
2798 // FIXME - loop, and capture more than one tree if we can
2799
2800 TRACE("nextparitem: key = %llx,%x,%llx\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset);
2801 // nextparitem = t->paritem;
2802
2803 // ExAcquireResourceExclusiveLite(&t->parent->nonpaged->load_tree_lock, TRUE);
2804
2805 Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, &loaded);
2806 if (!NT_SUCCESS(Status)) {
2807 ERR("do_load_tree returned %08x\n", Status);
2808 return Status;
2809 }
2810
2811 // ExReleaseResourceLite(&t->parent->nonpaged->load_tree_lock);
2812
2813 next_tree = nextparitem->treeholder.tree;
2814
2815 if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) {
2816 // merge two trees into one
2817
2818 t->header.num_items += next_tree->header.num_items;
2819 t->size += next_tree->size;
2820
2821 if (next_tree->header.level > 0) {
2822 le = next_tree->itemlist.Flink;
2823
2824 while (le != &next_tree->itemlist) {
2825 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
2826
2827 if (td2->treeholder.tree)
2828 td2->treeholder.tree->parent = t;
2829
2830 le = le->Flink;
2831 }
2832 }
2833
2834 t->itemlist.Blink->Flink = next_tree->itemlist.Flink;
2835 t->itemlist.Blink->Flink->Blink = t->itemlist.Blink;
2836 t->itemlist.Blink = next_tree->itemlist.Blink;
2837 t->itemlist.Blink->Flink = &t->itemlist;
2838
2839 // // TESTING
2840 // le = t->itemlist.Flink;
2841 // while (le != &t->itemlist) {
2842 // tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2843 // if (!td->ignore) {
2844 // ERR("key: %llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
2845 // }
2846 // le = le->Flink;
2847 // }
2848
2849 next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist;
2850
2851 next_tree->header.num_items = 0;
2852 next_tree->size = 0;
2853
2854 if (next_tree->has_new_address) { // delete associated EXTENT_ITEM
2855 Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, rollback);
2856
2857 if (!NT_SUCCESS(Status)) {
2858 ERR("reduce_tree_extent returned %08x\n", Status);
2859 return Status;
2860 }
2861 } else if (next_tree->has_address) {
2862 Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, rollback);
2863
2864 if (!NT_SUCCESS(Status)) {
2865 ERR("reduce_tree_extent returned %08x\n", Status);
2866 return Status;
2867 }
2868 }
2869
2870 if (!nextparitem->ignore) {
2871 nextparitem->ignore = TRUE;
2872 next_tree->parent->header.num_items--;
2873 next_tree->parent->size -= sizeof(internal_node);
2874 }
2875
2876 par = next_tree->parent;
2877 while (par) {
2878 if (!par->write) {
2879 par->write = TRUE;
2880 Vcb->write_trees++;
2881 }
2882 par = par->parent;
2883 }
2884
2885 RemoveEntryList(&nextparitem->list_entry);
2886 ExFreePool(next_tree->paritem);
2887 next_tree->paritem = NULL;
2888
2889 next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
2890
2891 free_tree(next_tree);
2892 } else {
2893 // rebalance by moving items from second tree into first
2894 ULONG avg_size = (t->size + next_tree->size) / 2;
2895 KEY firstitem = {0, 0, 0};
2896
2897 TRACE("attempting rebalance\n");
2898
2899 le = next_tree->itemlist.Flink;
2900 while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) {
2901 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2902 ULONG size;
2903
2904 if (!td->ignore) {
2905 if (next_tree->header.level == 0)
2906 size = sizeof(leaf_node) + td->size;
2907 else
2908 size = sizeof(internal_node);
2909 } else
2910 size = 0;
2911
2912 if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) {
2913 RemoveEntryList(&td->list_entry);
2914 InsertTailList(&t->itemlist, &td->list_entry);
2915
2916 if (next_tree->header.level > 0 && td->treeholder.tree)
2917 td->treeholder.tree->parent = t;
2918
2919 if (!td->ignore) {
2920 next_tree->size -= size;
2921 t->size += size;
2922 next_tree->header.num_items--;
2923 t->header.num_items++;
2924 }
2925 } else
2926 break;
2927
2928 le = next_tree->itemlist.Flink;
2929 }
2930
2931 le = next_tree->itemlist.Flink;
2932 while (le != &next_tree->itemlist) {
2933 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2934
2935 if (!td->ignore) {
2936 firstitem = td->key;
2937 break;
2938 }
2939
2940 le = le->Flink;
2941 }
2942
2943 // ERR("firstitem = %llx,%x,%llx\n", firstitem.obj_id, firstitem.obj_type, firstitem.offset);
2944
2945 // FIXME - once ascension is working, make this work with parent's parent, etc.
2946 if (next_tree->paritem)
2947 next_tree->paritem->key = firstitem;
2948
2949 par = next_tree;
2950 while (par) {
2951 if (!par->write) {
2952 par->write = TRUE;
2953 Vcb->write_trees++;
2954 }
2955 par = par->parent;
2956 }
2957 }
2958
2959 return STATUS_SUCCESS;
2960 }
2961
2962 static NTSTATUS update_extent_level(device_extension* Vcb, UINT64 address, tree* t, UINT8 level, LIST_ENTRY* rollback) {
2963 KEY searchkey;
2964 traverse_ptr tp;
2965 NTSTATUS Status;
2966
2967 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
2968 searchkey.obj_id = address;
2969 searchkey.obj_type = TYPE_METADATA_ITEM;
2970 searchkey.offset = t->header.level;
2971
2972 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2973 if (!NT_SUCCESS(Status)) {
2974 ERR(