[BTRFS]
[reactos.git] / reactos / drivers / filesystems / btrfs / write.c
1 /* Copyright (c) Mark Harmstone 2016
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19
20 #define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - sizeof(leaf_node))
21
22 // BOOL did_split;
23 BOOL chunk_test = FALSE;
24
25 typedef struct {
26 KEVENT Event;
27 IO_STATUS_BLOCK iosb;
28 } write_context;
29
30 typedef struct {
31 EXTENT_ITEM ei;
32 UINT8 type;
33 EXTENT_DATA_REF edr;
34 } EXTENT_ITEM_DATA_REF;
35
36 typedef struct {
37 EXTENT_ITEM_TREE eit;
38 UINT8 type;
39 TREE_BLOCK_REF tbr;
40 } EXTENT_ITEM_TREE2;
41
42 typedef struct {
43 EXTENT_ITEM ei;
44 UINT8 type;
45 TREE_BLOCK_REF tbr;
46 } EXTENT_ITEM_SKINNY_METADATA;
47
48 typedef struct {
49 CHUNK_ITEM ci;
50 CHUNK_ITEM_STRIPE stripes[1];
51 } CHUNK_ITEM2;
52
53 typedef struct {
54 LIST_ENTRY list_entry;
55 UINT64 key;
56 } ordered_list;
57
58 typedef struct {
59 ordered_list ol;
60 ULONG length;
61 UINT32* checksums;
62 BOOL deleted;
63 } changed_sector;
64
65 static NTSTATUS convert_old_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, LIST_ENTRY* rollback);
66 static BOOL extent_item_is_shared(EXTENT_ITEM* ei, ULONG len);
67 static NTSTATUS convert_shared_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, LIST_ENTRY* rollback);
68
69 static NTSTATUS STDCALL write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
70 write_context* context = conptr;
71
72 context->iosb = Irp->IoStatus;
73 KeSetEvent(&context->Event, 0, FALSE);
74
75 // return STATUS_SUCCESS;
76 return STATUS_MORE_PROCESSING_REQUIRED;
77 }
78
79 static NTSTATUS STDCALL write_data_phys(PDEVICE_OBJECT device, UINT64 address, void* data, UINT32 length) {
80 NTSTATUS Status;
81 LARGE_INTEGER offset;
82 PIRP Irp;
83 PIO_STACK_LOCATION IrpSp;
84 write_context* context = NULL;
85
86 TRACE("(%p, %llx, %p, %x)\n", device, address, data, length);
87
88 context = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_context), ALLOC_TAG);
89 if (!context) {
90 ERR("out of memory\n");
91 return STATUS_INSUFFICIENT_RESOURCES;
92 }
93
94 RtlZeroMemory(context, sizeof(write_context));
95
96 KeInitializeEvent(&context->Event, NotificationEvent, FALSE);
97
98 offset.QuadPart = address;
99
100 // Irp = IoBuildSynchronousFsdRequest(IRP_MJ_WRITE, Vcb->device, data, length, &offset, NULL, &context->iosb);
101
102 Irp = IoAllocateIrp(device->StackSize, FALSE);
103
104 if (!Irp) {
105 ERR("IoAllocateIrp failed\n");
106 Status = STATUS_INTERNAL_ERROR;
107 goto exit2;
108 }
109
110 IrpSp = IoGetNextIrpStackLocation(Irp);
111 IrpSp->MajorFunction = IRP_MJ_WRITE;
112
113 if (device->Flags & DO_BUFFERED_IO) {
114 Irp->AssociatedIrp.SystemBuffer = data;
115
116 Irp->Flags = IRP_BUFFERED_IO;
117 } else if (device->Flags & DO_DIRECT_IO) {
118 Irp->MdlAddress = IoAllocateMdl(data, length, FALSE, FALSE, NULL);
119 if (!Irp->MdlAddress) {
120 DbgPrint("IoAllocateMdl failed\n");
121 goto exit;
122 }
123
124 MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoWriteAccess);
125 } else {
126 Irp->UserBuffer = data;
127 }
128
129 IrpSp->Parameters.Write.Length = length;
130 IrpSp->Parameters.Write.ByteOffset = offset;
131
132 Irp->UserIosb = &context->iosb;
133
134 Irp->UserEvent = &context->Event;
135
136 IoSetCompletionRoutine(Irp, write_completion, context, TRUE, TRUE, TRUE);
137
138 // FIXME - support multiple devices
139 Status = IoCallDriver(device, Irp);
140
141 if (Status == STATUS_PENDING) {
142 KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
143 Status = context->iosb.Status;
144 }
145
146 if (!NT_SUCCESS(Status)) {
147 ERR("IoCallDriver returned %08x\n", Status);
148 }
149
150 if (device->Flags & DO_DIRECT_IO) {
151 MmUnlockPages(Irp->MdlAddress);
152 IoFreeMdl(Irp->MdlAddress);
153 }
154
155 exit:
156 IoFreeIrp(Irp);
157
158 exit2:
159 if (context)
160 ExFreePool(context);
161
162 return Status;
163 }
164
165 static NTSTATUS STDCALL write_superblock(device_extension* Vcb, device* device) {
166 NTSTATUS Status;
167 unsigned int i = 0;
168 UINT32 crc32;
169
170 #ifdef __REACTOS__
171 Status = STATUS_INTERNAL_ERROR;
172 #endif
173
174 // FIXME - work with RAID
175
176 // FIXME - only write one superblock if on SSD (?)
177 while (superblock_addrs[i] > 0 && Vcb->length >= superblock_addrs[i] + sizeof(superblock)) {
178 TRACE("writing superblock %u\n", i);
179
180 Vcb->superblock.sb_phys_addr = superblock_addrs[i];
181 RtlCopyMemory(&Vcb->superblock.dev_item, &device->devitem, sizeof(DEV_ITEM));
182
183 crc32 = calc_crc32c(0xffffffff, (UINT8*)&Vcb->superblock.uuid, (ULONG)sizeof(superblock) - sizeof(Vcb->superblock.checksum));
184 crc32 = ~crc32;
185 TRACE("crc32 is %08x\n", crc32);
186 RtlCopyMemory(&Vcb->superblock.checksum, &crc32, sizeof(UINT32));
187
188 Status = write_data_phys(device->devobj, superblock_addrs[i], &Vcb->superblock, sizeof(superblock));
189
190 if (!NT_SUCCESS(Status))
191 break;
192
193 i++;
194 }
195
196 return Status;
197 }
198
199 static BOOL find_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address) {
200 LIST_ENTRY* le;
201 space *s, *bestfit = NULL;
202
203 TRACE("(%p, %llx, %llx, %p)\n", Vcb, c->offset, length, address);
204
205 le = c->space.Flink;
206 while (le != &c->space) {
207 s = CONTAINING_RECORD(le, space, list_entry);
208
209 if (s->type == SPACE_TYPE_FREE) {
210 if (s->size == length) {
211 *address = s->offset;
212 TRACE("returning exact fit at %llx\n", s->offset);
213 return TRUE;
214 } else if (s->size > length && (!bestfit || bestfit->size > s->size)) {
215 bestfit = s;
216 }
217 }
218
219 le = le->Flink;
220 }
221
222 if (bestfit) {
223 TRACE("returning best fit at %llx\n", bestfit->offset);
224 *address = bestfit->offset;
225 return TRUE;
226 }
227
228 return FALSE;
229 }
230
231 void add_to_space_list(chunk* c, UINT64 offset, UINT64 size, UINT8 type) {
232 LIST_ENTRY *le = c->space.Flink, *nextle, *insbef;
233 space *s, *s2, *s3;
234 #ifdef DEBUG_PARANOID
235 UINT64 lastaddr;
236 #endif
237
238 TRACE("(%p, %llx, %llx, %x)\n", c, offset, size, type);
239
240 #ifdef DEBUG_PARANOID
241 // TESTING
242 le = c->space.Flink;
243 while (le != &c->space) {
244 s = CONTAINING_RECORD(le, space, list_entry);
245
246 TRACE("%llx,%llx,%x\n", s->offset, s->size, s->type);
247
248 le = le->Flink;
249 }
250 #endif
251
252 c->space_changed = TRUE;
253
254 le = c->space.Flink;
255 insbef = &c->space;
256 while (le != &c->space) {
257 s = CONTAINING_RECORD(le, space, list_entry);
258 nextle = le->Flink;
259
260 if (s->offset >= offset + size) {
261 insbef = le;
262 break;
263 }
264
265 if (s->offset >= offset && s->offset + s->size <= offset + size) { // delete entirely
266 #ifndef __REACTOS__
267 RemoveEntryList(&s->list_entry);
268
269 #endif
270 if (s->offset + s->size == offset + size) {
271 insbef = s->list_entry.Flink;
272 RemoveEntryList(&s->list_entry);
273 ExFreePool(s);
274 break;
275 }
276
277 RemoveEntryList(&s->list_entry);
278 ExFreePool(s);
279 } else if (s->offset < offset && s->offset + s->size > offset + size) { // split in two
280 s3 = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
281 if (!s3) {
282 ERR("out of memory\n");
283 return;
284 }
285
286 s3->offset = offset + size;
287 s3->size = s->size - size - offset + s->offset;
288 s3->type = s->type;
289 InsertHeadList(&s->list_entry, &s3->list_entry);
290 insbef = &s3->list_entry;
291
292 s->size = offset - s->offset;
293 break;
294 } else if (s->offset + s->size > offset && s->offset + s->size <= offset + size) { // truncate before
295 s->size = offset - s->offset;
296 } else if (s->offset < offset + size && s->offset + s->size > offset + size) { // truncate after
297 s->size -= s->offset - offset + size;
298 s->offset = offset + size;
299
300 insbef = le;
301 break;
302 }
303
304 le = nextle;
305 }
306
307 s2 = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
308 if (!s2) {
309 ERR("out of memory\n");
310 return;
311 }
312
313 s2->offset = offset;
314 s2->size = size;
315 s2->type = type;
316 InsertTailList(insbef, &s2->list_entry);
317
318 // merge entries if same type
319
320 if (s2->list_entry.Blink != &c->space) {
321 s = CONTAINING_RECORD(s2->list_entry.Blink, space, list_entry);
322
323 if (s->type == type) {
324 s->size += s2->size;
325
326 RemoveEntryList(&s2->list_entry);
327 ExFreePool(s2);
328
329 s2 = s;
330 }
331 }
332
333 if (s2->list_entry.Flink != &c->space) {
334 s = CONTAINING_RECORD(s2->list_entry.Flink, space, list_entry);
335
336 if (s->type == type) {
337 s2->size += s->size;
338
339 RemoveEntryList(&s->list_entry);
340 ExFreePool(s);
341 }
342 }
343
344 le = c->space.Flink;
345 while (le != &c->space) {
346 s = CONTAINING_RECORD(le, space, list_entry);
347
348 TRACE("%llx,%llx,%x\n", s->offset, s->size, s->type);
349
350 le = le->Flink;
351 }
352
353 #ifdef DEBUG_PARANOID
354 // TESTING
355 lastaddr = c->offset;
356 le = c->space.Flink;
357 while (le != &c->space) {
358 s = CONTAINING_RECORD(le, space, list_entry);
359
360 if (s->offset != lastaddr) {
361 ERR("inconsistency detected!\n");
362 int3;
363 }
364
365 lastaddr = s->offset + s->size;
366
367 le = le->Flink;
368 }
369
370 if (lastaddr != c->offset + c->chunk_item->size) {
371 ERR("inconsistency detected - space doesn't run all the way to end of chunk\n");
372 int3;
373 }
374 #endif
375 }
376
377 chunk* get_chunk_from_address(device_extension* Vcb, UINT64 address) {
378 LIST_ENTRY* le2;
379 chunk* c;
380
381 le2 = Vcb->chunks.Flink;
382 while (le2 != &Vcb->chunks) {
383 c = CONTAINING_RECORD(le2, chunk, list_entry);
384
385 // TRACE("chunk: %llx, %llx\n", c->offset, c->chunk_item->size);
386
387 if (address >= c->offset && address < c->offset + c->chunk_item->size)
388 return c;
389
390 le2 = le2->Flink;
391 }
392
393 return NULL;
394 }
395
396 typedef struct {
397 disk_hole* dh;
398 device* device;
399 } stripe;
400
401 static void add_provisional_disk_hole(device_extension* Vcb, stripe* s, UINT64 max_stripe_size) {
402 // LIST_ENTRY* le = s->device->disk_holes.Flink;
403 // disk_hole* dh;
404
405 // ERR("old holes:\n");
406 // while (le != &s->device->disk_holes) {
407 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
408 //
409 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
410 //
411 // le = le->Flink;
412 // }
413
414 if (s->dh->size <= max_stripe_size) {
415 s->dh->provisional = TRUE;
416 } else {
417 disk_hole* newdh = ExAllocatePoolWithTag(PagedPool, sizeof(disk_hole), ALLOC_TAG);
418 if (!newdh) {
419 ERR("out of memory\n");
420 return;
421 }
422
423 newdh->address = s->dh->address + max_stripe_size;
424 newdh->size = s->dh->size - max_stripe_size;
425 newdh->provisional = FALSE;
426 InsertTailList(&s->device->disk_holes, &newdh->listentry);
427
428 s->dh->size = max_stripe_size;
429 s->dh->provisional = TRUE;
430 }
431
432 // ERR("new holes:\n");
433 // le = s->device->disk_holes.Flink;
434 // while (le != &s->device->disk_holes) {
435 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
436 //
437 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
438 //
439 // le = le->Flink;
440 // }
441 }
442
443 static UINT64 find_new_chunk_address(device_extension* Vcb, UINT64 size) {
444 KEY searchkey;
445 traverse_ptr tp, next_tp;
446 BOOL b;
447 UINT64 lastaddr;
448 NTSTATUS Status;
449
450 searchkey.obj_id = 0x100;
451 searchkey.obj_type = TYPE_CHUNK_ITEM;
452 searchkey.offset = 0;
453
454 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
455 if (!NT_SUCCESS(Status)) {
456 ERR("error - find_item returned %08x\n", Status);
457 return 0xffffffffffffffff;
458 }
459
460 lastaddr = 0;
461
462 do {
463 if (tp.item->key.obj_type == TYPE_CHUNK_ITEM) {
464 if (tp.item->size < sizeof(CHUNK_ITEM)) {
465 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(CHUNK_ITEM));
466 } else {
467 CHUNK_ITEM* ci = (CHUNK_ITEM*)tp.item->data;
468
469 if (tp.item->key.offset >= lastaddr + size) {
470 free_traverse_ptr(&tp);
471 return lastaddr;
472 }
473
474 lastaddr = tp.item->key.offset + ci->size;
475 }
476 }
477
478 b = find_next_item(Vcb, &tp, &next_tp, FALSE);
479 if (b) {
480 free_traverse_ptr(&tp);
481 tp = next_tp;
482
483 if (tp.item->key.obj_id > searchkey.obj_id || tp.item->key.obj_type > searchkey.obj_type)
484 break;
485 }
486 } while (b);
487
488 free_traverse_ptr(&tp);
489
490 return lastaddr;
491 }
492
493 static BOOL increase_dev_item_used(device_extension* Vcb, device* device, UINT64 size, LIST_ENTRY* rollback) {
494 KEY searchkey;
495 traverse_ptr tp;
496 DEV_ITEM* di;
497 NTSTATUS Status;
498
499 searchkey.obj_id = 1;
500 searchkey.obj_type = TYPE_DEV_ITEM;
501 searchkey.offset = device->devitem.dev_id;
502
503 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
504 if (!NT_SUCCESS(Status)) {
505 ERR("error - find_item returned %08x\n", Status);
506 return FALSE;
507 }
508
509 if (keycmp(&tp.item->key, &searchkey)) {
510 ERR("error - could not find DEV_ITEM for device %llx\n", device->devitem.dev_id);
511 free_traverse_ptr(&tp);
512 return FALSE;
513 }
514
515 delete_tree_item(Vcb, &tp, rollback);
516
517 free_traverse_ptr(&tp);
518
519 device->devitem.bytes_used += size;
520
521 di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
522 if (!di) {
523 ERR("out of memory\n");
524 return FALSE;
525 }
526
527 RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM));
528
529 if (!insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, rollback)) {
530 ERR("insert_tree_item failed\n");
531 return FALSE;
532 }
533
534 return TRUE;
535 }
536
537 static void reset_disk_holes(device* device, BOOL commit) {
538 LIST_ENTRY* le = device->disk_holes.Flink;
539 disk_hole* dh;
540
541 // ERR("old holes:\n");
542 // while (le != &device->disk_holes) {
543 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
544 //
545 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
546 //
547 // le = le->Flink;
548 // }
549
550 le = device->disk_holes.Flink;
551 while (le != &device->disk_holes) {
552 LIST_ENTRY* le2 = le->Flink;
553
554 dh = CONTAINING_RECORD(le, disk_hole, listentry);
555
556 if (dh->provisional) {
557 if (commit) {
558 RemoveEntryList(le);
559 ExFreePool(dh);
560 } else {
561 dh->provisional = FALSE;
562 }
563 }
564
565 le = le2;
566 }
567
568 if (!commit) {
569 le = device->disk_holes.Flink;
570 while (le != &device->disk_holes) {
571 LIST_ENTRY* le2 = le->Flink;
572
573 dh = CONTAINING_RECORD(le, disk_hole, listentry);
574
575 while (le2 != &device->disk_holes) {
576 disk_hole* dh2 = CONTAINING_RECORD(le2, disk_hole, listentry);
577
578 if (dh2->address == dh->address + dh->size) {
579 LIST_ENTRY* le3 = le2->Flink;
580 dh->size += dh2->size;
581
582 RemoveEntryList(le2);
583 ExFreePool(dh2);
584
585 le2 = le3;
586 } else
587 break;
588 }
589
590 le = le->Flink;
591 }
592 }
593
594 // ERR("new holes:\n");
595 // le = device->disk_holes.Flink;
596 // while (le != &device->disk_holes) {
597 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
598 //
599 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
600 //
601 // le = le->Flink;
602 // }
603 }
604
605 static NTSTATUS add_to_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset, void* data, ULONG size) {
606 sys_chunk *sc, *sc2;
607 LIST_ENTRY* le;
608 USHORT i;
609
610 if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) {
611 ERR("error - bootstrap is full\n");
612 return STATUS_INTERNAL_ERROR;
613 }
614
615 sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG);
616 if (!sc) {
617 ERR("out of memory\n");
618 return STATUS_INSUFFICIENT_RESOURCES;
619 }
620
621 sc->key.obj_id = obj_id;
622 sc->key.obj_type = obj_type;
623 sc->key.offset = offset;
624 sc->size = size;
625 sc->data = ExAllocatePoolWithTag(PagedPool, sc->size, ALLOC_TAG);
626 if (!sc->data) {
627 ERR("out of memory\n");
628 ExFreePool(sc);
629 return STATUS_INSUFFICIENT_RESOURCES;
630 }
631
632 RtlCopyMemory(sc->data, data, sc->size);
633
634 le = Vcb->sys_chunks.Flink;
635 while (le != &Vcb->sys_chunks) {
636 sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
637
638 if (keycmp(&sc2->key, &sc->key) == 1)
639 break;
640
641 le = le->Flink;
642 }
643 InsertTailList(le, &sc->list_entry);
644
645 Vcb->superblock.n += sizeof(KEY) + size;
646
647 i = 0;
648 le = Vcb->sys_chunks.Flink;
649 while (le != &Vcb->sys_chunks) {
650 sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
651
652 TRACE("%llx,%x,%llx\n", sc2->key.obj_id, sc2->key.obj_type, sc2->key.offset);
653
654 RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], &sc2->key, sizeof(KEY));
655 i += sizeof(KEY);
656
657 RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], sc2->data, sc2->size);
658 i += sc2->size;
659
660 le = le->Flink;
661 }
662
663 return STATUS_SUCCESS;
664 }
665
666 static chunk* alloc_chunk(device_extension* Vcb, UINT64 flags, LIST_ENTRY* rollback) {
667 UINT64 max_stripe_size, max_chunk_size, stripe_size;
668 UINT64 total_size = 0, i, j, logaddr;
669 int num_stripes;
670 disk_hole* dh;
671 stripe* stripes;
672 ULONG cisize;
673 CHUNK_ITEM* ci;
674 CHUNK_ITEM_STRIPE* cis;
675 chunk* c = NULL;
676 space* s = NULL;
677 BOOL success = FALSE;
678 BLOCK_GROUP_ITEM* bgi;
679
680 for (i = 0; i < Vcb->superblock.num_devices; i++) {
681 total_size += Vcb->devices[i].devitem.num_bytes;
682 }
683 TRACE("total_size = %llx\n", total_size);
684
685 if (flags & BLOCK_FLAG_DATA) {
686 max_stripe_size = 0x40000000; // 1 GB
687 max_chunk_size = 10 * max_stripe_size;
688 } else if (flags & BLOCK_FLAG_METADATA) {
689 if (total_size > 0xC80000000) // 50 GB
690 max_stripe_size = 0x40000000; // 1 GB
691 else
692 max_stripe_size = 0x10000000; // 256 MB
693
694 max_chunk_size = max_stripe_size;
695 } else if (flags & BLOCK_FLAG_SYSTEM) {
696 max_stripe_size = 0x2000000; // 32 MB
697 max_chunk_size = 2 * max_stripe_size;
698 }
699
700 // FIXME - make sure whole number of sectors?
701 max_chunk_size = min(max_chunk_size, total_size / 10); // cap at 10%
702
703 TRACE("would allocate a new chunk of %llx bytes and stripe %llx\n", max_chunk_size, max_stripe_size);
704
705 if (flags & BLOCK_FLAG_DUPLICATE) {
706 num_stripes = 2;
707 } else if (flags & BLOCK_FLAG_RAID0) {
708 FIXME("RAID0 not yet supported\n");
709 return NULL;
710 } else if (flags & BLOCK_FLAG_RAID1) {
711 FIXME("RAID1 not yet supported\n");
712 return NULL;
713 } else if (flags & BLOCK_FLAG_RAID10) {
714 FIXME("RAID10 not yet supported\n");
715 return NULL;
716 } else if (flags & BLOCK_FLAG_RAID5) {
717 FIXME("RAID5 not yet supported\n");
718 return NULL;
719 } else if (flags & BLOCK_FLAG_RAID6) {
720 FIXME("RAID6 not yet supported\n");
721 return NULL;
722 } else { // SINGLE
723 num_stripes = 1;
724 }
725
726 stripes = ExAllocatePoolWithTag(PagedPool, sizeof(stripe) * num_stripes, ALLOC_TAG);
727 if (!stripes) {
728 ERR("out of memory\n");
729 return NULL;
730 }
731
732 for (i = 0; i < num_stripes; i++) {
733 stripes[i].dh = NULL;
734
735 for (j = 0; j < Vcb->superblock.num_devices; j++) {
736 LIST_ENTRY* le = Vcb->devices[j].disk_holes.Flink;
737
738 while (le != &Vcb->devices[j].disk_holes) {
739 dh = CONTAINING_RECORD(le, disk_hole, listentry);
740
741 if (!dh->provisional) {
742 if (!stripes[i].dh || dh->size > stripes[i].dh->size) {
743 stripes[i].dh = dh;
744 stripes[i].device = &Vcb->devices[j];
745
746 if (stripes[i].dh->size >= max_stripe_size)
747 break;
748 }
749 }
750
751 le = le->Flink;
752 }
753
754 if (stripes[i].dh && stripes[i].dh->size >= max_stripe_size)
755 break;
756 }
757
758 if (stripes[i].dh) {
759 TRACE("good DH: device %llx, address %llx, size %llx\n", stripes[i].device->devitem.dev_id, stripes[i].dh->address, stripes[i].dh->size);
760 } else {
761 TRACE("good DH not found\n");
762 goto end;
763 }
764
765 add_provisional_disk_hole(Vcb, &stripes[i], max_stripe_size);
766 }
767
768 stripe_size = min(stripes[0].dh->size, max_stripe_size);
769 for (i = 1; i < num_stripes; i++) {
770 stripe_size = min(stripe_size, stripes[1].dh->size);
771 }
772 // FIXME - make sure stripe_size aligned properly
773 // FIXME - obey max_chunk_size
774
775 c = ExAllocatePoolWithTag(PagedPool, sizeof(chunk), ALLOC_TAG);
776 if (!c) {
777 ERR("out of memory\n");
778 goto end;
779 }
780
781 // add CHUNK_ITEM to tree 3
782
783 cisize = sizeof(CHUNK_ITEM) + (num_stripes * sizeof(CHUNK_ITEM_STRIPE));
784 ci = ExAllocatePoolWithTag(PagedPool, cisize, ALLOC_TAG);
785 if (!ci) {
786 ERR("out of memory\n");
787 goto end;
788 }
789
790 ci->size = stripe_size; // FIXME for RAID
791 ci->root_id = Vcb->extent_root->id;
792 ci->stripe_length = 0x10000; // FIXME? BTRFS_STRIPE_LEN in kernel
793 ci->type = flags;
794 ci->opt_io_alignment = ci->stripe_length;
795 ci->opt_io_width = ci->stripe_length;
796 ci->sector_size = stripes[0].device->devitem.minimal_io_size;
797 ci->num_stripes = num_stripes;
798 ci->sub_stripes = 1;
799
800 c->devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * num_stripes, ALLOC_TAG);
801 if (!c->devices) {
802 ERR("out of memory\n");
803 ExFreePool(ci);
804 goto end;
805 }
806
807 for (i = 0; i < num_stripes; i++) {
808 if (i == 0)
809 cis = (CHUNK_ITEM_STRIPE*)&ci[1];
810 else
811 cis = &cis[1];
812
813 cis->dev_id = stripes[i].device->devitem.dev_id;
814 cis->offset = stripes[i].dh->address;
815 cis->dev_uuid = stripes[i].device->devitem.device_uuid;
816
817 c->devices[i] = stripes[i].device;
818 }
819
820 logaddr = find_new_chunk_address(Vcb, ci->size);
821 if (logaddr == 0xffffffffffffffff) {
822 ERR("find_new_chunk_address failed\n");
823 ExFreePool(ci);
824 goto end;
825 }
826
827 if (!insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, logaddr, ci, cisize, NULL, rollback)) {
828 ERR("insert_tree_item failed\n");
829 ExFreePool(ci);
830 goto end;
831 }
832
833 if (flags & BLOCK_FLAG_SYSTEM) {
834 NTSTATUS Status = add_to_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, logaddr, ci, cisize);
835 if (!NT_SUCCESS(Status)) {
836 ERR("add_to_bootstrap returned %08x\n", Status);
837 goto end;
838 }
839 }
840
841 Vcb->superblock.chunk_root_generation = Vcb->superblock.generation;
842
843 c->chunk_item = ExAllocatePoolWithTag(PagedPool, cisize, ALLOC_TAG);
844 if (!c->chunk_item) {
845 ERR("out of memory\n");
846 goto end;
847 }
848
849 RtlCopyMemory(c->chunk_item, ci, cisize);
850 c->size = cisize;
851 c->offset = logaddr;
852 c->used = c->oldused = 0;
853 c->space_changed = FALSE;
854 InitializeListHead(&c->space);
855
856 s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
857 if (!s) {
858 ERR("out of memory\n");
859 goto end;
860 }
861
862 s->offset = c->offset;
863 s->size = c->chunk_item->size;
864 s->type = SPACE_TYPE_FREE;
865 InsertTailList(&c->space, &s->list_entry);
866
867 protect_superblocks(Vcb, c);
868
869 // add BLOCK_GROUP_ITEM to tree 2
870
871 bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG);
872 if (!bgi) {
873 ERR("out of memory\n");
874 goto end;
875 }
876
877 bgi->used = 0;
878 bgi->chunk_tree = 0x100;
879 bgi->flags = flags;
880
881 if (!insert_tree_item(Vcb, Vcb->extent_root, logaddr, TYPE_BLOCK_GROUP_ITEM, ci->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, rollback)) {
882 ERR("insert_tree_item failed\n");
883 ExFreePool(bgi);
884 goto end;
885 }
886
887 // add DEV_EXTENTs to tree 4
888
889 for (i = 0; i < num_stripes; i++) {
890 DEV_EXTENT* de;
891
892 de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG);
893 if (!de) {
894 ERR("out of memory\n");
895 goto end;
896 }
897
898 de->chunktree = Vcb->chunk_root->id;
899 de->objid = 0x100;
900 de->address = logaddr;
901 de->length = ci->size;
902 de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid;
903
904 if (!insert_tree_item(Vcb, Vcb->dev_root, stripes[i].device->devitem.dev_id, TYPE_DEV_EXTENT, stripes[i].dh->address, de, sizeof(DEV_EXTENT), NULL, rollback)) {
905 ERR("insert_tree_item failed\n");
906 ExFreePool(de);
907 goto end;
908 }
909
910 if (!increase_dev_item_used(Vcb, stripes[i].device, ci->size, rollback)) {
911 ERR("increase_dev_item_used failed\n");
912 goto end;
913 }
914 }
915
916 for (i = 0; i < num_stripes; i++) {
917 BOOL b = FALSE;
918 for (j = 0; j < i; j++) {
919 if (stripes[j].device == stripes[i].device)
920 b = TRUE;
921 }
922
923 if (!b)
924 reset_disk_holes(stripes[i].device, TRUE);
925 }
926
927 success = TRUE;
928
929 end:
930 ExFreePool(stripes);
931
932 if (!success) {
933 for (i = 0; i < num_stripes; i++) {
934 BOOL b = FALSE;
935 for (j = 0; j < i; j++) {
936 if (stripes[j].device == stripes[i].device)
937 b = TRUE;
938 }
939
940 if (!b)
941 reset_disk_holes(stripes[i].device, FALSE);
942 }
943
944 if (c) ExFreePool(c);
945 if (s) ExFreePool(s);
946 } else
947 InsertTailList(&Vcb->chunks, &c->list_entry);
948
949 return success ? c : NULL;
950 }
951
952 static void decrease_chunk_usage(chunk* c, UINT64 delta) {
953 c->used -= delta;
954
955 TRACE("decreasing size of chunk %llx by %llx\n", c->offset, delta);
956 }
957
958 static void increase_chunk_usage(chunk* c, UINT64 delta) {
959 c->used += delta;
960
961 TRACE("increasing size of chunk %llx by %llx\n", c->offset, delta);
962 }
963
964 static NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, UINT32 length) {
965 KEY searchkey;
966 traverse_ptr tp;
967 CHUNK_ITEM2* ci;
968 NTSTATUS Status;
969 UINT32 i;
970
971 TRACE("(%p, %llx, %p, %x)\n", Vcb, address, data, length);
972
973 // FIXME - use version cached in Vcb
974
975 searchkey.obj_id = 0x100; // fixed?
976 searchkey.obj_type = TYPE_CHUNK_ITEM;
977 searchkey.offset = address;
978
979 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
980 if (!NT_SUCCESS(Status)) {
981 ERR("error - find_item returned %08x\n", Status);
982 return Status;
983 }
984
985 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
986 ERR("error - unexpected item in chunk tree\n");
987 Status = STATUS_INTERNAL_ERROR;
988 goto end;
989 }
990
991 if (tp.item->size < sizeof(CHUNK_ITEM2)) {
992 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(CHUNK_ITEM2));
993 Status = STATUS_INTERNAL_ERROR;
994 goto end;
995 }
996
997 ci = (CHUNK_ITEM2*)tp.item->data;
998
999 if (tp.item->key.offset > address || tp.item->key.offset + ci->ci.size < address) {
1000 ERR("error - address %llx was out of chunk bounds\n", address);
1001 Status = STATUS_INTERNAL_ERROR;
1002 goto end;
1003 }
1004
1005 // FIXME - only do this for chunks marked DUPLICATE?
1006 // FIXME - for multiple writes, if PENDING do waits at the end
1007 // FIXME - work with RAID
1008 for (i = 0; i < ci->ci.num_stripes; i++) {
1009 Status = write_data_phys(Vcb->devices[0].devobj, address - tp.item->key.offset + ci->stripes[i].offset, data, length);
1010 if (!NT_SUCCESS(Status)) {
1011 ERR("error - write_data_phys failed\n");
1012 goto end;
1013 }
1014 }
1015
1016 end:
1017 free_traverse_ptr(&tp);
1018
1019 return Status;
1020 }
1021
1022 static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) {
1023 LIST_ENTRY *le, *nextle;
1024 space *s, *s2;
1025
1026 // // TESTING
1027 // le = c->space.Flink;
1028 // while (le != &c->space) {
1029 // s = CONTAINING_RECORD(le, space, list_entry);
1030 //
1031 // TRACE("%x,%x,%x\n", (UINT32)s->offset, (UINT32)s->size, s->type);
1032 //
1033 // le = le->Flink;
1034 // }
1035
1036 le = c->space.Flink;
1037 while (le != &c->space) {
1038 s = CONTAINING_RECORD(le, space, list_entry);
1039 nextle = le->Flink;
1040
1041 if (s->type == SPACE_TYPE_DELETING)
1042 s->type = SPACE_TYPE_FREE;
1043 else if (s->type == SPACE_TYPE_WRITING)
1044 s->type = SPACE_TYPE_USED;
1045
1046 if (le->Blink != &c->space) {
1047 s2 = CONTAINING_RECORD(le->Blink, space, list_entry);
1048
1049 if (s2->type == s->type) { // do merge
1050 s2->size += s->size;
1051
1052 RemoveEntryList(&s->list_entry);
1053 ExFreePool(s);
1054 }
1055 }
1056
1057 le = nextle;
1058 }
1059
1060 // le = c->space.Flink;
1061 // while (le != &c->space) {
1062 // s = CONTAINING_RECORD(le, space, list_entry);
1063 //
1064 // TRACE("%x,%x,%x\n", (UINT32)s->offset, (UINT32)s->size, s->type);
1065 //
1066 // le = le->Flink;
1067 // }
1068 }
1069
1070 static void clean_space_cache(device_extension* Vcb) {
1071 LIST_ENTRY* le;
1072 chunk* c;
1073
1074 TRACE("(%p)\n", Vcb);
1075
1076 le = Vcb->chunks.Flink;
1077 while (le != &Vcb->chunks) {
1078 c = CONTAINING_RECORD(le, chunk, list_entry);
1079
1080 if (c->space_changed) {
1081 clean_space_cache_chunk(Vcb, c);
1082 c->space_changed = FALSE;
1083 }
1084
1085 le = le->Flink;
1086 }
1087 }
1088
1089 static BOOL trees_consistent(device_extension* Vcb) {
1090 ULONG maxsize = Vcb->superblock.node_size - sizeof(tree_header);
1091 LIST_ENTRY* le;
1092
1093 le = Vcb->tree_cache.Flink;
1094 while (le != &Vcb->tree_cache) {
1095 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1096
1097 if (tc2->write) {
1098 if (tc2->tree->header.num_items == 0 && tc2->tree->parent)
1099 return FALSE;
1100
1101 if (tc2->tree->size > maxsize)
1102 return FALSE;
1103
1104 if (!tc2->tree->has_new_address)
1105 return FALSE;
1106 }
1107
1108 le = le->Flink;
1109 }
1110
1111 return TRUE;
1112 }
1113
1114 static NTSTATUS add_parents(device_extension* Vcb, LIST_ENTRY* rollback) {
1115 LIST_ENTRY* le;
1116 NTSTATUS Status;
1117
1118 le = Vcb->tree_cache.Flink;
1119 while (le != &Vcb->tree_cache) {
1120 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1121
1122 if (tc2->write) {
1123 if (tc2->tree->parent)
1124 add_to_tree_cache(Vcb, tc2->tree->parent, TRUE);
1125 else if (tc2->tree->root != Vcb->chunk_root && tc2->tree->root != Vcb->root_root) {
1126 KEY searchkey;
1127 traverse_ptr tp;
1128
1129 searchkey.obj_id = tc2->tree->root->id;
1130 searchkey.obj_type = TYPE_ROOT_ITEM;
1131 searchkey.offset = 0xffffffffffffffff;
1132
1133 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
1134 if (!NT_SUCCESS(Status)) {
1135 ERR("error - find_item returned %08x\n", Status);
1136 return Status;
1137 }
1138
1139 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1140 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1141 free_traverse_ptr(&tp);
1142 return STATUS_INTERNAL_ERROR;
1143 }
1144
1145 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed
1146 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
1147 if (!ri) {
1148 ERR("out of memory\n");
1149 return STATUS_INSUFFICIENT_RESOURCES;
1150 }
1151
1152 if (tp.item->size > 0)
1153 RtlCopyMemory(ri, tp.item->data, tp.item->size);
1154
1155 RtlZeroMemory(((UINT8*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size);
1156
1157 delete_tree_item(Vcb, &tp, rollback);
1158
1159 if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, 0, ri, sizeof(ROOT_ITEM), NULL, rollback)) {
1160 ERR("insert_tree_item failed\n");
1161 return STATUS_INTERNAL_ERROR;
1162 }
1163 } else {
1164 add_to_tree_cache(Vcb, tp.tree, TRUE);
1165 }
1166
1167 free_traverse_ptr(&tp);
1168 }
1169 }
1170
1171 le = le->Flink;
1172 }
1173
1174 return STATUS_SUCCESS;
1175 }
1176
1177 void print_trees(LIST_ENTRY* tc) {
1178 LIST_ENTRY *le, *le2;
1179
1180 le = tc->Flink;
1181 while (le != tc) {
1182 KEY firstitem = {0xcccccccccccccccc,0xcc,0xcccccccccccccccc};
1183 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1184 UINT32 num_items = 0;
1185
1186 le2 = tc2->tree->itemlist.Flink;
1187 while (le2 != &tc2->tree->itemlist) {
1188 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1189 if (!td->ignore) {
1190 firstitem = td->key;
1191 num_items++;
1192 }
1193 le2 = le2->Flink;
1194 }
1195
1196 ERR("tree: root %llx, first key %llx,%x,%llx, level %x, num_items %x / %x\n",
1197 tc2->tree->header.tree_id, firstitem.obj_id, firstitem.obj_type, firstitem.offset, tc2->tree->header.level, num_items, tc2->tree->header.num_items);
1198
1199 le = le->Flink;
1200 }
1201 }
1202
1203 static void add_parents_to_cache(device_extension* Vcb, tree* t) {
1204 KEY searchkey;
1205 traverse_ptr tp;
1206 NTSTATUS Status;
1207
1208 while (t->parent) {
1209 t = t->parent;
1210
1211 add_to_tree_cache(Vcb, t, TRUE);
1212 }
1213
1214 if (t->root == Vcb->root_root || t->root == Vcb->chunk_root)
1215 return;
1216
1217 searchkey.obj_id = t->root->id;
1218 searchkey.obj_type = TYPE_ROOT_ITEM;
1219 searchkey.offset = 0xffffffffffffffff;
1220
1221 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
1222 if (!NT_SUCCESS(Status)) {
1223 ERR("error - find_item returned %08x\n", Status);
1224 return;
1225 }
1226
1227 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1228 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1229 free_traverse_ptr(&tp);
1230 return;
1231 }
1232
1233 add_to_tree_cache(Vcb, tp.tree, TRUE);
1234
1235 free_traverse_ptr(&tp);
1236 }
1237
1238 static BOOL insert_tree_extent_skinny(device_extension* Vcb, tree* t, chunk* c, UINT64 address, LIST_ENTRY* rollback) {
1239 EXTENT_ITEM_SKINNY_METADATA* eism;
1240 traverse_ptr insert_tp;
1241
1242 eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
1243 if (!eism) {
1244 ERR("out of memory\n");
1245 return FALSE;
1246 }
1247
1248 eism->ei.refcount = 1;
1249 eism->ei.generation = Vcb->superblock.generation;
1250 eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
1251 eism->type = TYPE_TREE_BLOCK_REF;
1252 eism->tbr.offset = t->header.tree_id;
1253
1254 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, t->header.level, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, rollback)) {
1255 ERR("insert_tree_item failed\n");
1256 ExFreePool(eism);
1257 return FALSE;
1258 }
1259
1260 add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_WRITING);
1261
1262 // add_to_tree_cache(tc, insert_tp.tree, TRUE);
1263 add_parents_to_cache(Vcb, insert_tp.tree);
1264
1265 free_traverse_ptr(&insert_tp);
1266
1267 t->new_address = address;
1268 t->has_new_address = TRUE;
1269
1270 return TRUE;
1271 }
1272
1273 static BOOL insert_tree_extent(device_extension* Vcb, tree* t, chunk* c, LIST_ENTRY* rollback) {
1274 UINT64 address;
1275 EXTENT_ITEM_TREE2* eit2;
1276 traverse_ptr insert_tp;
1277
1278 TRACE("(%p, %p, %p, %p)\n", Vcb, t, c, rollback);
1279
1280 if (!find_address_in_chunk(Vcb, c, Vcb->superblock.node_size, &address))
1281 return FALSE;
1282
1283 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)
1284 return insert_tree_extent_skinny(Vcb, t, c, address, rollback);
1285
1286 eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
1287 if (!eit2) {
1288 ERR("out of memory\n");
1289 return FALSE;
1290 }
1291
1292 eit2->eit.extent_item.refcount = 1;
1293 eit2->eit.extent_item.generation = Vcb->superblock.generation;
1294 eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
1295 // eit2->eit.firstitem = wt->firstitem;
1296 eit2->eit.level = t->header.level;
1297 eit2->type = TYPE_TREE_BLOCK_REF;
1298 eit2->tbr.offset = t->header.tree_id;
1299
1300 // #ifdef DEBUG_PARANOID
1301 // if (wt->firstitem.obj_type == 0xcc) { // TESTING
1302 // ERR("error - firstitem not set (wt = %p, tree = %p, address = %x)\n", wt, wt->tree, (UINT32)address);
1303 // ERR("num_items = %u, level = %u, root = %x, delete = %u\n", wt->tree->header.num_items, wt->tree->header.level, (UINT32)wt->tree->root->id, wt->delete);
1304 // int3;
1305 // }
1306 // #endif
1307
1308 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, rollback)) {
1309 ERR("insert_tree_item failed\n");
1310 ExFreePool(eit2);
1311 return FALSE;
1312 }
1313
1314 add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_WRITING);
1315
1316 // add_to_tree_cache(tc, insert_tp.tree, TRUE);
1317 add_parents_to_cache(Vcb, insert_tp.tree);
1318
1319 free_traverse_ptr(&insert_tp);
1320
1321 t->new_address = address;
1322 t->has_new_address = TRUE;
1323
1324 return TRUE;
1325 }
1326
1327 static NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, LIST_ENTRY* rollback) {
1328 chunk *origchunk = NULL, *c;
1329 LIST_ENTRY* le;
1330 UINT64 flags = t->flags;
1331
1332 if (flags == 0)
1333 flags = (t->root->id == BTRFS_ROOT_CHUNK ? BLOCK_FLAG_SYSTEM : BLOCK_FLAG_METADATA) | BLOCK_FLAG_DUPLICATE;
1334
1335 // TRACE("flags = %x\n", (UINT32)wt->flags);
1336
1337 // if (!chunk_test) { // TESTING
1338 // if ((c = alloc_chunk(Vcb, flags))) {
1339 // if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
1340 // if (insert_tree_extent(Vcb, t, c)) {
1341 // chunk_test = TRUE;
1342 // return STATUS_SUCCESS;
1343 // }
1344 // }
1345 // }
1346 // }
1347
1348 if (t->has_address) {
1349 origchunk = get_chunk_from_address(Vcb, t->header.address);
1350
1351 if (insert_tree_extent(Vcb, t, origchunk, rollback))
1352 return STATUS_SUCCESS;
1353 }
1354
1355 le = Vcb->chunks.Flink;
1356 while (le != &Vcb->chunks) {
1357 c = CONTAINING_RECORD(le, chunk, list_entry);
1358
1359 // FIXME - make sure to avoid superblocks
1360
1361 if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
1362 if (insert_tree_extent(Vcb, t, c, rollback))
1363 return STATUS_SUCCESS;
1364 }
1365
1366 le = le->Flink;
1367 }
1368
1369 // allocate new chunk if necessary
1370 if ((c = alloc_chunk(Vcb, flags, rollback))) {
1371 if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
1372 if (insert_tree_extent(Vcb, t, c, rollback))
1373 return STATUS_SUCCESS;
1374 }
1375 }
1376
1377 ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size);
1378
1379 return STATUS_DISK_FULL;
1380 }
1381
1382 static BOOL reduce_tree_extent_skinny(device_extension* Vcb, UINT64 address, tree* t, LIST_ENTRY* rollback) {
1383 KEY searchkey;
1384 traverse_ptr tp;
1385 chunk* c;
1386 EXTENT_ITEM_SKINNY_METADATA* eism;
1387 NTSTATUS Status;
1388
1389 searchkey.obj_id = address;
1390 searchkey.obj_type = TYPE_METADATA_ITEM;
1391 searchkey.offset = 0xffffffffffffffff;
1392
1393 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
1394 if (!NT_SUCCESS(Status)) {
1395 ERR("error - find_item returned %08x\n", Status);
1396 return FALSE;
1397 }
1398
1399 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1400 TRACE("could not find %llx,%x,%llx in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1401 free_traverse_ptr(&tp);
1402 return FALSE;
1403 }
1404
1405 if (tp.item->size < sizeof(EXTENT_ITEM_SKINNY_METADATA)) {
1406 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_SKINNY_METADATA));
1407 free_traverse_ptr(&tp);
1408 return FALSE;
1409 }
1410
1411 delete_tree_item(Vcb, &tp, rollback);
1412
1413 eism = (EXTENT_ITEM_SKINNY_METADATA*)tp.item->data;
1414 if (t->header.level == 0 && eism->ei.flags & EXTENT_ITEM_SHARED_BACKREFS && eism->type == TYPE_TREE_BLOCK_REF) {
1415 // convert shared data extents
1416
1417 LIST_ENTRY* le = t->itemlist.Flink;
1418 while (le != &t->itemlist) {
1419 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1420
1421 TRACE("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1422
1423 if (!td->ignore && !td->inserted) {
1424 if (td->key.obj_type == TYPE_EXTENT_DATA) {
1425 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1426
1427 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1428 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1429
1430 if (ed2->address != 0) {
1431 TRACE("trying to convert shared data extent %llx,%llx\n", ed2->address, ed2->size);
1432 convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
1433 }
1434 }
1435 }
1436 }
1437
1438 le = le->Flink;
1439 }
1440
1441 t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
1442 }
1443
1444 c = get_chunk_from_address(Vcb, address);
1445
1446 if (c) {
1447 decrease_chunk_usage(c, Vcb->superblock.node_size);
1448
1449 add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_DELETING);
1450 } else
1451 ERR("could not find chunk for address %llx\n", address);
1452
1453 free_traverse_ptr(&tp);
1454
1455 return TRUE;
1456 }
1457
1458 // TESTING
1459 // static void check_tree_num_items(tree* t) {
1460 // LIST_ENTRY* le2;
1461 // UINT32 ni;
1462 //
1463 // le2 = t->itemlist.Flink;
1464 // ni = 0;
1465 // while (le2 != &t->itemlist) {
1466 // tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1467 // if (!td->ignore)
1468 // ni++;
1469 // le2 = le2->Flink;
1470 // }
1471 //
1472 // if (t->header.num_items != ni) {
1473 // ERR("tree %p not okay: num_items was %x, expecting %x\n", t, ni, t->header.num_items);
1474 // int3;
1475 // } else {
1476 // ERR("tree %p okay\n", t);
1477 // }
1478 // }
1479 //
1480 // static void check_trees_num_items(LIST_ENTRY* tc) {
1481 // LIST_ENTRY* le = tc->Flink;
1482 // while (le != tc) {
1483 // tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1484 //
1485 // check_tree_num_items(tc2->tree);
1486 //
1487 // le = le->Flink;
1488 // }
1489 // }
1490
1491 static void convert_old_tree_extent(device_extension* Vcb, tree_data* td, tree* t, LIST_ENTRY* rollback) {
1492 KEY searchkey;
1493 traverse_ptr tp, tp2, insert_tp;
1494 EXTENT_REF_V0* erv0;
1495 NTSTATUS Status;
1496
1497 TRACE("(%p, %p, %p)\n", Vcb, td, t);
1498
1499 searchkey.obj_id = td->treeholder.address;
1500 searchkey.obj_type = TYPE_EXTENT_REF_V0;
1501 searchkey.offset = 0xffffffffffffffff;
1502
1503 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
1504 if (!NT_SUCCESS(Status)) {
1505 ERR("error - find_item returned %08x\n", Status);
1506 return;
1507 }
1508
1509 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1510 TRACE("could not find EXTENT_REF_V0 for %llx\n", searchkey.obj_id);
1511 free_traverse_ptr(&tp);
1512 return;
1513 }
1514
1515 searchkey.obj_id = td->treeholder.address;
1516 searchkey.obj_type = TYPE_EXTENT_ITEM;
1517 searchkey.offset = Vcb->superblock.node_size;
1518
1519 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
1520 if (!NT_SUCCESS(Status)) {
1521 ERR("error - find_item returned %08x\n", Status);
1522 free_traverse_ptr(&tp);
1523 return;
1524 }
1525
1526 if (keycmp(&searchkey, &tp2.item->key)) {
1527 ERR("could not find %llx,%x,%llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1528 free_traverse_ptr(&tp2);
1529 free_traverse_ptr(&tp);
1530 return;
1531 }
1532
1533 if (tp.item->size < sizeof(EXTENT_REF_V0)) {
1534 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_REF_V0));
1535 free_traverse_ptr(&tp2);
1536 free_traverse_ptr(&tp);
1537 return;
1538 }
1539
1540 erv0 = (EXTENT_REF_V0*)tp.item->data;
1541
1542 delete_tree_item(Vcb, &tp, rollback);
1543 delete_tree_item(Vcb, &tp2, rollback);
1544
1545 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
1546 EXTENT_ITEM_SKINNY_METADATA* eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
1547
1548 if (!eism) {
1549 ERR("out of memory\n");
1550 free_traverse_ptr(&tp2);
1551 free_traverse_ptr(&tp);
1552 return;
1553 }
1554
1555 eism->ei.refcount = 1;
1556 eism->ei.generation = erv0->gen;
1557 eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
1558 eism->type = TYPE_TREE_BLOCK_REF;
1559 eism->tbr.offset = t->header.tree_id;
1560
1561 if (!insert_tree_item(Vcb, Vcb->extent_root, td->treeholder.address, TYPE_METADATA_ITEM, t->header.level -1, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, rollback)) {
1562 ERR("insert_tree_item failed\n");
1563 free_traverse_ptr(&tp2);
1564 free_traverse_ptr(&tp);
1565 return;
1566 }
1567 } else {
1568 EXTENT_ITEM_TREE2* eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
1569
1570 if (!eit2) {
1571 ERR("out of memory\n");
1572 free_traverse_ptr(&tp2);
1573 free_traverse_ptr(&tp);
1574 return;
1575 }
1576
1577 eit2->eit.extent_item.refcount = 1;
1578 eit2->eit.extent_item.generation = erv0->gen;
1579 eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
1580 eit2->eit.firstitem = td->key;
1581 eit2->eit.level = t->header.level - 1;
1582 eit2->type = TYPE_TREE_BLOCK_REF;
1583 eit2->tbr.offset = t->header.tree_id;
1584
1585 if (!insert_tree_item(Vcb, Vcb->extent_root, td->treeholder.address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, rollback)) {
1586 ERR("insert_tree_item failed\n");
1587 free_traverse_ptr(&tp2);
1588 free_traverse_ptr(&tp);
1589 return;
1590 }
1591 }
1592
1593 // add_to_tree_cache(tc, insert_tp.tree, TRUE);
1594 add_parents_to_cache(Vcb, insert_tp.tree);
1595 add_parents_to_cache(Vcb, tp.tree);
1596 add_parents_to_cache(Vcb, tp2.tree);
1597
1598 free_traverse_ptr(&insert_tp);
1599
1600 free_traverse_ptr(&tp2);
1601 free_traverse_ptr(&tp);
1602 }
1603
1604 static NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* t, LIST_ENTRY* rollback) {
1605 KEY searchkey;
1606 traverse_ptr tp;
1607 EXTENT_ITEM* ei;
1608 EXTENT_ITEM_V0* eiv0;
1609 chunk* c;
1610 NTSTATUS Status;
1611
1612 // FIXME - deal with refcounts > 1
1613
1614 TRACE("(%p, %llx, %p)\n", Vcb, address, t);
1615
1616 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
1617 if (reduce_tree_extent_skinny(Vcb, address, t, rollback)) {
1618 return STATUS_SUCCESS;
1619 }
1620 }
1621
1622 searchkey.obj_id = address;
1623 searchkey.obj_type = TYPE_EXTENT_ITEM;
1624 searchkey.offset = Vcb->superblock.node_size;
1625
1626 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
1627 if (!NT_SUCCESS(Status)) {
1628 ERR("error - find_item returned %08x\n", Status);
1629 return Status;
1630 }
1631
1632 if (keycmp(&tp.item->key, &searchkey)) {
1633 ERR("could not find %llx,%x,%llx in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1634 int3;
1635 free_traverse_ptr(&tp);
1636 return STATUS_INTERNAL_ERROR;
1637 }
1638
1639 if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
1640 eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
1641
1642 if (eiv0->refcount > 1) {
1643 FIXME("FIXME - cannot deal with refcounts larger than 1 at present (eiv0->refcount == %llx)\n", eiv0->refcount);
1644 free_traverse_ptr(&tp);
1645 return STATUS_INTERNAL_ERROR;
1646 }
1647 } else {
1648 if (tp.item->size < sizeof(EXTENT_ITEM)) {
1649 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
1650 free_traverse_ptr(&tp);
1651 return STATUS_INTERNAL_ERROR;
1652 }
1653
1654 ei = (EXTENT_ITEM*)tp.item->data;
1655
1656 if (ei->refcount > 1) {
1657 FIXME("FIXME - cannot deal with refcounts larger than 1 at present (ei->refcount == %llx)\n", ei->refcount);
1658 free_traverse_ptr(&tp);
1659 return STATUS_INTERNAL_ERROR;
1660 }
1661
1662 if (t->header.level == 0 && ei->flags & EXTENT_ITEM_SHARED_BACKREFS) {
1663 // convert shared data extents
1664
1665 LIST_ENTRY* le = t->itemlist.Flink;
1666 while (le != &t->itemlist) {
1667 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1668
1669 TRACE("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1670
1671 if (!td->ignore && !td->inserted) {
1672 if (td->key.obj_type == TYPE_EXTENT_DATA) {
1673 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1674
1675 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1676 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1677
1678 if (ed2->address != 0) {
1679 TRACE("trying to convert shared data extent %llx,%llx\n", ed2->address, ed2->size);
1680 convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
1681 }
1682 }
1683 }
1684 }
1685
1686 le = le->Flink;
1687 }
1688
1689 t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
1690 }
1691 }
1692
1693 delete_tree_item(Vcb, &tp, rollback);
1694
1695 // if EXTENT_ITEM_V0, delete corresponding B4 item
1696 if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
1697 traverse_ptr tp2;
1698
1699 searchkey.obj_id = address;
1700 searchkey.obj_type = TYPE_EXTENT_REF_V0;
1701 searchkey.offset = 0xffffffffffffffff;
1702
1703 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
1704 if (!NT_SUCCESS(Status)) {
1705 ERR("error - find_item returned %08x\n", Status);
1706 free_traverse_ptr(&tp);
1707 return Status;
1708 }
1709
1710 if (tp2.item->key.obj_id == searchkey.obj_id && tp2.item->key.obj_type == searchkey.obj_type) {
1711 delete_tree_item(Vcb, &tp2, rollback);
1712 }
1713 free_traverse_ptr(&tp2);
1714 }
1715
1716 if (!(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1717 LIST_ENTRY* le;
1718
1719 // when writing old internal trees, convert related extents
1720
1721 le = t->itemlist.Flink;
1722 while (le != &t->itemlist) {
1723 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1724
1725 // ERR("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1726
1727 if (!td->ignore && !td->inserted) {
1728 if (t->header.level > 0) {
1729 convert_old_tree_extent(Vcb, td, t, rollback);
1730 } else if (td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA)) {
1731 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1732
1733 if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1734 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1735
1736 if (ed2->address != 0) {
1737 TRACE("trying to convert old data extent %llx,%llx\n", ed2->address, ed2->size);
1738 convert_old_data_extent(Vcb, ed2->address, ed2->size, rollback);
1739 }
1740 }
1741 }
1742 }
1743
1744 le = le->Flink;
1745 }
1746 }
1747
1748 c = get_chunk_from_address(Vcb, address);
1749
1750 if (c) {
1751 decrease_chunk_usage(c, tp.item->key.offset);
1752
1753 add_to_space_list(c, address, tp.item->key.offset, SPACE_TYPE_DELETING);
1754 } else
1755 ERR("could not find chunk for address %llx\n", address);
1756
1757 free_traverse_ptr(&tp);
1758
1759 return STATUS_SUCCESS;
1760 }
1761
1762 static NTSTATUS allocate_tree_extents(device_extension* Vcb, LIST_ENTRY* rollback) {
1763 LIST_ENTRY* le;
1764 NTSTATUS Status;
1765
1766 TRACE("(%p)\n", Vcb);
1767
1768 le = Vcb->tree_cache.Flink;
1769 while (le != &Vcb->tree_cache) {
1770 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1771
1772 if (tc2->write && !tc2->tree->has_new_address) {
1773 chunk* c;
1774
1775 Status = get_tree_new_address(Vcb, tc2->tree, rollback);
1776 if (!NT_SUCCESS(Status)) {
1777 ERR("get_tree_new_address returned %08x\n", Status);
1778 return Status;
1779 }
1780
1781 TRACE("allocated extent %llx\n", tc2->tree->new_address);
1782
1783 if (tc2->tree->has_address) {
1784 Status = reduce_tree_extent(Vcb, tc2->tree->header.address, tc2->tree, rollback);
1785
1786 if (!NT_SUCCESS(Status)) {
1787 ERR("reduce_tree_extent returned %08x\n", Status);
1788 return Status;
1789 }
1790 }
1791
1792 c = get_chunk_from_address(Vcb, tc2->tree->new_address);
1793
1794 if (c) {
1795 increase_chunk_usage(c, Vcb->superblock.node_size);
1796 } else {
1797 ERR("could not find chunk for address %llx\n", tc2->tree->new_address);
1798 return STATUS_INTERNAL_ERROR;
1799 }
1800 }
1801
1802 le = le->Flink;
1803 }
1804
1805 return STATUS_SUCCESS;
1806 }
1807
1808 static NTSTATUS update_root_root(device_extension* Vcb, LIST_ENTRY* rollback) {
1809 LIST_ENTRY* le;
1810 NTSTATUS Status;
1811
1812 TRACE("(%p)\n", Vcb);
1813
1814 le = Vcb->tree_cache.Flink;
1815 while (le != &Vcb->tree_cache) {
1816 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1817
1818 if (tc2->write && !tc2->tree->parent) {
1819 if (tc2->tree->root != Vcb->root_root && tc2->tree->root != Vcb->chunk_root) {
1820 KEY searchkey;
1821 traverse_ptr tp;
1822
1823 searchkey.obj_id = tc2->tree->root->id;
1824 searchkey.obj_type = TYPE_ROOT_ITEM;
1825 searchkey.offset = 0xffffffffffffffff;
1826
1827 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
1828 if (!NT_SUCCESS(Status)) {
1829 ERR("error - find_item returned %08x\n", Status);
1830 return Status;
1831 }
1832
1833 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1834 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1835 free_traverse_ptr(&tp);
1836 return STATUS_INTERNAL_ERROR;
1837 }
1838
1839 TRACE("updating the address for root %llx to %llx\n", searchkey.obj_id, tc2->tree->new_address);
1840
1841 tc2->tree->root->root_item.block_number = tc2->tree->new_address;
1842 tc2->tree->root->root_item.root_level = tc2->tree->header.level;
1843 tc2->tree->root->root_item.generation = Vcb->superblock.generation;
1844 tc2->tree->root->root_item.generation2 = Vcb->superblock.generation;
1845
1846 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry
1847 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
1848
1849 if (!ri) {
1850 ERR("out of memory\n");
1851 return STATUS_INSUFFICIENT_RESOURCES;
1852 }
1853
1854 RtlCopyMemory(ri, &tc2->tree->root->root_item, sizeof(ROOT_ITEM));
1855
1856 delete_tree_item(Vcb, &tp, rollback);
1857
1858 if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, 0, ri, sizeof(ROOT_ITEM), NULL, rollback)) {
1859 ERR("insert_tree_item failed\n");
1860 return STATUS_INTERNAL_ERROR;
1861 }
1862 } else
1863 RtlCopyMemory(tp.item->data, &tc2->tree->root->root_item, sizeof(ROOT_ITEM));
1864
1865 free_traverse_ptr(&tp);
1866 }
1867
1868 tc2->tree->root->treeholder.address = tc2->tree->new_address;
1869 }
1870
1871 le = le->Flink;
1872 }
1873
1874 return STATUS_SUCCESS;
1875 }
1876
1877 enum write_tree_status {
1878 WriteTreeStatus_Pending,
1879 WriteTreeStatus_Success,
1880 WriteTreeStatus_Error,
1881 WriteTreeStatus_Cancelling,
1882 WriteTreeStatus_Cancelled
1883 };
1884
1885 struct write_tree_context;
1886
1887 typedef struct {
1888 struct write_tree_context* context;
1889 UINT8* buf;
1890 device* device;
1891 PIRP Irp;
1892 IO_STATUS_BLOCK iosb;
1893 enum write_tree_status status;
1894 LIST_ENTRY list_entry;
1895 } write_tree_stripe;
1896
1897 typedef struct {
1898 KEVENT Event;
1899 LIST_ENTRY stripes;
1900 } write_tree_context;
1901
1902 static NTSTATUS STDCALL write_tree_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
1903 write_tree_stripe* stripe = conptr;
1904 write_tree_context* context = (write_tree_context*)stripe->context;
1905 LIST_ENTRY* le;
1906 BOOL complete;
1907
1908 if (stripe->status == WriteTreeStatus_Cancelling) {
1909 stripe->status = WriteTreeStatus_Cancelled;
1910 goto end;
1911 }
1912
1913 stripe->iosb = Irp->IoStatus;
1914
1915 if (NT_SUCCESS(Irp->IoStatus.Status)) {
1916 stripe->status = WriteTreeStatus_Success;
1917 } else {
1918 le = context->stripes.Flink;
1919
1920 stripe->status = WriteTreeStatus_Error;
1921
1922 while (le != &context->stripes) {
1923 write_tree_stripe* s2 = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
1924
1925 if (s2->status == WriteTreeStatus_Pending) {
1926 s2->status = WriteTreeStatus_Cancelling;
1927 IoCancelIrp(s2->Irp);
1928 }
1929
1930 le = le->Flink;
1931 }
1932 }
1933
1934 end:
1935 le = context->stripes.Flink;
1936 complete = TRUE;
1937
1938 while (le != &context->stripes) {
1939 write_tree_stripe* s2 = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
1940
1941 if (s2->status == WriteTreeStatus_Pending || s2->status == WriteTreeStatus_Cancelling) {
1942 complete = FALSE;
1943 break;
1944 }
1945
1946 le = le->Flink;
1947 }
1948
1949 if (complete)
1950 KeSetEvent(&context->Event, 0, FALSE);
1951
1952 return STATUS_MORE_PROCESSING_REQUIRED;
1953 }
1954
1955 static NTSTATUS write_tree(device_extension* Vcb, UINT64 addr, UINT8* data, write_tree_context* wtc) {
1956 chunk* c;
1957 CHUNK_ITEM_STRIPE* cis;
1958 write_tree_stripe* stripe;
1959 UINT64 i;
1960
1961 c = get_chunk_from_address(Vcb, addr);
1962
1963 if (!c) {
1964 ERR("get_chunk_from_address failed\n");
1965 return STATUS_INTERNAL_ERROR;
1966 }
1967
1968 cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1969
1970 // FIXME - make this work with RAID
1971
1972 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1973 PIO_STACK_LOCATION IrpSp;
1974
1975 // FIXME - handle missing devices
1976
1977 stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_tree_stripe), ALLOC_TAG);
1978 if (!stripe) {
1979 ERR("out of memory\n");
1980 return STATUS_INSUFFICIENT_RESOURCES;
1981 }
1982
1983 stripe->context = (struct write_tree_context*)wtc;
1984 stripe->buf = data;
1985 stripe->device = c->devices[i];
1986 RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK));
1987 stripe->status = WriteTreeStatus_Pending;
1988
1989 stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, FALSE);
1990
1991 if (!stripe->Irp) {
1992 ERR("IoAllocateIrp failed\n");
1993 return STATUS_INTERNAL_ERROR;
1994 }
1995
1996 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
1997 IrpSp->MajorFunction = IRP_MJ_WRITE;
1998
1999 if (stripe->device->devobj->Flags & DO_BUFFERED_IO) {
2000 stripe->Irp->AssociatedIrp.SystemBuffer = data;
2001
2002 stripe->Irp->Flags = IRP_BUFFERED_IO;
2003 } else if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
2004 stripe->Irp->MdlAddress = IoAllocateMdl(data, Vcb->superblock.node_size, FALSE, FALSE, NULL);
2005 if (!stripe->Irp->MdlAddress) {
2006 ERR("IoAllocateMdl failed\n");
2007 return STATUS_INTERNAL_ERROR;
2008 }
2009
2010 MmProbeAndLockPages(stripe->Irp->MdlAddress, KernelMode, IoWriteAccess);
2011 } else {
2012 stripe->Irp->UserBuffer = data;
2013 }
2014
2015 IrpSp->Parameters.Write.Length = Vcb->superblock.node_size;
2016 IrpSp->Parameters.Write.ByteOffset.QuadPart = addr - c->offset + cis[i].offset;
2017
2018 stripe->Irp->UserIosb = &stripe->iosb;
2019
2020 IoSetCompletionRoutine(stripe->Irp, write_tree_completion, stripe, TRUE, TRUE, TRUE);
2021
2022 InsertTailList(&wtc->stripes, &stripe->list_entry);
2023 }
2024
2025 return STATUS_SUCCESS;
2026 }
2027
2028 static void free_stripes(write_tree_context* wtc) {
2029 LIST_ENTRY *le, *le2, *nextle;
2030
2031 le = wtc->stripes.Flink;
2032 while (le != &wtc->stripes) {
2033 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
2034
2035 if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
2036 MmUnlockPages(stripe->Irp->MdlAddress);
2037 IoFreeMdl(stripe->Irp->MdlAddress);
2038 }
2039
2040 le = le->Flink;
2041 }
2042
2043 le = wtc->stripes.Flink;
2044 while (le != &wtc->stripes) {
2045 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
2046
2047 nextle = le->Flink;
2048
2049 if (stripe->buf) {
2050 ExFreePool(stripe->buf);
2051
2052 le2 = le->Flink;
2053 while (le2 != &wtc->stripes) {
2054 write_tree_stripe* s2 = CONTAINING_RECORD(le2, write_tree_stripe, list_entry);
2055
2056 if (s2->buf == stripe->buf)
2057 s2->buf = NULL;
2058
2059 le2 = le2->Flink;
2060 }
2061
2062 }
2063
2064 ExFreePool(stripe);
2065
2066 le = nextle;
2067 }
2068 }
2069
2070 static NTSTATUS write_trees(device_extension* Vcb) {
2071 UINT8 level;
2072 UINT8 *data, *body;
2073 UINT32 crc32;
2074 NTSTATUS Status;
2075 LIST_ENTRY* le;
2076 write_tree_context* wtc;
2077
2078 TRACE("(%p)\n", Vcb);
2079
2080 for (level = 0; level <= 255; level++) {
2081 BOOL nothing_found = TRUE;
2082
2083 TRACE("level = %u\n", level);
2084
2085 le = Vcb->tree_cache.Flink;
2086 while (le != &Vcb->tree_cache) {
2087 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
2088
2089 if (tc2->write && tc2->tree->header.level == level) {
2090 KEY firstitem, searchkey;
2091 LIST_ENTRY* le2;
2092 traverse_ptr tp;
2093 EXTENT_ITEM_TREE* eit;
2094
2095 if (!tc2->tree->has_new_address) {
2096 ERR("error - tried to write tree with no new address\n");
2097 int3;
2098 }
2099
2100 le2 = tc2->tree->itemlist.Flink;
2101 while (le2 != &tc2->tree->itemlist) {
2102 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2103 if (!td->ignore) {
2104 firstitem = td->key;
2105 break;
2106 }
2107 le2 = le2->Flink;
2108 }
2109
2110 if (tc2->tree->parent) {
2111 tc2->tree->paritem->key = firstitem;
2112 tc2->tree->paritem->treeholder.address = tc2->tree->new_address;
2113 tc2->tree->paritem->treeholder.generation = Vcb->superblock.generation;
2114 }
2115
2116 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
2117 searchkey.obj_id = tc2->tree->new_address;
2118 searchkey.obj_type = TYPE_EXTENT_ITEM;
2119 searchkey.offset = Vcb->superblock.node_size;
2120
2121 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2122 if (!NT_SUCCESS(Status)) {
2123 ERR("error - find_item returned %08x\n", Status);
2124 return Status;
2125 }
2126
2127 if (keycmp(&searchkey, &tp.item->key)) {
2128 // traverse_ptr next_tp;
2129 // BOOL b;
2130 // tree_data* paritem;
2131
2132 ERR("could not find %llx,%x,%llx in extent_root (found %llx,%x,%llx instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
2133 free_traverse_ptr(&tp);
2134
2135 // searchkey.obj_id = 0;
2136 // searchkey.obj_type = 0;
2137 // searchkey.offset = 0;
2138 //
2139 // find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2140 //
2141 // paritem = NULL;
2142 // do {
2143 // if (tp.tree->paritem != paritem) {
2144 // paritem = tp.tree->paritem;
2145 // ERR("paritem: %llx,%x,%llx\n", paritem->key.obj_id, paritem->key.obj_type, paritem->key.offset);
2146 // }
2147 //
2148 // ERR("%llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
2149 //
2150 // b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
2151 // if (b) {
2152 // free_traverse_ptr(&tp);
2153 // tp = next_tp;
2154 // }
2155 // } while (b);
2156 //
2157 // free_traverse_ptr(&tp);
2158
2159 return STATUS_INTERNAL_ERROR;
2160 }
2161
2162 if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
2163 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
2164 free_traverse_ptr(&tp);
2165 return STATUS_INTERNAL_ERROR;
2166 }
2167
2168 eit = (EXTENT_ITEM_TREE*)tp.item->data;
2169 eit->firstitem = firstitem;
2170
2171 free_traverse_ptr(&tp);
2172 }
2173
2174 nothing_found = FALSE;
2175 }
2176
2177 le = le->Flink;
2178 }
2179
2180 if (nothing_found)
2181 break;
2182 }
2183
2184 TRACE("allocated tree extents\n");
2185
2186 wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_tree_context), ALLOC_TAG);
2187 if (!wtc) {
2188 ERR("out of memory\n");
2189 return STATUS_INSUFFICIENT_RESOURCES;
2190 }
2191
2192 KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE);
2193 InitializeListHead(&wtc->stripes);
2194
2195 le = Vcb->tree_cache.Flink;
2196 while (le != &Vcb->tree_cache) {
2197 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
2198 #ifdef DEBUG_PARANOID
2199 UINT32 num_items = 0, size = 0;
2200 LIST_ENTRY* le2;
2201 BOOL crash = FALSE;
2202 #endif
2203
2204 if (tc2->write) {
2205 #ifdef DEBUG_PARANOID
2206 le2 = tc2->tree->itemlist.Flink;
2207 while (le2 != &tc2->tree->itemlist) {
2208 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2209 if (!td->ignore) {
2210 num_items++;
2211
2212 if (tc2->tree->header.level == 0)
2213 size += td->size;
2214 }
2215 le2 = le2->Flink;
2216 }
2217
2218 if (tc2->tree->header.level == 0)
2219 size += num_items * sizeof(leaf_node);
2220 else
2221 size += num_items * sizeof(internal_node);
2222
2223 if (num_items != tc2->tree->header.num_items) {
2224 ERR("tree %llx, level %x: num_items was %x, expected %x\n", tc2->tree->root->id, tc2->tree->header.level, num_items, tc2->tree->header.num_items);
2225 crash = TRUE;
2226 }
2227
2228 if (size != tc2->tree->size) {
2229 ERR("tree %llx, level %x: size was %x, expected %x\n", tc2->tree->root->id, tc2->tree->header.level, size, tc2->tree->size);
2230 crash = TRUE;
2231 }
2232
2233 if (tc2->tree->header.num_items == 0 && tc2->tree->parent) {
2234 ERR("tree %llx, level %x: tried to write empty tree with parent\n", tc2->tree->root->id, tc2->tree->header.level);
2235 crash = TRUE;
2236 }
2237
2238 if (tc2->tree->size > Vcb->superblock.node_size - sizeof(tree_header)) {
2239 ERR("tree %llx, level %x: tried to write overlarge tree (%x > %x)\n", tc2->tree->root->id, tc2->tree->header.level, tc2->tree->size, Vcb->superblock.node_size - sizeof(tree_header));
2240 crash = TRUE;
2241 }
2242
2243 if (crash) {
2244 ERR("tree %p\n", tc2->tree);
2245 le2 = tc2->tree->itemlist.Flink;
2246 while (le2 != &tc2->tree->itemlist) {
2247 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2248 if (!td->ignore) {
2249 ERR("%llx,%x,%llx inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted);
2250 }
2251 le2 = le2->Flink;
2252 }
2253 int3;
2254 }
2255 #endif
2256 tc2->tree->header.address = tc2->tree->new_address;
2257 tc2->tree->header.generation = Vcb->superblock.generation;
2258 tc2->tree->header.flags |= HEADER_FLAG_MIXED_BACKREF;
2259 tc2->tree->has_address = TRUE;
2260
2261 data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
2262 if (!data) {
2263 ERR("out of memory\n");
2264 Status = STATUS_INSUFFICIENT_RESOURCES;
2265 goto end;
2266 }
2267
2268 body = data + sizeof(tree_header);
2269
2270 RtlCopyMemory(data, &tc2->tree->header, sizeof(tree_header));
2271 RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header));
2272
2273 if (tc2->tree->header.level == 0) {
2274 leaf_node* itemptr = (leaf_node*)body;
2275 int i = 0;
2276 LIST_ENTRY* le2;
2277 UINT8* dataptr = data + Vcb->superblock.node_size;
2278
2279 le2 = tc2->tree->itemlist.Flink;
2280 while (le2 != &tc2->tree->itemlist) {
2281 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2282 if (!td->ignore) {
2283 dataptr = dataptr - td->size;
2284
2285 itemptr[i].key = td->key;
2286 itemptr[i].offset = (UINT8*)dataptr - (UINT8*)body;
2287 itemptr[i].size = td->size;
2288 i++;
2289
2290 if (td->size > 0)
2291 RtlCopyMemory(dataptr, td->data, td->size);
2292 }
2293
2294 le2 = le2->Flink;
2295 }
2296 } else {
2297 internal_node* itemptr = (internal_node*)body;
2298 int i = 0;
2299 LIST_ENTRY* le2;
2300
2301 le2 = tc2->tree->itemlist.Flink;
2302 while (le2 != &tc2->tree->itemlist) {
2303 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2304 if (!td->ignore) {
2305 itemptr[i].key = td->key;
2306 itemptr[i].address = td->treeholder.address;
2307 itemptr[i].generation = td->treeholder.generation;
2308 i++;
2309 }
2310
2311 le2 = le2->Flink;
2312 }
2313 }
2314
2315 crc32 = calc_crc32c(0xffffffff, (UINT8*)&((tree_header*)data)->fs_uuid, Vcb->superblock.node_size - sizeof(((tree_header*)data)->csum));
2316 crc32 = ~crc32;
2317 *((UINT32*)data) = crc32;
2318 TRACE("setting crc32 to %08x\n", crc32);
2319
2320 Status = write_tree(Vcb, tc2->tree->new_address, data, wtc);
2321 if (!NT_SUCCESS(Status)) {
2322 ERR("write_tree returned %08x\n", Status);
2323 goto end;
2324 }
2325 }
2326
2327 le = le->Flink;
2328 }
2329
2330 Status = STATUS_SUCCESS;
2331
2332 if (wtc->stripes.Flink != &wtc->stripes) {
2333 // launch writes and wait
2334 le = wtc->stripes.Flink;
2335 while (le != &wtc->stripes) {
2336 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
2337
2338 IoCallDriver(stripe->device->devobj, stripe->Irp);
2339
2340 le = le->Flink;
2341 }
2342
2343 KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL);
2344
2345 le = wtc->stripes.Flink;
2346 while (le != &wtc->stripes) {
2347 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
2348
2349 if (!NT_SUCCESS(stripe->iosb.Status)) {
2350 Status = stripe->iosb.Status;
2351 break;
2352 }
2353
2354 le = le->Flink;
2355 }
2356
2357 free_stripes(wtc);
2358 }
2359
2360 end:
2361 ExFreePool(wtc);
2362
2363 return Status;
2364 }
2365
2366 static NTSTATUS write_superblocks(device_extension* Vcb) {
2367 UINT64 i;
2368 NTSTATUS Status;
2369 LIST_ENTRY* le;
2370
2371 TRACE("(%p)\n", Vcb);
2372
2373 le = Vcb->tree_cache.Flink;
2374 while (le != &Vcb->tree_cache) {
2375 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
2376
2377 if (tc2->write && !tc2->tree->parent) {
2378 if (tc2->tree->root == Vcb->root_root) {
2379 Vcb->superblock.root_tree_addr = tc2->tree->new_address;
2380 Vcb->superblock.root_level = tc2->tree->header.level;
2381 } else if (tc2->tree->root == Vcb->chunk_root) {
2382 Vcb->superblock.chunk_tree_addr = tc2->tree->new_address;
2383 Vcb->superblock.chunk_root_generation = tc2->tree->header.generation;
2384 Vcb->superblock.chunk_root_level = tc2->tree->header.level;
2385 }
2386 }
2387
2388 le = le->Flink;
2389 }
2390
2391 for (i = 0; i < Vcb->superblock.num_devices; i++) {
2392 if (Vcb->devices[i].devobj) {
2393 Status = write_superblock(Vcb, &Vcb->devices[i]);
2394 if (!NT_SUCCESS(Status)) {
2395 ERR("write_superblock returned %08x\n", Status);
2396 return Status;
2397 }
2398 }
2399 }
2400
2401 return STATUS_SUCCESS;
2402 }
2403
2404 static NTSTATUS update_chunk_usage(device_extension* Vcb, LIST_ENTRY* rollback) {
2405 LIST_ENTRY* le = Vcb->chunks.Flink;
2406 chunk* c;
2407 KEY searchkey;
2408 traverse_ptr tp;
2409 BLOCK_GROUP_ITEM* bgi;
2410 NTSTATUS Status;
2411
2412 TRACE("(%p)\n", Vcb);
2413
2414 while (le != &Vcb->chunks) {
2415 c = CONTAINING_RECORD(le, chunk, list_entry);
2416
2417 if (c->used != c->oldused) {
2418 searchkey.obj_id = c->offset;
2419 searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
2420 searchkey.offset = c->chunk_item->size;
2421
2422 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2423 if (!NT_SUCCESS(Status)) {
2424 ERR("error - find_item returned %08x\n", Status);
2425 return Status;
2426 }
2427
2428 if (keycmp(&searchkey, &tp.item->key)) {
2429 ERR("could not find (%llx,%x,%llx) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
2430 int3;
2431 free_traverse_ptr(&tp);
2432 return STATUS_INTERNAL_ERROR;
2433 }
2434
2435 if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
2436 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
2437 free_traverse_ptr(&tp);
2438 return STATUS_INTERNAL_ERROR;
2439 }
2440
2441 bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
2442 if (!bgi) {
2443 ERR("out of memory\n");
2444 free_traverse_ptr(&tp);
2445 return STATUS_INSUFFICIENT_RESOURCES;
2446 }
2447
2448 RtlCopyMemory(bgi, tp.item->data, tp.item->size);
2449 bgi->used = c->used;
2450
2451 TRACE("adjusting usage of chunk %llx to %llx\n", c->offset, c->used);
2452
2453 delete_tree_item(Vcb, &tp, rollback);
2454
2455 if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, rollback)) {
2456 ERR("insert_tree_item failed\n");
2457 ExFreePool(bgi);
2458 return STATUS_INTERNAL_ERROR;
2459 }
2460
2461 TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
2462 TRACE("chunk_item type = %llx\n", c->chunk_item->type);
2463
2464 if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
2465 FIXME("RAID0 not yet supported\n");
2466 ExFreePool(bgi);
2467 free_traverse_ptr(&tp);
2468 return STATUS_INTERNAL_ERROR;
2469 } else if (c->chunk_item->type & BLOCK_FLAG_RAID1) {
2470 FIXME("RAID1 not yet supported\n");
2471 ExFreePool(bgi);
2472 free_traverse_ptr(&tp);
2473 return STATUS_INTERNAL_ERROR;
2474 } else if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE) {
2475 Vcb->superblock.bytes_used = Vcb->superblock.bytes_used + (2 * (c->used - c->oldused));
2476 } else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
2477 FIXME("RAID10 not yet supported\n");
2478 ExFreePool(bgi);
2479 free_traverse_ptr(&tp);
2480 return STATUS_INTERNAL_ERROR;
2481 } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
2482 FIXME("RAID5 not yet supported\n");
2483 ExFreePool(bgi);
2484 free_traverse_ptr(&tp);
2485 return STATUS_INTERNAL_ERROR;
2486 } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2487 FIXME("RAID6 not yet supported\n");
2488 ExFreePool(bgi);
2489 free_traverse_ptr(&tp);
2490 return STATUS_INTERNAL_ERROR;
2491 } else { // SINGLE
2492 Vcb->superblock.bytes_used = Vcb->superblock.bytes_used + c->used - c->oldused;
2493 }
2494
2495 TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
2496
2497 free_traverse_ptr(&tp);
2498
2499 c->oldused = c->used;
2500 }
2501
2502 le = le->Flink;
2503 }
2504
2505 return STATUS_SUCCESS;
2506 }
2507
2508 static void get_first_item(tree* t, KEY* key) {
2509 LIST_ENTRY* le;
2510
2511 le = t->itemlist.Flink;
2512 while (le != &t->itemlist) {
2513 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2514
2515 *key = td->key;
2516 return;
2517 }
2518 }
2519
2520 static NTSTATUS STDCALL split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, UINT32 numitems, UINT32 size) {
2521 tree *nt, *pt;
2522 tree_data* td;
2523 tree_data* oldlastitem;
2524 // write_tree* wt2;
2525 // // tree_data *firsttd, *lasttd;
2526 // // LIST_ENTRY* le;
2527 // #ifdef DEBUG_PARANOID
2528 // KEY lastkey1, lastkey2;
2529 // traverse_ptr tp, next_tp;
2530 // ULONG numitems1, numitems2;
2531 // #endif
2532
2533 TRACE("splitting tree in %llx at (%llx,%x,%llx)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);
2534
2535 // #ifdef DEBUG_PARANOID
2536 // lastkey1.obj_id = 0xffffffffffffffff;
2537 // lastkey1.obj_type = 0xff;
2538 // lastkey1.offset = 0xffffffffffffffff;
2539 //
2540 // if (!find_item(Vcb, t->root, &tp, &lastkey1, NULL, FALSE))
2541 // ERR("error - find_item failed\n");
2542 // else {
2543 // lastkey1 = tp.item->key;
2544 // numitems1 = 0;
2545 // while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
2546 // free_traverse_ptr(&tp);
2547 // tp = next_tp;
2548 // numitems1++;
2549 // }
2550 // free_traverse_ptr(&tp);
2551 // }
2552 // #endif
2553
2554 nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2555 if (!nt) {
2556 ERR("out of memory\n");
2557 return STATUS_INSUFFICIENT_RESOURCES;
2558 }
2559
2560 RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header));
2561 nt->header.address = 0;
2562 nt->header.generation = Vcb->superblock.generation;
2563 nt->header.num_items = t->header.num_items - numitems;
2564 nt->header.flags = HEADER_FLAG_MIXED_BACKREF;
2565
2566 nt->refcount = 0;
2567 nt->has_address = FALSE;
2568 nt->Vcb = Vcb;
2569 nt->parent = t->parent;
2570 nt->root = t->root;
2571 // nt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
2572 nt->new_address = 0;
2573 nt->has_new_address = FALSE;
2574 nt->flags = t->flags;
2575 InitializeListHead(&nt->itemlist);
2576
2577 // ExInitializeResourceLite(&nt->nonpaged->load_tree_lock);
2578
2579 oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry);
2580
2581 // // firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
2582 // // lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
2583 // //
2584 // // TRACE("old tree in %x was from (%x,%x,%x) to (%x,%x,%x)\n",
2585 // // (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
2586 // // (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
2587 // //
2588 // // le = wt->tree->itemlist.Flink;
2589 // // while (le != &wt->tree->itemlist) {
2590 // // td = CONTAINING_RECORD(le, tree_data, list_entry);
2591 // // TRACE("old tree item was (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
2592 // // le = le->Flink;
2593 // // }
2594
2595 nt->itemlist.Flink = &newfirstitem->list_entry;
2596 nt->itemlist.Blink = t->itemlist.Blink;
2597 nt->itemlist.Flink->Blink = &nt->itemlist;
2598 nt->itemlist.Blink->Flink = &nt->itemlist;
2599
2600 t->itemlist.Blink = &oldlastitem->list_entry;
2601 t->itemlist.Blink->Flink = &t->itemlist;
2602
2603 // // le = wt->tree->itemlist.Flink;
2604 // // while (le != &wt->tree->itemlist) {
2605 // // td = CONTAINING_RECORD(le, tree_data, list_entry);
2606 // // TRACE("old tree item now (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
2607 // // le = le->Flink;
2608 // // }
2609 // //
2610 // // firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
2611 // // lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
2612 // //
2613 // // TRACE("old tree in %x is now from (%x,%x,%x) to (%x,%x,%x)\n",
2614 // // (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
2615 // // (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
2616
2617 nt->size = t->size - size;
2618 t->size = size;
2619 t->header.num_items = numitems;
2620 add_to_tree_cache(Vcb, nt, TRUE);
2621
2622 InterlockedIncrement(&Vcb->open_trees);
2623 #ifdef DEBUG_TREE_REFCOUNTS
2624 TRACE("created new split tree %p\n", nt);
2625 #endif
2626 InsertTailList(&Vcb->trees, &nt->list_entry);
2627
2628 // // // TESTING
2629 // // td = wt->tree->items;
2630 // // while (td) {
2631 // // if (!td->ignore) {
2632 // // TRACE("old tree item: (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
2633 // // }
2634 // // td = td->next;
2635 // // }
2636
2637 // // oldlastitem->next = NULL;
2638 // // wt->tree->lastitem = oldlastitem;
2639
2640 // // TRACE("last item is now (%x,%x,%x)\n", (UINT32)oldlastitem->key.obj_id, oldlastitem->key.obj_type, (UINT32)oldlastitem->key.offset);
2641
2642 if (nt->header.level > 0) {
2643 LIST_ENTRY* le = nt->itemlist.Flink;
2644
2645 while (le != &nt->itemlist) {
2646 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
2647
2648 if (td2->treeholder.tree) {
2649 td2->treeholder.tree->parent = nt;
2650 increase_tree_rc(nt);
2651 free_tree(t);
2652 }
2653
2654 le = le->Flink;
2655 }
2656 }
2657
2658 if (nt->parent) {
2659 increase_tree_rc(nt->parent);
2660
2661 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
2662 if (!td) {
2663 ERR("out of memory\n");
2664 return STATUS_INSUFFICIENT_RESOURCES;
2665 }
2666
2667 td->key = newfirstitem->key;
2668
2669 InsertHeadList(&t->paritem->list_entry, &td->list_entry);
2670
2671 td->ignore = FALSE;
2672 td->inserted = TRUE;
2673 td->treeholder.tree = nt;
2674 init_tree_holder(&td->treeholder);
2675 // td->treeholder.nonpaged->status = tree_holder_loaded;
2676 nt->paritem = td;
2677
2678 nt->parent->header.num_items++;
2679 nt->parent->size += sizeof(internal_node);
2680
2681 goto end;
2682 }
2683
2684 TRACE("adding new tree parent\n");
2685
2686 if (nt->header.level == 255) {
2687 ERR("cannot add parent to tree at level 255\n");
2688 return STATUS_INTERNAL_ERROR;
2689 }
2690
2691 pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2692 if (!pt) {
2693 ERR("out of memory\n");
2694 return STATUS_INSUFFICIENT_RESOURCES;
2695 }
2696
2697 RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
2698 pt->header.address = 0;
2699 pt->header.num_items = 2;
2700 pt->header.level = nt->header.level + 1;
2701 pt->header.flags = HEADER_FLAG_MIXED_BACKREF;
2702
2703 pt->refcount = 2;
2704 pt->has_address = FALSE;
2705 pt->Vcb = Vcb;
2706 pt->parent = NULL;
2707 pt->paritem = NULL;
2708 pt->root = t->root;
2709 pt->new_address = 0;
2710 pt->has_new_address = FALSE;
2711 // pt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
2712 pt->size = pt->header.num_items * sizeof(internal_node);
2713 pt->flags = t->flags;
2714 InitializeListHead(&pt->itemlist);
2715
2716 // ExInitializeResourceLite(&pt->nonpaged->load_tree_lock);
2717
2718 InterlockedIncrement(&Vcb->open_trees);
2719 #ifdef DEBUG_TREE_REFCOUNTS
2720 TRACE("created new parent tree %p\n", pt);
2721 #endif
2722 InsertTailList(&Vcb->trees, &pt->list_entry);
2723
2724 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
2725 if (!td) {
2726 ERR("out of memory\n");
2727 return STATUS_INSUFFICIENT_RESOURCES;
2728 }
2729
2730 get_first_item(t, &td->key);
2731 td->ignore = FALSE;
2732 td->inserted = FALSE;
2733 td->treeholder.address = 0;
2734 td->treeholder.generation = Vcb->superblock.generation;
2735 td->treeholder.tree = t;
2736 init_tree_holder(&td->treeholder);
2737 // td->treeholder.nonpaged->status = tree_holder_loaded;
2738 InsertTailList(&pt->itemlist, &td->list_entry);
2739 t->paritem = td;
2740
2741 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
2742 if (!td) {
2743 ERR("out of memory\n");
2744 return STATUS_INSUFFICIENT_RESOURCES;
2745 }
2746
2747 td->key = newfirstitem->key;
2748 td->ignore = FALSE;
2749 td->inserted = FALSE;
2750 td->treeholder.address = 0;
2751 td->treeholder.generation = Vcb->superblock.generation;
2752 td->treeholder.tree = nt;
2753 init_tree_holder(&td->treeholder);
2754 // td->treeholder.nonpaged->status = tree_holder_loaded;
2755 InsertTailList(&pt->itemlist, &td->list_entry);
2756 nt->paritem = td;
2757
2758 add_to_tree_cache(Vcb, pt, TRUE);
2759
2760 t->root->treeholder.tree = pt;
2761
2762 t->parent = pt;
2763 nt->parent = pt;
2764
2765 end:
2766 t->root->root_item.bytes_used += Vcb->superblock.node_size;
2767
2768 // #ifdef DEBUG_PARANOID
2769 // lastkey2.obj_id = 0xffffffffffffffff;
2770 // lastkey2.obj_type = 0xff;
2771 // lastkey2.offset = 0xffffffffffffffff;
2772 //
2773 // if (!find_item(Vcb, wt->tree->root, &tp, &lastkey2, NULL, FALSE))
2774 // ERR("error - find_item failed\n");
2775 // else {
2776 // lastkey2 = tp.item->key;
2777 //
2778 // numitems2 = 0;
2779 // while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
2780 // free_traverse_ptr(&tp);
2781 // tp = next_tp;
2782 // numitems2++;
2783 // }
2784 // free_traverse_ptr(&tp);
2785 // }
2786 //
2787 // ERR("lastkey1 = %llx,%x,%llx\n", lastkey1.obj_id, lastkey1.obj_type, lastkey1.offset);
2788 // ERR("lastkey2 = %llx,%x,%llx\n", lastkey2.obj_id, lastkey2.obj_type, lastkey2.offset);
2789 // ERR("numitems1 = %u\n", numitems1);
2790 // ERR("numitems2 = %u\n", numitems2);
2791 // #endif
2792
2793 return STATUS_SUCCESS;
2794 }
2795
2796 static NTSTATUS STDCALL split_tree(device_extension* Vcb, tree* t) {
2797 LIST_ENTRY* le;
2798 UINT32 size, ds, numitems;
2799
2800 size = 0;
2801 numitems = 0;
2802
2803 // FIXME - naïve implementation: maximizes number of filled trees
2804
2805 le = t->itemlist.Flink;
2806 while (le != &t->itemlist) {
2807 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2808
2809 if (!td->ignore) {
2810 if (t->header.level == 0)
2811 ds = sizeof(leaf_node) + td->size;
2812 else
2813 ds = sizeof(internal_node);
2814
2815 // FIXME - move back if previous item was deleted item with same key
2816 if (size + ds > Vcb->superblock.node_size - sizeof(tree_header))
2817 return split_tree_at(Vcb, t, td, numitems, size);
2818
2819 size += ds;
2820 numitems++;
2821 }
2822
2823 le = le->Flink;
2824 }
2825
2826 return STATUS_SUCCESS;
2827 }
2828
2829 static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, LIST_ENTRY* rollback) {
2830 LIST_ENTRY* le;
2831 tree_data* nextparitem = NULL;
2832 NTSTATUS Status;
2833 tree *next_tree, *par;
2834 BOOL loaded;
2835
2836 TRACE("trying to amalgamate tree in root %llx, level %x (size %u)\n", t->root->id, t->header.level, t->size);
2837
2838 // FIXME - doesn't capture everything, as it doesn't ascend
2839 // FIXME - write proper function and put it in treefuncs.c
2840 le = t->paritem->list_entry.Flink;
2841 while (le != &t->parent->itemlist) {
2842 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2843
2844 if (!td->ignore) {
2845 nextparitem = td;
2846 break;
2847 }
2848
2849 le = le->Flink;
2850 }
2851
2852 if (!nextparitem)
2853 return STATUS_SUCCESS;
2854
2855 // FIXME - loop, and capture more than one tree if we can
2856
2857 TRACE("nextparitem: key = %llx,%x,%llx\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset);
2858 // nextparitem = t->paritem;
2859
2860 // ExAcquireResourceExclusiveLite(&t->parent->nonpaged->load_tree_lock, TRUE);
2861
2862 Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, &loaded);
2863 if (!NT_SUCCESS(Status)) {
2864 ERR("do_load_tree returned %08x\n", Status);
2865 return Status;
2866 }
2867
2868 if (loaded)
2869 increase_tree_rc(t->parent);
2870
2871 // ExReleaseResourceLite(&t->parent->nonpaged->load_tree_lock);
2872
2873 next_tree = nextparitem->treeholder.tree;
2874
2875 if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) {
2876 // merge two trees into one
2877
2878 t->header.num_items += next_tree->header.num_items;
2879 t->size += next_tree->size;
2880
2881 if (next_tree->header.level > 0) {
2882 le = next_tree->itemlist.Flink;
2883
2884 while (le != &next_tree->itemlist) {
2885 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
2886
2887 if (td2->treeholder.tree) {
2888 td2->treeholder.tree->parent = t;
2889 increase_tree_rc(t);
2890 free_tree(next_tree);
2891 }
2892
2893 le = le->Flink;
2894 }
2895 }
2896
2897 t->itemlist.Blink->Flink = next_tree->itemlist.Flink;
2898 t->itemlist.Blink->Flink->Blink = t->itemlist.Blink;
2899 t->itemlist.Blink = next_tree->itemlist.Blink;
2900 t->itemlist.Blink->Flink = &t->itemlist;
2901
2902 // // TESTING
2903 // le = t->itemlist.Flink;
2904 // while (le != &t->itemlist) {
2905 // tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2906 // if (!td->ignore) {
2907 // ERR("key: %llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
2908 // }
2909 // le = le->Flink;
2910 // }
2911
2912 next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist;
2913
2914 next_tree->header.num_items = 0;
2915 next_tree->size = 0;
2916
2917 if (next_tree->has_new_address) { // delete associated EXTENT_ITEM
2918 Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, rollback);
2919
2920 if (!NT_SUCCESS(Status)) {
2921 ERR("reduce_tree_extent returned %08x\n", Status);
2922 free_tree(next_tree);
2923 return Status;
2924 }
2925 } else if (next_tree->has_address) {
2926 Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, rollback);
2927
2928 if (!NT_SUCCESS(Status)) {
2929 ERR("reduce_tree_extent returned %08x\n", Status);
2930 free_tree(next_tree);
2931 return Status;
2932 }
2933 }
2934
2935 if (!nextparitem->ignore) {
2936 nextparitem->ignore = TRUE;
2937 next_tree->parent->header.num_items--;
2938 next_tree->parent->size -= sizeof(internal_node);
2939 }
2940
2941 par = next_tree->parent;
2942 while (par) {
2943 add_to_tree_cache(Vcb, par, TRUE);
2944 par = par->parent;
2945 }
2946
2947 RemoveEntryList(&nextparitem->list_entry);
2948 ExFreePool(next_tree->paritem);
2949 next_tree->paritem = NULL;
2950
2951 next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
2952
2953 free_tree(next_tree);
2954
2955 // remove next_tree from tree cache
2956 le = Vcb->tree_cache.Flink;
2957 while (le != &Vcb->tree_cache) {
2958 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
2959
2960 if (tc2->tree == next_tree) {
2961 free_tree(next_tree);
2962 RemoveEntryList(le);
2963 ExFreePool(tc2);
2964 break;
2965 }
2966
2967 le = le->Flink;
2968 }
2969 } else {
2970 // rebalance by moving items from second tree into first
2971 ULONG avg_size = (t->size + next_tree->size) / 2;
2972 KEY firstitem = {0, 0, 0};
2973
2974 TRACE("attempting rebalance\n");
2975
2976 le = next_tree->itemlist.Flink;
2977 while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) {
2978 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2979 ULONG size;
2980
2981 if (!td->ignore) {
2982 if (next_tree->header.level == 0)