[BTRFS]
[reactos.git] / reactos / drivers / filesystems / btrfs / write.c
1 /* Copyright (c) Mark Harmstone 2016
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19
20 #define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - sizeof(leaf_node))
21
22 // BOOL did_split;
23 BOOL chunk_test = FALSE;
24
25 typedef struct {
26 KEVENT Event;
27 IO_STATUS_BLOCK iosb;
28 } write_context;
29
30 typedef struct {
31 EXTENT_ITEM ei;
32 UINT8 type;
33 EXTENT_DATA_REF edr;
34 } EXTENT_ITEM_DATA_REF;
35
36 typedef struct {
37 EXTENT_ITEM_TREE eit;
38 UINT8 type;
39 TREE_BLOCK_REF tbr;
40 } EXTENT_ITEM_TREE2;
41
42 typedef struct {
43 EXTENT_ITEM ei;
44 UINT8 type;
45 TREE_BLOCK_REF tbr;
46 } EXTENT_ITEM_SKINNY_METADATA;
47
48 typedef struct {
49 CHUNK_ITEM ci;
50 CHUNK_ITEM_STRIPE stripes[1];
51 } CHUNK_ITEM2;
52
53 static BOOL extent_item_is_shared(EXTENT_ITEM* ei, ULONG len);
54 static BOOL is_file_prealloc(fcb* fcb, UINT64 start_data, UINT64 end_data);
55
56 static NTSTATUS STDCALL write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
57 write_context* context = conptr;
58
59 context->iosb = Irp->IoStatus;
60 KeSetEvent(&context->Event, 0, FALSE);
61
62 // return STATUS_SUCCESS;
63 return STATUS_MORE_PROCESSING_REQUIRED;
64 }
65
66 static NTSTATUS STDCALL write_data_phys(PDEVICE_OBJECT device, UINT64 address, void* data, UINT32 length) {
67 NTSTATUS Status;
68 LARGE_INTEGER offset;
69 PIRP Irp;
70 PIO_STACK_LOCATION IrpSp;
71 write_context* context = NULL;
72
73 TRACE("(%p, %llx, %p, %x)\n", device, address, data, length);
74
75 context = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_context), ALLOC_TAG);
76 if (!context) {
77 ERR("out of memory\n");
78 return STATUS_INSUFFICIENT_RESOURCES;
79 }
80
81 RtlZeroMemory(context, sizeof(write_context));
82
83 KeInitializeEvent(&context->Event, NotificationEvent, FALSE);
84
85 offset.QuadPart = address;
86
87 // Irp = IoBuildSynchronousFsdRequest(IRP_MJ_WRITE, Vcb->device, data, length, &offset, NULL, &context->iosb);
88
89 Irp = IoAllocateIrp(device->StackSize, FALSE);
90
91 if (!Irp) {
92 ERR("IoAllocateIrp failed\n");
93 Status = STATUS_INTERNAL_ERROR;
94 goto exit2;
95 }
96
97 IrpSp = IoGetNextIrpStackLocation(Irp);
98 IrpSp->MajorFunction = IRP_MJ_WRITE;
99
100 if (device->Flags & DO_BUFFERED_IO) {
101 Irp->AssociatedIrp.SystemBuffer = data;
102
103 Irp->Flags = IRP_BUFFERED_IO;
104 } else if (device->Flags & DO_DIRECT_IO) {
105 Irp->MdlAddress = IoAllocateMdl(data, length, FALSE, FALSE, NULL);
106 if (!Irp->MdlAddress) {
107 DbgPrint("IoAllocateMdl failed\n");
108 goto exit;
109 }
110
111 MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoWriteAccess);
112 } else {
113 Irp->UserBuffer = data;
114 }
115
116 IrpSp->Parameters.Write.Length = length;
117 IrpSp->Parameters.Write.ByteOffset = offset;
118
119 Irp->UserIosb = &context->iosb;
120
121 Irp->UserEvent = &context->Event;
122
123 IoSetCompletionRoutine(Irp, write_completion, context, TRUE, TRUE, TRUE);
124
125 // FIXME - support multiple devices
126 Status = IoCallDriver(device, Irp);
127
128 if (Status == STATUS_PENDING) {
129 KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
130 Status = context->iosb.Status;
131 }
132
133 if (!NT_SUCCESS(Status)) {
134 ERR("IoCallDriver returned %08x\n", Status);
135 }
136
137 if (device->Flags & DO_DIRECT_IO) {
138 MmUnlockPages(Irp->MdlAddress);
139 IoFreeMdl(Irp->MdlAddress);
140 }
141
142 exit:
143 IoFreeIrp(Irp);
144
145 exit2:
146 if (context)
147 ExFreePool(context);
148
149 return Status;
150 }
151
152 static NTSTATUS STDCALL write_superblock(device_extension* Vcb, device* device) {
153 NTSTATUS Status;
154 unsigned int i = 0;
155 UINT32 crc32;
156
157 #ifdef __REACTOS__
158 Status = STATUS_INTERNAL_ERROR;
159 #endif
160
161 // FIXME - work with RAID
162
163 // FIXME - only write one superblock if on SSD (?)
164 while (superblock_addrs[i] > 0 && Vcb->length >= superblock_addrs[i] + sizeof(superblock)) {
165 TRACE("writing superblock %u\n", i);
166
167 Vcb->superblock.sb_phys_addr = superblock_addrs[i];
168 RtlCopyMemory(&Vcb->superblock.dev_item, &device->devitem, sizeof(DEV_ITEM));
169
170 crc32 = calc_crc32c(0xffffffff, (UINT8*)&Vcb->superblock.uuid, (ULONG)sizeof(superblock) - sizeof(Vcb->superblock.checksum));
171 crc32 = ~crc32;
172 TRACE("crc32 is %08x\n", crc32);
173 RtlCopyMemory(&Vcb->superblock.checksum, &crc32, sizeof(UINT32));
174
175 Status = write_data_phys(device->devobj, superblock_addrs[i], &Vcb->superblock, sizeof(superblock));
176
177 if (!NT_SUCCESS(Status))
178 break;
179
180 i++;
181 }
182
183 return Status;
184 }
185
186 static BOOL find_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address) {
187 LIST_ENTRY* le;
188 space *s, *bestfit = NULL;
189
190 TRACE("(%p, %llx, %llx, %p)\n", Vcb, c->offset, length, address);
191
192 le = c->space.Flink;
193 while (le != &c->space) {
194 s = CONTAINING_RECORD(le, space, list_entry);
195
196 if (s->type == SPACE_TYPE_FREE) {
197 if (s->size == length) {
198 *address = s->offset;
199 TRACE("returning exact fit at %llx\n", s->offset);
200 return TRUE;
201 } else if (s->size > length && (!bestfit || bestfit->size > s->size)) {
202 bestfit = s;
203 }
204 }
205
206 le = le->Flink;
207 }
208
209 if (bestfit) {
210 TRACE("returning best fit at %llx\n", bestfit->offset);
211 *address = bestfit->offset;
212 return TRUE;
213 }
214
215 return FALSE;
216 }
217
218 void add_to_space_list(chunk* c, UINT64 offset, UINT64 size, UINT8 type) {
219 LIST_ENTRY *le = c->space.Flink, *nextle, *insbef;
220 space *s, *s2, *s3;
221 #ifdef DEBUG_PARANOID
222 UINT64 lastaddr;
223 #endif
224
225 TRACE("(%p, %llx, %llx, %x)\n", c, offset, size, type);
226
227 #ifdef DEBUG_PARANOID
228 // TESTING
229 le = c->space.Flink;
230 while (le != &c->space) {
231 s = CONTAINING_RECORD(le, space, list_entry);
232
233 TRACE("%llx,%llx,%x\n", s->offset, s->size, s->type);
234
235 le = le->Flink;
236 }
237 #endif
238
239 c->space_changed = TRUE;
240
241 le = c->space.Flink;
242 insbef = &c->space;
243 while (le != &c->space) {
244 s = CONTAINING_RECORD(le, space, list_entry);
245 nextle = le->Flink;
246
247 if (s->offset >= offset + size) {
248 insbef = le;
249 break;
250 }
251
252 if (s->offset >= offset && s->offset + s->size <= offset + size) { // delete entirely
253 #ifndef __REACTOS__
254 RemoveEntryList(&s->list_entry);
255 #endif
256
257 if (s->offset + s->size == offset + size) {
258 insbef = s->list_entry.Flink;
259 RemoveEntryList(&s->list_entry);
260 ExFreePool(s);
261 break;
262 }
263
264 RemoveEntryList(&s->list_entry);
265 ExFreePool(s);
266 } else if (s->offset < offset && s->offset + s->size > offset + size) { // split in two
267 s3 = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
268 if (!s3) {
269 ERR("out of memory\n");
270 return;
271 }
272
273 s3->offset = offset + size;
274 s3->size = s->size - size - offset + s->offset;
275 s3->type = s->type;
276 InsertHeadList(&s->list_entry, &s3->list_entry);
277 insbef = &s3->list_entry;
278
279 s->size = offset - s->offset;
280 break;
281 } else if (s->offset + s->size > offset && s->offset + s->size <= offset + size) { // truncate before
282 s->size = offset - s->offset;
283 } else if (s->offset < offset + size && s->offset + s->size > offset + size) { // truncate after
284 s->size -= offset + size - s->offset;
285 s->offset = offset + size;
286
287 insbef = le;
288 break;
289 }
290
291 le = nextle;
292 }
293
294 s2 = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
295 if (!s2) {
296 ERR("out of memory\n");
297 return;
298 }
299
300 s2->offset = offset;
301 s2->size = size;
302 s2->type = type;
303 InsertTailList(insbef, &s2->list_entry);
304
305 // merge entries if same type
306
307 if (s2->list_entry.Blink != &c->space) {
308 s = CONTAINING_RECORD(s2->list_entry.Blink, space, list_entry);
309
310 if (s->type == type) {
311 s->size += s2->size;
312
313 RemoveEntryList(&s2->list_entry);
314 ExFreePool(s2);
315
316 s2 = s;
317 }
318 }
319
320 if (s2->list_entry.Flink != &c->space) {
321 s = CONTAINING_RECORD(s2->list_entry.Flink, space, list_entry);
322
323 if (s->type == type) {
324 s2->size += s->size;
325
326 RemoveEntryList(&s->list_entry);
327 ExFreePool(s);
328 }
329 }
330
331 le = c->space.Flink;
332 while (le != &c->space) {
333 s = CONTAINING_RECORD(le, space, list_entry);
334
335 TRACE("%llx,%llx,%x\n", s->offset, s->size, s->type);
336
337 le = le->Flink;
338 }
339
340 #ifdef DEBUG_PARANOID
341 // TESTING
342 lastaddr = c->offset;
343 le = c->space.Flink;
344 while (le != &c->space) {
345 s = CONTAINING_RECORD(le, space, list_entry);
346
347 if (s->offset != lastaddr) {
348 ERR("inconsistency detected!\n");
349 int3;
350 }
351
352 lastaddr = s->offset + s->size;
353
354 le = le->Flink;
355 }
356
357 if (lastaddr != c->offset + c->chunk_item->size) {
358 ERR("inconsistency detected - space doesn't run all the way to end of chunk\n");
359 int3;
360 }
361 #endif
362 }
363
364 chunk* get_chunk_from_address(device_extension* Vcb, UINT64 address) {
365 LIST_ENTRY* le2;
366 chunk* c;
367
368 le2 = Vcb->chunks.Flink;
369 while (le2 != &Vcb->chunks) {
370 c = CONTAINING_RECORD(le2, chunk, list_entry);
371
372 // TRACE("chunk: %llx, %llx\n", c->offset, c->chunk_item->size);
373
374 if (address >= c->offset && address < c->offset + c->chunk_item->size)
375 return c;
376
377 le2 = le2->Flink;
378 }
379
380 return NULL;
381 }
382
383 typedef struct {
384 disk_hole* dh;
385 device* device;
386 } stripe;
387
388 static void add_provisional_disk_hole(device_extension* Vcb, stripe* s, UINT64 max_stripe_size) {
389 // LIST_ENTRY* le = s->device->disk_holes.Flink;
390 // disk_hole* dh;
391
392 // ERR("old holes:\n");
393 // while (le != &s->device->disk_holes) {
394 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
395 //
396 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
397 //
398 // le = le->Flink;
399 // }
400
401 if (s->dh->size <= max_stripe_size) {
402 s->dh->provisional = TRUE;
403 } else {
404 disk_hole* newdh = ExAllocatePoolWithTag(PagedPool, sizeof(disk_hole), ALLOC_TAG);
405 if (!newdh) {
406 ERR("out of memory\n");
407 return;
408 }
409
410 newdh->address = s->dh->address + max_stripe_size;
411 newdh->size = s->dh->size - max_stripe_size;
412 newdh->provisional = FALSE;
413 InsertTailList(&s->device->disk_holes, &newdh->listentry);
414
415 s->dh->size = max_stripe_size;
416 s->dh->provisional = TRUE;
417 }
418
419 // ERR("new holes:\n");
420 // le = s->device->disk_holes.Flink;
421 // while (le != &s->device->disk_holes) {
422 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
423 //
424 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
425 //
426 // le = le->Flink;
427 // }
428 }
429
430 static UINT64 find_new_chunk_address(device_extension* Vcb, UINT64 size) {
431 KEY searchkey;
432 traverse_ptr tp, next_tp;
433 BOOL b;
434 UINT64 lastaddr;
435 NTSTATUS Status;
436
437 searchkey.obj_id = 0x100;
438 searchkey.obj_type = TYPE_CHUNK_ITEM;
439 searchkey.offset = 0;
440
441 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
442 if (!NT_SUCCESS(Status)) {
443 ERR("error - find_item returned %08x\n", Status);
444 return 0xffffffffffffffff;
445 }
446
447 lastaddr = 0;
448
449 do {
450 if (tp.item->key.obj_type == TYPE_CHUNK_ITEM) {
451 if (tp.item->size < sizeof(CHUNK_ITEM)) {
452 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(CHUNK_ITEM));
453 } else {
454 CHUNK_ITEM* ci = (CHUNK_ITEM*)tp.item->data;
455
456 if (tp.item->key.offset >= lastaddr + size)
457 return lastaddr;
458
459 lastaddr = tp.item->key.offset + ci->size;
460 }
461 }
462
463 b = find_next_item(Vcb, &tp, &next_tp, FALSE);
464 if (b) {
465 tp = next_tp;
466
467 if (tp.item->key.obj_id > searchkey.obj_id || tp.item->key.obj_type > searchkey.obj_type)
468 break;
469 }
470 } while (b);
471
472 return lastaddr;
473 }
474
475 static BOOL increase_dev_item_used(device_extension* Vcb, device* device, UINT64 size, LIST_ENTRY* rollback) {
476 KEY searchkey;
477 traverse_ptr tp;
478 DEV_ITEM* di;
479 NTSTATUS Status;
480
481 searchkey.obj_id = 1;
482 searchkey.obj_type = TYPE_DEV_ITEM;
483 searchkey.offset = device->devitem.dev_id;
484
485 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
486 if (!NT_SUCCESS(Status)) {
487 ERR("error - find_item returned %08x\n", Status);
488 return FALSE;
489 }
490
491 if (keycmp(&tp.item->key, &searchkey)) {
492 ERR("error - could not find DEV_ITEM for device %llx\n", device->devitem.dev_id);
493 return FALSE;
494 }
495
496 delete_tree_item(Vcb, &tp, rollback);
497
498 device->devitem.bytes_used += size;
499
500 di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
501 if (!di) {
502 ERR("out of memory\n");
503 return FALSE;
504 }
505
506 RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM));
507
508 if (!insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, rollback)) {
509 ERR("insert_tree_item failed\n");
510 return FALSE;
511 }
512
513 return TRUE;
514 }
515
516 static void reset_disk_holes(device* device, BOOL commit) {
517 LIST_ENTRY* le = device->disk_holes.Flink;
518 disk_hole* dh;
519
520 // ERR("old holes:\n");
521 // while (le != &device->disk_holes) {
522 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
523 //
524 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
525 //
526 // le = le->Flink;
527 // }
528
529 le = device->disk_holes.Flink;
530 while (le != &device->disk_holes) {
531 LIST_ENTRY* le2 = le->Flink;
532
533 dh = CONTAINING_RECORD(le, disk_hole, listentry);
534
535 if (dh->provisional) {
536 if (commit) {
537 RemoveEntryList(le);
538 ExFreePool(dh);
539 } else {
540 dh->provisional = FALSE;
541 }
542 }
543
544 le = le2;
545 }
546
547 if (!commit) {
548 le = device->disk_holes.Flink;
549 while (le != &device->disk_holes) {
550 LIST_ENTRY* le2 = le->Flink;
551
552 dh = CONTAINING_RECORD(le, disk_hole, listentry);
553
554 while (le2 != &device->disk_holes) {
555 disk_hole* dh2 = CONTAINING_RECORD(le2, disk_hole, listentry);
556
557 if (dh2->address == dh->address + dh->size) {
558 LIST_ENTRY* le3 = le2->Flink;
559 dh->size += dh2->size;
560
561 RemoveEntryList(le2);
562 ExFreePool(dh2);
563
564 le2 = le3;
565 } else
566 break;
567 }
568
569 le = le->Flink;
570 }
571 }
572
573 // ERR("new holes:\n");
574 // le = device->disk_holes.Flink;
575 // while (le != &device->disk_holes) {
576 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
577 //
578 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
579 //
580 // le = le->Flink;
581 // }
582 }
583
584 static NTSTATUS add_to_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset, void* data, ULONG size) {
585 sys_chunk *sc, *sc2;
586 LIST_ENTRY* le;
587 USHORT i;
588
589 if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) {
590 ERR("error - bootstrap is full\n");
591 return STATUS_INTERNAL_ERROR;
592 }
593
594 sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG);
595 if (!sc) {
596 ERR("out of memory\n");
597 return STATUS_INSUFFICIENT_RESOURCES;
598 }
599
600 sc->key.obj_id = obj_id;
601 sc->key.obj_type = obj_type;
602 sc->key.offset = offset;
603 sc->size = size;
604 sc->data = ExAllocatePoolWithTag(PagedPool, sc->size, ALLOC_TAG);
605 if (!sc->data) {
606 ERR("out of memory\n");
607 ExFreePool(sc);
608 return STATUS_INSUFFICIENT_RESOURCES;
609 }
610
611 RtlCopyMemory(sc->data, data, sc->size);
612
613 le = Vcb->sys_chunks.Flink;
614 while (le != &Vcb->sys_chunks) {
615 sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
616
617 if (keycmp(&sc2->key, &sc->key) == 1)
618 break;
619
620 le = le->Flink;
621 }
622 InsertTailList(le, &sc->list_entry);
623
624 Vcb->superblock.n += sizeof(KEY) + size;
625
626 i = 0;
627 le = Vcb->sys_chunks.Flink;
628 while (le != &Vcb->sys_chunks) {
629 sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
630
631 TRACE("%llx,%x,%llx\n", sc2->key.obj_id, sc2->key.obj_type, sc2->key.offset);
632
633 RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], &sc2->key, sizeof(KEY));
634 i += sizeof(KEY);
635
636 RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], sc2->data, sc2->size);
637 i += sc2->size;
638
639 le = le->Flink;
640 }
641
642 return STATUS_SUCCESS;
643 }
644
645 chunk* alloc_chunk(device_extension* Vcb, UINT64 flags, LIST_ENTRY* rollback) {
646 UINT64 max_stripe_size, max_chunk_size, stripe_size;
647 UINT64 total_size = 0, i, j, logaddr;
648 int num_stripes;
649 disk_hole* dh;
650 stripe* stripes;
651 ULONG cisize;
652 CHUNK_ITEM* ci;
653 CHUNK_ITEM_STRIPE* cis;
654 chunk* c = NULL;
655 space* s = NULL;
656 BOOL success = FALSE;
657 BLOCK_GROUP_ITEM* bgi;
658
659 for (i = 0; i < Vcb->superblock.num_devices; i++) {
660 total_size += Vcb->devices[i].devitem.num_bytes;
661 }
662 TRACE("total_size = %llx\n", total_size);
663
664 if (flags & BLOCK_FLAG_DATA) {
665 max_stripe_size = 0x40000000; // 1 GB
666 max_chunk_size = 10 * max_stripe_size;
667 } else if (flags & BLOCK_FLAG_METADATA) {
668 if (total_size > 0xC80000000) // 50 GB
669 max_stripe_size = 0x40000000; // 1 GB
670 else
671 max_stripe_size = 0x10000000; // 256 MB
672
673 max_chunk_size = max_stripe_size;
674 } else if (flags & BLOCK_FLAG_SYSTEM) {
675 max_stripe_size = 0x2000000; // 32 MB
676 max_chunk_size = 2 * max_stripe_size;
677 }
678
679 // FIXME - make sure whole number of sectors?
680 max_chunk_size = min(max_chunk_size, total_size / 10); // cap at 10%
681
682 TRACE("would allocate a new chunk of %llx bytes and stripe %llx\n", max_chunk_size, max_stripe_size);
683
684 if (flags & BLOCK_FLAG_DUPLICATE) {
685 num_stripes = 2;
686 } else if (flags & BLOCK_FLAG_RAID0) {
687 FIXME("RAID0 not yet supported\n");
688 return NULL;
689 } else if (flags & BLOCK_FLAG_RAID1) {
690 FIXME("RAID1 not yet supported\n");
691 return NULL;
692 } else if (flags & BLOCK_FLAG_RAID10) {
693 FIXME("RAID10 not yet supported\n");
694 return NULL;
695 } else if (flags & BLOCK_FLAG_RAID5) {
696 FIXME("RAID5 not yet supported\n");
697 return NULL;
698 } else if (flags & BLOCK_FLAG_RAID6) {
699 FIXME("RAID6 not yet supported\n");
700 return NULL;
701 } else { // SINGLE
702 num_stripes = 1;
703 }
704
705 stripes = ExAllocatePoolWithTag(PagedPool, sizeof(stripe) * num_stripes, ALLOC_TAG);
706 if (!stripes) {
707 ERR("out of memory\n");
708 return NULL;
709 }
710
711 for (i = 0; i < num_stripes; i++) {
712 stripes[i].dh = NULL;
713
714 for (j = 0; j < Vcb->superblock.num_devices; j++) {
715 LIST_ENTRY* le = Vcb->devices[j].disk_holes.Flink;
716
717 while (le != &Vcb->devices[j].disk_holes) {
718 dh = CONTAINING_RECORD(le, disk_hole, listentry);
719
720 if (!dh->provisional) {
721 if (!stripes[i].dh || dh->size > stripes[i].dh->size) {
722 stripes[i].dh = dh;
723 stripes[i].device = &Vcb->devices[j];
724
725 if (stripes[i].dh->size >= max_stripe_size)
726 break;
727 }
728 }
729
730 le = le->Flink;
731 }
732
733 if (stripes[i].dh && stripes[i].dh->size >= max_stripe_size)
734 break;
735 }
736
737 if (stripes[i].dh) {
738 TRACE("good DH: device %llx, address %llx, size %llx\n", stripes[i].device->devitem.dev_id, stripes[i].dh->address, stripes[i].dh->size);
739 } else {
740 TRACE("good DH not found\n");
741 goto end;
742 }
743
744 add_provisional_disk_hole(Vcb, &stripes[i], max_stripe_size);
745 }
746
747 stripe_size = min(stripes[0].dh->size, max_stripe_size);
748 for (i = 1; i < num_stripes; i++) {
749 stripe_size = min(stripe_size, stripes[1].dh->size);
750 }
751 // FIXME - make sure stripe_size aligned properly
752 // FIXME - obey max_chunk_size
753
754 c = ExAllocatePoolWithTag(PagedPool, sizeof(chunk), ALLOC_TAG);
755 if (!c) {
756 ERR("out of memory\n");
757 goto end;
758 }
759
760 // add CHUNK_ITEM to tree 3
761
762 cisize = sizeof(CHUNK_ITEM) + (num_stripes * sizeof(CHUNK_ITEM_STRIPE));
763 ci = ExAllocatePoolWithTag(PagedPool, cisize, ALLOC_TAG);
764 if (!ci) {
765 ERR("out of memory\n");
766 goto end;
767 }
768
769 ci->size = stripe_size; // FIXME for RAID
770 ci->root_id = Vcb->extent_root->id;
771 ci->stripe_length = 0x10000; // FIXME? BTRFS_STRIPE_LEN in kernel
772 ci->type = flags;
773 ci->opt_io_alignment = ci->stripe_length;
774 ci->opt_io_width = ci->stripe_length;
775 ci->sector_size = stripes[0].device->devitem.minimal_io_size;
776 ci->num_stripes = num_stripes;
777 ci->sub_stripes = 1;
778
779 c->devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * num_stripes, ALLOC_TAG);
780 if (!c->devices) {
781 ERR("out of memory\n");
782 ExFreePool(ci);
783 goto end;
784 }
785
786 for (i = 0; i < num_stripes; i++) {
787 if (i == 0)
788 cis = (CHUNK_ITEM_STRIPE*)&ci[1];
789 else
790 cis = &cis[1];
791
792 cis->dev_id = stripes[i].device->devitem.dev_id;
793 cis->offset = stripes[i].dh->address;
794 cis->dev_uuid = stripes[i].device->devitem.device_uuid;
795
796 c->devices[i] = stripes[i].device;
797 }
798
799 logaddr = find_new_chunk_address(Vcb, ci->size);
800 if (logaddr == 0xffffffffffffffff) {
801 ERR("find_new_chunk_address failed\n");
802 ExFreePool(ci);
803 goto end;
804 }
805
806 if (!insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, logaddr, ci, cisize, NULL, rollback)) {
807 ERR("insert_tree_item failed\n");
808 ExFreePool(ci);
809 goto end;
810 }
811
812 if (flags & BLOCK_FLAG_SYSTEM) {
813 NTSTATUS Status = add_to_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, logaddr, ci, cisize);
814 if (!NT_SUCCESS(Status)) {
815 ERR("add_to_bootstrap returned %08x\n", Status);
816 goto end;
817 }
818 }
819
820 Vcb->superblock.chunk_root_generation = Vcb->superblock.generation;
821
822 c->chunk_item = ExAllocatePoolWithTag(PagedPool, cisize, ALLOC_TAG);
823 if (!c->chunk_item) {
824 ERR("out of memory\n");
825 goto end;
826 }
827
828 RtlCopyMemory(c->chunk_item, ci, cisize);
829 c->size = cisize;
830 c->offset = logaddr;
831 c->used = c->oldused = 0;
832 c->space_changed = FALSE;
833 c->cache_size = 0;
834 c->cache_inode = 0;
835 InitializeListHead(&c->space);
836
837 s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
838 if (!s) {
839 ERR("out of memory\n");
840 goto end;
841 }
842
843 s->offset = c->offset;
844 s->size = c->chunk_item->size;
845 s->type = SPACE_TYPE_FREE;
846 InsertTailList(&c->space, &s->list_entry);
847
848 protect_superblocks(Vcb, c);
849
850 // add BLOCK_GROUP_ITEM to tree 2
851
852 bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG);
853 if (!bgi) {
854 ERR("out of memory\n");
855 goto end;
856 }
857
858 bgi->used = 0;
859 bgi->chunk_tree = 0x100;
860 bgi->flags = flags;
861
862 if (!insert_tree_item(Vcb, Vcb->extent_root, logaddr, TYPE_BLOCK_GROUP_ITEM, ci->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, rollback)) {
863 ERR("insert_tree_item failed\n");
864 ExFreePool(bgi);
865 goto end;
866 }
867
868 // add DEV_EXTENTs to tree 4
869
870 for (i = 0; i < num_stripes; i++) {
871 DEV_EXTENT* de;
872
873 de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG);
874 if (!de) {
875 ERR("out of memory\n");
876 goto end;
877 }
878
879 de->chunktree = Vcb->chunk_root->id;
880 de->objid = 0x100;
881 de->address = logaddr;
882 de->length = ci->size;
883 de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid;
884
885 if (!insert_tree_item(Vcb, Vcb->dev_root, stripes[i].device->devitem.dev_id, TYPE_DEV_EXTENT, stripes[i].dh->address, de, sizeof(DEV_EXTENT), NULL, rollback)) {
886 ERR("insert_tree_item failed\n");
887 ExFreePool(de);
888 goto end;
889 }
890
891 if (!increase_dev_item_used(Vcb, stripes[i].device, ci->size, rollback)) {
892 ERR("increase_dev_item_used failed\n");
893 goto end;
894 }
895 }
896
897 for (i = 0; i < num_stripes; i++) {
898 BOOL b = FALSE;
899 for (j = 0; j < i; j++) {
900 if (stripes[j].device == stripes[i].device)
901 b = TRUE;
902 }
903
904 if (!b)
905 reset_disk_holes(stripes[i].device, TRUE);
906 }
907
908 success = TRUE;
909
910 end:
911 ExFreePool(stripes);
912
913 if (!success) {
914 for (i = 0; i < num_stripes; i++) {
915 BOOL b = FALSE;
916 for (j = 0; j < i; j++) {
917 if (stripes[j].device == stripes[i].device)
918 b = TRUE;
919 }
920
921 if (!b)
922 reset_disk_holes(stripes[i].device, FALSE);
923 }
924
925 if (c) ExFreePool(c);
926 if (s) ExFreePool(s);
927 } else
928 InsertTailList(&Vcb->chunks, &c->list_entry);
929
930 return success ? c : NULL;
931 }
932
933 NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, UINT32 length) {
934 KEY searchkey;
935 traverse_ptr tp;
936 CHUNK_ITEM2* ci;
937 NTSTATUS Status;
938 UINT32 i;
939
940 TRACE("(%p, %llx, %p, %x)\n", Vcb, address, data, length);
941
942 // FIXME - use version cached in Vcb
943
944 searchkey.obj_id = 0x100; // fixed?
945 searchkey.obj_type = TYPE_CHUNK_ITEM;
946 searchkey.offset = address;
947
948 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
949 if (!NT_SUCCESS(Status)) {
950 ERR("error - find_item returned %08x\n", Status);
951 return Status;
952 }
953
954 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
955 ERR("error - unexpected item in chunk tree\n");
956 Status = STATUS_INTERNAL_ERROR;
957 goto end;
958 }
959
960 if (tp.item->size < sizeof(CHUNK_ITEM2)) {
961 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(CHUNK_ITEM2));
962 Status = STATUS_INTERNAL_ERROR;
963 goto end;
964 }
965
966 ci = (CHUNK_ITEM2*)tp.item->data;
967
968 if (tp.item->key.offset > address || tp.item->key.offset + ci->ci.size < address) {
969 ERR("error - address %llx was out of chunk bounds\n", address);
970 Status = STATUS_INTERNAL_ERROR;
971 goto end;
972 }
973
974 // FIXME - only do this for chunks marked DUPLICATE?
975 // FIXME - for multiple writes, if PENDING do waits at the end
976 // FIXME - work with RAID
977 for (i = 0; i < ci->ci.num_stripes; i++) {
978 Status = write_data_phys(Vcb->devices[0].devobj, address - tp.item->key.offset + ci->stripes[i].offset, data, length);
979 if (!NT_SUCCESS(Status)) {
980 ERR("error - write_data_phys failed\n");
981 goto end;
982 }
983 }
984
985 end:
986
987 return Status;
988 }
989
990 static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) {
991 LIST_ENTRY *le, *nextle;
992 space *s, *s2;
993
994 // // TESTING
995 // le = c->space.Flink;
996 // while (le != &c->space) {
997 // s = CONTAINING_RECORD(le, space, list_entry);
998 //
999 // TRACE("%x,%x,%x\n", (UINT32)s->offset, (UINT32)s->size, s->type);
1000 //
1001 // le = le->Flink;
1002 // }
1003
1004 le = c->space.Flink;
1005 while (le != &c->space) {
1006 s = CONTAINING_RECORD(le, space, list_entry);
1007 nextle = le->Flink;
1008
1009 if (s->type == SPACE_TYPE_DELETING)
1010 s->type = SPACE_TYPE_FREE;
1011 else if (s->type == SPACE_TYPE_WRITING)
1012 s->type = SPACE_TYPE_USED;
1013
1014 if (le->Blink != &c->space) {
1015 s2 = CONTAINING_RECORD(le->Blink, space, list_entry);
1016
1017 if (s2->type == s->type) { // do merge
1018 s2->size += s->size;
1019
1020 RemoveEntryList(&s->list_entry);
1021 ExFreePool(s);
1022 }
1023 }
1024
1025 le = nextle;
1026 }
1027
1028 // le = c->space.Flink;
1029 // while (le != &c->space) {
1030 // s = CONTAINING_RECORD(le, space, list_entry);
1031 //
1032 // TRACE("%x,%x,%x\n", (UINT32)s->offset, (UINT32)s->size, s->type);
1033 //
1034 // le = le->Flink;
1035 // }
1036 }
1037
1038 static void clean_space_cache(device_extension* Vcb) {
1039 LIST_ENTRY* le;
1040 chunk* c;
1041
1042 TRACE("(%p)\n", Vcb);
1043
1044 le = Vcb->chunks.Flink;
1045 while (le != &Vcb->chunks) {
1046 c = CONTAINING_RECORD(le, chunk, list_entry);
1047
1048 if (c->space_changed) {
1049 clean_space_cache_chunk(Vcb, c);
1050 c->space_changed = FALSE;
1051 }
1052
1053 le = le->Flink;
1054 }
1055 }
1056
1057 static BOOL trees_consistent(device_extension* Vcb, LIST_ENTRY* rollback) {
1058 ULONG maxsize = Vcb->superblock.node_size - sizeof(tree_header);
1059 LIST_ENTRY* le;
1060
1061 le = Vcb->trees.Flink;
1062 while (le != &Vcb->trees) {
1063 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1064
1065 if (t->write) {
1066 if (t->header.num_items == 0 && t->parent)
1067 return FALSE;
1068
1069 if (t->size > maxsize)
1070 return FALSE;
1071
1072 if (!t->has_new_address)
1073 return FALSE;
1074 }
1075
1076 le = le->Flink;
1077 }
1078
1079 return TRUE;
1080 }
1081
1082 static NTSTATUS add_parents(device_extension* Vcb, LIST_ENTRY* rollback) {
1083 LIST_ENTRY* le;
1084 NTSTATUS Status;
1085
1086 le = Vcb->trees.Flink;
1087 while (le != &Vcb->trees) {
1088 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1089
1090 if (t->write) {
1091 if (t->parent) {
1092 if (!t->parent->write) {
1093 t->parent->write = TRUE;
1094 Vcb->write_trees++;
1095 }
1096 } else if (t->root != Vcb->chunk_root && t->root != Vcb->root_root) {
1097 KEY searchkey;
1098 traverse_ptr tp;
1099
1100 searchkey.obj_id = t->root->id;
1101 searchkey.obj_type = TYPE_ROOT_ITEM;
1102 searchkey.offset = 0xffffffffffffffff;
1103
1104 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
1105 if (!NT_SUCCESS(Status)) {
1106 ERR("error - find_item returned %08x\n", Status);
1107 return Status;
1108 }
1109
1110 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1111 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1112 return STATUS_INTERNAL_ERROR;
1113 }
1114
1115 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed
1116 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
1117 if (!ri) {
1118 ERR("out of memory\n");
1119 return STATUS_INSUFFICIENT_RESOURCES;
1120 }
1121
1122 if (tp.item->size > 0)
1123 RtlCopyMemory(ri, tp.item->data, tp.item->size);
1124
1125 RtlZeroMemory(((UINT8*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size);
1126
1127 delete_tree_item(Vcb, &tp, rollback);
1128
1129 if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, rollback)) {
1130 ERR("insert_tree_item failed\n");
1131 return STATUS_INTERNAL_ERROR;
1132 }
1133 } else {
1134 if (!tp.tree->write) {
1135 tp.tree->write = TRUE;
1136 Vcb->write_trees++;
1137 }
1138 }
1139 }
1140 }
1141
1142 le = le->Flink;
1143 }
1144
1145 return STATUS_SUCCESS;
1146 }
1147
1148 static void add_parents_to_cache(device_extension* Vcb, tree* t) {
1149 KEY searchkey;
1150 traverse_ptr tp;
1151 NTSTATUS Status;
1152
1153 while (t->parent) {
1154 t = t->parent;
1155
1156 if (!t->write) {
1157 t->write = TRUE;
1158 Vcb->write_trees++;
1159 }
1160 }
1161
1162 if (t->root == Vcb->root_root || t->root == Vcb->chunk_root)
1163 return;
1164
1165 searchkey.obj_id = t->root->id;
1166 searchkey.obj_type = TYPE_ROOT_ITEM;
1167 searchkey.offset = 0xffffffffffffffff;
1168
1169 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
1170 if (!NT_SUCCESS(Status)) {
1171 ERR("error - find_item returned %08x\n", Status);
1172 return;
1173 }
1174
1175 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1176 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1177 return;
1178 }
1179
1180 if (!tp.tree->write) {
1181 tp.tree->write = TRUE;
1182 Vcb->write_trees++;
1183 }
1184 }
1185
1186 static BOOL insert_tree_extent_skinny(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64 address, LIST_ENTRY* rollback) {
1187 EXTENT_ITEM_SKINNY_METADATA* eism;
1188 traverse_ptr insert_tp;
1189
1190 eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
1191 if (!eism) {
1192 ERR("out of memory\n");
1193 return FALSE;
1194 }
1195
1196 eism->ei.refcount = 1;
1197 eism->ei.generation = Vcb->superblock.generation;
1198 eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
1199 eism->type = TYPE_TREE_BLOCK_REF;
1200 eism->tbr.offset = root_id;
1201
1202 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, rollback)) {
1203 ERR("insert_tree_item failed\n");
1204 ExFreePool(eism);
1205 return FALSE;
1206 }
1207
1208 add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_WRITING);
1209
1210 add_parents_to_cache(Vcb, insert_tp.tree);
1211
1212 return TRUE;
1213 }
1214
1215 static BOOL insert_tree_extent(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64* new_address, LIST_ENTRY* rollback) {
1216 UINT64 address;
1217 EXTENT_ITEM_TREE2* eit2;
1218 traverse_ptr insert_tp;
1219
1220 TRACE("(%p, %x, %llx, %p, %p, %p, %p)\n", Vcb, level, root_id, c, new_address, rollback);
1221
1222 if (!find_address_in_chunk(Vcb, c, Vcb->superblock.node_size, &address))
1223 return FALSE;
1224
1225 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
1226 BOOL b = insert_tree_extent_skinny(Vcb, level, root_id, c, address, rollback);
1227
1228 if (b)
1229 *new_address = address;
1230
1231 return b;
1232 }
1233
1234 eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
1235 if (!eit2) {
1236 ERR("out of memory\n");
1237 return FALSE;
1238 }
1239
1240 eit2->eit.extent_item.refcount = 1;
1241 eit2->eit.extent_item.generation = Vcb->superblock.generation;
1242 eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
1243 // eit2->eit.firstitem = wt->firstitem;
1244 eit2->eit.level = level;
1245 eit2->type = TYPE_TREE_BLOCK_REF;
1246 eit2->tbr.offset = root_id;
1247
1248 // #ifdef DEBUG_PARANOID
1249 // if (wt->firstitem.obj_type == 0xcc) { // TESTING
1250 // ERR("error - firstitem not set (wt = %p, tree = %p, address = %x)\n", wt, wt->tree, (UINT32)address);
1251 // ERR("num_items = %u, level = %u, root = %x, delete = %u\n", wt->tree->header.num_items, wt->tree->header.level, (UINT32)wt->tree->root->id, wt->delete);
1252 // int3;
1253 // }
1254 // #endif
1255
1256 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, rollback)) {
1257 ERR("insert_tree_item failed\n");
1258 ExFreePool(eit2);
1259 return FALSE;
1260 }
1261
1262 add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_WRITING);
1263
1264 add_parents_to_cache(Vcb, insert_tp.tree);
1265
1266 *new_address = address;
1267
1268 return TRUE;
1269 }
1270
1271 NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, LIST_ENTRY* rollback) {
1272 chunk *origchunk = NULL, *c;
1273 LIST_ENTRY* le;
1274 UINT64 flags = t->flags, addr;
1275
1276 if (flags == 0)
1277 flags = (t->root->id == BTRFS_ROOT_CHUNK ? BLOCK_FLAG_SYSTEM : BLOCK_FLAG_METADATA) | BLOCK_FLAG_DUPLICATE;
1278
1279 // TRACE("flags = %x\n", (UINT32)wt->flags);
1280
1281 // if (!chunk_test) { // TESTING
1282 // if ((c = alloc_chunk(Vcb, flags))) {
1283 // if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
1284 // if (insert_tree_extent(Vcb, t, c)) {
1285 // chunk_test = TRUE;
1286 // return STATUS_SUCCESS;
1287 // }
1288 // }
1289 // }
1290 // }
1291
1292 if (t->has_address) {
1293 origchunk = get_chunk_from_address(Vcb, t->header.address);
1294
1295 if (insert_tree_extent(Vcb, t->header.level, t->header.tree_id, origchunk, &addr, rollback)) {
1296 t->new_address = addr;
1297 t->has_new_address = TRUE;
1298 return STATUS_SUCCESS;
1299 }
1300 }
1301
1302 le = Vcb->chunks.Flink;
1303 while (le != &Vcb->chunks) {
1304 c = CONTAINING_RECORD(le, chunk, list_entry);
1305
1306 if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
1307 if (insert_tree_extent(Vcb, t->header.level, t->header.tree_id, c, &addr, rollback)) {
1308 t->new_address = addr;
1309 t->has_new_address = TRUE;
1310 return STATUS_SUCCESS;
1311 }
1312 }
1313
1314 le = le->Flink;
1315 }
1316
1317 // allocate new chunk if necessary
1318 if ((c = alloc_chunk(Vcb, flags, rollback))) {
1319 if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
1320 if (insert_tree_extent(Vcb, t->header.level, t->header.tree_id, c, &addr, rollback)) {
1321 t->new_address = addr;
1322 t->has_new_address = TRUE;
1323 return STATUS_SUCCESS;
1324 }
1325 }
1326 }
1327
1328 ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size);
1329
1330 return STATUS_DISK_FULL;
1331 }
1332
1333 static BOOL reduce_tree_extent_skinny(device_extension* Vcb, UINT64 address, tree* t, LIST_ENTRY* rollback) {
1334 KEY searchkey;
1335 traverse_ptr tp;
1336 chunk* c;
1337 EXTENT_ITEM_SKINNY_METADATA* eism;
1338 NTSTATUS Status;
1339
1340 searchkey.obj_id = address;
1341 searchkey.obj_type = TYPE_METADATA_ITEM;
1342 searchkey.offset = 0xffffffffffffffff;
1343
1344 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
1345 if (!NT_SUCCESS(Status)) {
1346 ERR("error - find_item returned %08x\n", Status);
1347 return FALSE;
1348 }
1349
1350 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1351 TRACE("could not find %llx,%x,%llx in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1352 return FALSE;
1353 }
1354
1355 if (tp.item->size < sizeof(EXTENT_ITEM_SKINNY_METADATA)) {
1356 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_SKINNY_METADATA));
1357 return FALSE;
1358 }
1359
1360 delete_tree_item(Vcb, &tp, rollback);
1361
1362 eism = (EXTENT_ITEM_SKINNY_METADATA*)tp.item->data;
1363 if (t && t->header.level == 0 && eism->ei.flags & EXTENT_ITEM_SHARED_BACKREFS && eism->type == TYPE_TREE_BLOCK_REF) {
1364 // convert shared data extents
1365
1366 LIST_ENTRY* le = t->itemlist.Flink;
1367 while (le != &t->itemlist) {
1368 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1369
1370 TRACE("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1371
1372 if (!td->ignore && !td->inserted) {
1373 if (td->key.obj_type == TYPE_EXTENT_DATA) {
1374 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1375
1376 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1377 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1378
1379 if (ed2->address != 0) {
1380 TRACE("trying to convert shared data extent %llx,%llx\n", ed2->address, ed2->size);
1381 convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
1382 }
1383 }
1384 }
1385 }
1386
1387 le = le->Flink;
1388 }
1389
1390 t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
1391 }
1392
1393 c = get_chunk_from_address(Vcb, address);
1394
1395 if (c) {
1396 decrease_chunk_usage(c, Vcb->superblock.node_size);
1397
1398 add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_DELETING);
1399 } else
1400 ERR("could not find chunk for address %llx\n", address);
1401
1402 return TRUE;
1403 }
1404
1405 // TESTING
1406 // static void check_tree_num_items(tree* t) {
1407 // LIST_ENTRY* le2;
1408 // UINT32 ni;
1409 //
1410 // le2 = t->itemlist.Flink;
1411 // ni = 0;
1412 // while (le2 != &t->itemlist) {
1413 // tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1414 // if (!td->ignore)
1415 // ni++;
1416 // le2 = le2->Flink;
1417 // }
1418 //
1419 // if (t->header.num_items != ni) {
1420 // ERR("tree %p not okay: num_items was %x, expecting %x\n", t, ni, t->header.num_items);
1421 // int3;
1422 // } else {
1423 // ERR("tree %p okay\n", t);
1424 // }
1425 // }
1426 //
1427 // static void check_trees_num_items(LIST_ENTRY* tc) {
1428 // LIST_ENTRY* le = tc->Flink;
1429 // while (le != tc) {
1430 // tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1431 //
1432 // check_tree_num_items(tc2->tree);
1433 //
1434 // le = le->Flink;
1435 // }
1436 // }
1437
1438 static void convert_old_tree_extent(device_extension* Vcb, tree_data* td, tree* t, LIST_ENTRY* rollback) {
1439 KEY searchkey;
1440 traverse_ptr tp, tp2, insert_tp;
1441 EXTENT_REF_V0* erv0;
1442 NTSTATUS Status;
1443
1444 TRACE("(%p, %p, %p)\n", Vcb, td, t);
1445
1446 searchkey.obj_id = td->treeholder.address;
1447 searchkey.obj_type = TYPE_EXTENT_REF_V0;
1448 searchkey.offset = 0xffffffffffffffff;
1449
1450 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
1451 if (!NT_SUCCESS(Status)) {
1452 ERR("error - find_item returned %08x\n", Status);
1453 return;
1454 }
1455
1456 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1457 TRACE("could not find EXTENT_REF_V0 for %llx\n", searchkey.obj_id);
1458 return;
1459 }
1460
1461 searchkey.obj_id = td->treeholder.address;
1462 searchkey.obj_type = TYPE_EXTENT_ITEM;
1463 searchkey.offset = Vcb->superblock.node_size;
1464
1465 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
1466 if (!NT_SUCCESS(Status)) {
1467 ERR("error - find_item returned %08x\n", Status);
1468 return;
1469 }
1470
1471 if (keycmp(&searchkey, &tp2.item->key)) {
1472 ERR("could not find %llx,%x,%llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1473 return;
1474 }
1475
1476 if (tp.item->size < sizeof(EXTENT_REF_V0)) {
1477 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_REF_V0));
1478 return;
1479 }
1480
1481 erv0 = (EXTENT_REF_V0*)tp.item->data;
1482
1483 delete_tree_item(Vcb, &tp, rollback);
1484 delete_tree_item(Vcb, &tp2, rollback);
1485
1486 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
1487 EXTENT_ITEM_SKINNY_METADATA* eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
1488
1489 if (!eism) {
1490 ERR("out of memory\n");
1491 return;
1492 }
1493
1494 eism->ei.refcount = 1;
1495 eism->ei.generation = erv0->gen;
1496 eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
1497 eism->type = TYPE_TREE_BLOCK_REF;
1498 eism->tbr.offset = t->header.tree_id;
1499
1500 if (!insert_tree_item(Vcb, Vcb->extent_root, td->treeholder.address, TYPE_METADATA_ITEM, t->header.level -1, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, rollback)) {
1501 ERR("insert_tree_item failed\n");
1502 return;
1503 }
1504 } else {
1505 EXTENT_ITEM_TREE2* eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
1506
1507 if (!eit2) {
1508 ERR("out of memory\n");
1509 return;
1510 }
1511
1512 eit2->eit.extent_item.refcount = 1;
1513 eit2->eit.extent_item.generation = erv0->gen;
1514 eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
1515 eit2->eit.firstitem = td->key;
1516 eit2->eit.level = t->header.level - 1;
1517 eit2->type = TYPE_TREE_BLOCK_REF;
1518 eit2->tbr.offset = t->header.tree_id;
1519
1520 if (!insert_tree_item(Vcb, Vcb->extent_root, td->treeholder.address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, rollback)) {
1521 ERR("insert_tree_item failed\n");
1522 return;
1523 }
1524 }
1525
1526 add_parents_to_cache(Vcb, insert_tp.tree);
1527 add_parents_to_cache(Vcb, tp.tree);
1528 add_parents_to_cache(Vcb, tp2.tree);
1529 }
1530
1531 static NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* t, LIST_ENTRY* rollback) {
1532 KEY searchkey;
1533 traverse_ptr tp;
1534 EXTENT_ITEM* ei;
1535 EXTENT_ITEM_V0* eiv0;
1536 chunk* c;
1537 NTSTATUS Status;
1538
1539 // FIXME - deal with refcounts > 1
1540
1541 TRACE("(%p, %llx, %p)\n", Vcb, address, t);
1542
1543 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
1544 if (reduce_tree_extent_skinny(Vcb, address, t, rollback)) {
1545 return STATUS_SUCCESS;
1546 }
1547 }
1548
1549 searchkey.obj_id = address;
1550 searchkey.obj_type = TYPE_EXTENT_ITEM;
1551 searchkey.offset = Vcb->superblock.node_size;
1552
1553 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
1554 if (!NT_SUCCESS(Status)) {
1555 ERR("error - find_item returned %08x\n", Status);
1556 return Status;
1557 }
1558
1559 if (keycmp(&tp.item->key, &searchkey)) {
1560 ERR("could not find %llx,%x,%llx in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1561 int3;
1562 return STATUS_INTERNAL_ERROR;
1563 }
1564
1565 if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
1566 eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
1567
1568 if (eiv0->refcount > 1) {
1569 FIXME("FIXME - cannot deal with refcounts larger than 1 at present (eiv0->refcount == %llx)\n", eiv0->refcount);
1570 return STATUS_INTERNAL_ERROR;
1571 }
1572 } else {
1573 if (tp.item->size < sizeof(EXTENT_ITEM)) {
1574 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
1575 return STATUS_INTERNAL_ERROR;
1576 }
1577
1578 ei = (EXTENT_ITEM*)tp.item->data;
1579
1580 if (ei->refcount > 1) {
1581 FIXME("FIXME - cannot deal with refcounts larger than 1 at present (ei->refcount == %llx)\n", ei->refcount);
1582 return STATUS_INTERNAL_ERROR;
1583 }
1584
1585 if (t && t->header.level == 0 && ei->flags & EXTENT_ITEM_SHARED_BACKREFS) {
1586 // convert shared data extents
1587
1588 LIST_ENTRY* le = t->itemlist.Flink;
1589 while (le != &t->itemlist) {
1590 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1591
1592 TRACE("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1593
1594 if (!td->ignore && !td->inserted) {
1595 if (td->key.obj_type == TYPE_EXTENT_DATA) {
1596 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1597
1598 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1599 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1600
1601 if (ed2->address != 0) {
1602 TRACE("trying to convert shared data extent %llx,%llx\n", ed2->address, ed2->size);
1603 convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
1604 }
1605 }
1606 }
1607 }
1608
1609 le = le->Flink;
1610 }
1611
1612 t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
1613 }
1614 }
1615
1616 delete_tree_item(Vcb, &tp, rollback);
1617
1618 // if EXTENT_ITEM_V0, delete corresponding B4 item
1619 if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
1620 traverse_ptr tp2;
1621
1622 searchkey.obj_id = address;
1623 searchkey.obj_type = TYPE_EXTENT_REF_V0;
1624 searchkey.offset = 0xffffffffffffffff;
1625
1626 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
1627 if (!NT_SUCCESS(Status)) {
1628 ERR("error - find_item returned %08x\n", Status);
1629 return Status;
1630 }
1631
1632 if (tp2.item->key.obj_id == searchkey.obj_id && tp2.item->key.obj_type == searchkey.obj_type) {
1633 delete_tree_item(Vcb, &tp2, rollback);
1634 }
1635 }
1636
1637 if (t && !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1638 LIST_ENTRY* le;
1639
1640 // when writing old internal trees, convert related extents
1641
1642 le = t->itemlist.Flink;
1643 while (le != &t->itemlist) {
1644 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1645
1646 // ERR("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1647
1648 if (!td->ignore && !td->inserted) {
1649 if (t->header.level > 0) {
1650 convert_old_tree_extent(Vcb, td, t, rollback);
1651 } else if (td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA)) {
1652 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1653
1654 if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1655 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1656
1657 if (ed2->address != 0) {
1658 TRACE("trying to convert old data extent %llx,%llx\n", ed2->address, ed2->size);
1659 convert_old_data_extent(Vcb, ed2->address, ed2->size, rollback);
1660 }
1661 }
1662 }
1663 }
1664
1665 le = le->Flink;
1666 }
1667 }
1668
1669 c = get_chunk_from_address(Vcb, address);
1670
1671 if (c) {
1672 decrease_chunk_usage(c, tp.item->key.offset);
1673
1674 add_to_space_list(c, address, tp.item->key.offset, SPACE_TYPE_DELETING);
1675 } else
1676 ERR("could not find chunk for address %llx\n", address);
1677
1678 return STATUS_SUCCESS;
1679 }
1680
1681 static NTSTATUS allocate_tree_extents(device_extension* Vcb, LIST_ENTRY* rollback) {
1682 LIST_ENTRY* le;
1683 NTSTATUS Status;
1684
1685 TRACE("(%p)\n", Vcb);
1686
1687 le = Vcb->trees.Flink;
1688 while (le != &Vcb->trees) {
1689 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1690
1691 if (t->write && !t->has_new_address) {
1692 chunk* c;
1693
1694 Status = get_tree_new_address(Vcb, t, rollback);
1695 if (!NT_SUCCESS(Status)) {
1696 ERR("get_tree_new_address returned %08x\n", Status);
1697 return Status;
1698 }
1699
1700 TRACE("allocated extent %llx\n", t->new_address);
1701
1702 if (t->has_address) {
1703 Status = reduce_tree_extent(Vcb, t->header.address, t, rollback);
1704
1705 if (!NT_SUCCESS(Status)) {
1706 ERR("reduce_tree_extent returned %08x\n", Status);
1707 return Status;
1708 }
1709 }
1710
1711 c = get_chunk_from_address(Vcb, t->new_address);
1712
1713 if (c) {
1714 increase_chunk_usage(c, Vcb->superblock.node_size);
1715 } else {
1716 ERR("could not find chunk for address %llx\n", t->new_address);
1717 return STATUS_INTERNAL_ERROR;
1718 }
1719 }
1720
1721 le = le->Flink;
1722 }
1723
1724 return STATUS_SUCCESS;
1725 }
1726
1727 static NTSTATUS update_root_root(device_extension* Vcb, LIST_ENTRY* rollback) {
1728 LIST_ENTRY* le;
1729 NTSTATUS Status;
1730
1731 TRACE("(%p)\n", Vcb);
1732
1733 le = Vcb->trees.Flink;
1734 while (le != &Vcb->trees) {
1735 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1736
1737 if (t->write && !t->parent) {
1738 if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
1739 KEY searchkey;
1740 traverse_ptr tp;
1741
1742 searchkey.obj_id = t->root->id;
1743 searchkey.obj_type = TYPE_ROOT_ITEM;
1744 searchkey.offset = 0xffffffffffffffff;
1745
1746 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
1747 if (!NT_SUCCESS(Status)) {
1748 ERR("error - find_item returned %08x\n", Status);
1749 return Status;
1750 }
1751
1752 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1753 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1754 return STATUS_INTERNAL_ERROR;
1755 }
1756
1757 TRACE("updating the address for root %llx to %llx\n", searchkey.obj_id, t->new_address);
1758
1759 t->root->root_item.block_number = t->new_address;
1760 t->root->root_item.root_level = t->header.level;
1761 t->root->root_item.generation = Vcb->superblock.generation;
1762 t->root->root_item.generation2 = Vcb->superblock.generation;
1763
1764 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry
1765 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
1766
1767 if (!ri) {
1768 ERR("out of memory\n");
1769 return STATUS_INSUFFICIENT_RESOURCES;
1770 }
1771
1772 RtlCopyMemory(ri, &t->root->root_item, sizeof(ROOT_ITEM));
1773
1774 delete_tree_item(Vcb, &tp, rollback);
1775
1776 if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, 0, ri, sizeof(ROOT_ITEM), NULL, rollback)) {
1777 ERR("insert_tree_item failed\n");
1778 return STATUS_INTERNAL_ERROR;
1779 }
1780 } else
1781 RtlCopyMemory(tp.item->data, &t->root->root_item, sizeof(ROOT_ITEM));
1782 }
1783
1784 t->root->treeholder.address = t->new_address;
1785 }
1786
1787 le = le->Flink;
1788 }
1789
1790 Status = update_chunk_caches(Vcb, rollback);
1791 if (!NT_SUCCESS(Status)) {
1792 ERR("update_chunk_caches returned %08x\n", Status);
1793 return Status;
1794 }
1795
1796 return STATUS_SUCCESS;
1797 }
1798
1799 static NTSTATUS STDCALL write_tree_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
1800 write_tree_stripe* stripe = conptr;
1801 write_tree_context* context = (write_tree_context*)stripe->context;
1802 LIST_ENTRY* le;
1803 BOOL complete;
1804
1805 if (stripe->status == WriteTreeStatus_Cancelling) {
1806 stripe->status = WriteTreeStatus_Cancelled;
1807 goto end;
1808 }
1809
1810 stripe->iosb = Irp->IoStatus;
1811
1812 if (NT_SUCCESS(Irp->IoStatus.Status)) {
1813 stripe->status = WriteTreeStatus_Success;
1814 } else {
1815 le = context->stripes.Flink;
1816
1817 stripe->status = WriteTreeStatus_Error;
1818
1819 while (le != &context->stripes) {
1820 write_tree_stripe* s2 = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
1821
1822 if (s2->status == WriteTreeStatus_Pending) {
1823 s2->status = WriteTreeStatus_Cancelling;
1824 IoCancelIrp(s2->Irp);
1825 }
1826
1827 le = le->Flink;
1828 }
1829 }
1830
1831 end:
1832 le = context->stripes.Flink;
1833 complete = TRUE;
1834
1835 while (le != &context->stripes) {
1836 write_tree_stripe* s2 = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
1837
1838 if (s2->status == WriteTreeStatus_Pending || s2->status == WriteTreeStatus_Cancelling) {
1839 complete = FALSE;
1840 break;
1841 }
1842
1843 le = le->Flink;
1844 }
1845
1846 if (complete)
1847 KeSetEvent(&context->Event, 0, FALSE);
1848
1849 return STATUS_MORE_PROCESSING_REQUIRED;
1850 }
1851
1852 NTSTATUS write_tree(device_extension* Vcb, UINT64 addr, UINT8* data, write_tree_context* wtc) {
1853 chunk* c;
1854 CHUNK_ITEM_STRIPE* cis;
1855 write_tree_stripe* stripe;
1856 UINT64 i;
1857
1858 c = get_chunk_from_address(Vcb, addr);
1859
1860 if (!c) {
1861 ERR("get_chunk_from_address failed\n");
1862 return STATUS_INTERNAL_ERROR;
1863 }
1864
1865 cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1866
1867 // FIXME - make this work with RAID
1868
1869 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1870 PIO_STACK_LOCATION IrpSp;
1871
1872 // FIXME - handle missing devices
1873
1874 stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_tree_stripe), ALLOC_TAG);
1875 if (!stripe) {
1876 ERR("out of memory\n");
1877 return STATUS_INSUFFICIENT_RESOURCES;
1878 }
1879
1880 stripe->context = (struct write_tree_context*)wtc;
1881 stripe->buf = data;
1882 stripe->device = c->devices[i];
1883 RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK));
1884 stripe->status = WriteTreeStatus_Pending;
1885
1886 stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, FALSE);
1887
1888 if (!stripe->Irp) {
1889 ERR("IoAllocateIrp failed\n");
1890 return STATUS_INTERNAL_ERROR;
1891 }
1892
1893 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
1894 IrpSp->MajorFunction = IRP_MJ_WRITE;
1895
1896 if (stripe->device->devobj->Flags & DO_BUFFERED_IO) {
1897 stripe->Irp->AssociatedIrp.SystemBuffer = data;
1898
1899 stripe->Irp->Flags = IRP_BUFFERED_IO;
1900 } else if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
1901 stripe->Irp->MdlAddress = IoAllocateMdl(data, Vcb->superblock.node_size, FALSE, FALSE, NULL);
1902 if (!stripe->Irp->MdlAddress) {
1903 ERR("IoAllocateMdl failed\n");
1904 return STATUS_INTERNAL_ERROR;
1905 }
1906
1907 MmProbeAndLockPages(stripe->Irp->MdlAddress, KernelMode, IoWriteAccess);
1908 } else {
1909 stripe->Irp->UserBuffer = data;
1910 }
1911
1912 IrpSp->Parameters.Write.Length = Vcb->superblock.node_size;
1913 IrpSp->Parameters.Write.ByteOffset.QuadPart = addr - c->offset + cis[i].offset;
1914
1915 stripe->Irp->UserIosb = &stripe->iosb;
1916
1917 IoSetCompletionRoutine(stripe->Irp, write_tree_completion, stripe, TRUE, TRUE, TRUE);
1918
1919 InsertTailList(&wtc->stripes, &stripe->list_entry);
1920 }
1921
1922 return STATUS_SUCCESS;
1923 }
1924
1925 void free_write_tree_stripes(write_tree_context* wtc) {
1926 LIST_ENTRY *le, *le2, *nextle;
1927
1928 le = wtc->stripes.Flink;
1929 while (le != &wtc->stripes) {
1930 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
1931
1932 if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
1933 MmUnlockPages(stripe->Irp->MdlAddress);
1934 IoFreeMdl(stripe->Irp->MdlAddress);
1935 }
1936
1937 le = le->Flink;
1938 }
1939
1940 le = wtc->stripes.Flink;
1941 while (le != &wtc->stripes) {
1942 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
1943
1944 nextle = le->Flink;
1945
1946 if (stripe->buf) {
1947 ExFreePool(stripe->buf);
1948
1949 le2 = le->Flink;
1950 while (le2 != &wtc->stripes) {
1951 write_tree_stripe* s2 = CONTAINING_RECORD(le2, write_tree_stripe, list_entry);
1952
1953 if (s2->buf == stripe->buf)
1954 s2->buf = NULL;
1955
1956 le2 = le2->Flink;
1957 }
1958
1959 }
1960
1961 ExFreePool(stripe);
1962
1963 le = nextle;
1964 }
1965 }
1966
1967 static NTSTATUS write_trees(device_extension* Vcb) {
1968 UINT8 level;
1969 UINT8 *data, *body;
1970 UINT32 crc32;
1971 NTSTATUS Status;
1972 LIST_ENTRY* le;
1973 write_tree_context* wtc;
1974
1975 TRACE("(%p)\n", Vcb);
1976
1977 for (level = 0; level <= 255; level++) {
1978 BOOL nothing_found = TRUE;
1979
1980 TRACE("level = %u\n", level);
1981
1982 le = Vcb->trees.Flink;
1983 while (le != &Vcb->trees) {
1984 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1985
1986 if (t->write && t->header.level == level) {
1987 KEY firstitem, searchkey;
1988 LIST_ENTRY* le2;
1989 traverse_ptr tp;
1990 EXTENT_ITEM_TREE* eit;
1991
1992 if (!t->has_new_address) {
1993 ERR("error - tried to write tree with no new address\n");
1994 int3;
1995 }
1996
1997 le2 = t->itemlist.Flink;
1998 while (le2 != &t->itemlist) {
1999 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2000 if (!td->ignore) {
2001 firstitem = td->key;
2002 break;
2003 }
2004 le2 = le2->Flink;
2005 }
2006
2007 if (t->parent) {
2008 t->paritem->key = firstitem;
2009 t->paritem->treeholder.address = t->new_address;
2010 t->paritem->treeholder.generation = Vcb->superblock.generation;
2011 }
2012
2013 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
2014 searchkey.obj_id = t->new_address;
2015 searchkey.obj_type = TYPE_EXTENT_ITEM;
2016 searchkey.offset = Vcb->superblock.node_size;
2017
2018 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2019 if (!NT_SUCCESS(Status)) {
2020 ERR("error - find_item returned %08x\n", Status);
2021 return Status;
2022 }
2023
2024 if (keycmp(&searchkey, &tp.item->key)) {
2025 // traverse_ptr next_tp;
2026 // BOOL b;
2027 // tree_data* paritem;
2028
2029 ERR("could not find %llx,%x,%llx in extent_root (found %llx,%x,%llx instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
2030
2031 // searchkey.obj_id = 0;
2032 // searchkey.obj_type = 0;
2033 // searchkey.offset = 0;
2034 //
2035 // find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2036 //
2037 // paritem = NULL;
2038 // do {
2039 // if (tp.tree->paritem != paritem) {
2040 // paritem = tp.tree->paritem;
2041 // ERR("paritem: %llx,%x,%llx\n", paritem->key.obj_id, paritem->key.obj_type, paritem->key.offset);
2042 // }
2043 //
2044 // ERR("%llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
2045 //
2046 // b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
2047 // if (b) {
2048 // free_traverse_ptr(&tp);
2049 // tp = next_tp;
2050 // }
2051 // } while (b);
2052 //
2053 // free_traverse_ptr(&tp);
2054
2055 return STATUS_INTERNAL_ERROR;
2056 }
2057
2058 if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
2059 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
2060 return STATUS_INTERNAL_ERROR;
2061 }
2062
2063 eit = (EXTENT_ITEM_TREE*)tp.item->data;
2064 eit->firstitem = firstitem;
2065 }
2066
2067 nothing_found = FALSE;
2068 }
2069
2070 le = le->Flink;
2071 }
2072
2073 if (nothing_found)
2074 break;
2075 }
2076
2077 TRACE("allocated tree extents\n");
2078
2079 wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_tree_context), ALLOC_TAG);
2080 if (!wtc) {
2081 ERR("out of memory\n");
2082 return STATUS_INSUFFICIENT_RESOURCES;
2083 }
2084
2085 KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE);
2086 InitializeListHead(&wtc->stripes);
2087
2088 le = Vcb->trees.Flink;
2089 while (le != &Vcb->trees) {
2090 tree* t = CONTAINING_RECORD(le, tree, list_entry);
2091 #ifdef DEBUG_PARANOID
2092 UINT32 num_items = 0, size = 0;
2093 LIST_ENTRY* le2;
2094 BOOL crash = FALSE;
2095 #endif
2096
2097 if (t->write) {
2098 #ifdef DEBUG_PARANOID
2099 le2 = t->itemlist.Flink;
2100 while (le2 != &t->itemlist) {
2101 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2102 if (!td->ignore) {
2103 num_items++;
2104
2105 if (t->header.level == 0)
2106 size += td->size;
2107 }
2108 le2 = le2->Flink;
2109 }
2110
2111 if (t->header.level == 0)
2112 size += num_items * sizeof(leaf_node);
2113 else
2114 size += num_items * sizeof(internal_node);
2115
2116 if (num_items != t->header.num_items) {
2117 ERR("tree %llx, level %x: num_items was %x, expected %x\n", t->root->id, t->header.level, num_items, t->header.num_items);
2118 crash = TRUE;
2119 }
2120
2121 if (size != t->size) {
2122 ERR("tree %llx, level %x: size was %x, expected %x\n", t->root->id, t->header.level, size, t->size);
2123 crash = TRUE;
2124 }
2125
2126 if (t->header.num_items == 0 && t->parent) {
2127 ERR("tree %llx, level %x: tried to write empty tree with parent\n", t->root->id, t->header.level);
2128 crash = TRUE;
2129 }
2130
2131 if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
2132 ERR("tree %llx, level %x: tried to write overlarge tree (%x > %x)\n", t->root->id, t->header.level, t->size, Vcb->superblock.node_size - sizeof(tree_header));
2133 crash = TRUE;
2134 }
2135
2136 if (crash) {
2137 ERR("tree %p\n", t);
2138 le2 = t->itemlist.Flink;
2139 while (le2 != &t->itemlist) {
2140 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2141 if (!td->ignore) {
2142 ERR("%llx,%x,%llx inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted);
2143 }
2144 le2 = le2->Flink;
2145 }
2146 int3;
2147 }
2148 #endif
2149 t->header.address = t->new_address;
2150 t->header.generation = Vcb->superblock.generation;
2151 t->header.flags |= HEADER_FLAG_MIXED_BACKREF;
2152 t->has_address = TRUE;
2153
2154 data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
2155 if (!data) {
2156 ERR("out of memory\n");
2157 Status = STATUS_INSUFFICIENT_RESOURCES;
2158 goto end;
2159 }
2160
2161 body = data + sizeof(tree_header);
2162
2163 RtlCopyMemory(data, &t->header, sizeof(tree_header));
2164 RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header));
2165
2166 if (t->header.level == 0) {
2167 leaf_node* itemptr = (leaf_node*)body;
2168 int i = 0;
2169 LIST_ENTRY* le2;
2170 UINT8* dataptr = data + Vcb->superblock.node_size;
2171
2172 le2 = t->itemlist.Flink;
2173 while (le2 != &t->itemlist) {
2174 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2175 if (!td->ignore) {
2176 dataptr = dataptr - td->size;
2177
2178 itemptr[i].key = td->key;
2179 itemptr[i].offset = (UINT8*)dataptr - (UINT8*)body;
2180 itemptr[i].size = td->size;
2181 i++;
2182
2183 if (td->size > 0)
2184 RtlCopyMemory(dataptr, td->data, td->size);
2185 }
2186
2187 le2 = le2->Flink;
2188 }
2189 } else {
2190 internal_node* itemptr = (internal_node*)body;
2191 int i = 0;
2192 LIST_ENTRY* le2;
2193
2194 le2 = t->itemlist.Flink;
2195 while (le2 != &t->itemlist) {
2196 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2197 if (!td->ignore) {
2198 itemptr[i].key = td->key;
2199 itemptr[i].address = td->treeholder.address;
2200 itemptr[i].generation = td->treeholder.generation;
2201 i++;
2202 }
2203
2204 le2 = le2->Flink;
2205 }
2206 }
2207
2208 crc32 = calc_crc32c(0xffffffff, (UINT8*)&((tree_header*)data)->fs_uuid, Vcb->superblock.node_size - sizeof(((tree_header*)data)->csum));
2209 crc32 = ~crc32;
2210 *((UINT32*)data) = crc32;
2211 TRACE("setting crc32 to %08x\n", crc32);
2212
2213 Status = write_tree(Vcb, t->new_address, data, wtc);
2214 if (!NT_SUCCESS(Status)) {
2215 ERR("write_tree returned %08x\n", Status);
2216 goto end;
2217 }
2218 }
2219
2220 le = le->Flink;
2221 }
2222
2223 Status = STATUS_SUCCESS;
2224
2225 if (wtc->stripes.Flink != &wtc->stripes) {
2226 // launch writes and wait
2227 le = wtc->stripes.Flink;
2228 while (le != &wtc->stripes) {
2229 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
2230
2231 IoCallDriver(stripe->device->devobj, stripe->Irp);
2232
2233 le = le->Flink;
2234 }
2235
2236 KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL);
2237
2238 le = wtc->stripes.Flink;
2239 while (le != &wtc->stripes) {
2240 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
2241
2242 if (!NT_SUCCESS(stripe->iosb.Status)) {
2243 Status = stripe->iosb.Status;
2244 break;
2245 }
2246
2247 le = le->Flink;
2248 }
2249
2250 free_write_tree_stripes(wtc);
2251 }
2252
2253 end:
2254 ExFreePool(wtc);
2255
2256 return Status;
2257 }
2258
2259 static void update_backup_superblock(device_extension* Vcb, superblock_backup* sb) {
2260 KEY searchkey;
2261 traverse_ptr tp;
2262
2263 RtlZeroMemory(sb, sizeof(superblock_backup));
2264
2265 sb->root_tree_addr = Vcb->superblock.root_tree_addr;
2266 sb->root_tree_generation = Vcb->superblock.generation;
2267 sb->root_level = Vcb->superblock.root_level;
2268
2269 sb->chunk_tree_addr = Vcb->superblock.chunk_tree_addr;
2270 sb->chunk_tree_generation = Vcb->superblock.chunk_root_generation;
2271 sb->chunk_root_level = Vcb->superblock.chunk_root_level;
2272
2273 searchkey.obj_id = BTRFS_ROOT_EXTENT;
2274 searchkey.obj_type = TYPE_ROOT_ITEM;
2275 searchkey.offset = 0xffffffffffffffff;
2276
2277 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE))) {
2278 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2279 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2280
2281 sb->extent_tree_addr = ri->block_number;
2282 sb->extent_tree_generation = ri->generation;
2283 sb->extent_root_level = ri->root_level;
2284 }
2285 }
2286
2287 searchkey.obj_id = BTRFS_ROOT_FSTREE;
2288
2289 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE))) {
2290 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2291 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2292
2293 sb->fs_tree_addr = ri->block_number;
2294 sb->fs_tree_generation = ri->generation;
2295 sb->fs_root_level = ri->root_level;
2296 }
2297 }
2298
2299 searchkey.obj_id = BTRFS_ROOT_DEVTREE;
2300
2301 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE))) {
2302 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2303 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2304
2305 sb->dev_root_addr = ri->block_number;
2306 sb->dev_root_generation = ri->generation;
2307 sb->dev_root_level = ri->root_level;
2308 }
2309 }
2310
2311 searchkey.obj_id = BTRFS_ROOT_CHECKSUM;
2312
2313 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE))) {
2314 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2315 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2316
2317 sb->csum_root_addr = ri->block_number;
2318 sb->csum_root_generation = ri->generation;
2319 sb->csum_root_level = ri->root_level;
2320 }
2321 }
2322
2323 sb->total_bytes = Vcb->superblock.total_bytes;
2324 sb->bytes_used = Vcb->superblock.bytes_used;
2325 sb->num_devices = Vcb->superblock.num_devices;
2326 }
2327
2328 static NTSTATUS write_superblocks(device_extension* Vcb) {
2329 UINT64 i;
2330 NTSTATUS Status;
2331 LIST_ENTRY* le;
2332
2333 TRACE("(%p)\n", Vcb);
2334
2335 le = Vcb->trees.Flink;
2336 while (le != &Vcb->trees) {
2337 tree* t = CONTAINING_RECORD(le, tree, list_entry);
2338
2339 if (t->write && !t->parent) {
2340 if (t->root == Vcb->root_root) {
2341 Vcb->superblock.root_tree_addr = t->new_address;
2342 Vcb->superblock.root_level = t->header.level;
2343 } else if (t->root == Vcb->chunk_root) {
2344 Vcb->superblock.chunk_tree_addr = t->new_address;
2345 Vcb->superblock.chunk_root_generation = t->header.generation;
2346 Vcb->superblock.chunk_root_level = t->header.level;
2347 }
2348 }
2349
2350 le = le->Flink;
2351 }
2352
2353 for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS - 1; i++) {
2354 RtlCopyMemory(&Vcb->superblock.backup[i], &Vcb->superblock.backup[i+1], sizeof(superblock_backup));
2355 }
2356
2357 update_backup_superblock(Vcb, &Vcb->superblock.backup[BTRFS_NUM_BACKUP_ROOTS - 1]);
2358
2359 for (i = 0; i < Vcb->superblock.num_devices; i++) {
2360 if (Vcb->devices[i].devobj) {
2361 Status = write_superblock(Vcb, &Vcb->devices[i]);
2362 if (!NT_SUCCESS(Status)) {
2363 ERR("write_superblock returned %08x\n", Status);
2364 return Status;
2365 }
2366 }
2367 }
2368
2369 return STATUS_SUCCESS;
2370 }
2371
2372 static NTSTATUS update_chunk_usage(device_extension* Vcb, LIST_ENTRY* rollback) {
2373 LIST_ENTRY* le = Vcb->chunks.Flink;
2374 chunk* c;
2375 KEY searchkey;
2376 traverse_ptr tp;
2377 BLOCK_GROUP_ITEM* bgi;
2378 NTSTATUS Status;
2379
2380 TRACE("(%p)\n", Vcb);
2381
2382 while (le != &Vcb->chunks) {
2383 c = CONTAINING_RECORD(le, chunk, list_entry);
2384
2385 if (c->used != c->oldused) {
2386 searchkey.obj_id = c->offset;
2387 searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
2388 searchkey.offset = c->chunk_item->size;
2389
2390 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2391 if (!NT_SUCCESS(Status)) {
2392 ERR("error - find_item returned %08x\n", Status);
2393 return Status;
2394 }
2395
2396 if (keycmp(&searchkey, &tp.item->key)) {
2397 ERR("could not find (%llx,%x,%llx) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
2398 int3;
2399 return STATUS_INTERNAL_ERROR;
2400 }
2401
2402 if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
2403 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
2404 return STATUS_INTERNAL_ERROR;
2405 }
2406
2407 bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
2408 if (!bgi) {
2409 ERR("out of memory\n");
2410 return STATUS_INSUFFICIENT_RESOURCES;
2411 }
2412
2413 RtlCopyMemory(bgi, tp.item->data, tp.item->size);
2414 bgi->used = c->used;
2415
2416 TRACE("adjusting usage of chunk %llx to %llx\n", c->offset, c->used);
2417
2418 delete_tree_item(Vcb, &tp, rollback);
2419
2420 if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, rollback)) {
2421 ERR("insert_tree_item failed\n");
2422 ExFreePool(bgi);
2423 return STATUS_INTERNAL_ERROR;
2424 }
2425
2426 TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
2427 TRACE("chunk_item type = %llx\n", c->chunk_item->type);
2428
2429 if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
2430 FIXME("RAID0 not yet supported\n");
2431 ExFreePool(bgi);
2432 return STATUS_INTERNAL_ERROR;
2433 } else if (c->chunk_item->type & BLOCK_FLAG_RAID1) {
2434 FIXME("RAID1 not yet supported\n");
2435 ExFreePool(bgi);
2436 return STATUS_INTERNAL_ERROR;
2437 } else if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE) {
2438 Vcb->superblock.bytes_used = Vcb->superblock.bytes_used + (2 * (c->used - c->oldused));
2439 } else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
2440 FIXME("RAID10 not yet supported\n");
2441 ExFreePool(bgi);
2442 return STATUS_INTERNAL_ERROR;
2443 } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
2444 FIXME("RAID5 not yet supported\n");
2445 ExFreePool(bgi);
2446 return STATUS_INTERNAL_ERROR;
2447 } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2448 FIXME("RAID6 not yet supported\n");
2449 ExFreePool(bgi);
2450 return STATUS_INTERNAL_ERROR;
2451 } else { // SINGLE
2452 Vcb->superblock.bytes_used = Vcb->superblock.bytes_used + c->used - c->oldused;
2453 }
2454
2455 TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
2456
2457 c->oldused = c->used;
2458 }
2459
2460 le = le->Flink;
2461 }
2462
2463 return STATUS_SUCCESS;
2464 }
2465
2466 static void get_first_item(tree* t, KEY* key) {
2467 LIST_ENTRY* le;
2468
2469 le = t->itemlist.Flink;
2470 while (le != &t->itemlist) {
2471 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2472
2473 *key = td->key;
2474 return;
2475 }
2476 }
2477
2478 static NTSTATUS STDCALL split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, UINT32 numitems, UINT32 size) {
2479 tree *nt, *pt;
2480 tree_data* td;
2481 tree_data* oldlastitem;
2482 // write_tree* wt2;
2483 // // tree_data *firsttd, *lasttd;
2484 // // LIST_ENTRY* le;
2485 // #ifdef DEBUG_PARANOID
2486 // KEY lastkey1, lastkey2;
2487 // traverse_ptr tp, next_tp;
2488 // ULONG numitems1, numitems2;
2489 // #endif
2490
2491 TRACE("splitting tree in %llx at (%llx,%x,%llx)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);
2492
2493 // #ifdef DEBUG_PARANOID
2494 // lastkey1.obj_id = 0xffffffffffffffff;
2495 // lastkey1.obj_type = 0xff;
2496 // lastkey1.offset = 0xffffffffffffffff;
2497 //
2498 // if (!find_item(Vcb, t->root, &tp, &lastkey1, NULL, FALSE))
2499 // ERR("error - find_item failed\n");
2500 // else {
2501 // lastkey1 = tp.item->key;
2502 // numitems1 = 0;
2503 // while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
2504 // free_traverse_ptr(&tp);
2505 // tp = next_tp;
2506 // numitems1++;
2507 // }
2508 // free_traverse_ptr(&tp);
2509 // }
2510 // #endif
2511
2512 nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2513 if (!nt) {
2514 ERR("out of memory\n");
2515 return STATUS_INSUFFICIENT_RESOURCES;
2516 }
2517
2518 RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header));
2519 nt->header.address = 0;
2520 nt->header.generation = Vcb->superblock.generation;
2521 nt->header.num_items = t->header.num_items - numitems;
2522 nt->header.flags = HEADER_FLAG_MIXED_BACKREF;
2523
2524 nt->has_address = FALSE;
2525 nt->Vcb = Vcb;
2526 nt->parent = t->parent;
2527 nt->root = t->root;
2528 // nt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
2529 nt->new_address = 0;
2530 nt->has_new_address = FALSE;
2531 nt->flags = t->flags;
2532 InitializeListHead(&nt->itemlist);
2533
2534 // ExInitializeResourceLite(&nt->nonpaged->load_tree_lock);
2535
2536 oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry);
2537
2538 // // firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
2539 // // lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
2540 // //
2541 // // TRACE("old tree in %x was from (%x,%x,%x) to (%x,%x,%x)\n",
2542 // // (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
2543 // // (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
2544 // //
2545 // // le = wt->tree->itemlist.Flink;
2546 // // while (le != &wt->tree->itemlist) {
2547 // // td = CONTAINING_RECORD(le, tree_data, list_entry);
2548 // // TRACE("old tree item was (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
2549 // // le = le->Flink;
2550 // // }
2551
2552 nt->itemlist.Flink = &newfirstitem->list_entry;
2553 nt->itemlist.Blink = t->itemlist.Blink;
2554 nt->itemlist.Flink->Blink = &nt->itemlist;
2555 nt->itemlist.Blink->Flink = &nt->itemlist;
2556
2557 t->itemlist.Blink = &oldlastitem->list_entry;
2558 t->itemlist.Blink->Flink = &t->itemlist;
2559
2560 // // le = wt->tree->itemlist.Flink;
2561 // // while (le != &wt->tree->itemlist) {
2562 // // td = CONTAINING_RECORD(le, tree_data, list_entry);
2563 // // TRACE("old tree item now (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
2564 // // le = le->Flink;
2565 // // }
2566 // //
2567 // // firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
2568 // // lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
2569 // //
2570 // // TRACE("old tree in %x is now from (%x,%x,%x) to (%x,%x,%x)\n",
2571 // // (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
2572 // // (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
2573
2574 nt->size = t->size - size;
2575 t->size = size;
2576 t->header.num_items = numitems;
2577 nt->write = TRUE;
2578 Vcb->write_trees++;
2579
2580 InterlockedIncrement(&Vcb->open_trees);
2581 InsertTailList(&Vcb->trees, &nt->list_entry);
2582
2583 // // // TESTING
2584 // // td = wt->tree->items;
2585 // // while (td) {
2586 // // if (!td->ignore) {
2587 // // TRACE("old tree item: (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
2588 // // }
2589 // // td = td->next;
2590 // // }
2591
2592 // // oldlastitem->next = NULL;
2593 // // wt->tree->lastitem = oldlastitem;
2594
2595 // // TRACE("last item is now (%x,%x,%x)\n", (UINT32)oldlastitem->key.obj_id, oldlastitem->key.obj_type, (UINT32)oldlastitem->key.offset);
2596
2597 if (nt->header.level > 0) {
2598 LIST_ENTRY* le = nt->itemlist.Flink;
2599
2600 while (le != &nt->itemlist) {
2601 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
2602
2603 if (td2->treeholder.tree)
2604 td2->treeholder.tree->parent = nt;
2605
2606 le = le->Flink;
2607 }
2608 }
2609
2610 if (nt->parent) {
2611 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
2612 if (!td) {
2613 ERR("out of memory\n");
2614 return STATUS_INSUFFICIENT_RESOURCES;
2615 }
2616
2617 td->key = newfirstitem->key;
2618
2619 InsertHeadList(&t->paritem->list_entry, &td->list_entry);
2620
2621 td->ignore = FALSE;
2622 td->inserted = TRUE;
2623 td->treeholder.tree = nt;
2624 init_tree_holder(&td->treeholder);
2625 // td->treeholder.nonpaged->status = tree_holder_loaded;
2626 nt->paritem = td;
2627
2628 nt->parent->header.num_items++;
2629 nt->parent->size += sizeof(internal_node);
2630
2631 goto end;
2632 }
2633
2634 TRACE("adding new tree parent\n");
2635
2636 if (nt->header.level == 255) {
2637 ERR("cannot add parent to tree at level 255\n");
2638 return STATUS_INTERNAL_ERROR;
2639 }
2640
2641 pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2642 if (!pt) {
2643 ERR("out of memory\n");
2644 return STATUS_INSUFFICIENT_RESOURCES;
2645 }
2646
2647 RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
2648 pt->header.address = 0;
2649 pt->header.num_items = 2;
2650 pt->header.level = nt->header.level + 1;
2651 pt->header.flags = HEADER_FLAG_MIXED_BACKREF;
2652
2653 pt->has_address = FALSE;
2654 pt->Vcb = Vcb;
2655 pt->parent = NULL;
2656 pt->paritem = NULL;
2657 pt->root = t->root;
2658 pt->new_address = 0;
2659 pt->has_new_address = FALSE;
2660 // pt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
2661 pt->size = pt->header.num_items * sizeof(internal_node);
2662 pt->flags = t->flags;
2663 InitializeListHead(&pt->itemlist);
2664
2665 // ExInitializeResourceLite(&pt->nonpaged->load_tree_lock);
2666
2667 InterlockedIncrement(&Vcb->open_trees);
2668 InsertTailList(&Vcb->trees, &pt->list_entry);
2669
2670 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
2671 if (!td) {
2672 ERR("out of memory\n");
2673 return STATUS_INSUFFICIENT_RESOURCES;
2674 }
2675
2676 get_first_item(t, &td->key);
2677 td->ignore = FALSE;
2678 td->inserted = FALSE;
2679 td->treeholder.address = 0;
2680 td->treeholder.generation = Vcb->superblock.generation;
2681 td->treeholder.tree = t;
2682 init_tree_holder(&td->treeholder);
2683 // td->treeholder.nonpaged->status = tree_holder_loaded;
2684 InsertTailList(&pt->itemlist, &td->list_entry);
2685 t->paritem = td;
2686
2687 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
2688 if (!td) {
2689 ERR("out of memory\n");
2690 return STATUS_INSUFFICIENT_RESOURCES;
2691 }
2692
2693 td->key = newfirstitem->key;
2694 td->ignore = FALSE;
2695 td->inserted = FALSE;
2696 td->treeholder.address = 0;
2697 td->treeholder.generation = Vcb->superblock.generation;
2698 td->treeholder.tree = nt;
2699 init_tree_holder(&td->treeholder);
2700 // td->treeholder.nonpaged->status = tree_holder_loaded;
2701 InsertTailList(&pt->itemlist, &td->list_entry);
2702 nt->paritem = td;
2703
2704 pt->write = TRUE;
2705 Vcb->write_trees++;
2706
2707 t->root->treeholder.tree = pt;
2708
2709 t->parent = pt;
2710 nt->parent = pt;
2711
2712 end:
2713 t->root->root_item.bytes_used += Vcb->superblock.node_size;
2714
2715 // #ifdef DEBUG_PARANOID
2716 // lastkey2.obj_id = 0xffffffffffffffff;
2717 // lastkey2.obj_type = 0xff;
2718 // lastkey2.offset = 0xffffffffffffffff;
2719 //
2720 // if (!find_item(Vcb, wt->tree->root, &tp, &lastkey2, NULL, FALSE))
2721 // ERR("error - find_item failed\n");
2722 // else {
2723 // lastkey2 = tp.item->key;
2724 //
2725 // numitems2 = 0;
2726 // while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
2727 // free_traverse_ptr(&tp);
2728 // tp = next_tp;
2729 // numitems2++;
2730 // }
2731 // free_traverse_ptr(&tp);
2732 // }
2733 //
2734 // ERR("lastkey1 = %llx,%x,%llx\n", lastkey1.obj_id, lastkey1.obj_type, lastkey1.offset);
2735 // ERR("lastkey2 = %llx,%x,%llx\n", lastkey2.obj_id, lastkey2.obj_type, lastkey2.offset);
2736 // ERR("numitems1 = %u\n", numitems1);
2737 // ERR("numitems2 = %u\n", numitems2);
2738 // #endif
2739
2740 return STATUS_SUCCESS;
2741 }
2742
2743 static NTSTATUS STDCALL split_tree(device_extension* Vcb, tree* t) {
2744 LIST_ENTRY* le;
2745 UINT32 size, ds, numitems;
2746
2747 size = 0;
2748 numitems = 0;
2749
2750 // FIXME - naïve implementation: maximizes number of filled trees
2751
2752 le = t->itemlist.Flink;
2753 while (le != &t->itemlist) {
2754 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2755
2756 if (!td->ignore) {
2757 if (t->header.level == 0)
2758 ds = sizeof(leaf_node) + td->size;
2759 else
2760 ds = sizeof(internal_node);
2761
2762 // FIXME - move back if previous item was deleted item with same key
2763 if (size + ds > Vcb->superblock.node_size - sizeof(tree_header))
2764 return split_tree_at(Vcb, t, td, numitems, size);
2765
2766 size += ds;
2767 numitems++;
2768 }
2769
2770 le = le->Flink;
2771 }
2772
2773 return STATUS_SUCCESS;
2774 }
2775
2776 static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, LIST_ENTRY* rollback) {
2777 LIST_ENTRY* le;
2778 tree_data* nextparitem = NULL;
2779 NTSTATUS Status;
2780 tree *next_tree, *par;
2781 BOOL loaded;
2782
2783 TRACE("trying to amalgamate tree in root %llx, level %x (size %u)\n", t->root->id, t->header.level, t->size);
2784
2785 // FIXME - doesn't capture everything, as it doesn't ascend
2786 // FIXME - write proper function and put it in treefuncs.c
2787 le = t->paritem->list_entry.Flink;
2788 while (le != &t->parent->itemlist) {
2789 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2790
2791 if (!td->ignore) {
2792 nextparitem = td;
2793 break;
2794 }
2795
2796 le = le->Flink;
2797 }
2798
2799 if (!nextparitem)
2800 return STATUS_SUCCESS;
2801
2802 // FIXME - loop, and capture more than one tree if we can
2803
2804 TRACE("nextparitem: key = %llx,%x,%llx\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset);
2805 // nextparitem = t->paritem;
2806
2807 // ExAcquireResourceExclusiveLite(&t->parent->nonpaged->load_tree_lock, TRUE);
2808
2809 Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, &loaded);
2810 if (!NT_SUCCESS(Status)) {
2811 ERR("do_load_tree returned %08x\n", Status);
2812 return Status;
2813 }
2814
2815 // ExReleaseResourceLite(&t->parent->nonpaged->load_tree_lock);
2816
2817 next_tree = nextparitem->treeholder.tree;
2818
2819 if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) {
2820 // merge two trees into one
2821
2822 t->header.num_items += next_tree->header.num_items;
2823 t->size += next_tree->size;
2824
2825 if (next_tree->header.level > 0) {
2826 le = next_tree->itemlist.Flink;
2827
2828 while (le != &next_tree->itemlist) {
2829 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
2830
2831 if (td2->treeholder.tree)
2832 td2->treeholder.tree->parent = t;
2833
2834 le = le->Flink;
2835 }
2836 }
2837
2838 t->itemlist.Blink->Flink = next_tree->itemlist.Flink;
2839 t->itemlist.Blink->Flink->Blink = t->itemlist.Blink;
2840 t->itemlist.Blink = next_tree->itemlist.Blink;
2841 t->itemlist.Blink->Flink = &t->itemlist;
2842
2843 // // TESTING
2844 // le = t->itemlist.Flink;
2845 // while (le != &t->itemlist) {
2846 // tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2847 // if (!td->ignore) {
2848 // ERR("key: %llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
2849 // }
2850 // le = le->Flink;
2851 // }
2852
2853 next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist;
2854
2855 next_tree->header.num_items = 0;
2856 next_tree->size = 0;
2857
2858 if (next_tree->has_new_address) { // delete associated EXTENT_ITEM
2859 Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, rollback);
2860
2861 if (!NT_SUCCESS(Status)) {
2862 ERR("reduce_tree_extent returned %08x\n", Status);
2863 return Status;
2864 }
2865 } else if (next_tree->has_address) {
2866 Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, rollback);
2867
2868 if (!NT_SUCCESS(Status)) {
2869 ERR("reduce_tree_extent returned %08x\n", Status);
2870 return Status;
2871 }
2872 }
2873
2874 if (!nextparitem->ignore) {
2875 nextparitem->ignore = TRUE;
2876 next_tree->parent->header.num_items--;
2877 next_tree->parent->size -= sizeof(internal_node);
2878 }
2879
2880 par = next_tree->parent;
2881 while (par) {
2882 if (!par->write) {
2883 par->write = TRUE;
2884 Vcb->write_trees++;
2885 }
2886 par = par->parent;
2887 }
2888
2889 RemoveEntryList(&nextparitem->list_entry);
2890 ExFreePool(next_tree->paritem);
2891 next_tree->paritem = NULL;
2892
2893 next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
2894
2895 free_tree(next_tree);
2896 } else {
2897 // rebalance by moving items from second tree into first
2898 ULONG avg_size = (t->size + next_tree->size) / 2;
2899 KEY firstitem = {0, 0, 0};
2900
2901 TRACE("attempting rebalance\n");
2902
2903 le = next_tree->itemlist.Flink;
2904 while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) {
2905 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2906 ULONG size;
2907
2908 if (!td->ignore) {
2909 if (next_tree->header.level == 0)
2910 size = sizeof(leaf_node) + td->size;
2911 else
2912 size = sizeof(internal_node);
2913 } else
2914 size = 0;
2915
2916 if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) {
2917 RemoveEntryList(&td->list_entry);
2918 InsertTailList(&t->itemlist, &td->list_entry);
2919
2920 if (next_tree->header.level > 0 && td->treeholder.tree)
2921 td->treeholder.tree->parent = t;
2922
2923 if (!td->ignore) {
2924 next_tree->size -= size;
2925 t->size += size;
2926 next_tree->header.num_items--;
2927 t->header.num_items++;
2928 }
2929 } else
2930 break;
2931
2932 le = next_tree->itemlist.Flink;
2933 }
2934
2935 le = next_tree->itemlist.Flink;
2936 while (le != &next_tree->itemlist) {
2937 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2938
2939 if (!td->ignore) {
2940 firstitem = td->key;
2941 break;
2942 }
2943
2944 le = le->Flink;
2945 }
2946
2947 // ERR("firstitem = %llx,%x,%llx\n", firstitem.obj_id, firstitem.obj_type, firstitem.offset);
2948
2949 // FIXME - once ascension is working, make this work with parent's parent, etc.
2950 if (next_tree->paritem)
2951 next_tree->paritem->key = firstitem;
2952
2953 par = next_tree;
2954 while (par) {
2955 if (!par->write) {
2956 par->write = TRUE;
2957 Vcb->write_trees++;
2958 }
2959 par = par->parent;
2960 }
2961 }
2962
2963 return STATUS_SUCCESS;
2964 }
2965
2966 static NTSTATUS update_extent_level(device_extension* Vcb, UINT64 address, tree* t, UINT8 level, LIST_ENTRY* rollback) {
2967 KEY searchkey;
2968 traverse_ptr tp;
2969 NTSTATUS Status;
2970
2971 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
2972 searchkey.obj_id = address;
2973 searchkey.obj_type = TYPE_METADATA_ITEM;
2974 searchkey.offset = t->header.level;
2975