[BTRFS]
[reactos.git] / reactos / drivers / filesystems / btrfs / write.c
1 /* Copyright (c) Mark Harmstone 2016
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19
20 #define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - sizeof(leaf_node))
21
22 // BOOL did_split;
23 BOOL chunk_test = FALSE;
24
25 typedef struct {
26 KEVENT Event;
27 IO_STATUS_BLOCK iosb;
28 } write_context;
29
30 typedef struct {
31 EXTENT_ITEM ei;
32 UINT8 type;
33 EXTENT_DATA_REF edr;
34 } EXTENT_ITEM_DATA_REF;
35
36 typedef struct {
37 EXTENT_ITEM_TREE eit;
38 UINT8 type;
39 TREE_BLOCK_REF tbr;
40 } EXTENT_ITEM_TREE2;
41
42 typedef struct {
43 EXTENT_ITEM ei;
44 UINT8 type;
45 TREE_BLOCK_REF tbr;
46 } EXTENT_ITEM_SKINNY_METADATA;
47
48 typedef struct {
49 CHUNK_ITEM ci;
50 CHUNK_ITEM_STRIPE stripes[1];
51 } CHUNK_ITEM2;
52
53 typedef struct {
54 LIST_ENTRY list_entry;
55 UINT64 key;
56 } ordered_list;
57
58 typedef struct {
59 ordered_list ol;
60 ULONG length;
61 UINT32* checksums;
62 BOOL deleted;
63 } changed_sector;
64
65 static NTSTATUS convert_old_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, LIST_ENTRY* rollback);
66 static BOOL extent_item_is_shared(EXTENT_ITEM* ei, ULONG len);
67 static NTSTATUS convert_shared_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, LIST_ENTRY* rollback);
68
69 static NTSTATUS STDCALL write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
70 write_context* context = conptr;
71
72 context->iosb = Irp->IoStatus;
73 KeSetEvent(&context->Event, 0, FALSE);
74
75 // return STATUS_SUCCESS;
76 return STATUS_MORE_PROCESSING_REQUIRED;
77 }
78
79 static NTSTATUS STDCALL write_data_phys(PDEVICE_OBJECT device, UINT64 address, void* data, UINT32 length) {
80 NTSTATUS Status;
81 LARGE_INTEGER offset;
82 PIRP Irp;
83 PIO_STACK_LOCATION IrpSp;
84 write_context* context = NULL;
85
86 TRACE("(%p, %llx, %p, %x)\n", device, address, data, length);
87
88 context = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_context), ALLOC_TAG);
89 if (!context) {
90 ERR("out of memory\n");
91 return STATUS_INSUFFICIENT_RESOURCES;
92 }
93
94 RtlZeroMemory(context, sizeof(write_context));
95
96 KeInitializeEvent(&context->Event, NotificationEvent, FALSE);
97
98 offset.QuadPart = address;
99
100 // Irp = IoBuildSynchronousFsdRequest(IRP_MJ_WRITE, Vcb->device, data, length, &offset, NULL, &context->iosb);
101
102 Irp = IoAllocateIrp(device->StackSize, FALSE);
103
104 if (!Irp) {
105 ERR("IoAllocateIrp failed\n");
106 Status = STATUS_INTERNAL_ERROR;
107 goto exit2;
108 }
109
110 IrpSp = IoGetNextIrpStackLocation(Irp);
111 IrpSp->MajorFunction = IRP_MJ_WRITE;
112
113 if (device->Flags & DO_BUFFERED_IO) {
114 Irp->AssociatedIrp.SystemBuffer = data;
115
116 Irp->Flags = IRP_BUFFERED_IO;
117 } else if (device->Flags & DO_DIRECT_IO) {
118 Irp->MdlAddress = IoAllocateMdl(data, length, FALSE, FALSE, NULL);
119 if (!Irp->MdlAddress) {
120 DbgPrint("IoAllocateMdl failed\n");
121 goto exit;
122 }
123
124 MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoWriteAccess);
125 } else {
126 Irp->UserBuffer = data;
127 }
128
129 IrpSp->Parameters.Write.Length = length;
130 IrpSp->Parameters.Write.ByteOffset = offset;
131
132 Irp->UserIosb = &context->iosb;
133
134 Irp->UserEvent = &context->Event;
135
136 IoSetCompletionRoutine(Irp, write_completion, context, TRUE, TRUE, TRUE);
137
138 // FIXME - support multiple devices
139 Status = IoCallDriver(device, Irp);
140
141 if (Status == STATUS_PENDING) {
142 KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
143 Status = context->iosb.Status;
144 }
145
146 if (!NT_SUCCESS(Status)) {
147 ERR("IoCallDriver returned %08x\n", Status);
148 }
149
150 if (device->Flags & DO_DIRECT_IO) {
151 MmUnlockPages(Irp->MdlAddress);
152 IoFreeMdl(Irp->MdlAddress);
153 }
154
155 exit:
156 IoFreeIrp(Irp);
157
158 exit2:
159 if (context)
160 ExFreePool(context);
161
162 return Status;
163 }
164
165 static NTSTATUS STDCALL write_superblock(device_extension* Vcb, device* device) {
166 NTSTATUS Status;
167 unsigned int i = 0;
168 UINT32 crc32;
169
170 #ifdef __REACTOS__
171 Status = STATUS_INTERNAL_ERROR;
172 #endif
173
174 // FIXME - work with RAID
175
176 // FIXME - only write one superblock if on SSD (?)
177 while (superblock_addrs[i] > 0 && Vcb->length >= superblock_addrs[i] + sizeof(superblock)) {
178 TRACE("writing superblock %u\n", i);
179
180 Vcb->superblock.sb_phys_addr = superblock_addrs[i];
181 RtlCopyMemory(&Vcb->superblock.dev_item, &device->devitem, sizeof(DEV_ITEM));
182
183 crc32 = calc_crc32c(0xffffffff, (UINT8*)&Vcb->superblock.uuid, (ULONG)sizeof(superblock) - sizeof(Vcb->superblock.checksum));
184 crc32 = ~crc32;
185 TRACE("crc32 is %08x\n", crc32);
186 RtlCopyMemory(&Vcb->superblock.checksum, &crc32, sizeof(UINT32));
187
188 Status = write_data_phys(device->devobj, superblock_addrs[i], &Vcb->superblock, sizeof(superblock));
189
190 if (!NT_SUCCESS(Status))
191 break;
192
193 i++;
194 }
195
196 return Status;
197 }
198
199 static BOOL find_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address) {
200 LIST_ENTRY* le;
201 space *s, *bestfit = NULL;
202
203 TRACE("(%p, %llx, %llx, %p)\n", Vcb, c->offset, length, address);
204
205 le = c->space.Flink;
206 while (le != &c->space) {
207 s = CONTAINING_RECORD(le, space, list_entry);
208
209 if (s->type == SPACE_TYPE_FREE) {
210 if (s->size == length) {
211 *address = s->offset;
212 TRACE("returning exact fit at %llx\n", s->offset);
213 return TRUE;
214 } else if (s->size > length && (!bestfit || bestfit->size > s->size)) {
215 bestfit = s;
216 }
217 }
218
219 le = le->Flink;
220 }
221
222 if (bestfit) {
223 TRACE("returning best fit at %llx\n", bestfit->offset);
224 *address = bestfit->offset;
225 return TRUE;
226 }
227
228 return FALSE;
229 }
230
231 void add_to_space_list(chunk* c, UINT64 offset, UINT64 size, UINT8 type) {
232 LIST_ENTRY *le = c->space.Flink, *nextle, *insbef;
233 space *s, *s2, *s3;
234 #ifdef DEBUG_PARANOID
235 UINT64 lastaddr;
236 #endif
237
238 TRACE("(%p, %llx, %llx, %x)\n", c, offset, size, type);
239
240 #ifdef DEBUG_PARANOID
241 // TESTING
242 le = c->space.Flink;
243 while (le != &c->space) {
244 s = CONTAINING_RECORD(le, space, list_entry);
245
246 TRACE("%llx,%llx,%x\n", s->offset, s->size, s->type);
247
248 le = le->Flink;
249 }
250 #endif
251
252 c->space_changed = TRUE;
253
254 le = c->space.Flink;
255 insbef = &c->space;
256 while (le != &c->space) {
257 s = CONTAINING_RECORD(le, space, list_entry);
258 nextle = le->Flink;
259
260 if (s->offset >= offset + size) {
261 insbef = le;
262 break;
263 }
264
265 if (s->offset >= offset && s->offset + s->size <= offset + size) { // delete entirely
266 if (s->offset + s->size == offset + size) {
267 insbef = s->list_entry.Flink;
268 RemoveEntryList(&s->list_entry);
269 ExFreePool(s);
270 break;
271 }
272
273 RemoveEntryList(&s->list_entry);
274 ExFreePool(s);
275 } else if (s->offset < offset && s->offset + s->size > offset + size) { // split in two
276 s3 = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
277 if (!s3) {
278 ERR("out of memory\n");
279 return;
280 }
281
282 s3->offset = offset + size;
283 s3->size = s->size - size - offset + s->offset;
284 s3->type = s->type;
285 InsertHeadList(&s->list_entry, &s3->list_entry);
286 insbef = &s3->list_entry;
287
288 s->size = offset - s->offset;
289 break;
290 } else if (s->offset + s->size > offset && s->offset + s->size <= offset + size) { // truncate before
291 s->size = offset - s->offset;
292 } else if (s->offset < offset + size && s->offset + s->size > offset + size) { // truncate after
293 s->size -= s->offset - offset + size;
294 s->offset = offset + size;
295
296 insbef = le;
297 break;
298 }
299
300 le = nextle;
301 }
302
303 s2 = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
304 if (!s2) {
305 ERR("out of memory\n");
306 return;
307 }
308
309 s2->offset = offset;
310 s2->size = size;
311 s2->type = type;
312 InsertTailList(insbef, &s2->list_entry);
313
314 // merge entries if same type
315
316 if (s2->list_entry.Blink != &c->space) {
317 s = CONTAINING_RECORD(s2->list_entry.Blink, space, list_entry);
318
319 if (s->type == type) {
320 s->size += s2->size;
321
322 RemoveEntryList(&s2->list_entry);
323 ExFreePool(s2);
324
325 s2 = s;
326 }
327 }
328
329 if (s2->list_entry.Flink != &c->space) {
330 s = CONTAINING_RECORD(s2->list_entry.Flink, space, list_entry);
331
332 if (s->type == type) {
333 s2->size += s->size;
334
335 RemoveEntryList(&s->list_entry);
336 ExFreePool(s);
337 }
338 }
339
340 le = c->space.Flink;
341 while (le != &c->space) {
342 s = CONTAINING_RECORD(le, space, list_entry);
343
344 TRACE("%llx,%llx,%x\n", s->offset, s->size, s->type);
345
346 le = le->Flink;
347 }
348
349 #ifdef DEBUG_PARANOID
350 // TESTING
351 lastaddr = c->offset;
352 le = c->space.Flink;
353 while (le != &c->space) {
354 s = CONTAINING_RECORD(le, space, list_entry);
355
356 if (s->offset != lastaddr) {
357 ERR("inconsistency detected!\n");
358 int3;
359 }
360
361 lastaddr = s->offset + s->size;
362
363 le = le->Flink;
364 }
365
366 if (lastaddr != c->offset + c->chunk_item->size) {
367 ERR("inconsistency detected - space doesn't run all the way to end of chunk\n");
368 int3;
369 }
370 #endif
371 }
372
373 chunk* get_chunk_from_address(device_extension* Vcb, UINT64 address) {
374 LIST_ENTRY* le2;
375 chunk* c;
376
377 le2 = Vcb->chunks.Flink;
378 while (le2 != &Vcb->chunks) {
379 c = CONTAINING_RECORD(le2, chunk, list_entry);
380
381 // TRACE("chunk: %llx, %llx\n", c->offset, c->chunk_item->size);
382
383 if (address >= c->offset && address < c->offset + c->chunk_item->size)
384 return c;
385
386 le2 = le2->Flink;
387 }
388
389 return NULL;
390 }
391
392 typedef struct {
393 disk_hole* dh;
394 device* device;
395 } stripe;
396
397 static void add_provisional_disk_hole(device_extension* Vcb, stripe* s, UINT64 max_stripe_size) {
398 // LIST_ENTRY* le = s->device->disk_holes.Flink;
399 // disk_hole* dh;
400
401 // ERR("old holes:\n");
402 // while (le != &s->device->disk_holes) {
403 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
404 //
405 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
406 //
407 // le = le->Flink;
408 // }
409
410 if (s->dh->size <= max_stripe_size) {
411 s->dh->provisional = TRUE;
412 } else {
413 disk_hole* newdh = ExAllocatePoolWithTag(PagedPool, sizeof(disk_hole), ALLOC_TAG);
414 if (!newdh) {
415 ERR("out of memory\n");
416 return;
417 }
418
419 newdh->address = s->dh->address + max_stripe_size;
420 newdh->size = s->dh->size - max_stripe_size;
421 newdh->provisional = FALSE;
422 InsertTailList(&s->device->disk_holes, &newdh->listentry);
423
424 s->dh->size = max_stripe_size;
425 s->dh->provisional = TRUE;
426 }
427
428 // ERR("new holes:\n");
429 // le = s->device->disk_holes.Flink;
430 // while (le != &s->device->disk_holes) {
431 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
432 //
433 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
434 //
435 // le = le->Flink;
436 // }
437 }
438
439 static UINT64 find_new_chunk_address(device_extension* Vcb, UINT64 size) {
440 KEY searchkey;
441 traverse_ptr tp, next_tp;
442 BOOL b;
443 UINT64 lastaddr;
444 NTSTATUS Status;
445
446 searchkey.obj_id = 0x100;
447 searchkey.obj_type = TYPE_CHUNK_ITEM;
448 searchkey.offset = 0;
449
450 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
451 if (!NT_SUCCESS(Status)) {
452 ERR("error - find_item returned %08x\n", Status);
453 return 0xffffffffffffffff;
454 }
455
456 lastaddr = 0;
457
458 do {
459 if (tp.item->key.obj_type == TYPE_CHUNK_ITEM) {
460 if (tp.item->size < sizeof(CHUNK_ITEM)) {
461 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(CHUNK_ITEM));
462 } else {
463 CHUNK_ITEM* ci = (CHUNK_ITEM*)tp.item->data;
464
465 if (tp.item->key.offset >= lastaddr + size) {
466 free_traverse_ptr(&tp);
467 return lastaddr;
468 }
469
470 lastaddr = tp.item->key.offset + ci->size;
471 }
472 }
473
474 b = find_next_item(Vcb, &tp, &next_tp, FALSE);
475 if (b) {
476 free_traverse_ptr(&tp);
477 tp = next_tp;
478
479 if (tp.item->key.obj_id > searchkey.obj_id || tp.item->key.obj_type > searchkey.obj_type)
480 break;
481 }
482 } while (b);
483
484 free_traverse_ptr(&tp);
485
486 return lastaddr;
487 }
488
489 static BOOL increase_dev_item_used(device_extension* Vcb, device* device, UINT64 size, LIST_ENTRY* rollback) {
490 KEY searchkey;
491 traverse_ptr tp;
492 DEV_ITEM* di;
493 NTSTATUS Status;
494
495 searchkey.obj_id = 1;
496 searchkey.obj_type = TYPE_DEV_ITEM;
497 searchkey.offset = device->devitem.dev_id;
498
499 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
500 if (!NT_SUCCESS(Status)) {
501 ERR("error - find_item returned %08x\n", Status);
502 return FALSE;
503 }
504
505 if (keycmp(&tp.item->key, &searchkey)) {
506 ERR("error - could not find DEV_ITEM for device %llx\n", device->devitem.dev_id);
507 free_traverse_ptr(&tp);
508 return FALSE;
509 }
510
511 delete_tree_item(Vcb, &tp, rollback);
512
513 free_traverse_ptr(&tp);
514
515 device->devitem.bytes_used += size;
516
517 di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
518 if (!di) {
519 ERR("out of memory\n");
520 return FALSE;
521 }
522
523 RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM));
524
525 if (!insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, rollback)) {
526 ERR("insert_tree_item failed\n");
527 return FALSE;
528 }
529
530 return TRUE;
531 }
532
533 static void reset_disk_holes(device* device, BOOL commit) {
534 LIST_ENTRY* le = device->disk_holes.Flink;
535 disk_hole* dh;
536
537 // ERR("old holes:\n");
538 // while (le != &device->disk_holes) {
539 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
540 //
541 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
542 //
543 // le = le->Flink;
544 // }
545
546 le = device->disk_holes.Flink;
547 while (le != &device->disk_holes) {
548 LIST_ENTRY* le2 = le->Flink;
549
550 dh = CONTAINING_RECORD(le, disk_hole, listentry);
551
552 if (dh->provisional) {
553 if (commit) {
554 RemoveEntryList(le);
555 ExFreePool(dh);
556 } else {
557 dh->provisional = FALSE;
558 }
559 }
560
561 le = le2;
562 }
563
564 if (!commit) {
565 le = device->disk_holes.Flink;
566 while (le != &device->disk_holes) {
567 LIST_ENTRY* le2 = le->Flink;
568
569 dh = CONTAINING_RECORD(le, disk_hole, listentry);
570
571 while (le2 != &device->disk_holes) {
572 disk_hole* dh2 = CONTAINING_RECORD(le2, disk_hole, listentry);
573
574 if (dh2->address == dh->address + dh->size) {
575 LIST_ENTRY* le3 = le2->Flink;
576 dh->size += dh2->size;
577
578 RemoveEntryList(le2);
579 ExFreePool(dh2);
580
581 le2 = le3;
582 } else
583 break;
584 }
585
586 le = le->Flink;
587 }
588 }
589
590 // ERR("new holes:\n");
591 // le = device->disk_holes.Flink;
592 // while (le != &device->disk_holes) {
593 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
594 //
595 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
596 //
597 // le = le->Flink;
598 // }
599 }
600
601 static NTSTATUS add_to_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset, void* data, ULONG size) {
602 sys_chunk *sc, *sc2;
603 LIST_ENTRY* le;
604 USHORT i;
605
606 if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) {
607 ERR("error - bootstrap is full\n");
608 return STATUS_INTERNAL_ERROR;
609 }
610
611 sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG);
612 if (!sc) {
613 ERR("out of memory\n");
614 return STATUS_INSUFFICIENT_RESOURCES;
615 }
616
617 sc->key.obj_id = obj_id;
618 sc->key.obj_type = obj_type;
619 sc->key.offset = offset;
620 sc->size = size;
621 sc->data = ExAllocatePoolWithTag(PagedPool, sc->size, ALLOC_TAG);
622 if (!sc->data) {
623 ERR("out of memory\n");
624 ExFreePool(sc);
625 return STATUS_INSUFFICIENT_RESOURCES;
626 }
627
628 RtlCopyMemory(sc->data, data, sc->size);
629
630 le = Vcb->sys_chunks.Flink;
631 while (le != &Vcb->sys_chunks) {
632 sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
633
634 if (keycmp(&sc2->key, &sc->key) == 1)
635 break;
636
637 le = le->Flink;
638 }
639 InsertTailList(le, &sc->list_entry);
640
641 Vcb->superblock.n += sizeof(KEY) + size;
642
643 i = 0;
644 le = Vcb->sys_chunks.Flink;
645 while (le != &Vcb->sys_chunks) {
646 sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
647
648 TRACE("%llx,%x,%llx\n", sc2->key.obj_id, sc2->key.obj_type, sc2->key.offset);
649
650 RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], &sc2->key, sizeof(KEY));
651 i += sizeof(KEY);
652
653 RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], sc2->data, sc2->size);
654 i += sc2->size;
655
656 le = le->Flink;
657 }
658
659 return STATUS_SUCCESS;
660 }
661
662 static chunk* alloc_chunk(device_extension* Vcb, UINT64 flags, LIST_ENTRY* rollback) {
663 UINT64 max_stripe_size, max_chunk_size, stripe_size;
664 UINT64 total_size = 0, i, j, logaddr;
665 int num_stripes;
666 disk_hole* dh;
667 stripe* stripes;
668 ULONG cisize;
669 CHUNK_ITEM* ci;
670 CHUNK_ITEM_STRIPE* cis;
671 chunk* c = NULL;
672 space* s = NULL;
673 BOOL success = FALSE;
674 BLOCK_GROUP_ITEM* bgi;
675
676 for (i = 0; i < Vcb->superblock.num_devices; i++) {
677 total_size += Vcb->devices[i].devitem.num_bytes;
678 }
679 TRACE("total_size = %llx\n", total_size);
680
681 if (flags & BLOCK_FLAG_DATA) {
682 max_stripe_size = 0x40000000; // 1 GB
683 max_chunk_size = 10 * max_stripe_size;
684 } else if (flags & BLOCK_FLAG_METADATA) {
685 if (total_size > 0xC80000000) // 50 GB
686 max_stripe_size = 0x40000000; // 1 GB
687 else
688 max_stripe_size = 0x10000000; // 256 MB
689
690 max_chunk_size = max_stripe_size;
691 } else if (flags & BLOCK_FLAG_SYSTEM) {
692 max_stripe_size = 0x2000000; // 32 MB
693 max_chunk_size = 2 * max_stripe_size;
694 }
695
696 // FIXME - make sure whole number of sectors?
697 max_chunk_size = min(max_chunk_size, total_size / 10); // cap at 10%
698
699 TRACE("would allocate a new chunk of %llx bytes and stripe %llx\n", max_chunk_size, max_stripe_size);
700
701 if (flags & BLOCK_FLAG_DUPLICATE) {
702 num_stripes = 2;
703 } else if (flags & BLOCK_FLAG_RAID0) {
704 FIXME("RAID0 not yet supported\n");
705 return NULL;
706 } else if (flags & BLOCK_FLAG_RAID1) {
707 FIXME("RAID1 not yet supported\n");
708 return NULL;
709 } else if (flags & BLOCK_FLAG_RAID10) {
710 FIXME("RAID10 not yet supported\n");
711 return NULL;
712 } else if (flags & BLOCK_FLAG_RAID5) {
713 FIXME("RAID5 not yet supported\n");
714 return NULL;
715 } else if (flags & BLOCK_FLAG_RAID6) {
716 FIXME("RAID6 not yet supported\n");
717 return NULL;
718 } else { // SINGLE
719 num_stripes = 1;
720 }
721
722 stripes = ExAllocatePoolWithTag(PagedPool, sizeof(stripe) * num_stripes, ALLOC_TAG);
723 if (!stripes) {
724 ERR("out of memory\n");
725 return NULL;
726 }
727
728 for (i = 0; i < num_stripes; i++) {
729 stripes[i].dh = NULL;
730
731 for (j = 0; j < Vcb->superblock.num_devices; j++) {
732 LIST_ENTRY* le = Vcb->devices[j].disk_holes.Flink;
733
734 while (le != &Vcb->devices[j].disk_holes) {
735 dh = CONTAINING_RECORD(le, disk_hole, listentry);
736
737 if (!dh->provisional) {
738 if (!stripes[i].dh || dh->size > stripes[i].dh->size) {
739 stripes[i].dh = dh;
740 stripes[i].device = &Vcb->devices[j];
741
742 if (stripes[i].dh->size >= max_stripe_size)
743 break;
744 }
745 }
746
747 le = le->Flink;
748 }
749
750 if (stripes[i].dh && stripes[i].dh->size >= max_stripe_size)
751 break;
752 }
753
754 if (stripes[i].dh) {
755 TRACE("good DH: device %llx, address %llx, size %llx\n", stripes[i].device->devitem.dev_id, stripes[i].dh->address, stripes[i].dh->size);
756 } else {
757 TRACE("good DH not found\n");
758 goto end;
759 }
760
761 add_provisional_disk_hole(Vcb, &stripes[i], max_stripe_size);
762 }
763
764 stripe_size = min(stripes[0].dh->size, max_stripe_size);
765 for (i = 1; i < num_stripes; i++) {
766 stripe_size = min(stripe_size, stripes[1].dh->size);
767 }
768 // FIXME - make sure stripe_size aligned properly
769 // FIXME - obey max_chunk_size
770
771 c = ExAllocatePoolWithTag(PagedPool, sizeof(chunk), ALLOC_TAG);
772 if (!c) {
773 ERR("out of memory\n");
774 goto end;
775 }
776
777 // add CHUNK_ITEM to tree 3
778
779 cisize = sizeof(CHUNK_ITEM) + (num_stripes * sizeof(CHUNK_ITEM_STRIPE));
780 ci = ExAllocatePoolWithTag(PagedPool, cisize, ALLOC_TAG);
781 if (!ci) {
782 ERR("out of memory\n");
783 goto end;
784 }
785
786 ci->size = stripe_size; // FIXME for RAID
787 ci->root_id = Vcb->extent_root->id;
788 ci->stripe_length = 0x10000; // FIXME? BTRFS_STRIPE_LEN in kernel
789 ci->type = flags;
790 ci->opt_io_alignment = ci->stripe_length;
791 ci->opt_io_width = ci->stripe_length;
792 ci->sector_size = stripes[0].device->devitem.minimal_io_size;
793 ci->num_stripes = num_stripes;
794 ci->sub_stripes = 1;
795
796 c->devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * num_stripes, ALLOC_TAG);
797 if (!c->devices) {
798 ERR("out of memory\n");
799 ExFreePool(ci);
800 goto end;
801 }
802
803 for (i = 0; i < num_stripes; i++) {
804 if (i == 0)
805 cis = (CHUNK_ITEM_STRIPE*)&ci[1];
806 else
807 cis = &cis[1];
808
809 cis->dev_id = stripes[i].device->devitem.dev_id;
810 cis->offset = stripes[i].dh->address;
811 cis->dev_uuid = stripes[i].device->devitem.device_uuid;
812
813 c->devices[i] = stripes[i].device;
814 }
815
816 logaddr = find_new_chunk_address(Vcb, ci->size);
817 if (logaddr == 0xffffffffffffffff) {
818 ERR("find_new_chunk_address failed\n");
819 ExFreePool(ci);
820 goto end;
821 }
822
823 if (!insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, logaddr, ci, cisize, NULL, rollback)) {
824 ERR("insert_tree_item failed\n");
825 ExFreePool(ci);
826 goto end;
827 }
828
829 if (flags & BLOCK_FLAG_SYSTEM) {
830 NTSTATUS Status = add_to_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, logaddr, ci, cisize);
831 if (!NT_SUCCESS(Status)) {
832 ERR("add_to_bootstrap returned %08x\n", Status);
833 goto end;
834 }
835 }
836
837 Vcb->superblock.chunk_root_generation = Vcb->superblock.generation;
838
839 c->chunk_item = ExAllocatePoolWithTag(PagedPool, cisize, ALLOC_TAG);
840 if (!c->chunk_item) {
841 ERR("out of memory\n");
842 goto end;
843 }
844
845 RtlCopyMemory(c->chunk_item, ci, cisize);
846 c->size = cisize;
847 c->offset = logaddr;
848 c->used = c->oldused = 0;
849 c->space_changed = FALSE;
850 InitializeListHead(&c->space);
851
852 s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
853 if (!s) {
854 ERR("out of memory\n");
855 goto end;
856 }
857
858 s->offset = c->offset;
859 s->size = c->chunk_item->size;
860 s->type = SPACE_TYPE_FREE;
861 InsertTailList(&c->space, &s->list_entry);
862
863 protect_superblocks(Vcb, c);
864
865 // add BLOCK_GROUP_ITEM to tree 2
866
867 bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG);
868 if (!bgi) {
869 ERR("out of memory\n");
870 goto end;
871 }
872
873 bgi->used = 0;
874 bgi->chunk_tree = 0x100;
875 bgi->flags = flags;
876
877 if (!insert_tree_item(Vcb, Vcb->extent_root, logaddr, TYPE_BLOCK_GROUP_ITEM, ci->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, rollback)) {
878 ERR("insert_tree_item failed\n");
879 ExFreePool(bgi);
880 goto end;
881 }
882
883 // add DEV_EXTENTs to tree 4
884
885 for (i = 0; i < num_stripes; i++) {
886 DEV_EXTENT* de;
887
888 de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG);
889 if (!de) {
890 ERR("out of memory\n");
891 goto end;
892 }
893
894 de->chunktree = Vcb->chunk_root->id;
895 de->objid = 0x100;
896 de->address = logaddr;
897 de->length = ci->size;
898 de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid;
899
900 if (!insert_tree_item(Vcb, Vcb->dev_root, stripes[i].device->devitem.dev_id, TYPE_DEV_EXTENT, stripes[i].dh->address, de, sizeof(DEV_EXTENT), NULL, rollback)) {
901 ERR("insert_tree_item failed\n");
902 ExFreePool(de);
903 goto end;
904 }
905
906 if (!increase_dev_item_used(Vcb, stripes[i].device, ci->size, rollback)) {
907 ERR("increase_dev_item_used failed\n");
908 goto end;
909 }
910 }
911
912 for (i = 0; i < num_stripes; i++) {
913 BOOL b = FALSE;
914 for (j = 0; j < i; j++) {
915 if (stripes[j].device == stripes[i].device)
916 b = TRUE;
917 }
918
919 if (!b)
920 reset_disk_holes(stripes[i].device, TRUE);
921 }
922
923 success = TRUE;
924
925 end:
926 ExFreePool(stripes);
927
928 if (!success) {
929 for (i = 0; i < num_stripes; i++) {
930 BOOL b = FALSE;
931 for (j = 0; j < i; j++) {
932 if (stripes[j].device == stripes[i].device)
933 b = TRUE;
934 }
935
936 if (!b)
937 reset_disk_holes(stripes[i].device, FALSE);
938 }
939
940 if (c) ExFreePool(c);
941 if (s) ExFreePool(s);
942 } else
943 InsertTailList(&Vcb->chunks, &c->list_entry);
944
945 return success ? c : NULL;
946 }
947
948 static void decrease_chunk_usage(chunk* c, UINT64 delta) {
949 c->used -= delta;
950
951 TRACE("decreasing size of chunk %llx by %llx\n", c->offset, delta);
952 }
953
954 static void increase_chunk_usage(chunk* c, UINT64 delta) {
955 c->used += delta;
956
957 TRACE("increasing size of chunk %llx by %llx\n", c->offset, delta);
958 }
959
960 static NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, UINT32 length) {
961 KEY searchkey;
962 traverse_ptr tp;
963 CHUNK_ITEM2* ci;
964 NTSTATUS Status;
965 UINT32 i;
966
967 TRACE("(%p, %llx, %p, %x)\n", Vcb, address, data, length);
968
969 // FIXME - use version cached in Vcb
970
971 searchkey.obj_id = 0x100; // fixed?
972 searchkey.obj_type = TYPE_CHUNK_ITEM;
973 searchkey.offset = address;
974
975 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
976 if (!NT_SUCCESS(Status)) {
977 ERR("error - find_item returned %08x\n", Status);
978 return Status;
979 }
980
981 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
982 ERR("error - unexpected item in chunk tree\n");
983 Status = STATUS_INTERNAL_ERROR;
984 goto end;
985 }
986
987 if (tp.item->size < sizeof(CHUNK_ITEM2)) {
988 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(CHUNK_ITEM2));
989 Status = STATUS_INTERNAL_ERROR;
990 goto end;
991 }
992
993 ci = (CHUNK_ITEM2*)tp.item->data;
994
995 if (tp.item->key.offset > address || tp.item->key.offset + ci->ci.size < address) {
996 ERR("error - address %llx was out of chunk bounds\n", address);
997 Status = STATUS_INTERNAL_ERROR;
998 goto end;
999 }
1000
1001 // FIXME - only do this for chunks marked DUPLICATE?
1002 // FIXME - for multiple writes, if PENDING do waits at the end
1003 // FIXME - work with RAID
1004 for (i = 0; i < ci->ci.num_stripes; i++) {
1005 Status = write_data_phys(Vcb->devices[0].devobj, address - tp.item->key.offset + ci->stripes[i].offset, data, length);
1006 if (!NT_SUCCESS(Status)) {
1007 ERR("error - write_data_phys failed\n");
1008 goto end;
1009 }
1010 }
1011
1012 end:
1013 free_traverse_ptr(&tp);
1014
1015 return Status;
1016 }
1017
1018 static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) {
1019 LIST_ENTRY *le, *nextle;
1020 space *s, *s2;
1021
1022 // // TESTING
1023 // le = c->space.Flink;
1024 // while (le != &c->space) {
1025 // s = CONTAINING_RECORD(le, space, list_entry);
1026 //
1027 // TRACE("%x,%x,%x\n", (UINT32)s->offset, (UINT32)s->size, s->type);
1028 //
1029 // le = le->Flink;
1030 // }
1031
1032 le = c->space.Flink;
1033 while (le != &c->space) {
1034 s = CONTAINING_RECORD(le, space, list_entry);
1035 nextle = le->Flink;
1036
1037 if (s->type == SPACE_TYPE_DELETING)
1038 s->type = SPACE_TYPE_FREE;
1039 else if (s->type == SPACE_TYPE_WRITING)
1040 s->type = SPACE_TYPE_USED;
1041
1042 if (le->Blink != &c->space) {
1043 s2 = CONTAINING_RECORD(le->Blink, space, list_entry);
1044
1045 if (s2->type == s->type) { // do merge
1046 s2->size += s->size;
1047
1048 RemoveEntryList(&s->list_entry);
1049 ExFreePool(s);
1050 }
1051 }
1052
1053 le = nextle;
1054 }
1055
1056 // le = c->space.Flink;
1057 // while (le != &c->space) {
1058 // s = CONTAINING_RECORD(le, space, list_entry);
1059 //
1060 // TRACE("%x,%x,%x\n", (UINT32)s->offset, (UINT32)s->size, s->type);
1061 //
1062 // le = le->Flink;
1063 // }
1064 }
1065
1066 static void clean_space_cache(device_extension* Vcb) {
1067 LIST_ENTRY* le;
1068 chunk* c;
1069
1070 TRACE("(%p)\n", Vcb);
1071
1072 le = Vcb->chunks.Flink;
1073 while (le != &Vcb->chunks) {
1074 c = CONTAINING_RECORD(le, chunk, list_entry);
1075
1076 if (c->space_changed) {
1077 clean_space_cache_chunk(Vcb, c);
1078 c->space_changed = FALSE;
1079 }
1080
1081 le = le->Flink;
1082 }
1083 }
1084
1085 static BOOL trees_consistent(device_extension* Vcb) {
1086 ULONG maxsize = Vcb->superblock.node_size - sizeof(tree_header);
1087 LIST_ENTRY* le;
1088
1089 le = Vcb->tree_cache.Flink;
1090 while (le != &Vcb->tree_cache) {
1091 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1092
1093 if (tc2->write) {
1094 if (tc2->tree->header.num_items == 0 && tc2->tree->parent)
1095 return FALSE;
1096
1097 if (tc2->tree->size > maxsize)
1098 return FALSE;
1099
1100 if (!tc2->tree->has_new_address)
1101 return FALSE;
1102 }
1103
1104 le = le->Flink;
1105 }
1106
1107 return TRUE;
1108 }
1109
1110 static NTSTATUS add_parents(device_extension* Vcb, LIST_ENTRY* rollback) {
1111 LIST_ENTRY* le;
1112 NTSTATUS Status;
1113
1114 le = Vcb->tree_cache.Flink;
1115 while (le != &Vcb->tree_cache) {
1116 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1117
1118 if (tc2->write) {
1119 if (tc2->tree->parent)
1120 add_to_tree_cache(Vcb, tc2->tree->parent, TRUE);
1121 else if (tc2->tree->root != Vcb->chunk_root && tc2->tree->root != Vcb->root_root) {
1122 KEY searchkey;
1123 traverse_ptr tp;
1124
1125 searchkey.obj_id = tc2->tree->root->id;
1126 searchkey.obj_type = TYPE_ROOT_ITEM;
1127 searchkey.offset = 0xffffffffffffffff;
1128
1129 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
1130 if (!NT_SUCCESS(Status)) {
1131 ERR("error - find_item returned %08x\n", Status);
1132 return Status;
1133 }
1134
1135 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1136 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1137 free_traverse_ptr(&tp);
1138 return STATUS_INTERNAL_ERROR;
1139 }
1140
1141 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed
1142 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
1143 if (!ri) {
1144 ERR("out of memory\n");
1145 return STATUS_INSUFFICIENT_RESOURCES;
1146 }
1147
1148 if (tp.item->size > 0)
1149 RtlCopyMemory(ri, tp.item->data, tp.item->size);
1150
1151 RtlZeroMemory(((UINT8*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size);
1152
1153 delete_tree_item(Vcb, &tp, rollback);
1154
1155 if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, 0, ri, sizeof(ROOT_ITEM), NULL, rollback)) {
1156 ERR("insert_tree_item failed\n");
1157 return STATUS_INTERNAL_ERROR;
1158 }
1159 } else {
1160 add_to_tree_cache(Vcb, tp.tree, TRUE);
1161 }
1162
1163 free_traverse_ptr(&tp);
1164 }
1165 }
1166
1167 le = le->Flink;
1168 }
1169
1170 return STATUS_SUCCESS;
1171 }
1172
1173 void print_trees(LIST_ENTRY* tc) {
1174 LIST_ENTRY *le, *le2;
1175
1176 le = tc->Flink;
1177 while (le != tc) {
1178 KEY firstitem = {0xcccccccccccccccc,0xcc,0xcccccccccccccccc};
1179 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1180 UINT32 num_items = 0;
1181
1182 le2 = tc2->tree->itemlist.Flink;
1183 while (le2 != &tc2->tree->itemlist) {
1184 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1185 if (!td->ignore) {
1186 firstitem = td->key;
1187 num_items++;
1188 }
1189 le2 = le2->Flink;
1190 }
1191
1192 ERR("tree: root %llx, first key %llx,%x,%llx, level %x, num_items %x / %x\n",
1193 tc2->tree->header.tree_id, firstitem.obj_id, firstitem.obj_type, firstitem.offset, tc2->tree->header.level, num_items, tc2->tree->header.num_items);
1194
1195 le = le->Flink;
1196 }
1197 }
1198
1199 static void add_parents_to_cache(device_extension* Vcb, tree* t) {
1200 KEY searchkey;
1201 traverse_ptr tp;
1202 NTSTATUS Status;
1203
1204 while (t->parent) {
1205 t = t->parent;
1206
1207 add_to_tree_cache(Vcb, t, TRUE);
1208 }
1209
1210 if (t->root == Vcb->root_root || t->root == Vcb->chunk_root)
1211 return;
1212
1213 searchkey.obj_id = t->root->id;
1214 searchkey.obj_type = TYPE_ROOT_ITEM;
1215 searchkey.offset = 0xffffffffffffffff;
1216
1217 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
1218 if (!NT_SUCCESS(Status)) {
1219 ERR("error - find_item returned %08x\n", Status);
1220 return;
1221 }
1222
1223 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1224 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1225 free_traverse_ptr(&tp);
1226 return;
1227 }
1228
1229 add_to_tree_cache(Vcb, tp.tree, TRUE);
1230
1231 free_traverse_ptr(&tp);
1232 }
1233
1234 static BOOL insert_tree_extent_skinny(device_extension* Vcb, tree* t, chunk* c, UINT64 address, LIST_ENTRY* rollback) {
1235 EXTENT_ITEM_SKINNY_METADATA* eism;
1236 traverse_ptr insert_tp;
1237
1238 eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
1239 if (!eism) {
1240 ERR("out of memory\n");
1241 return FALSE;
1242 }
1243
1244 eism->ei.refcount = 1;
1245 eism->ei.generation = Vcb->superblock.generation;
1246 eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
1247 eism->type = TYPE_TREE_BLOCK_REF;
1248 eism->tbr.offset = t->header.tree_id;
1249
1250 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, t->header.level, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, rollback)) {
1251 ERR("insert_tree_item failed\n");
1252 ExFreePool(eism);
1253 return FALSE;
1254 }
1255
1256 add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_WRITING);
1257
1258 // add_to_tree_cache(tc, insert_tp.tree, TRUE);
1259 add_parents_to_cache(Vcb, insert_tp.tree);
1260
1261 free_traverse_ptr(&insert_tp);
1262
1263 t->new_address = address;
1264 t->has_new_address = TRUE;
1265
1266 return TRUE;
1267 }
1268
1269 static BOOL insert_tree_extent(device_extension* Vcb, tree* t, chunk* c, LIST_ENTRY* rollback) {
1270 UINT64 address;
1271 EXTENT_ITEM_TREE2* eit2;
1272 traverse_ptr insert_tp;
1273
1274 TRACE("(%p, %p, %p, %p)\n", Vcb, t, c, rollback);
1275
1276 if (!find_address_in_chunk(Vcb, c, Vcb->superblock.node_size, &address))
1277 return FALSE;
1278
1279 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)
1280 return insert_tree_extent_skinny(Vcb, t, c, address, rollback);
1281
1282 eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
1283 if (!eit2) {
1284 ERR("out of memory\n");
1285 return FALSE;
1286 }
1287
1288 eit2->eit.extent_item.refcount = 1;
1289 eit2->eit.extent_item.generation = Vcb->superblock.generation;
1290 eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
1291 // eit2->eit.firstitem = wt->firstitem;
1292 eit2->eit.level = t->header.level;
1293 eit2->type = TYPE_TREE_BLOCK_REF;
1294 eit2->tbr.offset = t->header.tree_id;
1295
1296 // #ifdef DEBUG_PARANOID
1297 // if (wt->firstitem.obj_type == 0xcc) { // TESTING
1298 // ERR("error - firstitem not set (wt = %p, tree = %p, address = %x)\n", wt, wt->tree, (UINT32)address);
1299 // ERR("num_items = %u, level = %u, root = %x, delete = %u\n", wt->tree->header.num_items, wt->tree->header.level, (UINT32)wt->tree->root->id, wt->delete);
1300 // int3;
1301 // }
1302 // #endif
1303
1304 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, rollback)) {
1305 ERR("insert_tree_item failed\n");
1306 ExFreePool(eit2);
1307 return FALSE;
1308 }
1309
1310 add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_WRITING);
1311
1312 // add_to_tree_cache(tc, insert_tp.tree, TRUE);
1313 add_parents_to_cache(Vcb, insert_tp.tree);
1314
1315 free_traverse_ptr(&insert_tp);
1316
1317 t->new_address = address;
1318 t->has_new_address = TRUE;
1319
1320 return TRUE;
1321 }
1322
1323 static NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, LIST_ENTRY* rollback) {
1324 chunk *origchunk = NULL, *c;
1325 LIST_ENTRY* le;
1326 UINT64 flags = t->flags;
1327
1328 if (flags == 0)
1329 flags = (t->root->id == BTRFS_ROOT_CHUNK ? BLOCK_FLAG_SYSTEM : BLOCK_FLAG_METADATA) | BLOCK_FLAG_DUPLICATE;
1330
1331 // TRACE("flags = %x\n", (UINT32)wt->flags);
1332
1333 // if (!chunk_test) { // TESTING
1334 // if ((c = alloc_chunk(Vcb, flags))) {
1335 // if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
1336 // if (insert_tree_extent(Vcb, t, c)) {
1337 // chunk_test = TRUE;
1338 // return STATUS_SUCCESS;
1339 // }
1340 // }
1341 // }
1342 // }
1343
1344 if (t->has_address) {
1345 origchunk = get_chunk_from_address(Vcb, t->header.address);
1346
1347 if (insert_tree_extent(Vcb, t, origchunk, rollback))
1348 return STATUS_SUCCESS;
1349 }
1350
1351 le = Vcb->chunks.Flink;
1352 while (le != &Vcb->chunks) {
1353 c = CONTAINING_RECORD(le, chunk, list_entry);
1354
1355 // FIXME - make sure to avoid superblocks
1356
1357 if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
1358 if (insert_tree_extent(Vcb, t, c, rollback))
1359 return STATUS_SUCCESS;
1360 }
1361
1362 le = le->Flink;
1363 }
1364
1365 // allocate new chunk if necessary
1366 if ((c = alloc_chunk(Vcb, flags, rollback))) {
1367 if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
1368 if (insert_tree_extent(Vcb, t, c, rollback))
1369 return STATUS_SUCCESS;
1370 }
1371 }
1372
1373 ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size);
1374
1375 return STATUS_DISK_FULL;
1376 }
1377
1378 static BOOL reduce_tree_extent_skinny(device_extension* Vcb, UINT64 address, tree* t, LIST_ENTRY* rollback) {
1379 KEY searchkey;
1380 traverse_ptr tp;
1381 chunk* c;
1382 EXTENT_ITEM_SKINNY_METADATA* eism;
1383 NTSTATUS Status;
1384
1385 searchkey.obj_id = address;
1386 searchkey.obj_type = TYPE_METADATA_ITEM;
1387 searchkey.offset = 0xffffffffffffffff;
1388
1389 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
1390 if (!NT_SUCCESS(Status)) {
1391 ERR("error - find_item returned %08x\n", Status);
1392 return FALSE;
1393 }
1394
1395 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1396 TRACE("could not find %llx,%x,%llx in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1397 free_traverse_ptr(&tp);
1398 return FALSE;
1399 }
1400
1401 if (tp.item->size < sizeof(EXTENT_ITEM_SKINNY_METADATA)) {
1402 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_SKINNY_METADATA));
1403 free_traverse_ptr(&tp);
1404 return FALSE;
1405 }
1406
1407 delete_tree_item(Vcb, &tp, rollback);
1408
1409 eism = (EXTENT_ITEM_SKINNY_METADATA*)tp.item->data;
1410 if (t->header.level == 0 && eism->ei.flags & EXTENT_ITEM_SHARED_BACKREFS && eism->type == TYPE_TREE_BLOCK_REF) {
1411 // convert shared data extents
1412
1413 LIST_ENTRY* le = t->itemlist.Flink;
1414 while (le != &t->itemlist) {
1415 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1416
1417 TRACE("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1418
1419 if (!td->ignore && !td->inserted) {
1420 if (td->key.obj_type == TYPE_EXTENT_DATA) {
1421 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1422
1423 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1424 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1425
1426 if (ed2->address != 0) {
1427 TRACE("trying to convert shared data extent %llx,%llx\n", ed2->address, ed2->size);
1428 convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
1429 }
1430 }
1431 }
1432 }
1433
1434 le = le->Flink;
1435 }
1436
1437 t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
1438 }
1439
1440 c = get_chunk_from_address(Vcb, address);
1441
1442 if (c) {
1443 decrease_chunk_usage(c, Vcb->superblock.node_size);
1444
1445 add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_DELETING);
1446 } else
1447 ERR("could not find chunk for address %llx\n", address);
1448
1449 free_traverse_ptr(&tp);
1450
1451 return TRUE;
1452 }
1453
1454 // TESTING
1455 // static void check_tree_num_items(tree* t) {
1456 // LIST_ENTRY* le2;
1457 // UINT32 ni;
1458 //
1459 // le2 = t->itemlist.Flink;
1460 // ni = 0;
1461 // while (le2 != &t->itemlist) {
1462 // tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1463 // if (!td->ignore)
1464 // ni++;
1465 // le2 = le2->Flink;
1466 // }
1467 //
1468 // if (t->header.num_items != ni) {
1469 // ERR("tree %p not okay: num_items was %x, expecting %x\n", t, ni, t->header.num_items);
1470 // int3;
1471 // } else {
1472 // ERR("tree %p okay\n", t);
1473 // }
1474 // }
1475 //
1476 // static void check_trees_num_items(LIST_ENTRY* tc) {
1477 // LIST_ENTRY* le = tc->Flink;
1478 // while (le != tc) {
1479 // tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1480 //
1481 // check_tree_num_items(tc2->tree);
1482 //
1483 // le = le->Flink;
1484 // }
1485 // }
1486
1487 static void convert_old_tree_extent(device_extension* Vcb, tree_data* td, tree* t, LIST_ENTRY* rollback) {
1488 KEY searchkey;
1489 traverse_ptr tp, tp2, insert_tp;
1490 EXTENT_REF_V0* erv0;
1491 NTSTATUS Status;
1492
1493 TRACE("(%p, %p, %p)\n", Vcb, td, t);
1494
1495 searchkey.obj_id = td->treeholder.address;
1496 searchkey.obj_type = TYPE_EXTENT_REF_V0;
1497 searchkey.offset = 0xffffffffffffffff;
1498
1499 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
1500 if (!NT_SUCCESS(Status)) {
1501 ERR("error - find_item returned %08x\n", Status);
1502 return;
1503 }
1504
1505 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1506 TRACE("could not find EXTENT_REF_V0 for %llx\n", searchkey.obj_id);
1507 free_traverse_ptr(&tp);
1508 return;
1509 }
1510
1511 searchkey.obj_id = td->treeholder.address;
1512 searchkey.obj_type = TYPE_EXTENT_ITEM;
1513 searchkey.offset = Vcb->superblock.node_size;
1514
1515 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
1516 if (!NT_SUCCESS(Status)) {
1517 ERR("error - find_item returned %08x\n", Status);
1518 free_traverse_ptr(&tp);
1519 return;
1520 }
1521
1522 if (keycmp(&searchkey, &tp2.item->key)) {
1523 ERR("could not find %llx,%x,%llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1524 free_traverse_ptr(&tp2);
1525 free_traverse_ptr(&tp);
1526 return;
1527 }
1528
1529 if (tp.item->size < sizeof(EXTENT_REF_V0)) {
1530 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_REF_V0));
1531 free_traverse_ptr(&tp2);
1532 free_traverse_ptr(&tp);
1533 return;
1534 }
1535
1536 erv0 = (EXTENT_REF_V0*)tp.item->data;
1537
1538 delete_tree_item(Vcb, &tp, rollback);
1539 delete_tree_item(Vcb, &tp2, rollback);
1540
1541 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
1542 EXTENT_ITEM_SKINNY_METADATA* eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
1543
1544 if (!eism) {
1545 ERR("out of memory\n");
1546 free_traverse_ptr(&tp2);
1547 free_traverse_ptr(&tp);
1548 return;
1549 }
1550
1551 eism->ei.refcount = 1;
1552 eism->ei.generation = erv0->gen;
1553 eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
1554 eism->type = TYPE_TREE_BLOCK_REF;
1555 eism->tbr.offset = t->header.tree_id;
1556
1557 if (!insert_tree_item(Vcb, Vcb->extent_root, td->treeholder.address, TYPE_METADATA_ITEM, t->header.level -1, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, rollback)) {
1558 ERR("insert_tree_item failed\n");
1559 free_traverse_ptr(&tp2);
1560 free_traverse_ptr(&tp);
1561 return;
1562 }
1563 } else {
1564 EXTENT_ITEM_TREE2* eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
1565
1566 if (!eit2) {
1567 ERR("out of memory\n");
1568 free_traverse_ptr(&tp2);
1569 free_traverse_ptr(&tp);
1570 return;
1571 }
1572
1573 eit2->eit.extent_item.refcount = 1;
1574 eit2->eit.extent_item.generation = erv0->gen;
1575 eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
1576 eit2->eit.firstitem = td->key;
1577 eit2->eit.level = t->header.level - 1;
1578 eit2->type = TYPE_TREE_BLOCK_REF;
1579 eit2->tbr.offset = t->header.tree_id;
1580
1581 if (!insert_tree_item(Vcb, Vcb->extent_root, td->treeholder.address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, rollback)) {
1582 ERR("insert_tree_item failed\n");
1583 free_traverse_ptr(&tp2);
1584 free_traverse_ptr(&tp);
1585 return;
1586 }
1587 }
1588
1589 // add_to_tree_cache(tc, insert_tp.tree, TRUE);
1590 add_parents_to_cache(Vcb, insert_tp.tree);
1591 add_parents_to_cache(Vcb, tp.tree);
1592 add_parents_to_cache(Vcb, tp2.tree);
1593
1594 free_traverse_ptr(&insert_tp);
1595
1596 free_traverse_ptr(&tp2);
1597 free_traverse_ptr(&tp);
1598 }
1599
1600 static NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* t, LIST_ENTRY* rollback) {
1601 KEY searchkey;
1602 traverse_ptr tp;
1603 EXTENT_ITEM* ei;
1604 EXTENT_ITEM_V0* eiv0;
1605 chunk* c;
1606 NTSTATUS Status;
1607
1608 // FIXME - deal with refcounts > 1
1609
1610 TRACE("(%p, %llx, %p)\n", Vcb, address, t);
1611
1612 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
1613 if (reduce_tree_extent_skinny(Vcb, address, t, rollback)) {
1614 return STATUS_SUCCESS;
1615 }
1616 }
1617
1618 searchkey.obj_id = address;
1619 searchkey.obj_type = TYPE_EXTENT_ITEM;
1620 searchkey.offset = Vcb->superblock.node_size;
1621
1622 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
1623 if (!NT_SUCCESS(Status)) {
1624 ERR("error - find_item returned %08x\n", Status);
1625 return Status;
1626 }
1627
1628 if (keycmp(&tp.item->key, &searchkey)) {
1629 ERR("could not find %llx,%x,%llx in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1630 int3;
1631 free_traverse_ptr(&tp);
1632 return STATUS_INTERNAL_ERROR;
1633 }
1634
1635 if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
1636 eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
1637
1638 if (eiv0->refcount > 1) {
1639 FIXME("FIXME - cannot deal with refcounts larger than 1 at present (eiv0->refcount == %llx)\n", eiv0->refcount);
1640 free_traverse_ptr(&tp);
1641 return STATUS_INTERNAL_ERROR;
1642 }
1643 } else {
1644 if (tp.item->size < sizeof(EXTENT_ITEM)) {
1645 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
1646 free_traverse_ptr(&tp);
1647 return STATUS_INTERNAL_ERROR;
1648 }
1649
1650 ei = (EXTENT_ITEM*)tp.item->data;
1651
1652 if (ei->refcount > 1) {
1653 FIXME("FIXME - cannot deal with refcounts larger than 1 at present (ei->refcount == %llx)\n", ei->refcount);
1654 free_traverse_ptr(&tp);
1655 return STATUS_INTERNAL_ERROR;
1656 }
1657
1658 if (t->header.level == 0 && ei->flags & EXTENT_ITEM_SHARED_BACKREFS) {
1659 // convert shared data extents
1660
1661 LIST_ENTRY* le = t->itemlist.Flink;
1662 while (le != &t->itemlist) {
1663 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1664
1665 TRACE("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1666
1667 if (!td->ignore && !td->inserted) {
1668 if (td->key.obj_type == TYPE_EXTENT_DATA) {
1669 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1670
1671 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1672 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1673
1674 if (ed2->address != 0) {
1675 TRACE("trying to convert shared data extent %llx,%llx\n", ed2->address, ed2->size);
1676 convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
1677 }
1678 }
1679 }
1680 }
1681
1682 le = le->Flink;
1683 }
1684
1685 t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
1686 }
1687 }
1688
1689 delete_tree_item(Vcb, &tp, rollback);
1690
1691 // if EXTENT_ITEM_V0, delete corresponding B4 item
1692 if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
1693 traverse_ptr tp2;
1694
1695 searchkey.obj_id = address;
1696 searchkey.obj_type = TYPE_EXTENT_REF_V0;
1697 searchkey.offset = 0xffffffffffffffff;
1698
1699 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
1700 if (!NT_SUCCESS(Status)) {
1701 ERR("error - find_item returned %08x\n", Status);
1702 free_traverse_ptr(&tp);
1703 return Status;
1704 }
1705
1706 if (tp2.item->key.obj_id == searchkey.obj_id && tp2.item->key.obj_type == searchkey.obj_type) {
1707 delete_tree_item(Vcb, &tp2, rollback);
1708 }
1709 free_traverse_ptr(&tp2);
1710 }
1711
1712 if (!(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1713 LIST_ENTRY* le;
1714
1715 // when writing old internal trees, convert related extents
1716
1717 le = t->itemlist.Flink;
1718 while (le != &t->itemlist) {
1719 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1720
1721 // ERR("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1722
1723 if (!td->ignore && !td->inserted) {
1724 if (t->header.level > 0) {
1725 convert_old_tree_extent(Vcb, td, t, rollback);
1726 } else if (td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA)) {
1727 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1728
1729 if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1730 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1731
1732 if (ed2->address != 0) {
1733 TRACE("trying to convert old data extent %llx,%llx\n", ed2->address, ed2->size);
1734 convert_old_data_extent(Vcb, ed2->address, ed2->size, rollback);
1735 }
1736 }
1737 }
1738 }
1739
1740 le = le->Flink;
1741 }
1742 }
1743
1744 c = get_chunk_from_address(Vcb, address);
1745
1746 if (c) {
1747 decrease_chunk_usage(c, tp.item->key.offset);
1748
1749 add_to_space_list(c, address, tp.item->key.offset, SPACE_TYPE_DELETING);
1750 } else
1751 ERR("could not find chunk for address %llx\n", address);
1752
1753 free_traverse_ptr(&tp);
1754
1755 return STATUS_SUCCESS;
1756 }
1757
1758 static NTSTATUS allocate_tree_extents(device_extension* Vcb, LIST_ENTRY* rollback) {
1759 LIST_ENTRY* le;
1760 NTSTATUS Status;
1761
1762 TRACE("(%p)\n", Vcb);
1763
1764 le = Vcb->tree_cache.Flink;
1765 while (le != &Vcb->tree_cache) {
1766 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1767
1768 if (tc2->write && !tc2->tree->has_new_address) {
1769 chunk* c;
1770
1771 Status = get_tree_new_address(Vcb, tc2->tree, rollback);
1772 if (!NT_SUCCESS(Status)) {
1773 ERR("get_tree_new_address returned %08x\n", Status);
1774 return Status;
1775 }
1776
1777 TRACE("allocated extent %llx\n", tc2->tree->new_address);
1778
1779 if (tc2->tree->has_address) {
1780 Status = reduce_tree_extent(Vcb, tc2->tree->header.address, tc2->tree, rollback);
1781
1782 if (!NT_SUCCESS(Status)) {
1783 ERR("reduce_tree_extent returned %08x\n", Status);
1784 return Status;
1785 }
1786 }
1787
1788 c = get_chunk_from_address(Vcb, tc2->tree->new_address);
1789
1790 if (c) {
1791 increase_chunk_usage(c, Vcb->superblock.node_size);
1792 } else {
1793 ERR("could not find chunk for address %llx\n", tc2->tree->new_address);
1794 return STATUS_INTERNAL_ERROR;
1795 }
1796 }
1797
1798 le = le->Flink;
1799 }
1800
1801 return STATUS_SUCCESS;
1802 }
1803
1804 static NTSTATUS update_root_root(device_extension* Vcb, LIST_ENTRY* rollback) {
1805 LIST_ENTRY* le;
1806 NTSTATUS Status;
1807
1808 TRACE("(%p)\n", Vcb);
1809
1810 le = Vcb->tree_cache.Flink;
1811 while (le != &Vcb->tree_cache) {
1812 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1813
1814 if (tc2->write && !tc2->tree->parent) {
1815 if (tc2->tree->root != Vcb->root_root && tc2->tree->root != Vcb->chunk_root) {
1816 KEY searchkey;
1817 traverse_ptr tp;
1818
1819 searchkey.obj_id = tc2->tree->root->id;
1820 searchkey.obj_type = TYPE_ROOT_ITEM;
1821 searchkey.offset = 0xffffffffffffffff;
1822
1823 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
1824 if (!NT_SUCCESS(Status)) {
1825 ERR("error - find_item returned %08x\n", Status);
1826 return Status;
1827 }
1828
1829 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1830 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1831 free_traverse_ptr(&tp);
1832 return STATUS_INTERNAL_ERROR;
1833 }
1834
1835 TRACE("updating the address for root %llx to %llx\n", searchkey.obj_id, tc2->tree->new_address);
1836
1837 tc2->tree->root->root_item.block_number = tc2->tree->new_address;
1838 tc2->tree->root->root_item.root_level = tc2->tree->header.level;
1839 tc2->tree->root->root_item.generation = Vcb->superblock.generation;
1840 tc2->tree->root->root_item.generation2 = Vcb->superblock.generation;
1841
1842 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry
1843 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
1844
1845 if (!ri) {
1846 ERR("out of memory\n");
1847 return STATUS_INSUFFICIENT_RESOURCES;
1848 }
1849
1850 RtlCopyMemory(ri, &tc2->tree->root->root_item, sizeof(ROOT_ITEM));
1851
1852 delete_tree_item(Vcb, &tp, rollback);
1853
1854 if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, 0, ri, sizeof(ROOT_ITEM), NULL, rollback)) {
1855 ERR("insert_tree_item failed\n");
1856 return STATUS_INTERNAL_ERROR;
1857 }
1858 } else
1859 RtlCopyMemory(tp.item->data, &tc2->tree->root->root_item, sizeof(ROOT_ITEM));
1860
1861 free_traverse_ptr(&tp);
1862 }
1863
1864 tc2->tree->root->treeholder.address = tc2->tree->new_address;
1865 }
1866
1867 le = le->Flink;
1868 }
1869
1870 return STATUS_SUCCESS;
1871 }
1872
1873 enum write_tree_status {
1874 WriteTreeStatus_Pending,
1875 WriteTreeStatus_Success,
1876 WriteTreeStatus_Error,
1877 WriteTreeStatus_Cancelling,
1878 WriteTreeStatus_Cancelled
1879 };
1880
1881 struct write_tree_context;
1882
1883 typedef struct {
1884 struct write_tree_context* context;
1885 UINT8* buf;
1886 device* device;
1887 PIRP Irp;
1888 IO_STATUS_BLOCK iosb;
1889 enum write_tree_status status;
1890 LIST_ENTRY list_entry;
1891 } write_tree_stripe;
1892
1893 typedef struct {
1894 KEVENT Event;
1895 LIST_ENTRY stripes;
1896 } write_tree_context;
1897
1898 static NTSTATUS STDCALL write_tree_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
1899 write_tree_stripe* stripe = conptr;
1900 write_tree_context* context = (write_tree_context*)stripe->context;
1901 LIST_ENTRY* le;
1902 BOOL complete;
1903
1904 if (stripe->status == WriteTreeStatus_Cancelling) {
1905 stripe->status = WriteTreeStatus_Cancelled;
1906 goto end;
1907 }
1908
1909 stripe->iosb = Irp->IoStatus;
1910
1911 if (NT_SUCCESS(Irp->IoStatus.Status)) {
1912 stripe->status = WriteTreeStatus_Success;
1913 } else {
1914 le = context->stripes.Flink;
1915
1916 stripe->status = WriteTreeStatus_Error;
1917
1918 while (le != &context->stripes) {
1919 write_tree_stripe* s2 = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
1920
1921 if (s2->status == WriteTreeStatus_Pending) {
1922 s2->status = WriteTreeStatus_Cancelling;
1923 IoCancelIrp(s2->Irp);
1924 }
1925
1926 le = le->Flink;
1927 }
1928 }
1929
1930 end:
1931 le = context->stripes.Flink;
1932 complete = TRUE;
1933
1934 while (le != &context->stripes) {
1935 write_tree_stripe* s2 = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
1936
1937 if (s2->status == WriteTreeStatus_Pending || s2->status == WriteTreeStatus_Cancelling) {
1938 complete = FALSE;
1939 break;
1940 }
1941
1942 le = le->Flink;
1943 }
1944
1945 if (complete)
1946 KeSetEvent(&context->Event, 0, FALSE);
1947
1948 return STATUS_MORE_PROCESSING_REQUIRED;
1949 }
1950
1951 static NTSTATUS write_tree(device_extension* Vcb, UINT64 addr, UINT8* data, write_tree_context* wtc) {
1952 chunk* c;
1953 CHUNK_ITEM_STRIPE* cis;
1954 write_tree_stripe* stripe;
1955 UINT64 i;
1956
1957 c = get_chunk_from_address(Vcb, addr);
1958
1959 if (!c) {
1960 ERR("get_chunk_from_address failed\n");
1961 return STATUS_INTERNAL_ERROR;
1962 }
1963
1964 cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1965
1966 // FIXME - make this work with RAID
1967
1968 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1969 PIO_STACK_LOCATION IrpSp;
1970
1971 // FIXME - handle missing devices
1972
1973 stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_tree_stripe), ALLOC_TAG);
1974 if (!stripe) {
1975 ERR("out of memory\n");
1976 return STATUS_INSUFFICIENT_RESOURCES;
1977 }
1978
1979 stripe->context = (struct write_tree_context*)wtc;
1980 stripe->buf = data;
1981 stripe->device = c->devices[i];
1982 RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK));
1983 stripe->status = WriteTreeStatus_Pending;
1984
1985 stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, FALSE);
1986
1987 if (!stripe->Irp) {
1988 ERR("IoAllocateIrp failed\n");
1989 return STATUS_INTERNAL_ERROR;
1990 }
1991
1992 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
1993 IrpSp->MajorFunction = IRP_MJ_WRITE;
1994
1995 if (stripe->device->devobj->Flags & DO_BUFFERED_IO) {
1996 stripe->Irp->AssociatedIrp.SystemBuffer = data;
1997
1998 stripe->Irp->Flags = IRP_BUFFERED_IO;
1999 } else if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
2000 stripe->Irp->MdlAddress = IoAllocateMdl(data, Vcb->superblock.node_size, FALSE, FALSE, NULL);
2001 if (!stripe->Irp->MdlAddress) {
2002 ERR("IoAllocateMdl failed\n");
2003 return STATUS_INTERNAL_ERROR;
2004 }
2005
2006 MmProbeAndLockPages(stripe->Irp->MdlAddress, KernelMode, IoWriteAccess);
2007 } else {
2008 stripe->Irp->UserBuffer = data;
2009 }
2010
2011 IrpSp->Parameters.Write.Length = Vcb->superblock.node_size;
2012 IrpSp->Parameters.Write.ByteOffset.QuadPart = addr - c->offset + cis[i].offset;
2013
2014 stripe->Irp->UserIosb = &stripe->iosb;
2015
2016 IoSetCompletionRoutine(stripe->Irp, write_tree_completion, stripe, TRUE, TRUE, TRUE);
2017
2018 InsertTailList(&wtc->stripes, &stripe->list_entry);
2019 }
2020
2021 return STATUS_SUCCESS;
2022 }
2023
2024 static void free_stripes(write_tree_context* wtc) {
2025 LIST_ENTRY *le, *le2, *nextle;
2026
2027 le = wtc->stripes.Flink;
2028 while (le != &wtc->stripes) {
2029 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
2030
2031 if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
2032 MmUnlockPages(stripe->Irp->MdlAddress);
2033 IoFreeMdl(stripe->Irp->MdlAddress);
2034 }
2035
2036 le = le->Flink;
2037 }
2038
2039 le = wtc->stripes.Flink;
2040 while (le != &wtc->stripes) {
2041 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
2042
2043 nextle = le->Flink;
2044
2045 if (stripe->buf) {
2046 ExFreePool(stripe->buf);
2047
2048 le2 = le->Flink;
2049 while (le2 != &wtc->stripes) {
2050 write_tree_stripe* s2 = CONTAINING_RECORD(le2, write_tree_stripe, list_entry);
2051
2052 if (s2->buf == stripe->buf)
2053 s2->buf = NULL;
2054
2055 le2 = le2->Flink;
2056 }
2057
2058 }
2059
2060 ExFreePool(stripe);
2061
2062 le = nextle;
2063 }
2064 }
2065
2066 static NTSTATUS write_trees(device_extension* Vcb) {
2067 UINT8 level;
2068 UINT8 *data, *body;
2069 UINT32 crc32;
2070 NTSTATUS Status;
2071 LIST_ENTRY* le;
2072 write_tree_context* wtc;
2073
2074 TRACE("(%p)\n", Vcb);
2075
2076 for (level = 0; level <= 255; level++) {
2077 BOOL nothing_found = TRUE;
2078
2079 TRACE("level = %u\n", level);
2080
2081 le = Vcb->tree_cache.Flink;
2082 while (le != &Vcb->tree_cache) {
2083 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
2084
2085 if (tc2->write && tc2->tree->header.level == level) {
2086 KEY firstitem, searchkey;
2087 LIST_ENTRY* le2;
2088 traverse_ptr tp;
2089 EXTENT_ITEM_TREE* eit;
2090
2091 if (!tc2->tree->has_new_address) {
2092 ERR("error - tried to write tree with no new address\n");
2093 int3;
2094 }
2095
2096 le2 = tc2->tree->itemlist.Flink;
2097 while (le2 != &tc2->tree->itemlist) {
2098 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2099 if (!td->ignore) {
2100 firstitem = td->key;
2101 break;
2102 }
2103 le2 = le2->Flink;
2104 }
2105
2106 if (tc2->tree->parent) {
2107 tc2->tree->paritem->key = firstitem;
2108 tc2->tree->paritem->treeholder.address = tc2->tree->new_address;
2109 tc2->tree->paritem->treeholder.generation = Vcb->superblock.generation;
2110 }
2111
2112 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
2113 searchkey.obj_id = tc2->tree->new_address;
2114 searchkey.obj_type = TYPE_EXTENT_ITEM;
2115 searchkey.offset = Vcb->superblock.node_size;
2116
2117 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2118 if (!NT_SUCCESS(Status)) {
2119 ERR("error - find_item returned %08x\n", Status);
2120 return Status;
2121 }
2122
2123 if (keycmp(&searchkey, &tp.item->key)) {
2124 // traverse_ptr next_tp;
2125 // BOOL b;
2126 // tree_data* paritem;
2127
2128 ERR("could not find %llx,%x,%llx in extent_root (found %llx,%x,%llx instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
2129 free_traverse_ptr(&tp);
2130
2131 // searchkey.obj_id = 0;
2132 // searchkey.obj_type = 0;
2133 // searchkey.offset = 0;
2134 //
2135 // find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2136 //
2137 // paritem = NULL;
2138 // do {
2139 // if (tp.tree->paritem != paritem) {
2140 // paritem = tp.tree->paritem;
2141 // ERR("paritem: %llx,%x,%llx\n", paritem->key.obj_id, paritem->key.obj_type, paritem->key.offset);
2142 // }
2143 //
2144 // ERR("%llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
2145 //
2146 // b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
2147 // if (b) {
2148 // free_traverse_ptr(&tp);
2149 // tp = next_tp;
2150 // }
2151 // } while (b);
2152 //
2153 // free_traverse_ptr(&tp);
2154
2155 return STATUS_INTERNAL_ERROR;
2156 }
2157
2158 if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
2159 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
2160 free_traverse_ptr(&tp);
2161 return STATUS_INTERNAL_ERROR;
2162 }
2163
2164 eit = (EXTENT_ITEM_TREE*)tp.item->data;
2165 eit->firstitem = firstitem;
2166
2167 free_traverse_ptr(&tp);
2168 }
2169
2170 nothing_found = FALSE;
2171 }
2172
2173 le = le->Flink;
2174 }
2175
2176 if (nothing_found)
2177 break;
2178 }
2179
2180 TRACE("allocated tree extents\n");
2181
2182 wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_tree_context), ALLOC_TAG);
2183 if (!wtc) {
2184 ERR("out of memory\n");
2185 return STATUS_INSUFFICIENT_RESOURCES;
2186 }
2187
2188 KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE);
2189 InitializeListHead(&wtc->stripes);
2190
2191 le = Vcb->tree_cache.Flink;
2192 while (le != &Vcb->tree_cache) {
2193 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
2194 #ifdef DEBUG_PARANOID
2195 UINT32 num_items = 0, size = 0;
2196 LIST_ENTRY* le2;
2197 BOOL crash = FALSE;
2198 #endif
2199
2200 if (tc2->write) {
2201 #ifdef DEBUG_PARANOID
2202 le2 = tc2->tree->itemlist.Flink;
2203 while (le2 != &tc2->tree->itemlist) {
2204 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2205 if (!td->ignore) {
2206 num_items++;
2207
2208 if (tc2->tree->header.level == 0)
2209 size += td->size;
2210 }
2211 le2 = le2->Flink;
2212 }
2213
2214 if (tc2->tree->header.level == 0)
2215 size += num_items * sizeof(leaf_node);
2216 else
2217 size += num_items * sizeof(internal_node);
2218
2219 if (num_items != tc2->tree->header.num_items) {
2220 ERR("tree %llx, level %x: num_items was %x, expected %x\n", tc2->tree->root->id, tc2->tree->header.level, num_items, tc2->tree->header.num_items);
2221 crash = TRUE;
2222 }
2223
2224 if (size != tc2->tree->size) {
2225 ERR("tree %llx, level %x: size was %x, expected %x\n", tc2->tree->root->id, tc2->tree->header.level, size, tc2->tree->size);
2226 crash = TRUE;
2227 }
2228
2229 if (tc2->tree->header.num_items == 0 && tc2->tree->parent) {
2230 ERR("tree %llx, level %x: tried to write empty tree with parent\n", tc2->tree->root->id, tc2->tree->header.level);
2231 crash = TRUE;
2232 }
2233
2234 if (tc2->tree->size > Vcb->superblock.node_size - sizeof(tree_header)) {
2235 ERR("tree %llx, level %x: tried to write overlarge tree (%x > %x)\n", tc2->tree->root->id, tc2->tree->header.level, tc2->tree->size, Vcb->superblock.node_size - sizeof(tree_header));
2236 crash = TRUE;
2237 }
2238
2239 if (crash) {
2240 ERR("tree %p\n", tc2->tree);
2241 le2 = tc2->tree->itemlist.Flink;
2242 while (le2 != &tc2->tree->itemlist) {
2243 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2244 if (!td->ignore) {
2245 ERR("%llx,%x,%llx inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted);
2246 }
2247 le2 = le2->Flink;
2248 }
2249 int3;
2250 }
2251 #endif
2252 tc2->tree->header.address = tc2->tree->new_address;
2253 tc2->tree->header.generation = Vcb->superblock.generation;
2254 tc2->tree->header.flags |= HEADER_FLAG_MIXED_BACKREF;
2255 tc2->tree->has_address = TRUE;
2256
2257 data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
2258 if (!data) {
2259 ERR("out of memory\n");
2260 Status = STATUS_INSUFFICIENT_RESOURCES;
2261 goto end;
2262 }
2263
2264 body = data + sizeof(tree_header);
2265
2266 RtlCopyMemory(data, &tc2->tree->header, sizeof(tree_header));
2267 RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header));
2268
2269 if (tc2->tree->header.level == 0) {
2270 leaf_node* itemptr = (leaf_node*)body;
2271 int i = 0;
2272 LIST_ENTRY* le2;
2273 UINT8* dataptr = data + Vcb->superblock.node_size;
2274
2275 le2 = tc2->tree->itemlist.Flink;
2276 while (le2 != &tc2->tree->itemlist) {
2277 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2278 if (!td->ignore) {
2279 dataptr = dataptr - td->size;
2280
2281 itemptr[i].key = td->key;
2282 itemptr[i].offset = (UINT8*)dataptr - (UINT8*)body;
2283 itemptr[i].size = td->size;
2284 i++;
2285
2286 if (td->size > 0)
2287 RtlCopyMemory(dataptr, td->data, td->size);
2288 }
2289
2290 le2 = le2->Flink;
2291 }
2292 } else {
2293 internal_node* itemptr = (internal_node*)body;
2294 int i = 0;
2295 LIST_ENTRY* le2;
2296
2297 le2 = tc2->tree->itemlist.Flink;
2298 while (le2 != &tc2->tree->itemlist) {
2299 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2300 if (!td->ignore) {
2301 itemptr[i].key = td->key;
2302 itemptr[i].address = td->treeholder.address;
2303 itemptr[i].generation = td->treeholder.generation;
2304 i++;
2305 }
2306
2307 le2 = le2->Flink;
2308 }
2309 }
2310
2311 crc32 = calc_crc32c(0xffffffff, (UINT8*)&((tree_header*)data)->fs_uuid, Vcb->superblock.node_size - sizeof(((tree_header*)data)->csum));
2312 crc32 = ~crc32;
2313 *((UINT32*)data) = crc32;
2314 TRACE("setting crc32 to %08x\n", crc32);
2315
2316 Status = write_tree(Vcb, tc2->tree->new_address, data, wtc);
2317 if (!NT_SUCCESS(Status)) {
2318 ERR("write_tree returned %08x\n", Status);
2319 goto end;
2320 }
2321 }
2322
2323 le = le->Flink;
2324 }
2325
2326 Status = STATUS_SUCCESS;
2327
2328 if (wtc->stripes.Flink != &wtc->stripes) {
2329 // launch writes and wait
2330 le = wtc->stripes.Flink;
2331 while (le != &wtc->stripes) {
2332 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
2333
2334 IoCallDriver(stripe->device->devobj, stripe->Irp);
2335
2336 le = le->Flink;
2337 }
2338
2339 KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL);
2340
2341 le = wtc->stripes.Flink;
2342 while (le != &wtc->stripes) {
2343 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
2344
2345 if (!NT_SUCCESS(stripe->iosb.Status)) {
2346 Status = stripe->iosb.Status;
2347 break;
2348 }
2349
2350 le = le->Flink;
2351 }
2352
2353 free_stripes(wtc);
2354 }
2355
2356 end:
2357 ExFreePool(wtc);
2358
2359 return Status;
2360 }
2361
2362 static NTSTATUS write_superblocks(device_extension* Vcb) {
2363 UINT64 i;
2364 NTSTATUS Status;
2365 LIST_ENTRY* le;
2366
2367 TRACE("(%p)\n", Vcb);
2368
2369 le = Vcb->tree_cache.Flink;
2370 while (le != &Vcb->tree_cache) {
2371 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
2372
2373 if (tc2->write && !tc2->tree->parent) {
2374 if (tc2->tree->root == Vcb->root_root) {
2375 Vcb->superblock.root_tree_addr = tc2->tree->new_address;
2376 Vcb->superblock.root_level = tc2->tree->header.level;
2377 } else if (tc2->tree->root == Vcb->chunk_root) {
2378 Vcb->superblock.chunk_tree_addr = tc2->tree->new_address;
2379 Vcb->superblock.chunk_root_generation = tc2->tree->header.generation;
2380 Vcb->superblock.chunk_root_level = tc2->tree->header.level;
2381 }
2382 }
2383
2384 le = le->Flink;
2385 }
2386
2387 for (i = 0; i < Vcb->superblock.num_devices; i++) {
2388 if (Vcb->devices[i].devobj) {
2389 Status = write_superblock(Vcb, &Vcb->devices[i]);
2390 if (!NT_SUCCESS(Status)) {
2391 ERR("write_superblock returned %08x\n", Status);
2392 return Status;
2393 }
2394 }
2395 }
2396
2397 return STATUS_SUCCESS;
2398 }
2399
2400 static NTSTATUS update_chunk_usage(device_extension* Vcb, LIST_ENTRY* rollback) {
2401 LIST_ENTRY* le = Vcb->chunks.Flink;
2402 chunk* c;
2403 KEY searchkey;
2404 traverse_ptr tp;
2405 BLOCK_GROUP_ITEM* bgi;
2406 NTSTATUS Status;
2407
2408 TRACE("(%p)\n", Vcb);
2409
2410 while (le != &Vcb->chunks) {
2411 c = CONTAINING_RECORD(le, chunk, list_entry);
2412
2413 if (c->used != c->oldused) {
2414 searchkey.obj_id = c->offset;
2415 searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
2416 searchkey.offset = c->chunk_item->size;
2417
2418 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2419 if (!NT_SUCCESS(Status)) {
2420 ERR("error - find_item returned %08x\n", Status);
2421 return Status;
2422 }
2423
2424 if (keycmp(&searchkey, &tp.item->key)) {
2425 ERR("could not find (%llx,%x,%llx) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
2426 int3;
2427 free_traverse_ptr(&tp);
2428 return STATUS_INTERNAL_ERROR;
2429 }
2430
2431 if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
2432 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
2433 free_traverse_ptr(&tp);
2434 return STATUS_INTERNAL_ERROR;
2435 }
2436
2437 bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
2438 if (!bgi) {
2439 ERR("out of memory\n");
2440 free_traverse_ptr(&tp);
2441 return STATUS_INSUFFICIENT_RESOURCES;
2442 }
2443
2444 RtlCopyMemory(bgi, tp.item->data, tp.item->size);
2445 bgi->used = c->used;
2446
2447 TRACE("adjusting usage of chunk %llx to %llx\n", c->offset, c->used);
2448
2449 delete_tree_item(Vcb, &tp, rollback);
2450
2451 if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, rollback)) {
2452 ERR("insert_tree_item failed\n");
2453 ExFreePool(bgi);
2454 return STATUS_INTERNAL_ERROR;
2455 }
2456
2457 TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
2458 TRACE("chunk_item type = %llx\n", c->chunk_item->type);
2459
2460 if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
2461 FIXME("RAID0 not yet supported\n");
2462 ExFreePool(bgi);
2463 free_traverse_ptr(&tp);
2464 return STATUS_INTERNAL_ERROR;
2465 } else if (c->chunk_item->type & BLOCK_FLAG_RAID1) {
2466 FIXME("RAID1 not yet supported\n");
2467 ExFreePool(bgi);
2468 free_traverse_ptr(&tp);
2469 return STATUS_INTERNAL_ERROR;
2470 } else if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE) {
2471 Vcb->superblock.bytes_used = Vcb->superblock.bytes_used + (2 * (c->used - c->oldused));
2472 } else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
2473 FIXME("RAID10 not yet supported\n");
2474 ExFreePool(bgi);
2475 free_traverse_ptr(&tp);
2476 return STATUS_INTERNAL_ERROR;
2477 } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
2478 FIXME("RAID5 not yet supported\n");
2479 ExFreePool(bgi);
2480 free_traverse_ptr(&tp);
2481 return STATUS_INTERNAL_ERROR;
2482 } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2483 FIXME("RAID6 not yet supported\n");
2484 ExFreePool(bgi);
2485 free_traverse_ptr(&tp);
2486 return STATUS_INTERNAL_ERROR;
2487 } else { // SINGLE
2488 Vcb->superblock.bytes_used = Vcb->superblock.bytes_used + c->used - c->oldused;
2489 }
2490
2491 TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
2492
2493 free_traverse_ptr(&tp);
2494
2495 c->oldused = c->used;
2496 }
2497
2498 le = le->Flink;
2499 }
2500
2501 return STATUS_SUCCESS;
2502 }
2503
2504 static void get_first_item(tree* t, KEY* key) {
2505 LIST_ENTRY* le;
2506
2507 le = t->itemlist.Flink;
2508 while (le != &t->itemlist) {
2509 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2510
2511 *key = td->key;
2512 return;
2513 }
2514 }
2515
2516 static NTSTATUS STDCALL split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, UINT32 numitems, UINT32 size) {
2517 tree *nt, *pt;
2518 tree_data* td;
2519 tree_data* oldlastitem;
2520 // write_tree* wt2;
2521 // // tree_data *firsttd, *lasttd;
2522 // // LIST_ENTRY* le;
2523 // #ifdef DEBUG_PARANOID
2524 // KEY lastkey1, lastkey2;
2525 // traverse_ptr tp, next_tp;
2526 // ULONG numitems1, numitems2;
2527 // #endif
2528
2529 TRACE("splitting tree in %llx at (%llx,%x,%llx)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);
2530
2531 // #ifdef DEBUG_PARANOID
2532 // lastkey1.obj_id = 0xffffffffffffffff;
2533 // lastkey1.obj_type = 0xff;
2534 // lastkey1.offset = 0xffffffffffffffff;
2535 //
2536 // if (!find_item(Vcb, t->root, &tp, &lastkey1, NULL, FALSE))
2537 // ERR("error - find_item failed\n");
2538 // else {
2539 // lastkey1 = tp.item->key;
2540 // numitems1 = 0;
2541 // while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
2542 // free_traverse_ptr(&tp);
2543 // tp = next_tp;
2544 // numitems1++;
2545 // }
2546 // free_traverse_ptr(&tp);
2547 // }
2548 // #endif
2549
2550 nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2551 if (!nt) {
2552 ERR("out of memory\n");
2553 return STATUS_INSUFFICIENT_RESOURCES;
2554 }
2555
2556 RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header));
2557 nt->header.address = 0;
2558 nt->header.generation = Vcb->superblock.generation;
2559 nt->header.num_items = t->header.num_items - numitems;
2560 nt->header.flags = HEADER_FLAG_MIXED_BACKREF;
2561
2562 nt->refcount = 0;
2563 nt->has_address = FALSE;
2564 nt->Vcb = Vcb;
2565 nt->parent = t->parent;
2566 nt->root = t->root;
2567 // nt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
2568 nt->new_address = 0;
2569 nt->has_new_address = FALSE;
2570 nt->flags = t->flags;
2571 InitializeListHead(&nt->itemlist);
2572
2573 // ExInitializeResourceLite(&nt->nonpaged->load_tree_lock);
2574
2575 oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry);
2576
2577 // // firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
2578 // // lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
2579 // //
2580 // // TRACE("old tree in %x was from (%x,%x,%x) to (%x,%x,%x)\n",
2581 // // (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
2582 // // (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
2583 // //
2584 // // le = wt->tree->itemlist.Flink;
2585 // // while (le != &wt->tree->itemlist) {
2586 // // td = CONTAINING_RECORD(le, tree_data, list_entry);
2587 // // TRACE("old tree item was (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
2588 // // le = le->Flink;
2589 // // }
2590
2591 nt->itemlist.Flink = &newfirstitem->list_entry;
2592 nt->itemlist.Blink = t->itemlist.Blink;
2593 nt->itemlist.Flink->Blink = &nt->itemlist;
2594 nt->itemlist.Blink->Flink = &nt->itemlist;
2595
2596 t->itemlist.Blink = &oldlastitem->list_entry;
2597 t->itemlist.Blink->Flink = &t->itemlist;
2598
2599 // // le = wt->tree->itemlist.Flink;
2600 // // while (le != &wt->tree->itemlist) {
2601 // // td = CONTAINING_RECORD(le, tree_data, list_entry);
2602 // // TRACE("old tree item now (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
2603 // // le = le->Flink;
2604 // // }
2605 // //
2606 // // firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
2607 // // lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
2608 // //
2609 // // TRACE("old tree in %x is now from (%x,%x,%x) to (%x,%x,%x)\n",
2610 // // (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
2611 // // (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
2612
2613 nt->size = t->size - size;
2614 t->size = size;
2615 t->header.num_items = numitems;
2616 add_to_tree_cache(Vcb, nt, TRUE);
2617
2618 InterlockedIncrement(&Vcb->open_trees);
2619 #ifdef DEBUG_TREE_REFCOUNTS
2620 TRACE("created new split tree %p\n", nt);
2621 #endif
2622 InsertTailList(&Vcb->trees, &nt->list_entry);
2623
2624 // // // TESTING
2625 // // td = wt->tree->items;
2626 // // while (td) {
2627 // // if (!td->ignore) {
2628 // // TRACE("old tree item: (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
2629 // // }
2630 // // td = td->next;
2631 // // }
2632
2633 // // oldlastitem->next = NULL;
2634 // // wt->tree->lastitem = oldlastitem;
2635
2636 // // TRACE("last item is now (%x,%x,%x)\n", (UINT32)oldlastitem->key.obj_id, oldlastitem->key.obj_type, (UINT32)oldlastitem->key.offset);
2637
2638 if (nt->header.level > 0) {
2639 LIST_ENTRY* le = nt->itemlist.Flink;
2640
2641 while (le != &nt->itemlist) {
2642 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
2643
2644 if (td2->treeholder.tree) {
2645 td2->treeholder.tree->parent = nt;
2646 increase_tree_rc(nt);
2647 free_tree(t);
2648 }
2649
2650 le = le->Flink;
2651 }
2652 }
2653
2654 if (nt->parent) {
2655 increase_tree_rc(nt->parent);
2656
2657 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
2658 if (!td) {
2659 ERR("out of memory\n");
2660 return STATUS_INSUFFICIENT_RESOURCES;
2661 }
2662
2663 td->key = newfirstitem->key;
2664
2665 InsertHeadList(&t->paritem->list_entry, &td->list_entry);
2666
2667 td->ignore = FALSE;
2668 td->inserted = TRUE;
2669 td->treeholder.tree = nt;
2670 init_tree_holder(&td->treeholder);
2671 // td->treeholder.nonpaged->status = tree_holder_loaded;
2672 nt->paritem = td;
2673
2674 nt->parent->header.num_items++;
2675 nt->parent->size += sizeof(internal_node);
2676
2677 goto end;
2678 }
2679
2680 TRACE("adding new tree parent\n");
2681
2682 if (nt->header.level == 255) {
2683 ERR("cannot add parent to tree at level 255\n");
2684 return STATUS_INTERNAL_ERROR;
2685 }
2686
2687 pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2688 if (!pt) {
2689 ERR("out of memory\n");
2690 return STATUS_INSUFFICIENT_RESOURCES;
2691 }
2692
2693 RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
2694 pt->header.address = 0;
2695 pt->header.num_items = 2;
2696 pt->header.level = nt->header.level + 1;
2697 pt->header.flags = HEADER_FLAG_MIXED_BACKREF;
2698
2699 pt->refcount = 2;
2700 pt->has_address = FALSE;
2701 pt->Vcb = Vcb;
2702 pt->parent = NULL;
2703 pt->paritem = NULL;
2704 pt->root = t->root;
2705 pt->new_address = 0;
2706 pt->has_new_address = FALSE;
2707 // pt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
2708 pt->size = pt->header.num_items * sizeof(internal_node);
2709 pt->flags = t->flags;
2710 InitializeListHead(&pt->itemlist);
2711
2712 // ExInitializeResourceLite(&pt->nonpaged->load_tree_lock);
2713
2714 InterlockedIncrement(&Vcb->open_trees);
2715 #ifdef DEBUG_TREE_REFCOUNTS
2716 TRACE("created new parent tree %p\n", pt);
2717 #endif
2718 InsertTailList(&Vcb->trees, &pt->list_entry);
2719
2720 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
2721 if (!td) {
2722 ERR("out of memory\n");
2723 return STATUS_INSUFFICIENT_RESOURCES;
2724 }
2725
2726 get_first_item(t, &td->key);
2727 td->ignore = FALSE;
2728 td->inserted = FALSE;
2729 td->treeholder.address = 0;
2730 td->treeholder.generation = Vcb->superblock.generation;
2731 td->treeholder.tree = t;
2732 init_tree_holder(&td->treeholder);
2733 // td->treeholder.nonpaged->status = tree_holder_loaded;
2734 InsertTailList(&pt->itemlist, &td->list_entry);
2735 t->paritem = td;
2736
2737 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
2738 if (!td) {
2739 ERR("out of memory\n");
2740 return STATUS_INSUFFICIENT_RESOURCES;
2741 }
2742
2743 td->key = newfirstitem->key;
2744 td->ignore = FALSE;
2745 td->inserted = FALSE;
2746 td->treeholder.address = 0;
2747 td->treeholder.generation = Vcb->superblock.generation;
2748 td->treeholder.tree = nt;
2749 init_tree_holder(&td->treeholder);
2750 // td->treeholder.nonpaged->status = tree_holder_loaded;
2751 InsertTailList(&pt->itemlist, &td->list_entry);
2752 nt->paritem = td;
2753
2754 add_to_tree_cache(Vcb, pt, TRUE);
2755
2756 t->root->treeholder.tree = pt;
2757
2758 t->parent = pt;
2759 nt->parent = pt;
2760
2761 end:
2762 t->root->root_item.bytes_used += Vcb->superblock.node_size;
2763
2764 // #ifdef DEBUG_PARANOID
2765 // lastkey2.obj_id = 0xffffffffffffffff;
2766 // lastkey2.obj_type = 0xff;
2767 // lastkey2.offset = 0xffffffffffffffff;
2768 //
2769 // if (!find_item(Vcb, wt->tree->root, &tp, &lastkey2, NULL, FALSE))
2770 // ERR("error - find_item failed\n");
2771 // else {
2772 // lastkey2 = tp.item->key;
2773 //
2774 // numitems2 = 0;
2775 // while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
2776 // free_traverse_ptr(&tp);
2777 // tp = next_tp;
2778 // numitems2++;
2779 // }
2780 // free_traverse_ptr(&tp);
2781 // }
2782 //
2783 // ERR("lastkey1 = %llx,%x,%llx\n", lastkey1.obj_id, lastkey1.obj_type, lastkey1.offset);
2784 // ERR("lastkey2 = %llx,%x,%llx\n", lastkey2.obj_id, lastkey2.obj_type, lastkey2.offset);
2785 // ERR("numitems1 = %u\n", numitems1);
2786 // ERR("numitems2 = %u\n", numitems2);
2787 // #endif
2788
2789 return STATUS_SUCCESS;
2790 }
2791
2792 static NTSTATUS STDCALL split_tree(device_extension* Vcb, tree* t) {
2793 LIST_ENTRY* le;
2794 UINT32 size, ds, numitems;
2795
2796 size = 0;
2797 numitems = 0;
2798
2799 // FIXME - naïve implementation: maximizes number of filled trees
2800
2801 le = t->itemlist.Flink;
2802 while (le != &t->itemlist) {
2803 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2804
2805 if (!td->ignore) {
2806 if (t->header.level == 0)
2807 ds = sizeof(leaf_node) + td->size;
2808 else
2809 ds = sizeof(internal_node);
2810
2811 // FIXME - move back if previous item was deleted item with same key
2812 if (size + ds > Vcb->superblock.node_size - sizeof(tree_header))
2813 return split_tree_at(Vcb, t, td, numitems, size);
2814
2815 size += ds;
2816 numitems++;
2817 }
2818
2819 le = le->Flink;
2820 }
2821
2822 return STATUS_SUCCESS;
2823 }
2824
2825 static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, LIST_ENTRY* rollback) {
2826 LIST_ENTRY* le;
2827 tree_data* nextparitem = NULL;
2828 NTSTATUS Status;
2829 tree *next_tree, *par;
2830 BOOL loaded;
2831
2832 TRACE("trying to amalgamate tree in root %llx, level %x (size %u)\n", t->root->id, t->header.level, t->size);
2833
2834 // FIXME - doesn't capture everything, as it doesn't ascend
2835 // FIXME - write proper function and put it in treefuncs.c
2836 le = t->paritem->list_entry.Flink;
2837 while (le != &t->parent->itemlist) {
2838 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2839
2840 if (!td->ignore) {
2841 nextparitem = td;
2842 break;
2843 }
2844
2845 le = le->Flink;
2846 }
2847
2848 if (!nextparitem)
2849 return STATUS_SUCCESS;
2850
2851 // FIXME - loop, and capture more than one tree if we can
2852
2853 TRACE("nextparitem: key = %llx,%x,%llx\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset);
2854 // nextparitem = t->paritem;
2855
2856 // ExAcquireResourceExclusiveLite(&t->parent->nonpaged->load_tree_lock, TRUE);
2857
2858 Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, &loaded);
2859 if (!NT_SUCCESS(Status)) {
2860 ERR("do_load_tree returned %08x\n", Status);
2861 return Status;
2862 }
2863
2864 if (loaded)
2865 increase_tree_rc(t->parent);
2866
2867 // ExReleaseResourceLite(&t->parent->nonpaged->load_tree_lock);
2868
2869 next_tree = nextparitem->treeholder.tree;
2870
2871 if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) {
2872 // merge two trees into one
2873
2874 t->header.num_items += next_tree->header.num_items;
2875 t->size += next_tree->size;
2876
2877 if (next_tree->header.level > 0) {
2878 le = next_tree->itemlist.Flink;
2879
2880 while (le != &next_tree->itemlist) {
2881 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
2882
2883 if (td2->treeholder.tree) {
2884 td2->treeholder.tree->parent = t;
2885 increase_tree_rc(t);
2886 free_tree(next_tree);
2887 }
2888
2889 le = le->Flink;
2890 }
2891 }
2892
2893 t->itemlist.Blink->Flink = next_tree->itemlist.Flink;
2894 t->itemlist.Blink->Flink->Blink = t->itemlist.Blink;
2895 t->itemlist.Blink = next_tree->itemlist.Blink;
2896 t->itemlist.Blink->Flink = &t->itemlist;
2897
2898 // // TESTING
2899 // le = t->itemlist.Flink;
2900 // while (le != &t->itemlist) {
2901 // tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2902 // if (!td->ignore) {
2903 // ERR("key: %llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
2904 // }
2905 // le = le->Flink;
2906 // }
2907
2908 next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist;
2909
2910 next_tree->header.num_items = 0;
2911 next_tree->size = 0;
2912
2913 if (next_tree->has_new_address) { // delete associated EXTENT_ITEM
2914 Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, rollback);
2915
2916 if (!NT_SUCCESS(Status)) {
2917 ERR("reduce_tree_extent returned %08x\n", Status);
2918 free_tree(next_tree);
2919 return Status;
2920 }
2921 } else if (next_tree->has_address) {
2922 Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, rollback);
2923
2924 if (!NT_SUCCESS(Status)) {
2925 ERR("reduce_tree_extent returned %08x\n", Status);
2926 free_tree(next_tree);
2927 return Status;
2928 }
2929 }
2930
2931 if (!nextparitem->ignore) {
2932 nextparitem->ignore = TRUE;
2933 next_tree->parent->header.num_items--;
2934 next_tree->parent->size -= sizeof(internal_node);
2935 }
2936
2937 par = next_tree->parent;
2938 while (par) {
2939 add_to_tree_cache(Vcb, par, TRUE);
2940 par = par->parent;
2941 }
2942
2943 RemoveEntryList(&nextparitem->list_entry);
2944 ExFreePool(next_tree->paritem);
2945 next_tree->paritem = NULL;
2946
2947 next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
2948
2949 free_tree(next_tree);
2950
2951 // remove next_tree from tree cache
2952 le = Vcb->tree_cache.Flink;
2953 while (le != &Vcb->tree_cache) {
2954 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
2955
2956 if (tc2->tree == next_tree) {
2957 free_tree(next_tree);
2958 RemoveEntryList(le);
2959 ExFreePool(tc2);
2960 break;
2961 }
2962
2963 le = le->Flink;
2964 }
2965 } else {
2966 // rebalance by moving items from second tree into first
2967 ULONG avg_size = (t->size + next_tree->size) / 2;
2968 KEY firstitem = {0, 0, 0};
2969
2970 TRACE("attempting rebalance\n");
2971
2972 le = next_tree->itemlist.Flink;
2973 while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) {
2974 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2975 ULONG size;
2976
2977 if (!td->ignore) {
2978 if (next_tree->header.level == 0)
2979 size = sizeof(leaf_node) + td->size;
2980 else
2981 size = sizeof(internal_node);
2982 } else
2983 size = 0;
2984
2985 if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) {
2986 RemoveEntryList(&td->list_entry);
2987 InsertTailList(&t->itemlist, &td->list_entry);
2988
2989 if (next_tree->header.level > 0 && td->treeholder.tree) {
2990 td->treeholder.tree->parent = t;
2991 increase_tree_rc(t);
2992 free_tree(next_tree);
2993 }
2994
2995 if (!td->ignore) {
2996 next_tree->size -= size;
2997 t->size += size;
2998 next_tree->header.num_items--;
2999 t->header.num_items++;
3000 }
3001 } else
3002 break;
3003
3004 le = next_tree->itemlist.Flink;
3005 }
3006
3007 le = next_tree->itemlist.Flink;
3008 while (le != &next_tree->itemlist) {
3009 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3010
3011 if (!td->ignore) {
3012 firstitem = td->key;
3013 break;
3014 }
3015
3016 le = le->Flink;
3017 }
3018
3019 // ERR("firstitem = %llx,%x,%llx\n", firstitem.obj_id, firstitem.obj_type, firstitem.offset);
3020
3021 // FIXME - once ascension is working, make this work with parent's parent, etc.
3022 if (next_tree->paritem)
3023 next_tree->paritem->key = firstitem;
3024
3025 par = next_tree;
3026 while (par) {
3027 add_to_tree_cache(Vcb, par, TRUE);
3028 par = par->parent;
3029 }
3030
3031 free_tree(next_tree);
3032 }
3033
3034 return STATUS_SUCCESS;
3035 }
3036
3037 static NTSTATUS update_extent_level(device_extension* Vcb, UINT64 address, tree* t, UINT8 level, LIST_ENTRY* rollback) {
3038 KEY searchkey;
3039 traverse_ptr tp;
3040 NTSTATUS Status;
3041
3042 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
3043 searchkey.obj_id = address;
3044 searchkey.obj_type = TYPE_METADATA_ITEM;
3045 searchkey.offset = t->header.level;
3046
3047 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3048 if (!NT_SUCCESS(Status)) {
3049 ERR("error - find_item returned %08x\n", Status);
3050 return Status;
3051 }
3052
3053 if (!keycmp(&tp.item->key, &searchkey)) {
3054 EXTENT_ITEM_SKINNY_METADATA* eism;
3055
3056 if (tp.item->size > 0) {
3057 eism = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
3058
3059 if (!eism) {
3060 ERR("out of memory\n");
3061 free_traverse_ptr(&tp);
3062 return STATUS_INSUFFICIENT_RESOURCES;
3063 }
3064
3065 RtlCopyMemory(eism, tp.item->data, tp.item->size);
3066 } else
3067 eism = NULL;
3068
3069 delete_tree_item(Vcb, &tp, rollback);
3070
3071 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, tp.item->size, NULL, rollback)) {
3072 ERR("insert_tree_item failed\n");
3073 ExFreePool(eism);
3074 free_traverse_ptr(&tp);
3075 return STATUS_INTERNAL_ERROR;
3076 }
3077
3078 free_traverse_ptr(&tp);
3079 return STATUS_SUCCESS;
3080 }
3081
3082 free_traverse_ptr(&tp);
3083 }
3084
3085 searchkey.obj_id = address;
3086 searchkey.obj_type = TYPE_EXTENT_ITEM;
3087 searchkey.offset = 0xffffffffffffffff;
3088
3089 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3090 if (!NT_SUCCESS(Status)) {
3091 ERR("error - find_item returned %08x\n", Status);
3092 return Status;
3093 }
3094
3095 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
3096 EXTENT_ITEM_TREE* eit;
3097
3098 if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
3099 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
3100 free_traverse_ptr(&tp);
3101 return STATUS_INTERNAL_ERROR;
3102 }
3103
3104 eit = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
3105
3106 if (!eit) {
3107 ERR("out of memory\n");
3108 free_traverse_ptr(&tp);
3109 return STATUS_INSUFFICIENT_RESOURCES;
3110 }
3111
3112 RtlCopyMemory(eit, tp.item->data, tp.item->size);
3113
3114 delete_tree_item(Vcb, &tp, rollback);
3115
3116 eit->level = level;
3117
3118 if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, eit, tp.item->size, NULL, rollback)) {
3119 ERR("insert_tree_item failed\n");
3120 ExFreePool(eit);
3121 free_traverse_ptr(&tp);
3122 return STATUS_INTERNAL_ERROR;
3123 }
3124
3125 free_traverse_ptr(&tp);
3126
3127 return STATUS_SUCCESS;
3128 }
3129
3130 ERR("could not find EXTENT_ITEM for address %llx\n", address);
3131
3132 free_traverse_ptr(&tp);
3133
3134 return STATUS_INTERNAL_ERROR;
3135 }
3136
3137 static NTSTATUS STDCALL do_splits(device_extension* Vcb, LIST_ENTRY* rollback) {
3138 // LIST_ENTRY *le, *le2;
3139 // write_tree* wt;
3140 // tree_data* td;
3141 UINT8 level, max_level;
3142 UINT32 min_size;
3143 BOOL empty, done_deletions = FALSE;
3144 NTSTATUS Status;
3145 tree_cache* tc2;
3146
3147 TRACE("(%p)\n", Vcb);
3148
3149 max_level = 0;
3150
3151 for (level = 0; level <= 255; level++) {
3152 LIST_ENTRY *le, *nextle;
3153
3154 empty = TRUE;
3155
3156 TRACE("doing level %u\n", level);
3157
3158 le = Vcb->tree_cache.Flink;
3159
3160 while (le != &Vcb->tree_cache) {
3161 tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
3162
3163 nextle = le->Flink;
3164
3165 if (tc2->write && tc2->tree->header.level == level) {
3166 empty = FALSE;
3167
3168 if (tc2->tree->header.num_items == 0) {
3169 if (tc2->tree->parent) {
3170 LIST_ENTRY* le2;
3171 KEY firstitem = {0xcccccccccccccccc,0xcc,0xcccccccccccccccc};
3172
3173 done_deletions = TRUE;
3174
3175 le2 = tc2->tree->itemlist.Flink;
3176 while (le2 != &tc2->tree->itemlist) {
3177 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
3178 firstitem = td->key;
3179 break;
3180 }
3181 TRACE("deleting tree in root %llx (first item was %llx,%x,%llx)\n",
3182 tc2->tree->root->id, firstitem.obj_id, firstitem.obj_type, firstitem.offset);
3183
3184 tc2->tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
3185
3186 if (tc2->tree->has_new_address) { // delete associated EXTENT_ITEM
3187 Status = reduce_tree_extent(Vcb, tc2->tree->new_address, tc2->tree, rollback);
3188
3189 if (!NT_SUCCESS(Status)) {
3190 ERR("reduce_tree_extent returned %08x\n", Status);
3191 return Status;
3192 }
3193 } else if (tc2->tree->has_address) {
3194 Status = reduce_tree_extent(Vcb,tc2->tree->header.address, tc2->tree, rollback);
3195
3196 if (!NT_SUCCESS(Status)) {
3197 ERR("reduce_tree_extent returned %08x\n", Status);
3198 return Status;
3199 }
3200 }
3201
3202 if (!tc2->tree->paritem->ignore) {
3203 tc2->tree->paritem->ignore = TRUE;
3204 tc2->tree->parent->header.num_items--;
3205 tc2->tree->parent->size -= sizeof(internal_node);
3206 }
3207
3208 RemoveEntryList(&tc2->tree->paritem->list_entry);
3209 ExFreePool(tc2->tree->paritem);
3210 tc2->tree->paritem = NULL;
3211
3212 free_tree(tc2->tree);
3213
3214 RemoveEntryList(le);
3215 ExFreePool(tc2);
3216 } else if (tc2->tree->header.level != 0) {
3217 if (tc2->tree->has_new_address) {
3218 Status = update_extent_level(Vcb, tc2->tree->new_address, tc2->tree, 0, rollback);
3219
3220 if (!NT_SUCCESS(Status)) {
3221 ERR("update_extent_level returned %08x\n", Status);
3222 return Status;
3223 }
3224 }
3225
3226 tc2->tree->header.level = 0;
3227 }
3228 } else if (tc2->tree->size > Vcb->superblock.node_size - sizeof(tree_header)) {
3229 TRACE("splitting overlarge tree (%x > %x)\n", tc2->tree->size, Vcb->superblock.node_size - sizeof(tree_header));
3230 Status = split_tree(Vcb, tc2->tree);
3231
3232 if (!NT_SUCCESS(Status)) {
3233 ERR("split_tree returned %08x\n", Status);
3234 return Status;
3235 }
3236 }
3237 }
3238
3239 le = nextle;
3240 }
3241
3242 if (!empty) {
3243 max_level = level;
3244 } else {
3245 TRACE("nothing found for level %u\n", level);
3246 break;
3247 }
3248 }
3249
3250 min_size = (Vcb->superblock.node_size - sizeof(tree_header)) / 2;
3251
3252 for (level = 0; level <= max_level; level++) {
3253 LIST_ENTRY* le;
3254
3255 le = Vcb->tree_cache.Flink;
3256
3257 while (le != &Vcb->tree_cache) {
3258 tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
3259
3260 if (tc2->write && tc2->tree->header.level == level && tc2->tree->header.num_items > 0 && tc2->tree->parent && tc2->tree->size < min_size) {
3261 Status = try_tree_amalgamate(Vcb, tc2->tree, rollback);
3262 if (!NT_SUCCESS(Status)) {
3263 ERR("try_tree_amalgamate returned %08x\n", Status);
3264 return Status;
3265 }
3266 }
3267
3268 le = le->Flink;
3269 }
3270 }
3271
3272 // simplify trees if top tree only has one entry
3273
3274 if (done_deletions) {
3275 for (level = max_level; level > 0; level--) {
3276 LIST_ENTRY *le, *nextle;
3277
3278 le = Vcb->tree_cache.Flink;
3279 while (le != &Vcb->tree_cache) {
3280 nextle = le->Flink;
3281 tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
3282
3283 if (tc2->write && tc2->tree->header.level == level) {
3284 if (!tc2->tree->parent && tc2->tree->header.num_items == 1) {
3285 LIST_ENTRY* le2 = tc2->tree->itemlist.Flink;
3286 tree_data* td;
3287 tree* child_tree = NULL;
3288
3289 while (le2 != &tc2->tree->itemlist) {
3290 td = CONTAINING_RECORD(le2, tree_data, list_entry);
3291 if (!td->ignore)
3292 break;
3293 le2 = le2->Flink;
3294 }
3295
3296 TRACE("deleting top-level tree in root %llx with one item\n", tc2->tree->root->id);
3297
3298 if (tc2->tree->has_new_address) { // delete associated EXTENT_ITEM
3299 Status = reduce_tree_extent(Vcb, tc2->tree->new_address, tc2->tree, rollback);
3300
3301 if (!NT_SUCCESS(Status)) {
3302 ERR("reduce_tree_extent returned %08x\n", Status);
3303 return Status;
3304 }
3305 } else if (tc2->tree->has_address) {
3306 Status = reduce_tree_extent(Vcb,tc2->tree->header.address, tc2->tree, rollback);
3307
3308 if (!NT_SUCCESS(Status)) {
3309 ERR("reduce_tree_extent returned %08x\n", Status);
3310 return Status;
3311 }
3312 }
3313
3314 if (!td->treeholder.tree) { // load first item if not already loaded
3315 KEY searchkey = {0,0,0};
3316 traverse_ptr tp;
3317
3318 Status = find_item(Vcb, tc2->tree->root, &tp, &searchkey, FALSE);
3319 if (!NT_SUCCESS(Status)) {
3320 ERR("error - find_item returned %08x\n", Status);
3321 return Status;
3322 }
3323
3324 free_traverse_ptr(&tp);
3325 }
3326
3327 child_tree = td->treeholder.tree;
3328
3329 if (child_tree) {
3330 child_tree->parent = NULL;
3331 child_tree->paritem = NULL;
3332 free_tree(tc2->tree);
3333 }
3334
3335 tc2->tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
3336
3337 free_tree(tc2->tree);
3338
3339 if (child_tree)
3340 child_tree->root->treeholder.tree = child_tree;
3341
3342 RemoveEntryList(le);
3343 ExFreePool(tc2);
3344 }
3345 }
3346
3347 le = nextle;
3348 }
3349 }
3350 }
3351
3352 return STATUS_SUCCESS;
3353 }
3354
3355 NTSTATUS STDCALL do_write(device_extension* Vcb, LIST_ENTRY* rollback) {
3356 NTSTATUS Status;
3357 LIST_ENTRY* le;
3358
3359 TRACE("(%p)\n", Vcb);
3360
3361 // If only changing superblock, e.g. changing label, we still need to rewrite
3362 // the root tree so the generations match, otherwise you won't be able to mount on Linux.
3363 if (Vcb->write_trees > 0) {
3364 KEY searchkey;
3365 traverse_ptr tp;
3366
3367 searchkey.obj_id = 0;
3368 searchkey.obj_type = 0;
3369 searchkey.offset = 0;
3370
3371 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
3372 if (!NT_SUCCESS(Status)) {
3373 ERR("error - find_item returned %08x\n", Status);
3374 return Status;
3375 }
3376
3377 add_to_tree_cache(Vcb, Vcb->root_root->treeholder.tree, TRUE);
3378
3379 free_traverse_ptr(&tp);
3380 }
3381
3382 do {
3383 Status = add_parents(Vcb, rollback);
3384 if (!NT_SUCCESS(Status)) {
3385 ERR("add_parents returned %08x\n", Status);
3386 goto end;
3387 }
3388
3389 Status = do_splits(Vcb, rollback);
3390 if (!NT_SUCCESS(Status)) {
3391 ERR("do_splits returned %08x\n", Status);
3392 goto end;
3393 }
3394
3395 Status = allocate_tree_extents(Vcb, rollback);
3396 if (!NT_SUCCESS(Status)) {
3397 ERR("add_parents returned %08x\n", Status);
3398 goto end;
3399 }
3400
3401 Status = update_chunk_usage(Vcb, rollback);
3402 if (!NT_SUCCESS(Status)) {
3403 ERR("update_chunk_usage returned %08x\n", Status);
3404 goto end;
3405 }
3406 } while (!trees_consistent(Vcb));
3407
3408 TRACE("trees consistent\n");
3409
3410 Status = update_root_root(Vcb, rollback);
3411 if (!NT_SUCCESS(Status)) {
3412 ERR("update_root_root returned %08x\n", Status);
3413 goto end;
3414 }
3415
3416 Status = write_trees(Vcb);
3417 if (!NT_SUCCESS(Status)) {
3418 ERR("write_trees returned %08x\n", Status);
3419 goto end;
3420 }
3421
3422 Status = write_superblocks(Vcb);
3423 if (!NT_SUCCESS(Status)) {
3424 ERR("write_superblocks returned %08x\n", Status);
3425 goto end;
3426 }
3427
3428 clean_space_cache(Vcb);
3429
3430 Vcb->superblock.generation++;
3431
3432 // print_trees(tc); // TESTING
3433
3434 Status = STATUS_SUCCESS;
3435
3436 le = Vcb->tree_cache.Flink;
3437 while (le != &Vcb->tree_cache) {
3438 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
3439
3440 tc2->write = FALSE;
3441
3442 le = le->Flink;
3443 }
3444
3445 Vcb->write_trees = 0;
3446
3447 end:
3448 TRACE("do_write returning %08x\n", Status);
3449
3450 return Status;
3451 }
3452
3453 NTSTATUS consider_write(device_extension* Vcb) {
3454 // FIXME - call do_write if Vcb->write_trees high
3455 #if 0
3456 LIST_ENTRY rollback;
3457 NTSTATUS Status = STATUS_SUCCESS;
3458
3459 InitializeListHead(&rollback);
3460
3461 if (Vcb->write_trees > 0)
3462 Status = do_write(Vcb, &rollback);
3463
3464 free_tree_cache(&Vcb->tree_cache);
3465
3466 if (!NT_SUCCESS(Status))
3467 do_rollback(Vcb, &rollback);
3468 else
3469 clear_rollback(&rollback);
3470
3471 return Status;
3472 #else
3473 return STATUS_SUCCESS;
3474 #endif
3475 }
3476
3477 static __inline void insert_into_ordered_list(LIST_ENTRY* list, ordered_list* ins) {
3478 LIST_ENTRY* le = list->Flink;
3479 ordered_list* ol;
3480
3481 while (le != list) {
3482 ol = (ordered_list*)le;
3483
3484 if (ol->key > ins->key) {
3485 le->Blink->Flink = &ins->list_entry;
3486 ins->list_entry.Blink = le->Blink;
3487 le->Blink = &ins->list_entry;
3488 ins->list_entry.Flink = le;
3489 return;
3490 }
3491
3492 le = le->Flink;
3493 }
3494
3495 InsertTailList(list, &ins->list_entry);
3496 }
3497
3498 static UINT64 get_extent_data_ref_hash(UINT64 root, UINT64 objid, UINT64 offset) {
3499 UINT32 high_crc = 0xffffffff, low_crc = 0xffffffff;
3500
3501 // FIXME - can we test this?
3502
3503 // FIXME - make sure numbers here are little-endian
3504 high_crc = calc_crc32c(high_crc, (UINT8*)&root, sizeof(UINT64));
3505 low_crc = calc_crc32c(low_crc, (UINT8*)&objid, sizeof(UINT64));
3506 low_crc = calc_crc32c(low_crc, (UINT8*)&offset, sizeof(UINT64));
3507
3508 return ((UINT64)high_crc << 31) ^ (UINT64)low_crc;
3509 }
3510
3511 NTSTATUS STDCALL add_extent_ref(device_extension* Vcb, UINT64 address, UINT64 size, root* subvol, UINT64 inode, UINT64 offset, LIST_ENTRY* rollback) {
3512 KEY searchkey;
3513 traverse_ptr tp;
3514 EXTENT_ITEM* ei;
3515 UINT8 *siptr, *type;
3516 ULONG len;
3517 UINT64 hash;
3518 EXTENT_DATA_REF* edr;
3519 NTSTATUS Status;
3520
3521 TRACE("(%p, %llx, %llx, %llx, %llx, %llx)\n", Vcb, address, size, subvol->id, inode, offset);
3522
3523 searchkey.obj_id = address;
3524 searchkey.obj_type = TYPE_EXTENT_ITEM;
3525 searchkey.offset = size;
3526
3527 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3528 if (!NT_SUCCESS(Status)) {
3529 ERR("error - find_item returned %08x\n", Status);
3530 return Status;
3531 }
3532
3533 if (keycmp(&tp.item->key, &searchkey)) {
3534 // create new entry
3535
3536 len = sizeof(EXTENT_ITEM) + sizeof(UINT8) + sizeof(EXTENT_DATA_REF);
3537 free_traverse_ptr(&tp);
3538
3539 ei = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
3540 if (!ei) {
3541 ERR("out of memory\n");
3542 return STATUS_INSUFFICIENT_RESOURCES;
3543 }
3544
3545 ei->refcount = 1;
3546 ei->generation = Vcb->superblock.generation;
3547 ei->flags = EXTENT_ITEM_DATA;
3548
3549 type = (UINT8*)&ei[1];
3550 *type = TYPE_EXTENT_DATA_REF;
3551
3552 edr = (EXTENT_DATA_REF*)&type[1];
3553 edr->root = subvol->id;
3554 edr->objid = inode;
3555 edr->offset = offset;
3556 edr->count = 1;
3557
3558 if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ei, len, NULL, rollback)) {
3559 ERR("error - failed to insert item\n");
3560 return STATUS_INTERNAL_ERROR;
3561 }
3562
3563 // FIXME - update free space in superblock and CHUNK_ITEM
3564
3565 return STATUS_SUCCESS;
3566 }
3567
3568 if (tp.item->size == sizeof(EXTENT_ITEM_V0)) { // old extent ref, convert
3569 NTSTATUS Status = convert_old_data_extent(Vcb, address, size, rollback);
3570 if (!NT_SUCCESS(Status)) {
3571 ERR("convert_old_data_extent returned %08x\n", Status);
3572 free_traverse_ptr(&tp);
3573 return Status;
3574 }
3575
3576 free_traverse_ptr(&tp);
3577
3578 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3579 if (!NT_SUCCESS(Status)) {
3580 ERR("error - find_item returned %08x\n", Status);
3581 return Status;
3582 }
3583
3584 if (keycmp(&tp.item->key, &searchkey)) {
3585 WARN("extent item not found for address %llx, size %llx\n", address, size);
3586 free_traverse_ptr(&tp);
3587 return STATUS_SUCCESS;
3588 }
3589 }
3590
3591 ei = (EXTENT_ITEM*)tp.item->data;
3592
3593 if (tp.item->size < sizeof(EXTENT_ITEM)) {
3594 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
3595 free_traverse_ptr(&tp);
3596 return STATUS_INTERNAL_ERROR;
3597 }
3598
3599 if (extent_item_is_shared(ei, tp.item->size - sizeof(EXTENT_ITEM))) {
3600 NTSTATUS Status = convert_shared_data_extent(Vcb, address, size, rollback);
3601 if (!NT_SUCCESS(Status)) {
3602 ERR("convert_shared_data_extent returned %08x\n", Status);
3603 free_traverse_ptr(&tp);
3604 return Status;
3605 }
3606
3607 free_traverse_ptr(&tp);
3608
3609 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3610 if (!NT_SUCCESS(Status)) {
3611 ERR("error - find_item returned %08x\n", Status);
3612 return Status;
3613 }
3614
3615 if (keycmp(&tp.item->key, &searchkey)) {
3616 WARN("extent item not found for address %llx, size %llx\n", address, size);
3617 free_traverse_ptr(&tp);
3618 return STATUS_SUCCESS;
3619 }
3620
3621 ei = (EXTENT_ITEM*)tp.item->data;
3622 }
3623
3624 if (ei->flags != EXTENT_ITEM_DATA) {
3625 ERR("error - flag was not EXTENT_ITEM_DATA\n");
3626 free_traverse_ptr(&tp);
3627 return STATUS_INTERNAL_ERROR;
3628 }
3629
3630 // FIXME - is ei->refcount definitely the number of items, or is it the sum of the subitem refcounts?
3631
3632 hash = get_extent_data_ref_hash(subvol->id, inode, offset);
3633
3634 len = tp.item->size - sizeof(EXTENT_ITEM);
3635 siptr = (UINT8*)&ei[1];
3636
3637 // FIXME - increase subitem refcount if there already?
3638 do {
3639 if (*siptr == TYPE_EXTENT_DATA_REF) {
3640 UINT64 sihash;
3641
3642 edr = (EXTENT_DATA_REF*)&siptr[1];
3643 sihash = get_extent_data_ref_hash(edr->root, edr->objid, edr->offset);
3644
3645 if (sihash >= hash)
3646 break;
3647
3648 siptr += sizeof(UINT8) + sizeof(EXTENT_DATA_REF);
3649
3650 if (len > sizeof(EXTENT_DATA_REF) + sizeof(UINT8)) {
3651 len -= sizeof(EXTENT_DATA_REF) + sizeof(UINT8);
3652 } else
3653 break;
3654 // FIXME - TYPE_TREE_BLOCK_REF 0xB0
3655 } else {
3656 ERR("unrecognized extent subitem %x\n", *siptr);
3657 free_traverse_ptr(&tp);
3658 return STATUS_INTERNAL_ERROR;
3659 }
3660 } while (len > 0);
3661
3662 len = tp.item->size + sizeof(UINT8) + sizeof(EXTENT_DATA_REF); // FIXME - die if too big
3663 ei = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
3664 if (!ei) {
3665 ERR("out of memory\n");
3666 return STATUS_INSUFFICIENT_RESOURCES;
3667 }
3668
3669 RtlCopyMemory(ei, tp.item->data, siptr - tp.item->data);
3670 ei->refcount++;
3671
3672 type = (UINT8*)ei + (siptr - tp.item->data);
3673 *type = TYPE_EXTENT_DATA_REF;
3674
3675 edr = (EXTENT_DATA_REF*)&type[1];
3676 edr->root = subvol->id;
3677 edr->objid = inode;
3678 edr->offset = offset;
3679 edr->count = 1;
3680
3681 if (siptr < tp.item->data + tp.item->size)
3682 RtlCopyMemory(&edr[1], siptr, tp.item->data + tp.item->size - siptr);
3683
3684 delete_tree_item(Vcb, &tp, rollback);
3685
3686 free_traverse_ptr(&tp);
3687
3688 if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ei, len, NULL, rollback)) {
3689 ERR("error - failed to insert item\n");
3690 ExFreePool(ei);
3691 return STATUS_INTERNAL_ERROR;
3692 }
3693
3694 return STATUS_SUCCESS;
3695 }
3696
3697 typedef struct {
3698 EXTENT_DATA_REF edr;
3699 LIST_ENTRY list_entry;
3700 } data_ref;
3701
3702 static void add_data_ref(LIST_ENTRY* data_refs, UINT64 root, UINT64 objid, UINT64 offset) {
3703 data_ref* dr = ExAllocatePoolWithTag(PagedPool, sizeof(data_ref), ALLOC_TAG);
3704
3705 if (!dr) {
3706 ERR("out of memory\n");
3707 return;
3708 }
3709
3710 // FIXME - increase count if entry there already
3711 // FIXME - put in order?
3712
3713 dr->edr.root = root;
3714 dr->edr.objid = objid;
3715 dr->edr.offset = offset;
3716 dr->edr.count = 1;
3717
3718 InsertTailList(data_refs, &dr->list_entry);
3719 }
3720
3721 static void free_data_refs(LIST_ENTRY* data_refs) {
3722 while (!IsListEmpty(data_refs)) {
3723 LIST_ENTRY* le = RemoveHeadList(data_refs);
3724 data_ref* dr = CONTAINING_RECORD(le, data_ref, list_entry);
3725
3726 ExFreePool(dr);
3727 }
3728 }
3729
3730 static NTSTATUS convert_old_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, LIST_ENTRY* rollback) {
3731 KEY searchkey;
3732 traverse_ptr tp, next_tp;
3733 BOOL b;
3734 LIST_ENTRY data_refs;
3735 LIST_ENTRY* le;
3736 UINT64 refcount;
3737 EXTENT_ITEM* ei;
3738 ULONG eisize;
3739 UINT8* type;
3740 NTSTATUS Status;
3741
3742 searchkey.obj_id = address;
3743 searchkey.obj_type = TYPE_EXTENT_ITEM;
3744 searchkey.offset = size;
3745
3746 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3747 if (!NT_SUCCESS(Status)) {
3748 ERR("error - find_item returned %08x\n", Status);
3749 return Status;
3750 }
3751
3752 if (keycmp(&tp.item->key, &searchkey)) {
3753 WARN("extent item not found for address %llx, size %llx\n", address, size);
3754 free_traverse_ptr(&tp);
3755 return STATUS_SUCCESS;
3756 }
3757
3758 if (tp.item->size != sizeof(EXTENT_ITEM_V0)) {
3759 TRACE("extent does not appear to be old - returning STATUS_SUCCESS\n");
3760 free_traverse_ptr(&tp);
3761 return STATUS_SUCCESS;
3762 }
3763
3764 delete_tree_item(Vcb, &tp, rollback);
3765
3766 free_traverse_ptr(&tp);
3767
3768 searchkey.obj_id = address;
3769 searchkey.obj_type = TYPE_EXTENT_REF_V0;
3770 searchkey.offset = 0;
3771
3772 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3773 if (!NT_SUCCESS(Status)) {
3774 ERR("error - find_item returned %08x\n", Status);
3775 return Status;
3776 }
3777
3778 InitializeListHead(&data_refs);
3779
3780 do {
3781 b = find_next_item(Vcb, &tp, &next_tp, FALSE);
3782
3783 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
3784 tree* t;
3785
3786 // normally we'd need to acquire load_tree_lock here, but we're protected by the write tree lock
3787
3788 Status = load_tree(Vcb, tp.item->key.offset, NULL, &t);
3789
3790 if (!NT_SUCCESS(Status)) {
3791 ERR("load tree for address %llx returned %08x\n", tp.item->key.offset, Status);
3792 free_traverse_ptr(&tp);
3793 free_data_refs(&data_refs);
3794 return Status;
3795 }
3796
3797 if (t->header.level == 0) {
3798 le = t->itemlist.Flink;
3799 while (le != &t->itemlist) {
3800 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3801
3802 if (!td->ignore && td->key.obj_type == TYPE_EXTENT_DATA) {
3803 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
3804
3805 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
3806 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
3807
3808 if (ed2->address == address)
3809 add_data_ref(&data_refs, t->header.tree_id, td->key.obj_id, td->key.offset);
3810 }
3811 }
3812
3813 le = le->Flink;
3814 }
3815 }
3816
3817 free_tree(t);
3818
3819 delete_tree_item(Vcb, &tp, rollback);
3820 }
3821
3822 if (b) {
3823 free_traverse_ptr(&tp);
3824 tp = next_tp;
3825
3826 if (tp.item->key.obj_id > searchkey.obj_id || tp.item->key.obj_type > searchkey.obj_type)
3827 break;
3828 }
3829 } while (b);
3830
3831 free_traverse_ptr(&tp);
3832
3833 if (IsListEmpty(&data_refs)) {
3834 WARN("no data refs found\n");
3835 return STATUS_SUCCESS;
3836 }
3837
3838 // create new entry
3839
3840 refcount = 0;
3841
3842 le = data_refs.Flink;
3843 while (le != &data_refs) {
3844 refcount++;
3845 le = le->Flink;
3846 }
3847
3848 eisize = sizeof(EXTENT_ITEM) + ((sizeof(char) + sizeof(EXTENT_DATA_REF)) * refcount);
3849 ei = ExAllocatePoolWithTag(PagedPool, eisize, ALLOC_TAG);
3850 if (!ei) {
3851 ERR("out of memory\n");
3852 return STATUS_INSUFFICIENT_RESOURCES;
3853 }
3854
3855 ei->refcount = refcount;
3856 ei->generation = Vcb->superblock.generation;
3857 ei->flags = EXTENT_ITEM_DATA;
3858
3859 type = (UINT8*)&ei[1];
3860
3861 le = data_refs.Flink;
3862 while (le != &data_refs) {
3863 data_ref* dr = CONTAINING_RECORD(le, data_ref, list_entry);
3864
3865 type[0] = TYPE_EXTENT_DATA_REF;
3866 RtlCopyMemory(&type[1], &dr->edr, sizeof(EXTENT_DATA_REF));
3867
3868 type = &type[1 + sizeof(EXTENT_DATA_REF)];
3869
3870 le = le->Flink;
3871 }
3872
3873 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, size, ei, eisize, NULL, rollback)) {
3874 ERR("error - failed to insert item\n");
3875 ExFreePool(ei);
3876 return STATUS_INTERNAL_ERROR;
3877 }
3878
3879 free_data_refs(&data_refs);
3880
3881 return STATUS_SUCCESS;
3882 }
3883
3884 typedef struct {
3885 UINT8 type;
3886 void* data;
3887 BOOL allocated;
3888 LIST_ENTRY list_entry;
3889 } extent_ref;
3890
3891 static void free_extent_refs(LIST_ENTRY* extent_refs) {
3892 while (!IsListEmpty(extent_refs)) {
3893 LIST_ENTRY* le = RemoveHeadList(extent_refs);
3894 extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
3895
3896 if (er->allocated)
3897 ExFreePool(er->data);
3898
3899 ExFreePool(er);
3900 }
3901 }
3902
3903 static NTSTATUS convert_shared_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, LIST_ENTRY* rollback) {
3904 KEY searchkey;
3905 traverse_ptr tp;
3906 LIST_ENTRY extent_refs;
3907 LIST_ENTRY *le, *next_le;
3908 EXTENT_ITEM *ei, *newei;
3909 UINT8* siptr;
3910 ULONG len;
3911 UINT64 count;
3912 NTSTATUS Status;
3913
3914 searchkey.obj_id = address;
3915 searchkey.obj_type = TYPE_EXTENT_ITEM;
3916 searchkey.offset = size;
3917
3918 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3919 if (!NT_SUCCESS(Status)) {
3920 ERR("error - find_item returned %08x\n", Status);
3921 return Status;
3922 }
3923
3924 if (keycmp(&tp.item->key, &searchkey)) {
3925 WARN("extent item not found for address %llx, size %llx\n", address, size);
3926 free_traverse_ptr(&tp);
3927 return STATUS_SUCCESS;
3928 }
3929
3930 if (tp.item->size < sizeof(EXTENT_ITEM)) {
3931 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
3932 free_traverse_ptr(&tp);
3933 return STATUS_INTERNAL_ERROR;
3934 }
3935
3936 ei = (EXTENT_ITEM*)tp.item->data;
3937 len = tp.item->size - sizeof(EXTENT_ITEM);
3938
3939 InitializeListHead(&extent_refs);
3940
3941 siptr = (UINT8*)&ei[1];
3942
3943 do {
3944 extent_ref* er = ExAllocatePoolWithTag(PagedPool, sizeof(extent_ref), ALLOC_TAG);
3945 if (!er) {
3946 ERR("out of memory\n");
3947 free_traverse_ptr(&tp);
3948 return STATUS_INSUFFICIENT_RESOURCES;
3949 }
3950
3951 er->type = *siptr;
3952 er->data = siptr+1;
3953 er->allocated = FALSE;
3954
3955 InsertTailList(&extent_refs, &er->list_entry);
3956
3957 if (*siptr == TYPE_TREE_BLOCK_REF) {
3958 siptr += sizeof(TREE_BLOCK_REF);
3959 len -= sizeof(TREE_BLOCK_REF) + 1;
3960 } else if (*siptr == TYPE_EXTENT_DATA_REF) {
3961 siptr += sizeof(EXTENT_DATA_REF);
3962 len -= sizeof(EXTENT_DATA_REF) + 1;
3963 } else if (*siptr == TYPE_SHARED_BLOCK_REF) {
3964 siptr += sizeof(SHARED_BLOCK_REF);
3965 len -= sizeof(SHARED_BLOCK_REF) + 1;
3966 } else if (*siptr == TYPE_SHARED_DATA_REF) {
3967 siptr += sizeof(SHARED_DATA_REF);
3968 len -= sizeof(SHARED_DATA_REF) + 1;
3969 } else {
3970 ERR("unrecognized extent subitem %x\n", *siptr);
3971 free_traverse_ptr(&tp);
3972 free_extent_refs(&extent_refs);
3973 return STATUS_INTERNAL_ERROR;
3974 }
3975 } while (len > 0);
3976
3977 le = extent_refs.Flink;
3978 while (le != &extent_refs) {
3979 extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
3980 next_le = le->Flink;
3981
3982 if (er->type == TYPE_SHARED_DATA_REF) {
3983 // normally we'd need to acquire load_tree_lock here, but we're protected by the write tree lock
3984 SHARED_DATA_REF* sdr = er->data;
3985 tree* t;
3986
3987 Status = load_tree(Vcb, sdr->offset, NULL, &t);
3988 if (!NT_SUCCESS(Status)) {
3989 ERR("load_tree for address %llx returned %08x\n", sdr->offset, Status);
3990 free_traverse_ptr(&tp);
3991 free_data_refs(&extent_refs);
3992 return Status;
3993 }
3994
3995 if (t->header.level == 0) {
3996 LIST_ENTRY* le2 = t->itemlist.Flink;
3997 while (le2 != &t->itemlist) {
3998 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
3999
4000 if (!td->ignore && td->key.obj_type == TYPE_EXTENT_DATA) {
4001 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
4002
4003 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
4004 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
4005
4006 if (ed2->address == address) {
4007 extent_ref* er2;
4008 EXTENT_DATA_REF* edr;
4009
4010 er2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent_ref), ALLOC_TAG);
4011 if (!er2) {
4012 ERR("out of memory\n");
4013 free_traverse_ptr(&tp);
4014 return STATUS_INSUFFICIENT_RESOURCES;
4015 }
4016
4017 edr = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA_REF), ALLOC_TAG);
4018 if (!edr) {
4019 ERR("out of memory\n");
4020 free_traverse_ptr(&tp);
4021 ExFreePool(er2);
4022 return STATUS_INSUFFICIENT_RESOURCES;
4023 }
4024
4025 edr->root = t->header.tree_id;
4026 edr->objid = td->key.obj_id;
4027 edr->offset = td->key.offset;
4028 edr->count = 1;
4029
4030 er2->type = TYPE_EXTENT_DATA_REF;
4031 er2->data = edr;
4032 er2->allocated = TRUE;
4033
4034 InsertTailList(&extent_refs, &er2->list_entry); // FIXME - list should be in order
4035 }
4036 }
4037 }
4038
4039 le2 = le2->Flink;
4040 }
4041 }
4042
4043 free_tree(t);
4044
4045 RemoveEntryList(&er->list_entry);
4046
4047 if (er->allocated)
4048 ExFreePool(er->data);
4049
4050 ExFreePool(er);
4051 }
4052 // FIXME - also do for SHARED_BLOCK_REF?
4053
4054 le = next_le;
4055 }
4056
4057 if (IsListEmpty(&extent_refs)) {
4058 WARN("no extent refs found\n");
4059 delete_tree_item(Vcb, &tp, rollback);
4060 free_traverse_ptr(&tp);
4061 return STATUS_SUCCESS;
4062 }
4063
4064 len = 0;
4065 count = 0;
4066 le = extent_refs.Flink;
4067 while (le != &extent_refs) {
4068 extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
4069
4070 len++;
4071 if (er->type == TYPE_TREE_BLOCK_REF) {
4072 len += sizeof(TREE_BLOCK_REF);
4073 } else if (er->type == TYPE_EXTENT_DATA_REF) {
4074 len += sizeof(EXTENT_DATA_REF);
4075 } else {
4076 ERR("unexpected extent subitem %x\n", er->type);
4077 }
4078
4079 count++;
4080
4081 le = le->Flink;
4082 }
4083
4084 newei = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM) + len, ALLOC_TAG);
4085 if (!newei) {
4086 ERR("out of memory\n");
4087 free_traverse_ptr(&tp);
4088 return STATUS_INSUFFICIENT_RESOURCES;
4089 }
4090
4091 RtlCopyMemory(newei, ei, sizeof(EXTENT_ITEM));
4092 newei->refcount = count;
4093
4094 siptr = (UINT8*)&newei[1];
4095 le = extent_refs.Flink;
4096 while (le != &extent_refs) {
4097 extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
4098
4099 *siptr = er->type;
4100 siptr++;
4101
4102 if (er->type == TYPE_TREE_BLOCK_REF) {
4103 RtlCopyMemory(siptr, er->data, sizeof(TREE_BLOCK_REF));
4104 } else if (er->type == TYPE_EXTENT_DATA_REF) {
4105 RtlCopyMemory(siptr, er->data, sizeof(EXTENT_DATA_REF));
4106 } else {
4107 ERR("unexpected extent subitem %x\n", er->type);
4108 }
4109
4110 le = le->Flink;
4111 }
4112
4113 delete_tree_item(Vcb, &tp, rollback);
4114 free_traverse_ptr(&tp);
4115
4116 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, size, newei, sizeof(EXTENT_ITEM) + len, NULL, rollback)) {
4117 ERR("error - failed to insert item\n");
4118 ExFreePool(newei);
4119 free_extent_refs(&extent_refs);
4120 return STATUS_INTERNAL_ERROR;
4121 }
4122
4123 free_extent_refs(&extent_refs);
4124
4125 return STATUS_SUCCESS;
4126 }
4127
4128 static BOOL extent_item_is_shared(EXTENT_ITEM* ei, ULONG len) {
4129 UINT8* siptr = (UINT8*)&ei[1];
4130
4131 do {
4132 if (*siptr == TYPE_TREE_BLOCK_REF) {
4133 siptr += sizeof(TREE_BLOCK_REF) + 1;
4134 len -= sizeof(TREE_BLOCK_REF) + 1;
4135 } else if (*siptr == TYPE_EXTENT_DATA_REF) {
4136 siptr += sizeof(EXTENT_DATA_REF) + 1;
4137 len -= sizeof(EXTENT_DATA_REF) + 1;
4138 } else if (*siptr == TYPE_SHARED_BLOCK_REF) {
4139 return TRUE;
4140 } else if (*siptr == TYPE_SHARED_DATA_REF) {
4141 return TRUE;
4142 } else {
4143 ERR("unrecognized extent subitem %x\n", *siptr);
4144 return FALSE;
4145 }
4146 } while (len > 0);
4147
4148 return FALSE;
4149 }
4150
4151 NTSTATUS STDCALL remove_extent_ref(device_extension* Vcb, UINT64 address, UINT64 size, root* subvol, UINT64 inode, UINT64 offset, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
4152 KEY searchkey;
4153 traverse_ptr tp;
4154 EXTENT_ITEM* ei;
4155 UINT8* siptr;
4156 ULONG len;
4157 EXTENT_DATA_REF* edr;
4158 BOOL found;
4159 NTSTATUS Status;
4160
4161 TRACE("(%p, %llx, %llx, %llx, %llx, %llx)\n", Vcb, address, size, subvol->id, inode, offset);
4162
4163 searchkey.obj_id = address;
4164 searchkey.obj_type = TYPE_EXTENT_ITEM;
4165 searchkey.offset = size;
4166
4167 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
4168 if (!NT_SUCCESS(Status)) {
4169 ERR("error - find_item returned %08x\n", Status);
4170 return Status;
4171 }
4172
4173 if (keycmp(&tp.item->key, &searchkey)) {
4174 WARN("extent item not found for address %llx, size %llx\n", address, size);
4175 free_traverse_ptr(&tp);
4176 return STATUS_SUCCESS;
4177 }
4178
4179 if (tp.item->size == sizeof(EXTENT_ITEM_V0)) { // old extent ref, convert
4180 NTSTATUS Status = convert_old_data_extent(Vcb, address, size, rollback);
4181 if (!NT_SUCCESS(Status)) {
4182 ERR("convert_old_data_extent returned %08x\n", Status);
4183 free_traverse_ptr(&tp);
4184 return Status;
4185 }
4186
4187 free_traverse_ptr(&tp);
4188
4189 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
4190 if (!NT_SUCCESS(Status)) {
4191 ERR("error - find_item returned %08x\n", Status);
4192 return Status;
4193 }
4194
4195 if (keycmp(&tp.item->key, &searchkey)) {
4196 WARN("extent item not found for address %llx, size %llx\n", address, size);
4197 free_traverse_ptr(&tp);
4198 return STATUS_SUCCESS;
4199 }
4200 }
4201
4202 ei = (EXTENT_ITEM*)tp.item->data;
4203
4204 if (tp.item->size < sizeof(EXTENT_ITEM)) {
4205 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
4206 free_traverse_ptr(&tp);
4207 return STATUS_INTERNAL_ERROR;
4208 }
4209
4210 if (!(ei->flags & EXTENT_ITEM_DATA)) {
4211 ERR("error - EXTENT_ITEM_DATA flag not set\n");
4212 free_traverse_ptr(&tp);
4213 return STATUS_INTERNAL_ERROR;
4214 }
4215
4216 // FIXME - is ei->refcount definitely the number of items, or is it the sum of the subitem refcounts?
4217
4218 if (extent_item_is_shared(ei, tp.item->size - sizeof(EXTENT_ITEM))) {
4219 NTSTATUS Status = convert_shared_data_extent(Vcb, address, size, rollback);
4220 if (!NT_SUCCESS(Status)) {
4221 ERR("convert_shared_data_extent returned %08x\n", Status);
4222 free_traverse_ptr(&tp);
4223 return Status;
4224 }
4225
4226 free_traverse_ptr(&tp);
4227
4228 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
4229 if (!NT_SUCCESS(Status)) {
4230 ERR("error - find_item returned %08x\n", Status);
4231 return Status;
4232 }
4233
4234 if (keycmp(&tp.item->key, &searchkey)) {
4235 WARN("extent item not found for address %llx, size %llx\n", address, size);
4236 free_traverse_ptr(&tp);
4237 return STATUS_SUCCESS;
4238 }
4239
4240 ei = (EXTENT_ITEM*)tp.item->data;
4241 }
4242
4243 len = tp.item->size - sizeof(EXTENT_ITEM);
4244 siptr = (UINT8*)&ei[1];
4245 found = FALSE;
4246
4247 do {
4248 if (*siptr == TYPE_EXTENT_DATA_REF) {
4249 edr = (EXTENT_DATA_REF*)&siptr[1];
4250
4251 if (edr->root == subvol->id && edr->objid == inode && edr->offset == offset) {
4252 found = TRUE;
4253 break;
4254 }
4255
4256 siptr += sizeof(UINT8) + sizeof(EXTENT_DATA_REF);
4257
4258 if (len > sizeof(EXTENT_DATA_REF) + sizeof(UINT8)) {
4259 len -= sizeof(EXTENT_DATA_REF) + sizeof(UINT8);
4260 } else
4261 break;
4262 // // FIXME - TYPE_TREE_BLOCK_REF 0xB0
4263 } else {
4264 ERR("unrecognized extent subitem %x\n", *siptr);
4265 free_traverse_ptr(&tp);
4266 return STATUS_INTERNAL_ERROR;
4267 }
4268 } while (len > 0);
4269
4270 if (!found) {
4271 WARN("could not find extent data ref\n");
4272 free_traverse_ptr(&tp);
4273 return STATUS_SUCCESS;
4274 }
4275
4276 // FIXME - decrease subitem refcount if there already?
4277
4278 len = tp.item->size - sizeof(UINT8) - sizeof(EXTENT_DATA_REF);
4279
4280 delete_tree_item(Vcb, &tp, rollback);
4281
4282 if (len == sizeof(EXTENT_ITEM)) { // extent no longer needed
4283 chunk* c;
4284 LIST_ENTRY* le2;
4285
4286 if (changed_sector_list) {
4287 changed_sector* sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
4288 if (!sc) {
4289 ERR("out of memory\n");
4290 free_traverse_ptr(&tp);
4291 return STATUS_INSUFFICIENT_RESOURCES;
4292 }
4293
4294 sc->ol.key = address;
4295 sc->checksums = NULL;
4296 sc->length = size / Vcb->superblock.sector_size;
4297
4298 sc->deleted = TRUE;
4299
4300 insert_into_ordered_list(changed_sector_list, &sc->ol);
4301 }
4302
4303 c = NULL;
4304 le2 = Vcb->chunks.Flink;
4305 while (le2 != &Vcb->chunks) {
4306 c = CONTAINING_RECORD(le2, chunk, list_entry);
4307
4308 TRACE("chunk: %llx, %llx\n", c->offset, c->chunk_item->size);
4309
4310 if (address >= c->offset && address + size < c->offset + c->chunk_item->size)
4311 break;
4312
4313 le2 = le2->Flink;
4314 }
4315 if (le2 == &Vcb->chunks) c = NULL;
4316
4317 if (c) {
4318 decrease_chunk_usage(c, size);
4319
4320 add_to_space_list(c, address, size, SPACE_TYPE_DELETING);
4321 }
4322
4323 free_traverse_ptr(&tp);
4324 return STATUS_SUCCESS;
4325 }
4326
4327 ei = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
4328 if (!ei) {
4329 ERR("out of memory\n");
4330 free_traverse_ptr(&tp);
4331 return STATUS_INSUFFICIENT_RESOURCES;
4332 }
4333
4334 RtlCopyMemory(ei, tp.item->data, siptr - tp.item->data);
4335 ei->refcount--;
4336 ei->generation = Vcb->superblock.generation;
4337
4338 if (tp.item->data + len != siptr)
4339 RtlCopyMemory((UINT8*)ei + (siptr - tp.item->data), siptr + sizeof(UINT8) + sizeof(EXTENT_DATA_REF), tp.item->size - (siptr - tp.item->data) - sizeof(UINT8) - sizeof(EXTENT_DATA_REF));
4340
4341 free_traverse_ptr(&tp);
4342
4343 if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ei, len, NULL, rollback)) {
4344 ERR("error - failed to insert item\n");
4345 ExFreePool(ei);
4346 return STATUS_INTERNAL_ERROR;
4347 }
4348
4349 return STATUS_SUCCESS;
4350 }
4351
4352 static __inline BOOL entry_in_ordered_list(LIST_ENTRY* list, UINT64 value) {
4353 LIST_ENTRY* le = list->Flink;
4354 ordered_list* ol;
4355
4356 while (le != list) {
4357 ol = (ordered_list*)le;
4358
4359 if (ol->key > value)
4360 return FALSE;
4361 else if (ol->key == value)
4362 return TRUE;
4363
4364 le = le->Flink;
4365 }
4366
4367 return FALSE;
4368 }
4369
4370 NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 end_data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
4371 KEY searchkey;
4372 traverse_ptr tp, next_tp;
4373 NTSTATUS Status;
4374 BOOL b;
4375
4376 TRACE("(%p, (%llx, %llx), %llx, %llx, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, end_data, changed_sector_list);
4377
4378 searchkey.obj_id = fcb->inode;
4379 searchkey.obj_type = TYPE_EXTENT_DATA;
4380 searchkey.offset = start_data;
4381
4382 Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
4383 if (!NT_SUCCESS(Status)) {
4384 ERR("error - find_item returned %08x\n", Status);
4385 return Status;
4386 }
4387
4388 do {
4389 EXTENT_DATA* ed = (EXTENT_DATA*)tp.item->data;
4390 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
4391 UINT64 len;
4392
4393 if (tp.item->size < sizeof(EXTENT_DATA)) {
4394 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
4395 Status = STATUS_INTERNAL_ERROR;
4396 goto end;
4397 }
4398
4399 if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
4400 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
4401 Status = STATUS_INTERNAL_ERROR;
4402 goto end;
4403 }
4404
4405 b = find_next_item(Vcb, &tp, &next_tp, FALSE);
4406
4407 len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
4408
4409 if (tp.item->key.offset < end_data && tp.item->key.offset + len >= start_data) {
4410 if (ed->compression != BTRFS_COMPRESSION_NONE) {
4411 FIXME("FIXME - compression not supported at present\n");
4412 Status = STATUS_NOT_SUPPORTED;
4413 goto end;
4414 }
4415
4416 if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
4417 WARN("root %llx, inode %llx, extent %llx: encryption not supported (type %x)\n", fcb->subvol->id, fcb->inode, tp.item->key.offset, ed->encryption);
4418 Status = STATUS_NOT_SUPPORTED;
4419 goto end;
4420 }
4421
4422 if (ed->encoding != BTRFS_ENCODING_NONE) {
4423 WARN("other encodings not supported\n");
4424 Status = STATUS_NOT_SUPPORTED;
4425 goto end;
4426 }
4427
4428 if (ed->type == EXTENT_TYPE_INLINE) {
4429 if (start_data <= tp.item->key.offset && end_data >= tp.item->key.offset + len) { // remove all
4430 delete_tree_item(Vcb, &tp, rollback);
4431
4432 fcb->inode_item.st_blocks -= len;
4433 } else if (start_data <= tp.item->key.offset && end_data < tp.item->key.offset + len) { // remove beginning
4434 EXTENT_DATA* ned;
4435 UINT64 size;
4436
4437 delete_tree_item(Vcb, &tp, rollback);
4438
4439 size = len - (end_data - tp.item->key.offset);
4440
4441 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
4442 if (!ned) {
4443 ERR("out of memory\n");
4444 Status = STATUS_INSUFFICIENT_RESOURCES;
4445 goto end;
4446 }
4447
4448 ned->generation = Vcb->superblock.generation;
4449 ned->decoded_size = size;
4450 ned->compression = ed->compression;
4451 ned->encryption = ed->encryption;
4452 ned->encoding = ed->encoding;
4453 ned->type = ed->type;
4454
4455 RtlCopyMemory(&ned->data[0], &ed->data[end_data - tp.item->key.offset], size);
4456
4457 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, end_data, ned, sizeof(EXTENT_DATA) - 1 + size, NULL, rollback)) {
4458 ERR("insert_tree_item failed\n");
4459 ExFreePool(ned);
4460 Status = STATUS_INTERNAL_ERROR;
4461 goto end;
4462 }
4463
4464 fcb->inode_item.st_blocks -= end_data - tp.item->key.offset;
4465 } else if (start_data > tp.item->key.offset && end_data >= tp.item->key.offset + len) { // remove end
4466 EXTENT_DATA* ned;
4467 UINT64 size;
4468
4469 delete_tree_item(Vcb, &tp, rollback);
4470
4471 size = start_data - tp.item->key.offset;
4472
4473 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
4474 if (!ned) {
4475 ERR("out of memory\n");
4476 Status = STATUS_INSUFFICIENT_RESOURCES;
4477 goto end;
4478 }
4479
4480 ned->generation = Vcb->superblock.generation;
4481 ned->decoded_size = size;
4482 ned->compression = ed->compression;
4483 ned->encryption = ed->encryption;
4484 ned->encoding = ed->encoding;
4485 ned->type = ed->type;
4486
4487 RtlCopyMemory(&ned->data[0], &ed->data[0], size);
4488
4489 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, sizeof(EXTENT_DATA) - 1 + size, NULL, rollback)) {
4490 ERR("insert_tree_item failed\n");
4491 ExFreePool(ned);
4492 Status = STATUS_INTERNAL_ERROR;
4493 goto end;
4494 }
4495
4496 fcb->inode_item.st_blocks -= tp.item->key.offset + len - start_data;
4497 } else if (start_data > tp.item->key.offset && end_data < tp.item->key.offset + len) { // remove middle
4498 EXTENT_DATA* ned;
4499 UINT64 size;
4500
4501 delete_tree_item(Vcb, &tp, rollback);
4502
4503 size = start_data - tp.item->key.offset;
4504
4505 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
4506 if (!ned) {
4507 ERR("out of memory\n");
4508 Status = STATUS_INSUFFICIENT_RESOURCES;
4509 goto end;
4510 }
4511
4512 ned->generation = Vcb->superblock.generation;
4513 ned->decoded_size = size;
4514 ned->compression = ed->compression;
4515 ned->encryption = ed->encryption;
4516 ned->encoding = ed->encoding;
4517 ned->type = ed->type;
4518
4519 RtlCopyMemory(&ned->data[0], &ed->data[0], size);
4520
4521 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, sizeof(EXTENT_DATA) - 1 + size, NULL, rollback)) {
4522 ERR("insert_tree_item failed\n");
4523 ExFreePool(ned);
4524 Status = STATUS_INTERNAL_ERROR;
4525 goto end;
4526 }
4527
4528 size = tp.item->key.offset + len - end_data;
4529
4530 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
4531 if (!ned) {
4532 ERR("out of memory\n");
4533 Status = STATUS_INSUFFICIENT_RESOURCES;
4534 goto end;
4535 }
4536
4537 ned->generation = Vcb->superblock.generation;
4538 ned->decoded_size = size;
4539 ned->compression = ed->compression;
4540 ned->encryption = ed->encryption;
4541 ned->encoding = ed->encoding;
4542 ned->type = ed->type;
4543
4544 RtlCopyMemory(&ned->data[0], &ed->data[end_data - tp.item->key.offset], size);
4545
4546 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, end_data, ned, sizeof(EXTENT_DATA) - 1 + size, NULL, rollback)) {
4547 ERR("insert_tree_item failed\n");
4548 ExFreePool(ned);
4549 Status = STATUS_INTERNAL_ERROR;
4550 goto end;
4551 }
4552
4553 fcb->inode_item.st_blocks -= end_data - start_data;
4554 }
4555 } else if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
4556 if (start_data <= tp.item->key.offset && end_data >= tp.item->key.offset + len) { // remove all
4557 if (ed2->address != 0) {
4558 Status = remove_extent_ref(Vcb, ed2->address, ed2->size, fcb->subvol, fcb->inode, tp.item->key.offset - ed2->offset, changed_sector_list, rollback);
4559 if (!NT_SUCCESS(Status)) {
4560 ERR("remove_extent_ref returned %08x\n", Status);
4561 goto end;
4562 }
4563
4564 fcb->inode_item.st_blocks -= len;
4565 }
4566
4567 delete_tree_item(Vcb, &tp, rollback);
4568 } else if (start_data <= tp.item->key.offset && end_data < tp.item->key.offset + len) { // remove beginning
4569 EXTENT_DATA* ned;
4570 EXTENT_DATA2* ned2;
4571
4572 if (ed2->address != 0)
4573 fcb->inode_item.st_blocks -= end_data - tp.item->key.offset;
4574
4575 delete_tree_item(Vcb, &tp, rollback);
4576
4577 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
4578 if (!ned) {
4579 ERR("out of memory\n");
4580 Status = STATUS_INSUFFICIENT_RESOURCES;
4581 goto end;
4582 }
4583
4584 ned2 = (EXTENT_DATA2*)&ned->data[0];
4585
4586 ned->generation = Vcb->superblock.generation;
4587 ned->decoded_size = ed->decoded_size;
4588 ned->compression = ed->compression;
4589 ned->encryption = ed->encryption;
4590 ned->encoding = ed->encoding;
4591 ned->type = ed->type;
4592 ned2->address = ed2->address;
4593 ned2->size = ed2->size;
4594 ned2->offset = ed2->address == 0 ? 0 : (ed2->offset + (end_data - tp.item->key.offset));
4595 ned2->num_bytes = ed2->num_bytes - (end_data - tp.item->key.offset);
4596
4597 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, end_data, ned, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
4598 ERR("insert_tree_item failed\n");
4599 ExFreePool(ned);
4600 Status = STATUS_INTERNAL_ERROR;
4601 goto end;
4602 }
4603 } else if (start_data > tp.item->key.offset && end_data >= tp.item->key.offset + len) { // remove end
4604 EXTENT_DATA* ned;
4605 EXTENT_DATA2* ned2;
4606
4607 if (ed2->address != 0)
4608 fcb->inode_item.st_blocks -= tp.item->key.offset + len - start_data;
4609
4610 delete_tree_item(Vcb, &tp, rollback);
4611
4612 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
4613 if (!ned) {
4614 ERR("out of memory\n");
4615 Status = STATUS_INSUFFICIENT_RESOURCES;
4616 goto end;
4617 }
4618
4619 ned2 = (EXTENT_DATA2*)&ned->data[0];
4620
4621 ned->generation = Vcb->superblock.generation;
4622 ned->decoded_size = ed->decoded_size;
4623 ned->compression = ed->compression;
4624 ned->encryption = ed->encryption;
4625 ned->encoding = ed->encoding;
4626 ned->type = ed->type;
4627 ned2->address = ed2->address;
4628 ned2->size = ed2->size;
4629 ned2->offset = ed2->address == 0 ? 0 : ed2->offset;
4630 ned2->num_bytes = start_data - tp.item->key.offset;
4631
4632 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
4633 ERR("insert_tree_item failed\n");
4634 ExFreePool(ned);
4635 Status = STATUS_INTERNAL_ERROR;
4636 goto end;
4637 }
4638 } else if (start_data > tp.item->key.offset && end_data < tp.item->key.offset + len) { // remove middle
4639 EXTENT_DATA* ned;
4640 EXTENT_DATA2* ned2;
4641
4642 if (ed2->address != 0)
4643 fcb->inode_item.st_blocks -= end_data - start_data;
4644
4645 delete_tree_item(Vcb, &tp, rollback);
4646
4647 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
4648 if (!ned) {
4649 ERR("out of memory\n");
4650 Status = STATUS_INSUFFICIENT_RESOURCES;
4651 goto end;
4652 }
4653
4654 ned2 = (EXTENT_DATA2*)&ned->data[0];
4655
4656 ned->generation = Vcb->superblock.generation;
4657 ned->decoded_size = ed->decoded_size;
4658 ned->compression = ed->compression;
4659 ned->encryption = ed->encryption;
4660 ned->encoding = ed->encoding;
4661 ned->type = ed->type;
4662 ned2->address = ed2->address;
4663 ned2->size = ed2->size;
4664 ned2->offset = ed2->address == 0 ? 0 : ed2->offset;
4665 ned2->num_bytes = start_data - tp.item->key.offset;
4666
4667 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
4668 ERR("insert_tree_item failed\n");
4669 ExFreePool(ned);
4670 Status = STATUS_INTERNAL_ERROR;
4671 goto end;
4672 }
4673
4674 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
4675 if (!ned) {
4676 ERR("out of memory\n");
4677 Status = STATUS_INSUFFICIENT_RESOURCES;
4678 goto end;
4679 }
4680
4681 ned2 = (EXTENT_DATA2*)&ned->data[0];
4682
4683 ned->generation = Vcb->superblock.generation;
4684 ned->decoded_size = ed->decoded_size;
4685 ned->compression = ed->compression;
4686 ned->encryption = ed->encryption;
4687 ned->encoding = ed->encoding;
4688 ned->type = ed->type;
4689 ned2->address = ed2->address;
4690 ned2->size = ed2->size;
4691 ned2->offset = ed2->address == 0 ? 0 : (ed2->offset + (end_data - tp.item->key.offset));
4692 ned2->num_bytes = tp.item->key.offset + len - end_data;
4693
4694 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, end_data, ned, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
4695 ERR("insert_tree_item failed\n");
4696 ExFreePool(ned);
4697 Status = STATUS_INTERNAL_ERROR;
4698 goto end;
4699 }
4700 }
4701 }
4702 }
4703
4704 if (b) {
4705 free_traverse_ptr(&tp);
4706 tp = next_tp;
4707
4708 if (tp.item->key.obj_id > fcb->inode || tp.item->key.obj_type > TYPE_EXTENT_DATA || tp.item->key.offset >= end_data)
4709 break;
4710 }
4711 } while (b);
4712
4713 // FIXME - do bitmap analysis of changed extents, and free what we can
4714
4715 Status = STATUS_SUCCESS;
4716
4717 end:
4718 free_traverse_ptr(&tp);
4719
4720 return Status;
4721 }
4722
4723 static BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start_data, UINT64 length, void* data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
4724 UINT64 address;
4725 NTSTATUS Status;
4726 EXTENT_ITEM_DATA_REF* eidr;
4727 EXTENT_DATA* ed;
4728 EXTENT_DATA2* ed2;
4729 ULONG edsize = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
4730 changed_sector* sc;
4731 traverse_ptr tp;
4732 int i;
4733
4734 TRACE("(%p, (%llx, %llx), %llx, %llx, %llx, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, c->offset, start_data, length, data, changed_sector_list);
4735
4736 if (!find_address_in_chunk(Vcb, c, length, &address))
4737 return FALSE;
4738
4739 eidr = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_DATA_REF), ALLOC_TAG);
4740 if (!eidr) {
4741 ERR("out of memory\n");
4742 return FALSE;
4743 }
4744
4745 eidr->ei.refcount = 1;
4746 eidr->ei.generation = Vcb->superblock.generation;
4747 eidr->ei.flags = EXTENT_ITEM_DATA;
4748 eidr->type = TYPE_EXTENT_DATA_REF;
4749 eidr->edr.root = fcb->subvol->id;
4750 eidr->edr.objid = fcb->inode;
4751 eidr->edr.offset = start_data;
4752 eidr->edr.count = 1;
4753
4754 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, length, eidr, sizeof(EXTENT_ITEM_DATA_REF), &tp, rollback)) {
4755 ERR("insert_tree_item failed\n");
4756 ExFreePool(eidr);
4757 return FALSE;
4758 }
4759
4760 tp.tree->header.generation = eidr->ei.generation;
4761
4762 free_traverse_ptr(&tp);
4763
4764 Status = write_data(Vcb, address, data, length);
4765 if (!NT_SUCCESS(Status)) {
4766 ERR("write_data returned %08x\n", Status);
4767 return FALSE;
4768 }
4769
4770 if (changed_sector_list) {
4771 sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
4772 if (!sc) {
4773 ERR("out of memory\n");
4774 return FALSE;
4775 }
4776
4777 sc->ol.key = address;
4778 sc->length = length / Vcb->superblock.sector_size;
4779 sc->deleted = FALSE;
4780
4781 sc->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sc->length, ALLOC_TAG);
4782 if (!sc->checksums) {
4783 ERR("out of memory\n");
4784 ExFreePool(sc);
4785 return FALSE;
4786 }
4787
4788 for (i = 0; i < sc->length; i++) {
4789 sc->checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
4790 }
4791
4792 insert_into_ordered_list(changed_sector_list, &sc->ol);
4793 }
4794
4795 // add extent data to inode
4796 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
4797 if (!ed) {
4798 ERR("out of memory\n");
4799 return FALSE;
4800 }
4801
4802 ed->generation = Vcb->superblock.generation;
4803 ed->decoded_size = length;
4804 ed->compression = BTRFS_COMPRESSION_NONE;
4805 ed->encryption = BTRFS_ENCRYPTION_NONE;
4806 ed->encoding = BTRFS_ENCODING_NONE;
4807 ed->type = EXTENT_TYPE_REGULAR;
4808
4809 ed2 = (EXTENT_DATA2*)ed->data;
4810 ed2->address = address;
4811 ed2->size = length;
4812 ed2->offset = 0;
4813 ed2->num_bytes = length;
4814
4815 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, start_data, ed, edsize, NULL, rollback)) {
4816 ERR("insert_tree_item failed\n");
4817 ExFreePool(ed);
4818 return FALSE;
4819 }
4820
4821 increase_chunk_usage(c, length);
4822 add_to_space_list(c, address, length, SPACE_TYPE_WRITING);
4823
4824 fcb->inode_item.st_blocks += length;
4825
4826 return TRUE;
4827 }
4828
4829 static BOOL extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data,
4830 LIST_ENTRY* changed_sector_list, traverse_ptr* edtp, traverse_ptr* eitp, LIST_ENTRY* rollback) {
4831 EXTENT_DATA* ed;
4832 EXTENT_DATA2* ed2;
4833 EXTENT_ITEM* ei;
4834 NTSTATUS Status;
4835 changed_sector* sc;
4836 chunk* c;
4837 int i;
4838
4839 TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data,
4840 length, data, changed_sector_list, edtp, eitp);
4841
4842 ed = ExAllocatePoolWithTag(PagedPool, edtp->item->size, ALLOC_TAG);
4843 if (!ed) {
4844 ERR("out of memory\n");
4845 return FALSE;
4846 }
4847
4848 RtlCopyMemory(ed, edtp->item->data, edtp->item->size);
4849
4850 ed->decoded_size += length;
4851 ed2 = (EXTENT_DATA2*)ed->data;
4852 ed2->size += length;
4853 ed2->num_bytes += length;
4854
4855 delete_tree_item(Vcb, edtp, rollback);
4856
4857 if (!insert_tree_item(Vcb, fcb->subvol, edtp->item->key.obj_id, edtp->item->key.obj_type, edtp->item->key.offset, ed, edtp->item->size, NULL, rollback)) {
4858 TRACE("insert_tree_item failed\n");
4859
4860 ExFreePool(ed);
4861 return FALSE;
4862 }
4863
4864 ei = ExAllocatePoolWithTag(PagedPool, eitp->item->size, ALLOC_TAG);
4865 if (!ei) {
4866 ERR("out of memory\n");
4867 ExFreePool(ed);
4868 return FALSE;
4869 }
4870
4871 RtlCopyMemory(ei, eitp->item->data, eitp->item->size);
4872
4873 if (!insert_tree_item(Vcb, Vcb->extent_root, eitp->item->key.obj_id, eitp->item->key.obj_type, eitp->item->key.offset + length, ei, eitp->item->size, NULL, rollback)) {
4874 ERR("insert_tree_item failed\n");
4875
4876 ExFreePool(ei);
4877 return FALSE;
4878 }
4879
4880 delete_tree_item(Vcb, eitp, rollback);
4881
4882 Status = write_data(Vcb, eitp->item->key.obj_id + eitp->item->key.offset, data, length);
4883 if (!NT_SUCCESS(Status)) {
4884 ERR("write_data returned %08x\n", Status);
4885 return FALSE;
4886 }
4887
4888 if (changed_sector_list) {
4889 sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
4890 if (!sc) {
4891 ERR("out of memory\n");
4892 return FALSE;
4893 }
4894
4895 sc->ol.key = eitp->item->key.obj_id + eitp->item->key.offset;
4896 sc->length = length / Vcb->superblock.sector_size;
4897 sc->deleted = FALSE;
4898
4899 sc->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sc->length, ALLOC_TAG);
4900 if (!sc->checksums) {
4901 ERR("out of memory\n");
4902 ExFreePool(sc);
4903 return FALSE;
4904 }
4905
4906 for (i = 0; i < sc->length; i++) {
4907 sc->checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
4908 }
4909 insert_into_ordered_list(changed_sector_list, &sc->ol);
4910 }
4911
4912 c = get_chunk_from_address(Vcb, eitp->item->key.obj_id);
4913
4914 if (c) {
4915 increase_chunk_usage(c, length);
4916
4917 add_to_space_list(c, eitp->item->key.obj_id + eitp->item->key.offset, length, SPACE_TYPE_WRITING);
4918 }
4919
4920 fcb->inode_item.st_blocks += length;
4921
4922 return TRUE;
4923 }
4924
4925 static BOOL try_extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data,
4926 LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
4927 KEY searchkey;
4928 traverse_ptr tp, tp2;
4929 BOOL success = FALSE;
4930 EXTENT_DATA* ed;
4931 EXTENT_DATA2* ed2;
4932 EXTENT_ITEM* ei;
4933 chunk* c;
4934 LIST_ENTRY* le;
4935 space* s;
4936 NTSTATUS Status;
4937
4938 searchkey.obj_id = fcb->inode;
4939 searchkey.obj_type = TYPE_EXTENT_DATA;
4940 searchkey.offset = start_data;
4941
4942 Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
4943 if (!NT_SUCCESS(Status)) {
4944 ERR("error - find_item returned %08x\n", Status);
4945 return FALSE;
4946 }
4947
4948 if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || tp.item->key.offset >= start_data) {
4949 WARN("previous EXTENT_DATA not found\n");
4950 goto end;
4951 }
4952
4953 ed = (EXTENT_DATA*)tp.item->data;
4954
4955 if (tp.item->size < sizeof(EXTENT_DATA)) {
4956 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
4957 goto end;
4958 }
4959
4960 if (ed->type != EXTENT_TYPE_REGULAR) {
4961 TRACE("not extending extent which is not EXTENT_TYPE_REGULAR\n");
4962 goto end;
4963 }
4964
4965 ed2 = (EXTENT_DATA2*)ed->data;
4966
4967 if (tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
4968 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
4969 goto end;
4970 }
4971
4972 if (tp.item->key.offset + ed2->num_bytes != start_data) {
4973 TRACE("last EXTENT_DATA does not run up to start_data (%llx + %llx != %llx)\n", tp.item->key.offset, ed2->num_bytes, start_data);
4974 goto end;
4975 }
4976
4977 if (ed->compression != BTRFS_COMPRESSION_NONE) {
4978 FIXME("FIXME: compression not yet supported\n");
4979 goto end;
4980 }
4981
4982 if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
4983 WARN("encryption not supported\n");
4984 goto end;
4985 }
4986
4987 if (ed->encoding != BTRFS_ENCODING_NONE) {
4988 WARN("other encodings not supported\n");
4989 goto end;
4990 }
4991
4992 if (ed2->size - ed2->offset != ed2->num_bytes) {
4993 TRACE("last EXTENT_DATA does not run all the way to the end of the extent\n");
4994 goto end;
4995 }
4996
4997 searchkey.obj_id = ed2->address;
4998 searchkey.obj_type = TYPE_EXTENT_ITEM;
4999 searchkey.offset = ed2->size;
5000
5001 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
5002 if (!NT_SUCCESS(Status)) {
5003 ERR("error - find_item returned %08x\n", Status);
5004 goto end;
5005 }
5006
5007 if (keycmp(&tp2.item->key, &searchkey)) {
5008 ERR("error - extent %llx,%llx not found in tree\n", ed2->address, ed2->size);
5009 int3; // TESTING
5010 goto end2;
5011 }
5012
5013 if (tp2.item->size == sizeof(EXTENT_ITEM_V0)) { // old extent ref, convert
5014 NTSTATUS Status = convert_old_data_extent(Vcb, ed2->address, ed2->size, rollback);
5015 if (!NT_SUCCESS(Status)) {
5016 ERR("convert_old_data_extent returned %08x\n", Status);
5017 goto end2;
5018 }
5019
5020 free_traverse_ptr(&tp2);
5021
5022 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
5023 if (!NT_SUCCESS(Status)) {
5024 ERR("error - find_item returned %08x\n", Status);
5025 goto end;
5026 }
5027
5028 if (keycmp(&tp2.item->key, &searchkey)) {
5029 WARN("extent item not found for address %llx, size %llx\n", ed2->address, ed2->size);
5030 goto end2;
5031 }
5032 }
5033
5034 ei = (EXTENT_ITEM*)tp2.item->data;
5035
5036 if (tp.item->size < sizeof(EXTENT_ITEM)) {
5037 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
5038 goto end2;
5039 }
5040
5041 // FIXME - test this
5042 if (extent_item_is_shared(ei, tp2.item->size - sizeof(EXTENT_ITEM))) {
5043 NTSTATUS Status = convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
5044 if (!NT_SUCCESS(Status)) {
5045 ERR("convert_shared_data_extent returned %08x\n", Status);
5046 goto end2;
5047 }
5048
5049 free_traverse_ptr(&tp2);
5050
5051 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
5052 if (!NT_SUCCESS(Status)) {
5053 ERR("error - find_item returned %08x\n", Status);
5054 goto end;
5055 }
5056
5057 if (keycmp(&tp2.item->key, &searchkey)) {
5058 WARN("extent item not found for address %llx, size %llx\n", ed2->address, ed2->size);
5059 goto end2;
5060 }
5061
5062 ei = (EXTENT_ITEM*)tp2.item->data;
5063
5064 if (tp.item->size < sizeof(EXTENT_ITEM)) {
5065 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
5066 goto end2;
5067 }
5068 }
5069
5070 if (ei->refcount != 1) {
5071 TRACE("extent refcount was not 1\n");
5072 goto end2;
5073 }
5074
5075 if (ei->flags != EXTENT_ITEM_DATA) {
5076 ERR("error - extent was not a data extent\n");
5077 goto end2;
5078 }
5079
5080 c = get_chunk_from_address(Vcb, ed2->address);
5081
5082 le = c->space.Flink;
5083 while (le != &c->space) {
5084 s = CONTAINING_RECORD(le, space, list_entry);
5085
5086 if (s->offset == ed2->address + ed2->size) {
5087 if (s->type == SPACE_TYPE_FREE && s->size >= length) {
5088 success = extend_data(Vcb, fcb, start_data, length, data, changed_sector_list, &tp, &tp2, rollback);
5089 }
5090 break;
5091 } else if (s->offset > ed2->address + ed2->size)
5092 break;
5093
5094 le = le->Flink;
5095 }
5096
5097 end2:
5098 free_traverse_ptr(&tp2);
5099
5100 end:
5101 free_traverse_ptr(&tp);
5102
5103 return success;
5104 }
5105
5106 NTSTATUS insert_sparse_extent(device_extension* Vcb, root* r, UINT64 inode, UINT64 start, UINT64 length, LIST_ENTRY* rollback) {
5107 EXTENT_DATA* ed;
5108 EXTENT_DATA2* ed2;
5109
5110 TRACE("(%p, %llx, %llx, %llx, %llx)\n", Vcb, r->id, inode, start, length);
5111
5112 ed = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
5113 if (!ed) {
5114 ERR("out of memory\n");
5115 return STATUS_INSUFFICIENT_RESOURCES;
5116 }
5117
5118 ed->generation = Vcb->superblock.generation;
5119 ed->decoded_size = length;
5120 ed->compression = BTRFS_COMPRESSION_NONE;
5121 ed->encryption = BTRFS_ENCRYPTION_NONE;
5122 ed->encoding = BTRFS_ENCODING_NONE;
5123 ed->type = EXTENT_TYPE_REGULAR;
5124
5125 ed2 = (EXTENT_DATA2*)ed->data;
5126 ed2->address = 0;
5127 ed2->size = 0;
5128 ed2->offset = 0;
5129 ed2->num_bytes = length;
5130
5131 if (!insert_tree_item(Vcb, r, inode, TYPE_EXTENT_DATA, start, ed, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
5132 ERR("insert_tree_item failed\n");
5133 ExFreePool(ed);
5134 return STATUS_INTERNAL_ERROR;
5135 }
5136
5137 return STATUS_SUCCESS;
5138 }
5139
5140 // static void print_tree(tree* t) {
5141 // LIST_ENTRY* le = t->itemlist.Flink;
5142 // while (le != &t->itemlist) {
5143 // tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
5144 // ERR("%llx,%x,%llx (ignore = %s)\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->ignore ? "TRUE" : "FALSE");
5145 // le = le->Flink;
5146 // }
5147 // }
5148
5149 static NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
5150 LIST_ENTRY* le = Vcb->chunks.Flink;
5151 chunk* c;
5152 KEY searchkey;
5153 UINT64 flags;
5154
5155 TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data, changed_sector_list);
5156
5157 // FIXME - split data up if not enough space for just one extent
5158
5159 if (start_data > 0 && try_extend_data(Vcb, fcb, start_data, length, data, changed_sector_list, rollback))
5160 return STATUS_SUCCESS;
5161
5162 // if there is a gap before start_data, plug it with a sparse extent
5163 if (start_data > 0) {
5164 traverse_ptr tp;
5165 NTSTATUS Status;
5166 EXTENT_DATA* ed;
5167 UINT64 len;
5168
5169 searchkey.obj_id = fcb->inode;
5170 searchkey.obj_type = TYPE_EXTENT_DATA;
5171 searchkey.offset = start_data;
5172
5173 Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
5174 if (!NT_SUCCESS(Status)) {
5175 ERR("error - find_item returned %08x\n", Status);
5176 return Status;
5177 }
5178
5179 // if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || tp.item->key.offset >= start_data) {
5180 // traverse_ptr next_tp;
5181 //
5182 // ERR("error - did not find EXTENT_DATA expected - looking for %llx,%x,%llx, found %llx,%x,%llx\n",
5183 // searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
5184 // print_tree(tp.tree);
5185 //
5186 // if (find_next_item(Vcb, &tp, &next_tp, FALSE)) {
5187 // ERR("---\n");
5188 // ERR("key = %llx,%x,%llx\n", next_tp.tree->paritem->key.obj_id, next_tp.tree->paritem->key.obj_type, next_tp.tree->paritem->key.offset);
5189 // print_tree(next_tp.tree);
5190 //
5191 // free_traverse_ptr(&next_tp);
5192 // } else
5193 // ERR("next item not found\n");
5194 //
5195 // int3;
5196 // free_traverse_ptr(&tp);
5197 // return STATUS_INTERNAL_ERROR;
5198 // }
5199
5200 if (tp.item->key.obj_type == TYPE_EXTENT_DATA && tp.item->size >= sizeof(EXTENT_DATA)) {
5201 EXTENT_DATA2* ed2;
5202
5203 ed = (EXTENT_DATA*)tp.item->data;
5204 ed2 = (EXTENT_DATA2*)ed->data;
5205
5206 len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
5207 } else
5208 ed = NULL;
5209
5210 if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || !ed || tp.item->key.offset + len < start_data) {
5211 if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA)
5212 Status = insert_sparse_extent(Vcb, fcb->subvol, fcb->inode, 0, start_data, rollback);
5213 else if (!ed)
5214 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
5215 else {
5216 Status = insert_sparse_extent(Vcb, fcb->subvol, fcb->inode, tp.item->key.offset + len,
5217 start_data - tp.item->key.offset - len, rollback);
5218 }
5219 if (!NT_SUCCESS(Status)) {
5220 ERR("insert_sparse_extent returned %08x\n", Status);
5221 free_traverse_ptr(&tp);
5222 return Status;
5223 }
5224 }
5225
5226 free_traverse_ptr(&tp);
5227 }
5228
5229 // FIXME - how do we know which RAID level to put this to?
5230 flags = BLOCK_FLAG_DATA; // SINGLE
5231
5232 // if (!chunk_test) { // TESTING
5233 // if ((c = alloc_chunk(Vcb, flags, NULL))) {
5234 // ERR("chunk_item->type = %llx\n", c->chunk_item->type);
5235 // ERR("size = %llx\n", c->chunk_item->size);
5236 // ERR("used = %llx\n", c->used);
5237 //
5238 // if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
5239 // if (insert_extent_chunk(Vcb, fcb, c, start_data, length, data, changed_sector_list)) {
5240 // // chunk_test = TRUE;
5241 // ERR("SUCCESS\n");
5242 // return STATUS_SUCCESS;
5243 // } else
5244 // ERR(":-(\n");
5245 // } else
5246 // ERR("???\n");
5247 // }
5248 // }
5249
5250 while (le != &Vcb->chunks) {
5251 c = CONTAINING_RECORD(le, chunk, list_entry);
5252
5253 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
5254 if (insert_extent_chunk(Vcb, fcb, c, start_data, length, data, changed_sector_list, rollback))
5255 return STATUS_SUCCESS;
5256 }
5257
5258 le = le->Flink;
5259 }
5260
5261 if ((c = alloc_chunk(Vcb, flags, rollback))) {
5262 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
5263 if (insert_extent_chunk(Vcb, fcb, c, start_data, length, data, changed_sector_list, rollback))
5264 return STATUS_SUCCESS;
5265 }
5266 }
5267
5268 // FIXME - rebalance chunks if free space elsewhere?
5269 WARN("couldn't find any data chunks with %llx bytes free\n", length);
5270
5271 return STATUS_DISK_FULL;
5272 }
5273
5274 void update_checksum_tree(device_extension* Vcb, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
5275 LIST_ENTRY* le = changed_sector_list->Flink;
5276 changed_sector* cs;
5277 traverse_ptr tp, next_tp;
5278 KEY searchkey;
5279 UINT32* data;
5280 NTSTATUS Status;
5281
5282 if (!Vcb->checksum_root) {
5283 ERR("no checksum root\n");
5284 goto exit;
5285 }
5286
5287 while (le != changed_sector_list) {
5288 UINT64 startaddr, endaddr;
5289 ULONG len;
5290 UINT32* checksums;
5291 RTL_BITMAP bmp;
5292 ULONG* bmparr;
5293 ULONG runlength, index;
5294
5295 cs = (changed_sector*)le;
5296
5297 searchkey.obj_id = EXTENT_CSUM_ID;
5298 searchkey.obj_type = TYPE_EXTENT_CSUM;
5299 searchkey.offset = cs->ol.key;
5300
5301 // FIXME - create checksum_root if it doesn't exist at all
5302
5303 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE);
5304 if (!NT_SUCCESS(Status)) { // tree is completely empty
5305 // FIXME - do proper check here that tree is empty
5306 if (!cs->deleted) {
5307 checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * cs->length, ALLOC_TAG);
5308 if (!checksums) {
5309 ERR("out of memory\n");
5310 goto exit;
5311 }
5312
5313 RtlCopyMemory(checksums, cs->checksums, sizeof(UINT32) * cs->length);
5314
5315 if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, cs->ol.key, checksums, sizeof(UINT32) * cs->length, NULL, rollback)) {
5316 ERR("insert_tree_item failed\n");
5317 ExFreePool(checksums);
5318 goto exit;
5319 }
5320 }
5321 } else {
5322 UINT32 tplen;
5323
5324 // FIXME - check entry is TYPE_EXTENT_CSUM?
5325
5326 if (tp.item->key.offset < cs->ol.key && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= cs->ol.key)
5327 startaddr = tp.item->key.offset;
5328 else
5329 startaddr = cs->ol.key;
5330
5331 free_traverse_ptr(&tp);
5332
5333 searchkey.obj_id = EXTENT_CSUM_ID;
5334 searchkey.obj_type = TYPE_EXTENT_CSUM;
5335 searchkey.offset = cs->ol.key + (cs->length * Vcb->superblock.sector_size);
5336
5337 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE);
5338 if (!NT_SUCCESS(Status)) {
5339 ERR("error - find_item returned %08x\n", Status);
5340 goto exit;
5341 }
5342
5343 tplen = tp.item->size / sizeof(UINT32);
5344
5345 if (tp.item->key.offset + (tplen * Vcb->superblock.sector_size) >= cs->ol.key + (cs->length * Vcb->superblock.sector_size))
5346 endaddr = tp.item->key.offset + (tplen * Vcb->superblock.sector_size);
5347 else
5348 endaddr = cs->ol.key + (cs->length * Vcb->superblock.sector_size);
5349
5350 free_traverse_ptr(&tp);
5351
5352 TRACE("cs starts at %llx (%x sectors)\n", cs->ol.key, cs->length);
5353 TRACE("startaddr = %llx\n", startaddr);
5354 TRACE("endaddr = %llx\n", endaddr);
5355
5356 len = (endaddr - startaddr) / Vcb->superblock.sector_size;
5357
5358 checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * len, ALLOC_TAG);
5359 if (!checksums) {
5360 ERR("out of memory\n");
5361 goto exit;
5362 }
5363
5364 bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG);
5365 if (!bmparr) {
5366 ERR("out of memory\n");
5367 ExFreePool(checksums);
5368 goto exit;
5369 }
5370
5371 RtlInitializeBitMap(&bmp, bmparr, len);
5372 RtlSetAllBits(&bmp);
5373
5374 searchkey.obj_id = EXTENT_CSUM_ID;
5375 searchkey.obj_type = TYPE_EXTENT_CSUM;
5376 searchkey.offset = cs->ol.key;
5377
5378 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE);
5379 if (!NT_SUCCESS(Status)) {
5380 ERR("error - find_item returned %08x\n", Status);
5381 goto exit;
5382 }
5383
5384 // set bit = free space, cleared bit = allocated sector
5385
5386 // ERR("start loop\n");
5387 while (tp.item->key.offset < endaddr) {
5388 // ERR("%llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
5389 if (tp.item->key.offset >= startaddr) {
5390 if (tp.item->size > 0) {
5391 RtlCopyMemory(&checksums[(tp.item->key.offset - startaddr) / Vcb->superblock.sector_size], tp.item->data, tp.item->size);
5392 RtlClearBits(&bmp, (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size, tp.item->size / sizeof(UINT32));
5393 }
5394
5395 delete_tree_item(Vcb, &tp, rollback);
5396 }
5397
5398 if (find_next_item(Vcb, &tp, &next_tp, FALSE)) {
5399 free_traverse_ptr(&tp);
5400 tp = next_tp;
5401 } else
5402 break;
5403 }
5404 // ERR("end loop\n");
5405
5406 free_traverse_ptr(&tp);
5407
5408 if (cs->deleted) {
5409 RtlSetBits(&bmp, (cs->ol.key - startaddr) / Vcb->superblock.sector_size, cs->length);
5410 } else {
5411 RtlCopyMemory(&checksums[(cs->ol.key - startaddr) / Vcb->superblock.sector_size], cs->checksums, cs->length * sizeof(UINT32));
5412 RtlClearBits(&bmp, (cs->ol.key - startaddr) / Vcb->superblock.sector_size, cs->length);
5413 }
5414
5415 runlength = RtlFindFirstRunClear(&bmp, &index);
5416
5417 while (runlength != 0) {
5418 do {
5419 ULONG rl;
5420
5421 if (runlength * sizeof(UINT32) > MAX_CSUM_SIZE)
5422 rl = MAX_CSUM_SIZE / sizeof(UINT32);
5423 else
5424 rl = runlength;
5425
5426 data = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * rl, ALLOC_TAG);
5427 if (!data) {
5428 ERR("out of memory\n");
5429 ExFreePool(bmparr);
5430 ExFreePool(checksums);
5431 goto exit;
5432 }
5433
5434 RtlCopyMemory(data, &checksums[index], sizeof(UINT32) * rl);
5435
5436 if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, startaddr + (index * Vcb->superblock.sector_size), data, sizeof(UINT32) * rl, NULL, rollback)) {
5437 ERR("insert_tree_item failed\n");
5438 ExFreePool(data);
5439 ExFreePool(bmparr);
5440 ExFreePool(checksums);
5441 goto exit;
5442 }
5443
5444 runlength -= rl;
5445 index += rl;
5446 } while (runlength > 0);
5447
5448 runlength = RtlFindNextForwardRunClear(&bmp, index, &index);
5449 }
5450
5451 ExFreePool(bmparr);
5452 ExFreePool(checksums);
5453 }
5454
5455 le = le->Flink;
5456 }
5457
5458 exit:
5459 while (!IsListEmpty(changed_sector_list)) {
5460 le = RemoveHeadList(changed_sector_list);
5461 cs = (changed_sector*)le;
5462
5463 if (cs->checksums)
5464 ExFreePool(cs->checksums);
5465
5466 ExFreePool(cs);
5467 }
5468 }
5469
5470 NTSTATUS truncate_file(fcb* fcb, UINT64 end, LIST_ENTRY* rollback) {
5471 LIST_ENTRY changed_sector_list;
5472 NTSTATUS Status;
5473 BOOL nocsum = fcb->inode_item.flags & BTRFS_INODE_NODATASUM;
5474
5475 if (!nocsum)
5476 InitializeListHead(&changed_sector_list);
5477
5478 // FIXME - convert into inline extent if short enough
5479
5480 Status = excise_extents(fcb->Vcb, fcb, sector_align(end, fcb->Vcb->superblock.sector_size),
5481 sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size), nocsum ? NULL : &changed_sector_list, rollback);
5482 if (!NT_SUCCESS(Status)) {
5483 ERR("error - excise_extents failed\n");
5484 return Status;
5485 }
5486
5487 fcb->inode_item.st_size = end;
5488 TRACE("setting st_size to %llx\n", end);
5489
5490 fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size);
5491 fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size;
5492 fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size;
5493 // FIXME - inform cache manager of this
5494
5495 TRACE("fcb %p FileSize = %llx\n", fcb, fcb->Header.FileSize.QuadPart);
5496
5497 if (!nocsum)
5498 update_checksum_tree(fcb->Vcb, &changed_sector_list, rollback);
5499
5500 return STATUS_SUCCESS;
5501 }
5502
5503 NTSTATUS extend_file(fcb* fcb, UINT64 end, LIST_ENTRY* rollback) {
5504 UINT64 oldalloc, newalloc;
5505 KEY searchkey;
5506 traverse_ptr tp;
5507 BOOL cur_inline;
5508 NTSTATUS Status;
5509
5510 TRACE("(%p, %x, %p)\n", fcb, end, rollback);
5511
5512 if (fcb->ads) {
5513 FIXME("FIXME - support streams here\n"); // FIXME
5514 return STATUS_NOT_IMPLEMENTED;
5515 } else {
5516 searchkey.obj_id = fcb->inode;
5517 searchkey.obj_type = TYPE_EXTENT_DATA;
5518 searchkey.offset = 0xffffffffffffffff;
5519
5520 Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE);
5521 if (!NT_SUCCESS(Status)) {
5522 ERR("error - find_item returned %08x\n", Status);
5523 return Status;
5524 }
5525
5526 oldalloc = 0;
5527 if (tp.item->key.obj_id == fcb->inode && tp.item->key.obj_type == TYPE_EXTENT_DATA) {
5528 EXTENT_DATA* ed = (EXTENT_DATA*)tp.item->data;
5529 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
5530
5531 if (tp.item->size < sizeof(EXTENT_DATA)) {
5532 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
5533 free_traverse_ptr(&tp);
5534 return STATUS_INTERNAL_ERROR;
5535 }
5536
5537 oldalloc = tp.item->key.offset + (ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes);
5538 cur_inline = ed->type == EXTENT_TYPE_INLINE;
5539
5540 if (cur_inline && end > fcb->Vcb->max_inline) {
5541 LIST_ENTRY changed_sector_list;
5542 BOOL nocsum = fcb->inode_item.flags & BTRFS_INODE_NODATASUM;
5543 UINT64 origlength, length;
5544 UINT8* data;
5545
5546 TRACE("giving inline file proper extents\n");
5547
5548 origlength = ed->decoded_size;
5549
5550 cur_inline = FALSE;
5551
5552 if (!nocsum)
5553 InitializeListHead(&changed_sector_list);
5554
5555 delete_tree_item(fcb->Vcb, &tp, rollback);
5556
5557 length = sector_align(origlength, fcb->Vcb->superblock.sector_size);
5558
5559 data = ExAllocatePoolWithTag(PagedPool, length, ALLOC_TAG);
5560 if (!data) {
5561 ERR("could not allocate %llx bytes for data\n", length);
5562 free_traverse_ptr(&tp);
5563 return STATUS_INSUFFICIENT_RESOURCES;
5564 }
5565
5566 if (length > origlength)
5567 RtlZeroMemory(data + origlength, length - origlength);
5568
5569 RtlCopyMemory(data, ed->data, origlength);
5570
5571 fcb->inode_item.st_blocks -= origlength;
5572
5573 Status = insert_extent(fcb->Vcb, fcb, tp.item->key.offset, length, data, nocsum ? NULL : &changed_sector_list, rollback);
5574 if (!NT_SUCCESS(Status)) {
5575 ERR("insert_extent returned %08x\n", Status);
5576 free_traverse_ptr(&tp);
5577 ExFreePool(data);
5578 return Status;
5579 }
5580
5581 oldalloc = tp.item->key.offset + length;
5582
5583 ExFreePool(data);
5584
5585 if (!nocsum)
5586 update_checksum_tree(fcb->Vcb, &changed_sector_list, rollback);
5587 }
5588
5589 if (cur_inline) {
5590 ULONG edsize;
5591
5592 if (end > oldalloc) {
5593 edsize = sizeof(EXTENT_DATA) - 1 + end - tp.item->key.offset;
5594 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
5595
5596 if (!ed) {
5597 ERR("out of memory\n");
5598 free_traverse_ptr(&tp);
5599 return STATUS_INSUFFICIENT_RESOURCES;
5600 }
5601
5602 RtlZeroMemory(ed, edsize);
5603 RtlCopyMemory(ed, tp.item->data, tp.item->size);
5604
5605 ed->decoded_size = end - tp.item->key.offset;
5606
5607 delete_tree_item(fcb->Vcb, &tp, rollback);
5608
5609 if (!insert_tree_item(fcb->Vcb, fcb->subvol, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ed, edsize, NULL, rollback)) {
5610 ERR("error - failed to insert item\n");
5611 ExFreePool(ed);
5612 free_traverse_ptr(&tp);
5613 return STATUS_INTERNAL_ERROR;
5614 }
5615 }
5616
5617 TRACE("extending inline file (oldalloc = %llx, end = %llx)\n", oldalloc, end);
5618
5619 fcb->inode_item.st_size = end;
5620 TRACE("setting st_size to %llx\n", end);
5621
5622 fcb->inode_item.st_blocks = end;
5623
5624 fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
5625 } else {
5626 newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
5627
5628 if (newalloc > oldalloc) {
5629 Status = insert_sparse_extent(fcb->Vcb, fcb->subvol, fcb->inode, oldalloc, newalloc - oldalloc, rollback);
5630
5631 if (!NT_SUCCESS(Status)) {
5632 ERR("insert_sparse_extent returned %08x\n", Status);
5633 free_traverse_ptr(&tp);
5634 return Status;
5635 }
5636 }
5637
5638 fcb->inode_item.st_size = end;
5639 TRACE("setting st_size to %llx\n", end);
5640
5641 TRACE("newalloc = %llx\n", newalloc);
5642
5643 fcb->Header.AllocationSize.QuadPart = newalloc;
5644 fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
5645 }
5646 } else {
5647 if (end > fcb->Vcb->max_inline) {
5648 newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
5649
5650 Status = insert_sparse_extent(fcb->Vcb, fcb->subvol, fcb->inode, 0, newalloc, rollback);
5651
5652 if (!NT_SUCCESS(Status)) {
5653 ERR("insert_sparse_extent returned %08x\n", Status);
5654 free_traverse_ptr(&tp);
5655 return Status;
5656 }
5657
5658 fcb->inode_item.st_size = end;
5659 TRACE("setting st_size to %llx\n", end);
5660
5661 TRACE("newalloc = %llx\n", newalloc);
5662
5663 fcb->Header.AllocationSize.QuadPart = newalloc;
5664 fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
5665 } else {
5666 EXTENT_DATA* ed;
5667 ULONG edsize;
5668
5669 edsize = sizeof(EXTENT_DATA) - 1 + end;
5670 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
5671
5672 if (!ed) {
5673 ERR("out of memory\n");
5674 free_traverse_ptr(&tp);
5675 return STATUS_INSUFFICIENT_RESOURCES;
5676 }
5677
5678 ed->generation = fcb->Vcb->superblock.generation;
5679 ed->decoded_size = end;
5680 ed->compression = BTRFS_COMPRESSION_NONE;
5681 ed->encryption = BTRFS_ENCRYPTION_NONE;
5682 ed->encoding = BTRFS_ENCODING_NONE;
5683 ed->type = EXTENT_TYPE_INLINE;
5684
5685 RtlZeroMemory(ed->data, end);
5686
5687 if (!insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, 0, ed, edsize, NULL, rollback)) {
5688 ERR("error - failed to insert item\n");
5689 ExFreePool(ed);
5690 free_traverse_ptr(&tp);
5691 return STATUS_INTERNAL_ERROR;
5692 }
5693
5694 fcb->inode_item.st_size = end;
5695 TRACE("setting st_size to %llx\n", end);
5696
5697 fcb->inode_item.st_blocks = end;
5698
5699 fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
5700 }
5701 }
5702
5703 free_traverse_ptr(&tp);
5704 }
5705
5706 return STATUS_SUCCESS;
5707 }
5708
5709 static UINT64 get_extent_item_refcount(device_extension* Vcb, UINT64 address) {
5710 KEY searchkey;
5711 traverse_ptr tp;
5712 EXTENT_ITEM* ei;
5713 UINT64 rc;
5714 NTSTATUS Status;
5715
5716 searchkey.obj_id = address;
5717 searchkey.obj_type = TYPE_EXTENT_ITEM;
5718 searchkey.offset = 0xffffffffffffffff;
5719
5720 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
5721 if (!NT_SUCCESS(Status)) {
5722 ERR("error - find_item returned %08x\n", Status);
5723 return 0;
5724 }
5725
5726 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
5727 ERR("error - could not find EXTENT_ITEM for %llx\n", address);
5728 free_traverse_ptr(&tp);
5729 return 0;
5730 }
5731
5732 if (tp.item->size < sizeof(EXTENT_ITEM)) {
5733 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
5734 free_traverse_ptr(&tp);
5735 return 0;
5736 }
5737
5738 ei = (EXTENT_ITEM*)tp.item->data;
5739 rc = ei->refcount;
5740
5741 free_traverse_ptr(&tp);
5742
5743 return rc;
5744 }
5745
5746 static NTSTATUS do_nocow_write(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
5747 KEY searchkey;
5748 traverse_ptr tp, next_tp;
5749 NTSTATUS Status;
5750 EXTENT_DATA* ed;
5751 BOOL b, do_cow;
5752 EXTENT_DATA2* eds;
5753 UINT64 size, new_start, new_end, last_write = 0;
5754
5755 TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data, changed_sector_list);
5756
5757 searchkey.obj_id = fcb->inode;
5758 searchkey.obj_type = TYPE_EXTENT_DATA;
5759 searchkey.offset = start_data;
5760
5761 Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
5762 if (!NT_SUCCESS(Status)) {
5763 ERR("error - find_item returned %08x\n", Status);
5764 return Status;
5765 }
5766
5767 if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || tp.item->key.offset > start_data) {
5768 ERR("previous EXTENT_DATA not found (found %llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
5769 Status = STATUS_INTERNAL_ERROR;
5770 goto end;
5771 }
5772
5773 do {
5774 if (tp.item->size < sizeof(EXTENT_DATA)) {
5775 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
5776 Status = STATUS_INTERNAL_ERROR;
5777 goto end;
5778 }
5779
5780 ed = (EXTENT_DATA*)tp.item->data;
5781
5782 if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
5783 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
5784 Status = STATUS_INTERNAL_ERROR;
5785 goto end;
5786 }
5787
5788 eds = (EXTENT_DATA2*)&ed->data[0];
5789
5790 b = find_next_item(Vcb, &tp, &next_tp, TRUE);
5791
5792 switch (ed->type) {
5793 case EXTENT_TYPE_REGULAR:
5794 {
5795 UINT64 rc = get_extent_item_refcount(Vcb, eds->address);
5796
5797 if (rc == 0) {
5798 ERR("get_extent_item_refcount failed\n");
5799 Status = STATUS_INTERNAL_ERROR;
5800 goto end;
5801 }
5802
5803 do_cow = rc > 1;
5804 break;
5805 }
5806
5807 case EXTENT_TYPE_INLINE:
5808 do_cow = TRUE;
5809 break;
5810
5811 case EXTENT_TYPE_PREALLOC:
5812 FIXME("FIXME - handle prealloc extents\n"); // FIXME
5813 Status = STATUS_NOT_SUPPORTED;
5814 goto end;
5815
5816 default:
5817 ERR("error - unknown extent type %x\n", ed->type);
5818 Status = STATUS_NOT_SUPPORTED;
5819 goto end;
5820 }
5821
5822 if (ed->compression != BTRFS_COMPRESSION_NONE) {
5823 FIXME("FIXME: compression not yet supported\n");
5824 Status = STATUS_NOT_SUPPORTED;
5825 goto end;
5826 }
5827
5828 if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
5829 WARN("encryption not supported\n");
5830 Status = STATUS_INTERNAL_ERROR;
5831 goto end;
5832 }
5833
5834 if (ed->encoding != BTRFS_ENCODING_NONE) {
5835 WARN("other encodings not supported\n");
5836 Status = STATUS_INTERNAL_ERROR;
5837 goto end;
5838 }
5839
5840 size = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : eds->num_bytes;
5841
5842 TRACE("extent: start = %llx, length = %llx\n", tp.item->key.offset, size);
5843
5844 new_start = tp.item->key.offset < start_data ? start_data : tp.item->key.offset;
5845 new_end = tp.item->key.offset + size > start_data + length ? (start_data + length) : (tp.item->key.offset + size);
5846
5847 TRACE("new_start = %llx\n", new_start);
5848 TRACE("new_end = %llx\n", new_end);
5849
5850 if (do_cow) {
5851 TRACE("doing COW write\n");
5852
5853 Status = excise_extents(Vcb, fcb, new_start, new_start + new_end, changed_sector_list, rollback);
5854
5855 if (!NT_SUCCESS(Status)) {
5856 ERR("error - excise_extents returned %08x\n", Status);
5857 goto end;
5858 }
5859
5860 Status = insert_extent(Vcb, fcb, new_start, new_end - new_start, (UINT8*)data + new_start - start_data, changed_sector_list, rollback);
5861
5862 if (!NT_SUCCESS(Status)) {
5863 ERR("error - insert_extent returned %08x\n", Status);
5864 goto end;
5865 }
5866 } else {
5867 UINT64 writeaddr;
5868
5869 writeaddr = eds->address + eds->offset + new_start - tp.item->key.offset;
5870 TRACE("doing non-COW write to %llx\n", writeaddr);
5871
5872 Status = write_data(Vcb, writeaddr, (UINT8*)data + new_start - start_data, new_end - new_start);
5873
5874 if (!NT_SUCCESS(Status)) {
5875 ERR("error - write_data returned %08x\n", Status);
5876 goto end;
5877 }
5878
5879 if (changed_sector_list) {
5880 unsigned int i;
5881 changed_sector* sc;
5882
5883 sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
5884 if (!sc) {
5885 ERR("out of memory\n");
5886 Status = STATUS_INSUFFICIENT_RESOURCES;
5887 goto end;
5888 }
5889
5890 sc->ol.key = writeaddr;
5891 sc->length = (new_end - new_start) / Vcb->superblock.sector_size;
5892 sc->deleted = FALSE;
5893
5894 sc->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sc->length, ALLOC_TAG);
5895 if (!sc->checksums) {
5896 ERR("out of memory\n");
5897 ExFreePool(sc);
5898 Status = STATUS_INSUFFICIENT_RESOURCES;
5899 goto end;
5900 }
5901
5902 for (i = 0; i < sc->length; i++) {
5903 sc->checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + new_start - start_data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
5904 }
5905
5906 insert_into_ordered_list(changed_sector_list, &sc->ol);
5907 }
5908 }
5909
5910 last_write = new_end;
5911
5912 if (b) {
5913 free_traverse_ptr(&tp);
5914 tp = next_tp;
5915
5916 if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || tp.item->key.offset >= start_data + length)
5917 b = FALSE;
5918 }
5919 } while (b);
5920
5921 if (last_write < start_data + length) {
5922 new_start = last_write;
5923 new_end = start_data + length;
5924
5925 TRACE("new_start = %llx\n", new_start);
5926 TRACE("new_end = %llx\n", new_end);
5927
5928 Status = insert_extent(Vcb, fcb, new_start, new_end - new_start, (UINT8*)data + new_start - start_data, changed_sector_list, rollback);
5929
5930 if (!NT_SUCCESS(Status)) {
5931 ERR("error - insert_extent returned %08x\n", Status);
5932 goto end;
5933 }
5934 }
5935
5936 Status = STATUS_SUCCESS;
5937
5938 end:
5939 free_traverse_ptr(&tp);
5940
5941 return Status;
5942 }
5943
5944 #ifdef DEBUG_PARANOID
5945 static void print_loaded_trees(tree* t, int spaces) {
5946 char pref[10];
5947 int i;
5948 LIST_ENTRY* le;
5949
5950 for (i = 0; i < spaces; i++) {
5951 pref[i] = ' ';
5952 }
5953 pref[spaces] = 0;
5954
5955 if (!t) {
5956 ERR("%s(not loaded)\n", pref);
5957 return;
5958 }
5959
5960 le = t->itemlist.Flink;
5961 while (le != &t->itemlist) {
5962 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
5963
5964 ERR("%s%llx,%x,%llx ignore=%s\n", pref, td->key.obj_id, td->key.obj_type, td->key.offset, td->ignore ? "TRUE" : "FALSE");
5965
5966 if (t->header.level > 0) {
5967 print_loaded_trees(td->treeholder.tree, spaces+1);
5968 }
5969
5970 le = le->Flink;
5971 }
5972 }
5973
5974 static void check_extents_consistent(device_extension* Vcb, fcb* fcb) {
5975 KEY searchkey;
5976 traverse_ptr tp, next_tp;
5977 UINT64 length, oldlength, lastoff, alloc;
5978 NTSTATUS Status;
5979 EXTENT_DATA* ed;
5980 EXTENT_DATA2* ed2;
5981
5982 if (fcb->ads || fcb->inode_item.st_size == 0 || fcb->deleted)
5983 return;
5984
5985 TRACE("inode = %llx, subvol = %llx\n", fcb->inode, fcb->subvol->id);
5986
5987 searchkey.obj_id = fcb->inode;
5988 searchkey.obj_type = TYPE_EXTENT_DATA;
5989 searchkey.offset = 0;
5990
5991 Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
5992 if (!NT_SUCCESS(Status)) {
5993 ERR("error - find_item returned %08x\n", Status);
5994 goto failure2;
5995 }
5996
5997 if (keycmp(&searchkey, &tp.item->key)) {
5998 ERR("could not find EXTENT_DATA at offset 0\n");
5999 goto failure;
6000 }
6001
6002 if (tp.item->size < sizeof(EXTENT_DATA)) {
6003 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
6004 goto failure;
6005 }
6006
6007 ed = (EXTENT_DATA*)tp.item->data;
6008 ed2 = (EXTENT_DATA2*)&ed->data[0];
6009
6010 length = oldlength = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
6011 lastoff = tp.item->key.offset;
6012
6013 TRACE("(%llx,%x,%llx) length = %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, length);
6014
6015 alloc = 0;
6016 if (ed->type != EXTENT_TYPE_REGULAR || ed2->address != 0) {
6017 alloc += length;
6018 }
6019
6020 while (find_next_item(Vcb, &tp, &next_tp, FALSE)) {
6021 if (next_tp.item->key.obj_id != searchkey.obj_id || next_tp.item->key.obj_type != searchkey.obj_type) {
6022 free_traverse_ptr(&next_tp);
6023 break;
6024 }
6025
6026 free_traverse_ptr(&tp);
6027 tp = next_tp;
6028
6029 if (tp.item->size < sizeof(EXTENT_DATA)) {
6030 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
6031 goto failure;
6032 }
6033
6034 ed = (EXTENT_DATA*)tp.item->data;
6035 ed2 = (EXTENT_DATA2*)&ed->data[0];
6036
6037 length = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
6038
6039 TRACE("(%llx,%x,%llx) length = %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, length);
6040
6041 if (tp.item->key.offset != lastoff + oldlength) {
6042 ERR("EXTENT_DATA in %llx,%llx was at %llx, expected %llx\n", fcb->subvol->id, fcb->inode, tp.item->key.offset, lastoff + oldlength);
6043 goto failure;
6044 }
6045
6046 if (ed->type != EXTENT_TYPE_REGULAR || ed2->address != 0) {
6047 alloc += length;
6048 }
6049
6050 oldlength = length;
6051 lastoff = tp.item->key.offset;
6052 }
6053
6054 if (alloc != fcb->inode_item.st_blocks) {
6055 ERR("allocation size was %llx, expected %llx\n", alloc, fcb->inode_item.st_blocks);
6056 goto failure;
6057 }
6058
6059 // if (fcb->inode_item.st_blocks != lastoff + oldlength) {
6060 // ERR("extents finished at %x, expected %x\n", (UINT32)(lastoff + oldlength), (UINT32)fcb->inode_item.st_blocks);
6061 // goto failure;
6062 // }
6063
6064 free_traverse_ptr(&tp);
6065
6066 return;
6067
6068 failure:
6069 free_traverse_ptr(&tp);
6070
6071 failure2:
6072 if (fcb->subvol->treeholder.tree)
6073 print_loaded_trees(fcb->subvol->treeholder.tree, 0);
6074
6075 int3;
6076 }
6077
6078 // static void check_extent_tree_consistent(device_extension* Vcb) {
6079 // KEY searchkey;
6080 // traverse_ptr tp, next_tp;
6081 // UINT64 lastaddr;
6082 // BOOL b, inconsistency;
6083 //
6084 // searchkey.obj_id = 0;
6085 // searchkey.obj_type = 0;
6086 // searchkey.offset = 0;
6087 //
6088 // if (!find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE)) {
6089 // ERR("error - could not find any entries in extent_root\n");
6090 // int3;
6091 // }
6092 //
6093 // lastaddr = 0;
6094 // inconsistency = FALSE;
6095 //
6096 // do {
6097 // if (tp.item->key.obj_type == TYPE_EXTENT_ITEM) {
6098 // // ERR("%x,%x,%x\n", (UINT32)tp.item->key.obj_id, tp.item->key.obj_type, (UINT32)tp.item->key.offset);
6099 //
6100 // if (tp.item->key.obj_id < lastaddr) {
6101 // // ERR("inconsistency!\n");
6102 // // int3;
6103 // inconsistency = TRUE;
6104 // }
6105 //
6106 // lastaddr = tp.item->key.obj_id + tp.item->key.offset;
6107 // }
6108 //
6109 // b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
6110 // if (b) {
6111 // free_traverse_ptr(&tp);
6112 // tp = next_tp;
6113 // }
6114 // } while (b);
6115 //
6116 // free_traverse_ptr(&tp);
6117 //
6118 // if (!inconsistency)
6119 // return;
6120 //
6121 // ERR("Inconsistency detected:\n");
6122 //
6123 // if (!find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE)) {
6124 // ERR("error - could not find any entries in extent_root\n");
6125 // int3;
6126 // }
6127 //
6128 // do {
6129 // if (tp.item->key.obj_type == TYPE_EXTENT_ITEM) {
6130 // ERR("%x,%x,%x\n", (UINT32)tp.item->key.obj_id, tp.item->key.obj_type, (UINT32)tp.item->key.offset);
6131 //
6132 // if (tp.item->key.obj_id < lastaddr) {
6133 // ERR("inconsistency!\n");
6134 // }
6135 //
6136 // lastaddr = tp.item->key.obj_id + tp.item->key.offset;
6137 // }
6138 //
6139 // b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
6140 // if (b) {
6141 // free_traverse_ptr(&tp);
6142 // tp = next_tp;
6143 // }
6144 // } while (b);
6145 //
6146 // free_traverse_ptr(&tp);
6147 //
6148 // int3;
6149 // }
6150 #endif
6151
6152 NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void* buf, ULONG* length, BOOL paging_io, BOOL no_cache, LIST_ENTRY* rollback) {
6153 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
6154 PFILE_OBJECT FileObject = IrpSp->FileObject;
6155 KEY searchkey;
6156 traverse_ptr tp;
6157 EXTENT_DATA* ed2;
6158 UINT64 newlength, start_data, end_data;
6159 UINT32 bufhead;
6160 BOOL make_inline;
6161 UINT8* data;
6162 LIST_ENTRY changed_sector_list;
6163 INODE_ITEM *ii, *origii;
6164 BOOL changed_length = FALSE, nocsum, nocow/*, lazy_writer = FALSE, write_eof = FALSE*/;
6165 NTSTATUS Status;
6166 LARGE_INTEGER time;
6167 BTRFS_TIME now;
6168 fcb* fcb;
6169 BOOL paging_lock = FALSE;
6170
6171 TRACE("(%p, %p, %llx, %p, %x, %u, %u)\n", Vcb, FileObject, offset.QuadPart, buf, *length, paging_io, no_cache);
6172
6173 if (*length == 0) {
6174 WARN("returning success for zero-length write\n");
6175 return STATUS_SUCCESS;
6176 }
6177
6178 if (!FileObject) {
6179 ERR("error - FileObject was NULL\n");
6180 return STATUS_ACCESS_DENIED;
6181 }
6182
6183 fcb = FileObject->FsContext;
6184
6185 if (fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK) {
6186 WARN("tried to write to something other than a file or symlink (inode %llx, type %u, %p, %p)\n", fcb->inode, fcb->type, &fcb->type, fcb);
6187 return STATUS_ACCESS_DENIED;
6188 }
6189
6190 if (offset.LowPart == FILE_WRITE_TO_END_OF_FILE && offset.HighPart == -1) {
6191 offset = fcb->Header.FileSize;
6192 // write_eof = TRUE;
6193 }
6194
6195 TRACE("fcb->Header.Flags = %x\n", fcb->Header.Flags);
6196
6197 if (no_cache && !paging_io && FileObject->SectionObjectPointer->DataSectionObject) {
6198 IO_STATUS_BLOCK iosb;
6199
6200 ExAcquireResourceExclusiveLite(fcb->Header.PagingIoResource, TRUE);
6201
6202 CcFlushCache(FileObject->SectionObjectPointer, &offset, *length, &iosb);
6203
6204 if (!NT_SUCCESS(iosb.Status)) {
6205 ExReleaseResourceLite(fcb->Header.PagingIoResource);
6206 ERR("CcFlushCache returned %08x\n", iosb.Status);
6207 return iosb.Status;
6208 }
6209
6210 paging_lock = TRUE;
6211
6212 CcPurgeCacheSection(FileObject->SectionObjectPointer, &offset, *length, FALSE);
6213 }
6214
6215 if (paging_io) {
6216 ExAcquireResourceSharedLite(fcb->Header.PagingIoResource, TRUE);
6217 paging_lock = TRUE;
6218 }
6219
6220 nocsum = fcb->ads ? TRUE : fcb->inode_item.flags & BTRFS_INODE_NODATASUM;
6221 nocow = fcb->ads ? TRUE : fcb->inode_item.flags & BTRFS_INODE_NODATACOW;
6222
6223 newlength = fcb->ads ? fcb->adssize : fcb->inode_item.st_size;
6224
6225 if (fcb->deleted)
6226 newlength = 0;
6227
6228 TRACE("newlength = %llx\n", newlength);
6229
6230 // if (KeGetCurrentThread() == fcb->lazy_writer_thread) {
6231 // ERR("lazy writer on the TV\n");
6232 // lazy_writer = TRUE;
6233 // }
6234
6235 if (offset.QuadPart + *length > newlength) {
6236 if (paging_io) {
6237 if (offset.QuadPart >= newlength) {
6238 TRACE("paging IO tried to write beyond end of file (file size = %llx, offset = %llx, length = %x)\n", newlength, offset.QuadPart, *length);
6239 TRACE("filename %.*S\n", fcb->full_filename.Length / sizeof(WCHAR), fcb->full_filename.Buffer);
6240 TRACE("FileObject: AllocationSize = %llx, FileSize = %llx, ValidDataLength = %llx\n",
6241 fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
6242 Status = STATUS_SUCCESS;
6243 goto end;
6244 }
6245
6246 *length = newlength - offset.QuadPart;
6247 } else {
6248 newlength = offset.QuadPart + *length;
6249 changed_length = TRUE;
6250
6251 TRACE("extending length to %llx\n", newlength);
6252 }
6253 }
6254
6255 make_inline = fcb->ads ? FALSE : newlength <= fcb->Vcb->max_inline;
6256
6257 if (changed_length) {
6258 if (newlength > fcb->Header.AllocationSize.QuadPart) {
6259 Status = extend_file(fcb, newlength, rollback);
6260 if (!NT_SUCCESS(Status)) {
6261 ERR("extend_file returned %08x\n", Status);
6262 goto end;
6263 }
6264 } else if (fcb->ads)
6265 fcb->adssize = newlength;
6266 else
6267 fcb->inode_item.st_size = newlength;
6268
6269 fcb->Header.FileSize.QuadPart = newlength;
6270 fcb->Header.ValidDataLength.QuadPart = newlength;
6271
6272 TRACE("AllocationSize = %llx\n", fcb->Header.AllocationSize.QuadPart);
6273 TRACE("FileSize = %llx\n", fcb->Header.FileSize.QuadPart);
6274 TRACE("ValidDataLength = %llx\n", fcb->Header.ValidDataLength.QuadPart);
6275 }
6276
6277 if (!no_cache) {
6278 BOOL wait;
6279
6280 if (!FileObject->PrivateCacheMap || changed_length) {
6281 CC_FILE_SIZES ccfs;
6282
6283 ccfs.AllocationSize = fcb->Header.AllocationSize;
6284 ccfs.FileSize = fcb->Header.FileSize;
6285 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
6286
6287 if (!FileObject->PrivateCacheMap) {
6288 TRACE("calling CcInitializeCacheMap...\n");
6289 CcInitializeCacheMap(FileObject, &ccfs, FALSE, cache_callbacks, FileObject);
6290
6291 CcSetReadAheadGranularity(FileObject, READ_AHEAD_GRANULARITY);
6292 } else {
6293 CcSetFileSizes(FileObject, &ccfs);
6294 }
6295 }
6296
6297 // FIXME - uncomment this when async is working
6298 // wait = IoIsOperationSynchronous(Irp) ? TRUE : FALSE;
6299 wait = TRUE;
6300
6301 TRACE("CcCopyWrite(%p, %llx, %x, %u, %p)\n", FileObject, offset.QuadPart, *length, wait, buf);
6302 if (!CcCopyWrite(FileObject, &offset, *length, wait, buf)) {
6303 TRACE("CcCopyWrite failed.\n");
6304
6305 IoMarkIrpPending(Irp);
6306 Status = STATUS_PENDING;
6307 goto end;
6308 }
6309 TRACE("CcCopyWrite finished\n");
6310
6311 Status = STATUS_SUCCESS;
6312 goto end;
6313 }
6314
6315 if (fcb->ads) {
6316 UINT16 datalen;
6317 UINT8* data2;
6318 UINT32 maxlen;
6319
6320 if (!get_xattr(fcb->Vcb, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, &data, &datalen)) {
6321 ERR("get_xattr failed\n");
6322 Status = STATUS_INTERNAL_ERROR;
6323 goto end;
6324 }
6325
6326 if (changed_length) {
6327 // find maximum length of xattr
6328 maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
6329
6330 searchkey.obj_id = fcb->inode;
6331 searchkey.obj_type = TYPE_XATTR_ITEM;
6332 searchkey.offset = fcb->adshash;
6333
6334 Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE);
6335 if (!NT_SUCCESS(Status)) {
6336 ERR("error - find_item returned %08x\n", Status);
6337 goto end;
6338 }
6339
6340 if (keycmp(&tp.item->key, &searchkey)) {
6341 ERR("error - could not find key for xattr\n");
6342 free_traverse_ptr(&tp);
6343 Status = STATUS_INTERNAL_ERROR;
6344 goto end;
6345 }
6346
6347 if (tp.item->size < datalen) {
6348 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, datalen);
6349 free_traverse_ptr(&tp);
6350 Status = STATUS_INTERNAL_ERROR;
6351 goto end;
6352 }
6353
6354 maxlen -= tp.item->size - datalen; // subtract XATTR_ITEM overhead
6355
6356 free_traverse_ptr(&tp);
6357
6358 if (newlength > maxlen) {
6359 ERR("error - xattr too long (%llu > %u)\n", newlength, maxlen);
6360 Status = STATUS_DISK_FULL;
6361 goto end;
6362 }
6363
6364 fcb->adssize = newlength;
6365
6366 data2 = ExAllocatePoolWithTag(PagedPool, newlength, ALLOC_TAG);
6367 if (!data2) {
6368 ERR("out of memory\n");
6369 Status = STATUS_INSUFFICIENT_RESOURCES;
6370 goto end;
6371 }
6372
6373 RtlCopyMemory(data2, data, datalen);
6374
6375 if (offset.QuadPart > datalen)
6376 RtlZeroMemory(&data2[datalen], offset.QuadPart - datalen);
6377 } else
6378 data2 = data;
6379
6380 if (*length > 0)
6381 RtlCopyMemory(&data2[offset.QuadPart], buf, *length);
6382
6383 Status = set_xattr(fcb->Vcb, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, data2, newlength, rollback);
6384 if (!NT_SUCCESS(Status)) {
6385 ERR("set_xattr returned %08x\n", Status);
6386 goto end;
6387 }
6388
6389 if (data) ExFreePool(data);
6390 if (data2 != data) ExFreePool(data2);
6391
6392 fcb->Header.ValidDataLength.QuadPart = newlength;
6393 } else {
6394 if (make_inline) {
6395 start_data = 0;
6396 end_data = sector_align(newlength, fcb->Vcb->superblock.sector_size);
6397 bufhead = sizeof(EXTENT_DATA) - 1;
6398 } else {
6399 start_data = offset.QuadPart & ~(fcb->Vcb->superblock.sector_size - 1);
6400 end_data = sector_align(offset.QuadPart + *length, fcb->Vcb->superblock.sector_size);
6401 bufhead = 0;
6402 }
6403
6404 fcb->Header.ValidDataLength.QuadPart = newlength;
6405 TRACE("fcb %p FileSize = %llx\n", fcb, fcb->Header.FileSize.QuadPart);
6406
6407 data = ExAllocatePoolWithTag(PagedPool, end_data - start_data + bufhead, ALLOC_TAG);
6408 if (!data) {
6409 ERR("out of memory\n");
6410 Status = STATUS_INSUFFICIENT_RESOURCES;
6411 goto end;
6412 }
6413
6414 RtlZeroMemory(data + bufhead, end_data - start_data);
6415
6416 TRACE("start_data = %llx\n", start_data);
6417 TRACE("end_data = %llx\n", end_data);
6418
6419 if (offset.QuadPart > start_data || offset.QuadPart + *length < end_data) {
6420 if (changed_length) {
6421 if (fcb->inode_item.st_size > start_data)
6422 Status = read_file(Vcb, fcb->subvol, fcb->inode, data + bufhead, start_data, fcb->inode_item.st_size - start_data, NULL);
6423 else
6424 Status = STATUS_SUCCESS;
6425 } else
6426 Status = read_file(Vcb, fcb->subvol, fcb->inode, data + bufhead, start_data, end_data - start_data, NULL);
6427
6428 if (!NT_SUCCESS(Status)) {
6429 ERR("read_file returned %08x\n", Status);
6430 ExFreePool(data);
6431 goto end;
6432 }
6433 }
6434
6435 RtlCopyMemory(data + bufhead + offset.QuadPart - start_data, buf, *length);
6436
6437 if (!nocsum)
6438 InitializeListHead(&changed_sector_list);
6439
6440 if (make_inline || !nocow) {
6441 Status = excise_extents(fcb->Vcb, fcb, start_data, end_data, nocsum ? NULL : &changed_sector_list, rollback);
6442 if (!NT_SUCCESS(Status)) {
6443 ERR("error - excise_extents returned %08x\n", Status);
6444 ExFreePool(data);
6445 goto end;
6446 }
6447
6448 if (!make_inline) {
6449 Status = insert_extent(fcb->Vcb, fcb, start_data, end_data - start_data, data, nocsum ? NULL : &changed_sector_list, rollback);
6450
6451 if (!NT_SUCCESS(Status)) {
6452 ERR("error - insert_extent returned %08x\n", Status);
6453 ExFreePool(data);
6454 goto end;
6455 }
6456
6457 ExFreePool(data);
6458 } else {
6459 ed2 = (EXTENT_DATA*)data;
6460 ed2->generation = fcb->Vcb->superblock.generation;
6461 ed2->decoded_size = newlength;
6462 ed2->compression = BTRFS_COMPRESSION_NONE;
6463 ed2->encryption = BTRFS_ENCRYPTION_NONE;
6464 ed2->encoding = BTRFS_ENCODING_NONE;
6465 ed2->type = EXTENT_TYPE_INLINE;
6466
6467 insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, 0, ed2, sizeof(EXTENT_DATA) - 1 + newlength, NULL, rollback);
6468
6469 fcb->inode_item.st_blocks += newlength;
6470 }
6471 } else {
6472 Status = do_nocow_write(fcb->Vcb, fcb, start_data, end_data - start_data, data, nocsum ? NULL : &changed_sector_list, rollback);
6473
6474 if (!NT_SUCCESS(Status)) {
6475 ERR("error - do_nocow_write returned %08x\n", Status);
6476 ExFreePool(data);
6477 goto end;
6478 }
6479
6480 ExFreePool(data);
6481 }
6482 }
6483
6484 KeQuerySystemTime(&time);
6485 win_time_to_unix(time, &now);
6486
6487 // ERR("no_cache = %s, FileObject->PrivateCacheMap = %p\n", no_cache ? "TRUE" : "FALSE", FileObject->PrivateCacheMap);
6488
6489 // if (!no_cache) {
6490 // if (!FileObject->PrivateCacheMap) {
6491 // CC_FILE_SIZES ccfs;
6492 //
6493 // ccfs.AllocationSize = fcb->Header.AllocationSize;
6494 // ccfs.FileSize = fcb->Header.FileSize;
6495 // ccfs.ValidDataLength = fcb->Header.ValidDataLength;
6496 //
6497 // TRACE("calling CcInitializeCacheMap...\n");
6498 // CcInitializeCacheMap(FileObject, &ccfs, FALSE, cache_callbacks, fcb);
6499 //
6500 // changed_length = FALSE;
6501 // }
6502 // }
6503
6504 if (fcb->ads)
6505 origii = &fcb->par->inode_item;
6506 else
6507 origii = &fcb->inode_item;
6508
6509 origii->transid = Vcb->superblock.generation;
6510 origii->sequence++;
6511 origii->st_ctime = now;
6512
6513 if (!fcb->ads) {
6514 TRACE("setting st_size to %llx\n", newlength);
6515 origii->st_size = newlength;
6516 origii->st_mtime = now;
6517 }
6518
6519 searchkey.obj_id = fcb->inode;
6520 searchkey.obj_type = TYPE_INODE_ITEM;
6521 searchkey.offset = 0;
6522
6523 Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE);
6524 if (!NT_SUCCESS(Status)) {
6525 ERR("error - find_item returned %08x\n", Status);
6526 goto end;
6527 }
6528
6529 if (!keycmp(&tp.item->key, &searchkey))
6530 delete_tree_item(Vcb, &tp, rollback);
6531 else
6532 WARN("couldn't find existing INODE_ITEM\n");
6533
6534 ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
6535 if (!ii) {
6536 ERR("out of memory\n");
6537 free_traverse_ptr(&tp);
6538 Status = STATUS_INSUFFICIENT_RESOURCES;
6539 goto end;
6540 }
6541
6542 RtlCopyMemory(ii, origii, sizeof(INODE_ITEM));
6543 insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, rollback);
6544
6545 free_traverse_ptr(&tp);
6546
6547 // FIXME - update inode_item of open FCBs pointing to the same inode (i.e. hardlinked files)
6548
6549 if (!nocsum)
6550 update_checksum_tree(Vcb, &changed_sector_list, rollback);
6551
6552 if (changed_length) {
6553 CC_FILE_SIZES ccfs;
6554
6555 ccfs.AllocationSize = fcb->Header.AllocationSize;
6556 ccfs.FileSize = fcb->Header.FileSize;
6557 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
6558
6559 CcSetFileSizes(FileObject, &ccfs);
6560 }
6561
6562 // FIXME - make sure this still called if STATUS_PENDING and async
6563 // if (!no_cache) {
6564 // if (!CcCopyWrite(FileObject, &offset, *length, TRUE, buf)) {
6565 // ERR("CcCopyWrite failed.\n");
6566 // }
6567 // }
6568
6569 fcb->subvol->root_item.ctransid = Vcb->superblock.generation;
6570 fcb->subvol->root_item.ctime = now;
6571
6572 Status = STATUS_SUCCESS;
6573
6574 end:
6575 if (FileObject->Flags & FO_SYNCHRONOUS_IO && !paging_io) {
6576 TRACE("CurrentByteOffset was: %llx\n", FileObject->CurrentByteOffset.QuadPart);
6577 FileObject->CurrentByteOffset.QuadPart = offset.QuadPart + (NT_SUCCESS(Status) ? *length : 0);
6578 TRACE("CurrentByteOffset now: %llx\n", FileObject->CurrentByteOffset.QuadPart);
6579 }
6580
6581 if (paging_lock)
6582 ExReleaseResourceLite(fcb->Header.PagingIoResource);
6583
6584 return Status;
6585 }
6586
6587 NTSTATUS write_file(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
6588 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
6589 device_extension* Vcb = DeviceObject->DeviceExtension;
6590 void* buf;
6591 NTSTATUS Status;
6592 LARGE_INTEGER offset = IrpSp->Parameters.Write.ByteOffset;
6593 PFILE_OBJECT FileObject = IrpSp->FileObject;
6594 fcb* fcb = FileObject ? FileObject->FsContext : NULL;
6595 BOOL locked = FALSE;
6596 // LARGE_INTEGER freq, time1, time2;
6597 LIST_ENTRY rollback;
6598
6599 InitializeListHead(&rollback);
6600
6601 if (Vcb->readonly)
6602 return STATUS_MEDIA_WRITE_PROTECTED;
6603
6604 if (fcb && fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY)
6605 return STATUS_ACCESS_DENIED;
6606
6607 // time1 = KeQueryPerformanceCounter(&freq);
6608
6609 TRACE("write\n");
6610
6611 Irp->IoStatus.Information = 0;
6612
6613 switch (IrpSp->MinorFunction) {
6614 case IRP_MN_COMPLETE:
6615 FIXME("unsupported - IRP_MN_COMPLETE\n");
6616 break;
6617
6618 case IRP_MN_COMPLETE_MDL:
6619 FIXME("unsupported - IRP_MN_COMPLETE_MDL\n");
6620 break;
6621
6622 case IRP_MN_COMPLETE_MDL_DPC:
6623 FIXME("unsupported - IRP_MN_COMPLETE_MDL_DPC\n");
6624 break;
6625
6626 case IRP_MN_COMPRESSED:
6627 FIXME("unsupported - IRP_MN_COMPRESSED\n");
6628 break;
6629
6630 case IRP_MN_DPC:
6631 FIXME("unsupported - IRP_MN_DPC\n");
6632 break;
6633
6634 case IRP_MN_MDL:
6635 FIXME("unsupported - IRP_MN_MDL\n");
6636 break;
6637
6638 case IRP_MN_MDL_DPC:
6639 FIXME("unsupported - IRP_MN_MDL_DPC\n");
6640 break;
6641
6642 case IRP_MN_NORMAL:
6643 TRACE("IRP_MN_NORMAL\n");
6644 break;
6645
6646 default:
6647 WARN("unknown minor function %x\n", IrpSp->MinorFunction);
6648 break;
6649 }
6650
6651 TRACE("offset = %llx\n", offset.QuadPart);
6652 TRACE("length = %x\n", IrpSp->Parameters.Write.Length);
6653
6654 if (!Irp->AssociatedIrp.SystemBuffer) {
6655 buf = map_user_buffer(Irp);
6656
6657 if (Irp->MdlAddress && !buf) {
6658 ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
6659 Status = STATUS_INSUFFICIENT_RESOURCES;
6660 goto exit;
6661 }
6662 } else
6663 buf = Irp->AssociatedIrp.SystemBuffer;
6664
6665 TRACE("buf = %p\n", buf);
6666
6667 acquire_tree_lock(Vcb, TRUE);
6668 locked = TRUE;
6669
6670 if (fcb && !(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForWriteAccess(&fcb->lock, Irp)) {
6671 WARN("tried to write to locked region\n");
6672 Status = STATUS_FILE_LOCK_CONFLICT;
6673 goto exit;
6674 }
6675
6676 // ERR("Irp->Flags = %x\n", Irp->Flags);
6677 Status = write_file2(Vcb, Irp, offset, buf, &IrpSp->Parameters.Write.Length, Irp->Flags & IRP_PAGING_IO, Irp->Flags & IRP_NOCACHE, &rollback);
6678 if (!NT_SUCCESS(Status)) {
6679 if (Status != STATUS_PENDING)
6680 ERR("write_file2 returned %08x\n", Status);
6681 goto exit;
6682 }
6683
6684 Status = consider_write(Vcb);
6685
6686 if (NT_SUCCESS(Status)) {
6687 Irp->IoStatus.Information = IrpSp->Parameters.Write.Length;
6688
6689 #ifdef DEBUG_PARANOID
6690 check_extents_consistent(Vcb, FileObject->FsContext); // TESTING
6691
6692 // check_extent_tree_consistent(Vcb);
6693 #endif
6694 }
6695
6696 exit:
6697 if (locked) {
6698 if (NT_SUCCESS(Status))
6699 clear_rollback(&rollback);
6700 else
6701 do_rollback(Vcb, &rollback);
6702
6703 release_tree_lock(Vcb, TRUE);
6704 }
6705
6706 // time2 = KeQueryPerformanceCounter(NULL);
6707
6708 // ERR("time = %u (freq = %u)\n", (UINT32)(time2.QuadPart - time1.QuadPart), (UINT32)freq.QuadPart);
6709
6710 return Status;
6711 }