[BTRFS]
[reactos.git] / reactos / drivers / filesystems / btrfs / write.c
1 /* Copyright (c) Mark Harmstone 2016
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19
20 #define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - sizeof(leaf_node))
21
22 // BOOL did_split;
23 BOOL chunk_test = FALSE;
24
25 typedef struct {
26 KEVENT Event;
27 IO_STATUS_BLOCK iosb;
28 } write_context;
29
30 typedef struct {
31 EXTENT_ITEM ei;
32 UINT8 type;
33 EXTENT_DATA_REF edr;
34 } EXTENT_ITEM_DATA_REF;
35
36 typedef struct {
37 EXTENT_ITEM_TREE eit;
38 UINT8 type;
39 TREE_BLOCK_REF tbr;
40 } EXTENT_ITEM_TREE2;
41
42 typedef struct {
43 EXTENT_ITEM ei;
44 UINT8 type;
45 TREE_BLOCK_REF tbr;
46 } EXTENT_ITEM_SKINNY_METADATA;
47
48 typedef struct {
49 CHUNK_ITEM ci;
50 CHUNK_ITEM_STRIPE stripes[1];
51 } CHUNK_ITEM2;
52
53 typedef struct {
54 LIST_ENTRY list_entry;
55 UINT64 key;
56 } ordered_list;
57
58 typedef struct {
59 ordered_list ol;
60 ULONG length;
61 UINT32* checksums;
62 BOOL deleted;
63 } changed_sector;
64
65 static NTSTATUS convert_old_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, LIST_ENTRY* rollback);
66 static BOOL extent_item_is_shared(EXTENT_ITEM* ei, ULONG len);
67 static NTSTATUS convert_shared_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, LIST_ENTRY* rollback);
68
69 static NTSTATUS STDCALL write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
70 write_context* context = conptr;
71
72 context->iosb = Irp->IoStatus;
73 KeSetEvent(&context->Event, 0, FALSE);
74
75 // return STATUS_SUCCESS;
76 return STATUS_MORE_PROCESSING_REQUIRED;
77 }
78
79 static NTSTATUS STDCALL write_data_phys(PDEVICE_OBJECT device, UINT64 address, void* data, UINT32 length) {
80 NTSTATUS Status;
81 LARGE_INTEGER offset;
82 PIRP Irp;
83 PIO_STACK_LOCATION IrpSp;
84 write_context* context = NULL;
85
86 TRACE("(%p, %llx, %p, %x)\n", device, address, data, length);
87
88 context = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_context), ALLOC_TAG);
89 if (!context) {
90 ERR("out of memory\n");
91 return STATUS_INSUFFICIENT_RESOURCES;
92 }
93
94 RtlZeroMemory(context, sizeof(write_context));
95
96 KeInitializeEvent(&context->Event, NotificationEvent, FALSE);
97
98 offset.QuadPart = address;
99
100 // Irp = IoBuildSynchronousFsdRequest(IRP_MJ_WRITE, Vcb->device, data, length, &offset, NULL, &context->iosb);
101
102 Irp = IoAllocateIrp(device->StackSize, FALSE);
103
104 if (!Irp) {
105 ERR("IoAllocateIrp failed\n");
106 Status = STATUS_INTERNAL_ERROR;
107 goto exit2;
108 }
109
110 IrpSp = IoGetNextIrpStackLocation(Irp);
111 IrpSp->MajorFunction = IRP_MJ_WRITE;
112
113 if (device->Flags & DO_BUFFERED_IO) {
114 Irp->AssociatedIrp.SystemBuffer = data;
115
116 Irp->Flags = IRP_BUFFERED_IO;
117 } else if (device->Flags & DO_DIRECT_IO) {
118 Irp->MdlAddress = IoAllocateMdl(data, length, FALSE, FALSE, NULL);
119 if (!Irp->MdlAddress) {
120 DbgPrint("IoAllocateMdl failed\n");
121 goto exit;
122 }
123
124 MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoWriteAccess);
125 } else {
126 Irp->UserBuffer = data;
127 }
128
129 IrpSp->Parameters.Write.Length = length;
130 IrpSp->Parameters.Write.ByteOffset = offset;
131
132 Irp->UserIosb = &context->iosb;
133
134 Irp->UserEvent = &context->Event;
135
136 IoSetCompletionRoutine(Irp, write_completion, context, TRUE, TRUE, TRUE);
137
138 // FIXME - support multiple devices
139 Status = IoCallDriver(device, Irp);
140
141 if (Status == STATUS_PENDING) {
142 KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
143 Status = context->iosb.Status;
144 }
145
146 if (!NT_SUCCESS(Status)) {
147 ERR("IoCallDriver returned %08x\n", Status);
148 }
149
150 if (device->Flags & DO_DIRECT_IO) {
151 MmUnlockPages(Irp->MdlAddress);
152 IoFreeMdl(Irp->MdlAddress);
153 }
154
155 exit:
156 IoFreeIrp(Irp);
157
158 exit2:
159 if (context)
160 ExFreePool(context);
161
162 return Status;
163 }
164
165 static NTSTATUS STDCALL write_superblock(device_extension* Vcb, device* device) {
166 NTSTATUS Status;
167 unsigned int i = 0;
168 UINT32 crc32;
169
170 #ifdef __REACTOS__
171 Status = STATUS_INTERNAL_ERROR;
172 #endif
173
174 // FIXME - work with RAID
175
176 // FIXME - only write one superblock if on SSD (?)
177 while (superblock_addrs[i] > 0 && Vcb->length >= superblock_addrs[i] + sizeof(superblock)) {
178 TRACE("writing superblock %u\n", i);
179
180 Vcb->superblock.sb_phys_addr = superblock_addrs[i];
181 RtlCopyMemory(&Vcb->superblock.dev_item, &device->devitem, sizeof(DEV_ITEM));
182
183 crc32 = calc_crc32c(0xffffffff, (UINT8*)&Vcb->superblock.uuid, (ULONG)sizeof(superblock) - sizeof(Vcb->superblock.checksum));
184 crc32 = ~crc32;
185 TRACE("crc32 is %08x\n", crc32);
186 RtlCopyMemory(&Vcb->superblock.checksum, &crc32, sizeof(UINT32));
187
188 Status = write_data_phys(device->devobj, superblock_addrs[i], &Vcb->superblock, sizeof(superblock));
189
190 if (!NT_SUCCESS(Status))
191 break;
192
193 i++;
194 }
195
196 return Status;
197 }
198
199 static BOOL find_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address) {
200 LIST_ENTRY* le;
201 space *s, *bestfit = NULL;
202
203 TRACE("(%p, %llx, %llx, %p)\n", Vcb, c->offset, length, address);
204
205 le = c->space.Flink;
206 while (le != &c->space) {
207 s = CONTAINING_RECORD(le, space, list_entry);
208
209 if (s->type == SPACE_TYPE_FREE) {
210 if (s->size == length) {
211 *address = s->offset;
212 TRACE("returning exact fit at %llx\n", s->offset);
213 return TRUE;
214 } else if (s->size > length && (!bestfit || bestfit->size > s->size)) {
215 bestfit = s;
216 }
217 }
218
219 le = le->Flink;
220 }
221
222 if (bestfit) {
223 TRACE("returning best fit at %llx\n", bestfit->offset);
224 *address = bestfit->offset;
225 return TRUE;
226 }
227
228 return FALSE;
229 }
230
231 void add_to_space_list(chunk* c, UINT64 offset, UINT64 size, UINT8 type) {
232 LIST_ENTRY *le = c->space.Flink, *nextle, *insbef;
233 space *s, *s2, *s3;
234 #ifdef DEBUG_PARANOID
235 UINT64 lastaddr;
236 #endif
237
238 TRACE("(%p, %llx, %llx, %x)\n", c, offset, size, type);
239
240 #ifdef DEBUG_PARANOID
241 // TESTING
242 le = c->space.Flink;
243 while (le != &c->space) {
244 s = CONTAINING_RECORD(le, space, list_entry);
245
246 TRACE("%llx,%llx,%x\n", s->offset, s->size, s->type);
247
248 le = le->Flink;
249 }
250 #endif
251
252 c->space_changed = TRUE;
253
254 le = c->space.Flink;
255 insbef = &c->space;
256 while (le != &c->space) {
257 s = CONTAINING_RECORD(le, space, list_entry);
258 nextle = le->Flink;
259
260 if (s->offset >= offset + size) {
261 insbef = le;
262 break;
263 }
264
265 if (s->offset >= offset && s->offset + s->size <= offset + size) { // delete entirely
266 #ifndef __REACTOS__
267 RemoveEntryList(&s->list_entry);
268
269 #endif
270 if (s->offset + s->size == offset + size) {
271 insbef = s->list_entry.Flink;
272 RemoveEntryList(&s->list_entry);
273 ExFreePool(s);
274 break;
275 }
276
277 RemoveEntryList(&s->list_entry);
278 ExFreePool(s);
279 } else if (s->offset < offset && s->offset + s->size > offset + size) { // split in two
280 s3 = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
281 if (!s3) {
282 ERR("out of memory\n");
283 return;
284 }
285
286 s3->offset = offset + size;
287 s3->size = s->size - size - offset + s->offset;
288 s3->type = s->type;
289 InsertHeadList(&s->list_entry, &s3->list_entry);
290 insbef = &s3->list_entry;
291
292 s->size = offset - s->offset;
293 break;
294 } else if (s->offset + s->size > offset && s->offset + s->size <= offset + size) { // truncate before
295 s->size = offset - s->offset;
296 } else if (s->offset < offset + size && s->offset + s->size > offset + size) { // truncate after
297 s->size -= s->offset - offset + size;
298 s->offset = offset + size;
299
300 insbef = le;
301 break;
302 }
303
304 le = nextle;
305 }
306
307 s2 = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
308 if (!s2) {
309 ERR("out of memory\n");
310 return;
311 }
312
313 s2->offset = offset;
314 s2->size = size;
315 s2->type = type;
316 InsertTailList(insbef, &s2->list_entry);
317
318 // merge entries if same type
319
320 if (s2->list_entry.Blink != &c->space) {
321 s = CONTAINING_RECORD(s2->list_entry.Blink, space, list_entry);
322
323 if (s->type == type) {
324 s->size += s2->size;
325
326 RemoveEntryList(&s2->list_entry);
327 ExFreePool(s2);
328
329 s2 = s;
330 }
331 }
332
333 if (s2->list_entry.Flink != &c->space) {
334 s = CONTAINING_RECORD(s2->list_entry.Flink, space, list_entry);
335
336 if (s->type == type) {
337 s2->size += s->size;
338
339 RemoveEntryList(&s->list_entry);
340 ExFreePool(s);
341 }
342 }
343
344 le = c->space.Flink;
345 while (le != &c->space) {
346 s = CONTAINING_RECORD(le, space, list_entry);
347
348 TRACE("%llx,%llx,%x\n", s->offset, s->size, s->type);
349
350 le = le->Flink;
351 }
352
353 #ifdef DEBUG_PARANOID
354 // TESTING
355 lastaddr = c->offset;
356 le = c->space.Flink;
357 while (le != &c->space) {
358 s = CONTAINING_RECORD(le, space, list_entry);
359
360 if (s->offset != lastaddr) {
361 ERR("inconsistency detected!\n");
362 int3;
363 }
364
365 lastaddr = s->offset + s->size;
366
367 le = le->Flink;
368 }
369
370 if (lastaddr != c->offset + c->chunk_item->size) {
371 ERR("inconsistency detected - space doesn't run all the way to end of chunk\n");
372 int3;
373 }
374 #endif
375 }
376
377 chunk* get_chunk_from_address(device_extension* Vcb, UINT64 address) {
378 LIST_ENTRY* le2;
379 chunk* c;
380
381 le2 = Vcb->chunks.Flink;
382 while (le2 != &Vcb->chunks) {
383 c = CONTAINING_RECORD(le2, chunk, list_entry);
384
385 // TRACE("chunk: %llx, %llx\n", c->offset, c->chunk_item->size);
386
387 if (address >= c->offset && address < c->offset + c->chunk_item->size)
388 return c;
389
390 le2 = le2->Flink;
391 }
392
393 return NULL;
394 }
395
396 typedef struct {
397 disk_hole* dh;
398 device* device;
399 } stripe;
400
401 static void add_provisional_disk_hole(device_extension* Vcb, stripe* s, UINT64 max_stripe_size) {
402 // LIST_ENTRY* le = s->device->disk_holes.Flink;
403 // disk_hole* dh;
404
405 // ERR("old holes:\n");
406 // while (le != &s->device->disk_holes) {
407 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
408 //
409 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
410 //
411 // le = le->Flink;
412 // }
413
414 if (s->dh->size <= max_stripe_size) {
415 s->dh->provisional = TRUE;
416 } else {
417 disk_hole* newdh = ExAllocatePoolWithTag(PagedPool, sizeof(disk_hole), ALLOC_TAG);
418 if (!newdh) {
419 ERR("out of memory\n");
420 return;
421 }
422
423 newdh->address = s->dh->address + max_stripe_size;
424 newdh->size = s->dh->size - max_stripe_size;
425 newdh->provisional = FALSE;
426 InsertTailList(&s->device->disk_holes, &newdh->listentry);
427
428 s->dh->size = max_stripe_size;
429 s->dh->provisional = TRUE;
430 }
431
432 // ERR("new holes:\n");
433 // le = s->device->disk_holes.Flink;
434 // while (le != &s->device->disk_holes) {
435 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
436 //
437 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
438 //
439 // le = le->Flink;
440 // }
441 }
442
443 static UINT64 find_new_chunk_address(device_extension* Vcb, UINT64 size) {
444 KEY searchkey;
445 traverse_ptr tp, next_tp;
446 BOOL b;
447 UINT64 lastaddr;
448 NTSTATUS Status;
449
450 searchkey.obj_id = 0x100;
451 searchkey.obj_type = TYPE_CHUNK_ITEM;
452 searchkey.offset = 0;
453
454 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
455 if (!NT_SUCCESS(Status)) {
456 ERR("error - find_item returned %08x\n", Status);
457 return 0xffffffffffffffff;
458 }
459
460 lastaddr = 0;
461
462 do {
463 if (tp.item->key.obj_type == TYPE_CHUNK_ITEM) {
464 if (tp.item->size < sizeof(CHUNK_ITEM)) {
465 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(CHUNK_ITEM));
466 } else {
467 CHUNK_ITEM* ci = (CHUNK_ITEM*)tp.item->data;
468
469 if (tp.item->key.offset >= lastaddr + size) {
470 free_traverse_ptr(&tp);
471 return lastaddr;
472 }
473
474 lastaddr = tp.item->key.offset + ci->size;
475 }
476 }
477
478 b = find_next_item(Vcb, &tp, &next_tp, FALSE);
479 if (b) {
480 free_traverse_ptr(&tp);
481 tp = next_tp;
482
483 if (tp.item->key.obj_id > searchkey.obj_id || tp.item->key.obj_type > searchkey.obj_type)
484 break;
485 }
486 } while (b);
487
488 free_traverse_ptr(&tp);
489
490 return lastaddr;
491 }
492
493 static BOOL increase_dev_item_used(device_extension* Vcb, device* device, UINT64 size, LIST_ENTRY* rollback) {
494 KEY searchkey;
495 traverse_ptr tp;
496 DEV_ITEM* di;
497 NTSTATUS Status;
498
499 searchkey.obj_id = 1;
500 searchkey.obj_type = TYPE_DEV_ITEM;
501 searchkey.offset = device->devitem.dev_id;
502
503 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
504 if (!NT_SUCCESS(Status)) {
505 ERR("error - find_item returned %08x\n", Status);
506 return FALSE;
507 }
508
509 if (keycmp(&tp.item->key, &searchkey)) {
510 ERR("error - could not find DEV_ITEM for device %llx\n", device->devitem.dev_id);
511 free_traverse_ptr(&tp);
512 return FALSE;
513 }
514
515 delete_tree_item(Vcb, &tp, rollback);
516
517 free_traverse_ptr(&tp);
518
519 device->devitem.bytes_used += size;
520
521 di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
522 if (!di) {
523 ERR("out of memory\n");
524 return FALSE;
525 }
526
527 RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM));
528
529 if (!insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, rollback)) {
530 ERR("insert_tree_item failed\n");
531 return FALSE;
532 }
533
534 return TRUE;
535 }
536
537 static void reset_disk_holes(device* device, BOOL commit) {
538 LIST_ENTRY* le = device->disk_holes.Flink;
539 disk_hole* dh;
540
541 // ERR("old holes:\n");
542 // while (le != &device->disk_holes) {
543 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
544 //
545 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
546 //
547 // le = le->Flink;
548 // }
549
550 le = device->disk_holes.Flink;
551 while (le != &device->disk_holes) {
552 LIST_ENTRY* le2 = le->Flink;
553
554 dh = CONTAINING_RECORD(le, disk_hole, listentry);
555
556 if (dh->provisional) {
557 if (commit) {
558 RemoveEntryList(le);
559 ExFreePool(dh);
560 } else {
561 dh->provisional = FALSE;
562 }
563 }
564
565 le = le2;
566 }
567
568 if (!commit) {
569 le = device->disk_holes.Flink;
570 while (le != &device->disk_holes) {
571 LIST_ENTRY* le2 = le->Flink;
572
573 dh = CONTAINING_RECORD(le, disk_hole, listentry);
574
575 while (le2 != &device->disk_holes) {
576 disk_hole* dh2 = CONTAINING_RECORD(le2, disk_hole, listentry);
577
578 if (dh2->address == dh->address + dh->size) {
579 LIST_ENTRY* le3 = le2->Flink;
580 dh->size += dh2->size;
581
582 RemoveEntryList(le2);
583 ExFreePool(dh2);
584
585 le2 = le3;
586 } else
587 break;
588 }
589
590 le = le->Flink;
591 }
592 }
593
594 // ERR("new holes:\n");
595 // le = device->disk_holes.Flink;
596 // while (le != &device->disk_holes) {
597 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
598 //
599 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
600 //
601 // le = le->Flink;
602 // }
603 }
604
605 static NTSTATUS add_to_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset, void* data, ULONG size) {
606 sys_chunk *sc, *sc2;
607 LIST_ENTRY* le;
608 USHORT i;
609
610 if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) {
611 ERR("error - bootstrap is full\n");
612 return STATUS_INTERNAL_ERROR;
613 }
614
615 sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG);
616 if (!sc) {
617 ERR("out of memory\n");
618 return STATUS_INSUFFICIENT_RESOURCES;
619 }
620
621 sc->key.obj_id = obj_id;
622 sc->key.obj_type = obj_type;
623 sc->key.offset = offset;
624 sc->size = size;
625 sc->data = ExAllocatePoolWithTag(PagedPool, sc->size, ALLOC_TAG);
626 if (!sc->data) {
627 ERR("out of memory\n");
628 ExFreePool(sc);
629 return STATUS_INSUFFICIENT_RESOURCES;
630 }
631
632 RtlCopyMemory(sc->data, data, sc->size);
633
634 le = Vcb->sys_chunks.Flink;
635 while (le != &Vcb->sys_chunks) {
636 sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
637
638 if (keycmp(&sc2->key, &sc->key) == 1)
639 break;
640
641 le = le->Flink;
642 }
643 InsertTailList(le, &sc->list_entry);
644
645 Vcb->superblock.n += sizeof(KEY) + size;
646
647 i = 0;
648 le = Vcb->sys_chunks.Flink;
649 while (le != &Vcb->sys_chunks) {
650 sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
651
652 TRACE("%llx,%x,%llx\n", sc2->key.obj_id, sc2->key.obj_type, sc2->key.offset);
653
654 RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], &sc2->key, sizeof(KEY));
655 i += sizeof(KEY);
656
657 RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], sc2->data, sc2->size);
658 i += sc2->size;
659
660 le = le->Flink;
661 }
662
663 return STATUS_SUCCESS;
664 }
665
666 static chunk* alloc_chunk(device_extension* Vcb, UINT64 flags, LIST_ENTRY* rollback) {
667 UINT64 max_stripe_size, max_chunk_size, stripe_size;
668 UINT64 total_size = 0, i, j, logaddr;
669 int num_stripes;
670 disk_hole* dh;
671 stripe* stripes;
672 ULONG cisize;
673 CHUNK_ITEM* ci;
674 CHUNK_ITEM_STRIPE* cis;
675 chunk* c = NULL;
676 space* s = NULL;
677 BOOL success = FALSE;
678 BLOCK_GROUP_ITEM* bgi;
679
680 for (i = 0; i < Vcb->superblock.num_devices; i++) {
681 total_size += Vcb->devices[i].devitem.num_bytes;
682 }
683 TRACE("total_size = %llx\n", total_size);
684
685 if (flags & BLOCK_FLAG_DATA) {
686 max_stripe_size = 0x40000000; // 1 GB
687 max_chunk_size = 10 * max_stripe_size;
688 } else if (flags & BLOCK_FLAG_METADATA) {
689 if (total_size > 0xC80000000) // 50 GB
690 max_stripe_size = 0x40000000; // 1 GB
691 else
692 max_stripe_size = 0x10000000; // 256 MB
693
694 max_chunk_size = max_stripe_size;
695 } else if (flags & BLOCK_FLAG_SYSTEM) {
696 max_stripe_size = 0x2000000; // 32 MB
697 max_chunk_size = 2 * max_stripe_size;
698 }
699
700 // FIXME - make sure whole number of sectors?
701 max_chunk_size = min(max_chunk_size, total_size / 10); // cap at 10%
702
703 TRACE("would allocate a new chunk of %llx bytes and stripe %llx\n", max_chunk_size, max_stripe_size);
704
705 if (flags & BLOCK_FLAG_DUPLICATE) {
706 num_stripes = 2;
707 } else if (flags & BLOCK_FLAG_RAID0) {
708 FIXME("RAID0 not yet supported\n");
709 return NULL;
710 } else if (flags & BLOCK_FLAG_RAID1) {
711 FIXME("RAID1 not yet supported\n");
712 return NULL;
713 } else if (flags & BLOCK_FLAG_RAID10) {
714 FIXME("RAID10 not yet supported\n");
715 return NULL;
716 } else if (flags & BLOCK_FLAG_RAID5) {
717 FIXME("RAID5 not yet supported\n");
718 return NULL;
719 } else if (flags & BLOCK_FLAG_RAID6) {
720 FIXME("RAID6 not yet supported\n");
721 return NULL;
722 } else { // SINGLE
723 num_stripes = 1;
724 }
725
726 stripes = ExAllocatePoolWithTag(PagedPool, sizeof(stripe) * num_stripes, ALLOC_TAG);
727 if (!stripes) {
728 ERR("out of memory\n");
729 return NULL;
730 }
731
732 for (i = 0; i < num_stripes; i++) {
733 stripes[i].dh = NULL;
734
735 for (j = 0; j < Vcb->superblock.num_devices; j++) {
736 LIST_ENTRY* le = Vcb->devices[j].disk_holes.Flink;
737
738 while (le != &Vcb->devices[j].disk_holes) {
739 dh = CONTAINING_RECORD(le, disk_hole, listentry);
740
741 if (!dh->provisional) {
742 if (!stripes[i].dh || dh->size > stripes[i].dh->size) {
743 stripes[i].dh = dh;
744 stripes[i].device = &Vcb->devices[j];
745
746 if (stripes[i].dh->size >= max_stripe_size)
747 break;
748 }
749 }
750
751 le = le->Flink;
752 }
753
754 if (stripes[i].dh && stripes[i].dh->size >= max_stripe_size)
755 break;
756 }
757
758 if (stripes[i].dh) {
759 TRACE("good DH: device %llx, address %llx, size %llx\n", stripes[i].device->devitem.dev_id, stripes[i].dh->address, stripes[i].dh->size);
760 } else {
761 TRACE("good DH not found\n");
762 goto end;
763 }
764
765 add_provisional_disk_hole(Vcb, &stripes[i], max_stripe_size);
766 }
767
768 stripe_size = min(stripes[0].dh->size, max_stripe_size);
769 for (i = 1; i < num_stripes; i++) {
770 stripe_size = min(stripe_size, stripes[1].dh->size);
771 }
772 // FIXME - make sure stripe_size aligned properly
773 // FIXME - obey max_chunk_size
774
775 c = ExAllocatePoolWithTag(PagedPool, sizeof(chunk), ALLOC_TAG);
776 if (!c) {
777 ERR("out of memory\n");
778 goto end;
779 }
780
781 // add CHUNK_ITEM to tree 3
782
783 cisize = sizeof(CHUNK_ITEM) + (num_stripes * sizeof(CHUNK_ITEM_STRIPE));
784 ci = ExAllocatePoolWithTag(PagedPool, cisize, ALLOC_TAG);
785 if (!ci) {
786 ERR("out of memory\n");
787 goto end;
788 }
789
790 ci->size = stripe_size; // FIXME for RAID
791 ci->root_id = Vcb->extent_root->id;
792 ci->stripe_length = 0x10000; // FIXME? BTRFS_STRIPE_LEN in kernel
793 ci->type = flags;
794 ci->opt_io_alignment = ci->stripe_length;
795 ci->opt_io_width = ci->stripe_length;
796 ci->sector_size = stripes[0].device->devitem.minimal_io_size;
797 ci->num_stripes = num_stripes;
798 ci->sub_stripes = 1;
799
800 c->devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * num_stripes, ALLOC_TAG);
801 if (!c->devices) {
802 ERR("out of memory\n");
803 ExFreePool(ci);
804 goto end;
805 }
806
807 for (i = 0; i < num_stripes; i++) {
808 if (i == 0)
809 cis = (CHUNK_ITEM_STRIPE*)&ci[1];
810 else
811 cis = &cis[1];
812
813 cis->dev_id = stripes[i].device->devitem.dev_id;
814 cis->offset = stripes[i].dh->address;
815 cis->dev_uuid = stripes[i].device->devitem.device_uuid;
816
817 c->devices[i] = stripes[i].device;
818 }
819
820 logaddr = find_new_chunk_address(Vcb, ci->size);
821 if (logaddr == 0xffffffffffffffff) {
822 ERR("find_new_chunk_address failed\n");
823 ExFreePool(ci);
824 goto end;
825 }
826
827 if (!insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, logaddr, ci, cisize, NULL, rollback)) {
828 ERR("insert_tree_item failed\n");
829 ExFreePool(ci);
830 goto end;
831 }
832
833 if (flags & BLOCK_FLAG_SYSTEM) {
834 NTSTATUS Status = add_to_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, logaddr, ci, cisize);
835 if (!NT_SUCCESS(Status)) {
836 ERR("add_to_bootstrap returned %08x\n", Status);
837 goto end;
838 }
839 }
840
841 Vcb->superblock.chunk_root_generation = Vcb->superblock.generation;
842
843 c->chunk_item = ExAllocatePoolWithTag(PagedPool, cisize, ALLOC_TAG);
844 if (!c->chunk_item) {
845 ERR("out of memory\n");
846 goto end;
847 }
848
849 RtlCopyMemory(c->chunk_item, ci, cisize);
850 c->size = cisize;
851 c->offset = logaddr;
852 c->used = c->oldused = 0;
853 c->space_changed = FALSE;
854 InitializeListHead(&c->space);
855
856 s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
857 if (!s) {
858 ERR("out of memory\n");
859 goto end;
860 }
861
862 s->offset = c->offset;
863 s->size = c->chunk_item->size;
864 s->type = SPACE_TYPE_FREE;
865 InsertTailList(&c->space, &s->list_entry);
866
867 protect_superblocks(Vcb, c);
868
869 // add BLOCK_GROUP_ITEM to tree 2
870
871 bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG);
872 if (!bgi) {
873 ERR("out of memory\n");
874 goto end;
875 }
876
877 bgi->used = 0;
878 bgi->chunk_tree = 0x100;
879 bgi->flags = flags;
880
881 if (!insert_tree_item(Vcb, Vcb->extent_root, logaddr, TYPE_BLOCK_GROUP_ITEM, ci->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, rollback)) {
882 ERR("insert_tree_item failed\n");
883 ExFreePool(bgi);
884 goto end;
885 }
886
887 // add DEV_EXTENTs to tree 4
888
889 for (i = 0; i < num_stripes; i++) {
890 DEV_EXTENT* de;
891
892 de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG);
893 if (!de) {
894 ERR("out of memory\n");
895 goto end;
896 }
897
898 de->chunktree = Vcb->chunk_root->id;
899 de->objid = 0x100;
900 de->address = logaddr;
901 de->length = ci->size;
902 de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid;
903
904 if (!insert_tree_item(Vcb, Vcb->dev_root, stripes[i].device->devitem.dev_id, TYPE_DEV_EXTENT, stripes[i].dh->address, de, sizeof(DEV_EXTENT), NULL, rollback)) {
905 ERR("insert_tree_item failed\n");
906 ExFreePool(de);
907 goto end;
908 }
909
910 if (!increase_dev_item_used(Vcb, stripes[i].device, ci->size, rollback)) {
911 ERR("increase_dev_item_used failed\n");
912 goto end;
913 }
914 }
915
916 for (i = 0; i < num_stripes; i++) {
917 BOOL b = FALSE;
918 for (j = 0; j < i; j++) {
919 if (stripes[j].device == stripes[i].device)
920 b = TRUE;
921 }
922
923 if (!b)
924 reset_disk_holes(stripes[i].device, TRUE);
925 }
926
927 success = TRUE;
928
929 end:
930 ExFreePool(stripes);
931
932 if (!success) {
933 for (i = 0; i < num_stripes; i++) {
934 BOOL b = FALSE;
935 for (j = 0; j < i; j++) {
936 if (stripes[j].device == stripes[i].device)
937 b = TRUE;
938 }
939
940 if (!b)
941 reset_disk_holes(stripes[i].device, FALSE);
942 }
943
944 if (c) ExFreePool(c);
945 if (s) ExFreePool(s);
946 } else
947 InsertTailList(&Vcb->chunks, &c->list_entry);
948
949 return success ? c : NULL;
950 }
951
952 static void decrease_chunk_usage(chunk* c, UINT64 delta) {
953 c->used -= delta;
954
955 TRACE("decreasing size of chunk %llx by %llx\n", c->offset, delta);
956 }
957
958 static void increase_chunk_usage(chunk* c, UINT64 delta) {
959 c->used += delta;
960
961 TRACE("increasing size of chunk %llx by %llx\n", c->offset, delta);
962 }
963
964 static NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, UINT32 length) {
965 KEY searchkey;
966 traverse_ptr tp;
967 CHUNK_ITEM2* ci;
968 NTSTATUS Status;
969 UINT32 i;
970
971 TRACE("(%p, %llx, %p, %x)\n", Vcb, address, data, length);
972
973 // FIXME - use version cached in Vcb
974
975 searchkey.obj_id = 0x100; // fixed?
976 searchkey.obj_type = TYPE_CHUNK_ITEM;
977 searchkey.offset = address;
978
979 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
980 if (!NT_SUCCESS(Status)) {
981 ERR("error - find_item returned %08x\n", Status);
982 return Status;
983 }
984
985 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
986 ERR("error - unexpected item in chunk tree\n");
987 Status = STATUS_INTERNAL_ERROR;
988 goto end;
989 }
990
991 if (tp.item->size < sizeof(CHUNK_ITEM2)) {
992 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(CHUNK_ITEM2));
993 Status = STATUS_INTERNAL_ERROR;
994 goto end;
995 }
996
997 ci = (CHUNK_ITEM2*)tp.item->data;
998
999 if (tp.item->key.offset > address || tp.item->key.offset + ci->ci.size < address) {
1000 ERR("error - address %llx was out of chunk bounds\n", address);
1001 Status = STATUS_INTERNAL_ERROR;
1002 goto end;
1003 }
1004
1005 // FIXME - only do this for chunks marked DUPLICATE?
1006 // FIXME - for multiple writes, if PENDING do waits at the end
1007 // FIXME - work with RAID
1008 for (i = 0; i < ci->ci.num_stripes; i++) {
1009 Status = write_data_phys(Vcb->devices[0].devobj, address - tp.item->key.offset + ci->stripes[i].offset, data, length);
1010 if (!NT_SUCCESS(Status)) {
1011 ERR("error - write_data_phys failed\n");
1012 goto end;
1013 }
1014 }
1015
1016 end:
1017 free_traverse_ptr(&tp);
1018
1019 return Status;
1020 }
1021
1022 static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) {
1023 LIST_ENTRY *le, *nextle;
1024 space *s, *s2;
1025
1026 // // TESTING
1027 // le = c->space.Flink;
1028 // while (le != &c->space) {
1029 // s = CONTAINING_RECORD(le, space, list_entry);
1030 //
1031 // TRACE("%x,%x,%x\n", (UINT32)s->offset, (UINT32)s->size, s->type);
1032 //
1033 // le = le->Flink;
1034 // }
1035
1036 le = c->space.Flink;
1037 while (le != &c->space) {
1038 s = CONTAINING_RECORD(le, space, list_entry);
1039 nextle = le->Flink;
1040
1041 if (s->type == SPACE_TYPE_DELETING)
1042 s->type = SPACE_TYPE_FREE;
1043 else if (s->type == SPACE_TYPE_WRITING)
1044 s->type = SPACE_TYPE_USED;
1045
1046 if (le->Blink != &c->space) {
1047 s2 = CONTAINING_RECORD(le->Blink, space, list_entry);
1048
1049 if (s2->type == s->type) { // do merge
1050 s2->size += s->size;
1051
1052 RemoveEntryList(&s->list_entry);
1053 ExFreePool(s);
1054 }
1055 }
1056
1057 le = nextle;
1058 }
1059
1060 // le = c->space.Flink;
1061 // while (le != &c->space) {
1062 // s = CONTAINING_RECORD(le, space, list_entry);
1063 //
1064 // TRACE("%x,%x,%x\n", (UINT32)s->offset, (UINT32)s->size, s->type);
1065 //
1066 // le = le->Flink;
1067 // }
1068 }
1069
1070 static void clean_space_cache(device_extension* Vcb) {
1071 LIST_ENTRY* le;
1072 chunk* c;
1073
1074 TRACE("(%p)\n", Vcb);
1075
1076 le = Vcb->chunks.Flink;
1077 while (le != &Vcb->chunks) {
1078 c = CONTAINING_RECORD(le, chunk, list_entry);
1079
1080 if (c->space_changed) {
1081 clean_space_cache_chunk(Vcb, c);
1082 c->space_changed = FALSE;
1083 }
1084
1085 le = le->Flink;
1086 }
1087 }
1088
1089 static BOOL trees_consistent(device_extension* Vcb) {
1090 ULONG maxsize = Vcb->superblock.node_size - sizeof(tree_header);
1091 LIST_ENTRY* le;
1092
1093 le = Vcb->tree_cache.Flink;
1094 while (le != &Vcb->tree_cache) {
1095 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1096
1097 if (tc2->write) {
1098 if (tc2->tree->header.num_items == 0 && tc2->tree->parent)
1099 return FALSE;
1100
1101 if (tc2->tree->size > maxsize)
1102 return FALSE;
1103
1104 if (!tc2->tree->has_new_address)
1105 return FALSE;
1106 }
1107
1108 le = le->Flink;
1109 }
1110
1111 return TRUE;
1112 }
1113
1114 static NTSTATUS add_parents(device_extension* Vcb, LIST_ENTRY* rollback) {
1115 LIST_ENTRY* le;
1116 NTSTATUS Status;
1117
1118 le = Vcb->tree_cache.Flink;
1119 while (le != &Vcb->tree_cache) {
1120 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1121
1122 if (tc2->write) {
1123 if (tc2->tree->parent)
1124 add_to_tree_cache(Vcb, tc2->tree->parent, TRUE);
1125 else if (tc2->tree->root != Vcb->chunk_root && tc2->tree->root != Vcb->root_root) {
1126 KEY searchkey;
1127 traverse_ptr tp;
1128
1129 searchkey.obj_id = tc2->tree->root->id;
1130 searchkey.obj_type = TYPE_ROOT_ITEM;
1131 searchkey.offset = 0xffffffffffffffff;
1132
1133 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
1134 if (!NT_SUCCESS(Status)) {
1135 ERR("error - find_item returned %08x\n", Status);
1136 return Status;
1137 }
1138
1139 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1140 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1141 free_traverse_ptr(&tp);
1142 return STATUS_INTERNAL_ERROR;
1143 }
1144
1145 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed
1146 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
1147 if (!ri) {
1148 ERR("out of memory\n");
1149 return STATUS_INSUFFICIENT_RESOURCES;
1150 }
1151
1152 if (tp.item->size > 0)
1153 RtlCopyMemory(ri, tp.item->data, tp.item->size);
1154
1155 RtlZeroMemory(((UINT8*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size);
1156
1157 delete_tree_item(Vcb, &tp, rollback);
1158
1159 if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, 0, ri, sizeof(ROOT_ITEM), NULL, rollback)) {
1160 ERR("insert_tree_item failed\n");
1161 return STATUS_INTERNAL_ERROR;
1162 }
1163 } else {
1164 add_to_tree_cache(Vcb, tp.tree, TRUE);
1165 }
1166
1167 free_traverse_ptr(&tp);
1168 }
1169 }
1170
1171 le = le->Flink;
1172 }
1173
1174 return STATUS_SUCCESS;
1175 }
1176
1177 void print_trees(LIST_ENTRY* tc) {
1178 LIST_ENTRY *le, *le2;
1179
1180 le = tc->Flink;
1181 while (le != tc) {
1182 KEY firstitem = {0xcccccccccccccccc,0xcc,0xcccccccccccccccc};
1183 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1184 UINT32 num_items = 0;
1185
1186 le2 = tc2->tree->itemlist.Flink;
1187 while (le2 != &tc2->tree->itemlist) {
1188 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1189 if (!td->ignore) {
1190 firstitem = td->key;
1191 num_items++;
1192 }
1193 le2 = le2->Flink;
1194 }
1195
1196 ERR("tree: root %llx, first key %llx,%x,%llx, level %x, num_items %x / %x\n",
1197 tc2->tree->header.tree_id, firstitem.obj_id, firstitem.obj_type, firstitem.offset, tc2->tree->header.level, num_items, tc2->tree->header.num_items);
1198
1199 le = le->Flink;
1200 }
1201 }
1202
1203 static void add_parents_to_cache(device_extension* Vcb, tree* t) {
1204 KEY searchkey;
1205 traverse_ptr tp;
1206 NTSTATUS Status;
1207
1208 while (t->parent) {
1209 t = t->parent;
1210
1211 add_to_tree_cache(Vcb, t, TRUE);
1212 }
1213
1214 if (t->root == Vcb->root_root || t->root == Vcb->chunk_root)
1215 return;
1216
1217 searchkey.obj_id = t->root->id;
1218 searchkey.obj_type = TYPE_ROOT_ITEM;
1219 searchkey.offset = 0xffffffffffffffff;
1220
1221 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
1222 if (!NT_SUCCESS(Status)) {
1223 ERR("error - find_item returned %08x\n", Status);
1224 return;
1225 }
1226
1227 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1228 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1229 free_traverse_ptr(&tp);
1230 return;
1231 }
1232
1233 add_to_tree_cache(Vcb, tp.tree, TRUE);
1234
1235 free_traverse_ptr(&tp);
1236 }
1237
1238 static BOOL insert_tree_extent_skinny(device_extension* Vcb, tree* t, chunk* c, UINT64 address, LIST_ENTRY* rollback) {
1239 EXTENT_ITEM_SKINNY_METADATA* eism;
1240 traverse_ptr insert_tp;
1241
1242 eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
1243 if (!eism) {
1244 ERR("out of memory\n");
1245 return FALSE;
1246 }
1247
1248 eism->ei.refcount = 1;
1249 eism->ei.generation = Vcb->superblock.generation;
1250 eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
1251 eism->type = TYPE_TREE_BLOCK_REF;
1252 eism->tbr.offset = t->header.tree_id;
1253
1254 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, t->header.level, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, rollback)) {
1255 ERR("insert_tree_item failed\n");
1256 ExFreePool(eism);
1257 return FALSE;
1258 }
1259
1260 add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_WRITING);
1261
1262 // add_to_tree_cache(tc, insert_tp.tree, TRUE);
1263 add_parents_to_cache(Vcb, insert_tp.tree);
1264
1265 free_traverse_ptr(&insert_tp);
1266
1267 t->new_address = address;
1268 t->has_new_address = TRUE;
1269
1270 return TRUE;
1271 }
1272
1273 static BOOL insert_tree_extent(device_extension* Vcb, tree* t, chunk* c, LIST_ENTRY* rollback) {
1274 UINT64 address;
1275 EXTENT_ITEM_TREE2* eit2;
1276 traverse_ptr insert_tp;
1277
1278 TRACE("(%p, %p, %p, %p)\n", Vcb, t, c, rollback);
1279
1280 if (!find_address_in_chunk(Vcb, c, Vcb->superblock.node_size, &address))
1281 return FALSE;
1282
1283 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)
1284 return insert_tree_extent_skinny(Vcb, t, c, address, rollback);
1285
1286 eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
1287 if (!eit2) {
1288 ERR("out of memory\n");
1289 return FALSE;
1290 }
1291
1292 eit2->eit.extent_item.refcount = 1;
1293 eit2->eit.extent_item.generation = Vcb->superblock.generation;
1294 eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
1295 // eit2->eit.firstitem = wt->firstitem;
1296 eit2->eit.level = t->header.level;
1297 eit2->type = TYPE_TREE_BLOCK_REF;
1298 eit2->tbr.offset = t->header.tree_id;
1299
1300 // #ifdef DEBUG_PARANOID
1301 // if (wt->firstitem.obj_type == 0xcc) { // TESTING
1302 // ERR("error - firstitem not set (wt = %p, tree = %p, address = %x)\n", wt, wt->tree, (UINT32)address);
1303 // ERR("num_items = %u, level = %u, root = %x, delete = %u\n", wt->tree->header.num_items, wt->tree->header.level, (UINT32)wt->tree->root->id, wt->delete);
1304 // int3;
1305 // }
1306 // #endif
1307
1308 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, rollback)) {
1309 ERR("insert_tree_item failed\n");
1310 ExFreePool(eit2);
1311 return FALSE;
1312 }
1313
1314 add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_WRITING);
1315
1316 // add_to_tree_cache(tc, insert_tp.tree, TRUE);
1317 add_parents_to_cache(Vcb, insert_tp.tree);
1318
1319 free_traverse_ptr(&insert_tp);
1320
1321 t->new_address = address;
1322 t->has_new_address = TRUE;
1323
1324 return TRUE;
1325 }
1326
1327 static NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, LIST_ENTRY* rollback) {
1328 chunk *origchunk = NULL, *c;
1329 LIST_ENTRY* le;
1330 UINT64 flags = t->flags;
1331
1332 if (flags == 0)
1333 flags = (t->root->id == BTRFS_ROOT_CHUNK ? BLOCK_FLAG_SYSTEM : BLOCK_FLAG_METADATA) | BLOCK_FLAG_DUPLICATE;
1334
1335 // TRACE("flags = %x\n", (UINT32)wt->flags);
1336
1337 // if (!chunk_test) { // TESTING
1338 // if ((c = alloc_chunk(Vcb, flags))) {
1339 // if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
1340 // if (insert_tree_extent(Vcb, t, c)) {
1341 // chunk_test = TRUE;
1342 // return STATUS_SUCCESS;
1343 // }
1344 // }
1345 // }
1346 // }
1347
1348 if (t->has_address) {
1349 origchunk = get_chunk_from_address(Vcb, t->header.address);
1350
1351 if (insert_tree_extent(Vcb, t, origchunk, rollback))
1352 return STATUS_SUCCESS;
1353 }
1354
1355 le = Vcb->chunks.Flink;
1356 while (le != &Vcb->chunks) {
1357 c = CONTAINING_RECORD(le, chunk, list_entry);
1358
1359 // FIXME - make sure to avoid superblocks
1360
1361 if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
1362 if (insert_tree_extent(Vcb, t, c, rollback))
1363 return STATUS_SUCCESS;
1364 }
1365
1366 le = le->Flink;
1367 }
1368
1369 // allocate new chunk if necessary
1370 if ((c = alloc_chunk(Vcb, flags, rollback))) {
1371 if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
1372 if (insert_tree_extent(Vcb, t, c, rollback))
1373 return STATUS_SUCCESS;
1374 }
1375 }
1376
1377 ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size);
1378
1379 return STATUS_DISK_FULL;
1380 }
1381
1382 static BOOL reduce_tree_extent_skinny(device_extension* Vcb, UINT64 address, tree* t, LIST_ENTRY* rollback) {
1383 KEY searchkey;
1384 traverse_ptr tp;
1385 chunk* c;
1386 EXTENT_ITEM_SKINNY_METADATA* eism;
1387 NTSTATUS Status;
1388
1389 searchkey.obj_id = address;
1390 searchkey.obj_type = TYPE_METADATA_ITEM;
1391 searchkey.offset = 0xffffffffffffffff;
1392
1393 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
1394 if (!NT_SUCCESS(Status)) {
1395 ERR("error - find_item returned %08x\n", Status);
1396 return FALSE;
1397 }
1398
1399 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1400 TRACE("could not find %llx,%x,%llx in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1401 free_traverse_ptr(&tp);
1402 return FALSE;
1403 }
1404
1405 if (tp.item->size < sizeof(EXTENT_ITEM_SKINNY_METADATA)) {
1406 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_SKINNY_METADATA));
1407 free_traverse_ptr(&tp);
1408 return FALSE;
1409 }
1410
1411 delete_tree_item(Vcb, &tp, rollback);
1412
1413 eism = (EXTENT_ITEM_SKINNY_METADATA*)tp.item->data;
1414 if (t->header.level == 0 && eism->ei.flags & EXTENT_ITEM_SHARED_BACKREFS && eism->type == TYPE_TREE_BLOCK_REF) {
1415 // convert shared data extents
1416
1417 LIST_ENTRY* le = t->itemlist.Flink;
1418 while (le != &t->itemlist) {
1419 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1420
1421 TRACE("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1422
1423 if (!td->ignore && !td->inserted) {
1424 if (td->key.obj_type == TYPE_EXTENT_DATA) {
1425 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1426
1427 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1428 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1429
1430 if (ed2->address != 0) {
1431 TRACE("trying to convert shared data extent %llx,%llx\n", ed2->address, ed2->size);
1432 convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
1433 }
1434 }
1435 }
1436 }
1437
1438 le = le->Flink;
1439 }
1440
1441 t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
1442 }
1443
1444 c = get_chunk_from_address(Vcb, address);
1445
1446 if (c) {
1447 decrease_chunk_usage(c, Vcb->superblock.node_size);
1448
1449 add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_DELETING);
1450 } else
1451 ERR("could not find chunk for address %llx\n", address);
1452
1453 free_traverse_ptr(&tp);
1454
1455 return TRUE;
1456 }
1457
1458 // TESTING
1459 // static void check_tree_num_items(tree* t) {
1460 // LIST_ENTRY* le2;
1461 // UINT32 ni;
1462 //
1463 // le2 = t->itemlist.Flink;
1464 // ni = 0;
1465 // while (le2 != &t->itemlist) {
1466 // tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1467 // if (!td->ignore)
1468 // ni++;
1469 // le2 = le2->Flink;
1470 // }
1471 //
1472 // if (t->header.num_items != ni) {
1473 // ERR("tree %p not okay: num_items was %x, expecting %x\n", t, ni, t->header.num_items);
1474 // int3;
1475 // } else {
1476 // ERR("tree %p okay\n", t);
1477 // }
1478 // }
1479 //
1480 // static void check_trees_num_items(LIST_ENTRY* tc) {
1481 // LIST_ENTRY* le = tc->Flink;
1482 // while (le != tc) {
1483 // tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1484 //
1485 // check_tree_num_items(tc2->tree);
1486 //
1487 // le = le->Flink;
1488 // }
1489 // }
1490
1491 static void convert_old_tree_extent(device_extension* Vcb, tree_data* td, tree* t, LIST_ENTRY* rollback) {
1492 KEY searchkey;
1493 traverse_ptr tp, tp2, insert_tp;
1494 EXTENT_REF_V0* erv0;
1495 NTSTATUS Status;
1496
1497 TRACE("(%p, %p, %p)\n", Vcb, td, t);
1498
1499 searchkey.obj_id = td->treeholder.address;
1500 searchkey.obj_type = TYPE_EXTENT_REF_V0;
1501 searchkey.offset = 0xffffffffffffffff;
1502
1503 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
1504 if (!NT_SUCCESS(Status)) {
1505 ERR("error - find_item returned %08x\n", Status);
1506 return;
1507 }
1508
1509 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1510 TRACE("could not find EXTENT_REF_V0 for %llx\n", searchkey.obj_id);
1511 free_traverse_ptr(&tp);
1512 return;
1513 }
1514
1515 searchkey.obj_id = td->treeholder.address;
1516 searchkey.obj_type = TYPE_EXTENT_ITEM;
1517 searchkey.offset = Vcb->superblock.node_size;
1518
1519 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
1520 if (!NT_SUCCESS(Status)) {
1521 ERR("error - find_item returned %08x\n", Status);
1522 free_traverse_ptr(&tp);
1523 return;
1524 }
1525
1526 if (keycmp(&searchkey, &tp2.item->key)) {
1527 ERR("could not find %llx,%x,%llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1528 free_traverse_ptr(&tp2);
1529 free_traverse_ptr(&tp);
1530 return;
1531 }
1532
1533 if (tp.item->size < sizeof(EXTENT_REF_V0)) {
1534 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_REF_V0));
1535 free_traverse_ptr(&tp2);
1536 free_traverse_ptr(&tp);
1537 return;
1538 }
1539
1540 erv0 = (EXTENT_REF_V0*)tp.item->data;
1541
1542 delete_tree_item(Vcb, &tp, rollback);
1543 delete_tree_item(Vcb, &tp2, rollback);
1544
1545 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
1546 EXTENT_ITEM_SKINNY_METADATA* eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
1547
1548 if (!eism) {
1549 ERR("out of memory\n");
1550 free_traverse_ptr(&tp2);
1551 free_traverse_ptr(&tp);
1552 return;
1553 }
1554
1555 eism->ei.refcount = 1;
1556 eism->ei.generation = erv0->gen;
1557 eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
1558 eism->type = TYPE_TREE_BLOCK_REF;
1559 eism->tbr.offset = t->header.tree_id;
1560
1561 if (!insert_tree_item(Vcb, Vcb->extent_root, td->treeholder.address, TYPE_METADATA_ITEM, t->header.level -1, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, rollback)) {
1562 ERR("insert_tree_item failed\n");
1563 free_traverse_ptr(&tp2);
1564 free_traverse_ptr(&tp);
1565 return;
1566 }
1567 } else {
1568 EXTENT_ITEM_TREE2* eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
1569
1570 if (!eit2) {
1571 ERR("out of memory\n");
1572 free_traverse_ptr(&tp2);
1573 free_traverse_ptr(&tp);
1574 return;
1575 }
1576
1577 eit2->eit.extent_item.refcount = 1;
1578 eit2->eit.extent_item.generation = erv0->gen;
1579 eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
1580 eit2->eit.firstitem = td->key;
1581 eit2->eit.level = t->header.level - 1;
1582 eit2->type = TYPE_TREE_BLOCK_REF;
1583 eit2->tbr.offset = t->header.tree_id;
1584
1585 if (!insert_tree_item(Vcb, Vcb->extent_root, td->treeholder.address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, rollback)) {
1586 ERR("insert_tree_item failed\n");
1587 free_traverse_ptr(&tp2);
1588 free_traverse_ptr(&tp);
1589 return;
1590 }
1591 }
1592
1593 // add_to_tree_cache(tc, insert_tp.tree, TRUE);
1594 add_parents_to_cache(Vcb, insert_tp.tree);
1595 add_parents_to_cache(Vcb, tp.tree);
1596 add_parents_to_cache(Vcb, tp2.tree);
1597
1598 free_traverse_ptr(&insert_tp);
1599
1600 free_traverse_ptr(&tp2);
1601 free_traverse_ptr(&tp);
1602 }
1603
1604 static NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* t, LIST_ENTRY* rollback) {
1605 KEY searchkey;
1606 traverse_ptr tp;
1607 EXTENT_ITEM* ei;
1608 EXTENT_ITEM_V0* eiv0;
1609 chunk* c;
1610 NTSTATUS Status;
1611
1612 // FIXME - deal with refcounts > 1
1613
1614 TRACE("(%p, %llx, %p)\n", Vcb, address, t);
1615
1616 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
1617 if (reduce_tree_extent_skinny(Vcb, address, t, rollback)) {
1618 return STATUS_SUCCESS;
1619 }
1620 }
1621
1622 searchkey.obj_id = address;
1623 searchkey.obj_type = TYPE_EXTENT_ITEM;
1624 searchkey.offset = Vcb->superblock.node_size;
1625
1626 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
1627 if (!NT_SUCCESS(Status)) {
1628 ERR("error - find_item returned %08x\n", Status);
1629 return Status;
1630 }
1631
1632 if (keycmp(&tp.item->key, &searchkey)) {
1633 ERR("could not find %llx,%x,%llx in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1634 int3;
1635 free_traverse_ptr(&tp);
1636 return STATUS_INTERNAL_ERROR;
1637 }
1638
1639 if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
1640 eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
1641
1642 if (eiv0->refcount > 1) {
1643 FIXME("FIXME - cannot deal with refcounts larger than 1 at present (eiv0->refcount == %llx)\n", eiv0->refcount);
1644 free_traverse_ptr(&tp);
1645 return STATUS_INTERNAL_ERROR;
1646 }
1647 } else {
1648 if (tp.item->size < sizeof(EXTENT_ITEM)) {
1649 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
1650 free_traverse_ptr(&tp);
1651 return STATUS_INTERNAL_ERROR;
1652 }
1653
1654 ei = (EXTENT_ITEM*)tp.item->data;
1655
1656 if (ei->refcount > 1) {
1657 FIXME("FIXME - cannot deal with refcounts larger than 1 at present (ei->refcount == %llx)\n", ei->refcount);
1658 free_traverse_ptr(&tp);
1659 return STATUS_INTERNAL_ERROR;
1660 }
1661
1662 if (t->header.level == 0 && ei->flags & EXTENT_ITEM_SHARED_BACKREFS) {
1663 // convert shared data extents
1664
1665 LIST_ENTRY* le = t->itemlist.Flink;
1666 while (le != &t->itemlist) {
1667 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1668
1669 TRACE("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1670
1671 if (!td->ignore && !td->inserted) {
1672 if (td->key.obj_type == TYPE_EXTENT_DATA) {
1673 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1674
1675 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1676 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1677
1678 if (ed2->address != 0) {
1679 TRACE("trying to convert shared data extent %llx,%llx\n", ed2->address, ed2->size);
1680 convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
1681 }
1682 }
1683 }
1684 }
1685
1686 le = le->Flink;
1687 }
1688
1689 t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
1690 }
1691 }
1692
1693 delete_tree_item(Vcb, &tp, rollback);
1694
1695 // if EXTENT_ITEM_V0, delete corresponding B4 item
1696 if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
1697 traverse_ptr tp2;
1698
1699 searchkey.obj_id = address;
1700 searchkey.obj_type = TYPE_EXTENT_REF_V0;
1701 searchkey.offset = 0xffffffffffffffff;
1702
1703 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
1704 if (!NT_SUCCESS(Status)) {
1705 ERR("error - find_item returned %08x\n", Status);
1706 free_traverse_ptr(&tp);
1707 return Status;
1708 }
1709
1710 if (tp2.item->key.obj_id == searchkey.obj_id && tp2.item->key.obj_type == searchkey.obj_type) {
1711 delete_tree_item(Vcb, &tp2, rollback);
1712 }
1713 free_traverse_ptr(&tp2);
1714 }
1715
1716 if (!(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1717 LIST_ENTRY* le;
1718
1719 // when writing old internal trees, convert related extents
1720
1721 le = t->itemlist.Flink;
1722 while (le != &t->itemlist) {
1723 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1724
1725 // ERR("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1726
1727 if (!td->ignore && !td->inserted) {
1728 if (t->header.level > 0) {
1729 convert_old_tree_extent(Vcb, td, t, rollback);
1730 } else if (td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA)) {
1731 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1732
1733 if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1734 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1735
1736 if (ed2->address != 0) {
1737 TRACE("trying to convert old data extent %llx,%llx\n", ed2->address, ed2->size);
1738 convert_old_data_extent(Vcb, ed2->address, ed2->size, rollback);
1739 }
1740 }
1741 }
1742 }
1743
1744 le = le->Flink;
1745 }
1746 }
1747
1748 c = get_chunk_from_address(Vcb, address);
1749
1750 if (c) {
1751 decrease_chunk_usage(c, tp.item->key.offset);
1752
1753 add_to_space_list(c, address, tp.item->key.offset, SPACE_TYPE_DELETING);
1754 } else
1755 ERR("could not find chunk for address %llx\n", address);
1756
1757 free_traverse_ptr(&tp);
1758
1759 return STATUS_SUCCESS;
1760 }
1761
1762 static NTSTATUS allocate_tree_extents(device_extension* Vcb, LIST_ENTRY* rollback) {
1763 LIST_ENTRY* le;
1764 NTSTATUS Status;
1765
1766 TRACE("(%p)\n", Vcb);
1767
1768 le = Vcb->tree_cache.Flink;
1769 while (le != &Vcb->tree_cache) {
1770 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1771
1772 if (tc2->write && !tc2->tree->has_new_address) {
1773 chunk* c;
1774
1775 Status = get_tree_new_address(Vcb, tc2->tree, rollback);
1776 if (!NT_SUCCESS(Status)) {
1777 ERR("get_tree_new_address returned %08x\n", Status);
1778 return Status;
1779 }
1780
1781 TRACE("allocated extent %llx\n", tc2->tree->new_address);
1782
1783 if (tc2->tree->has_address) {
1784 Status = reduce_tree_extent(Vcb, tc2->tree->header.address, tc2->tree, rollback);
1785
1786 if (!NT_SUCCESS(Status)) {
1787 ERR("reduce_tree_extent returned %08x\n", Status);
1788 return Status;
1789 }
1790 }
1791
1792 c = get_chunk_from_address(Vcb, tc2->tree->new_address);
1793
1794 if (c) {
1795 increase_chunk_usage(c, Vcb->superblock.node_size);
1796 } else {
1797 ERR("could not find chunk for address %llx\n", tc2->tree->new_address);
1798 return STATUS_INTERNAL_ERROR;
1799 }
1800 }
1801
1802 le = le->Flink;
1803 }
1804
1805 return STATUS_SUCCESS;
1806 }
1807
1808 static NTSTATUS update_root_root(device_extension* Vcb, LIST_ENTRY* rollback) {
1809 LIST_ENTRY* le;
1810 NTSTATUS Status;
1811
1812 TRACE("(%p)\n", Vcb);
1813
1814 le = Vcb->tree_cache.Flink;
1815 while (le != &Vcb->tree_cache) {
1816 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1817
1818 if (tc2->write && !tc2->tree->parent) {
1819 if (tc2->tree->root != Vcb->root_root && tc2->tree->root != Vcb->chunk_root) {
1820 KEY searchkey;
1821 traverse_ptr tp;
1822
1823 searchkey.obj_id = tc2->tree->root->id;
1824 searchkey.obj_type = TYPE_ROOT_ITEM;
1825 searchkey.offset = 0xffffffffffffffff;
1826
1827 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
1828 if (!NT_SUCCESS(Status)) {
1829 ERR("error - find_item returned %08x\n", Status);
1830 return Status;
1831 }
1832
1833 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1834 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1835 free_traverse_ptr(&tp);
1836 return STATUS_INTERNAL_ERROR;
1837 }
1838
1839 TRACE("updating the address for root %llx to %llx\n", searchkey.obj_id, tc2->tree->new_address);
1840
1841 tc2->tree->root->root_item.block_number = tc2->tree->new_address;
1842 tc2->tree->root->root_item.root_level = tc2->tree->header.level;
1843 tc2->tree->root->root_item.generation = Vcb->superblock.generation;
1844 tc2->tree->root->root_item.generation2 = Vcb->superblock.generation;
1845
1846 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry
1847 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
1848
1849 if (!ri) {
1850 ERR("out of memory\n");
1851 return STATUS_INSUFFICIENT_RESOURCES;
1852 }
1853
1854 RtlCopyMemory(ri, &tc2->tree->root->root_item, sizeof(ROOT_ITEM));
1855
1856 delete_tree_item(Vcb, &tp, rollback);
1857
1858 if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, 0, ri, sizeof(ROOT_ITEM), NULL, rollback)) {
1859 ERR("insert_tree_item failed\n");
1860 return STATUS_INTERNAL_ERROR;
1861 }
1862 } else
1863 RtlCopyMemory(tp.item->data, &tc2->tree->root->root_item, sizeof(ROOT_ITEM));
1864
1865 free_traverse_ptr(&tp);
1866 }
1867
1868 tc2->tree->root->treeholder.address = tc2->tree->new_address;
1869 }
1870
1871 le = le->Flink;
1872 }
1873
1874 return STATUS_SUCCESS;
1875 }
1876
1877 enum write_tree_status {
1878 WriteTreeStatus_Pending,
1879 WriteTreeStatus_Success,
1880 WriteTreeStatus_Error,
1881 WriteTreeStatus_Cancelling,
1882 WriteTreeStatus_Cancelled
1883 };
1884
1885 struct write_tree_context;
1886
1887 typedef struct {
1888 struct write_tree_context* context;
1889 UINT8* buf;
1890 device* device;
1891 PIRP Irp;
1892 IO_STATUS_BLOCK iosb;
1893 enum write_tree_status status;
1894 LIST_ENTRY list_entry;
1895 } write_tree_stripe;
1896
1897 typedef struct {
1898 KEVENT Event;
1899 LIST_ENTRY stripes;
1900 } write_tree_context;
1901
1902 static NTSTATUS STDCALL write_tree_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
1903 write_tree_stripe* stripe = conptr;
1904 write_tree_context* context = (write_tree_context*)stripe->context;
1905 LIST_ENTRY* le;
1906 BOOL complete;
1907
1908 if (stripe->status == WriteTreeStatus_Cancelling) {
1909 stripe->status = WriteTreeStatus_Cancelled;
1910 goto end;
1911 }
1912
1913 stripe->iosb = Irp->IoStatus;
1914
1915 if (NT_SUCCESS(Irp->IoStatus.Status)) {
1916 stripe->status = WriteTreeStatus_Success;
1917 } else {
1918 le = context->stripes.Flink;
1919
1920 stripe->status = WriteTreeStatus_Error;
1921
1922 while (le != &context->stripes) {
1923 write_tree_stripe* s2 = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
1924
1925 if (s2->status == WriteTreeStatus_Pending) {
1926 s2->status = WriteTreeStatus_Cancelling;
1927 IoCancelIrp(s2->Irp);
1928 }
1929
1930 le = le->Flink;
1931 }
1932 }
1933
1934 end:
1935 le = context->stripes.Flink;
1936 complete = TRUE;
1937
1938 while (le != &context->stripes) {
1939 write_tree_stripe* s2 = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
1940
1941 if (s2->status == WriteTreeStatus_Pending || s2->status == WriteTreeStatus_Cancelling) {
1942 complete = FALSE;
1943 break;
1944 }
1945
1946 le = le->Flink;
1947 }
1948
1949 if (complete)
1950 KeSetEvent(&context->Event, 0, FALSE);
1951
1952 return STATUS_MORE_PROCESSING_REQUIRED;
1953 }
1954
1955 static NTSTATUS write_tree(device_extension* Vcb, UINT64 addr, UINT8* data, write_tree_context* wtc) {
1956 chunk* c;
1957 CHUNK_ITEM_STRIPE* cis;
1958 write_tree_stripe* stripe;
1959 UINT64 i;
1960
1961 c = get_chunk_from_address(Vcb, addr);
1962
1963 if (!c) {
1964 ERR("get_chunk_from_address failed\n");
1965 return STATUS_INTERNAL_ERROR;
1966 }
1967
1968 cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1969
1970 // FIXME - make this work with RAID
1971
1972 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1973 PIO_STACK_LOCATION IrpSp;
1974
1975 // FIXME - handle missing devices
1976
1977 stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_tree_stripe), ALLOC_TAG);
1978 if (!stripe) {
1979 ERR("out of memory\n");
1980 return STATUS_INSUFFICIENT_RESOURCES;
1981 }
1982
1983 stripe->context = (struct write_tree_context*)wtc;
1984 stripe->buf = data;
1985 stripe->device = c->devices[i];
1986 RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK));
1987 stripe->status = WriteTreeStatus_Pending;
1988
1989 stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, FALSE);
1990
1991 if (!stripe->Irp) {
1992 ERR("IoAllocateIrp failed\n");
1993 return STATUS_INTERNAL_ERROR;
1994 }
1995
1996 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
1997 IrpSp->MajorFunction = IRP_MJ_WRITE;
1998
1999 if (stripe->device->devobj->Flags & DO_BUFFERED_IO) {
2000 stripe->Irp->AssociatedIrp.SystemBuffer = data;
2001
2002 stripe->Irp->Flags = IRP_BUFFERED_IO;
2003 } else if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
2004 stripe->Irp->MdlAddress = IoAllocateMdl(data, Vcb->superblock.node_size, FALSE, FALSE, NULL);
2005 if (!stripe->Irp->MdlAddress) {
2006 ERR("IoAllocateMdl failed\n");
2007 return STATUS_INTERNAL_ERROR;
2008 }
2009
2010 MmProbeAndLockPages(stripe->Irp->MdlAddress, KernelMode, IoWriteAccess);
2011 } else {
2012 stripe->Irp->UserBuffer = data;
2013 }
2014
2015 IrpSp->Parameters.Write.Length = Vcb->superblock.node_size;
2016 IrpSp->Parameters.Write.ByteOffset.QuadPart = addr - c->offset + cis[i].offset;
2017
2018 stripe->Irp->UserIosb = &stripe->iosb;
2019
2020 IoSetCompletionRoutine(stripe->Irp, write_tree_completion, stripe, TRUE, TRUE, TRUE);
2021
2022 InsertTailList(&wtc->stripes, &stripe->list_entry);
2023 }
2024
2025 return STATUS_SUCCESS;
2026 }
2027
2028 static void free_stripes(write_tree_context* wtc) {
2029 LIST_ENTRY *le, *le2, *nextle;
2030
2031 le = wtc->stripes.Flink;
2032 while (le != &wtc->stripes) {
2033 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
2034
2035 if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
2036 MmUnlockPages(stripe->Irp->MdlAddress);
2037 IoFreeMdl(stripe->Irp->MdlAddress);
2038 }
2039
2040 le = le->Flink;
2041 }
2042
2043 le = wtc->stripes.Flink;
2044 while (le != &wtc->stripes) {
2045 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
2046
2047 nextle = le->Flink;
2048
2049 if (stripe->buf) {
2050 ExFreePool(stripe->buf);
2051
2052 le2 = le->Flink;
2053 while (le2 != &wtc->stripes) {
2054 write_tree_stripe* s2 = CONTAINING_RECORD(le2, write_tree_stripe, list_entry);
2055
2056 if (s2->buf == stripe->buf)
2057 s2->buf = NULL;
2058
2059 le2 = le2->Flink;
2060 }
2061
2062 }
2063
2064 ExFreePool(stripe);
2065
2066 le = nextle;
2067 }
2068 }
2069
2070 static NTSTATUS write_trees(device_extension* Vcb) {
2071 UINT8 level;
2072 UINT8 *data, *body;
2073 UINT32 crc32;
2074 NTSTATUS Status;
2075 LIST_ENTRY* le;
2076 write_tree_context* wtc;
2077
2078 TRACE("(%p)\n", Vcb);
2079
2080 for (level = 0; level <= 255; level++) {
2081 BOOL nothing_found = TRUE;
2082
2083 TRACE("level = %u\n", level);
2084
2085 le = Vcb->tree_cache.Flink;
2086 while (le != &Vcb->tree_cache) {
2087 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
2088
2089 if (tc2->write && tc2->tree->header.level == level) {
2090 KEY firstitem, searchkey;
2091 LIST_ENTRY* le2;
2092 traverse_ptr tp;
2093 EXTENT_ITEM_TREE* eit;
2094
2095 if (!tc2->tree->has_new_address) {
2096 ERR("error - tried to write tree with no new address\n");
2097 int3;
2098 }
2099
2100 le2 = tc2->tree->itemlist.Flink;
2101 while (le2 != &tc2->tree->itemlist) {
2102 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2103 if (!td->ignore) {
2104 firstitem = td->key;
2105 break;
2106 }
2107 le2 = le2->Flink;
2108 }
2109
2110 if (tc2->tree->parent) {
2111 tc2->tree->paritem->key = firstitem;
2112 tc2->tree->paritem->treeholder.address = tc2->tree->new_address;
2113 tc2->tree->paritem->treeholder.generation = Vcb->superblock.generation;
2114 }
2115
2116 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
2117 searchkey.obj_id = tc2->tree->new_address;
2118 searchkey.obj_type = TYPE_EXTENT_ITEM;
2119 searchkey.offset = Vcb->superblock.node_size;
2120
2121 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2122 if (!NT_SUCCESS(Status)) {
2123 ERR("error - find_item returned %08x\n", Status);
2124 return Status;
2125 }
2126
2127 if (keycmp(&searchkey, &tp.item->key)) {
2128 // traverse_ptr next_tp;
2129 // BOOL b;
2130 // tree_data* paritem;
2131
2132 ERR("could not find %llx,%x,%llx in extent_root (found %llx,%x,%llx instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
2133 free_traverse_ptr(&tp);
2134
2135 // searchkey.obj_id = 0;
2136 // searchkey.obj_type = 0;
2137 // searchkey.offset = 0;
2138 //
2139 // find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2140 //
2141 // paritem = NULL;
2142 // do {
2143 // if (tp.tree->paritem != paritem) {
2144 // paritem = tp.tree->paritem;
2145 // ERR("paritem: %llx,%x,%llx\n", paritem->key.obj_id, paritem->key.obj_type, paritem->key.offset);
2146 // }
2147 //
2148 // ERR("%llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
2149 //
2150 // b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
2151 // if (b) {
2152 // free_traverse_ptr(&tp);
2153 // tp = next_tp;
2154 // }
2155 // } while (b);
2156 //
2157 // free_traverse_ptr(&tp);
2158
2159 return STATUS_INTERNAL_ERROR;
2160 }
2161
2162 if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
2163 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
2164 free_traverse_ptr(&tp);
2165 return STATUS_INTERNAL_ERROR;
2166 }
2167
2168 eit = (EXTENT_ITEM_TREE*)tp.item->data;
2169 eit->firstitem = firstitem;
2170
2171 free_traverse_ptr(&tp);
2172 }
2173
2174 nothing_found = FALSE;
2175 }
2176
2177 le = le->Flink;
2178 }
2179
2180 if (nothing_found)
2181 break;
2182 }
2183
2184 TRACE("allocated tree extents\n");
2185
2186 wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_tree_context), ALLOC_TAG);
2187 if (!wtc) {
2188 ERR("out of memory\n");
2189 return STATUS_INSUFFICIENT_RESOURCES;
2190 }
2191
2192 KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE);
2193 InitializeListHead(&wtc->stripes);
2194
2195 le = Vcb->tree_cache.Flink;
2196 while (le != &Vcb->tree_cache) {
2197 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
2198 #ifdef DEBUG_PARANOID
2199 UINT32 num_items = 0, size = 0;
2200 LIST_ENTRY* le2;
2201 BOOL crash = FALSE;
2202 #endif
2203
2204 if (tc2->write) {
2205 #ifdef DEBUG_PARANOID
2206 le2 = tc2->tree->itemlist.Flink;
2207 while (le2 != &tc2->tree->itemlist) {
2208 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2209 if (!td->ignore) {
2210 num_items++;
2211
2212 if (tc2->tree->header.level == 0)
2213 size += td->size;
2214 }
2215 le2 = le2->Flink;
2216 }
2217
2218 if (tc2->tree->header.level == 0)
2219 size += num_items * sizeof(leaf_node);
2220 else
2221 size += num_items * sizeof(internal_node);
2222
2223 if (num_items != tc2->tree->header.num_items) {
2224 ERR("tree %llx, level %x: num_items was %x, expected %x\n", tc2->tree->root->id, tc2->tree->header.level, num_items, tc2->tree->header.num_items);
2225 crash = TRUE;
2226 }
2227
2228 if (size != tc2->tree->size) {
2229 ERR("tree %llx, level %x: size was %x, expected %x\n", tc2->tree->root->id, tc2->tree->header.level, size, tc2->tree->size);
2230 crash = TRUE;
2231 }
2232
2233 if (tc2->tree->header.num_items == 0 && tc2->tree->parent) {
2234 ERR("tree %llx, level %x: tried to write empty tree with parent\n", tc2->tree->root->id, tc2->tree->header.level);
2235 crash = TRUE;
2236 }
2237
2238 if (tc2->tree->size > Vcb->superblock.node_size - sizeof(tree_header)) {
2239 ERR("tree %llx, level %x: tried to write overlarge tree (%x > %x)\n", tc2->tree->root->id, tc2->tree->header.level, tc2->tree->size, Vcb->superblock.node_size - sizeof(tree_header));
2240 crash = TRUE;
2241 }
2242
2243 if (crash) {
2244 ERR("tree %p\n", tc2->tree);
2245 le2 = tc2->tree->itemlist.Flink;
2246 while (le2 != &tc2->tree->itemlist) {
2247 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2248 if (!td->ignore) {
2249 ERR("%llx,%x,%llx inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted);
2250 }
2251 le2 = le2->Flink;
2252 }
2253 int3;
2254 }
2255 #endif
2256 tc2->tree->header.address = tc2->tree->new_address;
2257 tc2->tree->header.generation = Vcb->superblock.generation;
2258 tc2->tree->header.flags |= HEADER_FLAG_MIXED_BACKREF;
2259 tc2->tree->has_address = TRUE;
2260
2261 data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
2262 if (!data) {
2263 ERR("out of memory\n");
2264 Status = STATUS_INSUFFICIENT_RESOURCES;
2265 goto end;
2266 }
2267
2268 body = data + sizeof(tree_header);
2269
2270 RtlCopyMemory(data, &tc2->tree->header, sizeof(tree_header));
2271 RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header));
2272
2273 if (tc2->tree->header.level == 0) {
2274 leaf_node* itemptr = (leaf_node*)body;
2275 int i = 0;
2276 LIST_ENTRY* le2;
2277 UINT8* dataptr = data + Vcb->superblock.node_size;
2278
2279 le2 = tc2->tree->itemlist.Flink;
2280 while (le2 != &tc2->tree->itemlist) {
2281 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2282 if (!td->ignore) {
2283 dataptr = dataptr - td->size;
2284
2285 itemptr[i].key = td->key;
2286 itemptr[i].offset = (UINT8*)dataptr - (UINT8*)body;
2287 itemptr[i].size = td->size;
2288 i++;
2289
2290 if (td->size > 0)
2291 RtlCopyMemory(dataptr, td->data, td->size);
2292 }
2293
2294 le2 = le2->Flink;
2295 }
2296 } else {
2297 internal_node* itemptr = (internal_node*)body;
2298 int i = 0;
2299 LIST_ENTRY* le2;
2300
2301 le2 = tc2->tree->itemlist.Flink;
2302 while (le2 != &tc2->tree->itemlist) {
2303 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2304 if (!td->ignore) {
2305 itemptr[i].key = td->key;
2306 itemptr[i].address = td->treeholder.address;
2307 itemptr[i].generation = td->treeholder.generation;
2308 i++;
2309 }
2310
2311 le2 = le2->Flink;
2312 }
2313 }
2314
2315 crc32 = calc_crc32c(0xffffffff, (UINT8*)&((tree_header*)data)->fs_uuid, Vcb->superblock.node_size - sizeof(((tree_header*)data)->csum));
2316 crc32 = ~crc32;
2317 *((UINT32*)data) = crc32;
2318 TRACE("setting crc32 to %08x\n", crc32);
2319
2320 Status = write_tree(Vcb, tc2->tree->new_address, data, wtc);
2321 if (!NT_SUCCESS(Status)) {
2322 ERR("write_tree returned %08x\n", Status);
2323 goto end;
2324 }
2325 }
2326
2327 le = le->Flink;
2328 }
2329
2330 Status = STATUS_SUCCESS;
2331
2332 if (wtc->stripes.Flink != &wtc->stripes) {
2333 // launch writes and wait
2334 le = wtc->stripes.Flink;
2335 while (le != &wtc->stripes) {
2336 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
2337
2338 IoCallDriver(stripe->device->devobj, stripe->Irp);
2339
2340 le = le->Flink;
2341 }
2342
2343 KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL);
2344
2345 le = wtc->stripes.Flink;
2346 while (le != &wtc->stripes) {
2347 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
2348
2349 if (!NT_SUCCESS(stripe->iosb.Status)) {
2350 Status = stripe->iosb.Status;
2351 break;
2352 }
2353
2354 le = le->Flink;
2355 }
2356
2357 free_stripes(wtc);
2358 }
2359
2360 end:
2361 ExFreePool(wtc);
2362
2363 return Status;
2364 }
2365
2366 static NTSTATUS write_superblocks(device_extension* Vcb) {
2367 UINT64 i;
2368 NTSTATUS Status;
2369 LIST_ENTRY* le;
2370
2371 TRACE("(%p)\n", Vcb);
2372
2373 le = Vcb->tree_cache.Flink;
2374 while (le != &Vcb->tree_cache) {
2375 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
2376
2377 if (tc2->write && !tc2->tree->parent) {
2378 if (tc2->tree->root == Vcb->root_root) {
2379 Vcb->superblock.root_tree_addr = tc2->tree->new_address;
2380 Vcb->superblock.root_level = tc2->tree->header.level;
2381 } else if (tc2->tree->root == Vcb->chunk_root) {
2382 Vcb->superblock.chunk_tree_addr = tc2->tree->new_address;
2383 Vcb->superblock.chunk_root_generation = tc2->tree->header.generation;
2384 Vcb->superblock.chunk_root_level = tc2->tree->header.level;
2385 }
2386 }
2387
2388 le = le->Flink;
2389 }
2390
2391 for (i = 0; i < Vcb->superblock.num_devices; i++) {
2392 if (Vcb->devices[i].devobj) {
2393 Status = write_superblock(Vcb, &Vcb->devices[i]);
2394 if (!NT_SUCCESS(Status)) {
2395 ERR("write_superblock returned %08x\n", Status);
2396 return Status;
2397 }
2398 }
2399 }
2400
2401 return STATUS_SUCCESS;
2402 }
2403
2404 static NTSTATUS update_chunk_usage(device_extension* Vcb, LIST_ENTRY* rollback) {
2405 LIST_ENTRY* le = Vcb->chunks.Flink;
2406 chunk* c;
2407 KEY searchkey;
2408 traverse_ptr tp;
2409 BLOCK_GROUP_ITEM* bgi;
2410 NTSTATUS Status;
2411
2412 TRACE("(%p)\n", Vcb);
2413
2414 while (le != &Vcb->chunks) {
2415 c = CONTAINING_RECORD(le, chunk, list_entry);
2416
2417 if (c->used != c->oldused) {
2418 searchkey.obj_id = c->offset;
2419 searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
2420 searchkey.offset = c->chunk_item->size;
2421
2422 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2423 if (!NT_SUCCESS(Status)) {
2424 ERR("error - find_item returned %08x\n", Status);
2425 return Status;
2426 }
2427
2428 if (keycmp(&searchkey, &tp.item->key)) {
2429 ERR("could not find (%llx,%x,%llx) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
2430 int3;
2431 free_traverse_ptr(&tp);
2432 return STATUS_INTERNAL_ERROR;
2433 }
2434
2435 if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
2436 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
2437 free_traverse_ptr(&tp);
2438 return STATUS_INTERNAL_ERROR;
2439 }
2440
2441 bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
2442 if (!bgi) {
2443 ERR("out of memory\n");
2444 free_traverse_ptr(&tp);
2445 return STATUS_INSUFFICIENT_RESOURCES;
2446 }
2447
2448 RtlCopyMemory(bgi, tp.item->data, tp.item->size);
2449 bgi->used = c->used;
2450
2451 TRACE("adjusting usage of chunk %llx to %llx\n", c->offset, c->used);
2452
2453 delete_tree_item(Vcb, &tp, rollback);
2454
2455 if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, rollback)) {
2456 ERR("insert_tree_item failed\n");
2457 ExFreePool(bgi);
2458 return STATUS_INTERNAL_ERROR;
2459 }
2460
2461 TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
2462 TRACE("chunk_item type = %llx\n", c->chunk_item->type);
2463
2464 if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
2465 FIXME("RAID0 not yet supported\n");
2466 ExFreePool(bgi);
2467 free_traverse_ptr(&tp);
2468 return STATUS_INTERNAL_ERROR;
2469 } else if (c->chunk_item->type & BLOCK_FLAG_RAID1) {
2470 FIXME("RAID1 not yet supported\n");
2471 ExFreePool(bgi);
2472 free_traverse_ptr(&tp);
2473 return STATUS_INTERNAL_ERROR;
2474 } else if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE) {
2475 Vcb->superblock.bytes_used = Vcb->superblock.bytes_used + (2 * (c->used - c->oldused));
2476 } else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
2477 FIXME("RAID10 not yet supported\n");
2478 ExFreePool(bgi);
2479 free_traverse_ptr(&tp);
2480 return STATUS_INTERNAL_ERROR;
2481 } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
2482 FIXME("RAID5 not yet supported\n");
2483 ExFreePool(bgi);
2484 free_traverse_ptr(&tp);
2485 return STATUS_INTERNAL_ERROR;
2486 } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2487 FIXME("RAID6 not yet supported\n");
2488 ExFreePool(bgi);
2489 free_traverse_ptr(&tp);
2490 return STATUS_INTERNAL_ERROR;
2491 } else { // SINGLE
2492 Vcb->superblock.bytes_used = Vcb->superblock.bytes_used + c->used - c->oldused;
2493 }
2494
2495 TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
2496
2497 free_traverse_ptr(&tp);
2498
2499 c->oldused = c->used;
2500 }
2501
2502 le = le->Flink;
2503 }
2504
2505 return STATUS_SUCCESS;
2506 }
2507
2508 static void get_first_item(tree* t, KEY* key) {
2509 LIST_ENTRY* le;
2510
2511 le = t->itemlist.Flink;
2512 while (le != &t->itemlist) {
2513 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2514
2515 *key = td->key;
2516 return;
2517 }
2518 }
2519
2520 static NTSTATUS STDCALL split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, UINT32 numitems, UINT32 size) {
2521 tree *nt, *pt;
2522 tree_data* td;
2523 tree_data* oldlastitem;
2524 // write_tree* wt2;
2525 // // tree_data *firsttd, *lasttd;
2526 // // LIST_ENTRY* le;
2527 // #ifdef DEBUG_PARANOID
2528 // KEY lastkey1, lastkey2;
2529 // traverse_ptr tp, next_tp;
2530 // ULONG numitems1, numitems2;
2531 // #endif
2532
2533 TRACE("splitting tree in %llx at (%llx,%x,%llx)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);
2534
2535 // #ifdef DEBUG_PARANOID
2536 // lastkey1.obj_id = 0xffffffffffffffff;
2537 // lastkey1.obj_type = 0xff;
2538 // lastkey1.offset = 0xffffffffffffffff;
2539 //
2540 // if (!find_item(Vcb, t->root, &tp, &lastkey1, NULL, FALSE))
2541 // ERR("error - find_item failed\n");
2542 // else {
2543 // lastkey1 = tp.item->key;
2544 // numitems1 = 0;
2545 // while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
2546 // free_traverse_ptr(&tp);
2547 // tp = next_tp;
2548 // numitems1++;
2549 // }
2550 // free_traverse_ptr(&tp);
2551 // }
2552 // #endif
2553
2554 nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2555 if (!nt) {
2556 ERR("out of memory\n");
2557 return STATUS_INSUFFICIENT_RESOURCES;
2558 }
2559
2560 RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header));
2561 nt->header.address = 0;
2562 nt->header.generation = Vcb->superblock.generation;
2563 nt->header.num_items = t->header.num_items - numitems;
2564 nt->header.flags = HEADER_FLAG_MIXED_BACKREF;
2565
2566 nt->refcount = 0;
2567 nt->has_address = FALSE;
2568 nt->Vcb = Vcb;
2569 nt->parent = t->parent;
2570 nt->root = t->root;
2571 // nt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
2572 nt->new_address = 0;
2573 nt->has_new_address = FALSE;
2574 nt->flags = t->flags;
2575 InitializeListHead(&nt->itemlist);
2576
2577 // ExInitializeResourceLite(&nt->nonpaged->load_tree_lock);
2578
2579 oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry);
2580
2581 // // firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
2582 // // lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
2583 // //
2584 // // TRACE("old tree in %x was from (%x,%x,%x) to (%x,%x,%x)\n",
2585 // // (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
2586 // // (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
2587 // //
2588 // // le = wt->tree->itemlist.Flink;
2589 // // while (le != &wt->tree->itemlist) {
2590 // // td = CONTAINING_RECORD(le, tree_data, list_entry);
2591 // // TRACE("old tree item was (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
2592 // // le = le->Flink;
2593 // // }
2594
2595 nt->itemlist.Flink = &newfirstitem->list_entry;
2596 nt->itemlist.Blink = t->itemlist.Blink;
2597 nt->itemlist.Flink->Blink = &nt->itemlist;
2598 nt->itemlist.Blink->Flink = &nt->itemlist;
2599
2600 t->itemlist.Blink = &oldlastitem->list_entry;
2601 t->itemlist.Blink->Flink = &t->itemlist;
2602
2603 // // le = wt->tree->itemlist.Flink;
2604 // // while (le != &wt->tree->itemlist) {
2605 // // td = CONTAINING_RECORD(le, tree_data, list_entry);
2606 // // TRACE("old tree item now (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
2607 // // le = le->Flink;
2608 // // }
2609 // //
2610 // // firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
2611 // // lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
2612 // //
2613 // // TRACE("old tree in %x is now from (%x,%x,%x) to (%x,%x,%x)\n",
2614 // // (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
2615 // // (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
2616
2617 nt->size = t->size - size;
2618 t->size = size;
2619 t->header.num_items = numitems;
2620 add_to_tree_cache(Vcb, nt, TRUE);
2621
2622 InterlockedIncrement(&Vcb->open_trees);
2623 #ifdef DEBUG_TREE_REFCOUNTS
2624 TRACE("created new split tree %p\n", nt);
2625 #endif
2626 InsertTailList(&Vcb->trees, &nt->list_entry);
2627
2628 // // // TESTING
2629 // // td = wt->tree->items;
2630 // // while (td) {
2631 // // if (!td->ignore) {
2632 // // TRACE("old tree item: (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
2633 // // }
2634 // // td = td->next;
2635 // // }
2636
2637 // // oldlastitem->next = NULL;
2638 // // wt->tree->lastitem = oldlastitem;
2639
2640 // // TRACE("last item is now (%x,%x,%x)\n", (UINT32)oldlastitem->key.obj_id, oldlastitem->key.obj_type, (UINT32)oldlastitem->key.offset);
2641
2642 if (nt->header.level > 0) {
2643 LIST_ENTRY* le = nt->itemlist.Flink;
2644
2645 while (le != &nt->itemlist) {
2646 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
2647
2648 if (td2->treeholder.tree) {
2649 td2->treeholder.tree->parent = nt;
2650 increase_tree_rc(nt);
2651 free_tree(t);
2652 }
2653
2654 le = le->Flink;
2655 }
2656 }
2657
2658 if (nt->parent) {
2659 increase_tree_rc(nt->parent);
2660
2661 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
2662 if (!td) {
2663 ERR("out of memory\n");
2664 return STATUS_INSUFFICIENT_RESOURCES;
2665 }
2666
2667 td->key = newfirstitem->key;
2668
2669 InsertHeadList(&t->paritem->list_entry, &td->list_entry);
2670
2671 td->ignore = FALSE;
2672 td->inserted = TRUE;
2673 td->treeholder.tree = nt;
2674 init_tree_holder(&td->treeholder);
2675 // td->treeholder.nonpaged->status = tree_holder_loaded;
2676 nt->paritem = td;
2677
2678 nt->parent->header.num_items++;
2679 nt->parent->size += sizeof(internal_node);
2680
2681 goto end;
2682 }
2683
2684 TRACE("adding new tree parent\n");
2685
2686 if (nt->header.level == 255) {
2687 ERR("cannot add parent to tree at level 255\n");
2688 return STATUS_INTERNAL_ERROR;
2689 }
2690
2691 pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2692 if (!pt) {
2693 ERR("out of memory\n");
2694 return STATUS_INSUFFICIENT_RESOURCES;
2695 }
2696
2697 RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
2698 pt->header.address = 0;
2699 pt->header.num_items = 2;
2700 pt->header.level = nt->header.level + 1;
2701 pt->header.flags = HEADER_FLAG_MIXED_BACKREF;
2702
2703 pt->refcount = 2;
2704 pt->has_address = FALSE;
2705 pt->Vcb = Vcb;
2706 pt->parent = NULL;
2707 pt->paritem = NULL;
2708 pt->root = t->root;
2709 pt->new_address = 0;
2710 pt->has_new_address = FALSE;
2711 // pt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
2712 pt->size = pt->header.num_items * sizeof(internal_node);
2713 pt->flags = t->flags;
2714 InitializeListHead(&pt->itemlist);
2715
2716 // ExInitializeResourceLite(&pt->nonpaged->load_tree_lock);
2717
2718 InterlockedIncrement(&Vcb->open_trees);
2719 #ifdef DEBUG_TREE_REFCOUNTS
2720 TRACE("created new parent tree %p\n", pt);
2721 #endif
2722 InsertTailList(&Vcb->trees, &pt->list_entry);
2723
2724 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
2725 if (!td) {
2726 ERR("out of memory\n");
2727 return STATUS_INSUFFICIENT_RESOURCES;
2728 }
2729
2730 get_first_item(t, &td->key);
2731 td->ignore = FALSE;
2732 td->inserted = FALSE;
2733 td->treeholder.address = 0;
2734 td->treeholder.generation = Vcb->superblock.generation;
2735 td->treeholder.tree = t;
2736 init_tree_holder(&td->treeholder);
2737 // td->treeholder.nonpaged->status = tree_holder_loaded;
2738 InsertTailList(&pt->itemlist, &td->list_entry);
2739 t->paritem = td;
2740
2741 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
2742 if (!td) {
2743 ERR("out of memory\n");
2744 return STATUS_INSUFFICIENT_RESOURCES;
2745 }
2746
2747 td->key = newfirstitem->key;
2748 td->ignore = FALSE;
2749 td->inserted = FALSE;
2750 td->treeholder.address = 0;
2751 td->treeholder.generation = Vcb->superblock.generation;
2752 td->treeholder.tree = nt;
2753 init_tree_holder(&td->treeholder);
2754 // td->treeholder.nonpaged->status = tree_holder_loaded;
2755 InsertTailList(&pt->itemlist, &td->list_entry);
2756 nt->paritem = td;
2757
2758 add_to_tree_cache(Vcb, pt, TRUE);
2759
2760 t->root->treeholder.tree = pt;
2761
2762 t->parent = pt;
2763 nt->parent = pt;
2764
2765 end:
2766 t->root->root_item.bytes_used += Vcb->superblock.node_size;
2767
2768 // #ifdef DEBUG_PARANOID
2769 // lastkey2.obj_id = 0xffffffffffffffff;
2770 // lastkey2.obj_type = 0xff;
2771 // lastkey2.offset = 0xffffffffffffffff;
2772 //
2773 // if (!find_item(Vcb, wt->tree->root, &tp, &lastkey2, NULL, FALSE))
2774 // ERR("error - find_item failed\n");
2775 // else {
2776 // lastkey2 = tp.item->key;
2777 //
2778 // numitems2 = 0;
2779 // while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
2780 // free_traverse_ptr(&tp);
2781 // tp = next_tp;
2782 // numitems2++;
2783 // }
2784 // free_traverse_ptr(&tp);
2785 // }
2786 //
2787 // ERR("lastkey1 = %llx,%x,%llx\n", lastkey1.obj_id, lastkey1.obj_type, lastkey1.offset);
2788 // ERR("lastkey2 = %llx,%x,%llx\n", lastkey2.obj_id, lastkey2.obj_type, lastkey2.offset);
2789 // ERR("numitems1 = %u\n", numitems1);
2790 // ERR("numitems2 = %u\n", numitems2);
2791 // #endif
2792
2793 return STATUS_SUCCESS;
2794 }
2795
2796 static NTSTATUS STDCALL split_tree(device_extension* Vcb, tree* t) {
2797 LIST_ENTRY* le;
2798 UINT32 size, ds, numitems;
2799
2800 size = 0;
2801 numitems = 0;
2802
2803 // FIXME - naïve implementation: maximizes number of filled trees
2804
2805 le = t->itemlist.Flink;
2806 while (le != &t->itemlist) {
2807 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2808
2809 if (!td->ignore) {
2810 if (t->header.level == 0)
2811 ds = sizeof(leaf_node) + td->size;
2812 else
2813 ds = sizeof(internal_node);
2814
2815 // FIXME - move back if previous item was deleted item with same key
2816 if (size + ds > Vcb->superblock.node_size - sizeof(tree_header))
2817 return split_tree_at(Vcb, t, td, numitems, size);
2818
2819 size += ds;
2820 numitems++;
2821 }
2822
2823 le = le->Flink;
2824 }
2825
2826 return STATUS_SUCCESS;
2827 }
2828
2829 static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, LIST_ENTRY* rollback) {
2830 LIST_ENTRY* le;
2831 tree_data* nextparitem = NULL;
2832 NTSTATUS Status;
2833 tree *next_tree, *par;
2834 BOOL loaded;
2835
2836 TRACE("trying to amalgamate tree in root %llx, level %x (size %u)\n", t->root->id, t->header.level, t->size);
2837
2838 // FIXME - doesn't capture everything, as it doesn't ascend
2839 // FIXME - write proper function and put it in treefuncs.c
2840 le = t->paritem->list_entry.Flink;
2841 while (le != &t->parent->itemlist) {
2842 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2843
2844 if (!td->ignore) {
2845 nextparitem = td;
2846 break;
2847 }
2848
2849 le = le->Flink;
2850 }
2851
2852 if (!nextparitem)
2853 return STATUS_SUCCESS;
2854
2855 // FIXME - loop, and capture more than one tree if we can
2856
2857 TRACE("nextparitem: key = %llx,%x,%llx\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset);
2858 // nextparitem = t->paritem;
2859
2860 // ExAcquireResourceExclusiveLite(&t->parent->nonpaged->load_tree_lock, TRUE);
2861
2862 Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, &loaded);
2863 if (!NT_SUCCESS(Status)) {
2864 ERR("do_load_tree returned %08x\n", Status);
2865 return Status;
2866 }
2867
2868 if (loaded)
2869 increase_tree_rc(t->parent);
2870
2871 // ExReleaseResourceLite(&t->parent->nonpaged->load_tree_lock);
2872
2873 next_tree = nextparitem->treeholder.tree;
2874
2875 if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) {
2876 // merge two trees into one
2877
2878 t->header.num_items += next_tree->header.num_items;
2879 t->size += next_tree->size;
2880
2881 if (next_tree->header.level > 0) {
2882 le = next_tree->itemlist.Flink;
2883
2884 while (le != &next_tree->itemlist) {
2885 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
2886
2887 if (td2->treeholder.tree) {
2888 td2->treeholder.tree->parent = t;
2889 increase_tree_rc(t);
2890 free_tree(next_tree);
2891 }
2892
2893 le = le->Flink;
2894 }
2895 }
2896
2897 t->itemlist.Blink->Flink = next_tree->itemlist.Flink;
2898 t->itemlist.Blink->Flink->Blink = t->itemlist.Blink;
2899 t->itemlist.Blink = next_tree->itemlist.Blink;
2900 t->itemlist.Blink->Flink = &t->itemlist;
2901
2902 // // TESTING
2903 // le = t->itemlist.Flink;
2904 // while (le != &t->itemlist) {
2905 // tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2906 // if (!td->ignore) {
2907 // ERR("key: %llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
2908 // }
2909 // le = le->Flink;
2910 // }
2911
2912 next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist;
2913
2914 next_tree->header.num_items = 0;
2915 next_tree->size = 0;
2916
2917 if (next_tree->has_new_address) { // delete associated EXTENT_ITEM
2918 Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, rollback);
2919
2920 if (!NT_SUCCESS(Status)) {
2921 ERR("reduce_tree_extent returned %08x\n", Status);
2922 free_tree(next_tree);
2923 return Status;
2924 }
2925 } else if (next_tree->has_address) {
2926 Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, rollback);
2927
2928 if (!NT_SUCCESS(Status)) {
2929 ERR("reduce_tree_extent returned %08x\n", Status);
2930 free_tree(next_tree);
2931 return Status;
2932 }
2933 }
2934
2935 if (!nextparitem->ignore) {
2936 nextparitem->ignore = TRUE;
2937 next_tree->parent->header.num_items--;
2938 next_tree->parent->size -= sizeof(internal_node);
2939 }
2940
2941 par = next_tree->parent;
2942 while (par) {
2943 add_to_tree_cache(Vcb, par, TRUE);
2944 par = par->parent;
2945 }
2946
2947 RemoveEntryList(&nextparitem->list_entry);
2948 ExFreePool(next_tree->paritem);
2949 next_tree->paritem = NULL;
2950
2951 next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
2952
2953 free_tree(next_tree);
2954
2955 // remove next_tree from tree cache
2956 le = Vcb->tree_cache.Flink;
2957 while (le != &Vcb->tree_cache) {
2958 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
2959
2960 if (tc2->tree == next_tree) {
2961 free_tree(next_tree);
2962 RemoveEntryList(le);
2963 ExFreePool(tc2);
2964 break;
2965 }
2966
2967 le = le->Flink;
2968 }
2969 } else {
2970 // rebalance by moving items from second tree into first
2971 ULONG avg_size = (t->size + next_tree->size) / 2;
2972 KEY firstitem = {0, 0, 0};
2973
2974 TRACE("attempting rebalance\n");
2975
2976 le = next_tree->itemlist.Flink;
2977 while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) {
2978 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2979 ULONG size;
2980
2981 if (!td->ignore) {
2982 if (next_tree->header.level == 0)
2983 size = sizeof(leaf_node) + td->size;
2984 else
2985 size = sizeof(internal_node);
2986 } else
2987 size = 0;
2988
2989 if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) {
2990 RemoveEntryList(&td->list_entry);
2991 InsertTailList(&t->itemlist, &td->list_entry);
2992
2993 if (next_tree->header.level > 0 && td->treeholder.tree) {
2994 td->treeholder.tree->parent = t;
2995 increase_tree_rc(t);
2996 free_tree(next_tree);
2997 }
2998
2999 if (!td->ignore) {
3000 next_tree->size -= size;
3001 t->size += size;
3002 next_tree->header.num_items--;
3003 t->header.num_items++;
3004 }
3005 } else
3006 break;
3007
3008 le = next_tree->itemlist.Flink;
3009 }
3010
3011 le = next_tree->itemlist.Flink;
3012 while (le != &next_tree->itemlist) {
3013 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3014
3015 if (!td->ignore) {
3016 firstitem = td->key;
3017 break;
3018 }
3019
3020 le = le->Flink;
3021 }
3022
3023 // ERR("firstitem = %llx,%x,%llx\n", firstitem.obj_id, firstitem.obj_type, firstitem.offset);
3024
3025 // FIXME - once ascension is working, make this work with parent's parent, etc.
3026 if (next_tree->paritem)
3027 next_tree->paritem->key = firstitem;
3028
3029 par = next_tree;
3030 while (par) {
3031 add_to_tree_cache(Vcb, par, TRUE);
3032 par = par->parent;
3033 }
3034
3035 free_tree(next_tree);
3036 }
3037
3038 return STATUS_SUCCESS;
3039 }
3040
3041 static NTSTATUS update_extent_level(device_extension* Vcb, UINT64 address, tree* t, UINT8 level, LIST_ENTRY* rollback) {
3042 KEY searchkey;
3043 traverse_ptr tp;
3044 NTSTATUS Status;
3045
3046 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
3047 searchkey.obj_id = address;
3048 searchkey.obj_type = TYPE_METADATA_ITEM;
3049 searchkey.offset = t->header.level;
3050
3051 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3052 if (!NT_SUCCESS(Status)) {
3053 ERR("error - find_item returned %08x\n", Status);
3054 return Status;
3055 }
3056
3057 if (!keycmp(&tp.item->key, &searchkey)) {
3058 EXTENT_ITEM_SKINNY_METADATA* eism;
3059
3060 if (tp.item->size > 0) {
3061 eism = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
3062
3063 if (!eism) {
3064 ERR("out of memory\n");
3065 free_traverse_ptr(&tp);
3066 return STATUS_INSUFFICIENT_RESOURCES;
3067 }
3068
3069 RtlCopyMemory(eism, tp.item->data, tp.item->size);
3070 } else
3071 eism = NULL;
3072
3073 delete_tree_item(Vcb, &tp, rollback);
3074
3075 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, tp.item->size, NULL, rollback)) {
3076 ERR("insert_tree_item failed\n");
3077 ExFreePool(eism);
3078 free_traverse_ptr(&tp);
3079 return STATUS_INTERNAL_ERROR;
3080 }
3081
3082 free_traverse_ptr(&tp);
3083 return STATUS_SUCCESS;
3084 }
3085
3086 free_traverse_ptr(&tp);
3087 }
3088
3089 searchkey.obj_id = address;
3090 searchkey.obj_type = TYPE_EXTENT_ITEM;
3091 searchkey.offset = 0xffffffffffffffff;
3092
3093 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3094 if (!NT_SUCCESS(Status)) {
3095 ERR("error - find_item returned %08x\n", Status);
3096 return Status;
3097 }
3098
3099 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
3100 EXTENT_ITEM_TREE* eit;
3101
3102 if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
3103 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
3104 free_traverse_ptr(&tp);
3105 return STATUS_INTERNAL_ERROR;
3106 }
3107
3108 eit = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
3109
3110 if (!eit) {
3111 ERR("out of memory\n");
3112 free_traverse_ptr(&tp);
3113 return STATUS_INSUFFICIENT_RESOURCES;
3114 }
3115
3116 RtlCopyMemory(eit, tp.item->data, tp.item->size);
3117
3118 delete_tree_item(Vcb, &tp, rollback);
3119
3120 eit->level = level;
3121
3122 if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, eit, tp.item->size, NULL, rollback)) {
3123 ERR("insert_tree_item failed\n");
3124 ExFreePool(eit);
3125 free_traverse_ptr(&tp);
3126 return STATUS_INTERNAL_ERROR;
3127 }
3128
3129 free_traverse_ptr(&tp);
3130
3131 return STATUS_SUCCESS;
3132 }
3133
3134 ERR("could not find EXTENT_ITEM for address %llx\n", address);
3135
3136 free_traverse_ptr(&tp);
3137
3138 return STATUS_INTERNAL_ERROR;
3139 }
3140
3141 static NTSTATUS STDCALL do_splits(device_extension* Vcb, LIST_ENTRY* rollback) {
3142 // LIST_ENTRY *le, *le2;
3143 // write_tree* wt;
3144 // tree_data* td;
3145 UINT8 level, max_level;
3146 UINT32 min_size;
3147 BOOL empty, done_deletions = FALSE;
3148 NTSTATUS Status;
3149 tree_cache* tc2;
3150
3151 TRACE("(%p)\n", Vcb);
3152
3153 max_level = 0;
3154
3155 for (level = 0; level <= 255; level++) {
3156 LIST_ENTRY *le, *nextle;
3157
3158 empty = TRUE;
3159
3160 TRACE("doing level %u\n", level);
3161
3162 le = Vcb->tree_cache.Flink;
3163
3164 while (le != &Vcb->tree_cache) {
3165 tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
3166
3167 nextle = le->Flink;
3168
3169 if (tc2->write && tc2->tree->header.level == level) {
3170 empty = FALSE;
3171
3172 if (tc2->tree->header.num_items == 0) {
3173 if (tc2->tree->parent) {
3174 LIST_ENTRY* le2;
3175 KEY firstitem = {0xcccccccccccccccc,0xcc,0xcccccccccccccccc};
3176
3177 done_deletions = TRUE;
3178
3179 le2 = tc2->tree->itemlist.Flink;
3180 while (le2 != &tc2->tree->itemlist) {
3181 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
3182 firstitem = td->key;
3183 break;
3184 }
3185 TRACE("deleting tree in root %llx (first item was %llx,%x,%llx)\n",
3186 tc2->tree->root->id, firstitem.obj_id, firstitem.obj_type, firstitem.offset);
3187
3188 tc2->tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
3189
3190 if (tc2->tree->has_new_address) { // delete associated EXTENT_ITEM
3191 Status = reduce_tree_extent(Vcb, tc2->tree->new_address, tc2->tree, rollback);
3192
3193 if (!NT_SUCCESS(Status)) {
3194 ERR("reduce_tree_extent returned %08x\n", Status);
3195 return Status;
3196 }
3197 } else if (tc2->tree->has_address) {
3198 Status = reduce_tree_extent(Vcb,tc2->tree->header.address, tc2->tree, rollback);
3199
3200 if (!NT_SUCCESS(Status)) {
3201 ERR("reduce_tree_extent returned %08x\n", Status);
3202 return Status;
3203 }
3204 }
3205
3206 if (!tc2->tree->paritem->ignore) {
3207 tc2->tree->paritem->ignore = TRUE;
3208 tc2->tree->parent->header.num_items--;
3209 tc2->tree->parent->size -= sizeof(internal_node);
3210 }
3211
3212 RemoveEntryList(&tc2->tree->paritem->list_entry);
3213 ExFreePool(tc2->tree->paritem);
3214 tc2->tree->paritem = NULL;
3215
3216 free_tree(tc2->tree);
3217
3218 RemoveEntryList(le);
3219 ExFreePool(tc2);
3220 } else if (tc2->tree->header.level != 0) {
3221 if (tc2->tree->has_new_address) {
3222 Status = update_extent_level(Vcb, tc2->tree->new_address, tc2->tree, 0, rollback);
3223
3224 if (!NT_SUCCESS(Status)) {
3225 ERR("update_extent_level returned %08x\n", Status);
3226 return Status;
3227 }
3228 }
3229
3230 tc2->tree->header.level = 0;
3231 }
3232 } else if (tc2->tree->size > Vcb->superblock.node_size - sizeof(tree_header)) {
3233 TRACE("splitting overlarge tree (%x > %x)\n", tc2->tree->size, Vcb->superblock.node_size - sizeof(tree_header));
3234 Status = split_tree(Vcb, tc2->tree);
3235
3236 if (!NT_SUCCESS(Status)) {
3237 ERR("split_tree returned %08x\n", Status);
3238 return Status;
3239 }
3240 }
3241 }
3242
3243 le = nextle;
3244 }
3245
3246 if (!empty) {
3247 max_level = level;
3248 } else {
3249 TRACE("nothing found for level %u\n", level);
3250 break;
3251 }
3252 }
3253
3254 min_size = (Vcb->superblock.node_size - sizeof(tree_header)) / 2;
3255
3256 for (level = 0; level <= max_level; level++) {
3257 LIST_ENTRY* le;
3258
3259 le = Vcb->tree_cache.Flink;
3260
3261 while (le != &Vcb->tree_cache) {
3262 tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
3263
3264 if (tc2->write && tc2->tree->header.level == level && tc2->tree->header.num_items > 0 && tc2->tree->parent && tc2->tree->size < min_size) {
3265 Status = try_tree_amalgamate(Vcb, tc2->tree, rollback);
3266 if (!NT_SUCCESS(Status)) {
3267 ERR("try_tree_amalgamate returned %08x\n", Status);
3268 return Status;
3269 }
3270 }
3271
3272 le = le->Flink;
3273 }
3274 }
3275
3276 // simplify trees if top tree only has one entry
3277
3278 if (done_deletions) {
3279 for (level = max_level; level > 0; level--) {
3280 LIST_ENTRY *le, *nextle;
3281
3282 le = Vcb->tree_cache.Flink;
3283 while (le != &Vcb->tree_cache) {
3284 nextle = le->Flink;
3285 tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
3286
3287 if (tc2->write && tc2->tree->header.level == level) {
3288 if (!tc2->tree->parent && tc2->tree->header.num_items == 1) {
3289 LIST_ENTRY* le2 = tc2->tree->itemlist.Flink;
3290 tree_data* td;
3291 tree* child_tree = NULL;
3292
3293 while (le2 != &tc2->tree->itemlist) {
3294 td = CONTAINING_RECORD(le2, tree_data, list_entry);
3295 if (!td->ignore)
3296 break;
3297 le2 = le2->Flink;
3298 }
3299
3300 TRACE("deleting top-level tree in root %llx with one item\n", tc2->tree->root->id);
3301
3302 if (tc2->tree->has_new_address) { // delete associated EXTENT_ITEM
3303 Status = reduce_tree_extent(Vcb, tc2->tree->new_address, tc2->tree, rollback);
3304
3305 if (!NT_SUCCESS(Status)) {
3306 ERR("reduce_tree_extent returned %08x\n", Status);
3307 return Status;
3308 }
3309 } else if (tc2->tree->has_address) {
3310 Status = reduce_tree_extent(Vcb,tc2->tree->header.address, tc2->tree, rollback);
3311
3312 if (!NT_SUCCESS(Status)) {
3313 ERR("reduce_tree_extent returned %08x\n", Status);
3314 return Status;
3315 }
3316 }
3317
3318 if (!td->treeholder.tree) { // load first item if not already loaded
3319 KEY searchkey = {0,0,0};
3320 traverse_ptr tp;
3321
3322 Status = find_item(Vcb, tc2->tree->root, &tp, &searchkey, FALSE);
3323 if (!NT_SUCCESS(Status)) {
3324 ERR("error - find_item returned %08x\n", Status);
3325 return Status;
3326 }
3327
3328 free_traverse_ptr(&tp);
3329 }
3330
3331 child_tree = td->treeholder.tree;
3332
3333 if (child_tree) {
3334 child_tree->parent = NULL;
3335 child_tree->paritem = NULL;
3336 free_tree(tc2->tree);
3337 }
3338
3339 tc2->tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
3340
3341 free_tree(tc2->tree);
3342
3343 if (child_tree)
3344 child_tree->root->treeholder.tree = child_tree;
3345
3346 RemoveEntryList(le);
3347 ExFreePool(tc2);
3348 }
3349 }
3350
3351 le = nextle;
3352 }
3353 }
3354 }
3355
3356 return STATUS_SUCCESS;
3357 }
3358
3359 NTSTATUS STDCALL do_write(device_extension* Vcb, LIST_ENTRY* rollback) {
3360 NTSTATUS Status;
3361 LIST_ENTRY* le;
3362
3363 TRACE("(%p)\n", Vcb);
3364
3365 // If only changing superblock, e.g. changing label, we still need to rewrite
3366 // the root tree so the generations match, otherwise you won't be able to mount on Linux.
3367 if (Vcb->write_trees > 0) {
3368 KEY searchkey;
3369 traverse_ptr tp;
3370
3371 searchkey.obj_id = 0;
3372 searchkey.obj_type = 0;
3373 searchkey.offset = 0;
3374
3375 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
3376 if (!NT_SUCCESS(Status)) {
3377 ERR("error - find_item returned %08x\n", Status);
3378 return Status;
3379 }
3380
3381 add_to_tree_cache(Vcb, Vcb->root_root->treeholder.tree, TRUE);
3382
3383 free_traverse_ptr(&tp);
3384 }
3385
3386 do {
3387 Status = add_parents(Vcb, rollback);
3388 if (!NT_SUCCESS(Status)) {
3389 ERR("add_parents returned %08x\n", Status);
3390 goto end;
3391 }
3392
3393 Status = do_splits(Vcb, rollback);
3394 if (!NT_SUCCESS(Status)) {
3395 ERR("do_splits returned %08x\n", Status);
3396 goto end;
3397 }
3398
3399 Status = allocate_tree_extents(Vcb, rollback);
3400 if (!NT_SUCCESS(Status)) {
3401 ERR("add_parents returned %08x\n", Status);
3402 goto end;
3403 }
3404
3405 Status = update_chunk_usage(Vcb, rollback);
3406 if (!NT_SUCCESS(Status)) {
3407 ERR("update_chunk_usage returned %08x\n", Status);
3408 goto end;
3409 }
3410 } while (!trees_consistent(Vcb));
3411
3412 TRACE("trees consistent\n");
3413
3414 Status = update_root_root(Vcb, rollback);
3415 if (!NT_SUCCESS(Status)) {
3416 ERR("update_root_root returned %08x\n", Status);
3417 goto end;
3418 }
3419
3420 Status = write_trees(Vcb);
3421 if (!NT_SUCCESS(Status)) {
3422 ERR("write_trees returned %08x\n", Status);
3423 goto end;
3424 }
3425
3426 Status = write_superblocks(Vcb);
3427 if (!NT_SUCCESS(Status)) {
3428 ERR("write_superblocks returned %08x\n", Status);
3429 goto end;
3430 }
3431
3432 clean_space_cache(Vcb);
3433
3434 Vcb->superblock.generation++;
3435
3436 // print_trees(tc); // TESTING
3437
3438 Status = STATUS_SUCCESS;
3439
3440 le = Vcb->tree_cache.Flink;
3441 while (le != &Vcb->tree_cache) {
3442 tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
3443
3444 tc2->write = FALSE;
3445
3446 le = le->Flink;
3447 }
3448
3449 Vcb->write_trees = 0;
3450
3451 end:
3452 TRACE("do_write returning %08x\n", Status);
3453
3454 return Status;
3455 }
3456
3457 NTSTATUS consider_write(device_extension* Vcb) {
3458 // FIXME - call do_write if Vcb->write_trees high
3459 #if 0
3460 LIST_ENTRY rollback;
3461 NTSTATUS Status = STATUS_SUCCESS;
3462
3463 InitializeListHead(&rollback);
3464
3465 if (Vcb->write_trees > 0)
3466 Status = do_write(Vcb, &rollback);
3467
3468 free_tree_cache(&Vcb->tree_cache);
3469
3470 if (!NT_SUCCESS(Status))
3471 do_rollback(Vcb, &rollback);
3472 else
3473 clear_rollback(&rollback);
3474
3475 return Status;
3476 #else
3477 return STATUS_SUCCESS;
3478 #endif
3479 }
3480
3481 static __inline void insert_into_ordered_list(LIST_ENTRY* list, ordered_list* ins) {
3482 LIST_ENTRY* le = list->Flink;
3483 ordered_list* ol;
3484
3485 while (le != list) {
3486 ol = (ordered_list*)le;
3487
3488 if (ol->key > ins->key) {
3489 le->Blink->Flink = &ins->list_entry;
3490 ins->list_entry.Blink = le->Blink;
3491 le->Blink = &ins->list_entry;
3492 ins->list_entry.Flink = le;
3493 return;
3494 }
3495
3496 le = le->Flink;
3497 }
3498
3499 InsertTailList(list, &ins->list_entry);
3500 }
3501
3502 static UINT64 get_extent_data_ref_hash(UINT64 root, UINT64 objid, UINT64 offset) {
3503 UINT32 high_crc = 0xffffffff, low_crc = 0xffffffff;
3504
3505 // FIXME - can we test this?
3506
3507 // FIXME - make sure numbers here are little-endian
3508 high_crc = calc_crc32c(high_crc, (UINT8*)&root, sizeof(UINT64));
3509 low_crc = calc_crc32c(low_crc, (UINT8*)&objid, sizeof(UINT64));
3510 low_crc = calc_crc32c(low_crc, (UINT8*)&offset, sizeof(UINT64));
3511
3512 return ((UINT64)high_crc << 31) ^ (UINT64)low_crc;
3513 }
3514
3515 NTSTATUS STDCALL add_extent_ref(device_extension* Vcb, UINT64 address, UINT64 size, root* subvol, UINT64 inode, UINT64 offset, LIST_ENTRY* rollback) {
3516 KEY searchkey;
3517 traverse_ptr tp;
3518 EXTENT_ITEM* ei;
3519 UINT8 *siptr, *type;
3520 ULONG len;
3521 UINT64 hash;
3522 EXTENT_DATA_REF* edr;
3523 NTSTATUS Status;
3524
3525 TRACE("(%p, %llx, %llx, %llx, %llx, %llx)\n", Vcb, address, size, subvol->id, inode, offset);
3526
3527 searchkey.obj_id = address;
3528 searchkey.obj_type = TYPE_EXTENT_ITEM;
3529 searchkey.offset = size;
3530
3531 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3532 if (!NT_SUCCESS(Status)) {
3533 ERR("error - find_item returned %08x\n", Status);
3534 return Status;
3535 }
3536
3537 if (keycmp(&tp.item->key, &searchkey)) {
3538 // create new entry
3539
3540 len = sizeof(EXTENT_ITEM) + sizeof(UINT8) + sizeof(EXTENT_DATA_REF);
3541 free_traverse_ptr(&tp);
3542
3543 ei = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
3544 if (!ei) {
3545 ERR("out of memory\n");
3546 return STATUS_INSUFFICIENT_RESOURCES;
3547 }
3548
3549 ei->refcount = 1;
3550 ei->generation = Vcb->superblock.generation;
3551 ei->flags = EXTENT_ITEM_DATA;
3552
3553 type = (UINT8*)&ei[1];
3554 *type = TYPE_EXTENT_DATA_REF;
3555
3556 edr = (EXTENT_DATA_REF*)&type[1];
3557 edr->root = subvol->id;
3558 edr->objid = inode;
3559 edr->offset = offset;
3560 edr->count = 1;
3561
3562 if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ei, len, NULL, rollback)) {
3563 ERR("error - failed to insert item\n");
3564 return STATUS_INTERNAL_ERROR;
3565 }
3566
3567 // FIXME - update free space in superblock and CHUNK_ITEM
3568
3569 return STATUS_SUCCESS;
3570 }
3571
3572 if (tp.item->size == sizeof(EXTENT_ITEM_V0)) { // old extent ref, convert
3573 NTSTATUS Status = convert_old_data_extent(Vcb, address, size, rollback);
3574 if (!NT_SUCCESS(Status)) {
3575 ERR("convert_old_data_extent returned %08x\n", Status);
3576 free_traverse_ptr(&tp);
3577 return Status;
3578 }
3579
3580 free_traverse_ptr(&tp);
3581
3582 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3583 if (!NT_SUCCESS(Status)) {
3584 ERR("error - find_item returned %08x\n", Status);
3585 return Status;
3586 }
3587
3588 if (keycmp(&tp.item->key, &searchkey)) {
3589 WARN("extent item not found for address %llx, size %llx\n", address, size);
3590 free_traverse_ptr(&tp);
3591 return STATUS_SUCCESS;
3592 }
3593 }
3594
3595 ei = (EXTENT_ITEM*)tp.item->data;
3596
3597 if (tp.item->size < sizeof(EXTENT_ITEM)) {
3598 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
3599 free_traverse_ptr(&tp);
3600 return STATUS_INTERNAL_ERROR;
3601 }
3602
3603 if (extent_item_is_shared(ei, tp.item->size - sizeof(EXTENT_ITEM))) {
3604 NTSTATUS Status = convert_shared_data_extent(Vcb, address, size, rollback);
3605 if (!NT_SUCCESS(Status)) {
3606 ERR("convert_shared_data_extent returned %08x\n", Status);
3607 free_traverse_ptr(&tp);
3608 return Status;
3609 }
3610
3611 free_traverse_ptr(&tp);
3612
3613 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3614 if (!NT_SUCCESS(Status)) {
3615 ERR("error - find_item returned %08x\n", Status);
3616 return Status;
3617 }
3618
3619 if (keycmp(&tp.item->key, &searchkey)) {
3620 WARN("extent item not found for address %llx, size %llx\n", address, size);
3621 free_traverse_ptr(&tp);
3622 return STATUS_SUCCESS;
3623 }
3624
3625 ei = (EXTENT_ITEM*)tp.item->data;
3626 }
3627
3628 if (ei->flags != EXTENT_ITEM_DATA) {
3629 ERR("error - flag was not EXTENT_ITEM_DATA\n");
3630 free_traverse_ptr(&tp);
3631 return STATUS_INTERNAL_ERROR;
3632 }
3633
3634 // FIXME - is ei->refcount definitely the number of items, or is it the sum of the subitem refcounts?
3635
3636 hash = get_extent_data_ref_hash(subvol->id, inode, offset);
3637
3638 len = tp.item->size - sizeof(EXTENT_ITEM);
3639 siptr = (UINT8*)&ei[1];
3640
3641 // FIXME - increase subitem refcount if there already?
3642 do {
3643 if (*siptr == TYPE_EXTENT_DATA_REF) {
3644 UINT64 sihash;
3645
3646 edr = (EXTENT_DATA_REF*)&siptr[1];
3647 sihash = get_extent_data_ref_hash(edr->root, edr->objid, edr->offset);
3648
3649 if (sihash >= hash)
3650 break;
3651
3652 siptr += sizeof(UINT8) + sizeof(EXTENT_DATA_REF);
3653
3654 if (len > sizeof(EXTENT_DATA_REF) + sizeof(UINT8)) {
3655 len -= sizeof(EXTENT_DATA_REF) + sizeof(UINT8);
3656 } else
3657 break;
3658 // FIXME - TYPE_TREE_BLOCK_REF 0xB0
3659 } else {
3660 ERR("unrecognized extent subitem %x\n", *siptr);
3661 free_traverse_ptr(&tp);
3662 return STATUS_INTERNAL_ERROR;
3663 }
3664 } while (len > 0);
3665
3666 len = tp.item->size + sizeof(UINT8) + sizeof(EXTENT_DATA_REF); // FIXME - die if too big
3667 ei = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
3668 if (!ei) {
3669 ERR("out of memory\n");
3670 return STATUS_INSUFFICIENT_RESOURCES;
3671 }
3672
3673 RtlCopyMemory(ei, tp.item->data, siptr - tp.item->data);
3674 ei->refcount++;
3675
3676 type = (UINT8*)ei + (siptr - tp.item->data);
3677 *type = TYPE_EXTENT_DATA_REF;
3678
3679 edr = (EXTENT_DATA_REF*)&type[1];
3680 edr->root = subvol->id;
3681 edr->objid = inode;
3682 edr->offset = offset;
3683 edr->count = 1;
3684
3685 if (siptr < tp.item->data + tp.item->size)
3686 RtlCopyMemory(&edr[1], siptr, tp.item->data + tp.item->size - siptr);
3687
3688 delete_tree_item(Vcb, &tp, rollback);
3689
3690 free_traverse_ptr(&tp);
3691
3692 if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ei, len, NULL, rollback)) {
3693 ERR("error - failed to insert item\n");
3694 ExFreePool(ei);
3695 return STATUS_INTERNAL_ERROR;
3696 }
3697
3698 return STATUS_SUCCESS;
3699 }
3700
3701 typedef struct {
3702 EXTENT_DATA_REF edr;
3703 LIST_ENTRY list_entry;
3704 } data_ref;
3705
3706 static void add_data_ref(LIST_ENTRY* data_refs, UINT64 root, UINT64 objid, UINT64 offset) {
3707 data_ref* dr = ExAllocatePoolWithTag(PagedPool, sizeof(data_ref), ALLOC_TAG);
3708
3709 if (!dr) {
3710 ERR("out of memory\n");
3711 return;
3712 }
3713
3714 // FIXME - increase count if entry there already
3715 // FIXME - put in order?
3716
3717 dr->edr.root = root;
3718 dr->edr.objid = objid;
3719 dr->edr.offset = offset;
3720 dr->edr.count = 1;
3721
3722 InsertTailList(data_refs, &dr->list_entry);
3723 }
3724
3725 static void free_data_refs(LIST_ENTRY* data_refs) {
3726 while (!IsListEmpty(data_refs)) {
3727 LIST_ENTRY* le = RemoveHeadList(data_refs);
3728 data_ref* dr = CONTAINING_RECORD(le, data_ref, list_entry);
3729
3730 ExFreePool(dr);
3731 }
3732 }
3733
3734 static NTSTATUS convert_old_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, LIST_ENTRY* rollback) {
3735 KEY searchkey;
3736 traverse_ptr tp, next_tp;
3737 BOOL b;
3738 LIST_ENTRY data_refs;
3739 LIST_ENTRY* le;
3740 UINT64 refcount;
3741 EXTENT_ITEM* ei;
3742 ULONG eisize;
3743 UINT8* type;
3744 NTSTATUS Status;
3745
3746 searchkey.obj_id = address;
3747 searchkey.obj_type = TYPE_EXTENT_ITEM;
3748 searchkey.offset = size;
3749
3750 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3751 if (!NT_SUCCESS(Status)) {
3752 ERR("error - find_item returned %08x\n", Status);
3753 return Status;
3754 }
3755
3756 if (keycmp(&tp.item->key, &searchkey)) {
3757 WARN("extent item not found for address %llx, size %llx\n", address, size);
3758 free_traverse_ptr(&tp);
3759 return STATUS_SUCCESS;
3760 }
3761
3762 if (tp.item->size != sizeof(EXTENT_ITEM_V0)) {
3763 TRACE("extent does not appear to be old - returning STATUS_SUCCESS\n");
3764 free_traverse_ptr(&tp);
3765 return STATUS_SUCCESS;
3766 }
3767
3768 delete_tree_item(Vcb, &tp, rollback);
3769
3770 free_traverse_ptr(&tp);
3771
3772 searchkey.obj_id = address;
3773 searchkey.obj_type = TYPE_EXTENT_REF_V0;
3774 searchkey.offset = 0;
3775
3776 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3777 if (!NT_SUCCESS(Status)) {
3778 ERR("error - find_item returned %08x\n", Status);
3779 return Status;
3780 }
3781
3782 InitializeListHead(&data_refs);
3783
3784 do {
3785 b = find_next_item(Vcb, &tp, &next_tp, FALSE);
3786
3787 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
3788 tree* t;
3789
3790 // normally we'd need to acquire load_tree_lock here, but we're protected by the write tree lock
3791
3792 Status = load_tree(Vcb, tp.item->key.offset, NULL, &t);
3793
3794 if (!NT_SUCCESS(Status)) {
3795 ERR("load tree for address %llx returned %08x\n", tp.item->key.offset, Status);
3796 free_traverse_ptr(&tp);
3797 free_data_refs(&data_refs);
3798 return Status;
3799 }
3800
3801 if (t->header.level == 0) {
3802 le = t->itemlist.Flink;
3803 while (le != &t->itemlist) {
3804 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3805
3806 if (!td->ignore && td->key.obj_type == TYPE_EXTENT_DATA) {
3807 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
3808
3809 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
3810 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
3811
3812 if (ed2->address == address)
3813 add_data_ref(&data_refs, t->header.tree_id, td->key.obj_id, td->key.offset);
3814 }
3815 }
3816
3817 le = le->Flink;
3818 }
3819 }
3820
3821 free_tree(t);
3822
3823 delete_tree_item(Vcb, &tp, rollback);
3824 }
3825
3826 if (b) {
3827 free_traverse_ptr(&tp);
3828 tp = next_tp;
3829
3830 if (tp.item->key.obj_id > searchkey.obj_id || tp.item->key.obj_type > searchkey.obj_type)
3831 break;
3832 }
3833 } while (b);
3834
3835 free_traverse_ptr(&tp);
3836
3837 if (IsListEmpty(&data_refs)) {
3838 WARN("no data refs found\n");
3839 return STATUS_SUCCESS;
3840 }
3841
3842 // create new entry
3843
3844 refcount = 0;
3845
3846 le = data_refs.Flink;
3847 while (le != &data_refs) {
3848 refcount++;
3849 le = le->Flink;
3850 }
3851
3852 eisize = sizeof(EXTENT_ITEM) + ((sizeof(char) + sizeof(EXTENT_DATA_REF)) * refcount);
3853 ei = ExAllocatePoolWithTag(PagedPool, eisize, ALLOC_TAG);
3854 if (!ei) {
3855 ERR("out of memory\n");
3856 return STATUS_INSUFFICIENT_RESOURCES;
3857 }
3858
3859 ei->refcount = refcount;
3860 ei->generation = Vcb->superblock.generation;
3861 ei->flags = EXTENT_ITEM_DATA;
3862
3863 type = (UINT8*)&ei[1];
3864
3865 le = data_refs.Flink;
3866 while (le != &data_refs) {
3867 data_ref* dr = CONTAINING_RECORD(le, data_ref, list_entry);
3868
3869 type[0] = TYPE_EXTENT_DATA_REF;
3870 RtlCopyMemory(&type[1], &dr->edr, sizeof(EXTENT_DATA_REF));
3871
3872 type = &type[1 + sizeof(EXTENT_DATA_REF)];
3873
3874 le = le->Flink;
3875 }
3876
3877 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, size, ei, eisize, NULL, rollback)) {
3878 ERR("error - failed to insert item\n");
3879 ExFreePool(ei);
3880 return STATUS_INTERNAL_ERROR;
3881 }
3882
3883 free_data_refs(&data_refs);
3884
3885 return STATUS_SUCCESS;
3886 }
3887
3888 typedef struct {
3889 UINT8 type;
3890 void* data;
3891 BOOL allocated;
3892 LIST_ENTRY list_entry;
3893 } extent_ref;
3894
3895 static void free_extent_refs(LIST_ENTRY* extent_refs) {
3896 while (!IsListEmpty(extent_refs)) {
3897 LIST_ENTRY* le = RemoveHeadList(extent_refs);
3898 extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
3899
3900 if (er->allocated)
3901 ExFreePool(er->data);
3902
3903 ExFreePool(er);
3904 }
3905 }
3906
3907 static NTSTATUS convert_shared_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, LIST_ENTRY* rollback) {
3908 KEY searchkey;
3909 traverse_ptr tp;
3910 LIST_ENTRY extent_refs;
3911 LIST_ENTRY *le, *next_le;
3912 EXTENT_ITEM *ei, *newei;
3913 UINT8* siptr;
3914 ULONG len;
3915 UINT64 count;
3916 NTSTATUS Status;
3917
3918 searchkey.obj_id = address;
3919 searchkey.obj_type = TYPE_EXTENT_ITEM;
3920 searchkey.offset = size;
3921
3922 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3923 if (!NT_SUCCESS(Status)) {
3924 ERR("error - find_item returned %08x\n", Status);
3925 return Status;
3926 }
3927
3928 if (keycmp(&tp.item->key, &searchkey)) {
3929 WARN("extent item not found for address %llx, size %llx\n", address, size);
3930 free_traverse_ptr(&tp);
3931 return STATUS_SUCCESS;
3932 }
3933
3934 if (tp.item->size < sizeof(EXTENT_ITEM)) {
3935 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
3936 free_traverse_ptr(&tp);
3937 return STATUS_INTERNAL_ERROR;
3938 }
3939
3940 ei = (EXTENT_ITEM*)tp.item->data;
3941 len = tp.item->size - sizeof(EXTENT_ITEM);
3942
3943 InitializeListHead(&extent_refs);
3944
3945 siptr = (UINT8*)&ei[1];
3946
3947 do {
3948 extent_ref* er = ExAllocatePoolWithTag(PagedPool, sizeof(extent_ref), ALLOC_TAG);
3949 if (!er) {
3950 ERR("out of memory\n");
3951 free_traverse_ptr(&tp);
3952 return STATUS_INSUFFICIENT_RESOURCES;
3953 }
3954
3955 er->type = *siptr;
3956 er->data = siptr+1;
3957 er->allocated = FALSE;
3958
3959 InsertTailList(&extent_refs, &er->list_entry);
3960
3961 if (*siptr == TYPE_TREE_BLOCK_REF) {
3962 siptr += sizeof(TREE_BLOCK_REF);
3963 len -= sizeof(TREE_BLOCK_REF) + 1;
3964 } else if (*siptr == TYPE_EXTENT_DATA_REF) {
3965 siptr += sizeof(EXTENT_DATA_REF);
3966 len -= sizeof(EXTENT_DATA_REF) + 1;
3967 } else if (*siptr == TYPE_SHARED_BLOCK_REF) {
3968 siptr += sizeof(SHARED_BLOCK_REF);
3969 len -= sizeof(SHARED_BLOCK_REF) + 1;
3970 } else if (*siptr == TYPE_SHARED_DATA_REF) {
3971 siptr += sizeof(SHARED_DATA_REF);
3972 len -= sizeof(SHARED_DATA_REF) + 1;
3973 } else {
3974 ERR("unrecognized extent subitem %x\n", *siptr);
3975 free_traverse_ptr(&tp);
3976 free_extent_refs(&extent_refs);
3977 return STATUS_INTERNAL_ERROR;
3978 }
3979 } while (len > 0);
3980
3981 le = extent_refs.Flink;
3982 while (le != &extent_refs) {
3983 extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
3984 next_le = le->Flink;
3985
3986 if (er->type == TYPE_SHARED_DATA_REF) {
3987 // normally we'd need to acquire load_tree_lock here, but we're protected by the write tree lock
3988 SHARED_DATA_REF* sdr = er->data;
3989 tree* t;
3990
3991 Status = load_tree(Vcb, sdr->offset, NULL, &t);
3992 if (!NT_SUCCESS(Status)) {
3993 ERR("load_tree for address %llx returned %08x\n", sdr->offset, Status);
3994 free_traverse_ptr(&tp);
3995 free_data_refs(&extent_refs);
3996 return Status;
3997 }
3998
3999 if (t->header.level == 0) {
4000 LIST_ENTRY* le2 = t->itemlist.Flink;
4001 while (le2 != &t->itemlist) {
4002 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
4003
4004 if (!td->ignore && td->key.obj_type == TYPE_EXTENT_DATA) {
4005 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
4006
4007 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
4008 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
4009
4010 if (ed2->address == address) {
4011 extent_ref* er2;
4012 EXTENT_DATA_REF* edr;
4013
4014 er2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent_ref), ALLOC_TAG);
4015 if (!er2) {
4016 ERR("out of memory\n");
4017 free_traverse_ptr(&tp);
4018 return STATUS_INSUFFICIENT_RESOURCES;
4019 }
4020
4021 edr = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA_REF), ALLOC_TAG);
4022 if (!edr) {
4023 ERR("out of memory\n");
4024 free_traverse_ptr(&tp);
4025 ExFreePool(er2);
4026 return STATUS_INSUFFICIENT_RESOURCES;
4027 }
4028
4029 edr->root = t->header.tree_id;
4030 edr->objid = td->key.obj_id;
4031 edr->offset = td->key.offset;
4032 edr->count = 1;
4033
4034 er2->type = TYPE_EXTENT_DATA_REF;
4035 er2->data = edr;
4036 er2->allocated = TRUE;
4037
4038 InsertTailList(&extent_refs, &er2->list_entry); // FIXME - list should be in order
4039 }
4040 }
4041 }
4042
4043 le2 = le2->Flink;
4044 }
4045 }
4046
4047 free_tree(t);
4048
4049 RemoveEntryList(&er->list_entry);
4050
4051 if (er->allocated)
4052 ExFreePool(er->data);
4053
4054 ExFreePool(er);
4055 }
4056 // FIXME - also do for SHARED_BLOCK_REF?
4057
4058 le = next_le;
4059 }
4060
4061 if (IsListEmpty(&extent_refs)) {
4062 WARN("no extent refs found\n");
4063 delete_tree_item(Vcb, &tp, rollback);
4064 free_traverse_ptr(&tp);
4065 return STATUS_SUCCESS;
4066 }
4067
4068 len = 0;
4069 count = 0;
4070 le = extent_refs.Flink;
4071 while (le != &extent_refs) {
4072 extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
4073
4074 len++;
4075 if (er->type == TYPE_TREE_BLOCK_REF) {
4076 len += sizeof(TREE_BLOCK_REF);
4077 } else if (er->type == TYPE_EXTENT_DATA_REF) {
4078 len += sizeof(EXTENT_DATA_REF);
4079 } else {
4080 ERR("unexpected extent subitem %x\n", er->type);
4081 }
4082
4083 count++;
4084
4085 le = le->Flink;
4086 }
4087
4088 newei = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM) + len, ALLOC_TAG);
4089 if (!newei) {
4090 ERR("out of memory\n");
4091 free_traverse_ptr(&tp);
4092 return STATUS_INSUFFICIENT_RESOURCES;
4093 }
4094
4095 RtlCopyMemory(newei, ei, sizeof(EXTENT_ITEM));
4096 newei->refcount = count;
4097
4098 siptr = (UINT8*)&newei[1];
4099 le = extent_refs.Flink;
4100 while (le != &extent_refs) {
4101 extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
4102
4103 *siptr = er->type;
4104 siptr++;
4105
4106 if (er->type == TYPE_TREE_BLOCK_REF) {
4107 RtlCopyMemory(siptr, er->data, sizeof(TREE_BLOCK_REF));
4108 } else if (er->type == TYPE_EXTENT_DATA_REF) {
4109 RtlCopyMemory(siptr, er->data, sizeof(EXTENT_DATA_REF));
4110 } else {
4111 ERR("unexpected extent subitem %x\n", er->type);
4112 }
4113
4114 le = le->Flink;
4115 }
4116
4117 delete_tree_item(Vcb, &tp, rollback);
4118 free_traverse_ptr(&tp);
4119
4120 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, size, newei, sizeof(EXTENT_ITEM) + len, NULL, rollback)) {
4121 ERR("error - failed to insert item\n");
4122 ExFreePool(newei);
4123 free_extent_refs(&extent_refs);
4124 return STATUS_INTERNAL_ERROR;
4125 }
4126
4127 free_extent_refs(&extent_refs);
4128
4129 return STATUS_SUCCESS;
4130 }
4131
4132 static BOOL extent_item_is_shared(EXTENT_ITEM* ei, ULONG len) {
4133 UINT8* siptr = (UINT8*)&ei[1];
4134
4135 do {
4136 if (*siptr == TYPE_TREE_BLOCK_REF) {
4137 siptr += sizeof(TREE_BLOCK_REF) + 1;
4138 len -= sizeof(TREE_BLOCK_REF) + 1;
4139 } else if (*siptr == TYPE_EXTENT_DATA_REF) {
4140 siptr += sizeof(EXTENT_DATA_REF) + 1;
4141 len -= sizeof(EXTENT_DATA_REF) + 1;
4142 } else if (*siptr == TYPE_SHARED_BLOCK_REF) {
4143 return TRUE;
4144 } else if (*siptr == TYPE_SHARED_DATA_REF) {
4145 return TRUE;
4146 } else {
4147 ERR("unrecognized extent subitem %x\n", *siptr);
4148 return FALSE;
4149 }
4150 } while (len > 0);
4151
4152 return FALSE;
4153 }
4154
4155 NTSTATUS STDCALL remove_extent_ref(device_extension* Vcb, UINT64 address, UINT64 size, root* subvol, UINT64 inode, UINT64 offset, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
4156 KEY searchkey;
4157 traverse_ptr tp;
4158 EXTENT_ITEM* ei;
4159 UINT8* siptr;
4160 ULONG len;
4161 EXTENT_DATA_REF* edr;
4162 BOOL found;
4163 NTSTATUS Status;
4164
4165 TRACE("(%p, %llx, %llx, %llx, %llx, %llx)\n", Vcb, address, size, subvol->id, inode, offset);
4166
4167 searchkey.obj_id = address;
4168 searchkey.obj_type = TYPE_EXTENT_ITEM;
4169 searchkey.offset = size;
4170
4171 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
4172 if (!NT_SUCCESS(Status)) {
4173 ERR("error - find_item returned %08x\n", Status);
4174 return Status;
4175 }
4176
4177 if (keycmp(&tp.item->key, &searchkey)) {
4178 WARN("extent item not found for address %llx, size %llx\n", address, size);
4179 free_traverse_ptr(&tp);
4180 return STATUS_SUCCESS;
4181 }
4182
4183 if (tp.item->size == sizeof(EXTENT_ITEM_V0)) { // old extent ref, convert
4184 NTSTATUS Status = convert_old_data_extent(Vcb, address, size, rollback);
4185 if (!NT_SUCCESS(Status)) {
4186 ERR("convert_old_data_extent returned %08x\n", Status);
4187 free_traverse_ptr(&tp);
4188 return Status;
4189 }
4190
4191 free_traverse_ptr(&tp);
4192
4193 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
4194 if (!NT_SUCCESS(Status)) {
4195 ERR("error - find_item returned %08x\n", Status);
4196 return Status;
4197 }
4198
4199 if (keycmp(&tp.item->key, &searchkey)) {
4200 WARN("extent item not found for address %llx, size %llx\n", address, size);
4201 free_traverse_ptr(&tp);
4202 return STATUS_SUCCESS;
4203 }
4204 }
4205
4206 ei = (EXTENT_ITEM*)tp.item->data;
4207
4208 if (tp.item->size < sizeof(EXTENT_ITEM)) {
4209 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
4210 free_traverse_ptr(&tp);
4211 return STATUS_INTERNAL_ERROR;
4212 }
4213
4214 if (!(ei->flags & EXTENT_ITEM_DATA)) {
4215 ERR("error - EXTENT_ITEM_DATA flag not set\n");
4216 free_traverse_ptr(&tp);
4217 return STATUS_INTERNAL_ERROR;
4218 }
4219
4220 // FIXME - is ei->refcount definitely the number of items, or is it the sum of the subitem refcounts?
4221
4222 if (extent_item_is_shared(ei, tp.item->size - sizeof(EXTENT_ITEM))) {
4223 NTSTATUS Status = convert_shared_data_extent(Vcb, address, size, rollback);
4224 if (!NT_SUCCESS(Status)) {
4225 ERR("convert_shared_data_extent returned %08x\n", Status);
4226 free_traverse_ptr(&tp);
4227 return Status;
4228 }
4229
4230 free_traverse_ptr(&tp);
4231
4232 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
4233 if (!NT_SUCCESS(Status)) {
4234 ERR("error - find_item returned %08x\n", Status);
4235 return Status;
4236 }
4237
4238 if (keycmp(&tp.item->key, &searchkey)) {
4239 WARN("extent item not found for address %llx, size %llx\n", address, size);
4240 free_traverse_ptr(&tp);
4241 return STATUS_SUCCESS;
4242 }
4243
4244 ei = (EXTENT_ITEM*)tp.item->data;
4245 }
4246
4247 len = tp.item->size - sizeof(EXTENT_ITEM);
4248 siptr = (UINT8*)&ei[1];
4249 found = FALSE;
4250
4251 do {
4252 if (*siptr == TYPE_EXTENT_DATA_REF) {
4253 edr = (EXTENT_DATA_REF*)&siptr[1];
4254
4255 if (edr->root == subvol->id && edr->objid == inode && edr->offset == offset) {
4256 found = TRUE;
4257 break;
4258 }
4259
4260 siptr += sizeof(UINT8) + sizeof(EXTENT_DATA_REF);
4261
4262 if (len > sizeof(EXTENT_DATA_REF) + sizeof(UINT8)) {
4263 len -= sizeof(EXTENT_DATA_REF) + sizeof(UINT8);
4264 } else
4265 break;
4266 // // FIXME - TYPE_TREE_BLOCK_REF 0xB0
4267 } else {
4268 ERR("unrecognized extent subitem %x\n", *siptr);
4269 free_traverse_ptr(&tp);
4270 return STATUS_INTERNAL_ERROR;
4271 }
4272 } while (len > 0);
4273
4274 if (!found) {
4275 WARN("could not find extent data ref\n");
4276 free_traverse_ptr(&tp);
4277 return STATUS_SUCCESS;
4278 }
4279
4280 // FIXME - decrease subitem refcount if there already?
4281
4282 len = tp.item->size - sizeof(UINT8) - sizeof(EXTENT_DATA_REF);
4283
4284 delete_tree_item(Vcb, &tp, rollback);
4285
4286 if (len == sizeof(EXTENT_ITEM)) { // extent no longer needed
4287 chunk* c;
4288 LIST_ENTRY* le2;
4289
4290 if (changed_sector_list) {
4291 changed_sector* sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
4292 if (!sc) {
4293 ERR("out of memory\n");
4294 free_traverse_ptr(&tp);
4295 return STATUS_INSUFFICIENT_RESOURCES;
4296 }
4297
4298 sc->ol.key = address;
4299 sc->checksums = NULL;
4300 sc->length = size / Vcb->superblock.sector_size;
4301
4302 sc->deleted = TRUE;
4303
4304 insert_into_ordered_list(changed_sector_list, &sc->ol);
4305 }
4306
4307 c = NULL;
4308 le2 = Vcb->chunks.Flink;
4309 while (le2 != &Vcb->chunks) {
4310 c = CONTAINING_RECORD(le2, chunk, list_entry);
4311
4312 TRACE("chunk: %llx, %llx\n", c->offset, c->chunk_item->size);
4313
4314 if (address >= c->offset && address + size < c->offset + c->chunk_item->size)
4315 break;
4316
4317 le2 = le2->Flink;
4318 }
4319 if (le2 == &Vcb->chunks) c = NULL;
4320
4321 if (c) {
4322 decrease_chunk_usage(c, size);
4323
4324 add_to_space_list(c, address, size, SPACE_TYPE_DELETING);
4325 }
4326
4327 free_traverse_ptr(&tp);
4328 return STATUS_SUCCESS;
4329 }
4330
4331 ei = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
4332 if (!ei) {
4333 ERR("out of memory\n");
4334 free_traverse_ptr(&tp);
4335 return STATUS_INSUFFICIENT_RESOURCES;
4336 }
4337
4338 RtlCopyMemory(ei, tp.item->data, siptr - tp.item->data);
4339 ei->refcount--;
4340 ei->generation = Vcb->superblock.generation;
4341
4342 if (tp.item->data + len != siptr)
4343 RtlCopyMemory((UINT8*)ei + (siptr - tp.item->data), siptr + sizeof(UINT8) + sizeof(EXTENT_DATA_REF), tp.item->size - (siptr - tp.item->data) - sizeof(UINT8) - sizeof(EXTENT_DATA_REF));
4344
4345 free_traverse_ptr(&tp);
4346
4347 if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ei, len, NULL, rollback)) {
4348 ERR("error - failed to insert item\n");
4349 ExFreePool(ei);
4350 return STATUS_INTERNAL_ERROR;
4351 }
4352
4353 return STATUS_SUCCESS;
4354 }
4355
4356 static __inline BOOL entry_in_ordered_list(LIST_ENTRY* list, UINT64 value) {
4357 LIST_ENTRY* le = list->Flink;
4358 ordered_list* ol;
4359
4360 while (le != list) {
4361 ol = (ordered_list*)le;
4362
4363 if (ol->key > value)
4364 return FALSE;
4365 else if (ol->key == value)
4366 return TRUE;
4367
4368 le = le->Flink;
4369 }
4370
4371 return FALSE;
4372 }
4373
4374 NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 end_data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
4375 KEY searchkey;
4376 traverse_ptr tp, next_tp;
4377 NTSTATUS Status;
4378 BOOL b;
4379
4380 TRACE("(%p, (%llx, %llx), %llx, %llx, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, end_data, changed_sector_list);
4381
4382 searchkey.obj_id = fcb->inode;
4383 searchkey.obj_type = TYPE_EXTENT_DATA;
4384 searchkey.offset = start_data;
4385
4386 Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
4387 if (!NT_SUCCESS(Status)) {
4388 ERR("error - find_item returned %08x\n", Status);
4389 return Status;
4390 }
4391
4392 do {
4393 EXTENT_DATA* ed = (EXTENT_DATA*)tp.item->data;
4394 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
4395 UINT64 len;
4396
4397 if (tp.item->size < sizeof(EXTENT_DATA)) {
4398 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
4399 Status = STATUS_INTERNAL_ERROR;
4400 goto end;
4401 }
4402
4403 if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
4404 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
4405 Status = STATUS_INTERNAL_ERROR;
4406 goto end;
4407 }
4408
4409 b = find_next_item(Vcb, &tp, &next_tp, FALSE);
4410
4411 len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
4412
4413 if (tp.item->key.offset < end_data && tp.item->key.offset + len >= start_data) {
4414 if (ed->compression != BTRFS_COMPRESSION_NONE) {
4415 FIXME("FIXME - compression not supported at present\n");
4416 Status = STATUS_NOT_SUPPORTED;
4417 goto end;
4418 }
4419
4420 if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
4421 WARN("root %llx, inode %llx, extent %llx: encryption not supported (type %x)\n", fcb->subvol->id, fcb->inode, tp.item->key.offset, ed->encryption);
4422 Status = STATUS_NOT_SUPPORTED;
4423 goto end;
4424 }
4425
4426 if (ed->encoding != BTRFS_ENCODING_NONE) {
4427 WARN("other encodings not supported\n");
4428 Status = STATUS_NOT_SUPPORTED;
4429 goto end;
4430 }
4431
4432 if (ed->type == EXTENT_TYPE_INLINE) {
4433 if (start_data <= tp.item->key.offset && end_data >= tp.item->key.offset + len) { // remove all
4434 delete_tree_item(Vcb, &tp, rollback);
4435
4436 fcb->inode_item.st_blocks -= len;
4437 } else if (start_data <= tp.item->key.offset && end_data < tp.item->key.offset + len) { // remove beginning
4438 EXTENT_DATA* ned;
4439 UINT64 size;
4440
4441 delete_tree_item(Vcb, &tp, rollback);
4442
4443 size = len - (end_data - tp.item->key.offset);
4444
4445 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
4446 if (!ned) {
4447 ERR("out of memory\n");
4448 Status = STATUS_INSUFFICIENT_RESOURCES;
4449 goto end;
4450 }
4451
4452 ned->generation = Vcb->superblock.generation;
4453 ned->decoded_size = size;
4454 ned->compression = ed->compression;
4455 ned->encryption = ed->encryption;
4456 ned->encoding = ed->encoding;
4457 ned->type = ed->type;
4458
4459 RtlCopyMemory(&ned->data[0], &ed->data[end_data - tp.item->key.offset], size);
4460
4461 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, end_data, ned, sizeof(EXTENT_DATA) - 1 + size, NULL, rollback)) {
4462 ERR("insert_tree_item failed\n");
4463 ExFreePool(ned);
4464 Status = STATUS_INTERNAL_ERROR;
4465 goto end;
4466 }
4467
4468 fcb->inode_item.st_blocks -= end_data - tp.item->key.offset;
4469 } else if (start_data > tp.item->key.offset && end_data >= tp.item->key.offset + len) { // remove end
4470 EXTENT_DATA* ned;
4471 UINT64 size;
4472
4473 delete_tree_item(Vcb, &tp, rollback);
4474
4475 size = start_data - tp.item->key.offset;
4476
4477 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
4478 if (!ned) {
4479 ERR("out of memory\n");
4480 Status = STATUS_INSUFFICIENT_RESOURCES;
4481 goto end;
4482 }
4483
4484 ned->generation = Vcb->superblock.generation;
4485 ned->decoded_size = size;
4486 ned->compression = ed->compression;
4487 ned->encryption = ed->encryption;
4488 ned->encoding = ed->encoding;
4489 ned->type = ed->type;
4490
4491 RtlCopyMemory(&ned->data[0], &ed->data[0], size);
4492
4493 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, sizeof(EXTENT_DATA) - 1 + size, NULL, rollback)) {
4494 ERR("insert_tree_item failed\n");
4495 ExFreePool(ned);
4496 Status = STATUS_INTERNAL_ERROR;
4497 goto end;
4498 }
4499
4500 fcb->inode_item.st_blocks -= tp.item->key.offset + len - start_data;
4501 } else if (start_data > tp.item->key.offset && end_data < tp.item->key.offset + len) { // remove middle
4502 EXTENT_DATA* ned;
4503 UINT64 size;
4504
4505 delete_tree_item(Vcb, &tp, rollback);
4506
4507 size = start_data - tp.item->key.offset;
4508
4509 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
4510 if (!ned) {
4511 ERR("out of memory\n");
4512 Status = STATUS_INSUFFICIENT_RESOURCES;
4513 goto end;
4514 }
4515
4516 ned->generation = Vcb->superblock.generation;
4517 ned->decoded_size = size;
4518 ned->compression = ed->compression;
4519 ned->encryption = ed->encryption;
4520 ned->encoding = ed->encoding;
4521 ned->type = ed->type;
4522
4523 RtlCopyMemory(&ned->data[0], &ed->data[0], size);
4524
4525 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, sizeof(EXTENT_DATA) - 1 + size, NULL, rollback)) {
4526 ERR("insert_tree_item failed\n");
4527 ExFreePool(ned);
4528 Status = STATUS_INTERNAL_ERROR;
4529 goto end;
4530 }
4531
4532 size = tp.item->key.offset + len - end_data;
4533
4534 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
4535 if (!ned) {
4536 ERR("out of memory\n");
4537 Status = STATUS_INSUFFICIENT_RESOURCES;
4538 goto end;
4539 }
4540
4541 ned->generation = Vcb->superblock.generation;
4542 ned->decoded_size = size;
4543 ned->compression = ed->compression;
4544 ned->encryption = ed->encryption;
4545 ned->encoding = ed->encoding;
4546 ned->type = ed->type;
4547
4548 RtlCopyMemory(&ned->data[0], &ed->data[end_data - tp.item->key.offset], size);
4549
4550 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, end_data, ned, sizeof(EXTENT_DATA) - 1 + size, NULL, rollback)) {
4551 ERR("insert_tree_item failed\n");
4552 ExFreePool(ned);
4553 Status = STATUS_INTERNAL_ERROR;
4554 goto end;
4555 }
4556
4557 fcb->inode_item.st_blocks -= end_data - start_data;
4558 }
4559 } else if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
4560 if (start_data <= tp.item->key.offset && end_data >= tp.item->key.offset + len) { // remove all
4561 if (ed2->address != 0) {
4562 Status = remove_extent_ref(Vcb, ed2->address, ed2->size, fcb->subvol, fcb->inode, tp.item->key.offset - ed2->offset, changed_sector_list, rollback);
4563 if (!NT_SUCCESS(Status)) {
4564 ERR("remove_extent_ref returned %08x\n", Status);
4565 goto end;
4566 }
4567
4568 fcb->inode_item.st_blocks -= len;
4569 }
4570
4571 delete_tree_item(Vcb, &tp, rollback);
4572 } else if (start_data <= tp.item->key.offset && end_data < tp.item->key.offset + len) { // remove beginning
4573 EXTENT_DATA* ned;
4574 EXTENT_DATA2* ned2;
4575
4576 if (ed2->address != 0)
4577 fcb->inode_item.st_blocks -= end_data - tp.item->key.offset;
4578
4579 delete_tree_item(Vcb, &tp, rollback);
4580
4581 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
4582 if (!ned) {
4583 ERR("out of memory\n");
4584 Status = STATUS_INSUFFICIENT_RESOURCES;
4585 goto end;
4586 }
4587
4588 ned2 = (EXTENT_DATA2*)&ned->data[0];
4589
4590 ned->generation = Vcb->superblock.generation;
4591 ned->decoded_size = ed->decoded_size;
4592 ned->compression = ed->compression;
4593 ned->encryption = ed->encryption;
4594 ned->encoding = ed->encoding;
4595 ned->type = ed->type;
4596 ned2->address = ed2->address;
4597 ned2->size = ed2->size;
4598 ned2->offset = ed2->address == 0 ? 0 : (ed2->offset + (end_data - tp.item->key.offset));
4599 ned2->num_bytes = ed2->num_bytes - (end_data - tp.item->key.offset);
4600
4601 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, end_data, ned, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
4602 ERR("insert_tree_item failed\n");
4603 ExFreePool(ned);
4604 Status = STATUS_INTERNAL_ERROR;
4605 goto end;
4606 }
4607 } else if (start_data > tp.item->key.offset && end_data >= tp.item->key.offset + len) { // remove end
4608 EXTENT_DATA* ned;
4609 EXTENT_DATA2* ned2;
4610
4611 if (ed2->address != 0)
4612 fcb->inode_item.st_blocks -= tp.item->key.offset + len - start_data;
4613
4614 delete_tree_item(Vcb, &tp, rollback);
4615
4616 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
4617 if (!ned) {
4618 ERR("out of memory\n");
4619 Status = STATUS_INSUFFICIENT_RESOURCES;
4620 goto end;
4621 }
4622
4623 ned2 = (EXTENT_DATA2*)&ned->data[0];
4624
4625 ned->generation = Vcb->superblock.generation;
4626 ned->decoded_size = ed->decoded_size;
4627 ned->compression = ed->compression;
4628 ned->encryption = ed->encryption;
4629 ned->encoding = ed->encoding;
4630 ned->type = ed->type;
4631 ned2->address = ed2->address;
4632 ned2->size = ed2->size;
4633 ned2->offset = ed2->address == 0 ? 0 : ed2->offset;
4634 ned2->num_bytes = start_data - tp.item->key.offset;
4635
4636 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
4637 ERR("insert_tree_item failed\n");
4638 ExFreePool(ned);
4639 Status = STATUS_INTERNAL_ERROR;
4640 goto end;
4641 }
4642 } else if (start_data > tp.item->key.offset && end_data < tp.item->key.offset + len) { // remove middle
4643 EXTENT_DATA* ned;
4644 EXTENT_DATA2* ned2;
4645
4646 if (ed2->address != 0)
4647 fcb->inode_item.st_blocks -= end_data - start_data;
4648
4649 delete_tree_item(Vcb, &tp, rollback);
4650
4651 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
4652 if (!ned) {
4653 ERR("out of memory\n");
4654 Status = STATUS_INSUFFICIENT_RESOURCES;
4655 goto end;
4656 }
4657
4658 ned2 = (EXTENT_DATA2*)&ned->data[0];
4659
4660 ned->generation = Vcb->superblock.generation;
4661 ned->decoded_size = ed->decoded_size;
4662 ned->compression = ed->compression;
4663 ned->encryption = ed->encryption;
4664 ned->encoding = ed->encoding;
4665 ned->type = ed->type;
4666 ned2->address = ed2->address;
4667 ned2->size = ed2->size;
4668 ned2->offset = ed2->address == 0 ? 0 : ed2->offset;
4669 ned2->num_bytes = start_data - tp.item->key.offset;
4670
4671 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
4672 ERR("insert_tree_item failed\n");
4673 ExFreePool(ned);
4674 Status = STATUS_INTERNAL_ERROR;
4675 goto end;
4676 }
4677
4678 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
4679 if (!ned) {
4680 ERR("out of memory\n");
4681 Status = STATUS_INSUFFICIENT_RESOURCES;
4682 goto end;
4683 }
4684
4685 ned2 = (EXTENT_DATA2*)&ned->data[0];
4686
4687 ned->generation = Vcb->superblock.generation;
4688 ned->decoded_size = ed->decoded_size;
4689 ned->compression = ed->compression;
4690 ned->encryption = ed->encryption;
4691 ned->encoding = ed->encoding;
4692 ned->type = ed->type;
4693 ned2->address = ed2->address;
4694 ned2->size = ed2->size;
4695 ned2->offset = ed2->address == 0 ? 0 : (ed2->offset + (end_data - tp.item->key.offset));
4696 ned2->num_bytes = tp.item->key.offset + len - end_data;
4697
4698 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, end_data, ned, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
4699 ERR("insert_tree_item failed\n");
4700 ExFreePool(ned);
4701 Status = STATUS_INTERNAL_ERROR;
4702 goto end;
4703 }
4704 }
4705 }
4706 }
4707
4708 if (b) {
4709 free_traverse_ptr(&tp);
4710 tp = next_tp;
4711
4712 if (tp.item->key.obj_id > fcb->inode || tp.item->key.obj_type > TYPE_EXTENT_DATA || tp.item->key.offset >= end_data)
4713 break;
4714 }
4715 } while (b);
4716
4717 // FIXME - do bitmap analysis of changed extents, and free what we can
4718
4719 Status = STATUS_SUCCESS;
4720
4721 end:
4722 free_traverse_ptr(&tp);
4723
4724 return Status;
4725 }
4726
4727 static BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start_data, UINT64 length, void* data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
4728 UINT64 address;
4729 NTSTATUS Status;
4730 EXTENT_ITEM_DATA_REF* eidr;
4731 EXTENT_DATA* ed;
4732 EXTENT_DATA2* ed2;
4733 ULONG edsize = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
4734 changed_sector* sc;
4735 traverse_ptr tp;
4736 int i;
4737
4738 TRACE("(%p, (%llx, %llx), %llx, %llx, %llx, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, c->offset, start_data, length, data, changed_sector_list);
4739
4740 if (!find_address_in_chunk(Vcb, c, length, &address))
4741 return FALSE;
4742
4743 eidr = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_DATA_REF), ALLOC_TAG);
4744 if (!eidr) {
4745 ERR("out of memory\n");
4746 return FALSE;
4747 }
4748
4749 eidr->ei.refcount = 1;
4750 eidr->ei.generation = Vcb->superblock.generation;
4751 eidr->ei.flags = EXTENT_ITEM_DATA;
4752 eidr->type = TYPE_EXTENT_DATA_REF;
4753 eidr->edr.root = fcb->subvol->id;
4754 eidr->edr.objid = fcb->inode;
4755 eidr->edr.offset = start_data;
4756 eidr->edr.count = 1;
4757
4758 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, length, eidr, sizeof(EXTENT_ITEM_DATA_REF), &tp, rollback)) {
4759 ERR("insert_tree_item failed\n");
4760 ExFreePool(eidr);
4761 return FALSE;
4762 }
4763
4764 tp.tree->header.generation = eidr->ei.generation;
4765
4766 free_traverse_ptr(&tp);
4767
4768 Status = write_data(Vcb, address, data, length);
4769 if (!NT_SUCCESS(Status)) {
4770 ERR("write_data returned %08x\n", Status);
4771 return FALSE;
4772 }
4773
4774 if (changed_sector_list) {
4775 sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
4776 if (!sc) {
4777 ERR("out of memory\n");
4778 return FALSE;
4779 }
4780
4781 sc->ol.key = address;
4782 sc->length = length / Vcb->superblock.sector_size;
4783 sc->deleted = FALSE;
4784
4785 sc->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sc->length, ALLOC_TAG);
4786 if (!sc->checksums) {
4787 ERR("out of memory\n");
4788 ExFreePool(sc);
4789 return FALSE;
4790 }
4791
4792 for (i = 0; i < sc->length; i++) {
4793 sc->checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
4794 }
4795
4796 insert_into_ordered_list(changed_sector_list, &sc->ol);
4797 }
4798
4799 // add extent data to inode
4800 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
4801 if (!ed) {
4802 ERR("out of memory\n");
4803 return FALSE;
4804 }
4805
4806 ed->generation = Vcb->superblock.generation;
4807 ed->decoded_size = length;
4808 ed->compression = BTRFS_COMPRESSION_NONE;
4809 ed->encryption = BTRFS_ENCRYPTION_NONE;
4810 ed->encoding = BTRFS_ENCODING_NONE;
4811 ed->type = EXTENT_TYPE_REGULAR;
4812
4813 ed2 = (EXTENT_DATA2*)ed->data;
4814 ed2->address = address;
4815 ed2->size = length;
4816 ed2->offset = 0;
4817 ed2->num_bytes = length;
4818
4819 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, start_data, ed, edsize, NULL, rollback)) {
4820 ERR("insert_tree_item failed\n");
4821 ExFreePool(ed);
4822 return FALSE;
4823 }
4824
4825 increase_chunk_usage(c, length);
4826 add_to_space_list(c, address, length, SPACE_TYPE_WRITING);
4827
4828 fcb->inode_item.st_blocks += length;
4829
4830 return TRUE;
4831 }
4832
4833 static BOOL extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data,
4834 LIST_ENTRY* changed_sector_list, traverse_ptr* edtp, traverse_ptr* eitp, LIST_ENTRY* rollback) {
4835 EXTENT_DATA* ed;
4836 EXTENT_DATA2* ed2;
4837 EXTENT_ITEM* ei;
4838 NTSTATUS Status;
4839 changed_sector* sc;
4840 chunk* c;
4841 int i;
4842
4843 TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data,
4844 length, data, changed_sector_list, edtp, eitp);
4845
4846 ed = ExAllocatePoolWithTag(PagedPool, edtp->item->size, ALLOC_TAG);
4847 if (!ed) {
4848 ERR("out of memory\n");
4849 return FALSE;
4850 }
4851
4852 RtlCopyMemory(ed, edtp->item->data, edtp->item->size);
4853
4854 ed->decoded_size += length;
4855 ed2 = (EXTENT_DATA2*)ed->data;
4856 ed2->size += length;
4857 ed2->num_bytes += length;
4858
4859 delete_tree_item(Vcb, edtp, rollback);
4860
4861 if (!insert_tree_item(Vcb, fcb->subvol, edtp->item->key.obj_id, edtp->item->key.obj_type, edtp->item->key.offset, ed, edtp->item->size, NULL, rollback)) {
4862 TRACE("insert_tree_item failed\n");
4863
4864 ExFreePool(ed);
4865 return FALSE;
4866 }
4867
4868 ei = ExAllocatePoolWithTag(PagedPool, eitp->item->size, ALLOC_TAG);
4869 if (!ei) {
4870 ERR("out of memory\n");
4871 ExFreePool(ed);
4872 return FALSE;
4873 }
4874
4875 RtlCopyMemory(ei, eitp->item->data, eitp->item->size);
4876
4877 if (!insert_tree_item(Vcb, Vcb->extent_root, eitp->item->key.obj_id, eitp->item->key.obj_type, eitp->item->key.offset + length, ei, eitp->item->size, NULL, rollback)) {
4878 ERR("insert_tree_item failed\n");
4879
4880 ExFreePool(ei);
4881 return FALSE;
4882 }
4883
4884 delete_tree_item(Vcb, eitp, rollback);
4885
4886 Status = write_data(Vcb, eitp->item->key.obj_id + eitp->item->key.offset, data, length);
4887 if (!NT_SUCCESS(Status)) {
4888 ERR("write_data returned %08x\n", Status);
4889 return FALSE;
4890 }
4891
4892 if (changed_sector_list) {
4893 sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
4894 if (!sc) {
4895 ERR("out of memory\n");
4896 return FALSE;
4897 }
4898
4899 sc->ol.key = eitp->item->key.obj_id + eitp->item->key.offset;
4900 sc->length = length / Vcb->superblock.sector_size;
4901 sc->deleted = FALSE;
4902
4903 sc->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sc->length, ALLOC_TAG);
4904 if (!sc->checksums) {
4905 ERR("out of memory\n");
4906 ExFreePool(sc);
4907 return FALSE;
4908 }
4909
4910 for (i = 0; i < sc->length; i++) {
4911 sc->checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
4912 }
4913 insert_into_ordered_list(changed_sector_list, &sc->ol);
4914 }
4915
4916 c = get_chunk_from_address(Vcb, eitp->item->key.obj_id);
4917
4918 if (c) {
4919 increase_chunk_usage(c, length);
4920
4921 add_to_space_list(c, eitp->item->key.obj_id + eitp->item->key.offset, length, SPACE_TYPE_WRITING);
4922 }
4923
4924 fcb->inode_item.st_blocks += length;
4925
4926 return TRUE;
4927 }
4928
4929 static BOOL try_extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data,
4930 LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
4931 KEY searchkey;
4932 traverse_ptr tp, tp2;
4933 BOOL success = FALSE;
4934 EXTENT_DATA* ed;
4935 EXTENT_DATA2* ed2;
4936 EXTENT_ITEM* ei;
4937 chunk* c;
4938 LIST_ENTRY* le;
4939 space* s;
4940 NTSTATUS Status;
4941
4942 searchkey.obj_id = fcb->inode;
4943 searchkey.obj_type = TYPE_EXTENT_DATA;
4944 searchkey.offset = start_data;
4945
4946 Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
4947 if (!NT_SUCCESS(Status)) {
4948 ERR("error - find_item returned %08x\n", Status);
4949 return FALSE;
4950 }
4951
4952 if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || tp.item->key.offset >= start_data) {
4953 WARN("previous EXTENT_DATA not found\n");
4954 goto end;
4955 }
4956
4957 ed = (EXTENT_DATA*)tp.item->data;
4958
4959 if (tp.item->size < sizeof(EXTENT_DATA)) {
4960 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
4961 goto end;
4962 }
4963
4964 if (ed->type != EXTENT_TYPE_REGULAR) {
4965 TRACE("not extending extent which is not EXTENT_TYPE_REGULAR\n");
4966 goto end;
4967 }
4968
4969 ed2 = (EXTENT_DATA2*)ed->data;
4970
4971 if (tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
4972 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
4973 goto end;
4974 }
4975
4976 if (tp.item->key.offset + ed2->num_bytes != start_data) {
4977 TRACE("last EXTENT_DATA does not run up to start_data (%llx + %llx != %llx)\n", tp.item->key.offset, ed2->num_bytes, start_data);
4978 goto end;
4979 }
4980
4981 if (ed->compression != BTRFS_COMPRESSION_NONE) {
4982 FIXME("FIXME: compression not yet supported\n");
4983 goto end;
4984 }
4985
4986 if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
4987 WARN("encryption not supported\n");
4988 goto end;
4989 }
4990
4991 if (ed->encoding != BTRFS_ENCODING_NONE) {
4992 WARN("other encodings not supported\n");
4993 goto end;
4994 }
4995
4996 if (ed2->size - ed2->offset != ed2->num_bytes) {
4997 TRACE("last EXTENT_DATA does not run all the way to the end of the extent\n");
4998 goto end;
4999 }
5000
5001 searchkey.obj_id = ed2->address;
5002 searchkey.obj_type = TYPE_EXTENT_ITEM;
5003 searchkey.offset = ed2->size;
5004
5005 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
5006 if (!NT_SUCCESS(Status)) {
5007 ERR("error - find_item returned %08x\n", Status);
5008 goto end;
5009 }
5010
5011 if (keycmp(&tp2.item->key, &searchkey)) {
5012 ERR("error - extent %llx,%llx not found in tree\n", ed2->address, ed2->size);
5013 int3; // TESTING
5014 goto end2;
5015 }
5016
5017 if (tp2.item->size == sizeof(EXTENT_ITEM_V0)) { // old extent ref, convert
5018 NTSTATUS Status = convert_old_data_extent(Vcb, ed2->address, ed2->size, rollback);
5019 if (!NT_SUCCESS(Status)) {
5020 ERR("convert_old_data_extent returned %08x\n", Status);
5021 goto end2;
5022 }
5023
5024 free_traverse_ptr(&tp2);
5025
5026 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
5027 if (!NT_SUCCESS(Status)) {
5028 ERR("error - find_item returned %08x\n", Status);
5029 goto end;
5030 }
5031
5032 if (keycmp(&tp2.item->key, &searchkey)) {
5033 WARN("extent item not found for address %llx, size %llx\n", ed2->address, ed2->size);
5034 goto end2;
5035 }
5036 }
5037
5038 ei = (EXTENT_ITEM*)tp2.item->data;
5039
5040 if (tp.item->size < sizeof(EXTENT_ITEM)) {
5041 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
5042 goto end2;
5043 }
5044
5045 // FIXME - test this
5046 if (extent_item_is_shared(ei, tp2.item->size - sizeof(EXTENT_ITEM))) {
5047 NTSTATUS Status = convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
5048 if (!NT_SUCCESS(Status)) {
5049 ERR("convert_shared_data_extent returned %08x\n", Status);
5050 goto end2;
5051 }
5052
5053 free_traverse_ptr(&tp2);
5054
5055 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
5056 if (!NT_SUCCESS(Status)) {
5057 ERR("error - find_item returned %08x\n", Status);
5058 goto end;
5059 }
5060
5061 if (keycmp(&tp2.item->key, &searchkey)) {
5062 WARN("extent item not found for address %llx, size %llx\n", ed2->address, ed2->size);
5063 goto end2;
5064 }
5065
5066 ei = (EXTENT_ITEM*)tp2.item->data;
5067
5068 if (tp.item->size < sizeof(EXTENT_ITEM)) {
5069 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
5070 goto end2;
5071 }
5072 }
5073
5074 if (ei->refcount != 1) {
5075 TRACE("extent refcount was not 1\n");
5076 goto end2;
5077 }
5078
5079 if (ei->flags != EXTENT_ITEM_DATA) {
5080 ERR("error - extent was not a data extent\n");
5081 goto end2;
5082 }
5083
5084 c = get_chunk_from_address(Vcb, ed2->address);
5085
5086 le = c->space.Flink;
5087 while (le != &c->space) {
5088 s = CONTAINING_RECORD(le, space, list_entry);
5089
5090 if (s->offset == ed2->address + ed2->size) {
5091 if (s->type == SPACE_TYPE_FREE && s->size >= length) {
5092 success = extend_data(Vcb, fcb, start_data, length, data, changed_sector_list, &tp, &tp2, rollback);
5093 }
5094 break;
5095 } else if (s->offset > ed2->address + ed2->size)
5096 break;
5097
5098 le = le->Flink;
5099 }
5100
5101 end2:
5102 free_traverse_ptr(&tp2);
5103
5104 end:
5105 free_traverse_ptr(&tp);
5106
5107 return success;
5108 }
5109
5110 NTSTATUS insert_sparse_extent(device_extension* Vcb, root* r, UINT64 inode, UINT64 start, UINT64 length, LIST_ENTRY* rollback) {
5111 EXTENT_DATA* ed;
5112 EXTENT_DATA2* ed2;
5113
5114 TRACE("(%p, %llx, %llx, %llx, %llx)\n", Vcb, r->id, inode, start, length);
5115
5116 ed = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
5117 if (!ed) {
5118 ERR("out of memory\n");
5119 return STATUS_INSUFFICIENT_RESOURCES;
5120 }
5121
5122 ed->generation = Vcb->superblock.generation;
5123 ed->decoded_size = length;
5124 ed->compression = BTRFS_COMPRESSION_NONE;
5125 ed->encryption = BTRFS_ENCRYPTION_NONE;
5126 ed->encoding = BTRFS_ENCODING_NONE;
5127 ed->type = EXTENT_TYPE_REGULAR;
5128
5129 ed2 = (EXTENT_DATA2*)ed->data;
5130 ed2->address = 0;
5131 ed2->size = 0;
5132 ed2->offset = 0;
5133 ed2->num_bytes = length;
5134
5135 if (!insert_tree_item(Vcb, r, inode, TYPE_EXTENT_DATA, start, ed, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
5136 ERR("insert_tree_item failed\n");
5137 ExFreePool(ed);
5138 return STATUS_INTERNAL_ERROR;
5139 }
5140
5141 return STATUS_SUCCESS;
5142 }
5143
5144 // static void print_tree(tree* t) {
5145 // LIST_ENTRY* le = t->itemlist.Flink;
5146 // while (le != &t->itemlist) {
5147 // tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
5148 // ERR("%llx,%x,%llx (ignore = %s)\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->ignore ? "TRUE" : "FALSE");
5149 // le = le->Flink;
5150 // }
5151 // }
5152
5153 static NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
5154 LIST_ENTRY* le = Vcb->chunks.Flink;
5155 chunk* c;
5156 KEY searchkey;
5157 UINT64 flags;
5158
5159 TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data, changed_sector_list);
5160
5161 // FIXME - split data up if not enough space for just one extent
5162
5163 if (start_data > 0 && try_extend_data(Vcb, fcb, start_data, length, data, changed_sector_list, rollback))
5164 return STATUS_SUCCESS;
5165
5166 // if there is a gap before start_data, plug it with a sparse extent
5167 if (start_data > 0) {
5168 traverse_ptr tp;
5169 NTSTATUS Status;
5170 EXTENT_DATA* ed;
5171 UINT64 len;
5172
5173 searchkey.obj_id = fcb->inode;
5174 searchkey.obj_type = TYPE_EXTENT_DATA;
5175 searchkey.offset = start_data;
5176
5177 Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
5178 if (!NT_SUCCESS(Status)) {
5179 ERR("error - find_item returned %08x\n", Status);
5180 return Status;
5181 }
5182
5183 // if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || tp.item->key.offset >= start_data) {
5184 // traverse_ptr next_tp;
5185 //
5186 // ERR("error - did not find EXTENT_DATA expected - looking for %llx,%x,%llx, found %llx,%x,%llx\n",
5187 // searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
5188 // print_tree(tp.tree);
5189 //
5190 // if (find_next_item(Vcb, &tp, &next_tp, FALSE)) {
5191 // ERR("---\n");
5192 // ERR("key = %llx,%x,%llx\n", next_tp.tree->paritem->key.obj_id, next_tp.tree->paritem->key.obj_type, next_tp.tree->paritem->key.offset);
5193 // print_tree(next_tp.tree);
5194 //
5195 // free_traverse_ptr(&next_tp);
5196 // } else
5197 // ERR("next item not found\n");
5198 //
5199 // int3;
5200 // free_traverse_ptr(&tp);
5201 // return STATUS_INTERNAL_ERROR;
5202 // }
5203
5204 if (tp.item->key.obj_type == TYPE_EXTENT_DATA && tp.item->size >= sizeof(EXTENT_DATA)) {
5205 EXTENT_DATA2* ed2;
5206
5207 ed = (EXTENT_DATA*)tp.item->data;
5208 ed2 = (EXTENT_DATA2*)ed->data;
5209
5210 len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
5211 } else
5212 ed = NULL;
5213
5214 if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || !ed || tp.item->key.offset + len < start_data) {
5215 if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA)
5216 Status = insert_sparse_extent(Vcb, fcb->subvol, fcb->inode, 0, start_data, rollback);
5217 else if (!ed)
5218 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
5219 else {
5220 Status = insert_sparse_extent(Vcb, fcb->subvol, fcb->inode, tp.item->key.offset + len,
5221 start_data - tp.item->key.offset - len, rollback);
5222 }
5223 if (!NT_SUCCESS(Status)) {
5224 ERR("insert_sparse_extent returned %08x\n", Status);
5225 free_traverse_ptr(&tp);
5226 return Status;
5227 }
5228 }
5229
5230 free_traverse_ptr(&tp);
5231 }
5232
5233 // FIXME - how do we know which RAID level to put this to?
5234 flags = BLOCK_FLAG_DATA; // SINGLE
5235
5236 // if (!chunk_test) { // TESTING
5237 // if ((c = alloc_chunk(Vcb, flags, NULL))) {
5238 // ERR("chunk_item->type = %llx\n", c->chunk_item->type);
5239 // ERR("size = %llx\n", c->chunk_item->size);
5240 // ERR("used = %llx\n", c->used);
5241 //
5242 // if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
5243 // if (insert_extent_chunk(Vcb, fcb, c, start_data, length, data, changed_sector_list)) {
5244 // // chunk_test = TRUE;
5245 // ERR("SUCCESS\n");
5246 // return STATUS_SUCCESS;
5247 // } else
5248 // ERR(":-(\n");
5249 // } else
5250 // ERR("???\n");
5251 // }
5252 // }
5253
5254 while (le != &Vcb->chunks) {
5255 c = CONTAINING_RECORD(le, chunk, list_entry);
5256
5257 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
5258 if (insert_extent_chunk(Vcb, fcb, c, start_data, length, data, changed_sector_list, rollback))
5259 return STATUS_SUCCESS;
5260 }
5261
5262 le = le->Flink;
5263 }
5264
5265 if ((c = alloc_chunk(Vcb, flags, rollback))) {
5266 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
5267 if (insert_extent_chunk(Vcb, fcb, c, start_data, length, data, changed_sector_list, rollback))
5268 return STATUS_SUCCESS;
5269 }
5270 }
5271
5272 // FIXME - rebalance chunks if free space elsewhere?
5273 WARN("couldn't find any data chunks with %llx bytes free\n", length);
5274
5275 return STATUS_DISK_FULL;
5276 }
5277
5278 void update_checksum_tree(device_extension* Vcb, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
5279 LIST_ENTRY* le = changed_sector_list->Flink;
5280 changed_sector* cs;
5281 traverse_ptr tp, next_tp;
5282 KEY searchkey;
5283 UINT32* data;
5284 NTSTATUS Status;
5285
5286 if (!Vcb->checksum_root) {
5287 ERR("no checksum root\n");
5288 goto exit;
5289 }
5290
5291 while (le != changed_sector_list) {
5292 UINT64 startaddr, endaddr;
5293 ULONG len;
5294 UINT32* checksums;
5295 RTL_BITMAP bmp;
5296 ULONG* bmparr;
5297 ULONG runlength, index;
5298
5299 cs = (changed_sector*)le;
5300
5301 searchkey.obj_id = EXTENT_CSUM_ID;
5302 searchkey.obj_type = TYPE_EXTENT_CSUM;
5303 searchkey.offset = cs->ol.key;
5304
5305 // FIXME - create checksum_root if it doesn't exist at all
5306
5307 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE);
5308 if (!NT_SUCCESS(Status)) { // tree is completely empty
5309 // FIXME - do proper check here that tree is empty
5310 if (!cs->deleted) {
5311 checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * cs->length, ALLOC_TAG);
5312 if (!checksums) {
5313 ERR("out of memory\n");
5314 goto exit;
5315 }
5316
5317 RtlCopyMemory(checksums, cs->checksums, sizeof(UINT32) * cs->length);
5318
5319 if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, cs->ol.key, checksums, sizeof(UINT32) * cs->length, NULL, rollback)) {
5320 ERR("insert_tree_item failed\n");
5321 ExFreePool(checksums);
5322 goto exit;
5323 }
5324 }
5325 } else {
5326 UINT32 tplen;
5327
5328 // FIXME - check entry is TYPE_EXTENT_CSUM?
5329
5330 if (tp.item->key.offset < cs->ol.key && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= cs->ol.key)
5331 startaddr = tp.item->key.offset;
5332 else
5333 startaddr = cs->ol.key;
5334
5335 free_traverse_ptr(&tp);
5336
5337 searchkey.obj_id = EXTENT_CSUM_ID;
5338 searchkey.obj_type = TYPE_EXTENT_CSUM;
5339 searchkey.offset = cs->ol.key + (cs->length * Vcb->superblock.sector_size);
5340
5341 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE);
5342 if (!NT_SUCCESS(Status)) {
5343 ERR("error - find_item returned %08x\n", Status);
5344 goto exit;
5345 }
5346
5347 tplen = tp.item->size / sizeof(UINT32);
5348
5349 if (tp.item->key.offset + (tplen * Vcb->superblock.sector_size) >= cs->ol.key + (cs->length * Vcb->superblock.sector_size))
5350 endaddr = tp.item->key.offset + (tplen * Vcb->superblock.sector_size);
5351 else
5352 endaddr = cs->ol.key + (cs->length * Vcb->superblock.sector_size);
5353
5354 free_traverse_ptr(&tp);
5355
5356 TRACE("cs starts at %llx (%x sectors)\n", cs->ol.key, cs->length);
5357 TRACE("startaddr = %llx\n", startaddr);
5358 TRACE("endaddr = %llx\n", endaddr);
5359
5360 len = (endaddr - startaddr) / Vcb->superblock.sector_size;
5361
5362 checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * len, ALLOC_TAG);
5363 if (!checksums) {
5364 ERR("out of memory\n");
5365 goto exit;
5366 }
5367
5368 bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG);
5369 if (!bmparr) {
5370 ERR("out of memory\n");
5371 ExFreePool(checksums);
5372 goto exit;
5373 }
5374
5375 RtlInitializeBitMap(&bmp, bmparr, len);
5376 RtlSetAllBits(&bmp);
5377
5378 searchkey.obj_id = EXTENT_CSUM_ID;
5379 searchkey.obj_type = TYPE_EXTENT_CSUM;
5380 searchkey.offset = cs->ol.key;
5381
5382 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE);
5383 if (!NT_SUCCESS(Status)) {
5384 ERR("error - find_item returned %08x\n", Status);
5385 goto exit;
5386 }
5387
5388 // set bit = free space, cleared bit = allocated sector
5389
5390 // ERR("start loop\n");
5391 while (tp.item->key.offset < endaddr) {
5392 // ERR("%llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
5393 if (tp.item->key.offset >= startaddr) {
5394 if (tp.item->size > 0) {
5395 RtlCopyMemory(&checksums[(tp.item->key.offset - startaddr) / Vcb->superblock.sector_size], tp.item->data, tp.item->size);
5396 RtlClearBits(&bmp, (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size, tp.item->size / sizeof(UINT32));
5397 }
5398
5399 delete_tree_item(Vcb, &tp, rollback);
5400 }
5401
5402 if (find_next_item(Vcb, &tp, &next_tp, FALSE)) {
5403 free_traverse_ptr(&tp);
5404 tp = next_tp;
5405 } else
5406 break;
5407 }
5408 // ERR("end loop\n");
5409
5410 free_traverse_ptr(&tp);
5411
5412 if (cs->deleted) {
5413 RtlSetBits(&bmp, (cs->ol.key - startaddr) / Vcb->superblock.sector_size, cs->length);
5414 } else {
5415 RtlCopyMemory(&checksums[(cs->ol.key - startaddr) / Vcb->superblock.sector_size], cs->checksums, cs->length * sizeof(UINT32));
5416 RtlClearBits(&bmp, (cs->ol.key - startaddr) / Vcb->superblock.sector_size, cs->length);
5417 }
5418
5419 runlength = RtlFindFirstRunClear(&bmp, &index);
5420
5421 while (runlength != 0) {
5422 do {
5423 ULONG rl;
5424
5425 if (runlength * sizeof(UINT32) > MAX_CSUM_SIZE)
5426 rl = MAX_CSUM_SIZE / sizeof(UINT32);
5427 else
5428 rl = runlength;
5429
5430 data = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * rl, ALLOC_TAG);
5431 if (!data) {
5432 ERR("out of memory\n");
5433 ExFreePool(bmparr);
5434 ExFreePool(checksums);
5435 goto exit;
5436 }
5437
5438 RtlCopyMemory(data, &checksums[index], sizeof(UINT32) * rl);
5439
5440 if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, startaddr + (index * Vcb->superblock.sector_size), data, sizeof(UINT32) * rl, NULL, rollback)) {
5441 ERR("insert_tree_item failed\n");
5442 ExFreePool(data);
5443 ExFreePool(bmparr);
5444 ExFreePool(checksums);
5445 goto exit;
5446 }
5447
5448 runlength -= rl;
5449 index += rl;
5450 } while (runlength > 0);
5451
5452 runlength = RtlFindNextForwardRunClear(&bmp, index, &index);
5453 }
5454
5455 ExFreePool(bmparr);
5456 ExFreePool(checksums);
5457 }
5458
5459 le = le->Flink;
5460 }
5461
5462 exit:
5463 while (!IsListEmpty(changed_sector_list)) {
5464 le = RemoveHeadList(changed_sector_list);
5465 cs = (changed_sector*)le;
5466
5467 if (cs->checksums)
5468 ExFreePool(cs->checksums);
5469
5470 ExFreePool(cs);
5471 }
5472 }
5473
5474 NTSTATUS truncate_file(fcb* fcb, UINT64 end, LIST_ENTRY* rollback) {
5475 LIST_ENTRY changed_sector_list;
5476 NTSTATUS Status;
5477 BOOL nocsum = fcb->inode_item.flags & BTRFS_INODE_NODATASUM;
5478
5479 if (!nocsum)
5480 InitializeListHead(&changed_sector_list);
5481
5482 // FIXME - convert into inline extent if short enough
5483
5484 Status = excise_extents(fcb->Vcb, fcb, sector_align(end, fcb->Vcb->superblock.sector_size),
5485 sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size), nocsum ? NULL : &changed_sector_list, rollback);
5486 if (!NT_SUCCESS(Status)) {
5487 ERR("error - excise_extents failed\n");
5488 return Status;
5489 }
5490
5491 fcb->inode_item.st_size = end;
5492 TRACE("setting st_size to %llx\n", end);
5493
5494 fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size);
5495 fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size;
5496 fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size;
5497 // FIXME - inform cache manager of this
5498
5499 TRACE("fcb %p FileSize = %llx\n", fcb, fcb->Header.FileSize.QuadPart);
5500
5501 if (!nocsum)
5502 update_checksum_tree(fcb->Vcb, &changed_sector_list, rollback);
5503
5504 return STATUS_SUCCESS;
5505 }
5506
5507 NTSTATUS extend_file(fcb* fcb, UINT64 end, LIST_ENTRY* rollback) {
5508 UINT64 oldalloc, newalloc;
5509 KEY searchkey;
5510 traverse_ptr tp;
5511 BOOL cur_inline;
5512 NTSTATUS Status;
5513
5514 TRACE("(%p, %x, %p)\n", fcb, end, rollback);
5515
5516 if (fcb->ads) {
5517 FIXME("FIXME - support streams here\n"); // FIXME
5518 return STATUS_NOT_IMPLEMENTED;
5519 } else {
5520 searchkey.obj_id = fcb->inode;
5521 searchkey.obj_type = TYPE_EXTENT_DATA;
5522 searchkey.offset = 0xffffffffffffffff;
5523
5524 Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE);
5525 if (!NT_SUCCESS(Status)) {
5526 ERR("error - find_item returned %08x\n", Status);
5527 return Status;
5528 }
5529
5530 oldalloc = 0;
5531 if (tp.item->key.obj_id == fcb->inode && tp.item->key.obj_type == TYPE_EXTENT_DATA) {
5532 EXTENT_DATA* ed = (EXTENT_DATA*)tp.item->data;
5533 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
5534
5535 if (tp.item->size < sizeof(EXTENT_DATA)) {
5536 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
5537 free_traverse_ptr(&tp);
5538 return STATUS_INTERNAL_ERROR;
5539 }
5540
5541 oldalloc = tp.item->key.offset + (ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes);
5542 cur_inline = ed->type == EXTENT_TYPE_INLINE;
5543
5544 if (cur_inline && end > fcb->Vcb->max_inline) {
5545 LIST_ENTRY changed_sector_list;
5546 BOOL nocsum = fcb->inode_item.flags & BTRFS_INODE_NODATASUM;
5547 UINT64 origlength, length;
5548 UINT8* data;
5549
5550 TRACE("giving inline file proper extents\n");
5551
5552 origlength = ed->decoded_size;
5553
5554 cur_inline = FALSE;
5555
5556 if (!nocsum)
5557 InitializeListHead(&changed_sector_list);
5558
5559 delete_tree_item(fcb->Vcb, &tp, rollback);
5560
5561 length = sector_align(origlength, fcb->Vcb->superblock.sector_size);
5562
5563 data = ExAllocatePoolWithTag(PagedPool, length, ALLOC_TAG);
5564 if (!data) {
5565 ERR("could not allocate %llx bytes for data\n", length);
5566 free_traverse_ptr(&tp);
5567 return STATUS_INSUFFICIENT_RESOURCES;
5568 }
5569
5570 if (length > origlength)
5571 RtlZeroMemory(data + origlength, length - origlength);
5572
5573 RtlCopyMemory(data, ed->data, origlength);
5574
5575 fcb->inode_item.st_blocks -= origlength;
5576
5577 Status = insert_extent(fcb->Vcb, fcb, tp.item->key.offset, length, data, nocsum ? NULL : &changed_sector_list, rollback);
5578 if (!NT_SUCCESS(Status)) {
5579 ERR("insert_extent returned %08x\n", Status);
5580 free_traverse_ptr(&tp);
5581 ExFreePool(data);
5582 return Status;
5583 }
5584
5585 oldalloc = tp.item->key.offset + length;
5586
5587 ExFreePool(data);
5588
5589 if (!nocsum)
5590 update_checksum_tree(fcb->Vcb, &changed_sector_list, rollback);
5591 }
5592
5593 if (cur_inline) {
5594 ULONG edsize;
5595
5596 if (end > oldalloc) {
5597 edsize = sizeof(EXTENT_DATA) - 1 + end - tp.item->key.offset;
5598 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
5599
5600 if (!ed) {
5601 ERR("out of memory\n");
5602 free_traverse_ptr(&tp);
5603 return STATUS_INSUFFICIENT_RESOURCES;
5604 }
5605
5606 RtlZeroMemory(ed, edsize);
5607 RtlCopyMemory(ed, tp.item->data, tp.item->size);
5608
5609 ed->decoded_size = end - tp.item->key.offset;
5610
5611 delete_tree_item(fcb->Vcb, &tp, rollback);
5612
5613 if (!insert_tree_item(fcb->Vcb, fcb->subvol, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ed, edsize, NULL, rollback)) {
5614 ERR("error - failed to insert item\n");
5615 ExFreePool(ed);
5616 free_traverse_ptr(&tp);
5617 return STATUS_INTERNAL_ERROR;
5618 }
5619 }
5620
5621 TRACE("extending inline file (oldalloc = %llx, end = %llx)\n", oldalloc, end);
5622
5623 fcb->inode_item.st_size = end;
5624 TRACE("setting st_size to %llx\n", end);
5625
5626 fcb->inode_item.st_blocks = end;
5627
5628 fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
5629 } else {
5630 newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
5631
5632 if (newalloc > oldalloc) {
5633 Status = insert_sparse_extent(fcb->Vcb, fcb->subvol, fcb->inode, oldalloc, newalloc - oldalloc, rollback);
5634
5635 if (!NT_SUCCESS(Status)) {
5636 ERR("insert_sparse_extent returned %08x\n", Status);
5637 free_traverse_ptr(&tp);
5638 return Status;
5639 }
5640 }
5641
5642 fcb->inode_item.st_size = end;
5643 TRACE("setting st_size to %llx\n", end);
5644
5645 TRACE("newalloc = %llx\n", newalloc);
5646
5647 fcb->Header.AllocationSize.QuadPart = newalloc;
5648 fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
5649 }
5650 } else {
5651 if (end > fcb->Vcb->max_inline) {
5652 newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
5653
5654 Status = insert_sparse_extent(fcb->Vcb, fcb->subvol, fcb->inode, 0, newalloc, rollback);
5655
5656 if (!NT_SUCCESS(Status)) {
5657 ERR("insert_sparse_extent returned %08x\n", Status);
5658 free_traverse_ptr(&tp);
5659 return Status;
5660 }
5661
5662 fcb->inode_item.st_size = end;
5663 TRACE("setting st_size to %llx\n", end);
5664
5665 TRACE("newalloc = %llx\n", newalloc);
5666
5667 fcb->Header.AllocationSize.QuadPart = newalloc;
5668 fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
5669 } else {
5670 EXTENT_DATA* ed;
5671 ULONG edsize;
5672
5673 edsize = sizeof(EXTENT_DATA) - 1 + end;
5674 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
5675
5676 if (!ed) {
5677 ERR("out of memory\n");
5678 free_traverse_ptr(&tp);
5679 return STATUS_INSUFFICIENT_RESOURCES;
5680 }
5681
5682 ed->generation = fcb->Vcb->superblock.generation;
5683 ed->decoded_size = end;
5684 ed->compression = BTRFS_COMPRESSION_NONE;
5685 ed->encryption = BTRFS_ENCRYPTION_NONE;
5686 ed->encoding = BTRFS_ENCODING_NONE;
5687 ed->type = EXTENT_TYPE_INLINE;
5688
5689 RtlZeroMemory(ed->data, end);
5690
5691 if (!insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, 0, ed, edsize, NULL, rollback)) {
5692 ERR("error - failed to insert item\n");
5693 ExFreePool(ed);
5694 free_traverse_ptr(&tp);
5695 return STATUS_INTERNAL_ERROR;
5696 }
5697
5698 fcb->inode_item.st_size = end;
5699 TRACE("setting st_size to %llx\n", end);
5700
5701 fcb->inode_item.st_blocks = end;
5702
5703 fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
5704 }
5705 }
5706
5707 free_traverse_ptr(&tp);
5708 }
5709
5710 return STATUS_SUCCESS;
5711 }
5712
5713 static UINT64 get_extent_item_refcount(device_extension* Vcb, UINT64 address) {
5714 KEY searchkey;
5715 traverse_ptr tp;
5716 EXTENT_ITEM* ei;
5717 UINT64 rc;
5718 NTSTATUS Status;
5719
5720 searchkey.obj_id = address;
5721 searchkey.obj_type = TYPE_EXTENT_ITEM;
5722 searchkey.offset = 0xffffffffffffffff;
5723
5724 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
5725 if (!NT_SUCCESS(Status)) {
5726 ERR("error - find_item returned %08x\n", Status);
5727 return 0;
5728 }
5729
5730 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
5731 ERR("error - could not find EXTENT_ITEM for %llx\n", address);
5732 free_traverse_ptr(&tp);
5733 return 0;
5734 }
5735
5736 if (tp.item->size < sizeof(EXTENT_ITEM)) {
5737 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
5738 free_traverse_ptr(&tp);
5739 return 0;
5740 }
5741
5742 ei = (EXTENT_ITEM*)tp.item->data;
5743 rc = ei->refcount;
5744
5745 free_traverse_ptr(&tp);
5746
5747 return rc;
5748 }
5749
5750 static NTSTATUS do_nocow_write(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
5751 KEY searchkey;
5752 traverse_ptr tp, next_tp;
5753 NTSTATUS Status;
5754 EXTENT_DATA* ed;
5755 BOOL b, do_cow;
5756 EXTENT_DATA2* eds;
5757 UINT64 size, new_start, new_end, last_write = 0;
5758
5759 TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data, changed_sector_list);
5760
5761 searchkey.obj_id = fcb->inode;
5762 searchkey.obj_type = TYPE_EXTENT_DATA;
5763 searchkey.offset = start_data;
5764
5765 Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
5766 if (!NT_SUCCESS(Status)) {
5767 ERR("error - find_item returned %08x\n", Status);
5768 return Status;
5769 }
5770
5771 if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || tp.item->key.offset > start_data) {
5772 ERR("previous EXTENT_DATA not found (found %llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
5773 Status = STATUS_INTERNAL_ERROR;
5774 goto end;
5775 }
5776
5777 do {
5778 if (tp.item->size < sizeof(EXTENT_DATA)) {
5779 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
5780 Status = STATUS_INTERNAL_ERROR;
5781 goto end;
5782 }
5783
5784 ed = (EXTENT_DATA*)tp.item->data;
5785
5786 if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
5787 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
5788 Status = STATUS_INTERNAL_ERROR;
5789 goto end;
5790 }
5791
5792 eds = (EXTENT_DATA2*)&ed->data[0];
5793
5794 b = find_next_item(Vcb, &tp, &next_tp, TRUE);
5795
5796 switch (ed->type) {
5797 case EXTENT_TYPE_REGULAR:
5798 {
5799 UINT64 rc = get_extent_item_refcount(Vcb, eds->address);
5800
5801 if (rc == 0) {
5802 ERR("get_extent_item_refcount failed\n");
5803 Status = STATUS_INTERNAL_ERROR;
5804 goto end;
5805 }
5806
5807 do_cow = rc > 1;
5808 break;
5809 }
5810
5811 case EXTENT_TYPE_INLINE:
5812 do_cow = TRUE;
5813 break;
5814
5815 case EXTENT_TYPE_PREALLOC:
5816 FIXME("FIXME - handle prealloc extents\n"); // FIXME
5817 Status = STATUS_NOT_SUPPORTED;
5818 goto end;
5819
5820 default:
5821 ERR("error - unknown extent type %x\n", ed->type);
5822 Status = STATUS_NOT_SUPPORTED;
5823 goto end;
5824 }
5825
5826 if (ed->compression != BTRFS_COMPRESSION_NONE) {
5827 FIXME("FIXME: compression not yet supported\n");
5828 Status = STATUS_NOT_SUPPORTED;
5829 goto end;
5830 }
5831
5832 if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
5833 WARN("encryption not supported\n");
5834 Status = STATUS_INTERNAL_ERROR;
5835 goto end;
5836 }
5837
5838 if (ed->encoding != BTRFS_ENCODING_NONE) {
5839 WARN("other encodings not supported\n");
5840 Status = STATUS_INTERNAL_ERROR;
5841 goto end;
5842 }
5843
5844 size = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : eds->num_bytes;
5845
5846 TRACE("extent: start = %llx, length = %llx\n", tp.item->key.offset, size);
5847
5848 new_start = tp.item->key.offset < start_data ? start_data : tp.item->key.offset;
5849 new_end = tp.item->key.offset + size > start_data + length ? (start_data + length) : (tp.item->key.offset + size);
5850
5851 TRACE("new_start = %llx\n", new_start);
5852 TRACE("new_end = %llx\n", new_end);
5853
5854 if (do_cow) {
5855 TRACE("doing COW write\n");
5856
5857 Status = excise_extents(Vcb, fcb, new_start, new_start + new_end, changed_sector_list, rollback);
5858
5859 if (!NT_SUCCESS(Status)) {
5860 ERR("error - excise_extents returned %08x\n", Status);
5861 goto end;
5862 }
5863
5864 Status = insert_extent(Vcb, fcb, new_start, new_end - new_start, (UINT8*)data + new_start - start_data, changed_sector_list, rollback);
5865
5866 if (!NT_SUCCESS(Status)) {
5867 ERR("error - insert_extent returned %08x\n", Status);
5868 goto end;
5869 }
5870 } else {
5871 UINT64 writeaddr;
5872
5873 writeaddr = eds->address + eds->offset + new_start - tp.item->key.offset;
5874 TRACE("doing non-COW write to %llx\n", writeaddr);
5875
5876 Status = write_data(Vcb, writeaddr, (UINT8*)data + new_start - start_data, new_end - new_start);
5877
5878 if (!NT_SUCCESS(Status)) {
5879 ERR("error - write_data returned %08x\n", Status);
5880 goto end;
5881 }
5882
5883 if (changed_sector_list) {
5884 unsigned int i;
5885 changed_sector* sc;
5886
5887 sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
5888 if (!sc) {
5889 ERR("out of memory\n");
5890 Status = STATUS_INSUFFICIENT_RESOURCES;
5891 goto end;
5892 }
5893
5894 sc->ol.key = writeaddr;
5895 sc->length = (new_end - new_start) / Vcb->superblock.sector_size;
5896 sc->deleted = FALSE;
5897
5898 sc->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sc->length, ALLOC_TAG);
5899 if (!sc->checksums) {
5900 ERR("out of memory\n");
5901 ExFreePool(sc);
5902 Status = STATUS_INSUFFICIENT_RESOURCES;
5903 goto end;
5904 }
5905
5906 for (i = 0; i < sc->length; i++) {
5907 sc->checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + new_start - start_data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
5908 }
5909
5910 insert_into_ordered_list(changed_sector_list, &sc->ol);
5911 }
5912 }
5913
5914 last_write = new_end;
5915
5916 if (b) {
5917 free_traverse_ptr(&tp);
5918 tp = next_tp;
5919
5920 if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || tp.item->key.offset >= start_data + length)
5921 b = FALSE;
5922 }
5923 } while (b);
5924
5925 if (last_write < start_data + length) {
5926 new_start = last_write;
5927 new_end = start_data + length;
5928
5929 TRACE("new_start = %llx\n", new_start);
5930 TRACE("new_end = %llx\n", new_end);
5931
5932 Status = insert_extent(Vcb, fcb, new_start, new_end - new_start, (UINT8*)data + new_start - start_data, changed_sector_list, rollback);
5933
5934 if (!NT_SUCCESS(Status)) {
5935 ERR("error - insert_extent returned %08x\n", Status);
5936 goto end;
5937 }
5938 }
5939
5940 Status = STATUS_SUCCESS;
5941
5942 end:
5943 free_traverse_ptr(&tp);
5944
5945 return Status;
5946 }
5947
5948 #ifdef DEBUG_PARANOID
5949 static void print_loaded_trees(tree* t, int spaces) {
5950 char pref[10];
5951 int i;
5952 LIST_ENTRY* le;
5953
5954 for (i = 0; i < spaces; i++) {
5955 pref[i] = ' ';
5956 }
5957 pref[spaces] = 0;
5958
5959 if (!t) {
5960 ERR("%s(not loaded)\n", pref);
5961 return;
5962 }
5963
5964 le = t->itemlist.Flink;
5965 while (le != &t->itemlist) {
5966 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
5967
5968 ERR("%s%llx,%x,%llx ignore=%s\n", pref, td->key.obj_id, td->key.obj_type, td->key.offset, td->ignore ? "TRUE" : "FALSE");
5969
5970 if (t->header.level > 0) {
5971 print_loaded_trees(td->treeholder.tree, spaces+1);
5972 }
5973
5974 le = le->Flink;
5975 }
5976 }
5977
5978 static void check_extents_consistent(device_extension* Vcb, fcb* fcb) {
5979 KEY searchkey;
5980 traverse_ptr tp, next_tp;
5981 UINT64 length, oldlength, lastoff, alloc;
5982 NTSTATUS Status;
5983 EXTENT_DATA* ed;
5984 EXTENT_DATA2* ed2;
5985
5986 if (fcb->ads || fcb->inode_item.st_size == 0 || fcb->deleted)
5987 return;
5988
5989 TRACE("inode = %llx, subvol = %llx\n", fcb->inode, fcb->subvol->id);
5990
5991 searchkey.obj_id = fcb->inode;
5992 searchkey.obj_type = TYPE_EXTENT_DATA;
5993 searchkey.offset = 0;
5994
5995 Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
5996 if (!NT_SUCCESS(Status)) {
5997 ERR("error - find_item returned %08x\n", Status);
5998 goto failure2;
5999 }
6000
6001 if (keycmp(&searchkey, &tp.item->key)) {
6002 ERR("could not find EXTENT_DATA at offset 0\n");
6003 goto failure;
6004 }
6005
6006 if (tp.item->size < sizeof(EXTENT_DATA)) {
6007 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
6008 goto failure;
6009 }
6010
6011 ed = (EXTENT_DATA*)tp.item->data;
6012 ed2 = (EXTENT_DATA2*)&ed->data[0];
6013
6014 length = oldlength = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
6015 lastoff = tp.item->key.offset;
6016
6017 TRACE("(%llx,%x,%llx) length = %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, length);
6018
6019 alloc = 0;
6020 if (ed->type != EXTENT_TYPE_REGULAR || ed2->address != 0) {
6021 alloc += length;
6022 }
6023
6024 while (find_next_item(Vcb, &tp, &next_tp, FALSE)) {
6025 if (next_tp.item->key.obj_id != searchkey.obj_id || next_tp.item->key.obj_type != searchkey.obj_type) {
6026 free_traverse_ptr(&next_tp);
6027 break;
6028 }
6029
6030 free_traverse_ptr(&tp);
6031 tp = next_tp;
6032
6033 if (tp.item->size < sizeof(EXTENT_DATA)) {
6034 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
6035 goto failure;
6036 }
6037
6038 ed = (EXTENT_DATA*)tp.item->data;
6039 ed2 = (EXTENT_DATA2*)&ed->data[0];
6040
6041 length = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
6042
6043 TRACE("(%llx,%x,%llx) length = %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, length);
6044
6045 if (tp.item->key.offset != lastoff + oldlength) {
6046 ERR("EXTENT_DATA in %llx,%llx was at %llx, expected %llx\n", fcb->subvol->id, fcb->inode, tp.item->key.offset, lastoff + oldlength);
6047 goto failure;
6048 }
6049
6050 if (ed->type != EXTENT_TYPE_REGULAR || ed2->address != 0) {
6051 alloc += length;
6052 }
6053
6054 oldlength = length;
6055 lastoff = tp.item->key.offset;
6056 }
6057
6058 if (alloc != fcb->inode_item.st_blocks) {
6059 ERR("allocation size was %llx, expected %llx\n", alloc, fcb->inode_item.st_blocks);
6060 goto failure;
6061 }
6062
6063 // if (fcb->inode_item.st_blocks != lastoff + oldlength) {
6064 // ERR("extents finished at %x, expected %x\n", (UINT32)(lastoff + oldlength), (UINT32)fcb->inode_item.st_blocks);
6065 // goto failure;
6066 // }
6067
6068 free_traverse_ptr(&tp);
6069
6070 return;
6071
6072 failure:
6073 free_traverse_ptr(&tp);
6074
6075 failure2:
6076 if (fcb->subvol->treeholder.tree)
6077 print_loaded_trees(fcb->subvol->treeholder.tree, 0);
6078
6079 int3;
6080 }
6081
6082 // static void check_extent_tree_consistent(device_extension* Vcb) {
6083 // KEY searchkey;
6084 // traverse_ptr tp, next_tp;
6085 // UINT64 lastaddr;
6086 // BOOL b, inconsistency;
6087 //
6088 // searchkey.obj_id = 0;
6089 // searchkey.obj_type = 0;
6090 // searchkey.offset = 0;
6091 //
6092 // if (!find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE)) {
6093 // ERR("error - could not find any entries in extent_root\n");
6094 // int3;
6095 // }
6096 //
6097 // lastaddr = 0;
6098 // inconsistency = FALSE;
6099 //
6100 // do {
6101 // if (tp.item->key.obj_type == TYPE_EXTENT_ITEM) {
6102 // // ERR("%x,%x,%x\n", (UINT32)tp.item->key.obj_id, tp.item->key.obj_type, (UINT32)tp.item->key.offset);
6103 //
6104 // if (tp.item->key.obj_id < lastaddr) {
6105 // // ERR("inconsistency!\n");
6106 // // int3;
6107 // inconsistency = TRUE;
6108 // }
6109 //
6110 // lastaddr = tp.item->key.obj_id + tp.item->key.offset;
6111 // }
6112 //
6113 // b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
6114 // if (b) {
6115 // free_traverse_ptr(&tp);
6116 // tp = next_tp;
6117 // }
6118 // } while (b);
6119 //
6120 // free_traverse_ptr(&tp);
6121 //
6122 // if (!inconsistency)
6123 // return;
6124 //
6125 // ERR("Inconsistency detected:\n");
6126 //
6127 // if (!find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE)) {
6128 // ERR("error - could not find any entries in extent_root\n");
6129 // int3;
6130 // }
6131 //
6132 // do {
6133 // if (tp.item->key.obj_type == TYPE_EXTENT_ITEM) {
6134 // ERR("%x,%x,%x\n", (UINT32)tp.item->key.obj_id, tp.item->key.obj_type, (UINT32)tp.item->key.offset);
6135 //
6136 // if (tp.item->key.obj_id < lastaddr) {
6137 // ERR("inconsistency!\n");
6138 // }
6139 //
6140 // lastaddr = tp.item->key.obj_id + tp.item->key.offset;
6141 // }
6142 //
6143 // b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
6144 // if (b) {
6145 // free_traverse_ptr(&tp);
6146 // tp = next_tp;
6147 // }
6148 // } while (b);
6149 //
6150 // free_traverse_ptr(&tp);
6151 //
6152 // int3;
6153 // }
6154 #endif
6155
6156 NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void* buf, ULONG* length, BOOL paging_io, BOOL no_cache, LIST_ENTRY* rollback) {
6157 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
6158 PFILE_OBJECT FileObject = IrpSp->FileObject;
6159 KEY searchkey;
6160 traverse_ptr tp;
6161 EXTENT_DATA* ed2;
6162 UINT64 newlength, start_data, end_data;
6163 UINT32 bufhead;
6164 BOOL make_inline;
6165 UINT8* data;
6166 LIST_ENTRY changed_sector_list;
6167 INODE_ITEM *ii, *origii;
6168 BOOL changed_length = FALSE, nocsum, nocow/*, lazy_writer = FALSE, write_eof = FALSE*/;
6169 NTSTATUS Status;
6170 LARGE_INTEGER time;
6171 BTRFS_TIME now;
6172 fcb* fcb;
6173 BOOL paging_lock = FALSE;
6174
6175 TRACE("(%p, %p, %llx, %p, %x, %u, %u)\n", Vcb, FileObject, offset.QuadPart, buf, *length, paging_io, no_cache);
6176
6177 if (*length == 0) {
6178 WARN("returning success for zero-length write\n");
6179 return STATUS_SUCCESS;
6180 }
6181
6182 if (!FileObject) {
6183 ERR("error - FileObject was NULL\n");
6184 return STATUS_ACCESS_DENIED;
6185 }
6186
6187 fcb = FileObject->FsContext;
6188
6189 if (fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK) {
6190 WARN("tried to write to something other than a file or symlink (inode %llx, type %u, %p, %p)\n", fcb->inode, fcb->type, &fcb->type, fcb);
6191 return STATUS_ACCESS_DENIED;
6192 }
6193
6194 if (offset.LowPart == FILE_WRITE_TO_END_OF_FILE && offset.HighPart == -1) {
6195 offset = fcb->Header.FileSize;
6196 // write_eof = TRUE;
6197 }
6198
6199 TRACE("fcb->Header.Flags = %x\n", fcb->Header.Flags);
6200
6201 if (no_cache && !paging_io && FileObject->SectionObjectPointer->DataSectionObject) {
6202 IO_STATUS_BLOCK iosb;
6203
6204 ExAcquireResourceExclusiveLite(fcb->Header.PagingIoResource, TRUE);
6205
6206 CcFlushCache(FileObject->SectionObjectPointer, &offset, *length, &iosb);
6207
6208 if (!NT_SUCCESS(iosb.Status)) {
6209 ExReleaseResourceLite(fcb->Header.PagingIoResource);
6210 ERR("CcFlushCache returned %08x\n", iosb.Status);
6211 return iosb.Status;
6212 }
6213
6214 paging_lock = TRUE;
6215
6216 CcPurgeCacheSection(FileObject->SectionObjectPointer, &offset, *length, FALSE);
6217 }
6218
6219 if (paging_io) {
6220 ExAcquireResourceSharedLite(fcb->Header.PagingIoResource, TRUE);
6221 paging_lock = TRUE;
6222 }
6223
6224 nocsum = fcb->ads ? TRUE : fcb->inode_item.flags & BTRFS_INODE_NODATASUM;
6225 nocow = fcb->ads ? TRUE : fcb->inode_item.flags & BTRFS_INODE_NODATACOW;
6226
6227 newlength = fcb->ads ? fcb->adssize : fcb->inode_item.st_size;
6228
6229 if (fcb->deleted)
6230 newlength = 0;
6231
6232 TRACE("newlength = %llx\n", newlength);
6233
6234 // if (KeGetCurrentThread() == fcb->lazy_writer_thread) {
6235 // ERR("lazy writer on the TV\n");
6236 // lazy_writer = TRUE;
6237 // }
6238
6239 if (offset.QuadPart + *length > newlength) {
6240 if (paging_io) {
6241 if (offset.QuadPart >= newlength) {
6242 TRACE("paging IO tried to write beyond end of file (file size = %llx, offset = %llx, length = %x)\n", newlength, offset.QuadPart, *length);
6243 TRACE("filename %.*S\n", fcb->full_filename.Length / sizeof(WCHAR), fcb->full_filename.Buffer);
6244 TRACE("FileObject: AllocationSize = %llx, FileSize = %llx, ValidDataLength = %llx\n",
6245 fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
6246 Status = STATUS_SUCCESS;
6247 goto end;
6248 }
6249
6250 *length = newlength - offset.QuadPart;
6251 } else {
6252 newlength = offset.QuadPart + *length;
6253 changed_length = TRUE;
6254
6255 TRACE("extending length to %llx\n", newlength);
6256 }
6257 }
6258
6259 make_inline = fcb->ads ? FALSE : newlength <= fcb->Vcb->max_inline;
6260
6261 if (changed_length) {
6262 if (newlength > fcb->Header.AllocationSize.QuadPart) {
6263 Status = extend_file(fcb, newlength, rollback);
6264 if (!NT_SUCCESS(Status)) {
6265 ERR("extend_file returned %08x\n", Status);
6266 goto end;
6267 }
6268 } else if (fcb->ads)
6269 fcb->adssize = newlength;
6270 else
6271 fcb->inode_item.st_size = newlength;
6272
6273 fcb->Header.FileSize.QuadPart = newlength;
6274 fcb->Header.ValidDataLength.QuadPart = newlength;
6275
6276 TRACE("AllocationSize = %llx\n", fcb->Header.AllocationSize.QuadPart);
6277 TRACE("FileSize = %llx\n", fcb->Header.FileSize.QuadPart);
6278 TRACE("ValidDataLength = %llx\n", fcb->Header.ValidDataLength.QuadPart);
6279 }
6280
6281 if (!no_cache) {
6282 BOOL wait;
6283
6284 if (!FileObject->PrivateCacheMap || changed_length) {
6285 CC_FILE_SIZES ccfs;
6286
6287 ccfs.AllocationSize = fcb->Header.AllocationSize;
6288 ccfs.FileSize = fcb->Header.FileSize;
6289 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
6290
6291 if (!FileObject->PrivateCacheMap) {
6292 TRACE("calling CcInitializeCacheMap...\n");
6293 CcInitializeCacheMap(FileObject, &ccfs, FALSE, cache_callbacks, FileObject);
6294
6295 CcSetReadAheadGranularity(FileObject, READ_AHEAD_GRANULARITY);
6296 } else {
6297 CcSetFileSizes(FileObject, &ccfs);
6298 }
6299 }
6300
6301 // FIXME - uncomment this when async is working
6302 // wait = IoIsOperationSynchronous(Irp) ? TRUE : FALSE;
6303 wait = TRUE;
6304
6305 TRACE("CcCopyWrite(%p, %llx, %x, %u, %p)\n", FileObject, offset.QuadPart, *length, wait, buf);
6306 if (!CcCopyWrite(FileObject, &offset, *length, wait, buf)) {
6307 TRACE("CcCopyWrite failed.\n");
6308
6309 IoMarkIrpPending(Irp);
6310 Status = STATUS_PENDING;
6311 goto end;
6312 }
6313 TRACE("CcCopyWrite finished\n");
6314
6315 Status = STATUS_SUCCESS;
6316 goto end;
6317 }
6318
6319 if (fcb->ads) {
6320 UINT16 datalen;
6321 UINT8* data2;
6322 UINT32 maxlen;
6323
6324 if (!get_xattr(fcb->Vcb, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, &data, &datalen)) {
6325 ERR("get_xattr failed\n");
6326 Status = STATUS_INTERNAL_ERROR;
6327 goto end;
6328 }
6329
6330 if (changed_length) {
6331 // find maximum length of xattr
6332 maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
6333
6334 searchkey.obj_id = fcb->inode;
6335 searchkey.obj_type = TYPE_XATTR_ITEM;
6336 searchkey.offset = fcb->adshash;
6337
6338 Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE);
6339 if (!NT_SUCCESS(Status)) {
6340 ERR("error - find_item returned %08x\n", Status);
6341 goto end;
6342 }
6343
6344 if (keycmp(&tp.item->key, &searchkey)) {
6345 ERR("error - could not find key for xattr\n");
6346 free_traverse_ptr(&tp);
6347 Status = STATUS_INTERNAL_ERROR;
6348 goto end;
6349 }
6350
6351 if (tp.item->size < datalen) {
6352 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, datalen);
6353 free_traverse_ptr(&tp);
6354 Status = STATUS_INTERNAL_ERROR;
6355 goto end;
6356 }
6357
6358 maxlen -= tp.item->size - datalen; // subtract XATTR_ITEM overhead
6359
6360 free_traverse_ptr(&tp);
6361
6362 if (newlength > maxlen) {
6363 ERR("error - xattr too long (%llu > %u)\n", newlength, maxlen);
6364 Status = STATUS_DISK_FULL;
6365 goto end;
6366 }
6367
6368 fcb->adssize = newlength;
6369
6370 data2 = ExAllocatePoolWithTag(PagedPool, newlength, ALLOC_TAG);
6371 if (!data2) {
6372 ERR("out of memory\n");
6373 Status = STATUS_INSUFFICIENT_RESOURCES;
6374 goto end;
6375 }
6376
6377 RtlCopyMemory(data2, data, datalen);
6378
6379 if (offset.QuadPart > datalen)
6380 RtlZeroMemory(&data2[datalen], offset.QuadPart - datalen);
6381 } else
6382 data2 = data;
6383
6384 if (*length > 0)
6385 RtlCopyMemory(&data2[offset.QuadPart], buf, *length);
6386
6387 Status = set_xattr(fcb->Vcb, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, data2, newlength, rollback);
6388 if (!NT_SUCCESS(Status)) {
6389 ERR("set_xattr returned %08x\n", Status);
6390 goto end;
6391 }
6392
6393 if (data) ExFreePool(data);
6394 if (data2 != data) ExFreePool(data2);
6395
6396 fcb->Header.ValidDataLength.QuadPart = newlength;
6397 } else {
6398 if (make_inline) {
6399 start_data = 0;
6400 end_data = sector_align(newlength, fcb->Vcb->superblock.sector_size);
6401 bufhead = sizeof(EXTENT_DATA) - 1;
6402 } else {
6403 start_data = offset.QuadPart & ~(fcb->Vcb->superblock.sector_size - 1);
6404 end_data = sector_align(offset.QuadPart + *length, fcb->Vcb->superblock.sector_size);
6405 bufhead = 0;
6406 }
6407
6408 fcb->Header.ValidDataLength.QuadPart = newlength;
6409 TRACE("fcb %p FileSize = %llx\n", fcb, fcb->Header.FileSize.QuadPart);
6410
6411 data = ExAllocatePoolWithTag(PagedPool, end_data - start_data + bufhead, ALLOC_TAG);
6412 if (!data) {
6413 ERR("out of memory\n");
6414 Status = STATUS_INSUFFICIENT_RESOURCES;
6415 goto end;
6416 }
6417
6418 RtlZeroMemory(data + bufhead, end_data - start_data);
6419
6420 TRACE("start_data = %llx\n", start_data);
6421 TRACE("end_data = %llx\n", end_data);
6422
6423 if (offset.QuadPart > start_data || offset.QuadPart + *length < end_data) {
6424 if (changed_length) {
6425 if (fcb->inode_item.st_size > start_data)
6426 Status = read_file(Vcb, fcb->subvol, fcb->inode, data + bufhead, start_data, fcb->inode_item.st_size - start_data, NULL);
6427 else
6428 Status = STATUS_SUCCESS;
6429 } else
6430 Status = read_file(Vcb, fcb->subvol, fcb->inode, data + bufhead, start_data, end_data - start_data, NULL);
6431
6432 if (!NT_SUCCESS(Status)) {
6433 ERR("read_file returned %08x\n", Status);
6434 ExFreePool(data);
6435 goto end;
6436 }
6437 }
6438
6439 RtlCopyMemory(data + bufhead + offset.QuadPart - start_data, buf, *length);
6440
6441 if (!nocsum)
6442 InitializeListHead(&changed_sector_list);
6443
6444 if (make_inline || !nocow) {
6445 Status = excise_extents(fcb->Vcb, fcb, start_data, end_data, nocsum ? NULL : &changed_sector_list, rollback);
6446 if (!NT_SUCCESS(Status)) {
6447 ERR("error - excise_extents returned %08x\n", Status);
6448 ExFreePool(data);
6449 goto end;
6450 }
6451
6452 if (!make_inline) {
6453 Status = insert_extent(fcb->Vcb, fcb, start_data, end_data - start_data, data, nocsum ? NULL : &changed_sector_list, rollback);
6454
6455 if (!NT_SUCCESS(Status)) {
6456 ERR("error - insert_extent returned %08x\n", Status);
6457 ExFreePool(data);
6458 goto end;
6459 }
6460
6461 ExFreePool(data);
6462 } else {
6463 ed2 = (EXTENT_DATA*)data;
6464 ed2->generation = fcb->Vcb->superblock.generation;
6465 ed2->decoded_size = newlength;
6466 ed2->compression = BTRFS_COMPRESSION_NONE;
6467 ed2->encryption = BTRFS_ENCRYPTION_NONE;
6468 ed2->encoding = BTRFS_ENCODING_NONE;
6469 ed2->type = EXTENT_TYPE_INLINE;
6470
6471 insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, 0, ed2, sizeof(EXTENT_DATA) - 1 + newlength, NULL, rollback);
6472
6473 fcb->inode_item.st_blocks += newlength;
6474 }
6475 } else {
6476 Status = do_nocow_write(fcb->Vcb, fcb, start_data, end_data - start_data, data, nocsum ? NULL : &changed_sector_list, rollback);
6477
6478 if (!NT_SUCCESS(Status)) {
6479 ERR("error - do_nocow_write returned %08x\n", Status);
6480 ExFreePool(data);
6481 goto end;
6482 }
6483
6484 ExFreePool(data);
6485 }
6486 }
6487
6488 KeQuerySystemTime(&time);
6489 win_time_to_unix(time, &now);
6490
6491 // ERR("no_cache = %s, FileObject->PrivateCacheMap = %p\n", no_cache ? "TRUE" : "FALSE", FileObject->PrivateCacheMap);
6492
6493 // if (!no_cache) {
6494 // if (!FileObject->PrivateCacheMap) {
6495 // CC_FILE_SIZES ccfs;
6496 //
6497 // ccfs.AllocationSize = fcb->Header.AllocationSize;
6498 // ccfs.FileSize = fcb->Header.FileSize;
6499 // ccfs.ValidDataLength = fcb->Header.ValidDataLength;
6500 //
6501 // TRACE("calling CcInitializeCacheMap...\n");
6502 // CcInitializeCacheMap(FileObject, &ccfs, FALSE, cache_callbacks, fcb);
6503 //
6504 // changed_length = FALSE;
6505 // }
6506 // }
6507
6508 if (fcb->ads)
6509 origii = &fcb->par->inode_item;
6510 else
6511 origii = &fcb->inode_item;
6512
6513 origii->transid = Vcb->superblock.generation;
6514 origii->sequence++;
6515 origii->st_ctime = now;
6516
6517 if (!fcb->ads) {
6518 TRACE("setting st_size to %llx\n", newlength);
6519 origii->st_size = newlength;
6520 origii->st_mtime = now;
6521 }
6522
6523 searchkey.obj_id = fcb->inode;
6524 searchkey.obj_type = TYPE_INODE_ITEM;
6525 searchkey.offset = 0;
6526
6527 Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE);
6528 if (!NT_SUCCESS(Status)) {
6529 ERR("error - find_item returned %08x\n", Status);
6530 goto end;
6531 }
6532
6533 if (!keycmp(&tp.item->key, &searchkey))
6534 delete_tree_item(Vcb, &tp, rollback);
6535 else
6536 WARN("couldn't find existing INODE_ITEM\n");
6537
6538 ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
6539 if (!ii) {
6540 ERR("out of memory\n");
6541 free_traverse_ptr(&tp);
6542 Status = STATUS_INSUFFICIENT_RESOURCES;
6543 goto end;
6544 }
6545
6546 RtlCopyMemory(ii, origii, sizeof(INODE_ITEM));
6547 insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, rollback);
6548
6549 free_traverse_ptr(&tp);
6550
6551 // FIXME - update inode_item of open FCBs pointing to the same inode (i.e. hardlinked files)
6552
6553 if (!nocsum)
6554 update_checksum_tree(Vcb, &changed_sector_list, rollback);
6555
6556 if (changed_length) {
6557 CC_FILE_SIZES ccfs;
6558
6559 ccfs.AllocationSize = fcb->Header.AllocationSize;
6560 ccfs.FileSize = fcb->Header.FileSize;
6561 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
6562
6563 CcSetFileSizes(FileObject, &ccfs);
6564 }
6565
6566 // FIXME - make sure this still called if STATUS_PENDING and async
6567 // if (!no_cache) {
6568 // if (!CcCopyWrite(FileObject, &offset, *length, TRUE, buf)) {
6569 // ERR("CcCopyWrite failed.\n");
6570 // }
6571 // }
6572
6573 fcb->subvol->root_item.ctransid = Vcb->superblock.generation;
6574 fcb->subvol->root_item.ctime = now;
6575
6576 Status = STATUS_SUCCESS;
6577
6578 end:
6579 if (FileObject->Flags & FO_SYNCHRONOUS_IO && !paging_io) {
6580 TRACE("CurrentByteOffset was: %llx\n", FileObject->CurrentByteOffset.QuadPart);
6581 FileObject->CurrentByteOffset.QuadPart = offset.QuadPart + (NT_SUCCESS(Status) ? *length : 0);
6582 TRACE("CurrentByteOffset now: %llx\n", FileObject->CurrentByteOffset.QuadPart);
6583 }
6584
6585 if (paging_lock)
6586 ExReleaseResourceLite(fcb->Header.PagingIoResource);
6587
6588 return Status;
6589 }
6590
6591 NTSTATUS write_file(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
6592 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
6593 device_extension* Vcb = DeviceObject->DeviceExtension;
6594 void* buf;
6595 NTSTATUS Status;
6596 LARGE_INTEGER offset = IrpSp->Parameters.Write.ByteOffset;
6597 PFILE_OBJECT FileObject = IrpSp->FileObject;
6598 fcb* fcb = FileObject ? FileObject->FsContext : NULL;
6599 BOOL locked = FALSE;
6600 // LARGE_INTEGER freq, time1, time2;
6601 LIST_ENTRY rollback;
6602
6603 InitializeListHead(&rollback);
6604
6605 if (Vcb->readonly)
6606 return STATUS_MEDIA_WRITE_PROTECTED;
6607
6608 if (fcb && fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY)
6609 return STATUS_ACCESS_DENIED;
6610
6611 // time1 = KeQueryPerformanceCounter(&freq);
6612
6613 TRACE("write\n");
6614
6615 Irp->IoStatus.Information = 0;
6616
6617 switch (IrpSp->MinorFunction) {
6618 case IRP_MN_COMPLETE:
6619 FIXME("unsupported - IRP_MN_COMPLETE\n");
6620 break;
6621
6622 case IRP_MN_COMPLETE_MDL:
6623 FIXME("unsupported - IRP_MN_COMPLETE_MDL\n");
6624 break;
6625
6626 case IRP_MN_COMPLETE_MDL_DPC:
6627 FIXME("unsupported - IRP_MN_COMPLETE_MDL_DPC\n");
6628 break;
6629
6630 case IRP_MN_COMPRESSED:
6631 FIXME("unsupported - IRP_MN_COMPRESSED\n");
6632 break;
6633
6634 case IRP_MN_DPC:
6635 FIXME("unsupported - IRP_MN_DPC\n");
6636 break;
6637
6638 case IRP_MN_MDL:
6639 FIXME("unsupported - IRP_MN_MDL\n");
6640 break;
6641
6642 case IRP_MN_MDL_DPC:
6643 FIXME("unsupported - IRP_MN_MDL_DPC\n");
6644 break;
6645
6646 case IRP_MN_NORMAL:
6647 TRACE("IRP_MN_NORMAL\n");
6648 break;
6649
6650 default:
6651 WARN("unknown minor function %x\n", IrpSp->MinorFunction);
6652 break;
6653 }
6654
6655 TRACE("offset = %llx\n", offset.QuadPart);
6656 TRACE("length = %x\n", IrpSp->Parameters.Write.Length);
6657
6658 if (!Irp->AssociatedIrp.SystemBuffer) {
6659 buf = map_user_buffer(Irp);
6660
6661 if (Irp->MdlAddress && !buf) {
6662 ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
6663 Status = STATUS_INSUFFICIENT_RESOURCES;
6664 goto exit;
6665 }
6666 } else
6667 buf = Irp->AssociatedIrp.SystemBuffer;
6668
6669 TRACE("buf = %p\n", buf);
6670
6671 acquire_tree_lock(Vcb, TRUE);
6672 locked = TRUE;
6673
6674 if (fcb && !(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForWriteAccess(&fcb->lock, Irp)) {
6675 WARN("tried to write to locked region\n");
6676 Status = STATUS_FILE_LOCK_CONFLICT;
6677 goto exit;
6678 }
6679
6680 // ERR("Irp->Flags = %x\n", Irp->Flags);
6681 Status = write_file2(Vcb, Irp, offset, buf, &IrpSp->Parameters.Write.Length, Irp->Flags & IRP_PAGING_IO, Irp->Flags & IRP_NOCACHE, &rollback);
6682 if (!NT_SUCCESS(Status)) {
6683 if (Status != STATUS_PENDING)
6684 ERR("write_file2 returned %08x\n", Status);
6685 goto exit;
6686 }
6687
6688 Status = consider_write(Vcb);
6689
6690 if (NT_SUCCESS(Status)) {
6691 Irp->IoStatus.Information = IrpSp->Parameters.Write.Length;
6692
6693 #ifdef DEBUG_PARANOID
6694 check_extents_consistent(Vcb, FileObject->FsContext); // TESTING
6695
6696 // check_extent_tree_consistent(Vcb);
6697 #endif
6698 }
6699
6700 exit:
6701 if (locked) {
6702 if (NT_SUCCESS(Status))
6703 clear_rollback(&rollback);
6704 else
6705 do_rollback(Vcb, &rollback);
6706
6707 release_tree_lock(Vcb, TRUE);
6708 }
6709
6710 // time2 = KeQueryPerformanceCounter(NULL);
6711
6712 // ERR("time = %u (freq = %u)\n", (UINT32)(time2.QuadPart - time1.QuadPart), (UINT32)freq.QuadPart);
6713
6714 return Status;
6715 }