[BTRFS]
[reactos.git] / reactos / drivers / filesystems / btrfs / write.c
1 /* Copyright (c) Mark Harmstone 2016
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19
20 #define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - sizeof(leaf_node))
21
22 // BOOL did_split;
23 BOOL chunk_test = FALSE;
24
25 typedef struct {
26 KEVENT Event;
27 IO_STATUS_BLOCK iosb;
28 } write_context;
29
30 typedef struct {
31 EXTENT_ITEM ei;
32 UINT8 type;
33 EXTENT_DATA_REF edr;
34 } EXTENT_ITEM_DATA_REF;
35
36 typedef struct {
37 EXTENT_ITEM_TREE eit;
38 UINT8 type;
39 TREE_BLOCK_REF tbr;
40 } EXTENT_ITEM_TREE2;
41
42 typedef struct {
43 EXTENT_ITEM ei;
44 UINT8 type;
45 TREE_BLOCK_REF tbr;
46 } EXTENT_ITEM_SKINNY_METADATA;
47
48 typedef struct {
49 CHUNK_ITEM ci;
50 CHUNK_ITEM_STRIPE stripes[1];
51 } CHUNK_ITEM2;
52
53 static BOOL extent_item_is_shared(EXTENT_ITEM* ei, ULONG len);
54 static BOOL is_file_prealloc(fcb* fcb, UINT64 start_data, UINT64 end_data);
55
56 static NTSTATUS STDCALL write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
57 write_context* context = conptr;
58
59 context->iosb = Irp->IoStatus;
60 KeSetEvent(&context->Event, 0, FALSE);
61
62 // return STATUS_SUCCESS;
63 return STATUS_MORE_PROCESSING_REQUIRED;
64 }
65
66 static NTSTATUS STDCALL write_data_phys(PDEVICE_OBJECT device, UINT64 address, void* data, UINT32 length) {
67 NTSTATUS Status;
68 LARGE_INTEGER offset;
69 PIRP Irp;
70 PIO_STACK_LOCATION IrpSp;
71 write_context* context = NULL;
72
73 TRACE("(%p, %llx, %p, %x)\n", device, address, data, length);
74
75 context = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_context), ALLOC_TAG);
76 if (!context) {
77 ERR("out of memory\n");
78 return STATUS_INSUFFICIENT_RESOURCES;
79 }
80
81 RtlZeroMemory(context, sizeof(write_context));
82
83 KeInitializeEvent(&context->Event, NotificationEvent, FALSE);
84
85 offset.QuadPart = address;
86
87 // Irp = IoBuildSynchronousFsdRequest(IRP_MJ_WRITE, Vcb->device, data, length, &offset, NULL, &context->iosb);
88
89 Irp = IoAllocateIrp(device->StackSize, FALSE);
90
91 if (!Irp) {
92 ERR("IoAllocateIrp failed\n");
93 Status = STATUS_INTERNAL_ERROR;
94 goto exit2;
95 }
96
97 IrpSp = IoGetNextIrpStackLocation(Irp);
98 IrpSp->MajorFunction = IRP_MJ_WRITE;
99
100 if (device->Flags & DO_BUFFERED_IO) {
101 Irp->AssociatedIrp.SystemBuffer = data;
102
103 Irp->Flags = IRP_BUFFERED_IO;
104 } else if (device->Flags & DO_DIRECT_IO) {
105 Irp->MdlAddress = IoAllocateMdl(data, length, FALSE, FALSE, NULL);
106 if (!Irp->MdlAddress) {
107 DbgPrint("IoAllocateMdl failed\n");
108 goto exit;
109 }
110
111 MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoWriteAccess);
112 } else {
113 Irp->UserBuffer = data;
114 }
115
116 IrpSp->Parameters.Write.Length = length;
117 IrpSp->Parameters.Write.ByteOffset = offset;
118
119 Irp->UserIosb = &context->iosb;
120
121 Irp->UserEvent = &context->Event;
122
123 IoSetCompletionRoutine(Irp, write_completion, context, TRUE, TRUE, TRUE);
124
125 // FIXME - support multiple devices
126 Status = IoCallDriver(device, Irp);
127
128 if (Status == STATUS_PENDING) {
129 KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
130 Status = context->iosb.Status;
131 }
132
133 if (!NT_SUCCESS(Status)) {
134 ERR("IoCallDriver returned %08x\n", Status);
135 }
136
137 if (device->Flags & DO_DIRECT_IO) {
138 MmUnlockPages(Irp->MdlAddress);
139 IoFreeMdl(Irp->MdlAddress);
140 }
141
142 exit:
143 IoFreeIrp(Irp);
144
145 exit2:
146 if (context)
147 ExFreePool(context);
148
149 return Status;
150 }
151
152 static NTSTATUS STDCALL write_superblock(device_extension* Vcb, device* device) {
153 NTSTATUS Status;
154 unsigned int i = 0;
155 UINT32 crc32;
156
157 #ifdef __REACTOS__
158 Status = STATUS_INTERNAL_ERROR;
159 #endif
160
161 // FIXME - work with RAID
162
163 // FIXME - only write one superblock if on SSD (?)
164 while (superblock_addrs[i] > 0 && Vcb->length >= superblock_addrs[i] + sizeof(superblock)) {
165 TRACE("writing superblock %u\n", i);
166
167 Vcb->superblock.sb_phys_addr = superblock_addrs[i];
168 RtlCopyMemory(&Vcb->superblock.dev_item, &device->devitem, sizeof(DEV_ITEM));
169
170 crc32 = calc_crc32c(0xffffffff, (UINT8*)&Vcb->superblock.uuid, (ULONG)sizeof(superblock) - sizeof(Vcb->superblock.checksum));
171 crc32 = ~crc32;
172 TRACE("crc32 is %08x\n", crc32);
173 RtlCopyMemory(&Vcb->superblock.checksum, &crc32, sizeof(UINT32));
174
175 Status = write_data_phys(device->devobj, superblock_addrs[i], &Vcb->superblock, sizeof(superblock));
176
177 if (!NT_SUCCESS(Status))
178 break;
179
180 i++;
181 }
182
183 return Status;
184 }
185
186 static BOOL find_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address) {
187 LIST_ENTRY* le;
188 space *s, *bestfit = NULL;
189
190 TRACE("(%p, %llx, %llx, %p)\n", Vcb, c->offset, length, address);
191
192 le = c->space.Flink;
193 while (le != &c->space) {
194 s = CONTAINING_RECORD(le, space, list_entry);
195
196 if (s->type == SPACE_TYPE_FREE) {
197 if (s->size == length) {
198 *address = s->offset;
199 TRACE("returning exact fit at %llx\n", s->offset);
200 return TRUE;
201 } else if (s->size > length && (!bestfit || bestfit->size > s->size)) {
202 bestfit = s;
203 }
204 }
205
206 le = le->Flink;
207 }
208
209 if (bestfit) {
210 TRACE("returning best fit at %llx\n", bestfit->offset);
211 *address = bestfit->offset;
212 return TRUE;
213 }
214
215 return FALSE;
216 }
217
218 void add_to_space_list(chunk* c, UINT64 offset, UINT64 size, UINT8 type) {
219 LIST_ENTRY *le = c->space.Flink, *nextle, *insbef;
220 space *s, *s2, *s3;
221 #ifdef DEBUG_PARANOID
222 UINT64 lastaddr;
223 #endif
224
225 TRACE("(%p, %llx, %llx, %x)\n", c, offset, size, type);
226
227 #ifdef DEBUG_PARANOID
228 // TESTING
229 le = c->space.Flink;
230 while (le != &c->space) {
231 s = CONTAINING_RECORD(le, space, list_entry);
232
233 TRACE("%llx,%llx,%x\n", s->offset, s->size, s->type);
234
235 le = le->Flink;
236 }
237 #endif
238
239 c->space_changed = TRUE;
240
241 le = c->space.Flink;
242 insbef = &c->space;
243 while (le != &c->space) {
244 s = CONTAINING_RECORD(le, space, list_entry);
245 nextle = le->Flink;
246
247 if (s->offset >= offset + size) {
248 insbef = le;
249 break;
250 }
251
252 if (s->offset >= offset && s->offset + s->size <= offset + size) { // delete entirely
253 #ifndef __REACTOS__
254 RemoveEntryList(&s->list_entry);
255 #endif
256
257 if (s->offset + s->size == offset + size) {
258 insbef = s->list_entry.Flink;
259 RemoveEntryList(&s->list_entry);
260 ExFreePool(s);
261 break;
262 }
263
264 RemoveEntryList(&s->list_entry);
265 ExFreePool(s);
266 } else if (s->offset < offset && s->offset + s->size > offset + size) { // split in two
267 s3 = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
268 if (!s3) {
269 ERR("out of memory\n");
270 return;
271 }
272
273 s3->offset = offset + size;
274 s3->size = s->size - size - offset + s->offset;
275 s3->type = s->type;
276 InsertHeadList(&s->list_entry, &s3->list_entry);
277 insbef = &s3->list_entry;
278
279 s->size = offset - s->offset;
280 break;
281 } else if (s->offset + s->size > offset && s->offset + s->size <= offset + size) { // truncate before
282 s->size = offset - s->offset;
283 } else if (s->offset < offset + size && s->offset + s->size > offset + size) { // truncate after
284 s->size -= offset + size - s->offset;
285 s->offset = offset + size;
286
287 insbef = le;
288 break;
289 }
290
291 le = nextle;
292 }
293
294 s2 = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
295 if (!s2) {
296 ERR("out of memory\n");
297 return;
298 }
299
300 s2->offset = offset;
301 s2->size = size;
302 s2->type = type;
303 InsertTailList(insbef, &s2->list_entry);
304
305 // merge entries if same type
306
307 if (s2->list_entry.Blink != &c->space) {
308 s = CONTAINING_RECORD(s2->list_entry.Blink, space, list_entry);
309
310 if (s->type == type) {
311 s->size += s2->size;
312
313 RemoveEntryList(&s2->list_entry);
314 ExFreePool(s2);
315
316 s2 = s;
317 }
318 }
319
320 if (s2->list_entry.Flink != &c->space) {
321 s = CONTAINING_RECORD(s2->list_entry.Flink, space, list_entry);
322
323 if (s->type == type) {
324 s2->size += s->size;
325
326 RemoveEntryList(&s->list_entry);
327 ExFreePool(s);
328 }
329 }
330
331 le = c->space.Flink;
332 while (le != &c->space) {
333 s = CONTAINING_RECORD(le, space, list_entry);
334
335 TRACE("%llx,%llx,%x\n", s->offset, s->size, s->type);
336
337 le = le->Flink;
338 }
339
340 #ifdef DEBUG_PARANOID
341 // TESTING
342 lastaddr = c->offset;
343 le = c->space.Flink;
344 while (le != &c->space) {
345 s = CONTAINING_RECORD(le, space, list_entry);
346
347 if (s->offset != lastaddr) {
348 ERR("inconsistency detected!\n");
349 int3;
350 }
351
352 lastaddr = s->offset + s->size;
353
354 le = le->Flink;
355 }
356
357 if (lastaddr != c->offset + c->chunk_item->size) {
358 ERR("inconsistency detected - space doesn't run all the way to end of chunk\n");
359 int3;
360 }
361 #endif
362 }
363
364 chunk* get_chunk_from_address(device_extension* Vcb, UINT64 address) {
365 LIST_ENTRY* le2;
366 chunk* c;
367
368 le2 = Vcb->chunks.Flink;
369 while (le2 != &Vcb->chunks) {
370 c = CONTAINING_RECORD(le2, chunk, list_entry);
371
372 // TRACE("chunk: %llx, %llx\n", c->offset, c->chunk_item->size);
373
374 if (address >= c->offset && address < c->offset + c->chunk_item->size)
375 return c;
376
377 le2 = le2->Flink;
378 }
379
380 return NULL;
381 }
382
383 typedef struct {
384 disk_hole* dh;
385 device* device;
386 } stripe;
387
388 static void add_provisional_disk_hole(device_extension* Vcb, stripe* s, UINT64 max_stripe_size) {
389 // LIST_ENTRY* le = s->device->disk_holes.Flink;
390 // disk_hole* dh;
391
392 // ERR("old holes:\n");
393 // while (le != &s->device->disk_holes) {
394 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
395 //
396 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
397 //
398 // le = le->Flink;
399 // }
400
401 if (s->dh->size <= max_stripe_size) {
402 s->dh->provisional = TRUE;
403 } else {
404 disk_hole* newdh = ExAllocatePoolWithTag(PagedPool, sizeof(disk_hole), ALLOC_TAG);
405 if (!newdh) {
406 ERR("out of memory\n");
407 return;
408 }
409
410 newdh->address = s->dh->address + max_stripe_size;
411 newdh->size = s->dh->size - max_stripe_size;
412 newdh->provisional = FALSE;
413 InsertTailList(&s->device->disk_holes, &newdh->listentry);
414
415 s->dh->size = max_stripe_size;
416 s->dh->provisional = TRUE;
417 }
418
419 // ERR("new holes:\n");
420 // le = s->device->disk_holes.Flink;
421 // while (le != &s->device->disk_holes) {
422 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
423 //
424 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
425 //
426 // le = le->Flink;
427 // }
428 }
429
430 static UINT64 find_new_chunk_address(device_extension* Vcb, UINT64 size) {
431 KEY searchkey;
432 traverse_ptr tp, next_tp;
433 BOOL b;
434 UINT64 lastaddr;
435 NTSTATUS Status;
436
437 searchkey.obj_id = 0x100;
438 searchkey.obj_type = TYPE_CHUNK_ITEM;
439 searchkey.offset = 0;
440
441 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
442 if (!NT_SUCCESS(Status)) {
443 ERR("error - find_item returned %08x\n", Status);
444 return 0xffffffffffffffff;
445 }
446
447 lastaddr = 0;
448
449 do {
450 if (tp.item->key.obj_type == TYPE_CHUNK_ITEM) {
451 if (tp.item->size < sizeof(CHUNK_ITEM)) {
452 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(CHUNK_ITEM));
453 } else {
454 CHUNK_ITEM* ci = (CHUNK_ITEM*)tp.item->data;
455
456 if (tp.item->key.offset >= lastaddr + size)
457 return lastaddr;
458
459 lastaddr = tp.item->key.offset + ci->size;
460 }
461 }
462
463 b = find_next_item(Vcb, &tp, &next_tp, FALSE);
464 if (b) {
465 tp = next_tp;
466
467 if (tp.item->key.obj_id > searchkey.obj_id || tp.item->key.obj_type > searchkey.obj_type)
468 break;
469 }
470 } while (b);
471
472 return lastaddr;
473 }
474
475 static BOOL increase_dev_item_used(device_extension* Vcb, device* device, UINT64 size, LIST_ENTRY* rollback) {
476 KEY searchkey;
477 traverse_ptr tp;
478 DEV_ITEM* di;
479 NTSTATUS Status;
480
481 searchkey.obj_id = 1;
482 searchkey.obj_type = TYPE_DEV_ITEM;
483 searchkey.offset = device->devitem.dev_id;
484
485 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
486 if (!NT_SUCCESS(Status)) {
487 ERR("error - find_item returned %08x\n", Status);
488 return FALSE;
489 }
490
491 if (keycmp(&tp.item->key, &searchkey)) {
492 ERR("error - could not find DEV_ITEM for device %llx\n", device->devitem.dev_id);
493 return FALSE;
494 }
495
496 delete_tree_item(Vcb, &tp, rollback);
497
498 device->devitem.bytes_used += size;
499
500 di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
501 if (!di) {
502 ERR("out of memory\n");
503 return FALSE;
504 }
505
506 RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM));
507
508 if (!insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, rollback)) {
509 ERR("insert_tree_item failed\n");
510 return FALSE;
511 }
512
513 return TRUE;
514 }
515
516 static void reset_disk_holes(device* device, BOOL commit) {
517 LIST_ENTRY* le = device->disk_holes.Flink;
518 disk_hole* dh;
519
520 // ERR("old holes:\n");
521 // while (le != &device->disk_holes) {
522 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
523 //
524 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
525 //
526 // le = le->Flink;
527 // }
528
529 le = device->disk_holes.Flink;
530 while (le != &device->disk_holes) {
531 LIST_ENTRY* le2 = le->Flink;
532
533 dh = CONTAINING_RECORD(le, disk_hole, listentry);
534
535 if (dh->provisional) {
536 if (commit) {
537 RemoveEntryList(le);
538 ExFreePool(dh);
539 } else {
540 dh->provisional = FALSE;
541 }
542 }
543
544 le = le2;
545 }
546
547 if (!commit) {
548 le = device->disk_holes.Flink;
549 while (le != &device->disk_holes) {
550 LIST_ENTRY* le2 = le->Flink;
551
552 dh = CONTAINING_RECORD(le, disk_hole, listentry);
553
554 while (le2 != &device->disk_holes) {
555 disk_hole* dh2 = CONTAINING_RECORD(le2, disk_hole, listentry);
556
557 if (dh2->address == dh->address + dh->size) {
558 LIST_ENTRY* le3 = le2->Flink;
559 dh->size += dh2->size;
560
561 RemoveEntryList(le2);
562 ExFreePool(dh2);
563
564 le2 = le3;
565 } else
566 break;
567 }
568
569 le = le->Flink;
570 }
571 }
572
573 // ERR("new holes:\n");
574 // le = device->disk_holes.Flink;
575 // while (le != &device->disk_holes) {
576 // dh = CONTAINING_RECORD(le, disk_hole, listentry);
577 //
578 // ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
579 //
580 // le = le->Flink;
581 // }
582 }
583
584 static NTSTATUS add_to_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset, void* data, ULONG size) {
585 sys_chunk *sc, *sc2;
586 LIST_ENTRY* le;
587 USHORT i;
588
589 if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) {
590 ERR("error - bootstrap is full\n");
591 return STATUS_INTERNAL_ERROR;
592 }
593
594 sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG);
595 if (!sc) {
596 ERR("out of memory\n");
597 return STATUS_INSUFFICIENT_RESOURCES;
598 }
599
600 sc->key.obj_id = obj_id;
601 sc->key.obj_type = obj_type;
602 sc->key.offset = offset;
603 sc->size = size;
604 sc->data = ExAllocatePoolWithTag(PagedPool, sc->size, ALLOC_TAG);
605 if (!sc->data) {
606 ERR("out of memory\n");
607 ExFreePool(sc);
608 return STATUS_INSUFFICIENT_RESOURCES;
609 }
610
611 RtlCopyMemory(sc->data, data, sc->size);
612
613 le = Vcb->sys_chunks.Flink;
614 while (le != &Vcb->sys_chunks) {
615 sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
616
617 if (keycmp(&sc2->key, &sc->key) == 1)
618 break;
619
620 le = le->Flink;
621 }
622 InsertTailList(le, &sc->list_entry);
623
624 Vcb->superblock.n += sizeof(KEY) + size;
625
626 i = 0;
627 le = Vcb->sys_chunks.Flink;
628 while (le != &Vcb->sys_chunks) {
629 sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
630
631 TRACE("%llx,%x,%llx\n", sc2->key.obj_id, sc2->key.obj_type, sc2->key.offset);
632
633 RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], &sc2->key, sizeof(KEY));
634 i += sizeof(KEY);
635
636 RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], sc2->data, sc2->size);
637 i += sc2->size;
638
639 le = le->Flink;
640 }
641
642 return STATUS_SUCCESS;
643 }
644
645 chunk* alloc_chunk(device_extension* Vcb, UINT64 flags, LIST_ENTRY* rollback) {
646 UINT64 max_stripe_size, max_chunk_size, stripe_size;
647 UINT64 total_size = 0, i, j, logaddr;
648 int num_stripes;
649 disk_hole* dh;
650 stripe* stripes;
651 ULONG cisize;
652 CHUNK_ITEM* ci;
653 CHUNK_ITEM_STRIPE* cis;
654 chunk* c = NULL;
655 space* s = NULL;
656 BOOL success = FALSE;
657 BLOCK_GROUP_ITEM* bgi;
658
659 for (i = 0; i < Vcb->superblock.num_devices; i++) {
660 total_size += Vcb->devices[i].devitem.num_bytes;
661 }
662 TRACE("total_size = %llx\n", total_size);
663
664 if (flags & BLOCK_FLAG_DATA) {
665 max_stripe_size = 0x40000000; // 1 GB
666 max_chunk_size = 10 * max_stripe_size;
667 } else if (flags & BLOCK_FLAG_METADATA) {
668 if (total_size > 0xC80000000) // 50 GB
669 max_stripe_size = 0x40000000; // 1 GB
670 else
671 max_stripe_size = 0x10000000; // 256 MB
672
673 max_chunk_size = max_stripe_size;
674 } else if (flags & BLOCK_FLAG_SYSTEM) {
675 max_stripe_size = 0x2000000; // 32 MB
676 max_chunk_size = 2 * max_stripe_size;
677 }
678
679 // FIXME - make sure whole number of sectors?
680 max_chunk_size = min(max_chunk_size, total_size / 10); // cap at 10%
681
682 TRACE("would allocate a new chunk of %llx bytes and stripe %llx\n", max_chunk_size, max_stripe_size);
683
684 if (flags & BLOCK_FLAG_DUPLICATE) {
685 num_stripes = 2;
686 } else if (flags & BLOCK_FLAG_RAID0) {
687 FIXME("RAID0 not yet supported\n");
688 return NULL;
689 } else if (flags & BLOCK_FLAG_RAID1) {
690 FIXME("RAID1 not yet supported\n");
691 return NULL;
692 } else if (flags & BLOCK_FLAG_RAID10) {
693 FIXME("RAID10 not yet supported\n");
694 return NULL;
695 } else if (flags & BLOCK_FLAG_RAID5) {
696 FIXME("RAID5 not yet supported\n");
697 return NULL;
698 } else if (flags & BLOCK_FLAG_RAID6) {
699 FIXME("RAID6 not yet supported\n");
700 return NULL;
701 } else { // SINGLE
702 num_stripes = 1;
703 }
704
705 stripes = ExAllocatePoolWithTag(PagedPool, sizeof(stripe) * num_stripes, ALLOC_TAG);
706 if (!stripes) {
707 ERR("out of memory\n");
708 return NULL;
709 }
710
711 for (i = 0; i < num_stripes; i++) {
712 stripes[i].dh = NULL;
713
714 for (j = 0; j < Vcb->superblock.num_devices; j++) {
715 LIST_ENTRY* le = Vcb->devices[j].disk_holes.Flink;
716
717 while (le != &Vcb->devices[j].disk_holes) {
718 dh = CONTAINING_RECORD(le, disk_hole, listentry);
719
720 if (!dh->provisional) {
721 if (!stripes[i].dh || dh->size > stripes[i].dh->size) {
722 stripes[i].dh = dh;
723 stripes[i].device = &Vcb->devices[j];
724
725 if (stripes[i].dh->size >= max_stripe_size)
726 break;
727 }
728 }
729
730 le = le->Flink;
731 }
732
733 if (stripes[i].dh && stripes[i].dh->size >= max_stripe_size)
734 break;
735 }
736
737 if (stripes[i].dh) {
738 TRACE("good DH: device %llx, address %llx, size %llx\n", stripes[i].device->devitem.dev_id, stripes[i].dh->address, stripes[i].dh->size);
739 } else {
740 TRACE("good DH not found\n");
741 goto end;
742 }
743
744 add_provisional_disk_hole(Vcb, &stripes[i], max_stripe_size);
745 }
746
747 stripe_size = min(stripes[0].dh->size, max_stripe_size);
748 for (i = 1; i < num_stripes; i++) {
749 stripe_size = min(stripe_size, stripes[1].dh->size);
750 }
751 // FIXME - make sure stripe_size aligned properly
752 // FIXME - obey max_chunk_size
753
754 c = ExAllocatePoolWithTag(PagedPool, sizeof(chunk), ALLOC_TAG);
755 if (!c) {
756 ERR("out of memory\n");
757 goto end;
758 }
759
760 // add CHUNK_ITEM to tree 3
761
762 cisize = sizeof(CHUNK_ITEM) + (num_stripes * sizeof(CHUNK_ITEM_STRIPE));
763 ci = ExAllocatePoolWithTag(PagedPool, cisize, ALLOC_TAG);
764 if (!ci) {
765 ERR("out of memory\n");
766 goto end;
767 }
768
769 ci->size = stripe_size; // FIXME for RAID
770 ci->root_id = Vcb->extent_root->id;
771 ci->stripe_length = 0x10000; // FIXME? BTRFS_STRIPE_LEN in kernel
772 ci->type = flags;
773 ci->opt_io_alignment = ci->stripe_length;
774 ci->opt_io_width = ci->stripe_length;
775 ci->sector_size = stripes[0].device->devitem.minimal_io_size;
776 ci->num_stripes = num_stripes;
777 ci->sub_stripes = 1;
778
779 c->devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * num_stripes, ALLOC_TAG);
780 if (!c->devices) {
781 ERR("out of memory\n");
782 ExFreePool(ci);
783 goto end;
784 }
785
786 for (i = 0; i < num_stripes; i++) {
787 if (i == 0)
788 cis = (CHUNK_ITEM_STRIPE*)&ci[1];
789 else
790 cis = &cis[1];
791
792 cis->dev_id = stripes[i].device->devitem.dev_id;
793 cis->offset = stripes[i].dh->address;
794 cis->dev_uuid = stripes[i].device->devitem.device_uuid;
795
796 c->devices[i] = stripes[i].device;
797 }
798
799 logaddr = find_new_chunk_address(Vcb, ci->size);
800 if (logaddr == 0xffffffffffffffff) {
801 ERR("find_new_chunk_address failed\n");
802 ExFreePool(ci);
803 goto end;
804 }
805
806 if (!insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, logaddr, ci, cisize, NULL, rollback)) {
807 ERR("insert_tree_item failed\n");
808 ExFreePool(ci);
809 goto end;
810 }
811
812 if (flags & BLOCK_FLAG_SYSTEM) {
813 NTSTATUS Status = add_to_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, logaddr, ci, cisize);
814 if (!NT_SUCCESS(Status)) {
815 ERR("add_to_bootstrap returned %08x\n", Status);
816 goto end;
817 }
818 }
819
820 Vcb->superblock.chunk_root_generation = Vcb->superblock.generation;
821
822 c->chunk_item = ExAllocatePoolWithTag(PagedPool, cisize, ALLOC_TAG);
823 if (!c->chunk_item) {
824 ERR("out of memory\n");
825 goto end;
826 }
827
828 RtlCopyMemory(c->chunk_item, ci, cisize);
829 c->size = cisize;
830 c->offset = logaddr;
831 c->used = c->oldused = 0;
832 c->space_changed = FALSE;
833 c->cache_size = 0;
834 c->cache_inode = 0;
835 InitializeListHead(&c->space);
836
837 s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
838 if (!s) {
839 ERR("out of memory\n");
840 goto end;
841 }
842
843 s->offset = c->offset;
844 s->size = c->chunk_item->size;
845 s->type = SPACE_TYPE_FREE;
846 InsertTailList(&c->space, &s->list_entry);
847
848 protect_superblocks(Vcb, c);
849
850 // add BLOCK_GROUP_ITEM to tree 2
851
852 bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG);
853 if (!bgi) {
854 ERR("out of memory\n");
855 goto end;
856 }
857
858 bgi->used = 0;
859 bgi->chunk_tree = 0x100;
860 bgi->flags = flags;
861
862 if (!insert_tree_item(Vcb, Vcb->extent_root, logaddr, TYPE_BLOCK_GROUP_ITEM, ci->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, rollback)) {
863 ERR("insert_tree_item failed\n");
864 ExFreePool(bgi);
865 goto end;
866 }
867
868 // add DEV_EXTENTs to tree 4
869
870 for (i = 0; i < num_stripes; i++) {
871 DEV_EXTENT* de;
872
873 de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG);
874 if (!de) {
875 ERR("out of memory\n");
876 goto end;
877 }
878
879 de->chunktree = Vcb->chunk_root->id;
880 de->objid = 0x100;
881 de->address = logaddr;
882 de->length = ci->size;
883 de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid;
884
885 if (!insert_tree_item(Vcb, Vcb->dev_root, stripes[i].device->devitem.dev_id, TYPE_DEV_EXTENT, stripes[i].dh->address, de, sizeof(DEV_EXTENT), NULL, rollback)) {
886 ERR("insert_tree_item failed\n");
887 ExFreePool(de);
888 goto end;
889 }
890
891 if (!increase_dev_item_used(Vcb, stripes[i].device, ci->size, rollback)) {
892 ERR("increase_dev_item_used failed\n");
893 goto end;
894 }
895 }
896
897 for (i = 0; i < num_stripes; i++) {
898 BOOL b = FALSE;
899 for (j = 0; j < i; j++) {
900 if (stripes[j].device == stripes[i].device)
901 b = TRUE;
902 }
903
904 if (!b)
905 reset_disk_holes(stripes[i].device, TRUE);
906 }
907
908 success = TRUE;
909
910 end:
911 ExFreePool(stripes);
912
913 if (!success) {
914 for (i = 0; i < num_stripes; i++) {
915 BOOL b = FALSE;
916 for (j = 0; j < i; j++) {
917 if (stripes[j].device == stripes[i].device)
918 b = TRUE;
919 }
920
921 if (!b)
922 reset_disk_holes(stripes[i].device, FALSE);
923 }
924
925 if (c) ExFreePool(c);
926 if (s) ExFreePool(s);
927 } else
928 InsertTailList(&Vcb->chunks, &c->list_entry);
929
930 return success ? c : NULL;
931 }
932
933 NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, UINT32 length) {
934 KEY searchkey;
935 traverse_ptr tp;
936 CHUNK_ITEM2* ci;
937 NTSTATUS Status;
938 UINT32 i;
939
940 TRACE("(%p, %llx, %p, %x)\n", Vcb, address, data, length);
941
942 // FIXME - use version cached in Vcb
943
944 searchkey.obj_id = 0x100; // fixed?
945 searchkey.obj_type = TYPE_CHUNK_ITEM;
946 searchkey.offset = address;
947
948 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
949 if (!NT_SUCCESS(Status)) {
950 ERR("error - find_item returned %08x\n", Status);
951 return Status;
952 }
953
954 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
955 ERR("error - unexpected item in chunk tree\n");
956 Status = STATUS_INTERNAL_ERROR;
957 goto end;
958 }
959
960 if (tp.item->size < sizeof(CHUNK_ITEM2)) {
961 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(CHUNK_ITEM2));
962 Status = STATUS_INTERNAL_ERROR;
963 goto end;
964 }
965
966 ci = (CHUNK_ITEM2*)tp.item->data;
967
968 if (tp.item->key.offset > address || tp.item->key.offset + ci->ci.size < address) {
969 ERR("error - address %llx was out of chunk bounds\n", address);
970 Status = STATUS_INTERNAL_ERROR;
971 goto end;
972 }
973
974 // FIXME - only do this for chunks marked DUPLICATE?
975 // FIXME - for multiple writes, if PENDING do waits at the end
976 // FIXME - work with RAID
977 for (i = 0; i < ci->ci.num_stripes; i++) {
978 Status = write_data_phys(Vcb->devices[0].devobj, address - tp.item->key.offset + ci->stripes[i].offset, data, length);
979 if (!NT_SUCCESS(Status)) {
980 ERR("error - write_data_phys failed\n");
981 goto end;
982 }
983 }
984
985 end:
986
987 return Status;
988 }
989
990 static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) {
991 LIST_ENTRY *le, *nextle;
992 space *s, *s2;
993
994 // // TESTING
995 // le = c->space.Flink;
996 // while (le != &c->space) {
997 // s = CONTAINING_RECORD(le, space, list_entry);
998 //
999 // TRACE("%x,%x,%x\n", (UINT32)s->offset, (UINT32)s->size, s->type);
1000 //
1001 // le = le->Flink;
1002 // }
1003
1004 le = c->space.Flink;
1005 while (le != &c->space) {
1006 s = CONTAINING_RECORD(le, space, list_entry);
1007 nextle = le->Flink;
1008
1009 if (s->type == SPACE_TYPE_DELETING)
1010 s->type = SPACE_TYPE_FREE;
1011 else if (s->type == SPACE_TYPE_WRITING)
1012 s->type = SPACE_TYPE_USED;
1013
1014 if (le->Blink != &c->space) {
1015 s2 = CONTAINING_RECORD(le->Blink, space, list_entry);
1016
1017 if (s2->type == s->type) { // do merge
1018 s2->size += s->size;
1019
1020 RemoveEntryList(&s->list_entry);
1021 ExFreePool(s);
1022 }
1023 }
1024
1025 le = nextle;
1026 }
1027
1028 // le = c->space.Flink;
1029 // while (le != &c->space) {
1030 // s = CONTAINING_RECORD(le, space, list_entry);
1031 //
1032 // TRACE("%x,%x,%x\n", (UINT32)s->offset, (UINT32)s->size, s->type);
1033 //
1034 // le = le->Flink;
1035 // }
1036 }
1037
1038 static void clean_space_cache(device_extension* Vcb) {
1039 LIST_ENTRY* le;
1040 chunk* c;
1041
1042 TRACE("(%p)\n", Vcb);
1043
1044 le = Vcb->chunks.Flink;
1045 while (le != &Vcb->chunks) {
1046 c = CONTAINING_RECORD(le, chunk, list_entry);
1047
1048 if (c->space_changed) {
1049 clean_space_cache_chunk(Vcb, c);
1050 c->space_changed = FALSE;
1051 }
1052
1053 le = le->Flink;
1054 }
1055 }
1056
1057 static BOOL trees_consistent(device_extension* Vcb, LIST_ENTRY* rollback) {
1058 ULONG maxsize = Vcb->superblock.node_size - sizeof(tree_header);
1059 LIST_ENTRY* le;
1060
1061 le = Vcb->trees.Flink;
1062 while (le != &Vcb->trees) {
1063 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1064
1065 if (t->write) {
1066 if (t->header.num_items == 0 && t->parent)
1067 return FALSE;
1068
1069 if (t->size > maxsize)
1070 return FALSE;
1071
1072 if (!t->has_new_address)
1073 return FALSE;
1074 }
1075
1076 le = le->Flink;
1077 }
1078
1079 return TRUE;
1080 }
1081
1082 static NTSTATUS add_parents(device_extension* Vcb, LIST_ENTRY* rollback) {
1083 LIST_ENTRY* le;
1084 NTSTATUS Status;
1085
1086 le = Vcb->trees.Flink;
1087 while (le != &Vcb->trees) {
1088 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1089
1090 if (t->write) {
1091 if (t->parent) {
1092 if (!t->parent->write) {
1093 t->parent->write = TRUE;
1094 Vcb->write_trees++;
1095 }
1096 } else if (t->root != Vcb->chunk_root && t->root != Vcb->root_root) {
1097 KEY searchkey;
1098 traverse_ptr tp;
1099
1100 searchkey.obj_id = t->root->id;
1101 searchkey.obj_type = TYPE_ROOT_ITEM;
1102 searchkey.offset = 0xffffffffffffffff;
1103
1104 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
1105 if (!NT_SUCCESS(Status)) {
1106 ERR("error - find_item returned %08x\n", Status);
1107 return Status;
1108 }
1109
1110 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1111 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1112 return STATUS_INTERNAL_ERROR;
1113 }
1114
1115 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed
1116 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
1117 if (!ri) {
1118 ERR("out of memory\n");
1119 return STATUS_INSUFFICIENT_RESOURCES;
1120 }
1121
1122 if (tp.item->size > 0)
1123 RtlCopyMemory(ri, tp.item->data, tp.item->size);
1124
1125 RtlZeroMemory(((UINT8*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size);
1126
1127 delete_tree_item(Vcb, &tp, rollback);
1128
1129 if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, rollback)) {
1130 ERR("insert_tree_item failed\n");
1131 return STATUS_INTERNAL_ERROR;
1132 }
1133 } else {
1134 if (!tp.tree->write) {
1135 tp.tree->write = TRUE;
1136 Vcb->write_trees++;
1137 }
1138 }
1139 }
1140 }
1141
1142 le = le->Flink;
1143 }
1144
1145 return STATUS_SUCCESS;
1146 }
1147
1148 static void add_parents_to_cache(device_extension* Vcb, tree* t) {
1149 KEY searchkey;
1150 traverse_ptr tp;
1151 NTSTATUS Status;
1152
1153 while (t->parent) {
1154 t = t->parent;
1155
1156 if (!t->write) {
1157 t->write = TRUE;
1158 Vcb->write_trees++;
1159 }
1160 }
1161
1162 if (t->root == Vcb->root_root || t->root == Vcb->chunk_root)
1163 return;
1164
1165 searchkey.obj_id = t->root->id;
1166 searchkey.obj_type = TYPE_ROOT_ITEM;
1167 searchkey.offset = 0xffffffffffffffff;
1168
1169 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
1170 if (!NT_SUCCESS(Status)) {
1171 ERR("error - find_item returned %08x\n", Status);
1172 return;
1173 }
1174
1175 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1176 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1177 return;
1178 }
1179
1180 if (!tp.tree->write) {
1181 tp.tree->write = TRUE;
1182 Vcb->write_trees++;
1183 }
1184 }
1185
1186 static BOOL insert_tree_extent_skinny(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64 address, LIST_ENTRY* rollback) {
1187 EXTENT_ITEM_SKINNY_METADATA* eism;
1188 traverse_ptr insert_tp;
1189
1190 eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
1191 if (!eism) {
1192 ERR("out of memory\n");
1193 return FALSE;
1194 }
1195
1196 eism->ei.refcount = 1;
1197 eism->ei.generation = Vcb->superblock.generation;
1198 eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
1199 eism->type = TYPE_TREE_BLOCK_REF;
1200 eism->tbr.offset = root_id;
1201
1202 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, rollback)) {
1203 ERR("insert_tree_item failed\n");
1204 ExFreePool(eism);
1205 return FALSE;
1206 }
1207
1208 add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_WRITING);
1209
1210 add_parents_to_cache(Vcb, insert_tp.tree);
1211
1212 return TRUE;
1213 }
1214
1215 static BOOL insert_tree_extent(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64* new_address, LIST_ENTRY* rollback) {
1216 UINT64 address;
1217 EXTENT_ITEM_TREE2* eit2;
1218 traverse_ptr insert_tp;
1219
1220 TRACE("(%p, %x, %llx, %p, %p, %p, %p)\n", Vcb, level, root_id, c, new_address, rollback);
1221
1222 if (!find_address_in_chunk(Vcb, c, Vcb->superblock.node_size, &address))
1223 return FALSE;
1224
1225 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
1226 BOOL b = insert_tree_extent_skinny(Vcb, level, root_id, c, address, rollback);
1227
1228 if (b)
1229 *new_address = address;
1230
1231 return b;
1232 }
1233
1234 eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
1235 if (!eit2) {
1236 ERR("out of memory\n");
1237 return FALSE;
1238 }
1239
1240 eit2->eit.extent_item.refcount = 1;
1241 eit2->eit.extent_item.generation = Vcb->superblock.generation;
1242 eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
1243 // eit2->eit.firstitem = wt->firstitem;
1244 eit2->eit.level = level;
1245 eit2->type = TYPE_TREE_BLOCK_REF;
1246 eit2->tbr.offset = root_id;
1247
1248 // #ifdef DEBUG_PARANOID
1249 // if (wt->firstitem.obj_type == 0xcc) { // TESTING
1250 // ERR("error - firstitem not set (wt = %p, tree = %p, address = %x)\n", wt, wt->tree, (UINT32)address);
1251 // ERR("num_items = %u, level = %u, root = %x, delete = %u\n", wt->tree->header.num_items, wt->tree->header.level, (UINT32)wt->tree->root->id, wt->delete);
1252 // int3;
1253 // }
1254 // #endif
1255
1256 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, rollback)) {
1257 ERR("insert_tree_item failed\n");
1258 ExFreePool(eit2);
1259 return FALSE;
1260 }
1261
1262 add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_WRITING);
1263
1264 add_parents_to_cache(Vcb, insert_tp.tree);
1265
1266 *new_address = address;
1267
1268 return TRUE;
1269 }
1270
1271 NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, LIST_ENTRY* rollback) {
1272 chunk *origchunk = NULL, *c;
1273 LIST_ENTRY* le;
1274 UINT64 flags = t->flags, addr;
1275
1276 if (flags == 0)
1277 flags = (t->root->id == BTRFS_ROOT_CHUNK ? BLOCK_FLAG_SYSTEM : BLOCK_FLAG_METADATA) | BLOCK_FLAG_DUPLICATE;
1278
1279 // TRACE("flags = %x\n", (UINT32)wt->flags);
1280
1281 // if (!chunk_test) { // TESTING
1282 // if ((c = alloc_chunk(Vcb, flags))) {
1283 // if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
1284 // if (insert_tree_extent(Vcb, t, c)) {
1285 // chunk_test = TRUE;
1286 // return STATUS_SUCCESS;
1287 // }
1288 // }
1289 // }
1290 // }
1291
1292 if (t->has_address) {
1293 origchunk = get_chunk_from_address(Vcb, t->header.address);
1294
1295 if (insert_tree_extent(Vcb, t->header.level, t->header.tree_id, origchunk, &addr, rollback)) {
1296 t->new_address = addr;
1297 t->has_new_address = TRUE;
1298 return STATUS_SUCCESS;
1299 }
1300 }
1301
1302 le = Vcb->chunks.Flink;
1303 while (le != &Vcb->chunks) {
1304 c = CONTAINING_RECORD(le, chunk, list_entry);
1305
1306 if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
1307 if (insert_tree_extent(Vcb, t->header.level, t->header.tree_id, c, &addr, rollback)) {
1308 t->new_address = addr;
1309 t->has_new_address = TRUE;
1310 return STATUS_SUCCESS;
1311 }
1312 }
1313
1314 le = le->Flink;
1315 }
1316
1317 // allocate new chunk if necessary
1318 if ((c = alloc_chunk(Vcb, flags, rollback))) {
1319 if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
1320 if (insert_tree_extent(Vcb, t->header.level, t->header.tree_id, c, &addr, rollback)) {
1321 t->new_address = addr;
1322 t->has_new_address = TRUE;
1323 return STATUS_SUCCESS;
1324 }
1325 }
1326 }
1327
1328 ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size);
1329
1330 return STATUS_DISK_FULL;
1331 }
1332
1333 static BOOL reduce_tree_extent_skinny(device_extension* Vcb, UINT64 address, tree* t, LIST_ENTRY* rollback) {
1334 KEY searchkey;
1335 traverse_ptr tp;
1336 chunk* c;
1337 EXTENT_ITEM_SKINNY_METADATA* eism;
1338 NTSTATUS Status;
1339
1340 searchkey.obj_id = address;
1341 searchkey.obj_type = TYPE_METADATA_ITEM;
1342 searchkey.offset = 0xffffffffffffffff;
1343
1344 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
1345 if (!NT_SUCCESS(Status)) {
1346 ERR("error - find_item returned %08x\n", Status);
1347 return FALSE;
1348 }
1349
1350 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1351 TRACE("could not find %llx,%x,%llx in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1352 return FALSE;
1353 }
1354
1355 if (tp.item->size < sizeof(EXTENT_ITEM_SKINNY_METADATA)) {
1356 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_SKINNY_METADATA));
1357 return FALSE;
1358 }
1359
1360 delete_tree_item(Vcb, &tp, rollback);
1361
1362 eism = (EXTENT_ITEM_SKINNY_METADATA*)tp.item->data;
1363 if (t && t->header.level == 0 && eism->ei.flags & EXTENT_ITEM_SHARED_BACKREFS && eism->type == TYPE_TREE_BLOCK_REF) {
1364 // convert shared data extents
1365
1366 LIST_ENTRY* le = t->itemlist.Flink;
1367 while (le != &t->itemlist) {
1368 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1369
1370 TRACE("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1371
1372 if (!td->ignore && !td->inserted) {
1373 if (td->key.obj_type == TYPE_EXTENT_DATA) {
1374 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1375
1376 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1377 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1378
1379 if (ed2->address != 0) {
1380 TRACE("trying to convert shared data extent %llx,%llx\n", ed2->address, ed2->size);
1381 convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
1382 }
1383 }
1384 }
1385 }
1386
1387 le = le->Flink;
1388 }
1389
1390 t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
1391 }
1392
1393 c = get_chunk_from_address(Vcb, address);
1394
1395 if (c) {
1396 decrease_chunk_usage(c, Vcb->superblock.node_size);
1397
1398 add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_DELETING);
1399 } else
1400 ERR("could not find chunk for address %llx\n", address);
1401
1402 return TRUE;
1403 }
1404
1405 // TESTING
1406 // static void check_tree_num_items(tree* t) {
1407 // LIST_ENTRY* le2;
1408 // UINT32 ni;
1409 //
1410 // le2 = t->itemlist.Flink;
1411 // ni = 0;
1412 // while (le2 != &t->itemlist) {
1413 // tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1414 // if (!td->ignore)
1415 // ni++;
1416 // le2 = le2->Flink;
1417 // }
1418 //
1419 // if (t->header.num_items != ni) {
1420 // ERR("tree %p not okay: num_items was %x, expecting %x\n", t, ni, t->header.num_items);
1421 // int3;
1422 // } else {
1423 // ERR("tree %p okay\n", t);
1424 // }
1425 // }
1426 //
1427 // static void check_trees_num_items(LIST_ENTRY* tc) {
1428 // LIST_ENTRY* le = tc->Flink;
1429 // while (le != tc) {
1430 // tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
1431 //
1432 // check_tree_num_items(tc2->tree);
1433 //
1434 // le = le->Flink;
1435 // }
1436 // }
1437
1438 static void convert_old_tree_extent(device_extension* Vcb, tree_data* td, tree* t, LIST_ENTRY* rollback) {
1439 KEY searchkey;
1440 traverse_ptr tp, tp2, insert_tp;
1441 EXTENT_REF_V0* erv0;
1442 NTSTATUS Status;
1443
1444 TRACE("(%p, %p, %p)\n", Vcb, td, t);
1445
1446 searchkey.obj_id = td->treeholder.address;
1447 searchkey.obj_type = TYPE_EXTENT_REF_V0;
1448 searchkey.offset = 0xffffffffffffffff;
1449
1450 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
1451 if (!NT_SUCCESS(Status)) {
1452 ERR("error - find_item returned %08x\n", Status);
1453 return;
1454 }
1455
1456 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1457 TRACE("could not find EXTENT_REF_V0 for %llx\n", searchkey.obj_id);
1458 return;
1459 }
1460
1461 searchkey.obj_id = td->treeholder.address;
1462 searchkey.obj_type = TYPE_EXTENT_ITEM;
1463 searchkey.offset = Vcb->superblock.node_size;
1464
1465 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
1466 if (!NT_SUCCESS(Status)) {
1467 ERR("error - find_item returned %08x\n", Status);
1468 return;
1469 }
1470
1471 if (keycmp(&searchkey, &tp2.item->key)) {
1472 ERR("could not find %llx,%x,%llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1473 return;
1474 }
1475
1476 if (tp.item->size < sizeof(EXTENT_REF_V0)) {
1477 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_REF_V0));
1478 return;
1479 }
1480
1481 erv0 = (EXTENT_REF_V0*)tp.item->data;
1482
1483 delete_tree_item(Vcb, &tp, rollback);
1484 delete_tree_item(Vcb, &tp2, rollback);
1485
1486 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
1487 EXTENT_ITEM_SKINNY_METADATA* eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
1488
1489 if (!eism) {
1490 ERR("out of memory\n");
1491 return;
1492 }
1493
1494 eism->ei.refcount = 1;
1495 eism->ei.generation = erv0->gen;
1496 eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
1497 eism->type = TYPE_TREE_BLOCK_REF;
1498 eism->tbr.offset = t->header.tree_id;
1499
1500 if (!insert_tree_item(Vcb, Vcb->extent_root, td->treeholder.address, TYPE_METADATA_ITEM, t->header.level -1, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, rollback)) {
1501 ERR("insert_tree_item failed\n");
1502 return;
1503 }
1504 } else {
1505 EXTENT_ITEM_TREE2* eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
1506
1507 if (!eit2) {
1508 ERR("out of memory\n");
1509 return;
1510 }
1511
1512 eit2->eit.extent_item.refcount = 1;
1513 eit2->eit.extent_item.generation = erv0->gen;
1514 eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
1515 eit2->eit.firstitem = td->key;
1516 eit2->eit.level = t->header.level - 1;
1517 eit2->type = TYPE_TREE_BLOCK_REF;
1518 eit2->tbr.offset = t->header.tree_id;
1519
1520 if (!insert_tree_item(Vcb, Vcb->extent_root, td->treeholder.address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, rollback)) {
1521 ERR("insert_tree_item failed\n");
1522 return;
1523 }
1524 }
1525
1526 add_parents_to_cache(Vcb, insert_tp.tree);
1527 add_parents_to_cache(Vcb, tp.tree);
1528 add_parents_to_cache(Vcb, tp2.tree);
1529 }
1530
1531 static NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* t, LIST_ENTRY* rollback) {
1532 KEY searchkey;
1533 traverse_ptr tp;
1534 EXTENT_ITEM* ei;
1535 EXTENT_ITEM_V0* eiv0;
1536 chunk* c;
1537 NTSTATUS Status;
1538
1539 // FIXME - deal with refcounts > 1
1540
1541 TRACE("(%p, %llx, %p)\n", Vcb, address, t);
1542
1543 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
1544 if (reduce_tree_extent_skinny(Vcb, address, t, rollback)) {
1545 return STATUS_SUCCESS;
1546 }
1547 }
1548
1549 searchkey.obj_id = address;
1550 searchkey.obj_type = TYPE_EXTENT_ITEM;
1551 searchkey.offset = Vcb->superblock.node_size;
1552
1553 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
1554 if (!NT_SUCCESS(Status)) {
1555 ERR("error - find_item returned %08x\n", Status);
1556 return Status;
1557 }
1558
1559 if (keycmp(&tp.item->key, &searchkey)) {
1560 ERR("could not find %llx,%x,%llx in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
1561 int3;
1562 return STATUS_INTERNAL_ERROR;
1563 }
1564
1565 if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
1566 eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
1567
1568 if (eiv0->refcount > 1) {
1569 FIXME("FIXME - cannot deal with refcounts larger than 1 at present (eiv0->refcount == %llx)\n", eiv0->refcount);
1570 return STATUS_INTERNAL_ERROR;
1571 }
1572 } else {
1573 if (tp.item->size < sizeof(EXTENT_ITEM)) {
1574 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
1575 return STATUS_INTERNAL_ERROR;
1576 }
1577
1578 ei = (EXTENT_ITEM*)tp.item->data;
1579
1580 if (ei->refcount > 1) {
1581 FIXME("FIXME - cannot deal with refcounts larger than 1 at present (ei->refcount == %llx)\n", ei->refcount);
1582 return STATUS_INTERNAL_ERROR;
1583 }
1584
1585 if (t && t->header.level == 0 && ei->flags & EXTENT_ITEM_SHARED_BACKREFS) {
1586 // convert shared data extents
1587
1588 LIST_ENTRY* le = t->itemlist.Flink;
1589 while (le != &t->itemlist) {
1590 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1591
1592 TRACE("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1593
1594 if (!td->ignore && !td->inserted) {
1595 if (td->key.obj_type == TYPE_EXTENT_DATA) {
1596 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1597
1598 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1599 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1600
1601 if (ed2->address != 0) {
1602 TRACE("trying to convert shared data extent %llx,%llx\n", ed2->address, ed2->size);
1603 convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
1604 }
1605 }
1606 }
1607 }
1608
1609 le = le->Flink;
1610 }
1611
1612 t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
1613 }
1614 }
1615
1616 delete_tree_item(Vcb, &tp, rollback);
1617
1618 // if EXTENT_ITEM_V0, delete corresponding B4 item
1619 if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
1620 traverse_ptr tp2;
1621
1622 searchkey.obj_id = address;
1623 searchkey.obj_type = TYPE_EXTENT_REF_V0;
1624 searchkey.offset = 0xffffffffffffffff;
1625
1626 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
1627 if (!NT_SUCCESS(Status)) {
1628 ERR("error - find_item returned %08x\n", Status);
1629 return Status;
1630 }
1631
1632 if (tp2.item->key.obj_id == searchkey.obj_id && tp2.item->key.obj_type == searchkey.obj_type) {
1633 delete_tree_item(Vcb, &tp2, rollback);
1634 }
1635 }
1636
1637 if (t && !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1638 LIST_ENTRY* le;
1639
1640 // when writing old internal trees, convert related extents
1641
1642 le = t->itemlist.Flink;
1643 while (le != &t->itemlist) {
1644 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1645
1646 // ERR("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1647
1648 if (!td->ignore && !td->inserted) {
1649 if (t->header.level > 0) {
1650 convert_old_tree_extent(Vcb, td, t, rollback);
1651 } else if (td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA)) {
1652 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1653
1654 if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1655 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1656
1657 if (ed2->address != 0) {
1658 TRACE("trying to convert old data extent %llx,%llx\n", ed2->address, ed2->size);
1659 convert_old_data_extent(Vcb, ed2->address, ed2->size, rollback);
1660 }
1661 }
1662 }
1663 }
1664
1665 le = le->Flink;
1666 }
1667 }
1668
1669 c = get_chunk_from_address(Vcb, address);
1670
1671 if (c) {
1672 decrease_chunk_usage(c, tp.item->key.offset);
1673
1674 add_to_space_list(c, address, tp.item->key.offset, SPACE_TYPE_DELETING);
1675 } else
1676 ERR("could not find chunk for address %llx\n", address);
1677
1678 return STATUS_SUCCESS;
1679 }
1680
1681 static NTSTATUS allocate_tree_extents(device_extension* Vcb, LIST_ENTRY* rollback) {
1682 LIST_ENTRY* le;
1683 NTSTATUS Status;
1684
1685 TRACE("(%p)\n", Vcb);
1686
1687 le = Vcb->trees.Flink;
1688 while (le != &Vcb->trees) {
1689 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1690
1691 if (t->write && !t->has_new_address) {
1692 chunk* c;
1693
1694 Status = get_tree_new_address(Vcb, t, rollback);
1695 if (!NT_SUCCESS(Status)) {
1696 ERR("get_tree_new_address returned %08x\n", Status);
1697 return Status;
1698 }
1699
1700 TRACE("allocated extent %llx\n", t->new_address);
1701
1702 if (t->has_address) {
1703 Status = reduce_tree_extent(Vcb, t->header.address, t, rollback);
1704
1705 if (!NT_SUCCESS(Status)) {
1706 ERR("reduce_tree_extent returned %08x\n", Status);
1707 return Status;
1708 }
1709 }
1710
1711 c = get_chunk_from_address(Vcb, t->new_address);
1712
1713 if (c) {
1714 increase_chunk_usage(c, Vcb->superblock.node_size);
1715 } else {
1716 ERR("could not find chunk for address %llx\n", t->new_address);
1717 return STATUS_INTERNAL_ERROR;
1718 }
1719 }
1720
1721 le = le->Flink;
1722 }
1723
1724 return STATUS_SUCCESS;
1725 }
1726
1727 static NTSTATUS update_root_root(device_extension* Vcb, LIST_ENTRY* rollback) {
1728 LIST_ENTRY* le;
1729 NTSTATUS Status;
1730
1731 TRACE("(%p)\n", Vcb);
1732
1733 le = Vcb->trees.Flink;
1734 while (le != &Vcb->trees) {
1735 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1736
1737 if (t->write && !t->parent) {
1738 if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
1739 KEY searchkey;
1740 traverse_ptr tp;
1741
1742 searchkey.obj_id = t->root->id;
1743 searchkey.obj_type = TYPE_ROOT_ITEM;
1744 searchkey.offset = 0xffffffffffffffff;
1745
1746 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
1747 if (!NT_SUCCESS(Status)) {
1748 ERR("error - find_item returned %08x\n", Status);
1749 return Status;
1750 }
1751
1752 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1753 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1754 return STATUS_INTERNAL_ERROR;
1755 }
1756
1757 TRACE("updating the address for root %llx to %llx\n", searchkey.obj_id, t->new_address);
1758
1759 t->root->root_item.block_number = t->new_address;
1760 t->root->root_item.root_level = t->header.level;
1761 t->root->root_item.generation = Vcb->superblock.generation;
1762 t->root->root_item.generation2 = Vcb->superblock.generation;
1763
1764 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry
1765 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
1766
1767 if (!ri) {
1768 ERR("out of memory\n");
1769 return STATUS_INSUFFICIENT_RESOURCES;
1770 }
1771
1772 RtlCopyMemory(ri, &t->root->root_item, sizeof(ROOT_ITEM));
1773
1774 delete_tree_item(Vcb, &tp, rollback);
1775
1776 if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, 0, ri, sizeof(ROOT_ITEM), NULL, rollback)) {
1777 ERR("insert_tree_item failed\n");
1778 return STATUS_INTERNAL_ERROR;
1779 }
1780 } else
1781 RtlCopyMemory(tp.item->data, &t->root->root_item, sizeof(ROOT_ITEM));
1782 }
1783
1784 t->root->treeholder.address = t->new_address;
1785 }
1786
1787 le = le->Flink;
1788 }
1789
1790 Status = update_chunk_caches(Vcb, rollback);
1791 if (!NT_SUCCESS(Status)) {
1792 ERR("update_chunk_caches returned %08x\n", Status);
1793 return Status;
1794 }
1795
1796 return STATUS_SUCCESS;
1797 }
1798
1799 static NTSTATUS STDCALL write_tree_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
1800 write_tree_stripe* stripe = conptr;
1801 write_tree_context* context = (write_tree_context*)stripe->context;
1802 LIST_ENTRY* le;
1803 BOOL complete;
1804
1805 if (stripe->status == WriteTreeStatus_Cancelling) {
1806 stripe->status = WriteTreeStatus_Cancelled;
1807 goto end;
1808 }
1809
1810 stripe->iosb = Irp->IoStatus;
1811
1812 if (NT_SUCCESS(Irp->IoStatus.Status)) {
1813 stripe->status = WriteTreeStatus_Success;
1814 } else {
1815 le = context->stripes.Flink;
1816
1817 stripe->status = WriteTreeStatus_Error;
1818
1819 while (le != &context->stripes) {
1820 write_tree_stripe* s2 = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
1821
1822 if (s2->status == WriteTreeStatus_Pending) {
1823 s2->status = WriteTreeStatus_Cancelling;
1824 IoCancelIrp(s2->Irp);
1825 }
1826
1827 le = le->Flink;
1828 }
1829 }
1830
1831 end:
1832 le = context->stripes.Flink;
1833 complete = TRUE;
1834
1835 while (le != &context->stripes) {
1836 write_tree_stripe* s2 = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
1837
1838 if (s2->status == WriteTreeStatus_Pending || s2->status == WriteTreeStatus_Cancelling) {
1839 complete = FALSE;
1840 break;
1841 }
1842
1843 le = le->Flink;
1844 }
1845
1846 if (complete)
1847 KeSetEvent(&context->Event, 0, FALSE);
1848
1849 return STATUS_MORE_PROCESSING_REQUIRED;
1850 }
1851
1852 NTSTATUS write_tree(device_extension* Vcb, UINT64 addr, UINT8* data, write_tree_context* wtc) {
1853 chunk* c;
1854 CHUNK_ITEM_STRIPE* cis;
1855 write_tree_stripe* stripe;
1856 UINT64 i;
1857
1858 c = get_chunk_from_address(Vcb, addr);
1859
1860 if (!c) {
1861 ERR("get_chunk_from_address failed\n");
1862 return STATUS_INTERNAL_ERROR;
1863 }
1864
1865 cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1866
1867 // FIXME - make this work with RAID
1868
1869 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1870 PIO_STACK_LOCATION IrpSp;
1871
1872 // FIXME - handle missing devices
1873
1874 stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_tree_stripe), ALLOC_TAG);
1875 if (!stripe) {
1876 ERR("out of memory\n");
1877 return STATUS_INSUFFICIENT_RESOURCES;
1878 }
1879
1880 stripe->context = (struct write_tree_context*)wtc;
1881 stripe->buf = data;
1882 stripe->device = c->devices[i];
1883 RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK));
1884 stripe->status = WriteTreeStatus_Pending;
1885
1886 stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, FALSE);
1887
1888 if (!stripe->Irp) {
1889 ERR("IoAllocateIrp failed\n");
1890 return STATUS_INTERNAL_ERROR;
1891 }
1892
1893 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
1894 IrpSp->MajorFunction = IRP_MJ_WRITE;
1895
1896 if (stripe->device->devobj->Flags & DO_BUFFERED_IO) {
1897 stripe->Irp->AssociatedIrp.SystemBuffer = data;
1898
1899 stripe->Irp->Flags = IRP_BUFFERED_IO;
1900 } else if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
1901 stripe->Irp->MdlAddress = IoAllocateMdl(data, Vcb->superblock.node_size, FALSE, FALSE, NULL);
1902 if (!stripe->Irp->MdlAddress) {
1903 ERR("IoAllocateMdl failed\n");
1904 return STATUS_INTERNAL_ERROR;
1905 }
1906
1907 MmProbeAndLockPages(stripe->Irp->MdlAddress, KernelMode, IoWriteAccess);
1908 } else {
1909 stripe->Irp->UserBuffer = data;
1910 }
1911
1912 IrpSp->Parameters.Write.Length = Vcb->superblock.node_size;
1913 IrpSp->Parameters.Write.ByteOffset.QuadPart = addr - c->offset + cis[i].offset;
1914
1915 stripe->Irp->UserIosb = &stripe->iosb;
1916
1917 IoSetCompletionRoutine(stripe->Irp, write_tree_completion, stripe, TRUE, TRUE, TRUE);
1918
1919 InsertTailList(&wtc->stripes, &stripe->list_entry);
1920 }
1921
1922 return STATUS_SUCCESS;
1923 }
1924
1925 void free_write_tree_stripes(write_tree_context* wtc) {
1926 LIST_ENTRY *le, *le2, *nextle;
1927
1928 le = wtc->stripes.Flink;
1929 while (le != &wtc->stripes) {
1930 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
1931
1932 if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
1933 MmUnlockPages(stripe->Irp->MdlAddress);
1934 IoFreeMdl(stripe->Irp->MdlAddress);
1935 }
1936
1937 le = le->Flink;
1938 }
1939
1940 le = wtc->stripes.Flink;
1941 while (le != &wtc->stripes) {
1942 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
1943
1944 nextle = le->Flink;
1945
1946 if (stripe->buf) {
1947 ExFreePool(stripe->buf);
1948
1949 le2 = le->Flink;
1950 while (le2 != &wtc->stripes) {
1951 write_tree_stripe* s2 = CONTAINING_RECORD(le2, write_tree_stripe, list_entry);
1952
1953 if (s2->buf == stripe->buf)
1954 s2->buf = NULL;
1955
1956 le2 = le2->Flink;
1957 }
1958
1959 }
1960
1961 ExFreePool(stripe);
1962
1963 le = nextle;
1964 }
1965 }
1966
1967 static NTSTATUS write_trees(device_extension* Vcb) {
1968 UINT8 level;
1969 UINT8 *data, *body;
1970 UINT32 crc32;
1971 NTSTATUS Status;
1972 LIST_ENTRY* le;
1973 write_tree_context* wtc;
1974
1975 TRACE("(%p)\n", Vcb);
1976
1977 for (level = 0; level <= 255; level++) {
1978 BOOL nothing_found = TRUE;
1979
1980 TRACE("level = %u\n", level);
1981
1982 le = Vcb->trees.Flink;
1983 while (le != &Vcb->trees) {
1984 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1985
1986 if (t->write && t->header.level == level) {
1987 KEY firstitem, searchkey;
1988 LIST_ENTRY* le2;
1989 traverse_ptr tp;
1990 EXTENT_ITEM_TREE* eit;
1991
1992 if (!t->has_new_address) {
1993 ERR("error - tried to write tree with no new address\n");
1994 int3;
1995 }
1996
1997 le2 = t->itemlist.Flink;
1998 while (le2 != &t->itemlist) {
1999 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2000 if (!td->ignore) {
2001 firstitem = td->key;
2002 break;
2003 }
2004 le2 = le2->Flink;
2005 }
2006
2007 if (t->parent) {
2008 t->paritem->key = firstitem;
2009 t->paritem->treeholder.address = t->new_address;
2010 t->paritem->treeholder.generation = Vcb->superblock.generation;
2011 }
2012
2013 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
2014 searchkey.obj_id = t->new_address;
2015 searchkey.obj_type = TYPE_EXTENT_ITEM;
2016 searchkey.offset = Vcb->superblock.node_size;
2017
2018 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2019 if (!NT_SUCCESS(Status)) {
2020 ERR("error - find_item returned %08x\n", Status);
2021 return Status;
2022 }
2023
2024 if (keycmp(&searchkey, &tp.item->key)) {
2025 // traverse_ptr next_tp;
2026 // BOOL b;
2027 // tree_data* paritem;
2028
2029 ERR("could not find %llx,%x,%llx in extent_root (found %llx,%x,%llx instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
2030
2031 // searchkey.obj_id = 0;
2032 // searchkey.obj_type = 0;
2033 // searchkey.offset = 0;
2034 //
2035 // find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2036 //
2037 // paritem = NULL;
2038 // do {
2039 // if (tp.tree->paritem != paritem) {
2040 // paritem = tp.tree->paritem;
2041 // ERR("paritem: %llx,%x,%llx\n", paritem->key.obj_id, paritem->key.obj_type, paritem->key.offset);
2042 // }
2043 //
2044 // ERR("%llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
2045 //
2046 // b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
2047 // if (b) {
2048 // free_traverse_ptr(&tp);
2049 // tp = next_tp;
2050 // }
2051 // } while (b);
2052 //
2053 // free_traverse_ptr(&tp);
2054
2055 return STATUS_INTERNAL_ERROR;
2056 }
2057
2058 if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
2059 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
2060 return STATUS_INTERNAL_ERROR;
2061 }
2062
2063 eit = (EXTENT_ITEM_TREE*)tp.item->data;
2064 eit->firstitem = firstitem;
2065 }
2066
2067 nothing_found = FALSE;
2068 }
2069
2070 le = le->Flink;
2071 }
2072
2073 if (nothing_found)
2074 break;
2075 }
2076
2077 TRACE("allocated tree extents\n");
2078
2079 wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_tree_context), ALLOC_TAG);
2080 if (!wtc) {
2081 ERR("out of memory\n");
2082 return STATUS_INSUFFICIENT_RESOURCES;
2083 }
2084
2085 KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE);
2086 InitializeListHead(&wtc->stripes);
2087
2088 le = Vcb->trees.Flink;
2089 while (le != &Vcb->trees) {
2090 tree* t = CONTAINING_RECORD(le, tree, list_entry);
2091 #ifdef DEBUG_PARANOID
2092 UINT32 num_items = 0, size = 0;
2093 LIST_ENTRY* le2;
2094 BOOL crash = FALSE;
2095 #endif
2096
2097 if (t->write) {
2098 #ifdef DEBUG_PARANOID
2099 le2 = t->itemlist.Flink;
2100 while (le2 != &t->itemlist) {
2101 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2102 if (!td->ignore) {
2103 num_items++;
2104
2105 if (t->header.level == 0)
2106 size += td->size;
2107 }
2108 le2 = le2->Flink;
2109 }
2110
2111 if (t->header.level == 0)
2112 size += num_items * sizeof(leaf_node);
2113 else
2114 size += num_items * sizeof(internal_node);
2115
2116 if (num_items != t->header.num_items) {
2117 ERR("tree %llx, level %x: num_items was %x, expected %x\n", t->root->id, t->header.level, num_items, t->header.num_items);
2118 crash = TRUE;
2119 }
2120
2121 if (size != t->size) {
2122 ERR("tree %llx, level %x: size was %x, expected %x\n", t->root->id, t->header.level, size, t->size);
2123 crash = TRUE;
2124 }
2125
2126 if (t->header.num_items == 0 && t->parent) {
2127 ERR("tree %llx, level %x: tried to write empty tree with parent\n", t->root->id, t->header.level);
2128 crash = TRUE;
2129 }
2130
2131 if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
2132 ERR("tree %llx, level %x: tried to write overlarge tree (%x > %x)\n", t->root->id, t->header.level, t->size, Vcb->superblock.node_size - sizeof(tree_header));
2133 crash = TRUE;
2134 }
2135
2136 if (crash) {
2137 ERR("tree %p\n", t);
2138 le2 = t->itemlist.Flink;
2139 while (le2 != &t->itemlist) {
2140 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2141 if (!td->ignore) {
2142 ERR("%llx,%x,%llx inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted);
2143 }
2144 le2 = le2->Flink;
2145 }
2146 int3;
2147 }
2148 #endif
2149 t->header.address = t->new_address;
2150 t->header.generation = Vcb->superblock.generation;
2151 t->header.flags |= HEADER_FLAG_MIXED_BACKREF;
2152 t->has_address = TRUE;
2153
2154 data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
2155 if (!data) {
2156 ERR("out of memory\n");
2157 Status = STATUS_INSUFFICIENT_RESOURCES;
2158 goto end;
2159 }
2160
2161 body = data + sizeof(tree_header);
2162
2163 RtlCopyMemory(data, &t->header, sizeof(tree_header));
2164 RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header));
2165
2166 if (t->header.level == 0) {
2167 leaf_node* itemptr = (leaf_node*)body;
2168 int i = 0;
2169 LIST_ENTRY* le2;
2170 UINT8* dataptr = data + Vcb->superblock.node_size;
2171
2172 le2 = t->itemlist.Flink;
2173 while (le2 != &t->itemlist) {
2174 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2175 if (!td->ignore) {
2176 dataptr = dataptr - td->size;
2177
2178 itemptr[i].key = td->key;
2179 itemptr[i].offset = (UINT8*)dataptr - (UINT8*)body;
2180 itemptr[i].size = td->size;
2181 i++;
2182
2183 if (td->size > 0)
2184 RtlCopyMemory(dataptr, td->data, td->size);
2185 }
2186
2187 le2 = le2->Flink;
2188 }
2189 } else {
2190 internal_node* itemptr = (internal_node*)body;
2191 int i = 0;
2192 LIST_ENTRY* le2;
2193
2194 le2 = t->itemlist.Flink;
2195 while (le2 != &t->itemlist) {
2196 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
2197 if (!td->ignore) {
2198 itemptr[i].key = td->key;
2199 itemptr[i].address = td->treeholder.address;
2200 itemptr[i].generation = td->treeholder.generation;
2201 i++;
2202 }
2203
2204 le2 = le2->Flink;
2205 }
2206 }
2207
2208 crc32 = calc_crc32c(0xffffffff, (UINT8*)&((tree_header*)data)->fs_uuid, Vcb->superblock.node_size - sizeof(((tree_header*)data)->csum));
2209 crc32 = ~crc32;
2210 *((UINT32*)data) = crc32;
2211 TRACE("setting crc32 to %08x\n", crc32);
2212
2213 Status = write_tree(Vcb, t->new_address, data, wtc);
2214 if (!NT_SUCCESS(Status)) {
2215 ERR("write_tree returned %08x\n", Status);
2216 goto end;
2217 }
2218 }
2219
2220 le = le->Flink;
2221 }
2222
2223 Status = STATUS_SUCCESS;
2224
2225 if (wtc->stripes.Flink != &wtc->stripes) {
2226 // launch writes and wait
2227 le = wtc->stripes.Flink;
2228 while (le != &wtc->stripes) {
2229 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
2230
2231 IoCallDriver(stripe->device->devobj, stripe->Irp);
2232
2233 le = le->Flink;
2234 }
2235
2236 KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL);
2237
2238 le = wtc->stripes.Flink;
2239 while (le != &wtc->stripes) {
2240 write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
2241
2242 if (!NT_SUCCESS(stripe->iosb.Status)) {
2243 Status = stripe->iosb.Status;
2244 break;
2245 }
2246
2247 le = le->Flink;
2248 }
2249
2250 free_write_tree_stripes(wtc);
2251 }
2252
2253 end:
2254 ExFreePool(wtc);
2255
2256 return Status;
2257 }
2258
2259 static void update_backup_superblock(device_extension* Vcb, superblock_backup* sb) {
2260 KEY searchkey;
2261 traverse_ptr tp;
2262
2263 RtlZeroMemory(sb, sizeof(superblock_backup));
2264
2265 sb->root_tree_addr = Vcb->superblock.root_tree_addr;
2266 sb->root_tree_generation = Vcb->superblock.generation;
2267 sb->root_level = Vcb->superblock.root_level;
2268
2269 sb->chunk_tree_addr = Vcb->superblock.chunk_tree_addr;
2270 sb->chunk_tree_generation = Vcb->superblock.chunk_root_generation;
2271 sb->chunk_root_level = Vcb->superblock.chunk_root_level;
2272
2273 searchkey.obj_id = BTRFS_ROOT_EXTENT;
2274 searchkey.obj_type = TYPE_ROOT_ITEM;
2275 searchkey.offset = 0xffffffffffffffff;
2276
2277 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE))) {
2278 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2279 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2280
2281 sb->extent_tree_addr = ri->block_number;
2282 sb->extent_tree_generation = ri->generation;
2283 sb->extent_root_level = ri->root_level;
2284 }
2285 }
2286
2287 searchkey.obj_id = BTRFS_ROOT_FSTREE;
2288
2289 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE))) {
2290 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2291 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2292
2293 sb->fs_tree_addr = ri->block_number;
2294 sb->fs_tree_generation = ri->generation;
2295 sb->fs_root_level = ri->root_level;
2296 }
2297 }
2298
2299 searchkey.obj_id = BTRFS_ROOT_DEVTREE;
2300
2301 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE))) {
2302 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2303 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2304
2305 sb->dev_root_addr = ri->block_number;
2306 sb->dev_root_generation = ri->generation;
2307 sb->dev_root_level = ri->root_level;
2308 }
2309 }
2310
2311 searchkey.obj_id = BTRFS_ROOT_CHECKSUM;
2312
2313 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE))) {
2314 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2315 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2316
2317 sb->csum_root_addr = ri->block_number;
2318 sb->csum_root_generation = ri->generation;
2319 sb->csum_root_level = ri->root_level;
2320 }
2321 }
2322
2323 sb->total_bytes = Vcb->superblock.total_bytes;
2324 sb->bytes_used = Vcb->superblock.bytes_used;
2325 sb->num_devices = Vcb->superblock.num_devices;
2326 }
2327
2328 static NTSTATUS write_superblocks(device_extension* Vcb) {
2329 UINT64 i;
2330 NTSTATUS Status;
2331 LIST_ENTRY* le;
2332
2333 TRACE("(%p)\n", Vcb);
2334
2335 le = Vcb->trees.Flink;
2336 while (le != &Vcb->trees) {
2337 tree* t = CONTAINING_RECORD(le, tree, list_entry);
2338
2339 if (t->write && !t->parent) {
2340 if (t->root == Vcb->root_root) {
2341 Vcb->superblock.root_tree_addr = t->new_address;
2342 Vcb->superblock.root_level = t->header.level;
2343 } else if (t->root == Vcb->chunk_root) {
2344 Vcb->superblock.chunk_tree_addr = t->new_address;
2345 Vcb->superblock.chunk_root_generation = t->header.generation;
2346 Vcb->superblock.chunk_root_level = t->header.level;
2347 }
2348 }
2349
2350 le = le->Flink;
2351 }
2352
2353 for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS - 1; i++) {
2354 RtlCopyMemory(&Vcb->superblock.backup[i], &Vcb->superblock.backup[i+1], sizeof(superblock_backup));
2355 }
2356
2357 update_backup_superblock(Vcb, &Vcb->superblock.backup[BTRFS_NUM_BACKUP_ROOTS - 1]);
2358
2359 for (i = 0; i < Vcb->superblock.num_devices; i++) {
2360 if (Vcb->devices[i].devobj) {
2361 Status = write_superblock(Vcb, &Vcb->devices[i]);
2362 if (!NT_SUCCESS(Status)) {
2363 ERR("write_superblock returned %08x\n", Status);
2364 return Status;
2365 }
2366 }
2367 }
2368
2369 return STATUS_SUCCESS;
2370 }
2371
2372 static NTSTATUS update_chunk_usage(device_extension* Vcb, LIST_ENTRY* rollback) {
2373 LIST_ENTRY* le = Vcb->chunks.Flink;
2374 chunk* c;
2375 KEY searchkey;
2376 traverse_ptr tp;
2377 BLOCK_GROUP_ITEM* bgi;
2378 NTSTATUS Status;
2379
2380 TRACE("(%p)\n", Vcb);
2381
2382 while (le != &Vcb->chunks) {
2383 c = CONTAINING_RECORD(le, chunk, list_entry);
2384
2385 if (c->used != c->oldused) {
2386 searchkey.obj_id = c->offset;
2387 searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
2388 searchkey.offset = c->chunk_item->size;
2389
2390 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2391 if (!NT_SUCCESS(Status)) {
2392 ERR("error - find_item returned %08x\n", Status);
2393 return Status;
2394 }
2395
2396 if (keycmp(&searchkey, &tp.item->key)) {
2397 ERR("could not find (%llx,%x,%llx) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
2398 int3;
2399 return STATUS_INTERNAL_ERROR;
2400 }
2401
2402 if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
2403 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
2404 return STATUS_INTERNAL_ERROR;
2405 }
2406
2407 bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
2408 if (!bgi) {
2409 ERR("out of memory\n");
2410 return STATUS_INSUFFICIENT_RESOURCES;
2411 }
2412
2413 RtlCopyMemory(bgi, tp.item->data, tp.item->size);
2414 bgi->used = c->used;
2415
2416 TRACE("adjusting usage of chunk %llx to %llx\n", c->offset, c->used);
2417
2418 delete_tree_item(Vcb, &tp, rollback);
2419
2420 if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, rollback)) {
2421 ERR("insert_tree_item failed\n");
2422 ExFreePool(bgi);
2423 return STATUS_INTERNAL_ERROR;
2424 }
2425
2426 TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
2427 TRACE("chunk_item type = %llx\n", c->chunk_item->type);
2428
2429 if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
2430 FIXME("RAID0 not yet supported\n");
2431 ExFreePool(bgi);
2432 return STATUS_INTERNAL_ERROR;
2433 } else if (c->chunk_item->type & BLOCK_FLAG_RAID1) {
2434 FIXME("RAID1 not yet supported\n");
2435 ExFreePool(bgi);
2436 return STATUS_INTERNAL_ERROR;
2437 } else if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE) {
2438 Vcb->superblock.bytes_used = Vcb->superblock.bytes_used + (2 * (c->used - c->oldused));
2439 } else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
2440 FIXME("RAID10 not yet supported\n");
2441 ExFreePool(bgi);
2442 return STATUS_INTERNAL_ERROR;
2443 } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
2444 FIXME("RAID5 not yet supported\n");
2445 ExFreePool(bgi);
2446 return STATUS_INTERNAL_ERROR;
2447 } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2448 FIXME("RAID6 not yet supported\n");
2449 ExFreePool(bgi);
2450 return STATUS_INTERNAL_ERROR;
2451 } else { // SINGLE
2452 Vcb->superblock.bytes_used = Vcb->superblock.bytes_used + c->used - c->oldused;
2453 }
2454
2455 TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
2456
2457 c->oldused = c->used;
2458 }
2459
2460 le = le->Flink;
2461 }
2462
2463 return STATUS_SUCCESS;
2464 }
2465
2466 static void get_first_item(tree* t, KEY* key) {
2467 LIST_ENTRY* le;
2468
2469 le = t->itemlist.Flink;
2470 while (le != &t->itemlist) {
2471 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2472
2473 *key = td->key;
2474 return;
2475 }
2476 }
2477
2478 static NTSTATUS STDCALL split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, UINT32 numitems, UINT32 size) {
2479 tree *nt, *pt;
2480 tree_data* td;
2481 tree_data* oldlastitem;
2482 // write_tree* wt2;
2483 // // tree_data *firsttd, *lasttd;
2484 // // LIST_ENTRY* le;
2485 // #ifdef DEBUG_PARANOID
2486 // KEY lastkey1, lastkey2;
2487 // traverse_ptr tp, next_tp;
2488 // ULONG numitems1, numitems2;
2489 // #endif
2490
2491 TRACE("splitting tree in %llx at (%llx,%x,%llx)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);
2492
2493 // #ifdef DEBUG_PARANOID
2494 // lastkey1.obj_id = 0xffffffffffffffff;
2495 // lastkey1.obj_type = 0xff;
2496 // lastkey1.offset = 0xffffffffffffffff;
2497 //
2498 // if (!find_item(Vcb, t->root, &tp, &lastkey1, NULL, FALSE))
2499 // ERR("error - find_item failed\n");
2500 // else {
2501 // lastkey1 = tp.item->key;
2502 // numitems1 = 0;
2503 // while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
2504 // free_traverse_ptr(&tp);
2505 // tp = next_tp;
2506 // numitems1++;
2507 // }
2508 // free_traverse_ptr(&tp);
2509 // }
2510 // #endif
2511
2512 nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2513 if (!nt) {
2514 ERR("out of memory\n");
2515 return STATUS_INSUFFICIENT_RESOURCES;
2516 }
2517
2518 RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header));
2519 nt->header.address = 0;
2520 nt->header.generation = Vcb->superblock.generation;
2521 nt->header.num_items = t->header.num_items - numitems;
2522 nt->header.flags = HEADER_FLAG_MIXED_BACKREF;
2523
2524 nt->has_address = FALSE;
2525 nt->Vcb = Vcb;
2526 nt->parent = t->parent;
2527 nt->root = t->root;
2528 // nt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
2529 nt->new_address = 0;
2530 nt->has_new_address = FALSE;
2531 nt->flags = t->flags;
2532 InitializeListHead(&nt->itemlist);
2533
2534 // ExInitializeResourceLite(&nt->nonpaged->load_tree_lock);
2535
2536 oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry);
2537
2538 // // firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
2539 // // lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
2540 // //
2541 // // TRACE("old tree in %x was from (%x,%x,%x) to (%x,%x,%x)\n",
2542 // // (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
2543 // // (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
2544 // //
2545 // // le = wt->tree->itemlist.Flink;
2546 // // while (le != &wt->tree->itemlist) {
2547 // // td = CONTAINING_RECORD(le, tree_data, list_entry);
2548 // // TRACE("old tree item was (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
2549 // // le = le->Flink;
2550 // // }
2551
2552 nt->itemlist.Flink = &newfirstitem->list_entry;
2553 nt->itemlist.Blink = t->itemlist.Blink;
2554 nt->itemlist.Flink->Blink = &nt->itemlist;
2555 nt->itemlist.Blink->Flink = &nt->itemlist;
2556
2557 t->itemlist.Blink = &oldlastitem->list_entry;
2558 t->itemlist.Blink->Flink = &t->itemlist;
2559
2560 // // le = wt->tree->itemlist.Flink;
2561 // // while (le != &wt->tree->itemlist) {
2562 // // td = CONTAINING_RECORD(le, tree_data, list_entry);
2563 // // TRACE("old tree item now (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
2564 // // le = le->Flink;
2565 // // }
2566 // //
2567 // // firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
2568 // // lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
2569 // //
2570 // // TRACE("old tree in %x is now from (%x,%x,%x) to (%x,%x,%x)\n",
2571 // // (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
2572 // // (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
2573
2574 nt->size = t->size - size;
2575 t->size = size;
2576 t->header.num_items = numitems;
2577 nt->write = TRUE;
2578 Vcb->write_trees++;
2579
2580 InterlockedIncrement(&Vcb->open_trees);
2581 InsertTailList(&Vcb->trees, &nt->list_entry);
2582
2583 // // // TESTING
2584 // // td = wt->tree->items;
2585 // // while (td) {
2586 // // if (!td->ignore) {
2587 // // TRACE("old tree item: (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
2588 // // }
2589 // // td = td->next;
2590 // // }
2591
2592 // // oldlastitem->next = NULL;
2593 // // wt->tree->lastitem = oldlastitem;
2594
2595 // // TRACE("last item is now (%x,%x,%x)\n", (UINT32)oldlastitem->key.obj_id, oldlastitem->key.obj_type, (UINT32)oldlastitem->key.offset);
2596
2597 if (nt->header.level > 0) {
2598 LIST_ENTRY* le = nt->itemlist.Flink;
2599
2600 while (le != &nt->itemlist) {
2601 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
2602
2603 if (td2->treeholder.tree)
2604 td2->treeholder.tree->parent = nt;
2605
2606 le = le->Flink;
2607 }
2608 }
2609
2610 if (nt->parent) {
2611 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
2612 if (!td) {
2613 ERR("out of memory\n");
2614 return STATUS_INSUFFICIENT_RESOURCES;
2615 }
2616
2617 td->key = newfirstitem->key;
2618
2619 InsertHeadList(&t->paritem->list_entry, &td->list_entry);
2620
2621 td->ignore = FALSE;
2622 td->inserted = TRUE;
2623 td->treeholder.tree = nt;
2624 init_tree_holder(&td->treeholder);
2625 // td->treeholder.nonpaged->status = tree_holder_loaded;
2626 nt->paritem = td;
2627
2628 nt->parent->header.num_items++;
2629 nt->parent->size += sizeof(internal_node);
2630
2631 goto end;
2632 }
2633
2634 TRACE("adding new tree parent\n");
2635
2636 if (nt->header.level == 255) {
2637 ERR("cannot add parent to tree at level 255\n");
2638 return STATUS_INTERNAL_ERROR;
2639 }
2640
2641 pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2642 if (!pt) {
2643 ERR("out of memory\n");
2644 return STATUS_INSUFFICIENT_RESOURCES;
2645 }
2646
2647 RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
2648 pt->header.address = 0;
2649 pt->header.num_items = 2;
2650 pt->header.level = nt->header.level + 1;
2651 pt->header.flags = HEADER_FLAG_MIXED_BACKREF;
2652
2653 pt->has_address = FALSE;
2654 pt->Vcb = Vcb;
2655 pt->parent = NULL;
2656 pt->paritem = NULL;
2657 pt->root = t->root;
2658 pt->new_address = 0;
2659 pt->has_new_address = FALSE;
2660 // pt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
2661 pt->size = pt->header.num_items * sizeof(internal_node);
2662 pt->flags = t->flags;
2663 InitializeListHead(&pt->itemlist);
2664
2665 // ExInitializeResourceLite(&pt->nonpaged->load_tree_lock);
2666
2667 InterlockedIncrement(&Vcb->open_trees);
2668 InsertTailList(&Vcb->trees, &pt->list_entry);
2669
2670 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
2671 if (!td) {
2672 ERR("out of memory\n");
2673 return STATUS_INSUFFICIENT_RESOURCES;
2674 }
2675
2676 get_first_item(t, &td->key);
2677 td->ignore = FALSE;
2678 td->inserted = FALSE;
2679 td->treeholder.address = 0;
2680 td->treeholder.generation = Vcb->superblock.generation;
2681 td->treeholder.tree = t;
2682 init_tree_holder(&td->treeholder);
2683 // td->treeholder.nonpaged->status = tree_holder_loaded;
2684 InsertTailList(&pt->itemlist, &td->list_entry);
2685 t->paritem = td;
2686
2687 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
2688 if (!td) {
2689 ERR("out of memory\n");
2690 return STATUS_INSUFFICIENT_RESOURCES;
2691 }
2692
2693 td->key = newfirstitem->key;
2694 td->ignore = FALSE;
2695 td->inserted = FALSE;
2696 td->treeholder.address = 0;
2697 td->treeholder.generation = Vcb->superblock.generation;
2698 td->treeholder.tree = nt;
2699 init_tree_holder(&td->treeholder);
2700 // td->treeholder.nonpaged->status = tree_holder_loaded;
2701 InsertTailList(&pt->itemlist, &td->list_entry);
2702 nt->paritem = td;
2703
2704 pt->write = TRUE;
2705 Vcb->write_trees++;
2706
2707 t->root->treeholder.tree = pt;
2708
2709 t->parent = pt;
2710 nt->parent = pt;
2711
2712 end:
2713 t->root->root_item.bytes_used += Vcb->superblock.node_size;
2714
2715 // #ifdef DEBUG_PARANOID
2716 // lastkey2.obj_id = 0xffffffffffffffff;
2717 // lastkey2.obj_type = 0xff;
2718 // lastkey2.offset = 0xffffffffffffffff;
2719 //
2720 // if (!find_item(Vcb, wt->tree->root, &tp, &lastkey2, NULL, FALSE))
2721 // ERR("error - find_item failed\n");
2722 // else {
2723 // lastkey2 = tp.item->key;
2724 //
2725 // numitems2 = 0;
2726 // while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
2727 // free_traverse_ptr(&tp);
2728 // tp = next_tp;
2729 // numitems2++;
2730 // }
2731 // free_traverse_ptr(&tp);
2732 // }
2733 //
2734 // ERR("lastkey1 = %llx,%x,%llx\n", lastkey1.obj_id, lastkey1.obj_type, lastkey1.offset);
2735 // ERR("lastkey2 = %llx,%x,%llx\n", lastkey2.obj_id, lastkey2.obj_type, lastkey2.offset);
2736 // ERR("numitems1 = %u\n", numitems1);
2737 // ERR("numitems2 = %u\n", numitems2);
2738 // #endif
2739
2740 return STATUS_SUCCESS;
2741 }
2742
2743 static NTSTATUS STDCALL split_tree(device_extension* Vcb, tree* t) {
2744 LIST_ENTRY* le;
2745 UINT32 size, ds, numitems;
2746
2747 size = 0;
2748 numitems = 0;
2749
2750 // FIXME - naïve implementation: maximizes number of filled trees
2751
2752 le = t->itemlist.Flink;
2753 while (le != &t->itemlist) {
2754 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2755
2756 if (!td->ignore) {
2757 if (t->header.level == 0)
2758 ds = sizeof(leaf_node) + td->size;
2759 else
2760 ds = sizeof(internal_node);
2761
2762 // FIXME - move back if previous item was deleted item with same key
2763 if (size + ds > Vcb->superblock.node_size - sizeof(tree_header))
2764 return split_tree_at(Vcb, t, td, numitems, size);
2765
2766 size += ds;
2767 numitems++;
2768 }
2769
2770 le = le->Flink;
2771 }
2772
2773 return STATUS_SUCCESS;
2774 }
2775
2776 static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, LIST_ENTRY* rollback) {
2777 LIST_ENTRY* le;
2778 tree_data* nextparitem = NULL;
2779 NTSTATUS Status;
2780 tree *next_tree, *par;
2781 BOOL loaded;
2782
2783 TRACE("trying to amalgamate tree in root %llx, level %x (size %u)\n", t->root->id, t->header.level, t->size);
2784
2785 // FIXME - doesn't capture everything, as it doesn't ascend
2786 // FIXME - write proper function and put it in treefuncs.c
2787 le = t->paritem->list_entry.Flink;
2788 while (le != &t->parent->itemlist) {
2789 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2790
2791 if (!td->ignore) {
2792 nextparitem = td;
2793 break;
2794 }
2795
2796 le = le->Flink;
2797 }
2798
2799 if (!nextparitem)
2800 return STATUS_SUCCESS;
2801
2802 // FIXME - loop, and capture more than one tree if we can
2803
2804 TRACE("nextparitem: key = %llx,%x,%llx\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset);
2805 // nextparitem = t->paritem;
2806
2807 // ExAcquireResourceExclusiveLite(&t->parent->nonpaged->load_tree_lock, TRUE);
2808
2809 Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, &loaded);
2810 if (!NT_SUCCESS(Status)) {
2811 ERR("do_load_tree returned %08x\n", Status);
2812 return Status;
2813 }
2814
2815 // ExReleaseResourceLite(&t->parent->nonpaged->load_tree_lock);
2816
2817 next_tree = nextparitem->treeholder.tree;
2818
2819 if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) {
2820 // merge two trees into one
2821
2822 t->header.num_items += next_tree->header.num_items;
2823 t->size += next_tree->size;
2824
2825 if (next_tree->header.level > 0) {
2826 le = next_tree->itemlist.Flink;
2827
2828 while (le != &next_tree->itemlist) {
2829 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
2830
2831 if (td2->treeholder.tree)
2832 td2->treeholder.tree->parent = t;
2833
2834 le = le->Flink;
2835 }
2836 }
2837
2838 t->itemlist.Blink->Flink = next_tree->itemlist.Flink;
2839 t->itemlist.Blink->Flink->Blink = t->itemlist.Blink;
2840 t->itemlist.Blink = next_tree->itemlist.Blink;
2841 t->itemlist.Blink->Flink = &t->itemlist;
2842
2843 // // TESTING
2844 // le = t->itemlist.Flink;
2845 // while (le != &t->itemlist) {
2846 // tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2847 // if (!td->ignore) {
2848 // ERR("key: %llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
2849 // }
2850 // le = le->Flink;
2851 // }
2852
2853 next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist;
2854
2855 next_tree->header.num_items = 0;
2856 next_tree->size = 0;
2857
2858 if (next_tree->has_new_address) { // delete associated EXTENT_ITEM
2859 Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, rollback);
2860
2861 if (!NT_SUCCESS(Status)) {
2862 ERR("reduce_tree_extent returned %08x\n", Status);
2863 return Status;
2864 }
2865 } else if (next_tree->has_address) {
2866 Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, rollback);
2867
2868 if (!NT_SUCCESS(Status)) {
2869 ERR("reduce_tree_extent returned %08x\n", Status);
2870 return Status;
2871 }
2872 }
2873
2874 if (!nextparitem->ignore) {
2875 nextparitem->ignore = TRUE;
2876 next_tree->parent->header.num_items--;
2877 next_tree->parent->size -= sizeof(internal_node);
2878 }
2879
2880 par = next_tree->parent;
2881 while (par) {
2882 if (!par->write) {
2883 par->write = TRUE;
2884 Vcb->write_trees++;
2885 }
2886 par = par->parent;
2887 }
2888
2889 RemoveEntryList(&nextparitem->list_entry);
2890 ExFreePool(next_tree->paritem);
2891 next_tree->paritem = NULL;
2892
2893 next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
2894
2895 free_tree(next_tree);
2896 } else {
2897 // rebalance by moving items from second tree into first
2898 ULONG avg_size = (t->size + next_tree->size) / 2;
2899 KEY firstitem = {0, 0, 0};
2900
2901 TRACE("attempting rebalance\n");
2902
2903 le = next_tree->itemlist.Flink;
2904 while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) {
2905 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2906 ULONG size;
2907
2908 if (!td->ignore) {
2909 if (next_tree->header.level == 0)
2910 size = sizeof(leaf_node) + td->size;
2911 else
2912 size = sizeof(internal_node);
2913 } else
2914 size = 0;
2915
2916 if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) {
2917 RemoveEntryList(&td->list_entry);
2918 InsertTailList(&t->itemlist, &td->list_entry);
2919
2920 if (next_tree->header.level > 0 && td->treeholder.tree)
2921 td->treeholder.tree->parent = t;
2922
2923 if (!td->ignore) {
2924 next_tree->size -= size;
2925 t->size += size;
2926 next_tree->header.num_items--;
2927 t->header.num_items++;
2928 }
2929 } else
2930 break;
2931
2932 le = next_tree->itemlist.Flink;
2933 }
2934
2935 le = next_tree->itemlist.Flink;
2936 while (le != &next_tree->itemlist) {
2937 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2938
2939 if (!td->ignore) {
2940 firstitem = td->key;
2941 break;
2942 }
2943
2944 le = le->Flink;
2945 }
2946
2947 // ERR("firstitem = %llx,%x,%llx\n", firstitem.obj_id, firstitem.obj_type, firstitem.offset);
2948
2949 // FIXME - once ascension is working, make this work with parent's parent, etc.
2950 if (next_tree->paritem)
2951 next_tree->paritem->key = firstitem;
2952
2953 par = next_tree;
2954 while (par) {
2955 if (!par->write) {
2956 par->write = TRUE;
2957 Vcb->write_trees++;
2958 }
2959 par = par->parent;
2960 }
2961 }
2962
2963 return STATUS_SUCCESS;
2964 }
2965
2966 static NTSTATUS update_extent_level(device_extension* Vcb, UINT64 address, tree* t, UINT8 level, LIST_ENTRY* rollback) {
2967 KEY searchkey;
2968 traverse_ptr tp;
2969 NTSTATUS Status;
2970
2971 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
2972 searchkey.obj_id = address;
2973 searchkey.obj_type = TYPE_METADATA_ITEM;
2974 searchkey.offset = t->header.level;
2975
2976 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2977 if (!NT_SUCCESS(Status)) {
2978 ERR("error - find_item returned %08x\n", Status);
2979 return Status;
2980 }
2981
2982 if (!keycmp(&tp.item->key, &searchkey)) {
2983 EXTENT_ITEM_SKINNY_METADATA* eism;
2984
2985 if (tp.item->size > 0) {
2986 eism = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
2987
2988 if (!eism) {
2989 ERR("out of memory\n");
2990 return STATUS_INSUFFICIENT_RESOURCES;
2991 }
2992
2993 RtlCopyMemory(eism, tp.item->data, tp.item->size);
2994 } else
2995 eism = NULL;
2996
2997 delete_tree_item(Vcb, &tp, rollback);
2998
2999 if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, tp.item->size, NULL, rollback)) {
3000 ERR("insert_tree_item failed\n");
3001 ExFreePool(eism);
3002 return STATUS_INTERNAL_ERROR;
3003 }
3004
3005 return STATUS_SUCCESS;
3006 }
3007 }
3008
3009 searchkey.obj_id = address;
3010 searchkey.obj_type = TYPE_EXTENT_ITEM;
3011 searchkey.offset = 0xffffffffffffffff;
3012
3013 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3014 if (!NT_SUCCESS(Status)) {
3015 ERR("error - find_item returned %08x\n", Status);
3016 return Status;
3017 }
3018
3019 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
3020 EXTENT_ITEM_TREE* eit;
3021
3022 if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
3023 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
3024 return STATUS_INTERNAL_ERROR;
3025 }
3026
3027 eit = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
3028
3029 if (!eit) {
3030 ERR("out of memory\n");
3031 return STATUS_INSUFFICIENT_RESOURCES;
3032 }
3033
3034 RtlCopyMemory(eit, tp.item->data, tp.item->size);
3035
3036 delete_tree_item(Vcb, &tp, rollback);
3037
3038 eit->level = level;
3039
3040 if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, eit, tp.item->size, NULL, rollback)) {
3041 ERR("insert_tree_item failed\n");
3042 ExFreePool(eit);
3043 return STATUS_INTERNAL_ERROR;
3044 }
3045
3046 return STATUS_SUCCESS;
3047 }
3048
3049 ERR("could not find EXTENT_ITEM for address %llx\n", address);
3050
3051 return STATUS_INTERNAL_ERROR;
3052 }
3053
3054 static NTSTATUS STDCALL do_splits(device_extension* Vcb, LIST_ENTRY* rollback) {
3055 // LIST_ENTRY *le, *le2;
3056 // write_tree* wt;
3057 // tree_data* td;
3058 UINT8 level, max_level;
3059 UINT32 min_size;
3060 BOOL empty, done_deletions = FALSE;
3061 NTSTATUS Status;
3062 tree* t;
3063
3064 TRACE("(%p)\n", Vcb);
3065
3066 max_level = 0;
3067
3068 for (level = 0; level <= 255; level++) {
3069 LIST_ENTRY *le, *nextle;
3070
3071 empty = TRUE;
3072
3073 TRACE("doing level %u\n", level);
3074
3075 le = Vcb->trees.Flink;
3076
3077 while (le != &Vcb->trees) {
3078 t = CONTAINING_RECORD(le, tree, list_entry);
3079
3080 nextle = le->Flink;
3081
3082 if (t->write && t->header.level == level) {
3083 empty = FALSE;
3084
3085 if (t->header.num_items == 0) {
3086 if (t->parent) {
3087 LIST_ENTRY* le2;
3088 KEY firstitem = {0xcccccccccccccccc,0xcc,0xcccccccccccccccc};
3089 #ifdef __REACTOS__
3090 (void)firstitem;
3091 #endif
3092
3093 done_deletions = TRUE;
3094
3095 le2 = t->itemlist.Flink;
3096 while (le2 != &t->itemlist) {
3097 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
3098 firstitem = td->key;
3099 break;
3100 }
3101
3102 TRACE("deleting tree in root %llx (first item was %llx,%x,%llx)\n",
3103 t->root->id, firstitem.obj_id, firstitem.obj_type, firstitem.offset);
3104
3105 t->root->root_item.bytes_used -= Vcb->superblock.node_size;
3106
3107 if (t->has_new_address) { // delete associated EXTENT_ITEM
3108 Status = reduce_tree_extent(Vcb, t->new_address, t, rollback);
3109
3110 if (!NT_SUCCESS(Status)) {
3111 ERR("reduce_tree_extent returned %08x\n", Status);
3112 return Status;
3113 }
3114 } else if (t->has_address) {
3115 Status = reduce_tree_extent(Vcb,t->header.address, t, rollback);
3116
3117 if (!NT_SUCCESS(Status)) {
3118 ERR("reduce_tree_extent returned %08x\n", Status);
3119 return Status;
3120 }
3121 }
3122
3123 if (!t->paritem->ignore) {
3124 t->paritem->ignore = TRUE;
3125 t->parent->header.num_items--;
3126 t->parent->size -= sizeof(internal_node);
3127 }
3128
3129 RemoveEntryList(&t->paritem->list_entry);
3130 ExFreePool(t->paritem);
3131 t->paritem = NULL;
3132
3133 free_tree(t);
3134 } else if (t->header.level != 0) {
3135 if (t->has_new_address) {
3136 Status = update_extent_level(Vcb, t->new_address, t, 0, rollback);
3137
3138 if (!NT_SUCCESS(Status)) {
3139 ERR("update_extent_level returned %08x\n", Status);
3140 return Status;
3141 }
3142 }
3143
3144 t->header.level = 0;
3145 }
3146 } else if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
3147 TRACE("splitting overlarge tree (%x > %x)\n", t->size, Vcb->superblock.node_size - sizeof(tree_header));
3148 Status = split_tree(Vcb, t);
3149
3150 if (!NT_SUCCESS(Status)) {
3151 ERR("split_tree returned %08x\n", Status);
3152 return Status;
3153 }
3154 }
3155 }
3156
3157 le = nextle;
3158 }
3159
3160 if (!empty) {
3161 max_level = level;
3162 } else {
3163 TRACE("nothing found for level %u\n", level);
3164 break;
3165 }
3166 }
3167
3168 min_size = (Vcb->superblock.node_size - sizeof(tree_header)) / 2;
3169
3170 for (level = 0; level <= max_level; level++) {
3171 LIST_ENTRY* le;
3172
3173 le = Vcb->trees.Flink;
3174
3175 while (le != &Vcb->trees) {
3176 t = CONTAINING_RECORD(le, tree, list_entry);
3177
3178 if (t->write && t->header.level == level && t->header.num_items > 0 && t->parent && t->size < min_size) {
3179 Status = try_tree_amalgamate(Vcb, t, rollback);
3180 if (!NT_SUCCESS(Status)) {
3181 ERR("try_tree_amalgamate returned %08x\n", Status);
3182 return Status;
3183 }
3184 }
3185
3186 le = le->Flink;
3187 }
3188 }
3189
3190 // simplify trees if top tree only has one entry
3191
3192 if (done_deletions) {
3193 for (level = max_level; level > 0; level--) {
3194 LIST_ENTRY *le, *nextle;
3195
3196 le = Vcb->trees.Flink;
3197 while (le != &Vcb->trees) {
3198 nextle = le->Flink;
3199 t = CONTAINING_RECORD(le, tree, list_entry);
3200
3201 if (t->write && t->header.level == level) {
3202 if (!t->parent && t->header.num_items == 1) {
3203 LIST_ENTRY* le2 = t->itemlist.Flink;
3204 tree_data* td;
3205 tree* child_tree = NULL;
3206
3207 while (le2 != &t->itemlist) {
3208 td = CONTAINING_RECORD(le2, tree_data, list_entry);
3209 if (!td->ignore)
3210 break;
3211 le2 = le2->Flink;
3212 }
3213
3214 TRACE("deleting top-level tree in root %llx with one item\n", t->root->id);
3215
3216 if (t->has_new_address) { // delete associated EXTENT_ITEM
3217 Status = reduce_tree_extent(Vcb, t->new_address, t, rollback);
3218
3219 if (!NT_SUCCESS(Status)) {
3220 ERR("reduce_tree_extent returned %08x\n", Status);
3221 return Status;
3222 }
3223 } else if (t->has_address) {
3224 Status = reduce_tree_extent(Vcb,t->header.address, t, rollback);
3225
3226 if (!NT_SUCCESS(Status)) {
3227 ERR("reduce_tree_extent returned %08x\n", Status);
3228 return Status;
3229 }
3230 }
3231
3232 if (!td->treeholder.tree) { // load first item if not already loaded
3233 KEY searchkey = {0,0,0};
3234 traverse_ptr tp;
3235
3236 Status = find_item(Vcb, t->root, &tp, &searchkey, FALSE);
3237 if (!NT_SUCCESS(Status)) {
3238 ERR("error - find_item returned %08x\n", Status);
3239 return Status;
3240 }
3241 }
3242
3243 child_tree = td->treeholder.tree;
3244
3245 if (child_tree) {
3246 child_tree->parent = NULL;
3247 child_tree->paritem = NULL;
3248 }
3249
3250 t->root->root_item.bytes_used -= Vcb->superblock.node_size;
3251
3252 free_tree(t);
3253
3254 if (child_tree)
3255 child_tree->root->treeholder.tree = child_tree;
3256 }
3257 }
3258
3259 le = nextle;
3260 }
3261 }
3262 }
3263
3264 return STATUS_SUCCESS;
3265 }
3266
3267 static NTSTATUS remove_root_extents(device_extension* Vcb, root* r, tree_holder* th, UINT8 level, LIST_ENTRY* rollback) {
3268 NTSTATUS Status;
3269
3270 if (level > 0) {
3271 if (!th->tree) {
3272 Status = load_tree(Vcb, th->address, r, &th->tree);
3273
3274 if (!NT_SUCCESS(Status)) {
3275 ERR("load_tree(%llx) returned %08x\n", th->address, Status);
3276 return Status;
3277 }
3278 }
3279
3280 if (th->tree->header.level > 0) {
3281 LIST_ENTRY* le = th->tree->itemlist.Flink;
3282
3283 while (le != &th->tree->itemlist) {
3284 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3285
3286 if (!td->ignore) {
3287 Status = remove_root_extents(Vcb, r, &td->treeholder, th->tree->header.level - 1, rollback);
3288
3289 if (!NT_SUCCESS(Status)) {
3290 ERR("remove_root_extents returned %08x\n", Status);
3291 return Status;
3292 }
3293 }
3294
3295 le = le->Flink;
3296 }
3297 }
3298 }
3299
3300 if (!th->tree || th->tree->has_address) {
3301 Status = reduce_tree_extent(Vcb, th->address, NULL, rollback);
3302
3303 if (!NT_SUCCESS(Status)) {
3304 ERR("reduce_tree_extent(%llx) returned %08x\n", th->address, Status);
3305 return Status;
3306 }
3307 }
3308
3309 return STATUS_SUCCESS;
3310 }
3311
3312 static NTSTATUS drop_root(device_extension* Vcb, root* r, LIST_ENTRY* rollback) {
3313 NTSTATUS Status;
3314 KEY searchkey;
3315 traverse_ptr tp;
3316
3317 Status = remove_root_extents(Vcb, r, &r->treeholder, r->root_item.root_level, rollback);
3318 if (!NT_SUCCESS(Status)) {
3319 ERR("remove_root_extents returned %08x\n", Status);
3320 return Status;
3321 }
3322
3323 // remove entry in uuid root (tree 9)
3324 if (Vcb->uuid_root) {
3325 RtlCopyMemory(&searchkey.obj_id, &r->root_item.uuid.uuid[0], sizeof(UINT64));
3326 searchkey.obj_type = TYPE_SUBVOL_UUID;
3327 RtlCopyMemory(&searchkey.offset, &r->root_item.uuid.uuid[sizeof(UINT64)], sizeof(UINT64));
3328
3329 if (searchkey.obj_id != 0 || searchkey.offset != 0) {
3330 Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE);
3331 if (!NT_SUCCESS(Status)) {
3332 WARN("find_item returned %08x\n", Status);
3333 } else {
3334 if (!keycmp(&tp.item->key, &searchkey))
3335 delete_tree_item(Vcb, &tp, rollback);
3336 else
3337 WARN("could not find (%llx,%x,%llx) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
3338 }
3339 }
3340 }
3341
3342 // delete ROOT_ITEM
3343
3344 searchkey.obj_id = r->id;
3345 searchkey.obj_type = TYPE_ROOT_ITEM;
3346 searchkey.offset = 0xffffffffffffffff;
3347
3348 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
3349 if (!NT_SUCCESS(Status)) {
3350 ERR("find_item returned %08x\n", Status);
3351 return Status;
3352 }
3353
3354 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type)
3355 delete_tree_item(Vcb, &tp, rollback);
3356 else
3357 WARN("could not find (%llx,%x,%llx) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
3358
3359 // delete items in tree cache
3360
3361 free_trees_root(Vcb, r);
3362
3363 return STATUS_SUCCESS;
3364 }
3365
3366 static NTSTATUS drop_roots(device_extension* Vcb, LIST_ENTRY* rollback) {
3367 LIST_ENTRY *le = Vcb->drop_roots.Flink, *le2;
3368 NTSTATUS Status;
3369
3370 while (le != &Vcb->drop_roots) {
3371 root* r = CONTAINING_RECORD(le, root, list_entry);
3372
3373 le2 = le->Flink;
3374
3375 Status = drop_root(Vcb, r, rollback);
3376 if (!NT_SUCCESS(Status)) {
3377 ERR("drop_root(%llx) returned %08x\n", r->id, Status);
3378 return Status;
3379 }
3380
3381 le = le2;
3382 }
3383
3384 return STATUS_SUCCESS;
3385 }
3386
3387 NTSTATUS STDCALL do_write(device_extension* Vcb, LIST_ENTRY* rollback) {
3388 NTSTATUS Status;
3389 LIST_ENTRY* le;
3390 BOOL cache_changed;
3391
3392 TRACE("(%p)\n", Vcb);
3393
3394 if (!IsListEmpty(&Vcb->drop_roots)) {
3395 Status = drop_roots(Vcb, rollback);
3396
3397 if (!NT_SUCCESS(Status)) {
3398 ERR("drop_roots returned %08x\n", Status);
3399 return Status;
3400 }
3401 }
3402
3403 // If only changing superblock, e.g. changing label, we still need to rewrite
3404 // the root tree so the generations match, otherwise you won't be able to mount on Linux.
3405 if (Vcb->write_trees == 0) {
3406 KEY searchkey;
3407 traverse_ptr tp;
3408
3409 searchkey.obj_id = 0;
3410 searchkey.obj_type = 0;
3411 searchkey.offset = 0;
3412
3413 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
3414 if (!NT_SUCCESS(Status)) {
3415 ERR("error - find_item returned %08x\n", Status);
3416 return Status;
3417 }
3418
3419 if (!Vcb->root_root->treeholder.tree->write) {
3420 Vcb->root_root->treeholder.tree->write = TRUE;
3421 Vcb->write_trees++;
3422 }
3423 }
3424
3425 do {
3426 Status = add_parents(Vcb, rollback);
3427 if (!NT_SUCCESS(Status)) {
3428 ERR("add_parents returned %08x\n", Status);
3429 goto end;
3430 }
3431
3432 Status = do_splits(Vcb, rollback);
3433 if (!NT_SUCCESS(Status)) {
3434 ERR("do_splits returned %08x\n", Status);
3435 goto end;
3436 }
3437
3438 Status = allocate_tree_extents(Vcb, rollback);
3439 if (!NT_SUCCESS(Status)) {
3440 ERR("add_parents returned %08x\n", Status);
3441 goto end;
3442 }
3443
3444 Status = update_chunk_usage(Vcb, rollback);
3445 if (!NT_SUCCESS(Status)) {
3446 ERR("update_chunk_usage returned %08x\n", Status);
3447 goto end;
3448 }
3449
3450 Status = allocate_cache(Vcb, &cache_changed, rollback);
3451 if (!NT_SUCCESS(Status)) {
3452 ERR("allocate_cache returned %08x\n", Status);
3453 goto end;
3454 }
3455 } while (cache_changed || !trees_consistent(Vcb, rollback));
3456
3457 TRACE("trees consistent\n");
3458
3459 Status = update_root_root(Vcb, rollback);
3460 if (!NT_SUCCESS(Status)) {
3461 ERR("update_root_root returned %08x\n", Status);
3462 goto end;
3463 }
3464
3465 Status = write_trees(Vcb);
3466 if (!NT_SUCCESS(Status)) {
3467 ERR("write_trees returned %08x\n", Status);
3468 goto end;
3469 }
3470
3471 Vcb->superblock.cache_generation = Vcb->superblock.generation;
3472
3473 Status = write_superblocks(Vcb);
3474 if (!NT_SUCCESS(Status)) {
3475 ERR("write_superblocks returned %08x\n", Status);
3476 goto end;
3477 }
3478
3479 clean_space_cache(Vcb);
3480
3481 Vcb->superblock.generation++;
3482
3483 Status = STATUS_SUCCESS;
3484
3485 le = Vcb->trees.Flink;
3486 while (le != &Vcb->trees) {
3487 tree* t = CONTAINING_RECORD(le, tree, list_entry);
3488
3489 t->write = FALSE;
3490
3491 le = le->Flink;
3492 }
3493
3494 Vcb->write_trees = 0;
3495
3496 while (!IsListEmpty(&Vcb->drop_roots)) {
3497 LIST_ENTRY* le = RemoveHeadList(&Vcb->drop_roots);
3498 root* r = CONTAINING_RECORD(le, root, list_entry);
3499
3500 ExDeleteResourceLite(&r->nonpaged->load_tree_lock);
3501 ExFreePool(r->nonpaged);
3502 ExFreePool(r);
3503 }
3504
3505 end:
3506 TRACE("do_write returning %08x\n", Status);
3507
3508 return Status;
3509 }
3510
3511 NTSTATUS consider_write(device_extension* Vcb) {
3512 // FIXME - call do_write if Vcb->write_trees high
3513 #if 0
3514 LIST_ENTRY rollback;
3515 NTSTATUS Status = STATUS_SUCCESS;
3516
3517 InitializeListHead(&rollback);
3518
3519 if (Vcb->write_trees > 0)
3520 Status = do_write(Vcb, &rollback);
3521
3522 free_tree_cache(&Vcb->tree_cache);
3523
3524 if (!NT_SUCCESS(Status))
3525 do_rollback(Vcb, &rollback);
3526 else
3527 clear_rollback(&rollback);
3528
3529 return Status;
3530 #else
3531 return STATUS_SUCCESS;
3532 #endif
3533 }
3534
3535 // NTSTATUS STDCALL add_extent_ref(device_extension* Vcb, UINT64 address, UINT64 size, root* subvol, UINT64 inode, UINT64 offset, LIST_ENTRY* rollback) {
3536 // KEY searchkey;
3537 // traverse_ptr tp;
3538 // EXTENT_ITEM* ei;
3539 // UINT8 *siptr, *type;
3540 // ULONG len;
3541 // UINT64 hash;
3542 // EXTENT_DATA_REF* edr;
3543 // NTSTATUS Status;
3544 //
3545 // TRACE("(%p, %llx, %llx, %llx, %llx, %llx)\n", Vcb, address, size, subvol->id, inode, offset);
3546 //
3547 // searchkey.obj_id = address;
3548 // searchkey.obj_type = TYPE_EXTENT_ITEM;
3549 // searchkey.offset = size;
3550 //
3551 // Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3552 // if (!NT_SUCCESS(Status)) {
3553 // ERR("error - find_item returned %08x\n", Status);
3554 // return Status;
3555 // }
3556 //
3557 // if (keycmp(&tp.item->key, &searchkey)) {
3558 // // create new entry
3559 //
3560 // len = sizeof(EXTENT_ITEM) + sizeof(UINT8) + sizeof(EXTENT_DATA_REF);
3561 //
3562 // ei = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
3563 // if (!ei) {
3564 // ERR("out of memory\n");
3565 // return STATUS_INSUFFICIENT_RESOURCES;
3566 // }
3567 //
3568 // ei->refcount = 1;
3569 // ei->generation = Vcb->superblock.generation;
3570 // ei->flags = EXTENT_ITEM_DATA;
3571 //
3572 // type = (UINT8*)&ei[1];
3573 // *type = TYPE_EXTENT_DATA_REF;
3574 //
3575 // edr = (EXTENT_DATA_REF*)&type[1];
3576 // edr->root = subvol->id;
3577 // edr->objid = inode;
3578 // edr->offset = offset;
3579 // edr->count = 1;
3580 //
3581 // if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ei, len, NULL, rollback)) {
3582 // ERR("error - failed to insert item\n");
3583 // return STATUS_INTERNAL_ERROR;
3584 // }
3585 //
3586 // // FIXME - update free space in superblock and CHUNK_ITEM
3587 //
3588 // return STATUS_SUCCESS;
3589 // }
3590 //
3591 // if (tp.item->size == sizeof(EXTENT_ITEM_V0)) { // old extent ref, convert
3592 // NTSTATUS Status = convert_old_data_extent(Vcb, address, size, rollback);
3593 // if (!NT_SUCCESS(Status)) {
3594 // ERR("convert_old_data_extent returned %08x\n", Status);
3595 // return Status;
3596 // }
3597 //
3598 // Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3599 // if (!NT_SUCCESS(Status)) {
3600 // ERR("error - find_item returned %08x\n", Status);
3601 // return Status;
3602 // }
3603 //
3604 // if (keycmp(&tp.item->key, &searchkey)) {
3605 // WARN("extent item not found for address %llx, size %llx\n", address, size);
3606 // return STATUS_SUCCESS;
3607 // }
3608 // }
3609 //
3610 // ei = (EXTENT_ITEM*)tp.item->data;
3611 //
3612 // if (tp.item->size < sizeof(EXTENT_ITEM)) {
3613 // ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
3614 // return STATUS_INTERNAL_ERROR;
3615 // }
3616 //
3617 // if (extent_item_is_shared(ei, tp.item->size - sizeof(EXTENT_ITEM))) {
3618 // NTSTATUS Status = convert_shared_data_extent(Vcb, address, size, rollback);
3619 // if (!NT_SUCCESS(Status)) {
3620 // ERR("convert_shared_data_extent returned %08x\n", Status);
3621 // return Status;
3622 // }
3623 //
3624 // Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3625 // if (!NT_SUCCESS(Status)) {
3626 // ERR("error - find_item returned %08x\n", Status);
3627 // return Status;
3628 // }
3629 //
3630 // if (keycmp(&tp.item->key, &searchkey)) {
3631 // WARN("extent item not found for address %llx, size %llx\n", address, size);
3632 // return STATUS_SUCCESS;
3633 // }
3634 //
3635 // ei = (EXTENT_ITEM*)tp.item->data;
3636 // }
3637 //
3638 // if (ei->flags != EXTENT_ITEM_DATA) {
3639 // ERR("error - flag was not EXTENT_ITEM_DATA\n");
3640 // return STATUS_INTERNAL_ERROR;
3641 // }
3642 //
3643 // hash = get_extent_data_ref_hash(subvol->id, inode, offset);
3644 //
3645 // len = tp.item->size - sizeof(EXTENT_ITEM);
3646 // siptr = (UINT8*)&ei[1];
3647 //
3648 // do {
3649 // if (*siptr == TYPE_EXTENT_DATA_REF) {
3650 // UINT64 sihash;
3651 //
3652 // edr = (EXTENT_DATA_REF*)&siptr[1];
3653 //
3654 // // already exists - increase refcount
3655 // if (edr->root == subvol->id && edr->objid == inode && edr->offset == offset) {
3656 // ei = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
3657 //
3658 // if (!ei) {
3659 // ERR("out of memory\n");
3660 // return STATUS_INSUFFICIENT_RESOURCES;
3661 // }
3662 //
3663 // RtlCopyMemory(ei, tp.item->data, tp.item->size);
3664 //
3665 // edr = (EXTENT_DATA_REF*)((UINT8*)ei + ((UINT8*)edr - tp.item->data));
3666 // edr->count++;
3667 // ei->refcount++;
3668 //
3669 // delete_tree_item(Vcb, &tp, rollback);
3670 //
3671 // if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ei, tp.item->size, NULL, rollback)) {
3672 // ERR("error - failed to insert item\n");
3673 // ExFreePool(ei);
3674 // return STATUS_INTERNAL_ERROR;
3675 // }
3676 //
3677 // return STATUS_SUCCESS;
3678 // }
3679 //
3680 // sihash = get_extent_data_ref_hash(edr->root, edr->objid, edr->offset);
3681 //
3682 // if (sihash >= hash)
3683 // break;
3684 //
3685 // siptr += sizeof(UINT8) + sizeof(EXTENT_DATA_REF);
3686 //
3687 // if (len > sizeof(EXTENT_DATA_REF) + sizeof(UINT8)) {
3688 // len -= sizeof(EXTENT_DATA_REF) + sizeof(UINT8);
3689 // } else
3690 // break;
3691 // // FIXME - TYPE_TREE_BLOCK_REF 0xB0
3692 // } else {
3693 // ERR("unrecognized extent subitem %x\n", *siptr);
3694 // return STATUS_INTERNAL_ERROR;
3695 // }
3696 // } while (len > 0);
3697 //
3698 // len = tp.item->size + sizeof(UINT8) + sizeof(EXTENT_DATA_REF); // FIXME - die if too big
3699 // ei = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
3700 // if (!ei) {
3701 // ERR("out of memory\n");
3702 // return STATUS_INSUFFICIENT_RESOURCES;
3703 // }
3704 //
3705 // RtlCopyMemory(ei, tp.item->data, siptr - tp.item->data);
3706 // ei->refcount++;
3707 //
3708 // type = (UINT8*)ei + (siptr - tp.item->data);
3709 // *type = TYPE_EXTENT_DATA_REF;
3710 //
3711 // edr = (EXTENT_DATA_REF*)&type[1];
3712 // edr->root = subvol->id;
3713 // edr->objid = inode;
3714 // edr->offset = offset;
3715 // edr->count = 1;
3716 //
3717 // if (siptr < tp.item->data + tp.item->size)
3718 // RtlCopyMemory(&edr[1], siptr, tp.item->data + tp.item->size - siptr);
3719 //
3720 // delete_tree_item(Vcb, &tp, rollback);
3721 //
3722 // if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ei, len, NULL, rollback)) {
3723 // ERR("error - failed to insert item\n");
3724 // ExFreePool(ei);
3725 // return STATUS_INTERNAL_ERROR;
3726 // }
3727 //
3728 // return STATUS_SUCCESS;
3729 // }
3730
3731 static BOOL extent_item_is_shared(EXTENT_ITEM* ei, ULONG len) {
3732 UINT8* siptr = (UINT8*)&ei[1];
3733
3734 do {
3735 if (*siptr == TYPE_TREE_BLOCK_REF) {
3736 siptr += sizeof(TREE_BLOCK_REF) + 1;
3737 len -= sizeof(TREE_BLOCK_REF) + 1;
3738 } else if (*siptr == TYPE_EXTENT_DATA_REF) {
3739 siptr += sizeof(EXTENT_DATA_REF) + 1;
3740 len -= sizeof(EXTENT_DATA_REF) + 1;
3741 } else if (*siptr == TYPE_SHARED_BLOCK_REF) {
3742 return TRUE;
3743 } else if (*siptr == TYPE_SHARED_DATA_REF) {
3744 return TRUE;
3745 } else {
3746 ERR("unrecognized extent subitem %x\n", *siptr);
3747 return FALSE;
3748 }
3749 } while (len > 0);
3750
3751 return FALSE;
3752 }
3753
3754 // static NTSTATUS STDCALL remove_extent_ref(device_extension* Vcb, UINT64 address, UINT64 size, root* subvol, UINT64 inode, UINT64 offset, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
3755 // KEY searchkey;
3756 // traverse_ptr tp;
3757 // EXTENT_ITEM* ei;
3758 // UINT8* siptr;
3759 // ULONG len;
3760 // EXTENT_DATA_REF* edr;
3761 // BOOL found;
3762 // NTSTATUS Status;
3763 //
3764 // TRACE("(%p, %llx, %llx, %llx, %llx, %llx)\n", Vcb, address, size, subvol->id, inode, offset);
3765 //
3766 // searchkey.obj_id = address;
3767 // searchkey.obj_type = TYPE_EXTENT_ITEM;
3768 // searchkey.offset = size;
3769 //
3770 // Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3771 // if (!NT_SUCCESS(Status)) {
3772 // ERR("error - find_item returned %08x\n", Status);
3773 // return Status;
3774 // }
3775 //
3776 // if (keycmp(&tp.item->key, &searchkey)) {
3777 // WARN("extent item not found for address %llx, size %llx\n", address, size);
3778 // return STATUS_SUCCESS;
3779 // }
3780 //
3781 // if (tp.item->size == sizeof(EXTENT_ITEM_V0)) { // old extent ref, convert
3782 // NTSTATUS Status = convert_old_data_extent(Vcb, address, size, rollback);
3783 // if (!NT_SUCCESS(Status)) {
3784 // ERR("convert_old_data_extent returned %08x\n", Status);
3785 // return Status;
3786 // }
3787 //
3788 // Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3789 // if (!NT_SUCCESS(Status)) {
3790 // ERR("error - find_item returned %08x\n", Status);
3791 // return Status;
3792 // }
3793 //
3794 // if (keycmp(&tp.item->key, &searchkey)) {
3795 // WARN("extent item not found for address %llx, size %llx\n", address, size);
3796 // return STATUS_SUCCESS;
3797 // }
3798 // }
3799 //
3800 // ei = (EXTENT_ITEM*)tp.item->data;
3801 //
3802 // if (tp.item->size < sizeof(EXTENT_ITEM)) {
3803 // ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
3804 // return STATUS_INTERNAL_ERROR;
3805 // }
3806 //
3807 // if (!(ei->flags & EXTENT_ITEM_DATA)) {
3808 // ERR("error - EXTENT_ITEM_DATA flag not set\n");
3809 // return STATUS_INTERNAL_ERROR;
3810 // }
3811 //
3812 // if (extent_item_is_shared(ei, tp.item->size - sizeof(EXTENT_ITEM))) {
3813 // NTSTATUS Status = convert_shared_data_extent(Vcb, address, size, rollback);
3814 // if (!NT_SUCCESS(Status)) {
3815 // ERR("convert_shared_data_extent returned %08x\n", Status);
3816 // return Status;
3817 // }
3818 //
3819 // Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
3820 // if (!NT_SUCCESS(Status)) {
3821 // ERR("error - find_item returned %08x\n", Status);
3822 // return Status;
3823 // }
3824 //
3825 // if (keycmp(&tp.item->key, &searchkey)) {
3826 // WARN("extent item not found for address %llx, size %llx\n", address, size);
3827 // return STATUS_SUCCESS;
3828 // }
3829 //
3830 // ei = (EXTENT_ITEM*)tp.item->data;
3831 // }
3832 //
3833 // len = tp.item->size - sizeof(EXTENT_ITEM);
3834 // siptr = (UINT8*)&ei[1];
3835 // found = FALSE;
3836 //
3837 // do {
3838 // if (*siptr == TYPE_EXTENT_DATA_REF) {
3839 // edr = (EXTENT_DATA_REF*)&siptr[1];
3840 //
3841 // if (edr->root == subvol->id && edr->objid == inode && edr->offset == offset) {
3842 // if (edr->count > 1) {
3843 // ei = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
3844 //
3845 // if (!ei) {
3846 // ERR("out of memory\n");
3847 // return STATUS_INSUFFICIENT_RESOURCES;
3848 // }
3849 //
3850 // RtlCopyMemory(ei, tp.item->data, tp.item->size);
3851 //
3852 // edr = (EXTENT_DATA_REF*)((UINT8*)ei + ((UINT8*)edr - tp.item->data));
3853 // edr->count--;
3854 // ei->refcount--;
3855 //
3856 // delete_tree_item(Vcb, &tp, rollback);
3857 //
3858 // if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ei, tp.item->size, NULL, rollback)) {
3859 // ERR("error - failed to insert item\n");
3860 // ExFreePool(ei);
3861 // return STATUS_INTERNAL_ERROR;
3862 // }
3863 //
3864 // return STATUS_SUCCESS;
3865 // }
3866 //
3867 // found = TRUE;
3868 // break;
3869 // }
3870 //
3871 // siptr += sizeof(UINT8) + sizeof(EXTENT_DATA_REF);
3872 //
3873 // if (len > sizeof(EXTENT_DATA_REF) + sizeof(UINT8)) {
3874 // len -= sizeof(EXTENT_DATA_REF) + sizeof(UINT8);
3875 // } else
3876 // break;
3877 // // // FIXME - TYPE_TREE_BLOCK_REF 0xB0
3878 // } else {
3879 // ERR("unrecognized extent subitem %x\n", *siptr);
3880 // return STATUS_INTERNAL_ERROR;
3881 // }
3882 // } while (len > 0);
3883 //
3884 // if (!found) {
3885 // WARN("could not find extent data ref\n");
3886 // return STATUS_SUCCESS;
3887 // }
3888 //
3889 // // FIXME - decrease subitem refcount if there already?
3890 //
3891 // len = tp.item->size - sizeof(UINT8) - sizeof(EXTENT_DATA_REF);
3892 //
3893 // delete_tree_item(Vcb, &tp, rollback);
3894 //
3895 // if (len == sizeof(EXTENT_ITEM)) { // extent no longer needed
3896 // chunk* c;
3897 // LIST_ENTRY* le2;
3898 //
3899 // if (changed_sector_list) {
3900 // changed_sector* sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
3901 // if (!sc) {
3902 // ERR("out of memory\n");
3903 // return STATUS_INSUFFICIENT_RESOURCES;
3904 // }
3905 //
3906 // sc->ol.key = address;
3907 // sc->checksums = NULL;
3908 // sc->length = size / Vcb->superblock.sector_size;
3909 //
3910 // sc->deleted = TRUE;
3911 //
3912 // insert_into_ordered_list(changed_sector_list, &sc->ol);
3913 // }
3914 //
3915 // c = NULL;
3916 // le2 = Vcb->chunks.Flink;
3917 // while (le2 != &Vcb->chunks) {
3918 // c = CONTAINING_RECORD(le2, chunk, list_entry);
3919 //
3920 // TRACE("chunk: %llx, %llx\n", c->offset, c->chunk_item->size);
3921 //
3922 // if (address >= c->offset && address + size < c->offset + c->chunk_item->size)
3923 // break;
3924 //
3925 // le2 = le2->Flink;
3926 // }
3927 // if (le2 == &Vcb->chunks) c = NULL;
3928 //
3929 // if (c) {
3930 // decrease_chunk_usage(c, size);
3931 //
3932 // add_to_space_list(c, address, size, SPACE_TYPE_DELETING);
3933 // }
3934 //
3935 // return STATUS_SUCCESS;
3936 // }
3937 //
3938 // ei = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
3939 // if (!ei) {
3940 // ERR("out of memory\n");
3941 // return STATUS_INSUFFICIENT_RESOURCES;
3942 // }
3943 //
3944 // RtlCopyMemory(ei, tp.item->data, siptr - tp.item->data);
3945 // ei->refcount--;
3946 // ei->generation = Vcb->superblock.generation;
3947 //
3948 // if (tp.item->data + len != siptr)
3949 // RtlCopyMemory((UINT8*)ei + (siptr - tp.item->data), siptr + sizeof(UINT8) + sizeof(EXTENT_DATA_REF), tp.item->size - (siptr - tp.item->data) - sizeof(UINT8) - sizeof(EXTENT_DATA_REF));
3950 //
3951 // if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ei, len, NULL, rollback)) {
3952 // ERR("error - failed to insert item\n");
3953 // ExFreePool(ei);
3954 // return STATUS_INTERNAL_ERROR;
3955 // }
3956 //
3957 // return STATUS_SUCCESS;
3958 // }
3959
3960 static __inline BOOL entry_in_ordered_list(LIST_ENTRY* list, UINT64 value) {
3961 LIST_ENTRY* le = list->Flink;
3962 ordered_list* ol;
3963
3964 while (le != list) {
3965 ol = (ordered_list*)le;
3966
3967 if (ol->key > value)
3968 return FALSE;
3969 else if (ol->key == value)
3970 return TRUE;
3971
3972 le = le->Flink;
3973 }
3974
3975 return FALSE;
3976 }
3977
3978 static BOOL is_file_prealloc_inode(device_extension* Vcb, root* subvol, UINT64 inode, UINT64 start_data, UINT64 end_data) {
3979 NTSTATUS Status;
3980 KEY searchkey;
3981 traverse_ptr tp, next_tp;
3982 BOOL b;
3983
3984 searchkey.obj_id = inode;
3985 searchkey.obj_type = TYPE_EXTENT_DATA;
3986 searchkey.offset = start_data;
3987
3988 Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE);
3989 if (!NT_SUCCESS(Status)) {
3990 ERR("error - find_item returned %08x\n", Status);
3991 return FALSE;
3992 }
3993
3994 if (tp.item->key.obj_id != inode || tp.item->key.obj_type != TYPE_EXTENT_DATA)
3995 return FALSE;
3996
3997 do {
3998 EXTENT_DATA* ed = (EXTENT_DATA*)tp.item->data;
3999 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
4000 UINT64 len;
4001
4002 if (tp.item->size < sizeof(EXTENT_DATA)) {
4003 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
4004 return FALSE;
4005 }
4006
4007 if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
4008 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
4009 return FALSE;
4010 }
4011
4012 b = find_next_item(Vcb, &tp, &next_tp, FALSE);
4013
4014 len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
4015
4016 if (tp.item->key.offset < end_data && tp.item->key.offset + len >= start_data && ed->type == EXTENT_TYPE_PREALLOC)
4017 return TRUE;
4018
4019 if (b) {
4020 tp = next_tp;
4021
4022 if (tp.item->key.obj_id > inode || tp.item->key.obj_type > TYPE_EXTENT_DATA || tp.item->key.offset >= end_data)
4023 break;
4024 }
4025 } while (b);
4026
4027 return FALSE;
4028 }
4029
4030 NTSTATUS excise_extents_inode(device_extension* Vcb, root* subvol, UINT64 inode, INODE_ITEM* ii, UINT64 start_data, UINT64 end_data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
4031 KEY searchkey;
4032 traverse_ptr tp, next_tp;
4033 NTSTATUS Status;
4034 BOOL b, deleted_prealloc = FALSE;
4035
4036 searchkey.obj_id = inode;
4037 searchkey.obj_type = TYPE_EXTENT_DATA;
4038 searchkey.offset = start_data;
4039
4040 Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE);
4041 if (!NT_SUCCESS(Status)) {
4042 ERR("error - find_item returned %08x\n", Status);
4043 return Status;
4044 }
4045
4046 do {
4047 EXTENT_DATA* ed = (EXTENT_DATA*)tp.item->data;
4048 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
4049 UINT64 len;
4050
4051 if (tp.item->size < sizeof(EXTENT_DATA)) {
4052 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
4053 Status = STATUS_INTERNAL_ERROR;
4054 goto end;
4055 }
4056
4057 b = find_next_item(Vcb, &tp, &next_tp, FALSE);
4058
4059 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type)
4060 goto cont;
4061
4062 if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
4063 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
4064 Status = STATUS_INTERNAL_ERROR;
4065 goto end;
4066 }
4067
4068 len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
4069
4070 if (tp.item->key.offset < end_data && tp.item->key.offset + len >= start_data) {
4071 if (ed->compression != BTRFS_COMPRESSION_NONE) {
4072 FIXME("FIXME - compression not supported at present\n");
4073 Status = STATUS_NOT_SUPPORTED;
4074 goto end;
4075 }
4076
4077 if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
4078 WARN("root %llx, inode %llx, extent %llx: encryption not supported (type %x)\n", subvol->id, inode, tp.item->key.offset, ed->encryption);
4079 Status = STATUS_NOT_SUPPORTED;
4080 goto end;
4081 }
4082
4083 if (ed->encoding != BTRFS_ENCODING_NONE) {
4084 WARN("other encodings not supported\n");
4085 Status = STATUS_NOT_SUPPORTED;
4086 goto end;
4087 }
4088
4089 if (ed->type == EXTENT_TYPE_INLINE) {
4090 if (start_data <= tp.item->key.offset && end_data >= tp.item->key.offset + len) { // remove all
4091 delete_tree_item(Vcb, &tp, rollback);
4092
4093 if (ii)
4094 ii->st_blocks -= len;
4095 } else if (start_data <= tp.item->key.offset && end_data < tp.item->key.offset + len) { // remove beginning
4096 EXTENT_DATA* ned;
4097 UINT64 size;
4098
4099 delete_tree_item(Vcb, &tp, rollback);
4100
4101 size = len - (end_data - tp.item->key.offset);
4102
4103 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
4104 if (!ned) {
4105 ERR("out of memory\n");
4106 Status = STATUS_INSUFFICIENT_RESOURCES;
4107 goto end;
4108 }
4109
4110 ned->generation = Vcb->superblock.generation;
4111 ned->decoded_size = size;
4112 ned->compression = ed->compression;
4113 ned->encryption = ed->encryption;
4114 ned->encoding = ed->encoding;
4115 ned->type = ed->type;
4116
4117 RtlCopyMemory(&ned->data[0], &ed->data[end_data - tp.item->key.offset], size);
4118
4119 if (!insert_tree_item(Vcb, subvol, inode, TYPE_EXTENT_DATA, end_data, ned, sizeof(EXTENT_DATA) - 1 + size, NULL, rollback)) {
4120 ERR("insert_tree_item failed\n");
4121 ExFreePool(ned);
4122 Status = STATUS_INTERNAL_ERROR;
4123 goto end;
4124 }
4125
4126 if (ii)
4127 ii->st_blocks -= end_data - tp.item->key.offset;
4128 } else if (start_data > tp.item->key.offset && end_data >= tp.item->key.offset + len) { // remove end
4129 EXTENT_DATA* ned;
4130 UINT64 size;
4131
4132 delete_tree_item(Vcb, &tp, rollback);
4133
4134 size = start_data - tp.item->key.offset;
4135
4136 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
4137 if (!ned) {
4138 ERR("out of memory\n");
4139 Status = STATUS_INSUFFICIENT_RESOURCES;
4140 goto end;
4141 }
4142
4143 ned->generation = Vcb->superblock.generation;
4144 ned->decoded_size = size;
4145 ned->compression = ed->compression;
4146 ned->encryption = ed->encryption;
4147 ned->encoding = ed->encoding;
4148 ned->type = ed->type;
4149
4150 RtlCopyMemory(&ned->data[0], &ed->data[0], size);
4151
4152 if (!insert_tree_item(Vcb, subvol, inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, sizeof(EXTENT_DATA) - 1 + size, NULL, rollback)) {
4153 ERR("insert_tree_item failed\n");
4154 ExFreePool(ned);
4155 Status = STATUS_INTERNAL_ERROR;
4156 goto end;
4157 }
4158
4159 if (ii)
4160 ii->st_blocks -= tp.item->key.offset + len - start_data;
4161 } else if (start_data > tp.item->key.offset && end_data < tp.item->key.offset + len) { // remove middle
4162 EXTENT_DATA* ned;
4163 UINT64 size;
4164
4165 delete_tree_item(Vcb, &tp, rollback);
4166
4167 size = start_data - tp.item->key.offset;
4168
4169 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
4170 if (!ned) {
4171 ERR("out of memory\n");
4172 Status = STATUS_INSUFFICIENT_RESOURCES;
4173 goto end;
4174 }
4175
4176 ned->generation = Vcb->superblock.generation;
4177 ned->decoded_size = size;
4178 ned->compression = ed->compression;
4179 ned->encryption = ed->encryption;
4180 ned->encoding = ed->encoding;
4181 ned->type = ed->type;
4182
4183 RtlCopyMemory(&ned->data[0], &ed->data[0], size);
4184
4185 if (!insert_tree_item(Vcb, subvol, inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, sizeof(EXTENT_DATA) - 1 + size, NULL, rollback)) {
4186 ERR("insert_tree_item failed\n");
4187 ExFreePool(ned);
4188 Status = STATUS_INTERNAL_ERROR;
4189 goto end;
4190 }
4191
4192 size = tp.item->key.offset + len - end_data;
4193
4194 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
4195 if (!ned) {
4196 ERR("out of memory\n");
4197 Status = STATUS_INSUFFICIENT_RESOURCES;
4198 goto end;
4199 }
4200
4201 ned->generation = Vcb->superblock.generation;
4202 ned->decoded_size = size;
4203 ned->compression = ed->compression;
4204 ned->encryption = ed->encryption;
4205 ned->encoding = ed->encoding;
4206 ned->type = ed->type;
4207
4208 RtlCopyMemory(&ned->data[0], &ed->data[end_data - tp.item->key.offset], size);
4209
4210 if (!insert_tree_item(Vcb, subvol, inode, TYPE_EXTENT_DATA, end_data, ned, sizeof(EXTENT_DATA) - 1 + size, NULL, rollback)) {
4211 ERR("insert_tree_item failed\n");
4212 ExFreePool(ned);
4213 Status = STATUS_INTERNAL_ERROR;
4214 goto end;
4215 }
4216
4217 if (ii)
4218 ii->st_blocks -= end_data - start_data;
4219 }
4220 } else if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
4221 if (start_data <= tp.item->key.offset && end_data >= tp.item->key.offset + len) { // remove all
4222 if (ed2->address != 0) {
4223 Status = decrease_extent_refcount_data(Vcb, ed2->address, ed2->size, subvol, inode, tp.item->key.offset - ed2->offset, 1, changed_sector_list, rollback);
4224 if (!NT_SUCCESS(Status)) {
4225 ERR("decrease_extent_refcount_data returned %08x\n", Status);
4226 goto end;
4227 }
4228
4229 if (ii)
4230 ii->st_blocks -= len;
4231 }
4232
4233 if (ed->type == EXTENT_TYPE_PREALLOC)
4234 deleted_prealloc = TRUE;
4235
4236 delete_tree_item(Vcb, &tp, rollback);
4237 } else if (start_data <= tp.item->key.offset && end_data < tp.item->key.offset + len) { // remove beginning
4238 EXTENT_DATA* ned;
4239 EXTENT_DATA2* ned2;
4240
4241 if (ed2->address != 0 && ii)
4242 ii->st_blocks -= end_data - tp.item->key.offset;
4243
4244 delete_tree_item(Vcb, &tp, rollback);
4245
4246 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
4247 if (!ned) {
4248 ERR("out of memory\n");
4249 Status = STATUS_INSUFFICIENT_RESOURCES;
4250 goto end;
4251 }
4252
4253 ned2 = (EXTENT_DATA2*)&ned->data[0];
4254
4255 ned->generation = Vcb->superblock.generation;
4256 ned->decoded_size = ed->decoded_size;
4257 ned->compression = ed->compression;
4258 ned->encryption = ed->encryption;
4259 ned->encoding = ed->encoding;
4260 ned->type = ed->type;
4261 ned2->address = ed2->address;
4262 ned2->size = ed2->size;
4263 ned2->offset = ed2->address == 0 ? 0 : (ed2->offset + (end_data - tp.item->key.offset));
4264 ned2->num_bytes = ed2->num_bytes - (end_data - tp.item->key.offset);
4265
4266 if (!insert_tree_item(Vcb, subvol, inode, TYPE_EXTENT_DATA, end_data, ned, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
4267 ERR("insert_tree_item failed\n");
4268 ExFreePool(ned);
4269 Status = STATUS_INTERNAL_ERROR;
4270 goto end;
4271 }
4272 } else if (start_data > tp.item->key.offset && end_data >= tp.item->key.offset + len) { // remove end
4273 EXTENT_DATA* ned;
4274 EXTENT_DATA2* ned2;
4275
4276 if (ed2->address != 0 && ii)
4277 ii->st_blocks -= tp.item->key.offset + len - start_data;
4278
4279 delete_tree_item(Vcb, &tp, rollback);
4280
4281 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
4282 if (!ned) {
4283 ERR("out of memory\n");
4284 Status = STATUS_INSUFFICIENT_RESOURCES;
4285 goto end;
4286 }
4287
4288 ned2 = (EXTENT_DATA2*)&ned->data[0];
4289
4290 ned->generation = Vcb->superblock.generation;
4291 ned->decoded_size = ed->decoded_size;
4292 ned->compression = ed->compression;
4293 ned->encryption = ed->encryption;
4294 ned->encoding = ed->encoding;
4295 ned->type = ed->type;
4296 ned2->address = ed2->address;
4297 ned2->size = ed2->size;
4298 ned2->offset = ed2->address == 0 ? 0 : ed2->offset;
4299 ned2->num_bytes = start_data - tp.item->key.offset;
4300
4301 if (!insert_tree_item(Vcb, subvol, inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
4302 ERR("insert_tree_item failed\n");
4303 ExFreePool(ned);
4304 Status = STATUS_INTERNAL_ERROR;
4305 goto end;
4306 }
4307 } else if (start_data > tp.item->key.offset && end_data < tp.item->key.offset + len) { // remove middle
4308 EXTENT_DATA* ned;
4309 EXTENT_DATA2* ned2;
4310
4311 if (ed2->address != 0 && ii)
4312 ii->st_blocks -= end_data - start_data;
4313
4314 delete_tree_item(Vcb, &tp, rollback);
4315
4316 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
4317 if (!ned) {
4318 ERR("out of memory\n");
4319 Status = STATUS_INSUFFICIENT_RESOURCES;
4320 goto end;
4321 }
4322
4323 ned2 = (EXTENT_DATA2*)&ned->data[0];
4324
4325 ned->generation = Vcb->superblock.generation;
4326 ned->decoded_size = ed->decoded_size;
4327 ned->compression = ed->compression;
4328 ned->encryption = ed->encryption;
4329 ned->encoding = ed->encoding;
4330 ned->type = ed->type;
4331 ned2->address = ed2->address;
4332 ned2->size = ed2->size;
4333 ned2->offset = ed2->address == 0 ? 0 : ed2->offset;
4334 ned2->num_bytes = start_data - tp.item->key.offset;
4335
4336 if (!insert_tree_item(Vcb, subvol, inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
4337 ERR("insert_tree_item failed\n");
4338 ExFreePool(ned);
4339 Status = STATUS_INTERNAL_ERROR;
4340 goto end;
4341 }
4342
4343 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
4344 if (!ned) {
4345 ERR("out of memory\n");
4346 Status = STATUS_INSUFFICIENT_RESOURCES;
4347 goto end;
4348 }
4349
4350 ned2 = (EXTENT_DATA2*)&ned->data[0];
4351
4352 ned->generation = Vcb->superblock.generation;
4353 ned->decoded_size = ed->decoded_size;
4354 ned->compression = ed->compression;
4355 ned->encryption = ed->encryption;
4356 ned->encoding = ed->encoding;
4357 ned->type = ed->type;
4358 ned2->address = ed2->address;
4359 ned2->size = ed2->size;
4360 ned2->offset = ed2->address == 0 ? 0 : (ed2->offset + (end_data - tp.item->key.offset));
4361 ned2->num_bytes = tp.item->key.offset + len - end_data;
4362
4363 if (!insert_tree_item(Vcb, subvol, inode, TYPE_EXTENT_DATA, end_data, ned, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
4364 ERR("insert_tree_item failed\n");
4365 ExFreePool(ned);
4366 Status = STATUS_INTERNAL_ERROR;
4367 goto end;
4368 }
4369
4370 if (ed2->address != 0) {
4371 Status = increase_extent_refcount_data(Vcb, ed2->address, ed2->size, subvol, inode, tp.item->key.offset - ed2->offset, 1, rollback);
4372
4373 if (!NT_SUCCESS(Status)) {
4374 ERR("increase_extent_refcount_data returned %08x\n", Status);
4375 goto end;
4376 }
4377 }
4378 }
4379 }
4380 }
4381
4382 cont:
4383 if (b) {
4384 tp = next_tp;
4385
4386 if (tp.item->key.obj_id > inode || tp.item->key.obj_type > TYPE_EXTENT_DATA || tp.item->key.offset >= end_data)
4387 break;
4388 }
4389 } while (b);
4390
4391 // FIXME - do bitmap analysis of changed extents, and free what we can
4392
4393 if (ii && deleted_prealloc && !is_file_prealloc_inode(Vcb, subvol, inode, 0, sector_align(ii->st_size, Vcb->superblock.sector_size)))
4394 ii->flags &= ~BTRFS_INODE_PREALLOC;
4395
4396 end:
4397
4398 return Status;
4399 }
4400
4401 NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 end_data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
4402 NTSTATUS Status;
4403
4404 TRACE("(%p, (%llx, %llx), %llx, %llx, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, end_data, changed_sector_list);
4405
4406 Status = excise_extents_inode(Vcb, fcb->subvol, fcb->inode, &fcb->inode_item, start_data, end_data, changed_sector_list, rollback);
4407 if (!NT_SUCCESS(Status)) {
4408 ERR("excise_extents_inode returned %08x\n");
4409 return Status;
4410 }
4411
4412 return STATUS_SUCCESS;
4413 }
4414
4415 static NTSTATUS do_write_data(device_extension* Vcb, UINT64 address, void* data, UINT64 length, LIST_ENTRY* changed_sector_list) {
4416 NTSTATUS Status;
4417 changed_sector* sc;
4418 int i;
4419
4420 Status = write_data(Vcb, address, data, length);
4421 if (!NT_SUCCESS(Status)) {
4422 ERR("write_data returned %08x\n", Status);
4423 return Status;
4424 }
4425
4426 if (changed_sector_list) {
4427 sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
4428 if (!sc) {
4429 ERR("out of memory\n");
4430 return STATUS_INSUFFICIENT_RESOURCES;
4431 }
4432
4433 sc->ol.key = address;
4434 sc->length = length / Vcb->superblock.sector_size;
4435 sc->deleted = FALSE;
4436
4437 sc->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sc->length, ALLOC_TAG);
4438 if (!sc->checksums) {
4439 ERR("out of memory\n");
4440 ExFreePool(sc);
4441 return STATUS_INSUFFICIENT_RESOURCES;
4442 }
4443
4444 for (i = 0; i < sc->length; i++) {
4445 sc->checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
4446 }
4447
4448 insert_into_ordered_list(changed_sector_list, &sc->ol);
4449 }
4450
4451 return STATUS_SUCCESS;
4452 }
4453
4454 BOOL insert_extent_chunk_inode(device_extension* Vcb, root* subvol, UINT64 inode, INODE_ITEM* inode_item, chunk* c, UINT64 start_data,
4455 UINT64 length, BOOL prealloc, void* data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
4456 UINT64 address;
4457 NTSTATUS Status;
4458 EXTENT_DATA* ed;
4459 EXTENT_DATA2* ed2;
4460 ULONG edsize = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
4461
4462 TRACE("(%p, (%llx, %llx), %llx, %llx, %llx, %p, %p)\n", Vcb, subvol->id, inode, c->offset, start_data, length, data, changed_sector_list);
4463
4464 if (!find_address_in_chunk(Vcb, c, length, &address))
4465 return FALSE;
4466
4467 Status = increase_extent_refcount_data(Vcb, address, length, subvol, inode, start_data, 1, rollback);
4468 if (!NT_SUCCESS(Status)) {
4469 ERR("increase_extent_refcount_data returned %08x\n", Status);
4470 return FALSE;
4471 }
4472
4473 if (data) {
4474 Status = do_write_data(Vcb, address, data, length, changed_sector_list);
4475 if (!NT_SUCCESS(Status)) {
4476 ERR("do_write_data returned %08x\n", Status);
4477 return FALSE;
4478 }
4479 }
4480
4481 // add extent data to inode
4482 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
4483 if (!ed) {
4484 ERR("out of memory\n");
4485 return FALSE;
4486 }
4487
4488 ed->generation = Vcb->superblock.generation;
4489 ed->decoded_size = length;
4490 ed->compression = BTRFS_COMPRESSION_NONE;
4491 ed->encryption = BTRFS_ENCRYPTION_NONE;
4492 ed->encoding = BTRFS_ENCODING_NONE;
4493 ed->type = prealloc ? EXTENT_TYPE_PREALLOC : EXTENT_TYPE_REGULAR;
4494
4495 ed2 = (EXTENT_DATA2*)ed->data;
4496 ed2->address = address;
4497 ed2->size = length;
4498 ed2->offset = 0;
4499 ed2->num_bytes = length;
4500
4501 if (!insert_tree_item(Vcb, subvol, inode, TYPE_EXTENT_DATA, start_data, ed, edsize, NULL, rollback)) {
4502 ERR("insert_tree_item failed\n");
4503 ExFreePool(ed);
4504 return FALSE;
4505 }
4506
4507 increase_chunk_usage(c, length);
4508 add_to_space_list(c, address, length, SPACE_TYPE_WRITING);
4509
4510 if (inode_item) {
4511 inode_item->st_blocks += length;
4512
4513 if (prealloc)
4514 inode_item->flags |= BTRFS_INODE_PREALLOC;
4515 }
4516
4517 return TRUE;
4518 }
4519
4520 static BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start_data, UINT64 length, BOOL prealloc, void* data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
4521 return insert_extent_chunk_inode(Vcb, fcb->subvol, fcb->inode, &fcb->inode_item, c, start_data, length, prealloc, data, changed_sector_list, rollback);
4522 }
4523
4524 static BOOL extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data,
4525 LIST_ENTRY* changed_sector_list, traverse_ptr* edtp, traverse_ptr* eitp, LIST_ENTRY* rollback) {
4526 EXTENT_DATA* ed;
4527 EXTENT_DATA2* ed2;
4528 EXTENT_ITEM* ei;
4529 NTSTATUS Status;
4530 changed_sector* sc;
4531 chunk* c;
4532 int i;
4533
4534 TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data,
4535 length, data, changed_sector_list, edtp, eitp);
4536
4537 ed = ExAllocatePoolWithTag(PagedPool, edtp->item->size, ALLOC_TAG);
4538 if (!ed) {
4539 ERR("out of memory\n");
4540 return FALSE;
4541 }
4542
4543 RtlCopyMemory(ed, edtp->item->data, edtp->item->size);
4544
4545 ed->decoded_size += length;
4546 ed2 = (EXTENT_DATA2*)ed->data;
4547 ed2->size += length;
4548 ed2->num_bytes += length;
4549
4550 delete_tree_item(Vcb, edtp, rollback);
4551
4552 if (!insert_tree_item(Vcb, fcb->subvol, edtp->item->key.obj_id, edtp->item->key.obj_type, edtp->item->key.offset, ed, edtp->item->size, NULL, rollback)) {
4553 TRACE("insert_tree_item failed\n");
4554
4555 ExFreePool(ed);
4556 return FALSE;
4557 }
4558
4559 ei = ExAllocatePoolWithTag(PagedPool, eitp->item->size, ALLOC_TAG);
4560 if (!ei) {
4561 ERR("out of memory\n");
4562 ExFreePool(ed);
4563 return FALSE;
4564 }
4565
4566 RtlCopyMemory(ei, eitp->item->data, eitp->item->size);
4567
4568 if (!insert_tree_item(Vcb, Vcb->extent_root, eitp->item->key.obj_id, eitp->item->key.obj_type, eitp->item->key.offset + length, ei, eitp->item->size, NULL, rollback)) {
4569 ERR("insert_tree_item failed\n");
4570
4571 ExFreePool(ei);
4572 return FALSE;
4573 }
4574
4575 delete_tree_item(Vcb, eitp, rollback);
4576
4577 Status = write_data(Vcb, eitp->item->key.obj_id + eitp->item->key.offset, data, length);
4578 if (!NT_SUCCESS(Status)) {
4579 ERR("write_data returned %08x\n", Status);
4580 return FALSE;
4581 }
4582
4583 if (changed_sector_list) {
4584 sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
4585 if (!sc) {
4586 ERR("out of memory\n");
4587 return FALSE;
4588 }
4589
4590 sc->ol.key = eitp->item->key.obj_id + eitp->item->key.offset;
4591 sc->length = length / Vcb->superblock.sector_size;
4592 sc->deleted = FALSE;
4593
4594 sc->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sc->length, ALLOC_TAG);
4595 if (!sc->checksums) {
4596 ERR("out of memory\n");
4597 ExFreePool(sc);
4598 return FALSE;
4599 }
4600
4601 for (i = 0; i < sc->length; i++) {
4602 sc->checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
4603 }
4604 insert_into_ordered_list(changed_sector_list, &sc->ol);
4605 }
4606
4607 c = get_chunk_from_address(Vcb, eitp->item->key.obj_id);
4608
4609 if (c) {
4610 increase_chunk_usage(c, length);
4611
4612 add_to_space_list(c, eitp->item->key.obj_id + eitp->item->key.offset, length, SPACE_TYPE_WRITING);
4613 }
4614
4615 fcb->inode_item.st_blocks += length;
4616
4617 return TRUE;
4618 }
4619
4620 static BOOL try_extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data,
4621 LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
4622 KEY searchkey;
4623 traverse_ptr tp, tp2;
4624 BOOL success = FALSE;
4625 EXTENT_DATA* ed;
4626 EXTENT_DATA2* ed2;
4627 EXTENT_ITEM* ei;
4628 chunk* c;
4629 LIST_ENTRY* le;
4630 space* s;
4631 NTSTATUS Status;
4632
4633 searchkey.obj_id = fcb->inode;
4634 searchkey.obj_type = TYPE_EXTENT_DATA;
4635 searchkey.offset = start_data;
4636
4637 Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
4638 if (!NT_SUCCESS(Status)) {
4639 ERR("error - find_item returned %08x\n", Status);
4640 return FALSE;
4641 }
4642
4643 if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || tp.item->key.offset >= start_data) {
4644 WARN("previous EXTENT_DATA not found\n");
4645 goto end;
4646 }
4647
4648 ed = (EXTENT_DATA*)tp.item->data;
4649
4650 if (tp.item->size < sizeof(EXTENT_DATA)) {
4651 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
4652 goto end;
4653 }
4654
4655 if (ed->type != EXTENT_TYPE_REGULAR) {
4656 TRACE("not extending extent which is not EXTENT_TYPE_REGULAR\n");
4657 goto end;
4658 }
4659
4660 ed2 = (EXTENT_DATA2*)ed->data;
4661
4662 if (tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
4663 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
4664 goto end;
4665 }
4666
4667 if (tp.item->key.offset + ed2->num_bytes != start_data) {
4668 TRACE("last EXTENT_DATA does not run up to start_data (%llx + %llx != %llx)\n", tp.item->key.offset, ed2->num_bytes, start_data);
4669 goto end;
4670 }
4671
4672 if (ed->compression != BTRFS_COMPRESSION_NONE) {
4673 FIXME("FIXME: compression not yet supported\n");
4674 goto end;
4675 }
4676
4677 if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
4678 WARN("encryption not supported\n");
4679 goto end;
4680 }
4681
4682 if (ed->encoding != BTRFS_ENCODING_NONE) {
4683 WARN("other encodings not supported\n");
4684 goto end;
4685 }
4686
4687 if (ed2->size - ed2->offset != ed2->num_bytes) {
4688 TRACE("last EXTENT_DATA does not run all the way to the end of the extent\n");
4689 goto end;
4690 }
4691
4692 searchkey.obj_id = ed2->address;
4693 searchkey.obj_type = TYPE_EXTENT_ITEM;
4694 searchkey.offset = ed2->size;
4695
4696 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
4697 if (!NT_SUCCESS(Status)) {
4698 ERR("error - find_item returned %08x\n", Status);
4699 goto end;
4700 }
4701
4702 if (keycmp(&tp2.item->key, &searchkey)) {
4703 ERR("error - extent %llx,%llx not found in tree\n", ed2->address, ed2->size);
4704 int3; // TESTING
4705 goto end;
4706 }
4707
4708 if (tp2.item->size == sizeof(EXTENT_ITEM_V0)) { // old extent ref, convert
4709 NTSTATUS Status = convert_old_data_extent(Vcb, ed2->address, ed2->size, rollback);
4710 if (!NT_SUCCESS(Status)) {
4711 ERR("convert_old_data_extent returned %08x\n", Status);
4712 goto end;
4713 }
4714
4715 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
4716 if (!NT_SUCCESS(Status)) {
4717 ERR("error - find_item returned %08x\n", Status);
4718 goto end;
4719 }
4720
4721 if (keycmp(&tp2.item->key, &searchkey)) {
4722 WARN("extent item not found for address %llx, size %llx\n", ed2->address, ed2->size);
4723 goto end;
4724 }
4725 }
4726
4727 ei = (EXTENT_ITEM*)tp2.item->data;
4728
4729 if (tp.item->size < sizeof(EXTENT_ITEM)) {
4730 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
4731 goto end;
4732 }
4733
4734 // FIXME - test this
4735 if (extent_item_is_shared(ei, tp2.item->size - sizeof(EXTENT_ITEM))) {
4736 NTSTATUS Status = convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
4737 if (!NT_SUCCESS(Status)) {
4738 ERR("convert_shared_data_extent returned %08x\n", Status);
4739 goto end;
4740 }
4741
4742 Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
4743 if (!NT_SUCCESS(Status)) {
4744 ERR("error - find_item returned %08x\n", Status);
4745 goto end;
4746 }
4747
4748 if (keycmp(&tp2.item->key, &searchkey)) {
4749 WARN("extent item not found for address %llx, size %llx\n", ed2->address, ed2->size);
4750 goto end;
4751 }
4752
4753 ei = (EXTENT_ITEM*)tp2.item->data;
4754
4755 if (tp.item->size < sizeof(EXTENT_ITEM)) {
4756 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
4757 goto end;
4758 }
4759 }
4760
4761 if (ei->refcount != 1) {
4762 TRACE("extent refcount was not 1\n");
4763 goto end;
4764 }
4765
4766 if (ei->flags != EXTENT_ITEM_DATA) {
4767 ERR("error - extent was not a data extent\n");
4768 goto end;
4769 }
4770
4771 c = get_chunk_from_address(Vcb, ed2->address);
4772
4773 le = c->space.Flink;
4774 while (le != &c->space) {
4775 s = CONTAINING_RECORD(le, space, list_entry);
4776
4777 if (s->offset == ed2->address + ed2->size) {
4778 if (s->type == SPACE_TYPE_FREE && s->size >= length) {
4779 success = extend_data(Vcb, fcb, start_data, length, data, changed_sector_list, &tp, &tp2, rollback);
4780 }
4781 break;
4782 } else if (s->offset > ed2->address + ed2->size)
4783 break;
4784
4785 le = le->Flink;
4786 }
4787
4788 end:
4789
4790 return success;
4791 }
4792
4793 static NTSTATUS insert_prealloc_extent(fcb* fcb, UINT64 start, UINT64 length, LIST_ENTRY* rollback) {
4794 LIST_ENTRY* le = fcb->Vcb->chunks.Flink;
4795 chunk* c;
4796 UINT64 flags;
4797
4798 // FIXME - how do we know which RAID level to put this to?
4799 flags = BLOCK_FLAG_DATA; // SINGLE
4800
4801 // FIXME - if length is more than max chunk size, loop through and
4802 // create the new chunks first
4803
4804 while (le != &fcb->Vcb->chunks) {
4805 c = CONTAINING_RECORD(le, chunk, list_entry);
4806
4807 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
4808 if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, TRUE, NULL, NULL, rollback))
4809 return STATUS_SUCCESS;
4810 }
4811
4812 le = le->Flink;
4813 }
4814
4815 if ((c = alloc_chunk(fcb->Vcb, flags, rollback))) {
4816 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
4817 if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, TRUE, NULL, NULL, rollback))
4818 return STATUS_SUCCESS;
4819 }
4820 }
4821
4822 // FIXME - rebalance chunks if free space elsewhere?
4823 WARN("couldn't find any data chunks with %llx bytes free\n", length);
4824
4825 return STATUS_DISK_FULL;
4826 }
4827
4828 NTSTATUS insert_sparse_extent(device_extension* Vcb, root* r, UINT64 inode, UINT64 start, UINT64 length, LIST_ENTRY* rollback) {
4829 EXTENT_DATA* ed;
4830 EXTENT_DATA2* ed2;
4831
4832 TRACE("(%p, %llx, %llx, %llx, %llx)\n", Vcb, r->id, inode, start, length);
4833
4834 ed = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
4835 if (!ed) {
4836 ERR("out of memory\n");
4837 return STATUS_INSUFFICIENT_RESOURCES;
4838 }
4839
4840 ed->generation = Vcb->superblock.generation;
4841 ed->decoded_size = length;
4842 ed->compression = BTRFS_COMPRESSION_NONE;
4843 ed->encryption = BTRFS_ENCRYPTION_NONE;
4844 ed->encoding = BTRFS_ENCODING_NONE;
4845 ed->type = EXTENT_TYPE_REGULAR;
4846
4847 ed2 = (EXTENT_DATA2*)ed->data;
4848 ed2->address = 0;
4849 ed2->size = 0;
4850 ed2->offset = 0;
4851 ed2->num_bytes = length;
4852
4853 if (!insert_tree_item(Vcb, r, inode, TYPE_EXTENT_DATA, start, ed, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
4854 ERR("insert_tree_item failed\n");
4855 ExFreePool(ed);
4856 return STATUS_INTERNAL_ERROR;
4857 }
4858
4859 return STATUS_SUCCESS;
4860 }
4861
4862 // static void print_tree(tree* t) {
4863 // LIST_ENTRY* le = t->itemlist.Flink;
4864 // while (le != &t->itemlist) {
4865 // tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
4866 // ERR("%llx,%x,%llx (ignore = %s)\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->ignore ? "TRUE" : "FALSE");
4867 // le = le->Flink;
4868 // }
4869 // }
4870
4871 static NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
4872 LIST_ENTRY* le = Vcb->chunks.Flink;
4873 chunk* c;
4874 KEY searchkey;
4875 UINT64 flags;
4876
4877 TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data, changed_sector_list);
4878
4879 // FIXME - split data up if not enough space for just one extent
4880
4881 if (start_data > 0 && try_extend_data(Vcb, fcb, start_data, length, data, changed_sector_list, rollback))
4882 return STATUS_SUCCESS;
4883
4884 // if there is a gap before start_data, plug it with a sparse extent
4885 if (start_data > 0) {
4886 traverse_ptr tp;
4887 NTSTATUS Status;
4888 EXTENT_DATA* ed;
4889 UINT64 len;
4890
4891 searchkey.obj_id = fcb->inode;
4892 searchkey.obj_type = TYPE_EXTENT_DATA;
4893 searchkey.offset = start_data;
4894
4895 Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
4896 if (!NT_SUCCESS(Status)) {
4897 ERR("error - find_item returned %08x\n", Status);
4898 return Status;
4899 }
4900
4901 // if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || tp.item->key.offset >= start_data) {
4902 // traverse_ptr next_tp;
4903 //
4904 // ERR("error - did not find EXTENT_DATA expected - looking for %llx,%x,%llx, found %llx,%x,%llx\n",
4905 // searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
4906 // print_tree(tp.tree);
4907 //
4908 // if (find_next_item(Vcb, &tp, &next_tp, FALSE)) {
4909 // ERR("---\n");
4910 // ERR("key = %llx,%x,%llx\n", next_tp.tree->paritem->key.obj_id, next_tp.tree->paritem->key.obj_type, next_tp.tree->paritem->key.offset);
4911 // print_tree(next_tp.tree);
4912 //
4913 // free_traverse_ptr(&next_tp);
4914 // } else
4915 // ERR("next item not found\n");
4916 //
4917 // int3;
4918 // free_traverse_ptr(&tp);
4919 // return STATUS_INTERNAL_ERROR;
4920 // }
4921
4922 if (tp.item->key.obj_type == TYPE_EXTENT_DATA && tp.item->size >= sizeof(EXTENT_DATA)) {
4923 EXTENT_DATA2* ed2;
4924
4925 ed = (EXTENT_DATA*)tp.item->data;
4926 ed2 = (EXTENT_DATA2*)ed->data;
4927
4928 len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
4929 } else
4930 ed = NULL;
4931
4932 if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || !ed || tp.item->key.offset + len < start_data) {
4933 if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA)
4934 Status = insert_sparse_extent(Vcb, fcb->subvol, fcb->inode, 0, start_data, rollback);
4935 else if (!ed)
4936 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
4937 else {
4938 Status = insert_sparse_extent(Vcb, fcb->subvol, fcb->inode, tp.item->key.offset + len,
4939 start_data - tp.item->key.offset - len, rollback);
4940 }
4941 if (!NT_SUCCESS(Status)) {
4942 ERR("insert_sparse_extent returned %08x\n", Status);
4943 return Status;
4944 }
4945 }
4946 }
4947
4948 // FIXME - how do we know which RAID level to put this to?
4949 flags = BLOCK_FLAG_DATA; // SINGLE
4950
4951 // if (!chunk_test) { // TESTING
4952 // if ((c = alloc_chunk(Vcb, flags, NULL))) {
4953 // ERR("chunk_item->type = %llx\n", c->chunk_item->type);
4954 // ERR("size = %llx\n", c->chunk_item->size);
4955 // ERR("used = %llx\n", c->used);
4956 //
4957 // if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
4958 // if (insert_extent_chunk(Vcb, fcb, c, start_data, length, data, changed_sector_list)) {
4959 // // chunk_test = TRUE;
4960 // ERR("SUCCESS\n");
4961 // return STATUS_SUCCESS;
4962 // } else
4963 // ERR(":-(\n");
4964 // } else
4965 // ERR("???\n");
4966 // }
4967 // }
4968
4969 while (le != &Vcb->chunks) {
4970 c = CONTAINING_RECORD(le, chunk, list_entry);
4971
4972 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
4973 if (insert_extent_chunk(Vcb, fcb, c, start_data, length, FALSE, data, changed_sector_list, rollback))
4974 return STATUS_SUCCESS;
4975 }
4976
4977 le = le->Flink;
4978 }
4979
4980 if ((c = alloc_chunk(Vcb, flags, rollback))) {
4981 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
4982 if (insert_extent_chunk(Vcb, fcb, c, start_data, length, FALSE, data, changed_sector_list, rollback))
4983 return STATUS_SUCCESS;
4984 }
4985 }
4986
4987 // FIXME - rebalance chunks if free space elsewhere?
4988 WARN("couldn't find any data chunks with %llx bytes free\n", length);
4989
4990 return STATUS_DISK_FULL;
4991 }
4992
4993 void update_checksum_tree(device_extension* Vcb, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
4994 LIST_ENTRY* le = changed_sector_list->Flink;
4995 changed_sector* cs;
4996 traverse_ptr tp, next_tp;
4997 KEY searchkey;
4998 UINT32* data;
4999 NTSTATUS Status;
5000
5001 if (!Vcb->checksum_root) {
5002 ERR("no checksum root\n");
5003 goto exit;
5004 }
5005
5006 while (le != changed_sector_list) {
5007 UINT64 startaddr, endaddr;
5008 ULONG len;
5009 UINT32* checksums;
5010 RTL_BITMAP bmp;
5011 ULONG* bmparr;
5012 ULONG runlength, index;
5013
5014 cs = (changed_sector*)le;
5015
5016 searchkey.obj_id = EXTENT_CSUM_ID;
5017 searchkey.obj_type = TYPE_EXTENT_CSUM;
5018 searchkey.offset = cs->ol.key;
5019
5020 // FIXME - create checksum_root if it doesn't exist at all
5021
5022 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE);
5023 if (!NT_SUCCESS(Status)) { // tree is completely empty
5024 // FIXME - do proper check here that tree is empty
5025 if (!cs->deleted) {
5026 checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * cs->length, ALLOC_TAG);
5027 if (!checksums) {
5028 ERR("out of memory\n");
5029 goto exit;
5030 }
5031
5032 RtlCopyMemory(checksums, cs->checksums, sizeof(UINT32) * cs->length);
5033
5034 if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, cs->ol.key, checksums, sizeof(UINT32) * cs->length, NULL, rollback)) {
5035 ERR("insert_tree_item failed\n");
5036 ExFreePool(checksums);
5037 goto exit;
5038 }
5039 }
5040 } else {
5041 UINT32 tplen;
5042
5043 // FIXME - check entry is TYPE_EXTENT_CSUM?
5044
5045 if (tp.item->key.offset < cs->ol.key && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= cs->ol.key)
5046 startaddr = tp.item->key.offset;
5047 else
5048 startaddr = cs->ol.key;
5049
5050 searchkey.obj_id = EXTENT_CSUM_ID;
5051 searchkey.obj_type = TYPE_EXTENT_CSUM;
5052 searchkey.offset = cs->ol.key + (cs->length * Vcb->superblock.sector_size);
5053
5054 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE);
5055 if (!NT_SUCCESS(Status)) {
5056 ERR("error - find_item returned %08x\n", Status);
5057 goto exit;
5058 }
5059
5060 tplen = tp.item->size / sizeof(UINT32);
5061
5062 if (tp.item->key.offset + (tplen * Vcb->superblock.sector_size) >= cs->ol.key + (cs->length * Vcb->superblock.sector_size))
5063 endaddr = tp.item->key.offset + (tplen * Vcb->superblock.sector_size);
5064 else
5065 endaddr = cs->ol.key + (cs->length * Vcb->superblock.sector_size);
5066
5067 TRACE("cs starts at %llx (%x sectors)\n", cs->ol.key, cs->length);
5068 TRACE("startaddr = %llx\n", startaddr);
5069 TRACE("endaddr = %llx\n", endaddr);
5070
5071 len = (endaddr - startaddr) / Vcb->superblock.sector_size;
5072
5073 checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * len, ALLOC_TAG);
5074 if (!checksums) {
5075 ERR("out of memory\n");
5076 goto exit;
5077 }
5078
5079 bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG);
5080 if (!bmparr) {
5081 ERR("out of memory\n");
5082 ExFreePool(checksums);
5083 goto exit;
5084 }
5085
5086 RtlInitializeBitMap(&bmp, bmparr, len);
5087 RtlSetAllBits(&bmp);
5088
5089 searchkey.obj_id = EXTENT_CSUM_ID;
5090 searchkey.obj_type = TYPE_EXTENT_CSUM;
5091 searchkey.offset = cs->ol.key;
5092
5093 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE);
5094 if (!NT_SUCCESS(Status)) {
5095 ERR("error - find_item returned %08x\n", Status);
5096 goto exit;
5097 }
5098
5099 // set bit = free space, cleared bit = allocated sector
5100
5101 // ERR("start loop\n");
5102 while (tp.item->key.offset < endaddr) {
5103 // ERR("%llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
5104 if (tp.item->key.offset >= startaddr) {
5105 if (tp.item->size > 0) {
5106 RtlCopyMemory(&checksums[(tp.item->key.offset - startaddr) / Vcb->superblock.sector_size], tp.item->data, tp.item->size);
5107 RtlClearBits(&bmp, (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size, tp.item->size / sizeof(UINT32));
5108 }
5109
5110 delete_tree_item(Vcb, &tp, rollback);
5111 }
5112
5113 if (find_next_item(Vcb, &tp, &next_tp, FALSE)) {
5114 tp = next_tp;
5115 } else
5116 break;
5117 }
5118 // ERR("end loop\n");
5119
5120 if (cs->deleted) {
5121 RtlSetBits(&bmp, (cs->ol.key - startaddr) / Vcb->superblock.sector_size, cs->length);
5122 } else {
5123 RtlCopyMemory(&checksums[(cs->ol.key - startaddr) / Vcb->superblock.sector_size], cs->checksums, cs->length * sizeof(UINT32));
5124 RtlClearBits(&bmp, (cs->ol.key - startaddr) / Vcb->superblock.sector_size, cs->length);
5125 }
5126
5127 runlength = RtlFindFirstRunClear(&bmp, &index);
5128
5129 while (runlength != 0) {
5130 do {
5131 ULONG rl;
5132
5133 if (runlength * sizeof(UINT32) > MAX_CSUM_SIZE)
5134 rl = MAX_CSUM_SIZE / sizeof(UINT32);
5135 else
5136 rl = runlength;
5137
5138 data = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * rl, ALLOC_TAG);
5139 if (!data) {
5140 ERR("out of memory\n");
5141 ExFreePool(bmparr);
5142 ExFreePool(checksums);
5143 goto exit;
5144 }
5145
5146 RtlCopyMemory(data, &checksums[index], sizeof(UINT32) * rl);
5147
5148 if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, startaddr + (index * Vcb->superblock.sector_size), data, sizeof(UINT32) * rl, NULL, rollback)) {
5149 ERR("insert_tree_item failed\n");
5150 ExFreePool(data);
5151 ExFreePool(bmparr);
5152 ExFreePool(checksums);
5153 goto exit;
5154 }
5155
5156 runlength -= rl;
5157 index += rl;
5158 } while (runlength > 0);
5159
5160 runlength = RtlFindNextForwardRunClear(&bmp, index, &index);
5161 }
5162
5163 ExFreePool(bmparr);
5164 ExFreePool(checksums);
5165 }
5166
5167 le = le->Flink;
5168 }
5169
5170 exit:
5171 while (!IsListEmpty(changed_sector_list)) {
5172 le = RemoveHeadList(changed_sector_list);
5173 cs = (changed_sector*)le;
5174
5175 if (cs->checksums)
5176 ExFreePool(cs->checksums);
5177
5178 ExFreePool(cs);
5179 }
5180 }
5181
5182 NTSTATUS truncate_file(fcb* fcb, UINT64 end, LIST_ENTRY* rollback) {
5183 LIST_ENTRY changed_sector_list;
5184 NTSTATUS Status;
5185 BOOL nocsum = fcb->inode_item.flags & BTRFS_INODE_NODATASUM;
5186
5187 if (!nocsum)
5188 InitializeListHead(&changed_sector_list);
5189
5190 // FIXME - convert into inline extent if short enough
5191
5192 Status = excise_extents(fcb->Vcb, fcb, sector_align(end, fcb->Vcb->superblock.sector_size),
5193 sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size), nocsum ? NULL : &changed_sector_list, rollback);
5194 if (!NT_SUCCESS(Status)) {
5195 ERR("error - excise_extents failed\n");
5196 return Status;
5197 }
5198
5199 fcb->inode_item.st_size = end;
5200 TRACE("setting st_size to %llx\n", end);
5201
5202 fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size);
5203 fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size;
5204 fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size;
5205 // FIXME - inform cache manager of this
5206
5207 TRACE("fcb %p FileSize = %llx\n", fcb, fcb->Header.FileSize.QuadPart);
5208
5209 if (!nocsum)
5210 update_checksum_tree(fcb->Vcb, &changed_sector_list, rollback);
5211
5212 return STATUS_SUCCESS;
5213 }
5214
5215 NTSTATUS extend_file(fcb* fcb, file_ref* fileref, UINT64 end, BOOL prealloc, LIST_ENTRY* rollback) {
5216 UINT64 oldalloc, newalloc;
5217 KEY searchkey;
5218 traverse_ptr tp;
5219 BOOL cur_inline;
5220 NTSTATUS Status;
5221
5222 TRACE("(%p, %x, %p)\n", fcb, end, rollback);
5223
5224 if (fcb->ads)
5225 return stream_set_end_of_file_information(fcb->Vcb, end, fcb, fileref, NULL, FALSE, rollback) ;
5226 else {
5227 searchkey.obj_id = fcb->inode;
5228 searchkey.obj_type = TYPE_EXTENT_DATA;
5229 searchkey.offset = 0xffffffffffffffff;
5230
5231 Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE);
5232 if (!NT_SUCCESS(Status)) {
5233 ERR("error - find_item returned %08x\n", Status);
5234 return Status;
5235 }
5236
5237 oldalloc = 0;
5238 if (tp.item->key.obj_id == fcb->inode && tp.item->key.obj_type == TYPE_EXTENT_DATA) {
5239 EXTENT_DATA* ed = (EXTENT_DATA*)tp.item->data;
5240 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
5241
5242 if (tp.item->size < sizeof(EXTENT_DATA)) {
5243 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
5244 return STATUS_INTERNAL_ERROR;
5245 }
5246
5247 oldalloc = tp.item->key.offset + (ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes);
5248 cur_inline = ed->type == EXTENT_TYPE_INLINE;
5249
5250 if (cur_inline && end > fcb->Vcb->max_inline) {
5251 LIST_ENTRY changed_sector_list;
5252 BOOL nocsum = fcb->inode_item.flags & BTRFS_INODE_NODATASUM;
5253 UINT64 origlength, length;
5254 UINT8* data;
5255
5256 TRACE("giving inline file proper extents\n");
5257
5258 origlength = ed->decoded_size;
5259
5260 cur_inline = FALSE;
5261
5262 if (!nocsum)
5263 InitializeListHead(&changed_sector_list);
5264
5265 delete_tree_item(fcb->Vcb, &tp, rollback);
5266
5267 length = sector_align(origlength, fcb->Vcb->superblock.sector_size);
5268
5269 data = ExAllocatePoolWithTag(PagedPool, length, ALLOC_TAG);
5270 if (!data) {
5271 ERR("could not allocate %llx bytes for data\n", length);
5272 return STATUS_INSUFFICIENT_RESOURCES;
5273 }
5274
5275 if (length > origlength)
5276 RtlZeroMemory(data + origlength, length - origlength);
5277
5278 RtlCopyMemory(data, ed->data, origlength);
5279
5280 fcb->inode_item.st_blocks -= origlength;
5281
5282 Status = insert_extent(fcb->Vcb, fcb, tp.item->key.offset, length, data, nocsum ? NULL : &changed_sector_list, rollback);
5283 if (!NT_SUCCESS(Status)) {
5284 ERR("insert_extent returned %08x\n", Status);
5285 ExFreePool(data);
5286 return Status;
5287 }
5288
5289 oldalloc = tp.item->key.offset + length;
5290
5291 ExFreePool(data);
5292
5293 if (!nocsum)
5294 update_checksum_tree(fcb->Vcb, &changed_sector_list, rollback);
5295 }
5296
5297 if (cur_inline) {
5298 ULONG edsize;
5299
5300 if (end > oldalloc) {
5301 edsize = sizeof(EXTENT_DATA) - 1 + end - tp.item->key.offset;
5302 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
5303
5304 if (!ed) {
5305 ERR("out of memory\n");
5306 return STATUS_INSUFFICIENT_RESOURCES;
5307 }
5308
5309 RtlZeroMemory(ed, edsize);
5310 RtlCopyMemory(ed, tp.item->data, tp.item->size);
5311
5312 ed->decoded_size = end - tp.item->key.offset;
5313
5314 delete_tree_item(fcb->Vcb, &tp, rollback);
5315
5316 if (!insert_tree_item(fcb->Vcb, fcb->subvol, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ed, edsize, NULL, rollback)) {
5317 ERR("error - failed to insert item\n");
5318 ExFreePool(ed);
5319 return STATUS_INTERNAL_ERROR;
5320 }
5321 }
5322
5323 TRACE("extending inline file (oldalloc = %llx, end = %llx)\n", oldalloc, end);
5324
5325 fcb->inode_item.st_size = end;
5326 TRACE("setting st_size to %llx\n", end);
5327
5328 fcb->inode_item.st_blocks = end;
5329
5330 fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
5331 } else {
5332 newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
5333
5334 if (newalloc > oldalloc) {
5335 if (prealloc) {
5336 // FIXME - try and extend previous extent first
5337
5338 Status = insert_prealloc_extent(fcb, oldalloc, newalloc - oldalloc, rollback);
5339
5340 if (!NT_SUCCESS(Status)) {
5341 ERR("insert_prealloc_extent returned %08x\n", Status);
5342 return Status;
5343 }
5344 } else {
5345 Status = insert_sparse_extent(fcb->Vcb, fcb->subvol, fcb->inode, oldalloc, newalloc - oldalloc, rollback);
5346
5347 if (!NT_SUCCESS(Status)) {
5348 ERR("insert_sparse_extent returned %08x\n", Status);
5349 return Status;
5350 }
5351 }
5352 }
5353
5354 fcb->inode_item.st_size = end;
5355 TRACE("setting st_size to %llx\n", end);
5356
5357 TRACE("newalloc = %llx\n", newalloc);
5358
5359 fcb->Header.AllocationSize.QuadPart = newalloc;
5360 fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
5361 }
5362 } else {
5363 if (end > fcb->Vcb->max_inline) {
5364 newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
5365
5366 if (prealloc) {
5367 Status = insert_prealloc_extent(fcb, 0, newalloc, rollback);
5368
5369 if (!NT_SUCCESS(Status)) {
5370 ERR("insert_prealloc_extent returned %08x\n", Status);
5371 return Status;
5372 }
5373 } else {
5374 Status = insert_sparse_extent(fcb->Vcb, fcb->subvol, fcb->inode, 0, newalloc, rollback);
5375
5376 if (!NT_SUCCESS(Status)) {
5377 ERR("insert_sparse_extent returned %08x\n", Status);
5378 return Status;
5379 }
5380 }
5381
5382 fcb->inode_item.st_size = end;
5383 TRACE("setting st_size to %llx\n", end);
5384
5385 TRACE("newalloc = %llx\n", newalloc);
5386
5387 fcb->Header.AllocationSize.QuadPart = newalloc;
5388 fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
5389 } else {
5390 EXTENT_DATA* ed;
5391 ULONG edsize;
5392
5393 edsize = sizeof(EXTENT_DATA) - 1 + end;
5394 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
5395
5396 if (!ed) {
5397 ERR("out of memory\n");
5398 return STATUS_INSUFFICIENT_RESOURCES;
5399 }
5400
5401 ed->generation = fcb->Vcb->superblock.generation;
5402 ed->decoded_size = end;
5403 ed->compression = BTRFS_COMPRESSION_NONE;
5404 ed->encryption = BTRFS_ENCRYPTION_NONE;
5405 ed->encoding = BTRFS_ENCODING_NONE;
5406 ed->type = EXTENT_TYPE_INLINE;
5407
5408 RtlZeroMemory(ed->data, end);
5409
5410 if (!insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, 0, ed, edsize, NULL, rollback)) {
5411 ERR("error - failed to insert item\n");
5412 ExFreePool(ed);
5413 return STATUS_INTERNAL_ERROR;
5414 }
5415
5416 fcb->inode_item.st_size = end;
5417 TRACE("setting st_size to %llx\n", end);
5418
5419 fcb->inode_item.st_blocks = end;
5420
5421 fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
5422 }
5423 }
5424 }
5425
5426 return STATUS_SUCCESS;
5427 }
5428
5429 static UINT64 get_extent_item_refcount(device_extension* Vcb, UINT64 address) {
5430 KEY searchkey;
5431 traverse_ptr tp;
5432 EXTENT_ITEM* ei;
5433 UINT64 rc;
5434 NTSTATUS Status;
5435
5436 searchkey.obj_id = address;
5437 searchkey.obj_type = TYPE_EXTENT_ITEM;
5438 searchkey.offset = 0xffffffffffffffff;
5439
5440 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
5441 if (!NT_SUCCESS(Status)) {
5442 ERR("error - find_item returned %08x\n", Status);
5443 return 0;
5444 }
5445
5446 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
5447 ERR("error - could not find EXTENT_ITEM for %llx\n", address);
5448 return 0;
5449 }
5450
5451 if (tp.item->size < sizeof(EXTENT_ITEM)) {
5452 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
5453 return 0;
5454 }
5455
5456 ei = (EXTENT_ITEM*)tp.item->data;
5457 rc = ei->refcount;
5458
5459 return rc;
5460 }
5461
5462 static BOOL is_file_prealloc(fcb* fcb, UINT64 start_data, UINT64 end_data) {
5463 return is_file_prealloc_inode(fcb->Vcb, fcb->subvol, fcb->inode, start_data, end_data);
5464 }
5465
5466 static NTSTATUS do_cow_write(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
5467 NTSTATUS Status;
5468
5469 Status = excise_extents(fcb->Vcb, fcb, start_data, end_data, changed_sector_list, rollback);
5470 if (!NT_SUCCESS(Status)) {
5471 ERR("error - excise_extents returned %08x\n", Status);
5472 goto end;
5473 }
5474
5475 Status = insert_extent(fcb->Vcb, fcb, start_data, end_data - start_data, data, changed_sector_list, rollback);
5476
5477 if (!NT_SUCCESS(Status)) {
5478 ERR("error - insert_extent returned %08x\n", Status);
5479 goto end;
5480 }
5481
5482 Status = STATUS_SUCCESS;
5483
5484 end:
5485 return Status;
5486 }
5487
5488 static NTSTATUS merge_data_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 end_data, LIST_ENTRY* rollback) {
5489 KEY searchkey;
5490 traverse_ptr tp, next_tp;
5491 NTSTATUS Status;
5492 BOOL b;
5493 EXTENT_DATA* ed;
5494
5495 searchkey.obj_id = fcb->inode;
5496 searchkey.obj_type = TYPE_EXTENT_DATA;
5497 searchkey.offset = start_data;
5498
5499 Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
5500 if (!NT_SUCCESS(Status)) {
5501 ERR("error - find_item returned %08x\n", Status);
5502 return Status;
5503 }
5504
5505 if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA) {
5506 ERR("error - EXTENT_DATA not found\n");
5507 return STATUS_INTERNAL_ERROR;
5508 }
5509
5510 if (tp.item->key.offset > 0) {
5511 traverse_ptr tp2, prev_tp;
5512
5513 tp2 = tp;
5514 do {
5515 b = find_prev_item(Vcb, &tp2, &prev_tp, FALSE);
5516
5517 if (b) {
5518 if (!prev_tp.item->ignore)
5519 break;
5520
5521 tp2 = prev_tp;
5522 }
5523 } while (b);
5524
5525 if (b) {
5526 if (prev_tp.item->key.obj_id == fcb->inode && prev_tp.item->key.obj_type == TYPE_EXTENT_DATA)
5527 tp = prev_tp;
5528 }
5529 }
5530
5531 ed = (EXTENT_DATA*)tp.item->data;
5532 if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
5533 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
5534 return STATUS_INTERNAL_ERROR;
5535 }
5536
5537 do {
5538 b = find_next_item(Vcb, &tp, &next_tp, FALSE);
5539
5540 if (b) {
5541 EXTENT_DATA* ned;
5542
5543 if (next_tp.item->key.obj_id != fcb->inode || next_tp.item->key.obj_type != TYPE_EXTENT_DATA)
5544 return STATUS_SUCCESS;
5545
5546 if (next_tp.item->size < sizeof(EXTENT_DATA)) {
5547 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", next_tp.item->key.obj_id, next_tp.item->key.obj_type, next_tp.item->key.offset, next_tp.item->size, sizeof(EXTENT_DATA));
5548 return STATUS_INTERNAL_ERROR;
5549 }
5550
5551 ned = (EXTENT_DATA*)next_tp.item->data;
5552 if ((ned->type == EXTENT_TYPE_REGULAR || ned->type == EXTENT_TYPE_PREALLOC) && next_tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
5553 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", next_tp.item->key.obj_id, next_tp.item->key.obj_type, next_tp.item->key.offset, next_tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
5554 return STATUS_INTERNAL_ERROR;
5555 }
5556
5557 if (ed->type == ned->type && (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC)) {
5558 EXTENT_DATA2 *ed2, *ned2;
5559
5560 ed2 = (EXTENT_DATA2*)ed->data;
5561 ned2 = (EXTENT_DATA2*)ned->data;
5562
5563 if (next_tp.item->key.offset == tp.item->key.offset + ed2->num_bytes && ed2->address == ned2->address && ed2->size == ned2->size && ned2->offset == ed2->offset + ed2->num_bytes) {
5564 EXTENT_DATA* buf = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
5565 EXTENT_DATA2* buf2;
5566 traverse_ptr tp2;
5567
5568 if (!buf) {
5569 ERR("out of memory\n");
5570 return STATUS_INSUFFICIENT_RESOURCES;
5571 }
5572
5573 RtlCopyMemory(buf, tp.item->data, tp.item->size);
5574 buf->generation = Vcb->superblock.generation;
5575
5576 buf2 = (EXTENT_DATA2*)buf->data;
5577 buf2->num_bytes += ned2->num_bytes;
5578
5579 delete_tree_item(Vcb, &tp, rollback);
5580 delete_tree_item(Vcb, &next_tp, rollback);
5581
5582 if (!insert_tree_item(Vcb, fcb->subvol, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, buf, tp.item->size, &tp2, rollback)) {
5583 ERR("insert_tree_item failed\n");
5584 ExFreePool(buf);
5585 return STATUS_INTERNAL_ERROR;
5586 }
5587
5588 Status = decrease_extent_refcount_data(Vcb, ed2->address, ed2->size, fcb->subvol, fcb->inode, tp.item->key.offset - buf2->offset, 1, NULL, rollback);
5589 if (!NT_SUCCESS(Status)) {
5590 ERR("decrease_extent_refcount_data returned %08x\n", Status);
5591 return Status;
5592 }
5593
5594 tp = tp2;
5595
5596 continue;
5597 }
5598 }
5599
5600 tp = next_tp;
5601 ed = ned;
5602 }
5603 } while (b);
5604
5605 return STATUS_SUCCESS;
5606 }
5607
5608 static NTSTATUS do_prealloc_write(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
5609 NTSTATUS Status;
5610 KEY searchkey;
5611 traverse_ptr tp, next_tp;
5612 BOOL b, deleted_prealloc = FALSE;
5613 UINT64 last_written = start_data;
5614
5615 searchkey.obj_id = fcb->inode;
5616 searchkey.obj_type = TYPE_EXTENT_DATA;
5617 searchkey.offset = start_data;
5618
5619 Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE);
5620 if (!NT_SUCCESS(Status)) {
5621 ERR("error - find_item returned %08x\n", Status);
5622 return Status;
5623 }
5624
5625 if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA)
5626 return do_cow_write(Vcb, fcb, start_data, end_data, data, changed_sector_list, rollback);
5627
5628 do {
5629 EXTENT_DATA* ed = (EXTENT_DATA*)tp.item->data;
5630 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
5631
5632 if (tp.item->size < sizeof(EXTENT_DATA)) {
5633 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
5634 return STATUS_INTERNAL_ERROR;
5635 }
5636
5637 if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
5638 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
5639 return STATUS_INTERNAL_ERROR;
5640 }
5641
5642 b = find_next_item(fcb->Vcb, &tp, &next_tp, FALSE);
5643
5644 if (ed->type == EXTENT_TYPE_PREALLOC) {
5645 if (tp.item->key.offset > last_written) {
5646 Status = do_cow_write(Vcb, fcb, last_written, tp.item->key.offset, (UINT8*)data + last_written - start_data, changed_sector_list, rollback);
5647
5648 if (!NT_SUCCESS(Status)) {
5649 ERR("do_cow_write returned %08x\n", Status);
5650
5651 return Status;
5652 }
5653
5654 last_written = tp.item->key.offset;
5655 }
5656
5657 if (start_data <= tp.item->key.offset && end_data >= tp.item->key.offset + ed2->num_bytes) { // replace all
5658 EXTENT_DATA* ned = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
5659
5660 if (!ned) {
5661 ERR("out of memory\n");
5662
5663 return STATUS_INSUFFICIENT_RESOURCES;
5664 }
5665
5666 RtlCopyMemory(ned, tp.item->data, tp.item->size);
5667
5668 ned->type = EXTENT_TYPE_REGULAR;
5669
5670 delete_tree_item(Vcb, &tp, rollback);
5671
5672 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, tp.item->size, NULL, rollback)) {
5673 ERR("insert_tree_item failed\n");
5674
5675 return STATUS_INTERNAL_ERROR;
5676 }
5677
5678 Status = do_write_data(Vcb, ed2->address + ed2->offset, (UINT8*)data + tp.item->key.offset - start_data, ed2->num_bytes, changed_sector_list);
5679 if (!NT_SUCCESS(Status)) {
5680 ERR("do_write_data returned %08x\n", Status);
5681
5682 return Status;
5683 }
5684
5685 deleted_prealloc = TRUE;
5686
5687 last_written = tp.item->key.offset + ed2->num_bytes;
5688 } else if (start_data <= tp.item->key.offset && end_data < tp.item->key.offset + ed2->num_bytes) { // replace beginning
5689 EXTENT_DATA *ned, *nedb;
5690 EXTENT_DATA2* ned2;
5691
5692 ned = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
5693
5694 if (!ned) {
5695 ERR("out of memory\n");
5696
5697 return STATUS_INSUFFICIENT_RESOURCES;
5698 }
5699
5700 nedb = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
5701
5702 if (!nedb) {
5703 ERR("out of memory\n");
5704 ExFreePool(ned);
5705
5706 return STATUS_INSUFFICIENT_RESOURCES;
5707 }
5708
5709 delete_tree_item(Vcb, &tp, rollback);
5710
5711 RtlCopyMemory(ned, tp.item->data, tp.item->size);
5712
5713 ned->type = EXTENT_TYPE_REGULAR;
5714 ned2 = (EXTENT_DATA2*)ned->data;
5715 ned2->num_bytes = end_data - tp.item->key.offset;
5716
5717 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, tp.item->size, NULL, rollback)) {
5718 ERR("insert_tree_item failed\n");
5719 ExFreePool(ned);
5720 ExFreePool(nedb);
5721
5722 return STATUS_INTERNAL_ERROR;
5723 }
5724
5725 RtlCopyMemory(nedb, tp.item->data, tp.item->size);
5726 ned2 = (EXTENT_DATA2*)nedb->data;
5727 ned2->offset += end_data - tp.item->key.offset;
5728 ned2->num_bytes -= end_data - tp.item->key.offset;
5729
5730 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, end_data, nedb, tp.item->size, NULL, rollback)) {
5731 ERR("insert_tree_item failed\n");
5732 ExFreePool(nedb);
5733
5734 return STATUS_INTERNAL_ERROR;
5735 }
5736
5737 Status = do_write_data(Vcb, ed2->address + ed2->offset, (UINT8*)data + tp.item->key.offset - start_data, end_data - tp.item->key.offset, changed_sector_list);
5738 if (!NT_SUCCESS(Status)) {
5739 ERR("do_write_data returned %08x\n", Status);
5740
5741 return Status;
5742 }
5743
5744 Status = increase_extent_refcount_data(Vcb, ned2->address, ned2->size, fcb->subvol, fcb->inode, tp.item->key.offset - ed2->offset, 1, rollback);
5745 if (!NT_SUCCESS(Status)) {
5746 ERR("increase_extent_refcount_data returned %08x\n", Status);
5747 return Status;
5748 }
5749
5750 last_written = end_data;
5751 } else if (start_data > tp.item->key.offset && end_data >= tp.item->key.offset + ed2->num_bytes) { // replace end
5752 EXTENT_DATA *ned, *nedb;
5753 EXTENT_DATA2* ned2;
5754
5755 // FIXME - test this
5756
5757 ned = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
5758
5759 if (!ned) {
5760 ERR("out of memory\n");
5761
5762 return STATUS_INSUFFICIENT_RESOURCES;
5763 }
5764
5765 nedb = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
5766
5767 if (!nedb) {
5768 ERR("out of memory\n");
5769 ExFreePool(ned);
5770
5771 return STATUS_INSUFFICIENT_RESOURCES;
5772 }
5773
5774 delete_tree_item(Vcb, &tp, rollback);
5775
5776 RtlCopyMemory(ned, tp.item->data, tp.item->size);
5777
5778 ned2 = (EXTENT_DATA2*)ned->data;
5779 ned2->num_bytes = start_data - tp.item->key.offset;
5780
5781 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, tp.item->size, NULL, rollback)) {
5782 ERR("insert_tree_item failed\n");
5783 ExFreePool(ned);
5784 ExFreePool(nedb);
5785
5786 return STATUS_INTERNAL_ERROR;
5787 }
5788
5789 RtlCopyMemory(nedb, tp.item->data, tp.item->size);
5790
5791 nedb->type = EXTENT_TYPE_REGULAR;
5792 ned2 = (EXTENT_DATA2*)nedb->data;
5793 ned2->offset += start_data - tp.item->key.offset;
5794 ned2->num_bytes = tp.item->key.offset + ed2->num_bytes - start_data;
5795
5796 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, start_data, nedb, tp.item->size, NULL, rollback)) {
5797 ERR("insert_tree_item failed\n");
5798 ExFreePool(nedb);
5799
5800 return STATUS_INTERNAL_ERROR;
5801 }
5802
5803 Status = do_write_data(Vcb, ed2->address + ned2->offset, data, ned2->num_bytes, changed_sector_list);
5804 if (!NT_SUCCESS(Status)) {
5805 ERR("do_write_data returned %08x\n", Status);
5806
5807 return Status;
5808 }
5809
5810 Status = increase_extent_refcount_data(Vcb, ned2->address, ned2->size, fcb->subvol, fcb->inode, tp.item->key.offset - ed2->offset, 1, rollback);
5811 if (!NT_SUCCESS(Status)) {
5812 ERR("increase_extent_refcount_data returned %08x\n", Status);
5813
5814 return Status;
5815 }
5816
5817 last_written = start_data + ned2->num_bytes;
5818 } else if (start_data > tp.item->key.offset && end_data < tp.item->key.offset + ed2->num_bytes) { // replace middle
5819 EXTENT_DATA *ned, *nedb, *nedc;
5820 EXTENT_DATA2* ned2;
5821
5822 // FIXME - test this
5823
5824 ned = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
5825
5826 if (!ned) {
5827 ERR("out of memory\n");
5828
5829 return STATUS_INSUFFICIENT_RESOURCES;
5830 }
5831
5832 nedb = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
5833
5834 if (!nedb) {
5835 ERR("out of memory\n");
5836 ExFreePool(ned);
5837
5838 return STATUS_INSUFFICIENT_RESOURCES;
5839 }
5840
5841 nedc = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
5842
5843 if (!nedb) {
5844 ERR("out of memory\n");
5845 ExFreePool(nedb);
5846 ExFreePool(ned);
5847
5848 return STATUS_INSUFFICIENT_RESOURCES;
5849 }
5850
5851 delete_tree_item(Vcb, &tp, rollback);
5852
5853 RtlCopyMemory(ned, tp.item->data, tp.item->size);
5854 RtlCopyMemory(nedb, tp.item->data, tp.item->size);
5855 RtlCopyMemory(nedc, tp.item->data, tp.item->size);
5856
5857 ned2 = (EXTENT_DATA2*)ned->data;
5858 ned2->num_bytes = start_data - tp.item->key.offset;
5859
5860 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, tp.item->size, NULL, rollback)) {
5861 ERR("insert_tree_item failed\n");
5862 ExFreePool(ned);
5863 ExFreePool(nedb);
5864 ExFreePool(nedc);
5865
5866 return STATUS_INTERNAL_ERROR;
5867 }
5868
5869 nedb->type = EXTENT_TYPE_REGULAR;
5870 ned2 = (EXTENT_DATA2*)nedb->data;
5871 ned2->offset += start_data - tp.item->key.offset;
5872 ned2->num_bytes = end_data - start_data;
5873
5874 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, start_data, nedb, tp.item->size, NULL, rollback)) {
5875 ERR("insert_tree_item failed\n");
5876 ExFreePool(nedb);
5877 ExFreePool(nedc);
5878
5879 return STATUS_INTERNAL_ERROR;
5880 }
5881
5882 ned2 = (EXTENT_DATA2*)nedc->data;
5883 ned2->offset += end_data - tp.item->key.offset;
5884 ned2->num_bytes -= end_data - tp.item->key.offset;
5885
5886 if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, end_data, nedc, tp.item->size, NULL, rollback)) {
5887 ERR("insert_tree_item failed\n");
5888 ExFreePool(nedc);
5889
5890 return STATUS_INTERNAL_ERROR;
5891 }
5892
5893 ned2 = (EXTENT_DATA2*)nedb->data;
5894 Status = do_write_data(Vcb, ed2->address + ned2->offset, data, end_data - start_data, changed_sector_list);
5895 if (!NT_SUCCESS(Status)) {
5896 ERR("do_write_data returned %08x\n", Status);
5897
5898 return Status;
5899 }
5900
5901 Status = increase_extent_refcount_data(Vcb, ed2->address, ed2->size, fcb->subvol, fcb->inode, tp.item->key.offset - ed2->offset, 2, rollback);
5902 if (!NT_SUCCESS(Status)) {
5903 ERR("increase_extent_refcount_data returned %08x\n", Status);
5904 return Status;
5905 }
5906
5907 last_written = end_data;
5908 }
5909 }
5910
5911 if (b) {
5912 tp = next_tp;
5913
5914 if (tp.item->key.obj_id > fcb->inode || tp.item->key.obj_type > TYPE_EXTENT_DATA || tp.item->key.offset >= end_data)
5915 break;
5916 }
5917 } while (b);
5918
5919 if (last_written < end_data) {
5920 Status = do_cow_write(Vcb, fcb, last_written, end_data, (UINT8*)data + last_written - start_data, changed_sector_list, rollback);
5921
5922 if (!NT_SUCCESS(Status)) {
5923 ERR("do_cow_write returned %08x\n", Status);
5924 return Status;
5925 }
5926 }
5927
5928 Status = merge_data_extents(Vcb, fcb, start_data, end_data, rollback);
5929 if (!NT_SUCCESS(Status)) {
5930 ERR("merge_data_extents returned %08x\n", Status);
5931 return Status;
5932 }
5933
5934 if (deleted_prealloc && !is_file_prealloc(fcb, 0, sector_align(fcb->inode_item.st_size, Vcb->superblock.sector_size)))
5935 fcb->inode_item.flags &= ~BTRFS_INODE_PREALLOC;
5936
5937 return STATUS_SUCCESS;
5938 }
5939
5940 static NTSTATUS do_nocow_write(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
5941 KEY searchkey;
5942 traverse_ptr tp, next_tp;
5943 NTSTATUS Status;
5944 EXTENT_DATA* ed;
5945 BOOL b, do_cow;
5946 EXTENT_DATA2* eds;
5947 UINT64 size, new_start, new_end, last_write = 0;
5948
5949 TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data, changed_sector_list);
5950
5951 searchkey.obj_id = fcb->inode;
5952 searchkey.obj_type = TYPE_EXTENT_DATA;
5953 searchkey.offset = start_data;
5954
5955 Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
5956 if (!NT_SUCCESS(Status)) {
5957 ERR("error - find_item returned %08x\n", Status);
5958 return Status;
5959 }
5960
5961 if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || tp.item->key.offset > start_data) {
5962 ERR("previous EXTENT_DATA not found (found %llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
5963 Status = STATUS_INTERNAL_ERROR;
5964 goto end;
5965 }
5966
5967 do {
5968 if (tp.item->size < sizeof(EXTENT_DATA)) {
5969 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
5970 Status = STATUS_INTERNAL_ERROR;
5971 goto end;
5972 }
5973
5974 ed = (EXTENT_DATA*)tp.item->data;
5975
5976 if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
5977 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
5978 Status = STATUS_INTERNAL_ERROR;
5979 goto end;
5980 }
5981
5982 eds = (EXTENT_DATA2*)&ed->data[0];
5983
5984 b = find_next_item(Vcb, &tp, &next_tp, TRUE);
5985
5986 switch (ed->type) {
5987 case EXTENT_TYPE_REGULAR:
5988 {
5989 UINT64 rc = get_extent_item_refcount(Vcb, eds->address);
5990
5991 if (rc == 0) {
5992 ERR("get_extent_item_refcount failed\n");
5993 Status = STATUS_INTERNAL_ERROR;
5994 goto end;
5995 }
5996
5997 do_cow = rc > 1;
5998 break;
5999 }
6000
6001 case EXTENT_TYPE_INLINE:
6002 do_cow = TRUE;
6003 break;
6004
6005 case EXTENT_TYPE_PREALLOC:
6006 FIXME("FIXME - handle prealloc extents\n"); // FIXME
6007 Status = STATUS_NOT_SUPPORTED;
6008 goto end;
6009
6010 default:
6011 ERR("error - unknown extent type %x\n", ed->type);
6012 Status = STATUS_NOT_SUPPORTED;
6013 goto end;
6014 }
6015
6016 if (ed->compression != BTRFS_COMPRESSION_NONE) {
6017 FIXME("FIXME: compression not yet supported\n");
6018 Status = STATUS_NOT_SUPPORTED;
6019 goto end;
6020 }
6021
6022 if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
6023 WARN("encryption not supported\n");
6024 Status = STATUS_INTERNAL_ERROR;
6025 goto end;
6026 }
6027
6028 if (ed->encoding != BTRFS_ENCODING_NONE) {
6029 WARN("other encodings not supported\n");
6030 Status = STATUS_INTERNAL_ERROR;
6031 goto end;
6032 }
6033
6034 size = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : eds->num_bytes;
6035
6036 TRACE("extent: start = %llx, length = %llx\n", tp.item->key.offset, size);
6037
6038 new_start = tp.item->key.offset < start_data ? start_data : tp.item->key.offset;
6039 new_end = tp.item->key.offset + size > start_data + length ? (start_data + length) : (tp.item->key.offset + size);
6040
6041 TRACE("new_start = %llx\n", new_start);
6042 TRACE("new_end = %llx\n", new_end);
6043
6044 if (do_cow) {
6045 TRACE("doing COW write\n");
6046
6047 Status = excise_extents(Vcb, fcb, new_start, new_start + new_end, changed_sector_list, rollback);
6048
6049 if (!NT_SUCCESS(Status)) {
6050 ERR("error - excise_extents returned %08x\n", Status);
6051 goto end;
6052 }
6053
6054 Status = insert_extent(Vcb, fcb, new_start, new_end - new_start, (UINT8*)data + new_start - start_data, changed_sector_list, rollback);
6055
6056 if (!NT_SUCCESS(Status)) {
6057 ERR("error - insert_extent returned %08x\n", Status);
6058 goto end;
6059 }
6060 } else {
6061 UINT64 writeaddr;
6062
6063 writeaddr = eds->address + eds->offset + new_start - tp.item->key.offset;
6064 TRACE("doing non-COW write to %llx\n", writeaddr);
6065
6066 Status = write_data(Vcb, writeaddr, (UINT8*)data + new_start - start_data, new_end - new_start);
6067
6068 if (!NT_SUCCESS(Status)) {
6069 ERR("error - write_data returned %08x\n", Status);
6070 goto end;
6071 }
6072
6073 if (changed_sector_list) {
6074 unsigned int i;
6075 changed_sector* sc;
6076
6077 sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
6078 if (!sc) {
6079 ERR("out of memory\n");
6080 Status = STATUS_INSUFFICIENT_RESOURCES;
6081 goto end;
6082 }
6083
6084 sc->ol.key = writeaddr;
6085 sc->length = (new_end - new_start) / Vcb->superblock.sector_size;
6086 sc->deleted = FALSE;
6087
6088 sc->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sc->length, ALLOC_TAG);
6089 if (!sc->checksums) {
6090 ERR("out of memory\n");
6091 ExFreePool(sc);
6092 Status = STATUS_INSUFFICIENT_RESOURCES;
6093 goto end;
6094 }
6095
6096 for (i = 0; i < sc->length; i++) {
6097 sc->checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + new_start - start_data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
6098 }
6099
6100 insert_into_ordered_list(changed_sector_list, &sc->ol);
6101 }
6102 }
6103
6104 last_write = new_end;
6105
6106 if (b) {
6107 tp = next_tp;
6108
6109 if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || tp.item->key.offset >= start_data + length)
6110 b = FALSE;
6111 }
6112 } while (b);
6113
6114 if (last_write < start_data + length) {
6115 new_start = last_write;
6116 new_end = start_data + length;
6117
6118 TRACE("new_start = %llx\n", new_start);
6119 TRACE("new_end = %llx\n", new_end);
6120
6121 Status = insert_extent(Vcb, fcb, new_start, new_end - new_start, (UINT8*)data + new_start - start_data, changed_sector_list, rollback);
6122
6123 if (!NT_SUCCESS(Status)) {
6124 ERR("error - insert_extent returned %08x\n", Status);
6125 goto end;
6126 }
6127 }
6128
6129 Status = STATUS_SUCCESS;
6130
6131 end:
6132
6133 return Status;
6134 }
6135
6136 #ifdef DEBUG_PARANOID
6137 static void print_loaded_trees(tree* t, int spaces) {
6138 char pref[10];
6139 int i;
6140 LIST_ENTRY* le;
6141
6142 for (i = 0; i < spaces; i++) {
6143 pref[i] = ' ';
6144 }
6145 pref[spaces] = 0;
6146
6147 if (!t) {
6148 ERR("%s(not loaded)\n", pref);
6149 return;
6150 }
6151
6152 le = t->itemlist.Flink;
6153 while (le != &t->itemlist) {
6154 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
6155
6156 ERR("%s%llx,%x,%llx ignore=%s\n", pref, td->key.obj_id, td->key.obj_type, td->key.offset, td->ignore ? "TRUE" : "FALSE");
6157
6158 if (t->header.level > 0) {
6159 print_loaded_trees(td->treeholder.tree, spaces+1);
6160 }
6161
6162 le = le->Flink;
6163 }
6164 }
6165
6166 static void check_extents_consistent(device_extension* Vcb, fcb* fcb) {
6167 KEY searchkey;
6168 traverse_ptr tp, next_tp;
6169 UINT64 length, oldlength, lastoff, alloc;
6170 NTSTATUS Status;
6171 EXTENT_DATA* ed;
6172 EXTENT_DATA2* ed2;
6173
6174 if (fcb->ads || fcb->inode_item.st_size == 0 || fcb->deleted)
6175 return;
6176
6177 TRACE("inode = %llx, subvol = %llx\n", fcb->inode, fcb->subvol->id);
6178
6179 searchkey.obj_id = fcb->inode;
6180 searchkey.obj_type = TYPE_EXTENT_DATA;
6181 searchkey.offset = 0;
6182
6183 Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
6184 if (!NT_SUCCESS(Status)) {
6185 ERR("error - find_item returned %08x\n", Status);
6186 goto failure;
6187 }
6188
6189 if (keycmp(&searchkey, &tp.item->key)) {
6190 ERR("could not find EXTENT_DATA at offset 0\n");
6191 goto failure;
6192 }
6193
6194 if (tp.item->size < sizeof(EXTENT_DATA)) {
6195 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
6196 goto failure;
6197 }
6198
6199 ed = (EXTENT_DATA*)tp.item->data;
6200 ed2 = (EXTENT_DATA2*)&ed->data[0];
6201
6202 length = oldlength = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
6203 lastoff = tp.item->key.offset;
6204
6205 TRACE("(%llx,%x,%llx) length = %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, length);
6206
6207 alloc = 0;
6208 if (ed->type != EXTENT_TYPE_REGULAR || ed2->address != 0) {
6209 alloc += length;
6210 }
6211
6212 while (find_next_item(Vcb, &tp, &next_tp, FALSE)) {
6213 if (next_tp.item->key.obj_id != searchkey.obj_id || next_tp.item->key.obj_type != searchkey.obj_type)
6214 break;
6215
6216 tp = next_tp;
6217
6218 if (tp.item->size < sizeof(EXTENT_DATA)) {
6219 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
6220 goto failure;
6221 }
6222
6223 ed = (EXTENT_DATA*)tp.item->data;
6224 ed2 = (EXTENT_DATA2*)&ed->data[0];
6225
6226 length = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
6227
6228 TRACE("(%llx,%x,%llx) length = %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, length);
6229
6230 if (tp.item->key.offset != lastoff + oldlength) {
6231 ERR("EXTENT_DATA in %llx,%llx was at %llx, expected %llx\n", fcb->subvol->id, fcb->inode, tp.item->key.offset, lastoff + oldlength);
6232 goto failure;
6233 }
6234
6235 if (ed->type != EXTENT_TYPE_REGULAR || ed2->address != 0) {
6236 alloc += length;
6237 }
6238
6239 oldlength = length;
6240 lastoff = tp.item->key.offset;
6241 }
6242
6243 if (alloc != fcb->inode_item.st_blocks) {
6244 ERR("allocation size was %llx, expected %llx\n", alloc, fcb->inode_item.st_blocks);
6245 goto failure;
6246 }
6247
6248 // if (fcb->inode_item.st_blocks != lastoff + oldlength) {
6249 // ERR("extents finished at %x, expected %x\n", (UINT32)(lastoff + oldlength), (UINT32)fcb->inode_item.st_blocks);
6250 // goto failure;
6251 // }
6252
6253 return;
6254
6255 failure:
6256 if (fcb->subvol->treeholder.tree)
6257 print_loaded_trees(fcb->subvol->treeholder.tree, 0);
6258
6259 int3;
6260 }
6261
6262 // static void check_extent_tree_consistent(device_extension* Vcb) {
6263 // KEY searchkey;
6264 // traverse_ptr tp, next_tp;
6265 // UINT64 lastaddr;
6266 // BOOL b, inconsistency;
6267 //
6268 // searchkey.obj_id = 0;
6269 // searchkey.obj_type = 0;
6270 // searchkey.offset = 0;
6271 //
6272 // if (!find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE)) {
6273 // ERR("error - could not find any entries in extent_root\n");
6274 // int3;
6275 // }
6276 //
6277 // lastaddr = 0;
6278 // inconsistency = FALSE;
6279 //
6280 // do {
6281 // if (tp.item->key.obj_type == TYPE_EXTENT_ITEM) {
6282 // // ERR("%x,%x,%x\n", (UINT32)tp.item->key.obj_id, tp.item->key.obj_type, (UINT32)tp.item->key.offset);
6283 //
6284 // if (tp.item->key.obj_id < lastaddr) {
6285 // // ERR("inconsistency!\n");
6286 // // int3;
6287 // inconsistency = TRUE;
6288 // }
6289 //
6290 // lastaddr = tp.item->key.obj_id + tp.item->key.offset;
6291 // }
6292 //
6293 // b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
6294 // if (b) {
6295 // free_traverse_ptr(&tp);
6296 // tp = next_tp;
6297 // }
6298 // } while (b);
6299 //
6300 // free_traverse_ptr(&tp);
6301 //
6302 // if (!inconsistency)
6303 // return;
6304 //
6305 // ERR("Inconsistency detected:\n");
6306 //
6307 // if (!find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE)) {
6308 // ERR("error - could not find any entries in extent_root\n");
6309 // int3;
6310 // }
6311 //
6312 // do {
6313 // if (tp.item->key.obj_type == TYPE_EXTENT_ITEM) {
6314 // ERR("%x,%x,%x\n", (UINT32)tp.item->key.obj_id, tp.item->key.obj_type, (UINT32)tp.item->key.offset);
6315 //
6316 // if (tp.item->key.obj_id < lastaddr) {
6317 // ERR("inconsistency!\n");
6318 // }
6319 //
6320 // lastaddr = tp.item->key.obj_id + tp.item->key.offset;
6321 // }
6322 //
6323 // b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
6324 // if (b) {
6325 // free_traverse_ptr(&tp);
6326 // tp = next_tp;
6327 // }
6328 // } while (b);
6329 //
6330 // free_traverse_ptr(&tp);
6331 //
6332 // int3;
6333 // }
6334 #endif
6335
6336 NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void* buf, ULONG* length, BOOL paging_io, BOOL no_cache, LIST_ENTRY* rollback) {
6337 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
6338 PFILE_OBJECT FileObject = IrpSp->FileObject;
6339 KEY searchkey;
6340 traverse_ptr tp;
6341 EXTENT_DATA* ed2;
6342 UINT64 newlength, start_data, end_data;
6343 UINT32 bufhead;
6344 BOOL make_inline;
6345 UINT8* data;
6346 LIST_ENTRY changed_sector_list;
6347 INODE_ITEM *ii, *origii;
6348 BOOL changed_length = FALSE, nocsum, nocow/*, lazy_writer = FALSE, write_eof = FALSE*/;
6349 NTSTATUS Status;
6350 LARGE_INTEGER time;
6351 BTRFS_TIME now;
6352 fcb* fcb;
6353 ccb* ccb;
6354 file_ref* fileref;
6355 BOOL paging_lock = FALSE;
6356
6357 TRACE("(%p, %p, %llx, %p, %x, %u, %u)\n", Vcb, FileObject, offset.QuadPart, buf, *length, paging_io, no_cache);
6358
6359 if (*length == 0) {
6360 WARN("returning success for zero-length write\n");
6361 return STATUS_SUCCESS;
6362 }
6363
6364 if (!FileObject) {
6365 ERR("error - FileObject was NULL\n");
6366 return STATUS_ACCESS_DENIED;
6367 }
6368
6369 fcb = FileObject->FsContext;
6370 ccb = FileObject->FsContext2;
6371 fileref = ccb ? ccb->fileref : NULL;
6372
6373 if (fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK) {
6374 WARN("tried to write to something other than a file or symlink (inode %llx, type %u, %p, %p)\n", fcb->inode, fcb->type, &fcb->type, fcb);
6375 return STATUS_INVALID_DEVICE_REQUEST;
6376 }
6377
6378 if (offset.LowPart == FILE_WRITE_TO_END_OF_FILE && offset.HighPart == -1) {
6379 offset = fcb->Header.FileSize;
6380 // write_eof = TRUE;
6381 }
6382
6383 TRACE("fcb->Header.Flags = %x\n", fcb->Header.Flags);
6384
6385 if (no_cache && !paging_io && FileObject->SectionObjectPointer->DataSectionObject) {
6386 IO_STATUS_BLOCK iosb;
6387
6388 ExAcquireResourceExclusiveLite(fcb->Header.PagingIoResource, TRUE);
6389
6390 CcFlushCache(FileObject->SectionObjectPointer, &offset, *length, &iosb);
6391
6392 if (!NT_SUCCESS(iosb.Status)) {
6393 ExReleaseResourceLite(fcb->Header.PagingIoResource);
6394 ERR("CcFlushCache returned %08x\n", iosb.Status);
6395 return iosb.Status;
6396 }
6397
6398 paging_lock = TRUE;
6399
6400 CcPurgeCacheSection(FileObject->SectionObjectPointer, &offset, *length, FALSE);
6401 }
6402
6403 if (paging_io) {
6404 ExAcquireResourceSharedLite(fcb->Header.PagingIoResource, TRUE);
6405 paging_lock = TRUE;
6406 }
6407
6408 nocsum = fcb->ads ? TRUE : fcb->inode_item.flags & BTRFS_INODE_NODATASUM;
6409 nocow = fcb->ads ? TRUE : fcb->inode_item.flags & BTRFS_INODE_NODATACOW;
6410
6411 newlength = fcb->ads ? fcb->adssize : fcb->inode_item.st_size;
6412
6413 if (fcb->deleted)
6414 newlength = 0;
6415
6416 TRACE("newlength = %llx\n", newlength);
6417
6418 // if (KeGetCurrentThread() == fcb->lazy_writer_thread) {
6419 // ERR("lazy writer on the TV\n");
6420 // lazy_writer = TRUE;
6421 // }
6422
6423 if (offset.QuadPart + *length > newlength) {
6424 if (paging_io) {
6425 if (offset.QuadPart >= newlength) {
6426 TRACE("paging IO tried to write beyond end of file (file size = %llx, offset = %llx, length = %x)\n", newlength, offset.QuadPart, *length);
6427 TRACE("filename %S\n", file_desc(FileObject));
6428 TRACE("FileObject: AllocationSize = %llx, FileSize = %llx, ValidDataLength = %llx\n",
6429 fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
6430 Status = STATUS_SUCCESS;
6431 goto end;
6432 }
6433
6434 *length = newlength - offset.QuadPart;
6435 } else {
6436 newlength = offset.QuadPart + *length;
6437 changed_length = TRUE;
6438
6439 TRACE("extending length to %llx\n", newlength);
6440 }
6441 }
6442
6443 make_inline = fcb->ads ? FALSE : newlength <= fcb->Vcb->max_inline;
6444
6445 if (changed_length) {
6446 if (newlength > fcb->Header.AllocationSize.QuadPart) {
6447 Status = extend_file(fcb, fileref, newlength, FALSE, rollback);
6448 if (!NT_SUCCESS(Status)) {
6449 ERR("extend_file returned %08x\n", Status);
6450 goto end;
6451 }
6452 } else if (fcb->ads)
6453 fcb->adssize = newlength;
6454 else
6455 fcb->inode_item.st_size = newlength;
6456
6457 fcb->Header.FileSize.QuadPart = newlength;
6458 fcb->Header.ValidDataLength.QuadPart = newlength;
6459
6460 TRACE("AllocationSize = %llx\n", fcb->Header.AllocationSize.QuadPart);
6461 TRACE("FileSize = %llx\n", fcb->Header.FileSize.QuadPart);
6462 TRACE("ValidDataLength = %llx\n", fcb->Header.ValidDataLength.QuadPart);
6463 }
6464
6465 if (!no_cache) {
6466 BOOL wait;
6467
6468 if (!FileObject->PrivateCacheMap || changed_length) {
6469 CC_FILE_SIZES ccfs;
6470
6471 ccfs.AllocationSize = fcb->Header.AllocationSize;
6472 ccfs.FileSize = fcb->Header.FileSize;
6473 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
6474
6475 if (!FileObject->PrivateCacheMap) {
6476 TRACE("calling CcInitializeCacheMap...\n");
6477 CcInitializeCacheMap(FileObject, &ccfs, FALSE, cache_callbacks, FileObject);
6478
6479 CcSetReadAheadGranularity(FileObject, READ_AHEAD_GRANULARITY);
6480 } else {
6481 CcSetFileSizes(FileObject, &ccfs);
6482 }
6483 }
6484
6485 // FIXME - uncomment this when async is working
6486 // wait = IoIsOperationSynchronous(Irp) ? TRUE : FALSE;
6487 wait = TRUE;
6488
6489 if (IrpSp->MinorFunction & IRP_MN_MDL) {
6490 CcPrepareMdlWrite(FileObject, &offset, *length, &Irp->MdlAddress, &Irp->IoStatus);
6491
6492 Status = Irp->IoStatus.Status;
6493 goto end;
6494 } else {
6495 TRACE("CcCopyWrite(%p, %llx, %x, %u, %p)\n", FileObject, offset.QuadPart, *length, wait, buf);
6496 if (!CcCopyWrite(FileObject, &offset, *length, wait, buf)) {
6497 TRACE("CcCopyWrite failed.\n");
6498
6499 IoMarkIrpPending(Irp);
6500 Status = STATUS_PENDING;
6501 goto end;
6502 }
6503 TRACE("CcCopyWrite finished\n");
6504 }
6505
6506 Status = STATUS_SUCCESS;
6507 goto end;
6508 }
6509
6510 if (fcb->ads) {
6511 UINT16 datalen;
6512 UINT8* data2;
6513 UINT32 maxlen;
6514
6515 if (!get_xattr(fcb->Vcb, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, &data, &datalen)) {
6516 ERR("get_xattr failed\n");
6517 Status = STATUS_INTERNAL_ERROR;
6518 goto end;
6519 }
6520
6521 if (changed_length) {
6522 // find maximum length of xattr
6523 maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
6524
6525 searchkey.obj_id = fcb->inode;
6526 searchkey.obj_type = TYPE_XATTR_ITEM;
6527 searchkey.offset = fcb->adshash;
6528
6529 Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE);
6530 if (!NT_SUCCESS(Status)) {
6531 ERR("error - find_item returned %08x\n", Status);
6532 goto end;
6533 }
6534
6535 if (keycmp(&tp.item->key, &searchkey)) {
6536 ERR("error - could not find key for xattr\n");
6537 Status = STATUS_INTERNAL_ERROR;
6538 goto end;
6539 }
6540
6541 if (tp.item->size < datalen) {
6542 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, datalen);
6543 Status = STATUS_INTERNAL_ERROR;
6544 goto end;
6545 }
6546
6547 maxlen -= tp.item->size - datalen; // subtract XATTR_ITEM overhead
6548
6549 if (newlength > maxlen) {
6550 ERR("error - xattr too long (%llu > %u)\n", newlength, maxlen);
6551 Status = STATUS_DISK_FULL;
6552 goto end;
6553 }
6554
6555 fcb->adssize = newlength;
6556
6557 data2 = ExAllocatePoolWithTag(PagedPool, newlength, ALLOC_TAG);
6558 if (!data2) {
6559 ERR("out of memory\n");
6560 Status = STATUS_INSUFFICIENT_RESOURCES;
6561 goto end;
6562 }
6563
6564 RtlCopyMemory(data2, data, datalen);
6565
6566 if (offset.QuadPart > datalen)
6567 RtlZeroMemory(&data2[datalen], offset.QuadPart - datalen);
6568 } else
6569 data2 = data;
6570
6571 if (*length > 0)
6572 RtlCopyMemory(&data2[offset.QuadPart], buf, *length);
6573
6574 Status = set_xattr(fcb->Vcb, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, data2, newlength, rollback);
6575 if (!NT_SUCCESS(Status)) {
6576 ERR("set_xattr returned %08x\n", Status);
6577 goto end;
6578 }
6579
6580 if (data) ExFreePool(data);
6581 if (data2 != data) ExFreePool(data2);
6582
6583 fcb->Header.ValidDataLength.QuadPart = newlength;
6584 } else {
6585 if (make_inline) {
6586 start_data = 0;
6587 end_data = sector_align(newlength, fcb->Vcb->superblock.sector_size);
6588 bufhead = sizeof(EXTENT_DATA) - 1;
6589 } else {
6590 start_data = offset.QuadPart & ~(fcb->Vcb->superblock.sector_size - 1);
6591 end_data = sector_align(offset.QuadPart + *length, fcb->Vcb->superblock.sector_size);
6592 bufhead = 0;
6593 }
6594
6595 fcb->Header.ValidDataLength.QuadPart = newlength;
6596 TRACE("fcb %p FileSize = %llx\n", fcb, fcb->Header.FileSize.QuadPart);
6597
6598 data = ExAllocatePoolWithTag(PagedPool, end_data - start_data + bufhead, ALLOC_TAG);
6599 if (!data) {
6600 ERR("out of memory\n");
6601 Status = STATUS_INSUFFICIENT_RESOURCES;
6602 goto end;
6603 }
6604
6605 RtlZeroMemory(data + bufhead, end_data - start_data);
6606
6607 TRACE("start_data = %llx\n", start_data);
6608 TRACE("end_data = %llx\n", end_data);
6609
6610 if (offset.QuadPart > start_data || offset.QuadPart + *length < end_data) {
6611 if (changed_length) {
6612 if (fcb->inode_item.st_size > start_data)
6613 Status = read_file(Vcb, fcb->subvol, fcb->inode, data + bufhead, start_data, fcb->inode_item.st_size - start_data, NULL);
6614 else
6615 Status = STATUS_SUCCESS;
6616 } else
6617 Status = read_file(Vcb, fcb->subvol, fcb->inode, data + bufhead, start_data, end_data - start_data, NULL);
6618
6619 if (!NT_SUCCESS(Status)) {
6620 ERR("read_file returned %08x\n", Status);
6621 ExFreePool(data);
6622 goto end;
6623 }
6624 }
6625
6626 RtlCopyMemory(data + bufhead + offset.QuadPart - start_data, buf, *length);
6627
6628 if (!nocsum)
6629 InitializeListHead(&changed_sector_list);
6630
6631 if (make_inline) {
6632 Status = excise_extents(fcb->Vcb, fcb, start_data, end_data, nocsum ? NULL : &changed_sector_list, rollback);
6633 if (!NT_SUCCESS(Status)) {
6634 ERR("error - excise_extents returned %08x\n", Status);
6635 ExFreePool(data);
6636 goto end;
6637 }
6638
6639 ed2 = (EXTENT_DATA*)data;
6640 ed2->generation = fcb->Vcb->superblock.generation;
6641 ed2->decoded_size = newlength;
6642 ed2->compression = BTRFS_COMPRESSION_NONE;
6643 ed2->encryption = BTRFS_ENCRYPTION_NONE;
6644 ed2->encoding = BTRFS_ENCODING_NONE;
6645 ed2->type = EXTENT_TYPE_INLINE;
6646
6647 insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, 0, ed2, sizeof(EXTENT_DATA) - 1 + newlength, NULL, rollback);
6648
6649 fcb->inode_item.st_blocks += newlength;
6650 } else if (!nocow) {
6651 if (is_file_prealloc(fcb, start_data, end_data)) {
6652 Status = do_prealloc_write(fcb->Vcb, fcb, start_data, end_data, data, nocsum ? NULL : &changed_sector_list, rollback);
6653
6654 if (!NT_SUCCESS(Status)) {
6655 ERR("error - do_prealloc_write returned %08x\n", Status);
6656 ExFreePool(data);
6657 goto end;
6658 }
6659 } else {
6660 Status = do_cow_write(fcb->Vcb, fcb, start_data, end_data, data, nocsum ? NULL : &changed_sector_list, rollback);
6661
6662 if (!NT_SUCCESS(Status)) {
6663 ERR("error - do_cow_write returned %08x\n", Status);
6664 ExFreePool(data);
6665 goto end;
6666 }
6667 }
6668
6669 ExFreePool(data);
6670 } else {
6671 Status = do_nocow_write(fcb->Vcb, fcb, start_data, end_data - start_data, data, nocsum ? NULL : &changed_sector_list, rollback);
6672
6673 if (!NT_SUCCESS(Status)) {
6674 ERR("error - do_nocow_write returned %08x\n", Status);
6675 ExFreePool(data);
6676 goto end;
6677 }
6678
6679 ExFreePool(data);
6680 }
6681 }
6682
6683 KeQuerySystemTime(&time);
6684 win_time_to_unix(time, &now);
6685
6686 // ERR("no_cache = %s, FileObject->PrivateCacheMap = %p\n", no_cache ? "TRUE" : "FALSE", FileObject->PrivateCacheMap);
6687
6688 // if (!no_cache) {
6689 // if (!FileObject->PrivateCacheMap) {
6690 // CC_FILE_SIZES ccfs;
6691 //
6692 // ccfs.AllocationSize = fcb->Header.AllocationSize;
6693 // ccfs.FileSize = fcb->Header.FileSize;
6694 // ccfs.ValidDataLength = fcb->Header.ValidDataLength;
6695 //
6696 // TRACE("calling CcInitializeCacheMap...\n");
6697 // CcInitializeCacheMap(FileObject, &ccfs, FALSE, cache_callbacks, fcb);
6698 //
6699 // changed_length = FALSE;
6700 // }
6701 // }
6702
6703 if (fcb->ads) {
6704 if (fileref && fileref->parent)
6705 origii = &fileref->parent->fcb->inode_item;
6706 else {
6707 ERR("no parent fcb found for stream\n");
6708 Status = STATUS_INTERNAL_ERROR;
6709 goto end;
6710 }
6711 } else
6712 origii = &fcb->inode_item;
6713
6714 origii->transid = Vcb->superblock.generation;
6715 origii->sequence++;
6716 origii->st_ctime = now;
6717
6718 if (!fcb->ads) {
6719 TRACE("setting st_size to %llx\n", newlength);
6720 origii->st_size = newlength;
6721 origii->st_mtime = now;
6722 }
6723
6724 searchkey.obj_id = fcb->inode;
6725 searchkey.obj_type = TYPE_INODE_ITEM;
6726 searchkey.offset = 0;
6727
6728 Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE);
6729 if (!NT_SUCCESS(Status)) {
6730 ERR("error - find_item returned %08x\n", Status);
6731 goto end;
6732 }
6733
6734 if (!keycmp(&tp.item->key, &searchkey))
6735 delete_tree_item(Vcb, &tp, rollback);
6736 else
6737 WARN("couldn't find existing INODE_ITEM\n");
6738
6739 ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
6740 if (!ii) {
6741 ERR("out of memory\n");
6742 Status = STATUS_INSUFFICIENT_RESOURCES;
6743 goto end;
6744 }
6745
6746 RtlCopyMemory(ii, origii, sizeof(INODE_ITEM));
6747 insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, rollback);
6748
6749 // FIXME - update inode_item of open FCBs pointing to the same inode (i.e. hardlinked files)
6750
6751 if (!nocsum)
6752 update_checksum_tree(Vcb, &changed_sector_list, rollback);
6753
6754 if (changed_length) {
6755 CC_FILE_SIZES ccfs;
6756
6757 ccfs.AllocationSize = fcb->Header.AllocationSize;
6758 ccfs.FileSize = fcb->Header.FileSize;
6759 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
6760
6761 CcSetFileSizes(FileObject, &ccfs);
6762 }
6763
6764 // FIXME - make sure this still called if STATUS_PENDING and async
6765 // if (!no_cache) {
6766 // if (!CcCopyWrite(FileObject, &offset, *length, TRUE, buf)) {
6767 // ERR("CcCopyWrite failed.\n");
6768 // }
6769 // }
6770
6771 fcb->subvol->root_item.ctransid = Vcb->superblock.generation;
6772 fcb->subvol->root_item.ctime = now;
6773
6774 Status = STATUS_SUCCESS;
6775
6776 end:
6777 if (FileObject->Flags & FO_SYNCHRONOUS_IO && !paging_io) {
6778 TRACE("CurrentByteOffset was: %llx\n", FileObject->CurrentByteOffset.QuadPart);
6779 FileObject->CurrentByteOffset.QuadPart = offset.QuadPart + (NT_SUCCESS(Status) ? *length : 0);
6780 TRACE("CurrentByteOffset now: %llx\n", FileObject->CurrentByteOffset.QuadPart);
6781 }
6782
6783 if (paging_lock)
6784 ExReleaseResourceLite(fcb->Header.PagingIoResource);
6785
6786 return Status;
6787 }
6788
6789 NTSTATUS write_file(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
6790 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
6791 device_extension* Vcb = DeviceObject->DeviceExtension;
6792 void* buf;
6793 NTSTATUS Status;
6794 LARGE_INTEGER offset = IrpSp->Parameters.Write.ByteOffset;
6795 PFILE_OBJECT FileObject = IrpSp->FileObject;
6796 fcb* fcb = FileObject ? FileObject->FsContext : NULL;
6797 BOOL locked = FALSE;
6798 // LARGE_INTEGER freq, time1, time2;
6799 LIST_ENTRY rollback;
6800
6801 InitializeListHead(&rollback);
6802
6803 if (Vcb->readonly)
6804 return STATUS_MEDIA_WRITE_PROTECTED;
6805
6806 if (fcb && fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY)
6807 return STATUS_ACCESS_DENIED;
6808
6809 // time1 = KeQueryPerformanceCounter(&freq);
6810
6811 TRACE("write\n");
6812
6813 Irp->IoStatus.Information = 0;
6814
6815 TRACE("offset = %llx\n", offset.QuadPart);
6816 TRACE("length = %x\n", IrpSp->Parameters.Write.Length);
6817
6818 if (!Irp->AssociatedIrp.SystemBuffer) {
6819 buf = map_user_buffer(Irp);
6820
6821 if (Irp->MdlAddress && !buf) {
6822 ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
6823 Status = STATUS_INSUFFICIENT_RESOURCES;
6824 goto exit;
6825 }
6826 } else
6827 buf = Irp->AssociatedIrp.SystemBuffer;
6828
6829 TRACE("buf = %p\n", buf);
6830
6831 if (Irp->Flags & IRP_NOCACHE) {
6832 acquire_tree_lock(Vcb, TRUE);
6833 locked = TRUE;
6834 }
6835
6836 if (fcb && !(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForWriteAccess(&fcb->lock, Irp)) {
6837 WARN("tried to write to locked region\n");
6838 Status = STATUS_FILE_LOCK_CONFLICT;
6839 goto exit;
6840 }
6841
6842 // ERR("Irp->Flags = %x\n", Irp->Flags);
6843 Status = write_file2(Vcb, Irp, offset, buf, &IrpSp->Parameters.Write.Length, Irp->Flags & IRP_PAGING_IO, Irp->Flags & IRP_NOCACHE, &rollback);
6844 if (!NT_SUCCESS(Status)) {
6845 if (Status != STATUS_PENDING)
6846 ERR("write_file2 returned %08x\n", Status);
6847 goto exit;
6848 }
6849
6850 if (locked)
6851 Status = consider_write(Vcb);
6852
6853 if (NT_SUCCESS(Status)) {
6854 Irp->IoStatus.Information = IrpSp->Parameters.Write.Length;
6855
6856 #ifdef DEBUG_PARANOID
6857 if (locked)
6858 check_extents_consistent(Vcb, FileObject->FsContext); // TESTING
6859
6860 // check_extent_tree_consistent(Vcb);
6861 #endif
6862 }
6863
6864 exit:
6865 if (locked) {
6866 if (NT_SUCCESS(Status))
6867 clear_rollback(&rollback);
6868 else
6869 do_rollback(Vcb, &rollback);
6870
6871 release_tree_lock(Vcb, TRUE);
6872 }
6873
6874 // time2 = KeQueryPerformanceCounter(NULL);
6875
6876 // ERR("time = %u (freq = %u)\n", (UINT32)(time2.QuadPart - time1.QuadPart), (UINT32)freq.QuadPart);
6877
6878 return Status;
6879 }