[BTRFS] Upgrade to 1.2.1
[reactos.git] / drivers / filesystems / btrfs / flushthread.c
1 /* Copyright (c) Mark Harmstone 2016-17
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19 #include <ata.h>
20 #include <ntddscsi.h>
21 #include <ntddstor.h>
22
/* 4096 bytes less one tree_header and one leaf_node.
 * NOTE(review): presumably the most checksum data that fits in a single leaf
 * item - confirm against the users of this macro. */
#define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - sizeof(leaf_node))

/* When defined, trees_consistent() logs the reason it reports an
 * inconsistent tree. Disabled by default. */
// #define DEBUG_WRITE_LOOPS
26
27 typedef struct {
28 KEVENT Event;
29 IO_STATUS_BLOCK iosb;
30 } write_context;
31
32 typedef struct {
33 EXTENT_ITEM_TREE eit;
34 UINT8 type;
35 TREE_BLOCK_REF tbr;
36 } EXTENT_ITEM_TREE2;
37
38 typedef struct {
39 EXTENT_ITEM ei;
40 UINT8 type;
41 TREE_BLOCK_REF tbr;
42 } EXTENT_ITEM_SKINNY_METADATA;
43
44 static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp);
45 static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback);
46
/* Fallback definition for toolchains other than MSVC, whose headers do not
 * provide this flag yet. clean_space_cache() puts it in the Flags field of
 * its trim requests. */
#ifndef _MSC_VER // not in mingw yet
#define DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED 0x80000000
#endif
50
51 _Function_class_(IO_COMPLETION_ROUTINE)
52 #ifdef __REACTOS__
53 static NTSTATUS NTAPI write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
54 #else
55 static NTSTATUS write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
56 #endif
57 write_context* context = conptr;
58
59 UNUSED(DeviceObject);
60
61 context->iosb = Irp->IoStatus;
62 KeSetEvent(&context->Event, 0, FALSE);
63
64 return STATUS_MORE_PROCESSING_REQUIRED;
65 }
66
67 NTSTATUS write_data_phys(_In_ PDEVICE_OBJECT device, _In_ UINT64 address, _In_reads_bytes_(length) void* data, _In_ UINT32 length) {
68 NTSTATUS Status;
69 LARGE_INTEGER offset;
70 PIRP Irp;
71 PIO_STACK_LOCATION IrpSp;
72 write_context context;
73
74 TRACE("(%p, %llx, %p, %x)\n", device, address, data, length);
75
76 RtlZeroMemory(&context, sizeof(write_context));
77
78 KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
79
80 offset.QuadPart = address;
81
82 Irp = IoAllocateIrp(device->StackSize, FALSE);
83
84 if (!Irp) {
85 ERR("IoAllocateIrp failed\n");
86 return STATUS_INSUFFICIENT_RESOURCES;
87 }
88
89 IrpSp = IoGetNextIrpStackLocation(Irp);
90 IrpSp->MajorFunction = IRP_MJ_WRITE;
91
92 if (device->Flags & DO_BUFFERED_IO) {
93 Irp->AssociatedIrp.SystemBuffer = data;
94
95 Irp->Flags = IRP_BUFFERED_IO;
96 } else if (device->Flags & DO_DIRECT_IO) {
97 Irp->MdlAddress = IoAllocateMdl(data, length, FALSE, FALSE, NULL);
98 if (!Irp->MdlAddress) {
99 DbgPrint("IoAllocateMdl failed\n");
100 Status = STATUS_INSUFFICIENT_RESOURCES;
101 goto exit;
102 }
103
104 Status = STATUS_SUCCESS;
105
106 _SEH2_TRY {
107 MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoReadAccess);
108 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
109 Status = _SEH2_GetExceptionCode();
110 } _SEH2_END;
111
112 if (!NT_SUCCESS(Status)) {
113 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
114 IoFreeMdl(Irp->MdlAddress);
115 goto exit;
116 }
117 } else {
118 Irp->UserBuffer = data;
119 }
120
121 IrpSp->Parameters.Write.Length = length;
122 IrpSp->Parameters.Write.ByteOffset = offset;
123
124 Irp->UserIosb = &context.iosb;
125
126 Irp->UserEvent = &context.Event;
127
128 IoSetCompletionRoutine(Irp, write_completion, &context, TRUE, TRUE, TRUE);
129
130 Status = IoCallDriver(device, Irp);
131
132 if (Status == STATUS_PENDING) {
133 KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
134 Status = context.iosb.Status;
135 }
136
137 if (!NT_SUCCESS(Status)) {
138 ERR("IoCallDriver returned %08x\n", Status);
139 }
140
141 if (device->Flags & DO_DIRECT_IO) {
142 MmUnlockPages(Irp->MdlAddress);
143 IoFreeMdl(Irp->MdlAddress);
144 }
145
146 exit:
147 IoFreeIrp(Irp);
148
149 return Status;
150 }
151
152 static void add_trim_entry(device* dev, UINT64 address, UINT64 size) {
153 space* s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
154 if (!s) {
155 ERR("out of memory\n");
156 return;
157 }
158
159 s->address = address;
160 s->size = size;
161 dev->num_trim_entries++;
162
163 InsertTailList(&dev->trim_list, &s->list_entry);
164 }
165
166 static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) {
167 ULONG type;
168
169 if (Vcb->trim && !Vcb->options.no_trim) {
170 if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE)
171 type = BLOCK_FLAG_DUPLICATE;
172 else if (c->chunk_item->type & BLOCK_FLAG_RAID0)
173 type = BLOCK_FLAG_RAID0;
174 else if (c->chunk_item->type & BLOCK_FLAG_RAID1)
175 type = BLOCK_FLAG_DUPLICATE;
176 else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
177 type = BLOCK_FLAG_RAID10;
178 else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
179 type = BLOCK_FLAG_RAID5;
180 else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
181 type = BLOCK_FLAG_RAID6;
182 else // SINGLE
183 type = BLOCK_FLAG_DUPLICATE;
184 }
185
186 while (!IsListEmpty(&c->deleting)) {
187 space* s = CONTAINING_RECORD(c->deleting.Flink, space, list_entry);
188
189 if (Vcb->trim && !Vcb->options.no_trim && (!Vcb->options.no_barrier || !(c->chunk_item->type & BLOCK_FLAG_METADATA))) {
190 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
191
192 if (type == BLOCK_FLAG_DUPLICATE) {
193 UINT16 i;
194
195 for (i = 0; i < c->chunk_item->num_stripes; i++) {
196 if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim)
197 add_trim_entry(c->devices[i], s->address - c->offset + cis[i].offset, s->size);
198 }
199 } else if (type == BLOCK_FLAG_RAID0) {
200 UINT64 startoff, endoff;
201 UINT16 startoffstripe, endoffstripe, i;
202
203 get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
204 get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
205
206 for (i = 0; i < c->chunk_item->num_stripes; i++) {
207 if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim) {
208 UINT64 stripestart, stripeend;
209
210 if (startoffstripe > i)
211 stripestart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
212 else if (startoffstripe == i)
213 stripestart = startoff;
214 else
215 stripestart = startoff - (startoff % c->chunk_item->stripe_length);
216
217 if (endoffstripe > i)
218 stripeend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
219 else if (endoffstripe == i)
220 stripeend = endoff + 1;
221 else
222 stripeend = endoff - (endoff % c->chunk_item->stripe_length);
223
224 if (stripestart != stripeend)
225 add_trim_entry(c->devices[i], stripestart + cis[i].offset, stripeend - stripestart);
226 }
227 }
228 } else if (type == BLOCK_FLAG_RAID10) {
229 UINT64 startoff, endoff;
230 UINT16 sub_stripes, startoffstripe, endoffstripe, i;
231
232 sub_stripes = max(1, c->chunk_item->sub_stripes);
233
234 get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe);
235 get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe);
236
237 startoffstripe *= sub_stripes;
238 endoffstripe *= sub_stripes;
239
240 for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) {
241 ULONG j;
242 UINT64 stripestart, stripeend;
243
244 if (startoffstripe > i)
245 stripestart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
246 else if (startoffstripe == i)
247 stripestart = startoff;
248 else
249 stripestart = startoff - (startoff % c->chunk_item->stripe_length);
250
251 if (endoffstripe > i)
252 stripeend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
253 else if (endoffstripe == i)
254 stripeend = endoff + 1;
255 else
256 stripeend = endoff - (endoff % c->chunk_item->stripe_length);
257
258 if (stripestart != stripeend) {
259 for (j = 0; j < sub_stripes; j++) {
260 if (c->devices[i+j] && c->devices[i+j]->devobj && !c->devices[i+j]->readonly && c->devices[i+j]->trim)
261 add_trim_entry(c->devices[i+j], stripestart + cis[i+j].offset, stripeend - stripestart);
262 }
263 }
264 }
265 }
266 // FIXME - RAID5(?), RAID6(?)
267 }
268
269 RemoveEntryList(&s->list_entry);
270 ExFreePool(s);
271 }
272 }
273
274 typedef struct {
275 DEVICE_MANAGE_DATA_SET_ATTRIBUTES* dmdsa;
276 ATA_PASS_THROUGH_EX apte;
277 PIRP Irp;
278 IO_STATUS_BLOCK iosb;
279 } ioctl_context_stripe;
280
281 typedef struct {
282 KEVENT Event;
283 LONG left;
284 ioctl_context_stripe* stripes;
285 } ioctl_context;
286
287 _Function_class_(IO_COMPLETION_ROUTINE)
288 #ifdef __REACTOS__
289 static NTSTATUS NTAPI ioctl_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
290 #else
291 static NTSTATUS ioctl_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
292 #endif
293 ioctl_context* context = (ioctl_context*)conptr;
294 LONG left2 = InterlockedDecrement(&context->left);
295
296 UNUSED(DeviceObject);
297 UNUSED(Irp);
298
299 if (left2 == 0)
300 KeSetEvent(&context->Event, 0, FALSE);
301
302 return STATUS_MORE_PROCESSING_REQUIRED;
303 }
304
305 static void clean_space_cache(device_extension* Vcb) {
306 LIST_ENTRY* le;
307 chunk* c;
308 ULONG num;
309
310 TRACE("(%p)\n", Vcb);
311
312 ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
313
314 le = Vcb->chunks.Flink;
315 while (le != &Vcb->chunks) {
316 c = CONTAINING_RECORD(le, chunk, list_entry);
317
318 if (c->space_changed) {
319 acquire_chunk_lock(c, Vcb);
320
321 if (c->space_changed)
322 clean_space_cache_chunk(Vcb, c);
323
324 c->space_changed = FALSE;
325
326 release_chunk_lock(c, Vcb);
327 }
328
329 le = le->Flink;
330 }
331
332 ExReleaseResourceLite(&Vcb->chunk_lock);
333
334 if (Vcb->trim && !Vcb->options.no_trim) {
335 ioctl_context context;
336 ULONG total_num;
337
338 context.left = 0;
339
340 le = Vcb->devices.Flink;
341 while (le != &Vcb->devices) {
342 device* dev = CONTAINING_RECORD(le, device, list_entry);
343
344 if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0)
345 context.left++;
346
347 le = le->Flink;
348 }
349
350 if (context.left == 0)
351 return;
352
353 total_num = context.left;
354 num = 0;
355
356 KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
357
358 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG);
359 if (!context.stripes) {
360 ERR("out of memory\n");
361 return;
362 }
363
364 RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left);
365
366 le = Vcb->devices.Flink;
367 while (le != &Vcb->devices) {
368 device* dev = CONTAINING_RECORD(le, device, list_entry);
369
370 if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0) {
371 LIST_ENTRY* le2;
372 ioctl_context_stripe* stripe = &context.stripes[num];
373 DEVICE_DATA_SET_RANGE* ranges;
374 ULONG datalen = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(UINT64)) + (dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE)), i;
375 PIO_STACK_LOCATION IrpSp;
376
377 stripe->dmdsa = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG);
378 if (!stripe->dmdsa) {
379 ERR("out of memory\n");
380 goto nextdev;
381 }
382
383 stripe->dmdsa->Size = sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES);
384 stripe->dmdsa->Action = DeviceDsmAction_Trim;
385 stripe->dmdsa->Flags = DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED;
386 stripe->dmdsa->ParameterBlockOffset = 0;
387 stripe->dmdsa->ParameterBlockLength = 0;
388 stripe->dmdsa->DataSetRangesOffset = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(UINT64));
389 stripe->dmdsa->DataSetRangesLength = dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE);
390
391 ranges = (DEVICE_DATA_SET_RANGE*)((UINT8*)stripe->dmdsa + stripe->dmdsa->DataSetRangesOffset);
392
393 i = 0;
394
395 le2 = dev->trim_list.Flink;
396 while (le2 != &dev->trim_list) {
397 space* s = CONTAINING_RECORD(le2, space, list_entry);
398
399 ranges[i].StartingOffset = s->address;
400 ranges[i].LengthInBytes = s->size;
401 i++;
402
403 le2 = le2->Flink;
404 }
405
406 stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, FALSE);
407
408 if (!stripe->Irp) {
409 ERR("IoAllocateIrp failed\n");
410 goto nextdev;
411 }
412
413 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
414 IrpSp->MajorFunction = IRP_MJ_DEVICE_CONTROL;
415
416 IrpSp->Parameters.DeviceIoControl.IoControlCode = IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES;
417 IrpSp->Parameters.DeviceIoControl.InputBufferLength = datalen;
418 IrpSp->Parameters.DeviceIoControl.OutputBufferLength = 0;
419
420 stripe->Irp->AssociatedIrp.SystemBuffer = stripe->dmdsa;
421 stripe->Irp->Flags |= IRP_BUFFERED_IO;
422 stripe->Irp->UserBuffer = NULL;
423 stripe->Irp->UserIosb = &stripe->iosb;
424
425 IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, TRUE, TRUE, TRUE);
426
427 IoCallDriver(dev->devobj, stripe->Irp);
428
429 nextdev:
430 while (!IsListEmpty(&dev->trim_list)) {
431 space* s = CONTAINING_RECORD(RemoveHeadList(&dev->trim_list), space, list_entry);
432 ExFreePool(s);
433 }
434
435 dev->num_trim_entries = 0;
436
437 num++;
438 }
439
440 le = le->Flink;
441 }
442
443 KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
444
445 for (num = 0; num < total_num; num++) {
446 if (context.stripes[num].dmdsa)
447 ExFreePool(context.stripes[num].dmdsa);
448 }
449
450 ExFreePool(context.stripes);
451 }
452 }
453
454 static BOOL trees_consistent(device_extension* Vcb) {
455 ULONG maxsize = Vcb->superblock.node_size - sizeof(tree_header);
456 LIST_ENTRY* le;
457
458 le = Vcb->trees.Flink;
459 while (le != &Vcb->trees) {
460 tree* t = CONTAINING_RECORD(le, tree, list_entry);
461
462 if (t->write) {
463 if (t->header.num_items == 0 && t->parent) {
464 #ifdef DEBUG_WRITE_LOOPS
465 ERR("empty tree found, looping again\n");
466 #endif
467 return FALSE;
468 }
469
470 if (t->size > maxsize) {
471 #ifdef DEBUG_WRITE_LOOPS
472 ERR("overlarge tree found (%u > %u), looping again\n", t->size, maxsize);
473 #endif
474 return FALSE;
475 }
476
477 if (!t->has_new_address) {
478 #ifdef DEBUG_WRITE_LOOPS
479 ERR("tree found without new address, looping again\n");
480 #endif
481 return FALSE;
482 }
483 }
484
485 le = le->Flink;
486 }
487
488 return TRUE;
489 }
490
491 static NTSTATUS add_parents(device_extension* Vcb, PIRP Irp) {
492 ULONG level;
493 LIST_ENTRY* le;
494
495 for (level = 0; level <= 255; level++) {
496 BOOL nothing_found = TRUE;
497
498 TRACE("level = %u\n", level);
499
500 le = Vcb->trees.Flink;
501 while (le != &Vcb->trees) {
502 tree* t = CONTAINING_RECORD(le, tree, list_entry);
503
504 if (t->write && t->header.level == level) {
505 TRACE("tree %p: root = %llx, level = %x, parent = %p\n", t, t->header.tree_id, t->header.level, t->parent);
506
507 nothing_found = FALSE;
508
509 if (t->parent) {
510 if (!t->parent->write)
511 TRACE("adding tree %p (level %x)\n", t->parent, t->header.level);
512
513 t->parent->write = TRUE;
514 } else if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
515 KEY searchkey;
516 traverse_ptr tp;
517 NTSTATUS Status;
518
519 searchkey.obj_id = t->root->id;
520 searchkey.obj_type = TYPE_ROOT_ITEM;
521 searchkey.offset = 0xffffffffffffffff;
522
523 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
524 if (!NT_SUCCESS(Status)) {
525 ERR("error - find_item returned %08x\n", Status);
526 return Status;
527 }
528
529 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
530 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
531 return STATUS_INTERNAL_ERROR;
532 }
533
534 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry
535 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
536
537 if (!ri) {
538 ERR("out of memory\n");
539 return STATUS_INSUFFICIENT_RESOURCES;
540 }
541
542 RtlCopyMemory(ri, &t->root->root_item, sizeof(ROOT_ITEM));
543
544 Status = delete_tree_item(Vcb, &tp);
545 if (!NT_SUCCESS(Status)) {
546 ERR("delete_tree_item returned %08x\n", Status);
547 ExFreePool(ri);
548 return Status;
549 }
550
551 Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp);
552 if (!NT_SUCCESS(Status)) {
553 ERR("insert_tree_item returned %08x\n", Status);
554 ExFreePool(ri);
555 return Status;
556 }
557 }
558 }
559 }
560
561 le = le->Flink;
562 }
563
564 if (nothing_found)
565 break;
566 }
567
568 return STATUS_SUCCESS;
569 }
570
571 static void add_parents_to_cache(tree* t) {
572 while (t->parent) {
573 t = t->parent;
574 t->write = TRUE;
575 }
576 }
577
578 static BOOL insert_tree_extent_skinny(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64 address, PIRP Irp, LIST_ENTRY* rollback) {
579 NTSTATUS Status;
580 EXTENT_ITEM_SKINNY_METADATA* eism;
581 traverse_ptr insert_tp;
582
583 eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
584 if (!eism) {
585 ERR("out of memory\n");
586 return FALSE;
587 }
588
589 eism->ei.refcount = 1;
590 eism->ei.generation = Vcb->superblock.generation;
591 eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
592 eism->type = TYPE_TREE_BLOCK_REF;
593 eism->tbr.offset = root_id;
594
595 Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, Irp);
596 if (!NT_SUCCESS(Status)) {
597 ERR("insert_tree_item returned %08x\n", Status);
598 ExFreePool(eism);
599 return FALSE;
600 }
601
602 acquire_chunk_lock(c, Vcb);
603
604 space_list_subtract(c, FALSE, address, Vcb->superblock.node_size, rollback);
605
606 release_chunk_lock(c, Vcb);
607
608 add_parents_to_cache(insert_tp.tree);
609
610 return TRUE;
611 }
612
613 BOOL find_metadata_address_in_chunk(device_extension* Vcb, chunk* c, UINT64* address) {
614 LIST_ENTRY* le;
615 space* s;
616
617 TRACE("(%p, %llx, %p)\n", Vcb, c->offset, address);
618
619 if (Vcb->superblock.node_size > c->chunk_item->size - c->used)
620 return FALSE;
621
622 if (!c->cache_loaded) {
623 NTSTATUS Status = load_cache_chunk(Vcb, c, NULL);
624
625 if (!NT_SUCCESS(Status)) {
626 ERR("load_cache_chunk returned %08x\n", Status);
627 return FALSE;
628 }
629 }
630
631 if (IsListEmpty(&c->space_size))
632 return FALSE;
633
634 if (!c->last_alloc_set) {
635 s = CONTAINING_RECORD(c->space.Blink, space, list_entry);
636
637 c->last_alloc = s->address;
638 c->last_alloc_set = TRUE;
639
640 if (s->size >= Vcb->superblock.node_size) {
641 *address = s->address;
642 c->last_alloc += Vcb->superblock.node_size;
643 return TRUE;
644 }
645 }
646
647 le = c->space.Flink;
648 while (le != &c->space) {
649 s = CONTAINING_RECORD(le, space, list_entry);
650
651 if (s->address <= c->last_alloc && s->address + s->size >= c->last_alloc + Vcb->superblock.node_size) {
652 *address = c->last_alloc;
653 c->last_alloc += Vcb->superblock.node_size;
654 return TRUE;
655 }
656
657 le = le->Flink;
658 }
659
660 le = c->space_size.Flink;
661 while (le != &c->space_size) {
662 s = CONTAINING_RECORD(le, space, list_entry_size);
663
664 if (s->size == Vcb->superblock.node_size) {
665 *address = s->address;
666 c->last_alloc = s->address + Vcb->superblock.node_size;
667 return TRUE;
668 } else if (s->size < Vcb->superblock.node_size) {
669 if (le == c->space_size.Flink)
670 return FALSE;
671
672 s = CONTAINING_RECORD(le->Blink, space, list_entry_size);
673
674 *address = s->address;
675 c->last_alloc = s->address + Vcb->superblock.node_size;
676
677 return TRUE;
678 }
679
680 le = le->Flink;
681 }
682
683 s = CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size);
684
685 if (s->size > Vcb->superblock.node_size) {
686 *address = s->address;
687 c->last_alloc = s->address + Vcb->superblock.node_size;
688 return TRUE;
689 }
690
691 return FALSE;
692 }
693
694 static BOOL insert_tree_extent(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64* new_address, PIRP Irp, LIST_ENTRY* rollback) {
695 NTSTATUS Status;
696 UINT64 address;
697 EXTENT_ITEM_TREE2* eit2;
698 traverse_ptr insert_tp;
699
700 TRACE("(%p, %x, %llx, %p, %p, %p, %p)\n", Vcb, level, root_id, c, new_address, rollback);
701
702 if (!find_metadata_address_in_chunk(Vcb, c, &address))
703 return FALSE;
704
705 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
706 BOOL b = insert_tree_extent_skinny(Vcb, level, root_id, c, address, Irp, rollback);
707
708 if (b)
709 *new_address = address;
710
711 return b;
712 }
713
714 eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
715 if (!eit2) {
716 ERR("out of memory\n");
717 return FALSE;
718 }
719
720 eit2->eit.extent_item.refcount = 1;
721 eit2->eit.extent_item.generation = Vcb->superblock.generation;
722 eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
723 eit2->eit.level = level;
724 eit2->type = TYPE_TREE_BLOCK_REF;
725 eit2->tbr.offset = root_id;
726
727 Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, Irp);
728 if (!NT_SUCCESS(Status)) {
729 ERR("insert_tree_item returned %08x\n", Status);
730 ExFreePool(eit2);
731 return FALSE;
732 }
733
734 acquire_chunk_lock(c, Vcb);
735
736 space_list_subtract(c, FALSE, address, Vcb->superblock.node_size, rollback);
737
738 release_chunk_lock(c, Vcb);
739
740 add_parents_to_cache(insert_tp.tree);
741
742 *new_address = address;
743
744 return TRUE;
745 }
746
747 NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
748 NTSTATUS Status;
749 chunk *origchunk = NULL, *c;
750 LIST_ENTRY* le;
751 UINT64 flags, addr;
752
753 if (t->root->id == BTRFS_ROOT_CHUNK)
754 flags = Vcb->system_flags;
755 else
756 flags = Vcb->metadata_flags;
757
758 if (t->has_address) {
759 origchunk = get_chunk_from_address(Vcb, t->header.address);
760
761 if (origchunk && !origchunk->readonly && !origchunk->reloc && origchunk->chunk_item->type == flags &&
762 insert_tree_extent(Vcb, t->header.level, t->root->id, origchunk, &addr, Irp, rollback)) {
763 t->new_address = addr;
764 t->has_new_address = TRUE;
765 return STATUS_SUCCESS;
766 }
767 }
768
769 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
770
771 le = Vcb->chunks.Flink;
772 while (le != &Vcb->chunks) {
773 c = CONTAINING_RECORD(le, chunk, list_entry);
774
775 if (!c->readonly && !c->reloc) {
776 acquire_chunk_lock(c, Vcb);
777
778 if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
779 if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) {
780 release_chunk_lock(c, Vcb);
781 ExReleaseResourceLite(&Vcb->chunk_lock);
782 t->new_address = addr;
783 t->has_new_address = TRUE;
784 return STATUS_SUCCESS;
785 }
786 }
787
788 release_chunk_lock(c, Vcb);
789 }
790
791 le = le->Flink;
792 }
793
794 // allocate new chunk if necessary
795
796 Status = alloc_chunk(Vcb, flags, &c, FALSE);
797
798 if (!NT_SUCCESS(Status)) {
799 ERR("alloc_chunk returned %08x\n", Status);
800 ExReleaseResourceLite(&Vcb->chunk_lock);
801 return Status;
802 }
803
804 acquire_chunk_lock(c, Vcb);
805
806 if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
807 if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) {
808 release_chunk_lock(c, Vcb);
809 ExReleaseResourceLite(&Vcb->chunk_lock);
810 t->new_address = addr;
811 t->has_new_address = TRUE;
812 return STATUS_SUCCESS;
813 }
814 }
815
816 release_chunk_lock(c, Vcb);
817
818 ExReleaseResourceLite(&Vcb->chunk_lock);
819
820 ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size);
821
822 return STATUS_DISK_FULL;
823 }
824
825 static NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* t, UINT64 parent_root, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) {
826 NTSTATUS Status;
827 UINT64 rc, root;
828
829 TRACE("(%p, %llx, %p)\n", Vcb, address, t);
830
831 rc = get_extent_refcount(Vcb, address, Vcb->superblock.node_size, Irp);
832 if (rc == 0) {
833 ERR("error - refcount for extent %llx was 0\n", address);
834 return STATUS_INTERNAL_ERROR;
835 }
836
837 if (!t || t->parent)
838 root = parent_root;
839 else
840 root = t->header.tree_id;
841
842 Status = decrease_extent_refcount_tree(Vcb, address, Vcb->superblock.node_size, root, level, Irp);
843 if (!NT_SUCCESS(Status)) {
844 ERR("decrease_extent_refcount_tree returned %08x\n", Status);
845 return Status;
846 }
847
848 if (rc == 1) {
849 chunk* c = get_chunk_from_address(Vcb, address);
850
851 if (c) {
852 acquire_chunk_lock(c, Vcb);
853
854 if (!c->cache_loaded) {
855 Status = load_cache_chunk(Vcb, c, NULL);
856
857 if (!NT_SUCCESS(Status)) {
858 ERR("load_cache_chunk returned %08x\n", Status);
859 release_chunk_lock(c, Vcb);
860 return Status;
861 }
862 }
863
864 c->used -= Vcb->superblock.node_size;
865
866 space_list_add(c, address, Vcb->superblock.node_size, rollback);
867
868 release_chunk_lock(c, Vcb);
869 } else
870 ERR("could not find chunk for address %llx\n", address);
871 }
872
873 return STATUS_SUCCESS;
874 }
875
876 static NTSTATUS add_changed_extent_ref_edr(changed_extent* ce, EXTENT_DATA_REF* edr, BOOL old) {
877 LIST_ENTRY *le2, *list;
878 changed_extent_ref* cer;
879
880 list = old ? &ce->old_refs : &ce->refs;
881
882 le2 = list->Flink;
883 while (le2 != list) {
884 cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
885
886 if (cer->type == TYPE_EXTENT_DATA_REF && cer->edr.root == edr->root && cer->edr.objid == edr->objid && cer->edr.offset == edr->offset) {
887 cer->edr.count += edr->count;
888 goto end;
889 }
890
891 le2 = le2->Flink;
892 }
893
894 cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
895 if (!cer) {
896 ERR("out of memory\n");
897 return STATUS_INSUFFICIENT_RESOURCES;
898 }
899
900 cer->type = TYPE_EXTENT_DATA_REF;
901 RtlCopyMemory(&cer->edr, edr, sizeof(EXTENT_DATA_REF));
902 InsertTailList(list, &cer->list_entry);
903
904 end:
905 if (old)
906 ce->old_count += edr->count;
907 else
908 ce->count += edr->count;
909
910 return STATUS_SUCCESS;
911 }
912
913 static NTSTATUS add_changed_extent_ref_sdr(changed_extent* ce, SHARED_DATA_REF* sdr, BOOL old) {
914 LIST_ENTRY *le2, *list;
915 changed_extent_ref* cer;
916
917 list = old ? &ce->old_refs : &ce->refs;
918
919 le2 = list->Flink;
920 while (le2 != list) {
921 cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
922
923 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr->offset) {
924 cer->sdr.count += sdr->count;
925 goto end;
926 }
927
928 le2 = le2->Flink;
929 }
930
931 cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
932 if (!cer) {
933 ERR("out of memory\n");
934 return STATUS_INSUFFICIENT_RESOURCES;
935 }
936
937 cer->type = TYPE_SHARED_DATA_REF;
938 RtlCopyMemory(&cer->sdr, sdr, sizeof(SHARED_DATA_REF));
939 InsertTailList(list, &cer->list_entry);
940
941 end:
942 if (old)
943 ce->old_count += sdr->count;
944 else
945 ce->count += sdr->count;
946
947 return STATUS_SUCCESS;
948 }
949
950 static BOOL shared_tree_is_unique(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
951 KEY searchkey;
952 traverse_ptr tp;
953 NTSTATUS Status;
954
955 if (!t->updated_extents && t->has_address) {
956 Status = update_tree_extents(Vcb, t, Irp, rollback);
957 if (!NT_SUCCESS(Status)) {
958 ERR("update_tree_extents returned %08x\n", Status);
959 return FALSE;
960 }
961 }
962
963 searchkey.obj_id = t->header.address;
964 searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
965 searchkey.offset = 0xffffffffffffffff;
966
967 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
968 if (!NT_SUCCESS(Status)) {
969 ERR("error - find_item returned %08x\n", Status);
970 return FALSE;
971 }
972
973 if (tp.item->key.obj_id == t->header.address && (tp.item->key.obj_type == TYPE_METADATA_ITEM || tp.item->key.obj_type == TYPE_EXTENT_ITEM))
974 return FALSE;
975 else
976 return TRUE;
977 }
978
979 static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
980 NTSTATUS Status;
981 UINT64 rc = get_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, Irp);
982 UINT64 flags = get_extent_flags(Vcb, t->header.address, Irp);
983
984 if (rc == 0) {
985 ERR("refcount for extent %llx was 0\n", t->header.address);
986 return STATUS_INTERNAL_ERROR;
987 }
988
989 if (flags & EXTENT_ITEM_SHARED_BACKREFS || t->header.flags & HEADER_FLAG_SHARED_BACKREF || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
990 TREE_BLOCK_REF tbr;
991 BOOL unique = rc > 1 ? FALSE : (t->parent ? shared_tree_is_unique(Vcb, t->parent, Irp, rollback) : FALSE);
992
993 if (t->header.level == 0) {
994 LIST_ENTRY* le;
995
996 le = t->itemlist.Flink;
997 while (le != &t->itemlist) {
998 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
999
1000 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1001 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1002
1003 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1004 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1005
1006 if (ed2->size > 0) {
1007 EXTENT_DATA_REF edr;
1008 changed_extent* ce = NULL;
1009 chunk* c = get_chunk_from_address(Vcb, ed2->address);
1010
1011 if (c) {
1012 LIST_ENTRY* le2;
1013
1014 le2 = c->changed_extents.Flink;
1015 while (le2 != &c->changed_extents) {
1016 changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry);
1017
1018 if (ce2->address == ed2->address) {
1019 ce = ce2;
1020 break;
1021 }
1022
1023 le2 = le2->Flink;
1024 }
1025 }
1026
1027 edr.root = t->root->id;
1028 edr.objid = td->key.obj_id;
1029 edr.offset = td->key.offset - ed2->offset;
1030 edr.count = 1;
1031
1032 if (ce) {
1033 Status = add_changed_extent_ref_edr(ce, &edr, TRUE);
1034 if (!NT_SUCCESS(Status)) {
1035 ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1036 return Status;
1037 }
1038
1039 Status = add_changed_extent_ref_edr(ce, &edr, FALSE);
1040 if (!NT_SUCCESS(Status)) {
1041 ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1042 return Status;
1043 }
1044 }
1045
1046 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp);
1047 if (!NT_SUCCESS(Status)) {
1048 ERR("increase_extent_refcount returned %08x\n", Status);
1049 return Status;
1050 }
1051
1052 if ((flags & EXTENT_ITEM_SHARED_BACKREFS && unique) || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1053 UINT64 sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, t->header.address, Irp);
1054
1055 if (sdrrc > 0) {
1056 SHARED_DATA_REF sdr;
1057
1058 sdr.offset = t->header.address;
1059 sdr.count = 1;
1060
1061 Status = decrease_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0,
1062 t->header.address, ce ? ce->superseded : FALSE, Irp);
1063 if (!NT_SUCCESS(Status)) {
1064 ERR("decrease_extent_refcount returned %08x\n", Status);
1065 return Status;
1066 }
1067
1068 if (ce) {
1069 LIST_ENTRY* le2;
1070
1071 le2 = ce->refs.Flink;
1072 while (le2 != &ce->refs) {
1073 changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1074
1075 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) {
1076 ce->count--;
1077 cer->sdr.count--;
1078 break;
1079 }
1080
1081 le2 = le2->Flink;
1082 }
1083
1084 le2 = ce->old_refs.Flink;
1085 while (le2 != &ce->old_refs) {
1086 changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
1087
1088 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) {
1089 ce->old_count--;
1090
1091 if (cer->sdr.count > 1)
1092 cer->sdr.count--;
1093 else {
1094 RemoveEntryList(&cer->list_entry);
1095 ExFreePool(cer);
1096 }
1097
1098 break;
1099 }
1100
1101 le2 = le2->Flink;
1102 }
1103 }
1104 }
1105 }
1106
1107 // FIXME - clear shared flag if unique?
1108 }
1109 }
1110 }
1111
1112 le = le->Flink;
1113 }
1114 } else {
1115 LIST_ENTRY* le;
1116
1117 le = t->itemlist.Flink;
1118 while (le != &t->itemlist) {
1119 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1120
1121 if (!td->inserted) {
1122 tbr.offset = t->root->id;
1123
1124 Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF,
1125 &tbr, &td->key, t->header.level - 1, Irp);
1126 if (!NT_SUCCESS(Status)) {
1127 ERR("increase_extent_refcount returned %08x\n", Status);
1128 return Status;
1129 }
1130
1131 if (unique || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1132 UINT64 sbrrc = find_extent_shared_tree_refcount(Vcb, td->treeholder.address, t->header.address, Irp);
1133
1134 if (sbrrc > 0) {
1135 SHARED_BLOCK_REF sbr;
1136
1137 sbr.offset = t->header.address;
1138
1139 Status = decrease_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
1140 t->header.address, FALSE, Irp);
1141 if (!NT_SUCCESS(Status)) {
1142 ERR("decrease_extent_refcount returned %08x\n", Status);
1143 return Status;
1144 }
1145 }
1146 }
1147
1148 // FIXME - clear shared flag if unique?
1149 }
1150
1151 le = le->Flink;
1152 }
1153 }
1154
1155 if (unique) {
1156 UINT64 sbrrc = find_extent_shared_tree_refcount(Vcb, t->header.address, t->parent->header.address, Irp);
1157
1158 if (sbrrc == 1) {
1159 SHARED_BLOCK_REF sbr;
1160
1161 sbr.offset = t->parent->header.address;
1162
1163 Status = decrease_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
1164 t->parent->header.address, FALSE, Irp);
1165 if (!NT_SUCCESS(Status)) {
1166 ERR("decrease_extent_refcount returned %08x\n", Status);
1167 return Status;
1168 }
1169 }
1170 }
1171
1172 if (t->parent)
1173 tbr.offset = t->parent->header.tree_id;
1174 else
1175 tbr.offset = t->header.tree_id;
1176
1177 Status = increase_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr,
1178 t->parent ? &t->paritem->key : NULL, t->header.level, Irp);
1179 if (!NT_SUCCESS(Status)) {
1180 ERR("increase_extent_refcount returned %08x\n", Status);
1181 return Status;
1182 }
1183
1184 // FIXME - clear shared flag if unique?
1185
1186 t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
1187 }
1188
1189 if (rc > 1 || t->header.tree_id == t->root->id) {
1190 Status = reduce_tree_extent(Vcb, t->header.address, t, t->parent ? t->parent->header.tree_id : t->header.tree_id, t->header.level, Irp, rollback);
1191
1192 if (!NT_SUCCESS(Status)) {
1193 ERR("reduce_tree_extent returned %08x\n", Status);
1194 return Status;
1195 }
1196 }
1197
1198 t->has_address = FALSE;
1199
1200 if ((rc > 1 || t->header.tree_id != t->root->id) && !(flags & EXTENT_ITEM_SHARED_BACKREFS)) {
1201 if (t->header.tree_id == t->root->id) {
1202 flags |= EXTENT_ITEM_SHARED_BACKREFS;
1203 update_extent_flags(Vcb, t->header.address, flags, Irp);
1204 }
1205
1206 if (t->header.level > 0) {
1207 LIST_ENTRY* le;
1208
1209 le = t->itemlist.Flink;
1210 while (le != &t->itemlist) {
1211 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1212
1213 if (!td->inserted) {
1214 if (t->header.tree_id == t->root->id) {
1215 SHARED_BLOCK_REF sbr;
1216
1217 sbr.offset = t->header.address;
1218
1219 Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, &td->key, t->header.level - 1, Irp);
1220 } else {
1221 TREE_BLOCK_REF tbr;
1222
1223 tbr.offset = t->root->id;
1224
1225 Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr, &td->key, t->header.level - 1, Irp);
1226 }
1227
1228 if (!NT_SUCCESS(Status)) {
1229 ERR("increase_extent_refcount returned %08x\n", Status);
1230 return Status;
1231 }
1232 }
1233
1234 le = le->Flink;
1235 }
1236 } else {
1237 LIST_ENTRY* le;
1238
1239 le = t->itemlist.Flink;
1240 while (le != &t->itemlist) {
1241 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1242
1243 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1244 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1245
1246 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1247 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1248
1249 if (ed2->size > 0) {
1250 changed_extent* ce = NULL;
1251 chunk* c = get_chunk_from_address(Vcb, ed2->address);
1252
1253 if (c) {
1254 LIST_ENTRY* le2;
1255
1256 le2 = c->changed_extents.Flink;
1257 while (le2 != &c->changed_extents) {
1258 changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry);
1259
1260 if (ce2->address == ed2->address) {
1261 ce = ce2;
1262 break;
1263 }
1264
1265 le2 = le2->Flink;
1266 }
1267 }
1268
1269 if (t->header.tree_id == t->root->id) {
1270 SHARED_DATA_REF sdr;
1271
1272 sdr.offset = t->header.address;
1273 sdr.count = 1;
1274
1275 if (ce) {
1276 Status = add_changed_extent_ref_sdr(ce, &sdr, TRUE);
1277 if (!NT_SUCCESS(Status)) {
1278 ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1279 return Status;
1280 }
1281
1282 Status = add_changed_extent_ref_sdr(ce, &sdr, FALSE);
1283 if (!NT_SUCCESS(Status)) {
1284 ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1285 return Status;
1286 }
1287 }
1288
1289 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, Irp);
1290 } else {
1291 EXTENT_DATA_REF edr;
1292
1293 edr.root = t->root->id;
1294 edr.objid = td->key.obj_id;
1295 edr.offset = td->key.offset - ed2->offset;
1296 edr.count = 1;
1297
1298 if (ce) {
1299 Status = add_changed_extent_ref_edr(ce, &edr, TRUE);
1300 if (!NT_SUCCESS(Status)) {
1301 ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1302 return Status;
1303 }
1304
1305 Status = add_changed_extent_ref_edr(ce, &edr, FALSE);
1306 if (!NT_SUCCESS(Status)) {
1307 ERR("add_changed_extent_ref_edr returned %08x\n", Status);
1308 return Status;
1309 }
1310 }
1311
1312 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp);
1313 }
1314
1315 if (!NT_SUCCESS(Status)) {
1316 ERR("increase_extent_refcount returned %08x\n", Status);
1317 return Status;
1318 }
1319 }
1320 }
1321 }
1322
1323 le = le->Flink;
1324 }
1325 }
1326 }
1327
1328 t->updated_extents = TRUE;
1329 t->header.tree_id = t->root->id;
1330
1331 return STATUS_SUCCESS;
1332 }
1333
1334 static NTSTATUS allocate_tree_extents(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
1335 LIST_ENTRY* le;
1336 NTSTATUS Status;
1337 BOOL changed = FALSE;
1338 UINT8 max_level = 0, level;
1339
1340 TRACE("(%p)\n", Vcb);
1341
1342 le = Vcb->trees.Flink;
1343 while (le != &Vcb->trees) {
1344 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1345
1346 if (t->write && !t->has_new_address) {
1347 chunk* c;
1348
1349 if (t->has_address) {
1350 c = get_chunk_from_address(Vcb, t->header.address);
1351
1352 if (c) {
1353 if (!c->cache_loaded) {
1354 acquire_chunk_lock(c, Vcb);
1355
1356 if (!c->cache_loaded) {
1357 Status = load_cache_chunk(Vcb, c, NULL);
1358
1359 if (!NT_SUCCESS(Status)) {
1360 ERR("load_cache_chunk returned %08x\n", Status);
1361 release_chunk_lock(c, Vcb);
1362 return Status;
1363 }
1364 }
1365
1366 release_chunk_lock(c, Vcb);
1367 }
1368 }
1369 }
1370
1371 Status = get_tree_new_address(Vcb, t, Irp, rollback);
1372 if (!NT_SUCCESS(Status)) {
1373 ERR("get_tree_new_address returned %08x\n", Status);
1374 return Status;
1375 }
1376
1377 TRACE("allocated extent %llx\n", t->new_address);
1378
1379 c = get_chunk_from_address(Vcb, t->new_address);
1380
1381 if (c)
1382 c->used += Vcb->superblock.node_size;
1383 else {
1384 ERR("could not find chunk for address %llx\n", t->new_address);
1385 return STATUS_INTERNAL_ERROR;
1386 }
1387
1388 changed = TRUE;
1389
1390 if (t->header.level > max_level)
1391 max_level = t->header.level;
1392 }
1393
1394 le = le->Flink;
1395 }
1396
1397 if (!changed)
1398 return STATUS_SUCCESS;
1399
1400 level = max_level;
1401 do {
1402 le = Vcb->trees.Flink;
1403 while (le != &Vcb->trees) {
1404 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1405
1406 if (t->write && !t->updated_extents && t->has_address && t->header.level == level) {
1407 Status = update_tree_extents(Vcb, t, Irp, rollback);
1408 if (!NT_SUCCESS(Status)) {
1409 ERR("update_tree_extents returned %08x\n", Status);
1410 return Status;
1411 }
1412 }
1413
1414 le = le->Flink;
1415 }
1416
1417 if (level == 0)
1418 break;
1419
1420 level--;
1421 } while (TRUE);
1422
1423 return STATUS_SUCCESS;
1424 }
1425
1426 static NTSTATUS update_root_root(device_extension* Vcb, BOOL no_cache, PIRP Irp, LIST_ENTRY* rollback) {
1427 LIST_ENTRY* le;
1428 NTSTATUS Status;
1429
1430 TRACE("(%p)\n", Vcb);
1431
1432 le = Vcb->trees.Flink;
1433 while (le != &Vcb->trees) {
1434 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1435
1436 if (t->write && !t->parent) {
1437 if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
1438 KEY searchkey;
1439 traverse_ptr tp;
1440
1441 searchkey.obj_id = t->root->id;
1442 searchkey.obj_type = TYPE_ROOT_ITEM;
1443 searchkey.offset = 0xffffffffffffffff;
1444
1445 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
1446 if (!NT_SUCCESS(Status)) {
1447 ERR("error - find_item returned %08x\n", Status);
1448 return Status;
1449 }
1450
1451 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1452 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
1453 return STATUS_INTERNAL_ERROR;
1454 }
1455
1456 TRACE("updating the address for root %llx to %llx\n", searchkey.obj_id, t->new_address);
1457
1458 t->root->root_item.block_number = t->new_address;
1459 t->root->root_item.root_level = t->header.level;
1460 t->root->root_item.generation = Vcb->superblock.generation;
1461 t->root->root_item.generation2 = Vcb->superblock.generation;
1462
1463 // item is guaranteed to be at least sizeof(ROOT_ITEM), due to add_parents
1464
1465 RtlCopyMemory(tp.item->data, &t->root->root_item, sizeof(ROOT_ITEM));
1466 }
1467
1468 t->root->treeholder.address = t->new_address;
1469 t->root->treeholder.generation = Vcb->superblock.generation;
1470 }
1471
1472 le = le->Flink;
1473 }
1474
1475 if (!no_cache && !(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE)) {
1476 ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
1477 Status = update_chunk_caches(Vcb, Irp, rollback);
1478 ExReleaseResourceLite(&Vcb->chunk_lock);
1479
1480 if (!NT_SUCCESS(Status)) {
1481 ERR("update_chunk_caches returned %08x\n", Status);
1482 return Status;
1483 }
1484 }
1485
1486 return STATUS_SUCCESS;
1487 }
1488
1489 NTSTATUS do_tree_writes(device_extension* Vcb, LIST_ENTRY* tree_writes, BOOL no_free) {
1490 chunk* c;
1491 LIST_ENTRY* le;
1492 tree_write* tw;
1493 NTSTATUS Status;
1494 ULONG i, num_bits;
1495 write_data_context* wtc;
1496 ULONG bit_num = 0;
1497 BOOL raid56 = FALSE;
1498
1499 // merge together runs
1500 c = NULL;
1501 le = tree_writes->Flink;
1502 while (le != tree_writes) {
1503 tw = CONTAINING_RECORD(le, tree_write, list_entry);
1504
1505 if (!c || tw->address < c->offset || tw->address >= c->offset + c->chunk_item->size)
1506 c = get_chunk_from_address(Vcb, tw->address);
1507 else {
1508 tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry);
1509
1510 if (tw->address == tw2->address + tw2->length) {
1511 UINT8* data = ExAllocatePoolWithTag(NonPagedPool, tw2->length + tw->length, ALLOC_TAG);
1512
1513 if (!data) {
1514 ERR("out of memory\n");
1515 return STATUS_INSUFFICIENT_RESOURCES;
1516 }
1517
1518 RtlCopyMemory(data, tw2->data, tw2->length);
1519 RtlCopyMemory(&data[tw2->length], tw->data, tw->length);
1520
1521 if (!no_free)
1522 ExFreePool(tw2->data);
1523
1524 tw2->data = data;
1525 tw2->length += tw->length;
1526
1527 if (!no_free) // FIXME - what if we allocated this just now?
1528 ExFreePool(tw->data);
1529
1530 RemoveEntryList(&tw->list_entry);
1531 ExFreePool(tw);
1532
1533 le = tw2->list_entry.Flink;
1534 continue;
1535 }
1536 }
1537
1538 tw->c = c;
1539
1540 if (c->chunk_item->type & (BLOCK_FLAG_RAID5 | BLOCK_FLAG_RAID6))
1541 raid56 = TRUE;
1542
1543 le = le->Flink;
1544 }
1545
1546 num_bits = 0;
1547
1548 le = tree_writes->Flink;
1549 while (le != tree_writes) {
1550 tw = CONTAINING_RECORD(le, tree_write, list_entry);
1551
1552 num_bits++;
1553
1554 le = le->Flink;
1555 }
1556
1557 wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context) * num_bits, ALLOC_TAG);
1558 if (!wtc) {
1559 ERR("out of memory\n");
1560 return STATUS_INSUFFICIENT_RESOURCES;
1561 }
1562
1563 le = tree_writes->Flink;
1564
1565 while (le != tree_writes) {
1566 tw = CONTAINING_RECORD(le, tree_write, list_entry);
1567
1568 TRACE("address: %llx, size: %x\n", tw->address, tw->length);
1569
1570 KeInitializeEvent(&wtc[bit_num].Event, NotificationEvent, FALSE);
1571 InitializeListHead(&wtc[bit_num].stripes);
1572 wtc[bit_num].need_wait = FALSE;
1573 wtc[bit_num].stripes_left = 0;
1574 wtc[bit_num].parity1 = wtc[bit_num].parity2 = wtc[bit_num].scratch = NULL;
1575 wtc[bit_num].mdl = wtc[bit_num].parity1_mdl = wtc[bit_num].parity2_mdl = NULL;
1576
1577 Status = write_data(Vcb, tw->address, tw->data, tw->length, &wtc[bit_num], NULL, NULL, FALSE, 0, HighPagePriority);
1578 if (!NT_SUCCESS(Status)) {
1579 ERR("write_data returned %08x\n", Status);
1580
1581 for (i = 0; i < num_bits; i++) {
1582 free_write_data_stripes(&wtc[i]);
1583 }
1584 ExFreePool(wtc);
1585
1586 return Status;
1587 }
1588
1589 bit_num++;
1590
1591 le = le->Flink;
1592 }
1593
1594 for (i = 0; i < num_bits; i++) {
1595 if (wtc[i].stripes.Flink != &wtc[i].stripes) {
1596 // launch writes and wait
1597 le = wtc[i].stripes.Flink;
1598 while (le != &wtc[i].stripes) {
1599 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
1600
1601 if (stripe->status != WriteDataStatus_Ignore) {
1602 wtc[i].need_wait = TRUE;
1603 IoCallDriver(stripe->device->devobj, stripe->Irp);
1604 }
1605
1606 le = le->Flink;
1607 }
1608 }
1609 }
1610
1611 for (i = 0; i < num_bits; i++) {
1612 if (wtc[i].need_wait)
1613 KeWaitForSingleObject(&wtc[i].Event, Executive, KernelMode, FALSE, NULL);
1614 }
1615
1616 for (i = 0; i < num_bits; i++) {
1617 le = wtc[i].stripes.Flink;
1618 while (le != &wtc[i].stripes) {
1619 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
1620
1621 if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
1622 Status = stripe->iosb.Status;
1623 log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS);
1624 break;
1625 }
1626
1627 le = le->Flink;
1628 }
1629
1630 free_write_data_stripes(&wtc[i]);
1631 }
1632
1633 ExFreePool(wtc);
1634
1635 if (raid56) {
1636 c = NULL;
1637
1638 le = tree_writes->Flink;
1639 while (le != tree_writes) {
1640 tw = CONTAINING_RECORD(le, tree_write, list_entry);
1641
1642 if (tw->c != c) {
1643 c = tw->c;
1644
1645 ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, TRUE);
1646
1647 while (!IsListEmpty(&c->partial_stripes)) {
1648 partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry);
1649
1650 Status = flush_partial_stripe(Vcb, c, ps);
1651
1652 if (ps->bmparr)
1653 ExFreePool(ps->bmparr);
1654
1655 ExFreePool(ps);
1656
1657 if (!NT_SUCCESS(Status)) {
1658 ERR("flush_partial_stripe returned %08x\n", Status);
1659 ExReleaseResourceLite(&c->partial_stripes_lock);
1660 return Status;
1661 }
1662 }
1663
1664 ExReleaseResourceLite(&c->partial_stripes_lock);
1665 }
1666
1667 le = le->Flink;
1668 }
1669 }
1670
1671 return STATUS_SUCCESS;
1672 }
1673
1674 static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) {
1675 ULONG level;
1676 UINT8 *data, *body;
1677 UINT32 crc32;
1678 NTSTATUS Status;
1679 LIST_ENTRY* le;
1680 LIST_ENTRY tree_writes;
1681 tree_write* tw;
1682
1683 TRACE("(%p)\n", Vcb);
1684
1685 InitializeListHead(&tree_writes);
1686
1687 for (level = 0; level <= 255; level++) {
1688 BOOL nothing_found = TRUE;
1689
1690 TRACE("level = %u\n", level);
1691
1692 le = Vcb->trees.Flink;
1693 while (le != &Vcb->trees) {
1694 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1695
1696 if (t->write && t->header.level == level) {
1697 KEY firstitem, searchkey;
1698 LIST_ENTRY* le2;
1699 traverse_ptr tp;
1700
1701 if (!t->has_new_address) {
1702 ERR("error - tried to write tree with no new address\n");
1703 return STATUS_INTERNAL_ERROR;
1704 }
1705
1706 le2 = t->itemlist.Flink;
1707 while (le2 != &t->itemlist) {
1708 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1709 if (!td->ignore) {
1710 firstitem = td->key;
1711 break;
1712 }
1713 le2 = le2->Flink;
1714 }
1715
1716 if (t->parent) {
1717 t->paritem->key = firstitem;
1718 t->paritem->treeholder.address = t->new_address;
1719 t->paritem->treeholder.generation = Vcb->superblock.generation;
1720 }
1721
1722 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
1723 EXTENT_ITEM_TREE* eit;
1724
1725 searchkey.obj_id = t->new_address;
1726 searchkey.obj_type = TYPE_EXTENT_ITEM;
1727 searchkey.offset = Vcb->superblock.node_size;
1728
1729 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
1730 if (!NT_SUCCESS(Status)) {
1731 ERR("error - find_item returned %08x\n", Status);
1732 return Status;
1733 }
1734
1735 if (keycmp(searchkey, tp.item->key)) {
1736 ERR("could not find %llx,%x,%llx in extent_root (found %llx,%x,%llx instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
1737 return STATUS_INTERNAL_ERROR;
1738 }
1739
1740 if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
1741 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
1742 return STATUS_INTERNAL_ERROR;
1743 }
1744
1745 eit = (EXTENT_ITEM_TREE*)tp.item->data;
1746 eit->firstitem = firstitem;
1747 }
1748
1749 nothing_found = FALSE;
1750 }
1751
1752 le = le->Flink;
1753 }
1754
1755 if (nothing_found)
1756 break;
1757 }
1758
1759 TRACE("allocated tree extents\n");
1760
1761 le = Vcb->trees.Flink;
1762 while (le != &Vcb->trees) {
1763 tree* t = CONTAINING_RECORD(le, tree, list_entry);
1764 LIST_ENTRY* le2;
1765 #ifdef DEBUG_PARANOID
1766 UINT32 num_items = 0, size = 0;
1767 BOOL crash = FALSE;
1768 #endif
1769
1770 if (t->write) {
1771 #ifdef DEBUG_PARANOID
1772 BOOL first = TRUE;
1773 KEY lastkey;
1774
1775 le2 = t->itemlist.Flink;
1776 while (le2 != &t->itemlist) {
1777 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1778 if (!td->ignore) {
1779 num_items++;
1780
1781 if (!first) {
1782 if (keycmp(td->key, lastkey) == 0) {
1783 ERR("(%llx,%x,%llx): duplicate key\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1784 crash = TRUE;
1785 } else if (keycmp(td->key, lastkey) == -1) {
1786 ERR("(%llx,%x,%llx): key out of order\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1787 crash = TRUE;
1788 }
1789 } else
1790 first = FALSE;
1791
1792 lastkey = td->key;
1793
1794 if (t->header.level == 0)
1795 size += td->size;
1796 }
1797 le2 = le2->Flink;
1798 }
1799
1800 if (t->header.level == 0)
1801 size += num_items * sizeof(leaf_node);
1802 else
1803 size += num_items * sizeof(internal_node);
1804
1805 if (num_items != t->header.num_items) {
1806 ERR("tree %llx, level %x: num_items was %x, expected %x\n", t->root->id, t->header.level, num_items, t->header.num_items);
1807 crash = TRUE;
1808 }
1809
1810 if (size != t->size) {
1811 ERR("tree %llx, level %x: size was %x, expected %x\n", t->root->id, t->header.level, size, t->size);
1812 crash = TRUE;
1813 }
1814
1815 if (t->header.num_items == 0 && t->parent) {
1816 ERR("tree %llx, level %x: tried to write empty tree with parent\n", t->root->id, t->header.level);
1817 crash = TRUE;
1818 }
1819
1820 if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
1821 ERR("tree %llx, level %x: tried to write overlarge tree (%x > %x)\n", t->root->id, t->header.level, t->size, Vcb->superblock.node_size - sizeof(tree_header));
1822 crash = TRUE;
1823 }
1824
1825 if (crash) {
1826 ERR("tree %p\n", t);
1827 le2 = t->itemlist.Flink;
1828 while (le2 != &t->itemlist) {
1829 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1830 if (!td->ignore) {
1831 ERR("%llx,%x,%llx inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted);
1832 }
1833 le2 = le2->Flink;
1834 }
1835 int3;
1836 }
1837 #endif
1838 t->header.address = t->new_address;
1839 t->header.generation = Vcb->superblock.generation;
1840 t->header.tree_id = t->root->id;
1841 t->header.flags |= HEADER_FLAG_MIXED_BACKREF;
1842 t->header.fs_uuid = Vcb->superblock.uuid;
1843 t->has_address = TRUE;
1844
1845 data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
1846 if (!data) {
1847 ERR("out of memory\n");
1848 Status = STATUS_INSUFFICIENT_RESOURCES;
1849 goto end;
1850 }
1851
1852 body = data + sizeof(tree_header);
1853
1854 RtlCopyMemory(data, &t->header, sizeof(tree_header));
1855 RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header));
1856
1857 if (t->header.level == 0) {
1858 leaf_node* itemptr = (leaf_node*)body;
1859 int i = 0;
1860 UINT8* dataptr = data + Vcb->superblock.node_size;
1861
1862 le2 = t->itemlist.Flink;
1863 while (le2 != &t->itemlist) {
1864 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1865 if (!td->ignore) {
1866 dataptr = dataptr - td->size;
1867
1868 itemptr[i].key = td->key;
1869 itemptr[i].offset = (UINT32)((UINT8*)dataptr - (UINT8*)body);
1870 itemptr[i].size = td->size;
1871 i++;
1872
1873 if (td->size > 0)
1874 RtlCopyMemory(dataptr, td->data, td->size);
1875 }
1876
1877 le2 = le2->Flink;
1878 }
1879 } else {
1880 internal_node* itemptr = (internal_node*)body;
1881 int i = 0;
1882
1883 le2 = t->itemlist.Flink;
1884 while (le2 != &t->itemlist) {
1885 tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
1886 if (!td->ignore) {
1887 itemptr[i].key = td->key;
1888 itemptr[i].address = td->treeholder.address;
1889 itemptr[i].generation = td->treeholder.generation;
1890 i++;
1891 }
1892
1893 le2 = le2->Flink;
1894 }
1895 }
1896
1897 crc32 = calc_crc32c(0xffffffff, (UINT8*)&((tree_header*)data)->fs_uuid, Vcb->superblock.node_size - sizeof(((tree_header*)data)->csum));
1898 crc32 = ~crc32;
1899 *((UINT32*)data) = crc32;
1900 TRACE("setting crc32 to %08x\n", crc32);
1901
1902 tw = ExAllocatePoolWithTag(PagedPool, sizeof(tree_write), ALLOC_TAG);
1903 if (!tw) {
1904 ERR("out of memory\n");
1905 ExFreePool(data);
1906 Status = STATUS_INSUFFICIENT_RESOURCES;
1907 goto end;
1908 }
1909
1910 tw->address = t->new_address;
1911 tw->length = Vcb->superblock.node_size;
1912 tw->data = data;
1913
1914 if (IsListEmpty(&tree_writes))
1915 InsertTailList(&tree_writes, &tw->list_entry);
1916 else {
1917 BOOL inserted = FALSE;
1918
1919 le2 = tree_writes.Flink;
1920 while (le2 != &tree_writes) {
1921 tree_write* tw2 = CONTAINING_RECORD(le2, tree_write, list_entry);
1922
1923 if (tw2->address > tw->address) {
1924 InsertHeadList(le2->Blink, &tw->list_entry);
1925 inserted = TRUE;
1926 break;
1927 }
1928
1929 le2 = le2->Flink;
1930 }
1931
1932 if (!inserted)
1933 InsertTailList(&tree_writes, &tw->list_entry);
1934 }
1935 }
1936
1937 le = le->Flink;
1938 }
1939
1940 Status = do_tree_writes(Vcb, &tree_writes, FALSE);
1941 if (!NT_SUCCESS(Status)) {
1942 ERR("do_tree_writes returned %08x\n", Status);
1943 goto end;
1944 }
1945
1946 Status = STATUS_SUCCESS;
1947
1948 end:
1949 while (!IsListEmpty(&tree_writes)) {
1950 le = RemoveHeadList(&tree_writes);
1951 tw = CONTAINING_RECORD(le, tree_write, list_entry);
1952
1953 if (tw->data)
1954 ExFreePool(tw->data);
1955
1956 ExFreePool(tw);
1957 }
1958
1959 return Status;
1960 }
1961
1962 static void update_backup_superblock(device_extension* Vcb, superblock_backup* sb, PIRP Irp) {
1963 KEY searchkey;
1964 traverse_ptr tp;
1965
1966 RtlZeroMemory(sb, sizeof(superblock_backup));
1967
1968 sb->root_tree_addr = Vcb->superblock.root_tree_addr;
1969 sb->root_tree_generation = Vcb->superblock.generation;
1970 sb->root_level = Vcb->superblock.root_level;
1971
1972 sb->chunk_tree_addr = Vcb->superblock.chunk_tree_addr;
1973 sb->chunk_tree_generation = Vcb->superblock.chunk_root_generation;
1974 sb->chunk_root_level = Vcb->superblock.chunk_root_level;
1975
1976 searchkey.obj_id = BTRFS_ROOT_EXTENT;
1977 searchkey.obj_type = TYPE_ROOT_ITEM;
1978 searchkey.offset = 0xffffffffffffffff;
1979
1980 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp))) {
1981 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
1982 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
1983
1984 sb->extent_tree_addr = ri->block_number;
1985 sb->extent_tree_generation = ri->generation;
1986 sb->extent_root_level = ri->root_level;
1987 }
1988 }
1989
1990 searchkey.obj_id = BTRFS_ROOT_FSTREE;
1991
1992 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp))) {
1993 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
1994 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
1995
1996 sb->fs_tree_addr = ri->block_number;
1997 sb->fs_tree_generation = ri->generation;
1998 sb->fs_root_level = ri->root_level;
1999 }
2000 }
2001
2002 searchkey.obj_id = BTRFS_ROOT_DEVTREE;
2003
2004 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp))) {
2005 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2006 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2007
2008 sb->dev_root_addr = ri->block_number;
2009 sb->dev_root_generation = ri->generation;
2010 sb->dev_root_level = ri->root_level;
2011 }
2012 }
2013
2014 searchkey.obj_id = BTRFS_ROOT_CHECKSUM;
2015
2016 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp))) {
2017 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2018 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2019
2020 sb->csum_root_addr = ri->block_number;
2021 sb->csum_root_generation = ri->generation;
2022 sb->csum_root_level = ri->root_level;
2023 }
2024 }
2025
2026 sb->total_bytes = Vcb->superblock.total_bytes;
2027 sb->bytes_used = Vcb->superblock.bytes_used;
2028 sb->num_devices = Vcb->superblock.num_devices;
2029 }
2030
2031 typedef struct {
2032 void* context;
2033 UINT8* buf;
2034 PMDL mdl;
2035 device* device;
2036 NTSTATUS Status;
2037 PIRP Irp;
2038 LIST_ENTRY list_entry;
2039 } write_superblocks_stripe;
2040
2041 typedef struct _write_superblocks_context {
2042 KEVENT Event;
2043 LIST_ENTRY stripes;
2044 LONG left;
2045 } write_superblocks_context;
2046
2047 _Function_class_(IO_COMPLETION_ROUTINE)
2048 #ifdef __REACTOS__
2049 static NTSTATUS NTAPI write_superblock_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
2050 #else
2051 static NTSTATUS write_superblock_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
2052 #endif
2053 write_superblocks_stripe* stripe = conptr;
2054 write_superblocks_context* context = stripe->context;
2055
2056 UNUSED(DeviceObject);
2057
2058 stripe->Status = Irp->IoStatus.Status;
2059
2060 if (InterlockedDecrement(&context->left) == 0)
2061 KeSetEvent(&context->Event, 0, FALSE);
2062
2063 return STATUS_MORE_PROCESSING_REQUIRED;
2064 }
2065
2066 static NTSTATUS write_superblock(device_extension* Vcb, device* device, write_superblocks_context* context) {
2067 unsigned int i = 0;
2068
2069 // All the documentation says that the Linux driver only writes one superblock
2070 // if it thinks a disk is an SSD, but this doesn't seem to be the case!
2071
2072 while (superblock_addrs[i] > 0 && device->devitem.num_bytes >= superblock_addrs[i] + sizeof(superblock)) {
2073 ULONG sblen = (ULONG)sector_align(sizeof(superblock), Vcb->superblock.sector_size);
2074 superblock* sb;
2075 UINT32 crc32;
2076 write_superblocks_stripe* stripe;
2077 PIO_STACK_LOCATION IrpSp;
2078
2079 sb = ExAllocatePoolWithTag(NonPagedPool, sblen, ALLOC_TAG);
2080 if (!sb) {
2081 ERR("out of memory\n");
2082 return STATUS_INSUFFICIENT_RESOURCES;
2083 }
2084
2085 RtlCopyMemory(sb, &Vcb->superblock, sizeof(superblock));
2086
2087 if (sblen > sizeof(superblock))
2088 RtlZeroMemory((UINT8*)sb + sizeof(superblock), sblen - sizeof(superblock));
2089
2090 RtlCopyMemory(&sb->dev_item, &device->devitem, sizeof(DEV_ITEM));
2091 sb->sb_phys_addr = superblock_addrs[i];
2092
2093 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum));
2094 RtlCopyMemory(&sb->checksum, &crc32, sizeof(UINT32));
2095
2096 stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_superblocks_stripe), ALLOC_TAG);
2097 if (!stripe) {
2098 ERR("out of memory\n");
2099 ExFreePool(sb);
2100 return STATUS_INSUFFICIENT_RESOURCES;
2101 }
2102
2103 stripe->buf = (UINT8*)sb;
2104
2105 stripe->Irp = IoAllocateIrp(device->devobj->StackSize, FALSE);
2106 if (!stripe->Irp) {
2107 ERR("IoAllocateIrp failed\n");
2108 ExFreePool(stripe);
2109 ExFreePool(sb);
2110 return STATUS_INSUFFICIENT_RESOURCES;
2111 }
2112
2113 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
2114 IrpSp->MajorFunction = IRP_MJ_WRITE;
2115
2116 if (i == 0)
2117 IrpSp->Flags |= SL_WRITE_THROUGH;
2118
2119 if (device->devobj->Flags & DO_BUFFERED_IO) {
2120 stripe->Irp->AssociatedIrp.SystemBuffer = sb;
2121 stripe->mdl = NULL;
2122
2123 stripe->Irp->Flags = IRP_BUFFERED_IO;
2124 } else if (device->devobj->Flags & DO_DIRECT_IO) {
2125 stripe->mdl = IoAllocateMdl(sb, sblen, FALSE, FALSE, NULL);
2126 if (!stripe->mdl) {
2127 ERR("IoAllocateMdl failed\n");
2128 IoFreeIrp(stripe->Irp);
2129 ExFreePool(stripe);
2130 ExFreePool(sb);
2131 return STATUS_INSUFFICIENT_RESOURCES;
2132 }
2133
2134 stripe->Irp->MdlAddress = stripe->mdl;
2135
2136 MmBuildMdlForNonPagedPool(stripe->mdl);
2137 } else {
2138 stripe->Irp->UserBuffer = sb;
2139 stripe->mdl = NULL;
2140 }
2141
2142 IrpSp->Parameters.Write.Length = sblen;
2143 IrpSp->Parameters.Write.ByteOffset.QuadPart = superblock_addrs[i];
2144
2145 IoSetCompletionRoutine(stripe->Irp, write_superblock_completion, stripe, TRUE, TRUE, TRUE);
2146
2147 stripe->context = context;
2148 stripe->device = device;
2149 InsertTailList(&context->stripes, &stripe->list_entry);
2150
2151 context->left++;
2152
2153 i++;
2154 }
2155
2156 if (i == 0)
2157 ERR("no superblocks written!\n");
2158
2159 return STATUS_SUCCESS;
2160 }
2161
2162 static NTSTATUS write_superblocks(device_extension* Vcb, PIRP Irp) {
2163 UINT64 i;
2164 NTSTATUS Status;
2165 LIST_ENTRY* le;
2166 write_superblocks_context context;
2167
2168 TRACE("(%p)\n", Vcb);
2169
2170 le = Vcb->trees.Flink;
2171 while (le != &Vcb->trees) {
2172 tree* t = CONTAINING_RECORD(le, tree, list_entry);
2173
2174 if (t->write && !t->parent) {
2175 if (t->root == Vcb->root_root) {
2176 Vcb->superblock.root_tree_addr = t->new_address;
2177 Vcb->superblock.root_level = t->header.level;
2178 } else if (t->root == Vcb->chunk_root) {
2179 Vcb->superblock.chunk_tree_addr = t->new_address;
2180 Vcb->superblock.chunk_root_generation = t->header.generation;
2181 Vcb->superblock.chunk_root_level = t->header.level;
2182 }
2183 }
2184
2185 le = le->Flink;
2186 }
2187
2188 for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS - 1; i++) {
2189 RtlCopyMemory(&Vcb->superblock.backup[i], &Vcb->superblock.backup[i+1], sizeof(superblock_backup));
2190 }
2191
2192 update_backup_superblock(Vcb, &Vcb->superblock.backup[BTRFS_NUM_BACKUP_ROOTS - 1], Irp);
2193
2194 KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
2195 InitializeListHead(&context.stripes);
2196 context.left = 0;
2197
2198 le = Vcb->devices.Flink;
2199 while (le != &Vcb->devices) {
2200 device* dev = CONTAINING_RECORD(le, device, list_entry);
2201
2202 if (dev->devobj && !dev->readonly) {
2203 Status = write_superblock(Vcb, dev, &context);
2204 if (!NT_SUCCESS(Status)) {
2205 ERR("write_superblock returned %08x\n", Status);
2206 goto end;
2207 }
2208 }
2209
2210 le = le->Flink;
2211 }
2212
2213 if (IsListEmpty(&context.stripes)) {
2214 ERR("error - not writing any superblocks\n");
2215 Status = STATUS_INTERNAL_ERROR;
2216 goto end;
2217 }
2218
2219 le = context.stripes.Flink;
2220 while (le != &context.stripes) {
2221 write_superblocks_stripe* stripe = CONTAINING_RECORD(le, write_superblocks_stripe, list_entry);
2222
2223 IoCallDriver(stripe->device->devobj, stripe->Irp);
2224
2225 le = le->Flink;
2226 }
2227
2228 KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
2229
2230 le = context.stripes.Flink;
2231 while (le != &context.stripes) {
2232 write_superblocks_stripe* stripe = CONTAINING_RECORD(le, write_superblocks_stripe, list_entry);
2233
2234 if (!NT_SUCCESS(stripe->Status)) {
2235 ERR("device %llx returned %08x\n", stripe->device->devitem.dev_id, stripe->Status);
2236 log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS);
2237 Status = stripe->Status;
2238 goto end;
2239 }
2240
2241 le = le->Flink;
2242 }
2243
2244 Status = STATUS_SUCCESS;
2245
2246 end:
2247 while (!IsListEmpty(&context.stripes)) {
2248 write_superblocks_stripe* stripe = CONTAINING_RECORD(RemoveHeadList(&context.stripes), write_superblocks_stripe, list_entry);
2249
2250 if (stripe->mdl) {
2251 if (stripe->mdl->MdlFlags & MDL_PAGES_LOCKED)
2252 MmUnlockPages(stripe->mdl);
2253
2254 IoFreeMdl(stripe->mdl);
2255 }
2256
2257 if (stripe->Irp)
2258 IoFreeIrp(stripe->Irp);
2259
2260 if (stripe->buf)
2261 ExFreePool(stripe->buf);
2262
2263 ExFreePool(stripe);
2264 }
2265
2266 return Status;
2267 }
2268
2269 static NTSTATUS flush_changed_extent(device_extension* Vcb, chunk* c, changed_extent* ce, PIRP Irp, LIST_ENTRY* rollback) {
2270 LIST_ENTRY *le, *le2;
2271 NTSTATUS Status;
2272 UINT64 old_size;
2273
2274 if (ce->count == 0 && ce->old_count == 0) {
2275 while (!IsListEmpty(&ce->refs)) {
2276 changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->refs), changed_extent_ref, list_entry);
2277 ExFreePool(cer);
2278 }
2279
2280 while (!IsListEmpty(&ce->old_refs)) {
2281 changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->old_refs), changed_extent_ref, list_entry);
2282 ExFreePool(cer);
2283 }
2284
2285 goto end;
2286 }
2287
2288 le = ce->refs.Flink;
2289 while (le != &ce->refs) {
2290 changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
2291 UINT32 old_count = 0;
2292
2293 if (cer->type == TYPE_EXTENT_DATA_REF) {
2294 le2 = ce->old_refs.Flink;
2295 while (le2 != &ce->old_refs) {
2296 changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
2297
2298 if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
2299 old_count = cer2->edr.count;
2300 break;
2301 }
2302
2303 le2 = le2->Flink;
2304 }
2305
2306 old_size = ce->old_count > 0 ? ce->old_size : ce->size;
2307
2308 if (cer->edr.count > old_count) {
2309 Status = increase_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset, cer->edr.count - old_count, Irp);
2310
2311 if (!NT_SUCCESS(Status)) {
2312 ERR("increase_extent_refcount_data returned %08x\n", Status);
2313 return Status;
2314 }
2315 }
2316 } else if (cer->type == TYPE_SHARED_DATA_REF) {
2317 le2 = ce->old_refs.Flink;
2318 while (le2 != &ce->old_refs) {
2319 changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
2320
2321 if (cer2->type == TYPE_SHARED_DATA_REF && cer2->sdr.offset == cer->sdr.offset) {
2322 RemoveEntryList(&cer2->list_entry);
2323 ExFreePool(cer2);
2324 break;
2325 }
2326
2327 le2 = le2->Flink;
2328 }
2329 }
2330
2331 le = le->Flink;
2332 }
2333
2334 le = ce->refs.Flink;
2335 while (le != &ce->refs) {
2336 changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
2337 LIST_ENTRY* le3 = le->Flink;
2338 UINT32 old_count = 0;
2339
2340 if (cer->type == TYPE_EXTENT_DATA_REF) {
2341 le2 = ce->old_refs.Flink;
2342 while (le2 != &ce->old_refs) {
2343 changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
2344
2345 if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
2346 old_count = cer2->edr.count;
2347
2348 RemoveEntryList(&cer2->list_entry);
2349 ExFreePool(cer2);
2350 break;
2351 }
2352
2353 le2 = le2->Flink;
2354 }
2355
2356 old_size = ce->old_count > 0 ? ce->old_size : ce->size;
2357
2358 if (cer->edr.count < old_count) {
2359 Status = decrease_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset,
2360 old_count - cer->edr.count, ce->superseded, Irp);
2361
2362 if (!NT_SUCCESS(Status)) {
2363 ERR("decrease_extent_refcount_data returned %08x\n", Status);
2364 return Status;
2365 }
2366 }
2367
2368 if (ce->size != ce->old_size && ce->old_count > 0) {
2369 KEY searchkey;
2370 traverse_ptr tp;
2371 void* data;
2372
2373 searchkey.obj_id = ce->address;
2374 searchkey.obj_type = TYPE_EXTENT_ITEM;
2375 searchkey.offset = ce->old_size;
2376
2377 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
2378 if (!NT_SUCCESS(Status)) {
2379 ERR("error - find_item returned %08x\n", Status);
2380 return Status;
2381 }
2382
2383 if (keycmp(searchkey, tp.item->key)) {
2384 ERR("could not find (%llx,%x,%llx) in extent tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
2385 return STATUS_INTERNAL_ERROR;
2386 }
2387
2388 if (tp.item->size > 0) {
2389 data = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
2390
2391 if (!data) {
2392 ERR("out of memory\n");
2393 return STATUS_INSUFFICIENT_RESOURCES;
2394 }
2395
2396 RtlCopyMemory(data, tp.item->data, tp.item->size);
2397 } else
2398 data = NULL;
2399
2400 Status = insert_tree_item(Vcb, Vcb->extent_root, ce->address, TYPE_EXTENT_ITEM, ce->size, data, tp.item->size, NULL, Irp);
2401 if (!NT_SUCCESS(Status)) {
2402 ERR("insert_tree_item returned %08x\n", Status);
2403 if (data) ExFreePool(data);
2404 return Status;
2405 }
2406
2407 Status = delete_tree_item(Vcb, &tp);
2408 if (!NT_SUCCESS(Status)) {
2409 ERR("delete_tree_item returned %08x\n", Status);
2410 return Status;
2411 }
2412 }
2413 }
2414
2415 RemoveEntryList(&cer->list_entry);
2416 ExFreePool(cer);
2417
2418 le = le3;
2419 }
2420
2421 #ifdef DEBUG_PARANOID
2422 if (!IsListEmpty(&ce->old_refs))
2423 WARN("old_refs not empty\n");
2424 #endif
2425
2426 end:
2427 if (ce->count == 0 && !ce->superseded) {
2428 c->used -= ce->size;
2429 space_list_add(c, ce->address, ce->size, rollback);
2430 }
2431
2432 RemoveEntryList(&ce->list_entry);
2433 ExFreePool(ce);
2434
2435 return STATUS_SUCCESS;
2436 }
2437
2438 void add_checksum_entry(device_extension* Vcb, UINT64 address, ULONG length, UINT32* csum, PIRP Irp) {
2439 KEY searchkey;
2440 traverse_ptr tp, next_tp;
2441 NTSTATUS Status;
2442 UINT64 startaddr, endaddr;
2443 ULONG len;
2444 UINT32* checksums;
2445 RTL_BITMAP bmp;
2446 ULONG* bmparr;
2447 ULONG runlength, index;
2448
2449 searchkey.obj_id = EXTENT_CSUM_ID;
2450 searchkey.obj_type = TYPE_EXTENT_CSUM;
2451 searchkey.offset = address;
2452
2453 // FIXME - create checksum_root if it doesn't exist at all
2454
2455 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
2456 if (Status == STATUS_NOT_FOUND) { // tree is completely empty
2457 if (csum) { // not deleted
2458 ULONG length2 = length;
2459 UINT64 off = address;
2460 UINT32* data = csum;
2461
2462 do {
2463 UINT16 il = (UINT16)min(length2, MAX_CSUM_SIZE / sizeof(UINT32));
2464
2465 checksums = ExAllocatePoolWithTag(PagedPool, il * sizeof(UINT32), ALLOC_TAG);
2466 if (!checksums) {
2467 ERR("out of memory\n");
2468 return;
2469 }
2470
2471 RtlCopyMemory(checksums, data, il * sizeof(UINT32));
2472
2473 Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, checksums,
2474 il * sizeof(UINT32), NULL, Irp);
2475 if (!NT_SUCCESS(Status)) {
2476 ERR("insert_tree_item returned %08x\n", Status);
2477 ExFreePool(checksums);
2478 return;
2479 }
2480
2481 length2 -= il;
2482
2483 if (length2 > 0) {
2484 off += il * Vcb->superblock.sector_size;
2485 data += il;
2486 }
2487 } while (length2 > 0);
2488 }
2489 } else if (!NT_SUCCESS(Status)) {
2490 ERR("find_item returned %08x\n", Status);
2491 return;
2492 } else {
2493 UINT32 tplen;
2494
2495 // FIXME - check entry is TYPE_EXTENT_CSUM?
2496
2497 if (tp.item->key.offset < address && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= address)
2498 startaddr = tp.item->key.offset;
2499 else
2500 startaddr = address;
2501
2502 searchkey.obj_id = EXTENT_CSUM_ID;
2503 searchkey.obj_type = TYPE_EXTENT_CSUM;
2504 searchkey.offset = address + (length * Vcb->superblock.sector_size);
2505
2506 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
2507 if (!NT_SUCCESS(Status)) {
2508 ERR("find_item returned %08x\n", Status);
2509 return;
2510 }
2511
2512 tplen = tp.item->size / sizeof(UINT32);
2513
2514 if (tp.item->key.offset + (tplen * Vcb->superblock.sector_size) >= address + (length * Vcb->superblock.sector_size))
2515 endaddr = tp.item->key.offset + (tplen * Vcb->superblock.sector_size);
2516 else
2517 endaddr = address + (length * Vcb->superblock.sector_size);
2518
2519 TRACE("cs starts at %llx (%x sectors)\n", address, length);
2520 TRACE("startaddr = %llx\n", startaddr);
2521 TRACE("endaddr = %llx\n", endaddr);
2522
2523 len = (ULONG)((endaddr - startaddr) / Vcb->superblock.sector_size);
2524
2525 checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * len, ALLOC_TAG);
2526 if (!checksums) {
2527 ERR("out of memory\n");
2528 return;
2529 }
2530
2531 bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG);
2532 if (!bmparr) {
2533 ERR("out of memory\n");
2534 ExFreePool(checksums);
2535 return;
2536 }
2537
2538 RtlInitializeBitMap(&bmp, bmparr, len);
2539 RtlSetAllBits(&bmp);
2540
2541 searchkey.obj_id = EXTENT_CSUM_ID;
2542 searchkey.obj_type = TYPE_EXTENT_CSUM;
2543 searchkey.offset = address;
2544
2545 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
2546 if (!NT_SUCCESS(Status)) {
2547 ERR("find_item returned %08x\n", Status);
2548 ExFreePool(checksums);
2549 ExFreePool(bmparr);
2550 return;
2551 }
2552
2553 // set bit = free space, cleared bit = allocated sector
2554
2555 while (tp.item->key.offset < endaddr) {
2556 if (tp.item->key.offset >= startaddr) {
2557 if (tp.item->size > 0) {
2558 ULONG itemlen = (ULONG)min((len - (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size) * sizeof(UINT32), tp.item->size);
2559
2560 RtlCopyMemory(&checksums[(tp.item->key.offset - startaddr) / Vcb->superblock.sector_size], tp.item->data, itemlen);
2561 RtlClearBits(&bmp, (ULONG)((tp.item->key.offset - startaddr) / Vcb->superblock.sector_size), itemlen / sizeof(UINT32));
2562 }
2563
2564 Status = delete_tree_item(Vcb, &tp);
2565 if (!NT_SUCCESS(Status)) {
2566 ERR("delete_tree_item returned %08x\n", Status);
2567 ExFreePool(checksums);
2568 ExFreePool(bmparr);
2569 return;
2570 }
2571 }
2572
2573 if (find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) {
2574 tp = next_tp;
2575 } else
2576 break;
2577 }
2578
2579 if (!csum) { // deleted
2580 RtlSetBits(&bmp, (ULONG)((address - startaddr) / Vcb->superblock.sector_size), length);
2581 } else {
2582 RtlCopyMemory(&checksums[(address - startaddr) / Vcb->superblock.sector_size], csum, length * sizeof(UINT32));
2583 RtlClearBits(&bmp, (ULONG)((address - startaddr) / Vcb->superblock.sector_size), length);
2584 }
2585
2586 runlength = RtlFindFirstRunClear(&bmp, &index);
2587
2588 while (runlength != 0) {
2589 do {
2590 UINT16 rl;
2591 UINT64 off;
2592 UINT32* data;
2593
2594 if (runlength * sizeof(UINT32) > MAX_CSUM_SIZE)
2595 rl = MAX_CSUM_SIZE / sizeof(UINT32);
2596 else
2597 rl = (UINT16)runlength;
2598
2599 data = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * rl, ALLOC_TAG);
2600 if (!data) {
2601 ERR("out of memory\n");
2602 ExFreePool(bmparr);
2603 ExFreePool(checksums);
2604 return;
2605 }
2606
2607 RtlCopyMemory(data, &checksums[index], sizeof(UINT32) * rl);
2608
2609 off = startaddr + UInt32x32To64(index, Vcb->superblock.sector_size);
2610
2611 Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, data, sizeof(UINT32) * rl, NULL, Irp);
2612 if (!NT_SUCCESS(Status)) {
2613 ERR("insert_tree_item returned %08x\n", Status);
2614 ExFreePool(data);
2615 ExFreePool(bmparr);
2616 ExFreePool(checksums);
2617 return;
2618 }
2619
2620 runlength -= rl;
2621 index += rl;
2622 } while (runlength > 0);
2623
2624 runlength = RtlFindNextForwardRunClear(&bmp, index, &index);
2625 }
2626
2627 ExFreePool(bmparr);
2628 ExFreePool(checksums);
2629 }
2630 }
2631
2632 static NTSTATUS update_chunk_usage(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
2633 LIST_ENTRY *le = Vcb->chunks.Flink, *le2;
2634 chunk* c;
2635 KEY searchkey;
2636 traverse_ptr tp;
2637 BLOCK_GROUP_ITEM* bgi;
2638 NTSTATUS Status;
2639
2640 TRACE("(%p)\n", Vcb);
2641
2642 ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
2643
2644 while (le != &Vcb->chunks) {
2645 c = CONTAINING_RECORD(le, chunk, list_entry);
2646
2647 acquire_chunk_lock(c, Vcb);
2648
2649 if (!c->cache_loaded && (!IsListEmpty(&c->changed_extents) || c->used != c->oldused)) {
2650 Status = load_cache_chunk(Vcb, c, NULL);
2651
2652 if (!NT_SUCCESS(Status)) {
2653 ERR("load_cache_chunk returned %08x\n", Status);
2654 release_chunk_lock(c, Vcb);
2655 goto end;
2656 }
2657 }
2658
2659 le2 = c->changed_extents.Flink;
2660 while (le2 != &c->changed_extents) {
2661 LIST_ENTRY* le3 = le2->Flink;
2662 changed_extent* ce = CONTAINING_RECORD(le2, changed_extent, list_entry);
2663
2664 Status = flush_changed_extent(Vcb, c, ce, Irp, rollback);
2665 if (!NT_SUCCESS(Status)) {
2666 ERR("flush_changed_extent returned %08x\n", Status);
2667 release_chunk_lock(c, Vcb);
2668 goto end;
2669 }
2670
2671 le2 = le3;
2672 }
2673
2674 // This is usually done by update_chunks, but we have to check again in case any new chunks
2675 // have been allocated since.
2676 if (c->created) {
2677 Status = create_chunk(Vcb, c, Irp);
2678 if (!NT_SUCCESS(Status)) {
2679 ERR("create_chunk returned %08x\n", Status);
2680 release_chunk_lock(c, Vcb);
2681 goto end;
2682 }
2683 }
2684
2685 if (c->old_cache) {
2686 if (c->old_cache->dirty) {
2687 LIST_ENTRY batchlist;
2688
2689 InitializeListHead(&batchlist);
2690
2691 Status = flush_fcb(c->old_cache, FALSE, &batchlist, Irp);
2692 if (!NT_SUCCESS(Status)) {
2693 ERR("flush_fcb returned %08x\n", Status);
2694 release_chunk_lock(c, Vcb);
2695 clear_batch_list(Vcb, &batchlist);
2696 goto end;
2697 }
2698
2699 Status = commit_batch_list(Vcb, &batchlist, Irp);
2700 if (!NT_SUCCESS(Status)) {
2701 ERR("commit_batch_list returned %08x\n", Status);
2702 release_chunk_lock(c, Vcb);
2703 goto end;
2704 }
2705 }
2706
2707 free_fcb(c->old_cache);
2708
2709 if (c->old_cache->refcount == 0)
2710 reap_fcb(c->old_cache);
2711
2712 c->old_cache = NULL;
2713 }
2714
2715 if (c->used != c->oldused) {
2716 searchkey.obj_id = c->offset;
2717 searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
2718 searchkey.offset = c->chunk_item->size;
2719
2720 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
2721 if (!NT_SUCCESS(Status)) {
2722 ERR("error - find_item returned %08x\n", Status);
2723 release_chunk_lock(c, Vcb);
2724 goto end;
2725 }
2726
2727 if (keycmp(searchkey, tp.item->key)) {
2728 ERR("could not find (%llx,%x,%llx) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
2729 Status = STATUS_INTERNAL_ERROR;
2730 release_chunk_lock(c, Vcb);
2731 goto end;
2732 }
2733
2734 if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
2735 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
2736 Status = STATUS_INTERNAL_ERROR;
2737 release_chunk_lock(c, Vcb);
2738 goto end;
2739 }
2740
2741 bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
2742 if (!bgi) {
2743 ERR("out of memory\n");
2744 Status = STATUS_INSUFFICIENT_RESOURCES;
2745 release_chunk_lock(c, Vcb);
2746 goto end;
2747 }
2748
2749 RtlCopyMemory(bgi, tp.item->data, tp.item->size);
2750 bgi->used = c->used;
2751
2752 TRACE("adjusting usage of chunk %llx to %llx\n", c->offset, c->used);
2753
2754 Status = delete_tree_item(Vcb, &tp);
2755 if (!NT_SUCCESS(Status)) {
2756 ERR("delete_tree_item returned %08x\n", Status);
2757 ExFreePool(bgi);
2758 release_chunk_lock(c, Vcb);
2759 goto end;
2760 }
2761
2762 Status = insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, Irp);
2763 if (!NT_SUCCESS(Status)) {
2764 ERR("insert_tree_item returned %08x\n", Status);
2765 ExFreePool(bgi);
2766 release_chunk_lock(c, Vcb);
2767 goto end;
2768 }
2769
2770 TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
2771
2772 Vcb->superblock.bytes_used += c->used - c->oldused;
2773
2774 TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
2775
2776 c->oldused = c->used;
2777 }
2778
2779 release_chunk_lock(c, Vcb);
2780
2781 le = le->Flink;
2782 }
2783
2784 Status = STATUS_SUCCESS;
2785
2786 end:
2787 ExReleaseResourceLite(&Vcb->chunk_lock);
2788
2789 return Status;
2790 }
2791
2792 static void get_first_item(tree* t, KEY* key) {
2793 LIST_ENTRY* le;
2794
2795 le = t->itemlist.Flink;
2796 while (le != &t->itemlist) {
2797 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2798
2799 *key = td->key;
2800 return;
2801 }
2802 }
2803
2804 static NTSTATUS split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, UINT32 numitems, UINT32 size) {
2805 tree *nt, *pt;
2806 tree_data* td;
2807 tree_data* oldlastitem;
2808
2809 TRACE("splitting tree in %llx at (%llx,%x,%llx)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);
2810
2811 nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2812 if (!nt) {
2813 ERR("out of memory\n");
2814 return STATUS_INSUFFICIENT_RESOURCES;
2815 }
2816
2817 if (t->header.level > 0) {
2818 nt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
2819 if (!nt->nonpaged) {
2820 ERR("out of memory\n");
2821 ExFreePool(nt);
2822 return STATUS_INSUFFICIENT_RESOURCES;
2823 }
2824
2825 ExInitializeFastMutex(&nt->nonpaged->mutex);
2826 } else
2827 nt->nonpaged = NULL;
2828
2829 RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header));
2830 nt->header.address = 0;
2831 nt->header.generation = Vcb->superblock.generation;
2832 nt->header.num_items = t->header.num_items - numitems;
2833 nt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
2834
2835 nt->has_address = FALSE;
2836 nt->Vcb = Vcb;
2837 nt->parent = t->parent;
2838
2839 #ifdef DEBUG_PARANOID
2840 if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
2841 #endif
2842
2843 nt->root = t->root;
2844 nt->new_address = 0;
2845 nt->has_new_address = FALSE;
2846 nt->updated_extents = FALSE;
2847 nt->uniqueness_determined = TRUE;
2848 nt->is_unique = TRUE;
2849 nt->list_entry_hash.Flink = NULL;
2850 nt->buf = NULL;
2851 InitializeListHead(&nt->itemlist);
2852
2853 oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry);
2854
2855 nt->itemlist.Flink = &newfirstitem->list_entry;
2856 nt->itemlist.Blink = t->itemlist.Blink;
2857 nt->itemlist.Flink->Blink = &nt->itemlist;
2858 nt->itemlist.Blink->Flink = &nt->itemlist;
2859
2860 t->itemlist.Blink = &oldlastitem->list_entry;
2861 t->itemlist.Blink->Flink = &t->itemlist;
2862
2863 nt->size = t->size - size;
2864 t->size = size;
2865 t->header.num_items = numitems;
2866 nt->write = TRUE;
2867
2868 InsertTailList(&Vcb->trees, &nt->list_entry);
2869
2870 if (nt->header.level > 0) {
2871 LIST_ENTRY* le = nt->itemlist.Flink;
2872
2873 while (le != &nt->itemlist) {
2874 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
2875
2876 if (td2->treeholder.tree) {
2877 td2->treeholder.tree->parent = nt;
2878 #ifdef DEBUG_PARANOID
2879 if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
2880 #endif
2881 }
2882
2883 le = le->Flink;
2884 }
2885 } else {
2886 LIST_ENTRY* le = nt->itemlist.Flink;
2887
2888 while (le != &nt->itemlist) {
2889 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
2890
2891 if (!td2->inserted && td2->data) {
2892 UINT8* data = ExAllocatePoolWithTag(PagedPool, td2->size, ALLOC_TAG);
2893
2894 if (!data) {
2895 ERR("out of memory\n");
2896 return STATUS_INSUFFICIENT_RESOURCES;
2897 }
2898
2899 RtlCopyMemory(data, td2->data, td2->size);
2900 td2->data = data;
2901 td2->inserted = TRUE;
2902 }
2903
2904 le = le->Flink;
2905 }
2906 }
2907
2908 if (nt->parent) {
2909 td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
2910 if (!td) {
2911 ERR("out of memory\n");
2912 return STATUS_INSUFFICIENT_RESOURCES;
2913 }
2914
2915 td->key = newfirstitem->key;
2916
2917 InsertHeadList(&t->paritem->list_entry, &td->list_entry);
2918
2919 td->ignore = FALSE;
2920 td->inserted = TRUE;
2921 td->treeholder.tree = nt;
2922 nt->paritem = td;
2923
2924 nt->parent->header.num_items++;
2925 nt->parent->size += sizeof(internal_node);
2926
2927 goto end;
2928 }
2929
2930 TRACE("adding new tree parent\n");
2931
2932 if (nt->header.level == 255) {
2933 ERR("cannot add parent to tree at level 255\n");
2934 return STATUS_INTERNAL_ERROR;
2935 }
2936
2937 pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
2938 if (!pt) {
2939 ERR("out of memory\n");
2940 return STATUS_INSUFFICIENT_RESOURCES;
2941 }
2942
2943 pt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
2944 if (!pt->nonpaged) {
2945 ERR("out of memory\n");
2946 ExFreePool(pt);
2947 return STATUS_INSUFFICIENT_RESOURCES;
2948 }
2949
2950 ExInitializeFastMutex(&pt->nonpaged->mutex);
2951
2952 RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
2953 pt->header.address = 0;
2954 pt->header.num_items = 2;
2955 pt->header.level = nt->header.level + 1;
2956 pt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
2957
2958 pt->has_address = FALSE;
2959 pt->Vcb = Vcb;
2960 pt->parent = NULL;
2961 pt->paritem = NULL;
2962 pt->root = t->root;
2963 pt->new_address = 0;
2964 pt->has_new_address = FALSE;
2965 pt->updated_extents = FALSE;
2966 pt->size = pt->header.num_items * sizeof(internal_node);
2967 pt->uniqueness_determined = TRUE;
2968 pt->is_unique = TRUE;
2969 pt->list_entry_hash.Flink = NULL;
2970 pt->buf = NULL;
2971 InitializeListHead(&pt->itemlist);
2972
2973 InsertTailList(&Vcb->trees, &pt->list_entry);
2974
2975 td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
2976 if (!td) {
2977 ERR("out of memory\n");
2978 return STATUS_INSUFFICIENT_RESOURCES;
2979 }
2980
2981 get_first_item(t, &td->key);
2982 td->ignore = FALSE;
2983 td->inserted = FALSE;
2984 td->treeholder.address = 0;
2985 td->treeholder.generation = Vcb->superblock.generation;
2986 td->treeholder.tree = t;
2987 InsertTailList(&pt->itemlist, &td->list_entry);
2988 t->paritem = td;
2989
2990 td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
2991 if (!td) {
2992 ERR("out of memory\n");
2993 return STATUS_INSUFFICIENT_RESOURCES;
2994 }
2995
2996 td->key = newfirstitem->key;
2997 td->ignore = FALSE;
2998 td->inserted = FALSE;
2999 td->treeholder.address = 0;
3000 td->treeholder.generation = Vcb->superblock.generation;
3001 td->treeholder.tree = nt;
3002 InsertTailList(&pt->itemlist, &td->list_entry);
3003 nt->paritem = td;
3004
3005 pt->write = TRUE;
3006
3007 t->root->treeholder.tree = pt;
3008
3009 t->parent = pt;
3010 nt->parent = pt;
3011
3012 #ifdef DEBUG_PARANOID
3013 if (t->parent && t->parent->header.level <= t->header.level) int3;
3014 if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
3015 #endif
3016
3017 end:
3018 t->root->root_item.bytes_used += Vcb->superblock.node_size;
3019
3020 return STATUS_SUCCESS;
3021 }
3022
3023 static NTSTATUS split_tree(device_extension* Vcb, tree* t) {
3024 LIST_ENTRY* le;
3025 UINT32 size, ds, numitems;
3026
3027 size = 0;
3028 numitems = 0;
3029
3030 // FIXME - naïve implementation: maximizes number of filled trees
3031
3032 le = t->itemlist.Flink;
3033 while (le != &t->itemlist) {
3034 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3035
3036 if (!td->ignore) {
3037 if (t->header.level == 0)
3038 ds = sizeof(leaf_node) + td->size;
3039 else
3040 ds = sizeof(internal_node);
3041
3042 if (numitems == 0 && ds > Vcb->superblock.node_size - sizeof(tree_header)) {
3043 ERR("(%llx,%x,%llx) in tree %llx is too large (%x > %x)\n",
3044 td->key.obj_id, td->key.obj_type, td->key.offset, t->root->id,
3045 ds, Vcb->superblock.node_size - sizeof(tree_header));
3046 return STATUS_INTERNAL_ERROR;
3047 }
3048
3049 // FIXME - move back if previous item was deleted item with same key
3050 if (size + ds > Vcb->superblock.node_size - sizeof(tree_header))
3051 return split_tree_at(Vcb, t, td, numitems, size);
3052
3053 size += ds;
3054 numitems++;
3055 }
3056
3057 le = le->Flink;
3058 }
3059
3060 return STATUS_SUCCESS;
3061 }
3062
3063 BOOL is_tree_unique(device_extension* Vcb, tree* t, PIRP Irp) {
3064 KEY searchkey;
3065 traverse_ptr tp;
3066 NTSTATUS Status;
3067 BOOL ret = FALSE;
3068 EXTENT_ITEM* ei;
3069 UINT8* type;
3070
3071 if (t->uniqueness_determined)
3072 return t->is_unique;
3073
3074 if (t->parent && !is_tree_unique(Vcb, t->parent, Irp))
3075 goto end;
3076
3077 if (t->has_address) {
3078 searchkey.obj_id = t->header.address;
3079 searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
3080 searchkey.offset = 0xffffffffffffffff;
3081
3082 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
3083 if (!NT_SUCCESS(Status)) {
3084 ERR("error - find_item returned %08x\n", Status);
3085 goto end;
3086 }
3087
3088 if (tp.item->key.obj_id != t->header.address || (tp.item->key.obj_type != TYPE_METADATA_ITEM && tp.item->key.obj_type != TYPE_EXTENT_ITEM))
3089 goto end;
3090
3091 if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size == sizeof(EXTENT_ITEM_V0))
3092 goto end;
3093
3094 if (tp.item->size < sizeof(EXTENT_ITEM))
3095 goto end;
3096
3097 ei = (EXTENT_ITEM*)tp.item->data;
3098
3099 if (ei->refcount > 1)
3100 goto end;
3101
3102 if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) {
3103 EXTENT_ITEM2* ei2;
3104
3105 if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2))
3106 goto end;
3107
3108 ei2 = (EXTENT_ITEM2*)&ei[1];
3109 type = (UINT8*)&ei2[1];
3110 } else
3111 type = (UINT8*)&ei[1];
3112
3113 if (type >= tp.item->data + tp.item->size || *type != TYPE_TREE_BLOCK_REF)
3114 goto end;
3115 }
3116
3117 ret = TRUE;
3118
3119 end:
3120 t->is_unique = ret;
3121 t->uniqueness_determined = TRUE;
3122
3123 return ret;
3124 }
3125
3126 static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, BOOL* done, BOOL* done_deletions, PIRP Irp, LIST_ENTRY* rollback) {
3127 LIST_ENTRY* le;
3128 tree_data* nextparitem = NULL;
3129 NTSTATUS Status;
3130 tree *next_tree, *par;
3131
3132 *done = FALSE;
3133
3134 TRACE("trying to amalgamate tree in root %llx, level %x (size %u)\n", t->root->id, t->header.level, t->size);
3135
3136 // FIXME - doesn't capture everything, as it doesn't ascend
3137 le = t->paritem->list_entry.Flink;
3138 while (le != &t->parent->itemlist) {
3139 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3140
3141 if (!td->ignore) {
3142 nextparitem = td;
3143 break;
3144 }
3145
3146 le = le->Flink;
3147 }
3148
3149 if (!nextparitem)
3150 return STATUS_SUCCESS;
3151
3152 TRACE("nextparitem: key = %llx,%x,%llx\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset);
3153
3154 if (!nextparitem->treeholder.tree) {
3155 Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, NULL);
3156 if (!NT_SUCCESS(Status)) {
3157 ERR("do_load_tree returned %08x\n", Status);
3158 return Status;
3159 }
3160 }
3161
3162 if (!is_tree_unique(Vcb, nextparitem->treeholder.tree, Irp))
3163 return STATUS_SUCCESS;
3164
3165 next_tree = nextparitem->treeholder.tree;
3166
3167 if (!next_tree->updated_extents && next_tree->has_address) {
3168 Status = update_tree_extents(Vcb, next_tree, Irp, rollback);
3169 if (!NT_SUCCESS(Status)) {
3170 ERR("update_tree_extents returned %08x\n", Status);
3171 return Status;
3172 }
3173 }
3174
3175 if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) {
3176 // merge two trees into one
3177
3178 t->header.num_items += next_tree->header.num_items;
3179 t->size += next_tree->size;
3180
3181 if (next_tree->header.level > 0) {
3182 le = next_tree->itemlist.Flink;
3183
3184 while (le != &next_tree->itemlist) {
3185 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
3186
3187 if (td2->treeholder.tree) {
3188 td2->treeholder.tree->parent = t;
3189 #ifdef DEBUG_PARANOID
3190 if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
3191 #endif
3192 }
3193
3194 td2->inserted = TRUE;
3195 le = le->Flink;
3196 }
3197 } else {
3198 le = next_tree->itemlist.Flink;
3199
3200 while (le != &next_tree->itemlist) {
3201 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
3202
3203 if (!td2->inserted && td2->data) {
3204 UINT8* data = ExAllocatePoolWithTag(PagedPool, td2->size, ALLOC_TAG);
3205
3206 if (!data) {
3207 ERR("out of memory\n");
3208 return STATUS_INSUFFICIENT_RESOURCES;
3209 }
3210
3211 RtlCopyMemory(data, td2->data, td2->size);
3212 td2->data = data;
3213 td2->inserted = TRUE;
3214 }
3215
3216 le = le->Flink;
3217 }
3218 }
3219
3220 t->itemlist.Blink->Flink = next_tree->itemlist.Flink;
3221 t->itemlist.Blink->Flink->Blink = t->itemlist.Blink;
3222 t->itemlist.Blink = next_tree->itemlist.Blink;
3223 t->itemlist.Blink->Flink = &t->itemlist;
3224
3225 next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist;
3226
3227 next_tree->header.num_items = 0;
3228 next_tree->size = 0;
3229
3230 if (next_tree->has_new_address) { // delete associated EXTENT_ITEM
3231 Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback);
3232
3233 if (!NT_SUCCESS(Status)) {
3234 ERR("reduce_tree_extent returned %08x\n", Status);
3235 return Status;
3236 }
3237 } else if (next_tree->has_address) {
3238 Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback);
3239
3240 if (!NT_SUCCESS(Status)) {
3241 ERR("reduce_tree_extent returned %08x\n", Status);
3242 return Status;
3243 }
3244 }
3245
3246 if (!nextparitem->ignore) {
3247 nextparitem->ignore = TRUE;
3248 next_tree->parent->header.num_items--;
3249 next_tree->parent->size -= sizeof(internal_node);
3250
3251 *done_deletions = TRUE;
3252 }
3253
3254 par = next_tree->parent;
3255 while (par) {
3256 par->write = TRUE;
3257 par = par->parent;
3258 }
3259
3260 RemoveEntryList(&nextparitem->list_entry);
3261 ExFreePool(next_tree->paritem);
3262 next_tree->paritem = NULL;
3263
3264 next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
3265
3266 free_tree(next_tree);
3267
3268 *done = TRUE;
3269 } else {
3270 // rebalance by moving items from second tree into first
3271 ULONG avg_size = (t->size + next_tree->size) / 2;
3272 KEY firstitem = {0, 0, 0};
3273 BOOL changed = FALSE;
3274
3275 TRACE("attempting rebalance\n");
3276
3277 le = next_tree->itemlist.Flink;
3278 while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) {
3279 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3280 ULONG size;
3281
3282 if (!td->ignore) {
3283 if (next_tree->header.level == 0)
3284 size = sizeof(leaf_node) + td->size;
3285 else
3286 size = sizeof(internal_node);
3287 } else
3288 size = 0;
3289
3290 if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) {
3291 RemoveEntryList(&td->list_entry);
3292 InsertTailList(&t->itemlist, &td->list_entry);
3293
3294 if (next_tree->header.level > 0 && td->treeholder.tree) {
3295 td->treeholder.tree->parent = t;
3296 #ifdef DEBUG_PARANOID
3297 if (td->treeholder.tree->parent && td->treeholder.tree->parent->header.level <= td->treeholder.tree->header.level) int3;
3298 #endif
3299 } else if (next_tree->header.level == 0 && !td->inserted && td->size > 0) {
3300 UINT8* data = ExAllocatePoolWithTag(PagedPool, td->size, ALLOC_TAG);
3301
3302 if (!data) {
3303 ERR("out of memory\n");
3304 return STATUS_INSUFFICIENT_RESOURCES;
3305 }
3306
3307 RtlCopyMemory(data, td->data, td->size);
3308 td->data = data;
3309 }
3310
3311 td->inserted = TRUE;
3312
3313 if (!td->ignore) {
3314 next_tree->size -= size;
3315 t->size += size;
3316 next_tree->header.num_items--;
3317 t->header.num_items++;
3318 }
3319
3320 changed = TRUE;
3321 } else
3322 break;
3323
3324 le = next_tree->itemlist.Flink;
3325 }
3326
3327 le = next_tree->itemlist.Flink;
3328 while (le != &next_tree->itemlist) {
3329 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3330
3331 if (!td->ignore) {
3332 firstitem = td->key;
3333 break;
3334 }
3335
3336 le = le->Flink;
3337 }
3338
3339 // FIXME - once ascension is working, make this work with parent's parent, etc.
3340 if (next_tree->paritem)
3341 next_tree->paritem->key = firstitem;
3342
3343 par = next_tree;
3344 while (par) {
3345 par->write = TRUE;
3346 par = par->parent;
3347 }
3348
3349 if (changed)
3350 *done = TRUE;
3351 }
3352
3353 return STATUS_SUCCESS;
3354 }
3355
3356 static NTSTATUS update_extent_level(device_extension* Vcb, UINT64 address, tree* t, UINT8 level, PIRP Irp) {
3357 KEY searchkey;
3358 traverse_ptr tp;
3359 NTSTATUS Status;
3360
3361 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
3362 searchkey.obj_id = address;
3363 searchkey.obj_type = TYPE_METADATA_ITEM;
3364 searchkey.offset = t->header.level;
3365
3366 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
3367 if (!NT_SUCCESS(Status)) {
3368 ERR("error - find_item returned %08x\n", Status);
3369 return Status;
3370 }
3371
3372 if (!keycmp(tp.item->key, searchkey)) {
3373 EXTENT_ITEM_SKINNY_METADATA* eism;
3374
3375 if (tp.item->size > 0) {
3376 eism = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
3377
3378 if (!eism) {
3379 ERR("out of memory\n");
3380 return STATUS_INSUFFICIENT_RESOURCES;
3381 }
3382
3383 RtlCopyMemory(eism, tp.item->data, tp.item->size);
3384 } else
3385 eism = NULL;
3386
3387 Status = delete_tree_item(Vcb, &tp);
3388 if (!NT_SUCCESS(Status)) {
3389 ERR("delete_tree_item returned %08x\n", Status);
3390 if (eism) ExFreePool(eism);
3391 return Status;
3392 }
3393
3394 Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, tp.item->size, NULL, Irp);
3395 if (!NT_SUCCESS(Status)) {
3396 ERR("insert_tree_item returned %08x\n", Status);
3397 if (eism) ExFreePool(eism);
3398 return Status;
3399 }
3400
3401 return STATUS_SUCCESS;
3402 }
3403 }
3404
3405 searchkey.obj_id = address;
3406 searchkey.obj_type = TYPE_EXTENT_ITEM;
3407 searchkey.offset = 0xffffffffffffffff;
3408
3409 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
3410 if (!NT_SUCCESS(Status)) {
3411 ERR("error - find_item returned %08x\n", Status);
3412 return Status;
3413 }
3414
3415 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
3416 EXTENT_ITEM_TREE* eit;
3417
3418 if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
3419 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
3420 return STATUS_INTERNAL_ERROR;
3421 }
3422
3423 eit = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
3424
3425 if (!eit) {
3426 ERR("out of memory\n");
3427 return STATUS_INSUFFICIENT_RESOURCES;
3428 }
3429
3430 RtlCopyMemory(eit, tp.item->data, tp.item->size);
3431
3432 Status = delete_tree_item(Vcb, &tp);
3433 if (!NT_SUCCESS(Status)) {
3434 ERR("delete_tree_item returned %08x\n", Status);
3435 ExFreePool(eit);
3436 return Status;
3437 }
3438
3439 eit->level = level;
3440
3441 Status = insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, eit, tp.item->size, NULL, Irp);
3442 if (!NT_SUCCESS(Status)) {
3443 ERR("insert_tree_item returned %08x\n", Status);
3444 ExFreePool(eit);
3445 return Status;
3446 }
3447
3448 return STATUS_SUCCESS;
3449 }
3450
3451 ERR("could not find EXTENT_ITEM for address %llx\n", address);
3452
3453 return STATUS_INTERNAL_ERROR;
3454 }
3455
3456 static NTSTATUS update_tree_extents_recursive(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
3457 NTSTATUS Status;
3458
3459 if (t->parent && !t->parent->updated_extents && t->parent->has_address) {
3460 Status = update_tree_extents_recursive(Vcb, t->parent, Irp, rollback);
3461 if (!NT_SUCCESS(Status))
3462 return Status;
3463 }
3464
3465 Status = update_tree_extents(Vcb, t, Irp, rollback);
3466 if (!NT_SUCCESS(Status)) {
3467 ERR("update_tree_extents returned %08x\n", Status);
3468 return Status;
3469 }
3470
3471 return STATUS_SUCCESS;
3472 }
3473
3474 static NTSTATUS do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
3475 ULONG level, max_level;
3476 UINT32 min_size;
3477 BOOL empty, done_deletions = FALSE;
3478 NTSTATUS Status;
3479 tree* t;
3480
3481 TRACE("(%p)\n", Vcb);
3482
3483 max_level = 0;
3484
3485 for (level = 0; level <= 255; level++) {
3486 LIST_ENTRY *le, *nextle;
3487
3488 empty = TRUE;
3489
3490 TRACE("doing level %u\n", level);
3491
3492 le = Vcb->trees.Flink;
3493
3494 while (le != &Vcb->trees) {
3495 t = CONTAINING_RECORD(le, tree, list_entry);
3496
3497 nextle = le->Flink;
3498
3499 if (t->write && t->header.level == level) {
3500 empty = FALSE;
3501
3502 if (t->header.num_items == 0) {
3503 if (t->parent) {
3504 done_deletions = TRUE;
3505
3506 TRACE("deleting tree in root %llx\n", t->root->id);
3507
3508 t->root->root_item.bytes_used -= Vcb->superblock.node_size;
3509
3510 if (t->has_new_address) { // delete associated EXTENT_ITEM
3511 Status = reduce_tree_extent(Vcb, t->new_address, t, t->parent->header.tree_id, t->header.level, Irp, rollback);
3512
3513 if (!NT_SUCCESS(Status)) {
3514 ERR("reduce_tree_extent returned %08x\n", Status);
3515 return Status;
3516 }
3517
3518 t->has_new_address = FALSE;
3519 } else if (t->has_address) {
3520 Status = reduce_tree_extent(Vcb,t->header.address, t, t->parent->header.tree_id, t->header.level, Irp, rollback);
3521
3522 if (!NT_SUCCESS(Status)) {
3523 ERR("reduce_tree_extent returned %08x\n", Status);
3524 return Status;
3525 }
3526
3527 t->has_address = FALSE;
3528 }
3529
3530 if (!t->paritem->ignore) {
3531 t->paritem->ignore = TRUE;
3532 t->parent->header.num_items--;
3533 t->parent->size -= sizeof(internal_node);
3534 }
3535
3536 RemoveEntryList(&t->paritem->list_entry);
3537 ExFreePool(t->paritem);
3538 t->paritem = NULL;
3539
3540 free_tree(t);
3541 } else if (t->header.level != 0) {
3542 if (t->has_new_address) {
3543 Status = update_extent_level(Vcb, t->new_address, t, 0, Irp);
3544
3545 if (!NT_SUCCESS(Status)) {
3546 ERR("update_extent_level returned %08x\n", Status);
3547 return Status;
3548 }
3549 }
3550
3551 t->header.level = 0;
3552 }
3553 } else if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
3554 TRACE("splitting overlarge tree (%x > %x)\n", t->size, Vcb->superblock.node_size - sizeof(tree_header));
3555
3556 if (!t->updated_extents && t->has_address) {
3557 Status = update_tree_extents_recursive(Vcb, t, Irp, rollback);
3558 if (!NT_SUCCESS(Status)) {
3559 ERR("update_tree_extents_recursive returned %08x\n", Status);
3560 return Status;
3561 }
3562 }
3563
3564 Status = split_tree(Vcb, t);
3565
3566 if (!NT_SUCCESS(Status)) {
3567 ERR("split_tree returned %08x\n", Status);
3568 return Status;
3569 }
3570 }
3571 }
3572
3573 le = nextle;
3574 }
3575
3576 if (!empty) {
3577 max_level = level;
3578 } else {
3579 TRACE("nothing found for level %u\n", level);
3580 break;
3581 }
3582 }
3583
3584 min_size = (Vcb->superblock.node_size - sizeof(tree_header)) / 2;
3585
3586 for (level = 0; level <= max_level; level++) {
3587 LIST_ENTRY* le;
3588
3589 le = Vcb->trees.Flink;
3590
3591 while (le != &Vcb->trees) {
3592 t = CONTAINING_RECORD(le, tree, list_entry);
3593
3594 if (t->write && t->header.level == level && t->header.num_items > 0 && t->parent && t->size < min_size &&
3595 t->root->id != BTRFS_ROOT_FREE_SPACE && is_tree_unique(Vcb, t, Irp)) {
3596 BOOL done;
3597
3598 do {
3599 Status = try_tree_amalgamate(Vcb, t, &done, &done_deletions, Irp, rollback);
3600 if (!NT_SUCCESS(Status)) {
3601 ERR("try_tree_amalgamate returned %08x\n", Status);
3602 return Status;
3603 }
3604 } while (done && t->size < min_size);
3605 }
3606
3607 le = le->Flink;
3608 }
3609 }
3610
3611 // simplify trees if top tree only has one entry
3612
3613 if (done_deletions) {
3614 for (level = max_level; level > 0; level--) {
3615 LIST_ENTRY *le, *nextle;
3616
3617 le = Vcb->trees.Flink;
3618 while (le != &Vcb->trees) {
3619 nextle = le->Flink;
3620 t = CONTAINING_RECORD(le, tree, list_entry);
3621
3622 if (t->write && t->header.level == level) {
3623 if (!t->parent && t->header.num_items == 1) {
3624 LIST_ENTRY* le2 = t->itemlist.Flink;
3625 tree_data* td = NULL;
3626 tree* child_tree = NULL;
3627
3628 while (le2 != &t->itemlist) {
3629 td = CONTAINING_RECORD(le2, tree_data, list_entry);
3630 if (!td->ignore)
3631 break;
3632 le2 = le2->Flink;
3633 }
3634
3635 TRACE("deleting top-level tree in root %llx with one item\n", t->root->id);
3636
3637 if (t->has_new_address) { // delete associated EXTENT_ITEM
3638 Status = reduce_tree_extent(Vcb, t->new_address, t, t->header.tree_id, t->header.level, Irp, rollback);
3639
3640 if (!NT_SUCCESS(Status)) {
3641 ERR("reduce_tree_extent returned %08x\n", Status);
3642 return Status;
3643 }
3644
3645 t->has_new_address = FALSE;
3646 } else if (t->has_address) {
3647 Status = reduce_tree_extent(Vcb,t->header.address, t, t->header.tree_id, t->header.level, Irp, rollback);
3648
3649 if (!NT_SUCCESS(Status)) {
3650 ERR("reduce_tree_extent returned %08x\n", Status);
3651 return Status;
3652 }
3653
3654 t->has_address = FALSE;
3655 }
3656
3657 if (!td->treeholder.tree) { // load first item if not already loaded
3658 KEY searchkey = {0,0,0};
3659 traverse_ptr tp;
3660
3661 Status = find_item(Vcb, t->root, &tp, &searchkey, FALSE, Irp);
3662 if (!NT_SUCCESS(Status)) {
3663 ERR("error - find_item returned %08x\n", Status);
3664 return Status;
3665 }
3666 }
3667
3668 child_tree = td->treeholder.tree;
3669
3670 if (child_tree) {
3671 child_tree->parent = NULL;
3672 child_tree->paritem = NULL;
3673 }
3674
3675 t->root->root_item.bytes_used -= Vcb->superblock.node_size;
3676
3677 free_tree(t);
3678
3679 if (child_tree)
3680 child_tree->root->treeholder.tree = child_tree;
3681 }
3682 }
3683
3684 le = nextle;
3685 }
3686 }
3687 }
3688
3689 return STATUS_SUCCESS;
3690 }
3691
3692 static NTSTATUS remove_root_extents(device_extension* Vcb, root* r, tree_holder* th, UINT8 level, tree* parent, PIRP Irp, LIST_ENTRY* rollback) {
3693 NTSTATUS Status;
3694
3695 if (!th->tree) {
3696 UINT8* buf;
3697 chunk* c;
3698
3699 buf = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
3700 if (!buf) {
3701 ERR("out of memory\n");
3702 return STATUS_INSUFFICIENT_RESOURCES;
3703 }
3704
3705 Status = read_data(Vcb, th->address, Vcb->superblock.node_size, NULL, TRUE, buf, NULL,
3706 &c, Irp, th->generation, FALSE, NormalPagePriority);
3707 if (!NT_SUCCESS(Status)) {
3708 ERR("read_data returned 0x%08x\n", Status);
3709 ExFreePool(buf);
3710 return Status;
3711 }
3712
3713 Status = load_tree(Vcb, th->address, buf, r, &th->tree);
3714
3715 if (!th->tree || th->tree->buf != buf)
3716 ExFreePool(buf);
3717
3718 if (!NT_SUCCESS(Status)) {
3719 ERR("load_tree(%llx) returned %08x\n", th->address, Status);
3720 return Status;
3721 }
3722 }
3723
3724 if (level > 0) {
3725 LIST_ENTRY* le = th->tree->itemlist.Flink;
3726
3727 while (le != &th->tree->itemlist) {
3728 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
3729
3730 if (!td->ignore) {
3731 Status = remove_root_extents(Vcb, r, &td->treeholder, th->tree->header.level - 1, th->tree, Irp, rollback);
3732
3733 if (!NT_SUCCESS(Status)) {
3734 ERR("remove_root_extents returned %08x\n", Status);
3735 return Status;
3736 }
3737 }
3738
3739 le = le->Flink;
3740 }
3741 }
3742
3743 if (th->tree && !th->tree->updated_extents && th->tree->has_address) {
3744 Status = update_tree_extents(Vcb, th->tree, Irp, rollback);
3745 if (!NT_SUCCESS(Status)) {
3746 ERR("update_tree_extents returned %08x\n", Status);
3747 return Status;
3748 }
3749 }
3750
3751 if (!th->tree || th->tree->has_address) {
3752 Status = reduce_tree_extent(Vcb, th->address, NULL, parent ? parent->header.tree_id : r->id, level, Irp, rollback);
3753
3754 if (!NT_SUCCESS(Status)) {
3755 ERR("reduce_tree_extent(%llx) returned %08x\n", th->address, Status);
3756 return Status;
3757 }
3758 }
3759
3760 return STATUS_SUCCESS;
3761 }
3762
3763 static NTSTATUS drop_root(device_extension* Vcb, root* r, PIRP Irp, LIST_ENTRY* rollback) {
3764 NTSTATUS Status;
3765 KEY searchkey;
3766 traverse_ptr tp;
3767
3768 Status = remove_root_extents(Vcb, r, &r->treeholder, r->root_item.root_level, NULL, Irp, rollback);
3769 if (!NT_SUCCESS(Status)) {
3770 ERR("remove_root_extents returned %08x\n", Status);
3771 return Status;
3772 }
3773
3774 // remove entries in uuid root (tree 9)
3775 if (Vcb->uuid_root) {
3776 RtlCopyMemory(&searchkey.obj_id, &r->root_item.uuid.uuid[0], sizeof(UINT64));
3777 searchkey.obj_type = TYPE_SUBVOL_UUID;
3778 RtlCopyMemory(&searchkey.offset, &r->root_item.uuid.uuid[sizeof(UINT64)], sizeof(UINT64));
3779
3780 if (searchkey.obj_id != 0 || searchkey.offset != 0) {
3781 Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp);
3782 if (!NT_SUCCESS(Status)) {
3783 WARN("find_item returned %08x\n", Status);
3784 } else {
3785 if (!keycmp(tp.item->key, searchkey)) {
3786 Status = delete_tree_item(Vcb, &tp);
3787 if (!NT_SUCCESS(Status)) {
3788 ERR("delete_tree_item returned %08x\n", Status);
3789 return Status;
3790 }
3791 } else
3792 WARN("could not find (%llx,%x,%llx) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
3793 }
3794 }
3795
3796 if (r->root_item.rtransid > 0) {
3797 RtlCopyMemory(&searchkey.obj_id, &r->root_item.received_uuid.uuid[0], sizeof(UINT64));
3798 searchkey.obj_type = TYPE_SUBVOL_REC_UUID;
3799 RtlCopyMemory(&searchkey.offset, &r->root_item.received_uuid.uuid[sizeof(UINT64)], sizeof(UINT64));
3800
3801 Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp);
3802 if (!NT_SUCCESS(Status))
3803 WARN("find_item returned %08x\n", Status);
3804 else {
3805 if (!keycmp(tp.item->key, searchkey)) {
3806 if (tp.item->size == sizeof(UINT64)) {
3807 UINT64* id = (UINT64*)tp.item->data;
3808
3809 if (*id == r->id) {
3810 Status = delete_tree_item(Vcb, &tp);
3811 if (!NT_SUCCESS(Status)) {
3812 ERR("delete_tree_item returned %08x\n", Status);
3813 return Status;
3814 }
3815 }
3816 } else if (tp.item->size > sizeof(UINT64)) {
3817 ULONG i;
3818 UINT64* ids = (UINT64*)tp.item->data;
3819
3820 for (i = 0; i < tp.item->size / sizeof(UINT64); i++) {
3821 if (ids[i] == r->id) {
3822 UINT64* ne;
3823
3824 ne = ExAllocatePoolWithTag(PagedPool, tp.item->size - sizeof(UINT64), ALLOC_TAG);
3825 if (!ne) {
3826 ERR("out of memory\n");
3827 return STATUS_INSUFFICIENT_RESOURCES;
3828 }
3829
3830 if (i > 0)
3831 RtlCopyMemory(ne, ids, sizeof(UINT64) * i);
3832
3833 if ((i + 1) * sizeof(UINT64) < tp.item->size)
3834 RtlCopyMemory(&ne[i], &ids[i + 1], tp.item->size - ((i + 1) * sizeof(UINT64)));
3835
3836 Status = delete_tree_item(Vcb, &tp);
3837 if (!NT_SUCCESS(Status)) {
3838 ERR("delete_tree_item returned %08x\n", Status);
3839 ExFreePool(ne);
3840 return Status;
3841 }
3842
3843 Status = insert_tree_item(Vcb, Vcb->uuid_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
3844 ne, tp.item->size - sizeof(UINT64), NULL, Irp);
3845 if (!NT_SUCCESS(Status)) {
3846 ERR("insert_tree_item returned %08x\n", Status);
3847 ExFreePool(ne);
3848 return Status;
3849 }
3850
3851 break;
3852 }
3853 }
3854 }
3855 } else
3856 WARN("could not find (%llx,%x,%llx) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
3857 }
3858 }
3859 }
3860
3861 // delete ROOT_ITEM
3862
3863 searchkey.obj_id = r->id;
3864 searchkey.obj_type = TYPE_ROOT_ITEM;
3865 searchkey.offset = 0xffffffffffffffff;
3866
3867 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
3868 if (!NT_SUCCESS(Status)) {
3869 ERR("find_item returned %08x\n", Status);
3870 return Status;
3871 }
3872
3873 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
3874 Status = delete_tree_item(Vcb, &tp);
3875
3876 if (!NT_SUCCESS(Status)) {
3877 ERR("delete_tree_item returned %08x\n", Status);
3878 return Status;
3879 }
3880 } else
3881 WARN("could not find (%llx,%x,%llx) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
3882
3883 // delete items in tree cache
3884
3885 free_trees_root(Vcb, r);
3886
3887 return STATUS_SUCCESS;
3888 }
3889
3890 static NTSTATUS drop_roots(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
3891 LIST_ENTRY *le = Vcb->drop_roots.Flink, *le2;
3892 NTSTATUS Status;
3893
3894 while (le != &Vcb->drop_roots) {
3895 root* r = CONTAINING_RECORD(le, root, list_entry);
3896
3897 le2 = le->Flink;
3898
3899 Status = drop_root(Vcb, r, Irp, rollback);
3900 if (!NT_SUCCESS(Status)) {
3901 ERR("drop_root(%llx) returned %08x\n", r->id, Status);
3902 return Status;
3903 }
3904
3905 le = le2;
3906 }
3907
3908 return STATUS_SUCCESS;
3909 }
3910
3911 NTSTATUS update_dev_item(device_extension* Vcb, device* device, PIRP Irp) {
3912 KEY searchkey;
3913 traverse_ptr tp;
3914 DEV_ITEM* di;
3915 NTSTATUS Status;
3916
3917 searchkey.obj_id = 1;
3918 searchkey.obj_type = TYPE_DEV_ITEM;
3919 searchkey.offset = device->devitem.dev_id;
3920
3921 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp);
3922 if (!NT_SUCCESS(Status)) {
3923 ERR("error - find_item returned %08x\n", Status);
3924 return Status;
3925 }
3926
3927 if (keycmp(tp.item->key, searchkey)) {
3928 ERR("error - could not find DEV_ITEM for device %llx\n", device->devitem.dev_id);
3929 return STATUS_INTERNAL_ERROR;
3930 }
3931
3932 Status = delete_tree_item(Vcb, &tp);
3933 if (!NT_SUCCESS(Status)) {
3934 ERR("delete_tree_item returned %08x\n", Status);
3935 return Status;
3936 }
3937
3938 di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
3939 if (!di) {
3940 ERR("out of memory\n");
3941 return STATUS_INSUFFICIENT_RESOURCES;
3942 }
3943
3944 RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM));
3945
3946 Status = insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp);
3947 if (!NT_SUCCESS(Status)) {
3948 ERR("insert_tree_item returned %08x\n", Status);
3949 ExFreePool(di);
3950 return Status;
3951 }
3952
3953 return STATUS_SUCCESS;
3954 }
3955
3956 static void regen_bootstrap(device_extension* Vcb) {
3957 sys_chunk* sc2;
3958 USHORT i = 0;
3959 LIST_ENTRY* le;
3960
3961 i = 0;
3962 le = Vcb->sys_chunks.Flink;
3963 while (le != &Vcb->sys_chunks) {
3964 sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
3965
3966 TRACE("%llx,%x,%llx\n", sc2->key.obj_id, sc2->key.obj_type, sc2->key.offset);
3967
3968 RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], &sc2->key, sizeof(KEY));
3969 i += sizeof(KEY);
3970
3971 RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], sc2->data, sc2->size);
3972 i += sc2->size;
3973
3974 le = le->Flink;
3975 }
3976 }
3977
3978 static NTSTATUS add_to_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset, void* data, UINT16 size) {
3979 sys_chunk* sc;
3980 LIST_ENTRY* le;
3981
3982 if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) {
3983 ERR("error - bootstrap is full\n");
3984 return STATUS_INTERNAL_ERROR;
3985 }
3986
3987 sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG);
3988 if (!sc) {
3989 ERR("out of memory\n");
3990 return STATUS_INSUFFICIENT_RESOURCES;
3991 }
3992
3993 sc->key.obj_id = obj_id;
3994 sc->key.obj_type = obj_type;
3995 sc->key.offset = offset;
3996 sc->size = size;
3997 sc->data = ExAllocatePoolWithTag(PagedPool, sc->size, ALLOC_TAG);
3998 if (!sc->data) {
3999 ERR("out of memory\n");
4000 ExFreePool(sc);
4001 return STATUS_INSUFFICIENT_RESOURCES;
4002 }
4003
4004 RtlCopyMemory(sc->data, data, sc->size);
4005
4006 le = Vcb->sys_chunks.Flink;
4007 while (le != &Vcb->sys_chunks) {
4008 sys_chunk* sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
4009
4010 if (keycmp(sc2->key, sc->key) == 1)
4011 break;
4012
4013 le = le->Flink;
4014 }
4015 InsertTailList(le, &sc->list_entry);
4016
4017 Vcb->superblock.n += sizeof(KEY) + size;
4018
4019 regen_bootstrap(Vcb);
4020
4021 return STATUS_SUCCESS;
4022 }
4023
4024 static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp) {
4025 CHUNK_ITEM* ci;
4026 CHUNK_ITEM_STRIPE* cis;
4027 BLOCK_GROUP_ITEM* bgi;
4028 UINT16 i, factor;
4029 NTSTATUS Status;
4030
4031 ci = ExAllocatePoolWithTag(PagedPool, c->size, ALLOC_TAG);
4032 if (!ci) {
4033 ERR("out of memory\n");
4034 return STATUS_INSUFFICIENT_RESOURCES;
4035 }
4036
4037 RtlCopyMemory(ci, c->chunk_item, c->size);
4038
4039 Status = insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size, NULL, Irp);
4040 if (!NT_SUCCESS(Status)) {
4041 ERR("insert_tree_item failed\n");
4042 ExFreePool(ci);
4043 return Status;
4044 }
4045
4046 if (c->chunk_item->type & BLOCK_FLAG_SYSTEM) {
4047 Status = add_to_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size);
4048 if (!NT_SUCCESS(Status)) {
4049 ERR("add_to_bootstrap returned %08x\n", Status);
4050 return Status;
4051 }
4052 }
4053
4054 // add BLOCK_GROUP_ITEM to tree 2
4055
4056 bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG);
4057 if (!bgi) {
4058 ERR("out of memory\n");
4059 return STATUS_INSUFFICIENT_RESOURCES;
4060 }
4061
4062 bgi->used = c->used;
4063 bgi->chunk_tree = 0x100;
4064 bgi->flags = c->chunk_item->type;
4065
4066 Status = insert_tree_item(Vcb, Vcb->extent_root, c->offset, TYPE_BLOCK_GROUP_ITEM, c->chunk_item->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, Irp);
4067 if (!NT_SUCCESS(Status)) {
4068 ERR("insert_tree_item failed\n");
4069 ExFreePool(bgi);
4070 return Status;
4071 }
4072
4073 if (c->chunk_item->type & BLOCK_FLAG_RAID0)
4074 factor = c->chunk_item->num_stripes;
4075 else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
4076 factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
4077 else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
4078 factor = c->chunk_item->num_stripes - 1;
4079 else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
4080 factor = c->chunk_item->num_stripes - 2;
4081 else // SINGLE, DUPLICATE, RAID1
4082 factor = 1;
4083
4084 // add DEV_EXTENTs to tree 4
4085
4086 cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
4087
4088 for (i = 0; i < c->chunk_item->num_stripes; i++) {
4089 DEV_EXTENT* de;
4090
4091 de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG);
4092 if (!de) {
4093 ERR("out of memory\n");
4094 return STATUS_INSUFFICIENT_RESOURCES;
4095 }
4096
4097 de->chunktree = Vcb->chunk_root->id;
4098 de->objid = 0x100;
4099 de->address = c->offset;
4100 de->length = c->chunk_item->size / factor;
4101 de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid;
4102
4103 Status = insert_tree_item(Vcb, Vcb->dev_root, c->devices[i]->devitem.dev_id, TYPE_DEV_EXTENT, cis[i].offset, de, sizeof(DEV_EXTENT), NULL, Irp);
4104 if (!NT_SUCCESS(Status)) {
4105 ERR("insert_tree_item returned %08x\n", Status);
4106 ExFreePool(de);
4107 return Status;
4108 }
4109
4110 // FIXME - no point in calling this twice for the same device
4111 Status = update_dev_item(Vcb, c->devices[i], Irp);
4112 if (!NT_SUCCESS(Status)) {
4113 ERR("update_dev_item returned %08x\n", Status);
4114 return Status;
4115 }
4116 }
4117
4118 c->created = FALSE;
4119 c->oldused = c->used;
4120
4121 return STATUS_SUCCESS;
4122 }
4123
4124 static void remove_from_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset) {
4125 sys_chunk* sc2;
4126 LIST_ENTRY* le;
4127
4128 le = Vcb->sys_chunks.Flink;
4129 while (le != &Vcb->sys_chunks) {
4130 sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
4131
4132 if (sc2->key.obj_id == obj_id && sc2->key.obj_type == obj_type && sc2->key.offset == offset) {
4133 RemoveEntryList(&sc2->list_entry);
4134
4135 Vcb->superblock.n -= sizeof(KEY) + sc2->size;
4136
4137 ExFreePool(sc2->data);
4138 ExFreePool(sc2);
4139 regen_bootstrap(Vcb);
4140 return;
4141 }
4142
4143 le = le->Flink;
4144 }
4145 }
4146
4147 static NTSTATUS set_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, UINT64 inode, char* name, UINT16 namelen,
4148 UINT32 crc32, UINT8* data, UINT16 datalen) {
4149 NTSTATUS Status;
4150 UINT16 xasize;
4151 DIR_ITEM* xa;
4152
4153 TRACE("(%p, %llx, %llx, %.*s, %08x, %p, %u)\n", Vcb, subvol->id, inode, namelen, name, crc32, data, datalen);
4154
4155 xasize = (UINT16)offsetof(DIR_ITEM, name[0]) + namelen + datalen;
4156
4157 xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG);
4158 if (!xa) {
4159 ERR("out of memory\n");
4160 return STATUS_INSUFFICIENT_RESOURCES;
4161 }
4162
4163 xa->key.obj_id = 0;
4164 xa->key.obj_type = 0;
4165 xa->key.offset = 0;
4166 xa->transid = Vcb->superblock.generation;
4167 xa->m = datalen;
4168 xa->n = namelen;
4169 xa->type = BTRFS_TYPE_EA;
4170 RtlCopyMemory(xa->name, name, namelen);
4171 RtlCopyMemory(xa->name + namelen, data, datalen);
4172
4173 Status = insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_SetXattr);
4174 if (!NT_SUCCESS(Status)) {
4175 ERR("insert_tree_item_batch returned %08x\n", Status);
4176 ExFreePool(xa);
4177 return Status;
4178 }
4179
4180 return STATUS_SUCCESS;
4181 }
4182
4183 static NTSTATUS delete_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, UINT64 inode, char* name,
4184 UINT16 namelen, UINT32 crc32) {
4185 NTSTATUS Status;
4186 UINT16 xasize;
4187 DIR_ITEM* xa;
4188
4189 TRACE("(%p, %llx, %llx, %.*s, %08x)\n", Vcb, subvol->id, inode, namelen, name, crc32);
4190
4191 xasize = (UINT16)offsetof(DIR_ITEM, name[0]) + namelen;
4192
4193 xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG);
4194 if (!xa) {
4195 ERR("out of memory\n");
4196 return STATUS_INSUFFICIENT_RESOURCES;
4197 }
4198
4199 xa->key.obj_id = 0;
4200 xa->key.obj_type = 0;
4201 xa->key.offset = 0;
4202 xa->transid = Vcb->superblock.generation;
4203 xa->m = 0;
4204 xa->n = namelen;
4205 xa->type = BTRFS_TYPE_EA;
4206 RtlCopyMemory(xa->name, name, namelen);
4207
4208 Status = insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_DeleteXattr);
4209 if (!NT_SUCCESS(Status)) {
4210 ERR("insert_tree_item_batch returned %08x\n", Status);
4211 ExFreePool(xa);
4212 return Status;
4213 }
4214
4215 return STATUS_SUCCESS;
4216 }
4217
4218 static NTSTATUS insert_sparse_extent(fcb* fcb, LIST_ENTRY* batchlist, UINT64 start, UINT64 length) {
4219 NTSTATUS Status;
4220 EXTENT_DATA* ed;
4221 EXTENT_DATA2* ed2;
4222
4223 TRACE("((%llx, %llx), %llx, %llx)\n", fcb->subvol->id, fcb->inode, start, length);
4224
4225 ed = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
4226 if (!ed) {
4227 ERR("out of memory\n");
4228 return STATUS_INSUFFICIENT_RESOURCES;
4229 }
4230
4231 ed->generation = fcb->Vcb->superblock.generation;
4232 ed->decoded_size = length;
4233 ed->compression = BTRFS_COMPRESSION_NONE;
4234 ed->encryption = BTRFS_ENCRYPTION_NONE;
4235 ed->encoding = BTRFS_ENCODING_NONE;
4236 ed->type = EXTENT_TYPE_REGULAR;
4237
4238 ed2 = (EXTENT_DATA2*)ed->data;
4239 ed2->address = 0;
4240 ed2->size = 0;
4241 ed2->offset = 0;
4242 ed2->num_bytes = length;
4243
4244 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, start, ed, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), Batch_Insert);
4245 if (!NT_SUCCESS(Status)) {
4246 ERR("insert_tree_item_batch returned %08x\n", Status);
4247 ExFreePool(ed);
4248 return Status;
4249 }
4250
4251 return STATUS_SUCCESS;
4252 }
4253
4254 #ifdef _MSC_VER
4255 #pragma warning(push)
4256 #pragma warning(suppress: 28194)
4257 #endif
4258 NTSTATUS insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, root* r, UINT64 objid, UINT8 objtype, UINT64 offset,
4259 _In_opt_ _When_(return >= 0, __drv_aliasesMem) void* data, UINT16 datalen, enum batch_operation operation) {
4260 LIST_ENTRY* le;
4261 batch_root* br = NULL;
4262 batch_item* bi;
4263
4264 le = batchlist->Flink;
4265 while (le != batchlist) {
4266 batch_root* br2 = CONTAINING_RECORD(le, batch_root, list_entry);
4267
4268 if (br2->r == r) {
4269 br = br2;
4270 break;
4271 }
4272
4273 le = le->Flink;
4274 }
4275
4276 if (!br) {
4277 br = ExAllocatePoolWithTag(PagedPool, sizeof(batch_root), ALLOC_TAG);
4278 if (!br) {
4279 ERR("out of memory\n");
4280 return STATUS_INSUFFICIENT_RESOURCES;
4281 }
4282
4283 br->r = r;
4284 InitializeListHead(&br->items);
4285 InsertTailList(batchlist, &br->list_entry);
4286 }
4287
4288 bi = ExAllocateFromPagedLookasideList(&Vcb->batch_item_lookaside);
4289 if (!bi) {
4290 ERR("out of memory\n");
4291 return STATUS_INSUFFICIENT_RESOURCES;
4292 }
4293
4294 bi->key.obj_id = objid;
4295 bi->key.obj_type = objtype;
4296 bi->key.offset = offset;
4297 bi->data = data;
4298 bi->datalen = datalen;
4299 bi->operation = operation;
4300
4301 le = br->items.Blink;
4302 while (le != &br->items) {
4303 batch_item* bi2 = CONTAINING_RECORD(le, batch_item, list_entry);
4304 int cmp = keycmp(bi2->key, bi->key);
4305
4306 if (cmp == -1 || (cmp == 0 && bi->operation >= bi2->operation)) {
4307 InsertHeadList(&bi2->list_entry, &bi->list_entry);
4308 return STATUS_SUCCESS;
4309 }
4310
4311 le = le->Blink;
4312 }
4313
4314 InsertHeadList(&br->items, &bi->list_entry);
4315
4316 return STATUS_SUCCESS;
4317 }
4318 #ifdef _MSC_VER
4319 #pragma warning(pop)
4320 #endif
4321
4322 typedef struct {
4323 UINT64 address;
4324 UINT64 length;
4325 UINT64 offset;
4326 BOOL changed;
4327 chunk* chunk;
4328 UINT64 skip_start;
4329 UINT64 skip_end;
4330 LIST_ENTRY list_entry;
4331 } extent_range;
4332
4333 static void rationalize_extents(fcb* fcb, PIRP Irp) {
4334 LIST_ENTRY* le;
4335 LIST_ENTRY extent_ranges;
4336 extent_range* er;
4337 BOOL changed = FALSE, truncating = FALSE;
4338 UINT32 num_extents = 0;
4339
4340 InitializeListHead(&extent_ranges);
4341
4342 le = fcb->extents.Flink;
4343 while (le != &fcb->extents) {
4344 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4345
4346 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
4347 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4348
4349 if (ed2->size != 0) {
4350 LIST_ENTRY* le2;
4351
4352 le2 = extent_ranges.Flink;
4353 while (le2 != &extent_ranges) {
4354 extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
4355
4356 if (er2->address == ed2->address) {
4357 er2->skip_start = min(er2->skip_start, ed2->offset);
4358 er2->skip_end = min(er2->skip_end, ed2->size - ed2->offset - ed2->num_bytes);
4359 goto cont;
4360 } else if (er2->address > ed2->address)
4361 break;
4362
4363 le2 = le2->Flink;
4364 }
4365
4366 er = ExAllocatePoolWithTag(PagedPool, sizeof(extent_range), ALLOC_TAG); // FIXME - should be from lookaside?
4367 if (!er) {
4368 ERR("out of memory\n");
4369 goto end;
4370 }
4371
4372 er->address = ed2->address;
4373 er->length = ed2->size;
4374 er->offset = ext->offset - ed2->offset;
4375 er->changed = FALSE;
4376 er->chunk = NULL;
4377 er->skip_start = ed2->offset;
4378 er->skip_end = ed2->size - ed2->offset - ed2->num_bytes;
4379
4380 if (er->skip_start != 0 || er->skip_end != 0)
4381 truncating = TRUE;
4382
4383 InsertHeadList(le2->Blink, &er->list_entry);
4384 num_extents++;
4385 }
4386 }
4387
4388 cont:
4389 le = le->Flink;
4390 }
4391
4392 if (num_extents == 0 || (num_extents == 1 && !truncating))
4393 goto end;
4394
4395 le = extent_ranges.Flink;
4396 while (le != &extent_ranges) {
4397 er = CONTAINING_RECORD(le, extent_range, list_entry);
4398
4399 if (!er->chunk) {
4400 LIST_ENTRY* le2;
4401
4402 er->chunk = get_chunk_from_address(fcb->Vcb, er->address);
4403
4404 if (!er->chunk) {
4405 ERR("get_chunk_from_address(%llx) failed\n", er->address);
4406 goto end;
4407 }
4408
4409 le2 = le->Flink;
4410 while (le2 != &extent_ranges) {
4411 extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
4412
4413 if (!er2->chunk && er2->address >= er->chunk->offset && er2->address < er->chunk->offset + er->chunk->chunk_item->size)
4414 er2->chunk = er->chunk;
4415
4416 le2 = le2->Flink;
4417 }
4418 }
4419
4420 le = le->Flink;
4421 }
4422
4423 if (truncating) {
4424 // truncate beginning or end of extent if unused
4425
4426 le = extent_ranges.Flink;
4427 while (le != &extent_ranges) {
4428 er = CONTAINING_RECORD(le, extent_range, list_entry);
4429
4430 if (er->skip_start > 0) {
4431 LIST_ENTRY* le2 = fcb->extents.Flink;
4432 while (le2 != &fcb->extents) {
4433 extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
4434
4435 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
4436 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4437
4438 if (ed2->size != 0 && ed2->address == er->address) {
4439 NTSTATUS Status;
4440
4441 Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4442 -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, TRUE, Irp);
4443 if (!NT_SUCCESS(Status)) {
4444 ERR("update_changed_extent_ref returned %08x\n", Status);
4445 goto end;
4446 }
4447
4448 ext->extent_data.decoded_size -= er->skip_start;
4449 ed2->size -= er->skip_start;
4450 ed2->address += er->skip_start;
4451 ed2->offset -= er->skip_start;
4452
4453 add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4454 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
4455 }
4456 }
4457
4458 le2 = le2->Flink;
4459 }
4460
4461 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM))
4462 add_checksum_entry(fcb->Vcb, er->address, (ULONG)(er->skip_start / fcb->Vcb->superblock.sector_size), NULL, NULL);
4463
4464 acquire_chunk_lock(er->chunk, fcb->Vcb);
4465
4466 if (!er->chunk->cache_loaded) {
4467 NTSTATUS Status = load_cache_chunk(fcb->Vcb, er->chunk, NULL);
4468
4469 if (!NT_SUCCESS(Status)) {
4470 ERR("load_cache_chunk returned %08x\n", Status);
4471 release_chunk_lock(er->chunk, fcb->Vcb);
4472 goto end;
4473 }
4474 }
4475
4476 er->chunk->used -= er->skip_start;
4477
4478 space_list_add(er->chunk, er->address, er->skip_start, NULL);
4479
4480 release_chunk_lock(er->chunk, fcb->Vcb);
4481
4482 er->address += er->skip_start;
4483 er->length -= er->skip_start;
4484 }
4485
4486 if (er->skip_end > 0) {
4487 LIST_ENTRY* le2 = fcb->extents.Flink;
4488 while (le2 != &fcb->extents) {
4489 extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
4490
4491 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
4492 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4493
4494 if (ed2->size != 0 && ed2->address == er->address) {
4495 NTSTATUS Status;
4496
4497 Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4498 -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, TRUE, Irp);
4499 if (!NT_SUCCESS(Status)) {
4500 ERR("update_changed_extent_ref returned %08x\n", Status);
4501 goto end;
4502 }
4503
4504 ext->extent_data.decoded_size -= er->skip_end;
4505 ed2->size -= er->skip_end;
4506
4507 add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4508 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
4509 }
4510 }
4511
4512 le2 = le2->Flink;
4513 }
4514
4515 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM))
4516 add_checksum_entry(fcb->Vcb, er->address + er->length - er->skip_end, (ULONG)(er->skip_end / fcb->Vcb->superblock.sector_size), NULL, NULL);
4517
4518 acquire_chunk_lock(er->chunk, fcb->Vcb);
4519
4520 if (!er->chunk->cache_loaded) {
4521 NTSTATUS Status = load_cache_chunk(fcb->Vcb, er->chunk, NULL);
4522
4523 if (!NT_SUCCESS(Status)) {
4524 ERR("load_cache_chunk returned %08x\n", Status);
4525 release_chunk_lock(er->chunk, fcb->Vcb);
4526 goto end;
4527 }
4528 }
4529
4530 er->chunk->used -= er->skip_end;
4531
4532 space_list_add(er->chunk, er->address + er->length - er->skip_end, er->skip_end, NULL);
4533
4534 release_chunk_lock(er->chunk, fcb->Vcb);
4535
4536 er->length -= er->skip_end;
4537 }
4538
4539 le = le->Flink;
4540 }
4541 }
4542
4543 if (num_extents < 2)
4544 goto end;
4545
4546 // merge together adjacent extents
4547 le = extent_ranges.Flink;
4548 while (le != &extent_ranges) {
4549 er = CONTAINING_RECORD(le, extent_range, list_entry);
4550
4551 if (le->Flink != &extent_ranges && er->length < MAX_EXTENT_SIZE) {
4552 extent_range* er2 = CONTAINING_RECORD(le->Flink, extent_range, list_entry);
4553
4554 if (er->chunk == er2->chunk) {
4555 if (er2->address == er->address + er->length && er2->offset >= er->offset + er->length) {
4556 if (er->length + er2->length <= MAX_EXTENT_SIZE) {
4557 er->length += er2->length;
4558 er->changed = TRUE;
4559
4560 RemoveEntryList(&er2->list_entry);
4561 ExFreePool(er2);
4562
4563 changed = TRUE;
4564 continue;
4565 }
4566 }
4567 }
4568 }
4569
4570 le = le->Flink;
4571 }
4572
4573 if (!changed)
4574 goto end;
4575
4576 le = fcb->extents.Flink;
4577 while (le != &fcb->extents) {
4578 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4579
4580 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) {
4581 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4582
4583 if (ed2->size != 0) {
4584 LIST_ENTRY* le2;
4585
4586 le2 = extent_ranges.Flink;
4587 while (le2 != &extent_ranges) {
4588 extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
4589
4590 if (ed2->address >= er2->address && ed2->address + ed2->size <= er2->address + er2->length && er2->changed) {
4591 NTSTATUS Status;
4592
4593 Status = update_changed_extent_ref(fcb->Vcb, er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4594 -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, TRUE, Irp);
4595 if (!NT_SUCCESS(Status)) {
4596 ERR("update_changed_extent_ref returned %08x\n", Status);
4597 goto end;
4598 }
4599
4600 ed2->offset += ed2->address - er2->address;
4601 ed2->address = er2->address;
4602 ed2->size = er2->length;
4603 ext->extent_data.decoded_size = ed2->size;
4604
4605 add_changed_extent_ref(er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
4606 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
4607
4608 break;
4609 }
4610
4611 le2 = le2->Flink;
4612 }
4613 }
4614 }
4615
4616 le = le->Flink;
4617 }
4618
4619 end:
4620 while (!IsListEmpty(&extent_ranges)) {
4621 le = RemoveHeadList(&extent_ranges);
4622 er = CONTAINING_RECORD(le, extent_range, list_entry);
4623
4624 ExFreePool(er);
4625 }
4626 }
4627
4628 NTSTATUS flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp) {
4629 traverse_ptr tp;
4630 KEY searchkey;
4631 NTSTATUS Status;
4632 INODE_ITEM* ii;
4633 UINT64 ii_offset;
4634 #ifdef DEBUG_PARANOID
4635 UINT64 old_size = 0;
4636 BOOL extents_changed;
4637 #endif
4638
4639 if (fcb->ads) {
4640 if (fcb->deleted) {
4641 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adsxattr.Length, fcb->adshash);
4642 if (!NT_SUCCESS(Status)) {
4643 ERR("delete_xattr returned %08x\n", Status);
4644 goto end;
4645 }
4646 } else {
4647 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adsxattr.Length,
4648 fcb->adshash, (UINT8*)fcb->adsdata.Buffer, fcb->adsdata.Length);
4649 if (!NT_SUCCESS(Status)) {
4650 ERR("set_xattr returned %08x\n", Status);
4651 goto end;
4652 }
4653 }
4654
4655 Status = STATUS_SUCCESS;
4656 goto end;
4657 }
4658
4659 if (fcb->deleted) {
4660 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0xffffffffffffffff, NULL, 0, Batch_DeleteInode);
4661 if (!NT_SUCCESS(Status)) {
4662 ERR("insert_tree_item_batch returned %08x\n", Status);
4663 goto end;
4664 }
4665
4666 Status = STATUS_SUCCESS;
4667 goto end;
4668 }
4669
4670 #ifdef DEBUG_PARANOID
4671 extents_changed = fcb->extents_changed;
4672 #endif
4673
4674 if (fcb->extents_changed) {
4675 LIST_ENTRY* le;
4676 BOOL prealloc = FALSE, extents_inline = FALSE;
4677 UINT64 last_end;
4678
4679 // delete ignored extent items
4680 le = fcb->extents.Flink;
4681 while (le != &fcb->extents) {
4682 LIST_ENTRY* le2 = le->Flink;
4683 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4684
4685 if (ext->ignore) {
4686 RemoveEntryList(&ext->list_entry);
4687
4688 if (ext->csum)
4689 ExFreePool(ext->csum);
4690
4691 ExFreePool(ext);
4692 }
4693
4694 le = le2;
4695 }
4696
4697 le = fcb->extents.Flink;
4698 while (le != &fcb->extents) {
4699 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4700
4701 if (ext->inserted && ext->csum && ext->extent_data.type == EXTENT_TYPE_REGULAR) {
4702 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4703
4704 if (ed2->size > 0) { // not sparse
4705 if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE)
4706 add_checksum_entry(fcb->Vcb, ed2->address + ed2->offset, (ULONG)(ed2->num_bytes / fcb->Vcb->superblock.sector_size), ext->csum, Irp);
4707 else
4708 add_checksum_entry(fcb->Vcb, ed2->address, (ULONG)(ed2->size / fcb->Vcb->superblock.sector_size), ext->csum, Irp);
4709 }
4710 }
4711
4712 le = le->Flink;
4713 }
4714
4715 if (!IsListEmpty(&fcb->extents)) {
4716 rationalize_extents(fcb, Irp);
4717
4718 // merge together adjacent EXTENT_DATAs pointing to same extent
4719
4720 le = fcb->extents.Flink;
4721 while (le != &fcb->extents) {
4722 LIST_ENTRY* le2 = le->Flink;
4723 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4724
4725 if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && le->Flink != &fcb->extents) {
4726 extent* nextext = CONTAINING_RECORD(le->Flink, extent, list_entry);
4727
4728 if (ext->extent_data.type == nextext->extent_data.type) {
4729 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
4730 EXTENT_DATA2* ned2 = (EXTENT_DATA2*)nextext->extent_data.data;
4731
4732 if (ed2->size != 0 && ed2->address == ned2->address && ed2->size == ned2->size &&
4733 nextext->offset == ext->offset + ed2->num_bytes && ned2->offset == ed2->offset + ed2->num_bytes) {
4734 chunk* c;
4735
4736 if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->csum) {
4737 ULONG len = (ULONG)((ed2->num_bytes + ned2->num_bytes) / fcb->Vcb->superblock.sector_size);
4738 UINT32* csum;
4739
4740 csum = ExAllocatePoolWithTag(NonPagedPool, len * sizeof(UINT32), ALLOC_TAG);
4741 if (!csum) {
4742 ERR("out of memory\n");
4743 Status = STATUS_INSUFFICIENT_RESOURCES;
4744 goto end;
4745 }
4746
4747 RtlCopyMemory(csum, ext->csum, (ULONG)(ed2->num_bytes * sizeof(UINT32) / fcb->Vcb->superblock.sector_size));
4748 RtlCopyMemory(&csum[ed2->num_bytes / fcb->Vcb->superblock.sector_size], nextext->csum,
4749 (ULONG)(ned2->num_bytes * sizeof(UINT32) / fcb->Vcb->superblock.sector_size));
4750
4751 ExFreePool(ext->csum);
4752 ext->csum = csum;
4753 }
4754
4755 ext->extent_data.generation = fcb->Vcb->superblock.generation;
4756 ed2->num_bytes += ned2->num_bytes;
4757
4758 RemoveEntryList(&nextext->list_entry);
4759
4760 if (nextext->csum)
4761 ExFreePool(nextext->csum);
4762
4763 ExFreePool(nextext);
4764
4765 c = get_chunk_from_address(fcb->Vcb, ed2->address);
4766
4767 if (!c) {
4768 ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
4769 } else {
4770 Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1,
4771 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
4772 if (!NT_SUCCESS(Status)) {
4773 ERR("update_changed_extent_ref returned %08x\n", Status);
4774 goto end;
4775 }
4776 }
4777
4778 le2 = le;
4779 }
4780 }
4781 }
4782
4783 le = le2;
4784 }
4785 }
4786
4787 if (!fcb->created) {
4788 // delete existing EXTENT_DATA items
4789
4790 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, 0, NULL, 0, Batch_DeleteExtentData);
4791 if (!NT_SUCCESS(Status)) {
4792 ERR("insert_tree_item_batch returned %08x\n", Status);
4793 goto end;
4794 }
4795 }
4796
4797 // add new EXTENT_DATAs
4798
4799 last_end = 0;
4800
4801 le = fcb->extents.Flink;
4802 while (le != &fcb->extents) {
4803 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4804 EXTENT_DATA* ed;
4805
4806 ext->inserted = FALSE;
4807
4808 if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && ext->offset > last_end) {
4809 Status = insert_sparse_extent(fcb, batchlist, last_end, ext->offset - last_end);
4810 if (!NT_SUCCESS(Status)) {
4811 ERR("insert_sparse_extent returned %08x\n", Status);
4812 goto end;
4813 }
4814 }
4815
4816 ed = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG);
4817 if (!ed) {
4818 ERR("out of memory\n");
4819 Status = STATUS_INSUFFICIENT_RESOURCES;
4820 goto end;
4821 }
4822
4823 RtlCopyMemory(ed, &ext->extent_data, ext->datalen);
4824
4825 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, ext->offset,
4826 ed, ext->datalen, Batch_Insert);
4827 if (!NT_SUCCESS(Status)) {
4828 ERR("insert_tree_item_batch returned %08x\n", Status);
4829 goto end;
4830 }
4831
4832 if (ed->type == EXTENT_TYPE_PREALLOC)
4833 prealloc = TRUE;
4834
4835 if (ed->type == EXTENT_TYPE_INLINE)
4836 extents_inline = TRUE;
4837
4838 if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES)) {
4839 if (ed->type == EXTENT_TYPE_INLINE)
4840 last_end = ext->offset + ed->decoded_size;
4841 else {
4842 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
4843
4844 last_end = ext->offset + ed2->num_bytes;
4845 }
4846 }
4847
4848 le = le->Flink;
4849 }
4850
4851 if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && !extents_inline &&
4852 sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) > last_end) {
4853 Status = insert_sparse_extent(fcb, batchlist, last_end, sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) - last_end);
4854 if (!NT_SUCCESS(Status)) {
4855 ERR("insert_sparse_extent returned %08x\n", Status);
4856 goto end;
4857 }
4858 }
4859
4860 // update prealloc flag in INODE_ITEM
4861
4862 if (!prealloc)
4863 fcb->inode_item.flags &= ~BTRFS_INODE_PREALLOC;
4864 else
4865 fcb->inode_item.flags |= BTRFS_INODE_PREALLOC;
4866
4867 fcb->inode_item_changed = TRUE;
4868
4869 fcb->extents_changed = FALSE;
4870 }
4871
4872 if ((!fcb->created && fcb->inode_item_changed) || cache) {
4873 searchkey.obj_id = fcb->inode;
4874 searchkey.obj_type = TYPE_INODE_ITEM;
4875 searchkey.offset = 0xffffffffffffffff;
4876
4877 Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
4878 if (!NT_SUCCESS(Status)) {
4879 ERR("error - find_item returned %08x\n", Status);
4880 goto end;
4881 }
4882
4883 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
4884 if (cache) {
4885 ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
4886 if (!ii) {
4887 ERR("out of memory\n");
4888 Status = STATUS_INSUFFICIENT_RESOURCES;
4889 goto end;
4890 }
4891
4892 RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM));
4893
4894 Status = insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp);
4895 if (!NT_SUCCESS(Status)) {
4896 ERR("insert_tree_item returned %08x\n", Status);
4897 goto end;
4898 }
4899
4900 ii_offset = 0;
4901 } else {
4902 ERR("could not find INODE_ITEM for inode %llx in subvol %llx\n", fcb->inode, fcb->subvol->id);
4903 Status = STATUS_INTERNAL_ERROR;
4904 goto end;
4905 }
4906 } else {
4907 #ifdef DEBUG_PARANOID
4908 INODE_ITEM* ii2 = (INODE_ITEM*)tp.item->data;
4909
4910 old_size = ii2->st_size;
4911 #endif
4912
4913 ii_offset = tp.item->key.offset;
4914 }
4915
4916 if (!cache) {
4917 Status = delete_tree_item(fcb->Vcb, &tp);
4918 if (!NT_SUCCESS(Status)) {
4919 ERR("delete_tree_item returned %08x\n", Status);
4920 goto end;
4921 }
4922 } else {
4923 searchkey.obj_id = fcb->inode;
4924 searchkey.obj_type = TYPE_INODE_ITEM;
4925 searchkey.offset = ii_offset;
4926
4927 Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
4928 if (!NT_SUCCESS(Status)) {
4929 ERR("error - find_item returned %08x\n", Status);
4930 goto end;
4931 }
4932
4933 if (keycmp(tp.item->key, searchkey)) {
4934 ERR("could not find INODE_ITEM for inode %llx in subvol %llx\n", fcb->inode, fcb->subvol->id);
4935 Status = STATUS_INTERNAL_ERROR;
4936 goto end;
4937 } else
4938 RtlCopyMemory(tp.item->data, &fcb->inode_item, min(tp.item->size, sizeof(INODE_ITEM)));
4939 }
4940
4941 #ifdef DEBUG_PARANOID
4942 if (!extents_changed && fcb->type != BTRFS_TYPE_DIRECTORY && old_size != fcb->inode_item.st_size) {
4943 ERR("error - size has changed but extents not marked as changed\n");
4944 int3;
4945 }
4946 #endif
4947 } else
4948 ii_offset = 0;
4949
4950 fcb->created = FALSE;
4951
4952 if (!cache && fcb->inode_item_changed) {
4953 ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
4954 if (!ii) {
4955 ERR("out of memory\n");
4956 Status = STATUS_INSUFFICIENT_RESOURCES;
4957 goto end;
4958 }
4959
4960 RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM));
4961
4962 Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, ii_offset, ii, sizeof(INODE_ITEM),
4963 Batch_Insert);
4964 if (!NT_SUCCESS(Status)) {
4965 ERR("insert_tree_item_batch returned %08x\n", Status);
4966 goto end;
4967 }
4968
4969 fcb->inode_item_changed = FALSE;
4970 }
4971
4972 if (fcb->sd_dirty) {
4973 if (!fcb->sd_deleted) {
4974 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, sizeof(EA_NTACL) - 1,
4975 EA_NTACL_HASH, (UINT8*)fcb->sd, (UINT16)RtlLengthSecurityDescriptor(fcb->sd));
4976 if (!NT_SUCCESS(Status)) {
4977 ERR("set_xattr returned %08x\n", Status);
4978 goto end;
4979 }
4980 } else {
4981 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, sizeof(EA_NTACL) - 1, EA_NTACL_HASH);
4982 if (!NT_SUCCESS(Status)) {
4983 ERR("delete_xattr returned %08x\n", Status);
4984 goto end;
4985 }
4986 }
4987
4988 fcb->sd_deleted = FALSE;
4989 fcb->sd_dirty = FALSE;
4990 }
4991
4992 if (fcb->atts_changed) {
4993 if (!fcb->atts_deleted) {
4994 UINT8 val[16], *val2;
4995 ULONG atts = fcb->atts;
4996
4997 TRACE("inserting new DOSATTRIB xattr\n");
4998
4999 if (fcb->inode == SUBVOL_ROOT_INODE)
5000 atts &= ~FILE_ATTRIBUTE_READONLY;
5001
5002 val2 = &val[sizeof(val) - 1];
5003
5004 do {
5005 UINT8 c = atts % 16;
5006 *val2 = c <= 9 ? (c + '0') : (c - 0xa + 'a');
5007
5008 val2--;
5009 atts >>= 4;
5010 } while (atts != 0);
5011
5012 *val2 = 'x';
5013 val2--;
5014 *val2 = '0';
5015
5016 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, sizeof(EA_DOSATTRIB) - 1,
5017 EA_DOSATTRIB_HASH, val2, (UINT16)(val + sizeof(val) - val2));
5018 if (!NT_SUCCESS(Status)) {
5019 ERR("set_xattr returned %08x\n", Status);
5020 goto end;
5021 }
5022 } else {
5023 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, sizeof(EA_DOSATTRIB) - 1, EA_DOSATTRIB_HASH);
5024 if (!NT_SUCCESS(Status)) {
5025 ERR("delete_xattr returned %08x\n", Status);
5026 goto end;
5027 }
5028 }
5029
5030 fcb->atts_changed = FALSE;
5031 fcb->atts_deleted = FALSE;
5032 }
5033
5034 if (fcb->reparse_xattr_changed) {
5035 if (fcb->reparse_xattr.Buffer && fcb->reparse_xattr.Length > 0) {
5036 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, sizeof(EA_REPARSE) - 1,
5037 EA_REPARSE_HASH, (UINT8*)fcb->reparse_xattr.Buffer, (UINT16)fcb->reparse_xattr.Length);
5038 if (!NT_SUCCESS(Status)) {
5039 ERR("set_xattr returned %08x\n", Status);
5040 goto end;
5041 }
5042 } else {
5043 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, sizeof(EA_REPARSE) - 1, EA_REPARSE_HASH);
5044 if (!NT_SUCCESS(Status)) {
5045 ERR("delete_xattr returned %08x\n", Status);
5046 goto end;
5047 }
5048 }
5049
5050 fcb->reparse_xattr_changed = FALSE;
5051 }
5052
5053 if (fcb->ea_changed) {
5054 if (fcb->ea_xattr.Buffer && fcb->ea_xattr.Length > 0) {
5055 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_EA, sizeof(EA_EA) - 1,
5056 EA_EA_HASH, (UINT8*)fcb->ea_xattr.Buffer, (UINT16)fcb->ea_xattr.Length);
5057 if (!NT_SUCCESS(Status)) {
5058 ERR("set_xattr returned %08x\n", Status);
5059 goto end;
5060 }
5061 } else {
5062 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_EA, sizeof(EA_EA) - 1, EA_EA_HASH);
5063 if (!NT_SUCCESS(Status)) {
5064 ERR("delete_xattr returned %08x\n", Status);
5065 goto end;
5066 }
5067 }
5068
5069 fcb->ea_changed = FALSE;
5070 }
5071
5072 if (fcb->prop_compression_changed) {
5073 if (fcb->prop_compression == PropCompression_None) {
5074 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1, EA_PROP_COMPRESSION_HASH);
5075 if (!NT_SUCCESS(Status)) {
5076 ERR("delete_xattr returned %08x\n", Status);
5077 goto end;
5078 }
5079 } else if (fcb->prop_compression == PropCompression_Zlib) {
5080 static const char zlib[] = "zlib";
5081
5082 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1,
5083 EA_PROP_COMPRESSION_HASH, (UINT8*)zlib, sizeof(zlib) - 1);
5084 if (!NT_SUCCESS(Status)) {
5085 ERR("set_xattr returned %08x\n", Status);
5086 goto end;
5087 }
5088 } else if (fcb->prop_compression == PropCompression_LZO) {
5089 static const char lzo[] = "lzo";
5090
5091 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1,
5092 EA_PROP_COMPRESSION_HASH, (UINT8*)lzo, sizeof(lzo) - 1);
5093 if (!NT_SUCCESS(Status)) {
5094 ERR("set_xattr returned %08x\n", Status);
5095 goto end;
5096 }
5097 } else if (fcb->prop_compression == PropCompression_ZSTD) {
5098 static const char zstd[] = "zstd";
5099
5100 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, sizeof(EA_PROP_COMPRESSION) - 1,
5101 EA_PROP_COMPRESSION_HASH, (UINT8*)zstd, sizeof(zstd) - 1);
5102 if (!NT_SUCCESS(Status)) {
5103 ERR("set_xattr returned %08x\n", Status);
5104 goto end;
5105 }
5106 }
5107
5108 fcb->prop_compression_changed = FALSE;
5109 }
5110
5111 if (fcb->xattrs_changed) {
5112 LIST_ENTRY* le;
5113
5114 le = fcb->xattrs.Flink;
5115 while (le != &fcb->xattrs) {
5116 xattr* xa = CONTAINING_RECORD(le, xattr, list_entry);
5117 LIST_ENTRY* le2 = le->Flink;
5118
5119 if (xa->dirty) {
5120 UINT32 hash = calc_crc32c(0xfffffffe, (UINT8*)xa->data, xa->namelen);
5121
5122 if (xa->valuelen == 0) {
5123 Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, xa->data, xa->namelen, hash);
5124 if (!NT_SUCCESS(Status)) {
5125 ERR("delete_xattr returned %08x\n", Status);
5126 goto end;
5127 }
5128
5129 RemoveEntryList(&xa->list_entry);
5130 ExFreePool(xa);
5131 } else {
5132 Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, xa->data, xa->namelen,
5133 hash, (UINT8*)&xa->data[xa->namelen], xa->valuelen);
5134 if (!NT_SUCCESS(Status)) {
5135 ERR("set_xattr returned %08x\n", Status);
5136 goto end;
5137 }
5138
5139 xa->dirty = FALSE;
5140 }
5141 }
5142
5143 le = le2;
5144 }
5145
5146 fcb->xattrs_changed = FALSE;
5147 }
5148
5149 Status = STATUS_SUCCESS;
5150
5151 end:
5152 if (fcb->dirty) {
5153 BOOL lock = FALSE;
5154
5155 fcb->dirty = FALSE;
5156
5157 if (!ExIsResourceAcquiredExclusiveLite(&fcb->Vcb->dirty_fcbs_lock)) {
5158 ExAcquireResourceExclusiveLite(&fcb->Vcb->dirty_fcbs_lock, TRUE);
5159 lock = TRUE;
5160 }
5161
5162 RemoveEntryList(&fcb->list_entry_dirty);
5163
5164 if (lock)
5165 ExReleaseResourceLite(&fcb->Vcb->dirty_fcbs_lock);
5166 }
5167
5168 return Status;
5169 }
5170
5171 void add_trim_entry_avoid_sb(device_extension* Vcb, device* dev, UINT64 address, UINT64 size) {
5172 int i;
5173 ULONG sblen = (ULONG)sector_align(sizeof(superblock), Vcb->superblock.sector_size);
5174
5175 i = 0;
5176 while (superblock_addrs[i] != 0) {
5177 if (superblock_addrs[i] + sblen >= address && superblock_addrs[i] < address + size) {
5178 if (superblock_addrs[i] > address)
5179 add_trim_entry(dev, address, superblock_addrs[i] - address);
5180
5181 if (size <= superblock_addrs[i] + sblen - address)
5182 return;
5183
5184 size -= superblock_addrs[i] + sblen - address;
5185 address = superblock_addrs[i] + sblen;
5186 } else if (superblock_addrs[i] > address + size)
5187 break;
5188
5189 i++;
5190 }
5191
5192 add_trim_entry(dev, address, size);
5193 }
5194
5195 static NTSTATUS drop_chunk(device_extension* Vcb, chunk* c, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
5196 NTSTATUS Status;
5197 KEY searchkey;
5198 traverse_ptr tp;
5199 UINT64 i, factor;
5200 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];;
5201
5202 TRACE("dropping chunk %llx\n", c->offset);
5203
5204 if (c->chunk_item->type & BLOCK_FLAG_RAID0)
5205 factor = c->chunk_item->num_stripes;
5206 else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
5207 factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
5208 else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
5209 factor = c->chunk_item->num_stripes - 1;
5210 else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
5211 factor = c->chunk_item->num_stripes - 2;
5212 else // SINGLE, DUPLICATE, RAID1
5213 factor = 1;
5214
5215 // do TRIM
5216 if (Vcb->trim && !Vcb->options.no_trim) {
5217 UINT64 len = c->chunk_item->size / factor;
5218
5219 for (i = 0; i < c->chunk_item->num_stripes; i++) {
5220 if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim)
5221 add_trim_entry_avoid_sb(Vcb, c->devices[i], cis[i].offset, len);
5222 }
5223 }
5224
5225 if (!c->cache) {
5226 Status = load_stored_free_space_cache(Vcb, c, TRUE, Irp);
5227
5228 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND)
5229 WARN("load_stored_free_space_cache returned %08x\n", Status);
5230 }
5231
5232 // remove free space cache
5233 if (c->cache) {
5234 c->cache->deleted = TRUE;
5235
5236 Status = excise_extents(Vcb, c->cache, 0, c->cache->inode_item.st_size, Irp, rollback);
5237 if (!NT_SUCCESS(Status)) {
5238 ERR("excise_extents returned %08x\n", Status);
5239 return Status;
5240 }
5241
5242 Status = flush_fcb(c->cache, TRUE, batchlist, Irp);
5243
5244 free_fcb(c->cache);
5245
5246 if (c->cache->refcount == 0)
5247 reap_fcb(c->cache);
5248
5249 if (!NT_SUCCESS(Status)) {
5250 ERR("flush_fcb returned %08x\n", Status);
5251 return Status;
5252 }
5253
5254 searchkey.obj_id = FREE_SPACE_CACHE_ID;
5255 searchkey.obj_type = 0;
5256 searchkey.offset = c->offset;
5257
5258 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
5259 if (!NT_SUCCESS(Status)) {
5260 ERR("error - find_item returned %08x\n", Status);
5261 return Status;
5262 }
5263
5264 if (!keycmp(tp.item->key, searchkey)) {
5265 Status = delete_tree_item(Vcb, &tp);
5266 if (!NT_SUCCESS(Status)) {
5267 ERR("delete_tree_item returned %08x\n", Status);
5268 return Status;
5269 }
5270 }
5271 }
5272
5273 if (Vcb->space_root) {
5274 Status = insert_tree_item_batch(batchlist, Vcb, Vcb->space_root, c->offset, TYPE_FREE_SPACE_INFO, c->chunk_item->size,
5275 NULL, 0, Batch_DeleteFreeSpace);
5276 if (!NT_SUCCESS(Status)) {
5277 ERR("insert_tree_item_batch returned %08x\n", Status);
5278 return Status;
5279 }
5280 }
5281
5282 for (i = 0; i < c->chunk_item->num_stripes; i++) {
5283 if (!c->created) {
5284 // remove DEV_EXTENTs from tree 4
5285 searchkey.obj_id = cis[i].dev_id;
5286 searchkey.obj_type = TYPE_DEV_EXTENT;
5287 searchkey.offset = cis[i].offset;
5288
5289 Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, Irp);
5290 if (!NT_SUCCESS(Status)) {
5291 ERR("error - find_item returned %08x\n", Status);
5292 return Status;
5293 }
5294
5295 if (!keycmp(tp.item->key, searchkey)) {
5296 Status = delete_tree_item(Vcb, &tp);
5297 if (!NT_SUCCESS(Status)) {
5298 ERR("delete_tree_item returned %08x\n", Status);
5299 return Status;
5300 }
5301
5302 if (tp.item->size >= sizeof(DEV_EXTENT)) {
5303 DEV_EXTENT* de = (DEV_EXTENT*)tp.item->data;
5304
5305 c->devices[i]->devitem.bytes_used -= de->length;
5306
5307 if (Vcb->balance.thread && Vcb->balance.shrinking && Vcb->balance.opts[0].devid == c->devices[i]->devitem.dev_id) {
5308 if (cis[i].offset < Vcb->balance.opts[0].drange_start && cis[i].offset + de->length > Vcb->balance.opts[0].drange_start)
5309 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, Vcb->balance.opts[0].drange_start - cis[i].offset, NULL, rollback);
5310 } else
5311 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, de->length, NULL, rollback);
5312 }
5313 } else
5314 WARN("could not find (%llx,%x,%llx) in dev tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
5315 } else {
5316 UINT64 len = c->chunk_item->size / factor;
5317
5318 c->devices[i]->devitem.bytes_used -= len;
5319
5320 if (Vcb->balance.thread && Vcb->balance.shrinking && Vcb->balance.opts[0].devid == c->devices[i]->devitem.dev_id) {
5321 if (cis[i].offset < Vcb->balance.opts[0].drange_start && cis[i].offset + len > Vcb->balance.opts[0].drange_start)
5322 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, Vcb->balance.opts[0].drange_start - cis[i].offset, NULL, rollback);
5323 } else
5324 space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, len, NULL, rollback);
5325 }
5326 }
5327
5328 // modify DEV_ITEMs in chunk tree
5329 for (i = 0; i < c->chunk_item->num_stripes; i++) {
5330 if (c->devices[i]) {
5331 UINT64 j;
5332 DEV_ITEM* di;
5333
5334 searchkey.obj_id = 1;
5335 searchkey.obj_type = TYPE_DEV_ITEM;
5336 searchkey.offset = c->devices[i]->devitem.dev_id;
5337
5338 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp);
5339 if (!NT_SUCCESS(Status)) {
5340 ERR("error - find_item returned %08x\n", Status);
5341 return Status;
5342 }
5343
5344 if (!keycmp(tp.item->key, searchkey)) {
5345 Status = delete_tree_item(Vcb, &tp);
5346 if (!NT_SUCCESS(Status)) {
5347 ERR("delete_tree_item returned %08x\n", Status);
5348 return Status;
5349 }
5350
5351 di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
5352 if (!di) {
5353 ERR("out of memory\n");
5354 return STATUS_INSUFFICIENT_RESOURCES;
5355 }
5356
5357 RtlCopyMemory(di, &c->devices[i]->devitem, sizeof(DEV_ITEM));
5358
5359 Status = insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, c->devices[i]->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp);
5360 if (!NT_SUCCESS(Status)) {
5361 ERR("insert_tree_item returned %08x\n", Status);
5362 return Status;
5363 }
5364 }
5365
5366 for (j = i + 1; j < c->chunk_item->num_stripes; j++) {
5367 if (c->devices[j] == c->devices[i])
5368 c->devices[j] = NULL;
5369 }
5370 }
5371 }
5372
5373 if (!c->created) {
5374 // remove CHUNK_ITEM from chunk tree
5375 searchkey.obj_id = 0x100;
5376 searchkey.obj_type = TYPE_CHUNK_ITEM;
5377 searchkey.offset = c->offset;
5378
5379 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp);
5380 if (!NT_SUCCESS(Status)) {
5381 ERR("error - find_item returned %08x\n", Status);
5382 return Status;
5383 }
5384
5385 if (!keycmp(tp.item->key, searchkey)) {
5386 Status = delete_tree_item(Vcb, &tp);
5387
5388 if (!NT_SUCCESS(Status)) {
5389 ERR("delete_tree_item returned %08x\n", Status);
5390 return Status;
5391 }
5392 } else
5393 WARN("could not find CHUNK_ITEM for chunk %llx\n", c->offset);
5394
5395 // remove BLOCK_GROUP_ITEM from extent tree
5396 searchkey.obj_id = c->offset;
5397 searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
5398 searchkey.offset = 0xffffffffffffffff;
5399
5400 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
5401 if (!NT_SUCCESS(Status)) {
5402 ERR("error - find_item returned %08x\n", Status);
5403 return Status;
5404 }
5405
5406 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
5407 Status = delete_tree_item(Vcb, &tp);
5408
5409 if (!NT_SUCCESS(Status)) {
5410 ERR("delete_tree_item returned %08x\n", Status);
5411 return Status;
5412 }
5413 } else
5414 WARN("could not find BLOCK_GROUP_ITEM for chunk %llx\n", c->offset);
5415 }
5416
5417 if (c->chunk_item->type & BLOCK_FLAG_SYSTEM)
5418 remove_from_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset);
5419
5420 RemoveEntryList(&c->list_entry);
5421
5422 // clear raid56 incompat flag if dropping last RAID5/6 chunk
5423
5424 if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) {
5425 LIST_ENTRY* le;
5426 BOOL clear_flag = TRUE;
5427
5428 le = Vcb->chunks.Flink;
5429 while (le != &Vcb->chunks) {
5430 chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry);
5431
5432 if (c2->chunk_item->type & BLOCK_FLAG_RAID5 || c2->chunk_item->type & BLOCK_FLAG_RAID6) {
5433 clear_flag = FALSE;
5434 break;
5435 }
5436
5437 le = le->Flink;
5438 }
5439
5440 if (clear_flag)
5441 Vcb->superblock.incompat_flags &= ~BTRFS_INCOMPAT_FLAGS_RAID56;
5442 }
5443
5444 Vcb->superblock.bytes_used -= c->oldused;
5445
5446 ExFreePool(c->chunk_item);
5447 ExFreePool(c->devices);
5448
5449 while (!IsListEmpty(&c->space)) {
5450 space* s = CONTAINING_RECORD(c->space.Flink, space, list_entry);
5451
5452 RemoveEntryList(&s->list_entry);
5453 ExFreePool(s);
5454 }
5455
5456 while (!IsListEmpty(&c->deleting)) {
5457 space* s = CONTAINING_RECORD(c->deleting.Flink, space, list_entry);
5458
5459 RemoveEntryList(&s->list_entry);
5460 ExFreePool(s);
5461 }
5462
5463 release_chunk_lock(c, Vcb);
5464
5465 ExDeleteResourceLite(&c->partial_stripes_lock);
5466 ExDeleteResourceLite(&c->range_locks_lock);
5467 ExDeleteResourceLite(&c->lock);
5468 ExDeleteResourceLite(&c->changed_extents_lock);
5469
5470 ExFreePool(c);
5471
5472 return STATUS_SUCCESS;
5473 }
5474
5475 static NTSTATUS partial_stripe_read(device_extension* Vcb, chunk* c, partial_stripe* ps, UINT64 startoff, UINT16 parity, ULONG offset, ULONG len) {
5476 NTSTATUS Status;
5477 ULONG sl = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size);
5478 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
5479
5480 while (len > 0) {
5481 ULONG readlen = min(offset + len, offset + (sl - (offset % sl))) - offset;
5482 UINT16 stripe;
5483
5484 stripe = (parity + (offset / sl) + 1) % c->chunk_item->num_stripes;
5485
5486 if (c->devices[stripe]->devobj) {
5487 Status = sync_read_phys(c->devices[stripe]->devobj, cis[stripe].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size),
5488 readlen * Vcb->superblock.sector_size, ps->data + (offset * Vcb->superblock.sector_size), FALSE);
5489 if (!NT_SUCCESS(Status)) {
5490 ERR("sync_read_phys returned %08x\n", Status);
5491 return Status;
5492 }
5493 } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
5494 UINT16 i;
5495 UINT8* scratch;
5496
5497 scratch = ExAllocatePoolWithTag(NonPagedPool, readlen * Vcb->superblock.sector_size, ALLOC_TAG);
5498 if (!scratch) {
5499 ERR("out of memory\n");
5500 return STATUS_INSUFFICIENT_RESOURCES;
5501 }
5502
5503 for (i = 0; i < c->chunk_item->num_stripes; i++) {
5504 if (i != stripe) {
5505 if (!c->devices[i]->devobj) {
5506 ExFreePool(scratch);
5507 return STATUS_UNEXPECTED_IO_ERROR;
5508 }
5509
5510 if (i == 0 || (stripe == 0 && i == 1)) {
5511 Status = sync_read_phys(c->devices[i]->devobj, cis[i].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size),
5512 readlen * Vcb->superblock.sector_size, ps->data + (offset * Vcb->superblock.sector_size), FALSE);
5513 if (!NT_SUCCESS(Status)) {
5514 ERR("sync_read_phys returned %08x\n", Status);
5515 ExFreePool(scratch);
5516 return Status;
5517 }
5518 } else {
5519 Status = sync_read_phys(c->devices[i]->devobj, cis[i].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size),
5520 readlen * Vcb->superblock.sector_size, scratch, FALSE);
5521 if (!NT_SUCCESS(Status)) {
5522 ERR("sync_read_phys returned %08x\n", Status);
5523 ExFreePool(scratch);
5524 return Status;
5525 }
5526
5527 do_xor(ps->data + (offset * Vcb->superblock.sector_size), scratch, readlen * Vcb->superblock.sector_size);
5528 }
5529 }
5530 }
5531
5532 ExFreePool(scratch);
5533 } else {
5534 UINT8* scratch;
5535 UINT16 k, i, logstripe, error_stripe, num_errors = 0;
5536
5537 scratch = ExAllocatePoolWithTag(NonPagedPool, (c->chunk_item->num_stripes + 2) * readlen * Vcb->superblock.sector_size, ALLOC_TAG);
5538 if (!scratch) {
5539 ERR("out of memory\n");
5540 return STATUS_INSUFFICIENT_RESOURCES;
5541 }
5542
5543 i = (parity + 1) % c->chunk_item->num_stripes;
5544 for (k = 0; k < c->chunk_item->num_stripes; k++) {
5545 if (i != stripe) {
5546 if (c->devices[i]->devobj) {
5547 Status = sync_read_phys(c->devices[i]->devobj, cis[i].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size),
5548 readlen * Vcb->superblock.sector_size, scratch + (k * readlen * Vcb->superblock.sector_size), FALSE);
5549 if (!NT_SUCCESS(Status)) {
5550 ERR("sync_read_phys returned %08x\n", Status);
5551 num_errors++;
5552 error_stripe = k;
5553 }
5554 } else {
5555 num_errors++;
5556 error_stripe = k;
5557 }
5558
5559 if (num_errors > 1) {
5560 ExFreePool(scratch);
5561 return STATUS_UNEXPECTED_IO_ERROR;
5562 }
5563 } else
5564 logstripe = k;
5565
5566 i = (i + 1) % c->chunk_item->num_stripes;
5567 }
5568
5569 if (num_errors == 0 || error_stripe == c->chunk_item->num_stripes - 1) {
5570 for (k = 0; k < c->chunk_item->num_stripes - 1; k++) {
5571 if (k != logstripe) {
5572 if (k == 0 || (k == 1 && logstripe == 0)) {
5573 RtlCopyMemory(ps->data + (offset * Vcb->superblock.sector_size), scratch + (k * readlen * Vcb->superblock.sector_size),
5574 readlen * Vcb->superblock.sector_size);
5575 } else {
5576 do_xor(ps->data + (offset * Vcb->superblock.sector_size), scratch + (k * readlen * Vcb->superblock.sector_size),
5577 readlen * Vcb->superblock.sector_size);
5578 }
5579 }
5580 }
5581 } else {
5582 raid6_recover2(scratch, c->chunk_item->num_stripes, readlen * Vcb->superblock.sector_size, logstripe,
5583 error_stripe, scratch + (c->chunk_item->num_stripes * readlen * Vcb->superblock.sector_size));
5584
5585 RtlCopyMemory(ps->data + (offset * Vcb->superblock.sector_size), scratch + (c->chunk_item->num_stripes * readlen * Vcb->superblock.sector_size),
5586 readlen * Vcb->superblock.sector_size);
5587 }
5588
5589 ExFreePool(scratch);
5590 }
5591
5592 offset += readlen;
5593 len -= readlen;
5594 }
5595
5596 return STATUS_SUCCESS;
5597 }
5598
5599 NTSTATUS flush_partial_stripe(device_extension* Vcb, chunk* c, partial_stripe* ps) {
5600 NTSTATUS Status;
5601 UINT16 parity2, stripe, startoffstripe;
5602 UINT8* data;
5603 UINT64 startoff;
5604 ULONG runlength, index, last1;
5605 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
5606 LIST_ENTRY* le;
5607 UINT16 k, num_data_stripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
5608 UINT64 ps_length = num_data_stripes * c->chunk_item->stripe_length;
5609 ULONG stripe_length = (ULONG)c->chunk_item->stripe_length;
5610
5611 // FIXME - do writes asynchronously?
5612
5613 get_raid0_offset(ps->address - c->offset, stripe_length, num_data_stripes, &startoff, &startoffstripe);
5614
5615 parity2 = (((ps->address - c->offset) / ps_length) + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
5616
5617 // read data (or reconstruct if degraded)
5618
5619 runlength = RtlFindFirstRunClear(&ps->bmp, &index);
5620 last1 = 0;
5621
5622 while (runlength != 0) {
5623 if (index > last1) {
5624 Status = partial_stripe_read(Vcb, c, ps, startoff, parity2, last1, index - last1);
5625 if (!NT_SUCCESS(Status)) {
5626 ERR("partial_stripe_read returned %08x\n", Status);
5627 return Status;
5628 }
5629 }
5630
5631 last1 = index + runlength;
5632
5633 runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
5634 }
5635
5636 if (last1 < ps_length / Vcb->superblock.sector_size) {
5637 Status = partial_stripe_read(Vcb, c, ps, startoff, parity2, last1, (ULONG)((ps_length / Vcb->superblock.sector_size) - last1));
5638 if (!NT_SUCCESS(Status)) {
5639 ERR("partial_stripe_read returned %08x\n", Status);
5640 return Status;
5641 }
5642 }
5643
5644 // set unallocated data to 0
5645 le = c->space.Flink;
5646 while (le != &c->space) {
5647 space* s = CONTAINING_RECORD(le, space, list_entry);
5648
5649 if (s->address + s->size > ps->address && s->address < ps->address + ps_length) {
5650 UINT64 start = max(ps->address, s->address);
5651 UINT64 end = min(ps->address + ps_length, s->address + s->size);
5652
5653 RtlZeroMemory(ps->data + start - ps->address, (ULONG)(end - start));
5654 } else if (s->address >= ps->address + ps_length)
5655 break;
5656
5657 le = le->Flink;
5658 }
5659
5660 le = c->deleting.Flink;
5661 while (le != &c->deleting) {
5662 space* s = CONTAINING_RECORD(le, space, list_entry);
5663
5664 if (s->address + s->size > ps->address && s->address < ps->address + ps_length) {
5665 UINT64 start = max(ps->address, s->address);
5666 UINT64 end = min(ps->address + ps_length, s->address + s->size);
5667
5668 RtlZeroMemory(ps->data + start - ps->address, (ULONG)(end - start));
5669 } else if (s->address >= ps->address + ps_length)
5670 break;
5671
5672 le = le->Flink;
5673 }
5674
5675 stripe = (parity2 + 1) % c->chunk_item->num_stripes;
5676
5677 data = ps->data;
5678 for (k = 0; k < num_data_stripes; k++) {
5679 if (c->devices[stripe]->devobj) {
5680 Status = write_data_phys(c->devices[stripe]->devobj, cis[stripe].offset + startoff, data, stripe_length);
5681 if (!NT_SUCCESS(Status)) {
5682 ERR("write_data_phys returned %08x\n", Status);
5683 return Status;
5684 }
5685 }
5686
5687 data += stripe_length;
5688 stripe = (stripe + 1) % c->chunk_item->num_stripes;
5689 }
5690
5691 // write parity
5692 if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
5693 if (c->devices[parity2]->devobj) {
5694 UINT16 i;
5695
5696 for (i = 1; i < c->chunk_item->num_stripes - 1; i++) {
5697 do_xor(ps->data, ps->data + (i * stripe_length), stripe_length);
5698 }
5699
5700 Status = write_data_phys(c->devices[parity2]->devobj, cis[parity2].offset + startoff, ps->data, stripe_length);
5701 if (!NT_SUCCESS(Status)) {
5702 ERR("write_data_phys returned %08x\n", Status);
5703 return Status;
5704 }
5705 }
5706 } else {
5707 UINT16 parity1 = (parity2 + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
5708
5709 if (c->devices[parity1]->devobj || c->devices[parity2]->devobj) {
5710 UINT8* scratch;
5711 UINT16 i;
5712
5713 scratch = ExAllocatePoolWithTag(NonPagedPool, stripe_length * 2, ALLOC_TAG);
5714 if (!scratch) {
5715 ERR("out of memory\n");
5716 return STATUS_INSUFFICIENT_RESOURCES;
5717 }
5718
5719 i = c->chunk_item->num_stripes - 3;
5720
5721 while (TRUE) {
5722 if (i == c->chunk_item->num_stripes - 3) {
5723 RtlCopyMemory(scratch, ps->data + (i * stripe_length), stripe_length);
5724 RtlCopyMemory(scratch + stripe_length, ps->data + (i * stripe_length), stripe_length);
5725 } else {
5726 do_xor(scratch, ps->data + (i * stripe_length), stripe_length);
5727
5728 galois_double(scratch + stripe_length, stripe_length);
5729 do_xor(scratch + stripe_length, ps->data + (i * stripe_length), stripe_length);
5730 }
5731
5732 if (i == 0)
5733 break;
5734
5735 i--;
5736 }
5737
5738 if (c->devices[parity1]->devobj) {
5739 Status = write_data_phys(c->devices[parity1]->devobj, cis[parity1].offset + startoff, scratch, stripe_length);
5740 if (!NT_SUCCESS(Status)) {
5741 ERR("write_data_phys returned %08x\n", Status);
5742 ExFreePool(scratch);
5743 return Status;
5744 }
5745 }
5746
5747 if (c->devices[parity2]->devobj) {
5748 Status = write_data_phys(c->devices[parity2]->devobj, cis[parity2].offset + startoff, scratch + stripe_length, stripe_length);
5749 if (!NT_SUCCESS(Status)) {
5750 ERR("write_data_phys returned %08x\n", Status);
5751 ExFreePool(scratch);
5752 return Status;
5753 }
5754 }
5755
5756 ExFreePool(scratch);
5757 }
5758 }
5759
5760 return STATUS_SUCCESS;
5761 }
5762
5763 static NTSTATUS update_chunks(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
5764 LIST_ENTRY *le, *le2;
5765 NTSTATUS Status;
5766 UINT64 used_minus_cache;
5767
5768 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
5769
5770 // FIXME - do tree chunks before data chunks
5771
5772 le = Vcb->chunks.Flink;
5773 while (le != &Vcb->chunks) {
5774 chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
5775
5776 le2 = le->Flink;
5777
5778 if (c->changed) {
5779 acquire_chunk_lock(c, Vcb);
5780
5781 // flush partial stripes
5782 if (!Vcb->readonly && (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)) {
5783 ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, TRUE);
5784
5785 while (!IsListEmpty(&c->partial_stripes)) {
5786 partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry);
5787
5788 Status = flush_partial_stripe(Vcb, c, ps);
5789
5790 if (ps->bmparr)
5791 ExFreePool(ps->bmparr);
5792
5793 ExFreePool(ps);
5794
5795 if (!NT_SUCCESS(Status)) {
5796 ERR("flush_partial_stripe returned %08x\n", Status);
5797 ExReleaseResourceLite(&c->partial_stripes_lock);
5798 release_chunk_lock(c, Vcb);
5799 ExReleaseResourceLite(&Vcb->chunk_lock);
5800 return Status;
5801 }
5802 }
5803
5804 ExReleaseResourceLite(&c->partial_stripes_lock);
5805 }
5806
5807 if (c->list_entry_balance.Flink) {
5808 release_chunk_lock(c, Vcb);
5809 le = le2;
5810 continue;
5811 }
5812
5813 if (c->space_changed || c->created) {
5814 BOOL created = c->created;
5815
5816 used_minus_cache = c->used;
5817
5818 // subtract self-hosted cache
5819 if (used_minus_cache > 0 && c->chunk_item->type & BLOCK_FLAG_DATA && c->cache && c->cache->inode_item.st_size == c->used) {
5820 LIST_ENTRY* le3;
5821
5822 le3 = c->cache->extents.Flink;
5823 while (le3 != &c->cache->extents) {
5824 extent* ext = CONTAINING_RECORD(le3, extent, list_entry);
5825 EXTENT_DATA* ed = &ext->extent_data;
5826
5827 if (!ext->ignore) {
5828 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
5829 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
5830
5831 if (ed2->size != 0 && ed2->address >= c->offset && ed2->address + ed2->size <= c->offset + c->chunk_item->size)
5832 used_minus_cache -= ed2->size;
5833 }
5834 }
5835
5836 le3 = le3->Flink;
5837 }
5838 }
5839
5840 if (used_minus_cache == 0) {
5841 Status = drop_chunk(Vcb, c, batchlist, Irp, rollback);
5842 if (!NT_SUCCESS(Status)) {
5843 ERR("drop_chunk returned %08x\n", Status);
5844 release_chunk_lock(c, Vcb);
5845 ExReleaseResourceLite(&Vcb->chunk_lock);
5846 return Status;
5847 }
5848
5849 // c is now freed, so avoid releasing non-existent lock
5850 le = le2;
5851 continue;
5852 } else if (c->created) {
5853 Status = create_chunk(Vcb, c, Irp);
5854 if (!NT_SUCCESS(Status)) {
5855 ERR("create_chunk returned %08x\n", Status);
5856 release_chunk_lock(c, Vcb);
5857 ExReleaseResourceLite(&Vcb->chunk_lock);
5858 return Status;
5859 }
5860 }
5861
5862 if (used_minus_cache > 0 || created)
5863 release_chunk_lock(c, Vcb);
5864 } else
5865 release_chunk_lock(c, Vcb);
5866 }
5867
5868 le = le2;
5869 }
5870
5871 ExReleaseResourceLite(&Vcb->chunk_lock);
5872
5873 return STATUS_SUCCESS;
5874 }
5875
5876 static NTSTATUS delete_root_ref(device_extension* Vcb, UINT64 subvolid, UINT64 parsubvolid, UINT64 parinode, PANSI_STRING utf8, PIRP Irp) {
5877 KEY searchkey;
5878 traverse_ptr tp;
5879 NTSTATUS Status;
5880
5881 searchkey.obj_id = parsubvolid;
5882 searchkey.obj_type = TYPE_ROOT_REF;
5883 searchkey.offset = subvolid;
5884
5885 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
5886 if (!NT_SUCCESS(Status)) {
5887 ERR("error - find_item returned %08x\n", Status);
5888 return Status;
5889 }
5890
5891 if (!keycmp(searchkey, tp.item->key)) {
5892 if (tp.item->size < sizeof(ROOT_REF)) {
5893 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF));
5894 return STATUS_INTERNAL_ERROR;
5895 } else {
5896 ROOT_REF* rr;
5897 ULONG len;
5898
5899 rr = (ROOT_REF*)tp.item->data;
5900 len = tp.item->size;
5901
5902 do {
5903 UINT16 itemlen;
5904
5905 if (len < sizeof(ROOT_REF) || len < offsetof(ROOT_REF, name[0]) + rr->n) {
5906 ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
5907 break;
5908 }
5909
5910 itemlen = (UINT16)offsetof(ROOT_REF, name[0]) + rr->n;
5911
5912 if (rr->dir == parinode && rr->n == utf8->Length && RtlCompareMemory(rr->name, utf8->Buffer, rr->n) == rr->n) {
5913 UINT16 newlen = tp.item->size - itemlen;
5914
5915 Status = delete_tree_item(Vcb, &tp);
5916 if (!NT_SUCCESS(Status)) {
5917 ERR("delete_tree_item returned %08x\n", Status);
5918 return Status;
5919 }
5920
5921 if (newlen == 0) {
5922 TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
5923 } else {
5924 UINT8 *newrr = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *rroff;
5925
5926 if (!newrr) {
5927 ERR("out of memory\n");
5928 return STATUS_INSUFFICIENT_RESOURCES;
5929 }
5930
5931 TRACE("modifying (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
5932
5933 if ((UINT8*)rr > tp.item->data) {
5934 RtlCopyMemory(newrr, tp.item->data, (UINT8*)rr - tp.item->data);
5935 rroff = newrr + ((UINT8*)rr - tp.item->data);
5936 } else {
5937 rroff = newrr;
5938 }
5939
5940 if ((UINT8*)&rr->name[rr->n] < tp.item->data + tp.item->size)
5941 RtlCopyMemory(rroff, &rr->name[rr->n], tp.item->size - ((UINT8*)&rr->name[rr->n] - tp.item->data));
5942
5943 Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newrr, newlen, NULL, Irp);
5944 if (!NT_SUCCESS(Status)) {
5945 ERR("insert_tree_item returned %08x\n", Status);
5946 ExFreePool(newrr);
5947 return Status;
5948 }
5949 }
5950
5951 break;
5952 }
5953
5954 if (len > itemlen) {
5955 len -= itemlen;
5956 rr = (ROOT_REF*)&rr->name[rr->n];
5957 } else
5958 break;
5959 } while (len > 0);
5960 }
5961 } else {
5962 WARN("could not find ROOT_REF entry for subvol %llx in %llx\n", searchkey.offset, searchkey.obj_id);
5963 return STATUS_NOT_FOUND;
5964 }
5965
5966 return STATUS_SUCCESS;
5967 }
5968
5969 #ifdef _MSC_VER
5970 #pragma warning(push)
5971 #pragma warning(suppress: 28194)
5972 #endif
5973 static NTSTATUS add_root_ref(_In_ device_extension* Vcb, _In_ UINT64 subvolid, _In_ UINT64 parsubvolid, _In_ __drv_aliasesMem ROOT_REF* rr, _In_opt_ PIRP Irp) {
5974 KEY searchkey;
5975 traverse_ptr tp;
5976 NTSTATUS Status;
5977
5978 searchkey.obj_id = parsubvolid;
5979 searchkey.obj_type = TYPE_ROOT_REF;
5980 searchkey.offset = subvolid;
5981
5982 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
5983 if (!NT_SUCCESS(Status)) {
5984 ERR("error - find_item returned %08x\n", Status);
5985 return Status;
5986 }
5987
5988 if (!keycmp(searchkey, tp.item->key)) {
5989 UINT16 rrsize = tp.item->size + (UINT16)offsetof(ROOT_REF, name[0]) + rr->n;
5990 UINT8* rr2;
5991
5992 rr2 = ExAllocatePoolWithTag(PagedPool, rrsize, ALLOC_TAG);
5993 if (!rr2) {
5994 ERR("out of memory\n");
5995 return STATUS_INSUFFICIENT_RESOURCES;
5996 }
5997
5998 if (tp.item->size > 0)
5999 RtlCopyMemory(rr2, tp.item->data, tp.item->size);
6000
6001 RtlCopyMemory(rr2 + tp.item->size, rr, offsetof(ROOT_REF, name[0]) + rr->n);
6002 ExFreePool(rr);
6003
6004 Status = delete_tree_item(Vcb, &tp);
6005 if (!NT_SUCCESS(Status)) {
6006 ERR("delete_tree_item returned %08x\n", Status);
6007 ExFreePool(rr2);
6008 return Status;
6009 }
6010
6011 Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr2, rrsize, NULL, Irp);
6012 if (!NT_SUCCESS(Status)) {
6013 ERR("insert_tree_item returned %08x\n", Status);
6014 ExFreePool(rr2);
6015 return Status;
6016 }
6017 } else {
6018 Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr, (UINT16)offsetof(ROOT_REF, name[0]) + rr->n, NULL, Irp);
6019 if (!NT_SUCCESS(Status)) {
6020 ERR("insert_tree_item returned %08x\n", Status);
6021 ExFreePool(rr);
6022 return Status;
6023 }
6024 }
6025
6026 return STATUS_SUCCESS;
6027 }
6028 #ifdef _MSC_VER
6029 #pragma warning(pop)
6030 #endif
6031
6032 static NTSTATUS update_root_backref(device_extension* Vcb, UINT64 subvolid, UINT64 parsubvolid, PIRP Irp) {
6033 KEY searchkey;
6034 traverse_ptr tp;
6035 UINT8* data;
6036 UINT16 datalen;
6037 NTSTATUS Status;
6038
6039 searchkey.obj_id = parsubvolid;
6040 searchkey.obj_type = TYPE_ROOT_REF;
6041 searchkey.offset = subvolid;
6042
6043 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
6044 if (!NT_SUCCESS(Status)) {
6045 ERR("error - find_item returned %08x\n", Status);
6046 return Status;
6047 }
6048
6049 if (!keycmp(tp.item->key, searchkey) && tp.item->size > 0) {
6050 datalen = tp.item->size;
6051
6052 data = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG);
6053 if (!data) {
6054 ERR("out of memory\n");
6055 return STATUS_INSUFFICIENT_RESOURCES;
6056 }
6057
6058 RtlCopyMemory(data, tp.item->data, datalen);
6059 } else {
6060 datalen = 0;
6061 data = NULL;
6062 }
6063
6064 searchkey.obj_id = subvolid;
6065 searchkey.obj_type = TYPE_ROOT_BACKREF;
6066 searchkey.offset = parsubvolid;
6067
6068 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
6069 if (!NT_SUCCESS(Status)) {
6070 ERR("error - find_item returned %08x\n", Status);
6071
6072 if (datalen > 0)
6073 ExFreePool(data);
6074
6075 return Status;
6076 }
6077
6078 if (!keycmp(tp.item->key, searchkey)) {
6079 Status = delete_tree_item(Vcb, &tp);
6080 if (!NT_SUCCESS(Status)) {
6081 ERR("delete_tree_item returned %08x\n", Status);
6082
6083 if (datalen > 0)
6084 ExFreePool(data);
6085
6086 return Status;
6087 }
6088 }
6089
6090 if (datalen > 0) {
6091 Status = insert_tree_item(Vcb, Vcb->root_root, subvolid, TYPE_ROOT_BACKREF, parsubvolid, data, datalen, NULL, Irp);
6092 if (!NT_SUCCESS(Status)) {
6093 ERR("insert_tree_item returned %08x\n", Status);
6094 ExFreePool(data);
6095 return Status;
6096 }
6097 }
6098
6099 return STATUS_SUCCESS;
6100 }
6101
6102 static NTSTATUS add_root_item_to_cache(device_extension* Vcb, UINT64 root, PIRP Irp) {
6103 KEY searchkey;
6104 traverse_ptr tp;
6105 NTSTATUS Status;
6106
6107 searchkey.obj_id = root;
6108 searchkey.obj_type = TYPE_ROOT_ITEM;
6109 searchkey.offset = 0xffffffffffffffff;
6110
6111 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
6112 if (!NT_SUCCESS(Status)) {
6113 ERR("error - find_item returned %08x\n", Status);
6114 return Status;
6115 }
6116
6117 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
6118 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
6119 return STATUS_INTERNAL_ERROR;
6120 }
6121
6122 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed
6123 ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
6124 if (!ri) {
6125 ERR("out of memory\n");
6126 return STATUS_INSUFFICIENT_RESOURCES;
6127 }
6128
6129 if (tp.item->size > 0)
6130 RtlCopyMemory(ri, tp.item->data, tp.item->size);
6131
6132 RtlZeroMemory(((UINT8*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size);
6133
6134 Status = delete_tree_item(Vcb, &tp);
6135 if (!NT_SUCCESS(Status)) {
6136 ERR("delete_tree_item returned %08x\n", Status);
6137 ExFreePool(ri);
6138 return Status;
6139 }
6140
6141 Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp);
6142 if (!NT_SUCCESS(Status)) {
6143 ERR("insert_tree_item returned %08x\n", Status);
6144 ExFreePool(ri);
6145 return Status;
6146 }
6147 } else {
6148 tp.tree->write = TRUE;
6149 }
6150
6151 return STATUS_SUCCESS;
6152 }
6153
6154 static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp) {
6155 NTSTATUS Status;
6156
6157 // if fileref created and then immediately deleted, do nothing
6158 if (fileref->created && fileref->deleted) {
6159 fileref->dirty = FALSE;
6160 return STATUS_SUCCESS;
6161 }
6162
6163 if (fileref->fcb->ads) {
6164 fileref->dirty = FALSE;
6165 return STATUS_SUCCESS;
6166 }
6167
6168 if (fileref->created) {
6169 UINT16 disize;
6170 DIR_ITEM *di, *di2;
6171 UINT32 crc32;
6172
6173 crc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6174
6175 disize = (UINT16)(offsetof(DIR_ITEM, name[0]) + fileref->dc->utf8.Length);
6176 di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6177 if (!di) {
6178 ERR("out of memory\n");
6179 return STATUS_INSUFFICIENT_RESOURCES;
6180 }
6181
6182 if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6183 di->key.obj_id = fileref->fcb->inode;
6184 di->key.obj_type = TYPE_INODE_ITEM;
6185 di->key.offset = 0;
6186 } else { // subvolume
6187 di->key.obj_id = fileref->fcb->subvol->id;
6188 di->key.obj_type = TYPE_ROOT_ITEM;
6189 di->key.offset = 0xffffffffffffffff;
6190 }
6191
6192 di->transid = fileref->fcb->Vcb->superblock.generation;
6193 di->m = 0;
6194 di->n = (UINT16)fileref->dc->utf8.Length;
6195 di->type = fileref->fcb->type;
6196 RtlCopyMemory(di->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6197
6198 di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6199 if (!di2) {
6200 ERR("out of memory\n");
6201 return STATUS_INSUFFICIENT_RESOURCES;
6202 }
6203
6204 RtlCopyMemory(di2, di, disize);
6205
6206 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6207 fileref->dc->index, di, disize, Batch_Insert);
6208 if (!NT_SUCCESS(Status)) {
6209 ERR("insert_tree_item_batch returned %08x\n", Status);
6210 return Status;
6211 }
6212
6213 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32,
6214 di2, disize, Batch_DirItem);
6215 if (!NT_SUCCESS(Status)) {
6216 ERR("insert_tree_item_batch returned %08x\n", Status);
6217 return Status;
6218 }
6219
6220 if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6221 INODE_REF* ir;
6222
6223 ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->dc->utf8.Length, ALLOC_TAG);
6224 if (!ir) {
6225 ERR("out of memory\n");
6226 return STATUS_INSUFFICIENT_RESOURCES;
6227 }
6228
6229 ir->index = fileref->dc->index;
6230 ir->n = fileref->dc->utf8.Length;
6231 RtlCopyMemory(ir->name, fileref->dc->utf8.Buffer, ir->n);
6232
6233 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode,
6234 ir, sizeof(INODE_REF) - 1 + ir->n, Batch_InodeRef);
6235 if (!NT_SUCCESS(Status)) {
6236 ERR("insert_tree_item_batch returned %08x\n", Status);
6237 return Status;
6238 }
6239 } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) {
6240 ULONG rrlen;
6241 ROOT_REF* rr;
6242
6243 rrlen = sizeof(ROOT_REF) - 1 + fileref->dc->utf8.Length;
6244
6245 rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG);
6246 if (!rr) {
6247 ERR("out of memory\n");
6248 return STATUS_INSUFFICIENT_RESOURCES;
6249 }
6250
6251 rr->dir = fileref->parent->fcb->inode;
6252 rr->index = fileref->dc->index;
6253 rr->n = fileref->dc->utf8.Length;
6254 RtlCopyMemory(rr->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6255
6256 Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp);
6257 if (!NT_SUCCESS(Status)) {
6258 ERR("add_root_ref returned %08x\n", Status);
6259 return Status;
6260 }
6261
6262 Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp);
6263 if (!NT_SUCCESS(Status)) {
6264 ERR("update_root_backref returned %08x\n", Status);
6265 return Status;
6266 }
6267 }
6268
6269 fileref->created = FALSE;
6270 } else if (fileref->deleted) {
6271 UINT32 crc32;
6272 ANSI_STRING* name;
6273 DIR_ITEM* di;
6274
6275 name = &fileref->oldutf8;
6276
6277 crc32 = calc_crc32c(0xfffffffe, (UINT8*)name->Buffer, name->Length);
6278
6279 TRACE("deleting %.*S\n", file_desc_fileref(fileref));
6280
6281 di = ExAllocatePoolWithTag(PagedPool, sizeof(DIR_ITEM) - 1 + name->Length, ALLOC_TAG);
6282 if (!di) {
6283 ERR("out of memory\n");
6284 return STATUS_INSUFFICIENT_RESOURCES;
6285 }
6286
6287 di->m = 0;
6288 di->n = name->Length;
6289 RtlCopyMemory(di->name, name->Buffer, name->Length);
6290
6291 // delete DIR_ITEM (0x54)
6292
6293 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM,
6294 crc32, di, sizeof(DIR_ITEM) - 1 + name->Length, Batch_DeleteDirItem);
6295 if (!NT_SUCCESS(Status)) {
6296 ERR("insert_tree_item_batch returned %08x\n", Status);
6297 return Status;
6298 }
6299
6300 if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6301 INODE_REF* ir;
6302
6303 // delete INODE_REF (0xc)
6304
6305 ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + name->Length, ALLOC_TAG);
6306 if (!ir) {
6307 ERR("out of memory\n");
6308 return STATUS_INSUFFICIENT_RESOURCES;
6309 }
6310
6311 ir->index = fileref->oldindex;
6312 ir->n = name->Length;
6313 RtlCopyMemory(ir->name, name->Buffer, name->Length);
6314
6315 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF,
6316 fileref->parent->fcb->inode, ir, sizeof(INODE_REF) - 1 + name->Length, Batch_DeleteInodeRef);
6317 if (!NT_SUCCESS(Status)) {
6318 ERR("insert_tree_item_batch returned %08x\n", Status);
6319 return Status;
6320 }
6321 } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { // subvolume
6322 Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, name, Irp);
6323 if (!NT_SUCCESS(Status)) {
6324 ERR("delete_root_ref returned %08x\n", Status);
6325 return Status;
6326 }
6327
6328 Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp);
6329 if (!NT_SUCCESS(Status)) {
6330 ERR("update_root_backref returned %08x\n", Status);
6331 return Status;
6332 }
6333 }
6334
6335 // delete DIR_INDEX (0x60)
6336
6337 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6338 fileref->oldindex, NULL, 0, Batch_Delete);
6339 if (!NT_SUCCESS(Status)) {
6340 ERR("insert_tree_item_batch returned %08x\n", Status);
6341 return Status;
6342 }
6343
6344 if (fileref->oldutf8.Buffer) {
6345 ExFreePool(fileref->oldutf8.Buffer);
6346 fileref->oldutf8.Buffer = NULL;
6347 }
6348 } else { // rename or change type
6349 PANSI_STRING oldutf8 = fileref->oldutf8.Buffer ? &fileref->oldutf8 : &fileref->dc->utf8;
6350 UINT32 crc32, oldcrc32;
6351 UINT16 disize;
6352 DIR_ITEM *olddi, *di, *di2;
6353
6354 crc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6355
6356 if (!fileref->oldutf8.Buffer)
6357 oldcrc32 = crc32;
6358 else
6359 oldcrc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->oldutf8.Buffer, fileref->oldutf8.Length);
6360
6361 olddi = ExAllocatePoolWithTag(PagedPool, sizeof(DIR_ITEM) - 1 + oldutf8->Length, ALLOC_TAG);
6362 if (!olddi) {
6363 ERR("out of memory\n");
6364 return STATUS_INSUFFICIENT_RESOURCES;
6365 }
6366
6367 olddi->m = 0;
6368 olddi->n = (UINT16)oldutf8->Length;
6369 RtlCopyMemory(olddi->name, oldutf8->Buffer, oldutf8->Length);
6370
6371 // delete DIR_ITEM (0x54)
6372
6373 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM,
6374 oldcrc32, olddi, sizeof(DIR_ITEM) - 1 + oldutf8->Length, Batch_DeleteDirItem);
6375 if (!NT_SUCCESS(Status)) {
6376 ERR("insert_tree_item_batch returned %08x\n", Status);
6377 ExFreePool(olddi);
6378 return Status;
6379 }
6380
6381 // add DIR_ITEM (0x54)
6382
6383 disize = (UINT16)(offsetof(DIR_ITEM, name[0]) + fileref->dc->utf8.Length);
6384 di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6385 if (!di) {
6386 ERR("out of memory\n");
6387 return STATUS_INSUFFICIENT_RESOURCES;
6388 }
6389
6390 di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
6391 if (!di2) {
6392 ERR("out of memory\n");
6393 ExFreePool(di);
6394 return STATUS_INSUFFICIENT_RESOURCES;
6395 }
6396
6397 if (fileref->dc)
6398 di->key = fileref->dc->key;
6399 else if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6400 di->key.obj_id = fileref->fcb->inode;
6401 di->key.obj_type = TYPE_INODE_ITEM;
6402 di->key.offset = 0;
6403 } else { // subvolume
6404 di->key.obj_id = fileref->fcb->subvol->id;
6405 di->key.obj_type = TYPE_ROOT_ITEM;
6406 di->key.offset = 0xffffffffffffffff;
6407 }
6408
6409 di->transid = fileref->fcb->Vcb->superblock.generation;
6410 di->m = 0;
6411 di->n = (UINT16)fileref->dc->utf8.Length;
6412 di->type = fileref->fcb->type;
6413 RtlCopyMemory(di->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6414
6415 RtlCopyMemory(di2, di, disize);
6416
6417 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32,
6418 di, disize, Batch_DirItem);
6419 if (!NT_SUCCESS(Status)) {
6420 ERR("insert_tree_item_batch returned %08x\n", Status);
6421 ExFreePool(di2);
6422 ExFreePool(di);
6423 return Status;
6424 }
6425
6426 if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
6427 INODE_REF *ir, *ir2;
6428
6429 // delete INODE_REF (0xc)
6430
6431 ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + oldutf8->Length, ALLOC_TAG);
6432 if (!ir) {
6433 ERR("out of memory\n");
6434 ExFreePool(di2);
6435 return STATUS_INSUFFICIENT_RESOURCES;
6436 }
6437
6438 ir->index = fileref->dc->index;
6439 ir->n = oldutf8->Length;
6440 RtlCopyMemory(ir->name, oldutf8->Buffer, ir->n);
6441
6442 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode,
6443 ir, sizeof(INODE_REF) - 1 + ir->n, Batch_DeleteInodeRef);
6444 if (!NT_SUCCESS(Status)) {
6445 ERR("insert_tree_item_batch returned %08x\n", Status);
6446 ExFreePool(ir);
6447 ExFreePool(di2);
6448 return Status;
6449 }
6450
6451 // add INODE_REF (0xc)
6452
6453 ir2 = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->dc->utf8.Length, ALLOC_TAG);
6454 if (!ir2) {
6455 ERR("out of memory\n");
6456 ExFreePool(di2);
6457 return STATUS_INSUFFICIENT_RESOURCES;
6458 }
6459
6460 ir2->index = fileref->dc->index;
6461 ir2->n = fileref->dc->utf8.Length;
6462 RtlCopyMemory(ir2->name, fileref->dc->utf8.Buffer, ir2->n);
6463
6464 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode,
6465 ir2, sizeof(INODE_REF) - 1 + ir2->n, Batch_InodeRef);
6466 if (!NT_SUCCESS(Status)) {
6467 ERR("insert_tree_item_batch returned %08x\n", Status);
6468 ExFreePool(ir2);
6469 ExFreePool(di2);
6470 return Status;
6471 }
6472 } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { // subvolume
6473 ULONG rrlen;
6474 ROOT_REF* rr;
6475
6476 Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, oldutf8, Irp);
6477 if (!NT_SUCCESS(Status)) {
6478 ERR("delete_root_ref returned %08x\n", Status);
6479 ExFreePool(di2);
6480 return Status;
6481 }
6482
6483 rrlen = sizeof(ROOT_REF) - 1 + fileref->dc->utf8.Length;
6484
6485 rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG);
6486 if (!rr) {
6487 ERR("out of memory\n");
6488 ExFreePool(di2);
6489 return STATUS_INSUFFICIENT_RESOURCES;
6490 }
6491
6492 rr->dir = fileref->parent->fcb->inode;
6493 rr->index = fileref->dc->index;
6494 rr->n = fileref->dc->utf8.Length;
6495 RtlCopyMemory(rr->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length);
6496
6497 Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp);
6498 if (!NT_SUCCESS(Status)) {
6499 ERR("add_root_ref returned %08x\n", Status);
6500 ExFreePool(di2);
6501 return Status;
6502 }
6503
6504 Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp);
6505 if (!NT_SUCCESS(Status)) {
6506 ERR("update_root_backref returned %08x\n", Status);
6507 ExFreePool(di2);
6508 return Status;
6509 }
6510 }
6511
6512 // delete DIR_INDEX (0x60)
6513
6514 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6515 fileref->dc->index, NULL, 0, Batch_Delete);
6516 if (!NT_SUCCESS(Status)) {
6517 ERR("insert_tree_item_batch returned %08x\n", Status);
6518 ExFreePool(di2);
6519 return Status;
6520 }
6521
6522 // add DIR_INDEX (0x60)
6523
6524 Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX,
6525 fileref->dc->index, di2, disize, Batch_Insert);
6526 if (!NT_SUCCESS(Status)) {
6527 ERR("insert_tree_item_batch returned %08x\n", Status);
6528 ExFreePool(di2);
6529 return Status;
6530 }
6531
6532 if (fileref->oldutf8.Buffer) {
6533 ExFreePool(fileref->oldutf8.Buffer);
6534 fileref->oldutf8.Buffer = NULL;
6535 }
6536 }
6537
6538 fileref->dirty = FALSE;
6539
6540 return STATUS_SUCCESS;
6541 }
6542
6543 static void flush_disk_caches(device_extension* Vcb) {
6544 LIST_ENTRY* le;
6545 ioctl_context context;
6546 ULONG num;
6547
6548 context.left = 0;
6549
6550 le = Vcb->devices.Flink;
6551
6552 while (le != &Vcb->devices) {
6553 device* dev = CONTAINING_RECORD(le, device, list_entry);
6554
6555 if (dev->devobj && !dev->readonly && dev->can_flush)
6556 context.left++;
6557
6558 le = le->Flink;
6559 }
6560
6561 if (context.left == 0)
6562 return;
6563
6564 num = 0;
6565
6566 KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
6567
6568 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG);
6569 if (!context.stripes) {
6570 ERR("out of memory\n");
6571 return;
6572 }
6573
6574 RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left);
6575
6576 le = Vcb->devices.Flink;
6577
6578 while (le != &Vcb->devices) {
6579 device* dev = CONTAINING_RECORD(le, device, list_entry);
6580
6581 if (dev->devobj && !dev->readonly && dev->can_flush) {
6582 PIO_STACK_LOCATION IrpSp;
6583 ioctl_context_stripe* stripe = &context.stripes[num];
6584
6585 RtlZeroMemory(&stripe->apte, sizeof(ATA_PASS_THROUGH_EX));
6586
6587 stripe->apte.Length = sizeof(ATA_PASS_THROUGH_EX);
6588 stripe->apte.TimeOutValue = 5;
6589 stripe->apte.CurrentTaskFile[6] = IDE_COMMAND_FLUSH_CACHE;
6590
6591 stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, FALSE);
6592
6593 if (!stripe->Irp) {
6594 ERR("IoAllocateIrp failed\n");
6595 goto nextdev;
6596 }
6597
6598 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
6599 IrpSp->MajorFunction = IRP_MJ_DEVICE_CONTROL;
6600
6601 IrpSp->Parameters.DeviceIoControl.IoControlCode = IOCTL_ATA_PASS_THROUGH;
6602 IrpSp->Parameters.DeviceIoControl.InputBufferLength = sizeof(ATA_PASS_THROUGH_EX);
6603 IrpSp->Parameters.DeviceIoControl.OutputBufferLength = sizeof(ATA_PASS_THROUGH_EX);
6604
6605 stripe->Irp->AssociatedIrp.SystemBuffer = &stripe->apte;
6606 stripe->Irp->Flags |= IRP_BUFFERED_IO | IRP_INPUT_OPERATION;
6607 stripe->Irp->UserBuffer = &stripe->apte;
6608 stripe->Irp->UserIosb = &stripe->iosb;
6609
6610 IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, TRUE, TRUE, TRUE);
6611
6612 IoCallDriver(dev->devobj, stripe->Irp);
6613
6614 nextdev:
6615 num++;
6616 }
6617
6618 le = le->Flink;
6619 }
6620
6621 KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
6622
6623 ExFreePool(context.stripes);
6624 }
6625
6626 static NTSTATUS flush_changed_dev_stats(device_extension* Vcb, device* dev, PIRP Irp) {
6627 NTSTATUS Status;
6628 KEY searchkey;
6629 traverse_ptr tp;
6630 UINT16 statslen;
6631 UINT64* stats;
6632
6633 searchkey.obj_id = 0;
6634 searchkey.obj_type = TYPE_DEV_STATS;
6635 searchkey.offset = dev->devitem.dev_id;
6636
6637 Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, Irp);
6638 if (!NT_SUCCESS(Status)) {
6639 ERR("find_item returned %08x\n", Status);
6640 return Status;
6641 }
6642
6643 if (!keycmp(tp.item->key, searchkey)) {
6644 Status = delete_tree_item(Vcb, &tp);
6645 if (!NT_SUCCESS(Status)) {
6646 ERR("delete_tree_item returned %08x\n", Status);
6647 return Status;
6648 }
6649 }
6650
6651 statslen = sizeof(UINT64) * 5;
6652 stats = ExAllocatePoolWithTag(PagedPool, statslen, ALLOC_TAG);
6653 if (!stats) {
6654 ERR("out of memory\n");
6655 return STATUS_INSUFFICIENT_RESOURCES;
6656 }
6657
6658 RtlCopyMemory(stats, dev->stats, statslen);
6659
6660 Status = insert_tree_item(Vcb, Vcb->dev_root, 0, TYPE_DEV_STATS, dev->devitem.dev_id, stats, statslen, NULL, Irp);
6661 if (!NT_SUCCESS(Status)) {
6662 ERR("insert_tree_item returned %08x\n", Status);
6663 ExFreePool(stats);
6664 return Status;
6665 }
6666
6667 return STATUS_SUCCESS;
6668 }
6669
6670 static NTSTATUS flush_subvol(device_extension* Vcb, root* r, PIRP Irp) {
6671 NTSTATUS Status;
6672
6673 if (r != Vcb->root_root && r != Vcb->chunk_root) {
6674 KEY searchkey;
6675 traverse_ptr tp;
6676 ROOT_ITEM* ri;
6677
6678 searchkey.obj_id = r->id;
6679 searchkey.obj_type = TYPE_ROOT_ITEM;
6680 searchkey.offset = 0xffffffffffffffff;
6681
6682 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
6683 if (!NT_SUCCESS(Status)) {
6684 ERR("error - find_item returned %08x\n", Status);
6685 return Status;
6686 }
6687
6688 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
6689 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
6690 return STATUS_INTERNAL_ERROR;
6691 }
6692
6693 ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
6694 if (!ri) {
6695 ERR("out of memory\n");
6696 return STATUS_INSUFFICIENT_RESOURCES;
6697 }
6698
6699 RtlCopyMemory(ri, &r->root_item, sizeof(ROOT_ITEM));
6700
6701 Status = delete_tree_item(Vcb, &tp);
6702 if (!NT_SUCCESS(Status)) {
6703 ERR("delete_tree_item returned %08x\n", Status);
6704 return Status;
6705 }
6706
6707 Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp);
6708 if (!NT_SUCCESS(Status)) {
6709 ERR("insert_tree_item returned %08x\n", Status);
6710 return Status;
6711 }
6712 }
6713
6714 if (r->received) {
6715 KEY searchkey;
6716 traverse_ptr tp;
6717
6718 if (!Vcb->uuid_root) {
6719 root* uuid_root;
6720
6721 TRACE("uuid root doesn't exist, creating it\n");
6722
6723 Status = create_root(Vcb, BTRFS_ROOT_UUID, &uuid_root, FALSE, 0, Irp);
6724
6725 if (!NT_SUCCESS(Status)) {
6726 ERR("create_root returned %08x\n", Status);
6727 return Status;
6728 }
6729
6730 Vcb->uuid_root = uuid_root;
6731 }
6732
6733 RtlCopyMemory(&searchkey.obj_id, &r->root_item.received_uuid, sizeof(UINT64));
6734 searchkey.obj_type = TYPE_SUBVOL_REC_UUID;
6735 RtlCopyMemory(&searchkey.offset, &r->root_item.received_uuid.uuid[sizeof(UINT64)], sizeof(UINT64));
6736
6737 Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp);
6738 if (!NT_SUCCESS(Status)) {
6739 ERR("find_item returned %08x\n", Status);
6740 return Status;
6741 }
6742
6743 if (!keycmp(tp.item->key, searchkey)) {
6744 if (tp.item->size + sizeof(UINT64) <= Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node)) {
6745 UINT64* ids;
6746
6747 ids = ExAllocatePoolWithTag(PagedPool, tp.item->size + sizeof(UINT64), ALLOC_TAG);
6748 if (!ids) {
6749 ERR("out of memory\n");
6750 return STATUS_INSUFFICIENT_RESOURCES;
6751 }
6752
6753 RtlCopyMemory(ids, tp.item->data, tp.item->size);
6754 RtlCopyMemory((UINT8*)ids + tp.item->size, &r->id, sizeof(UINT64));
6755
6756 Status = delete_tree_item(Vcb, &tp);
6757 if (!NT_SUCCESS(Status)) {
6758 ERR("delete_tree_item returned %08x\n", Status);
6759 ExFreePool(ids);
6760 return Status;
6761 }
6762
6763 Status = insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ids, tp.item->size + sizeof(UINT64), NULL, Irp);
6764 if (!NT_SUCCESS(Status)) {
6765 ERR("insert_tree_item returned %08x\n", Status);
6766 ExFreePool(ids);
6767 return Status;
6768 }
6769 }
6770 } else {
6771 UINT64* root_num;
6772
6773 root_num = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64), ALLOC_TAG);
6774 if (!root_num) {
6775 ERR("out of memory\n");
6776 return STATUS_INSUFFICIENT_RESOURCES;
6777 }
6778
6779 *root_num = r->id;
6780
6781 Status = insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, root_num, sizeof(UINT64), NULL, Irp);
6782 if (!NT_SUCCESS(Status)) {
6783 ERR("insert_tree_item returned %08x\n", Status);
6784 ExFreePool(root_num);
6785 return Status;
6786 }
6787 }
6788
6789 r->received = FALSE;
6790 }
6791
6792 r->dirty = FALSE;
6793
6794 return STATUS_SUCCESS;
6795 }
6796
6797 static NTSTATUS test_not_full(device_extension* Vcb) {
6798 UINT64 reserve, could_alloc, free_space;
6799 LIST_ENTRY* le;
6800
6801 // This function ensures we drop into readonly mode if we're about to leave very little
6802 // space for metadata - this is similar to the "global reserve" of the Linux driver.
6803 // Otherwise we might completely fill our space, at which point due to COW we can't
6804 // delete anything in order to fix this.
6805
6806 reserve = Vcb->extent_root->root_item.bytes_used;
6807 reserve += Vcb->root_root->root_item.bytes_used;
6808 if (Vcb->checksum_root) reserve += Vcb->checksum_root->root_item.bytes_used;
6809
6810 reserve = max(reserve, 0x1000000); // 16 M
6811 reserve = min(reserve, 0x20000000); // 512 M
6812
6813 // Find out how much space would be available for new metadata chunks
6814
6815 could_alloc = 0;
6816
6817 if (Vcb->metadata_flags & BLOCK_FLAG_RAID5) {
6818 UINT64 s1 = 0, s2 = 0, s3 = 0;
6819
6820 le = Vcb->devices.Flink;
6821 while (le != &Vcb->devices) {
6822 device* dev = CONTAINING_RECORD(le, device, list_entry);
6823
6824 if (!dev->readonly) {
6825 UINT64 space = dev->devitem.num_bytes - dev->devitem.bytes_used;
6826
6827 if (space >= s1) {
6828 s3 = s2;
6829 s2 = s1;
6830 s1 = space;
6831 } else if (space >= s2) {
6832 s3 = s2;
6833 s2 = space;
6834 } else if (space >= s3)
6835 s3 = space;
6836 }
6837
6838 le = le->Flink;
6839 }
6840
6841 could_alloc = s3 * 2;
6842 } else if (Vcb->metadata_flags & (BLOCK_FLAG_RAID10 | BLOCK_FLAG_RAID6)) {
6843 UINT64 s1 = 0, s2 = 0, s3 = 0, s4 = 0;
6844
6845 le = Vcb->devices.Flink;
6846 while (le != &Vcb->devices) {
6847 device* dev = CONTAINING_RECORD(le, device, list_entry);
6848
6849 if (!dev->readonly) {
6850 UINT64 space = dev->devitem.num_bytes - dev->devitem.bytes_used;
6851
6852 if (space >= s1) {
6853 s4 = s3;
6854 s3 = s2;
6855 s2 = s1;
6856 s1 = space;
6857 } else if (space >= s2) {
6858 s4 = s3;
6859 s3 = s2;
6860 s2 = space;
6861 } else if (space >= s3) {
6862 s4 = s3;
6863 s3 = space;
6864 } else if (space >= s4)
6865 s4 = space;
6866 }
6867
6868 le = le->Flink;
6869 }
6870
6871 could_alloc = s4 * 2;
6872 } else if (Vcb->metadata_flags & (BLOCK_FLAG_RAID0 | BLOCK_FLAG_RAID1)) {
6873 UINT64 s1 = 0, s2 = 0;
6874
6875 le = Vcb->devices.Flink;
6876 while (le != &Vcb->devices) {
6877 device* dev = CONTAINING_RECORD(le, device, list_entry);
6878
6879 if (!dev->readonly) {
6880 UINT64 space = dev->devitem.num_bytes - dev->devitem.bytes_used;
6881
6882 if (space >= s1) {
6883 s2 = s1;
6884 s1 = space;
6885 } else if (space >= s2)
6886 s2 = space;
6887 }
6888
6889 le = le->Flink;
6890 }
6891
6892 if (Vcb->metadata_flags & BLOCK_FLAG_RAID1)
6893 could_alloc = s2;
6894 else // RAID0
6895 could_alloc = s2 * 2;
6896 } else if (Vcb->metadata_flags & BLOCK_FLAG_DUPLICATE) {
6897 le = Vcb->devices.Flink;
6898 while (le != &Vcb->devices) {
6899 device* dev = CONTAINING_RECORD(le, device, list_entry);
6900
6901 if (!dev->readonly) {
6902 UINT64 space = (dev->devitem.num_bytes - dev->devitem.bytes_used) / 2;
6903
6904 could_alloc = max(could_alloc, space);
6905 }
6906
6907 le = le->Flink;
6908 }
6909 } else { // SINGLE
6910 le = Vcb->devices.Flink;
6911 while (le != &Vcb->devices) {
6912 device* dev = CONTAINING_RECORD(le, device, list_entry);
6913
6914 if (!dev->readonly) {
6915 UINT64 space = dev->devitem.num_bytes - dev->devitem.bytes_used;
6916
6917 could_alloc = max(could_alloc, space);
6918 }
6919
6920 le = le->Flink;
6921 }
6922 }
6923
6924 if (could_alloc >= reserve)
6925 return STATUS_SUCCESS;
6926
6927 free_space = 0;
6928
6929 le = Vcb->chunks.Flink;
6930 while (le != &Vcb->chunks) {
6931 chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
6932
6933 if (!c->reloc && !c->readonly && c->chunk_item->type & BLOCK_FLAG_METADATA) {
6934 free_space += c->chunk_item->size - c->used;
6935
6936 if (free_space + could_alloc >= reserve)
6937 return STATUS_SUCCESS;
6938 }
6939
6940 le = le->Flink;
6941 }
6942
6943 return STATUS_DISK_FULL;
6944 }
6945
6946 static NTSTATUS do_write2(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
6947 NTSTATUS Status;
6948 LIST_ENTRY *le, batchlist;
6949 BOOL cache_changed = FALSE;
6950 volume_device_extension* vde;
6951 BOOL no_cache = FALSE;
6952 #ifdef DEBUG_FLUSH_TIMES
6953 UINT64 filerefs = 0, fcbs = 0;
6954 LARGE_INTEGER freq, time1, time2;
6955 #endif
6956 #ifdef DEBUG_WRITE_LOOPS
6957 UINT loops = 0;
6958 #endif
6959
6960 TRACE("(%p)\n", Vcb);
6961
6962 InitializeListHead(&batchlist);
6963
6964 #ifdef DEBUG_FLUSH_TIMES
6965 time1 = KeQueryPerformanceCounter(&freq);
6966 #endif
6967
6968 ExAcquireResourceExclusiveLite(&Vcb->dirty_filerefs_lock, TRUE);
6969
6970 while (!IsListEmpty(&Vcb->dirty_filerefs)) {
6971 file_ref* fr = CONTAINING_RECORD(RemoveHeadList(&Vcb->dirty_filerefs), file_ref, list_entry_dirty);
6972
6973 flush_fileref(fr, &batchlist, Irp);
6974 free_fileref(fr);
6975
6976 #ifdef DEBUG_FLUSH_TIMES
6977 filerefs++;
6978 #endif
6979 }
6980
6981 ExReleaseResourceLite(&Vcb->dirty_filerefs_lock);
6982
6983 Status = commit_batch_list(Vcb, &batchlist, Irp);
6984 if (!NT_SUCCESS(Status)) {
6985 ERR("commit_batch_list returned %08x\n", Status);
6986 return Status;
6987 }
6988
6989 #ifdef DEBUG_FLUSH_TIMES
6990 time2 = KeQueryPerformanceCounter(NULL);
6991
6992 ERR("flushed %llu filerefs in %llu (freq = %llu)\n", filerefs, time2.QuadPart - time1.QuadPart, freq.QuadPart);
6993
6994 time1 = KeQueryPerformanceCounter(&freq);
6995 #endif
6996
6997 // We process deleted streams first, so we don't run over our xattr
6998 // limit unless we absolutely have to.
6999 // We also process deleted normal files, to avoid any problems
7000 // caused by inode collisions.
7001
7002 ExAcquireResourceExclusiveLite(&Vcb->dirty_fcbs_lock, TRUE);
7003
7004 le = Vcb->dirty_fcbs.Flink;
7005 while (le != &Vcb->dirty_fcbs) {
7006 fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_dirty);
7007 LIST_ENTRY* le2 = le->Flink;
7008
7009 if (fcb->deleted) {
7010 ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE);
7011 Status = flush_fcb(fcb, FALSE, &batchlist, Irp);
7012 ExReleaseResourceLite(fcb->Header.Resource);
7013
7014 free_fcb(fcb);
7015
7016 if (!NT_SUCCESS(Status)) {
7017 ERR("flush_fcb returned %08x\n", Status);
7018 clear_batch_list(Vcb, &batchlist);
7019 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
7020 return Status;
7021 }
7022
7023 #ifdef DEBUG_FLUSH_TIMES
7024 fcbs++;
7025 #endif
7026 }
7027
7028 le = le2;
7029 }
7030
7031 Status = commit_batch_list(Vcb, &batchlist, Irp);
7032 if (!NT_SUCCESS(Status)) {
7033 ERR("commit_batch_list returned %08x\n", Status);
7034 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
7035 return Status;
7036 }
7037
7038 le = Vcb->dirty_fcbs.Flink;
7039 while (le != &Vcb->dirty_fcbs) {
7040 fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_dirty);
7041 LIST_ENTRY* le2 = le->Flink;
7042
7043 if (fcb->subvol != Vcb->root_root) {
7044 ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE);
7045 Status = flush_fcb(fcb, FALSE, &batchlist, Irp);
7046 ExReleaseResourceLite(fcb->Header.Resource);
7047 free_fcb(fcb);
7048
7049 if (!NT_SUCCESS(Status)) {
7050 ERR("flush_fcb returned %08x\n", Status);
7051 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
7052 return Status;
7053 }
7054
7055 #ifdef DEBUG_FLUSH_TIMES
7056 fcbs++;
7057 #endif
7058 }
7059
7060 le = le2;
7061 }
7062
7063 ExReleaseResourceLite(&Vcb->dirty_fcbs_lock);
7064
7065 Status = commit_batch_list(Vcb, &batchlist, Irp);
7066 if (!NT_SUCCESS(Status)) {
7067 ERR("commit_batch_list returned %08x\n", Status);
7068 return Status;
7069 }
7070
7071 #ifdef DEBUG_FLUSH_TIMES
7072 time2 = KeQueryPerformanceCounter(NULL);
7073
7074 ERR("flushed %llu fcbs in %llu (freq = %llu)\n", filerefs, time2.QuadPart - time1.QuadPart, freq.QuadPart);
7075 #endif
7076
7077 // no need to get dirty_subvols_lock here, as we have tree_lock exclusively
7078 while (!IsListEmpty(&Vcb->dirty_subvols)) {
7079 root* r = CONTAINING_RECORD(RemoveHeadList(&Vcb->dirty_subvols), root, list_entry_dirty);
7080
7081 Status = flush_subvol(Vcb, r, Irp);
7082 if (!NT_SUCCESS(Status)) {
7083 ERR("flush_subvol returned %08x\n", Status);
7084 return Status;
7085 }
7086 }
7087
7088 if (!IsListEmpty(&Vcb->drop_roots)) {
7089 Status = drop_roots(Vcb, Irp, rollback);
7090
7091 if (!NT_SUCCESS(Status)) {
7092 ERR("drop_roots returned %08x\n", Status);
7093 return Status;
7094 }
7095 }
7096
7097 Status = update_chunks(Vcb, &batchlist, Irp, rollback);
7098
7099 if (!NT_SUCCESS(Status)) {
7100 ERR("update_chunks returned %08x\n", Status);
7101 return Status;
7102 }
7103
7104 Status = commit_batch_list(Vcb, &batchlist, Irp);
7105
7106 // If only changing superblock, e.g. changing label, we still need to rewrite
7107 // the root tree so the generations match, otherwise you won't be able to mount on Linux.
7108 if (!Vcb->root_root->treeholder.tree || !Vcb->root_root->treeholder.tree->write) {
7109 KEY searchkey;
7110
7111 traverse_ptr tp;
7112
7113 searchkey.obj_id = 0;
7114 searchkey.obj_type = 0;
7115 searchkey.offset = 0;
7116
7117 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
7118 if (!NT_SUCCESS(Status)) {
7119 ERR("error - find_item returned %08x\n", Status);
7120 return Status;
7121 }
7122
7123 Vcb->root_root->treeholder.tree->write = TRUE;
7124 }
7125
7126 // make sure we always update the extent tree
7127 Status = add_root_item_to_cache(Vcb, BTRFS_ROOT_EXTENT, Irp);
7128 if (!NT_SUCCESS(Status)) {
7129 ERR("add_root_item_to_cache returned %08x\n", Status);
7130 return Status;
7131 }
7132
7133 if (Vcb->stats_changed) {
7134 le = Vcb->devices.Flink;
7135 while (le != &Vcb->devices) {
7136 device* dev = CONTAINING_RECORD(le, device, list_entry);
7137
7138 if (dev->stats_changed) {
7139 Status = flush_changed_dev_stats(Vcb, dev, Irp);
7140 if (!NT_SUCCESS(Status)) {
7141 ERR("flush_changed_dev_stats returned %08x\n", Status);
7142 return Status;
7143 }
7144 dev->stats_changed = FALSE;
7145 }
7146
7147 le = le->Flink;
7148 }
7149
7150 Vcb->stats_changed = FALSE;
7151 }
7152
7153 do {
7154 Status = add_parents(Vcb, Irp);
7155 if (!NT_SUCCESS(Status)) {
7156 ERR("add_parents returned %08x\n", Status);
7157 goto end;
7158 }
7159
7160 Status = allocate_tree_extents(Vcb, Irp, rollback);
7161 if (!NT_SUCCESS(Status)) {
7162 ERR("allocate_tree_extents returned %08x\n", Status);
7163 goto end;
7164 }
7165
7166 Status = do_splits(Vcb, Irp, rollback);
7167 if (!NT_SUCCESS(Status)) {
7168 ERR("do_splits returned %08x\n", Status);
7169 goto end;
7170 }
7171
7172 Status = update_chunk_usage(Vcb, Irp, rollback);
7173 if (!NT_SUCCESS(Status)) {
7174 ERR("update_chunk_usage returned %08x\n", Status);
7175 goto end;
7176 }
7177
7178 if (!(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE)) {
7179 if (!no_cache) {
7180 Status = allocate_cache(Vcb, &cache_changed, Irp, rollback);
7181 if (!NT_SUCCESS(Status)) {
7182 WARN("allocate_cache returned %08x\n", Status);
7183 no_cache = TRUE;
7184 cache_changed = FALSE;
7185 }
7186 }
7187 } else {
7188 Status = update_chunk_caches_tree(Vcb, Irp);
7189 if (!NT_SUCCESS(Status)) {
7190 ERR("update_chunk_caches_tree returned %08x\n", Status);
7191 goto end;
7192 }
7193 }
7194
7195 #ifdef DEBUG_WRITE_LOOPS
7196 loops++;
7197
7198 if (cache_changed)
7199 ERR("cache has changed, looping again\n");
7200 #endif
7201 } while (cache_changed || !trees_consistent(Vcb));
7202
7203 #ifdef DEBUG_WRITE_LOOPS
7204 ERR("%u loops\n", loops);
7205 #endif
7206
7207 TRACE("trees consistent\n");
7208
7209 Status = update_root_root(Vcb, no_cache, Irp, rollback);
7210 if (!NT_SUCCESS(Status)) {
7211 ERR("update_root_root returned %08x\n", Status);
7212 goto end;
7213 }
7214
7215 Status = write_trees(Vcb, Irp);
7216 if (!NT_SUCCESS(Status)) {
7217 ERR("write_trees returned %08x\n", Status);
7218 goto end;
7219 }
7220
7221 Status = test_not_full(Vcb);
7222 if (!NT_SUCCESS(Status)) {
7223 ERR("test_not_full returned %08x\n", Status);
7224 goto end;
7225 }
7226
7227 #ifdef DEBUG_PARANOID
7228 le = Vcb->trees.Flink;
7229 while (le != &Vcb->trees) {
7230 tree* t = CONTAINING_RECORD(le, tree, list_entry);
7231 KEY searchkey;
7232 traverse_ptr tp;
7233
7234 searchkey.obj_id = t->header.address;
7235 searchkey.obj_type = TYPE_METADATA_ITEM;
7236 searchkey.offset = 0xffffffffffffffff;
7237
7238 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
7239 if (!NT_SUCCESS(Status)) {
7240 ERR("error - find_item returned %08x\n", Status);
7241 goto end;
7242 }
7243
7244 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
7245 searchkey.obj_id = t->header.address;
7246 searchkey.obj_type = TYPE_EXTENT_ITEM;
7247 searchkey.offset = 0xffffffffffffffff;
7248
7249 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
7250 if (!NT_SUCCESS(Status)) {
7251 ERR("error - find_item returned %08x\n", Status);
7252 goto end;
7253 }
7254
7255 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
7256 ERR("error - could not find entry in extent tree for tree at %llx\n", t->header.address);
7257 Status = STATUS_INTERNAL_ERROR;
7258 goto end;
7259 }
7260 }
7261
7262 le = le->Flink;
7263 }
7264 #endif
7265
7266 Vcb->superblock.cache_generation = Vcb->superblock.generation;
7267
7268 if (!Vcb->options.no_barrier)
7269 flush_disk_caches(Vcb);
7270
7271 Status = write_superblocks(Vcb, Irp);
7272 if (!NT_SUCCESS(Status)) {
7273 ERR("write_superblocks returned %08x\n", Status);
7274 goto end;
7275 }
7276
7277 vde = Vcb->vde;
7278
7279 if (vde) {
7280 pdo_device_extension* pdode = vde->pdode;
7281
7282 ExAcquireResourceSharedLite(&pdode->child_lock, TRUE);
7283
7284 le = pdode->children.Flink;
7285
7286 while (le != &pdode->children) {
7287 volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry);
7288
7289 vc->generation = Vcb->superblock.generation;
7290 le = le->Flink;
7291 }
7292
7293 ExReleaseResourceLite(&pdode->child_lock);
7294 }
7295
7296 clean_space_cache(Vcb);
7297
7298 le = Vcb->chunks.Flink;
7299 while (le != &Vcb->chunks) {
7300 chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
7301
7302 c->changed = FALSE;
7303 c->space_changed = FALSE;
7304
7305 le = le->Flink;
7306 }
7307
7308 Vcb->superblock.generation++;
7309
7310 Status = STATUS_SUCCESS;
7311
7312 le = Vcb->trees.Flink;
7313 while (le != &Vcb->trees) {
7314 tree* t = CONTAINING_RECORD(le, tree, list_entry);
7315
7316 t->write = FALSE;
7317
7318 le = le->Flink;
7319 }
7320
7321 Vcb->need_write = FALSE;
7322
7323 while (!IsListEmpty(&Vcb->drop_roots)) {
7324 root* r = CONTAINING_RECORD(RemoveHeadList(&Vcb->drop_roots), root, list_entry);
7325
7326 ExDeleteResourceLite(&r->nonpaged->load_tree_lock);
7327 ExFreePool(r->nonpaged);
7328 ExFreePool(r);
7329 }
7330
7331 end:
7332 TRACE("do_write returning %08x\n", Status);
7333
7334 return Status;
7335 }
7336
7337 NTSTATUS do_write(device_extension* Vcb, PIRP Irp) {
7338 LIST_ENTRY rollback;
7339 NTSTATUS Status;
7340
7341 InitializeListHead(&rollback);
7342
7343 Status = do_write2(Vcb, Irp, &rollback);
7344
7345 if (!NT_SUCCESS(Status)) {
7346 ERR("do_write2 returned %08x, dropping into readonly mode\n", Status);
7347 Vcb->readonly = TRUE;
7348 FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_FORCED_CLOSED);
7349 do_rollback(Vcb, &rollback);
7350 } else
7351 clear_rollback(&rollback);
7352
7353 return Status;
7354 }
7355
#ifdef DEBUG_STATS
/*
 * Dumps the counters accumulated in Vcb->stats through ERR and then
 * zeroes them, so each call reports activity since the previous call.
 * Only compiled when DEBUG_STATS is defined.
 */
static void print_stats(device_extension* Vcb) {
    LARGE_INTEGER perf_freq;

    // Read-path counters.
    ERR("READ STATS:\n");
    ERR("number of reads: %llu\n", Vcb->stats.num_reads);
    ERR("data read: %llu bytes\n", Vcb->stats.data_read);
    ERR("total time taken: %llu\n", Vcb->stats.read_total_time);
    ERR("csum time taken: %llu\n", Vcb->stats.read_csum_time);
    ERR("disk time taken: %llu\n", Vcb->stats.read_disk_time);
    // Whatever part of the total is not attributed to checksumming or disk I/O.
    ERR("other time taken: %llu\n", Vcb->stats.read_total_time - Vcb->stats.read_csum_time - Vcb->stats.read_disk_time);

    // Only the frequency written to perf_freq is used; the returned
    // counter value itself is not needed here.
    (void)KeQueryPerformanceCounter(&perf_freq);

    // Open/create-path counters.
    ERR("OPEN STATS (freq = %llu):\n", perf_freq.QuadPart);
    ERR("number of opens: %llu\n", Vcb->stats.num_opens);
    ERR("total time taken: %llu\n", Vcb->stats.open_total_time);
    ERR("number of overwrites: %llu\n", Vcb->stats.num_overwrites);
    ERR("total time taken: %llu\n", Vcb->stats.overwrite_total_time);
    ERR("number of creates: %llu\n", Vcb->stats.num_creates);
    ERR("calls to open_fcb: %llu\n", Vcb->stats.open_fcb_calls);
    ERR("time spent in open_fcb: %llu\n", Vcb->stats.open_fcb_time);
    ERR("calls to open_fileref_child: %llu\n", Vcb->stats.open_fileref_child_calls);
    ERR("time spent in open_fileref_child: %llu\n", Vcb->stats.open_fileref_child_time);
    ERR("time spent waiting for fcb_lock: %llu\n", Vcb->stats.fcb_lock_time);
    ERR("total time taken: %llu\n", Vcb->stats.create_total_time);

    // Start the next reporting period from zero.
    RtlZeroMemory(&Vcb->stats, sizeof(debug_stats));
}
#endif
7386
7387 static void do_flush(device_extension* Vcb) {
7388 NTSTATUS Status;
7389
7390 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
7391
7392 #ifdef DEBUG_STATS
7393 print_stats(Vcb);
7394 #endif
7395
7396 if (Vcb->need_write && !Vcb->readonly)
7397 Status = do_write(Vcb, NULL);
7398 else
7399 Status = STATUS_SUCCESS;
7400
7401 free_trees(Vcb);
7402
7403 if (!NT_SUCCESS(Status))
7404 ERR("do_write returned %08x\n", Status);
7405
7406 ExReleaseResourceLite(&Vcb->tree_lock);
7407 }
7408
7409 _Function_class_(KSTART_ROUTINE)
7410 #ifdef __REACTOS__
7411 void NTAPI flush_thread(void* context) {
7412 #else
7413 void flush_thread(void* context) {
7414 #endif
7415 DEVICE_OBJECT* devobj = context;
7416 device_extension* Vcb = devobj->DeviceExtension;
7417 LARGE_INTEGER due_time;
7418
7419 ObReferenceObject(devobj);
7420
7421 KeInitializeTimer(&Vcb->flush_thread_timer);
7422
7423 due_time.QuadPart = (UINT64)Vcb->options.flush_interval * -10000000;
7424
7425 KeSetTimer(&Vcb->flush_thread_timer, due_time, NULL);
7426
7427 while (TRUE) {
7428 KeWaitForSingleObject(&Vcb->flush_thread_timer, Executive, KernelMode, FALSE, NULL);
7429
7430 if (!(devobj->Vpb->Flags & VPB_MOUNTED) || Vcb->removing)
7431 break;
7432
7433 if (!Vcb->locked)
7434 do_flush(Vcb);
7435
7436 KeSetTimer(&Vcb->flush_thread_timer, due_time, NULL);
7437 }
7438
7439 ObDereferenceObject(devobj);
7440 KeCancelTimer(&Vcb->flush_thread_timer);
7441
7442 KeSetEvent(&Vcb->flush_thread_finished, 0, FALSE);
7443
7444 PsTerminateSystemThread(STATUS_SUCCESS);
7445 }