[BTRFS]
[reactos.git] / reactos / drivers / filesystems / btrfs / treefuncs.c
1 /* Copyright (c) Mark Harmstone 2016
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19
20 // #define DEBUG_TREE_LOCKS
21
22 enum read_tree_status {
23 ReadTreeStatus_Pending,
24 ReadTreeStatus_Success,
25 ReadTreeStatus_Cancelling,
26 ReadTreeStatus_Cancelled,
27 ReadTreeStatus_Error,
28 ReadTreeStatus_CRCError,
29 ReadTreeStatus_MissingDevice
30 };
31
32 struct read_tree_context;
33
34 typedef struct {
35 struct read_tree_context* context;
36 UINT8* buf;
37 PIRP Irp;
38 IO_STATUS_BLOCK iosb;
39 enum read_tree_status status;
40 } read_tree_stripe;
41
42 typedef struct {
43 KEVENT Event;
44 NTSTATUS Status;
45 chunk* c;
46 // UINT8* buf;
47 UINT32 buflen;
48 UINT64 num_stripes;
49 LONG stripes_left;
50 UINT64 type;
51 read_tree_stripe* stripes;
52 } read_tree_context;
53
54 enum rollback_type {
55 ROLLBACK_INSERT_ITEM,
56 ROLLBACK_DELETE_ITEM
57 };
58
59 typedef struct {
60 enum rollback_type type;
61 void* ptr;
62 LIST_ENTRY list_entry;
63 } rollback_item;
64
65 static NTSTATUS STDCALL read_tree_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
66 read_tree_stripe* stripe = conptr;
67 read_tree_context* context = (read_tree_context*)stripe->context;
68 UINT64 i;
69
70 if (stripe->status == ReadTreeStatus_Cancelling) {
71 stripe->status = ReadTreeStatus_Cancelled;
72 goto end;
73 }
74
75 stripe->iosb = Irp->IoStatus;
76
77 if (NT_SUCCESS(Irp->IoStatus.Status)) {
78 tree_header* th = (tree_header*)stripe->buf;
79 UINT32 crc32;
80
81 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum));
82
83 if (crc32 == *((UINT32*)th->csum)) {
84 stripe->status = ReadTreeStatus_Success;
85
86 for (i = 0; i < context->num_stripes; i++) {
87 if (context->stripes[i].status == ReadTreeStatus_Pending) {
88 context->stripes[i].status = ReadTreeStatus_Cancelling;
89 IoCancelIrp(context->stripes[i].Irp);
90 }
91 }
92
93 goto end;
94 } else
95 stripe->status = ReadTreeStatus_CRCError;
96 } else {
97 stripe->status = ReadTreeStatus_Error;
98 }
99
100 end:
101 if (InterlockedDecrement(&context->stripes_left) == 0)
102 KeSetEvent(&context->Event, 0, FALSE);
103
104 // return STATUS_SUCCESS;
105 return STATUS_MORE_PROCESSING_REQUIRED;
106 }
107
108 NTSTATUS STDCALL read_tree(device_extension* Vcb, UINT64 addr, UINT8* buf) {
109 CHUNK_ITEM* ci;
110 CHUNK_ITEM_STRIPE* cis;
111 read_tree_context* context;
112 UINT64 i/*, type*/, offset;
113 NTSTATUS Status;
114 device** devices;
115
116 // FIXME - make this work with RAID
117
118 if (Vcb->log_to_phys_loaded) {
119 chunk* c = get_chunk_from_address(Vcb, addr);
120
121 if (!c) {
122 ERR("get_chunk_from_address failed\n");
123 return STATUS_INTERNAL_ERROR;
124 }
125
126 ci = c->chunk_item;
127 offset = c->offset;
128 devices = c->devices;
129 } else {
130 LIST_ENTRY* le = Vcb->sys_chunks.Flink;
131
132 ci = NULL;
133
134 while (le != &Vcb->sys_chunks) {
135 sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry);
136
137 if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) {
138 CHUNK_ITEM* chunk_item = sc->data;
139
140 if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) {
141 ci = chunk_item;
142 offset = sc->key.offset;
143 cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1];
144
145 devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG);
146 if (!devices) {
147 ERR("out of memory\n");
148 return STATUS_INSUFFICIENT_RESOURCES;
149 }
150
151 for (i = 0; i < ci->num_stripes; i++) {
152 devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
153 }
154
155 break;
156 }
157 }
158
159 le = le->Flink;
160 }
161
162 if (!ci) {
163 ERR("could not find chunk for %llx in bootstrap\n", addr);
164 return STATUS_INTERNAL_ERROR;
165 }
166 }
167
168 // if (ci->type & BLOCK_FLAG_DUPLICATE) {
169 // type = BLOCK_FLAG_DUPLICATE;
170 // } else if (ci->type & BLOCK_FLAG_RAID0) {
171 // FIXME("RAID0 not yet supported\n");
172 // return STATUS_NOT_IMPLEMENTED;
173 // } else if (ci->type & BLOCK_FLAG_RAID1) {
174 // FIXME("RAID1 not yet supported\n");
175 // return STATUS_NOT_IMPLEMENTED;
176 // } else if (ci->type & BLOCK_FLAG_RAID10) {
177 // FIXME("RAID10 not yet supported\n");
178 // return STATUS_NOT_IMPLEMENTED;
179 // } else if (ci->type & BLOCK_FLAG_RAID5) {
180 // FIXME("RAID5 not yet supported\n");
181 // return STATUS_NOT_IMPLEMENTED;
182 // } else if (ci->type & BLOCK_FLAG_RAID6) {
183 // FIXME("RAID6 not yet supported\n");
184 // return STATUS_NOT_IMPLEMENTED;
185 // } else { // SINGLE
186 // type = 0;
187 // }
188
189 cis = (CHUNK_ITEM_STRIPE*)&ci[1];
190
191 context = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_tree_context), ALLOC_TAG);
192 if (!context) {
193 ERR("out of memory\n");
194 return STATUS_INSUFFICIENT_RESOURCES;
195 }
196
197 RtlZeroMemory(context, sizeof(read_tree_context));
198 KeInitializeEvent(&context->Event, NotificationEvent, FALSE);
199
200 context->stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_tree_stripe) * ci->num_stripes, ALLOC_TAG);
201 if (!context->stripes) {
202 ERR("out of memory\n");
203 ExFreePool(context);
204 return STATUS_INSUFFICIENT_RESOURCES;
205 }
206
207 RtlZeroMemory(context->stripes, sizeof(read_tree_stripe) * ci->num_stripes);
208
209 context->buflen = Vcb->superblock.node_size;
210 context->num_stripes = ci->num_stripes;
211 context->stripes_left = context->num_stripes;
212 // context->type = type;
213
214 // FIXME - for RAID, check beforehand whether there's enough devices to satisfy request
215
216 for (i = 0; i < ci->num_stripes; i++) {
217 PIO_STACK_LOCATION IrpSp;
218
219 if (!devices[i]) {
220 context->stripes[i].status = ReadTreeStatus_MissingDevice;
221 context->stripes[i].buf = NULL;
222 context->stripes_left--;
223 } else {
224 context->stripes[i].context = (struct read_tree_context*)context;
225 context->stripes[i].buf = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
226
227 if (!context->stripes[i].buf) {
228 ERR("out of memory\n");
229 Status = STATUS_INSUFFICIENT_RESOURCES;
230 goto exit;
231 }
232
233 context->stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE);
234
235 if (!context->stripes[i].Irp) {
236 ERR("IoAllocateIrp failed\n");
237 Status = STATUS_INSUFFICIENT_RESOURCES;
238 goto exit;
239 }
240
241 IrpSp = IoGetNextIrpStackLocation(context->stripes[i].Irp);
242 IrpSp->MajorFunction = IRP_MJ_READ;
243
244 if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
245 FIXME("FIXME - buffered IO\n");
246 } else if (devices[i]->devobj->Flags & DO_DIRECT_IO) {
247 context->stripes[i].Irp->MdlAddress = IoAllocateMdl(context->stripes[i].buf, Vcb->superblock.node_size, FALSE, FALSE, NULL);
248 if (!context->stripes[i].Irp->MdlAddress) {
249 ERR("IoAllocateMdl failed\n");
250 Status = STATUS_INSUFFICIENT_RESOURCES;
251 goto exit;
252 }
253
254 MmProbeAndLockPages(context->stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
255 } else {
256 context->stripes[i].Irp->UserBuffer = context->stripes[i].buf;
257 }
258
259 IrpSp->Parameters.Read.Length = Vcb->superblock.node_size;
260 IrpSp->Parameters.Read.ByteOffset.QuadPart = addr - offset + cis[i].offset;
261
262 context->stripes[i].Irp->UserIosb = &context->stripes[i].iosb;
263
264 IoSetCompletionRoutine(context->stripes[i].Irp, read_tree_completion, &context->stripes[i], TRUE, TRUE, TRUE);
265
266 context->stripes[i].status = ReadTreeStatus_Pending;
267 }
268 }
269
270 for (i = 0; i < ci->num_stripes; i++) {
271 if (context->stripes[i].status != ReadTreeStatus_MissingDevice) {
272 IoCallDriver(devices[i]->devobj, context->stripes[i].Irp);
273 }
274 }
275
276 KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
277
278 // FIXME - if checksum error, write good data over bad
279
280 // check if any of the devices return a "user-induced" error
281
282 for (i = 0; i < ci->num_stripes; i++) {
283 if (context->stripes[i].status == ReadTreeStatus_Error && IoIsErrorUserInduced(context->stripes[i].iosb.Status)) {
284 IoSetHardErrorOrVerifyDevice(context->stripes[i].Irp, devices[i]->devobj);
285
286 Status = context->stripes[i].iosb.Status;
287 goto exit;
288 }
289 }
290
291 // check if any of the stripes succeeded
292
293 for (i = 0; i < ci->num_stripes; i++) {
294 if (context->stripes[i].status == ReadTreeStatus_Success) {
295 RtlCopyMemory(buf, context->stripes[i].buf, Vcb->superblock.node_size);
296 Status = STATUS_SUCCESS;
297 goto exit;
298 }
299 }
300
301 // if not, see if we got a checksum error
302
303 for (i = 0; i < ci->num_stripes; i++) {
304 if (context->stripes[i].status == ReadTreeStatus_CRCError) {
305 #ifdef _DEBUG
306 tree_header* th = (tree_header*)context->stripes[i].buf;
307 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum));
308 // UINT64 j;
309
310 WARN("stripe %llu had a checksum error\n", i);
311 WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
312 #endif
313
314 // for (j = 0; j < ci->num_stripes; j++) {
315 // WARN("stripe %llu: device = %p, status = %u\n", j, c->devices[j], context->stripes[j].status);
316 // }
317 // int3;
318
319 Status = STATUS_IMAGE_CHECKSUM_MISMATCH;
320 goto exit;
321 }
322 }
323
324 // failing that, return the first error we encountered
325
326 for (i = 0; i < ci->num_stripes; i++) {
327 if (context->stripes[i].status == ReadTreeStatus_Error) {
328 Status = context->stripes[i].iosb.Status;
329 goto exit;
330 }
331 }
332
333 // if we somehow get here, return STATUS_INTERNAL_ERROR
334
335 Status = STATUS_INTERNAL_ERROR;
336
337 // for (i = 0; i < ci->num_stripes; i++) {
338 // ERR("%llx: status = %u, NTSTATUS = %08x\n", i, context->stripes[i].status, context->stripes[i].iosb.Status);
339 // }
340 exit:
341
342 for (i = 0; i < ci->num_stripes; i++) {
343 if (context->stripes[i].Irp) {
344 if (devices[i]->devobj->Flags & DO_DIRECT_IO) {
345 MmUnlockPages(context->stripes[i].Irp->MdlAddress);
346 IoFreeMdl(context->stripes[i].Irp->MdlAddress);
347 }
348 IoFreeIrp(context->stripes[i].Irp);
349 }
350
351 if (context->stripes[i].buf)
352 ExFreePool(context->stripes[i].buf);
353 }
354
355 ExFreePool(context->stripes);
356 ExFreePool(context);
357
358 if (!Vcb->log_to_phys_loaded)
359 ExFreePool(devices);
360
361 return Status;
362 }
363
364 NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree** pt, const char* func, const char* file, unsigned int line) {
365 UINT8* buf;
366 NTSTATUS Status;
367 tree_header* th;
368 tree* t;
369 tree_data* td;
370 chunk* c;
371
372 TRACE("(%p, %llx)\n", Vcb, addr);
373
374 buf = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
375 if (!buf) {
376 ERR("out of memory\n");
377 return STATUS_INSUFFICIENT_RESOURCES;
378 }
379
380 Status = read_tree(Vcb, addr, buf);
381 if (!NT_SUCCESS(Status)) {
382 ERR("read_tree returned 0x%08x\n", Status);
383 ExFreePool(buf);
384 return Status;
385 }
386
387 th = (tree_header*)buf;
388
389 t = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
390 if (!t) {
391 ERR("out of memory\n");
392 ExFreePool(buf);
393 return STATUS_INSUFFICIENT_RESOURCES;
394 }
395
396 RtlCopyMemory(&t->header, th, sizeof(tree_header));
397 // t->address = addr;
398 // t->level = th->level;
399 t->has_address = TRUE;
400 t->Vcb = Vcb;
401 t->parent = NULL;
402 t->root = r;
403 // t->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
404 t->paritem = NULL;
405 t->size = 0;
406 t->new_address = 0;
407 t->has_new_address = FALSE;
408 t->write = FALSE;
409
410 c = get_chunk_from_address(Vcb, addr);
411
412 if (c)
413 t->flags = c->chunk_item->type;
414 else
415 t->flags = 0;
416
417 // ExInitializeResourceLite(&t->nonpaged->load_tree_lock);
418
419 // t->items = ExAllocatePoolWithTag(PagedPool, num_items * sizeof(tree_data), ALLOC_TAG);
420 InitializeListHead(&t->itemlist);
421
422 if (t->header.level == 0) { // leaf node
423 leaf_node* ln = (leaf_node*)(buf + sizeof(tree_header));
424 unsigned int i;
425
426 for (i = 0; i < t->header.num_items; i++) {
427 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
428 if (!td) {
429 ERR("out of memory\n");
430 ExFreePool(buf);
431 return STATUS_INSUFFICIENT_RESOURCES;
432 }
433
434 td->key = ln[i].key;
435 // TRACE("load_tree: leaf item %u (%x,%x,%x)\n", i, (UINT32)ln[i].key.obj_id, ln[i].key.obj_type, (UINT32)ln[i].key.offset);
436
437 if (ln[i].size > 0) {
438 td->data = ExAllocatePoolWithTag(PagedPool, ln[i].size, ALLOC_TAG);
439 if (!td->data) {
440 ERR("out of memory\n");
441 ExFreePool(buf);
442 return STATUS_INSUFFICIENT_RESOURCES;
443 }
444
445 RtlCopyMemory(td->data, buf + sizeof(tree_header) + ln[i].offset, ln[i].size);
446 } else
447 td->data = NULL;
448
449 td->size = ln[i].size;
450 td->ignore = FALSE;
451 td->inserted = FALSE;
452
453 InsertTailList(&t->itemlist, &td->list_entry);
454
455 t->size += ln[i].size;
456 }
457
458 t->size += t->header.num_items * sizeof(leaf_node);
459 } else {
460 internal_node* in = (internal_node*)(buf + sizeof(tree_header));
461 unsigned int i;
462
463 for (i = 0; i < t->header.num_items; i++) {
464 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
465 if (!td) {
466 ERR("out of memory\n");
467 ExFreePool(buf);
468 return STATUS_INSUFFICIENT_RESOURCES;
469 }
470
471 td->key = in[i].key;
472 // TRACE("load_tree: internal item %u (%x,%x,%x)\n", i, (UINT32)in[i].key.obj_id, in[i].key.obj_type, (UINT32)in[i].key.offset);
473
474 td->treeholder.address = in[i].address;
475 td->treeholder.generation = in[i].generation;
476 td->treeholder.tree = NULL;
477 init_tree_holder(&td->treeholder);
478 // td->treeholder.nonpaged->status = tree_holder_unloaded;
479 td->ignore = FALSE;
480 td->inserted = FALSE;
481
482 InsertTailList(&t->itemlist, &td->list_entry);
483 }
484
485 t->size = t->header.num_items * sizeof(internal_node);
486 }
487
488 ExFreePool(buf);
489
490 InterlockedIncrement(&Vcb->open_trees);
491 InsertTailList(&Vcb->trees, &t->list_entry);
492
493 TRACE("returning %p\n", t);
494
495 *pt = t;
496
497 return STATUS_SUCCESS;
498 }
499
500 static tree* free_tree2(tree* t, const char* func, const char* file, unsigned int line) {
501 LIST_ENTRY* le;
502 tree_data* td;
503 tree* par;
504 root* r = t->root;
505
506 par = t->parent;
507
508 // if (par) ExAcquireResourceExclusiveLite(&par->nonpaged->load_tree_lock, TRUE);
509
510 if (r && r->treeholder.tree != t)
511 r = NULL;
512
513 // if (r) {
514 // FsRtlEnterFileSystem();
515 // ExAcquireResourceExclusiveLite(&r->nonpaged->load_tree_lock, TRUE);
516 // }
517
518 if (par) {
519 if (t->paritem)
520 t->paritem->treeholder.tree = NULL;
521
522 // ExReleaseResourceLite(&par->nonpaged->load_tree_lock);
523 }
524
525 // ExDeleteResourceLite(&t->nonpaged->load_tree_lock);
526
527 // ExFreePool(t->nonpaged);
528
529 while (!IsListEmpty(&t->itemlist)) {
530 le = RemoveHeadList(&t->itemlist);
531 td = CONTAINING_RECORD(le, tree_data, list_entry);
532
533 if (t->header.level == 0 && td->data)
534 ExFreePool(td->data);
535
536 ExFreePool(td);
537 }
538
539 InterlockedDecrement(&t->Vcb->open_trees);
540 RemoveEntryList(&t->list_entry);
541
542 if (r) {
543 r->treeholder.tree = NULL;
544 // ExReleaseResourceLite(&r->nonpaged->load_tree_lock);
545 // FsRtlExitFileSystem();
546 }
547
548 ExFreePool(t);
549
550 return NULL;
551 }
552
553 NTSTATUS STDCALL _do_load_tree(device_extension* Vcb, tree_holder* th, root* r, tree* t, tree_data* td, BOOL* loaded, const char* func, const char* file, unsigned int line) {
554 // KIRQL irql;
555 // tree_holder_nonpaged* thnp = th->nonpaged;
556 BOOL ret;
557
558 // ExAcquireResourceExclusiveLite(&thnp->lock, TRUE);
559 ExAcquireResourceExclusiveLite(&r->nonpaged->load_tree_lock, TRUE);
560
561 // KeAcquireSpinLock(&thnp->spin_lock, &irql);
562 //
563 // if (thnp->status == tree_header_loading) {
564 // KeReleaseSpinLock(&thnp->spin_lock, irql);
565 //
566 // // FIXME - wait for Event
567 // } else if (thnp->status == tree_header_unloaded || thnp->status == tree_header_unloading) {
568 // if (thnp->status == tree_header_unloading) {
569 // KeReleaseSpinLock(&thnp->spin_lock, irql);
570 // // FIXME - wait for Event
571 // }
572 //
573 // // FIXME - change status
574 // thnp->status = tree_header_loading;
575 // KeReleaseSpinLock(&thnp->spin_lock, irql);
576 //
577 // // FIXME - load
578 // // FIXME - change status
579 // // FIXME - trigger event
580 // } else if (thnp->status == tree_header_loaded) {
581 // _increase_tree_rc(th->tree, func, file, line);
582 // KeReleaseSpinLock(&thnp->spin_lock, irql);
583 //
584 // ret = FALSE;
585 // }
586
587 if (!th->tree) {
588 NTSTATUS Status;
589
590 Status = _load_tree(Vcb, th->address, r, &th->tree, func, file, line);
591 if (!NT_SUCCESS(Status)) {
592 ERR("load_tree returned %08x\n", Status);
593 ExReleaseResourceLite(&r->nonpaged->load_tree_lock);
594 return Status;
595 }
596
597 th->tree->parent = t;
598 th->tree->paritem = td;
599
600 ret = TRUE;
601 } else
602 ret = FALSE;
603
604 // KeReleaseSpinLock(&thnp->spin_lock, irql);
605
606 // ExReleaseResourceLite(&thnp->lock);
607 ExReleaseResourceLite(&r->nonpaged->load_tree_lock);
608
609 *loaded = ret;
610
611 return STATUS_SUCCESS;
612 }
613
614 tree* STDCALL _free_tree(tree* t, const char* func, const char* file, unsigned int line) {
615 tree* ret;
616 root* r = t->root;
617
618 ExAcquireResourceExclusiveLite(&r->nonpaged->load_tree_lock, TRUE);
619
620 ret = free_tree2(t, func, file, line);
621
622 ExReleaseResourceLite(&r->nonpaged->load_tree_lock);
623
624 return ret;
625 }
626
627 static __inline tree_data* first_item(tree* t) {
628 LIST_ENTRY* le = t->itemlist.Flink;
629
630 if (le == &t->itemlist)
631 return NULL;
632
633 return CONTAINING_RECORD(le, tree_data, list_entry);
634 }
635
636 static __inline tree_data* prev_item(tree* t, tree_data* td) {
637 LIST_ENTRY* le = td->list_entry.Blink;
638
639 if (le == &t->itemlist)
640 return NULL;
641
642 return CONTAINING_RECORD(le, tree_data, list_entry);
643 }
644
645 static __inline tree_data* next_item(tree* t, tree_data* td) {
646 LIST_ENTRY* le = td->list_entry.Flink;
647
648 if (le == &t->itemlist)
649 return NULL;
650
651 return CONTAINING_RECORD(le, tree_data, list_entry);
652 }
653
654 static NTSTATUS STDCALL find_item_in_tree(device_extension* Vcb, tree* t, traverse_ptr* tp, const KEY* searchkey, BOOL ignore, const char* func, const char* file, unsigned int line) {
655 int cmp;
656 tree_data *td, *lasttd;
657
658 TRACE("(%p, %p, %p, %p, %u)\n", Vcb, t, tp, searchkey, ignore);
659
660 cmp = 1;
661 td = first_item(t);
662 lasttd = NULL;
663
664 if (!td) return STATUS_INTERNAL_ERROR;
665
666 do {
667 cmp = keycmp(searchkey, &td->key);
668 // TRACE("(%u) comparing (%x,%x,%x) to (%x,%x,%x) - %i (ignore = %s)\n", t->header.level, (UINT32)searchkey->obj_id, searchkey->obj_type, (UINT32)searchkey->offset, (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset, cmp, td->ignore ? "TRUE" : "FALSE");
669 if (cmp == 1) {
670 lasttd = td;
671 td = next_item(t, td);
672 }
673
674 if (t->header.level == 0 && cmp == 0 && !ignore && td && td->ignore) {
675 tree_data* origtd = td;
676
677 while (td && td->ignore)
678 td = next_item(t, td);
679
680 if (td) {
681 cmp = keycmp(searchkey, &td->key);
682
683 if (cmp != 0) {
684 td = origtd;
685 cmp = 0;
686 }
687 } else
688 td = origtd;
689 }
690 } while (td && cmp == 1);
691
692 if ((cmp == -1 || !td) && lasttd)
693 td = lasttd;
694
695 if (t->header.level == 0) {
696 if (td->ignore && !ignore) {
697 traverse_ptr oldtp;
698
699 oldtp.tree = t;
700 oldtp.item = td;
701
702 while (_find_prev_item(Vcb, &oldtp, tp, TRUE, func, file, line)) {
703 if (!tp->item->ignore)
704 return STATUS_SUCCESS;
705
706 oldtp = *tp;
707 }
708
709 // if no valid entries before where item should be, look afterwards instead
710
711 oldtp.tree = t;
712 oldtp.item = td;
713
714 while (_find_next_item(Vcb, &oldtp, tp, TRUE, func, file, line)) {
715 if (!tp->item->ignore)
716 return STATUS_SUCCESS;
717
718 oldtp = *tp;
719 }
720
721 return STATUS_INTERNAL_ERROR;
722 } else {
723 tp->tree = t;
724 tp->item = td;
725 }
726
727 return STATUS_SUCCESS;
728 } else {
729 NTSTATUS Status;
730 BOOL loaded;
731
732 while (td && td->treeholder.tree && IsListEmpty(&td->treeholder.tree->itemlist)) {
733 td = prev_item(t, td);
734 }
735
736 if (!td)
737 return STATUS_INTERNAL_ERROR;
738
739 // if (i > 0)
740 // TRACE("entering tree from (%x,%x,%x) to (%x,%x,%x) (%p)\n", (UINT32)t->items[i].key.obj_id, t->items[i].key.obj_type, (UINT32)t->items[i].key.offset, (UINT32)t->items[i+1].key.obj_id, t->items[i+1].key.obj_type, (UINT32)t->items[i+1].key.offset, t->items[i].tree);
741
742 Status = _do_load_tree(Vcb, &td->treeholder, t->root, t, td, &loaded, func, file, line);
743 if (!NT_SUCCESS(Status)) {
744 ERR("do_load_tree returned %08x\n", Status);
745 return Status;
746 }
747
748 Status = find_item_in_tree(Vcb, td->treeholder.tree, tp, searchkey, ignore, func, file, line);
749
750 return Status;
751 }
752 }
753
754 NTSTATUS STDCALL _find_item(device_extension* Vcb, root* r, traverse_ptr* tp, const KEY* searchkey, BOOL ignore, const char* func, const char* file, unsigned int line) {
755 NTSTATUS Status;
756 BOOL loaded;
757 // KIRQL irql;
758
759 TRACE("(%p, %p, %p, %p)\n", Vcb, r, tp, searchkey);
760
761 if (!r->treeholder.tree) {
762 Status = _do_load_tree(Vcb, &r->treeholder, r, NULL, NULL, &loaded, func, file, line);
763 if (!NT_SUCCESS(Status)) {
764 ERR("do_load_tree returned %08x\n", Status);
765 return Status;
766 }
767 }
768
769 Status = find_item_in_tree(Vcb, r->treeholder.tree, tp, searchkey, ignore, func, file, line);
770 if (!NT_SUCCESS(Status)) {
771 ERR("find_item_in_tree returned %08x\n", Status);
772 }
773
774 // #ifdef DEBUG_PARANOID
775 // if (b && !ignore && tp->item->ignore) {
776 // ERR("error - returning ignored item\n");
777 // int3;
778 // }
779 // #endif
780
781 return Status;
782 }
783
784 BOOL STDCALL _find_next_item(device_extension* Vcb, const traverse_ptr* tp, traverse_ptr* next_tp, BOOL ignore, const char* func, const char* file, unsigned int line) {
785 tree* t;
786 tree_data *td, *next;
787 NTSTATUS Status;
788 BOOL loaded;
789
790 next = next_item(tp->tree, tp->item);
791
792 if (!ignore) {
793 while (next && next->ignore)
794 next = next_item(tp->tree, next);
795 }
796
797 if (next) {
798 next_tp->tree = tp->tree;
799 next_tp->item = next;
800
801 #ifdef DEBUG_PARANOID
802 if (!ignore && next_tp->item->ignore) {
803 ERR("error - returning ignored item\n");
804 int3;
805 }
806 #endif
807
808 return TRUE;
809 }
810
811 if (!tp->tree->parent)
812 return FALSE;
813
814 t = tp->tree;
815 do {
816 if (t->parent) {
817 td = next_item(t->parent, t->paritem);
818
819 if (td) break;
820 }
821
822 t = t->parent;
823 } while (t);
824
825 if (!t)
826 return FALSE;
827
828 Status = _do_load_tree(Vcb, &td->treeholder, t->parent->root, t->parent, td, &loaded, func, file, line);
829 if (!NT_SUCCESS(Status)) {
830 ERR("do_load_tree returned %08x\n", Status);
831 return FALSE;
832 }
833
834 t = td->treeholder.tree;
835
836 while (t->header.level != 0) {
837 tree_data* fi;
838
839 fi = first_item(t);
840
841 Status = _do_load_tree(Vcb, &fi->treeholder, t->parent->root, t, fi, &loaded, func, file, line);
842 if (!NT_SUCCESS(Status)) {
843 ERR("do_load_tree returned %08x\n", Status);
844 return FALSE;
845 }
846
847 t = fi->treeholder.tree;
848 }
849
850 next_tp->tree = t;
851 next_tp->item = first_item(t);
852
853 if (!ignore && next_tp->item->ignore) {
854 traverse_ptr ntp2;
855 BOOL b;
856
857 while ((b = _find_next_item(Vcb, next_tp, &ntp2, TRUE, func, file, line))) {
858 *next_tp = ntp2;
859
860 if (!next_tp->item->ignore)
861 break;
862 }
863
864 if (!b)
865 return FALSE;
866 }
867
868 #ifdef DEBUG_PARANOID
869 if (!ignore && next_tp->item->ignore) {
870 ERR("error - returning ignored item\n");
871 int3;
872 }
873 #endif
874
875 return TRUE;
876 }
877
878 static __inline tree_data* last_item(tree* t) {
879 LIST_ENTRY* le = t->itemlist.Blink;
880
881 if (le == &t->itemlist)
882 return NULL;
883
884 return CONTAINING_RECORD(le, tree_data, list_entry);
885 }
886
887 BOOL STDCALL _find_prev_item(device_extension* Vcb, const traverse_ptr* tp, traverse_ptr* prev_tp, BOOL ignore, const char* func, const char* file, unsigned int line) {
888 tree* t;
889 tree_data* td;
890 NTSTATUS Status;
891 BOOL loaded;
892
893 // FIXME - support ignore flag
894 if (prev_item(tp->tree, tp->item)) {
895 prev_tp->tree = tp->tree;
896 prev_tp->item = prev_item(tp->tree, tp->item);
897
898 return TRUE;
899 }
900
901 if (!tp->tree->parent)
902 return FALSE;
903
904 t = tp->tree;
905 while (t && (!t->parent || !prev_item(t->parent, t->paritem))) {
906 t = t->parent;
907 }
908
909 if (!t)
910 return FALSE;
911
912 td = prev_item(t->parent, t->paritem);
913
914 Status = _do_load_tree(Vcb, &td->treeholder, t->parent->root, t, td, &loaded, func, file, line);
915 if (!NT_SUCCESS(Status)) {
916 ERR("do_load_tree returned %08x\n", Status);
917 return FALSE;
918 }
919
920 t = td->treeholder.tree;
921
922 while (t->header.level != 0) {
923 tree_data* li;
924
925 li = last_item(t);
926
927 Status = _do_load_tree(Vcb, &li->treeholder, t->parent->root, t, li, &loaded, func, file, line);
928 if (!NT_SUCCESS(Status)) {
929 ERR("do_load_tree returned %08x\n", Status);
930 return FALSE;
931 }
932
933 t = li->treeholder.tree;
934 }
935
936 prev_tp->tree = t;
937 prev_tp->item = last_item(t);
938
939 return TRUE;
940 }
941
942 // static void free_tree_holder(tree_holder* th) {
943 // root* r = th->tree->root;
944 //
945 // // ExAcquireResourceExclusiveLite(&th->nonpaged->lock, TRUE);
946 // ExAcquireResourceExclusiveLite(&r->nonpaged->load_tree_lock, TRUE);
947 //
948 // free_tree2(th->tree, funcname, __FILE__, __LINE__);
949 //
950 // // ExReleaseResourceLite(&th->nonpaged->lock);
951 // ExReleaseResourceLite(&r->nonpaged->load_tree_lock);
952 // }
953
954 void free_trees_root(device_extension* Vcb, root* r) {
955 LIST_ENTRY* le;
956 UINT8 level;
957
958 for (level = 0; level <= 255; level++) {
959 BOOL empty = TRUE;
960
961 le = Vcb->trees.Flink;
962
963 while (le != &Vcb->trees) {
964 LIST_ENTRY* nextle = le->Flink;
965 tree* t = CONTAINING_RECORD(le, tree, list_entry);
966
967 if (t->root == r && t->header.level == level) {
968 BOOL top = !t->paritem;
969
970 empty = FALSE;
971
972 free_tree2(t, funcname, __FILE__, __LINE__);
973 if (top && r->treeholder.tree == t)
974 r->treeholder.tree = NULL;
975
976 if (IsListEmpty(&Vcb->trees))
977 return;
978 }
979
980 le = nextle;
981 }
982
983 if (empty)
984 break;
985 }
986 }
987
988 void STDCALL free_trees(device_extension* Vcb) {
989 tree* t;
990 root* r;
991
992 while (!IsListEmpty(&Vcb->trees)) {
993 t = CONTAINING_RECORD(Vcb->trees.Flink, tree, list_entry);
994 r = t->root;
995
996 ExAcquireResourceExclusiveLite(&r->nonpaged->load_tree_lock, TRUE);
997
998 free_trees_root(Vcb, r);
999
1000 ExReleaseResourceLite(&r->nonpaged->load_tree_lock);
1001 }
1002 }
1003
1004 static void add_rollback(LIST_ENTRY* rollback, enum rollback_type type, void* ptr) {
1005 rollback_item* ri;
1006
1007 ri = ExAllocatePoolWithTag(PagedPool, sizeof(rollback_item), ALLOC_TAG);
1008 if (!ri) {
1009 ERR("out of memory\n");
1010 return;
1011 }
1012
1013 ri->type = type;
1014 ri->ptr = ptr;
1015 InsertTailList(rollback, &ri->list_entry);
1016 }
1017
1018 BOOL STDCALL insert_tree_item(device_extension* Vcb, root* r, UINT64 obj_id, UINT8 obj_type, UINT64 offset, void* data, UINT32 size, traverse_ptr* ptp, LIST_ENTRY* rollback) {
1019 traverse_ptr tp;
1020 KEY searchkey;
1021 int cmp;
1022 tree_data *td, *paritem;
1023 tree* t;
1024 #ifdef _DEBUG
1025 LIST_ENTRY* le;
1026 KEY firstitem = {0xcccccccccccccccc,0xcc,0xcccccccccccccccc};
1027 #endif
1028 traverse_ptr* tp2;
1029 BOOL success = FALSE;
1030 NTSTATUS Status;
1031
1032 TRACE("(%p, %p, %llx, %x, %llx, %p, %x, %p, %p)\n", Vcb, r, obj_id, obj_type, offset, data, size, ptp, rollback);
1033
1034 searchkey.obj_id = obj_id;
1035 searchkey.obj_type = obj_type;
1036 searchkey.offset = offset;
1037
1038 Status = find_item(Vcb, r, &tp, &searchkey, TRUE);
1039 if (!NT_SUCCESS(Status)) {
1040 if (r) {
1041 if (!r->treeholder.tree) {
1042 BOOL loaded;
1043
1044 Status = do_load_tree(Vcb, &r->treeholder, r, NULL, NULL, &loaded);
1045
1046 if (!NT_SUCCESS(Status)) {
1047 ERR("do_load_tree returned %08x\n", Status);
1048 goto end;
1049 }
1050 }
1051
1052 if (r->treeholder.tree && r->treeholder.tree->header.num_items == 0) {
1053 tp.tree = r->treeholder.tree;
1054 tp.item = NULL;
1055 } else {
1056 ERR("error: unable to load tree for root %llx\n", r->id);
1057 goto end;
1058 }
1059 } else {
1060 ERR("error: find_item returned %08x\n", Status);
1061 goto end;
1062 }
1063 }
1064
1065 TRACE("tp.item = %p\n", tp.item);
1066
1067 if (tp.item) {
1068 TRACE("tp.item->key = %p\n", &tp.item->key);
1069 cmp = keycmp(&searchkey, &tp.item->key);
1070
1071 if (cmp == 0 && !tp.item->ignore) { // FIXME - look for all items of the same key to make sure none are non-ignored
1072 ERR("error: key (%llx,%x,%llx) already present\n", obj_id, obj_type, offset);
1073 goto end;
1074 }
1075 } else
1076 cmp = -1;
1077
1078 td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
1079 if (!td) {
1080 ERR("out of memory\n");
1081 goto end;
1082 }
1083
1084 td->key = searchkey;
1085 td->size = size;
1086 td->data = data;
1087 td->ignore = FALSE;
1088 td->inserted = TRUE;
1089
1090 #ifdef _DEBUG
1091 le = tp.tree->itemlist.Flink;
1092 while (le != &tp.tree->itemlist) {
1093 tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
1094 firstitem = td2->key;
1095 break;
1096 }
1097
1098 TRACE("inserting %llx,%x,%llx into tree beginning %llx,%x,%llx (num_items %x)\n", obj_id, obj_type, offset, firstitem.obj_id, firstitem.obj_type, firstitem.offset, tp.tree->header.num_items);
1099 #endif
1100
1101 if (cmp == -1) { // very first key in root
1102 InsertHeadList(&tp.tree->itemlist, &td->list_entry);
1103
1104 paritem = tp.tree->paritem;
1105 while (paritem) {
1106 // ERR("paritem = %llx,%x,%llx, tp.item->key = %llx,%x,%llx\n", paritem->key.obj_id, paritem->key.obj_type, paritem->key.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
1107 if (!keycmp(&paritem->key, &tp.item->key)) {
1108 paritem->key = searchkey;
1109 } else
1110 break;
1111
1112 paritem = paritem->treeholder.tree->paritem;
1113 }
1114
1115 } else {
1116 InsertAfter(&tp.tree->itemlist, &td->list_entry, &tp.item->list_entry); // FIXME - we don't need this
1117 }
1118
1119 tp.tree->header.num_items++;
1120 tp.tree->size += size + sizeof(leaf_node);
1121 // ERR("tree %p, num_items now %x\n", tp.tree, tp.tree->header.num_items);
1122 // ERR("size now %x\n", tp.tree->size);
1123
1124 if (!tp.tree->write) {
1125 tp.tree->write = TRUE;
1126 Vcb->write_trees++;
1127 }
1128
1129 if (ptp)
1130 *ptp = tp;
1131
1132 t = tp.tree;
1133 while (t) {
1134 if (t->paritem && t->paritem->ignore) {
1135 t->paritem->ignore = FALSE;
1136 t->parent->header.num_items++;
1137 t->parent->size += sizeof(internal_node);
1138
1139 // FIXME - do we need to add a rollback entry here?
1140 }
1141
1142 t->header.generation = Vcb->superblock.generation;
1143 t = t->parent;
1144 }
1145
1146 // FIXME - free this correctly
1147
1148 tp2 = ExAllocatePoolWithTag(PagedPool, sizeof(traverse_ptr), ALLOC_TAG);
1149 if (!tp2) {
1150 ERR("out of memory\n");
1151 goto end;
1152 }
1153
1154 tp2->tree = tp.tree;
1155 tp2->item = td;
1156
1157 add_rollback(rollback, ROLLBACK_INSERT_ITEM, tp2);
1158
1159 success = TRUE;
1160
1161 end:
1162 return success;
1163 }
1164
1165 static __inline tree_data* first_valid_item(tree* t) {
1166 LIST_ENTRY* le = t->itemlist.Flink;
1167
1168 while (le != &t->itemlist) {
1169 tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
1170
1171 if (!td->ignore)
1172 return td;
1173
1174 le = le->Flink;
1175 }
1176
1177 return NULL;
1178 }
1179
1180 void STDCALL delete_tree_item(device_extension* Vcb, traverse_ptr* tp, LIST_ENTRY* rollback) {
1181 tree* t;
1182 UINT64 gen;
1183 traverse_ptr* tp2;
1184
1185 TRACE("deleting item %llx,%x,%llx (ignore = %s)\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, tp->item->ignore ? "TRUE" : "FALSE");
1186
1187 #ifdef DEBUG_PARANOID
1188 if (tp->item->ignore) {
1189 ERR("trying to delete already-deleted item %llx,%x,%llx\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset);
1190 int3;
1191 }
1192 #endif
1193
1194 tp->item->ignore = TRUE;
1195
1196 if (!tp->tree->write) {
1197 tp->tree->write = TRUE;
1198 Vcb->write_trees++;
1199 }
1200
1201 tp->tree->header.num_items--;
1202
1203 if (tp->tree->header.level == 0)
1204 tp->tree->size -= sizeof(leaf_node) + tp->item->size;
1205 else
1206 tp->tree->size -= sizeof(internal_node);
1207
1208 gen = tp->tree->Vcb->superblock.generation;
1209
1210 t = tp->tree;
1211 while (t) {
1212 t->header.generation = gen;
1213 t = t->parent;
1214 }
1215
1216 tp2 = ExAllocatePoolWithTag(PagedPool, sizeof(traverse_ptr), ALLOC_TAG);
1217 if (!tp2) {
1218 ERR("out of memory\n");
1219 return;
1220 }
1221
1222 tp2->tree = tp->tree;
1223 tp2->item = tp->item;
1224
1225 add_rollback(rollback, ROLLBACK_DELETE_ITEM, tp2);
1226 }
1227
1228 void clear_rollback(LIST_ENTRY* rollback) {
1229 rollback_item* ri;
1230
1231 while (!IsListEmpty(rollback)) {
1232 LIST_ENTRY* le = RemoveHeadList(rollback);
1233 ri = CONTAINING_RECORD(le, rollback_item, list_entry);
1234
1235 switch (ri->type) {
1236 case ROLLBACK_INSERT_ITEM:
1237 case ROLLBACK_DELETE_ITEM:
1238 ExFreePool(ri->ptr);
1239 break;
1240 }
1241
1242 ExFreePool(ri);
1243 }
1244 }
1245
1246 void do_rollback(device_extension* Vcb, LIST_ENTRY* rollback) {
1247 rollback_item* ri;
1248
1249 while (!IsListEmpty(rollback)) {
1250 LIST_ENTRY* le = RemoveHeadList(rollback);
1251 ri = CONTAINING_RECORD(le, rollback_item, list_entry);
1252
1253 switch (ri->type) {
1254 case ROLLBACK_INSERT_ITEM:
1255 {
1256 traverse_ptr* tp = ri->ptr;
1257
1258 if (!tp->item->ignore) {
1259 tp->item->ignore = TRUE;
1260 tp->tree->header.num_items--;
1261
1262 if (tp->tree->header.level == 0)
1263 tp->tree->size -= sizeof(leaf_node) + tp->item->size;
1264 else
1265 tp->tree->size -= sizeof(internal_node);
1266 }
1267
1268 ExFreePool(tp);
1269 break;
1270 }
1271
1272 case ROLLBACK_DELETE_ITEM:
1273 {
1274 traverse_ptr* tp = ri->ptr;
1275
1276 if (tp->item->ignore) {
1277 tp->item->ignore = FALSE;
1278 tp->tree->header.num_items++;
1279
1280 if (tp->tree->header.level == 0)
1281 tp->tree->size += sizeof(leaf_node) + tp->item->size;
1282 else
1283 tp->tree->size += sizeof(internal_node);
1284 }
1285
1286 ExFreePool(tp);
1287 break;
1288 }
1289 }
1290
1291 ExFreePool(ri);
1292 }
1293 }