From: Pierre Schweitzer Date: Fri, 8 Sep 2017 08:02:43 +0000 (+0000) Subject: [BTRFS] X-Git-Tag: backups/GSoC_2017/rapps@75905~4^2~20 X-Git-Url: https://git.reactos.org/?p=reactos.git;a=commitdiff_plain;h=68a5f90220b6797c58f22fc7e41515b3f9c9ad7d [BTRFS] Sync btrfs to 1.0. CORE-13763 svn path=/trunk/; revision=75786 --- diff --git a/reactos/drivers/filesystems/btrfs/CMakeLists.txt b/reactos/drivers/filesystems/btrfs/CMakeLists.txt index 581bab99ae9..65e61d255a4 100644 --- a/reactos/drivers/filesystems/btrfs/CMakeLists.txt +++ b/reactos/drivers/filesystems/btrfs/CMakeLists.txt @@ -20,13 +20,17 @@ list(APPEND SOURCE free-space.c fsctl.c galois.c + guid.c pnp.c read.c registry.c reparse.c + scrub.c search.c security.c + send.c treefuncs.c + volume.c worker-thread.c write.c btrfs_drv.h) @@ -37,5 +41,4 @@ add_definitions(-D__KERNEL__) set_module_type(btrfs kernelmodedriver) target_link_libraries(btrfs rtlver ntoskrnl_vista zlib_solo wdmguid ${PSEH_LIB}) add_importlibs(btrfs ntoskrnl hal) -add_pch(btrfs btrfs_drv.h SOURCE) add_cd_file(TARGET btrfs DESTINATION reactos/system32/drivers NO_CAB FOR all) diff --git a/reactos/drivers/filesystems/btrfs/balance.c b/reactos/drivers/filesystems/btrfs/balance.c index 63e8821c884..c961fc47d34 100644 --- a/reactos/drivers/filesystems/btrfs/balance.c +++ b/reactos/drivers/filesystems/btrfs/balance.c @@ -1,22 +1,23 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. 
- * + * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */ #include "btrfs_drv.h" #include "btrfsioctl.h" +#include <ntddstor.h> typedef struct { UINT64 address; @@ -31,12 +32,13 @@ typedef struct { typedef struct { UINT8 type; - + UINT64 hash; + union { TREE_BLOCK_REF tbr; SHARED_BLOCK_REF sbr; }; - + metadata_reloc* parent; BOOL top; LIST_ENTRY list_entry; @@ -54,70 +56,81 @@ typedef struct { typedef struct { UINT8 type; - + UINT64 hash; + union { EXTENT_DATA_REF edr; SHARED_DATA_REF sdr; }; - + metadata_reloc* parent; LIST_ENTRY list_entry; } data_reloc_ref; -extern LIST_ENTRY volumes; -extern ERESOURCE volumes_lock; +#ifndef _MSC_VER // not in mingw yet +#define DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED 0x80000000 +#endif + +#define BALANCE_UNIT 0x100000 // only read 1 MB at a time -static NTSTATUS add_metadata_reloc(device_extension* Vcb, LIST_ENTRY* items, traverse_ptr* tp, BOOL skinny, metadata_reloc** mr2, chunk* c, LIST_ENTRY* rollback) { +static NTSTATUS add_metadata_reloc(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* items, traverse_ptr* tp, + BOOL skinny, metadata_reloc** mr2, chunk* c, LIST_ENTRY* rollback) { + NTSTATUS Status; metadata_reloc* mr; EXTENT_ITEM* ei; UINT16 len; UINT64 inline_rc; UINT8* ptr; - + mr = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc), ALLOC_TAG); if (!mr) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + mr->address = tp->item->key.obj_id; mr->data = NULL; mr->ei = (EXTENT_ITEM*)tp->item->data; mr->system = FALSE; InitializeListHead(&mr->refs); - - delete_tree_item(Vcb, tp, rollback); - + + Status = delete_tree_item(Vcb, tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + ExFreePool(mr); + return Status; + } + if (!c) c = get_chunk_from_address(Vcb, tp->item->key.obj_id); - + if (c) { ExAcquireResourceExclusiveLite(&c->lock, TRUE); - - decrease_chunk_usage(c, 
Vcb->superblock.node_size); - - space_list_add(Vcb, c, TRUE, tp->item->key.obj_id, Vcb->superblock.node_size, rollback); - + + c->used -= Vcb->superblock.node_size; + + space_list_add(c, tp->item->key.obj_id, Vcb->superblock.node_size, rollback); + ExReleaseResourceLite(&c->lock); } - + ei = (EXTENT_ITEM*)tp->item->data; inline_rc = 0; - + len = tp->item->size - sizeof(EXTENT_ITEM); ptr = (UINT8*)tp->item->data + sizeof(EXTENT_ITEM); if (!skinny) { len -= sizeof(EXTENT_ITEM2); ptr += sizeof(EXTENT_ITEM2); } - + while (len > 0) { UINT8 secttype = *ptr; - ULONG sectlen = secttype == TYPE_TREE_BLOCK_REF ? sizeof(TREE_BLOCK_REF) : (secttype == TYPE_SHARED_BLOCK_REF ? sizeof(SHARED_BLOCK_REF) : 0); + UINT16 sectlen = secttype == TYPE_TREE_BLOCK_REF ? sizeof(TREE_BLOCK_REF) : (secttype == TYPE_SHARED_BLOCK_REF ? sizeof(SHARED_BLOCK_REF) : 0); metadata_reloc_ref* ref; - + len--; - + if (sectlen > len) { ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, len, sectlen); return STATUS_INTERNAL_ERROR; @@ -127,13 +140,13 @@ static NTSTATUS add_metadata_reloc(device_extension* Vcb, LIST_ENTRY* items, tra ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, secttype); return STATUS_INTERNAL_ERROR; } - + ref = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG); if (!ref) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + if (secttype == TYPE_TREE_BLOCK_REF) { ref->type = TYPE_TREE_BLOCK_REF; RtlCopyMemory(&ref->tbr, ptr + sizeof(UINT8), sizeof(TREE_BLOCK_REF)); @@ -147,99 +160,108 @@ static NTSTATUS add_metadata_reloc(device_extension* Vcb, LIST_ENTRY* items, tra ExFreePool(ref); return STATUS_INTERNAL_ERROR; } - + ref->parent = NULL; ref->top = FALSE; InsertTailList(&mr->refs, &ref->list_entry); - + len -= sectlen; ptr += sizeof(UINT8) + sectlen; } - + if (inline_rc < ei->refcount) { // look for 
non-inline entries traverse_ptr tp2 = *tp, next_tp; - + while (find_next_item(Vcb, &tp2, &next_tp, FALSE, NULL)) { tp2 = next_tp; - + if (tp2.item->key.obj_id == tp->item->key.obj_id) { - if (tp2.item->key.obj_type == TYPE_TREE_BLOCK_REF && tp2.item->size >= sizeof(TREE_BLOCK_REF)) { + if (tp2.item->key.obj_type == TYPE_TREE_BLOCK_REF) { metadata_reloc_ref* ref = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG); if (!ref) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + ref->type = TYPE_TREE_BLOCK_REF; - RtlCopyMemory(&ref->tbr, tp2.item->data, sizeof(TREE_BLOCK_REF)); + ref->tbr.offset = tp2.item->key.offset; ref->parent = NULL; ref->top = FALSE; InsertTailList(&mr->refs, &ref->list_entry); - - delete_tree_item(Vcb, &tp2, rollback); - } else if (tp2.item->key.obj_type == TYPE_SHARED_BLOCK_REF && tp2.item->size >= sizeof(SHARED_BLOCK_REF)) { + + Status = delete_tree_item(Vcb, &tp2); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + } else if (tp2.item->key.obj_type == TYPE_SHARED_BLOCK_REF) { metadata_reloc_ref* ref = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG); if (!ref) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + ref->type = TYPE_SHARED_BLOCK_REF; - RtlCopyMemory(&ref->sbr, tp2.item->data, sizeof(SHARED_BLOCK_REF)); + ref->sbr.offset = tp2.item->key.offset; ref->parent = NULL; ref->top = FALSE; InsertTailList(&mr->refs, &ref->list_entry); - - delete_tree_item(Vcb, &tp2, rollback); + + Status = delete_tree_item(Vcb, &tp2); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } } } else break; } } - + InsertTailList(items, &mr->list_entry); - + if (mr2) *mr2 = mr; - + return STATUS_SUCCESS; } -static NTSTATUS add_metadata_reloc_parent(device_extension* Vcb, LIST_ENTRY* items, UINT64 address, metadata_reloc** mr2, LIST_ENTRY* rollback) { +static NTSTATUS 
add_metadata_reloc_parent(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* items, + UINT64 address, metadata_reloc** mr2, LIST_ENTRY* rollback) { LIST_ENTRY* le; KEY searchkey; traverse_ptr tp; BOOL skinny = FALSE; NTSTATUS Status; - + le = items->Flink; while (le != items) { metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry); - + if (mr->address == address) { *mr2 = mr; return STATUS_SUCCESS; } - + le = le->Flink; } - + searchkey.obj_id = address; searchkey.obj_type = TYPE_METADATA_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL); if (!NT_SUCCESS(Status)) { ERR("find_item returned %08x\n", Status); return Status; } - + if (tp.item->key.obj_id == address && tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) skinny = TRUE; else if (tp.item->key.obj_id == address && tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset == Vcb->superblock.node_size && tp.item->size >= sizeof(EXTENT_ITEM)) { EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data; - + if (!(ei->flags & EXTENT_ITEM_TREE_BLOCK)) { ERR("EXTENT_ITEM for %llx found, but tree flag not set\n", address); return STATUS_INTERNAL_ERROR; @@ -248,17 +270,60 @@ static NTSTATUS add_metadata_reloc_parent(device_extension* Vcb, LIST_ENTRY* ite ERR("could not find valid EXTENT_ITEM for address %llx\n", address); return STATUS_INTERNAL_ERROR; } - + Status = add_metadata_reloc(Vcb, items, &tp, skinny, mr2, NULL, rollback); if (!NT_SUCCESS(Status)) { ERR("add_metadata_reloc returned %08x\n", Status); return Status; } - + return STATUS_SUCCESS; } -static NTSTATUS add_metadata_reloc_extent_item(device_extension* Vcb, metadata_reloc* mr, LIST_ENTRY* rollback) { +static void sort_metadata_reloc_refs(metadata_reloc* mr) { + LIST_ENTRY newlist, *le; + + if (mr->refs.Flink == mr->refs.Blink) // 0 or 1 items + return; + + // insertion sort + + 
InitializeListHead(&newlist); + + while (!IsListEmpty(&mr->refs)) { + metadata_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&mr->refs), metadata_reloc_ref, list_entry); + BOOL inserted = FALSE; + + if (ref->type == TYPE_TREE_BLOCK_REF) + ref->hash = ref->tbr.offset; + else if (ref->type == TYPE_SHARED_BLOCK_REF) + ref->hash = ref->parent->new_address; + + le = newlist.Flink; + while (le != &newlist) { + metadata_reloc_ref* ref2 = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry); + + if (ref->type < ref2->type || (ref->type == ref2->type && ref->hash > ref2->hash)) { + InsertHeadList(le->Blink, &ref->list_entry); + inserted = TRUE; + break; + } + + le = le->Flink; + } + + if (!inserted) + InsertTailList(&newlist, &ref->list_entry); + } + + newlist.Flink->Blink = &mr->refs; + newlist.Blink->Flink = &mr->refs; + mr->refs.Flink = newlist.Flink; + mr->refs.Blink = newlist.Blink; +} + +static NTSTATUS add_metadata_reloc_extent_item(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, metadata_reloc* mr) { + NTSTATUS Status; LIST_ENTRY* le; UINT64 rc = 0; UINT16 inline_len; @@ -266,160 +331,141 @@ static NTSTATUS add_metadata_reloc_extent_item(device_extension* Vcb, metadata_r metadata_reloc_ref* first_noninline = NULL; EXTENT_ITEM* ei; UINT8* ptr; - + inline_len = sizeof(EXTENT_ITEM); if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) inline_len += sizeof(EXTENT_ITEM2); - + + sort_metadata_reloc_refs(mr); + le = mr->refs.Flink; while (le != &mr->refs) { metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry); - ULONG extlen = 0; - + UINT16 extlen = 0; + rc++; - + if (ref->type == TYPE_TREE_BLOCK_REF) extlen += sizeof(TREE_BLOCK_REF); else if (ref->type == TYPE_SHARED_BLOCK_REF) extlen += sizeof(SHARED_BLOCK_REF); if (all_inline) { - if (inline_len + 1 + extlen > Vcb->superblock.node_size / 4) { + if ((ULONG)(inline_len + 1 + extlen) > (Vcb->superblock.node_size >> 2)) { all_inline = FALSE; 
first_noninline = ref; } else inline_len += extlen + 1; } - + le = le->Flink; } - + ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG); if (!ei) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + ei->refcount = rc; ei->generation = mr->ei->generation; ei->flags = mr->ei->flags; ptr = (UINT8*)&ei[1]; - + if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) { EXTENT_ITEM2* ei2 = (EXTENT_ITEM2*)ptr; - + ei2->firstitem = *(KEY*)&mr->data[1]; ei2->level = mr->data->level; - + ptr += sizeof(EXTENT_ITEM2); } - + le = mr->refs.Flink; while (le != &mr->refs) { metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry); - + if (ref == first_noninline) break; - + *ptr = ref->type; ptr++; - + if (ref->type == TYPE_TREE_BLOCK_REF) { TREE_BLOCK_REF* tbr = (TREE_BLOCK_REF*)ptr; - + tbr->offset = ref->tbr.offset; - + ptr += sizeof(TREE_BLOCK_REF); } else if (ref->type == TYPE_SHARED_BLOCK_REF) { SHARED_BLOCK_REF* sbr = (SHARED_BLOCK_REF*)ptr; - + sbr->offset = ref->parent->new_address; - + ptr += sizeof(SHARED_BLOCK_REF); } - + le = le->Flink; } - - if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) { - if (!insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_METADATA_ITEM, mr->data->level, ei, inline_len, NULL, NULL, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; - } - } else { - if (!insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, ei, inline_len, NULL, NULL, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; - } + + if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) + Status = insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_METADATA_ITEM, mr->data->level, ei, inline_len, NULL, NULL); + else + Status = insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, ei, inline_len, 
NULL, NULL); + + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + ExFreePool(ei); + return Status; } - + if (!all_inline) { le = &first_noninline->list_entry; - + while (le != &mr->refs) { metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry); - + if (ref->type == TYPE_TREE_BLOCK_REF) { - TREE_BLOCK_REF* tbr; - - tbr = ExAllocatePoolWithTag(PagedPool, sizeof(TREE_BLOCK_REF), ALLOC_TAG); - if (!tbr) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - tbr->offset = ref->tbr.offset; - - if (!insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_TREE_BLOCK_REF, tbr->offset, tbr, sizeof(TREE_BLOCK_REF), NULL, NULL, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + Status = insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_TREE_BLOCK_REF, ref->tbr.offset, NULL, 0, NULL, NULL); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; } } else if (ref->type == TYPE_SHARED_BLOCK_REF) { - SHARED_BLOCK_REF* sbr; - - sbr = ExAllocatePoolWithTag(PagedPool, sizeof(SHARED_BLOCK_REF), ALLOC_TAG); - if (!sbr) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - sbr->offset = ref->parent->new_address; - - if (!insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_SHARED_BLOCK_REF, sbr->offset, sbr, sizeof(SHARED_BLOCK_REF), NULL, NULL, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + Status = insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_SHARED_BLOCK_REF, ref->parent->new_address, NULL, 0, NULL, NULL); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; } } - + le = le->Flink; } } - + if (ei->flags & EXTENT_ITEM_SHARED_BACKREFS || mr->data->flags & HEADER_FLAG_SHARED_BACKREF || !(mr->data->flags & HEADER_FLAG_MIXED_BACKREF)) { if (mr->data->level > 0) { UINT16 i; internal_node* in = 
(internal_node*)&mr->data[1]; - + for (i = 0; i < mr->data->num_items; i++) { UINT64 sbrrc = find_extent_shared_tree_refcount(Vcb, in[i].address, mr->address, NULL); if (sbrrc > 0) { - NTSTATUS Status; SHARED_BLOCK_REF sbr; - + sbr.offset = mr->new_address; - - Status = increase_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0, - NULL, rollback); + + Status = increase_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0, NULL); if (!NT_SUCCESS(Status)) { ERR("increase_extent_refcount returned %08x\n", Status); return Status; } - + sbr.offset = mr->address; - + Status = decrease_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0, - sbr.offset, FALSE, NULL, rollback); + sbr.offset, FALSE, NULL); if (!NT_SUCCESS(Status)) { ERR("decrease_extent_refcount returned %08x\n", Status); return Status; @@ -429,86 +475,84 @@ static NTSTATUS add_metadata_reloc_extent_item(device_extension* Vcb, metadata_r } else { UINT16 i; leaf_node* ln = (leaf_node*)&mr->data[1]; - + for (i = 0; i < mr->data->num_items; i++) { if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)mr->data + sizeof(tree_header) + ln[i].offset); - + if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; - + if (ed2->size > 0) { // not sparse - UINT64 sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, mr->address, NULL); - + UINT32 sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, mr->address, NULL); + if (sdrrc > 0) { - NTSTATUS Status; SHARED_DATA_REF sdr; chunk* c; - + sdr.offset = mr->new_address; sdr.count = sdrrc; - - Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, - NULL, rollback); + + Status = increase_extent_refcount(Vcb, 
ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, NULL); if (!NT_SUCCESS(Status)) { ERR("increase_extent_refcount returned %08x\n", Status); return Status; } - + sdr.offset = mr->address; - + Status = decrease_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, - sdr.offset, FALSE, NULL, rollback); + sdr.offset, FALSE, NULL); if (!NT_SUCCESS(Status)) { ERR("decrease_extent_refcount returned %08x\n", Status); return Status; } - + c = get_chunk_from_address(Vcb, ed2->address); - + if (c) { // check changed_extents - + ExAcquireResourceExclusiveLite(&c->changed_extents_lock, TRUE); - + le = c->changed_extents.Flink; - + while (le != &c->changed_extents) { changed_extent* ce = CONTAINING_RECORD(le, changed_extent, list_entry); - + if (ce->address == ed2->address) { LIST_ENTRY* le2; - + le2 = ce->refs.Flink; while (le2 != &ce->refs) { changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); - + if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == mr->address) { cer->sdr.offset = mr->new_address; break; } - + le2 = le2->Flink; } - + le2 = ce->old_refs.Flink; while (le2 != &ce->old_refs) { changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); - + if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == mr->address) { cer->sdr.offset = mr->new_address; break; } - + le2 = le2->Flink; } - + break; } - + le = le->Flink; } - + ExReleaseResourceLite(&c->changed_extents_lock); } } @@ -522,303 +566,304 @@ static NTSTATUS add_metadata_reloc_extent_item(device_extension* Vcb, metadata_r return STATUS_SUCCESS; } -static NTSTATUS write_metadata_items(device_extension* Vcb, LIST_ENTRY* items, LIST_ENTRY* data_items, chunk* c, LIST_ENTRY* rollback) { +static NTSTATUS write_metadata_items(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* items, + LIST_ENTRY* data_items, chunk* c, LIST_ENTRY* rollback) { LIST_ENTRY tree_writes, *le; NTSTATUS Status; 
traverse_ptr tp; UINT8 level, max_level = 0; chunk* newchunk = NULL; - + InitializeListHead(&tree_writes); - + le = items->Flink; while (le != items) { metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry); LIST_ENTRY* le2; chunk* pc; - -// ERR("address %llx\n", mr->address); - + mr->data = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG); if (!mr->data) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + Status = read_data(Vcb, mr->address, Vcb->superblock.node_size, NULL, TRUE, (UINT8*)mr->data, - c && mr->address >= c->offset && mr->address < c->offset + c->chunk_item->size ? c : NULL, &pc, NULL, FALSE); + c && mr->address >= c->offset && mr->address < c->offset + c->chunk_item->size ? c : NULL, &pc, NULL, 0, FALSE, NormalPagePriority); if (!NT_SUCCESS(Status)) { ERR("read_data returned %08x\n", Status); return Status; } - + if (pc->chunk_item->type & BLOCK_FLAG_SYSTEM) mr->system = TRUE; - + if (data_items && mr->data->level == 0) { - LIST_ENTRY* le2 = data_items->Flink; + le2 = data_items->Flink; while (le2 != data_items) { data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry); leaf_node* ln = (leaf_node*)&mr->data[1]; UINT16 i; - + for (i = 0; i < mr->data->num_items; i++) { if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)mr->data + sizeof(tree_header) + ln[i].offset); - + if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; - + if (ed2->address == dr->address) ed2->address = dr->new_address; } } } - + le2 = le2->Flink; } } - + if (mr->data->level > max_level) max_level = mr->data->level; - + le2 = mr->refs.Flink; while (le2 != &mr->refs) { metadata_reloc_ref* ref = CONTAINING_RECORD(le2, metadata_reloc_ref, list_entry); - + if (ref->type == TYPE_TREE_BLOCK_REF) { KEY* firstitem; root* r = NULL; LIST_ENTRY* le3; tree* t; - + 
firstitem = (KEY*)&mr->data[1]; - + le3 = Vcb->roots.Flink; while (le3 != &Vcb->roots) { root* r2 = CONTAINING_RECORD(le3, root, list_entry); - + if (r2->id == ref->tbr.offset) { r = r2; break; } - + le3 = le3->Flink; } - + if (!r) { ERR("could not find subvol with id %llx\n", ref->tbr.offset); return STATUS_INTERNAL_ERROR; } - + Status = find_item_to_level(Vcb, r, &tp, firstitem, FALSE, mr->data->level + 1, NULL); if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) { ERR("find_item_to_level returned %08x\n", Status); return Status; } - + t = tp.tree; while (t && t->header.level < mr->data->level + 1) { t = t->parent; } - + if (!t) ref->top = TRUE; else { metadata_reloc* mr2; - + Status = add_metadata_reloc_parent(Vcb, items, t->header.address, &mr2, rollback); if (!NT_SUCCESS(Status)) { ERR("add_metadata_reloc_parent returned %08x\n", Status); return Status; } - + ref->parent = mr2; } } else if (ref->type == TYPE_SHARED_BLOCK_REF) { metadata_reloc* mr2; - + Status = add_metadata_reloc_parent(Vcb, items, ref->sbr.offset, &mr2, rollback); if (!NT_SUCCESS(Status)) { ERR("add_metadata_reloc_parent returned %08x\n", Status); return Status; } - + ref->parent = mr2; } - + le2 = le2->Flink; } - + le = le->Flink; } - + le = items->Flink; while (le != items) { metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry); LIST_ENTRY* le2; UINT32 hash; - + mr->t = NULL; - + hash = calc_crc32c(0xffffffff, (UINT8*)&mr->address, sizeof(UINT64)); - + le2 = Vcb->trees_ptrs[hash >> 24]; - + if (le2) { while (le2 != &Vcb->trees_hash) { tree* t = CONTAINING_RECORD(le2, tree, list_entry_hash); - + if (t->header.address == mr->address) { mr->t = t; break; } else if (t->hash > hash) break; - + le2 = le2->Flink; } } - + le = le->Flink; } - + for (level = 0; level <= max_level; level++) { le = items->Flink; while (le != items) { metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry); - + if (mr->data->level == level) { BOOL done = FALSE; LIST_ENTRY* le2; 
tree_write* tw; UINT64 flags; tree* t3; - + if (mr->system) flags = Vcb->system_flags; else if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS) flags = Vcb->data_flags; else flags = Vcb->metadata_flags; - + if (newchunk) { ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE); - + if (newchunk->chunk_item->type == flags && find_metadata_address_in_chunk(Vcb, newchunk, &mr->new_address)) { - increase_chunk_usage(newchunk, Vcb->superblock.node_size); - space_list_subtract(Vcb, newchunk, FALSE, mr->new_address, Vcb->superblock.node_size, rollback); + newchunk->used += Vcb->superblock.node_size; + space_list_subtract(newchunk, FALSE, mr->new_address, Vcb->superblock.node_size, rollback); done = TRUE; } - + ExReleaseResourceLite(&newchunk->lock); } - + if (!done) { ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); - + le2 = Vcb->chunks.Flink; while (le2 != &Vcb->chunks) { chunk* c2 = CONTAINING_RECORD(le2, chunk, list_entry); - + if (!c2->readonly && !c2->reloc && c2 != newchunk && c2->chunk_item->type == flags) { ExAcquireResourceExclusiveLite(&c2->lock, TRUE); - + if ((c2->chunk_item->size - c2->used) >= Vcb->superblock.node_size) { if (find_metadata_address_in_chunk(Vcb, c2, &mr->new_address)) { - increase_chunk_usage(c2, Vcb->superblock.node_size); - space_list_subtract(Vcb, c2, FALSE, mr->new_address, Vcb->superblock.node_size, rollback); + c2->used += Vcb->superblock.node_size; + space_list_subtract(c2, FALSE, mr->new_address, Vcb->superblock.node_size, rollback); ExReleaseResourceLite(&c2->lock); newchunk = c2; done = TRUE; break; } } - + ExReleaseResourceLite(&c2->lock); } le2 = le2->Flink; } - + // allocate new chunk if necessary if (!done) { - newchunk = alloc_chunk(Vcb, flags); - - if (!newchunk) { - ERR("could not allocate new chunk\n"); + Status = alloc_chunk(Vcb, flags, &newchunk, FALSE); + + if (!NT_SUCCESS(Status)) { + ERR("alloc_chunk returned %08x\n", Status); ExReleaseResourceLite(&Vcb->chunk_lock); - Status = 
STATUS_DISK_FULL; goto end; } - + ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE); - + + newchunk->balance_num = Vcb->balance.balance_num; + if (!find_metadata_address_in_chunk(Vcb, newchunk, &mr->new_address)) { ExReleaseResourceLite(&newchunk->lock); + ExReleaseResourceLite(&Vcb->chunk_lock); ERR("could not find address in new chunk\n"); Status = STATUS_DISK_FULL; goto end; } else { - increase_chunk_usage(newchunk, Vcb->superblock.node_size); - space_list_subtract(Vcb, newchunk, FALSE, mr->new_address, Vcb->superblock.node_size, rollback); + newchunk->used += Vcb->superblock.node_size; + space_list_subtract(newchunk, FALSE, mr->new_address, Vcb->superblock.node_size, rollback); } - + ExReleaseResourceLite(&newchunk->lock); } - + ExReleaseResourceLite(&Vcb->chunk_lock); } - + // update parents le2 = mr->refs.Flink; while (le2 != &mr->refs) { metadata_reloc_ref* ref = CONTAINING_RECORD(le2, metadata_reloc_ref, list_entry); - + if (ref->parent) { UINT16 i; internal_node* in = (internal_node*)&ref->parent->data[1]; - + for (i = 0; i < ref->parent->data->num_items; i++) { if (in[i].address == mr->address) { in[i].address = mr->new_address; break; } } - + if (ref->parent->t) { LIST_ENTRY* le3; - + le3 = ref->parent->t->itemlist.Flink; while (le3 != &ref->parent->t->itemlist) { tree_data* td = CONTAINING_RECORD(le3, tree_data, list_entry); - + if (!td->inserted && td->treeholder.address == mr->address) td->treeholder.address = mr->new_address; - + le3 = le3->Flink; } } } else if (ref->top && ref->type == TYPE_TREE_BLOCK_REF) { LIST_ENTRY* le3; root* r = NULL; - + // alter ROOT_ITEM - + le3 = Vcb->roots.Flink; while (le3 != &Vcb->roots) { root* r2 = CONTAINING_RECORD(le3, root, list_entry); - + if (r2->id == ref->tbr.offset) { r = r2; break; } - + le3 = le3->Flink; } - + if (r) { r->treeholder.address = mr->new_address; - + if (r == Vcb->root_root) Vcb->superblock.root_tree_addr = mr->new_address; else if (r == Vcb->chunk_root) @@ -826,92 +871,96 @@ static NTSTATUS 
write_metadata_items(device_extension* Vcb, LIST_ENTRY* items, L else if (r->root_item.block_number == mr->address) { KEY searchkey; ROOT_ITEM* ri; - + r->root_item.block_number = mr->new_address; - + searchkey.obj_id = r->id; searchkey.obj_type = TYPE_ROOT_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL); if (!NT_SUCCESS(Status)) { ERR("find_item returned %08x\n", Status); goto end; } - + if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id); Status = STATUS_INTERNAL_ERROR; goto end; } - + ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG); if (!ri) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + RtlCopyMemory(ri, &r->root_item, sizeof(ROOT_ITEM)); - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, NULL, rollback)) { - ERR("insert_tree_item failed\n"); - Status = STATUS_INTERNAL_ERROR; + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + goto end; + } + + Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, NULL); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); goto end; } } } } - + le2 = le2->Flink; } - + mr->data->address = mr->new_address; - + t3 = mr->t; while (t3) { UINT8 h; BOOL inserted; tree* t4 = NULL; - + // check if tree loaded more than once if (t3->list_entry.Flink != &Vcb->trees_hash) { tree* nt = CONTAINING_RECORD(t3->list_entry_hash.Flink, tree, list_entry_hash); - + if (nt->header.address == t3->header.address) t4 = nt; } - + t3->header.address = mr->new_address; - + h = t3->hash >> 24; - + if 
(Vcb->trees_ptrs[h] == &t3->list_entry_hash) { if (t3->list_entry_hash.Flink == &Vcb->trees_hash) Vcb->trees_ptrs[h] = NULL; else { tree* t2 = CONTAINING_RECORD(t3->list_entry_hash.Flink, tree, list_entry_hash); - + if (t2->hash >> 24 == h) Vcb->trees_ptrs[h] = &t2->list_entry_hash; else Vcb->trees_ptrs[h] = NULL; } } - + RemoveEntryList(&t3->list_entry_hash); - + t3->hash = calc_crc32c(0xffffffff, (UINT8*)&t3->header.address, sizeof(UINT64)); h = t3->hash >> 24; - + if (!Vcb->trees_ptrs[h]) { UINT8 h2 = h; - + le2 = Vcb->trees_hash.Flink; - + if (h2 > 0) { h2--; do { @@ -919,23 +968,23 @@ static NTSTATUS write_metadata_items(device_extension* Vcb, LIST_ENTRY* items, L le2 = Vcb->trees_ptrs[h2]; break; } - + h2--; } while (h2 > 0); } } else le2 = Vcb->trees_ptrs[h]; - + inserted = FALSE; while (le2 != &Vcb->trees_hash) { tree* t2 = CONTAINING_RECORD(le2, tree, list_entry_hash); - + if (t2->hash >= t3->hash) { InsertHeadList(le2->Blink, &t3->list_entry_hash); inserted = TRUE; break; } - + le2 = le2->Flink; } @@ -944,106 +993,105 @@ static NTSTATUS write_metadata_items(device_extension* Vcb, LIST_ENTRY* items, L if (!Vcb->trees_ptrs[h] || t3->list_entry_hash.Flink == Vcb->trees_ptrs[h]) Vcb->trees_ptrs[h] = &t3->list_entry_hash; - + if (data_items && level == 0) { le2 = data_items->Flink; - + while (le2 != data_items) { data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry); LIST_ENTRY* le3 = t3->itemlist.Flink; - + while (le3 != &t3->itemlist) { tree_data* td = CONTAINING_RECORD(le3, tree_data, list_entry); - + if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { EXTENT_DATA* ed = (EXTENT_DATA*)td->data; - + if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; - + if (ed2->address == dr->address) ed2->address = dr->new_address; } } - + le3 = le3->Flink; } - + le2 = le2->Flink; } } - + t3 = t4; } *((UINT32*)mr->data) = 
~calc_crc32c(0xffffffff, (UINT8*)&mr->data->fs_uuid, Vcb->superblock.node_size - sizeof(mr->data->csum)); - + tw = ExAllocatePoolWithTag(PagedPool, sizeof(tree_write), ALLOC_TAG); if (!tw) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + tw->address = mr->new_address; tw->length = Vcb->superblock.node_size; tw->data = (UINT8*)mr->data; - tw->overlap = FALSE; - + if (IsListEmpty(&tree_writes)) InsertTailList(&tree_writes, &tw->list_entry); else { BOOL inserted = FALSE; - + le2 = tree_writes.Flink; while (le2 != &tree_writes) { tree_write* tw2 = CONTAINING_RECORD(le2, tree_write, list_entry); - + if (tw2->address > tw->address) { InsertHeadList(le2->Blink, &tw->list_entry); inserted = TRUE; break; } - + le2 = le2->Flink; } - + if (!inserted) InsertTailList(&tree_writes, &tw->list_entry); } } - + le = le->Flink; } } - + + Status = do_tree_writes(Vcb, &tree_writes, TRUE); + if (!NT_SUCCESS(Status)) { + ERR("do_tree_writes returned %08x\n", Status); + goto end; + } + le = items->Flink; while (le != items) { metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry); - - Status = add_metadata_reloc_extent_item(Vcb, mr, rollback); + + Status = add_metadata_reloc_extent_item(Vcb, mr); if (!NT_SUCCESS(Status)) { ERR("add_metadata_reloc_extent_item returned %08x\n", Status); goto end; } - + le = le->Flink; } - - Status = do_tree_writes(Vcb, &tree_writes, NULL); - if (!NT_SUCCESS(Status)) { - ERR("do_tree_writes returned %08x\n", Status); - goto end; - } - + Status = STATUS_SUCCESS; - + end: while (!IsListEmpty(&tree_writes)) { tree_write* tw = CONTAINING_RECORD(RemoveHeadList(&tree_writes), tree_write, list_entry); ExFreePool(tw); } - + return Status; } @@ -1054,153 +1102,263 @@ static NTSTATUS balance_metadata_chunk(device_extension* Vcb, chunk* c, BOOL* ch BOOL b; LIST_ENTRY items, rollback; UINT32 loaded = 0; - + TRACE("chunk %llx\n", c->offset); - + InitializeListHead(&rollback); InitializeListHead(&items); - + 
ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); - + searchkey.obj_id = c->offset; searchkey.obj_type = TYPE_METADATA_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL); if (!NT_SUCCESS(Status)) { ERR("find_item returned %08x\n", Status); goto end; } - + do { traverse_ptr next_tp; - + if (tp.item->key.obj_id >= c->offset + c->chunk_item->size) break; - + if (tp.item->key.obj_id >= c->offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) { BOOL tree = FALSE, skinny = FALSE; - + if (tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) { tree = TRUE; skinny = TRUE; } else if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset == Vcb->superblock.node_size && tp.item->size >= sizeof(EXTENT_ITEM)) { EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data; - + if (ei->flags & EXTENT_ITEM_TREE_BLOCK) tree = TRUE; } - + if (tree) { Status = add_metadata_reloc(Vcb, &items, &tp, skinny, NULL, c, &rollback); - + if (!NT_SUCCESS(Status)) { ERR("add_metadata_reloc returned %08x\n", Status); goto end; } - + loaded++; - + if (loaded >= 64) // only do 64 at a time break; } } - + b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL); - + if (b) tp = next_tp; } while (b); - + if (IsListEmpty(&items)) { *changed = FALSE; Status = STATUS_SUCCESS; goto end; } else *changed = TRUE; - + Status = write_metadata_items(Vcb, &items, NULL, c, &rollback); if (!NT_SUCCESS(Status)) { ERR("write_metadata_items returned %08x\n", Status); goto end; } - + Status = STATUS_SUCCESS; - + Vcb->need_write = TRUE; - + end: + if (NT_SUCCESS(Status)) { + Status = do_write(Vcb, NULL); + if (!NT_SUCCESS(Status)) + ERR("do_write returned %08x\n", Status); + } + if (NT_SUCCESS(Status)) - clear_rollback(Vcb, &rollback); + clear_rollback(&rollback); else do_rollback(Vcb, &rollback); - + + free_trees(Vcb); + ExReleaseResourceLite(&Vcb->tree_lock); - + while 
(!IsListEmpty(&items)) { metadata_reloc* mr = CONTAINING_RECORD(RemoveHeadList(&items), metadata_reloc, list_entry); - + while (!IsListEmpty(&mr->refs)) { metadata_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&mr->refs), metadata_reloc_ref, list_entry); - + ExFreePool(ref); } - + ExFreePool(mr); } - + return Status; } -static NTSTATUS add_data_reloc(device_extension* Vcb, LIST_ENTRY* items, LIST_ENTRY* metadata_items, traverse_ptr* tp, chunk* c, LIST_ENTRY* rollback) { +static NTSTATUS data_reloc_add_tree_edr(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* metadata_items, + data_reloc* dr, EXTENT_DATA_REF* edr, LIST_ENTRY* rollback) { + NTSTATUS Status; + LIST_ENTRY* le; + KEY searchkey; + traverse_ptr tp; + root* r = NULL; + metadata_reloc* mr; + UINT64 last_tree = 0; + data_reloc_ref* ref; + + le = Vcb->roots.Flink; + while (le != &Vcb->roots) { + root* r2 = CONTAINING_RECORD(le, root, list_entry); + + if (r2->id == edr->root) { + r = r2; + break; + } + + le = le->Flink; + } + + if (!r) { + ERR("could not find subvol %llx\n", edr->count); + return STATUS_INTERNAL_ERROR; + } + + searchkey.obj_id = edr->objid; + searchkey.obj_type = TYPE_EXTENT_DATA; + searchkey.offset = 0; + + Status = find_item(Vcb, r, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + if (tp.item->key.obj_id < searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type < searchkey.obj_type)) { + traverse_ptr tp2; + + if (find_next_item(Vcb, &tp, &tp2, FALSE, NULL)) + tp = tp2; + else { + ERR("could not find EXTENT_DATA for inode %llx in root %llx\n", searchkey.obj_id, r->id); + return STATUS_INTERNAL_ERROR; + } + } + + ref = NULL; + + while (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { + traverse_ptr tp2; + + if (tp.item->size >= sizeof(EXTENT_DATA)) { + EXTENT_DATA* ed = (EXTENT_DATA*)tp.item->data; + + if ((ed->type 
== EXTENT_TYPE_PREALLOC || ed->type == EXTENT_TYPE_REGULAR) && tp.item->size >= offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2)) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; + + if (ed2->address == dr->address && ed2->size == dr->size && tp.item->key.offset - ed2->offset == edr->offset) { + if (ref && last_tree == tp.tree->header.address) + ref->edr.count++; + else { + ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG); + if (!ref) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + ref->type = TYPE_EXTENT_DATA_REF; + RtlCopyMemory(&ref->edr, edr, sizeof(EXTENT_DATA_REF)); + ref->edr.count = 1; + + Status = add_metadata_reloc_parent(Vcb, metadata_items, tp.tree->header.address, &mr, rollback); + if (!NT_SUCCESS(Status)) { + ERR("add_metadata_reloc_parent returned %08x\n", Status); + ExFreePool(ref); + return Status; + } + + last_tree = tp.tree->header.address; + ref->parent = mr; + + InsertTailList(&dr->refs, &ref->list_entry); + } + } + } + } + + if (find_next_item(Vcb, &tp, &tp2, FALSE, NULL)) + tp = tp2; + else + break; + } + + return STATUS_SUCCESS; +} + +static NTSTATUS add_data_reloc(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* items, LIST_ENTRY* metadata_items, + traverse_ptr* tp, chunk* c, LIST_ENTRY* rollback) { + NTSTATUS Status; data_reloc* dr; EXTENT_ITEM* ei; UINT16 len; UINT64 inline_rc; UINT8* ptr; - + dr = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc), ALLOC_TAG); if (!dr) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + dr->address = tp->item->key.obj_id; dr->size = tp->item->key.offset; dr->ei = (EXTENT_ITEM*)tp->item->data; InitializeListHead(&dr->refs); - - delete_tree_item(Vcb, tp, rollback); - + + Status = delete_tree_item(Vcb, tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + if (!c) c = get_chunk_from_address(Vcb, tp->item->key.obj_id); - + if (c) { 
ExAcquireResourceExclusiveLite(&c->lock, TRUE); - - decrease_chunk_usage(c, tp->item->key.offset); - - space_list_add(Vcb, c, TRUE, tp->item->key.obj_id, tp->item->key.offset, rollback); - + + c->used -= tp->item->key.offset; + + space_list_add(c, tp->item->key.obj_id, tp->item->key.offset, rollback); + ExReleaseResourceLite(&c->lock); } - + ei = (EXTENT_ITEM*)tp->item->data; inline_rc = 0; - + len = tp->item->size - sizeof(EXTENT_ITEM); ptr = (UINT8*)tp->item->data + sizeof(EXTENT_ITEM); - + while (len > 0) { UINT8 secttype = *ptr; - ULONG sectlen = secttype == TYPE_EXTENT_DATA_REF ? sizeof(EXTENT_DATA_REF) : (secttype == TYPE_SHARED_DATA_REF ? sizeof(SHARED_DATA_REF) : 0); - data_reloc_ref* ref; - NTSTATUS Status; - metadata_reloc* mr; - + UINT16 sectlen = secttype == TYPE_EXTENT_DATA_REF ? sizeof(EXTENT_DATA_REF) : (secttype == TYPE_SHARED_DATA_REF ? sizeof(SHARED_DATA_REF) : 0); + len--; - + if (sectlen > len) { ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, len, sectlen); return STATUS_INTERNAL_ERROR; @@ -1210,196 +1368,173 @@ static NTSTATUS add_data_reloc(device_extension* Vcb, LIST_ENTRY* items, LIST_EN ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, secttype); return STATUS_INTERNAL_ERROR; } - - ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG); - if (!ref) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - + if (secttype == TYPE_EXTENT_DATA_REF) { - LIST_ENTRY* le; - KEY searchkey; - traverse_ptr tp3; - root* r = NULL; - - ref->type = TYPE_EXTENT_DATA_REF; - RtlCopyMemory(&ref->edr, ptr + sizeof(UINT8), sizeof(EXTENT_DATA_REF)); - inline_rc += ref->edr.count; - - le = Vcb->roots.Flink; - while (le != &Vcb->roots) { - root* r2 = CONTAINING_RECORD(le, root, list_entry); - - if (r2->id == ref->edr.root) { - r = r2; - break; - } - - le = le->Flink; - } - - if 
(!r) { - ERR("could not find subvol %llx\n", ref->edr.count); - ExFreePool(ref); - return STATUS_INTERNAL_ERROR; - } - - searchkey.obj_id = ref->edr.objid; - searchkey.obj_type = TYPE_EXTENT_DATA; - searchkey.offset = ref->edr.offset; - - Status = find_item(Vcb, r, &tp3, &searchkey, FALSE, NULL); - if (!NT_SUCCESS(Status)) { - ERR("find_item returned %08x\n", Status); - ExFreePool(ref); - return Status; - } - - if (keycmp(tp3.item->key, searchkey)) { - ERR("could not find (%llx,%x,%llx) in root %llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, r->id); - ExFreePool(ref); - return STATUS_INTERNAL_ERROR; - } - - Status = add_metadata_reloc_parent(Vcb, metadata_items, tp3.tree->header.address, &mr, rollback); + EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)(ptr + sizeof(UINT8)); + + inline_rc += edr->count; + + Status = data_reloc_add_tree_edr(Vcb, metadata_items, dr, edr, rollback); if (!NT_SUCCESS(Status)) { - ERR("add_metadata_reloc_parent returned %08x\n", Status); - ExFreePool(ref); + ERR("data_reloc_add_tree_edr returned %08x\n", Status); return Status; } - - ref->parent = mr; } else if (secttype == TYPE_SHARED_DATA_REF) { + metadata_reloc* mr; + data_reloc_ref* ref; + + ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG); + if (!ref) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + ref->type = TYPE_SHARED_DATA_REF; RtlCopyMemory(&ref->sdr, ptr + sizeof(UINT8), sizeof(SHARED_DATA_REF)); inline_rc += ref->sdr.count; - + Status = add_metadata_reloc_parent(Vcb, metadata_items, ref->sdr.offset, &mr, rollback); if (!NT_SUCCESS(Status)) { ERR("add_metadata_reloc_parent returned %08x\n", Status); ExFreePool(ref); return Status; } - + ref->parent = mr; + + InsertTailList(&dr->refs, &ref->list_entry); } else { ERR("unexpected tree type %x\n", secttype); - ExFreePool(ref); return STATUS_INTERNAL_ERROR; } - - InsertTailList(&dr->refs, &ref->list_entry); - + + len -= sectlen; ptr += sizeof(UINT8) + sectlen; } - + if 
(inline_rc < ei->refcount) { // look for non-inline entries traverse_ptr tp2 = *tp, next_tp; - + while (find_next_item(Vcb, &tp2, &next_tp, FALSE, NULL)) { - metadata_reloc* mr; - NTSTATUS Status; - tp2 = next_tp; - + if (tp2.item->key.obj_id == tp->item->key.obj_id) { if (tp2.item->key.obj_type == TYPE_EXTENT_DATA_REF && tp2.item->size >= sizeof(EXTENT_DATA_REF)) { - data_reloc_ref* ref; - LIST_ENTRY* le; - KEY searchkey; - traverse_ptr tp3; - root* r = NULL; - - ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG); - if (!ref) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - ref->type = TYPE_EXTENT_DATA_REF; - RtlCopyMemory(&ref->edr, tp2.item->data, sizeof(EXTENT_DATA_REF)); - - le = Vcb->roots.Flink; - while (le != &Vcb->roots) { - root* r2 = CONTAINING_RECORD(le, root, list_entry); - - if (r2->id == ref->edr.root) { - r = r2; - break; - } - - le = le->Flink; - } - - if (!r) { - ERR("could not find subvol %llx\n", ref->edr.count); - ExFreePool(ref); - return STATUS_INTERNAL_ERROR; - } - - searchkey.obj_id = ref->edr.objid; - searchkey.obj_type = TYPE_EXTENT_DATA; - searchkey.offset = ref->edr.offset; - - Status = find_item(Vcb, r, &tp3, &searchkey, FALSE, NULL); + Status = data_reloc_add_tree_edr(Vcb, metadata_items, dr, (EXTENT_DATA_REF*)tp2.item->data, rollback); if (!NT_SUCCESS(Status)) { - ERR("find_item returned %08x\n", Status); - ExFreePool(ref); + ERR("data_reloc_add_tree_edr returned %08x\n", Status); return Status; } - - if (!keycmp(tp3.item->key, searchkey)) { - ERR("could not find (%llx,%x,%llx) in root %llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, r->id); - ExFreePool(ref); - return STATUS_INTERNAL_ERROR; - } - - Status = add_metadata_reloc_parent(Vcb, metadata_items, tp3.tree->header.address, &mr, rollback); + + Status = delete_tree_item(Vcb, &tp2); if (!NT_SUCCESS(Status)) { - ERR("add_metadata_reloc_parent returned %08x\n", Status); - ExFreePool(ref); + ERR("delete_tree_item 
returned %08x\n", Status); return Status; } - - ref->parent = mr; - InsertTailList(&dr->refs, &ref->list_entry); - - delete_tree_item(Vcb, &tp2, rollback); - } else if (tp2.item->key.obj_type == TYPE_SHARED_DATA_REF && tp2.item->size >= sizeof(SHARED_DATA_REF)) { - data_reloc_ref* ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG); + } else if (tp2.item->key.obj_type == TYPE_SHARED_DATA_REF && tp2.item->size >= sizeof(UINT32)) { + metadata_reloc* mr; + data_reloc_ref* ref; + + ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG); if (!ref) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + ref->type = TYPE_SHARED_DATA_REF; - RtlCopyMemory(&ref->sdr, tp2.item->data, sizeof(SHARED_DATA_REF)); - + ref->sdr.offset = tp2.item->key.offset; + ref->sdr.count = *((UINT32*)tp2.item->data); + Status = add_metadata_reloc_parent(Vcb, metadata_items, ref->sdr.offset, &mr, rollback); if (!NT_SUCCESS(Status)) { ERR("add_metadata_reloc_parent returned %08x\n", Status); ExFreePool(ref); return Status; } - + ref->parent = mr; InsertTailList(&dr->refs, &ref->list_entry); - - delete_tree_item(Vcb, &tp2, rollback); + + Status = delete_tree_item(Vcb, &tp2); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } } } else break; } } - + InsertTailList(items, &dr->list_entry); - + return STATUS_SUCCESS; } -static NTSTATUS add_data_reloc_extent_item(device_extension* Vcb, data_reloc* dr, LIST_ENTRY* rollback) { +static void sort_data_reloc_refs(data_reloc* dr) { + LIST_ENTRY newlist, *le; + + if (IsListEmpty(&dr->refs)) + return; + + // insertion sort + + InitializeListHead(&newlist); + + while (!IsListEmpty(&dr->refs)) { + data_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&dr->refs), data_reloc_ref, list_entry); + BOOL inserted = FALSE; + + if (ref->type == TYPE_EXTENT_DATA_REF) + ref->hash = get_extent_data_ref_hash2(ref->edr.root, ref->edr.objid, ref->edr.offset); + else if 
(ref->type == TYPE_SHARED_DATA_REF) + ref->hash = ref->parent->new_address; + + le = newlist.Flink; + while (le != &newlist) { + data_reloc_ref* ref2 = CONTAINING_RECORD(le, data_reloc_ref, list_entry); + + if (ref->type < ref2->type || (ref->type == ref2->type && ref->hash > ref2->hash)) { + InsertHeadList(le->Blink, &ref->list_entry); + inserted = TRUE; + break; + } + + le = le->Flink; + } + + if (!inserted) + InsertTailList(&newlist, &ref->list_entry); + } + + le = newlist.Flink; + while (le != &newlist) { + data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry); + + if (le->Flink != &newlist) { + data_reloc_ref* ref2 = CONTAINING_RECORD(le->Flink, data_reloc_ref, list_entry); + + if (ref->type == TYPE_EXTENT_DATA_REF && ref2->type == TYPE_EXTENT_DATA_REF && ref->edr.root == ref2->edr.root && + ref->edr.objid == ref2->edr.objid && ref->edr.offset == ref2->edr.offset) { + RemoveEntryList(&ref2->list_entry); + ref->edr.count += ref2->edr.count; + ExFreePool(ref2); + continue; + } + } + + le = le->Flink; + } + + newlist.Flink->Blink = &dr->refs; + newlist.Blink->Flink = &dr->refs; + dr->refs.Flink = newlist.Flink; + dr->refs.Blink = newlist.Blink; +} + +static NTSTATUS add_data_reloc_extent_item(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, data_reloc* dr) { + NTSTATUS Status; LIST_ENTRY* le; UINT64 rc = 0; UINT16 inline_len; @@ -1407,118 +1542,120 @@ static NTSTATUS add_data_reloc_extent_item(device_extension* Vcb, data_reloc* dr data_reloc_ref* first_noninline = NULL; EXTENT_ITEM* ei; UINT8* ptr; - + inline_len = sizeof(EXTENT_ITEM); - + + sort_data_reloc_refs(dr); + le = dr->refs.Flink; while (le != &dr->refs) { data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry); - ULONG extlen = 0; - - rc++; - - if (ref->type == TYPE_EXTENT_DATA_REF) + UINT16 extlen = 0; + + if (ref->type == TYPE_EXTENT_DATA_REF) { extlen += sizeof(EXTENT_DATA_REF); - else if (ref->type == TYPE_SHARED_DATA_REF) + rc += 
ref->edr.count; + } else if (ref->type == TYPE_SHARED_DATA_REF) { extlen += sizeof(SHARED_DATA_REF); + rc++; + } if (all_inline) { - if (inline_len + 1 + extlen > Vcb->superblock.node_size / 4) { + if ((ULONG)(inline_len + 1 + extlen) > (Vcb->superblock.node_size >> 2)) { all_inline = FALSE; first_noninline = ref; } else inline_len += extlen + 1; } - + le = le->Flink; } - + ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG); if (!ei) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + ei->refcount = rc; ei->generation = dr->ei->generation; ei->flags = dr->ei->flags; ptr = (UINT8*)&ei[1]; - + le = dr->refs.Flink; while (le != &dr->refs) { data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry); - + if (ref == first_noninline) break; - + *ptr = ref->type; ptr++; - + if (ref->type == TYPE_EXTENT_DATA_REF) { EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)ptr; - + RtlCopyMemory(edr, &ref->edr, sizeof(EXTENT_DATA_REF)); - + ptr += sizeof(EXTENT_DATA_REF); } else if (ref->type == TYPE_SHARED_DATA_REF) { SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)ptr; - + sdr->offset = ref->parent->new_address; sdr->count = ref->sdr.count; - + ptr += sizeof(SHARED_DATA_REF); } - + le = le->Flink; } - - if (!insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_EXTENT_ITEM, dr->size, ei, inline_len, NULL, NULL, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_EXTENT_ITEM, dr->size, ei, inline_len, NULL, NULL); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; } - + if (!all_inline) { le = &first_noninline->list_entry; - + while (le != &dr->refs) { data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry); - + if (ref->type == TYPE_EXTENT_DATA_REF) { EXTENT_DATA_REF* edr; - UINT64 off; - + edr = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA_REF), ALLOC_TAG); if (!edr) { 
ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(edr, &ref->edr, sizeof(EXTENT_DATA_REF)); - - off = get_extent_data_ref_hash2(ref->edr.root, ref->edr.objid, ref->edr.offset); - - if (!insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_EXTENT_DATA_REF, off, edr, sizeof(EXTENT_DATA_REF), NULL, NULL, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_EXTENT_DATA_REF, ref->hash, edr, sizeof(EXTENT_DATA_REF), NULL, NULL); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; } } else if (ref->type == TYPE_SHARED_DATA_REF) { - SHARED_DATA_REF* sdr; - - sdr = ExAllocatePoolWithTag(PagedPool, sizeof(SHARED_DATA_REF), ALLOC_TAG); + UINT32* sdr; + + sdr = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32), ALLOC_TAG); if (!sdr) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - - sdr->offset = ref->parent->new_address; - sdr->count = ref->sdr.count; - - if (!insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_SHARED_DATA_REF, sdr->offset, sdr, sizeof(SHARED_DATA_REF), NULL, NULL, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + *sdr = ref->sdr.count; + + Status = insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_SHARED_DATA_REF, ref->parent->new_address, sdr, sizeof(UINT32), NULL, NULL); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; } } - + le = le->Flink; } } @@ -1535,71 +1672,71 @@ static NTSTATUS balance_data_chunk(device_extension* Vcb, chunk* c, BOOL* change UINT64 loaded = 0, num_loaded = 0; chunk* newchunk = NULL; UINT8* data = NULL; - + TRACE("chunk %llx\n", c->offset); - + InitializeListHead(&rollback); InitializeListHead(&items); InitializeListHead(&metadata_items); - + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); - + searchkey.obj_id = 
c->offset; searchkey.obj_type = TYPE_EXTENT_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL); if (!NT_SUCCESS(Status)) { ERR("find_item returned %08x\n", Status); goto end; } - + do { traverse_ptr next_tp; - + if (tp.item->key.obj_id >= c->offset + c->chunk_item->size) break; - + if (tp.item->key.obj_id >= c->offset && tp.item->key.obj_type == TYPE_EXTENT_ITEM) { BOOL tree = FALSE; - + if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) { EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data; - + if (ei->flags & EXTENT_ITEM_TREE_BLOCK) tree = TRUE; } - + if (!tree) { Status = add_data_reloc(Vcb, &items, &metadata_items, &tp, c, &rollback); - + if (!NT_SUCCESS(Status)) { ERR("add_data_reloc returned %08x\n", Status); goto end; } - + loaded += tp.item->key.offset; num_loaded++; - + if (loaded >= 0x1000000 || num_loaded >= 100) // only do so much at a time, so we don't block too obnoxiously break; } } - + b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL); - + if (b) tp = next_tp; } while (b); - + if (IsListEmpty(&items)) { *changed = FALSE; Status = STATUS_SUCCESS; goto end; } else *changed = TRUE; - - data = ExAllocatePoolWithTag(PagedPool, 0x100000, ALLOC_TAG); + + data = ExAllocatePoolWithTag(PagedPool, BALANCE_UNIT, ALLOC_TAG); if (!data) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; @@ -1612,248 +1749,443 @@ static NTSTATUS balance_data_chunk(device_extension* Vcb, chunk* c, BOOL* change BOOL done = FALSE; LIST_ENTRY* le2; UINT32* csum; - UINT64 off; - + RTL_BITMAP bmp; + ULONG* bmparr; + ULONG runlength, index, lastoff; + if (newchunk) { ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE); - + if (find_data_address_in_chunk(Vcb, newchunk, dr->size, &dr->new_address)) { - increase_chunk_usage(newchunk, dr->size); - space_list_subtract(Vcb, newchunk, FALSE, dr->new_address, dr->size, &rollback); + newchunk->used += dr->size; + 
space_list_subtract(newchunk, FALSE, dr->new_address, dr->size, &rollback); done = TRUE; } - + ExReleaseResourceLite(&newchunk->lock); } - + if (!done) { ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); le2 = Vcb->chunks.Flink; while (le2 != &Vcb->chunks) { chunk* c2 = CONTAINING_RECORD(le2, chunk, list_entry); - + if (!c2->readonly && !c2->reloc && c2 != newchunk && c2->chunk_item->type == Vcb->data_flags) { ExAcquireResourceExclusiveLite(&c2->lock, TRUE); - + if ((c2->chunk_item->size - c2->used) >= dr->size) { if (find_data_address_in_chunk(Vcb, c2, dr->size, &dr->new_address)) { - increase_chunk_usage(c2, dr->size); - space_list_subtract(Vcb, c2, FALSE, dr->new_address, dr->size, &rollback); + c2->used += dr->size; + space_list_subtract(c2, FALSE, dr->new_address, dr->size, &rollback); ExReleaseResourceLite(&c2->lock); newchunk = c2; done = TRUE; break; } } - + ExReleaseResourceLite(&c2->lock); } le2 = le2->Flink; } - + // allocate new chunk if necessary if (!done) { - newchunk = alloc_chunk(Vcb, Vcb->data_flags); - - if (!newchunk) { - ERR("could not allocate new chunk\n"); + Status = alloc_chunk(Vcb, Vcb->data_flags, &newchunk, FALSE); + + if (!NT_SUCCESS(Status)) { + ERR("alloc_chunk returned %08x\n", Status); ExReleaseResourceLite(&Vcb->chunk_lock); - Status = STATUS_DISK_FULL; goto end; } - + ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE); - + + newchunk->balance_num = Vcb->balance.balance_num; + if (!find_data_address_in_chunk(Vcb, newchunk, dr->size, &dr->new_address)) { ExReleaseResourceLite(&newchunk->lock); + ExReleaseResourceLite(&Vcb->chunk_lock); ERR("could not find address in new chunk\n"); Status = STATUS_DISK_FULL; goto end; } else { - increase_chunk_usage(newchunk, dr->size); - space_list_subtract(Vcb, newchunk, FALSE, dr->new_address, dr->size, &rollback); + newchunk->used += dr->size; + space_list_subtract(newchunk, FALSE, dr->new_address, dr->size, &rollback); } - + ExReleaseResourceLite(&newchunk->lock); } - + 
ExReleaseResourceLite(&Vcb->chunk_lock); } - + dr->newchunk = newchunk; - - csum = ExAllocatePoolWithTag(PagedPool, dr->size * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); + + bmparr = ExAllocatePoolWithTag(PagedPool, (ULONG)sector_align((dr->size / Vcb->superblock.sector_size) + 1, sizeof(ULONG)), ALLOC_TAG); + if (!bmparr) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(dr->size * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG); if (!csum) { ERR("out of memory\n"); + ExFreePool(bmparr); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - - Status = load_csum(Vcb, csum, dr->address, dr->size / Vcb->superblock.sector_size, NULL); - if (NT_SUCCESS(Status)) { - add_checksum_entry(Vcb, dr->new_address, dr->size / Vcb->superblock.sector_size, csum, NULL, &rollback); - add_checksum_entry(Vcb, dr->address, dr->size / Vcb->superblock.sector_size, NULL, NULL, &rollback); + RtlInitializeBitMap(&bmp, bmparr, (ULONG)(dr->size / Vcb->superblock.sector_size)); + RtlSetAllBits(&bmp); // 1 = no csum, 0 = csum + + searchkey.obj_id = EXTENT_CSUM_ID; + searchkey.obj_type = TYPE_EXTENT_CSUM; + searchkey.offset = dr->address; + + Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) { + ERR("find_item returned %08x\n", Status); + ExFreePool(csum); + ExFreePool(bmparr); + goto end; } - ExFreePool(csum); - - off = 0; - - while (off < dr->size) { - ULONG ds = min(dr->size - off, 0x100000); - - Status = read_data(Vcb, dr->address + off, ds, NULL, FALSE, data, c, NULL, NULL, FALSE); - if (!NT_SUCCESS(Status)) { - ERR("read_data returned %08x\n", Status); - goto end; - } - - Status = write_data_complete(Vcb, dr->new_address + off, data, ds, NULL, newchunk); - if (!NT_SUCCESS(Status)) { - ERR("write_data_complete returned %08x\n", Status); - goto end; + if (Status != STATUS_NOT_FOUND) { + do { + 
traverse_ptr next_tp; + + if (tp.item->key.obj_type == TYPE_EXTENT_CSUM) { + if (tp.item->key.offset >= dr->address + dr->size) + break; + else if (tp.item->size >= sizeof(UINT32) && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= dr->address) { + UINT64 cs = max(dr->address, tp.item->key.offset); + UINT64 ce = min(dr->address + dr->size, tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32))); + + RtlCopyMemory(csum + ((cs - dr->address) / Vcb->superblock.sector_size), + tp.item->data + ((cs - tp.item->key.offset) * sizeof(UINT32) / Vcb->superblock.sector_size), + (ULONG)((ce - cs) * sizeof(UINT32) / Vcb->superblock.sector_size)); + + RtlClearBits(&bmp, (ULONG)((cs - dr->address) / Vcb->superblock.sector_size), (ULONG)((ce - cs) / Vcb->superblock.sector_size)); + + if (ce == dr->address + dr->size) + break; + } + } + + if (find_next_item(Vcb, &tp, &next_tp, FALSE, NULL)) + tp = next_tp; + else + break; + } while (TRUE); + } + + lastoff = 0; + runlength = RtlFindFirstRunClear(&bmp, &index); + + while (runlength != 0) { + if (index > lastoff) { + ULONG off = lastoff; + ULONG size = index - lastoff; + + // handle no csum run + do { + ULONG rl; + + if (size * Vcb->superblock.sector_size > BALANCE_UNIT) + rl = BALANCE_UNIT / Vcb->superblock.sector_size; + else + rl = size; + + Status = read_data(Vcb, dr->address + (off * Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, NULL, FALSE, data, + c, NULL, NULL, 0, FALSE, NormalPagePriority); + if (!NT_SUCCESS(Status)) { + ERR("read_data returned %08x\n", Status); + ExFreePool(csum); + ExFreePool(bmparr); + goto end; + } + + Status = write_data_complete(Vcb, dr->new_address + (off * Vcb->superblock.sector_size), data, rl * Vcb->superblock.sector_size, + NULL, newchunk, FALSE, 0, NormalPagePriority); + if (!NT_SUCCESS(Status)) { + ERR("write_data_complete returned %08x\n", Status); + ExFreePool(csum); + ExFreePool(bmparr); + goto end; + } + + 
size -= rl; + off += rl; + } while (size > 0); } - - off += ds; + + add_checksum_entry(Vcb, dr->new_address + (index * Vcb->superblock.sector_size), runlength, &csum[index], NULL); + add_checksum_entry(Vcb, dr->address + (index * Vcb->superblock.sector_size), runlength, NULL, NULL); + + // handle csum run + do { + ULONG rl; + + if (runlength * Vcb->superblock.sector_size > BALANCE_UNIT) + rl = BALANCE_UNIT / Vcb->superblock.sector_size; + else + rl = runlength; + + Status = read_data(Vcb, dr->address + (index * Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, &csum[index], FALSE, data, + c, NULL, NULL, 0, FALSE, NormalPagePriority); + if (!NT_SUCCESS(Status)) { + ERR("read_data returned %08x\n", Status); + ExFreePool(csum); + ExFreePool(bmparr); + goto end; + } + + Status = write_data_complete(Vcb, dr->new_address + (index * Vcb->superblock.sector_size), data, rl * Vcb->superblock.sector_size, + NULL, newchunk, FALSE, 0, NormalPagePriority); + if (!NT_SUCCESS(Status)) { + ERR("write_data_complete returned %08x\n", Status); + ExFreePool(csum); + ExFreePool(bmparr); + goto end; + } + + runlength -= rl; + index += rl; + } while (runlength > 0); + + lastoff = index; + runlength = RtlFindNextForwardRunClear(&bmp, index, &index); + } + + ExFreePool(csum); + ExFreePool(bmparr); + + // handle final nocsum run + if (lastoff < dr->size / Vcb->superblock.sector_size) { + ULONG off = lastoff; + ULONG size = (ULONG)((dr->size / Vcb->superblock.sector_size) - lastoff); + + do { + ULONG rl; + + if (size * Vcb->superblock.sector_size > BALANCE_UNIT) + rl = BALANCE_UNIT / Vcb->superblock.sector_size; + else + rl = size; + + Status = read_data(Vcb, dr->address + (off * Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, NULL, FALSE, data, + c, NULL, NULL, 0, FALSE, NormalPagePriority); + if (!NT_SUCCESS(Status)) { + ERR("read_data returned %08x\n", Status); + ExFreePool(csum); + ExFreePool(bmparr); + goto end; + } + + Status = write_data_complete(Vcb, 
dr->new_address + (off * Vcb->superblock.sector_size), data, rl * Vcb->superblock.sector_size, + NULL, newchunk, FALSE, 0, NormalPagePriority); + if (!NT_SUCCESS(Status)) { + ERR("write_data_complete returned %08x\n", Status); + ExFreePool(csum); + ExFreePool(bmparr); + goto end; + } + + size -= rl; + off += rl; + } while (size > 0); } le = le->Flink; } - + ExFreePool(data); data = NULL; - + Status = write_metadata_items(Vcb, &metadata_items, &items, NULL, &rollback); if (!NT_SUCCESS(Status)) { ERR("write_metadata_items returned %08x\n", Status); goto end; } - + le = items.Flink; while (le != &items) { data_reloc* dr = CONTAINING_RECORD(le, data_reloc, list_entry); - - Status = add_data_reloc_extent_item(Vcb, dr, &rollback); + + Status = add_data_reloc_extent_item(Vcb, dr); if (!NT_SUCCESS(Status)) { ERR("add_data_reloc_extent_item returned %08x\n", Status); goto end; } - + le = le->Flink; } - + le = c->changed_extents.Flink; while (le != &c->changed_extents) { LIST_ENTRY *le2, *le3; changed_extent* ce = CONTAINING_RECORD(le, changed_extent, list_entry); - + le3 = le->Flink; - + le2 = items.Flink; while (le2 != &items) { data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry); - + if (ce->address == dr->address) { ce->address = dr->new_address; RemoveEntryList(&ce->list_entry); InsertTailList(&dr->newchunk->changed_extents, &ce->list_entry); break; } - + le2 = le2->Flink; } - + le = le3; } - - // update open FCBs - // FIXME - speed this up - - ExAcquireResourceSharedLite(&Vcb->fcb_lock, TRUE); - - le = Vcb->all_fcbs.Flink; - while (le != &Vcb->all_fcbs) { - struct _fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_all); - LIST_ENTRY* le2; - ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); - - le2 = fcb->extents.Flink; - while (le2 != &fcb->extents) { - extent* ext = CONTAINING_RECORD(le2, extent, list_entry); - - if (!ext->ignore) { - if (ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) { - EXTENT_DATA2* 
ed2 = (EXTENT_DATA2*)ext->data->data; - - if (ed2->size > 0 && ed2->address >= c->offset && ed2->address < c->offset + c->chunk_item->size) { - LIST_ENTRY* le3 = items.Flink; - while (le3 != &items) { - data_reloc* dr = CONTAINING_RECORD(le3, data_reloc, list_entry); - - if (ed2->address == dr->address) { - ed2->address = dr->new_address; - break; + Status = STATUS_SUCCESS; + + Vcb->need_write = TRUE; + +end: + if (NT_SUCCESS(Status)) { + // update extents in cache inodes before we flush + le = Vcb->chunks.Flink; + while (le != &Vcb->chunks) { + chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry); + + if (c2->cache) { + LIST_ENTRY* le2; + + ExAcquireResourceExclusiveLite(c2->cache->Header.Resource, TRUE); + + le2 = c2->cache->extents.Flink; + while (le2 != &c2->cache->extents) { + extent* ext = CONTAINING_RECORD(le2, extent, list_entry); + + if (!ext->ignore) { + if (ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; + + if (ed2->size > 0 && ed2->address >= c->offset && ed2->address < c->offset + c->chunk_item->size) { + LIST_ENTRY* le3 = items.Flink; + while (le3 != &items) { + data_reloc* dr = CONTAINING_RECORD(le3, data_reloc, list_entry); + + if (ed2->address == dr->address) { + ed2->address = dr->new_address; + break; + } + + le3 = le3->Flink; + } } - - le3 = le3->Flink; } } + + le2 = le2->Flink; } + + ExReleaseResourceLite(c2->cache->Header.Resource); } - - le2 = le2->Flink; + + le = le->Flink; } - - ExReleaseResourceLite(fcb->Header.Resource); - - le = le->Flink; + + Status = do_write(Vcb, NULL); + if (!NT_SUCCESS(Status)) + ERR("do_write returned %08x\n", Status); } - - ExReleaseResourceLite(&Vcb->fcb_lock); - - Status = STATUS_SUCCESS; - - Vcb->need_write = TRUE; - -end: - if (NT_SUCCESS(Status)) - clear_rollback(Vcb, &rollback); - else + + if (NT_SUCCESS(Status)) { + clear_rollback(&rollback); + + // update open FCBs + // FIXME - speed this up(?) 
+ + ExAcquireResourceSharedLite(&Vcb->fcb_lock, TRUE); + + le = Vcb->all_fcbs.Flink; + while (le != &Vcb->all_fcbs) { + struct _fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_all); + LIST_ENTRY* le2; + + ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); + + le2 = fcb->extents.Flink; + while (le2 != &fcb->extents) { + extent* ext = CONTAINING_RECORD(le2, extent, list_entry); + + if (!ext->ignore) { + if (ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; + + if (ed2->size > 0 && ed2->address >= c->offset && ed2->address < c->offset + c->chunk_item->size) { + LIST_ENTRY* le3 = items.Flink; + while (le3 != &items) { + data_reloc* dr = CONTAINING_RECORD(le3, data_reloc, list_entry); + + if (ed2->address == dr->address) { + ed2->address = dr->new_address; + break; + } + + le3 = le3->Flink; + } + } + } + } + + le2 = le2->Flink; + } + + ExReleaseResourceLite(fcb->Header.Resource); + + le = le->Flink; + } + + ExReleaseResourceLite(&Vcb->fcb_lock); + } else do_rollback(Vcb, &rollback); - + + free_trees(Vcb); + ExReleaseResourceLite(&Vcb->tree_lock); - + if (data) ExFreePool(data); - + while (!IsListEmpty(&items)) { data_reloc* dr = CONTAINING_RECORD(RemoveHeadList(&items), data_reloc, list_entry); - + while (!IsListEmpty(&dr->refs)) { data_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&dr->refs), data_reloc_ref, list_entry); - + ExFreePool(ref); } - + ExFreePool(dr); } - + while (!IsListEmpty(&metadata_items)) { metadata_reloc* mr = CONTAINING_RECORD(RemoveHeadList(&metadata_items), metadata_reloc, list_entry); - + while (!IsListEmpty(&mr->refs)) { metadata_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&mr->refs), metadata_reloc_ref, list_entry); - + ExFreePool(ref); } - + ExFreePool(mr); } - + return Status; } @@ -1876,41 +2208,41 @@ static __inline UINT64 get_chunk_dup_type(chunk* c) { static BOOL should_balance_chunk(device_extension* Vcb, 
UINT8 sort, chunk* c) { btrfs_balance_opts* opts; - + opts = &Vcb->balance.opts[sort]; - + if (!(opts->flags & BTRFS_BALANCE_OPTS_ENABLED)) return FALSE; - + if (opts->flags & BTRFS_BALANCE_OPTS_PROFILES) { UINT64 type = get_chunk_dup_type(c); - + if (!(type & opts->profiles)) return FALSE; } - + if (opts->flags & BTRFS_BALANCE_OPTS_DEVID) { UINT16 i; CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; BOOL b = FALSE; - + for (i = 0; i < c->chunk_item->num_stripes; i++) { if (cis[i].dev_id == opts->devid) { b = TRUE; break; } } - + if (!b) return FALSE; } - + if (opts->flags & BTRFS_BALANCE_OPTS_DRANGE) { UINT16 i, factor; UINT64 physsize; CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; BOOL b = FALSE; - + if (c->chunk_item->type & BLOCK_FLAG_RAID0) factor = c->chunk_item->num_stripes; else if (c->chunk_item->type & BLOCK_FLAG_RAID10) @@ -1921,48 +2253,49 @@ static BOOL should_balance_chunk(device_extension* Vcb, UINT8 sort, chunk* c) { factor = c->chunk_item->num_stripes - 2; else // SINGLE, DUPLICATE, RAID1 factor = 1; - + physsize = c->chunk_item->size / factor; - + for (i = 0; i < c->chunk_item->num_stripes; i++) { - if (cis[i].offset >= opts->drange_start && cis[i].offset + physsize < opts->drange_end) { + if (cis[i].offset < opts->drange_end && cis[i].offset + physsize >= opts->drange_start && + (!(opts->flags & BTRFS_BALANCE_OPTS_DEVID) || cis[i].dev_id == opts->devid)) { b = TRUE; break; } } - + if (!b) return FALSE; } - + if (opts->flags & BTRFS_BALANCE_OPTS_VRANGE) { if (c->offset + c->chunk_item->size <= opts->vrange_start || c->offset > opts->vrange_end) return FALSE; } - + if (opts->flags & BTRFS_BALANCE_OPTS_STRIPES) { if (c->chunk_item->num_stripes < opts->stripes_start || c->chunk_item->num_stripes < opts->stripes_end) return FALSE; } - + if (opts->flags & BTRFS_BALANCE_OPTS_USAGE) { UINT64 usage = c->used * 100 / c->chunk_item->size; - + // usage == 0 should mean completely empty, not just that usage rounds to 0% if 
(c->used > 0 && usage == 0) usage = 1; - + if (usage < opts->usage_start || usage > opts->usage_end) return FALSE; } - + if (opts->flags & BTRFS_BALANCE_OPTS_CONVERT && opts->flags & BTRFS_BALANCE_OPTS_SOFT) { UINT64 type = get_chunk_dup_type(c); - + if (type == opts->convert) return FALSE; } - + return TRUE; } @@ -1987,32 +2320,32 @@ static void copy_balance_args(btrfs_balance_opts* opts, BALANCE_ARGS* args) { args->devid = opts->devid; args->flags |= BALANCE_ARGS_FLAGS_DEVID; } - + if (opts->flags & BTRFS_BALANCE_OPTS_DRANGE) { args->drange_start = opts->drange_start; args->drange_end = opts->drange_end; args->flags |= BALANCE_ARGS_FLAGS_DRANGE; } - + if (opts->flags & BTRFS_BALANCE_OPTS_VRANGE) { args->vrange_start = opts->vrange_start; args->vrange_end = opts->vrange_end; args->flags |= BALANCE_ARGS_FLAGS_VRANGE; } - + if (opts->flags & BTRFS_BALANCE_OPTS_CONVERT) { args->convert = opts->convert; args->flags |= BALANCE_ARGS_FLAGS_CONVERT; - + if (opts->flags & BTRFS_BALANCE_OPTS_SOFT) args->flags |= BALANCE_ARGS_FLAGS_SOFT; } - + if (opts->flags & BTRFS_BALANCE_OPTS_LIMIT) { if (args->limit_start == 0) { args->flags |= BALANCE_ARGS_FLAGS_LIMIT_RANGE; - args->limit_start = opts->limit_start; - args->limit_end = opts->limit_end; + args->limit_start = (UINT32)opts->limit_start; + args->limit_end = (UINT32)opts->limit_end; } else { args->flags |= BALANCE_ARGS_FLAGS_LIMIT; args->limit = opts->limit_end; @@ -2027,175 +2360,179 @@ static void copy_balance_args(btrfs_balance_opts* opts, BALANCE_ARGS* args) { } static NTSTATUS add_balance_item(device_extension* Vcb) { - LIST_ENTRY rollback; KEY searchkey; traverse_ptr tp; NTSTATUS Status; BALANCE_ITEM* bi; - - InitializeListHead(&rollback); - + searchkey.obj_id = BALANCE_ITEM_ID; searchkey.obj_type = TYPE_TEMP_ITEM; searchkey.offset = 0; - + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL); if (!NT_SUCCESS(Status)) { ERR("find_item returned 
%08x\n", Status); goto end; } - - if (!keycmp(tp.item->key, searchkey)) - delete_tree_item(Vcb, &tp, &rollback); - + + if (!keycmp(tp.item->key, searchkey)) { + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + goto end; + } + } + bi = ExAllocatePoolWithTag(PagedPool, sizeof(BALANCE_ITEM), ALLOC_TAG); if (!bi) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + RtlZeroMemory(bi, sizeof(BALANCE_ITEM)); - + if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED) { bi->flags |= BALANCE_FLAGS_DATA; copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bi->data); } - + if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) { bi->flags |= BALANCE_FLAGS_METADATA; copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bi->metadata); } - + if (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED) { bi->flags |= BALANCE_FLAGS_SYSTEM; copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bi->system); } - - if (!insert_tree_item(Vcb, Vcb->root_root, BALANCE_ITEM_ID, TYPE_TEMP_ITEM, 0, bi, sizeof(BALANCE_ITEM), NULL, NULL, &rollback)) { - ERR("insert_tree_item failed\n"); - Status = STATUS_INTERNAL_ERROR; + + Status = insert_tree_item(Vcb, Vcb->root_root, BALANCE_ITEM_ID, TYPE_TEMP_ITEM, 0, bi, sizeof(BALANCE_ITEM), NULL, NULL); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + ExFreePool(bi); goto end; } - + Status = STATUS_SUCCESS; - + end: if (NT_SUCCESS(Status)) { - do_write(Vcb, NULL, &rollback); - free_trees(Vcb); - - clear_rollback(Vcb, &rollback); - } else - do_rollback(Vcb, &rollback); - + Status = do_write(Vcb, NULL); + if (!NT_SUCCESS(Status)) + ERR("do_write returned %08x\n", Status); + } + + free_trees(Vcb); + ExReleaseResourceLite(&Vcb->tree_lock); - + return Status; } static NTSTATUS remove_balance_item(device_extension* Vcb) { - LIST_ENTRY rollback; 
KEY searchkey; traverse_ptr tp; NTSTATUS Status; - - InitializeListHead(&rollback); - + searchkey.obj_id = BALANCE_ITEM_ID; searchkey.obj_type = TYPE_TEMP_ITEM; searchkey.offset = 0; - + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL); if (!NT_SUCCESS(Status)) { ERR("find_item returned %08x\n", Status); goto end; } - + if (!keycmp(tp.item->key, searchkey)) { - delete_tree_item(Vcb, &tp, &rollback); - - do_write(Vcb, NULL, &rollback); + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + goto end; + } + + Status = do_write(Vcb, NULL); + if (!NT_SUCCESS(Status)) { + ERR("do_write returned %08x\n", Status); + goto end; + } + free_trees(Vcb); } Status = STATUS_SUCCESS; - + end: - if (NT_SUCCESS(Status)) - clear_rollback(Vcb, &rollback); - else - do_rollback(Vcb, &rollback); - ExReleaseResourceLite(&Vcb->tree_lock); - + return Status; } static void load_balance_args(btrfs_balance_opts* opts, BALANCE_ARGS* args) { opts->flags = BTRFS_BALANCE_OPTS_ENABLED; - + if (args->flags & BALANCE_ARGS_FLAGS_PROFILES) { opts->flags |= BTRFS_BALANCE_OPTS_PROFILES; opts->profiles = args->profiles; } - + if (args->flags & BALANCE_ARGS_FLAGS_USAGE) { opts->flags |= BTRFS_BALANCE_OPTS_USAGE; - + opts->usage_start = 0; - opts->usage_end = args->usage; + opts->usage_end = (UINT8)args->usage; } else if (args->flags & BALANCE_ARGS_FLAGS_USAGE_RANGE) { opts->flags |= BTRFS_BALANCE_OPTS_USAGE; - - opts->usage_start = args->usage_start; - opts->usage_end = args->usage_end; + + opts->usage_start = (UINT8)args->usage_start; + opts->usage_end = (UINT8)args->usage_end; } - + if (args->flags & BALANCE_ARGS_FLAGS_DEVID) { opts->flags |= BTRFS_BALANCE_OPTS_DEVID; opts->devid = args->devid; } - + if (args->flags & BALANCE_ARGS_FLAGS_DRANGE) { opts->flags |= BTRFS_BALANCE_OPTS_DRANGE; opts->drange_start = args->drange_start; opts->drange_end = 
args->drange_end; } - + if (args->flags & BALANCE_ARGS_FLAGS_VRANGE) { opts->flags |= BTRFS_BALANCE_OPTS_VRANGE; opts->vrange_start = args->vrange_start; opts->vrange_end = args->vrange_end; } - + if (args->flags & BALANCE_ARGS_FLAGS_LIMIT) { opts->flags |= BTRFS_BALANCE_OPTS_LIMIT; - + opts->limit_start = 0; opts->limit_end = args->limit; } else if (args->flags & BALANCE_ARGS_FLAGS_LIMIT_RANGE) { opts->flags |= BTRFS_BALANCE_OPTS_LIMIT; - + opts->limit_start = args->limit_start; opts->limit_end = args->limit_end; } - + if (args->flags & BALANCE_ARGS_FLAGS_STRIPES_RANGE) { opts->flags |= BTRFS_BALANCE_OPTS_STRIPES; - - opts->stripes_start = args->stripes_start; - opts->stripes_end = args->stripes_end; + + opts->stripes_start = (UINT16)args->stripes_start; + opts->stripes_end = (UINT16)args->stripes_end; } - + if (args->flags & BALANCE_ARGS_FLAGS_CONVERT) { opts->flags |= BTRFS_BALANCE_OPTS_CONVERT; opts->convert = args->convert; - + if (args->flags & BALANCE_ARGS_FLAGS_SOFT) opts->flags |= BTRFS_BALANCE_OPTS_SOFT; } @@ -2205,584 +2542,860 @@ static NTSTATUS remove_superblocks(device* dev) { NTSTATUS Status; superblock* sb; int i = 0; - + sb = ExAllocatePoolWithTag(PagedPool, sizeof(superblock), ALLOC_TAG); if (!sb) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlZeroMemory(sb, sizeof(superblock)); - - while (superblock_addrs[i] > 0 && dev->length >= superblock_addrs[i] + sizeof(superblock)) { + + while (superblock_addrs[i] > 0 && dev->devitem.num_bytes >= superblock_addrs[i] + sizeof(superblock)) { Status = write_data_phys(dev->devobj, superblock_addrs[i], sb, sizeof(superblock)); - + if (!NT_SUCCESS(Status)) { ExFreePool(sb); return Status; } - + i++; } - - ExFreePool(sb); - - return STATUS_SUCCESS; -} -static NTSTATUS replace_mount_dev(device_extension* Vcb, device* dev, PDEVICE_OBJECT mountmgr, BOOL part0) { - NTSTATUS Status; - MOUNTDEV_NAME mdn, *mdn2 = NULL, *mdn3 = NULL; - ULONG mdnsize, mmpsize; - MOUNTMGR_MOUNT_POINT* mmp = NULL; - 
MOUNTMGR_MOUNT_POINTS mmps, *mmps2 = NULL; - ULONG i; - UNICODE_STRING us; - - // get old device name - - Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &mdn, sizeof(MOUNTDEV_NAME), TRUE, NULL); - if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) { - ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); - return Status; - } - - mdnsize = offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength; - - mdn2 = ExAllocatePoolWithTag(PagedPool, mdnsize, ALLOC_TAG); - if (!mdn2) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, mdn2, mdnsize, TRUE, NULL); - if (!NT_SUCCESS(Status)) { - ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); - goto end; - } - - // get new device name - - Status = dev_ioctl(first_device(Vcb)->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &mdn, sizeof(MOUNTDEV_NAME), TRUE, NULL); - if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) { - ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); - goto end2; - } - - mdnsize = offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength; - - mdn3 = ExAllocatePoolWithTag(PagedPool, mdnsize, ALLOC_TAG); - if (!mdn3) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end2; - } - - Status = dev_ioctl(first_device(Vcb)->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, mdn3, mdnsize, TRUE, NULL); - if (!NT_SUCCESS(Status)) { - ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); - goto end2; - } - - // query and delete existing mount points - - mmpsize = sizeof(MOUNTMGR_MOUNT_POINT) + mdn2->NameLength; - - mmp = ExAllocatePoolWithTag(PagedPool, mmpsize, ALLOC_TAG); - if (!mmp) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end2; - } - - RtlZeroMemory(mmp, sizeof(MOUNTMGR_MOUNT_POINT)); - mmp->DeviceNameOffset = sizeof(MOUNTMGR_MOUNT_POINT); - mmp->DeviceNameLength = 
mdn2->NameLength; - RtlCopyMemory(&mmp[1], mdn2->Name, mdn2->NameLength); - - Status = dev_ioctl(mountmgr, IOCTL_MOUNTMGR_QUERY_POINTS, mmp, mmpsize, &mmps, mmpsize, TRUE, NULL); - if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) { - ERR("IOCTL_MOUNTMGR_QUERY_POINTS returned %08x\n", Status); - goto end2; - } - - mmps2 = ExAllocatePoolWithTag(PagedPool, mmps.Size, ALLOC_TAG); - if (!mmps2) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end2; - } - - Status = dev_ioctl(mountmgr, IOCTL_MOUNTMGR_DELETE_POINTS, mmp, mmpsize, mmps2, mmps.Size, TRUE, NULL); - if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) { - ERR("IOCTL_MOUNTMGR_DELETE_POINTS returned %08x\n", Status); - goto end2; - } - - // re-create mount points - - for (i = 0; i < mmps2->NumberOfMountPoints; i++) { - if (mmps2->MountPoints[i].SymbolicLinkNameOffset != 0) { - ULONG mcpilen; - MOUNTMGR_CREATE_POINT_INPUT* mcpi; - - mcpilen = sizeof(MOUNTMGR_CREATE_POINT_INPUT) + mmps2->MountPoints[i].SymbolicLinkNameLength + mdn3->NameLength; - - mcpi = ExAllocatePoolWithTag(PagedPool, mcpilen, ALLOC_TAG); - if (!mcpi) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end2; - } - - mcpi->SymbolicLinkNameOffset = sizeof(MOUNTMGR_CREATE_POINT_INPUT); - mcpi->SymbolicLinkNameLength = mmps2->MountPoints[i].SymbolicLinkNameLength; - mcpi->DeviceNameOffset = mcpi->SymbolicLinkNameOffset + mcpi->SymbolicLinkNameLength; - mcpi->DeviceNameLength = mdn3->NameLength; - - RtlCopyMemory((UINT8*)mcpi + mcpi->SymbolicLinkNameOffset, (UINT8*)mmps2 + mmps2->MountPoints[i].SymbolicLinkNameOffset, - mcpi->SymbolicLinkNameLength); - RtlCopyMemory((UINT8*)mcpi + mcpi->DeviceNameOffset, mdn3->Name, mdn3->NameLength); - - Status = dev_ioctl(mountmgr, IOCTL_MOUNTMGR_CREATE_POINT, mcpi, mcpilen, NULL, 0, TRUE, NULL); - if (!NT_SUCCESS(Status)) { - ERR("IOCTL_MOUNTMGR_CREATE_POINT returned %08x\n", Status); - ExFreePool(mcpi); - goto end2; - } - - 
ExFreePool(mcpi); - } - } - - Status = STATUS_SUCCESS; - -end2: - // re-add old device back to mountmgr - - if (!part0) { - us.Buffer = mdn2->Name; - us.Length = us.MaximumLength = mdn2->NameLength; - - add_volume(mountmgr, &us); - } - -end: - if (mdn2) - ExFreePool(mdn2); - - if (mdn3) - ExFreePool(mdn3); - - if (mmp) - ExFreePool(mmp); - - if (mmps2) - ExFreePool(mmps2); + ExFreePool(sb); - return Status; + return STATUS_SUCCESS; } -static NTSTATUS finish_removing_device(device_extension* Vcb, device* dev) { +static NTSTATUS finish_removing_device(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, device* dev) { KEY searchkey; traverse_ptr tp; NTSTATUS Status; - LIST_ENTRY rollback, *le; - BOOL first_dev, part0 = FALSE; - - InitializeListHead(&rollback); - - if (Vcb->need_write) - do_write(Vcb, NULL, &rollback); - + LIST_ENTRY* le; + volume_device_extension* vde; + + if (Vcb->need_write) { + Status = do_write(Vcb, NULL); + + if (!NT_SUCCESS(Status)) + ERR("do_write returned %08x\n", Status); + } else + Status = STATUS_SUCCESS; + free_trees(Vcb); - - clear_rollback(Vcb, &rollback); - + + if (!NT_SUCCESS(Status)) + return Status; + // remove entry in chunk tree searchkey.obj_id = 1; searchkey.obj_type = TYPE_DEV_ITEM; searchkey.offset = dev->devitem.dev_id; - + Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, NULL); if (!NT_SUCCESS(Status)) { ERR("find_item returned %08x\n", Status); return Status; } - if (!keycmp(searchkey, tp.item->key)) - delete_tree_item(Vcb, &tp, &rollback); - + if (!keycmp(searchkey, tp.item->key)) { + Status = delete_tree_item(Vcb, &tp); + + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + } + // remove stats entry in device tree - + searchkey.obj_id = 0; searchkey.obj_type = TYPE_DEV_STATS; searchkey.offset = dev->devitem.dev_id; - + Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, NULL); if (!NT_SUCCESS(Status)) { ERR("find_item returned 
%08x\n", Status); return Status; } - if (!keycmp(searchkey, tp.item->key)) - delete_tree_item(Vcb, &tp, &rollback); - + if (!keycmp(searchkey, tp.item->key)) { + Status = delete_tree_item(Vcb, &tp); + + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + } + // update superblock - + Vcb->superblock.num_devices--; Vcb->superblock.total_bytes -= dev->devitem.num_bytes; Vcb->devices_loaded--; - - first_dev = first_device(Vcb) == dev; - + RemoveEntryList(&dev->list_entry); - + // flush - - do_write(Vcb, NULL, &rollback); - + + Status = do_write(Vcb, NULL); + if (!NT_SUCCESS(Status)) + ERR("do_write returned %08x\n", Status); + free_trees(Vcb); - - clear_rollback(Vcb, &rollback); - - if (!dev->readonly) { + + if (!NT_SUCCESS(Status)) + return Status; + + if (!dev->readonly && dev->devobj) { Status = remove_superblocks(dev); if (!NT_SUCCESS(Status)) WARN("remove_superblocks returned %08x\n", Status); } - + // remove entry in volume list - - ExAcquireResourceExclusiveLite(&volumes_lock, TRUE); - - le = volumes.Flink; - while (le != &volumes) { - volume* v = CONTAINING_RECORD(le, volume, list_entry); - - if (RtlCompareMemory(&Vcb->superblock.uuid, &v->fsuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID) && - RtlCompareMemory(&dev->devitem.device_uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { - PFILE_OBJECT FileObject; - PDEVICE_OBJECT mountmgr; - UNICODE_STRING mmdevpath; - - RemoveEntryList(&v->list_entry); - - // re-add entry to mountmgr - - if (!first_dev && v->part_num != 0) { - RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME); - Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &FileObject, &mountmgr); - if (!NT_SUCCESS(Status)) - ERR("IoGetDeviceObjectPointer returned %08x\n", Status); - else { - add_volume(mountmgr, &v->devpath); - ObDereferenceObject(FileObject); + + vde = Vcb->vde; + + if (dev->devobj) { + pdo_device_extension* pdode = vde->pdode; + + 
ExAcquireResourceExclusiveLite(&pdode->child_lock, TRUE); + + le = pdode->children.Flink; + while (le != &pdode->children) { + volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry); + + if (RtlCompareMemory(&dev->devitem.device_uuid, &vc->uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { + PFILE_OBJECT FileObject; + PDEVICE_OBJECT mountmgr; + UNICODE_STRING mmdevpath; + + pdode->children_loaded--; + + if (vc->had_drive_letter) { // re-add entry to mountmgr + RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME); + Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &FileObject, &mountmgr); + if (!NT_SUCCESS(Status)) + ERR("IoGetDeviceObjectPointer returned %08x\n", Status); + else { + MOUNTDEV_NAME mdn; + + Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &mdn, sizeof(MOUNTDEV_NAME), TRUE, NULL); + if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) + ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); + else { + MOUNTDEV_NAME* mdn2; + ULONG mdnsize = (ULONG)offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength; + + mdn2 = ExAllocatePoolWithTag(PagedPool, mdnsize, ALLOC_TAG); + if (!mdn2) + ERR("out of memory\n"); + else { + Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, mdn2, mdnsize, TRUE, NULL); + if (!NT_SUCCESS(Status)) + ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); + else { + UNICODE_STRING name; + + name.Buffer = mdn2->Name; + name.Length = name.MaximumLength = mdn2->NameLength; + + Status = mountmgr_add_drive_letter(mountmgr, &name); + if (!NT_SUCCESS(Status)) + WARN("mountmgr_add_drive_letter returned %08x\n", Status); + } + + ExFreePool(mdn2); + } + } + + ObDereferenceObject(FileObject); + } } + + ExFreePool(vc->pnp_name.Buffer); + RemoveEntryList(&vc->list_entry); + ExFreePool(vc); + + ObDereferenceObject(vc->fileobj); + + break; } - - part0 = v->part_num == 0 ? 
TRUE : FALSE; - - if (v->devpath.Buffer) - ExFreePool(v->devpath.Buffer); - - ExFreePool(v); - break; + + le = le->Flink; } - - le = le->Flink; + + if (pdode->children_loaded > 0 && vde->device->Characteristics & FILE_REMOVABLE_MEDIA) { + vde->device->Characteristics &= ~FILE_REMOVABLE_MEDIA; + + le = pdode->children.Flink; + while (le != &pdode->children) { + volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry); + + if (vc->devobj->Characteristics & FILE_REMOVABLE_MEDIA) { + vde->device->Characteristics |= FILE_REMOVABLE_MEDIA; + break; + } + + le = le->Flink; + } + } + + pdode->num_children = Vcb->superblock.num_devices; + + ExReleaseResourceLite(&pdode->child_lock); + + // free dev + + if (dev->trim && !dev->readonly && !Vcb->options.no_trim) + trim_whole_device(dev); } - - ExReleaseResourceLite(&volumes_lock); - - if (first_dev) { - PDEVICE_OBJECT DeviceObject, olddev; - device* newfirstdev; - PFILE_OBJECT FileObject; - UNICODE_STRING mmdevpath; - PDEVICE_OBJECT mountmgr; - - DeviceObject = Vcb->Vpb->DeviceObject; - - olddev = DeviceObject->Vpb->RealDevice; - newfirstdev = first_device(Vcb); - - ObReferenceObject(newfirstdev->devobj); - DeviceObject->Vpb->RealDevice = newfirstdev->devobj; - ObDereferenceObject(olddev); - - RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME); - Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &FileObject, &mountmgr); - if (!NT_SUCCESS(Status)) - ERR("IoGetDeviceObjectPointer returned %08x\n", Status); - else { - Status = replace_mount_dev(Vcb, dev, mountmgr, part0); - if (!NT_SUCCESS(Status)) - ERR("replace_mount_dev returned %08x\n", Status); - - ObDereferenceObject(FileObject); - } - - } - - // free dev - - ObDereferenceObject(dev->devobj); - + while (!IsListEmpty(&dev->space)) { LIST_ENTRY* le2 = RemoveHeadList(&dev->space); space* s = CONTAINING_RECORD(le2, space, list_entry); - + ExFreePool(s); } - + ExFreePool(dev); - + + if (Vcb->trim) { + Vcb->trim = FALSE; + + le = Vcb->devices.Flink; 
+ while (le != &Vcb->devices) { + device* dev2 = CONTAINING_RECORD(le, device, list_entry); + + if (dev2->trim) { + Vcb->trim = TRUE; + break; + } + + le = le->Flink; + } + } + + FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_CHANGE_SIZE); + return STATUS_SUCCESS; } +static void trim_unalloc_space(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, device* dev) { + DEVICE_MANAGE_DATA_SET_ATTRIBUTES* dmdsa; + DEVICE_DATA_SET_RANGE* ranges; + ULONG datalen, i; + KEY searchkey; + traverse_ptr tp; + NTSTATUS Status; + BOOL b; + UINT64 lastoff = 0x100000; // don't TRIM the first megabyte, in case someone has been daft enough to install GRUB there + LIST_ENTRY* le; + + dev->num_trim_entries = 0; + + searchkey.obj_id = dev->devitem.dev_id; + searchkey.obj_type = TYPE_DEV_EXTENT; + searchkey.offset = 0; + + Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return; + } + + do { + traverse_ptr next_tp; + + if (tp.item->key.obj_id == dev->devitem.dev_id && tp.item->key.obj_type == TYPE_DEV_EXTENT) { + if (tp.item->size >= sizeof(DEV_EXTENT)) { + DEV_EXTENT* de = (DEV_EXTENT*)tp.item->data; + + if (tp.item->key.offset > lastoff) + add_trim_entry_avoid_sb(Vcb, dev, lastoff, tp.item->key.offset - lastoff); + + lastoff = tp.item->key.offset + de->length; + } else { + ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DEV_EXTENT)); + return; + } + } + + b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL); + + if (b) { + tp = next_tp; + if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type)) + break; + } + } while (b); + + if (lastoff < dev->devitem.num_bytes) + add_trim_entry_avoid_sb(Vcb, dev, lastoff, dev->devitem.num_bytes - lastoff); + + if (dev->num_trim_entries == 0) + return; + + datalen = 
(ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(UINT64)) + (dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE)); + + dmdsa = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG); + if (!dmdsa) { + ERR("out of memory\n"); + goto end; + } + + dmdsa->Size = sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES); + dmdsa->Action = DeviceDsmAction_Trim; + dmdsa->Flags = DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED; + dmdsa->ParameterBlockOffset = 0; + dmdsa->ParameterBlockLength = 0; + dmdsa->DataSetRangesOffset = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(UINT64)); + dmdsa->DataSetRangesLength = dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE); + + ranges = (DEVICE_DATA_SET_RANGE*)((UINT8*)dmdsa + dmdsa->DataSetRangesOffset); + + i = 0; + le = dev->trim_list.Flink; + while (le != &dev->trim_list) { + space* s = CONTAINING_RECORD(le, space, list_entry); + + ranges[i].StartingOffset = s->address; + ranges[i].LengthInBytes = s->size; + i++; + + le = le->Flink; + } + + Status = dev_ioctl(dev->devobj, IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES, dmdsa, datalen, NULL, 0, TRUE, NULL); + if (!NT_SUCCESS(Status)) + WARN("IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES returned %08x\n", Status); + + ExFreePool(dmdsa); + +end: + while (!IsListEmpty(&dev->trim_list)) { + space* s = CONTAINING_RECORD(RemoveHeadList(&dev->trim_list), space, list_entry); + ExFreePool(s); + } + + dev->num_trim_entries = 0; +} + +static NTSTATUS try_consolidation(device_extension* Vcb, UINT64 flags, chunk** newchunk) { + NTSTATUS Status; + BOOL changed; + LIST_ENTRY* le; + chunk* rc; + + // FIXME - allow with metadata chunks? 
+ + while (TRUE) { + rc = NULL; + + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + + ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE); + + // choose the least-used chunk we haven't looked at yet + le = Vcb->chunks.Flink; + while (le != &Vcb->chunks) { + chunk* c = CONTAINING_RECORD(le, chunk, list_entry); + + // FIXME - skip full-size chunks over e.g. 90% full? + if (c->chunk_item->type & BLOCK_FLAG_DATA && !c->readonly && c->balance_num != Vcb->balance.balance_num && (!rc || c->used < rc->used)) + rc = c; + + le = le->Flink; + } + + ExReleaseResourceLite(&Vcb->chunk_lock); + + if (!rc) { + ExReleaseResourceLite(&Vcb->tree_lock); + break; + } + + if (rc->list_entry_balance.Flink) { + RemoveEntryList(&rc->list_entry_balance); + Vcb->balance.chunks_left--; + } + + rc->list_entry_balance.Flink = (LIST_ENTRY*)1; // so it doesn't get dropped + rc->reloc = TRUE; + + ExReleaseResourceLite(&Vcb->tree_lock); + + do { + changed = FALSE; + + Status = balance_data_chunk(Vcb, rc, &changed); + if (!NT_SUCCESS(Status)) { + ERR("balance_data_chunk returned %08x\n", Status); + Vcb->balance.status = Status; + rc->list_entry_balance.Flink = NULL; + rc->reloc = FALSE; + return Status; + } + + KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL); + + if (Vcb->readonly) + Vcb->balance.stopping = TRUE; + + if (Vcb->balance.stopping) + return STATUS_SUCCESS; + } while (changed); + + rc->list_entry_balance.Flink = NULL; + + rc->changed = TRUE; + rc->space_changed = TRUE; + rc->balance_num = Vcb->balance.balance_num; + + Status = do_write(Vcb, NULL); + if (!NT_SUCCESS(Status)) { + ERR("do_write returned %08x\n", Status); + return Status; + } + } + + ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); + + Status = alloc_chunk(Vcb, flags, &rc, TRUE); + + ExReleaseResourceLite(&Vcb->chunk_lock); + + if (NT_SUCCESS(Status)) { + *newchunk = rc; + return Status; + } else { + ERR("alloc_chunk returned %08x\n", Status); + return Status; + } +} + +static 
NTSTATUS regenerate_space_list(device_extension* Vcb, device* dev) { + LIST_ENTRY* le; + + while (!IsListEmpty(&dev->space)) { + space* s = CONTAINING_RECORD(RemoveHeadList(&dev->space), space, list_entry); + + ExFreePool(s); + } + + // The Linux driver doesn't like to allocate chunks within the first megabyte of a device. + + space_list_add2(&dev->space, NULL, 0x100000, dev->devitem.num_bytes - 0x100000, NULL, NULL); + + le = Vcb->chunks.Flink; + while (le != &Vcb->chunks) { + UINT16 n; + chunk* c = CONTAINING_RECORD(le, chunk, list_entry); + CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; + + for (n = 0; n < c->chunk_item->num_stripes; n++) { + UINT64 stripe_size = 0; + + if (cis[n].dev_id == dev->devitem.dev_id) { + if (stripe_size == 0) { + UINT16 factor; + + if (c->chunk_item->type & BLOCK_FLAG_RAID0) + factor = c->chunk_item->num_stripes; + else if (c->chunk_item->type & BLOCK_FLAG_RAID10) + factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes; + else if (c->chunk_item->type & BLOCK_FLAG_RAID5) + factor = c->chunk_item->num_stripes - 1; + else if (c->chunk_item->type & BLOCK_FLAG_RAID6) + factor = c->chunk_item->num_stripes - 2; + else // SINGLE, DUP, RAID1 + factor = 1; + + stripe_size = c->chunk_item->size / factor; + } + + space_list_subtract2(&dev->space, NULL, cis[n].offset, stripe_size, NULL, NULL); + } + } + + le = le->Flink; + } + + return STATUS_SUCCESS; +} + +_Function_class_(KSTART_ROUTINE) #ifndef __REACTOS__ -static void balance_thread(void* context) { +void balance_thread(void* context) { #else -static void NTAPI balance_thread(void* context) { +void NTAPI balance_thread(void* context) { #endif device_extension* Vcb = (device_extension*)context; LIST_ENTRY chunks; LIST_ENTRY* le; - UINT64 num_chunks[3]; + UINT64 num_chunks[3], okay_metadata_chunks = 0, okay_data_chunks = 0, okay_system_chunks = 0; + UINT64 old_data_flags = 0, old_metadata_flags = 0, old_system_flags = 0; NTSTATUS Status; - + + 
Vcb->balance.balance_num++; + Vcb->balance.stopping = FALSE; - Vcb->balance.cancelling = FALSE; KeInitializeEvent(&Vcb->balance.finished, NotificationEvent, FALSE); - - if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_CONVERT) + + if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_CONVERT) { + old_data_flags = Vcb->data_flags; Vcb->data_flags = BLOCK_FLAG_DATA | (Vcb->balance.opts[BALANCE_OPTS_DATA].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_DATA].convert); - - if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_CONVERT) + + FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_CHANGE_SIZE); + } + + if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_CONVERT) { + old_metadata_flags = Vcb->metadata_flags; Vcb->metadata_flags = BLOCK_FLAG_METADATA | (Vcb->balance.opts[BALANCE_OPTS_METADATA].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_METADATA].convert); - - if (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_CONVERT) + } + + if (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_CONVERT) { + old_system_flags = Vcb->system_flags; Vcb->system_flags = BLOCK_FLAG_SYSTEM | (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].convert == BLOCK_FLAG_SINGLE ? 
0 : Vcb->balance.opts[BALANCE_OPTS_SYSTEM].convert); - + } + if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS) { if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED) RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &Vcb->balance.opts[BALANCE_OPTS_DATA], sizeof(btrfs_balance_opts)); else if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_DATA], &Vcb->balance.opts[BALANCE_OPTS_METADATA], sizeof(btrfs_balance_opts)); } - + + num_chunks[0] = num_chunks[1] = num_chunks[2] = 0; + Vcb->balance.total_chunks = Vcb->balance.chunks_left = 0; + + InitializeListHead(&chunks); + // FIXME - what are we supposed to do with limit_start? - + if (!Vcb->readonly) { - if (!Vcb->balance.removing) { + if (!Vcb->balance.removing && !Vcb->balance.shrinking) { Status = add_balance_item(Vcb); if (!NT_SUCCESS(Status)) { ERR("add_balance_item returned %08x\n", Status); + Vcb->balance.status = Status; + goto end; + } + } else { + if (Vcb->need_write) { + Status = do_write(Vcb, NULL); + + free_trees(Vcb); + + if (!NT_SUCCESS(Status)) { + ERR("do_write returned %08x\n", Status); + Vcb->balance.status = Status; + goto end; + } + } + } + } + + KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL); + + if (Vcb->balance.stopping) + goto end; + + ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE); + + le = Vcb->chunks.Flink; + while (le != &Vcb->chunks) { + chunk* c = CONTAINING_RECORD(le, chunk, list_entry); + UINT8 sort; + + ExAcquireResourceExclusiveLite(&c->lock, TRUE); + + if (c->chunk_item->type & BLOCK_FLAG_DATA) + sort = BALANCE_OPTS_DATA; + else if (c->chunk_item->type & BLOCK_FLAG_METADATA) + sort = BALANCE_OPTS_METADATA; + else if (c->chunk_item->type & BLOCK_FLAG_SYSTEM) + sort = BALANCE_OPTS_SYSTEM; + else { + ERR("unexpected chunk type %llx\n", c->chunk_item->type); + ExReleaseResourceLite(&c->lock); + break; + } + + if 
((!(Vcb->balance.opts[sort].flags & BTRFS_BALANCE_OPTS_LIMIT) || num_chunks[sort] < Vcb->balance.opts[sort].limit_end) && + should_balance_chunk(Vcb, sort, c)) { + InsertTailList(&chunks, &c->list_entry_balance); + + num_chunks[sort]++; + Vcb->balance.total_chunks++; + Vcb->balance.chunks_left++; + } else if (sort == BALANCE_OPTS_METADATA) + okay_metadata_chunks++; + else if (sort == BALANCE_OPTS_DATA) + okay_data_chunks++; + else if (sort == BALANCE_OPTS_SYSTEM) + okay_system_chunks++; + + if (!c->cache_loaded) { + Status = load_cache_chunk(Vcb, c, NULL); + + if (!NT_SUCCESS(Status)) { + ERR("load_cache_chunk returned %08x\n", Status); + Vcb->balance.status = Status; + ExReleaseResourceLite(&c->lock); + ExReleaseResourceLite(&Vcb->chunk_lock); + goto end; + } + } + + ExReleaseResourceLite(&c->lock); + + le = le->Flink; + } + + ExReleaseResourceLite(&Vcb->chunk_lock); + + // If we're doing a full balance, try and allocate a new chunk now, before we mess things up + if (okay_metadata_chunks == 0 || okay_data_chunks == 0 || okay_system_chunks == 0) { + BOOL consolidated = FALSE; + chunk* c; + + if (okay_metadata_chunks == 0) { + ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); + + Status = alloc_chunk(Vcb, Vcb->metadata_flags, &c, TRUE); + if (NT_SUCCESS(Status)) + c->balance_num = Vcb->balance.balance_num; + else if (Status != STATUS_DISK_FULL || consolidated) { + ERR("alloc_chunk returned %08x\n", Status); + ExReleaseResourceLite(&Vcb->chunk_lock); + Vcb->balance.status = Status; + goto end; + } + + ExReleaseResourceLite(&Vcb->chunk_lock); + + if (Status == STATUS_DISK_FULL) { + Status = try_consolidation(Vcb, Vcb->metadata_flags, &c); + if (!NT_SUCCESS(Status)) { + ERR("try_consolidation returned %08x\n", Status); + Vcb->balance.status = Status; + goto end; + } else + c->balance_num = Vcb->balance.balance_num; + + consolidated = TRUE; + + if (Vcb->balance.stopping) + goto end; + } + } + + if (okay_data_chunks == 0) { + 
ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); + + Status = alloc_chunk(Vcb, Vcb->data_flags, &c, TRUE); + if (NT_SUCCESS(Status)) + c->balance_num = Vcb->balance.balance_num; + else if (Status != STATUS_DISK_FULL || consolidated) { + ERR("alloc_chunk returned %08x\n", Status); + ExReleaseResourceLite(&Vcb->chunk_lock); + Vcb->balance.status = Status; + goto end; + } + + ExReleaseResourceLite(&Vcb->chunk_lock); + + if (Status == STATUS_DISK_FULL) { + Status = try_consolidation(Vcb, Vcb->data_flags, &c); + if (!NT_SUCCESS(Status)) { + ERR("try_consolidation returned %08x\n", Status); + Vcb->balance.status = Status; + goto end; + } else + c->balance_num = Vcb->balance.balance_num; + + consolidated = TRUE; + + if (Vcb->balance.stopping) + goto end; + } + } + + if (okay_system_chunks == 0) { + ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); + + Status = alloc_chunk(Vcb, Vcb->system_flags, &c, TRUE); + if (NT_SUCCESS(Status)) + c->balance_num = Vcb->balance.balance_num; + else if (Status != STATUS_DISK_FULL || consolidated) { + ERR("alloc_chunk returned %08x\n", Status); + ExReleaseResourceLite(&Vcb->chunk_lock); + Vcb->balance.status = Status; goto end; } - } else { - if (Vcb->need_write) { - LIST_ENTRY rollback; - - InitializeListHead(&rollback); - do_write(Vcb, NULL, &rollback); - free_trees(Vcb); - - clear_rollback(Vcb, &rollback); + + ExReleaseResourceLite(&Vcb->chunk_lock); + + if (Status == STATUS_DISK_FULL) { + Status = try_consolidation(Vcb, Vcb->system_flags, &c); + if (!NT_SUCCESS(Status)) { + ERR("try_consolidation returned %08x\n", Status); + Vcb->balance.status = Status; + goto end; + } else + c->balance_num = Vcb->balance.balance_num; + + consolidated = TRUE; + + if (Vcb->balance.stopping) + goto end; } } } - - num_chunks[0] = num_chunks[1] = num_chunks[2] = 0; - Vcb->balance.total_chunks = 0; - - InitializeListHead(&chunks); - - KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL); - - if (Vcb->balance.stopping) 
- goto end; - + ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE); - - le = Vcb->chunks.Flink; - while (le != &Vcb->chunks) { - chunk* c = CONTAINING_RECORD(le, chunk, list_entry); - UINT8 sort; - - ExAcquireResourceExclusiveLite(&c->lock, TRUE); - - if (c->chunk_item->type & BLOCK_FLAG_DATA) - sort = BALANCE_OPTS_DATA; - else if (c->chunk_item->type & BLOCK_FLAG_METADATA) - sort = BALANCE_OPTS_METADATA; - else if (c->chunk_item->type & BLOCK_FLAG_SYSTEM) - sort = BALANCE_OPTS_SYSTEM; - else { - ERR("unexpected chunk type %llx\n", c->chunk_item->type); - ExReleaseResourceLite(&c->lock); - break; - } - - if ((!(Vcb->balance.opts[sort].flags & BTRFS_BALANCE_OPTS_LIMIT) || num_chunks[sort] < Vcb->balance.opts[sort].limit_end) && - should_balance_chunk(Vcb, sort, c)) { - c->reloc = TRUE; - - InsertTailList(&chunks, &c->list_entry_balance); - - num_chunks[sort]++; - Vcb->balance.total_chunks++; - } - - ExReleaseResourceLite(&c->lock); - + + le = chunks.Flink; + while (le != &chunks) { + chunk* c = CONTAINING_RECORD(le, chunk, list_entry_balance); + + c->reloc = TRUE; + le = le->Flink; } - + ExReleaseResourceLite(&Vcb->chunk_lock); - - Vcb->balance.chunks_left = Vcb->balance.total_chunks; - + // do data chunks before metadata le = chunks.Flink; while (le != &chunks) { chunk* c = CONTAINING_RECORD(le, chunk, list_entry_balance); LIST_ENTRY* le2 = le->Flink; - + if (c->chunk_item->type & BLOCK_FLAG_DATA) { - NTSTATUS Status; BOOL changed; - + do { changed = FALSE; - - FsRtlEnterFileSystem(); - + Status = balance_data_chunk(Vcb, c, &changed); - - FsRtlExitFileSystem(); - if (!NT_SUCCESS(Status)) { ERR("balance_data_chunk returned %08x\n", Status); Vcb->balance.status = Status; goto end; } - + KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL); - + + if (Vcb->readonly) + Vcb->balance.stopping = TRUE; + if (Vcb->balance.stopping) break; } while (changed); - - if (!c->list_entry_changed.Flink) - InsertTailList(&Vcb->chunks_changed, 
&c->list_entry_changed); + + c->changed = TRUE; + c->space_changed = TRUE; } - - if (Vcb->balance.stopping) { - while (le != &chunks) { - c = CONTAINING_RECORD(le, chunk, list_entry_balance); - c->reloc = FALSE; - - le = le->Flink; - } + + if (Vcb->balance.stopping) goto end; - } - + if (c->chunk_item->type & BLOCK_FLAG_DATA && (!(Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) || !(c->chunk_item->type & BLOCK_FLAG_METADATA))) { RemoveEntryList(&c->list_entry_balance); c->list_entry_balance.Flink = NULL; - + Vcb->balance.chunks_left--; } - + le = le2; } - + // do metadata chunks while (!IsListEmpty(&chunks)) { chunk* c; - NTSTATUS Status; BOOL changed; - + le = RemoveHeadList(&chunks); c = CONTAINING_RECORD(le, chunk, list_entry_balance); - + if (c->chunk_item->type & BLOCK_FLAG_METADATA || c->chunk_item->type & BLOCK_FLAG_SYSTEM) { do { - FsRtlEnterFileSystem(); - Status = balance_metadata_chunk(Vcb, c, &changed); - - FsRtlExitFileSystem(); - if (!NT_SUCCESS(Status)) { ERR("balance_metadata_chunk returned %08x\n", Status); Vcb->balance.status = Status; goto end; } - + KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL); - + + if (Vcb->readonly) + Vcb->balance.stopping = TRUE; + if (Vcb->balance.stopping) break; } while (changed); - - if (!c->list_entry_changed.Flink) - InsertTailList(&Vcb->chunks_changed, &c->list_entry_changed); + + c->changed = TRUE; + c->space_changed = TRUE; } - - if (Vcb->balance.stopping) { - while (le != &chunks) { - c = CONTAINING_RECORD(le, chunk, list_entry_balance); - c->reloc = FALSE; - - le = le->Flink; - c->list_entry_balance.Flink = NULL; - } + + if (Vcb->balance.stopping) break; - } - + c->list_entry_balance.Flink = NULL; - + Vcb->balance.chunks_left--; } - + end: if (!Vcb->readonly) { - if (!Vcb->balance.removing) { - FsRtlEnterFileSystem(); - Status = remove_balance_item(Vcb); - FsRtlExitFileSystem(); - - if (!NT_SUCCESS(Status)) { - ERR("remove_balance_item returned 
%08x\n", Status); - goto end; + if (Vcb->balance.stopping || !NT_SUCCESS(Vcb->balance.status)) { + le = chunks.Flink; + while (le != &chunks) { + chunk* c = CONTAINING_RECORD(le, chunk, list_entry_balance); + c->reloc = FALSE; + + le = le->Flink; + c->list_entry_balance.Flink = NULL; } - } else { + + if (old_data_flags != 0) + Vcb->data_flags = old_data_flags; + + if (old_metadata_flags != 0) + Vcb->metadata_flags = old_metadata_flags; + + if (old_system_flags != 0) + Vcb->system_flags = old_system_flags; + } + + if (Vcb->balance.removing) { device* dev = NULL; - - FsRtlEnterFileSystem(); + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); - + le = Vcb->devices.Flink; while (le != &Vcb->devices) { device* dev2 = CONTAINING_RECORD(le, device, list_entry); - + if (dev2->devitem.dev_id == Vcb->balance.opts[0].devid) { dev = dev2; break; } - + le = le->Flink; } - + if (dev) { if (Vcb->balance.chunks_left == 0) { Status = finish_removing_device(Vcb, dev); - + if (!NT_SUCCESS(Status)) { ERR("finish_removing_device returned %08x\n", Status); dev->reloc = FALSE; @@ -2790,15 +3403,94 @@ end: } else dev->reloc = FALSE; } - + + ExReleaseResourceLite(&Vcb->tree_lock); + } else if (Vcb->balance.shrinking) { + device* dev = NULL; + + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev2 = CONTAINING_RECORD(le, device, list_entry); + + if (dev2->devitem.dev_id == Vcb->balance.opts[0].devid) { + dev = dev2; + break; + } + + le = le->Flink; + } + + if (!dev) { + ERR("could not find device %llx\n", Vcb->balance.opts[0].devid); + Vcb->balance.status = STATUS_INTERNAL_ERROR; + } + + if (Vcb->balance.stopping || !NT_SUCCESS(Vcb->balance.status)) { + if (dev) { + Status = regenerate_space_list(Vcb, dev); + if (!NT_SUCCESS(Status)) + WARN("regenerate_space_list returned %08x\n", Status); + } + } else { + UINT64 old_size; + + old_size = dev->devitem.num_bytes; + dev->devitem.num_bytes = 
Vcb->balance.opts[0].drange_start; + + Status = update_dev_item(Vcb, dev, NULL); + if (!NT_SUCCESS(Status)) { + ERR("update_dev_item returned %08x\n", Status); + dev->devitem.num_bytes = old_size; + Vcb->balance.status = Status; + + Status = regenerate_space_list(Vcb, dev); + if (!NT_SUCCESS(Status)) + WARN("regenerate_space_list returned %08x\n", Status); + } else { + Vcb->superblock.total_bytes -= old_size - dev->devitem.num_bytes; + + Status = do_write(Vcb, NULL); + if (!NT_SUCCESS(Status)) + ERR("do_write returned %08x\n", Status); + + free_trees(Vcb); + } + } + + ExReleaseResourceLite(&Vcb->tree_lock); + + if (!Vcb->balance.stopping && NT_SUCCESS(Vcb->balance.status)) + FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_CHANGE_SIZE); + } else { + Status = remove_balance_item(Vcb); + if (!NT_SUCCESS(Status)) { + ERR("remove_balance_item returned %08x\n", Status); + goto end; + } + } + + if (Vcb->trim && !Vcb->options.no_trim) { + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev2 = CONTAINING_RECORD(le, device, list_entry); + + if (dev2->devobj && !dev2->readonly && dev2->trim) + trim_unalloc_space(Vcb, dev2); + + le = le->Flink; + } + ExReleaseResourceLite(&Vcb->tree_lock); - FsRtlExitFileSystem(); } } - + ZwClose(Vcb->balance.thread); Vcb->balance.thread = NULL; - + KeSetEvent(&Vcb->balance.finished, 0, FALSE); } @@ -2806,26 +3498,36 @@ NTSTATUS start_balance(device_extension* Vcb, void* data, ULONG length, KPROCESS NTSTATUS Status; btrfs_start_balance* bsb = (btrfs_start_balance*)data; UINT8 i; - + if (length < sizeof(btrfs_start_balance) || !data) return STATUS_INVALID_PARAMETER; - + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) return STATUS_PRIVILEGE_NOT_HELD; - + + if (Vcb->locked) { + WARN("cannot start balance while locked\n"); + return STATUS_DEVICE_NOT_READY; + } + + if (Vcb->scrub.thread) { + WARN("cannot start balance 
while scrub running\n"); + return STATUS_DEVICE_NOT_READY; + } + if (Vcb->balance.thread) { WARN("balance already running\n"); return STATUS_DEVICE_NOT_READY; } - + if (Vcb->readonly) return STATUS_MEDIA_WRITE_PROTECTED; - + if (!(bsb->opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED) && !(bsb->opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) && !(bsb->opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED)) return STATUS_SUCCESS; - + for (i = 0; i < 3; i++) { if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_ENABLED) { if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_PROFILES) { @@ -2835,46 +3537,46 @@ NTSTATUS start_balance(device_extension* Vcb, void* data, ULONG length, KPROCESS if (bsb->opts[i].profiles == 0) return STATUS_INVALID_PARAMETER; } - + if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_DEVID) { if (bsb->opts[i].devid == 0) return STATUS_INVALID_PARAMETER; } - + if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_DRANGE) { if (bsb->opts[i].drange_start > bsb->opts[i].drange_end) return STATUS_INVALID_PARAMETER; } - + if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_VRANGE) { if (bsb->opts[i].vrange_start > bsb->opts[i].vrange_end) return STATUS_INVALID_PARAMETER; } - + if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_LIMIT) { bsb->opts[i].limit_start = max(1, bsb->opts[i].limit_start); bsb->opts[i].limit_end = max(1, bsb->opts[i].limit_end); - + if (bsb->opts[i].limit_start > bsb->opts[i].limit_end) return STATUS_INVALID_PARAMETER; } - + if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_STRIPES) { bsb->opts[i].stripes_start = max(1, bsb->opts[i].stripes_start); bsb->opts[i].stripes_end = max(1, bsb->opts[i].stripes_end); - + if (bsb->opts[i].stripes_start > bsb->opts[i].stripes_end) return STATUS_INVALID_PARAMETER; } - + if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_USAGE) { bsb->opts[i].usage_start = min(100, bsb->opts[i].stripes_start); bsb->opts[i].usage_end = min(100, bsb->opts[i].stripes_end); - + if (bsb->opts[i].stripes_start > bsb->opts[i].stripes_end) 
return STATUS_INVALID_PARAMETER; } - + if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT) { if (bsb->opts[i].convert != BLOCK_FLAG_RAID0 && bsb->opts[i].convert != BLOCK_FLAG_RAID1 && bsb->opts[i].convert != BLOCK_FLAG_DUPLICATE && bsb->opts[i].convert != BLOCK_FLAG_RAID10 && @@ -2884,79 +3586,77 @@ NTSTATUS start_balance(device_extension* Vcb, void* data, ULONG length, KPROCESS } } } - + RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bsb->opts[BALANCE_OPTS_DATA], sizeof(btrfs_balance_opts)); RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bsb->opts[BALANCE_OPTS_METADATA], sizeof(btrfs_balance_opts)); RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bsb->opts[BALANCE_OPTS_SYSTEM], sizeof(btrfs_balance_opts)); - + Vcb->balance.paused = FALSE; Vcb->balance.removing = FALSE; + Vcb->balance.shrinking = FALSE; Vcb->balance.status = STATUS_SUCCESS; KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused); - + Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb); if (!NT_SUCCESS(Status)) { ERR("PsCreateSystemThread returned %08x\n", Status); return Status; } - + return STATUS_SUCCESS; } -NTSTATUS look_for_balance_item(device_extension* Vcb) { - LIST_ENTRY rollback; +NTSTATUS look_for_balance_item(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb) { KEY searchkey; traverse_ptr tp; NTSTATUS Status; BALANCE_ITEM* bi; int i; - - InitializeListHead(&rollback); - + searchkey.obj_id = BALANCE_ITEM_ID; searchkey.obj_type = TYPE_TEMP_ITEM; searchkey.offset = 0; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL); if (!NT_SUCCESS(Status)) { ERR("find_item returned %08x\n", Status); return Status; } - + if (keycmp(tp.item->key, searchkey)) { TRACE("no balance item found\n"); return STATUS_NOT_FOUND; } - + if (tp.item->size < sizeof(BALANCE_ITEM)) { WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, 
tp.item->key.offset, tp.item->size, sizeof(BALANCE_ITEM)); return STATUS_INTERNAL_ERROR; } - + bi = (BALANCE_ITEM*)tp.item->data; - + if (bi->flags & BALANCE_FLAGS_DATA) load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bi->data); - + if (bi->flags & BALANCE_FLAGS_METADATA) load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bi->metadata); - + if (bi->flags & BALANCE_FLAGS_SYSTEM) load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bi->system); - + // do the heuristics that Linux driver does - + for (i = 0; i < 3; i++) { if (Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_ENABLED) { // if converting, don't redo chunks already done - + if (Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT) Vcb->balance.opts[i].flags |= BTRFS_BALANCE_OPTS_SOFT; - + // don't balance chunks more than 90% filled - presumably these // have already been done - + if (!(Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_USAGE) && !(Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT) ) { @@ -2966,50 +3666,54 @@ NTSTATUS look_for_balance_item(device_extension* Vcb) { } } } - + if (Vcb->readonly || Vcb->options.skip_balance) Vcb->balance.paused = TRUE; else Vcb->balance.paused = FALSE; - + Vcb->balance.removing = FALSE; + Vcb->balance.shrinking = FALSE; Vcb->balance.status = STATUS_SUCCESS; KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused); - + Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb); if (!NT_SUCCESS(Status)) { ERR("PsCreateSystemThread returned %08x\n", Status); return Status; } - + return STATUS_SUCCESS; } NTSTATUS query_balance(device_extension* Vcb, void* data, ULONG length) { btrfs_query_balance* bqb = (btrfs_query_balance*)data; - + if (length < sizeof(btrfs_query_balance) || !data) return STATUS_INVALID_PARAMETER; - + if (!Vcb->balance.thread) { bqb->status = BTRFS_BALANCE_STOPPED; - + if (!NT_SUCCESS(Vcb->balance.status)) { bqb->status |= BTRFS_BALANCE_ERROR; bqb->error 
= Vcb->balance.status; } - + return STATUS_SUCCESS; } - + bqb->status = Vcb->balance.paused ? BTRFS_BALANCE_PAUSED : BTRFS_BALANCE_RUNNING; - + if (Vcb->balance.removing) bqb->status |= BTRFS_BALANCE_REMOVAL; - + + if (Vcb->balance.shrinking) + bqb->status |= BTRFS_BALANCE_SHRINKING; + if (!NT_SUCCESS(Vcb->balance.status)) bqb->status |= BTRFS_BALANCE_ERROR; - + bqb->chunks_left = Vcb->balance.chunks_left; bqb->total_chunks = Vcb->balance.total_chunks; bqb->error = Vcb->balance.status; @@ -3023,51 +3727,50 @@ NTSTATUS query_balance(device_extension* Vcb, void* data, ULONG length) { NTSTATUS pause_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) return STATUS_PRIVILEGE_NOT_HELD; - + if (!Vcb->balance.thread) return STATUS_DEVICE_NOT_READY; - + if (Vcb->balance.paused) return STATUS_DEVICE_NOT_READY; - + Vcb->balance.paused = TRUE; KeClearEvent(&Vcb->balance.event); - + return STATUS_SUCCESS; } NTSTATUS resume_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) return STATUS_PRIVILEGE_NOT_HELD; - + if (!Vcb->balance.thread) return STATUS_DEVICE_NOT_READY; - + if (!Vcb->balance.paused) return STATUS_DEVICE_NOT_READY; - + if (Vcb->readonly) return STATUS_MEDIA_WRITE_PROTECTED; - + Vcb->balance.paused = FALSE; KeSetEvent(&Vcb->balance.event, 0, FALSE); - + return STATUS_SUCCESS; } NTSTATUS stop_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) return STATUS_PRIVILEGE_NOT_HELD; - + if (!Vcb->balance.thread) return STATUS_DEVICE_NOT_READY; - + Vcb->balance.paused = FALSE; Vcb->balance.stopping = TRUE; - Vcb->balance.cancelling = TRUE; Vcb->balance.status = STATUS_SUCCESS; KeSetEvent(&Vcb->balance.event, 0, FALSE); - + return STATUS_SUCCESS; } @@ 
-3078,52 +3781,52 @@ NTSTATUS remove_device(device_extension* Vcb, void* data, ULONG length, KPROCESS NTSTATUS Status; int i; UINT64 num_rw_devices; - + TRACE("(%p, %p, %x)\n", Vcb, data, length); - + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) return STATUS_PRIVILEGE_NOT_HELD; - + if (length < sizeof(UINT64)) return STATUS_INVALID_PARAMETER; - + devid = *(UINT64*)data; - + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); - + if (Vcb->readonly) { ExReleaseResourceLite(&Vcb->tree_lock); return STATUS_MEDIA_WRITE_PROTECTED; } - + num_rw_devices = 0; - + le = Vcb->devices.Flink; while (le != &Vcb->devices) { device* dev2 = CONTAINING_RECORD(le, device, list_entry); - + if (dev2->devitem.dev_id == devid) dev = dev2; - + if (!dev2->readonly) num_rw_devices++; - + le = le->Flink; } - + if (!dev) { ExReleaseResourceLite(&Vcb->tree_lock); WARN("device %llx not found\n", devid); return STATUS_NOT_FOUND; } - + if (!dev->readonly) { if (num_rw_devices == 1) { ExReleaseResourceLite(&Vcb->tree_lock); WARN("not removing last non-readonly device\n"); return STATUS_INVALID_PARAMETER; } - + if (num_rw_devices == 4 && ((Vcb->data_flags & BLOCK_FLAG_RAID10 || Vcb->metadata_flags & BLOCK_FLAG_RAID10 || Vcb->system_flags & BLOCK_FLAG_RAID10) || (Vcb->data_flags & BLOCK_FLAG_RAID6 || Vcb->metadata_flags & BLOCK_FLAG_RAID6 || Vcb->system_flags & BLOCK_FLAG_RAID6)) @@ -3132,13 +3835,13 @@ NTSTATUS remove_device(device_extension* Vcb, void* data, ULONG length, KPROCESS ERR("would not be enough devices to satisfy RAID requirement (RAID6/10)\n"); return STATUS_CANNOT_DELETE; } - + if (num_rw_devices == 3 && (Vcb->data_flags & BLOCK_FLAG_RAID5 || Vcb->metadata_flags & BLOCK_FLAG_RAID5 || Vcb->system_flags & BLOCK_FLAG_RAID5)) { ExReleaseResourceLite(&Vcb->tree_lock); ERR("would not be enough devices to satisfy RAID requirement (RAID5)\n"); return STATUS_CANNOT_DELETE; } - + if (num_rw_devices == 2 && ((Vcb->data_flags & BLOCK_FLAG_RAID0 || 
Vcb->metadata_flags & BLOCK_FLAG_RAID0 || Vcb->system_flags & BLOCK_FLAG_RAID0) || (Vcb->data_flags & BLOCK_FLAG_RAID1 || Vcb->metadata_flags & BLOCK_FLAG_RAID1 || Vcb->system_flags & BLOCK_FLAG_RAID1)) @@ -3148,33 +3851,35 @@ NTSTATUS remove_device(device_extension* Vcb, void* data, ULONG length, KPROCESS return STATUS_CANNOT_DELETE; } } - + ExReleaseResourceLite(&Vcb->tree_lock); - + if (Vcb->balance.thread) { WARN("balance already running\n"); return STATUS_DEVICE_NOT_READY; } - + dev->reloc = TRUE; - + RtlZeroMemory(Vcb->balance.opts, sizeof(btrfs_balance_opts) * 3); - + for (i = 0; i < 3; i++) { Vcb->balance.opts[i].flags = BTRFS_BALANCE_OPTS_ENABLED | BTRFS_BALANCE_OPTS_DEVID; Vcb->balance.opts[i].devid = devid; } - + Vcb->balance.paused = FALSE; Vcb->balance.removing = TRUE; + Vcb->balance.shrinking = FALSE; + Vcb->balance.status = STATUS_SUCCESS; KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused); - + Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb); if (!NT_SUCCESS(Status)) { ERR("PsCreateSystemThread returned %08x\n", Status); dev->reloc = FALSE; return Status; } - + return STATUS_SUCCESS; } diff --git a/reactos/drivers/filesystems/btrfs/btrfs.c b/reactos/drivers/filesystems/btrfs/btrfs.c index 2a54cc3c4c1..4bf12424827 100644 --- a/reactos/drivers/filesystems/btrfs/btrfs.c +++ b/reactos/drivers/filesystems/btrfs/btrfs.c @@ -1,17 +1,17 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. 
- * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . */ @@ -29,31 +29,40 @@ #endif #include #include "btrfs.h" -#ifndef __REACTOS__ -#include -#else -#include -#endif #include +#ifndef _MSC_VER +#include +#include +#undef INITGUID +#endif + +#include +#include + +#ifdef _MSC_VER +#include +#include +#undef INITGUID +#endif + #define INCOMPAT_SUPPORTED (BTRFS_INCOMPAT_FLAGS_MIXED_BACKREF | BTRFS_INCOMPAT_FLAGS_DEFAULT_SUBVOL | BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS | \ BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO | BTRFS_INCOMPAT_FLAGS_BIG_METADATA | BTRFS_INCOMPAT_FLAGS_RAID56 | \ BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF | BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA | BTRFS_INCOMPAT_FLAGS_NO_HOLES) -#define COMPAT_RO_SUPPORTED 0 +#define COMPAT_RO_SUPPORTED (BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE | BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE_VALID) static WCHAR device_name[] = {'\\','B','t','r','f','s',0}; static WCHAR dosdevice_name[] = {'\\','D','o','s','D','e','v','i','c','e','s','\\','B','t','r','f','s',0}; +DEFINE_GUID(BtrfsBusInterface, 0x4d414874, 0x6865, 0x6761, 0x6d, 0x65, 0x83, 0x69, 0x17, 0x9a, 0x7d, 0x1d); + PDRIVER_OBJECT drvobj; -PDEVICE_OBJECT devobj; +PDEVICE_OBJECT master_devobj; #ifndef __REACTOS__ BOOL have_sse42 = FALSE, have_sse2 = FALSE; #endif UINT64 num_reads = 0; -LIST_ENTRY uid_map_list; -LIST_ENTRY volumes; -ERESOURCE volumes_lock; -LIST_ENTRY pnp_disks; +LIST_ENTRY uid_map_list, gid_map_list; LIST_ENTRY VcbList; ERESOURCE global_loading_lock; UINT32 debug_log_level = 0; @@ -63,25 +72,41 @@ UINT32 mount_compress_type = 0; UINT32 mount_zlib_level = 3; UINT32 mount_flush_interval = 30; UINT32 mount_max_inline = 2048; -UINT32 mount_raid5_recalculation = 1; 
-UINT32 mount_raid6_recalculation = 1; UINT32 mount_skip_balance = 0; +UINT32 mount_no_barrier = 0; +UINT32 mount_no_trim = 0; +UINT32 mount_clear_cache = 0; +UINT32 mount_allow_degraded = 0; +UINT32 mount_readonly = 0; +UINT32 no_pnp = 0; BOOL log_started = FALSE; UNICODE_STRING log_device, log_file, registry_path; -tPsUpdateDiskCounters PsUpdateDiskCounters; -tCcCopyReadEx CcCopyReadEx; -tCcCopyWriteEx CcCopyWriteEx; -tCcSetAdditionalCacheAttributesEx CcSetAdditionalCacheAttributesEx; +tPsUpdateDiskCounters fPsUpdateDiskCounters; +tCcCopyReadEx fCcCopyReadEx; +tCcCopyWriteEx fCcCopyWriteEx; +tCcSetAdditionalCacheAttributesEx fCcSetAdditionalCacheAttributesEx; +tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters; BOOL diskacc = FALSE; -void* notification_entry = NULL; +void *notification_entry = NULL, *notification_entry2 = NULL, *notification_entry3 = NULL; +ERESOURCE pdo_list_lock, mapping_lock; +LIST_ENTRY pdo_list; +BOOL finished_probing = FALSE; +HANDLE degraded_wait_handle = NULL, mountmgr_thread_handle = NULL; +BOOL degraded_wait = TRUE; +KEVENT mountmgr_thread_event; +BOOL shutting_down = FALSE; #ifdef _DEBUG PFILE_OBJECT comfo = NULL; PDEVICE_OBJECT comdo = NULL; HANDLE log_handle = NULL; +ERESOURCE log_lock; +HANDLE serial_thread_handle = NULL; + +static void init_serial(BOOL first_time); #endif -static NTSTATUS STDCALL close_file(device_extension* Vcb, PFILE_OBJECT FileObject); +static NTSTATUS close_file(_In_ PFILE_OBJECT FileObject, _In_ PIRP Irp); typedef struct { KEVENT Event; @@ -89,49 +114,51 @@ typedef struct { } read_context; #ifdef _DEBUG -static NTSTATUS STDCALL dbg_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +_Function_class_(IO_COMPLETION_ROUTINE) +static NTSTATUS dbg_completion(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp, _In_ PVOID conptr) { read_context* context = conptr; - -// DbgPrint("dbg_completion\n"); - + + UNUSED(DeviceObject); + context->iosb = Irp->IoStatus; KeSetEvent(&context->Event, 0, FALSE); - -// 
return STATUS_SUCCESS; + return STATUS_MORE_PROCESSING_REQUIRED; } #ifdef DEBUG_LONG_MESSAGES -void STDCALL _debug_message(const char* func, const char* file, unsigned int line, char* s, ...) { +void _debug_message(_In_ const char* func, _In_ const char* file, _In_ unsigned int line, _In_ char* s, ...) { #else -void STDCALL _debug_message(const char* func, char* s, ...) { +void _debug_message(_In_ const char* func, _In_ char* s, ...) { #endif LARGE_INTEGER offset; PIO_STACK_LOCATION IrpSp; NTSTATUS Status; PIRP Irp; va_list ap; - char *buf2 = NULL, *buf; - read_context* context = NULL; + char *buf2, *buf; + read_context context; UINT32 length; - + buf2 = ExAllocatePoolWithTag(NonPagedPool, 1024, ALLOC_TAG); - + if (!buf2) { DbgPrint("Couldn't allocate buffer in debug_message\n"); return; } - + #ifdef DEBUG_LONG_MESSAGES - sprintf(buf2, "%p:%s:%s:%u:", PsGetCurrentThreadId(), func, file, line); + sprintf(buf2, "%p:%s:%s:%u:", PsGetCurrentThread(), func, file, line); #else - sprintf(buf2, "%p:%s:", PsGetCurrentThreadId(), func); + sprintf(buf2, "%p:%s:", PsGetCurrentThread(), func); #endif buf = &buf2[strlen(buf2)]; - + va_start(ap, s); vsprintf(buf, s, ap); - + + ExAcquireResourceSharedLite(&log_lock, TRUE); + if (!log_started || (log_device.Length == 0 && log_file.Length == 0)) { DbgPrint(buf2); } else if (log_device.Length > 0) { @@ -140,34 +167,26 @@ void STDCALL _debug_message(const char* func, char* s, ...) 
{ DbgPrint(buf2); goto exit2; } - + length = (UINT32)strlen(buf2); - + offset.u.LowPart = 0; offset.u.HighPart = 0; - - context = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_context), ALLOC_TAG); - if (!context) { - DbgPrint("Couldn't allocate context in debug_message\n"); - return; - } - - RtlZeroMemory(context, sizeof(read_context)); - - KeInitializeEvent(&context->Event, NotificationEvent, FALSE); - // status = ZwWriteFile(comh, NULL, NULL, NULL, &io, buf2, strlen(buf2), &offset, NULL); - + RtlZeroMemory(&context, sizeof(read_context)); + + KeInitializeEvent(&context.Event, NotificationEvent, FALSE); + Irp = IoAllocateIrp(comdo->StackSize, FALSE); - + if (!Irp) { DbgPrint("IoAllocateIrp failed\n"); goto exit2; } - + IrpSp = IoGetNextIrpStackLocation(Irp); IrpSp->MajorFunction = IRP_MJ_WRITE; - + if (comdo->Flags & DO_BUFFERED_IO) { Irp->AssociatedIrp.SystemBuffer = buf2; @@ -178,82 +197,61 @@ void STDCALL _debug_message(const char* func, char* s, ...) { DbgPrint("IoAllocateMdl failed\n"); goto exit; } - - MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoWriteAccess); + + MmBuildMdlForNonPagedPool(Irp->MdlAddress); } else { Irp->UserBuffer = buf2; } IrpSp->Parameters.Write.Length = length; IrpSp->Parameters.Write.ByteOffset = offset; - - Irp->UserIosb = &context->iosb; - Irp->UserEvent = &context->Event; + Irp->UserIosb = &context.iosb; + + Irp->UserEvent = &context.Event; - IoSetCompletionRoutine(Irp, dbg_completion, context, TRUE, TRUE, TRUE); + IoSetCompletionRoutine(Irp, dbg_completion, &context, TRUE, TRUE, TRUE); Status = IoCallDriver(comdo, Irp); if (Status == STATUS_PENDING) { - KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL); - Status = context->iosb.Status; + KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL); + Status = context.iosb.Status; } - - if (comdo->Flags & DO_DIRECT_IO) { - MmUnlockPages(Irp->MdlAddress); + + if (comdo->Flags & DO_DIRECT_IO) IoFreeMdl(Irp->MdlAddress); - } - + if 
(!NT_SUCCESS(Status)) { DbgPrint("failed to write to COM1 - error %08x\n", Status); goto exit; } - + exit: IoFreeIrp(Irp); } else if (log_handle != NULL) { IO_STATUS_BLOCK iosb; - + length = (UINT32)strlen(buf2); - + Status = ZwWriteFile(log_handle, NULL, NULL, NULL, &iosb, buf2, length, NULL, NULL); - + if (!NT_SUCCESS(Status)) { DbgPrint("failed to write to file - error %08x\n", Status); } } - + exit2: + ExReleaseResourceLite(&log_lock); + va_end(ap); - - if (context) - ExFreePool(context); - + if (buf2) ExFreePool(buf2); } #endif -UINT64 sector_align( UINT64 NumberToBeAligned, UINT64 Alignment ) -{ - if( Alignment & ( Alignment - 1 ) ) - { - // - // Alignment not a power of 2 - // Just returning - // - return NumberToBeAligned; - } - if( ( NumberToBeAligned & ( Alignment - 1 ) ) != 0 ) - { - NumberToBeAligned = NumberToBeAligned + Alignment; - NumberToBeAligned = NumberToBeAligned & ( ~ (Alignment-1) ); - } - return NumberToBeAligned; -} - -BOOL is_top_level(PIRP Irp) { +BOOL is_top_level(_In_ PIRP Irp) { if (!IoGetTopLevelIrp()) { IoSetTopLevelIrp(Irp); return TRUE; @@ -262,132 +260,162 @@ BOOL is_top_level(PIRP Irp) { return FALSE; } -static void STDCALL DriverUnload(PDRIVER_OBJECT DriverObject) { +_Function_class_(DRIVER_UNLOAD) +#ifdef __REACTOS__ +static void NTAPI DriverUnload(_In_ PDRIVER_OBJECT DriverObject) { +#else +static void DriverUnload(_In_ PDRIVER_OBJECT DriverObject) { +#endif UNICODE_STRING dosdevice_nameW; ERR("DriverUnload\n"); - + free_cache(); - + IoUnregisterFileSystem(DriverObject->DeviceObject); - + + if (notification_entry2) +#ifdef __REACTOS__ + IoUnregisterPlugPlayNotification(notification_entry2); +#else + IoUnregisterPlugPlayNotificationEx(notification_entry2); +#endif + + if (notification_entry3) +#ifdef __REACTOS__ + IoUnregisterPlugPlayNotification(notification_entry3); +#else + IoUnregisterPlugPlayNotificationEx(notification_entry3); +#endif + if (notification_entry) #ifdef __REACTOS__ 
IoUnregisterPlugPlayNotification(notification_entry); #else IoUnregisterPlugPlayNotificationEx(notification_entry); #endif - + dosdevice_nameW.Buffer = dosdevice_name; dosdevice_nameW.Length = dosdevice_nameW.MaximumLength = (USHORT)wcslen(dosdevice_name) * sizeof(WCHAR); IoDeleteSymbolicLink(&dosdevice_nameW); IoDeleteDevice(DriverObject->DeviceObject); - + while (!IsListEmpty(&uid_map_list)) { LIST_ENTRY* le = RemoveHeadList(&uid_map_list); uid_map* um = CONTAINING_RECORD(le, uid_map, listentry); - + ExFreePool(um->sid); ExFreePool(um); } - + + while (!IsListEmpty(&gid_map_list)) { + gid_map* gm = CONTAINING_RECORD(RemoveHeadList(&gid_map_list), gid_map, listentry); + + ExFreePool(gm->sid); + ExFreePool(gm); + } + // FIXME - free volumes and their devpaths - // FIXME - free pnp_disks and their devpaths - + #ifdef _DEBUG if (comfo) ObDereferenceObject(comfo); - + if (log_handle) ZwClose(log_handle); #endif - + ExDeleteResourceLite(&global_loading_lock); - - ExDeleteResourceLite(&volumes_lock); - + ExDeleteResourceLite(&pdo_list_lock); + if (log_device.Buffer) ExFreePool(log_device.Buffer); - + if (log_file.Buffer) ExFreePool(log_file.Buffer); - + if (registry_path.Buffer) ExFreePool(registry_path.Buffer); + +#ifdef _DEBUG + ExDeleteResourceLite(&log_lock); +#endif + ExDeleteResourceLite(&mapping_lock); } -static BOOL STDCALL get_last_inode(device_extension* Vcb, root* r, PIRP Irp) { +static BOOL get_last_inode(_In_ _Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _In_ root* r, _In_opt_ PIRP Irp) { KEY searchkey; traverse_ptr tp, prev_tp; NTSTATUS Status; - + // get last entry searchkey.obj_id = 0xffffffffffffffff; searchkey.obj_type = 0xff; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, r, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return FALSE; } - + if (tp.item->key.obj_type == TYPE_INODE_ITEM || (tp.item->key.obj_type == TYPE_ROOT_ITEM && 
!(tp.item->key.obj_id & 0x8000000000000000))) { r->lastinode = tp.item->key.obj_id; TRACE("last inode for tree %llx is %llx\n", r->id, r->lastinode); return TRUE; } - - while (find_prev_item(Vcb, &tp, &prev_tp, FALSE, Irp)) { + + while (find_prev_item(Vcb, &tp, &prev_tp, Irp)) { tp = prev_tp; - + TRACE("moving on to %llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - + if (tp.item->key.obj_type == TYPE_INODE_ITEM || (tp.item->key.obj_type == TYPE_ROOT_ITEM && !(tp.item->key.obj_id & 0x8000000000000000))) { r->lastinode = tp.item->key.obj_id; TRACE("last inode for tree %llx is %llx\n", r->id, r->lastinode); return TRUE; } } - + r->lastinode = SUBVOL_ROOT_INODE; - + WARN("no INODE_ITEMs in tree %llx\n", r->id); - + return TRUE; } -BOOL extract_xattr(void* item, USHORT size, char* name, UINT8** data, UINT16* datalen) { +_Success_(return) +static BOOL extract_xattr(_In_reads_bytes_(size) void* item, _In_ USHORT size, _In_z_ char* name, _Out_ UINT8** data, _Out_ UINT16* datalen) { DIR_ITEM* xa = (DIR_ITEM*)item; USHORT xasize; - + while (TRUE) { if (size < sizeof(DIR_ITEM) || size < (sizeof(DIR_ITEM) - 1 + xa->m + xa->n)) { WARN("DIR_ITEM is truncated\n"); return FALSE; } - + if (xa->n == strlen(name) && RtlCompareMemory(name, xa->name, xa->n) == xa->n) { TRACE("found xattr %s\n", name); - + *datalen = xa->m; - + if (xa->m > 0) { *data = ExAllocatePoolWithTag(PagedPool, xa->m, ALLOC_TAG); if (!*data) { ERR("out of memory\n"); return FALSE; } - + RtlCopyMemory(*data, &xa->name[xa->n], xa->m); } else *data = NULL; - + return TRUE; } - + xasize = sizeof(DIR_ITEM) - 1 + xa->m + xa->n; if (size > xasize) { @@ -396,84 +424,96 @@ BOOL extract_xattr(void* item, USHORT size, char* name, UINT8** data, UINT16* da } else break; } - + TRACE("xattr %s not found\n", name); - + return FALSE; } -BOOL STDCALL get_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char* name, UINT32 crc32, UINT8** data, UINT16* datalen, PIRP Irp) { +_Success_(return) 
+BOOL get_xattr(_In_ _Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _In_ root* subvol, _In_ UINT64 inode, _In_z_ char* name, _In_ UINT32 crc32, + _Out_ UINT8** data, _Out_ UINT16* datalen, _In_opt_ PIRP Irp) { KEY searchkey; traverse_ptr tp; NTSTATUS Status; - + TRACE("(%p, %llx, %llx, %s, %08x, %p, %p)\n", Vcb, subvol->id, inode, name, crc32, data, datalen); - + searchkey.obj_id = inode; searchkey.obj_type = TYPE_XATTR_ITEM; searchkey.offset = crc32; - + Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return FALSE; } - + if (keycmp(tp.item->key, searchkey)) { TRACE("could not find item (%llx,%x,%llx)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); return FALSE; } - + if (tp.item->size < sizeof(DIR_ITEM)) { ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM)); return FALSE; } - + return extract_xattr(tp.item->data, tp.item->size, name, data, datalen); } -static NTSTATUS STDCALL drv_close(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { +_Dispatch_type_(IRP_MJ_CLOSE) +_Function_class_(DRIVER_DISPATCH) +static NTSTATUS drv_close(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp) { NTSTATUS Status; PIO_STACK_LOCATION IrpSp; device_extension* Vcb = DeviceObject->DeviceExtension; BOOL top_level; - TRACE("close\n"); - FsRtlEnterFileSystem(); + TRACE("close\n"); + top_level = is_top_level(Irp); - - if (DeviceObject == devobj || (Vcb && Vcb->type == VCB_TYPE_PARTITION0)) { + + if (DeviceObject == master_devobj) { TRACE("Closing file system\n"); Status = STATUS_SUCCESS; - goto exit; + goto end; + } else if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + Status = vol_close(DeviceObject, Irp); + goto end; + } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { + Status = STATUS_INVALID_PARAMETER; + goto end; } - + IrpSp = IoGetCurrentIrpStackLocation(Irp); - + // FIXME - 
unmount if called for volume // FIXME - call FsRtlNotifyUninitializeSync(&Vcb->NotifySync) if unmounting - - Status = close_file(DeviceObject->DeviceExtension, IrpSp->FileObject); -exit: + Status = close_file(IrpSp->FileObject, Irp); + +end: Irp->IoStatus.Status = Status; Irp->IoStatus.Information = 0; - + IoCompleteRequest( Irp, IO_DISK_INCREMENT ); - - if (top_level) + + if (top_level) IoSetTopLevelIrp(NULL); - - FsRtlExitFileSystem(); - + TRACE("returning %08x\n", Status); + FsRtlExitFileSystem(); + return Status; } -static NTSTATUS STDCALL drv_flush_buffers(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { +_Dispatch_type_(IRP_MJ_FLUSH_BUFFERS) +_Function_class_(DRIVER_DISPATCH) +static NTSTATUS drv_flush_buffers(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp) { NTSTATUS Status; PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation( Irp ); PFILE_OBJECT FileObject = IrpSp->FileObject; @@ -481,47 +521,65 @@ static NTSTATUS STDCALL drv_flush_buffers(IN PDEVICE_OBJECT DeviceObject, IN PIR device_extension* Vcb = DeviceObject->DeviceExtension; BOOL top_level; - TRACE("flush buffers\n"); - FsRtlEnterFileSystem(); + TRACE("flush buffers\n"); + top_level = is_top_level(Irp); - - if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) { - Status = part0_passthrough(DeviceObject, Irp); - goto exit; + + if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + Status = vol_flush_buffers(DeviceObject, Irp); + goto end; + } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { + Status = STATUS_INVALID_PARAMETER; + goto end; + } + + if (!fcb) { + ERR("fcb was NULL\n"); + Status = STATUS_INVALID_PARAMETER; + goto end; } - + + if (fcb == Vcb->volume_fcb) { + Status = STATUS_INVALID_PARAMETER; + goto end; + } + + Irp->IoStatus.Information = 0; + + fcb->Header.IsFastIoPossible = fast_io_possible(fcb); + Status = STATUS_SUCCESS; Irp->IoStatus.Status = Status; - Irp->IoStatus.Information = 0; - + if (fcb->type != BTRFS_TYPE_DIRECTORY) { CcFlushCache(&fcb->nonpaged->segment_object, NULL, 0, &Irp->IoStatus); - + 
if (fcb->Header.PagingIoResource) { ExAcquireResourceExclusiveLite(fcb->Header.PagingIoResource, TRUE); ExReleaseResourceLite(fcb->Header.PagingIoResource); } - + Status = Irp->IoStatus.Status; } - + +end: IoCompleteRequest(Irp, IO_NO_INCREMENT); - -exit: - if (top_level) + + TRACE("returning %08x\n", Status); + + if (top_level) IoSetTopLevelIrp(NULL); - + FsRtlExitFileSystem(); return Status; } -static void calculate_total_space(device_extension* Vcb, LONGLONG* totalsize, LONGLONG* freespace) { - UINT16 nfactor, dfactor; - UINT64 sectors_used; - +static void calculate_total_space(_In_ device_extension* Vcb, _Out_ UINT64* totalsize, _Out_ UINT64* freespace) { + UINT64 nfactor, dfactor, sectors_used; + if (Vcb->data_flags & BLOCK_FLAG_DUPLICATE || Vcb->data_flags & BLOCK_FLAG_RAID1 || Vcb->data_flags & BLOCK_FLAG_RAID10) { nfactor = 1; dfactor = 2; @@ -535,98 +593,199 @@ static void calculate_total_space(device_extension* Vcb, LONGLONG* totalsize, LO nfactor = 1; dfactor = 1; } - + sectors_used = Vcb->superblock.bytes_used / Vcb->superblock.sector_size; - + *totalsize = (Vcb->superblock.total_bytes / Vcb->superblock.sector_size) * nfactor / dfactor; *freespace = sectors_used > *totalsize ? 0 : (*totalsize - sectors_used); } -static NTSTATUS STDCALL drv_query_volume_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { +#ifndef __REACTOS__ +// This function exists because we have to lie about our FS type in certain situations. +// MPR!MprGetConnection queries the FS type, and compares it to a whitelist. If it doesn't match, +// it will return ERROR_NO_NET_OR_BAD_PATH, which prevents UAC from working. +// The command mklink refuses to create hard links on anything other than NTFS, so we have to +// blacklist cmd.exe too. 
+ +static BOOL lie_about_fs_type() { + NTSTATUS Status; + PROCESS_BASIC_INFORMATION pbi; + PPEB peb; + LIST_ENTRY* le; + ULONG retlen; + + static WCHAR mpr[] = L"MPR.DLL"; + static WCHAR cmd[] = L"CMD.EXE"; + static WCHAR fsutil[] = L"FSUTIL.EXE"; + UNICODE_STRING mprus, cmdus, fsutilus; + + mprus.Buffer = mpr; + mprus.Length = mprus.MaximumLength = (USHORT)(wcslen(mpr) * sizeof(WCHAR)); + cmdus.Buffer = cmd; + cmdus.Length = cmdus.MaximumLength = (USHORT)(wcslen(cmd) * sizeof(WCHAR)); + fsutilus.Buffer = fsutil; + fsutilus.Length = fsutilus.MaximumLength = (USHORT)(wcslen(fsutil) * sizeof(WCHAR)); + + if (!PsGetCurrentProcess()) + return FALSE; + + Status = ZwQueryInformationProcess(NtCurrentProcess(), ProcessBasicInformation, &pbi, sizeof(pbi), &retlen); + + if (!NT_SUCCESS(Status)) { + ERR("ZwQueryInformationProcess returned %08x\n", Status); + return FALSE; + } + + if (!pbi.PebBaseAddress) + return FALSE; + + peb = pbi.PebBaseAddress; + + if (!peb->Ldr) + return FALSE; + + le = peb->Ldr->InMemoryOrderModuleList.Flink; + while (le != &peb->Ldr->InMemoryOrderModuleList) { + LDR_DATA_TABLE_ENTRY* entry = CONTAINING_RECORD(le, LDR_DATA_TABLE_ENTRY, InMemoryOrderLinks); + BOOL blacklist = FALSE; + + if (entry->FullDllName.Length >= mprus.Length) { + UNICODE_STRING name; + + name.Buffer = &entry->FullDllName.Buffer[(entry->FullDllName.Length - mprus.Length) / sizeof(WCHAR)]; + name.Length = name.MaximumLength = mprus.Length; + + blacklist = FsRtlAreNamesEqual(&name, &mprus, TRUE, NULL); + } + + if (!blacklist && entry->FullDllName.Length >= cmdus.Length) { + UNICODE_STRING name; + + name.Buffer = &entry->FullDllName.Buffer[(entry->FullDllName.Length - cmdus.Length) / sizeof(WCHAR)]; + name.Length = name.MaximumLength = cmdus.Length; + + blacklist = FsRtlAreNamesEqual(&name, &cmdus, TRUE, NULL); + } + + if (!blacklist && entry->FullDllName.Length >= fsutilus.Length) { + UNICODE_STRING name; + + name.Buffer = &entry->FullDllName.Buffer[(entry->FullDllName.Length - 
fsutilus.Length) / sizeof(WCHAR)]; + name.Length = name.MaximumLength = fsutilus.Length; + + blacklist = FsRtlAreNamesEqual(&name, &fsutilus, TRUE, NULL); + } + + if (blacklist) { + void** frames; + ULONG i, num_frames; + + frames = ExAllocatePoolWithTag(PagedPool, 256 * sizeof(void*), ALLOC_TAG); + if (!frames) { + ERR("out of memory\n"); + return FALSE; + } + + num_frames = RtlWalkFrameChain(frames, 256, 1); + + for (i = 0; i < num_frames; i++) { + // entry->Reserved3[1] appears to be the image size + if (frames[i] >= entry->DllBase && (ULONG_PTR)frames[i] <= (ULONG_PTR)entry->DllBase + (ULONG_PTR)entry->Reserved3[1]) { + ExFreePool(frames); + return TRUE; + } + } + + ExFreePool(frames); + } + + le = le->Flink; + } + + return FALSE; +} +#endif + +_Dispatch_type_(IRP_MJ_QUERY_VOLUME_INFORMATION) +_Function_class_(DRIVER_DISPATCH) +static NTSTATUS drv_query_volume_information(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp) { PIO_STACK_LOCATION IrpSp; NTSTATUS Status; ULONG BytesCopied = 0; device_extension* Vcb = DeviceObject->DeviceExtension; BOOL top_level; - -#ifndef __REACTOS__ - // An unfortunate necessity - we have to lie about our FS type. MPR!MprGetConnection polls for this, - // and compares it to a whitelist. If it doesn't match, it will return ERROR_NO_NET_OR_BAD_PATH, - // which prevents UAC from working. 
- // FIXME - only lie if we detect that we're being called by mpr.dll - - WCHAR* fs_name = L"NTFS"; - ULONG fs_name_len = 4 * sizeof(WCHAR); -#else - WCHAR* fs_name = L"Btrfs"; - ULONG fs_name_len = 5 * sizeof(WCHAR); -#endif - TRACE("query volume information\n"); - FsRtlEnterFileSystem(); + + TRACE("query volume information\n"); top_level = is_top_level(Irp); - - if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) { - Status = part0_passthrough(DeviceObject, Irp); - goto exit; - } - + + if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + Status = vol_query_volume_information(DeviceObject, Irp); + goto end; + } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { + Status = STATUS_INVALID_PARAMETER; + goto end; + } + IrpSp = IoGetCurrentIrpStackLocation(Irp); - + Status = STATUS_NOT_IMPLEMENTED; - + switch (IrpSp->Parameters.QueryVolume.FsInformationClass) { case FileFsAttributeInformation: { FILE_FS_ATTRIBUTE_INFORMATION* data = Irp->AssociatedIrp.SystemBuffer; BOOL overflow = FALSE; +#ifndef __REACTOS__ + WCHAR* fs_name = (Irp->RequestorMode == UserMode && lie_about_fs_type()) ? 
L"NTFS" : L"Btrfs"; + ULONG fs_name_len = (ULONG)wcslen(fs_name) * sizeof(WCHAR); +#else + WCHAR* fs_name = L"Btrfs"; + ULONG fs_name_len = 5 * sizeof(WCHAR); +#endif ULONG orig_fs_name_len = fs_name_len; - + TRACE("FileFsAttributeInformation\n"); - + if (IrpSp->Parameters.QueryVolume.Length < sizeof(FILE_FS_ATTRIBUTE_INFORMATION) - sizeof(WCHAR) + fs_name_len) { if (IrpSp->Parameters.QueryVolume.Length > sizeof(FILE_FS_ATTRIBUTE_INFORMATION) - sizeof(WCHAR)) fs_name_len = IrpSp->Parameters.QueryVolume.Length - sizeof(FILE_FS_ATTRIBUTE_INFORMATION) + sizeof(WCHAR); else fs_name_len = 0; - + overflow = TRUE; } - + data->FileSystemAttributes = FILE_CASE_PRESERVED_NAMES | FILE_CASE_SENSITIVE_SEARCH | FILE_UNICODE_ON_DISK | FILE_NAMED_STREAMS | FILE_SUPPORTS_HARD_LINKS | FILE_PERSISTENT_ACLS | FILE_SUPPORTS_REPARSE_POINTS | FILE_SUPPORTS_SPARSE_FILES | FILE_SUPPORTS_OBJECT_IDS | - FILE_SUPPORTS_OPEN_BY_FILE_ID | FILE_SUPPORTS_EXTENDED_ATTRIBUTES; + FILE_SUPPORTS_OPEN_BY_FILE_ID | FILE_SUPPORTS_EXTENDED_ATTRIBUTES | FILE_SUPPORTS_BLOCK_REFCOUNTING; if (Vcb->readonly) data->FileSystemAttributes |= FILE_READ_ONLY_VOLUME; - + // should also be FILE_FILE_COMPRESSION when supported data->MaximumComponentNameLength = 255; // FIXME - check data->FileSystemNameLength = orig_fs_name_len; RtlCopyMemory(data->FileSystemName, fs_name, fs_name_len); - + BytesCopied = sizeof(FILE_FS_ATTRIBUTE_INFORMATION) - sizeof(WCHAR) + fs_name_len; Status = overflow ? 
STATUS_BUFFER_OVERFLOW : STATUS_SUCCESS; break; } - case FileFsControlInformation: - FIXME("STUB: FileFsControlInformation\n"); - break; - case FileFsDeviceInformation: { FILE_FS_DEVICE_INFORMATION* ffdi = Irp->AssociatedIrp.SystemBuffer; - + TRACE("FileFsDeviceInformation\n"); - + ffdi->DeviceType = FILE_DEVICE_DISK; - + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); - ffdi->Characteristics = first_device(Vcb)->devobj->Characteristics; + ffdi->Characteristics = Vcb->Vpb->RealDevice->Characteristics; ExReleaseResourceLite(&Vcb->tree_lock); - + if (Vcb->readonly) ffdi->Characteristics |= FILE_READ_ONLY_DEVICE; else @@ -634,59 +793,55 @@ static NTSTATUS STDCALL drv_query_volume_information(IN PDEVICE_OBJECT DeviceObj BytesCopied = sizeof(FILE_FS_DEVICE_INFORMATION); Status = STATUS_SUCCESS; - - break; - } - case FileFsDriverPathInformation: - FIXME("STUB: FileFsDriverPathInformation\n"); break; + } case FileFsFullSizeInformation: { FILE_FS_FULL_SIZE_INFORMATION* ffsi = Irp->AssociatedIrp.SystemBuffer; - + TRACE("FileFsFullSizeInformation\n"); - - calculate_total_space(Vcb, &ffsi->TotalAllocationUnits.QuadPart, &ffsi->ActualAvailableAllocationUnits.QuadPart); + + calculate_total_space(Vcb, (UINT64*)&ffsi->TotalAllocationUnits.QuadPart, (UINT64*)&ffsi->ActualAvailableAllocationUnits.QuadPart); ffsi->CallerAvailableAllocationUnits.QuadPart = ffsi->ActualAvailableAllocationUnits.QuadPart; ffsi->SectorsPerAllocationUnit = 1; ffsi->BytesPerSector = Vcb->superblock.sector_size; - + BytesCopied = sizeof(FILE_FS_FULL_SIZE_INFORMATION); Status = STATUS_SUCCESS; - + break; } case FileFsObjectIdInformation: { FILE_FS_OBJECTID_INFORMATION* ffoi = Irp->AssociatedIrp.SystemBuffer; - + TRACE("FileFsObjectIdInformation\n"); - + RtlCopyMemory(ffoi->ObjectId, &Vcb->superblock.uuid.uuid[0], sizeof(UCHAR) * 16); RtlZeroMemory(ffoi->ExtendedInfo, sizeof(ffoi->ExtendedInfo)); - + BytesCopied = sizeof(FILE_FS_OBJECTID_INFORMATION); Status = STATUS_SUCCESS; - + break; } case 
FileFsSizeInformation: { FILE_FS_SIZE_INFORMATION* ffsi = Irp->AssociatedIrp.SystemBuffer; - + TRACE("FileFsSizeInformation\n"); - - calculate_total_space(Vcb, &ffsi->TotalAllocationUnits.QuadPart, &ffsi->AvailableAllocationUnits.QuadPart); + + calculate_total_space(Vcb, (UINT64*)&ffsi->TotalAllocationUnits.QuadPart, (UINT64*)&ffsi->AvailableAllocationUnits.QuadPart); ffsi->SectorsPerAllocationUnit = 1; ffsi->BytesPerSector = Vcb->superblock.sector_size; - + BytesCopied = sizeof(FILE_FS_SIZE_INFORMATION); Status = STATUS_SUCCESS; - + break; } @@ -696,69 +851,79 @@ static NTSTATUS STDCALL drv_query_volume_information(IN PDEVICE_OBJECT DeviceObj FILE_FS_VOLUME_INFORMATION ffvi; BOOL overflow = FALSE; ULONG label_len, orig_label_len; - + TRACE("FileFsVolumeInformation\n"); TRACE("max length = %u\n", IrpSp->Parameters.QueryVolume.Length); - + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); - -// orig_label_len = label_len = (ULONG)(wcslen(Vcb->label) * sizeof(WCHAR)); - RtlUTF8ToUnicodeN(NULL, 0, &label_len, Vcb->superblock.label, (ULONG)strlen(Vcb->superblock.label)); + + Status = RtlUTF8ToUnicodeN(NULL, 0, &label_len, Vcb->superblock.label, (ULONG)strlen(Vcb->superblock.label)); + if (!NT_SUCCESS(Status)) { + ERR("RtlUTF8ToUnicodeN returned %08x\n", Status); + ExReleaseResourceLite(&Vcb->tree_lock); + break; + } + orig_label_len = label_len; - + if (IrpSp->Parameters.QueryVolume.Length < sizeof(FILE_FS_VOLUME_INFORMATION) - sizeof(WCHAR) + label_len) { if (IrpSp->Parameters.QueryVolume.Length > sizeof(FILE_FS_VOLUME_INFORMATION) - sizeof(WCHAR)) label_len = IrpSp->Parameters.QueryVolume.Length - sizeof(FILE_FS_VOLUME_INFORMATION) + sizeof(WCHAR); else label_len = 0; - + overflow = TRUE; } - + TRACE("label_len = %u\n", label_len); - + ffvi.VolumeCreationTime.QuadPart = 0; // FIXME ffvi.VolumeSerialNumber = Vcb->superblock.uuid.uuid[12] << 24 | Vcb->superblock.uuid.uuid[13] << 16 | Vcb->superblock.uuid.uuid[14] << 8 | Vcb->superblock.uuid.uuid[15]; 
ffvi.VolumeLabelLength = orig_label_len; ffvi.SupportsObjects = FALSE; - + RtlCopyMemory(data, &ffvi, min(sizeof(FILE_FS_VOLUME_INFORMATION) - sizeof(WCHAR), IrpSp->Parameters.QueryVolume.Length)); - + if (label_len > 0) { ULONG bytecount; - -// RtlCopyMemory(&data->VolumeLabel[0], Vcb->label, label_len); - RtlUTF8ToUnicodeN(&data->VolumeLabel[0], label_len, &bytecount, Vcb->superblock.label, (ULONG)strlen(Vcb->superblock.label)); + + Status = RtlUTF8ToUnicodeN(&data->VolumeLabel[0], label_len, &bytecount, Vcb->superblock.label, (ULONG)strlen(Vcb->superblock.label)); + if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_TOO_SMALL) { + ERR("RtlUTF8ToUnicodeN returned %08x\n", Status); + ExReleaseResourceLite(&Vcb->tree_lock); + break; + } + TRACE("label = %.*S\n", label_len / sizeof(WCHAR), data->VolumeLabel); } - + ExReleaseResourceLite(&Vcb->tree_lock); BytesCopied = sizeof(FILE_FS_VOLUME_INFORMATION) - sizeof(WCHAR) + label_len; Status = overflow ? STATUS_BUFFER_OVERFLOW : STATUS_SUCCESS; break; } - + #ifndef __REACTOS__ #ifdef _MSC_VER // not in mingw yet case FileFsSectorSizeInformation: { FILE_FS_SECTOR_SIZE_INFORMATION* data = Irp->AssociatedIrp.SystemBuffer; - + data->LogicalBytesPerSector = Vcb->superblock.sector_size; data->PhysicalBytesPerSectorForAtomicity = Vcb->superblock.sector_size; data->PhysicalBytesPerSectorForPerformance = Vcb->superblock.sector_size; data->FileSystemEffectivePhysicalBytesPerSectorForAtomicity = Vcb->superblock.sector_size; data->ByteOffsetForSectorAlignment = 0; data->ByteOffsetForPartitionAlignment = 0; - + data->Flags = SSINFO_FLAGS_ALIGNED_DEVICE | SSINFO_FLAGS_PARTITION_ALIGNED_ON_DEVICE; - - if (Vcb->trim) + + if (Vcb->trim && !Vcb->options.no_trim) data->Flags |= SSINFO_FLAGS_TRIM_ENABLED; - + BytesCopied = sizeof(FILE_FS_SECTOR_SIZE_INFORMATION); - + break; } #endif @@ -769,131 +934,64 @@ static NTSTATUS STDCALL drv_query_volume_information(IN PDEVICE_OBJECT DeviceObj WARN("unknown FsInformationClass %u\n", 
IrpSp->Parameters.QueryVolume.FsInformationClass); break; } - -// if (NT_SUCCESS(Status) && IrpSp->Parameters.QueryVolume.Length < BytesCopied) { // FIXME - should not copy anything if overflow -// WARN("overflow: %u < %u\n", IrpSp->Parameters.QueryVolume.Length, BytesCopied); -// BytesCopied = IrpSp->Parameters.QueryVolume.Length; -// Status = STATUS_BUFFER_OVERFLOW; -// } - Irp->IoStatus.Status = Status; - if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) Irp->IoStatus.Information = 0; else Irp->IoStatus.Information = BytesCopied; - + +end: + Irp->IoStatus.Status = Status; + IoCompleteRequest( Irp, IO_DISK_INCREMENT ); - -exit: - if (top_level) + + if (top_level) IoSetTopLevelIrp(NULL); - - FsRtlExitFileSystem(); - + TRACE("query volume information returning %08x\n", Status); + FsRtlExitFileSystem(); + return Status; } -static NTSTATUS STDCALL read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +_Function_class_(IO_COMPLETION_ROUTINE) +#ifdef __REACTOS__ +static NTSTATUS NTAPI read_completion(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp, _In_ PVOID conptr) { +#else +static NTSTATUS read_completion(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp, _In_ PVOID conptr) { +#endif read_context* context = conptr; - -// DbgPrint("read_completion\n"); - + + UNUSED(DeviceObject); + context->iosb = Irp->IoStatus; KeSetEvent(&context->Event, 0, FALSE); - -// return STATUS_SUCCESS; + return STATUS_MORE_PROCESSING_REQUIRED; } -// static void test_tree_deletion(device_extension* Vcb) { -// KEY searchkey/*, endkey*/; -// traverse_ptr tp, next_tp; -// root* r; -// -// searchkey.obj_id = 0x100; -// searchkey.obj_type = 0x54; -// searchkey.offset = 0xca4ab2f5; -// -// // endkey.obj_id = 0x100; -// // endkey.obj_type = 0x60; -// // endkey.offset = 0x15a; -// -// r = Vcb->roots; -// while (r && r->id != 0x102) -// r = r->next; -// -// if (!r) { -// ERR("error - could not find root\n"); -// return; -// } -// -// if (!find_item(Vcb, r, &tp, &searchkey, 
NULL, FALSE)) { -// ERR("error - could not find key\n"); -// return; -// } -// -// while (TRUE/*keycmp(tp.item->key, endkey) < 1*/) { -// tp.item->ignore = TRUE; -// add_to_tree_cache(tc, tp.tree); -// -// if (find_next_item(Vcb, &tp, &next_tp, NULL, FALSE)) { -// free_traverse_ptr(&tp); -// tp = next_tp; -// } else -// break; -// } -// -// free_traverse_ptr(&tp); -// } - -// static void test_tree_splitting(device_extension* Vcb) { -// int i; -// -// for (i = 0; i < 1000; i++) { -// char* data = ExAllocatePoolWithTag(PagedPool, 4, ALLOC_TAG); -// -// insert_tree_item(Vcb, Vcb->extent_root, 0, 0xfd, i, data, 4, NULL); -// } -// } - -// static void test_dropping_tree(device_extension* Vcb) { -// LIST_ENTRY* le = Vcb->roots.Flink; -// -// while (le != &Vcb->roots) { -// root* r = CONTAINING_RECORD(le, root, list_entry); -// -// if (r->id == 0x101) { -// RemoveEntryList(&r->list_entry); -// InsertTailList(&Vcb->drop_roots, &r->list_entry); -// return; -// } -// -// le = le->Flink; -// } -// } - -NTSTATUS create_root(device_extension* Vcb, UINT64 id, root** rootptr, BOOL no_tree, UINT64 offset, PIRP Irp, LIST_ENTRY* rollback) { +NTSTATUS create_root(_In_ _Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _In_ UINT64 id, + _Out_ root** rootptr, _In_ BOOL no_tree, _In_ UINT64 offset, _In_opt_ PIRP Irp) { + NTSTATUS Status; root* r; - tree* t; + tree* t = NULL; ROOT_ITEM* ri; traverse_ptr tp; - + r = ExAllocatePoolWithTag(PagedPool, sizeof(root), ALLOC_TAG); if (!r) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + r->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(root_nonpaged), ALLOC_TAG); if (!r->nonpaged) { ERR("out of memory\n"); ExFreePool(r); return STATUS_INSUFFICIENT_RESOURCES; } - + if (!no_tree) { t = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG); if (!t) { @@ -902,52 +1000,62 @@ NTSTATUS create_root(device_extension* Vcb, UINT64 id, root** rootptr, BOOL no_t ExFreePool(r); return 
STATUS_INSUFFICIENT_RESOURCES; } + + t->is_unique = TRUE; + t->uniqueness_determined = TRUE; + t->buf = NULL; } - + ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG); if (!ri) { ERR("out of memory\n"); - - if (!no_tree) + + if (t) ExFreePool(t); - + ExFreePool(r->nonpaged); ExFreePool(r); return STATUS_INSUFFICIENT_RESOURCES; } - + r->id = id; r->treeholder.address = 0; r->treeholder.generation = Vcb->superblock.generation; - r->treeholder.tree = no_tree ? NULL : t; + r->treeholder.tree = t; r->lastinode = 0; - r->path.Buffer = NULL; + r->dirty = FALSE; + r->received = FALSE; + r->reserved = NULL; + r->parent = 0; + r->send_ops = 0; RtlZeroMemory(&r->root_item, sizeof(ROOT_ITEM)); r->root_item.num_references = 1; InitializeListHead(&r->fcbs); - + RtlCopyMemory(ri, &r->root_item, sizeof(ROOT_ITEM)); - + // We ask here for a traverse_ptr to the item we're inserting, so we can // copy some of the tree's variables - - if (!insert_tree_item(Vcb, Vcb->root_root, id, TYPE_ROOT_ITEM, offset, ri, sizeof(ROOT_ITEM), &tp, Irp, rollback)) { - ERR("insert_tree_item failed\n"); + + Status = insert_tree_item(Vcb, Vcb->root_root, id, TYPE_ROOT_ITEM, offset, ri, sizeof(ROOT_ITEM), &tp, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); ExFreePool(ri); - - if (!no_tree) + + if (t) ExFreePool(t); - + ExFreePool(r->nonpaged); ExFreePool(r); - return STATUS_INTERNAL_ERROR; + return Status; } - + ExInitializeResourceLite(&r->nonpaged->load_tree_lock); - + InsertTailList(&Vcb->roots, &r->list_entry); - + if (!no_tree) { + RtlZeroMemory(&t->header, sizeof(tree_header)); t->header.fs_uuid = tp.tree->header.fs_uuid; t->header.address = 0; t->header.flags = HEADER_FLAG_MIXED_BACKREF | 1; // 1 == "written"? Why does the Linux driver record this? 
@@ -963,313 +1071,34 @@ NTSTATUS create_root(device_extension* Vcb, UINT64 id, root** rootptr, BOOL no_t t->parent = NULL; t->paritem = NULL; t->root = r; - + InitializeListHead(&t->itemlist); - + t->new_address = 0; t->has_new_address = FALSE; t->updated_extents = FALSE; - + InsertTailList(&Vcb->trees, &t->list_entry); t->list_entry_hash.Flink = NULL; - + t->write = TRUE; Vcb->need_write = TRUE; } - + *rootptr = r; return STATUS_SUCCESS; } -// static void test_creating_root(device_extension* Vcb) { -// NTSTATUS Status; -// LIST_ENTRY rollback; -// UINT64 id; -// root* r; -// -// InitializeListHead(&rollback); -// -// if (Vcb->root_root->lastinode == 0) -// get_last_inode(Vcb, Vcb->root_root); -// -// id = Vcb->root_root->lastinode > 0x100 ? (Vcb->root_root->lastinode + 1) : 0x101; -// Status = create_root(Vcb, id, &r, &rollback); -// -// if (!NT_SUCCESS(Status)) { -// ERR("create_root returned %08x\n", Status); -// do_rollback(Vcb, &rollback); -// } else { -// Vcb->root_root->lastinode = id; -// clear_rollback(&rollback); -// } -// } - -// static void test_alloc_chunk(device_extension* Vcb) { -// LIST_ENTRY rollback; -// chunk* c; -// -// InitializeListHead(&rollback); -// -// c = alloc_chunk(Vcb, BLOCK_FLAG_DATA | BLOCK_FLAG_RAID10, &rollback); -// if (!c) { -// ERR("alloc_chunk failed\n"); -// do_rollback(Vcb, &rollback); -// } else { -// clear_rollback(&rollback); -// } -// } - -// static void test_space_list(device_extension* Vcb) { -// chunk* c; -// int i, j; -// LIST_ENTRY* le; -// -// typedef struct { -// UINT64 address; -// UINT64 length; -// BOOL add; -// } space_test; -// -// static const space_test entries[] = { -// { 0x1000, 0x1000 }, -// { 0x3000, 0x2000 }, -// { 0x6000, 0x1000 }, -// { 0, 0 } -// }; -// -// static const space_test tests[] = { -// { 0x0, 0x800, TRUE }, -// { 0x1800, 0x400, TRUE }, -// { 0x800, 0x2000, TRUE }, -// { 0x1000, 0x2000, TRUE }, -// { 0x2000, 0x3800, TRUE }, -// { 0x800, 0x1000, TRUE }, -// { 0x1800, 0x1000, TRUE }, -// { 
0x5000, 0x800, TRUE }, -// { 0x5000, 0x1000, TRUE }, -// { 0x7000, 0x1000, TRUE }, -// { 0x8000, 0x1000, TRUE }, -// { 0x800, 0x800, TRUE }, -// { 0x0, 0x3800, TRUE }, -// { 0x1000, 0x2800, TRUE }, -// { 0x1000, 0x1000, FALSE }, -// { 0x800, 0x2000, FALSE }, -// { 0x0, 0x3800, FALSE }, -// { 0x2800, 0x1000, FALSE }, -// { 0x1800, 0x2000, FALSE }, -// { 0x3800, 0x1000, FALSE }, -// { 0, 0, FALSE } -// }; -// -// c = CONTAINING_RECORD(Vcb->chunks.Flink, chunk, list_entry); -// -// i = 0; -// while (tests[i].length > 0) { -// InitializeListHead(&c->space); -// InitializeListHead(&c->space_size); -// ERR("test %u\n", i); -// -// j = 0; -// while (entries[j].length > 0) { -// space* s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG); -// s->address = entries[j].address; -// s->size = entries[j].length; -// InsertTailList(&c->space, &s->list_entry); -// -// order_space_entry(s, &c->space_size); -// -// j++; -// } -// -// if (tests[i].add) -// space_list_add(Vcb, c, FALSE, tests[i].address, tests[i].length, NULL); -// else -// space_list_subtract(Vcb, c, FALSE, tests[i].address, tests[i].length, NULL); -// -// le = c->space.Flink; -// while (le != &c->space) { -// space* s = CONTAINING_RECORD(le, space, list_entry); -// -// ERR("(%llx,%llx)\n", s->address, s->size); -// -// le = le->Flink; -// } -// -// ERR("--\n"); -// -// le = c->space_size.Flink; -// while (le != &c->space_size) { -// space* s = CONTAINING_RECORD(le, space, list_entry_size); -// -// ERR("(%llx,%llx)\n", s->address, s->size); -// -// le = le->Flink; -// } -// -// i++; -// } -// -// int3; -// } - -#if 0 -void STDCALL tree_test(void* context) { - device_extension* Vcb = context; - NTSTATUS Status; - UINT64 id; - LARGE_INTEGER due_time, time; - KTIMER timer; - root* r; - LIST_ENTRY rollback; - ULONG seed; - - InitializeListHead(&rollback); - - KeInitializeTimer(&timer); - - id = InterlockedIncrement64(&Vcb->root_root->lastinode); - Status = create_root(Vcb, id, &r, FALSE, 0, NULL, &rollback); - 
if (!NT_SUCCESS(Status)) { - ERR("create_root returned %08x\n"); - return; - } - - clear_rollback(Vcb, &rollback); - - due_time.QuadPart = (UINT64)1 * -10000000; - - KeQueryPerformanceCounter(&time); - seed = time.LowPart; - - while (TRUE) { - UINT32 i; - - FsRtlEnterFileSystem(); - - ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); - - for (i = 0; i < 100; i++) { - void* data; - ULONG datalen; - UINT64 objid, offset; - - objid = RtlRandomEx(&seed); - objid <<= 32; - objid |= RtlRandomEx(&seed); - - offset = RtlRandomEx(&seed); - offset <<= 32; - offset |= RtlRandomEx(&seed); - - datalen = 30; - data = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG); - - if (!insert_tree_item(Vcb, r, objid, 0xfd, offset, data, datalen, NULL, NULL, &rollback)) { - ERR("insert_tree_item failed\n"); - } - } - - for (i = 0; i < 25; i++) { - KEY searchkey; - traverse_ptr tp; - - searchkey.obj_id = RtlRandomEx(&seed); - searchkey.obj_id <<= 32; - searchkey.obj_id |= RtlRandomEx(&seed); - - searchkey.obj_type = 0xfd; - - searchkey.offset = RtlRandomEx(&seed); - searchkey.offset <<= 32; - searchkey.offset |= RtlRandomEx(&seed); - - Status = find_item(Vcb, r, &tp, &searchkey, FALSE, NULL); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - } else { - delete_tree_item(Vcb, &tp, &rollback); - } - } - - clear_rollback(Vcb, &rollback); - - ExReleaseResourceLite(&Vcb->tree_lock); - - FsRtlExitFileSystem(); - - KeSetTimer(&timer, due_time, NULL); - - KeWaitForSingleObject(&timer, Executive, KernelMode, FALSE, NULL); - } -} -#endif - -// static void test_calc_thread(device_extension* Vcb) { -// UINT8* data; -// ULONG sectors, max_sectors, i, j; -// calc_job* cj; -// LARGE_INTEGER* sertimes; -// LARGE_INTEGER* partimes; -// LARGE_INTEGER time1, time2; -// -// max_sectors = 256; -// -// sertimes = ExAllocatePoolWithTag(PagedPool, sizeof(LARGE_INTEGER) * max_sectors, ALLOC_TAG); -// partimes = ExAllocatePoolWithTag(PagedPool, sizeof(LARGE_INTEGER) * 
max_sectors, ALLOC_TAG); -// RtlZeroMemory(sertimes, sizeof(LARGE_INTEGER) * max_sectors); -// RtlZeroMemory(partimes, sizeof(LARGE_INTEGER) * max_sectors); -// -// for (sectors = 1; sectors <= max_sectors; sectors++) { -// data = ExAllocatePoolWithTag(PagedPool, sectors * Vcb->superblock.sector_size, ALLOC_TAG); -// RtlZeroMemory(data, sectors * Vcb->superblock.sector_size); -// -// for (j = 0; j < 100; j++) { -// time1 = KeQueryPerformanceCounter(NULL); -// -// for (i = 0; i < sectors; i++) { -// UINT32 tmp; -// -// tmp = ~calc_crc32c(0xffffffff, data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); -// } -// -// time2 = KeQueryPerformanceCounter(NULL); -// -// sertimes[sectors - 1].QuadPart += time2.QuadPart - time1.QuadPart; -// -// time1 = KeQueryPerformanceCounter(NULL); -// -// add_calc_job(Vcb, data, sectors, &cj); -// KeWaitForSingleObject(&cj->event, Executive, KernelMode, FALSE, NULL); -// -// time2 = KeQueryPerformanceCounter(NULL); -// -// partimes[sectors - 1].QuadPart += time2.QuadPart - time1.QuadPart; -// -// free_calc_job(cj); -// } -// -// ExFreePool(data); -// } -// -// for (sectors = 1; sectors <= max_sectors; sectors++) { -// ERR("%u sectors: serial %llu, parallel %llu\n", sectors, sertimes[sectors - 1].QuadPart, partimes[sectors - 1].QuadPart); -// } -// -// ExFreePool(partimes); -// ExFreePool(sertimes); -// } - -static NTSTATUS STDCALL set_label(device_extension* Vcb, FILE_FS_LABEL_INFORMATION* ffli) { +static NTSTATUS set_label(_In_ device_extension* Vcb, _In_ FILE_FS_LABEL_INFORMATION* ffli) { ULONG utf8len; NTSTATUS Status; - USHORT vollen, i; -// HANDLE h; - + ULONG vollen, i; + TRACE("label = %.*S\n", ffli->VolumeLabelLength / sizeof(WCHAR), ffli->VolumeLabel); - + vollen = ffli->VolumeLabelLength; - + for (i = 0; i < ffli->VolumeLabelLength / sizeof(WCHAR); i++) { if (ffli->VolumeLabel[i] == 0) { vollen = i * sizeof(WCHAR); @@ -1279,45 +1108,35 @@ static NTSTATUS STDCALL set_label(device_extension* Vcb, 
FILE_FS_LABEL_INFORMATI goto end; } } - + if (vollen == 0) { utf8len = 0; } else { Status = RtlUnicodeToUTF8N(NULL, 0, &utf8len, ffli->VolumeLabel, vollen); if (!NT_SUCCESS(Status)) goto end; - + if (utf8len > MAX_LABEL_SIZE) { Status = STATUS_INVALID_VOLUME_LABEL; goto end; } } - + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); - + if (utf8len > 0) { Status = RtlUnicodeToUTF8N((PCHAR)&Vcb->superblock.label, MAX_LABEL_SIZE, &utf8len, ffli->VolumeLabel, vollen); if (!NT_SUCCESS(Status)) goto release; } else Status = STATUS_SUCCESS; - + if (utf8len < MAX_LABEL_SIZE) RtlZeroMemory(Vcb->superblock.label + utf8len, MAX_LABEL_SIZE - utf8len); - -// test_tree_deletion(Vcb); // TESTING -// test_tree_splitting(Vcb); -// test_dropping_tree(Vcb); -// test_creating_root(Vcb); -// test_alloc_chunk(Vcb); -// test_space_list(Vcb); -// test_calc_thread(Vcb); - + Vcb->need_write = TRUE; - -// PsCreateSystemThread(&h, 0, NULL, NULL, NULL, tree_test, Vcb); - -release: + +release: ExReleaseResourceLite(&Vcb->tree_lock); end: @@ -1326,35 +1145,40 @@ end: return Status; } -static NTSTATUS STDCALL drv_set_volume_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { +_Dispatch_type_(IRP_MJ_SET_VOLUME_INFORMATION) +_Function_class_(DRIVER_DISPATCH) +static NTSTATUS drv_set_volume_information(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp) { PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); device_extension* Vcb = DeviceObject->DeviceExtension; NTSTATUS Status; BOOL top_level; - TRACE("set volume information\n"); - FsRtlEnterFileSystem(); + TRACE("set volume information\n"); + top_level = is_top_level(Irp); - - if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) { - Status = part0_passthrough(DeviceObject, Irp); - goto exit; + + if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + Status = vol_set_volume_information(DeviceObject, Irp); + goto end; + } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { + Status = STATUS_INVALID_PARAMETER; + goto end; } - + Status = 
STATUS_NOT_IMPLEMENTED; - + if (Vcb->readonly) { Status = STATUS_MEDIA_WRITE_PROTECTED; goto end; } - + if (Vcb->removing || Vcb->locked) { Status = STATUS_ACCESS_DENIED; goto end; } - + switch (IrpSp->Parameters.SetVolume.FsInformationClass) { case FileFsControlInformation: FIXME("STUB: FileFsControlInformation\n"); @@ -1362,7 +1186,7 @@ static NTSTATUS STDCALL drv_set_volume_information(IN PDEVICE_OBJECT DeviceObjec case FileFsLabelInformation: TRACE("FileFsLabelInformation\n"); - + Status = set_label(Vcb, Irp->AssociatedIrp.SystemBuffer); break; @@ -1374,346 +1198,367 @@ static NTSTATUS STDCALL drv_set_volume_information(IN PDEVICE_OBJECT DeviceObjec WARN("Unrecognized FsInformationClass 0x%x\n", IrpSp->Parameters.SetVolume.FsInformationClass); break; } - + end: Irp->IoStatus.Status = Status; Irp->IoStatus.Information = 0; + TRACE("returning %08x\n", Status); + IoCompleteRequest( Irp, IO_NO_INCREMENT ); - -exit: - if (top_level) + + if (top_level) IoSetTopLevelIrp(NULL); - + FsRtlExitFileSystem(); return Status; } -static WCHAR* file_desc_fcb(fcb* fcb) { +static WCHAR* file_desc_fcb(_In_ fcb* fcb) { char s[60]; + NTSTATUS Status; UNICODE_STRING us; ANSI_STRING as; - + if (fcb->debug_desc) return fcb->debug_desc; - + if (fcb == fcb->Vcb->volume_fcb) return L"volume FCB"; - + fcb->debug_desc = ExAllocatePoolWithTag(PagedPool, 60 * sizeof(WCHAR), ALLOC_TAG); if (!fcb->debug_desc) return L"(memory error)"; - + // I know this is pretty hackish... // GCC doesn't like %llx in sprintf, and MSVC won't let us use swprintf // without the CRT, which breaks drivers. 
- + sprintf(s, "subvol %x, inode %x", (UINT32)fcb->subvol->id, (UINT32)fcb->inode); - + as.Buffer = s; - as.Length = as.MaximumLength = strlen(s); - + as.Length = as.MaximumLength = (USHORT)strlen(s); + us.Buffer = fcb->debug_desc; us.MaximumLength = 60 * sizeof(WCHAR); us.Length = 0; - - RtlAnsiStringToUnicodeString(&us, &as, FALSE); - + + Status = RtlAnsiStringToUnicodeString(&us, &as, FALSE); + if (!NT_SUCCESS(Status)) + return L"(RtlAnsiStringToUnicodeString error)"; + us.Buffer[us.Length / sizeof(WCHAR)] = 0; - + return fcb->debug_desc; } -WCHAR* file_desc_fileref(file_ref* fileref) { +WCHAR* file_desc_fileref(_In_ file_ref* fileref) { NTSTATUS Status; UNICODE_STRING fn; - + ULONG reqlen; + if (fileref->debug_desc) return fileref->debug_desc; - - Status = fileref_get_filename(fileref, &fn, NULL); - if (!NT_SUCCESS(Status)) { + + fn.Length = fn.MaximumLength = 0; + Status = fileref_get_filename(fileref, &fn, NULL, &reqlen); + if (Status != STATUS_BUFFER_OVERFLOW) return L"ERROR"; - } - - fileref->debug_desc = ExAllocatePoolWithTag(PagedPool, fn.Length + sizeof(WCHAR), ALLOC_TAG); - if (!fileref->debug_desc) { - ExFreePool(fn.Buffer); + + if (reqlen > 0xffff - sizeof(WCHAR)) + return L"(too long)"; + + fileref->debug_desc = ExAllocatePoolWithTag(PagedPool, reqlen + sizeof(WCHAR), ALLOC_TAG); + if (!fileref->debug_desc) return L"(memory error)"; + + fn.Buffer = fileref->debug_desc; + fn.Length = 0; + fn.MaximumLength = (USHORT)(reqlen + sizeof(WCHAR)); + + Status = fileref_get_filename(fileref, &fn, NULL, &reqlen); + if (!NT_SUCCESS(Status)) { + ExFreePool(fileref->debug_desc); + fileref->debug_desc = NULL; + return L"ERROR"; } - - RtlCopyMemory(fileref->debug_desc, fn.Buffer, fn.Length); + fileref->debug_desc[fn.Length / sizeof(WCHAR)] = 0; - - ExFreePool(fn.Buffer); - + return fileref->debug_desc; } -WCHAR* file_desc(PFILE_OBJECT FileObject) { +_Ret_z_ +WCHAR* file_desc(_In_ PFILE_OBJECT FileObject) { fcb* fcb = FileObject->FsContext; ccb* ccb = 
FileObject->FsContext2; file_ref* fileref = ccb ? ccb->fileref : NULL; - + if (fileref) return file_desc_fileref(fileref); else return file_desc_fcb(fcb); } -void send_notification_fileref(file_ref* fileref, ULONG filter_match, ULONG action) { +void send_notification_fileref(_In_ file_ref* fileref, _In_ ULONG filter_match, _In_ ULONG action, _In_opt_ PUNICODE_STRING stream) { UNICODE_STRING fn; NTSTATUS Status; + ULONG reqlen; USHORT name_offset; fcb* fcb = fileref->fcb; - - Status = fileref_get_filename(fileref, &fn, &name_offset); + + fn.Length = fn.MaximumLength = 0; + Status = fileref_get_filename(fileref, &fn, NULL, &reqlen); + if (Status != STATUS_BUFFER_OVERFLOW) { + ERR("fileref_get_filename returned %08x\n", Status); + return; + } + + if (reqlen > 0xffff) { + WARN("reqlen was too long for FsRtlNotifyFilterReportChange\n"); + return; + } + + fn.Buffer = ExAllocatePoolWithTag(PagedPool, reqlen, ALLOC_TAG); + if (!fn.Buffer) { + ERR("out of memory\n"); + return; + } + + fn.MaximumLength = (USHORT)reqlen; + fn.Length = 0; + + Status = fileref_get_filename(fileref, &fn, &name_offset, &reqlen); if (!NT_SUCCESS(Status)) { ERR("fileref_get_filename returned %08x\n", Status); + ExFreePool(fn.Buffer); return; } - + FsRtlNotifyFilterReportChange(fcb->Vcb->NotifySync, &fcb->Vcb->DirNotifyList, (PSTRING)&fn, name_offset, - NULL, NULL, filter_match, action, NULL, NULL); + (PSTRING)stream, NULL, filter_match, action, NULL, NULL); ExFreePool(fn.Buffer); } -void send_notification_fcb(file_ref* fileref, ULONG filter_match, ULONG action) { +void send_notification_fcb(_In_ file_ref* fileref, _In_ ULONG filter_match, _In_ ULONG action, _In_opt_ PUNICODE_STRING stream) { fcb* fcb = fileref->fcb; LIST_ENTRY* le; NTSTATUS Status; - + // no point looking for hardlinks if st_nlink == 1 if (fileref->fcb->inode_item.st_nlink == 1) { - send_notification_fileref(fileref, filter_match, action); + send_notification_fileref(fileref, filter_match, action, stream); return; } - + 
ExAcquireResourceExclusiveLite(&fcb->Vcb->fcb_lock, TRUE); - + le = fcb->hardlinks.Flink; while (le != &fcb->hardlinks) { hardlink* hl = CONTAINING_RECORD(le, hardlink, list_entry); file_ref* parfr; - + Status = open_fileref_by_inode(fcb->Vcb, fcb->subvol, hl->parent, &parfr, NULL); - - if (!NT_SUCCESS(Status)) { + + if (!NT_SUCCESS(Status)) ERR("open_fileref_by_inode returned %08x\n", Status); - } else if (!parfr->deleted) { - LIST_ENTRY* le2; - BOOL found = FALSE, deleted = FALSE; - UNICODE_STRING* fn; - - le2 = parfr->children.Flink; - while (le2 != &parfr->children) { - file_ref* fr2 = CONTAINING_RECORD(le2, file_ref, list_entry); - - if (fr2->index == hl->index) { - found = TRUE; - deleted = fr2->deleted; - - if (!deleted) - fn = &fr2->filepart; - - break; - } - - le2 = le2->Flink; + else if (!parfr->deleted) { + UNICODE_STRING fn; + ULONG pathlen; + + fn.Length = fn.MaximumLength = 0; + Status = fileref_get_filename(parfr, &fn, NULL, &pathlen); + if (Status != STATUS_BUFFER_OVERFLOW) { + ERR("fileref_get_filename returned %08x\n", Status); + free_fileref(fcb->Vcb, parfr); + break; } - - if (!found) - fn = &hl->name; - - if (!deleted) { - UNICODE_STRING path; - - Status = fileref_get_filename(parfr, &path, NULL); - if (!NT_SUCCESS(Status)) { - ERR("fileref_get_filename returned %08x\n", Status); - } else { - UNICODE_STRING fn2; - ULONG name_offset; - - name_offset = path.Length; - if (parfr != fileref->fcb->Vcb->root_fileref) name_offset += sizeof(WCHAR); - - fn2.Length = fn2.MaximumLength = fn->Length + name_offset; - fn2.Buffer = ExAllocatePoolWithTag(PagedPool, fn2.MaximumLength, ALLOC_TAG); - - RtlCopyMemory(fn2.Buffer, path.Buffer, path.Length); - if (parfr != fileref->fcb->Vcb->root_fileref) fn2.Buffer[path.Length / sizeof(WCHAR)] = '\\'; - RtlCopyMemory(&fn2.Buffer[name_offset / sizeof(WCHAR)], fn->Buffer, fn->Length); - - TRACE("%.*S\n", fn2.Length / sizeof(WCHAR), fn2.Buffer); - - FsRtlNotifyFilterReportChange(fcb->Vcb->NotifySync, 
&fcb->Vcb->DirNotifyList, (PSTRING)&fn2, name_offset, - NULL, NULL, filter_match, action, NULL, NULL); - - ExFreePool(fn2.Buffer); - ExFreePool(path.Buffer); - } + + if (parfr != fcb->Vcb->root_fileref) + pathlen += sizeof(WCHAR); + + if (pathlen + hl->name.Length > 0xffff) { + WARN("pathlen + hl->name.Length was too long for FsRtlNotifyFilterReportChange\n"); + free_fileref(fcb->Vcb, parfr); + break; + } + + fn.MaximumLength = (USHORT)(pathlen + hl->name.Length); + fn.Buffer = ExAllocatePoolWithTag(PagedPool, fn.MaximumLength, ALLOC_TAG); + if (!fn.Buffer) { + ERR("out of memory\n"); + free_fileref(fcb->Vcb, parfr); + break; } - - free_fileref(parfr); + + Status = fileref_get_filename(parfr, &fn, NULL, NULL); + if (!NT_SUCCESS(Status)) { + ERR("fileref_get_filename returned %08x\n", Status); + free_fileref(fcb->Vcb, parfr); + ExFreePool(fn.Buffer); + break; + } + + if (parfr != fcb->Vcb->root_fileref) { + fn.Buffer[(pathlen / sizeof(WCHAR)) - 1] = '\\'; + fn.Length += sizeof(WCHAR); + } + + RtlCopyMemory(&fn.Buffer[pathlen / sizeof(WCHAR)], hl->name.Buffer, hl->name.Length); + fn.Length += hl->name.Length; + + FsRtlNotifyFilterReportChange(fcb->Vcb->NotifySync, &fcb->Vcb->DirNotifyList, (PSTRING)&fn, (USHORT)pathlen, + (PSTRING)stream, NULL, filter_match, action, NULL, NULL); + + ExFreePool(fn.Buffer); + + free_fileref(fcb->Vcb, parfr); } - + le = le->Flink; } - + ExReleaseResourceLite(&fcb->Vcb->fcb_lock); } -void mark_fcb_dirty(fcb* fcb) { +void mark_fcb_dirty(_In_ fcb* fcb) { if (!fcb->dirty) { #ifdef DEBUG_FCB_REFCOUNTS LONG rc; #endif - dirty_fcb* dirt = ExAllocatePoolWithTag(NonPagedPool, sizeof(dirty_fcb), ALLOC_TAG); - - if (!dirt) { - ExFreePool("out of memory\n"); - return; - } - fcb->dirty = TRUE; - + #ifdef DEBUG_FCB_REFCOUNTS rc = InterlockedIncrement(&fcb->refcount); WARN("fcb %p: refcount now %i\n", fcb, rc); #else InterlockedIncrement(&fcb->refcount); #endif - - dirt->fcb = fcb; - - ExInterlockedInsertTailList(&fcb->Vcb->dirty_fcbs, 
&dirt->list_entry, &fcb->Vcb->dirty_fcbs_lock); + + ExAcquireResourceExclusiveLite(&fcb->Vcb->dirty_fcbs_lock, TRUE); + InsertTailList(&fcb->Vcb->dirty_fcbs, &fcb->list_entry_dirty); + ExReleaseResourceLite(&fcb->Vcb->dirty_fcbs_lock); } - + fcb->Vcb->need_write = TRUE; } -void mark_fileref_dirty(file_ref* fileref) { +void mark_fileref_dirty(_In_ file_ref* fileref) { if (!fileref->dirty) { - dirty_fileref* dirt = ExAllocatePoolWithTag(NonPagedPool, sizeof(dirty_fileref), ALLOC_TAG); - - if (!dirt) { - ExFreePool("out of memory\n"); - return; - } - fileref->dirty = TRUE; increase_fileref_refcount(fileref); - - dirt->fileref = fileref; - - ExInterlockedInsertTailList(&fileref->fcb->Vcb->dirty_filerefs, &dirt->list_entry, &fileref->fcb->Vcb->dirty_filerefs_lock); + + ExAcquireResourceExclusiveLite(&fileref->fcb->Vcb->dirty_filerefs_lock, TRUE); + InsertTailList(&fileref->fcb->Vcb->dirty_filerefs, &fileref->list_entry_dirty); + ExReleaseResourceLite(&fileref->fcb->Vcb->dirty_filerefs_lock); } - + fileref->fcb->Vcb->need_write = TRUE; } -void _free_fcb(fcb* fcb, const char* func, const char* file, unsigned int line) { +#ifdef DEBUG_FCB_REFCOUNTS +void _free_fcb(_Requires_exclusive_lock_held_(_Curr_->fcb_lock) _In_ device_extension* Vcb, _Inout_ fcb* fcb, _In_ const char* func) { +#else +void free_fcb(_Requires_exclusive_lock_held_(_Curr_->fcb_lock) _In_ device_extension* Vcb, _Inout_ fcb* fcb) { +#endif LONG rc; -// #ifdef DEBUG -// if (!ExIsResourceAcquiredExclusiveLite(&fcb->Vcb->fcb_lock) && !ExIsResourceAcquiredExclusiveLite(&fcb->Vcb->tree_lock)) { -// ERR("fcb_lock not acquired exclusively\n"); -// int3; -// } -// #endif - rc = InterlockedDecrement(&fcb->refcount); - + #ifdef DEBUG_FCB_REFCOUNTS -// WARN("fcb %p: refcount now %i (%.*S)\n", fcb, rc, fcb->full_filename.Length / sizeof(WCHAR), fcb->full_filename.Buffer); #ifdef DEBUG_LONG_MESSAGES - _debug_message(func, file, line, "fcb %p: refcount now %i (subvol %llx, inode %llx)\n", fcb, rc, fcb->subvol ? 
fcb->subvol->id : 0, fcb->inode); + ERR("fcb %p: refcount now %i (subvol %llx, inode %llx)\n", fcb, rc, fcb->subvol ? fcb->subvol->id : 0, fcb->inode); #else - _debug_message(func, "fcb %p: refcount now %i (subvol %llx, inode %llx)\n", fcb, rc, fcb->subvol ? fcb->subvol->id : 0, fcb->inode); + ERR("fcb %p: refcount now %i (subvol %llx, inode %llx)\n", fcb, rc, fcb->subvol ? fcb->subvol->id : 0, fcb->inode); #endif #endif - + if (rc > 0) return; - -// ExAcquireResourceExclusiveLite(&fcb->Vcb->fcb_lock, TRUE); - + if (fcb->list_entry.Flink) RemoveEntryList(&fcb->list_entry); - + if (fcb->list_entry_all.Flink) RemoveEntryList(&fcb->list_entry_all); - -// ExReleaseResourceLite(&fcb->Vcb->fcb_lock); - + ExDeleteResourceLite(&fcb->nonpaged->resource); ExDeleteResourceLite(&fcb->nonpaged->paging_resource); ExDeleteResourceLite(&fcb->nonpaged->dir_children_lock); - ExFreePool(fcb->nonpaged); - + + ExFreeToNPagedLookasideList(&Vcb->fcb_np_lookaside, fcb->nonpaged); + if (fcb->sd) ExFreePool(fcb->sd); - + if (fcb->adsxattr.Buffer) ExFreePool(fcb->adsxattr.Buffer); - + if (fcb->reparse_xattr.Buffer) ExFreePool(fcb->reparse_xattr.Buffer); - + if (fcb->ea_xattr.Buffer) ExFreePool(fcb->ea_xattr.Buffer); - + if (fcb->adsdata.Buffer) ExFreePool(fcb->adsdata.Buffer); - + if (fcb->debug_desc) ExFreePool(fcb->debug_desc); - + while (!IsListEmpty(&fcb->extents)) { LIST_ENTRY* le = RemoveHeadList(&fcb->extents); extent* ext = CONTAINING_RECORD(le, extent, list_entry); - + if (ext->csum) ExFreePool(ext->csum); - - ExFreePool(ext->data); + ExFreePool(ext); } - + while (!IsListEmpty(&fcb->hardlinks)) { LIST_ENTRY* le = RemoveHeadList(&fcb->hardlinks); hardlink* hl = CONTAINING_RECORD(le, hardlink, list_entry); - + if (hl->name.Buffer) ExFreePool(hl->name.Buffer); - + if (hl->utf8.Buffer) ExFreePool(hl->utf8.Buffer); ExFreePool(hl); } - + + while (!IsListEmpty(&fcb->xattrs)) { + xattr* xa = CONTAINING_RECORD(RemoveHeadList(&fcb->xattrs), xattr, list_entry); + + ExFreePool(xa); + } + while 
(!IsListEmpty(&fcb->dir_children_index)) { LIST_ENTRY* le = RemoveHeadList(&fcb->dir_children_index); dir_child* dc = CONTAINING_RECORD(le, dir_child, list_entry_index); - + ExFreePool(dc->utf8.Buffer); ExFreePool(dc->name.Buffer); ExFreePool(dc->name_uc.Buffer); ExFreePool(dc); } - + if (fcb->hash_ptrs) ExFreePool(fcb->hash_ptrs); - + if (fcb->hash_ptrs_uc) ExFreePool(fcb->hash_ptrs_uc); - + FsRtlUninitializeFileLock(&fcb->lock); - - ExFreePool(fcb); + + if (fcb->pool_type == NonPagedPool) + ExFreePool(fcb); + else + ExFreeToPagedLookasideList(&Vcb->fcb_lookaside, fcb); + #ifdef DEBUG_FCB_REFCOUNTS #ifdef DEBUG_LONG_MESSAGES _debug_message(func, file, line, "freeing fcb %p\n", fcb); @@ -1723,326 +1568,397 @@ void _free_fcb(fcb* fcb, const char* func, const char* file, unsigned int line) #endif } -void _free_fileref(file_ref* fr, const char* func, const char* file, unsigned int line) { +void free_fileref(_Requires_exclusive_lock_held_(_Curr_->fcb_lock) _In_ device_extension* Vcb, _Inout_ file_ref* fr) { LONG rc; -// #ifdef DEBUG -// if (!ExIsResourceAcquiredExclusiveLite(&fr->fcb->Vcb->fcb_lock) && !ExIsResourceAcquiredExclusiveLite(&fr->fcb->Vcb->tree_lock) && !fr->dirty) { -// ERR("fcb_lock not acquired exclusively\n"); -// int3; -// } -// #endif - rc = InterlockedDecrement(&fr->refcount); - + #ifdef DEBUG_FCB_REFCOUNTS -#ifdef DEBUG_LONG_MESSAGES - _debug_message(func, file, line, "fileref %p: refcount now %i\n", fr, rc); -#else - _debug_message(func, "fileref %p: refcount now %i\n", fr, rc); + ERR("fileref %p: refcount now %i\n", fr, rc); #endif -#endif - + #ifdef _DEBUG if (rc < 0) { ERR("fileref %p: refcount now %i\n", fr, rc); int3; } #endif - + if (rc > 0) return; - + if (fr->parent) ExAcquireResourceExclusiveLite(&fr->parent->nonpaged->children_lock, TRUE); - + // FIXME - do we need a file_ref lock? 
- + // FIXME - do delete if needed - - if (fr->filepart.Buffer) - ExFreePool(fr->filepart.Buffer); - - if (fr->filepart_uc.Buffer) - ExFreePool(fr->filepart_uc.Buffer); - - if (fr->utf8.Buffer) - ExFreePool(fr->utf8.Buffer); - + if (fr->debug_desc) ExFreePool(fr->debug_desc); - + ExDeleteResourceLite(&fr->nonpaged->children_lock); - - ExFreePool(fr->nonpaged); - + ExDeleteResourceLite(&fr->nonpaged->fileref_lock); + + ExFreeToNPagedLookasideList(&Vcb->fileref_np_lookaside, fr->nonpaged); + // FIXME - throw error if children not empty - + if (fr->fcb->fileref == fr) fr->fcb->fileref = NULL; - - if (fr->dc) + + if (fr->dc) { + if (fr->fcb->ads) + fr->dc->size = fr->fcb->adsdata.Length; + fr->dc->fileref = NULL; + } if (fr->list_entry.Flink) RemoveEntryList(&fr->list_entry); - + if (fr->parent) { ExReleaseResourceLite(&fr->parent->nonpaged->children_lock); - free_fileref(fr->parent); + free_fileref(Vcb, fr->parent); } - - free_fcb(fr->fcb); - ExFreePool(fr); + + free_fcb(Vcb, fr->fcb); + + ExFreeToPagedLookasideList(&Vcb->fileref_lookaside, fr); } -static NTSTATUS STDCALL close_file(device_extension* Vcb, PFILE_OBJECT FileObject) { +static NTSTATUS close_file(_In_ PFILE_OBJECT FileObject, _In_ PIRP Irp) { fcb* fcb; ccb* ccb; file_ref* fileref = NULL; LONG open_files; - + device_extension* Vcb; + + UNUSED(Irp); + TRACE("FileObject = %p\n", FileObject); - - open_files = InterlockedDecrement(&Vcb->open_files); - + fcb = FileObject->FsContext; if (!fcb) { TRACE("FCB was NULL, returning success\n"); - - if (open_files == 0 && Vcb->removing) - uninit(Vcb, FALSE); - return STATUS_SUCCESS; } - + + open_files = InterlockedDecrement(&fcb->Vcb->open_files); + ccb = FileObject->FsContext2; - + TRACE("close called for %S (fcb == %p)\n", file_desc(FileObject), fcb); - + // FIXME - make sure notification gets sent if file is being deleted - - if (ccb) { + + if (ccb) { if (ccb->query_string.Buffer) RtlFreeUnicodeString(&ccb->query_string); - + if (ccb->filename.Buffer) 
ExFreePool(ccb->filename.Buffer); - + // FIXME - use refcounts for fileref fileref = ccb->fileref; - - ExFreePool(ccb); - } - - CcUninitializeCacheMap(FileObject, NULL, NULL); - - if (open_files == 0 && Vcb->removing) { - uninit(Vcb, FALSE); + + if (fcb->Vcb->running_sends > 0) { + BOOL send_cancelled = FALSE; + + ExAcquireResourceExclusiveLite(&fcb->Vcb->send_load_lock, TRUE); + + if (ccb->send) { + ccb->send->cancelling = TRUE; + send_cancelled = TRUE; + KeSetEvent(&ccb->send->cleared_event, 0, FALSE); + } + + ExReleaseResourceLite(&fcb->Vcb->send_load_lock); + + if (send_cancelled) { + while (ccb->send) { + ExAcquireResourceExclusiveLite(&fcb->Vcb->send_load_lock, TRUE); + ExReleaseResourceLite(&fcb->Vcb->send_load_lock); + } + } + } + + ExFreePool(ccb); + } + + CcUninitializeCacheMap(FileObject, NULL, NULL); + + if (open_files == 0 && fcb->Vcb->removing) { + uninit(fcb->Vcb, FALSE); return STATUS_SUCCESS; } - - if (!(Vcb->Vpb->Flags & VPB_MOUNTED)) + + if (!(fcb->Vcb->Vpb->Flags & VPB_MOUNTED)) return STATUS_SUCCESS; - + + Vcb = fcb->Vcb; + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); - + if (fileref) - free_fileref(fileref); + free_fileref(fcb->Vcb, fileref); else - free_fcb(fcb); - + free_fcb(Vcb, fcb); + ExReleaseResourceLite(&Vcb->fcb_lock); - + return STATUS_SUCCESS; } -void STDCALL uninit(device_extension* Vcb, BOOL flush) { - space* s; +void uninit(_In_ device_extension* Vcb, _In_ BOOL flush) { UINT64 i; - LIST_ENTRY rollback; NTSTATUS Status; LIST_ENTRY* le; LARGE_INTEGER time; - - Vcb->removing = TRUE; - + + if (!Vcb->removing) { + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + Vcb->removing = TRUE; + ExReleaseResourceLite(&Vcb->tree_lock); + } + RemoveEntryList(&Vcb->list_entry); - + if (Vcb->balance.thread) { Vcb->balance.paused = FALSE; Vcb->balance.stopping = TRUE; KeSetEvent(&Vcb->balance.event, 0, FALSE); KeWaitForSingleObject(&Vcb->balance.finished, Executive, KernelMode, FALSE, NULL); } - + + if (Vcb->scrub.thread) { + 
Vcb->scrub.paused = FALSE; + Vcb->scrub.stopping = TRUE; + KeSetEvent(&Vcb->scrub.event, 0, FALSE); + KeWaitForSingleObject(&Vcb->scrub.finished, Executive, KernelMode, FALSE, NULL); + } + + if (Vcb->running_sends != 0) { + BOOL send_cancelled = FALSE; + + ExAcquireResourceExclusiveLite(&Vcb->send_load_lock, TRUE); + + le = Vcb->send_ops.Flink; + while (le != &Vcb->send_ops) { + send_info* send = CONTAINING_RECORD(le, send_info, list_entry); + + if (!send->cancelling) { + send->cancelling = TRUE; + send_cancelled = TRUE; + send->ccb = NULL; + KeSetEvent(&send->cleared_event, 0, FALSE); + } + + le = le->Flink; + } + + ExReleaseResourceLite(&Vcb->send_load_lock); + + if (send_cancelled) { + while (Vcb->running_sends != 0) { + ExAcquireResourceExclusiveLite(&Vcb->send_load_lock, TRUE); + ExReleaseResourceLite(&Vcb->send_load_lock); + } + } + } + Status = registry_mark_volume_unmounted(&Vcb->superblock.uuid); if (!NT_SUCCESS(Status) && Status != STATUS_TOO_LATE) WARN("registry_mark_volume_unmounted returned %08x\n", Status); - + if (flush) { - InitializeListHead(&rollback); - ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); - if (Vcb->need_write && !Vcb->readonly) - do_write(Vcb, NULL, &rollback); - + if (Vcb->need_write && !Vcb->readonly) { + Status = do_write(Vcb, NULL); + if (!NT_SUCCESS(Status)) + ERR("do_write returned %08x\n", Status); + } + free_trees(Vcb); - - clear_rollback(Vcb, &rollback); ExReleaseResourceLite(&Vcb->tree_lock); } - + for (i = 0; i < Vcb->calcthreads.num_threads; i++) { Vcb->calcthreads.threads[i].quit = TRUE; } - + KeSetEvent(&Vcb->calcthreads.event, 0, FALSE); - + for (i = 0; i < Vcb->calcthreads.num_threads; i++) { KeWaitForSingleObject(&Vcb->calcthreads.threads[i].finished, Executive, KernelMode, FALSE, NULL); - + ZwClose(Vcb->calcthreads.threads[i].handle); } - + ExDeleteResourceLite(&Vcb->calcthreads.lock); ExFreePool(Vcb->calcthreads.threads); - + time.QuadPart = 0; KeSetTimer(&Vcb->flush_thread_timer, time, NULL); // trigger the 
timer early KeWaitForSingleObject(&Vcb->flush_thread_finished, Executive, KernelMode, FALSE, NULL); - - free_fcb(Vcb->volume_fcb); - + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fcb(Vcb, Vcb->volume_fcb); + free_fcb(Vcb, Vcb->dummy_fcb); + ExReleaseResourceLite(&Vcb->fcb_lock); + if (Vcb->root_file) ObDereferenceObject(Vcb->root_file); - + le = Vcb->chunks.Flink; while (le != &Vcb->chunks) { chunk* c = CONTAINING_RECORD(le, chunk, list_entry); - + if (c->cache) { - free_fcb(c->cache); + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fcb(Vcb, c->cache); + ExReleaseResourceLite(&Vcb->fcb_lock); c->cache = NULL; } - + le = le->Flink; } while (!IsListEmpty(&Vcb->roots)) { - LIST_ENTRY* le = RemoveHeadList(&Vcb->roots); - root* r = CONTAINING_RECORD(le, root, list_entry); + root* r = CONTAINING_RECORD(RemoveHeadList(&Vcb->roots), root, list_entry); ExDeleteResourceLite(&r->nonpaged->load_tree_lock); ExFreePool(r->nonpaged); ExFreePool(r); } - + while (!IsListEmpty(&Vcb->chunks)) { - chunk* c; - - le = RemoveHeadList(&Vcb->chunks); - c = CONTAINING_RECORD(le, chunk, list_entry); - + chunk* c = CONTAINING_RECORD(RemoveHeadList(&Vcb->chunks), chunk, list_entry); + while (!IsListEmpty(&c->space)) { LIST_ENTRY* le2 = RemoveHeadList(&c->space); - s = CONTAINING_RECORD(le2, space, list_entry); - + space* s = CONTAINING_RECORD(le2, space, list_entry); + ExFreePool(s); } - + while (!IsListEmpty(&c->deleting)) { LIST_ENTRY* le2 = RemoveHeadList(&c->deleting); - s = CONTAINING_RECORD(le2, space, list_entry); - + space* s = CONTAINING_RECORD(le2, space, list_entry); + ExFreePool(s); } - + if (c->devices) ExFreePool(c->devices); - - if (c->cache) - free_fcb(c->cache); - + + if (c->cache) { + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fcb(Vcb, c->cache); + ExReleaseResourceLite(&Vcb->fcb_lock); + } + + ExDeleteResourceLite(&c->range_locks_lock); + ExDeleteResourceLite(&c->partial_stripes_lock); ExDeleteResourceLite(&c->lock); 
ExDeleteResourceLite(&c->changed_extents_lock); - + ExFreePool(c->chunk_item); ExFreePool(c); } - + // FIXME - free any open fcbs? - + while (!IsListEmpty(&Vcb->devices)) { - LIST_ENTRY* le = RemoveHeadList(&Vcb->devices); - device* dev = CONTAINING_RECORD(le, device, list_entry); - + device* dev = CONTAINING_RECORD(RemoveHeadList(&Vcb->devices), device, list_entry); + while (!IsListEmpty(&dev->space)) { LIST_ENTRY* le2 = RemoveHeadList(&dev->space); space* s = CONTAINING_RECORD(le2, space, list_entry); - + ExFreePool(s); } - + ExFreePool(dev); } - + + ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE); + while (!IsListEmpty(&Vcb->scrub.errors)) { + scrub_error* err = CONTAINING_RECORD(RemoveHeadList(&Vcb->scrub.errors), scrub_error, list_entry); + + ExFreePool(err); + } + ExReleaseResourceLite(&Vcb->scrub.stats_lock); + ExDeleteResourceLite(&Vcb->fcb_lock); ExDeleteResourceLite(&Vcb->load_lock); ExDeleteResourceLite(&Vcb->tree_lock); ExDeleteResourceLite(&Vcb->chunk_lock); - + ExDeleteResourceLite(&Vcb->dirty_fcbs_lock); + ExDeleteResourceLite(&Vcb->dirty_filerefs_lock); + ExDeleteResourceLite(&Vcb->dirty_subvols_lock); + ExDeleteResourceLite(&Vcb->scrub.stats_lock); + ExDeleteResourceLite(&Vcb->send_load_lock); + ExDeletePagedLookasideList(&Vcb->tree_data_lookaside); ExDeletePagedLookasideList(&Vcb->traverse_ptr_lookaside); - ExDeletePagedLookasideList(&Vcb->rollback_item_lookaside); ExDeletePagedLookasideList(&Vcb->batch_item_lookaside); + ExDeletePagedLookasideList(&Vcb->fileref_lookaside); + ExDeletePagedLookasideList(&Vcb->fcb_lookaside); + ExDeletePagedLookasideList(&Vcb->name_bit_lookaside); ExDeleteNPagedLookasideList(&Vcb->range_lock_lookaside); - + ExDeleteNPagedLookasideList(&Vcb->fileref_np_lookaside); + ExDeleteNPagedLookasideList(&Vcb->fcb_np_lookaside); + ZwClose(Vcb->flush_thread_handle); } -NTSTATUS delete_fileref(file_ref* fileref, PFILE_OBJECT FileObject, PIRP Irp, LIST_ENTRY* rollback) { +NTSTATUS delete_fileref(_In_ file_ref* 
fileref, _In_ PFILE_OBJECT FileObject, _In_opt_ PIRP Irp, _In_ LIST_ENTRY* rollback) { LARGE_INTEGER newlength, time; BTRFS_TIME now; NTSTATUS Status; + ULONG utf8len = 0; KeQuerySystemTime(&time); win_time_to_unix(time, &now); ExAcquireResourceExclusiveLite(fileref->fcb->Header.Resource, TRUE); - + if (fileref->deleted) { ExReleaseResourceLite(fileref->fcb->Header.Resource); return STATUS_SUCCESS; } - + + if (fileref->fcb->subvol->send_ops > 0) { + ExReleaseResourceLite(fileref->fcb->Header.Resource); + return STATUS_ACCESS_DENIED; + } + fileref->deleted = TRUE; mark_fileref_dirty(fileref); - + // delete INODE_ITEM (0x1) TRACE("nlink = %u\n", fileref->fcb->inode_item.st_nlink); - + if (!fileref->fcb->ads) { if (fileref->parent->fcb->subvol == fileref->fcb->subvol) { LIST_ENTRY* le; - + mark_fcb_dirty(fileref->fcb); - + fileref->fcb->inode_item_changed = TRUE; - + if (fileref->fcb->inode_item.st_nlink > 1) { fileref->fcb->inode_item.st_nlink--; fileref->fcb->inode_item.transid = fileref->fcb->Vcb->superblock.generation; fileref->fcb->inode_item.sequence++; fileref->fcb->inode_item.st_ctime = now; } else { - fileref->fcb->deleted = TRUE; - // excise extents - + if (fileref->fcb->type != BTRFS_TYPE_DIRECTORY && fileref->fcb->inode_item.st_size > 0) { Status = excise_extents(fileref->fcb->Vcb, fileref->fcb, 0, sector_align(fileref->fcb->inode_item.st_size, fileref->fcb->Vcb->superblock.sector_size), Irp, rollback); if (!NT_SUCCESS(Status)) { @@ -2051,51 +1967,67 @@ NTSTATUS delete_fileref(file_ref* fileref, PFILE_OBJECT FileObject, PIRP Irp, LI return Status; } } - + fileref->fcb->Header.AllocationSize.QuadPart = 0; fileref->fcb->Header.FileSize.QuadPart = 0; fileref->fcb->Header.ValidDataLength.QuadPart = 0; - + if (FileObject) { CC_FILE_SIZES ccfs; - + ccfs.AllocationSize = fileref->fcb->Header.AllocationSize; ccfs.FileSize = fileref->fcb->Header.FileSize; ccfs.ValidDataLength = fileref->fcb->Header.ValidDataLength; - - CcSetFileSizes(FileObject, &ccfs); + + Status 
= STATUS_SUCCESS; + + _SEH2_TRY { + CcSetFileSizes(FileObject, &ccfs); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + + if (!NT_SUCCESS(Status)) { + ERR("CcSetFileSizes threw exception %08x\n", Status); + ExReleaseResourceLite(fileref->fcb->Header.Resource); + return Status; + } } + + fileref->fcb->deleted = TRUE; } - - le = fileref->fcb->hardlinks.Flink; - while (le != &fileref->fcb->hardlinks) { - hardlink* hl = CONTAINING_RECORD(le, hardlink, list_entry); - - if (hl->parent == fileref->parent->fcb->inode && hl->index == fileref->index) { - RemoveEntryList(&hl->list_entry); - - if (hl->name.Buffer) - ExFreePool(hl->name.Buffer); - - if (hl->utf8.Buffer) - ExFreePool(hl->utf8.Buffer); - - ExFreePool(hl); - break; + + if (fileref->dc) { + le = fileref->fcb->hardlinks.Flink; + while (le != &fileref->fcb->hardlinks) { + hardlink* hl = CONTAINING_RECORD(le, hardlink, list_entry); + + if (hl->parent == fileref->parent->fcb->inode && hl->index == fileref->dc->index) { + RemoveEntryList(&hl->list_entry); + + if (hl->name.Buffer) + ExFreePool(hl->name.Buffer); + + if (hl->utf8.Buffer) + ExFreePool(hl->utf8.Buffer); + + ExFreePool(hl); + break; + } + + le = le->Flink; } - - le = le->Flink; } - } else { // subvolume + } else if (fileref->fcb->subvol->parent == fileref->parent->fcb->subvol->id) { // valid subvolume if (fileref->fcb->subvol->root_item.num_references > 1) { fileref->fcb->subvol->root_item.num_references--; - + mark_fcb_dirty(fileref->fcb); // so ROOT_ITEM gets updated } else { // FIXME - we need a lock here - + RemoveEntryList(&fileref->fcb->subvol->list_entry); - + InsertTailList(&fileref->fcb->Vcb->drop_roots, &fileref->fcb->subvol->list_entry); } } @@ -2103,144 +2035,200 @@ NTSTATUS delete_fileref(file_ref* fileref, PFILE_OBJECT FileObject, PIRP Irp, LI fileref->fcb->deleted = TRUE; mark_fcb_dirty(fileref->fcb); } - + // remove dir_child from parent - + if (fileref->dc) { + TRACE("delete file %.*S\n", 
fileref->dc->name.Length / sizeof(WCHAR), fileref->dc->name.Buffer); + ExAcquireResourceExclusiveLite(&fileref->parent->fcb->nonpaged->dir_children_lock, TRUE); RemoveEntryList(&fileref->dc->list_entry_index); - remove_dir_child_from_hash_lists(fileref->parent->fcb, fileref->dc); + + if (!fileref->fcb->ads) + remove_dir_child_from_hash_lists(fileref->parent->fcb, fileref->dc); + ExReleaseResourceLite(&fileref->parent->fcb->nonpaged->dir_children_lock); - - ExFreePool(fileref->dc->utf8.Buffer); + + if (!fileref->oldutf8.Buffer) + fileref->oldutf8 = fileref->dc->utf8; + else + ExFreePool(fileref->dc->utf8.Buffer); + + utf8len = fileref->dc->utf8.Length; + + fileref->oldindex = fileref->dc->index; + ExFreePool(fileref->dc->name.Buffer); ExFreePool(fileref->dc->name_uc.Buffer); ExFreePool(fileref->dc); - + fileref->dc = NULL; } - + // update INODE_ITEM of parent - - TRACE("delete file %.*S\n", fileref->filepart.Length / sizeof(WCHAR), fileref->filepart.Buffer); + ExAcquireResourceExclusiveLite(fileref->parent->fcb->Header.Resource, TRUE); - TRACE("fileref->parent->fcb->inode_item.st_size (inode %llx) was %llx\n", fileref->parent->fcb->inode, fileref->parent->fcb->inode_item.st_size); - fileref->parent->fcb->inode_item.st_size -= fileref->utf8.Length * 2; - TRACE("fileref->parent->fcb->inode_item.st_size (inode %llx) now %llx\n", fileref->parent->fcb->inode, fileref->parent->fcb->inode_item.st_size); + fileref->parent->fcb->inode_item.transid = fileref->fcb->Vcb->superblock.generation; fileref->parent->fcb->inode_item.sequence++; fileref->parent->fcb->inode_item.st_ctime = now; - fileref->parent->fcb->inode_item.st_mtime = now; - ExReleaseResourceLite(fileref->parent->fcb->Header.Resource); + + if (!fileref->fcb->ads) { + TRACE("fileref->parent->fcb->inode_item.st_size (inode %llx) was %llx\n", fileref->parent->fcb->inode, fileref->parent->fcb->inode_item.st_size); + fileref->parent->fcb->inode_item.st_size -= utf8len * 2; + 
TRACE("fileref->parent->fcb->inode_item.st_size (inode %llx) now %llx\n", fileref->parent->fcb->inode, fileref->parent->fcb->inode_item.st_size); + fileref->parent->fcb->inode_item.st_mtime = now; + } fileref->parent->fcb->inode_item_changed = TRUE; + ExReleaseResourceLite(fileref->parent->fcb->Header.Resource); + + if (!fileref->fcb->ads && fileref->parent->dc) + send_notification_fcb(fileref->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED, NULL); + mark_fcb_dirty(fileref->parent->fcb); - - send_notification_fcb(fileref->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED); - + fileref->fcb->subvol->root_item.ctransid = fileref->fcb->Vcb->superblock.generation; fileref->fcb->subvol->root_item.ctime = now; - + newlength.QuadPart = 0; - + if (FileObject && !CcUninitializeCacheMap(FileObject, &newlength, NULL)) TRACE("CcUninitializeCacheMap failed\n"); ExReleaseResourceLite(fileref->fcb->Header.Resource); - + return STATUS_SUCCESS; } -static NTSTATUS STDCALL drv_cleanup(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { +_Dispatch_type_(IRP_MJ_CLEANUP) +_Function_class_(DRIVER_DISPATCH) +static NTSTATUS drv_cleanup(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp) { NTSTATUS Status; PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); PFILE_OBJECT FileObject = IrpSp->FileObject; device_extension* Vcb = DeviceObject->DeviceExtension; - fcb* fcb; + fcb* fcb = FileObject->FsContext; BOOL top_level; - TRACE("cleanup\n"); - FsRtlEnterFileSystem(); + TRACE("cleanup\n"); + top_level = is_top_level(Irp); - - if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) { - Status = part0_passthrough(DeviceObject, Irp); - goto exit2; - } - - if (DeviceObject == devobj) { + + if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + Status = vol_cleanup(DeviceObject, Irp); + goto exit; + } else if (DeviceObject == master_devobj) { TRACE("closing file system\n"); Status = STATUS_SUCCESS; goto exit; + } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { + Status = STATUS_INVALID_PARAMETER; 
+ goto exit; + } + + if (FileObject->Flags & FO_CLEANUP_COMPLETE) { + TRACE("FileObject %p already cleaned up\n", FileObject); + Status = STATUS_SUCCESS; + goto exit; + } + + if (!fcb) { + ERR("fcb was NULL\n"); + Status = STATUS_INVALID_PARAMETER; + goto exit; } - + + // We have to use the pointer to Vcb stored in the fcb, as we can receive cleanup + // messages belonging to other devices. + if (FileObject && FileObject->FsContext) { LONG oc; ccb* ccb; file_ref* fileref; - - fcb = FileObject->FsContext; + BOOL locked = TRUE; + ccb = FileObject->FsContext2; fileref = ccb ? ccb->fileref : NULL; - + TRACE("cleanup called for FileObject %p\n", FileObject); TRACE("fileref %p (%S), refcount = %u, open_count = %u\n", fileref, file_desc(FileObject), fileref ? fileref->refcount : 0, fileref ? fileref->open_count : 0); - + + ExAcquireResourceSharedLite(&fcb->Vcb->tree_lock, TRUE); + + ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); + IoRemoveShareAccess(FileObject, &fcb->share_access); - - FsRtlNotifyCleanup(Vcb->NotifySync, &Vcb->DirNotifyList, ccb); - + + if (ccb) + FsRtlNotifyCleanup(fcb->Vcb->NotifySync, &fcb->Vcb->DirNotifyList, ccb); + if (fileref) { oc = InterlockedDecrement(&fileref->open_count); #ifdef DEBUG_FCB_REFCOUNTS ERR("fileref %p: open_count now %i\n", fileref, oc); #endif } - + if (ccb && ccb->options & FILE_DELETE_ON_CLOSE && fileref) fileref->delete_on_close = TRUE; - - if (fileref && fileref->delete_on_close && fcb->type == BTRFS_TYPE_DIRECTORY && fcb->inode_item.st_size > 0) + + if (fileref && fileref->delete_on_close && fcb->type == BTRFS_TYPE_DIRECTORY && fcb->inode_item.st_size > 0 && fcb != fcb->Vcb->dummy_fcb) fileref->delete_on_close = FALSE; - - if (Vcb->locked && Vcb->locked_fileobj == FileObject) { + + if (fcb->Vcb->locked && fcb->Vcb->locked_fileobj == FileObject) { TRACE("unlocking volume\n"); - do_unlock_volume(Vcb); + do_unlock_volume(fcb->Vcb); FsRtlNotifyVolumeEvent(FileObject, FSRTL_VOLUME_UNLOCK); } - + + if (ccb && 
ccb->reserving) { + fcb->subvol->reserved = NULL; + ccb->reserving = FALSE; + // FIXME - flush all of subvol's fcbs + } + if (fileref && oc == 0) { - if (!Vcb->removing) { - LIST_ENTRY rollback; - - InitializeListHead(&rollback); - + if (!fcb->Vcb->removing) { if (fileref && fileref->delete_on_close && fileref != fcb->Vcb->root_fileref && fcb != fcb->Vcb->volume_fcb) { - send_notification_fileref(fileref, fcb->type == BTRFS_TYPE_DIRECTORY ? FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_REMOVED); - - ExAcquireResourceSharedLite(&fcb->Vcb->tree_lock, TRUE); - + LIST_ENTRY rollback; + + InitializeListHead(&rollback); + + if (!fileref->fcb->ads || fileref->dc) { + if (fileref->fcb->ads) { + send_notification_fileref(fileref->parent, fcb->type == BTRFS_TYPE_DIRECTORY ? FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, + FILE_ACTION_REMOVED, &fileref->dc->name); + } else + send_notification_fileref(fileref, fcb->type == BTRFS_TYPE_DIRECTORY ? FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_REMOVED, NULL); + } + + ExReleaseResourceLite(fcb->Header.Resource); + locked = FALSE; + + // fcb_lock needs to be acquired before fcb->Header.Resource ExAcquireResourceExclusiveLite(&fcb->Vcb->fcb_lock, TRUE); - + Status = delete_fileref(fileref, FileObject, Irp, &rollback); if (!NT_SUCCESS(Status)) { ERR("delete_fileref returned %08x\n", Status); - do_rollback(Vcb, &rollback); + do_rollback(fcb->Vcb, &rollback); ExReleaseResourceLite(&fcb->Vcb->fcb_lock); ExReleaseResourceLite(&fcb->Vcb->tree_lock); goto exit; } - + ExReleaseResourceLite(&fcb->Vcb->fcb_lock); - - ExReleaseResourceLite(&fcb->Vcb->tree_lock); - clear_rollback(Vcb, &rollback); + + locked = FALSE; + + clear_rollback(&rollback); } else if (FileObject->Flags & FO_CACHE_SUPPORTED && fcb->nonpaged->segment_object.DataSectionObject) { IO_STATUS_BLOCK iosb; CcFlushCache(FileObject->SectionObjectPointer, NULL, 0, &iosb); - + if (!NT_SUCCESS(iosb.Status)) { 
ERR("CcFlushCache returned %08x\n", iosb.Status); } @@ -2251,44 +2239,51 @@ static NTSTATUS STDCALL drv_cleanup(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) } CcPurgeCacheSection(&fcb->nonpaged->segment_object, NULL, 0, FALSE); - + TRACE("flushed cache on close (FileObject = %p, fcb = %p, AllocationSize = %llx, FileSize = %llx, ValidDataLength = %llx)\n", FileObject, fcb, fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart); } } - + if (fcb->Vcb && fcb != fcb->Vcb->volume_fcb) CcUninitializeCacheMap(FileObject, NULL, NULL); } - + + if (locked) + ExReleaseResourceLite(fcb->Header.Resource); + + ExReleaseResourceLite(&fcb->Vcb->tree_lock); + FileObject->Flags |= FO_CLEANUP_COMPLETE; } - + Status = STATUS_SUCCESS; exit: + TRACE("returning %08x\n", Status); + Irp->IoStatus.Status = Status; Irp->IoStatus.Information = 0; - + IoCompleteRequest(Irp, IO_NO_INCREMENT); - -exit2: - if (top_level) + + if (top_level) IoSetTopLevelIrp(NULL); - + FsRtlExitFileSystem(); return Status; } -BOOL get_file_attributes_from_xattr(char* val, UINT16 len, ULONG* atts) { +_Success_(return) +BOOL get_file_attributes_from_xattr(_In_reads_bytes_(len) char* val, _In_ UINT16 len, _Out_ ULONG* atts) { if (len > 2 && val[0] == '0' && val[1] == 'x') { int i; ULONG dosnum = 0; for (i = 2; i < len; i++) { dosnum *= 0x10; - + if (val[i] >= '0' && val[i] <= '9') dosnum |= val[i] - '0'; else if (val[i] >= 'a' && val[i] <= 'f') @@ -2296,252 +2291,260 @@ BOOL get_file_attributes_from_xattr(char* val, UINT16 len, ULONG* atts) { else if (val[i] >= 'A' && val[i] <= 'F') dosnum |= val[i] + 10 - 'a'; } - + TRACE("DOSATTRIB: %08x\n", dosnum); - + *atts = dosnum; - + return TRUE; } - + return FALSE; } -ULONG STDCALL get_file_attributes(device_extension* Vcb, INODE_ITEM* ii, root* r, UINT64 inode, UINT8 type, BOOL dotfile, BOOL ignore_xa, PIRP Irp) { +ULONG get_file_attributes(_In_ _Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _In_ root* r, 
_In_ UINT64 inode, + _In_ UINT8 type, _In_ BOOL dotfile, _In_ BOOL ignore_xa, _In_opt_ PIRP Irp) { ULONG att; char* eaval; UINT16 ealen; - - // ii can be NULL - + if (!ignore_xa && get_xattr(Vcb, r, inode, EA_DOSATTRIB, EA_DOSATTRIB_HASH, (UINT8**)&eaval, &ealen, Irp)) { ULONG dosnum = 0; - + if (get_file_attributes_from_xattr(eaval, ealen, &dosnum)) { ExFreePool(eaval); - + if (type == BTRFS_TYPE_DIRECTORY) dosnum |= FILE_ATTRIBUTE_DIRECTORY; else if (type == BTRFS_TYPE_SYMLINK) dosnum |= FILE_ATTRIBUTE_REPARSE_POINT; - + + if (type != BTRFS_TYPE_DIRECTORY) + dosnum &= ~FILE_ATTRIBUTE_DIRECTORY; + if (inode == SUBVOL_ROOT_INODE) { if (r->root_item.flags & BTRFS_SUBVOL_READONLY) dosnum |= FILE_ATTRIBUTE_READONLY; else dosnum &= ~FILE_ATTRIBUTE_READONLY; } - + return dosnum; } - + ExFreePool(eaval); } - + switch (type) { case BTRFS_TYPE_DIRECTORY: att = FILE_ATTRIBUTE_DIRECTORY; break; - + case BTRFS_TYPE_SYMLINK: att = FILE_ATTRIBUTE_REPARSE_POINT; break; - + default: att = 0; break; } - + if (dotfile) { att |= FILE_ATTRIBUTE_HIDDEN; } - + att |= FILE_ATTRIBUTE_ARCHIVE; - + if (inode == SUBVOL_ROOT_INODE) { if (r->root_item.flags & BTRFS_SUBVOL_READONLY) att |= FILE_ATTRIBUTE_READONLY; else att &= ~FILE_ATTRIBUTE_READONLY; } - + // FIXME - get READONLY from ii->st_mode // FIXME - return SYSTEM for block/char devices? 
- + if (att == 0) att = FILE_ATTRIBUTE_NORMAL; - + return att; } -NTSTATUS sync_read_phys(PDEVICE_OBJECT DeviceObject, LONGLONG StartingOffset, ULONG Length, PUCHAR Buffer, BOOL override) { - IO_STATUS_BLOCK* IoStatus; +NTSTATUS sync_read_phys(_In_ PDEVICE_OBJECT DeviceObject, _In_ UINT64 StartingOffset, _In_ ULONG Length, + _Out_writes_bytes_(Length) PUCHAR Buffer, _In_ BOOL override) { + IO_STATUS_BLOCK IoStatus; LARGE_INTEGER Offset; PIRP Irp; PIO_STACK_LOCATION IrpSp; NTSTATUS Status; - read_context* context; - + read_context context; + num_reads++; - - context = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_context), ALLOC_TAG); - if (!context) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - RtlZeroMemory(context, sizeof(read_context)); - KeInitializeEvent(&context->Event, NotificationEvent, FALSE); - - IoStatus = ExAllocatePoolWithTag(NonPagedPool, sizeof(IO_STATUS_BLOCK), ALLOC_TAG); - if (!IoStatus) { - ERR("out of memory\n"); - ExFreePool(context); - return STATUS_INSUFFICIENT_RESOURCES; - } - Offset.QuadPart = StartingOffset; + RtlZeroMemory(&context, sizeof(read_context)); + KeInitializeEvent(&context.Event, NotificationEvent, FALSE); + + Offset.QuadPart = (LONGLONG)StartingOffset; -// Irp = IoBuildSynchronousFsdRequest(IRP_MJ_READ, DeviceObject, Buffer, Length, &Offset, /*&Event*/NULL, IoStatus); Irp = IoAllocateIrp(DeviceObject->StackSize, FALSE); - + if (!Irp) { ERR("IoAllocateIrp failed\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; + return STATUS_INSUFFICIENT_RESOURCES; } - + Irp->Flags |= IRP_NOCACHE; IrpSp = IoGetNextIrpStackLocation(Irp); IrpSp->MajorFunction = IRP_MJ_READ; - + if (override) IrpSp->Flags |= SL_OVERRIDE_VERIFY_VOLUME; - + if (DeviceObject->Flags & DO_BUFFERED_IO) { - FIXME("FIXME - buffered IO\n"); + Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, Length, ALLOC_TAG); + if (!Irp->AssociatedIrp.SystemBuffer) { + ERR("out of memory\n"); + Status = 
STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION; + + Irp->UserBuffer = Buffer; } else if (DeviceObject->Flags & DO_DIRECT_IO) { -// TRACE("direct IO\n"); - Irp->MdlAddress = IoAllocateMdl(Buffer, Length, FALSE, FALSE, NULL); if (!Irp->MdlAddress) { ERR("IoAllocateMdl failed\n"); Status = STATUS_INSUFFICIENT_RESOURCES; - // IoFreeIrp(Irp); goto exit; -// } else { -// TRACE("got MDL %p from buffer %p\n", Irp->MdlAddress, Buffer); } - - MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoWriteAccess); - } else { -// TRACE("neither buffered nor direct IO\n"); - Irp->UserBuffer = Buffer; - } - IrpSp->Parameters.Read.Length = Length; - IrpSp->Parameters.Read.ByteOffset = Offset; - - Irp->UserIosb = IoStatus; -// Irp->Tail.Overlay.Thread = PsGetCurrentThread(); - - Irp->UserEvent = &context->Event; + Status = STATUS_SUCCESS; -// IoQueueThreadIrp(Irp); - - IoSetCompletionRoutine(Irp, read_completion, context, TRUE, TRUE, TRUE); + _SEH2_TRY { + MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoWriteAccess); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + + if (!NT_SUCCESS(Status)) { + ERR("MmProbeAndLockPages threw exception %08x\n", Status); + IoFreeMdl(Irp->MdlAddress); + goto exit; + } + } else + Irp->UserBuffer = Buffer; + + IrpSp->Parameters.Read.Length = Length; + IrpSp->Parameters.Read.ByteOffset = Offset; + + Irp->UserIosb = &IoStatus; + + Irp->UserEvent = &context.Event; + + IoSetCompletionRoutine(Irp, read_completion, &context, TRUE, TRUE, TRUE); Status = IoCallDriver(DeviceObject, Irp); if (Status == STATUS_PENDING) { - KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL); - Status = context->iosb.Status; + KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL); + Status = context.iosb.Status; } - + if (DeviceObject->Flags & DO_DIRECT_IO) { MmUnlockPages(Irp->MdlAddress); 
IoFreeMdl(Irp->MdlAddress); } - + exit: IoFreeIrp(Irp); - ExFreePool(IoStatus); - ExFreePool(context); - return Status; } -static NTSTATUS STDCALL read_superblock(device_extension* Vcb, PDEVICE_OBJECT device, UINT64 length) { +static NTSTATUS read_superblock(_In_ device_extension* Vcb, _In_ PDEVICE_OBJECT device, _In_ UINT64 length) { NTSTATUS Status; superblock* sb; - unsigned int i, to_read; + ULONG i, to_read; UINT8 valid_superblocks; - - to_read = device->SectorSize == 0 ? sizeof(superblock) : sector_align(sizeof(superblock), device->SectorSize); - + + to_read = device->SectorSize == 0 ? sizeof(superblock) : (ULONG)sector_align(sizeof(superblock), device->SectorSize); + sb = ExAllocatePoolWithTag(NonPagedPool, to_read, ALLOC_TAG); if (!sb) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + + if (superblock_addrs[0] + to_read > length) { + WARN("device was too short to have any superblock\n"); + ExFreePool(sb); + return STATUS_UNRECOGNIZED_VOLUME; + } + i = 0; valid_superblocks = 0; - + while (superblock_addrs[i] > 0) { UINT32 crc32; - - if (i > 0 && superblock_addrs[i] + sizeof(superblock) > length) + + if (i > 0 && superblock_addrs[i] + to_read > length) break; - + Status = sync_read_phys(device, superblock_addrs[i], to_read, (PUCHAR)sb, FALSE); if (!NT_SUCCESS(Status)) { ERR("Failed to read superblock %u: %08x\n", i, Status); ExFreePool(sb); return Status; } - + if (sb->magic != BTRFS_MAGIC) { if (i == 0) { TRACE("not a BTRFS volume\n"); + ExFreePool(sb); return STATUS_UNRECOGNIZED_VOLUME; } } else { TRACE("got superblock %u!\n", i); - + crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum)); - + if (crc32 != *((UINT32*)sb->checksum)) WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)sb->checksum)); + else if (sb->sector_size == 0) + WARN("superblock sector size was 0\n"); + else if (sb->node_size < sizeof(tree_header) + sizeof(internal_node) || sb->node_size > 0x10000) + WARN("invalid 
node size %x\n", sb->node_size); + else if ((sb->node_size % sb->sector_size) != 0) + WARN("node size %x was not a multiple of sector_size %x\n", sb->node_size, sb->sector_size); else if (valid_superblocks == 0 || sb->generation > Vcb->superblock.generation) { RtlCopyMemory(&Vcb->superblock, sb, sizeof(superblock)); valid_superblocks++; } } - + i++; } - + ExFreePool(sb); - + if (valid_superblocks == 0) { ERR("could not find any valid superblocks\n"); return STATUS_INTERNAL_ERROR; } - + TRACE("label is %s\n", Vcb->superblock.label); - + return STATUS_SUCCESS; } -NTSTATUS STDCALL dev_ioctl(PDEVICE_OBJECT DeviceObject, ULONG ControlCode, PVOID InputBuffer, ULONG InputBufferSize, - PVOID OutputBuffer, ULONG OutputBufferSize, BOOLEAN Override, IO_STATUS_BLOCK* iosb) -{ +NTSTATUS dev_ioctl(_In_ PDEVICE_OBJECT DeviceObject, _In_ ULONG ControlCode, _In_reads_bytes_opt_(InputBufferSize) PVOID InputBuffer, _In_ ULONG InputBufferSize, + _Out_writes_bytes_opt_(OutputBufferSize) PVOID OutputBuffer, _In_ ULONG OutputBufferSize, _In_ BOOLEAN Override, _Out_opt_ IO_STATUS_BLOCK* iosb) { PIRP Irp; KEVENT Event; NTSTATUS Status; - PIO_STACK_LOCATION Stack; + PIO_STACK_LOCATION IrpSp; IO_STATUS_BLOCK IoStatus; KeInitializeEvent(&Event, NotificationEvent, FALSE); @@ -2559,8 +2562,8 @@ NTSTATUS STDCALL dev_ioctl(PDEVICE_OBJECT DeviceObject, ULONG ControlCode, PVOID if (!Irp) return STATUS_INSUFFICIENT_RESOURCES; if (Override) { - Stack = IoGetNextIrpStackLocation(Irp); - Stack->Flags |= SL_OVERRIDE_VERIFY_VOLUME; + IrpSp = IoGetNextIrpStackLocation(Irp); + IrpSp->Flags |= SL_OVERRIDE_VERIFY_VOLUME; } Status = IoCallDriver(DeviceObject, Irp); @@ -2569,24 +2572,31 @@ NTSTATUS STDCALL dev_ioctl(PDEVICE_OBJECT DeviceObject, ULONG ControlCode, PVOID KeWaitForSingleObject(&Event, Executive, KernelMode, FALSE, NULL); Status = IoStatus.Status; } - + if (iosb) *iosb = IoStatus; return Status; } -static NTSTATUS STDCALL add_root(device_extension* Vcb, UINT64 id, UINT64 addr, traverse_ptr* tp) { 
+_Requires_exclusive_lock_held_(Vcb->tree_lock) +static NTSTATUS add_root(_Inout_ device_extension* Vcb, _In_ UINT64 id, _In_ UINT64 addr, + _In_ UINT64 generation, _In_opt_ traverse_ptr* tp) { root* r = ExAllocatePoolWithTag(PagedPool, sizeof(root), ALLOC_TAG); if (!r) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + r->id = id; - r->path.Buffer = NULL; + r->dirty = FALSE; + r->received = FALSE; + r->reserved = NULL; r->treeholder.address = addr; r->treeholder.tree = NULL; + r->treeholder.generation = generation; + r->parent = 0; + r->send_ops = 0; InitializeListHead(&r->fcbs); r->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(root_nonpaged), ALLOC_TAG); @@ -2595,121 +2605,128 @@ static NTSTATUS STDCALL add_root(device_extension* Vcb, UINT64 id, UINT64 addr, ExFreePool(r); return STATUS_INSUFFICIENT_RESOURCES; } - + ExInitializeResourceLite(&r->nonpaged->load_tree_lock); - + r->lastinode = 0; - + if (tp) { RtlCopyMemory(&r->root_item, tp->item->data, min(sizeof(ROOT_ITEM), tp->item->size)); if (tp->item->size < sizeof(ROOT_ITEM)) RtlZeroMemory(((UINT8*)&r->root_item) + tp->item->size, sizeof(ROOT_ITEM) - tp->item->size); - } - + } else + RtlZeroMemory(&r->root_item, sizeof(ROOT_ITEM)); + if (!Vcb->readonly && (r->id == BTRFS_ROOT_ROOT || r->id == BTRFS_ROOT_FSTREE || (r->id >= 0x100 && !(r->id & 0xf000000000000000)))) { // FS tree root // FIXME - don't call this if subvol is readonly (though we will have to if we ever toggle this flag) get_last_inode(Vcb, r, NULL); - + if (r->id == BTRFS_ROOT_ROOT && r->lastinode < 0x100) r->lastinode = 0x100; } - + InsertTailList(&Vcb->roots, &r->list_entry); - + switch (r->id) { case BTRFS_ROOT_ROOT: Vcb->root_root = r; break; - + case BTRFS_ROOT_EXTENT: Vcb->extent_root = r; break; - + case BTRFS_ROOT_CHUNK: Vcb->chunk_root = r; break; - + case BTRFS_ROOT_DEVTREE: Vcb->dev_root = r; break; - + case BTRFS_ROOT_CHECKSUM: Vcb->checksum_root = r; break; - + case BTRFS_ROOT_UUID: Vcb->uuid_root = r; 
break; - + + case BTRFS_ROOT_FREE_SPACE: + Vcb->space_root = r; + break; + case BTRFS_ROOT_DATA_RELOC: Vcb->data_reloc_root = r; + break; } - + return STATUS_SUCCESS; } -static NTSTATUS STDCALL look_for_roots(device_extension* Vcb, PIRP Irp) { +static NTSTATUS look_for_roots(_Requires_exclusive_lock_held_(_Curr_->tree_lock) _In_ device_extension* Vcb, _In_opt_ PIRP Irp) { traverse_ptr tp, next_tp; KEY searchkey; BOOL b; NTSTATUS Status; - + searchkey.obj_id = 0; searchkey.obj_type = 0; searchkey.offset = 0; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { - ERR("error - find_tree returned %08x\n", Status); + ERR("error - find_item returned %08x\n", Status); return Status; } - + do { TRACE("(%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - + if (tp.item->key.obj_type == TYPE_ROOT_ITEM) { ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data; - + if (tp.item->size < offsetof(ROOT_ITEM, byte_limit)) { ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, offsetof(ROOT_ITEM, byte_limit)); } else { TRACE("root %llx - address %llx\n", tp.item->key.obj_id, ri->block_number); - - Status = add_root(Vcb, tp.item->key.obj_id, ri->block_number, &tp); + + Status = add_root(Vcb, tp.item->key.obj_id, ri->block_number, ri->generation, &tp); if (!NT_SUCCESS(Status)) { ERR("add_root returned %08x\n", Status); return Status; } } + } else if (tp.item->key.obj_type == TYPE_ROOT_BACKREF && !IsListEmpty(&Vcb->roots)) { + root* lastroot = CONTAINING_RECORD(Vcb->roots.Blink, root, list_entry); + + if (lastroot->id == tp.item->key.obj_id) + lastroot->parent = tp.item->key.offset; } - + b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp); - + if (b) tp = next_tp; } while (b); - + if (!Vcb->readonly && !Vcb->data_reloc_root) { root* reloc_root; INODE_ITEM* ii; - ULONG irlen; + UINT16 irlen; INODE_REF* ir; LARGE_INTEGER time; 
BTRFS_TIME now; - LIST_ENTRY rollback; - - InitializeListHead(&rollback); - + WARN("data reloc root doesn't exist, creating it\n"); - - Status = create_root(Vcb, BTRFS_ROOT_DATA_RELOC, &reloc_root, FALSE, 0, Irp, &rollback); - + + Status = create_root(Vcb, BTRFS_ROOT_DATA_RELOC, &reloc_root, FALSE, 0, Irp); + if (!NT_SUCCESS(Status)) { ERR("create_root returned %08x\n", Status); - do_rollback(Vcb, &rollback); - goto end; + return Status; } - + reloc_root->root_item.inode.generation = 1; reloc_root->root_item.inode.st_size = 3; reloc_root->root_item.inode.st_blocks = Vcb->superblock.node_size; @@ -2718,17 +2735,16 @@ static NTSTATUS STDCALL look_for_roots(device_extension* Vcb, PIRP Irp) { reloc_root->root_item.inode.flags = 0xffffffff80000000; reloc_root->root_item.objid = SUBVOL_ROOT_INODE; reloc_root->root_item.bytes_used = Vcb->superblock.node_size; - + ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG); if (!ii) { ERR("out of memory\n"); - do_rollback(Vcb, &rollback); - goto end; + return STATUS_INSUFFICIENT_RESOURCES; } - + KeQuerySystemTime(&time); win_time_to_unix(time, &now); - + RtlZeroMemory(ii, sizeof(INODE_ITEM)); ii->generation = Vcb->superblock.generation; ii->st_blocks = Vcb->superblock.node_size; @@ -2737,60 +2753,74 @@ static NTSTATUS STDCALL look_for_roots(device_extension* Vcb, PIRP Irp) { ii->st_atime = now; ii->st_ctime = now; ii->st_mtime = now; - - insert_tree_item(Vcb, reloc_root, SUBVOL_ROOT_INODE, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp, &rollback); - irlen = offsetof(INODE_REF, name[0]) + 2; + Status = insert_tree_item(Vcb, reloc_root, SUBVOL_ROOT_INODE, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + ExFreePool(ii); + return Status; + } + + irlen = (UINT16)offsetof(INODE_REF, name[0]) + 2; ir = ExAllocatePoolWithTag(PagedPool, irlen, ALLOC_TAG); if (!ir) { ERR("out of memory\n"); - do_rollback(Vcb, &rollback); - goto 
end; + return STATUS_INSUFFICIENT_RESOURCES; } - + ir->index = 0; ir->n = 2; ir->name[0] = '.'; ir->name[1] = '.'; - - insert_tree_item(Vcb, reloc_root, SUBVOL_ROOT_INODE, TYPE_INODE_REF, SUBVOL_ROOT_INODE, ir, irlen, NULL, Irp, &rollback); - - clear_rollback(Vcb, &rollback); - + + Status = insert_tree_item(Vcb, reloc_root, SUBVOL_ROOT_INODE, TYPE_INODE_REF, SUBVOL_ROOT_INODE, ir, irlen, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + ExFreePool(ir); + return Status; + } + Vcb->data_reloc_root = reloc_root; Vcb->need_write = TRUE; } - -end: + return STATUS_SUCCESS; } -static NTSTATUS find_disk_holes(device_extension* Vcb, device* dev, PIRP Irp) { +static NTSTATUS find_disk_holes(_In_ _Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _In_ device* dev, _In_opt_ PIRP Irp) { KEY searchkey; traverse_ptr tp, next_tp; BOOL b; UINT64 lastaddr; NTSTATUS Status; - + InitializeListHead(&dev->space); - + + searchkey.obj_id = 0; + searchkey.obj_type = TYPE_DEV_STATS; + searchkey.offset = dev->devitem.dev_id; + + Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, Irp); + if (NT_SUCCESS(Status) && !keycmp(tp.item->key, searchkey)) + RtlCopyMemory(dev->stats, tp.item->data, min(sizeof(UINT64) * 5, tp.item->size)); + searchkey.obj_id = dev->devitem.dev_id; searchkey.obj_type = TYPE_DEV_EXTENT; searchkey.offset = 0; - + Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { - ERR("error - find_tree returned %08x\n", Status); + ERR("error - find_item returned %08x\n", Status); return Status; } - + lastaddr = 0; - + do { if (tp.item->key.obj_id == dev->devitem.dev_id && tp.item->key.obj_type == TYPE_DEV_EXTENT) { if (tp.item->size >= sizeof(DEV_EXTENT)) { DEV_EXTENT* de = (DEV_EXTENT*)tp.item->data; - + if (tp.item->key.offset > lastaddr) { Status = add_space_entry(&dev->space, NULL, lastaddr, tp.item->key.offset - lastaddr); if (!NT_SUCCESS(Status)) { @@ -2804,16 +2834,16 
@@ static NTSTATUS find_disk_holes(device_extension* Vcb, device* dev, PIRP Irp) { ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DEV_EXTENT)); } } - + b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp); - + if (b) { tp = next_tp; if (tp.item->key.obj_id > searchkey.obj_id || tp.item->key.obj_type > searchkey.obj_type) break; } } while (b); - + if (lastaddr < dev->devitem.num_bytes) { Status = add_space_entry(&dev->space, NULL, lastaddr, dev->devitem.num_bytes - lastaddr); if (!NT_SUCCESS(Status)) { @@ -2821,195 +2851,183 @@ static NTSTATUS find_disk_holes(device_extension* Vcb, device* dev, PIRP Irp) { return Status; } } - + // The Linux driver doesn't like to allocate chunks within the first megabyte of a device. - - space_list_subtract2(Vcb, &dev->space, NULL, 0, 0x100000, NULL); - + + space_list_subtract2(&dev->space, NULL, 0, 0x100000, NULL, NULL); + return STATUS_SUCCESS; } -static void add_device_to_list(device_extension* Vcb, device* dev) { +static void add_device_to_list(_In_ device_extension* Vcb, _In_ device* dev) { LIST_ENTRY* le; - + le = Vcb->devices.Flink; - + while (le != &Vcb->devices) { device* dev2 = CONTAINING_RECORD(le, device, list_entry); - + if (dev2->devitem.dev_id > dev->devitem.dev_id) { InsertHeadList(le->Blink, &dev->list_entry); return; } - + le = le->Flink; } - + InsertTailList(&Vcb->devices, &dev->list_entry); } -device* find_device_from_uuid(device_extension* Vcb, BTRFS_UUID* uuid) { +_Ret_maybenull_ +device* find_device_from_uuid(_In_ device_extension* Vcb, _In_ BTRFS_UUID* uuid) { + volume_device_extension* vde; + pdo_device_extension* pdode; LIST_ENTRY* le; - + le = Vcb->devices.Flink; while (le != &Vcb->devices) { device* dev = CONTAINING_RECORD(le, device, list_entry); - + TRACE("device %llx, uuid %02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x\n", dev->devitem.dev_id, dev->devitem.device_uuid.uuid[0], 
dev->devitem.device_uuid.uuid[1], dev->devitem.device_uuid.uuid[2], dev->devitem.device_uuid.uuid[3], dev->devitem.device_uuid.uuid[4], dev->devitem.device_uuid.uuid[5], dev->devitem.device_uuid.uuid[6], dev->devitem.device_uuid.uuid[7], dev->devitem.device_uuid.uuid[8], dev->devitem.device_uuid.uuid[9], dev->devitem.device_uuid.uuid[10], dev->devitem.device_uuid.uuid[11], dev->devitem.device_uuid.uuid[12], dev->devitem.device_uuid.uuid[13], dev->devitem.device_uuid.uuid[14], dev->devitem.device_uuid.uuid[15]); - - if (dev->devobj && RtlCompareMemory(&dev->devitem.device_uuid, uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { + + if (RtlCompareMemory(&dev->devitem.device_uuid, uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { TRACE("returning device %llx\n", dev->devitem.dev_id); return dev; } - + le = le->Flink; } - - ExAcquireResourceSharedLite(&volumes_lock, TRUE); - - if (Vcb->devices_loaded < Vcb->superblock.num_devices && !IsListEmpty(&volumes)) { - LIST_ENTRY* le = volumes.Flink; - - while (le != &volumes) { - volume* v = CONTAINING_RECORD(le, volume, list_entry); - - if (RtlCompareMemory(&Vcb->superblock.uuid, &v->fsuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID) && - RtlCompareMemory(uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID) - ) { - NTSTATUS Status; - PFILE_OBJECT FileObject; - PDEVICE_OBJECT DeviceObject; + + vde = Vcb->vde; + + if (!vde) + goto end; + + pdode = vde->pdode; + + ExAcquireResourceSharedLite(&pdode->child_lock, TRUE); + + if (Vcb->devices_loaded < Vcb->superblock.num_devices) { + le = pdode->children.Flink; + + while (le != &pdode->children) { + volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry); + + if (RtlCompareMemory(uuid, &vc->uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { device* dev; - - Status = IoGetDeviceObjectPointer(&v->devpath, FILE_READ_ATTRIBUTES, &FileObject, &DeviceObject); - if (!NT_SUCCESS(Status)) { - ExReleaseResourceLite(&volumes_lock); - ERR("IoGetDeviceObjectPointer(%.*S) 
returned %08x\n", v->devpath.Length / sizeof(WCHAR), v->devpath.Buffer, Status); - return NULL; - } - - DeviceObject = FileObject->DeviceObject; - - ObReferenceObject(DeviceObject); - ObDereferenceObject(FileObject); - + dev = ExAllocatePoolWithTag(NonPagedPool, sizeof(device), ALLOC_TAG); if (!dev) { - ExReleaseResourceLite(&volumes_lock); + ExReleaseResourceLite(&pdode->child_lock); ERR("out of memory\n"); - ObDereferenceObject(DeviceObject); return NULL; } - + RtlZeroMemory(dev, sizeof(device)); - dev->devobj = DeviceObject; + dev->devobj = vc->devobj; dev->devitem.device_uuid = *uuid; - dev->devitem.dev_id = v->devnum; - dev->seeding = v->seeding; + dev->devitem.dev_id = vc->devid; + dev->devitem.num_bytes = vc->size; + dev->seeding = vc->seeding; dev->readonly = dev->seeding; dev->reloc = FALSE; dev->removable = FALSE; - dev->disk_num = v->disk_num; - dev->part_num = v->part_num; + dev->disk_num = vc->disk_num; + dev->part_num = vc->part_num; + dev->num_trim_entries = 0; + InitializeListHead(&dev->trim_list); + add_device_to_list(Vcb, dev); Vcb->devices_loaded++; - - ExReleaseResourceLite(&volumes_lock); - + + ExReleaseResourceLite(&pdode->child_lock); + return dev; } - + le = le->Flink; } } - - ExReleaseResourceLite(&volumes_lock); - + + ExReleaseResourceLite(&pdode->child_lock); + +end: WARN("could not find device with uuid %02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x\n", uuid->uuid[0], uuid->uuid[1], uuid->uuid[2], uuid->uuid[3], uuid->uuid[4], uuid->uuid[5], uuid->uuid[6], uuid->uuid[7], uuid->uuid[8], uuid->uuid[9], uuid->uuid[10], uuid->uuid[11], uuid->uuid[12], uuid->uuid[13], uuid->uuid[14], uuid->uuid[15]); - + return NULL; } -static BOOL is_device_removable(PDEVICE_OBJECT devobj) { +static BOOL is_device_removable(_In_ PDEVICE_OBJECT devobj) { NTSTATUS Status; STORAGE_HOTPLUG_INFO shi; - + Status = dev_ioctl(devobj, IOCTL_STORAGE_GET_HOTPLUG_INFO, NULL, 0, &shi, sizeof(STORAGE_HOTPLUG_INFO), TRUE, NULL); - + if 
(!NT_SUCCESS(Status)) { ERR("dev_ioctl returned %08x\n", Status); return FALSE; } - + return shi.MediaRemovable != 0 ? TRUE : FALSE; } -static ULONG get_device_change_count(PDEVICE_OBJECT devobj) { +static ULONG get_device_change_count(_In_ PDEVICE_OBJECT devobj) { NTSTATUS Status; ULONG cc; IO_STATUS_BLOCK iosb; - + Status = dev_ioctl(devobj, IOCTL_STORAGE_CHECK_VERIFY, NULL, 0, &cc, sizeof(ULONG), TRUE, &iosb); - + if (!NT_SUCCESS(Status)) { ERR("dev_ioctl returned %08x\n", Status); return 0; } - + if (iosb.Information < sizeof(ULONG)) { ERR("iosb.Information was too short\n"); return 0; } - + return cc; } -void init_device(device_extension* Vcb, device* dev, BOOL get_length, BOOL get_nums) { +void init_device(_In_ device_extension* Vcb, _Inout_ device* dev, _In_ BOOL get_nums) { NTSTATUS Status; ULONG aptelen; ATA_PASS_THROUGH_EX* apte; - IDENTIFY_DEVICE_DATA* idd; - + STORAGE_PROPERTY_QUERY spq; + DEVICE_TRIM_DESCRIPTOR dtd; + dev->removable = is_device_removable(dev->devobj); dev->change_count = dev->removable ? 
get_device_change_count(dev->devobj) : 0; - - if (get_length) { - GET_LENGTH_INFORMATION gli; - - Status = dev_ioctl(dev->devobj, IOCTL_DISK_GET_LENGTH_INFO, NULL, 0, - &gli, sizeof(GET_LENGTH_INFORMATION), TRUE, NULL); - - if (!NT_SUCCESS(Status)) - ERR("IOCTL_DISK_GET_LENGTH_INFO returned %08x\n", Status); - - dev->length = gli.Length.QuadPart; - } - + if (get_nums) { STORAGE_DEVICE_NUMBER sdn; - + Status = dev_ioctl(dev->devobj, IOCTL_STORAGE_GET_DEVICE_NUMBER, NULL, 0, &sdn, sizeof(STORAGE_DEVICE_NUMBER), TRUE, NULL); - + if (!NT_SUCCESS(Status)) { WARN("IOCTL_STORAGE_GET_DEVICE_NUMBER returned %08x\n", Status); - dev->disk_num = 0; - dev->part_num = 0; + dev->disk_num = 0xffffffff; + dev->part_num = 0xffffffff; } else { dev->disk_num = sdn.DeviceNumber; dev->part_num = sdn.PartitionNumber; } } - - dev->ssd = FALSE; + dev->trim = FALSE; dev->readonly = dev->seeding; dev->reloc = FALSE; - + dev->num_trim_entries = 0; + dev->stats_changed = FALSE; + InitializeListHead(&dev->trim_list); + if (!dev->readonly) { Status = dev_ioctl(dev->devobj, IOCTL_DISK_IS_WRITABLE, NULL, 0, NULL, 0, TRUE, NULL); @@ -3023,44 +3041,53 @@ void init_device(device_extension* Vcb, device* dev, BOOL get_length, BOOL get_n ERR("out of memory\n"); return; } - + RtlZeroMemory(apte, aptelen); - + apte->Length = sizeof(ATA_PASS_THROUGH_EX); apte->AtaFlags = ATA_FLAGS_DATA_IN; apte->DataTransferLength = aptelen - sizeof(ATA_PASS_THROUGH_EX); apte->TimeOutValue = 3; apte->DataBufferOffset = apte->Length; - apte->CurrentTaskFile[6] = 0xec; // IDENTIFY DEVICE - + apte->CurrentTaskFile[6] = IDE_COMMAND_IDENTIFY; + Status = dev_ioctl(dev->devobj, IOCTL_ATA_PASS_THROUGH, apte, aptelen, apte, aptelen, TRUE, NULL); - + if (!NT_SUCCESS(Status)) TRACE("IOCTL_ATA_PASS_THROUGH returned %08x for IDENTIFY DEVICE\n", Status); else { - idd = (IDENTIFY_DEVICE_DATA*)((UINT8*)apte + sizeof(ATA_PASS_THROUGH_EX)); - - if (idd->NominalMediaRotationRate == 1) { - dev->ssd = TRUE; - TRACE("device identified as 
SSD\n"); - } else if (idd->NominalMediaRotationRate == 0) - TRACE("no rotational speed returned, assuming not SSD\n"); - else - TRACE("rotational speed of %u RPM\n", idd->NominalMediaRotationRate); - - if (idd->DataSetManagementFeature.SupportsTrim) { + IDENTIFY_DEVICE_DATA* idd = (IDENTIFY_DEVICE_DATA*)((UINT8*)apte + sizeof(ATA_PASS_THROUGH_EX)); + + if (idd->CommandSetSupport.FlushCache) { + dev->can_flush = TRUE; + TRACE("FLUSH CACHE supported\n"); + } else + TRACE("FLUSH CACHE not supported\n"); + } + + ExFreePool(apte); + + spq.PropertyId = StorageDeviceTrimProperty; + spq.QueryType = PropertyStandardQuery; + spq.AdditionalParameters[0] = 0; + + Status = dev_ioctl(dev->devobj, IOCTL_STORAGE_QUERY_PROPERTY, &spq, sizeof(STORAGE_PROPERTY_QUERY), + &dtd, sizeof(DEVICE_TRIM_DESCRIPTOR), TRUE, NULL); + + if (NT_SUCCESS(Status)) { + if (dtd.TrimEnabled) { dev->trim = TRUE; Vcb->trim = TRUE; TRACE("TRIM supported\n"); } else TRACE("TRIM not supported\n"); } - - ExFreePool(apte); + + RtlZeroMemory(dev->stats, sizeof(UINT64) * 5); } -static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) { +static NTSTATUS load_chunk_root(_In_ _Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _In_opt_ PIRP Irp) { traverse_ptr tp, next_tp; KEY searchkey; BOOL b; @@ -3070,20 +3097,20 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) { searchkey.obj_id = 0; searchkey.obj_type = 0; searchkey.offset = 0; - + Vcb->data_flags = 0; Vcb->metadata_flags = 0; Vcb->system_flags = 0; - + Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + do { TRACE("(%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - + if (tp.item->key.obj_id == 1 && tp.item->key.obj_type == TYPE_DEV_ITEM) { if (tp.item->size < sizeof(DEV_ITEM)) { ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, 
tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DEV_ITEM)); @@ -3091,328 +3118,365 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) { DEV_ITEM* di = (DEV_ITEM*)tp.item->data; LIST_ENTRY* le; BOOL done = FALSE; - + le = Vcb->devices.Flink; while (le != &Vcb->devices) { device* dev = CONTAINING_RECORD(le, device, list_entry); - + if (dev->devobj && RtlCompareMemory(&dev->devitem.device_uuid, &di->device_uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { RtlCopyMemory(&dev->devitem, tp.item->data, min(tp.item->size, sizeof(DEV_ITEM))); - + if (le != Vcb->devices.Flink) - init_device(Vcb, dev, TRUE, TRUE); - + init_device(Vcb, dev, TRUE); + done = TRUE; break; } le = le->Flink; } - - if (!done) { - ExAcquireResourceSharedLite(&volumes_lock, TRUE); - - if (!IsListEmpty(&volumes) && Vcb->devices_loaded < Vcb->superblock.num_devices) { - LIST_ENTRY* le = volumes.Flink; - - while (le != &volumes) { - volume* v = CONTAINING_RECORD(le, volume, list_entry); - - if (RtlCompareMemory(&di->device_uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { - PFILE_OBJECT FileObject; - PDEVICE_OBJECT DeviceObject; + + if (!done && Vcb->vde) { + volume_device_extension* vde = Vcb->vde; + pdo_device_extension* pdode = vde->pdode; + + ExAcquireResourceSharedLite(&pdode->child_lock, TRUE); + + if (Vcb->devices_loaded < Vcb->superblock.num_devices) { + le = pdode->children.Flink; + + while (le != &pdode->children) { + volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry); + + if (RtlCompareMemory(&di->device_uuid, &vc->uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { device* dev; - - Status = IoGetDeviceObjectPointer(&v->devpath, FILE_READ_DATA | FILE_WRITE_DATA, &FileObject, &DeviceObject); - if (!NT_SUCCESS(Status)) { - ExReleaseResourceLite(&volumes_lock); - ERR("IoGetDeviceObjectPointer(%.*S) returned %08x\n", v->devpath.Length / sizeof(WCHAR), v->devpath.Buffer, Status); - return Status; - } - - DeviceObject = 
FileObject->DeviceObject; - - ObReferenceObject(DeviceObject); - ObDereferenceObject(FileObject); - + dev = ExAllocatePoolWithTag(NonPagedPool, sizeof(device), ALLOC_TAG); if (!dev) { - ExReleaseResourceLite(&volumes_lock); + ExReleaseResourceLite(&pdode->child_lock); ERR("out of memory\n"); - ObDereferenceObject(DeviceObject); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlZeroMemory(dev, sizeof(device)); - - dev->devobj = DeviceObject; + + dev->devobj = vc->devobj; RtlCopyMemory(&dev->devitem, di, min(tp.item->size, sizeof(DEV_ITEM))); - dev->seeding = v->seeding; - init_device(Vcb, dev, FALSE, FALSE); + dev->seeding = vc->seeding; + init_device(Vcb, dev, FALSE); - dev->length = v->length; - dev->disk_num = v->disk_num; - dev->part_num = v->part_num; + if (dev->devitem.num_bytes > vc->size) { + WARN("device %llx: DEV_ITEM says %llx bytes, but Windows only reports %llx\n", tp.item->key.offset, + dev->devitem.num_bytes, vc->size); + + dev->devitem.num_bytes = vc->size; + } + + dev->disk_num = vc->disk_num; + dev->part_num = vc->part_num; add_device_to_list(Vcb, dev); Vcb->devices_loaded++; done = TRUE; break; } - + le = le->Flink; } - + if (!done) { - ERR("volume not found: device %llx, uuid %02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x\n", tp.item->key.offset, - di->device_uuid.uuid[0], di->device_uuid.uuid[1], di->device_uuid.uuid[2], di->device_uuid.uuid[3], di->device_uuid.uuid[4], di->device_uuid.uuid[5], di->device_uuid.uuid[6], di->device_uuid.uuid[7], - di->device_uuid.uuid[8], di->device_uuid.uuid[9], di->device_uuid.uuid[10], di->device_uuid.uuid[11], di->device_uuid.uuid[12], di->device_uuid.uuid[13], di->device_uuid.uuid[14], di->device_uuid.uuid[15]); + if (!Vcb->options.allow_degraded) { + ERR("volume not found: device %llx, uuid %02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x\n", tp.item->key.offset, + di->device_uuid.uuid[0], di->device_uuid.uuid[1], di->device_uuid.uuid[2], di->device_uuid.uuid[3], 
di->device_uuid.uuid[4], di->device_uuid.uuid[5], di->device_uuid.uuid[6], di->device_uuid.uuid[7], + di->device_uuid.uuid[8], di->device_uuid.uuid[9], di->device_uuid.uuid[10], di->device_uuid.uuid[11], di->device_uuid.uuid[12], di->device_uuid.uuid[13], di->device_uuid.uuid[14], di->device_uuid.uuid[15]); + } else { + device* dev; + + dev = ExAllocatePoolWithTag(NonPagedPool, sizeof(device), ALLOC_TAG); + if (!dev) { + ExReleaseResourceLite(&pdode->child_lock); + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(dev, sizeof(device)); + + // Missing device, so we keep dev->devobj as NULL + RtlCopyMemory(&dev->devitem, di, min(tp.item->size, sizeof(DEV_ITEM))); + InitializeListHead(&dev->trim_list); + + add_device_to_list(Vcb, dev); + Vcb->devices_loaded++; + } } } else ERR("unexpected device %llx found\n", tp.item->key.offset); - - ExReleaseResourceLite(&volumes_lock); + + ExReleaseResourceLite(&pdode->child_lock); } } } else if (tp.item->key.obj_type == TYPE_CHUNK_ITEM) { if (tp.item->size < sizeof(CHUNK_ITEM)) { ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(CHUNK_ITEM)); - } else { + } else { c = ExAllocatePoolWithTag(NonPagedPool, sizeof(chunk), ALLOC_TAG); - + if (!c) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + c->size = tp.item->size; c->offset = tp.item->key.offset; c->used = c->oldused = 0; - c->cache = NULL; + c->cache = c->old_cache = NULL; c->created = FALSE; c->readonly = FALSE; c->reloc = FALSE; - + c->cache_loaded = FALSE; + c->changed = FALSE; + c->space_changed = FALSE; + c->balance_num = 0; + c->chunk_item = ExAllocatePoolWithTag(NonPagedPool, tp.item->size, ALLOC_TAG); - + if (!c->chunk_item) { ERR("out of memory\n"); ExFreePool(c); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(c->chunk_item, tp.item->data, tp.item->size); - + if (c->chunk_item->type & BLOCK_FLAG_DATA && 
c->chunk_item->type > Vcb->data_flags) Vcb->data_flags = c->chunk_item->type; - + if (c->chunk_item->type & BLOCK_FLAG_METADATA && c->chunk_item->type > Vcb->metadata_flags) Vcb->metadata_flags = c->chunk_item->type; - + if (c->chunk_item->type & BLOCK_FLAG_SYSTEM && c->chunk_item->type > Vcb->system_flags) Vcb->system_flags = c->chunk_item->type; - + + if (c->chunk_item->type & BLOCK_FLAG_RAID10) { + if (c->chunk_item->sub_stripes == 0 || c->chunk_item->sub_stripes > c->chunk_item->num_stripes) { + ERR("chunk %llx: invalid stripes (num_stripes %u, sub_stripes %u)\n", c->offset, c->chunk_item->num_stripes, c->chunk_item->sub_stripes); + ExFreePool(c->chunk_item); + ExFreePool(c); + return STATUS_INTERNAL_ERROR; + } + } + if (c->chunk_item->num_stripes > 0) { CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; UINT16 i; - + c->devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * c->chunk_item->num_stripes, ALLOC_TAG); - + if (!c->devices) { ERR("out of memory\n"); ExFreePool(c->chunk_item); ExFreePool(c); return STATUS_INSUFFICIENT_RESOURCES; } - + for (i = 0; i < c->chunk_item->num_stripes; i++) { c->devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid); TRACE("device %llu = %p\n", i, c->devices[i]); - + if (!c->devices[i]) { ERR("missing device\n"); ExFreePool(c->chunk_item); ExFreePool(c); return STATUS_INTERNAL_ERROR; } - + if (c->devices[i]->readonly) c->readonly = TRUE; } - } else - c->devices = NULL; - + } else { + ERR("chunk %llx: number of stripes is 0\n", c->offset); + ExFreePool(c->chunk_item); + ExFreePool(c); + return STATUS_INTERNAL_ERROR; + } + ExInitializeResourceLite(&c->lock); ExInitializeResourceLite(&c->changed_extents_lock); - + InitializeListHead(&c->space); InitializeListHead(&c->space_size); InitializeListHead(&c->deleting); InitializeListHead(&c->changed_extents); - + InitializeListHead(&c->range_locks); - KeInitializeSpinLock(&c->range_locks_spinlock); + ExInitializeResourceLite(&c->range_locks_lock); 
KeInitializeEvent(&c->range_locks_event, NotificationEvent, FALSE); - + + InitializeListHead(&c->partial_stripes); + ExInitializeResourceLite(&c->partial_stripes_lock); + c->last_alloc_set = FALSE; + c->last_stripe = 0; + InsertTailList(&Vcb->chunks, &c->list_entry); - - c->list_entry_changed.Flink = NULL; + c->list_entry_balance.Flink = NULL; } } - + b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp); - + if (b) tp = next_tp; } while (b); - + Vcb->log_to_phys_loaded = TRUE; - + if (Vcb->data_flags == 0) Vcb->data_flags = BLOCK_FLAG_DATA | (Vcb->superblock.num_devices > 1 ? BLOCK_FLAG_RAID0 : 0); - + if (Vcb->metadata_flags == 0) Vcb->metadata_flags = BLOCK_FLAG_METADATA | (Vcb->superblock.num_devices > 1 ? BLOCK_FLAG_RAID1 : BLOCK_FLAG_DUPLICATE); - + if (Vcb->system_flags == 0) Vcb->system_flags = BLOCK_FLAG_SYSTEM | (Vcb->superblock.num_devices > 1 ? BLOCK_FLAG_RAID1 : BLOCK_FLAG_DUPLICATE); - + if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS) { Vcb->metadata_flags |= BLOCK_FLAG_DATA; Vcb->data_flags = Vcb->metadata_flags; } - + return STATUS_SUCCESS; } -void protect_superblocks(device_extension* Vcb, chunk* c) { +void protect_superblocks(_Inout_ chunk* c) { UINT16 i = 0, j; UINT64 off_start, off_end; - + // The Linux driver also protects all the space before the first superblock. - // I realize this confuses physical and logical addresses, but this is what btrfs-progs does - + // I realize this confuses physical and logical addresses, but this is what btrfs-progs does - // evidently Linux assumes the chunk at 0 is always SINGLE. 
if (c->offset < superblock_addrs[0]) - space_list_subtract(Vcb, c, FALSE, c->offset, superblock_addrs[0] - c->offset, NULL); - + space_list_subtract(c, FALSE, c->offset, superblock_addrs[0] - c->offset, NULL); + while (superblock_addrs[i] != 0) { CHUNK_ITEM* ci = c->chunk_item; CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; - + if (ci->type & BLOCK_FLAG_RAID0 || ci->type & BLOCK_FLAG_RAID10) { for (j = 0; j < ci->num_stripes; j++) { - ULONG sub_stripes = max(ci->sub_stripes, 1); - + UINT16 sub_stripes = max(ci->sub_stripes, 1); + if (cis[j].offset + (ci->size * ci->num_stripes / sub_stripes) > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) { #ifdef _DEBUG UINT64 startoff; UINT16 startoffstripe; #endif - + TRACE("cut out superblock in chunk %llx\n", c->offset); - + off_start = superblock_addrs[i] - cis[j].offset; off_start -= off_start % ci->stripe_length; off_start *= ci->num_stripes / sub_stripes; off_start += (j / sub_stripes) * ci->stripe_length; off_end = off_start + ci->stripe_length; - + #ifdef _DEBUG get_raid0_offset(off_start, ci->stripe_length, ci->num_stripes / sub_stripes, &startoff, &startoffstripe); TRACE("j = %u, startoffstripe = %u\n", j, startoffstripe); TRACE("startoff = %llx, superblock = %llx\n", startoff + cis[j].offset, superblock_addrs[i]); #endif - - space_list_subtract(Vcb, c, FALSE, c->offset + off_start, off_end - off_start, NULL); + + space_list_subtract(c, FALSE, c->offset + off_start, off_end - off_start, NULL); } } } else if (ci->type & BLOCK_FLAG_RAID5) { + UINT64 stripe_size = ci->size / (ci->num_stripes - 1); + for (j = 0; j < ci->num_stripes; j++) { - UINT64 stripe_size = ci->size / (ci->num_stripes - 1); - if (cis[j].offset + stripe_size > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) { TRACE("cut out superblock in chunk %llx\n", c->offset); - + off_start = superblock_addrs[i] - cis[j].offset; - off_start -= off_start % (ci->stripe_length * 
(ci->num_stripes - 1)); + off_start -= off_start % ci->stripe_length; off_start *= ci->num_stripes - 1; - off_end = off_start + (ci->stripe_length * (ci->num_stripes - 1)); - + off_end = sector_align(superblock_addrs[i] - cis[j].offset + sizeof(superblock), ci->stripe_length); + off_end *= ci->num_stripes - 1; + TRACE("cutting out %llx, size %llx\n", c->offset + off_start, off_end - off_start); - space_list_subtract(Vcb, c, FALSE, c->offset + off_start, off_end - off_start, NULL); + space_list_subtract(c, FALSE, c->offset + off_start, off_end - off_start, NULL); } } } else if (ci->type & BLOCK_FLAG_RAID6) { + UINT64 stripe_size = ci->size / (ci->num_stripes - 2); + for (j = 0; j < ci->num_stripes; j++) { - UINT64 stripe_size = ci->size / (ci->num_stripes - 2); - if (cis[j].offset + stripe_size > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) { TRACE("cut out superblock in chunk %llx\n", c->offset); - + off_start = superblock_addrs[i] - cis[j].offset; - off_start -= off_start % (ci->stripe_length * (ci->num_stripes - 2)); + off_start -= off_start % ci->stripe_length; off_start *= ci->num_stripes - 2; - off_end = off_start + (ci->stripe_length * (ci->num_stripes - 2)); - + off_end = sector_align(superblock_addrs[i] - cis[j].offset + sizeof(superblock), ci->stripe_length); + off_end *= ci->num_stripes - 2; + TRACE("cutting out %llx, size %llx\n", c->offset + off_start, off_end - off_start); - space_list_subtract(Vcb, c, FALSE, c->offset + off_start, off_end - off_start, NULL); + space_list_subtract(c, FALSE, c->offset + off_start, off_end - off_start, NULL); } } } else { // SINGLE, DUPLICATE, RAID1 for (j = 0; j < ci->num_stripes; j++) { if (cis[j].offset + ci->size > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) { TRACE("cut out superblock in chunk %llx\n", c->offset); - + // The Linux driver protects the whole stripe in which the superblock lives off_start = ((superblock_addrs[i] - cis[j].offset) 
/ c->chunk_item->stripe_length) * c->chunk_item->stripe_length; off_end = sector_align(superblock_addrs[i] - cis[j].offset + sizeof(superblock), c->chunk_item->stripe_length); - - space_list_subtract(Vcb, c, FALSE, c->offset + off_start, off_end - off_start, NULL); + + space_list_subtract(c, FALSE, c->offset + off_start, off_end - off_start, NULL); } } } - + i++; } } -static NTSTATUS STDCALL find_chunk_usage(device_extension* Vcb, PIRP Irp) { +NTSTATUS find_chunk_usage(_In_ _Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _In_opt_ PIRP Irp) { LIST_ENTRY* le = Vcb->chunks.Flink; chunk* c; KEY searchkey; traverse_ptr tp; BLOCK_GROUP_ITEM* bgi; NTSTATUS Status; - + searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM; - + while (le != &Vcb->chunks) { c = CONTAINING_RECORD(le, chunk, list_entry); - + searchkey.obj_id = c->offset; searchkey.offset = c->chunk_item->size; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (!keycmp(searchkey, tp.item->key)) { if (tp.item->size >= sizeof(BLOCK_GROUP_ITEM)) { bgi = (BLOCK_GROUP_ITEM*)tp.item->data; - + c->used = c->oldused = bgi->used; - + TRACE("chunk %llx has %llx bytes used\n", c->offset, c->used); } else { ERR("(%llx;%llx,%x,%llx) is %u bytes, expected %u\n", @@ -3420,314 +3484,449 @@ static NTSTATUS STDCALL find_chunk_usage(device_extension* Vcb, PIRP Irp) { } } - if (!Vcb->readonly) { - // It doesn't make a great deal of sense to load the free space cache of a - // readonly seeding chunk, as we'll never write to it. But btrfs check will - // complain if we don't write a valid cache, so we have to do it anyway... - - // FIXME - make sure we free occasionally after doing one of these, or we - // might use up a lot of memory with a big disk. 
- - Status = load_free_space_cache(Vcb, c, Irp); - if (!NT_SUCCESS(Status)) { - ERR("load_free_space_cache returned %08x\n", Status); - return Status; - } - - protect_superblocks(Vcb, c); - } - le = le->Flink; } - + + Vcb->chunk_usage_found = TRUE; + return STATUS_SUCCESS; } -// static void STDCALL root_test(device_extension* Vcb) { -// root* r; -// KEY searchkey; -// traverse_ptr tp, next_tp; -// BOOL b; -// -// r = Vcb->roots; -// while (r) { -// if (r->id == 0x102) -// break; -// r = r->next; -// } -// -// if (!r) { -// ERR("Could not find root tree.\n"); -// return; -// } -// -// searchkey.obj_id = 0x1b6; -// searchkey.obj_type = 0xb; -// searchkey.offset = 0; -// -// if (!find_item(Vcb, r, &tp, &searchkey, NULL, FALSE)) { -// ERR("Could not find first item.\n"); -// return; -// } -// -// b = TRUE; -// do { -// TRACE("%x,%x,%x\n", (UINT32)tp.item->key.obj_id, tp.item->key.obj_type, (UINT32)tp.item->key.offset); -// -// b = find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE); -// -// if (b) { -// free_traverse_ptr(&tp); -// tp = next_tp; -// } -// } while (b); -// -// free_traverse_ptr(&tp); -// } - -static NTSTATUS load_sys_chunks(device_extension* Vcb) { +static NTSTATUS load_sys_chunks(_In_ device_extension* Vcb) { KEY key; ULONG n = Vcb->superblock.n; - + while (n > 0) { if (n > sizeof(KEY)) { RtlCopyMemory(&key, &Vcb->superblock.sys_chunk_array[Vcb->superblock.n - n], sizeof(KEY)); n -= sizeof(KEY); } else return STATUS_SUCCESS; - + TRACE("bootstrap: %llx,%x,%llx\n", key.obj_id, key.obj_type, key.offset); - + if (key.obj_type == TYPE_CHUNK_ITEM) { CHUNK_ITEM* ci; - ULONG cisize; + USHORT cisize; sys_chunk* sc; - + if (n < sizeof(CHUNK_ITEM)) return STATUS_SUCCESS; - + ci = (CHUNK_ITEM*)&Vcb->superblock.sys_chunk_array[Vcb->superblock.n - n]; cisize = sizeof(CHUNK_ITEM) + (ci->num_stripes * sizeof(CHUNK_ITEM_STRIPE)); - + if (n < cisize) return STATUS_SUCCESS; - + sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG); - + if (!sc) { ERR("out of 
memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + sc->key = key; sc->size = cisize; sc->data = ExAllocatePoolWithTag(PagedPool, sc->size, ALLOC_TAG); - + if (!sc->data) { ERR("out of memory\n"); + ExFreePool(sc); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(sc->data, ci, sc->size); InsertTailList(&Vcb->sys_chunks, &sc->list_entry); - + n -= cisize; } else { ERR("unexpected item %llx,%x,%llx in bootstrap\n", key.obj_id, key.obj_type, key.offset); return STATUS_INTERNAL_ERROR; } } - + return STATUS_SUCCESS; } -static root* find_default_subvol(device_extension* Vcb, PIRP Irp) { +_Ret_maybenull_ +static root* find_default_subvol(_In_ _Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _In_opt_ PIRP Irp) { LIST_ENTRY* le; - + static char fn[] = "default"; static UINT32 crc32 = 0x8dbfc2d2; - + if (Vcb->options.subvol_id != 0) { le = Vcb->roots.Flink; while (le != &Vcb->roots) { root* r = CONTAINING_RECORD(le, root, list_entry); - + if (r->id == Vcb->options.subvol_id) return r; - + le = le->Flink; } } - + if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_DEFAULT_SUBVOL) { NTSTATUS Status; KEY searchkey; traverse_ptr tp; DIR_ITEM* di; - + searchkey.obj_id = Vcb->superblock.root_dir_objectid; searchkey.obj_type = TYPE_DIR_ITEM; searchkey.offset = crc32; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); goto end; } - + if (keycmp(tp.item->key, searchkey)) { ERR("could not find (%llx,%x,%llx) in root tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); goto end; } - + if (tp.item->size < sizeof(DIR_ITEM)) { ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM)); goto end; } - + di = (DIR_ITEM*)tp.item->data; - + if (tp.item->size < sizeof(DIR_ITEM) - 1 + di->n) { ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", 
tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM) - 1 + di->n); goto end; } - + if (di->n != strlen(fn) || RtlCompareMemory(di->name, fn, di->n) != di->n) { ERR("root DIR_ITEM had same CRC32, but was not \"default\"\n"); goto end; } - + if (di->key.obj_type != TYPE_ROOT_ITEM) { ERR("default root has key (%llx,%x,%llx), expected subvolume\n", di->key.obj_id, di->key.obj_type, di->key.offset); goto end; } - + le = Vcb->roots.Flink; while (le != &Vcb->roots) { root* r = CONTAINING_RECORD(le, root, list_entry); - + if (r->id == di->key.obj_id) return r; - + le = le->Flink; } - + ERR("could not find root %llx, using default instead\n", di->key.obj_id); } - + end: le = Vcb->roots.Flink; while (le != &Vcb->roots) { root* r = CONTAINING_RECORD(le, root, list_entry); - + if (r->id == BTRFS_ROOT_FSTREE) return r; - + le = le->Flink; } - + return NULL; } -void init_file_cache(PFILE_OBJECT FileObject, CC_FILE_SIZES* ccfs) { +void init_file_cache(_In_ PFILE_OBJECT FileObject, _In_ CC_FILE_SIZES* ccfs) { TRACE("(%p, %p)\n", FileObject, ccfs); - + CcInitializeCacheMap(FileObject, ccfs, FALSE, cache_callbacks, FileObject); - + if (diskacc) - CcSetAdditionalCacheAttributesEx(FileObject, CC_ENABLE_DISK_IO_ACCOUNTING); + fCcSetAdditionalCacheAttributesEx(FileObject, CC_ENABLE_DISK_IO_ACCOUNTING); CcSetReadAheadGranularity(FileObject, READ_AHEAD_GRANULARITY); } -static NTSTATUS create_calc_threads(PDEVICE_OBJECT DeviceObject) { +static NTSTATUS create_calc_threads(_In_ PDEVICE_OBJECT DeviceObject) { device_extension* Vcb = DeviceObject->DeviceExtension; ULONG i; - + Vcb->calcthreads.num_threads = KeQueryActiveProcessorCount(NULL); - + Vcb->calcthreads.threads = ExAllocatePoolWithTag(NonPagedPool, sizeof(drv_calc_thread) * Vcb->calcthreads.num_threads, ALLOC_TAG); if (!Vcb->calcthreads.threads) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + InitializeListHead(&Vcb->calcthreads.job_list); 
ExInitializeResourceLite(&Vcb->calcthreads.lock); KeInitializeEvent(&Vcb->calcthreads.event, NotificationEvent, FALSE); - + RtlZeroMemory(Vcb->calcthreads.threads, sizeof(drv_calc_thread) * Vcb->calcthreads.num_threads); - + for (i = 0; i < Vcb->calcthreads.num_threads; i++) { NTSTATUS Status; - + Vcb->calcthreads.threads[i].DeviceObject = DeviceObject; KeInitializeEvent(&Vcb->calcthreads.threads[i].finished, NotificationEvent, FALSE); - + Status = PsCreateSystemThread(&Vcb->calcthreads.threads[i].handle, 0, NULL, NULL, NULL, calc_thread, &Vcb->calcthreads.threads[i]); if (!NT_SUCCESS(Status)) { ULONG j; - + ERR("PsCreateSystemThread returned %08x\n", Status); - + for (j = 0; j < i; j++) { Vcb->calcthreads.threads[i].quit = TRUE; } - + KeSetEvent(&Vcb->calcthreads.event, 0, FALSE); - + return Status; } } - + return STATUS_SUCCESS; } -static BOOL raid_generations_okay(device_extension* Vcb) { - LIST_ENTRY* le2; - - // FIXME - if the difference between superblocks is small, we should try to recover - - le2 = Vcb->devices.Flink; - while (le2 != &Vcb->devices) { - LIST_ENTRY* le; - device* dev = CONTAINING_RECORD(le2, device, list_entry); - - ExAcquireResourceSharedLite(&volumes_lock, TRUE); - - le = volumes.Flink; - - while (le != &volumes) { - volume* v = CONTAINING_RECORD(le, volume, list_entry); - - if (RtlCompareMemory(&Vcb->superblock.uuid, &v->fsuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID) && - RtlCompareMemory(&dev->devitem.device_uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID) - ) { - if (v->gen1 != Vcb->superblock.generation - 1) { - WARN("device %llu had generation %llx, expected %llx\n", dev->devitem.dev_id, v->gen1, Vcb->superblock.generation - 1); - ExReleaseResourceLite(&volumes_lock); - return FALSE; - } else - break; +static BOOL is_btrfs_volume(_In_ PDEVICE_OBJECT DeviceObject) { + NTSTATUS Status; + MOUNTDEV_NAME mdn, *mdn2; + ULONG mdnsize; + + Status = dev_ioctl(DeviceObject, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &mdn, 
sizeof(MOUNTDEV_NAME), TRUE, NULL); + if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) { + ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); + return FALSE; + } + + mdnsize = (ULONG)offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength; + + mdn2 = ExAllocatePoolWithTag(PagedPool, mdnsize, ALLOC_TAG); + if (!mdn2) { + ERR("out of memory\n"); + return FALSE; + } + + Status = dev_ioctl(DeviceObject, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, mdn2, mdnsize, TRUE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); + ExFreePool(mdn2); + return FALSE; + } + + if (mdn2->NameLength > wcslen(BTRFS_VOLUME_PREFIX) * sizeof(WCHAR) && + RtlCompareMemory(mdn2->Name, BTRFS_VOLUME_PREFIX, wcslen(BTRFS_VOLUME_PREFIX) * sizeof(WCHAR)) == wcslen(BTRFS_VOLUME_PREFIX) * sizeof(WCHAR)) { + ExFreePool(mdn2); + return TRUE; + } + + ExFreePool(mdn2); + + return FALSE; +} + +static NTSTATUS get_device_pnp_name_guid(_In_ PDEVICE_OBJECT DeviceObject, _Out_ PUNICODE_STRING pnp_name, _In_ const GUID* guid) { + NTSTATUS Status; + WCHAR *list = NULL, *s; + + Status = IoGetDeviceInterfaces((PVOID)guid, NULL, 0, &list); + if (!NT_SUCCESS(Status)) { + ERR("IoGetDeviceInterfaces returned %08x\n", Status); + return Status; + } + + s = list; + while (s[0] != 0) { + PFILE_OBJECT FileObject; + PDEVICE_OBJECT devobj; + UNICODE_STRING name; + + name.Length = name.MaximumLength = (USHORT)wcslen(s) * sizeof(WCHAR); + name.Buffer = s; + + if (NT_SUCCESS(IoGetDeviceObjectPointer(&name, FILE_READ_ATTRIBUTES, &FileObject, &devobj))) { + if (DeviceObject == devobj || DeviceObject == FileObject->DeviceObject) { + ObDereferenceObject(FileObject); + + pnp_name->Buffer = ExAllocatePoolWithTag(PagedPool, name.Length, ALLOC_TAG); + if (!pnp_name->Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlCopyMemory(pnp_name->Buffer, name.Buffer, name.Length); + pnp_name->Length = pnp_name->MaximumLength = 
name.Length; + + Status = STATUS_SUCCESS; + goto end; } - le = le->Flink; + + ObDereferenceObject(FileObject); } - - ExReleaseResourceLite(&volumes_lock); - - le2 = le2->Flink; + + s = &s[wcslen(s) + 1]; + } + + pnp_name->Length = pnp_name->MaximumLength = 0; + pnp_name->Buffer = 0; + + Status = STATUS_NOT_FOUND; + +end: + if (list) + ExFreePool(list); + + return Status; +} + +NTSTATUS get_device_pnp_name(_In_ PDEVICE_OBJECT DeviceObject, _Out_ PUNICODE_STRING pnp_name, _Out_ const GUID** guid) { + NTSTATUS Status; + + Status = get_device_pnp_name_guid(DeviceObject, pnp_name, &GUID_DEVINTERFACE_VOLUME); + if (NT_SUCCESS(Status)) { + *guid = &GUID_DEVINTERFACE_VOLUME; + return Status; + } + + Status = get_device_pnp_name_guid(DeviceObject, pnp_name, &GUID_DEVINTERFACE_HIDDEN_VOLUME); + if (NT_SUCCESS(Status)) { + *guid = &GUID_DEVINTERFACE_HIDDEN_VOLUME; + return Status; } - + + Status = get_device_pnp_name_guid(DeviceObject, pnp_name, &GUID_DEVINTERFACE_DISK); + if (NT_SUCCESS(Status)) { + *guid = &GUID_DEVINTERFACE_DISK; + return Status; + } + + return STATUS_NOT_FOUND; +} + +_Success_(return>=0) +static NTSTATUS check_mount_device(_In_ PDEVICE_OBJECT DeviceObject, _Out_ BOOL* no_pnp) { + NTSTATUS Status; + ULONG to_read; + superblock* sb; + UINT32 crc32; + UNICODE_STRING pnp_name; + const GUID* guid; + + to_read = DeviceObject->SectorSize == 0 ? 
sizeof(superblock) : (ULONG)sector_align(sizeof(superblock), DeviceObject->SectorSize); + + sb = ExAllocatePoolWithTag(NonPagedPool, to_read, ALLOC_TAG); + if (!sb) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + Status = sync_read_phys(DeviceObject, superblock_addrs[0], to_read, (PUCHAR)sb, TRUE); + if (!NT_SUCCESS(Status)) { + ERR("sync_read_phys returned %08x\n", Status); + goto end; + } + + if (sb->magic != BTRFS_MAGIC) { + Status = STATUS_SUCCESS; + goto end; + } + + crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum)); + + if (crc32 != *((UINT32*)sb->checksum)) { + WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)sb->checksum)); + Status = STATUS_SUCCESS; + goto end; + } + + DeviceObject->Flags &= ~DO_VERIFY_VOLUME; + + pnp_name.Buffer = NULL; + + Status = get_device_pnp_name(DeviceObject, &pnp_name, &guid); + if (!NT_SUCCESS(Status)) { + WARN("get_device_pnp_name returned %08x\n", Status); + pnp_name.Length = 0; + } + + if (pnp_name.Length == 0) + *no_pnp = TRUE; + else { + *no_pnp = FALSE; + volume_arrival(drvobj, &pnp_name); + } + + if (pnp_name.Buffer) + ExFreePool(pnp_name.Buffer); + + Status = STATUS_SUCCESS; + +end: + ExFreePool(sb); + + return Status; +} + +static BOOL still_has_superblock(_In_ PDEVICE_OBJECT device) { + NTSTATUS Status; + ULONG to_read; + superblock* sb; + PDEVICE_OBJECT device2; + + if (!device) + return FALSE; + + to_read = device->SectorSize == 0 ? 
sizeof(superblock) : (ULONG)sector_align(sizeof(superblock), device->SectorSize); + + sb = ExAllocatePoolWithTag(NonPagedPool, to_read, ALLOC_TAG); + if (!sb) { + ERR("out of memory\n"); + return FALSE; + } + + Status = sync_read_phys(device, superblock_addrs[0], to_read, (PUCHAR)sb, TRUE); + if (!NT_SUCCESS(Status)) { + ERR("Failed to read superblock: %08x\n", Status); + ExFreePool(sb); + return FALSE; + } + + if (sb->magic != BTRFS_MAGIC) { + TRACE("not a BTRFS volume\n"); + ExFreePool(sb); + return FALSE; + } else { + UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum)); + + if (crc32 != *((UINT32*)sb->checksum)) { + WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)sb->checksum)); + ExFreePool(sb); + return FALSE; + } + } + + device2 = device; + + do { + device2->Flags &= ~DO_VERIFY_VOLUME; + device2 = IoGetLowerDeviceObject(device2); + } while (device2); + + ExFreePool(sb); return TRUE; } -static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { +static NTSTATUS mount_vol(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp) { PIO_STACK_LOCATION IrpSp; PDEVICE_OBJECT NewDeviceObject = NULL; - PDEVICE_OBJECT DeviceToMount; + PDEVICE_OBJECT DeviceToMount, readobj; NTSTATUS Status; device_extension* Vcb = NULL; - GET_LENGTH_INFORMATION gli; LIST_ENTRY *le, batchlist; KEY searchkey; traverse_ptr tp; @@ -3735,10 +3934,15 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { ccb* root_ccb = NULL; BOOL init_lookaside = FALSE; device* dev; - + volume_device_extension* vde = NULL; + pdo_device_extension* pdode = NULL; + volume_child* vc; + BOOL no_pnp = FALSE; + UINT64 readobjsize; + TRACE("(%p, %p)\n", DeviceObject, Irp); - - if (DeviceObject != devobj) { + + if (DeviceObject != master_devobj) { Status = STATUS_INVALID_DEVICE_REQUEST; goto exit; } @@ -3746,11 +3950,103 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { IrpSp = 
IoGetCurrentIrpStackLocation(Irp); DeviceToMount = IrpSp->Parameters.MountVolume.DeviceObject; - Status = dev_ioctl(DeviceToMount, IOCTL_DISK_GET_LENGTH_INFO, NULL, 0, &gli, sizeof(gli), TRUE, NULL); - if (!NT_SUCCESS(Status)) { - ERR("error reading length information: %08x\n", Status); - Status = STATUS_UNRECOGNIZED_VOLUME; - goto exit; + if (!is_btrfs_volume(DeviceToMount)) { + Status = check_mount_device(DeviceToMount, &no_pnp); + if (!NT_SUCCESS(Status)) + WARN("check_mount_device returned %08x\n", Status); + + if (!no_pnp) { + Status = STATUS_UNRECOGNIZED_VOLUME; + goto exit2; + } + } else { + PDEVICE_OBJECT pdo; + + pdo = DeviceToMount; + + while (IoGetLowerDeviceObject(pdo)) { + pdo = IoGetLowerDeviceObject(pdo); + } + + ExAcquireResourceSharedLite(&pdo_list_lock, TRUE); + + le = pdo_list.Flink; + while (le != &pdo_list) { + pdo_device_extension* pdode = CONTAINING_RECORD(le, pdo_device_extension, list_entry); + + if (pdode->pdo == pdo) { + vde = pdode->vde; + break; + } + + le = le->Flink; + } + + ExReleaseResourceLite(&pdo_list_lock); + + if (!vde || vde->type != VCB_TYPE_VOLUME) { + vde = NULL; + Status = STATUS_UNRECOGNIZED_VOLUME; + goto exit2; + } + } + + if (vde) { + pdode = vde->pdode; + + ExAcquireResourceExclusiveLite(&pdode->child_lock, TRUE); + + le = pdode->children.Flink; + while (le != &pdode->children) { + LIST_ENTRY* le2 = le->Flink; + + vc = CONTAINING_RECORD(pdode->children.Flink, volume_child, list_entry); + + if (!still_has_superblock(vc->devobj)) { + remove_volume_child(vde, vc, FALSE); + + if (pdode->num_children == 0) { + ERR("error - number of devices is zero\n"); + Status = STATUS_INTERNAL_ERROR; + goto exit2; + } + + Status = STATUS_DEVICE_NOT_READY; + goto exit2; + } + + le = le2; + } + + if (pdode->num_children == 0 || pdode->children_loaded == 0) { + ERR("error - number of devices is zero\n"); + Status = STATUS_INTERNAL_ERROR; + goto exit; + } + + ExConvertExclusiveToSharedLite(&pdode->child_lock); + + vc = 
CONTAINING_RECORD(pdode->children.Flink, volume_child, list_entry); + + readobj = vc->devobj; + readobjsize = vc->size; + + vde->device->Characteristics &= ~FILE_DEVICE_SECURE_OPEN; + } else { + GET_LENGTH_INFORMATION gli; + + vc = NULL; + readobj = DeviceToMount; + + Status = dev_ioctl(readobj, IOCTL_DISK_GET_LENGTH_INFO, NULL, 0, + &gli, sizeof(gli), TRUE, NULL); + + if (!NT_SUCCESS(Status)) { + ERR("error reading length information: %08x\n", Status); + goto exit; + } + + readobjsize = gli.Length.QuadPart; } Status = IoCreateDevice(drvobj, sizeof(device_extension), NULL, FILE_DEVICE_DISK_FILE_SYSTEM, 0, FALSE, &NewDeviceObject); @@ -3759,28 +4055,47 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { Status = STATUS_UNRECOGNIZED_VOLUME; goto exit; } - + NewDeviceObject->Flags |= DO_DIRECT_IO; + + // Some programs seem to expect that the sector size will be 512, for + // FILE_NO_INTERMEDIATE_BUFFERING and the like. + NewDeviceObject->SectorSize = min(DeviceToMount->SectorSize, 512); + Vcb = (PVOID)NewDeviceObject->DeviceExtension; RtlZeroMemory(Vcb, sizeof(device_extension)); - Vcb->type = VCB_TYPE_VOLUME; - + Vcb->type = VCB_TYPE_FS; + Vcb->vde = vde; + ExInitializeResourceLite(&Vcb->tree_lock); - Vcb->open_trees = 0; Vcb->need_write = FALSE; ExInitializeResourceLite(&Vcb->fcb_lock); ExInitializeResourceLite(&Vcb->chunk_lock); + ExInitializeResourceLite(&Vcb->dirty_fcbs_lock); + ExInitializeResourceLite(&Vcb->dirty_filerefs_lock); + ExInitializeResourceLite(&Vcb->dirty_subvols_lock); + ExInitializeResourceLite(&Vcb->scrub.stats_lock); ExInitializeResourceLite(&Vcb->load_lock); ExAcquireResourceExclusiveLite(&Vcb->load_lock, TRUE); + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + DeviceToMount->Flags |= DO_DIRECT_IO; - - TRACE("partition length = %llx\n", gli.Length.QuadPart); - Status = read_superblock(Vcb, DeviceToMount, gli.Length.QuadPart); + Status = read_superblock(Vcb, readobj, readobjsize); if (!NT_SUCCESS(Status)) { + 
if (!IoIsErrorUserInduced(Status)) + Status = STATUS_UNRECOGNIZED_VOLUME; + else if (Irp->Tail.Overlay.Thread) + IoSetHardErrorOrVerifyDevice(Irp, readobj); + + goto exit; + } + + if (!vde && Vcb->superblock.num_devices > 1) { + ERR("cannot mount multi-device FS with non-PNP device\n"); Status = STATUS_UNRECOGNIZED_VOLUME; goto exit; } @@ -3790,7 +4105,13 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { ERR("registry_load_volume_options returned %08x\n", Status); goto exit; } - + + if (pdode && pdode->children_loaded < pdode->num_children && (!Vcb->options.allow_degraded || !finished_probing || degraded_wait)) { + ERR("could not mount as %u device(s) missing\n", pdode->num_children - pdode->children_loaded); + Status = STATUS_DEVICE_NOT_READY; + goto exit; + } + if (Vcb->options.ignore) { TRACE("ignoring volume\n"); Status = STATUS_UNRECOGNIZED_VOLUME; @@ -3802,37 +4123,19 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { Status = STATUS_UNRECOGNIZED_VOLUME; goto exit; } - - ExAcquireResourceSharedLite(&volumes_lock, TRUE); - - le = volumes.Flink; - while (le != &volumes) { - volume* v = CONTAINING_RECORD(le, volume, list_entry); - - if (RtlCompareMemory(&Vcb->superblock.uuid, &v->fsuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID) && v->devnum < Vcb->superblock.dev_item.dev_id) { - // skipping over device in RAID which isn't the first one - ExReleaseResourceLite(&volumes_lock); - Status = STATUS_UNRECOGNIZED_VOLUME; - goto exit; - } - - le = le->Flink; - } - - ExReleaseResourceLite(&volumes_lock); - + Vcb->readonly = FALSE; if (Vcb->superblock.compat_ro_flags & ~COMPAT_RO_SUPPORTED) { WARN("mounting read-only because of unsupported flags (%llx)\n", Vcb->superblock.compat_ro_flags & ~COMPAT_RO_SUPPORTED); Vcb->readonly = TRUE; } - + if (Vcb->options.readonly) Vcb->readonly = TRUE; - + Vcb->superblock.generation++; Vcb->superblock.incompat_flags |= BTRFS_INCOMPAT_FLAGS_MIXED_BACKREF; - + 
InitializeListHead(&Vcb->devices); dev = ExAllocatePoolWithTag(NonPagedPool, sizeof(device), ALLOC_TAG); if (!dev) { @@ -3840,147 +4143,160 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { Status = STATUS_INSUFFICIENT_RESOURCES; goto exit; } - - dev->devobj = DeviceToMount; + + dev->devobj = readobj; RtlCopyMemory(&dev->devitem, &Vcb->superblock.dev_item, sizeof(DEV_ITEM)); - + + if (dev->devitem.num_bytes > readobjsize) { + WARN("device %llx: DEV_ITEM says %llx bytes, but Windows only reports %llx\n", dev->devitem.dev_id, + dev->devitem.num_bytes, readobjsize); + + dev->devitem.num_bytes = readobjsize; + } + dev->seeding = Vcb->superblock.flags & BTRFS_SUPERBLOCK_FLAGS_SEEDING ? TRUE : FALSE; - - init_device(Vcb, dev, FALSE, TRUE); - dev->length = gli.Length.QuadPart; - + + init_device(Vcb, dev, TRUE); + InsertTailList(&Vcb->devices, &dev->list_entry); Vcb->devices_loaded = 1; - + if (DeviceToMount->Flags & DO_SYSTEM_BOOT_PARTITION) Vcb->disallow_dismount = TRUE; - + TRACE("DeviceToMount = %p\n", DeviceToMount); TRACE("IrpSp->Parameters.MountVolume.Vpb = %p\n", IrpSp->Parameters.MountVolume.Vpb); NewDeviceObject->StackSize = DeviceToMount->StackSize + 1; NewDeviceObject->Flags &= ~DO_DEVICE_INITIALIZING; - + InitializeListHead(&Vcb->roots); InitializeListHead(&Vcb->drop_roots); - + Vcb->log_to_phys_loaded = FALSE; - - add_root(Vcb, BTRFS_ROOT_CHUNK, Vcb->superblock.chunk_tree_addr, NULL); - + + add_root(Vcb, BTRFS_ROOT_CHUNK, Vcb->superblock.chunk_tree_addr, Vcb->superblock.chunk_root_generation, NULL); + if (!Vcb->chunk_root) { ERR("Could not load chunk root.\n"); Status = STATUS_INTERNAL_ERROR; goto exit; } - + InitializeListHead(&Vcb->sys_chunks); Status = load_sys_chunks(Vcb); if (!NT_SUCCESS(Status)) { ERR("load_sys_chunks returned %08x\n", Status); goto exit; } - + InitializeListHead(&Vcb->chunks); - InitializeListHead(&Vcb->chunks_changed); InitializeListHead(&Vcb->trees); InitializeListHead(&Vcb->trees_hash); 
InitializeListHead(&Vcb->all_fcbs); InitializeListHead(&Vcb->dirty_fcbs); InitializeListHead(&Vcb->dirty_filerefs); - - KeInitializeSpinLock(&Vcb->dirty_fcbs_lock); - KeInitializeSpinLock(&Vcb->dirty_filerefs_lock); - + InitializeListHead(&Vcb->dirty_subvols); + InitializeListHead(&Vcb->send_ops); + InitializeListHead(&Vcb->DirNotifyList); + InitializeListHead(&Vcb->scrub.errors); FsRtlNotifyInitializeSync(&Vcb->NotifySync); - + ExInitializePagedLookasideList(&Vcb->tree_data_lookaside, NULL, NULL, 0, sizeof(tree_data), ALLOC_TAG, 0); ExInitializePagedLookasideList(&Vcb->traverse_ptr_lookaside, NULL, NULL, 0, sizeof(traverse_ptr), ALLOC_TAG, 0); - ExInitializePagedLookasideList(&Vcb->rollback_item_lookaside, NULL, NULL, 0, sizeof(rollback_item), ALLOC_TAG, 0); ExInitializePagedLookasideList(&Vcb->batch_item_lookaside, NULL, NULL, 0, sizeof(batch_item), ALLOC_TAG, 0); + ExInitializePagedLookasideList(&Vcb->fileref_lookaside, NULL, NULL, 0, sizeof(file_ref), ALLOC_TAG, 0); + ExInitializePagedLookasideList(&Vcb->fcb_lookaside, NULL, NULL, 0, sizeof(fcb), ALLOC_TAG, 0); + ExInitializePagedLookasideList(&Vcb->name_bit_lookaside, NULL, NULL, 0, sizeof(name_bit), ALLOC_TAG, 0); ExInitializeNPagedLookasideList(&Vcb->range_lock_lookaside, NULL, NULL, 0, sizeof(range_lock), ALLOC_TAG, 0); + ExInitializeNPagedLookasideList(&Vcb->fileref_np_lookaside, NULL, NULL, 0, sizeof(file_ref_nonpaged), ALLOC_TAG, 0); + ExInitializeNPagedLookasideList(&Vcb->fcb_np_lookaside, NULL, NULL, 0, sizeof(fcb_nonpaged), ALLOC_TAG, 0); init_lookaside = TRUE; - + + Vcb->Vpb = IrpSp->Parameters.MountVolume.Vpb; + Status = load_chunk_root(Vcb, Irp); if (!NT_SUCCESS(Status)) { ERR("load_chunk_root returned %08x\n", Status); goto exit; } - + if (Vcb->superblock.num_devices > 1) { - if (Vcb->devices_loaded < Vcb->superblock.num_devices) { + if (Vcb->devices_loaded < Vcb->superblock.num_devices && (!Vcb->options.allow_degraded || !finished_probing)) { ERR("could not mount as %u device(s) missing\n", 
Vcb->superblock.num_devices - Vcb->devices_loaded); - + IoRaiseInformationalHardError(IO_ERR_INTERNAL_ERROR, NULL, NULL); Status = STATUS_INTERNAL_ERROR; goto exit; } - + if (dev->readonly && !Vcb->readonly) { Vcb->readonly = TRUE; - + le = Vcb->devices.Flink; while (le != &Vcb->devices) { device* dev2 = CONTAINING_RECORD(le, device, list_entry); - + if (dev2->readonly && !dev2->seeding) break; - + if (!dev2->readonly) { Vcb->readonly = FALSE; break; } - + le = le->Flink; } - + if (Vcb->readonly) WARN("setting volume to readonly\n"); } - - if (!raid_generations_okay(Vcb)) { - ERR("could not mount as generation mismatch\n"); - - IoRaiseInformationalHardError(IO_ERR_INTERNAL_ERROR, NULL, NULL); - - Status = STATUS_INTERNAL_ERROR; - goto exit; - } } else { if (dev->readonly) { WARN("setting volume to readonly as device is readonly\n"); Vcb->readonly = TRUE; } } - - add_root(Vcb, BTRFS_ROOT_ROOT, Vcb->superblock.root_tree_addr, NULL); - + + add_root(Vcb, BTRFS_ROOT_ROOT, Vcb->superblock.root_tree_addr, Vcb->superblock.generation - 1, NULL); + if (!Vcb->root_root) { ERR("Could not load root of roots.\n"); Status = STATUS_INTERNAL_ERROR; goto exit; } - + Status = look_for_roots(Vcb, Irp); if (!NT_SUCCESS(Status)) { ERR("look_for_roots returned %08x\n", Status); goto exit; } - - Status = find_chunk_usage(Vcb, Irp); - if (!NT_SUCCESS(Status)) { - ERR("find_chunk_usage returned %08x\n", Status); - goto exit; + + if (!Vcb->readonly) { + Status = find_chunk_usage(Vcb, Irp); + if (!NT_SUCCESS(Status)) { + ERR("find_chunk_usage returned %08x\n", Status); + goto exit; + } } - + InitializeListHead(&batchlist); - + // We've already increased the generation by one - if (!Vcb->readonly && Vcb->superblock.generation - 1 != Vcb->superblock.cache_generation) { - WARN("generation was %llx, free-space cache generation was %llx; clearing cache...\n", Vcb->superblock.generation - 1, Vcb->superblock.cache_generation); + if (!Vcb->readonly && ( + Vcb->options.clear_cache || + 
(!(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE) && Vcb->superblock.generation - 1 != Vcb->superblock.cache_generation) || + (Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE && !(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE_VALID)))) { + if (Vcb->options.clear_cache) + WARN("ClearCache option was set, clearing cache...\n"); + else if (Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE && !(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE_VALID)) + WARN("clearing free-space tree created by buggy Linux driver\n"); + else + WARN("generation was %llx, free-space cache generation was %llx; clearing cache...\n", Vcb->superblock.generation - 1, Vcb->superblock.cache_generation); + Status = clear_free_space_cache(Vcb, &batchlist, Irp); if (!NT_SUCCESS(Status)) { ERR("clear_free_space_cache returned %08x\n", Status); @@ -3988,35 +4304,72 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { goto exit; } } - - commit_batch_list(Vcb, &batchlist, Irp, NULL); - - Vcb->volume_fcb = create_fcb(NonPagedPool); + + Status = commit_batch_list(Vcb, &batchlist, Irp); + if (!NT_SUCCESS(Status)) { + ERR("commit_batch_list returned %08x\n", Status); + goto exit; + } + + Vcb->volume_fcb = create_fcb(Vcb, NonPagedPool); if (!Vcb->volume_fcb) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto exit; } - + Vcb->volume_fcb->Vcb = Vcb; Vcb->volume_fcb->sd = NULL; - - root_fcb = create_fcb(NonPagedPool); + + Vcb->dummy_fcb = create_fcb(Vcb, NonPagedPool); + if (!Vcb->dummy_fcb) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + Vcb->dummy_fcb->Vcb = Vcb; + Vcb->dummy_fcb->type = BTRFS_TYPE_DIRECTORY; + Vcb->dummy_fcb->inode = 2; + Vcb->dummy_fcb->subvol = Vcb->root_root; + Vcb->dummy_fcb->atts = FILE_ATTRIBUTE_DIRECTORY; + Vcb->dummy_fcb->inode_item.st_nlink = 1; + 
Vcb->dummy_fcb->inode_item.st_mode = __S_IFDIR; + + Vcb->dummy_fcb->hash_ptrs = ExAllocatePoolWithTag(PagedPool, sizeof(LIST_ENTRY*) * 256, ALLOC_TAG); + if (!Vcb->dummy_fcb->hash_ptrs) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + RtlZeroMemory(Vcb->dummy_fcb->hash_ptrs, sizeof(LIST_ENTRY*) * 256); + + Vcb->dummy_fcb->hash_ptrs_uc = ExAllocatePoolWithTag(PagedPool, sizeof(LIST_ENTRY*) * 256, ALLOC_TAG); + if (!Vcb->dummy_fcb->hash_ptrs_uc) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + RtlZeroMemory(Vcb->dummy_fcb->hash_ptrs_uc, sizeof(LIST_ENTRY*) * 256); + + root_fcb = create_fcb(Vcb, NonPagedPool); if (!root_fcb) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto exit; } - + root_fcb->Vcb = Vcb; root_fcb->inode = SUBVOL_ROOT_INODE; root_fcb->type = BTRFS_TYPE_DIRECTORY; - + #ifdef DEBUG_FCB_REFCOUNTS WARN("volume FCB = %p\n", Vcb->volume_fcb); WARN("root FCB = %p\n", root_fcb); #endif - + root_fcb->subvol = find_default_subvol(Vcb, Irp); if (!root_fcb->subvol) { @@ -4024,87 +4377,87 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { Status = STATUS_INTERNAL_ERROR; goto exit; } - - Status = load_dir_children(root_fcb, TRUE, Irp); + + Status = load_dir_children(Vcb, root_fcb, TRUE, Irp); if (!NT_SUCCESS(Status)) { ERR("load_dir_children returned %08x\n", Status); goto exit; } - + searchkey.obj_id = root_fcb->inode; searchkey.obj_type = TYPE_INODE_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, root_fcb->subvol, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); goto exit; } - + if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { ERR("couldn't find INODE_ITEM for root directory\n"); Status = STATUS_INTERNAL_ERROR; goto exit; } - + if (tp.item->size > 0) RtlCopyMemory(&root_fcb->inode_item, tp.item->data, 
min(sizeof(INODE_ITEM), tp.item->size)); - + fcb_get_sd(root_fcb, NULL, TRUE, Irp); - - root_fcb->atts = get_file_attributes(Vcb, &root_fcb->inode_item, root_fcb->subvol, root_fcb->inode, root_fcb->type, FALSE, FALSE, Irp); - - Vcb->root_fileref = create_fileref(); + + root_fcb->atts = get_file_attributes(Vcb, root_fcb->subvol, root_fcb->inode, root_fcb->type, FALSE, FALSE, Irp); + + Vcb->root_fileref = create_fileref(Vcb); if (!Vcb->root_fileref) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto exit; } - + Vcb->root_fileref->fcb = root_fcb; InsertTailList(&root_fcb->subvol->fcbs, &root_fcb->list_entry); InsertTailList(&Vcb->all_fcbs, &root_fcb->list_entry_all); - + root_fcb->fileref = Vcb->root_fileref; - + root_ccb = ExAllocatePoolWithTag(PagedPool, sizeof(ccb), ALLOC_TAG); if (!root_ccb) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto exit; } - + Vcb->root_file = IoCreateStreamFileObject(NULL, DeviceToMount); Vcb->root_file->FsContext = root_fcb; Vcb->root_file->SectionObjectPointer = &root_fcb->nonpaged->segment_object; Vcb->root_file->Vpb = DeviceObject->Vpb; - + RtlZeroMemory(root_ccb, sizeof(ccb)); root_ccb->NodeType = BTRFS_NODE_TYPE_CCB; root_ccb->NodeSize = sizeof(ccb); - + Vcb->root_file->FsContext2 = root_ccb; - + _SEH2_TRY { CcInitializeCacheMap(Vcb->root_file, (PCC_FILE_SIZES)(&root_fcb->Header.AllocationSize), FALSE, cache_callbacks, Vcb->root_file); } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { Status = _SEH2_GetExceptionCode(); goto exit; } _SEH2_END; - + le = Vcb->devices.Flink; while (le != &Vcb->devices) { device* dev2 = CONTAINING_RECORD(le, device, list_entry); - + Status = find_disk_holes(Vcb, dev2, Irp); if (!NT_SUCCESS(Status)) { ERR("find_disk_holes returned %08x\n", Status); goto exit; } - + le = le->Flink; } - + NewDeviceObject->Vpb = IrpSp->Parameters.MountVolume.Vpb; IrpSp->Parameters.MountVolume.Vpb->DeviceObject = NewDeviceObject; IrpSp->Parameters.MountVolume.Vpb->Flags |= VPB_MOUNTED; @@ 
-4112,34 +4465,43 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { NewDeviceObject->Vpb->VolumeLabel[0] = '?'; NewDeviceObject->Vpb->VolumeLabel[1] = 0; NewDeviceObject->Vpb->ReferenceCount++; // FIXME - should we deref this at any point? - Vcb->Vpb = NewDeviceObject->Vpb; - + KeInitializeEvent(&Vcb->flush_thread_finished, NotificationEvent, FALSE); - + Status = PsCreateSystemThread(&Vcb->flush_thread_handle, 0, NULL, NULL, NULL, flush_thread, NewDeviceObject); if (!NT_SUCCESS(Status)) { ERR("PsCreateSystemThread returned %08x\n", Status); goto exit; } - + Status = create_calc_threads(NewDeviceObject); if (!NT_SUCCESS(Status)) { ERR("create_calc_threads returned %08x\n", Status); goto exit; } - + Status = registry_mark_volume_mounted(&Vcb->superblock.uuid); if (!NT_SUCCESS(Status)) WARN("registry_mark_volume_mounted returned %08x\n", Status); - + Status = look_for_balance_item(Vcb); if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) WARN("look_for_balance_item returned %08x\n", Status); - + Status = STATUS_SUCCESS; + if (vde) + vde->mounted_device = NewDeviceObject; + + ExInitializeResourceLite(&Vcb->send_load_lock); + exit: + if (pdode) + ExReleaseResourceLite(&pdode->child_lock); + +exit2: if (Vcb) { + ExReleaseResourceLite(&Vcb->tree_lock); ExReleaseResourceLite(&Vcb->load_lock); } @@ -4148,32 +4510,47 @@ exit: if (init_lookaside) { ExDeletePagedLookasideList(&Vcb->tree_data_lookaside); ExDeletePagedLookasideList(&Vcb->traverse_ptr_lookaside); - ExDeletePagedLookasideList(&Vcb->rollback_item_lookaside); ExDeletePagedLookasideList(&Vcb->batch_item_lookaside); + ExDeletePagedLookasideList(&Vcb->fileref_lookaside); + ExDeletePagedLookasideList(&Vcb->fcb_lookaside); + ExDeletePagedLookasideList(&Vcb->name_bit_lookaside); ExDeleteNPagedLookasideList(&Vcb->range_lock_lookaside); + ExDeleteNPagedLookasideList(&Vcb->fileref_np_lookaside); + ExDeleteNPagedLookasideList(&Vcb->fcb_np_lookaside); } - + if (Vcb->root_file) 
ObDereferenceObject(Vcb->root_file); - else if (Vcb->root_fileref) - free_fileref(Vcb->root_fileref); - else if (root_fcb) - free_fcb(root_fcb); + else if (Vcb->root_fileref) { + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, Vcb->root_fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + } else if (root_fcb) { + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fcb(Vcb, root_fcb); + ExReleaseResourceLite(&Vcb->fcb_lock); + } - if (Vcb->volume_fcb) - free_fcb(Vcb->volume_fcb); + if (Vcb->volume_fcb) { + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fcb(Vcb, Vcb->volume_fcb); + ExReleaseResourceLite(&Vcb->fcb_lock); + } ExDeleteResourceLite(&Vcb->tree_lock); ExDeleteResourceLite(&Vcb->load_lock); ExDeleteResourceLite(&Vcb->fcb_lock); ExDeleteResourceLite(&Vcb->chunk_lock); + ExDeleteResourceLite(&Vcb->dirty_fcbs_lock); + ExDeleteResourceLite(&Vcb->dirty_filerefs_lock); + ExDeleteResourceLite(&Vcb->dirty_subvols_lock); + ExDeleteResourceLite(&Vcb->scrub.stats_lock); if (Vcb->devices.Flink) { while (!IsListEmpty(&Vcb->devices)) { - LIST_ENTRY* le = RemoveHeadList(&Vcb->devices); - device* dev = CONTAINING_RECORD(le, device, list_entry); - - ExFreePool(dev); + device* dev2 = CONTAINING_RECORD(RemoveHeadList(&Vcb->devices), device, list_entry); + + ExFreePool(dev2); } } } @@ -4184,7 +4561,7 @@ exit: ExAcquireResourceExclusiveLite(&global_loading_lock, TRUE); InsertTailList(&VcbList, &Vcb->list_entry); ExReleaseResourceLite(&global_loading_lock); - + FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_MOUNT); } @@ -4193,178 +4570,246 @@ exit: return Status; } -static NTSTATUS verify_volume(PDEVICE_OBJECT devobj) { - device_extension* Vcb = devobj->DeviceExtension; - ULONG cc, to_read; - IO_STATUS_BLOCK iosb; +static NTSTATUS verify_device(_In_ device_extension* Vcb, _Inout_ device* dev) { NTSTATUS Status; superblock* sb; UINT32 crc32; - LIST_ENTRY* le; - - if (Vcb->removing) + ULONG to_read, cc; + + if (!dev->devobj) 
return STATUS_WRONG_VOLUME; - - Status = dev_ioctl(Vcb->Vpb->RealDevice, IOCTL_STORAGE_CHECK_VERIFY, NULL, 0, &cc, sizeof(ULONG), TRUE, &iosb); - - if (!NT_SUCCESS(Status)) { - ERR("dev_ioctl returned %08x\n", Status); - return Status; + + if (dev->removable) { + IO_STATUS_BLOCK iosb; + + Status = dev_ioctl(dev->devobj, IOCTL_STORAGE_CHECK_VERIFY, NULL, 0, &cc, sizeof(ULONG), TRUE, &iosb); + + if (IoIsErrorUserInduced(Status)) { + ERR("IOCTL_STORAGE_CHECK_VERIFY returned %08x (user-induced)\n", Status); + + if (Vcb->vde) { + pdo_device_extension* pdode = Vcb->vde->pdode; + LIST_ENTRY* le2; + BOOL changed = FALSE; + + ExAcquireResourceExclusiveLite(&pdode->child_lock, TRUE); + + le2 = pdode->children.Flink; + while (le2 != &pdode->children) { + volume_child* vc = CONTAINING_RECORD(le2, volume_child, list_entry); + + if (vc->devobj == dev->devobj) { + TRACE("removing device\n"); + + remove_volume_child(Vcb->vde, vc, TRUE); + changed = TRUE; + + break; + } + + le2 = le2->Flink; + } + + if (!changed) + ExReleaseResourceLite(&pdode->child_lock); + } + } else if (!NT_SUCCESS(Status)) { + ERR("IOCTL_STORAGE_CHECK_VERIFY returned %08x\n", Status); + return Status; + } else if (iosb.Information < sizeof(ULONG)) { + ERR("iosb.Information was too short\n"); + return STATUS_INTERNAL_ERROR; + } + + dev->change_count = cc; } - - to_read = devobj->SectorSize == 0 ? sizeof(superblock) : sector_align(sizeof(superblock), devobj->SectorSize); - + + to_read = dev->devobj->SectorSize == 0 ? 
sizeof(superblock) : (ULONG)sector_align(sizeof(superblock), dev->devobj->SectorSize); + sb = ExAllocatePoolWithTag(NonPagedPool, to_read, ALLOC_TAG); if (!sb) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - - Status = sync_read_phys(Vcb->Vpb->RealDevice, superblock_addrs[0], to_read, (PUCHAR)sb, TRUE); + + Status = sync_read_phys(dev->devobj, superblock_addrs[0], to_read, (PUCHAR)sb, TRUE); if (!NT_SUCCESS(Status)) { ERR("Failed to read superblock: %08x\n", Status); ExFreePool(sb); return Status; } - + if (sb->magic != BTRFS_MAGIC) { ERR("not a BTRFS volume\n"); ExFreePool(sb); return STATUS_WRONG_VOLUME; } - - if (RtlCompareMemory(&sb->uuid, &Vcb->superblock.uuid, sizeof(BTRFS_UUID)) != sizeof(BTRFS_UUID)) { - ERR("different UUIDs\n"); - ExFreePool(sb); - return STATUS_WRONG_VOLUME; - } - + crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum)); TRACE("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)sb->checksum)); - + if (crc32 != *((UINT32*)sb->checksum)) { + ERR("checksum error\n"); + ExFreePool(sb); + return STATUS_WRONG_VOLUME; + } + + if (RtlCompareMemory(&sb->uuid, &Vcb->superblock.uuid, sizeof(BTRFS_UUID)) != sizeof(BTRFS_UUID)) { ERR("different UUIDs\n"); ExFreePool(sb); return STATUS_WRONG_VOLUME; } - + ExFreePool(sb); - - ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); - + + dev->devobj->Flags &= ~DO_VERIFY_VOLUME; + + return STATUS_SUCCESS; +} + +static NTSTATUS verify_volume(_In_ PDEVICE_OBJECT devobj) { + device_extension* Vcb = devobj->DeviceExtension; + NTSTATUS Status; + LIST_ENTRY* le; + UINT64 failed_devices = 0; + BOOL locked = FALSE, remove = FALSE; + + if (!(Vcb->Vpb->Flags & VPB_MOUNTED)) + return STATUS_WRONG_VOLUME; + + if (!ExIsResourceAcquiredExclusive(&Vcb->tree_lock)) { + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + locked = TRUE; + } + + if (Vcb->removing) { + if (locked) ExReleaseResourceLite(&Vcb->tree_lock); + return STATUS_WRONG_VOLUME; + } + + 
InterlockedIncrement(&Vcb->open_files); // so pnp_surprise_removal doesn't uninit the device while we're still using it + le = Vcb->devices.Flink; while (le != &Vcb->devices) { device* dev = CONTAINING_RECORD(le, device, list_entry); - - if (dev->removable) { - NTSTATUS Status; - ULONG cc; - IO_STATUS_BLOCK iosb; - - Status = dev_ioctl(dev->devobj, IOCTL_STORAGE_CHECK_VERIFY, NULL, 0, &cc, sizeof(ULONG), TRUE, &iosb); - - if (!NT_SUCCESS(Status)) { - ExReleaseResourceLite(&Vcb->tree_lock); - ERR("dev_ioctl returned %08x\n", Status); - return Status; - } - - if (iosb.Information < sizeof(ULONG)) { - ExReleaseResourceLite(&Vcb->tree_lock); - ERR("iosb.Information was too short\n"); - return STATUS_INTERNAL_ERROR; - } - - dev->change_count = cc; + + Status = verify_device(Vcb, dev); + if (!NT_SUCCESS(Status)) { + failed_devices++; + + if (dev->devobj && Vcb->options.allow_degraded) + dev->devobj = NULL; } - - dev->devobj->Flags &= ~DO_VERIFY_VOLUME; - + le = le->Flink; } - - ExReleaseResourceLite(&Vcb->tree_lock); - - Vcb->Vpb->RealDevice->Flags &= ~DO_VERIFY_VOLUME; - - return STATUS_SUCCESS; + + InterlockedDecrement(&Vcb->open_files); + + if (Vcb->removing && Vcb->open_files == 0) + remove = TRUE; + + if (locked) + ExReleaseResourceLite(&Vcb->tree_lock); + + if (remove) { + uninit(Vcb, FALSE); + return Status; + } + + if (failed_devices == 0 || (Vcb->options.allow_degraded && failed_devices < Vcb->superblock.num_devices)) { + Vcb->Vpb->RealDevice->Flags &= ~DO_VERIFY_VOLUME; + + return STATUS_SUCCESS; + } + + return Status; } -static NTSTATUS STDCALL drv_file_system_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { +_Dispatch_type_(IRP_MJ_FILE_SYSTEM_CONTROL) +_Function_class_(DRIVER_DISPATCH) +static NTSTATUS drv_file_system_control(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp) { PIO_STACK_LOCATION IrpSp; NTSTATUS Status; device_extension* Vcb = DeviceObject->DeviceExtension; BOOL top_level; - TRACE("file system control\n"); - FsRtlEnterFileSystem(); + 
TRACE("file system control\n"); + top_level = is_top_level(Irp); - - if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) { - Status = part0_passthrough(DeviceObject, Irp); - goto exit; + + if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + Status = vol_file_system_control(DeviceObject, Irp); + goto end; + } else if (!Vcb || (Vcb->type != VCB_TYPE_FS && Vcb->type != VCB_TYPE_CONTROL)) { + Status = STATUS_INVALID_PARAMETER; + goto end; } - + Status = STATUS_NOT_IMPLEMENTED; IrpSp = IoGetCurrentIrpStackLocation( Irp ); - + Irp->IoStatus.Information = 0; - + switch (IrpSp->MinorFunction) { case IRP_MN_MOUNT_VOLUME: TRACE("IRP_MN_MOUNT_VOLUME\n"); - + Status = mount_vol(DeviceObject, Irp); break; - + case IRP_MN_KERNEL_CALL: TRACE("IRP_MN_KERNEL_CALL\n"); - - Status = fsctl_request(DeviceObject, Irp, IrpSp->Parameters.FileSystemControl.FsControlCode, FALSE); + + Status = fsctl_request(DeviceObject, &Irp, IrpSp->Parameters.FileSystemControl.FsControlCode); break; - + case IRP_MN_USER_FS_REQUEST: TRACE("IRP_MN_USER_FS_REQUEST\n"); - - Status = fsctl_request(DeviceObject, Irp, IrpSp->Parameters.FileSystemControl.FsControlCode, TRUE); + + Status = fsctl_request(DeviceObject, &Irp, IrpSp->Parameters.FileSystemControl.FsControlCode); break; - + case IRP_MN_VERIFY_VOLUME: TRACE("IRP_MN_VERIFY_VOLUME\n"); - + Status = verify_volume(DeviceObject); - + if (!NT_SUCCESS(Status) && Vcb->Vpb->Flags & VPB_MOUNTED) { - if (Vcb->open_files > 0) { - Vcb->removing = TRUE; -// Vcb->Vpb->Flags &= ~VPB_MOUNTED; - } else - uninit(Vcb, FALSE); + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + Vcb->removing = TRUE; + ExReleaseResourceLite(&Vcb->tree_lock); } - + break; - + default: break; } - Irp->IoStatus.Status = Status; +end: + TRACE("returning %08x\n", Status); - IoCompleteRequest(Irp, IO_NO_INCREMENT); - -exit: - if (top_level) + if (Irp) { + Irp->IoStatus.Status = Status; + + IoCompleteRequest(Irp, IO_NO_INCREMENT); + } + + if (top_level) IoSetTopLevelIrp(NULL); - + FsRtlExitFileSystem(); 
return Status; } -static NTSTATUS STDCALL drv_lock_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { +_Dispatch_type_(IRP_MJ_LOCK_CONTROL) +_Function_class_(DRIVER_DISPATCH) +static NTSTATUS drv_lock_control(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp) { NTSTATUS Status; PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); fcb* fcb = IrpSp->FileObject->FsContext; @@ -4374,185 +4819,320 @@ static NTSTATUS STDCALL drv_lock_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP FsRtlEnterFileSystem(); top_level = is_top_level(Irp); - - if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) { - Status = part0_passthrough(DeviceObject, Irp); + + if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + Status = vol_lock_control(DeviceObject, Irp); + + Irp->IoStatus.Status = Status; + IoCompleteRequest(Irp, IO_NO_INCREMENT); + goto exit; } - + TRACE("lock control\n"); - + Status = FsRtlProcessFileLock(&fcb->lock, Irp, NULL); fcb->Header.IsFastIoPossible = fast_io_possible(fcb); - + exit: - if (top_level) + TRACE("returning %08x\n", Status); + + if (top_level) IoSetTopLevelIrp(NULL); - + FsRtlExitFileSystem(); - + return Status; } -NTSTATUS part0_passthrough(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { +_Dispatch_type_(IRP_MJ_SHUTDOWN) +_Function_class_(DRIVER_DISPATCH) +static NTSTATUS drv_shutdown(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp) { NTSTATUS Status; - part0_device_extension* p0de = DeviceObject->DeviceExtension; - - IoSkipCurrentIrpStackLocation(Irp); - - Status = IoCallDriver(p0de->devobj, Irp); - + BOOL top_level; + device_extension* Vcb = DeviceObject->DeviceExtension; + + FsRtlEnterFileSystem(); + + TRACE("shutdown\n"); + + top_level = is_top_level(Irp); + + if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + Status = vol_shutdown(DeviceObject, Irp); + goto end; + } + + Status = STATUS_SUCCESS; + + shutting_down = TRUE; + KeSetEvent(&mountmgr_thread_event, 0, FALSE); + + while (!IsListEmpty(&VcbList)) { + Vcb = CONTAINING_RECORD(VcbList.Flink, device_extension, 
list_entry); + + TRACE("shutting down Vcb %p\n", Vcb); + + uninit(Vcb, TRUE); + } + +#ifdef _DEBUG + if (comfo) { + ObDereferenceObject(comfo); + comdo = NULL; + comfo = NULL; + } +#endif + +end: + Irp->IoStatus.Status = Status; + Irp->IoStatus.Information = 0; + + IoCompleteRequest( Irp, IO_NO_INCREMENT ); + + if (top_level) + IoSetTopLevelIrp(NULL); + + FsRtlExitFileSystem(); + return Status; } -static NTSTATUS STDCALL drv_shutdown(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { +_Dispatch_type_(IRP_MJ_POWER) +_Function_class_(DRIVER_DISPATCH) +static NTSTATUS drv_power(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp) { NTSTATUS Status; + device_extension* Vcb = DeviceObject->DeviceExtension; BOOL top_level; + + FsRtlEnterFileSystem(); + + top_level = is_top_level(Irp); + + Irp->IoStatus.Information = 0; + + if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + Status = vol_power(DeviceObject, Irp); + + Irp->IoStatus.Status = Status; + IoCompleteRequest(Irp, IO_NO_INCREMENT); + + goto exit; + } else if (Vcb && Vcb->type == VCB_TYPE_FS) { + IoSkipCurrentIrpStackLocation(Irp); + + Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp); + + goto exit; + } + + Status = STATUS_INVALID_DEVICE_REQUEST; + Irp->IoStatus.Status = Status; + IoCompleteRequest(Irp, IO_NO_INCREMENT); + +exit: + if (top_level) + IoSetTopLevelIrp(NULL); + + FsRtlExitFileSystem(); + + return Status; +} + +_Dispatch_type_(IRP_MJ_SYSTEM_CONTROL) +_Function_class_(DRIVER_DISPATCH) +static NTSTATUS drv_system_control(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp) { + NTSTATUS Status; device_extension* Vcb = DeviceObject->DeviceExtension; + BOOL top_level; - TRACE("shutdown\n"); - FsRtlEnterFileSystem(); top_level = is_top_level(Irp); - - if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) { - Status = part0_passthrough(DeviceObject, Irp); + + Irp->IoStatus.Information = 0; + + if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + volume_device_extension* vde = DeviceObject->DeviceExtension; + + IoSkipCurrentIrpStackLocation(Irp); 
+ + Status = IoCallDriver(vde->pdo, Irp); + goto exit; - } - - Status = STATUS_SUCCESS; + } else if (Vcb && Vcb->type == VCB_TYPE_FS) { + IoSkipCurrentIrpStackLocation(Irp); - while (!IsListEmpty(&VcbList)) { - Vcb = CONTAINING_RECORD(VcbList.Flink, device_extension, list_entry); - - TRACE("shutting down Vcb %p\n", Vcb); - - uninit(Vcb, TRUE); + Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp); + + goto exit; } - - Irp->IoStatus.Status = Status; - Irp->IoStatus.Information = 0; - IoCompleteRequest( Irp, IO_NO_INCREMENT ); + Status = Irp->IoStatus.Status; + IoCompleteRequest(Irp, IO_NO_INCREMENT); exit: - if (top_level) + if (top_level) IoSetTopLevelIrp(NULL); - + FsRtlExitFileSystem(); return Status; } -BOOL is_file_name_valid(PUNICODE_STRING us) { +BOOL is_file_name_valid(_In_ PUNICODE_STRING us, _In_ BOOL posix) { ULONG i; - + if (us->Length < sizeof(WCHAR)) return FALSE; - + if (us->Length > 255 * sizeof(WCHAR)) return FALSE; - + for (i = 0; i < us->Length / sizeof(WCHAR); i++) { - if (us->Buffer[i] == '/' || us->Buffer[i] == '<' || us->Buffer[i] == '>' || us->Buffer[i] == ':' || us->Buffer[i] == '"' || - us->Buffer[i] == '|' || us->Buffer[i] == '?' || us->Buffer[i] == '*' || (us->Buffer[i] >= 1 && us->Buffer[i] <= 31)) + if (us->Buffer[i] == '/' || us->Buffer[i] == 0 || + (!posix && (us->Buffer[i] == '<' || us->Buffer[i] == '>' || us->Buffer[i] == ':' || us->Buffer[i] == '"' || + us->Buffer[i] == '|' || us->Buffer[i] == '?' || us->Buffer[i] == '*' || (us->Buffer[i] >= 1 && us->Buffer[i] <= 31)))) return FALSE; } - + if (us->Buffer[0] == '.' 
&& (us->Length == sizeof(WCHAR) || (us->Length == 2 * sizeof(WCHAR) && us->Buffer[1] == '.'))) return FALSE; - + return TRUE; } -void chunk_lock_range(device_extension* Vcb, chunk* c, UINT64 start, UINT64 length) { +void chunk_lock_range(_In_ device_extension* Vcb, _In_ chunk* c, _In_ UINT64 start, _In_ UINT64 length) { LIST_ENTRY* le; BOOL locked; range_lock* rl; - + rl = ExAllocateFromNPagedLookasideList(&Vcb->range_lock_lookaside); if (!rl) { ERR("out of memory\n"); return; } - + rl->start = start; rl->length = length; rl->thread = PsGetCurrentThread(); - + while (TRUE) { - KIRQL irql; - locked = FALSE; - - KeAcquireSpinLock(&c->range_locks_spinlock, &irql); - + + ExAcquireResourceExclusiveLite(&c->range_locks_lock, TRUE); + le = c->range_locks.Flink; while (le != &c->range_locks) { range_lock* rl2 = CONTAINING_RECORD(le, range_lock, list_entry); - + if (rl2->start < start + length && rl2->start + rl2->length > start && rl2->thread != PsGetCurrentThread()) { locked = TRUE; break; } - + le = le->Flink; } - + if (!locked) { InsertTailList(&c->range_locks, &rl->list_entry); - - KeReleaseSpinLock(&c->range_locks_spinlock, irql); + + ExReleaseResourceLite(&c->range_locks_lock); return; } - + KeClearEvent(&c->range_locks_event); - - KeReleaseSpinLock(&c->range_locks_spinlock, irql); - + + ExReleaseResourceLite(&c->range_locks_lock); + KeWaitForSingleObject(&c->range_locks_event, UserRequest, KernelMode, FALSE, NULL); } } -void chunk_unlock_range(device_extension* Vcb, chunk* c, UINT64 start, UINT64 length) { - KIRQL irql; +void chunk_unlock_range(_In_ device_extension* Vcb, _In_ chunk* c, _In_ UINT64 start, _In_ UINT64 length) { LIST_ENTRY* le; - - KeAcquireSpinLock(&c->range_locks_spinlock, &irql); - + + ExAcquireResourceExclusiveLite(&c->range_locks_lock, TRUE); + le = c->range_locks.Flink; while (le != &c->range_locks) { range_lock* rl = CONTAINING_RECORD(le, range_lock, list_entry); - + if (rl->start == start && rl->length == length) { 
RemoveEntryList(&rl->list_entry); ExFreeToNPagedLookasideList(&Vcb->range_lock_lookaside, rl); break; } - + le = le->Flink; } - + KeSetEvent(&c->range_locks_event, 0, FALSE); - - KeReleaseSpinLock(&c->range_locks_spinlock, irql); + + ExReleaseResourceLite(&c->range_locks_lock); +} + +void log_device_error(_In_ device_extension* Vcb, _Inout_ device* dev, _In_ int error) { + dev->stats[error]++; + dev->stats_changed = TRUE; + Vcb->stats_changed = TRUE; } #ifdef _DEBUG -static void STDCALL init_serial() { +_Function_class_(KSTART_ROUTINE) +static void serial_thread(void* context) { + LARGE_INTEGER due_time; + KTIMER timer; + + UNUSED(context); + + KeInitializeTimer(&timer); + + due_time.QuadPart = (UINT64)-10000000; + + KeSetTimer(&timer, due_time, NULL); + + while (TRUE) { + KeWaitForSingleObject(&timer, Executive, KernelMode, FALSE, NULL); + + init_serial(FALSE); + + if (comdo) + break; + + KeSetTimer(&timer, due_time, NULL); + } + + KeCancelTimer(&timer); + + PsTerminateSystemThread(STATUS_SUCCESS); + + serial_thread_handle = NULL; +} + +static void init_serial(BOOL first_time) { NTSTATUS Status; - + Status = IoGetDeviceObjectPointer(&log_device, FILE_WRITE_DATA, &comfo, &comdo); if (!NT_SUCCESS(Status)) { ERR("IoGetDeviceObjectPointer returned %08x\n", Status); + + if (first_time) { + NTSTATUS Status; + + Status = PsCreateSystemThread(&serial_thread_handle, 0, NULL, NULL, NULL, serial_thread, NULL); + if (!NT_SUCCESS(Status)) { + ERR("PsCreateSystemThread returned %08x\n", Status); + return; + } + } } } #endif #ifndef __REACTOS__ -static void STDCALL check_cpu() { +static void check_cpu() { unsigned int cpuInfo[4]; #ifndef _MSC_VER __get_cpuid(1, &cpuInfo[0], &cpuInfo[1], &cpuInfo[2], &cpuInfo[3]); @@ -4568,7 +5148,7 @@ static void STDCALL check_cpu() { TRACE("SSE4.2 is supported\n"); else TRACE("SSE4.2 not supported\n"); - + if (have_sse2) TRACE("SSE2 is supported\n"); else @@ -4578,8 +5158,10 @@ static void STDCALL check_cpu() { #ifdef _DEBUG static void 
init_logging() { + ExAcquireResourceExclusiveLite(&log_lock, TRUE); + if (log_device.Length > 0) - init_serial(); + init_serial(TRUE); else if (log_file.Length > 0) { NTSTATUS Status; OBJECT_ATTRIBUTES oa; @@ -4587,151 +5169,308 @@ static void init_logging() { char* dateline; LARGE_INTEGER time; TIME_FIELDS tf; - + InitializeObjectAttributes(&oa, &log_file, OBJ_CASE_INSENSITIVE | OBJ_KERNEL_HANDLE, NULL, NULL); - + Status = ZwCreateFile(&log_handle, FILE_WRITE_DATA, &oa, &iosb, NULL, FILE_ATTRIBUTE_NORMAL, FILE_SHARE_READ, FILE_OPEN_IF, FILE_NON_DIRECTORY_FILE | FILE_WRITE_THROUGH | FILE_SYNCHRONOUS_IO_ALERT, NULL, 0); - + if (!NT_SUCCESS(Status)) { ERR("ZwCreateFile returned %08x\n", Status); - return; + goto end; } - + if (iosb.Information == FILE_OPENED) { // already exists FILE_STANDARD_INFORMATION fsi; FILE_POSITION_INFORMATION fpi; - + static char delim[] = "\n---\n"; - + // move to end of file - + Status = ZwQueryInformationFile(log_handle, &iosb, &fsi, sizeof(FILE_STANDARD_INFORMATION), FileStandardInformation); - + if (!NT_SUCCESS(Status)) { ERR("ZwQueryInformationFile returned %08x\n", Status); - return; + goto end; } - + fpi.CurrentByteOffset = fsi.EndOfFile; - + Status = ZwSetInformationFile(log_handle, &iosb, &fpi, sizeof(FILE_POSITION_INFORMATION), FilePositionInformation); - + if (!NT_SUCCESS(Status)) { ERR("ZwSetInformationFile returned %08x\n", Status); - return; + goto end; } - Status = ZwWriteFile(log_handle, NULL, NULL, NULL, &iosb, delim, strlen(delim), NULL, NULL); - + Status = ZwWriteFile(log_handle, NULL, NULL, NULL, &iosb, delim, (ULONG)strlen(delim), NULL, NULL); + if (!NT_SUCCESS(Status)) { ERR("ZwWriteFile returned %08x\n", Status); - return; + goto end; } } - + dateline = ExAllocatePoolWithTag(PagedPool, 256, ALLOC_TAG); - + if (!dateline) { ERR("out of memory\n"); - return; + goto end; } - + KeQuerySystemTime(&time); - + RtlTimeToTimeFields(&time, &tf); - - sprintf(dateline, "Starting logging at %04u-%02u-%02u %02u:%02u:%02u\n", 
tf.Year, tf.Month, tf.Day, tf.Hour, tf.Minute, tf.Second); - Status = ZwWriteFile(log_handle, NULL, NULL, NULL, &iosb, dateline, strlen(dateline), NULL, NULL); - + sprintf(dateline, "Starting logging at %04i-%02i-%02i %02i:%02i:%02i\n", tf.Year, tf.Month, tf.Day, tf.Hour, tf.Minute, tf.Second); + + Status = ZwWriteFile(log_handle, NULL, NULL, NULL, &iosb, dateline, (ULONG)strlen(dateline), NULL, NULL); + + ExFreePool(dateline); + if (!NT_SUCCESS(Status)) { ERR("ZwWriteFile returned %08x\n", Status); - return; + goto end; } - - ExFreePool(dateline); } + +end: + ExReleaseResourceLite(&log_lock); +} +#endif + +_Function_class_(KSTART_ROUTINE) +#ifdef __REACTOS__ +static void NTAPI degraded_wait_thread(_In_ void* context) { +#else +static void degraded_wait_thread(_In_ void* context) { +#endif + KTIMER timer; + LARGE_INTEGER delay; + + UNUSED(context); + + KeInitializeTimer(&timer); + + delay.QuadPart = -30000000; // wait three seconds + KeSetTimer(&timer, delay, NULL); + KeWaitForSingleObject(&timer, Executive, KernelMode, FALSE, NULL); + + TRACE("timer expired\n"); + + degraded_wait = FALSE; + + ZwClose(degraded_wait_handle); + degraded_wait_handle = NULL; + + PsTerminateSystemThread(STATUS_SUCCESS); } + +#ifdef __REACTOS__ +NTSTATUS NTAPI AddDevice(PDRIVER_OBJECT DriverObject, PDEVICE_OBJECT PhysicalDeviceObject) { +#else +NTSTATUS AddDevice(PDRIVER_OBJECT DriverObject, PDEVICE_OBJECT PhysicalDeviceObject) { #endif + LIST_ENTRY* le; + NTSTATUS Status; + UNICODE_STRING volname; + ULONG i, j; + pdo_device_extension* pdode = NULL; + PDEVICE_OBJECT voldev; + volume_device_extension* vde; + + TRACE("(%p, %p)\n", DriverObject, PhysicalDeviceObject); -NTSTATUS STDCALL DriverEntry(PDRIVER_OBJECT DriverObject, PUNICODE_STRING RegistryPath) { + ExAcquireResourceSharedLite(&pdo_list_lock, TRUE); + + le = pdo_list.Flink; + while (le != &pdo_list) { + pdo_device_extension* pdode2 = CONTAINING_RECORD(le, pdo_device_extension, list_entry); + + if (pdode2->pdo == 
PhysicalDeviceObject) { + pdode = pdode2; + break; + } + + le = le->Flink; + } + + if (!pdode) { + WARN("unrecognized PDO %p\n", PhysicalDeviceObject); + Status = STATUS_NOT_SUPPORTED; + goto end; + } + + ExAcquireResourceSharedLite(&pdode->child_lock, TRUE); + + volname.Length = volname.MaximumLength = (USHORT)((wcslen(BTRFS_VOLUME_PREFIX) + 36 + 1) * sizeof(WCHAR)); + volname.Buffer = ExAllocatePoolWithTag(PagedPool, volname.MaximumLength, ALLOC_TAG); // FIXME - when do we free this? + + if (!volname.Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end2; + } + + RtlCopyMemory(volname.Buffer, BTRFS_VOLUME_PREFIX, wcslen(BTRFS_VOLUME_PREFIX) * sizeof(WCHAR)); + + j = (ULONG)wcslen(BTRFS_VOLUME_PREFIX); + for (i = 0; i < 16; i++) { + volname.Buffer[j] = hex_digit(pdode->uuid.uuid[i] >> 4); j++; + volname.Buffer[j] = hex_digit(pdode->uuid.uuid[i] & 0xf); j++; + + if (i == 3 || i == 5 || i == 7 || i == 9) { + volname.Buffer[j] = '-'; + j++; + } + } + + volname.Buffer[j] = '}'; + + Status = IoCreateDevice(drvobj, sizeof(volume_device_extension), &volname, FILE_DEVICE_DISK, + RtlIsNtDdiVersionAvailable(NTDDI_WIN8) ? 
FILE_DEVICE_ALLOW_APPCONTAINER_TRAVERSAL : 0, FALSE, &voldev); + if (!NT_SUCCESS(Status)) { + ERR("IoCreateDevice returned %08x\n", Status); + goto end2; + } + + voldev->SectorSize = PhysicalDeviceObject->SectorSize; + voldev->Flags |= DO_DIRECT_IO; + + vde = voldev->DeviceExtension; + vde->type = VCB_TYPE_VOLUME; + vde->name = volname; + vde->device = voldev; + vde->mounted_device = NULL; + vde->pdo = PhysicalDeviceObject; + vde->pdode = pdode; + vde->removing = FALSE; + vde->open_count = 0; + + Status = IoRegisterDeviceInterface(PhysicalDeviceObject, &GUID_DEVINTERFACE_VOLUME, NULL, &vde->bus_name); + if (!NT_SUCCESS(Status)) + WARN("IoRegisterDeviceInterface returned %08x\n", Status); + + vde->attached_device = IoAttachDeviceToDeviceStack(voldev, PhysicalDeviceObject); + + pdode->vde = vde; + + if (pdode->removable) + voldev->Characteristics |= FILE_REMOVABLE_MEDIA; + + voldev->Flags &= ~DO_DEVICE_INITIALIZING; + + Status = IoSetDeviceInterfaceState(&vde->bus_name, TRUE); + if (!NT_SUCCESS(Status)) + WARN("IoSetDeviceInterfaceState returned %08x\n", Status); + + Status = STATUS_SUCCESS; + +end2: + ExReleaseResourceLite(&pdode->child_lock); + +end: + ExReleaseResourceLite(&pdo_list_lock); + + return Status; +} + +_Function_class_(DRIVER_INITIALIZE) +#ifdef __REACTOS__ +NTSTATUS NTAPI DriverEntry(_In_ PDRIVER_OBJECT DriverObject, _In_ PUNICODE_STRING RegistryPath) { +#else +NTSTATUS DriverEntry(_In_ PDRIVER_OBJECT DriverObject, _In_ PUNICODE_STRING RegistryPath) { +#endif NTSTATUS Status; PDEVICE_OBJECT DeviceObject; UNICODE_STRING device_nameW; UNICODE_STRING dosdevice_nameW; control_device_extension* cde; - + HANDLE regh; + OBJECT_ATTRIBUTES oa; + ULONG dispos; + InitializeListHead(&uid_map_list); - + InitializeListHead(&gid_map_list); + +#ifdef _DEBUG + ExInitializeResourceLite(&log_lock); +#endif + ExInitializeResourceLite(&mapping_lock); + log_device.Buffer = NULL; log_device.Length = log_device.MaximumLength = 0; log_file.Buffer = NULL; log_file.Length = 
log_file.MaximumLength = 0; - - read_registry(RegistryPath); - -#ifdef _DEBUG - if (debug_log_level > 0) - init_logging(); - - log_started = TRUE; -#endif - TRACE("DriverEntry\n"); - registry_path.Length = registry_path.MaximumLength = RegistryPath->Length; registry_path.Buffer = ExAllocatePoolWithTag(PagedPool, registry_path.Length, ALLOC_TAG); - + if (!registry_path.Buffer) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(registry_path.Buffer, RegistryPath->Buffer, registry_path.Length); - + + read_registry(&registry_path, FALSE); + +#ifdef _DEBUG + if (debug_log_level > 0) + init_logging(); + + log_started = TRUE; +#endif + + TRACE("DriverEntry\n"); + #ifndef __REACTOS__ check_cpu(); #endif - -// TRACE("check CRC32C: %08x\n", calc_crc32c((UINT8*)"123456789", 9)); // should be e3069283 - + if (RtlIsNtDdiVersionAvailable(NTDDI_WIN8)) { UNICODE_STRING name; - tPsIsDiskCountersEnabled PsIsDiskCountersEnabled; - + tPsIsDiskCountersEnabled fPsIsDiskCountersEnabled; + RtlInitUnicodeString(&name, L"PsIsDiskCountersEnabled"); - PsIsDiskCountersEnabled = (tPsIsDiskCountersEnabled)MmGetSystemRoutineAddress(&name); - - if (PsIsDiskCountersEnabled) { - diskacc = PsIsDiskCountersEnabled(); - + fPsIsDiskCountersEnabled = (tPsIsDiskCountersEnabled)MmGetSystemRoutineAddress(&name); + + if (fPsIsDiskCountersEnabled) { + diskacc = fPsIsDiskCountersEnabled(); + RtlInitUnicodeString(&name, L"PsUpdateDiskCounters"); - PsUpdateDiskCounters = (tPsUpdateDiskCounters)MmGetSystemRoutineAddress(&name); - - if (!PsUpdateDiskCounters) + fPsUpdateDiskCounters = (tPsUpdateDiskCounters)MmGetSystemRoutineAddress(&name); + + if (!fPsUpdateDiskCounters) diskacc = FALSE; + + RtlInitUnicodeString(&name, L"FsRtlUpdateDiskCounters"); + fFsRtlUpdateDiskCounters = (tFsRtlUpdateDiskCounters)MmGetSystemRoutineAddress(&name); } - + RtlInitUnicodeString(&name, L"CcCopyReadEx"); - CcCopyReadEx = (tCcCopyReadEx)MmGetSystemRoutineAddress(&name); - + fCcCopyReadEx = 
(tCcCopyReadEx)MmGetSystemRoutineAddress(&name); + RtlInitUnicodeString(&name, L"CcCopyWriteEx"); - CcCopyWriteEx = (tCcCopyWriteEx)MmGetSystemRoutineAddress(&name); - + fCcCopyWriteEx = (tCcCopyWriteEx)MmGetSystemRoutineAddress(&name); + RtlInitUnicodeString(&name, L"CcSetAdditionalCacheAttributesEx"); - CcSetAdditionalCacheAttributesEx = (tCcSetAdditionalCacheAttributesEx)MmGetSystemRoutineAddress(&name); + fCcSetAdditionalCacheAttributesEx = (tCcSetAdditionalCacheAttributesEx)MmGetSystemRoutineAddress(&name); } else { - PsUpdateDiskCounters = NULL; - CcCopyReadEx = NULL; - CcCopyWriteEx = NULL; - CcSetAdditionalCacheAttributesEx = NULL; + fPsUpdateDiskCounters = NULL; + fCcCopyReadEx = NULL; + fCcCopyWriteEx = NULL; + fCcSetAdditionalCacheAttributesEx = NULL; + fFsRtlUpdateDiskCounters = NULL; } - + drvobj = DriverObject; DriverObject->DriverUnload = DriverUnload; + DriverObject->DriverExtension->AddDevice = AddDevice; + DriverObject->MajorFunction[IRP_MJ_CREATE] = (PDRIVER_DISPATCH)drv_create; DriverObject->MajorFunction[IRP_MJ_CLOSE] = (PDRIVER_DISPATCH)drv_close; DriverObject->MajorFunction[IRP_MJ_READ] = (PDRIVER_DISPATCH)drv_read; @@ -4743,15 +5482,17 @@ NTSTATUS STDCALL DriverEntry(PDRIVER_OBJECT DriverObject, PUNICODE_STRING Regist DriverObject->MajorFunction[IRP_MJ_FLUSH_BUFFERS] = (PDRIVER_DISPATCH)drv_flush_buffers; DriverObject->MajorFunction[IRP_MJ_QUERY_VOLUME_INFORMATION] = (PDRIVER_DISPATCH)drv_query_volume_information; DriverObject->MajorFunction[IRP_MJ_SET_VOLUME_INFORMATION] = (PDRIVER_DISPATCH)drv_set_volume_information; - DriverObject->MajorFunction[IRP_MJ_CLEANUP] = (PDRIVER_DISPATCH)drv_cleanup; DriverObject->MajorFunction[IRP_MJ_DIRECTORY_CONTROL] = (PDRIVER_DISPATCH)drv_directory_control; DriverObject->MajorFunction[IRP_MJ_FILE_SYSTEM_CONTROL] = (PDRIVER_DISPATCH)drv_file_system_control; - DriverObject->MajorFunction[IRP_MJ_LOCK_CONTROL] = (PDRIVER_DISPATCH)drv_lock_control; DriverObject->MajorFunction[IRP_MJ_DEVICE_CONTROL] = 
(PDRIVER_DISPATCH)drv_device_control; DriverObject->MajorFunction[IRP_MJ_SHUTDOWN] = (PDRIVER_DISPATCH)drv_shutdown; - DriverObject->MajorFunction[IRP_MJ_PNP] = (PDRIVER_DISPATCH)drv_pnp; + DriverObject->MajorFunction[IRP_MJ_LOCK_CONTROL] = (PDRIVER_DISPATCH)drv_lock_control; + DriverObject->MajorFunction[IRP_MJ_CLEANUP] = (PDRIVER_DISPATCH)drv_cleanup; DriverObject->MajorFunction[IRP_MJ_QUERY_SECURITY] = (PDRIVER_DISPATCH)drv_query_security; DriverObject->MajorFunction[IRP_MJ_SET_SECURITY] = (PDRIVER_DISPATCH)drv_set_security; + DriverObject->MajorFunction[IRP_MJ_POWER] = (PDRIVER_DISPATCH)drv_power; + DriverObject->MajorFunction[IRP_MJ_SYSTEM_CONTROL] = (PDRIVER_DISPATCH)drv_system_control; + DriverObject->MajorFunction[IRP_MJ_PNP] = (PDRIVER_DISPATCH)drv_pnp; init_fast_io_dispatch(&DriverObject->FastIoDispatch); @@ -4766,12 +5507,14 @@ NTSTATUS STDCALL DriverEntry(PDRIVER_OBJECT DriverObject, PUNICODE_STRING Regist ERR("IoCreateDevice returned %08x\n", Status); return Status; } - - devobj = DeviceObject; - cde = (control_device_extension*)devobj->DeviceExtension; - + + master_devobj = DeviceObject; + cde = (control_device_extension*)master_devobj->DeviceExtension; + + RtlZeroMemory(cde, sizeof(control_device_extension)); + cde->type = VCB_TYPE_CONTROL; - + DeviceObject->Flags &= ~DO_DEVICE_INITIALIZING; Status = IoCreateSymbolicLink(&dosdevice_nameW, &device_nameW); @@ -4779,25 +5522,76 @@ NTSTATUS STDCALL DriverEntry(PDRIVER_OBJECT DriverObject, PUNICODE_STRING Regist ERR("IoCreateSymbolicLink returned %08x\n", Status); return Status; } - + Status = init_cache(); if (!NT_SUCCESS(Status)) { ERR("init_cache returned %08x\n", Status); return Status; } - InitializeListHead(&volumes); - InitializeListHead(&pnp_disks); - InitializeListHead(&VcbList); ExInitializeResourceLite(&global_loading_lock); - ExInitializeResourceLite(&volumes_lock); - + ExInitializeResourceLite(&pdo_list_lock); + + InitializeListHead(&pdo_list); + + InitializeObjectAttributes(&oa, 
RegistryPath, OBJ_CASE_INSENSITIVE | OBJ_KERNEL_HANDLE, NULL, NULL); + Status = ZwCreateKey(&regh, KEY_QUERY_VALUE | KEY_ENUMERATE_SUB_KEYS | KEY_NOTIFY, &oa, 0, NULL, REG_OPTION_NON_VOLATILE, &dispos); + if (!NT_SUCCESS(Status)) { + ERR("ZwCreateKey returned %08x\n", Status); + return Status; + } + + watch_registry(regh); + + Status = IoReportDetectedDevice(drvobj, InterfaceTypeUndefined, 0xFFFFFFFF, 0xFFFFFFFF, + NULL, NULL, 0, &cde->buspdo); + if (!NT_SUCCESS(Status)) { + ERR("IoReportDetectedDevice returned %08x\n", Status); + return Status; + } + + Status = IoRegisterDeviceInterface(cde->buspdo, &BtrfsBusInterface, NULL, &cde->bus_name); + if (!NT_SUCCESS(Status)) + WARN("IoRegisterDeviceInterface returned %08x\n", Status); + + cde->attached_device = IoAttachDeviceToDeviceStack(DeviceObject, cde->buspdo); + + Status = IoSetDeviceInterfaceState(&cde->bus_name, TRUE); + if (!NT_SUCCESS(Status)) + WARN("IoSetDeviceInterfaceState returned %08x\n", Status); + + DeviceObject->Flags &= ~DO_DEVICE_INITIALIZING; + + IoInvalidateDeviceRelations(cde->buspdo, BusRelations); + + Status = PsCreateSystemThread(&degraded_wait_handle, 0, NULL, NULL, NULL, degraded_wait_thread, NULL); + if (!NT_SUCCESS(Status)) + WARN("PsCreateSystemThread returned %08x\n", Status); + + Status = IoRegisterPlugPlayNotification(EventCategoryDeviceInterfaceChange, PNPNOTIFY_DEVICE_INTERFACE_INCLUDE_EXISTING_INTERFACES, + (PVOID)&GUID_DEVINTERFACE_VOLUME, DriverObject, volume_notification, DriverObject, &notification_entry2); + if (!NT_SUCCESS(Status)) + ERR("IoRegisterPlugPlayNotification returned %08x\n", Status); + + Status = IoRegisterPlugPlayNotification(EventCategoryDeviceInterfaceChange, PNPNOTIFY_DEVICE_INTERFACE_INCLUDE_EXISTING_INTERFACES, + (PVOID)&GUID_DEVINTERFACE_HIDDEN_VOLUME, DriverObject, volume_notification, DriverObject, &notification_entry3); + if (!NT_SUCCESS(Status)) + ERR("IoRegisterPlugPlayNotification returned %08x\n", Status); + Status = 
IoRegisterPlugPlayNotification(EventCategoryDeviceInterfaceChange, PNPNOTIFY_DEVICE_INTERFACE_INCLUDE_EXISTING_INTERFACES, (PVOID)&GUID_DEVINTERFACE_DISK, DriverObject, pnp_notification, DriverObject, &notification_entry); if (!NT_SUCCESS(Status)) ERR("IoRegisterPlugPlayNotification returned %08x\n", Status); - + + finished_probing = TRUE; + + KeInitializeEvent(&mountmgr_thread_event, NotificationEvent, FALSE); + + Status = PsCreateSystemThread(&mountmgr_thread_handle, 0, NULL, NULL, NULL, mountmgr_thread, NULL); + if (!NT_SUCCESS(Status)) + WARN("PsCreateSystemThread returned %08x\n", Status); + IoRegisterFileSystem(DeviceObject); return STATUS_SUCCESS; diff --git a/reactos/drivers/filesystems/btrfs/btrfs.h b/reactos/drivers/filesystems/btrfs/btrfs.h index f7a3d457c3b..01d2c3b9ab0 100644 --- a/reactos/drivers/filesystems/btrfs/btrfs.h +++ b/reactos/drivers/filesystems/btrfs/btrfs.h @@ -34,12 +34,16 @@ static const UINT64 superblock_addrs[] = { 0x10000, 0x4000000, 0x4000000000, 0x4 #define TYPE_SHARED_BLOCK_REF 0xB6 #define TYPE_SHARED_DATA_REF 0xB8 #define TYPE_BLOCK_GROUP_ITEM 0xC0 +#define TYPE_FREE_SPACE_INFO 0xC6 +#define TYPE_FREE_SPACE_EXTENT 0xC7 +#define TYPE_FREE_SPACE_BITMAP 0xC8 #define TYPE_DEV_EXTENT 0xCC #define TYPE_DEV_ITEM 0xD8 #define TYPE_CHUNK_ITEM 0xE4 #define TYPE_TEMP_ITEM 0xF8 #define TYPE_DEV_STATS 0xF9 #define TYPE_SUBVOL_UUID 0xFB +#define TYPE_SUBVOL_REC_UUID 0xFC #define BTRFS_ROOT_ROOT 1 #define BTRFS_ROOT_EXTENT 2 @@ -48,6 +52,7 @@ static const UINT64 superblock_addrs[] = { 0x10000, 0x4000000, 0x4000000000, 0x4 #define BTRFS_ROOT_FSTREE 5 #define BTRFS_ROOT_CHECKSUM 7 #define BTRFS_ROOT_UUID 9 +#define BTRFS_ROOT_FREE_SPACE 0xa #define BTRFS_ROOT_DATA_RELOC 0xFFFFFFFFFFFFFFF7 #define BTRFS_COMPRESSION_NONE 0 @@ -91,7 +96,8 @@ static const UINT64 superblock_addrs[] = { 0x10000, 0x4000000, 0x4000000000, 0x4 #define BTRFS_SUBVOL_READONLY 0x1 -#define BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE 0x1 +#define BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE 
0x1 +#define BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE_VALID 0x2 #define BTRFS_INCOMPAT_FLAGS_MIXED_BACKREF 0x0001 #define BTRFS_INCOMPAT_FLAGS_DEFAULT_SUBVOL 0x0002 @@ -495,6 +501,85 @@ typedef struct { UINT8 reserved[32]; } BALANCE_ITEM; +#define BTRFS_FREE_SPACE_USING_BITMAPS 1 + +typedef struct { + UINT32 count; + UINT32 flags; +} FREE_SPACE_INFO; + +#define BTRFS_DEV_STAT_WRITE_ERRORS 0 +#define BTRFS_DEV_STAT_READ_ERRORS 1 +#define BTRFS_DEV_STAT_FLUSH_ERRORS 2 +#define BTRFS_DEV_STAT_CORRUPTION_ERRORS 3 +#define BTRFS_DEV_STAT_GENERATION_ERRORS 4 + +#define BTRFS_SEND_CMD_SUBVOL 1 +#define BTRFS_SEND_CMD_SNAPSHOT 2 +#define BTRFS_SEND_CMD_MKFILE 3 +#define BTRFS_SEND_CMD_MKDIR 4 +#define BTRFS_SEND_CMD_MKNOD 5 +#define BTRFS_SEND_CMD_MKFIFO 6 +#define BTRFS_SEND_CMD_MKSOCK 7 +#define BTRFS_SEND_CMD_SYMLINK 8 +#define BTRFS_SEND_CMD_RENAME 9 +#define BTRFS_SEND_CMD_LINK 10 +#define BTRFS_SEND_CMD_UNLINK 11 +#define BTRFS_SEND_CMD_RMDIR 12 +#define BTRFS_SEND_CMD_SET_XATTR 13 +#define BTRFS_SEND_CMD_REMOVE_XATTR 14 +#define BTRFS_SEND_CMD_WRITE 15 +#define BTRFS_SEND_CMD_CLONE 16 +#define BTRFS_SEND_CMD_TRUNCATE 17 +#define BTRFS_SEND_CMD_CHMOD 18 +#define BTRFS_SEND_CMD_CHOWN 19 +#define BTRFS_SEND_CMD_UTIMES 20 +#define BTRFS_SEND_CMD_END 21 +#define BTRFS_SEND_CMD_UPDATE_EXTENT 22 + +#define BTRFS_SEND_TLV_UUID 1 +#define BTRFS_SEND_TLV_TRANSID 2 +#define BTRFS_SEND_TLV_INODE 3 +#define BTRFS_SEND_TLV_SIZE 4 +#define BTRFS_SEND_TLV_MODE 5 +#define BTRFS_SEND_TLV_UID 6 +#define BTRFS_SEND_TLV_GID 7 +#define BTRFS_SEND_TLV_RDEV 8 +#define BTRFS_SEND_TLV_CTIME 9 +#define BTRFS_SEND_TLV_MTIME 10 +#define BTRFS_SEND_TLV_ATIME 11 +#define BTRFS_SEND_TLV_OTIME 12 +#define BTRFS_SEND_TLV_XATTR_NAME 13 +#define BTRFS_SEND_TLV_XATTR_DATA 14 +#define BTRFS_SEND_TLV_PATH 15 +#define BTRFS_SEND_TLV_PATH_TO 16 +#define BTRFS_SEND_TLV_PATH_LINK 17 +#define BTRFS_SEND_TLV_OFFSET 18 +#define BTRFS_SEND_TLV_DATA 19 +#define BTRFS_SEND_TLV_CLONE_UUID 20 +#define 
BTRFS_SEND_TLV_CLONE_CTRANSID 21 +#define BTRFS_SEND_TLV_CLONE_PATH 22 +#define BTRFS_SEND_TLV_CLONE_OFFSET 23 +#define BTRFS_SEND_TLV_CLONE_LENGTH 24 + +#define BTRFS_SEND_MAGIC "btrfs-stream\0" + +typedef struct { + UINT8 magic[13]; + UINT32 version; +} btrfs_send_header; + +typedef struct { + UINT32 length; + UINT16 cmd; + UINT32 csum; +} btrfs_send_command; + +typedef struct { + UINT16 type; + UINT16 length; +} btrfs_send_tlv; + #pragma pack(pop) #endif diff --git a/reactos/drivers/filesystems/btrfs/btrfs.rc b/reactos/drivers/filesystems/btrfs/btrfs.rc index 333e8b31ad2..f18e6ef56ab 100644 --- a/reactos/drivers/filesystems/btrfs/btrfs.rc +++ b/reactos/drivers/filesystems/btrfs/btrfs.rc @@ -7,7 +7,7 @@ // // Generated from the TEXTINCLUDE 2 resource. // -#include "afxres.h" +#include "winres.h" ///////////////////////////////////////////////////////////////////////////// #undef APSTUDIO_READONLY_SYMBOLS @@ -27,18 +27,18 @@ LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_UK // TEXTINCLUDE // -1 TEXTINCLUDE +1 TEXTINCLUDE BEGIN "resource.h\0" END -2 TEXTINCLUDE +2 TEXTINCLUDE BEGIN - "#include ""afxres.h""\r\n" + "#include ""winres.h""\r\n" "\0" END -3 TEXTINCLUDE +3 TEXTINCLUDE BEGIN "\r\n" "\0" @@ -70,12 +70,12 @@ BEGIN BLOCK "080904b0" BEGIN VALUE "FileDescription", "WinBtrfs" - VALUE "FileVersion", "0.8" + VALUE "FileVersion", "1.0" VALUE "InternalName", "btrfs" - VALUE "LegalCopyright", "Copyright (c) Mark Harmstone 2016" + VALUE "LegalCopyright", "Copyright (c) Mark Harmstone 2016-17" VALUE "OriginalFilename", "btrfs.sys" VALUE "ProductName", "WinBtrfs" - VALUE "ProductVersion", "0.8" + VALUE "ProductVersion", "1.0" END END BLOCK "VarFileInfo" diff --git a/reactos/drivers/filesystems/btrfs/btrfs_drv.h b/reactos/drivers/filesystems/btrfs/btrfs_drv.h index 80188ddfda7..b658726f9a2 100644 --- a/reactos/drivers/filesystems/btrfs/btrfs_drv.h +++ b/reactos/drivers/filesystems/btrfs/btrfs_drv.h @@ -1,17 +1,17 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) 
Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */ @@ -22,11 +22,23 @@ #undef _WIN32_WINNT #undef NTDDI_VERSION -#define _WIN32_WINNT 0x0600 -#define NTDDI_VERSION 0x06010000 // Win 7 +#define _WIN32_WINNT 0x0601 +#define NTDDI_VERSION 0x06020000 // Win 8 #define _CRT_SECURE_NO_WARNINGS +#define _NO_CRT_STDIO_INLINE #endif /* __REACTOS__ */ +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4163) +#pragma warning(disable:4311) +#pragma warning(disable:4312) +#else +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsign-compare" +#pragma GCC diagnostic ignored "-Wsign-conversion" +#endif + #include #include #ifdef __REACTOS__ @@ -38,9 +50,15 @@ #include #include #endif /* __REACTOS__ */ -//#include #include #include + +#ifdef _MSC_VER +#pragma warning(pop) +#else +#pragma GCC diagnostic pop +#endif + #include #include #include @@ -59,14 +77,14 @@ #define DEBUG_PARANOID #endif +#define UNUSED(x) (void)(x) + #define BTRFS_NODE_TYPE_CCB 0x2295 #define BTRFS_NODE_TYPE_FCB 0x2296 #define ALLOC_TAG 0x7442484D //'MHBt' #define ALLOC_TAG_ZLIB 0x7A42484D //'MHBz' -#define STDCALL __stdcall - #define UID_NOBODY 65534 #define GID_NOBODY 65534 @@ -76,12 +94,15 @@ #define EA_DOSATTRIB "user.DOSATTRIB" #define EA_DOSATTRIB_HASH 0x914f9939 -#define EA_REPARSE "system.reparse" -#define EA_REPARSE_HASH 0x786f6167 +#define 
EA_REPARSE "user.reparse" +#define EA_REPARSE_HASH 0xfabad1fe #define EA_EA "user.EA" #define EA_EA_HASH 0x8270dd43 +#define EA_PROP_COMPRESSION "btrfs.compression" +#define EA_PROP_COMPRESSION_HASH 0x20ccdf69 + #define MAX_EXTENT_SIZE 0x8000000 // 128 MB #define COMPRESSED_EXTENT_SIZE 0x20000 // 128 KB @@ -89,6 +110,8 @@ #define IO_REPARSE_TAG_LXSS_SYMLINK 0xa000001d // undocumented? +#define BTRFS_VOLUME_PREFIX L"\\Device\\Btrfs{" + #ifdef _MSC_VER #define try __try #define except __except @@ -99,29 +122,62 @@ #define finally if (1) #endif -// #pragma pack(push, 1) +#ifndef FILE_SUPPORTS_BLOCK_REFCOUNTING +#define FILE_SUPPORTS_BLOCK_REFCOUNTING 0x08000000 +#endif -struct _device_extension; +#ifndef FILE_DEVICE_ALLOW_APPCONTAINER_TRAVERSAL +#define FILE_DEVICE_ALLOW_APPCONTAINER_TRAVERSAL 0x00020000 +#endif -typedef struct { - BTRFS_UUID fsuuid; - BTRFS_UUID devuuid; - UINT64 devnum; - UNICODE_STRING devpath; - UINT64 length; - UINT64 gen1, gen2; - BOOL seeding; - BOOL processed; - DWORD disk_num; - DWORD part_num; - LIST_ENTRY list_entry; -} volume; +typedef struct _DUPLICATE_EXTENTS_DATA { + HANDLE FileHandle; + LARGE_INTEGER SourceFileOffset; + LARGE_INTEGER TargetFileOffset; + LARGE_INTEGER ByteCount; +} DUPLICATE_EXTENTS_DATA, *PDUPLICATE_EXTENTS_DATA; -typedef struct { - UNICODE_STRING devpath; - ULONG disk_num; - LIST_ENTRY list_entry; -} pnp_disk; +#define FSCTL_DUPLICATE_EXTENTS_TO_FILE CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 209, METHOD_BUFFERED, FILE_WRITE_ACCESS) + +typedef struct _FSCTL_GET_INTEGRITY_INFORMATION_BUFFER { + WORD ChecksumAlgorithm; + WORD Reserved; + DWORD Flags; + DWORD ChecksumChunkSizeInBytes; + DWORD ClusterSizeInBytes; +} FSCTL_GET_INTEGRITY_INFORMATION_BUFFER, *PFSCTL_GET_INTEGRITY_INFORMATION_BUFFER; + +typedef struct _FSCTL_SET_INTEGRITY_INFORMATION_BUFFER { + WORD ChecksumAlgorithm; + WORD Reserved; + DWORD Flags; +} FSCTL_SET_INTEGRITY_INFORMATION_BUFFER, *PFSCTL_SET_INTEGRITY_INFORMATION_BUFFER; + +#define 
FSCTL_GET_INTEGRITY_INFORMATION CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 159, METHOD_BUFFERED, FILE_ANY_ACCESS) +#define FSCTL_SET_INTEGRITY_INFORMATION CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 160, METHOD_BUFFERED, FILE_READ_DATA | FILE_WRITE_DATA) + +#ifndef __REACTOS__ +#ifndef _MSC_VER +#define __drv_aliasesMem +#define _Requires_lock_held_(a) +#define _Requires_exclusive_lock_held_(a) +#define _Releases_lock_(a) +#define _Out_writes_bytes_opt_(a) +#define _Pre_satisfies_(a) +#define _Post_satisfies_(a) +#define _Releases_exclusive_lock_(a) +#define _Dispatch_type_(a) +#define _Create_lock_level_(a) +#define _Lock_level_order_(a,b) +#define _Has_lock_level_(a) +#endif +#endif + +_Create_lock_level_(tree_lock) +_Create_lock_level_(fcb_lock) +_Lock_level_order_(tree_lock, fcb_lock) + +struct _device_extension; typedef struct _fcb_nonpaged { FAST_MUTEX HeaderMutex; @@ -135,14 +191,15 @@ struct _root; typedef struct { UINT64 offset; - EXTENT_DATA* data; - ULONG datalen; + UINT16 datalen; BOOL unique; BOOL ignore; BOOL inserted; UINT32* csum; - + LIST_ENTRY list_entry; + + EXTENT_DATA extent_data; } extent; typedef struct { @@ -164,16 +221,32 @@ typedef struct { UNICODE_STRING name; UINT32 hash_uc; UNICODE_STRING name_uc; + ULONG size; struct _file_ref* fileref; LIST_ENTRY list_entry_index; LIST_ENTRY list_entry_hash; LIST_ENTRY list_entry_hash_uc; } dir_child; +enum prop_compression_type { + PropCompression_None, + PropCompression_Zlib, + PropCompression_LZO +}; + +typedef struct { + LIST_ENTRY list_entry; + USHORT namelen; + USHORT valuelen; + BOOL dirty; + char data[1]; +} xattr; + typedef struct _fcb { FSRTL_ADVANCED_FCB_HEADER Header; struct _fcb_nonpaged* nonpaged; LONG refcount; + POOL_TYPE pool_type; struct _device_extension* Vcb; struct _root* subvol; UINT64 inode; @@ -186,55 +259,53 @@ typedef struct _fcb { ULONG atts; SHARE_ACCESS share_access; WCHAR* debug_desc; + BOOL csum_loaded; LIST_ENTRY extents; - UINT64 last_dir_index; ANSI_STRING reparse_xattr; ANSI_STRING 
ea_xattr; ULONG ealen; LIST_ENTRY hardlinks; struct _file_ref* fileref; BOOL inode_item_changed; - + enum prop_compression_type prop_compression; + LIST_ENTRY xattrs; + LIST_ENTRY dir_children_index; LIST_ENTRY dir_children_hash; LIST_ENTRY dir_children_hash_uc; LIST_ENTRY** hash_ptrs; LIST_ENTRY** hash_ptrs_uc; - + BOOL dirty; - BOOL sd_dirty; + BOOL sd_dirty, sd_deleted; BOOL atts_changed, atts_deleted; BOOL extents_changed; BOOL reparse_xattr_changed; BOOL ea_changed; + BOOL prop_compression_changed; + BOOL xattrs_changed; BOOL created; - + BOOL ads; UINT32 adshash; ULONG adsmaxlen; ANSI_STRING adsxattr; ANSI_STRING adsdata; - + LIST_ENTRY list_entry; LIST_ENTRY list_entry_all; + LIST_ENTRY list_entry_dirty; } fcb; typedef struct { - fcb* fcb; - LIST_ENTRY list_entry; -} dirty_fcb; - -typedef struct { + ERESOURCE fileref_lock; ERESOURCE children_lock; } file_ref_nonpaged; typedef struct _file_ref { fcb* fcb; - UNICODE_STRING filepart; - UNICODE_STRING filepart_uc; - ANSI_STRING utf8; ANSI_STRING oldutf8; - UINT64 index; + UINT64 oldindex; BOOL delete_on_close; BOOL deleted; BOOL created; @@ -245,16 +316,21 @@ typedef struct _file_ref { struct _file_ref* parent; WCHAR* debug_desc; dir_child* dc; - + BOOL dirty; - + LIST_ENTRY list_entry; + LIST_ENTRY list_entry_dirty; } file_ref; typedef struct { - file_ref* fileref; + HANDLE thread; + struct _ccb* ccb; + void* context; + KEVENT cleared_event; + BOOL cancelling; LIST_ENTRY list_entry; -} dirty_fileref; +} send_info; typedef struct _ccb { USHORT NodeType; @@ -262,12 +338,12 @@ typedef struct _ccb { ULONG disposition; ULONG options; UINT64 query_dir_offset; -// char* query_string; UNICODE_STRING query_string; BOOL has_wildcard; BOOL specific_file; BOOL manage_volume_privilege; BOOL allow_extended_dasd_io; + BOOL reserving; ACCESS_MASK access; file_ref* fileref; UNICODE_STRING filename; @@ -277,36 +353,17 @@ typedef struct _ccb { BOOL user_set_access_time; BOOL user_set_write_time; BOOL user_set_change_time; + BOOL 
lxss; + send_info* send; + NTSTATUS send_status; } ccb; -// typedef struct _log_to_phys { -// UINT64 address; -// UINT64 size; -// UINT64 physaddr; -// UINT32 sector_size; -// struct _log_to_phys* next; -// } log_to_phys; - struct _device_extension; -// enum tree_holder_status { -// tree_holder_unloaded, -// tree_holder_loading, -// tree_holder_loaded, -// tree_holder_unloading -// }; - -// typedef struct { -// enum tree_holder_status status; -// KSPIN_LOCK spin_lock; -// ERESOURCE lock; -// } tree_holder_nonpaged; - typedef struct { UINT64 address; UINT64 generation; struct _tree* tree; -// tree_holder_nonpaged* nonpaged; } tree_holder; typedef struct _tree_data { @@ -314,24 +371,18 @@ typedef struct _tree_data { LIST_ENTRY list_entry; BOOL ignore; BOOL inserted; - + union { tree_holder treeholder; - + struct { - UINT32 size; + UINT16 size; UINT8* data; }; }; } tree_data; -// typedef struct _tree_nonpaged { -// ERESOURCE load_tree_lock; -// } tree_nonpaged; - typedef struct _tree { -// UINT64 address; -// UINT8 level; tree_header header; UINT32 hash; BOOL has_address; @@ -340,7 +391,6 @@ typedef struct _tree { struct _tree* parent; tree_data* paritem; struct _root* root; -// tree_nonpaged* nonpaged; LIST_ENTRY itemlist; LIST_ENTRY list_entry; LIST_ENTRY list_entry_hash; @@ -348,10 +398,12 @@ typedef struct _tree { BOOL has_new_address; BOOL updated_extents; BOOL write; + BOOL is_unique; + BOOL uniqueness_determined; + UINT8* buf; } tree; typedef struct { -// KSPIN_LOCK load_tree_lock; ERESOURCE load_tree_lock; } root_nonpaged; @@ -361,9 +413,14 @@ typedef struct _root { tree_holder treeholder; root_nonpaged* nonpaged; ROOT_ITEM root_item; - UNICODE_STRING path; + BOOL dirty; + BOOL received; + PEPROCESS reserved; + UINT64 parent; + LONG send_ops; LIST_ENTRY fcbs; LIST_ENTRY list_entry; + LIST_ENTRY list_entry_dirty; } root; enum batch_operation { @@ -377,6 +434,9 @@ enum batch_operation { Batch_DeleteDirItem, Batch_DeleteInodeRef, Batch_DeleteInodeExtRef, + 
Batch_DeleteXattr, + Batch_DeleteExtentData, + Batch_DeleteFreeSpace, }; typedef struct { @@ -417,14 +477,17 @@ typedef struct { BOOL seeding; BOOL readonly; BOOL reloc; - BOOL ssd; BOOL trim; + BOOL can_flush; ULONG change_count; - UINT64 length; ULONG disk_num; ULONG part_num; + UINT64 stats[5]; + BOOL stats_changed; LIST_ENTRY space; LIST_ENTRY list_entry; + ULONG num_trim_entries; + LIST_ENTRY trim_list; } device; typedef struct { @@ -434,20 +497,29 @@ typedef struct { LIST_ENTRY list_entry; } range_lock; +typedef struct { + UINT64 address; + ULONG* bmparr; + RTL_BITMAP bmp; + LIST_ENTRY list_entry; + UINT8 data[1]; +} partial_stripe; + typedef struct { CHUNK_ITEM* chunk_item; - UINT32 size; + UINT16 size; UINT64 offset; UINT64 used; - UINT32 oldused; + UINT64 oldused; device** devices; fcb* cache; + fcb* old_cache; LIST_ENTRY space; LIST_ENTRY space_size; LIST_ENTRY deleting; LIST_ENTRY changed_extents; LIST_ENTRY range_locks; - KSPIN_LOCK range_locks_spinlock; + ERESOURCE range_locks_lock; KEVENT range_locks_event; ERESOURCE lock; ERESOURCE changed_extents_lock; @@ -455,10 +527,16 @@ typedef struct { BOOL readonly; BOOL reloc; BOOL last_alloc_set; + BOOL cache_loaded; + BOOL changed; + BOOL space_changed; UINT64 last_alloc; - + UINT16 last_stripe; + LIST_ENTRY partial_stripes; + ERESOURCE partial_stripes_lock; + ULONG balance_num; + LIST_ENTRY list_entry; - LIST_ENTRY list_entry_changed; LIST_ENTRY list_entry_balance; } chunk; @@ -477,12 +555,12 @@ typedef struct { typedef struct { UINT8 type; - + union { EXTENT_DATA_REF edr; SHARED_DATA_REF sdr; }; - + LIST_ENTRY list_entry; } changed_extent_ref; @@ -528,14 +606,17 @@ typedef struct { UINT32 flush_interval; UINT32 max_inline; UINT64 subvol_id; - UINT32 raid5_recalculation; - UINT32 raid6_recalculation; BOOL skip_balance; + BOOL no_barrier; + BOOL no_trim; + BOOL clear_cache; + BOOL allow_degraded; } mount_options; -#define VCB_TYPE_VOLUME 1 -#define VCB_TYPE_PARTITION0 2 -#define VCB_TYPE_CONTROL 3 +#define 
VCB_TYPE_FS 1 +#define VCB_TYPE_CONTROL 2 +#define VCB_TYPE_VOLUME 3 +#define VCB_TYPE_PDO 4 #ifdef DEBUG_STATS typedef struct { @@ -544,13 +625,15 @@ typedef struct { UINT64 read_total_time; UINT64 read_csum_time; UINT64 read_disk_time; - + UINT64 num_opens; UINT64 open_total_time; UINT64 num_overwrites; UINT64 overwrite_total_time; UINT64 num_creates; UINT64 create_total_time; + UINT64 open_fcb_calls; + UINT64 open_fcb_time; } debug_stats; #endif @@ -565,26 +648,71 @@ typedef struct { btrfs_balance_opts opts[3]; BOOL paused; BOOL stopping; - BOOL cancelling; BOOL removing; + BOOL shrinking; BOOL dev_readonly; + ULONG balance_num; NTSTATUS status; KEVENT event; KEVENT finished; } balance_info; +typedef struct { + UINT64 address; + UINT64 device; + BOOL recovered; + BOOL is_metadata; + BOOL parity; + LIST_ENTRY list_entry; + + union { + struct { + UINT64 subvol; + UINT64 offset; + UINT16 filename_length; + WCHAR filename[1]; + } data; + + struct { + UINT64 root; + UINT8 level; + KEY firstitem; + } metadata; + }; +} scrub_error; + +typedef struct { + HANDLE thread; + ERESOURCE stats_lock; + KEVENT event; + KEVENT finished; + BOOL stopping; + BOOL paused; + LARGE_INTEGER start_time; + LARGE_INTEGER finish_time; + LARGE_INTEGER resume_time; + LARGE_INTEGER duration; + UINT64 total_chunks; + UINT64 chunks_left; + UINT64 data_scrubbed; + NTSTATUS error; + ULONG num_errors; + LIST_ENTRY errors; +} scrub_info; + +struct _volume_device_extension; + typedef struct _device_extension { UINT32 type; mount_options options; PVPB Vpb; + struct _volume_device_extension* vde; LIST_ENTRY devices; #ifdef DEBUG_STATS debug_stats stats; #endif UINT64 devices_loaded; -// DISK_GEOMETRY geometry; superblock superblock; -// WCHAR label[MAX_LABEL_SIZE]; BOOL readonly; BOOL removing; BOOL locked; @@ -593,21 +721,19 @@ typedef struct _device_extension { BOOL trim; PFILE_OBJECT locked_fileobj; fcb* volume_fcb; + fcb* dummy_fcb; file_ref* root_fileref; LONG open_files; - ERESOURCE fcb_lock; + 
_Has_lock_level_(fcb_lock) ERESOURCE fcb_lock; ERESOURCE load_lock; - ERESOURCE tree_lock; + _Has_lock_level_(tree_lock) ERESOURCE tree_lock; PNOTIFY_SYNC NotifySync; LIST_ENTRY DirNotifyList; - LONG open_trees; BOOL need_write; -// ERESOURCE LogToPhysLock; -// UINT64 chunk_root_phys_addr; + BOOL stats_changed; UINT64 data_flags; UINT64 metadata_flags; UINT64 system_flags; -// log_to_phys* log_to_phys; LIST_ENTRY roots; LIST_ENTRY drop_roots; root* chunk_root; @@ -617,43 +743,96 @@ typedef struct _device_extension { root* dev_root; root* uuid_root; root* data_reloc_root; + root* space_root; BOOL log_to_phys_loaded; + BOOL chunk_usage_found; LIST_ENTRY sys_chunks; LIST_ENTRY chunks; - LIST_ENTRY chunks_changed; LIST_ENTRY trees; LIST_ENTRY trees_hash; LIST_ENTRY* trees_ptrs[256]; LIST_ENTRY all_fcbs; LIST_ENTRY dirty_fcbs; - KSPIN_LOCK dirty_fcbs_lock; + ERESOURCE dirty_fcbs_lock; LIST_ENTRY dirty_filerefs; - KSPIN_LOCK dirty_filerefs_lock; + ERESOURCE dirty_filerefs_lock; + LIST_ENTRY dirty_subvols; + ERESOURCE dirty_subvols_lock; ERESOURCE chunk_lock; HANDLE flush_thread_handle; KTIMER flush_thread_timer; KEVENT flush_thread_finished; drv_calc_threads calcthreads; balance_info balance; + scrub_info scrub; + ERESOURCE send_load_lock; + LONG running_sends; + LIST_ENTRY send_ops; PFILE_OBJECT root_file; PAGED_LOOKASIDE_LIST tree_data_lookaside; PAGED_LOOKASIDE_LIST traverse_ptr_lookaside; - PAGED_LOOKASIDE_LIST rollback_item_lookaside; PAGED_LOOKASIDE_LIST batch_item_lookaside; + PAGED_LOOKASIDE_LIST fileref_lookaside; + PAGED_LOOKASIDE_LIST fcb_lookaside; + PAGED_LOOKASIDE_LIST name_bit_lookaside; NPAGED_LOOKASIDE_LIST range_lock_lookaside; + NPAGED_LOOKASIDE_LIST fileref_np_lookaside; + NPAGED_LOOKASIDE_LIST fcb_np_lookaside; LIST_ENTRY list_entry; } device_extension; typedef struct { UINT32 type; - PDEVICE_OBJECT devobj; + PDEVICE_OBJECT buspdo; + PDEVICE_OBJECT attached_device; + UNICODE_STRING bus_name; +} control_device_extension; + +typedef struct { BTRFS_UUID 
uuid; + UINT64 devid; + UINT64 generation; + PDEVICE_OBJECT devobj; + PFILE_OBJECT fileobj; + UNICODE_STRING pnp_name; + UINT64 size; + BOOL seeding; + BOOL had_drive_letter; + void* notification_entry; + ULONG disk_num; + ULONG part_num; + LIST_ENTRY list_entry; +} volume_child; + +struct pdo_device_extension; + +typedef struct _volume_device_extension { + UINT32 type; UNICODE_STRING name; -} part0_device_extension; + PDEVICE_OBJECT device; + PDEVICE_OBJECT mounted_device; + PDEVICE_OBJECT pdo; + struct pdo_device_extension* pdode; + UNICODE_STRING bus_name; + PDEVICE_OBJECT attached_device; + BOOL removing; + LONG open_count; +} volume_device_extension; -typedef struct { +typedef struct pdo_device_extension { UINT32 type; -} control_device_extension; + BTRFS_UUID uuid; + volume_device_extension* vde; + PDEVICE_OBJECT pdo; + BOOL removable; + + UINT64 num_children; + UINT64 children_loaded; + ERESOURCE child_lock; + LIST_ENTRY children; + + LIST_ENTRY list_entry; +} pdo_device_extension; typedef struct { LIST_ENTRY listentry; @@ -661,6 +840,12 @@ typedef struct { UINT32 uid; } uid_map; +typedef struct { + LIST_ENTRY listentry; + PSID sid; + UINT32 gid; +} gid_map; + enum write_data_status { WriteDataStatus_Pending, WriteDataStatus_Success, @@ -675,7 +860,7 @@ struct _write_data_context; typedef struct { struct _write_data_context* context; UINT8* buf; - BOOL need_free; + PMDL mdl; device* device; PIRP Irp; IO_STATUS_BLOCK iosb; @@ -687,24 +872,29 @@ typedef struct _write_data_context { KEVENT Event; LIST_ENTRY stripes; LONG stripes_left; - BOOL tree; + BOOL need_wait; + UINT8 *parity1, *parity2, *scratch; + PMDL mdl, parity1_mdl, parity2_mdl; } write_data_context; typedef struct { UINT64 address; UINT32 length; - BOOL overlap; UINT8* data; + chunk* c; LIST_ENTRY list_entry; } tree_write; -// #pragma pack(pop) +typedef struct { + UNICODE_STRING us; + LIST_ENTRY list_entry; +} name_bit; -static __inline void* map_user_buffer(PIRP Irp) { +static __inline void* 
map_user_buffer(PIRP Irp, ULONG priority) { if (!Irp->MdlAddress) { return Irp->UserBuffer; } else { - return MmGetSystemAddressForMdlSafe(Irp->MdlAddress, NormalPagePriority); + return MmGetSystemAddressForMdlSafe(Irp->MdlAddress, priority); } } @@ -714,17 +904,18 @@ static __inline UINT64 unix_time_to_win(BTRFS_TIME* t) { static __inline void win_time_to_unix(LARGE_INTEGER t, BTRFS_TIME* out) { ULONGLONG l = t.QuadPart - 116444736000000000; - + out->seconds = l / 10000000; out->nanoseconds = (l % 10000000) * 100; } -static __inline void get_raid0_offset(UINT64 off, UINT64 stripe_length, UINT16 num_stripes, UINT64* stripeoff, UINT16* stripe) { +_Post_satisfies_(*stripe>=0&&*stripeid << 40) | (inode & 0xffffffffff); } @@ -751,38 +938,126 @@ static UINT64 __inline make_file_id(root* r, UINT64 inode) { ((key1.offset > key2.offset) ? 1 :\ 0)))))) +_Post_satisfies_(return>=n) +__inline static UINT64 sector_align(_In_ UINT64 n, _In_ UINT64 a) { + if (n & (a - 1)) + n = (n + a) & ~(a - 1); + + return n; +} + +__inline static BOOL is_subvol_readonly(root* r, PIRP Irp) { + if (!(r->root_item.flags & BTRFS_SUBVOL_READONLY)) + return FALSE; + + if (!r->reserved) + return TRUE; + + return (!Irp || Irp->RequestorMode == UserMode) && PsGetCurrentProcess() != r->reserved ? 
TRUE : FALSE; +} + +__inline static UINT16 get_extent_data_len(UINT8 type) { + switch (type) { + case TYPE_TREE_BLOCK_REF: + return sizeof(TREE_BLOCK_REF); + + case TYPE_EXTENT_DATA_REF: + return sizeof(EXTENT_DATA_REF); + + case TYPE_EXTENT_REF_V0: + return sizeof(EXTENT_REF_V0); + + case TYPE_SHARED_BLOCK_REF: + return sizeof(SHARED_BLOCK_REF); + + case TYPE_SHARED_DATA_REF: + return sizeof(SHARED_DATA_REF); + + default: + return 0; + } +} + +__inline static UINT32 get_extent_data_refcount(UINT8 type, void* data) { + switch (type) { + case TYPE_TREE_BLOCK_REF: + return 1; + + case TYPE_EXTENT_DATA_REF: + { + EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)data; + return edr->count; + } + + case TYPE_EXTENT_REF_V0: + { + EXTENT_REF_V0* erv0 = (EXTENT_REF_V0*)data; + return erv0->count; + } + + case TYPE_SHARED_BLOCK_REF: + return 1; + + case TYPE_SHARED_DATA_REF: + { + SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)data; + return sdr->count; + } + + default: + return 0; + } +} + // in btrfs.c -device* find_device_from_uuid(device_extension* Vcb, BTRFS_UUID* uuid); -UINT64 sector_align( UINT64 NumberToBeAligned, UINT64 Alignment ); -BOOL get_file_attributes_from_xattr(char* val, UINT16 len, ULONG* atts); -ULONG STDCALL get_file_attributes(device_extension* Vcb, INODE_ITEM* ii, root* r, UINT64 inode, UINT8 type, BOOL dotfile, BOOL ignore_xa, PIRP Irp); -BOOL extract_xattr(void* item, USHORT size, char* name, UINT8** data, UINT16* datalen); -BOOL STDCALL get_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char* name, UINT32 crc32, UINT8** data, UINT16* datalen, PIRP Irp); -void _free_fcb(fcb* fcb, const char* func, const char* file, unsigned int line); -void _free_fileref(file_ref* fr, const char* func, const char* file, unsigned int line); -fcb* create_fcb(POOL_TYPE pool_type); -file_ref* create_fileref(); -void protect_superblocks(device_extension* Vcb, chunk* c); -BOOL is_top_level(PIRP Irp); -NTSTATUS create_root(device_extension* Vcb, UINT64 id, root** rootptr, BOOL 
no_tree, UINT64 offset, PIRP Irp, LIST_ENTRY* rollback); -void STDCALL uninit(device_extension* Vcb, BOOL flush); -NTSTATUS STDCALL dev_ioctl(PDEVICE_OBJECT DeviceObject, ULONG ControlCode, PVOID InputBuffer, - ULONG InputBufferSize, PVOID OutputBuffer, ULONG OutputBufferSize, BOOLEAN Override, IO_STATUS_BLOCK* iosb); -BOOL is_file_name_valid(PUNICODE_STRING us); -void send_notification_fileref(file_ref* fileref, ULONG filter_match, ULONG action); -void send_notification_fcb(file_ref* fileref, ULONG filter_match, ULONG action); -WCHAR* file_desc(PFILE_OBJECT FileObject); -WCHAR* file_desc_fileref(file_ref* fileref); -BOOL add_thread_job(device_extension* Vcb, PIRP Irp); -NTSTATUS part0_passthrough(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); -void mark_fcb_dirty(fcb* fcb); -void mark_fileref_dirty(file_ref* fileref); -NTSTATUS delete_fileref(file_ref* fileref, PFILE_OBJECT FileObject, PIRP Irp, LIST_ENTRY* rollback); -void chunk_lock_range(device_extension* Vcb, chunk* c, UINT64 start, UINT64 length); -void chunk_unlock_range(device_extension* Vcb, chunk* c, UINT64 start, UINT64 length); -void init_device(device_extension* Vcb, device* dev, BOOL get_length, BOOL get_nums); -void init_file_cache(PFILE_OBJECT FileObject, CC_FILE_SIZES* ccfs); -NTSTATUS sync_read_phys(PDEVICE_OBJECT DeviceObject, LONGLONG StartingOffset, ULONG Length, PUCHAR Buffer, BOOL override); +_Ret_maybenull_ +device* find_device_from_uuid(_In_ device_extension* Vcb, _In_ BTRFS_UUID* uuid); + +_Success_(return) +BOOL get_file_attributes_from_xattr(_In_reads_bytes_(len) char* val, _In_ UINT16 len, _Out_ ULONG* atts); + +ULONG get_file_attributes(_In_ _Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _In_ root* r, _In_ UINT64 inode, + _In_ UINT8 type, _In_ BOOL dotfile, _In_ BOOL ignore_xa, _In_opt_ PIRP Irp); + +_Success_(return) +BOOL get_xattr(_In_ _Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _In_ root* subvol, _In_ UINT64 inode, _In_z_ char* name, _In_ UINT32 
crc32, + _Out_ UINT8** data, _Out_ UINT16* datalen, _In_opt_ PIRP Irp); + +#ifndef DEBUG_FCB_REFCOUNTS +void free_fcb(_Requires_exclusive_lock_held_(_Curr_->fcb_lock) _In_ device_extension* Vcb, _Inout_ fcb* fcb); +#endif +void free_fileref(_Requires_exclusive_lock_held_(_Curr_->fcb_lock) _In_ device_extension* Vcb, _Inout_ file_ref* fr); +void protect_superblocks(_Inout_ chunk* c); +BOOL is_top_level(_In_ PIRP Irp); +NTSTATUS create_root(_In_ _Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _In_ UINT64 id, + _Out_ root** rootptr, _In_ BOOL no_tree, _In_ UINT64 offset, _In_opt_ PIRP Irp); +void uninit(_In_ device_extension* Vcb, _In_ BOOL flush); +NTSTATUS dev_ioctl(_In_ PDEVICE_OBJECT DeviceObject, _In_ ULONG ControlCode, _In_reads_bytes_opt_(InputBufferSize) PVOID InputBuffer, _In_ ULONG InputBufferSize, + _Out_writes_bytes_opt_(OutputBufferSize) PVOID OutputBuffer, _In_ ULONG OutputBufferSize, _In_ BOOLEAN Override, _Out_opt_ IO_STATUS_BLOCK* iosb); +BOOL is_file_name_valid(_In_ PUNICODE_STRING us, _In_ BOOL posix); +void send_notification_fileref(_In_ file_ref* fileref, _In_ ULONG filter_match, _In_ ULONG action, _In_opt_ PUNICODE_STRING stream); +void send_notification_fcb(_In_ file_ref* fileref, _In_ ULONG filter_match, _In_ ULONG action, _In_opt_ PUNICODE_STRING stream); + +_Ret_z_ +WCHAR* file_desc(_In_ PFILE_OBJECT FileObject); +WCHAR* file_desc_fileref(_In_ file_ref* fileref); +void mark_fcb_dirty(_In_ fcb* fcb); +void mark_fileref_dirty(_In_ file_ref* fileref); +NTSTATUS delete_fileref(_In_ file_ref* fileref, _In_ PFILE_OBJECT FileObject, _In_opt_ PIRP Irp, _In_ LIST_ENTRY* rollback); +void chunk_lock_range(_In_ device_extension* Vcb, _In_ chunk* c, _In_ UINT64 start, _In_ UINT64 length); +void chunk_unlock_range(_In_ device_extension* Vcb, _In_ chunk* c, _In_ UINT64 start, _In_ UINT64 length); +void init_device(_In_ device_extension* Vcb, _Inout_ device* dev, _In_ BOOL get_nums); +void init_file_cache(_In_ PFILE_OBJECT 
FileObject, _In_ CC_FILE_SIZES* ccfs); +NTSTATUS sync_read_phys(_In_ PDEVICE_OBJECT DeviceObject, _In_ UINT64 StartingOffset, _In_ ULONG Length, + _Out_writes_bytes_(Length) PUCHAR Buffer, _In_ BOOL override); +NTSTATUS get_device_pnp_name(_In_ PDEVICE_OBJECT DeviceObject, _Out_ PUNICODE_STRING pnp_name, _Out_ const GUID** guid); +void log_device_error(_In_ device_extension* Vcb, _Inout_ device* dev, _In_ int error); +NTSTATUS find_chunk_usage(_In_ _Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _In_opt_ PIRP Irp); +#ifdef __REACTOS__ +NTSTATUS NTAPI AddDevice(PDRIVER_OBJECT DriverObject, PDEVICE_OBJECT PhysicalDeviceObject); +#else +NTSTATUS AddDevice(PDRIVER_OBJECT DriverObject, PDEVICE_OBJECT PhysicalDeviceObject); +#endif #ifdef _MSC_VER #define funcname __FUNCTION__ @@ -790,10 +1065,6 @@ NTSTATUS sync_read_phys(PDEVICE_OBJECT DeviceObject, LONGLONG StartingOffset, UL #define funcname __func__ #endif -// FIXME - we probably shouldn't be moving funcname etc. around if we're not printing debug messages -#define free_fcb(fcb) _free_fcb(fcb, funcname, __FILE__, __LINE__) -#define free_fileref(fileref) _free_fileref(fileref, funcname, __FILE__, __LINE__) - extern BOOL have_sse2; extern UINT32 mount_compress; @@ -802,9 +1073,13 @@ extern UINT32 mount_compress_type; extern UINT32 mount_zlib_level; extern UINT32 mount_flush_interval; extern UINT32 mount_max_inline; -extern UINT32 mount_raid5_recalculation; -extern UINT32 mount_raid6_recalculation; extern UINT32 mount_skip_balance; +extern UINT32 mount_no_barrier; +extern UINT32 mount_no_trim; +extern UINT32 mount_clear_cache; +extern UINT32 mount_allow_degraded; +extern UINT32 mount_readonly; +extern UINT32 no_pnp; #ifdef _DEBUG @@ -820,7 +1095,7 @@ extern UINT32 debug_log_level; #define FIXME(s, ...) MSG(funcname, __FILE__, __LINE__, s, 1, ##__VA_ARGS__) #define ERR(s, ...) 
MSG(funcname, __FILE__, __LINE__, s, 1, ##__VA_ARGS__) -void STDCALL _debug_message(const char* func, const char* file, unsigned int line, char* s, ...); +void _debug_message(_In_ const char* func, _In_ const char* file, _In_ unsigned int line, _In_ char* s, ...); #else @@ -831,7 +1106,7 @@ void STDCALL _debug_message(const char* func, const char* file, unsigned int lin #define FIXME(s, ...) MSG(funcname, s, 1, ##__VA_ARGS__) #define ERR(s, ...) MSG(funcname, s, 1, ##__VA_ARGS__) -void STDCALL _debug_message(const char* func, char* s, ...); +void _debug_message(_In_ const char* func, _In_ char* s, ...); #endif @@ -839,27 +1114,21 @@ void STDCALL _debug_message(const char* func, char* s, ...); #define TRACE(s, ...) #define WARN(s, ...) -#ifndef __REACTOS__ -#define FIXME(s, ...) DbgPrint("Btrfs FIXME : " funcname " : " s, ##__VA_ARGS__) -#define ERR(s, ...) DbgPrint("Btrfs ERR : " funcname " : " s, ##__VA_ARGS__) -#else #define FIXME(s, ...) DbgPrint("Btrfs FIXME : %s : " s, funcname, ##__VA_ARGS__) #define ERR(s, ...) 
DbgPrint("Btrfs ERR : %s : " s, funcname, ##__VA_ARGS__) -#endif #endif -static __inline void increase_chunk_usage(chunk* c, UINT64 delta) { - c->used += delta; - - TRACE("increasing size of chunk %llx by %llx\n", c->offset, delta); -} +#ifdef DEBUG_FCB_REFCOUNTS +void _free_fcb(_Requires_exclusive_lock_held_(_Curr_->fcb_lock) _In_ device_extension* Vcb, _Inout_ fcb* fcb, _In_ const char* func); +#define free_fcb(Vcb, fcb) _free_fcb(Vcb, fcb, funcname) +#endif // in fastio.c -void STDCALL init_fast_io_dispatch(FAST_IO_DISPATCH** fiod); +void init_fast_io_dispatch(FAST_IO_DISPATCH** fiod); // in crc32c.c -UINT32 STDCALL calc_crc32c(UINT32 seed, UINT8* msg, ULONG msglen); +UINT32 calc_crc32c(_In_ UINT32 seed, _In_reads_bytes_(msglen) UINT8* msg, _In_ ULONG msglen); typedef struct { LIST_ENTRY* list; @@ -875,8 +1144,6 @@ typedef struct { } rollback_extent; enum rollback_type { - ROLLBACK_INSERT_ITEM, - ROLLBACK_DELETE_ITEM, ROLLBACK_INSERT_EXTENT, ROLLBACK_DELETE_EXTENT, ROLLBACK_ADD_SPACE, @@ -890,92 +1157,145 @@ typedef struct { } rollback_item; // in treefuncs.c -NTSTATUS STDCALL _find_item(device_extension* Vcb, root* r, traverse_ptr* tp, const KEY* searchkey, BOOL ignore, PIRP Irp, const char* func, const char* file, unsigned int line); -NTSTATUS STDCALL _find_item_to_level(device_extension* Vcb, root* r, traverse_ptr* tp, const KEY* searchkey, BOOL ignore, UINT8 level, - PIRP Irp, const char* func, const char* file, unsigned int line); -BOOL STDCALL _find_next_item(device_extension* Vcb, const traverse_ptr* tp, traverse_ptr* next_tp, BOOL ignore, PIRP Irp, const char* func, const char* file, unsigned int line); -BOOL STDCALL _find_prev_item(device_extension* Vcb, const traverse_ptr* tp, traverse_ptr* prev_tp, BOOL ignore, PIRP Irp, const char* func, const char* file, unsigned int line); -void STDCALL free_trees(device_extension* Vcb); -BOOL STDCALL insert_tree_item(device_extension* Vcb, root* r, UINT64 obj_id, UINT8 obj_type, UINT64 offset, void* data, UINT32 
size, traverse_ptr* ptp, PIRP Irp, LIST_ENTRY* rollback); -void STDCALL delete_tree_item(device_extension* Vcb, traverse_ptr* tp, LIST_ENTRY* rollback); -tree* STDCALL _free_tree(tree* t, const char* func, const char* file, unsigned int line); -NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree** pt, tree* parent, PIRP Irp, const char* func, const char* file, unsigned int line); -NTSTATUS STDCALL _do_load_tree(device_extension* Vcb, tree_holder* th, root* r, tree* t, tree_data* td, BOOL* loaded, PIRP Irp, - const char* func, const char* file, unsigned int line); -void clear_rollback(device_extension* Vcb, LIST_ENTRY* rollback); +NTSTATUS find_item(_In_ _Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _In_ root* r, _Out_ traverse_ptr* tp, + _In_ const KEY* searchkey, _In_ BOOL ignore, _In_opt_ PIRP Irp); +NTSTATUS find_item_to_level(device_extension* Vcb, root* r, traverse_ptr* tp, const KEY* searchkey, BOOL ignore, UINT8 level, PIRP Irp); +BOOL find_next_item(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, const traverse_ptr* tp, traverse_ptr* next_tp, BOOL ignore, PIRP Irp); +BOOL find_prev_item(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, const traverse_ptr* tp, traverse_ptr* prev_tp, PIRP Irp); +void free_trees(device_extension* Vcb); +NTSTATUS insert_tree_item(_In_ _Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _In_ root* r, _In_ UINT64 obj_id, + _In_ UINT8 obj_type, _In_ UINT64 offset, _In_reads_bytes_opt_(size) _When_(return >= 0, __drv_aliasesMem) void* data, + _In_ UINT16 size, _Out_opt_ traverse_ptr* ptp, _In_opt_ PIRP Irp); +NTSTATUS delete_tree_item(_In_ _Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _Inout_ traverse_ptr* tp); +tree* free_tree(tree* t); +NTSTATUS load_tree(device_extension* Vcb, UINT64 addr, root* r, tree** pt, UINT64 generation, PIRP Irp); +NTSTATUS do_load_tree(device_extension* Vcb, tree_holder* th, root* r, 
tree* t, tree_data* td, BOOL* loaded, PIRP Irp); +void clear_rollback(LIST_ENTRY* rollback); void do_rollback(device_extension* Vcb, LIST_ENTRY* rollback); void free_trees_root(device_extension* Vcb, root* r); -void add_rollback(device_extension* Vcb, LIST_ENTRY* rollback, enum rollback_type type, void* ptr); -void commit_batch_list(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback); +void add_rollback(_In_ LIST_ENTRY* rollback, _In_ enum rollback_type type, _In_ __drv_aliasesMem void* ptr); +NTSTATUS commit_batch_list(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp); void clear_batch_list(device_extension* Vcb, LIST_ENTRY* batchlist); - -#define find_item(Vcb, r, tp, searchkey, ignore, Irp) _find_item(Vcb, r, tp, searchkey, ignore, Irp, funcname, __FILE__, __LINE__) -#define find_item_to_level(Vcb, r, tp, searchkey, ignore, level, Irp) _find_item_to_level(Vcb, r, tp, searchkey, ignore, level, Irp, funcname, __FILE__, __LINE__) -#define find_next_item(Vcb, tp, next_tp, ignore, Irp) _find_next_item(Vcb, tp, next_tp, ignore, Irp, funcname, __FILE__, __LINE__) -#define find_prev_item(Vcb, tp, prev_tp, ignore, Irp) _find_prev_item(Vcb, tp, prev_tp, ignore, Irp, funcname, __FILE__, __LINE__) -#define free_tree(t) _free_tree(t, funcname, __FILE__, __LINE__) -#define load_tree(t, addr, r, pt, parent, Irp) _load_tree(t, addr, r, pt, parent, Irp, funcname, __FILE__, __LINE__) -#define do_load_tree(Vcb, th, r, t, td, loaded, Irp) _do_load_tree(Vcb, th, r, t, td, loaded, Irp, funcname, __FILE__, __LINE__) +NTSTATUS skip_to_difference(device_extension* Vcb, traverse_ptr* tp, traverse_ptr* tp2, BOOL* ended1, BOOL* ended2); // in search.c -void remove_drive_letter(PDEVICE_OBJECT mountmgr, volume* v); -void add_volume(PDEVICE_OBJECT mountmgr, PUNICODE_STRING us); +NTSTATUS remove_drive_letter(PDEVICE_OBJECT mountmgr, PUNICODE_STRING devpath); + +_Function_class_(KSTART_ROUTINE) +#ifdef __REACTOS__ 
+void NTAPI mountmgr_thread(_In_ void* context); +#else +void mountmgr_thread(_In_ void* context); +#endif + +_Function_class_(DRIVER_NOTIFICATION_CALLBACK_ROUTINE) #ifdef __REACTOS__ NTSTATUS NTAPI pnp_notification(PVOID NotificationStructure, PVOID Context); #else NTSTATUS pnp_notification(PVOID NotificationStructure, PVOID Context); #endif +void disk_arrival(PDRIVER_OBJECT DriverObject, PUNICODE_STRING devpath); +void volume_arrival(PDRIVER_OBJECT DriverObject, PUNICODE_STRING devpath); +void volume_removal(PDRIVER_OBJECT DriverObject, PUNICODE_STRING devpath); + +_Function_class_(DRIVER_NOTIFICATION_CALLBACK_ROUTINE) +#ifdef __REACTOS__ +NTSTATUS NTAPI volume_notification(PVOID NotificationStructure, PVOID Context); +#else +NTSTATUS volume_notification(PVOID NotificationStructure, PVOID Context); +#endif + +void remove_volume_child(_Inout_ _Requires_exclusive_lock_held_(_Curr_->child_lock) _Releases_exclusive_lock_(_Curr_->child_lock) _In_ volume_device_extension* vde, + _In_ volume_child* vc, _In_ BOOL skip_dev); + // in cache.c -NTSTATUS STDCALL init_cache(); -void STDCALL free_cache(); +NTSTATUS init_cache(); +void free_cache(); extern CACHE_MANAGER_CALLBACKS* cache_callbacks; // in write.c -NTSTATUS write_file(device_extension* Vcb, PIRP Irp, BOOL wait, BOOL deferred_write); -NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void* buf, ULONG* length, BOOL paging_io, BOOL no_cache, - BOOL wait, BOOL deferred_write, LIST_ENTRY* rollback); +NTSTATUS write_file(device_extension* Vcb, PIRP Irp, BOOLEAN wait, BOOLEAN deferred_write); +NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void* buf, ULONG* length, BOOLEAN paging_io, BOOLEAN no_cache, + BOOLEAN wait, BOOLEAN deferred_write, BOOLEAN write_irp, LIST_ENTRY* rollback); NTSTATUS truncate_file(fcb* fcb, UINT64 end, PIRP Irp, LIST_ENTRY* rollback); NTSTATUS extend_file(fcb* fcb, file_ref* fileref, UINT64 end, BOOL prealloc, PIRP Irp, LIST_ENTRY* rollback); 
NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 end_data, PIRP Irp, LIST_ENTRY* rollback); chunk* get_chunk_from_address(device_extension* Vcb, UINT64 address); -chunk* alloc_chunk(device_extension* Vcb, UINT64 flags); -NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, BOOL need_free, UINT32 length, write_data_context* wtc, PIRP Irp, chunk* c); -NTSTATUS STDCALL write_data_complete(device_extension* Vcb, UINT64 address, void* data, UINT32 length, PIRP Irp, chunk* c); +NTSTATUS alloc_chunk(device_extension* Vcb, UINT64 flags, chunk** pc, BOOL full_size); +NTSTATUS write_data(_In_ device_extension* Vcb, _In_ UINT64 address, _In_reads_bytes_(length) void* data, _In_ UINT32 length, _In_ write_data_context* wtc, + _In_opt_ PIRP Irp, _In_opt_ chunk* c, _In_ BOOL file_write, _In_ UINT64 irp_offset, _In_ ULONG priority); +NTSTATUS write_data_complete(device_extension* Vcb, UINT64 address, void* data, UINT32 length, PIRP Irp, chunk* c, BOOL file_write, UINT64 irp_offset, ULONG priority); void free_write_data_stripes(write_data_context* wtc); -NTSTATUS STDCALL drv_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); -BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start_data, UINT64 length, BOOL prealloc, void* data, - PIRP Irp, LIST_ENTRY* rollback, UINT8 compression, UINT64 decoded_size); -NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, PIRP Irp, LIST_ENTRY* rollback); -NTSTATUS do_write_file(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, PIRP Irp, LIST_ENTRY* rollback); + +_Dispatch_type_(IRP_MJ_WRITE) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); + +_Requires_lock_held_(c->lock) +_When_(return != 0, _Releases_lock_(c->lock)) +BOOL insert_extent_chunk(_In_ device_extension* Vcb, _In_ fcb* fcb, _In_ chunk* c, _In_ UINT64 start_data, _In_ UINT64 length, _In_ BOOL 
prealloc, _In_opt_ void* data, + _In_opt_ PIRP Irp, _In_ LIST_ENTRY* rollback, _In_ UINT8 compression, _In_ UINT64 decoded_size, _In_ BOOL file_write, _In_ UINT64 irp_offset); + +NTSTATUS do_write_file(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, PIRP Irp, BOOL file_write, UINT32 irp_offset, LIST_ENTRY* rollback); NTSTATUS write_compressed(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, PIRP Irp, LIST_ENTRY* rollback); BOOL find_data_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address); void get_raid56_lock_range(chunk* c, UINT64 address, UINT64 length, UINT64* lockaddr, UINT64* locklen); +NTSTATUS calc_csum(_In_ device_extension* Vcb, _In_reads_bytes_(sectors*Vcb->superblock.sector_size) UINT8* data, + _In_ UINT32 sectors, _Out_writes_bytes_(sectors*sizeof(UINT32)) UINT32* csum); +void add_insert_extent_rollback(LIST_ENTRY* rollback, fcb* fcb, extent* ext); +NTSTATUS add_extent_to_fcb(_In_ fcb* fcb, _In_ UINT64 offset, _In_reads_bytes_(edsize) EXTENT_DATA* ed, _In_ UINT16 edsize, + _In_ BOOL unique, _In_opt_ _When_(return >= 0, __drv_aliasesMem) UINT32* csum, _In_ LIST_ENTRY* rollback); +void add_extent(_In_ fcb* fcb, _In_ LIST_ENTRY* prevextle, _In_ __drv_aliasesMem extent* newext); // in dirctrl.c -NTSTATUS STDCALL drv_directory_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); -ULONG STDCALL get_reparse_tag(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type, ULONG atts, PIRP Irp); + +_Dispatch_type_(IRP_MJ_DIRECTORY_CONTROL) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_directory_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); + +ULONG get_reparse_tag(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type, ULONG atts, BOOL lxss, PIRP Irp); // in security.c -NTSTATUS STDCALL drv_query_security(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); -NTSTATUS STDCALL drv_set_security(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); -BOOL get_sd_from_xattr(fcb* fcb, ULONG buflen); + 
+_Dispatch_type_(IRP_MJ_QUERY_SECURITY) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_query_security(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); + +_Dispatch_type_(IRP_MJ_SET_SECURITY) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_set_security(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); + void fcb_get_sd(fcb* fcb, struct _fcb* parent, BOOL look_for_xattr, PIRP Irp); -// UINT32 STDCALL get_uid(); void add_user_mapping(WCHAR* sidstring, ULONG sidstringlength, UINT32 uid); +void add_group_mapping(WCHAR* sidstring, ULONG sidstringlength, UINT32 gid); UINT32 sid_to_uid(PSID sid); -void uid_to_sid(UINT32 uid, PSID* sid); +NTSTATUS uid_to_sid(UINT32 uid, PSID* sid); NTSTATUS fcb_get_new_sd(fcb* fcb, file_ref* parfileref, ACCESS_STATE* as); +void find_gid(struct _fcb* fcb, struct _fcb* parfcb, PSECURITY_SUBJECT_CONTEXT subjcont); // in fileinfo.c -NTSTATUS STDCALL drv_set_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); -NTSTATUS STDCALL drv_query_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); + +_Dispatch_type_(IRP_MJ_SET_INFORMATION) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_set_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); + +_Dispatch_type_(IRP_MJ_QUERY_INFORMATION) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_query_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); + +_Dispatch_type_(IRP_MJ_QUERY_EA) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_query_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); + +_Dispatch_type_(IRP_MJ_SET_EA) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_set_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); + BOOL has_open_children(file_ref* fileref); -NTSTATUS STDCALL stream_set_end_of_file_information(device_extension* Vcb, UINT64 end, fcb* fcb, file_ref* fileref, PFILE_OBJECT FileObject, BOOL advance_only, LIST_ENTRY* rollback); -NTSTATUS fileref_get_filename(file_ref* fileref, PUNICODE_STRING fn, USHORT* name_offset); -NTSTATUS open_fileref_by_inode(device_extension* Vcb, 
root* subvol, UINT64 inode, file_ref** pfr, PIRP Irp); -NTSTATUS STDCALL drv_query_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); -NTSTATUS STDCALL drv_set_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +NTSTATUS stream_set_end_of_file_information(device_extension* Vcb, UINT16 end, fcb* fcb, file_ref* fileref, BOOL advance_only); +NTSTATUS fileref_get_filename(file_ref* fileref, PUNICODE_STRING fn, USHORT* name_offset, ULONG* preqlen); +NTSTATUS open_fileref_by_inode(_Requires_exclusive_lock_held_(_Curr_->fcb_lock) device_extension* Vcb, root* subvol, UINT64 inode, file_ref** pfr, PIRP Irp); void insert_dir_child_into_hash_lists(fcb* fcb, dir_child* dc); void remove_dir_child_from_hash_lists(fcb* fcb, dir_child* dc); @@ -985,92 +1305,128 @@ NTSTATUS set_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp); NTSTATUS delete_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp); // in create.c -NTSTATUS STDCALL drv_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); -NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnus, file_ref* related, BOOL parent, USHORT* parsed, ULONG* fn_offset, - POOL_TYPE pooltype, BOOL case_sensitive, PIRP Irp); -NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type, PANSI_STRING utf8, fcb* parent, fcb** pfcb, POOL_TYPE pooltype, PIRP Irp); -NTSTATUS open_fcb_stream(device_extension* Vcb, root* subvol, UINT64 inode, ANSI_STRING* xattr, UINT32 streamhash, fcb* parent, fcb** pfcb, PIRP Irp); -void insert_fileref_child(file_ref* parent, file_ref* child, BOOL do_lock); -NTSTATUS fcb_get_last_dir_index(fcb* fcb, UINT64* index, PIRP Irp); -NTSTATUS verify_vcb(device_extension* Vcb, PIRP Irp); -NTSTATUS load_csum(device_extension* Vcb, UINT32* csum, UINT64 start, UINT64 length, PIRP Irp); -NTSTATUS load_dir_children(fcb* fcb, BOOL ignore_size, PIRP Irp); -NTSTATUS add_dir_child(fcb* fcb, UINT64 inode, BOOL subvol, UINT64 index, PANSI_STRING utf8, PUNICODE_STRING name, PUNICODE_STRING 
name_uc, UINT8 type, dir_child** pdc); + +_Dispatch_type_(IRP_MJ_CREATE) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); + +NTSTATUS open_fileref(_Requires_lock_held_(_Curr_->tree_lock) _Requires_exclusive_lock_held_(_Curr_->fcb_lock) _In_ device_extension* Vcb, _Out_ file_ref** pfr, + _In_ PUNICODE_STRING fnus, _In_opt_ file_ref* related, _In_ BOOL parent, _Out_opt_ USHORT* parsed, _Out_opt_ ULONG* fn_offset, _In_ POOL_TYPE pooltype, + _In_ BOOL case_sensitive, _In_opt_ PIRP Irp); +NTSTATUS open_fcb(_Requires_lock_held_(_Curr_->tree_lock) _Requires_exclusive_lock_held_(_Curr_->fcb_lock) device_extension* Vcb, + root* subvol, UINT64 inode, UINT8 type, PANSI_STRING utf8, fcb* parent, fcb** pfcb, POOL_TYPE pooltype, PIRP Irp); +NTSTATUS load_csum(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, UINT32* csum, UINT64 start, UINT64 length, PIRP Irp); +NTSTATUS load_dir_children(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, fcb* fcb, BOOL ignore_size, PIRP Irp); +NTSTATUS add_dir_child(fcb* fcb, UINT64 inode, BOOL subvol, PANSI_STRING utf8, PUNICODE_STRING name, UINT8 type, dir_child** pdc); +NTSTATUS open_fileref_child(_Requires_lock_held_(_Curr_->tree_lock) _Requires_exclusive_lock_held_(_Curr_->fcb_lock) _In_ device_extension* Vcb, + _In_ file_ref* sf, _In_ PUNICODE_STRING name, _In_ BOOL case_sensitive, _In_ BOOL lastpart, _In_ BOOL streampart, + _In_ POOL_TYPE pooltype, _Out_ file_ref** psf2, _In_opt_ PIRP Irp); +fcb* create_fcb(device_extension* Vcb, POOL_TYPE pool_type); +NTSTATUS find_file_in_dir(PUNICODE_STRING filename, fcb* fcb, root** subvol, UINT64* inode, dir_child** pdc, BOOL case_sensitive); +UINT32 inherit_mode(fcb* parfcb, BOOL is_dir); +file_ref* create_fileref(device_extension* Vcb); // in fsctl.c -NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL user); +NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP* Pirp, UINT32 type); void 
do_unlock_volume(device_extension* Vcb); +void trim_whole_device(device* dev); +void flush_subvol_fcbs(root* subvol); +BOOL fcb_is_inline(fcb* fcb); // in flushthread.c -void STDCALL flush_thread(void* context); -NTSTATUS STDCALL do_write(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback); + +_Function_class_(KSTART_ROUTINE) +#ifdef __REACTOS__ +void NTAPI flush_thread(void* context); +#else +void flush_thread(void* context); +#endif + +NTSTATUS do_write(device_extension* Vcb, PIRP Irp); NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback); -void flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback); -NTSTATUS STDCALL write_data_phys(PDEVICE_OBJECT device, UINT64 address, void* data, UINT32 length); +NTSTATUS flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp); +NTSTATUS write_data_phys(_In_ PDEVICE_OBJECT device, _In_ UINT64 address, _In_reads_bytes_(length) void* data, _In_ UINT32 length); BOOL is_tree_unique(device_extension* Vcb, tree* t, PIRP Irp); -NTSTATUS do_tree_writes(device_extension* Vcb, LIST_ENTRY* tree_writes, PIRP Irp); -void add_checksum_entry(device_extension* Vcb, UINT64 address, ULONG length, UINT32* csum, PIRP Irp, LIST_ENTRY* rollback); +NTSTATUS do_tree_writes(device_extension* Vcb, LIST_ENTRY* tree_writes, BOOL no_free); +void add_checksum_entry(device_extension* Vcb, UINT64 address, ULONG length, UINT32* csum, PIRP Irp); BOOL find_metadata_address_in_chunk(device_extension* Vcb, chunk* c, UINT64* address); +void add_trim_entry_avoid_sb(device_extension* Vcb, device* dev, UINT64 address, UINT64 size); +NTSTATUS insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, root* r, UINT64 objid, UINT8 objtype, UINT64 offset, + _In_opt_ _When_(return >= 0, __drv_aliasesMem) void* data, UINT16 datalen, enum batch_operation operation); +NTSTATUS flush_partial_stripe(device_extension* Vcb, chunk* c, partial_stripe* ps); +NTSTATUS 
update_dev_item(device_extension* Vcb, device* device, PIRP Irp); // in read.c -NTSTATUS STDCALL drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp); -NTSTATUS STDCALL read_data(device_extension* Vcb, UINT64 addr, UINT32 length, UINT32* csum, BOOL is_tree, UINT8* buf, chunk* c, chunk** pc, - PIRP Irp, BOOL check_nocsum_parity); -NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, ULONG* pbr, PIRP Irp, BOOL check_nocsum_parity); -NTSTATUS do_read(PIRP Irp, BOOL wait, ULONG* bytes_read); + +_Dispatch_type_(IRP_MJ_READ) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp); + +NTSTATUS read_data(_In_ device_extension* Vcb, _In_ UINT64 addr, _In_ UINT32 length, _In_reads_bytes_opt_(length*sizeof(UINT32)/Vcb->superblock.sector_size) UINT32* csum, + _In_ BOOL is_tree, _Out_writes_bytes_(length) UINT8* buf, _In_opt_ chunk* c, _Out_opt_ chunk** pc, _In_opt_ PIRP Irp, _In_ UINT64 generation, _In_ BOOL file_read, + _In_ ULONG priority); +NTSTATUS read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, ULONG* pbr, PIRP Irp); +NTSTATUS read_stream(fcb* fcb, UINT8* data, UINT64 start, ULONG length, ULONG* pbr); +NTSTATUS do_read(PIRP Irp, BOOLEAN wait, ULONG* bytes_read); +NTSTATUS check_csum(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32* csum); +void raid6_recover2(UINT8* sectors, UINT16 num_stripes, ULONG sector_size, UINT16 missing1, UINT16 missing2, UINT8* out); // in pnp.c -NTSTATUS STDCALL drv_pnp(PDEVICE_OBJECT DeviceObject, PIRP Irp); + +_Dispatch_type_(IRP_MJ_PNP) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_pnp(PDEVICE_OBJECT DeviceObject, PIRP Irp); + +NTSTATUS pnp_surprise_removal(PDEVICE_OBJECT DeviceObject, PIRP Irp); +NTSTATUS pnp_query_remove_device(PDEVICE_OBJECT DeviceObject, PIRP Irp); // in free-space.c -NTSTATUS load_free_space_cache(device_extension* Vcb, chunk* c, PIRP Irp); +NTSTATUS load_cache_chunk(device_extension* Vcb, chunk* c, PIRP Irp); NTSTATUS 
clear_free_space_cache(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp); NTSTATUS allocate_cache(device_extension* Vcb, BOOL* changed, PIRP Irp, LIST_ENTRY* rollback); NTSTATUS update_chunk_caches(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback); +NTSTATUS update_chunk_caches_tree(device_extension* Vcb, PIRP Irp); NTSTATUS add_space_entry(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 offset, UINT64 size); -void _space_list_add(device_extension* Vcb, chunk* c, BOOL deleting, UINT64 address, UINT64 length, LIST_ENTRY* rollback, const char* func); -void _space_list_add2(device_extension* Vcb, LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback, const char* func); -void _space_list_subtract(device_extension* Vcb, chunk* c, BOOL deleting, UINT64 address, UINT64 length, LIST_ENTRY* rollback, const char* func); -void _space_list_subtract2(device_extension* Vcb, LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback, const char* func); - -#define space_list_add(Vcb, c, deleting, address, length, rollback) _space_list_add(Vcb, c, deleting, address, length, rollback, funcname) -#define space_list_add2(Vcb, list, list_size, address, length, rollback) _space_list_add2(Vcb, list, list_size, address, length, NULL, rollback, funcname) -#define space_list_subtract(Vcb, c, deleting, address, length, rollback) _space_list_subtract(Vcb, c, deleting, address, length, rollback, funcname) -#define space_list_subtract2(Vcb, list, list_size, address, length, rollback) _space_list_subtract2(Vcb, list, list_size, address, length, NULL, rollback, funcname) +void space_list_add(chunk* c, UINT64 address, UINT64 length, LIST_ENTRY* rollback); +void space_list_add2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback); +void space_list_subtract(chunk* c, BOOL deleting, UINT64 address, UINT64 length, LIST_ENTRY* rollback); +void 
space_list_subtract2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback); +NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, BOOL load_only, PIRP Irp); // in extent-tree.c -NTSTATUS increase_extent_refcount_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 inode, UINT64 offset, UINT32 refcount, PIRP Irp, LIST_ENTRY* rollback); +NTSTATUS increase_extent_refcount_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 inode, UINT64 offset, UINT32 refcount, PIRP Irp); NTSTATUS decrease_extent_refcount_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 inode, UINT64 offset, - UINT32 refcount, BOOL superseded, PIRP Irp, LIST_ENTRY* rollback); -NTSTATUS decrease_extent_refcount_tree(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT8 level, PIRP Irp, LIST_ENTRY* rollback); -void decrease_chunk_usage(chunk* c, UINT64 delta); -// NTSTATUS convert_old_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp, LIST_ENTRY* rollback); + UINT32 refcount, BOOL superseded, PIRP Irp); +NTSTATUS decrease_extent_refcount_tree(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT8 level, PIRP Irp); UINT64 get_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp); BOOL is_extent_unique(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp); -NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem, UINT8 level, PIRP Irp, LIST_ENTRY* rollback); +NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem, UINT8 level, PIRP Irp); UINT64 get_extent_flags(device_extension* Vcb, UINT64 address, PIRP Irp); void update_extent_flags(device_extension* Vcb, UINT64 address, UINT64 flags, PIRP Irp); NTSTATUS 
update_changed_extent_ref(device_extension* Vcb, chunk* c, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, - signed long long count, BOOL no_csum, BOOL superseded, PIRP Irp); + INT32 count, BOOL no_csum, BOOL superseded, PIRP Irp); void add_changed_extent_ref(chunk* c, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, UINT32 count, BOOL no_csum); UINT64 find_extent_shared_tree_refcount(device_extension* Vcb, UINT64 address, UINT64 parent, PIRP Irp); -UINT64 find_extent_shared_data_refcount(device_extension* Vcb, UINT64 address, UINT64 parent, PIRP Irp); +UINT32 find_extent_shared_data_refcount(device_extension* Vcb, UINT64 address, UINT64 parent, PIRP Irp); NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem, - UINT8 level, UINT64 parent, BOOL superseded, PIRP Irp, LIST_ENTRY* rollback); + UINT8 level, UINT64 parent, BOOL superseded, PIRP Irp); UINT64 get_extent_data_ref_hash2(UINT64 root, UINT64 objid, UINT64 offset); // in worker-thread.c void do_read_job(PIRP Irp); void do_write_job(device_extension* Vcb, PIRP Irp); +BOOL add_thread_job(device_extension* Vcb, PIRP Irp); // in registry.c -void STDCALL read_registry(PUNICODE_STRING regpath); +void read_registry(PUNICODE_STRING regpath, BOOL refresh); NTSTATUS registry_mark_volume_mounted(BTRFS_UUID* uuid); NTSTATUS registry_mark_volume_unmounted(BTRFS_UUID* uuid); NTSTATUS registry_load_volume_options(device_extension* Vcb); +void watch_registry(HANDLE regh); // in compress.c -NTSTATUS decompress(UINT8 type, UINT8* inbuf, UINT64 inlen, UINT8* outbuf, UINT64 outlen); +NTSTATUS zlib_decompress(UINT8* inbuf, UINT32 inlen, UINT8* outbuf, UINT32 outlen); +NTSTATUS lzo_decompress(UINT8* inbuf, UINT32 inlen, UINT8* outbuf, UINT32 outlen, UINT32 inpageoff); NTSTATUS write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, PIRP Irp, LIST_ENTRY* rollback); // in 
galois.c @@ -1081,14 +1437,20 @@ UINT8 gmul(UINT8 a, UINT8 b); UINT8 gdiv(UINT8 a, UINT8 b); // in devctrl.c -NTSTATUS STDCALL drv_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); + +_Dispatch_type_(IRP_MJ_DEVICE_CONTROL) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); // in calcthread.c + +_Function_class_(KSTART_ROUTINE) #ifdef __REACTOS__ void NTAPI calc_thread(void* context); #else void calc_thread(void* context); #endif + NTSTATUS add_calc_job(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32* csum, calc_job** pcj); void free_calc_job(calc_job* cj); @@ -1098,9 +1460,61 @@ NTSTATUS query_balance(device_extension* Vcb, void* data, ULONG length); NTSTATUS pause_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode); NTSTATUS resume_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode); NTSTATUS stop_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode); -NTSTATUS look_for_balance_item(device_extension* Vcb); +NTSTATUS look_for_balance_item(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb); NTSTATUS remove_device(device_extension* Vcb, void* data, ULONG length, KPROCESSOR_MODE processor_mode); +_Function_class_(KSTART_ROUTINE) +#ifdef __REACTOS__ +void NTAPI balance_thread(void* context); +#else +void balance_thread(void* context); +#endif + +// in volume.c +NTSTATUS vol_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +NTSTATUS vol_close(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +NTSTATUS vol_read(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +NTSTATUS vol_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +NTSTATUS vol_query_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +NTSTATUS vol_set_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +NTSTATUS vol_query_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +NTSTATUS vol_set_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +NTSTATUS vol_flush_buffers(IN 
PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +NTSTATUS vol_query_volume_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +NTSTATUS vol_set_volume_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +NTSTATUS vol_cleanup(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +NTSTATUS vol_directory_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +NTSTATUS vol_file_system_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +NTSTATUS vol_lock_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +NTSTATUS vol_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +NTSTATUS vol_shutdown(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +NTSTATUS vol_query_security(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +NTSTATUS vol_set_security(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +NTSTATUS vol_power(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +void add_volume_device(superblock* sb, PDEVICE_OBJECT mountmgr, PUNICODE_STRING devpath, UINT64 length, ULONG disk_num, ULONG part_num); +NTSTATUS mountmgr_add_drive_letter(PDEVICE_OBJECT mountmgr, PUNICODE_STRING devpath); + +_Function_class_(DRIVER_NOTIFICATION_CALLBACK_ROUTINE) +#ifdef __REACTOS__ +NTSTATUS NTAPI pnp_removal(PVOID NotificationStructure, PVOID Context); +#else +NTSTATUS pnp_removal(PVOID NotificationStructure, PVOID Context); +#endif + +// in scrub.c +NTSTATUS start_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode); +NTSTATUS query_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode, void* data, ULONG length); +NTSTATUS pause_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode); +NTSTATUS resume_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode); +NTSTATUS stop_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode); + +// in send.c +NTSTATUS send_subvol(device_extension* Vcb, void* data, ULONG datalen, PFILE_OBJECT FileObject, PIRP Irp); +NTSTATUS read_send_buffer(device_extension* Vcb, PFILE_OBJECT FileObject, void* data, ULONG datalen, ULONG_PTR* 
retlen, KPROCESSOR_MODE processor_mode); + +// based on function in sys/sysmacros.h +#define makedev(major, minor) (((minor) & 0xFF) | (((major) & 0xFFF) << 8) | (((UINT64)((minor) & ~0xFF)) << 12) | (((UINT64)((major) & ~0xFFF)) << 32)) + #define fast_io_possible(fcb) (!FsRtlAreThereCurrentFileLocks(&fcb->lock) && !fcb->Vcb->readonly ? FastIoIsPossible : FastIoIsQuestionable) static __inline void print_open_trees(device_extension* Vcb) { @@ -1119,16 +1533,16 @@ static __inline BOOL write_fcb_compressed(fcb* fcb) { // make sure we don't accidentally write the cache inodes or pagefile compressed if (fcb->subvol->id == BTRFS_ROOT_ROOT || fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE) return FALSE; - + if (fcb->Vcb->options.compress_force) return TRUE; - + if (fcb->inode_item.flags & BTRFS_INODE_NOCOMPRESS) return FALSE; - + if (fcb->inode_item.flags & BTRFS_INODE_COMPRESS || fcb->Vcb->options.compress) return TRUE; - + return FALSE; } @@ -1137,7 +1551,7 @@ static __inline void do_xor(UINT8* buf1, UINT8* buf2, UINT32 len) { #ifndef __REACTOS__ __m128i x1, x2; #endif - + #ifndef __REACTOS__ if (have_sse2 && ((uintptr_t)buf1 & 0xf) == 0 && ((uintptr_t)buf2 & 0xf) == 0) { while (len >= 16) { @@ -1145,14 +1559,14 @@ static __inline void do_xor(UINT8* buf1, UINT8* buf2, UINT32 len) { x2 = _mm_load_si128((__m128i*)buf2); x1 = _mm_xor_si128(x1, x2); _mm_store_si128((__m128i*)buf1, x1); - + buf1 += 16; buf2 += 16; len -= 16; } } #endif - + for (j = 0; j < len; j++) { *buf1 ^= *buf2; buf1++; @@ -1160,8 +1574,6 @@ static __inline void do_xor(UINT8* buf1, UINT8* buf2, UINT32 len) { } } -#define first_device(Vcb) CONTAINING_RECORD(Vcb->devices.Flink, device, list_entry) - #ifdef DEBUG_FCB_REFCOUNTS #ifdef DEBUG_LONG_MESSAGES #define increase_fileref_refcount(fileref) {\ @@ -1179,14 +1591,12 @@ static __inline void do_xor(UINT8* buf1, UINT8* buf2, UINT32 len) { #endif #ifdef _MSC_VER -// #define int3 __asm { int 3 } #define int3 __debugbreak() #else #define int3 asm("int3;") 
#endif -// if (Vcb->open_trees > 0) { ERR("open tree count = %i\n", Vcb->open_trees); print_open_trees(Vcb); int3; } -// else TRACE("open tree count = %i\n", Vcb->open_trees); +#define hex_digit(c) ((c) <= 9) ? ((c) + '0') : ((c) - 10 + 'a') // FIXME - find a way to catch unfreed trees again @@ -1246,18 +1656,27 @@ static __inline void do_xor(UINT8* buf1, UINT8* buf2, UINT32 len) { #define S_IXOTH (S_IXGRP >> 3) #endif -// LXSS programs can be distinguished by the fact they have a NULL PEB. -#ifdef _AMD64_ - static __inline BOOL called_from_lxss() { - UINT8* proc = (UINT8*)PsGetCurrentProcess(); - ULONG_PTR* peb = (ULONG_PTR*)&proc[0x3f8]; - - return !*peb; - } -#else -#define called_from_lxss() FALSE +#ifndef S_ISUID +#define S_ISUID 0004000 #endif +#ifndef S_ISGID +#define S_ISGID 0002000 +#endif + +#ifndef S_ISVTX +#define S_ISVTX 0001000 +#endif + +static __inline UINT64 fcb_alloc_size(fcb* fcb) { + if (S_ISDIR(fcb->inode_item.st_mode)) + return 0; + else if (fcb->atts & FILE_ATTRIBUTE_SPARSE_FILE) + return fcb->inode_item.st_blocks; + else + return sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size); +} + typedef BOOLEAN (*tPsIsDiskCountersEnabled)(); typedef VOID (*tPsUpdateDiskCounters)(PEPROCESS Process, ULONG64 BytesRead, ULONG64 BytesWritten, @@ -1269,16 +1688,81 @@ typedef BOOLEAN (*tCcCopyWriteEx)(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOf typedef BOOLEAN (*tCcCopyReadEx)(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG Length, BOOLEAN Wait, PVOID Buffer, PIO_STATUS_BLOCK IoStatus, PETHREAD IoIssuerThread); +#ifndef CC_ENABLE_DISK_IO_ACCOUNTING #define CC_ENABLE_DISK_IO_ACCOUNTING 0x00000010 +#endif typedef VOID (*tCcSetAdditionalCacheAttributesEx)(PFILE_OBJECT FileObject, ULONG Flags); +typedef VOID (*tFsRtlUpdateDiskCounters)(ULONG64 BytesRead, ULONG64 BytesWritten); + #ifndef __REACTOS__ +#ifndef _MSC_VER + #undef RtlIsNtDdiVersionAvailable BOOLEAN RtlIsNtDdiVersionAvailable(ULONG Version); -PEPROCESS 
PsGetThreadProcess(PETHREAD Thread); // not in mingw +PEPROCESS PsGetThreadProcess(_In_ PETHREAD Thread); // not in mingw +#endif + +// not in DDK headers - taken from winternl.h +typedef struct _LDR_DATA_TABLE_ENTRY { + PVOID Reserved1[2]; + LIST_ENTRY InMemoryOrderLinks; + PVOID Reserved2[2]; + PVOID DllBase; + PVOID Reserved3[2]; + UNICODE_STRING FullDllName; + BYTE Reserved4[8]; + PVOID Reserved5[3]; + union { + ULONG CheckSum; + PVOID Reserved6; + }; + ULONG TimeDateStamp; +} LDR_DATA_TABLE_ENTRY,*PLDR_DATA_TABLE_ENTRY; + +typedef struct _PEB_LDR_DATA { + BYTE Reserved1[8]; + PVOID Reserved2[3]; + LIST_ENTRY InMemoryOrderModuleList; +} PEB_LDR_DATA,*PPEB_LDR_DATA; + +typedef struct _RTL_USER_PROCESS_PARAMETERS { + BYTE Reserved1[16]; + PVOID Reserved2[10]; + UNICODE_STRING ImagePathName; + UNICODE_STRING CommandLine; +} RTL_USER_PROCESS_PARAMETERS,*PRTL_USER_PROCESS_PARAMETERS; + +typedef VOID (NTAPI *PPS_POST_PROCESS_INIT_ROUTINE)(VOID); + +typedef struct _PEB { + BYTE Reserved1[2]; + BYTE BeingDebugged; + BYTE Reserved2[1]; + PVOID Reserved3[2]; + PPEB_LDR_DATA Ldr; + PRTL_USER_PROCESS_PARAMETERS ProcessParameters; + BYTE Reserved4[104]; + PVOID Reserved5[52]; + PPS_POST_PROCESS_INIT_ROUTINE PostProcessInitRoutine; + BYTE Reserved6[128]; + PVOID Reserved7[1]; + ULONG SessionId; +} PEB,*PPEB; + +#ifdef _MSC_VER +__kernel_entry +NTSTATUS NTAPI ZwQueryInformationProcess( + IN HANDLE ProcessHandle, + IN PROCESSINFOCLASS ProcessInformationClass, + OUT PVOID ProcessInformation, + IN ULONG ProcessInformationLength, + OUT PULONG ReturnLength OPTIONAL +); +#endif #endif #if defined(__REACTOS__) && (NTDDI_VERSION < NTDDI_WIN7) diff --git a/reactos/drivers/filesystems/btrfs/btrfsioctl.h b/reactos/drivers/filesystems/btrfs/btrfsioctl.h index c28aafe4386..9c57c9cb609 100644 --- a/reactos/drivers/filesystems/btrfs/btrfsioctl.h +++ b/reactos/drivers/filesystems/btrfs/btrfsioctl.h @@ -3,6 +3,8 @@ #ifndef BTRFSIOCTL_H_DEFINED #define BTRFSIOCTL_H_DEFINED +#include "btrfs.h" 
+ #define FSCTL_BTRFS_GET_FILE_IDS CTL_CODE(FILE_DEVICE_UNKNOWN, 0x829, METHOD_OUT_DIRECT, FILE_ANY_ACCESS) #define FSCTL_BTRFS_CREATE_SUBVOL CTL_CODE(FILE_DEVICE_UNKNOWN, 0x82a, METHOD_IN_DIRECT, FILE_ANY_ACCESS) #define FSCTL_BTRFS_CREATE_SNAPSHOT CTL_CODE(FILE_DEVICE_UNKNOWN, 0x82b, METHOD_IN_DIRECT, FILE_ANY_ACCESS) @@ -19,6 +21,22 @@ #define FSCTL_BTRFS_REMOVE_DEVICE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x836, METHOD_IN_DIRECT, FILE_ANY_ACCESS) #define IOCTL_BTRFS_QUERY_FILESYSTEMS CTL_CODE(FILE_DEVICE_UNKNOWN, 0x837, METHOD_OUT_DIRECT, FILE_ANY_ACCESS) #define FSCTL_BTRFS_GET_UUID CTL_CODE(FILE_DEVICE_UNKNOWN, 0x838, METHOD_OUT_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_START_SCRUB CTL_CODE(FILE_DEVICE_UNKNOWN, 0x839, METHOD_OUT_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_QUERY_SCRUB CTL_CODE(FILE_DEVICE_UNKNOWN, 0x83a, METHOD_OUT_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_PAUSE_SCRUB CTL_CODE(FILE_DEVICE_UNKNOWN, 0x83b, METHOD_OUT_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_RESUME_SCRUB CTL_CODE(FILE_DEVICE_UNKNOWN, 0x83c, METHOD_OUT_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_STOP_SCRUB CTL_CODE(FILE_DEVICE_UNKNOWN, 0x83d, METHOD_OUT_DIRECT, FILE_ANY_ACCESS) +#define IOCTL_BTRFS_PROBE_VOLUME CTL_CODE(FILE_DEVICE_UNKNOWN, 0x83e, METHOD_OUT_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_RESET_STATS CTL_CODE(FILE_DEVICE_UNKNOWN, 0x83f, METHOD_IN_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_MKNOD CTL_CODE(FILE_DEVICE_UNKNOWN, 0x840, METHOD_IN_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_RECEIVED_SUBVOL CTL_CODE(FILE_DEVICE_UNKNOWN, 0x841, METHOD_IN_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_GET_XATTRS CTL_CODE(FILE_DEVICE_UNKNOWN, 0x842, METHOD_BUFFERED, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_SET_XATTR CTL_CODE(FILE_DEVICE_UNKNOWN, 0x843, METHOD_IN_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_RESERVE_SUBVOL CTL_CODE(FILE_DEVICE_UNKNOWN, 0x844, METHOD_IN_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_FIND_SUBVOL CTL_CODE(FILE_DEVICE_UNKNOWN, 0x845, 
METHOD_BUFFERED, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_SEND_SUBVOL CTL_CODE(FILE_DEVICE_UNKNOWN, 0x846, METHOD_BUFFERED, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_READ_SEND_BUFFER CTL_CODE(FILE_DEVICE_UNKNOWN, 0x847, METHOD_OUT_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_RESIZE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x848, METHOD_IN_DIRECT, FILE_ANY_ACCESS) typedef struct { UINT64 subvol; @@ -28,10 +46,24 @@ typedef struct { typedef struct { HANDLE subvol; - UINT32 namelen; + BOOL readonly; + BOOL posix; + UINT16 namelen; WCHAR name[1]; } btrfs_create_snapshot; +typedef struct { + void* POINTER_32 subvol; + BOOL readonly; + BOOL posix; + UINT16 namelen; + WCHAR name[1]; +} btrfs_create_snapshot32; + +#define BTRFS_COMPRESSION_ANY 0 +#define BTRFS_COMPRESSION_ZLIB 1 +#define BTRFS_COMPRESSION_LZO 2 + typedef struct { UINT64 subvol; UINT64 inode; @@ -44,6 +76,7 @@ typedef struct { UINT64 flags; UINT32 inline_length; UINT64 disk_size[3]; + UINT8 compression_type; } btrfs_inode_info; typedef struct { @@ -55,15 +88,20 @@ typedef struct { BOOL gid_changed; UINT32 st_mode; BOOL mode_changed; + UINT8 compression_type; + BOOL compression_type_changed; } btrfs_set_inode_info; typedef struct { UINT32 next_entry; UINT64 dev_id; UINT64 size; + UINT64 max_size; BOOL readonly; + BOOL missing; ULONG device_number; ULONG partition_number; + UINT64 stats[5]; USHORT namelen; WCHAR name[1]; } btrfs_device; @@ -117,6 +155,7 @@ typedef struct { #define BTRFS_BALANCE_PAUSED 2 #define BTRFS_BALANCE_REMOVAL 4 #define BTRFS_BALANCE_ERROR 8 +#define BTRFS_BALANCE_SHRINKING 16 typedef struct { UINT32 status; @@ -134,6 +173,7 @@ typedef struct { typedef struct { UINT8 uuid[16]; + BOOL missing; USHORT name_length; WCHAR name[1]; } btrfs_filesystem_device; @@ -145,4 +185,93 @@ typedef struct { btrfs_filesystem_device device; } btrfs_filesystem; +#define BTRFS_SCRUB_STOPPED 0 +#define BTRFS_SCRUB_RUNNING 1 +#define BTRFS_SCRUB_PAUSED 2 + +typedef struct { + UINT32 next_entry; + UINT64 address; + UINT64 
device; + BOOL recovered; + BOOL is_metadata; + BOOL parity; + + union { + struct { + UINT64 subvol; + UINT64 offset; + UINT16 filename_length; + WCHAR filename[1]; + } data; + + struct { + UINT64 root; + UINT8 level; + KEY firstitem; + } metadata; + }; +} btrfs_scrub_error; + +typedef struct { + UINT32 status; + LARGE_INTEGER start_time; + LARGE_INTEGER finish_time; + UINT64 chunks_left; + UINT64 total_chunks; + UINT64 data_scrubbed; + UINT64 duration; + NTSTATUS error; + UINT32 num_errors; + btrfs_scrub_error errors; +} btrfs_query_scrub; + +typedef struct { + UINT64 inode; + UINT8 type; + UINT64 st_rdev; + UINT16 namelen; + WCHAR name[1]; +} btrfs_mknod; + +typedef struct { + UINT64 generation; + BTRFS_UUID uuid; +} btrfs_received_subvol; + +typedef struct { + USHORT namelen; + USHORT valuelen; + char data[1]; +} btrfs_set_xattr; + +typedef struct { + BOOL readonly; + BOOL posix; + USHORT namelen; + WCHAR name[1]; +} btrfs_create_subvol; + +typedef struct { + BTRFS_UUID uuid; + UINT64 ctransid; +} btrfs_find_subvol; + +typedef struct { + HANDLE parent; + ULONG num_clones; + HANDLE clones[1]; +} btrfs_send_subvol; + +typedef struct { + void* POINTER_32 parent; + ULONG num_clones; + void* POINTER_32 clones[1]; +} btrfs_send_subvol32; + +typedef struct { + UINT64 device; + UINT64 size; +} btrfs_resize; + #endif diff --git a/reactos/drivers/filesystems/btrfs/cache.c b/reactos/drivers/filesystems/btrfs/cache.c index 75d87dbc98c..22d8d65c0d9 100644 --- a/reactos/drivers/filesystems/btrfs/cache.c +++ b/reactos/drivers/filesystems/btrfs/cache.c @@ -1,34 +1,34 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. 
- * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . */ #include "btrfs_drv.h" -#include CACHE_MANAGER_CALLBACKS* cache_callbacks; -static BOOLEAN STDCALL acquire_for_lazy_write(PVOID Context, BOOLEAN Wait) { +#ifdef __REACTOS__ +static BOOLEAN NTAPI acquire_for_lazy_write(PVOID Context, BOOLEAN Wait) { +#else +static BOOLEAN acquire_for_lazy_write(PVOID Context, BOOLEAN Wait) { +#endif PFILE_OBJECT FileObject = Context; fcb* fcb = FileObject->FsContext; - + TRACE("(%p, %u)\n", Context, Wait); - -// if (!fcb || FileObject->Flags & FO_CLEANUP_COMPLETE) -// return FALSE; - + if (!ExAcquireResourceSharedLite(&fcb->Vcb->tree_lock, Wait)) return FALSE; @@ -36,53 +36,83 @@ static BOOLEAN STDCALL acquire_for_lazy_write(PVOID Context, BOOLEAN Wait) { ExReleaseResourceLite(&fcb->Vcb->tree_lock); return FALSE; } - + fcb->lazy_writer_thread = KeGetCurrentThread(); - + + IoSetTopLevelIrp((PIRP)FSRTL_CACHE_TOP_LEVEL_IRP); + return TRUE; } -static void STDCALL release_from_lazy_write(PVOID Context) { +#ifdef __REACTOS__ +static void NTAPI release_from_lazy_write(PVOID Context) { +#else +static void release_from_lazy_write(PVOID Context) { +#endif PFILE_OBJECT FileObject = Context; fcb* fcb = FileObject->FsContext; - + TRACE("(%p)\n", Context); - -// if (!fcb || FileObject->Flags & FO_CLEANUP_COMPLETE) -// return; - + fcb->lazy_writer_thread = NULL; - + ExReleaseResourceLite(fcb->Header.Resource); - + ExReleaseResourceLite(&fcb->Vcb->tree_lock); + + if (IoGetTopLevelIrp() == (PIRP)FSRTL_CACHE_TOP_LEVEL_IRP) + IoSetTopLevelIrp(NULL); } -static BOOLEAN STDCALL acquire_for_read_ahead(PVOID Context, BOOLEAN Wait) { +#ifdef __REACTOS__ +static BOOLEAN NTAPI 
acquire_for_read_ahead(PVOID Context, BOOLEAN Wait) { +#else +static BOOLEAN acquire_for_read_ahead(PVOID Context, BOOLEAN Wait) { +#endif + PFILE_OBJECT FileObject = Context; + fcb* fcb = FileObject->FsContext; + TRACE("(%p, %u)\n", Context, Wait); - + + if (!ExAcquireResourceSharedLite(fcb->Header.Resource, Wait)) + return FALSE; + + IoSetTopLevelIrp((PIRP)FSRTL_CACHE_TOP_LEVEL_IRP); + return TRUE; } -static void STDCALL release_from_read_ahead(PVOID Context) { +#ifdef __REACTOS__ +static void NTAPI release_from_read_ahead(PVOID Context) { +#else +static void release_from_read_ahead(PVOID Context) { +#endif + PFILE_OBJECT FileObject = Context; + fcb* fcb = FileObject->FsContext; + TRACE("(%p)\n", Context); + + ExReleaseResourceLite(fcb->Header.Resource); + + if (IoGetTopLevelIrp() == (PIRP)FSRTL_CACHE_TOP_LEVEL_IRP) + IoSetTopLevelIrp(NULL); } -NTSTATUS STDCALL init_cache() { +NTSTATUS init_cache() { cache_callbacks = ExAllocatePoolWithTag(NonPagedPool, sizeof(CACHE_MANAGER_CALLBACKS), ALLOC_TAG); if (!cache_callbacks) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + cache_callbacks->AcquireForLazyWrite = acquire_for_lazy_write; cache_callbacks->ReleaseFromLazyWrite = release_from_lazy_write; cache_callbacks->AcquireForReadAhead = acquire_for_read_ahead; cache_callbacks->ReleaseFromReadAhead = release_from_read_ahead; - + return STATUS_SUCCESS; } -void STDCALL free_cache() { +void free_cache() { ExFreePool(cache_callbacks); } diff --git a/reactos/drivers/filesystems/btrfs/calcthread.c b/reactos/drivers/filesystems/btrfs/calcthread.c index e84107d064f..2b6da81743d 100644 --- a/reactos/drivers/filesystems/btrfs/calcthread.c +++ b/reactos/drivers/filesystems/btrfs/calcthread.c @@ -1,17 +1,17 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. 
- * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . */ @@ -21,13 +21,13 @@ NTSTATUS add_calc_job(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32* csum, calc_job** pcj) { calc_job* cj; - + cj = ExAllocatePoolWithTag(NonPagedPool, sizeof(calc_job), ALLOC_TAG); if (!cj) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + cj->data = data; cj->sectors = sectors; cj->csum = csum; @@ -39,18 +39,18 @@ NTSTATUS add_calc_job(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32 ExAcquireResourceExclusiveLite(&Vcb->calcthreads.lock, TRUE); InsertTailList(&Vcb->calcthreads.job_list, &cj->list_entry); ExReleaseResourceLite(&Vcb->calcthreads.lock); - + KeSetEvent(&Vcb->calcthreads.event, 0, FALSE); KeClearEvent(&Vcb->calcthreads.event); - + *pcj = cj; - + return STATUS_SUCCESS; } void free_calc_job(calc_job* cj) { LONG rc = InterlockedDecrement(&cj->refcount); - + if (rc == 0) ExFreePool(cj); } @@ -60,35 +60,36 @@ static BOOL do_calc(device_extension* Vcb, calc_job* cj) { UINT32* csum; UINT8* data; ULONG blocksize, i; - + pos = InterlockedIncrement(&cj->pos) - 1; - - if (pos * SECTOR_BLOCK >= cj->sectors) + + if ((UINT32)pos * SECTOR_BLOCK >= cj->sectors) return FALSE; csum = &cj->csum[pos * SECTOR_BLOCK]; data = cj->data + (pos * SECTOR_BLOCK * Vcb->superblock.sector_size); - + blocksize = min(SECTOR_BLOCK, cj->sectors - (pos * SECTOR_BLOCK)); for (i = 0; i < blocksize; i++) { 
*csum = ~calc_crc32c(0xffffffff, data, Vcb->superblock.sector_size); csum++; data += Vcb->superblock.sector_size; } - + done = InterlockedIncrement(&cj->done); - - if (done * SECTOR_BLOCK >= cj->sectors) { + + if ((UINT32)done * SECTOR_BLOCK >= cj->sectors) { ExAcquireResourceExclusiveLite(&Vcb->calcthreads.lock, TRUE); RemoveEntryList(&cj->list_entry); ExReleaseResourceLite(&Vcb->calcthreads.lock); - + KeSetEvent(&cj->event, 0, FALSE); } - + return TRUE; } +_Function_class_(KSTART_ROUTINE) #ifdef __REACTOS__ void NTAPI calc_thread(void* context) { #else @@ -96,47 +97,43 @@ void calc_thread(void* context) { #endif drv_calc_thread* thread = context; device_extension* Vcb = thread->DeviceObject->DeviceExtension; - + ObReferenceObject(thread->DeviceObject); - + while (TRUE) { KeWaitForSingleObject(&Vcb->calcthreads.event, Executive, KernelMode, FALSE, NULL); - - FsRtlEnterFileSystem(); - + while (TRUE) { calc_job* cj; BOOL b; - + ExAcquireResourceExclusiveLite(&Vcb->calcthreads.lock, TRUE); - + if (IsListEmpty(&Vcb->calcthreads.job_list)) { ExReleaseResourceLite(&Vcb->calcthreads.lock); break; } - + cj = CONTAINING_RECORD(Vcb->calcthreads.job_list.Flink, calc_job, list_entry); cj->refcount++; - + ExReleaseResourceLite(&Vcb->calcthreads.lock); - + b = do_calc(Vcb, cj); - + free_calc_job(cj); - + if (!b) break; } - - FsRtlExitFileSystem(); - + if (thread->quit) break; } ObDereferenceObject(thread->DeviceObject); - + KeSetEvent(&thread->finished, 0, FALSE); - + PsTerminateSystemThread(STATUS_SUCCESS); } diff --git a/reactos/drivers/filesystems/btrfs/compress.c b/reactos/drivers/filesystems/btrfs/compress.c index e94211b8dab..0a8a1f72692 100755 --- a/reactos/drivers/filesystems/btrfs/compress.c +++ b/reactos/drivers/filesystems/btrfs/compress.c @@ -1,23 +1,23 @@ -/* Copyright (c) Mark Harmstone 2016 +/* Copyright (c) Mark Harmstone 2016-17 * Copyright (c) Reimar Doeffinger 2006 * Copyright (c) Markus Oberhumer 1996 - * + * * This file is part of WinBtrfs. 
- * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . */ -// Portion of the LZO decompression code here were cribbed from code in +// Portions of the LZO decompression code here were cribbed from code in // libavcodec, also under the LGPL. Thank you, Reimar Doeffinger. // The LZO compression code comes from v0.22 of lzo, written way back in @@ -79,36 +79,36 @@ typedef struct { ((void*) m_pos < (void*) in || \ (m_off = (UINT8*) ip - (UINT8*) m_pos) <= 0 || \ m_off > max_offset) - + #define LZO_BYTE(x) ((unsigned char) (x)) static UINT8 lzo_nextbyte(lzo_stream* stream) { UINT8 c; - + if (stream->inpos >= stream->inlen) { stream->error = TRUE; return 0; } - + c = stream->in[stream->inpos]; stream->inpos++; - + return c; } static int lzo_len(lzo_stream* stream, int byte, int mask) { int len = byte & mask; - + if (len == 0) { while (!(byte = lzo_nextbyte(stream))) { if (stream->error) return 0; - + len += 255; } - + len += mask + byte; } - + return len; } @@ -117,12 +117,12 @@ static void lzo_copy(lzo_stream* stream, int len) { stream->error = TRUE; return; } - + if (stream->outpos + len > stream->outlen) { stream->error = TRUE; return; } - + do { stream->out[stream->outpos] = stream->in[stream->inpos]; stream->inpos++; @@ -131,17 +131,17 @@ static void lzo_copy(lzo_stream* stream, int len) { } while (len > 0); } -static void lzo_copyback(lzo_stream* stream, int back, int len) { +static void 
lzo_copyback(lzo_stream* stream, UINT32 back, int len) { if (stream->outpos < back) { stream->error = TRUE; return; } - + if (stream->outpos + len > stream->outlen) { stream->error = TRUE; return; } - + do { stream->out[stream->outpos] = stream->out[stream->outpos - back]; stream->outpos++; @@ -153,22 +153,25 @@ static NTSTATUS do_lzo_decompress(lzo_stream* stream) { UINT8 byte; UINT32 len, back; BOOL backcopy = FALSE; - + stream->error = FALSE; - + byte = lzo_nextbyte(stream); if (stream->error) return STATUS_INTERNAL_ERROR; - + if (byte > 17) { - lzo_copy(stream, byte - 17); + lzo_copy(stream, min((UINT8)(byte - 17), (UINT32)(stream->outlen - stream->outpos))); if (stream->error) return STATUS_INTERNAL_ERROR; - + + if (stream->outlen == stream->outpos) + return STATUS_SUCCESS; + byte = lzo_nextbyte(stream); if (stream->error) return STATUS_INTERNAL_ERROR; - + if (byte < 16) return STATUS_INTERNAL_ERROR; } - + while (1) { if (byte >> 4) { backcopy = TRUE; @@ -179,24 +182,24 @@ static NTSTATUS do_lzo_decompress(lzo_stream* stream) { } else if (byte >> 5) { len = lzo_len(stream, byte, 31); if (stream->error) return STATUS_INTERNAL_ERROR; - + byte = lzo_nextbyte(stream); if (stream->error) return STATUS_INTERNAL_ERROR; - + back = (lzo_nextbyte(stream) << 6) + (byte >> 2) + 1; if (stream->error) return STATUS_INTERNAL_ERROR; } else { len = lzo_len(stream, byte, 7); if (stream->error) return STATUS_INTERNAL_ERROR; - + back = (1 << 14) + ((byte & 8) << 11); - + byte = lzo_nextbyte(stream); if (stream->error) return STATUS_INTERNAL_ERROR; - + back += (lzo_nextbyte(stream) << 6) + (byte >> 2); if (stream->error) return STATUS_INTERNAL_ERROR; - + if (back == (1 << 14)) { if (len != 1) return STATUS_INTERNAL_ERROR; @@ -210,100 +213,109 @@ static NTSTATUS do_lzo_decompress(lzo_stream* stream) { } else { len = lzo_len(stream, byte, 15); if (stream->error) return STATUS_INTERNAL_ERROR; - - lzo_copy(stream, len + 3); + + lzo_copy(stream, min(len + 3, stream->outlen - 
stream->outpos)); if (stream->error) return STATUS_INTERNAL_ERROR; - + + if (stream->outlen == stream->outpos) + return STATUS_SUCCESS; + byte = lzo_nextbyte(stream); if (stream->error) return STATUS_INTERNAL_ERROR; - + if (byte >> 4) continue; - + len = 1; back = (1 << 11) + (lzo_nextbyte(stream) << 2) + (byte >> 2) + 1; if (stream->error) return STATUS_INTERNAL_ERROR; - + break; } - - lzo_copyback(stream, back, len + 2); + + lzo_copyback(stream, back, min(len + 2, stream->outlen - stream->outpos)); if (stream->error) return STATUS_INTERNAL_ERROR; - + + if (stream->outlen == stream->outpos) + return STATUS_SUCCESS; + len = byte & 3; - + if (len) { - lzo_copy(stream, len); + lzo_copy(stream, min(len, stream->outlen - stream->outpos)); if (stream->error) return STATUS_INTERNAL_ERROR; + + if (stream->outlen == stream->outpos) + return STATUS_SUCCESS; } else backcopy = !backcopy; - + byte = lzo_nextbyte(stream); if (stream->error) return STATUS_INTERNAL_ERROR; } - + return STATUS_SUCCESS; } -static NTSTATUS lzo_decompress(UINT8* inbuf, UINT64 inlen, UINT8* outbuf, UINT64 outlen) { +NTSTATUS lzo_decompress(UINT8* inbuf, UINT32 inlen, UINT8* outbuf, UINT32 outlen, UINT32 inpageoff) { NTSTATUS Status; - UINT32 extlen, partlen, inoff, outoff; + UINT32 partlen, inoff, outoff; lzo_stream stream; - - extlen = *((UINT32*)inbuf); - if (inlen < extlen) { - ERR("compressed extent was %llx, should have been at least %x\n", inlen, extlen); - return STATUS_INTERNAL_ERROR; - } - - inoff = sizeof(UINT32); + + inoff = 0; outoff = 0; - + do { partlen = *(UINT32*)&inbuf[inoff]; - + if (partlen + inoff > inlen) { ERR("overflow: %x + %x > %llx\n", partlen, inoff, inlen); return STATUS_INTERNAL_ERROR; } - + inoff += sizeof(UINT32); - + stream.in = &inbuf[inoff]; stream.inlen = partlen; stream.inpos = 0; stream.out = &outbuf[outoff]; - stream.outlen = LINUX_PAGE_SIZE; + stream.outlen = min(outlen, LINUX_PAGE_SIZE); stream.outpos = 0; - + Status = do_lzo_decompress(&stream); if 
(!NT_SUCCESS(Status)) { ERR("do_lzo_decompress returned %08x\n", Status); return Status; } - + if (stream.outpos < stream.outlen) RtlZeroMemory(&stream.out[stream.outpos], stream.outlen - stream.outpos); - + inoff += partlen; outoff += stream.outlen; - - if (LINUX_PAGE_SIZE - (inoff % LINUX_PAGE_SIZE) < sizeof(UINT32)) - inoff = ((inoff / LINUX_PAGE_SIZE) + 1) * LINUX_PAGE_SIZE; - } while (inoff < extlen); - + + if (LINUX_PAGE_SIZE - ((inpageoff + inoff) % LINUX_PAGE_SIZE) < sizeof(UINT32)) + inoff = ((((inpageoff + inoff) / LINUX_PAGE_SIZE) + 1) * LINUX_PAGE_SIZE) - inpageoff; + + outlen -= stream.outlen; + } while (inoff < inlen && outlen > 0); + return STATUS_SUCCESS; } static void* zlib_alloc(void* opaque, unsigned int items, unsigned int size) { + UNUSED(opaque); + return ExAllocatePoolWithTag(PagedPool, items * size, ALLOC_TAG_ZLIB); } static void zlib_free(void* opaque, void* ptr) { + UNUSED(opaque); + ExFreePool(ptr); } -static NTSTATUS zlib_decompress(UINT8* inbuf, UINT64 inlen, UINT8* outbuf, UINT64 outlen) { +NTSTATUS zlib_decompress(UINT8* inbuf, UINT32 inlen, UINT8* outbuf, UINT32 outlen) { z_stream c_stream; int ret; @@ -312,7 +324,7 @@ static NTSTATUS zlib_decompress(UINT8* inbuf, UINT64 inlen, UINT8* outbuf, UINT6 c_stream.opaque = (voidpf)0; ret = inflateInit(&c_stream); - + if (ret != Z_OK) { ERR("inflateInit returned %08x\n", ret); return STATUS_INTERNAL_ERROR; @@ -320,174 +332,179 @@ static NTSTATUS zlib_decompress(UINT8* inbuf, UINT64 inlen, UINT8* outbuf, UINT6 c_stream.next_in = inbuf; c_stream.avail_in = inlen; - + c_stream.next_out = outbuf; c_stream.avail_out = outlen; - + do { ret = inflate(&c_stream, Z_NO_FLUSH); - + if (ret != Z_OK && ret != Z_STREAM_END) { ERR("inflate returned %08x\n", ret); inflateEnd(&c_stream); return STATUS_INTERNAL_ERROR; } + + if (c_stream.avail_out == 0) + break; } while (ret != Z_STREAM_END); ret = inflateEnd(&c_stream); - + if (ret != Z_OK) { ERR("inflateEnd returned %08x\n", ret); return 
STATUS_INTERNAL_ERROR; } - + // FIXME - if we're short, should we zero the end of outbuf so we don't leak information into userspace? - - return STATUS_SUCCESS; -} -NTSTATUS decompress(UINT8 type, UINT8* inbuf, UINT64 inlen, UINT8* outbuf, UINT64 outlen) { - if (type == BTRFS_COMPRESSION_ZLIB) - return zlib_decompress(inbuf, inlen, outbuf, outlen); - else if (type == BTRFS_COMPRESSION_LZO) - return lzo_decompress(inbuf, inlen, outbuf, outlen); - else { - ERR("unsupported compression type %x\n", type); - return STATUS_NOT_SUPPORTED; - } + return STATUS_SUCCESS; } static NTSTATUS zlib_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, PIRP Irp, LIST_ENTRY* rollback) { NTSTATUS Status; UINT8 compression; - UINT64 comp_length; + UINT32 comp_length; UINT8* comp_data; UINT32 out_left; LIST_ENTRY* le; chunk* c; z_stream c_stream; int ret; - - comp_data = ExAllocatePoolWithTag(PagedPool, end_data - start_data, ALLOC_TAG); + + comp_data = ExAllocatePoolWithTag(PagedPool, (UINT32)(end_data - start_data), ALLOC_TAG); if (!comp_data) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + Status = excise_extents(fcb->Vcb, fcb, start_data, end_data, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("excise_extents returned %08x\n", Status); ExFreePool(comp_data); return Status; } - + c_stream.zalloc = zlib_alloc; c_stream.zfree = zlib_free; c_stream.opaque = (voidpf)0; ret = deflateInit(&c_stream, fcb->Vcb->options.zlib_level); - + if (ret != Z_OK) { ERR("deflateInit returned %08x\n", ret); ExFreePool(comp_data); return STATUS_INTERNAL_ERROR; } - - c_stream.avail_in = end_data - start_data; + + c_stream.avail_in = (UINT32)(end_data - start_data); c_stream.next_in = data; - c_stream.avail_out = end_data - start_data; + c_stream.avail_out = (UINT32)(end_data - start_data); c_stream.next_out = comp_data; - + do { ret = deflate(&c_stream, Z_FINISH); - + if (ret == Z_STREAM_ERROR) { ERR("deflate returned %x\n", ret); 
ExFreePool(comp_data); return STATUS_INTERNAL_ERROR; } } while (c_stream.avail_in > 0 && c_stream.avail_out > 0); - + out_left = c_stream.avail_out; - + ret = deflateEnd(&c_stream); - + if (ret != Z_OK) { ERR("deflateEnd returned %08x\n", ret); ExFreePool(comp_data); return STATUS_INTERNAL_ERROR; } - + if (out_left < fcb->Vcb->superblock.sector_size) { // compressed extent would be larger than or same size as uncompressed extent ExFreePool(comp_data); - - comp_length = end_data - start_data; + + comp_length = (UINT32)(end_data - start_data); comp_data = data; compression = BTRFS_COMPRESSION_NONE; - + *compressed = FALSE; } else { UINT32 cl; - + compression = BTRFS_COMPRESSION_ZLIB; - cl = end_data - start_data - out_left; - comp_length = sector_align(cl, fcb->Vcb->superblock.sector_size); - + cl = (UINT32)(end_data - start_data - out_left); + comp_length = (UINT32)sector_align(cl, fcb->Vcb->superblock.sector_size); + RtlZeroMemory(comp_data + cl, comp_length - cl); - + *compressed = TRUE; } - + ExAcquireResourceSharedLite(&fcb->Vcb->chunk_lock, TRUE); - + le = fcb->Vcb->chunks.Flink; while (le != &fcb->Vcb->chunks) { c = CONTAINING_RECORD(le, chunk, list_entry); - + if (!c->readonly && !c->reloc) { ExAcquireResourceExclusiveLite(&c->lock, TRUE); - + if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) { - if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, Irp, rollback, compression, end_data - start_data)) { + if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, Irp, rollback, compression, end_data - start_data, FALSE, 0)) { ExReleaseResourceLite(&fcb->Vcb->chunk_lock); - + if (compression != BTRFS_COMPRESSION_NONE) ExFreePool(comp_data); - + return STATUS_SUCCESS; } } - + ExReleaseResourceLite(&c->lock); } le = le->Flink; } - + ExReleaseResourceLite(&fcb->Vcb->chunk_lock); - + ExAcquireResourceExclusiveLite(&fcb->Vcb->chunk_lock, TRUE); - - if ((c = 
alloc_chunk(fcb->Vcb, fcb->Vcb->data_flags))) { - ExReleaseResourceLite(&fcb->Vcb->chunk_lock); - + + Status = alloc_chunk(fcb->Vcb, fcb->Vcb->data_flags, &c, FALSE); + + ExReleaseResourceLite(&fcb->Vcb->chunk_lock); + + if (!NT_SUCCESS(Status)) { + ERR("alloc_chunk returned %08x\n", Status); + + if (compression != BTRFS_COMPRESSION_NONE) + ExFreePool(comp_data); + + return Status; + } + + if (c) { ExAcquireResourceExclusiveLite(&c->lock, TRUE); - + if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) { - if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, Irp, rollback, compression, end_data - start_data)) { + if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, Irp, rollback, compression, end_data - start_data, FALSE, 0)) { if (compression != BTRFS_COMPRESSION_NONE) ExFreePool(comp_data); - + return STATUS_SUCCESS; } } - + ExReleaseResourceLite(&c->lock); - } else - ExReleaseResourceLite(&fcb->Vcb->chunk_lock); - + } + WARN("couldn't find any data chunks with %llx bytes free\n", comp_length); + if (compression != BTRFS_COMPRESSION_NONE) + ExFreePool(comp_data); + return STATUS_DISK_FULL; } @@ -520,7 +537,7 @@ static NTSTATUS lzo_do_compress(const UINT8* in, UINT32 in_len, UINT8* out, UINT UPDATE_I(dict, cycle, dindex, ip); if (!LZO_CHECK_MPOS_NON_DET(m_pos, m_off, in, ip, M4_MAX_OFFSET) && m_pos[0] == ip[0] && m_pos[1] == ip[1] && m_pos[2] == ip[2]) { - lit = ip - ii; + lit = (UINT32)(ip - ii); m_pos += 3; if (m_off <= M2_MAX_OFFSET) goto match; @@ -528,7 +545,7 @@ static NTSTATUS lzo_do_compress(const UINT8* in, UINT32 in_len, UINT8* out, UINT if (lit == 3) { /* better compression, but slower */ if (op - 2 <= out) return STATUS_INTERNAL_ERROR; - + op[-2] |= LZO_BYTE(3); *op++ = *ii++; *op++ = *ii++; *op++ = *ii++; goto code_match; @@ -566,13 +583,13 @@ match: tt -= 255; *op++ = 0; } - + if (tt <= 0) return STATUS_INTERNAL_ERROR; *op++ = LZO_BYTE(tt); } 
- + do { *op++ = *ii++; } while (--t > 0); @@ -583,13 +600,13 @@ match: code_match: if (ii != ip) return STATUS_INTERNAL_ERROR; - + ip += 3; if (*m_pos++ != *ip++ || *m_pos++ != *ip++ || *m_pos++ != *ip++ || *m_pos++ != *ip++ || *m_pos++ != *ip++ || *m_pos++ != *ip++) { --ip; - m_len = ip - ii; - + m_len = (UINT32)(ip - ii); + if (m_len < 3 || m_len > 8) return STATUS_INTERNAL_ERROR; @@ -603,10 +620,10 @@ code_match: goto m3_m4_offset; } else { m_off -= 0x4000; - + if (m_off <= 0 || m_off > 0x7fff) return STATUS_INTERNAL_ERROR; - + *op++ = LZO_BYTE(M4_MARKER | ((m_off & 0x4000) >> 11) | (m_len - 2)); goto m3_m4_offset; } @@ -615,8 +632,8 @@ code_match: end = in_end; while (ip < end && *m_pos == *ip) m_pos++, ip++; - m_len = (ip - ii); - + m_len = (UINT32)(ip - ii); + if (m_len < 3) return STATUS_INTERNAL_ERROR; @@ -631,7 +648,7 @@ code_match: } } else { m_off -= 0x4000; - + if (m_off <= 0 || m_off > 0x7fff) return STATUS_INTERNAL_ERROR; @@ -645,10 +662,10 @@ m3_m4_len: m_len -= 255; *op++ = 0; } - + if (m_len <= 0) return STATUS_INTERNAL_ERROR; - + *op++ = LZO_BYTE(m_len); } } @@ -666,7 +683,7 @@ m3_m4_offset: /* store final literal run */ if (in_end - ii > 0) { - UINT32 t = in_end - ii; + UINT32 t = (UINT32)(in_end - ii); if (op == out && t <= 238) *op++ = LZO_BYTE(17 + t); @@ -682,20 +699,20 @@ m3_m4_offset: tt -= 255; *op++ = 0; } - + if (tt <= 0) return STATUS_INTERNAL_ERROR; *op++ = LZO_BYTE(tt); } - + do { *op++ = *ii++; } while (--t > 0); } - *out_len = op - out; - + *out_len = (UINT32)(op - out); + return STATUS_SUCCESS; } @@ -707,16 +724,16 @@ static NTSTATUS lzo1x_1_compress(lzo_stream* stream) { stream->outlen = 0; else if (stream->inlen <= 9 + 4) { *op++ = LZO_BYTE(17 + stream->inlen); - + stream->inpos = 0; do { *op++ = stream->in[stream->inpos]; stream->inpos++; } while (stream->inlen < stream->inpos); - stream->outlen = op - stream->out; + stream->outlen = (UINT32)(op - stream->out); } else Status = lzo_do_compress(stream->in, stream->inlen, 
stream->out, &stream->outlen, stream->wrkmem); - + if (Status == STATUS_SUCCESS) { op = stream->out + stream->outlen; *op++ = M4_MARKER | 1; @@ -743,28 +760,28 @@ static NTSTATUS lzo_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end UINT32* out_size; LIST_ENTRY* le; chunk* c; - - num_pages = (sector_align(end_data - start_data, LINUX_PAGE_SIZE)) / LINUX_PAGE_SIZE; - + + num_pages = (ULONG)((sector_align(end_data - start_data, LINUX_PAGE_SIZE)) / LINUX_PAGE_SIZE); + // Four-byte overall header // Another four-byte header page // Each page has a maximum size of lzo_max_outlen(LINUX_PAGE_SIZE) // Plus another four bytes for possible padding comp_data_len = sizeof(UINT32) + ((lzo_max_outlen(LINUX_PAGE_SIZE) + (2 * sizeof(UINT32))) * num_pages); - + comp_data = ExAllocatePoolWithTag(PagedPool, comp_data_len, ALLOC_TAG); if (!comp_data) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + stream.wrkmem = ExAllocatePoolWithTag(PagedPool, LZO1X_MEM_COMPRESS, ALLOC_TAG); if (!stream.wrkmem) { ERR("out of memory\n"); ExFreePool(comp_data); return STATUS_INSUFFICIENT_RESOURCES; } - + Status = excise_extents(fcb->Vcb, fcb, start_data, end_data, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("excise_extents returned %08x\n", Status); @@ -772,122 +789,138 @@ static NTSTATUS lzo_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end ExFreePool(stream.wrkmem); return Status; } - + out_size = (UINT32*)comp_data; *out_size = sizeof(UINT32); - + stream.in = data; stream.out = comp_data + (2 * sizeof(UINT32)); - + for (i = 0; i < num_pages; i++) { UINT32* pagelen = (UINT32*)(stream.out - sizeof(UINT32)); - - stream.inlen = min(LINUX_PAGE_SIZE, end_data - start_data - (i * LINUX_PAGE_SIZE)); - + + stream.inlen = (UINT32)min(LINUX_PAGE_SIZE, end_data - start_data - (i * LINUX_PAGE_SIZE)); + Status = lzo1x_1_compress(&stream); if (!NT_SUCCESS(Status)) { ERR("lzo1x_1_compress returned %08x\n", Status); skip_compression = TRUE; break; } - + *pagelen = 
stream.outlen; *out_size += stream.outlen + sizeof(UINT32); - + stream.in += LINUX_PAGE_SIZE; stream.out += stream.outlen + sizeof(UINT32); - + if (LINUX_PAGE_SIZE - (*out_size % LINUX_PAGE_SIZE) < sizeof(UINT32)) { RtlZeroMemory(stream.out, LINUX_PAGE_SIZE - (*out_size % LINUX_PAGE_SIZE)); stream.out += LINUX_PAGE_SIZE - (*out_size % LINUX_PAGE_SIZE); *out_size += LINUX_PAGE_SIZE - (*out_size % LINUX_PAGE_SIZE); } } - + ExFreePool(stream.wrkmem); - + if (skip_compression || *out_size >= end_data - start_data - fcb->Vcb->superblock.sector_size) { // compressed extent would be larger than or same size as uncompressed extent ExFreePool(comp_data); - + comp_length = end_data - start_data; comp_data = data; compression = BTRFS_COMPRESSION_NONE; - + *compressed = FALSE; } else { compression = BTRFS_COMPRESSION_LZO; comp_length = sector_align(*out_size, fcb->Vcb->superblock.sector_size); - - RtlZeroMemory(comp_data + *out_size, comp_length - *out_size); - + + RtlZeroMemory(comp_data + *out_size, (ULONG)(comp_length - *out_size)); + *compressed = TRUE; } - + ExAcquireResourceSharedLite(&fcb->Vcb->chunk_lock, TRUE); - + le = fcb->Vcb->chunks.Flink; while (le != &fcb->Vcb->chunks) { c = CONTAINING_RECORD(le, chunk, list_entry); - + if (!c->readonly && !c->reloc) { ExAcquireResourceExclusiveLite(&c->lock, TRUE); - + if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) { - if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, Irp, rollback, compression, end_data - start_data)) { + if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, Irp, rollback, compression, end_data - start_data, FALSE, 0)) { ExReleaseResourceLite(&fcb->Vcb->chunk_lock); - + if (compression != BTRFS_COMPRESSION_NONE) ExFreePool(comp_data); - + return STATUS_SUCCESS; } } - + ExReleaseResourceLite(&c->lock); } le = le->Flink; } - + ExReleaseResourceLite(&fcb->Vcb->chunk_lock); 
ExAcquireResourceExclusiveLite(&fcb->Vcb->chunk_lock, TRUE); - - if ((c = alloc_chunk(fcb->Vcb, fcb->Vcb->data_flags))) { - ExReleaseResourceLite(&fcb->Vcb->chunk_lock); - + + Status = alloc_chunk(fcb->Vcb, fcb->Vcb->data_flags, &c, FALSE); + + ExReleaseResourceLite(&fcb->Vcb->chunk_lock); + + if (!NT_SUCCESS(Status)) { + ERR("alloc_chunk returned %08x\n", Status); + + if (compression != BTRFS_COMPRESSION_NONE) + ExFreePool(comp_data); + + return Status; + } + + if (c) { ExAcquireResourceExclusiveLite(&c->lock, TRUE); - + if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) { - if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, Irp, rollback, compression, end_data - start_data)) { + if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, Irp, rollback, compression, end_data - start_data, FALSE, 0)) { if (compression != BTRFS_COMPRESSION_NONE) ExFreePool(comp_data); - + return STATUS_SUCCESS; } } - + ExReleaseResourceLite(&c->lock); - } else - ExReleaseResourceLite(&fcb->Vcb->chunk_lock); - + } + WARN("couldn't find any data chunks with %llx bytes free\n", comp_length); + if (compression != BTRFS_COMPRESSION_NONE) + ExFreePool(comp_data); + return STATUS_DISK_FULL; } NTSTATUS write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, PIRP Irp, LIST_ENTRY* rollback) { UINT8 type; - if (fcb->Vcb->options.compress_type != 0) + if (fcb->Vcb->options.compress_type != 0 && fcb->prop_compression == PropCompression_None) type = fcb->Vcb->options.compress_type; else { - if (fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO) + if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO) && fcb->prop_compression == PropCompression_LZO) { + fcb->Vcb->superblock.incompat_flags |= BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO; + type = BTRFS_COMPRESSION_LZO; + } else if (fcb->Vcb->superblock.incompat_flags & 
BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO && fcb->prop_compression != PropCompression_Zlib) type = BTRFS_COMPRESSION_LZO; else type = BTRFS_COMPRESSION_ZLIB; } - + if (type == BTRFS_COMPRESSION_LZO) { fcb->Vcb->superblock.incompat_flags |= BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO; return lzo_write_compressed_bit(fcb, start_data, end_data, data, compressed, Irp, rollback); diff --git a/reactos/drivers/filesystems/btrfs/crc32c.c b/reactos/drivers/filesystems/btrfs/crc32c.c index 7390d668579..e3ef75c9fbd 100644 --- a/reactos/drivers/filesystems/btrfs/crc32c.c +++ b/reactos/drivers/filesystems/btrfs/crc32c.c @@ -1,17 +1,17 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . 
*/ @@ -23,37 +23,37 @@ extern BOOL have_sse42; #endif /* __REACTOS__ */ static const UINT32 crctable[] = { - 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, - 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, - 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, - 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, - 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, - 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, - 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, - 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, - 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, - 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, - 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, - 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, - 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, - 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, - 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, - 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, - 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, - 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, - 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, - 0x1871a4d8, 0xea1a27db, 
0xf94ad42f, 0x0b21572c, 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, - 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, - 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, - 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, - 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, - 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, - 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, - 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, - 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, - 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, - 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, - 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, + 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, + 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, + 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, + 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, + 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, + 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, + 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, + 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, + 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 
0x748a09a0, 0x67dafa54, 0x95b17957, + 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, + 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, + 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, + 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, + 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, + 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, + 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, + 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, + 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, + 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, + 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, + 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, + 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, + 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, + 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, + 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, + 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, + 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, + 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, + 
0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351, }; @@ -68,41 +68,64 @@ static const UINT32 crctable[] = { } \ } while(0) -static UINT32 crc32c_hw(const void *input, int len, UINT32 crc) { +static UINT32 crc32c_hw(const void *input, ULONG len, UINT32 crc) { const char* buf = (const char*)input; + // Annoyingly, the CRC32 intrinsics don't work properly in modern versions of MSVC - + // it compiles _mm_crc32_u8 as if it was _mm_crc32_u32. And because we're apparently + // not allowed to use inline asm on amd64, there's no easy way to fix this! + for (; (len > 0) && ((size_t)buf & ALIGN_MASK); len--, buf++) { +#ifdef _MSC_VER + crc = crctable[(crc ^ *buf) & 0xff] ^ (crc >> 8); +#else crc = _mm_crc32_u8(crc, *buf); +#endif } #ifdef _AMD64_ +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4244) // _mm_crc32_u64 wants to return UINT64(!) 
+#pragma warning(disable:4242) +#endif CALC_CRC(_mm_crc32_u64, crc, UINT64, buf, len); +#ifdef _MSC_VER +#pragma warning(pop) +#endif #endif CALC_CRC(_mm_crc32_u32, crc, UINT32, buf, len); + +#ifdef _MSC_VER + for (; len > 0; len--, buf++) { + crc = crctable[(crc ^ *buf) & 0xff] ^ (crc >> 8); + } +#else CALC_CRC(_mm_crc32_u16, crc, UINT16, buf, len); CALC_CRC(_mm_crc32_u8, crc, UINT8, buf, len); +#endif return crc; } #endif -UINT32 __stdcall calc_crc32c(UINT32 seed, UINT8* msg, ULONG msglen) { +UINT32 calc_crc32c(_In_ UINT32 seed, _In_reads_bytes_(msglen) UINT8* msg, _In_ ULONG msglen) { UINT32 rem; ULONG i; - + #ifndef __REACTOS__ if (have_sse42) { return crc32c_hw(msg, msglen, seed); } else { #endif rem = seed; - + for (i = 0; i < msglen; i++) { rem = crctable[(rem ^ msg[i]) & 0xff] ^ (rem >> 8); } #ifndef __REACTOS__ } #endif - + return rem; } diff --git a/reactos/drivers/filesystems/btrfs/create.c b/reactos/drivers/filesystems/btrfs/create.c index 04dc8f240e6..0ea66bb8db6 100644 --- a/reactos/drivers/filesystems/btrfs/create.c +++ b/reactos/drivers/filesystems/btrfs/create.c @@ -1,17 +1,17 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . 
*/ @@ -19,157 +19,174 @@ #include #endif /* __REACTOS__ */ #include "btrfs_drv.h" -#ifndef __REACTOS__ -#include -#endif +#include -extern PDEVICE_OBJECT devobj; +extern PDEVICE_OBJECT master_devobj; static WCHAR datastring[] = L"::$DATA"; -fcb* create_fcb(POOL_TYPE pool_type) { +fcb* create_fcb(device_extension* Vcb, POOL_TYPE pool_type) { fcb* fcb; - - fcb = ExAllocatePoolWithTag(PagedPool, sizeof(struct _fcb), ALLOC_TAG); - if (!fcb) { - ERR("out of memory\n"); - return NULL; + + if (pool_type == NonPagedPool) { + fcb = ExAllocatePoolWithTag(pool_type, sizeof(struct _fcb), ALLOC_TAG); + if (!fcb) { + ERR("out of memory\n"); + return NULL; + } + } else { + fcb = ExAllocateFromPagedLookasideList(&Vcb->fcb_lookaside); + if (!fcb) { + ERR("out of memory\n"); + return NULL; + } } - + #ifdef DEBUG_FCB_REFCOUNTS WARN("allocating fcb %p\n", fcb); #endif RtlZeroMemory(fcb, sizeof(struct _fcb)); - + fcb->pool_type = pool_type; + fcb->Header.NodeTypeCode = BTRFS_NODE_TYPE_FCB; fcb->Header.NodeByteSize = sizeof(struct _fcb); - - fcb->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(struct _fcb_nonpaged), ALLOC_TAG); + + fcb->nonpaged = ExAllocateFromNPagedLookasideList(&Vcb->fcb_np_lookaside); if (!fcb->nonpaged) { ERR("out of memory\n"); - ExFreePool(fcb); + + if (pool_type == NonPagedPool) + ExFreePool(fcb); + else + ExFreeToPagedLookasideList(&Vcb->fcb_lookaside, fcb); + return NULL; } RtlZeroMemory(fcb->nonpaged, sizeof(struct _fcb_nonpaged)); - + ExInitializeResourceLite(&fcb->nonpaged->paging_resource); fcb->Header.PagingIoResource = &fcb->nonpaged->paging_resource; - + ExInitializeFastMutex(&fcb->nonpaged->HeaderMutex); FsRtlSetupAdvancedHeader(&fcb->Header, &fcb->nonpaged->HeaderMutex); - + fcb->refcount = 1; #ifdef DEBUG_FCB_REFCOUNTS WARN("fcb %p: refcount now %i\n", fcb, fcb->refcount); #endif - + ExInitializeResourceLite(&fcb->nonpaged->resource); fcb->Header.Resource = &fcb->nonpaged->resource; - + 
ExInitializeResourceLite(&fcb->nonpaged->dir_children_lock); - + FsRtlInitializeFileLock(&fcb->lock, NULL, NULL); - + InitializeListHead(&fcb->extents); InitializeListHead(&fcb->hardlinks); - + InitializeListHead(&fcb->xattrs); + InitializeListHead(&fcb->dir_children_index); InitializeListHead(&fcb->dir_children_hash); InitializeListHead(&fcb->dir_children_hash_uc); - + return fcb; } -file_ref* create_fileref() { +file_ref* create_fileref(device_extension* Vcb) { file_ref* fr; - - fr = ExAllocatePoolWithTag(PagedPool, sizeof(file_ref), ALLOC_TAG); + + fr = ExAllocateFromPagedLookasideList(&Vcb->fileref_lookaside); if (!fr) { ERR("out of memory\n"); return NULL; } - + RtlZeroMemory(fr, sizeof(file_ref)); - - fr->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(file_ref_nonpaged), ALLOC_TAG); + + fr->nonpaged = ExAllocateFromNPagedLookasideList(&Vcb->fileref_np_lookaside); if (!fr->nonpaged) { ERR("out of memory\n"); - ExFreePool(fr); + ExFreeToPagedLookasideList(&Vcb->fileref_lookaside, fr); return NULL; } - + fr->refcount = 1; - + #ifdef DEBUG_FCB_REFCOUNTS WARN("fileref %p: refcount now 1\n", fr); #endif - + InitializeListHead(&fr->children); - + + ExInitializeResourceLite(&fr->nonpaged->fileref_lock); ExInitializeResourceLite(&fr->nonpaged->children_lock); - + return fr; } -static NTSTATUS STDCALL find_file_in_dir(device_extension* Vcb, PUNICODE_STRING filename, fcb* fcb, - root** subvol, UINT64* inode, dir_child** pdc, BOOL case_sensitive, PIRP Irp) { +NTSTATUS find_file_in_dir(PUNICODE_STRING filename, fcb* fcb, root** subvol, UINT64* inode, dir_child** pdc, BOOL case_sensitive) { NTSTATUS Status; UNICODE_STRING fnus; UINT32 hash; LIST_ENTRY* le; UINT8 c; - + BOOL locked = FALSE; + if (!case_sensitive) { Status = RtlUpcaseUnicodeString(&fnus, filename, TRUE); - + if (!NT_SUCCESS(Status)) { ERR("RtlUpcaseUnicodeString returned %08x\n", Status); return Status; } } else fnus = *filename; - + hash = calc_crc32c(0xffffffff, (UINT8*)fnus.Buffer, fnus.Length); - 
+ c = hash >> 24; - - ExAcquireResourceSharedLite(&fcb->nonpaged->dir_children_lock, TRUE); - + + if (!ExIsResourceAcquiredSharedLite(&fcb->nonpaged->dir_children_lock)) { + ExAcquireResourceSharedLite(&fcb->nonpaged->dir_children_lock, TRUE); + locked = TRUE; + } + if (case_sensitive) { if (!fcb->hash_ptrs[c]) { Status = STATUS_OBJECT_NAME_NOT_FOUND; goto end; } - + le = fcb->hash_ptrs[c]; while (le != &fcb->dir_children_hash) { dir_child* dc = CONTAINING_RECORD(le, dir_child, list_entry_hash); - + if (dc->hash == hash) { if (dc->name.Length == fnus.Length && RtlCompareMemory(dc->name.Buffer, fnus.Buffer, fnus.Length) == fnus.Length) { if (dc->key.obj_type == TYPE_ROOT_ITEM) { LIST_ENTRY* le2; - + *subvol = NULL; - + le2 = fcb->Vcb->roots.Flink; while (le2 != &fcb->Vcb->roots) { root* r2 = CONTAINING_RECORD(le2, root, list_entry); - + if (r2->id == dc->key.obj_id) { *subvol = r2; break; } - + le2 = le2->Flink; } - + *inode = SUBVOL_ROOT_INODE; } else { *subvol = fcb->subvol; *inode = dc->key.obj_id; } - + *pdc = dc; Status = STATUS_SUCCESS; @@ -179,7 +196,7 @@ static NTSTATUS STDCALL find_file_in_dir(device_extension* Vcb, PUNICODE_STRING Status = STATUS_OBJECT_NAME_NOT_FOUND; goto end; } - + le = le->Flink; } } else { @@ -187,38 +204,38 @@ static NTSTATUS STDCALL find_file_in_dir(device_extension* Vcb, PUNICODE_STRING Status = STATUS_OBJECT_NAME_NOT_FOUND; goto end; } - + le = fcb->hash_ptrs_uc[c]; while (le != &fcb->dir_children_hash_uc) { dir_child* dc = CONTAINING_RECORD(le, dir_child, list_entry_hash_uc); - + if (dc->hash_uc == hash) { if (dc->name_uc.Length == fnus.Length && RtlCompareMemory(dc->name_uc.Buffer, fnus.Buffer, fnus.Length) == fnus.Length) { if (dc->key.obj_type == TYPE_ROOT_ITEM) { LIST_ENTRY* le2; - + *subvol = NULL; - + le2 = fcb->Vcb->roots.Flink; while (le2 != &fcb->Vcb->roots) { root* r2 = CONTAINING_RECORD(le2, root, list_entry); - + if (r2->id == dc->key.obj_id) { *subvol = r2; break; } - + le2 = le2->Flink; } - + *inode = 
SUBVOL_ROOT_INODE; } else { *subvol = fcb->subvol; *inode = dc->key.obj_id; } - + *pdc = dc; - + Status = STATUS_SUCCESS; goto end; } @@ -226,522 +243,236 @@ static NTSTATUS STDCALL find_file_in_dir(device_extension* Vcb, PUNICODE_STRING Status = STATUS_OBJECT_NAME_NOT_FOUND; goto end; } - + le = le->Flink; } } - + Status = STATUS_OBJECT_NAME_NOT_FOUND; end: - ExReleaseResourceLite(&fcb->nonpaged->dir_children_lock); - + if (locked) + ExReleaseResourceLite(&fcb->nonpaged->dir_children_lock); + if (!case_sensitive) ExFreePool(fnus.Buffer); - - return Status; -} -static BOOL find_stream(device_extension* Vcb, fcb* fcb, PUNICODE_STRING stream, PUNICODE_STRING newstreamname, UINT32* hash, PANSI_STRING xattr, PIRP Irp) { - NTSTATUS Status; - ULONG utf8len; - char* utf8; - UINT32 crc32; - KEY searchkey; - traverse_ptr tp, next_tp; - BOOL success = FALSE, b; - - static char xapref[] = "user."; - ULONG xapreflen = strlen(xapref); - - TRACE("(%p, %p, %.*S)\n", Vcb, fcb, stream->Length / sizeof(WCHAR), stream->Buffer); - - Status = RtlUnicodeToUTF8N(NULL, 0, &utf8len, stream->Buffer, stream->Length); - if (!NT_SUCCESS(Status)) { - ERR("RtlUnicodeToUTF8N 1 returned %08x\n", Status); - return FALSE; - } - - TRACE("utf8len = %u\n", utf8len); - - utf8 = ExAllocatePoolWithTag(PagedPool, xapreflen + utf8len + 1, ALLOC_TAG); - if (!utf8) { - ERR("out of memory\n"); - goto end; - } - - RtlCopyMemory(utf8, xapref, xapreflen); - - Status = RtlUnicodeToUTF8N(&utf8[xapreflen], utf8len, &utf8len, stream->Buffer, stream->Length); - if (!NT_SUCCESS(Status)) { - ERR("RtlUnicodeToUTF8N 2 returned %08x\n", Status); - goto end; - } - - utf8len += xapreflen; - utf8[utf8len] = 0; - - TRACE("utf8 = %s\n", utf8); - - crc32 = calc_crc32c(0xfffffffe, (UINT8*)utf8, utf8len); - TRACE("crc32 = %08x\n", crc32); - - if ((crc32 == EA_DOSATTRIB_HASH && utf8len == strlen(EA_DOSATTRIB) && RtlCompareMemory(utf8, EA_DOSATTRIB, utf8len) == utf8len) || - (crc32 == EA_EA_HASH && utf8len == strlen(EA_EA) && 
RtlCompareMemory(utf8, EA_EA, utf8len) == utf8len)) { - return FALSE; - } - - searchkey.obj_id = fcb->inode; - searchkey.obj_type = TYPE_XATTR_ITEM; - searchkey.offset = crc32; - - Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - goto end; - } - - if (!keycmp(tp.item->key, searchkey)) { - if (tp.item->size < sizeof(DIR_ITEM)) { - ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM)); - } else { - ULONG len = tp.item->size, xasize; - DIR_ITEM* di = (DIR_ITEM*)tp.item->data; - - TRACE("found match on hash\n"); - - while (len > 0) { - if (len < sizeof(DIR_ITEM) || len < sizeof(DIR_ITEM) - 1 + di->m + di->n) { - ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - break; - } - - if (RtlCompareMemory(di->name, utf8, utf8len) == utf8len) { - TRACE("found exact match for %s\n", utf8); - - *hash = tp.item->key.offset; - - xattr->Buffer = ExAllocatePoolWithTag(PagedPool, di->n + 1, ALLOC_TAG); - if (!xattr->Buffer) { - ERR("out of memory\n"); - goto end; - } - - xattr->Length = xattr->MaximumLength = di->n; - RtlCopyMemory(xattr->Buffer, di->name, di->n); - xattr->Buffer[di->n] = 0; - - success = TRUE; - goto end; - } - - xasize = sizeof(DIR_ITEM) - 1 + di->m + di->n; - - if (len > xasize) { - len -= xasize; - di = (DIR_ITEM*)&di->name[di->m + di->n]; - } else - break; - } - } - } - - searchkey.offset = 0; - - Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - goto end; - } - - do { - if (tp.item->key.obj_id == fcb->inode && tp.item->key.obj_type == TYPE_XATTR_ITEM && tp.item->key.offset != crc32) { - if (tp.item->size < sizeof(DIR_ITEM)) { - ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, 
tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM)); - } else { - ULONG len = tp.item->size, xasize; - DIR_ITEM* di = (DIR_ITEM*)tp.item->data; - ULONG utf16len; - - TRACE("found xattr with hash %08x\n", (UINT32)tp.item->key.offset); - - while (len > 0) { - if (len < sizeof(DIR_ITEM) || len < sizeof(DIR_ITEM) - 1 + di->m + di->n) { - ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - break; - } - - if (di->n > xapreflen && RtlCompareMemory(di->name, xapref, xapreflen) == xapreflen) { - TRACE("found potential xattr %.*s\n", di->n, di->name); - } - - Status = RtlUTF8ToUnicodeN(NULL, 0, &utf16len, &di->name[xapreflen], di->n - xapreflen); - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); - } else { - WCHAR* utf16 = ExAllocatePoolWithTag(PagedPool, utf16len, ALLOC_TAG); - if (!utf16) { - ERR("out of memory\n"); - goto end; - } - - Status = RtlUTF8ToUnicodeN(utf16, utf16len, &utf16len, &di->name[xapreflen], di->n - xapreflen); - - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); - } else { - UNICODE_STRING us; - - us.Buffer = utf16; - us.Length = us.MaximumLength = (USHORT)utf16len; - - if (FsRtlAreNamesEqual(stream, &us, TRUE, NULL)) { - TRACE("found case-insensitive match for %s\n", utf8); - - *newstreamname = us; - *hash = tp.item->key.offset; - - xattr->Buffer = ExAllocatePoolWithTag(PagedPool, di->n + 1, ALLOC_TAG); - if (!xattr->Buffer) { - ERR("out of memory\n"); - ExFreePool(utf16); - goto end; - } - - xattr->Length = xattr->MaximumLength = di->n; - RtlCopyMemory(xattr->Buffer, di->name, di->n); - xattr->Buffer[di->n] = 0; - - success = TRUE; - goto end; - } - } - - ExFreePool(utf16); - } - - xasize = sizeof(DIR_ITEM) - 1 + di->m + di->n; - - if (len > xasize) { - len -= xasize; - di = (DIR_ITEM*)&di->name[di->m + di->n]; - } else - break; - } - } - } - - b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp); - if (b) { - 
tp = next_tp; - - if (next_tp.item->key.obj_id > fcb->inode || next_tp.item->key.obj_type > TYPE_XATTR_ITEM) - break; - } - } while (b); - -end: - ExFreePool(utf8); - - return success; + return Status; } -static NTSTATUS split_path(PUNICODE_STRING path, UNICODE_STRING** parts, ULONG* num_parts, BOOL* stream) { - ULONG len, i, j, np; +static NTSTATUS split_path(device_extension* Vcb, PUNICODE_STRING path, LIST_ENTRY* parts, BOOL* stream) { + ULONG len, i; BOOL has_stream; - UNICODE_STRING* ps; WCHAR* buf; - - np = 1; - + name_bit* nb; + len = path->Length / sizeof(WCHAR); if (len > 0 && (path->Buffer[len - 1] == '/' || path->Buffer[len - 1] == '\\')) len--; - + has_stream = FALSE; for (i = 0; i < len; i++) { if (path->Buffer[i] == '/' || path->Buffer[i] == '\\') { - np++; has_stream = FALSE; } else if (path->Buffer[i] == ':') { has_stream = TRUE; } } - - if (has_stream) - np++; - - ps = ExAllocatePoolWithTag(PagedPool, np * sizeof(UNICODE_STRING), ALLOC_TAG); - if (!ps) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - RtlZeroMemory(ps, np * sizeof(UNICODE_STRING)); - + buf = path->Buffer; - - j = 0; + for (i = 0; i < len; i++) { if (path->Buffer[i] == '/' || path->Buffer[i] == '\\') { - ps[j].Buffer = buf; - ps[j].Length = (&path->Buffer[i] - buf) * sizeof(WCHAR); - ps[j].MaximumLength = ps[j].Length; - + nb = ExAllocateFromPagedLookasideList(&Vcb->name_bit_lookaside); + if (!nb) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + nb->us.Buffer = buf; + nb->us.Length = nb->us.MaximumLength = (USHORT)(&path->Buffer[i] - buf) * sizeof(WCHAR); + InsertTailList(parts, &nb->list_entry); + buf = &path->Buffer[i+1]; - j++; } } - - ps[j].Buffer = buf; - ps[j].Length = (&path->Buffer[i] - buf) * sizeof(WCHAR); - ps[j].MaximumLength = ps[j].Length; - + + nb = ExAllocateFromPagedLookasideList(&Vcb->name_bit_lookaside); + if (!nb) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + nb->us.Buffer = buf; + 
nb->us.Length = nb->us.MaximumLength = (USHORT)(&path->Buffer[i] - buf) * sizeof(WCHAR); + InsertTailList(parts, &nb->list_entry); + if (has_stream) { static WCHAR datasuf[] = {':','$','D','A','T','A',0}; UNICODE_STRING dsus; - + dsus.Buffer = datasuf; - dsus.Length = dsus.MaximumLength = wcslen(datasuf) * sizeof(WCHAR); - - for (i = 0; i < ps[j].Length / sizeof(WCHAR); i++) { - if (ps[j].Buffer[i] == ':') { - ps[j+1].Buffer = &ps[j].Buffer[i+1]; - ps[j+1].Length = ps[j].Length - (i * sizeof(WCHAR)) - sizeof(WCHAR); - - ps[j].Length = i * sizeof(WCHAR); - ps[j].MaximumLength = ps[j].Length; - - j++; - + dsus.Length = dsus.MaximumLength = (UINT16)wcslen(datasuf) * sizeof(WCHAR); + + for (i = 0; i < nb->us.Length / sizeof(WCHAR); i++) { + if (nb->us.Buffer[i] == ':') { + name_bit* nb2; + + nb2 = ExAllocateFromPagedLookasideList(&Vcb->name_bit_lookaside); + if (!nb2) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + nb2->us.Buffer = &nb->us.Buffer[i+1]; + nb2->us.Length = nb2->us.MaximumLength = (UINT16)(nb->us.Length - (i * sizeof(WCHAR)) - sizeof(WCHAR)); + InsertTailList(parts, &nb2->list_entry); + + nb->us.Length = (UINT16)i * sizeof(WCHAR); + nb->us.MaximumLength = nb->us.Length; + + nb = nb2; + break; } } - + // FIXME - should comparison be case-insensitive? 
// remove :$DATA suffix - if (ps[j].Length >= dsus.Length && RtlCompareMemory(&ps[j].Buffer[(ps[j].Length - dsus.Length)/sizeof(WCHAR)], dsus.Buffer, dsus.Length) == dsus.Length) - ps[j].Length -= dsus.Length; - - if (ps[j].Length == 0) { - np--; + if (nb->us.Length >= dsus.Length && RtlCompareMemory(&nb->us.Buffer[(nb->us.Length - dsus.Length)/sizeof(WCHAR)], dsus.Buffer, dsus.Length) == dsus.Length) + nb->us.Length -= dsus.Length; + + if (nb->us.Length == 0) { + RemoveTailList(parts); + ExFreeToPagedLookasideList(&Vcb->name_bit_lookaside, nb); + has_stream = FALSE; } } - + // if path is just stream name, remove first empty item if (has_stream && path->Length >= sizeof(WCHAR) && path->Buffer[0] == ':') { - ps[0] = ps[1]; - np--; + name_bit *nb1 = CONTAINING_RECORD(RemoveHeadList(parts), name_bit, list_entry); + + ExFreeToPagedLookasideList(&Vcb->name_bit_lookaside, nb1); } -// for (i = 0; i < np; i++) { -// ERR("part %u: %u, (%.*S)\n", i, ps[i].Length, ps[i].Length / sizeof(WCHAR), ps[i].Buffer); -// } - - *num_parts = np; - *parts = ps; *stream = has_stream; - - return STATUS_SUCCESS; -} - -// #ifdef DEBUG_FCB_REFCOUNTS -// static void print_fcbs(device_extension* Vcb) { -// fcb* fcb = Vcb->fcbs; -// -// while (fcb) { -// ERR("fcb %p (%.*S): refcount %u\n", fcb, fcb->full_filename.Length / sizeof(WCHAR), fcb->full_filename.Buffer, fcb->refcount); -// -// fcb = fcb->next; -// } -// } -// #endif - -static file_ref* search_fileref_children(file_ref* dir, PUNICODE_STRING name, BOOL case_sensitive) { - LIST_ENTRY* le; - file_ref *c, *deleted = NULL; - NTSTATUS Status; - UNICODE_STRING ucus; -#ifdef DEBUG_FCB_REFCOUNTS - ULONG rc; -#endif - - if (case_sensitive) { - le = dir->children.Flink; - while (le != &dir->children) { - c = CONTAINING_RECORD(le, file_ref, list_entry); - - if (c->refcount > 0 && c->filepart.Length == name->Length && - RtlCompareMemory(c->filepart.Buffer, name->Buffer, name->Length) == name->Length) { - if (c->deleted) { - deleted = c; - } else { 
-#ifdef DEBUG_FCB_REFCOUNTS - rc = InterlockedIncrement(&c->refcount); - WARN("fileref %p: refcount now %i (%S)\n", c, rc, file_desc_fileref(c)); -#else - InterlockedIncrement(&c->refcount); -#endif - return c; - } - } - - le = le->Flink; - } - - goto end; - } - Status = RtlUpcaseUnicodeString(&ucus, name, TRUE); - if (!NT_SUCCESS(Status)) { - ERR("RtlUpcaseUnicodeString returned %08x\n", Status); - return NULL; - } - - le = dir->children.Flink; - while (le != &dir->children) { - c = CONTAINING_RECORD(le, file_ref, list_entry); - - if (c->refcount > 0 && c->filepart_uc.Length == ucus.Length && - RtlCompareMemory(c->filepart_uc.Buffer, ucus.Buffer, ucus.Length) == ucus.Length) { - if (c->deleted) { - deleted = c; - } else { -#ifdef DEBUG_FCB_REFCOUNTS - rc = InterlockedIncrement(&c->refcount); - WARN("fileref %p: refcount now %i (%S)\n", c, rc, file_desc_fileref(c)); -#else - InterlockedIncrement(&c->refcount); -#endif - ExFreePool(ucus.Buffer); - - return c; - } - } - - le = le->Flink; - } - - ExFreePool(ucus.Buffer); - -end: - if (deleted) - increase_fileref_refcount(deleted); - - return deleted; + return STATUS_SUCCESS; } -NTSTATUS load_csum(device_extension* Vcb, UINT32* csum, UINT64 start, UINT64 length, PIRP Irp) { +NTSTATUS load_csum(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, UINT32* csum, UINT64 start, UINT64 length, PIRP Irp) { NTSTATUS Status; KEY searchkey; traverse_ptr tp, next_tp; UINT64 i, j; BOOL b; - + searchkey.obj_id = EXTENT_CSUM_ID; searchkey.obj_type = TYPE_EXTENT_CSUM; searchkey.offset = start; - + Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + i = 0; do { if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { ULONG readlen; - + if (start < tp.item->key.offset) j = 0; else j = ((start - tp.item->key.offset) / Vcb->superblock.sector_size) + i; - + if (j * 
sizeof(UINT32) > tp.item->size || tp.item->key.offset > start + (i * Vcb->superblock.sector_size)) { ERR("checksum not found for %llx\n", start + (i * Vcb->superblock.sector_size)); return STATUS_INTERNAL_ERROR; } - - readlen = min((tp.item->size / sizeof(UINT32)) - j, length - i); + + readlen = (ULONG)min((tp.item->size / sizeof(UINT32)) - j, length - i); RtlCopyMemory(&csum[i], tp.item->data + (j * sizeof(UINT32)), readlen * sizeof(UINT32)); i += readlen; - + if (i == length) break; } - + b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp); - + if (b) tp = next_tp; } while (b); - + if (i < length) { ERR("could not read checksums: offset %llx, length %llx sectors\n", start, length); return STATUS_INTERNAL_ERROR; } - + return STATUS_SUCCESS; } -NTSTATUS load_dir_children(fcb* fcb, BOOL ignore_size, PIRP Irp) { +NTSTATUS load_dir_children(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, fcb* fcb, BOOL ignore_size, PIRP Irp) { KEY searchkey; traverse_ptr tp, next_tp; NTSTATUS Status; - + ULONG num_children = 0; + fcb->hash_ptrs = ExAllocatePoolWithTag(PagedPool, sizeof(LIST_ENTRY*) * 256, ALLOC_TAG); if (!fcb->hash_ptrs) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlZeroMemory(fcb->hash_ptrs, sizeof(LIST_ENTRY*) * 256); - + fcb->hash_ptrs_uc = ExAllocatePoolWithTag(PagedPool, sizeof(LIST_ENTRY*) * 256, ALLOC_TAG); if (!fcb->hash_ptrs_uc) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlZeroMemory(fcb->hash_ptrs_uc, sizeof(LIST_ENTRY*) * 256); - + if (!ignore_size && fcb->inode_item.st_size == 0) return STATUS_SUCCESS; - + searchkey.obj_id = fcb->inode; searchkey.obj_type = TYPE_DIR_INDEX; searchkey.offset = 2; - - Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp); + + Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("find_item returned %08x\n", Status); return Status; } - + if (keycmp(tp.item->key, searchkey) == -1) { - if 
(find_next_item(fcb->Vcb, &tp, &next_tp, FALSE, Irp)) { + if (find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) { tp = next_tp; TRACE("moving on to %llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); } } - + while (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { DIR_ITEM* di = (DIR_ITEM*)tp.item->data; dir_child* dc; ULONG utf16len; - + if (tp.item->size < sizeof(DIR_ITEM)) { WARN("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM)); goto cont; } - + if (di->n == 0) { WARN("(%llx,%x,%llx): DIR_ITEM name length is zero\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); goto cont; } - + Status = RtlUTF8ToUnicodeN(NULL, 0, &utf16len, di->name, di->n); if (!NT_SUCCESS(Status)) { ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); @@ -753,12 +484,12 @@ NTSTATUS load_dir_children(fcb* fcb, BOOL ignore_size, PIRP Irp) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + dc->key = di->key; dc->index = tp.item->key.offset; dc->type = di->type; dc->fileref = NULL; - + dc->utf8.MaximumLength = dc->utf8.Length = di->n; dc->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, di->n, ALLOC_TAG); if (!dc->utf8.Buffer) { @@ -766,10 +497,10 @@ NTSTATUS load_dir_children(fcb* fcb, BOOL ignore_size, PIRP Irp) { ExFreePool(dc); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(dc->utf8.Buffer, di->name, di->n); - - dc->name.MaximumLength = dc->name.Length = utf16len; + + dc->name.MaximumLength = dc->name.Length = (UINT16)utf16len; dc->name.Buffer = ExAllocatePoolWithTag(PagedPool, dc->name.MaximumLength, ALLOC_TAG); if (!dc->name.Buffer) { ERR("out of memory\n"); @@ -777,7 +508,7 @@ NTSTATUS load_dir_children(fcb* fcb, BOOL ignore_size, PIRP Irp) { ExFreePool(dc); return STATUS_INSUFFICIENT_RESOURCES; } - + Status = RtlUTF8ToUnicodeN(dc->name.Buffer, utf16len, &utf16len, 
di->name, di->n); if (!NT_SUCCESS(Status)) { ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); @@ -786,7 +517,7 @@ NTSTATUS load_dir_children(fcb* fcb, BOOL ignore_size, PIRP Irp) { ExFreePool(dc); goto cont; } - + Status = RtlUpcaseUnicodeString(&dc->name_uc, &dc->name, TRUE); if (!NT_SUCCESS(Status)) { ERR("RtlUpcaseUnicodeString returned %08x\n", Status); @@ -795,414 +526,543 @@ NTSTATUS load_dir_children(fcb* fcb, BOOL ignore_size, PIRP Irp) { ExFreePool(dc); goto cont; } - + dc->hash = calc_crc32c(0xffffffff, (UINT8*)dc->name.Buffer, dc->name.Length); dc->hash_uc = calc_crc32c(0xffffffff, (UINT8*)dc->name_uc.Buffer, dc->name_uc.Length); - + InsertTailList(&fcb->dir_children_index, &dc->list_entry_index); - + insert_dir_child_into_hash_lists(fcb, dc); - + + num_children++; + cont: - if (find_next_item(fcb->Vcb, &tp, &next_tp, FALSE, Irp)) + if (find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) tp = next_tp; else break; } - + + // If a directory has a lot of files, force it to stick around until the next flush + // so we aren't constantly re-reading. 
+ if (num_children >= 100) + mark_fcb_dirty(fcb); + return STATUS_SUCCESS; } -NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type, PANSI_STRING utf8, fcb* parent, fcb** pfcb, POOL_TYPE pooltype, PIRP Irp) { +NTSTATUS open_fcb(_Requires_lock_held_(_Curr_->tree_lock) _Requires_exclusive_lock_held_(_Curr_->fcb_lock) device_extension* Vcb, + root* subvol, UINT64 inode, UINT8 type, PANSI_STRING utf8, fcb* parent, fcb** pfcb, POOL_TYPE pooltype, PIRP Irp) { KEY searchkey; traverse_ptr tp, next_tp; NTSTATUS Status; - fcb* fcb; + fcb *fcb, *deleted_fcb = NULL; BOOL atts_set = FALSE, sd_set = FALSE, no_data; LIST_ENTRY* lastle = NULL; EXTENT_DATA* ed = NULL; - + if (!IsListEmpty(&subvol->fcbs)) { LIST_ENTRY* le = subvol->fcbs.Flink; - + while (le != &subvol->fcbs) { fcb = CONTAINING_RECORD(le, struct _fcb, list_entry); - + if (fcb->inode == inode) { if (!fcb->ads) { + if (fcb->deleted) + deleted_fcb = fcb; + else { #ifdef DEBUG_FCB_REFCOUNTS - LONG rc = InterlockedIncrement(&fcb->refcount); + LONG rc = InterlockedIncrement(&fcb->refcount); - WARN("fcb %p: refcount now %i (subvol %llx, inode %llx)\n", fcb, rc, fcb->subvol->id, fcb->inode); + WARN("fcb %p: refcount now %i (subvol %llx, inode %llx)\n", fcb, rc, fcb->subvol->id, fcb->inode); #else - InterlockedIncrement(&fcb->refcount); + InterlockedIncrement(&fcb->refcount); #endif - *pfcb = fcb; - return STATUS_SUCCESS; + *pfcb = fcb; + return STATUS_SUCCESS; + } } } else if (fcb->inode > inode) { + if (deleted_fcb) { + InterlockedIncrement(&deleted_fcb->refcount); + *pfcb = deleted_fcb; + return STATUS_SUCCESS; + } + lastle = le->Blink; break; } - + le = le->Flink; } } - - fcb = create_fcb(pooltype); + + if (deleted_fcb) { + InterlockedIncrement(&deleted_fcb->refcount); + *pfcb = deleted_fcb; + return STATUS_SUCCESS; + } + + fcb = create_fcb(Vcb, pooltype); if (!fcb) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + fcb->Vcb = Vcb; - + fcb->subvol = subvol; fcb->inode = inode; 
fcb->type = type; - + searchkey.obj_id = inode; searchkey.obj_type = TYPE_INODE_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); - free_fcb(fcb); + free_fcb(Vcb, fcb); return Status; } - + if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { WARN("couldn't find INODE_ITEM for inode %llx in subvol %llx\n", inode, subvol->id); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INVALID_PARAMETER; } - + if (tp.item->size > 0) RtlCopyMemory(&fcb->inode_item, tp.item->data, min(sizeof(INODE_ITEM), tp.item->size)); - + if (fcb->type == 0) { // guess the type from the inode mode, if the caller doesn't know already - if (fcb->inode_item.st_mode & __S_IFDIR) + if ((fcb->inode_item.st_mode & __S_IFDIR) == __S_IFDIR) fcb->type = BTRFS_TYPE_DIRECTORY; - else if (fcb->inode_item.st_mode & __S_IFCHR) + else if ((fcb->inode_item.st_mode & __S_IFCHR) == __S_IFCHR) fcb->type = BTRFS_TYPE_CHARDEV; - else if (fcb->inode_item.st_mode & __S_IFBLK) + else if ((fcb->inode_item.st_mode & __S_IFBLK) == __S_IFBLK) fcb->type = BTRFS_TYPE_BLOCKDEV; - else if (fcb->inode_item.st_mode & __S_IFIFO) + else if ((fcb->inode_item.st_mode & __S_IFIFO) == __S_IFIFO) fcb->type = BTRFS_TYPE_FIFO; - else if (fcb->inode_item.st_mode & __S_IFLNK) + else if ((fcb->inode_item.st_mode & __S_IFLNK) == __S_IFLNK) fcb->type = BTRFS_TYPE_SYMLINK; - else if (fcb->inode_item.st_mode & __S_IFSOCK) + else if ((fcb->inode_item.st_mode & __S_IFSOCK) == __S_IFSOCK) fcb->type = BTRFS_TYPE_SOCKET; else fcb->type = BTRFS_TYPE_FILE; } - + no_data = fcb->inode_item.st_size == 0 || (fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK); - + while (find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) { tp = next_tp; - + if (tp.item->key.obj_id > inode) break; - + if ((no_data && tp.item->key.obj_type > TYPE_XATTR_ITEM) || tp.item->key.obj_type 
> TYPE_EXTENT_DATA) break; - + if (fcb->inode_item.st_nlink > 1 && tp.item->key.obj_type == TYPE_INODE_REF) { ULONG len; INODE_REF* ir; - + len = tp.item->size; ir = (INODE_REF*)tp.item->data; - + while (len >= sizeof(INODE_REF) - 1) { hardlink* hl; ULONG stringlen; - + hl = ExAllocatePoolWithTag(pooltype, sizeof(hardlink), ALLOC_TAG); if (!hl) { ERR("out of memory\n"); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INSUFFICIENT_RESOURCES; } - + hl->parent = tp.item->key.offset; hl->index = ir->index; - + hl->utf8.Length = hl->utf8.MaximumLength = ir->n; - + if (hl->utf8.Length > 0) { hl->utf8.Buffer = ExAllocatePoolWithTag(pooltype, hl->utf8.MaximumLength, ALLOC_TAG); RtlCopyMemory(hl->utf8.Buffer, ir->name, ir->n); } - + Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, ir->name, ir->n); if (!NT_SUCCESS(Status)) { ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); ExFreePool(hl); - free_fcb(fcb); + free_fcb(Vcb, fcb); return Status; } - - hl->name.Length = hl->name.MaximumLength = stringlen; - + + hl->name.Length = hl->name.MaximumLength = (UINT16)stringlen; + if (stringlen == 0) hl->name.Buffer = NULL; else { hl->name.Buffer = ExAllocatePoolWithTag(pooltype, hl->name.MaximumLength, ALLOC_TAG); - + if (!hl->name.Buffer) { ERR("out of memory\n"); ExFreePool(hl); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INSUFFICIENT_RESOURCES; } - + Status = RtlUTF8ToUnicodeN(hl->name.Buffer, stringlen, &stringlen, ir->name, ir->n); if (!NT_SUCCESS(Status)) { ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); ExFreePool(hl->name.Buffer); ExFreePool(hl); - free_fcb(fcb); + free_fcb(Vcb, fcb); return Status; } } - + InsertTailList(&fcb->hardlinks, &hl->list_entry); - + len -= sizeof(INODE_REF) - 1 + ir->n; ir = (INODE_REF*)&ir->name[ir->n]; } } else if (fcb->inode_item.st_nlink > 1 && tp.item->key.obj_type == TYPE_INODE_EXTREF) { ULONG len; INODE_EXTREF* ier; - + len = tp.item->size; ier = (INODE_EXTREF*)tp.item->data; - + while (len >= sizeof(INODE_EXTREF) - 1) { 
hardlink* hl; ULONG stringlen; - + hl = ExAllocatePoolWithTag(pooltype, sizeof(hardlink), ALLOC_TAG); if (!hl) { ERR("out of memory\n"); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INSUFFICIENT_RESOURCES; } - + hl->parent = ier->dir; hl->index = ier->index; - + hl->utf8.Length = hl->utf8.MaximumLength = ier->n; - + if (hl->utf8.Length > 0) { hl->utf8.Buffer = ExAllocatePoolWithTag(pooltype, hl->utf8.MaximumLength, ALLOC_TAG); RtlCopyMemory(hl->utf8.Buffer, ier->name, ier->n); } - + Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, ier->name, ier->n); if (!NT_SUCCESS(Status)) { ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); ExFreePool(hl); - free_fcb(fcb); + free_fcb(Vcb, fcb); return Status; } - - hl->name.Length = hl->name.MaximumLength = stringlen; - + + hl->name.Length = hl->name.MaximumLength = (UINT16)stringlen; + if (stringlen == 0) hl->name.Buffer = NULL; else { hl->name.Buffer = ExAllocatePoolWithTag(pooltype, hl->name.MaximumLength, ALLOC_TAG); - + if (!hl->name.Buffer) { ERR("out of memory\n"); ExFreePool(hl); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INSUFFICIENT_RESOURCES; } - + Status = RtlUTF8ToUnicodeN(hl->name.Buffer, stringlen, &stringlen, ier->name, ier->n); if (!NT_SUCCESS(Status)) { ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); ExFreePool(hl->name.Buffer); ExFreePool(hl); - free_fcb(fcb); + free_fcb(Vcb, fcb); return Status; } } - + InsertTailList(&fcb->hardlinks, &hl->list_entry); - + len -= sizeof(INODE_EXTREF) - 1 + ier->n; ier = (INODE_EXTREF*)&ier->name[ier->n]; } } else if (tp.item->key.obj_type == TYPE_XATTR_ITEM) { - if (tp.item->size < sizeof(DIR_ITEM)) { - ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM)); + ULONG len; + DIR_ITEM* di; + + static char xapref[] = "user."; + + if (tp.item->size < offsetof(DIR_ITEM, name[0])) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", 
tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, offsetof(DIR_ITEM, name[0])); continue; } - - if (tp.item->key.offset == EA_REPARSE_HASH) { - UINT8* xattrdata; - UINT16 xattrlen; - - if (extract_xattr(tp.item->data, tp.item->size, EA_REPARSE, &xattrdata, &xattrlen)) { - fcb->reparse_xattr.Buffer = (char*)xattrdata; - fcb->reparse_xattr.Length = fcb->reparse_xattr.MaximumLength = xattrlen; - } - } else if (tp.item->key.offset == EA_EA_HASH) { - UINT8* eadata; - UINT16 ealen; - - if (extract_xattr(tp.item->data, tp.item->size, EA_EA, &eadata, &ealen)) { - ULONG offset; - - Status = IoCheckEaBufferValidity((FILE_FULL_EA_INFORMATION*)eadata, ealen, &offset); - + + len = tp.item->size; + di = (DIR_ITEM*)tp.item->data; + + do { + if (len < offsetof(DIR_ITEM, name[0]) + di->m + di->n) + break; + + if (tp.item->key.offset == EA_REPARSE_HASH && di->n == strlen(EA_REPARSE) && RtlCompareMemory(EA_REPARSE, di->name, di->n) == di->n) { + if (di->m > 0) { + fcb->reparse_xattr.Buffer = ExAllocatePoolWithTag(PagedPool, di->m, ALLOC_TAG); + if (!fcb->reparse_xattr.Buffer) { + ERR("out of memory\n"); + free_fcb(Vcb, fcb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(fcb->reparse_xattr.Buffer, &di->name[di->n], di->m); + } else + fcb->reparse_xattr.Buffer = NULL; + + fcb->reparse_xattr.Length = fcb->reparse_xattr.MaximumLength = di->m; + } else if (tp.item->key.offset == EA_EA_HASH && di->n == strlen(EA_EA) && RtlCompareMemory(EA_EA, di->name, di->n) == di->n) { + if (di->m > 0) { + ULONG offset; + + Status = IoCheckEaBufferValidity((FILE_FULL_EA_INFORMATION*)&di->name[di->n], di->m, &offset); + + if (!NT_SUCCESS(Status)) + WARN("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, offset); + else { + FILE_FULL_EA_INFORMATION* eainfo; + + fcb->ea_xattr.Buffer = ExAllocatePoolWithTag(PagedPool, di->m, ALLOC_TAG); + if (!fcb->ea_xattr.Buffer) { + ERR("out of memory\n"); + free_fcb(Vcb, fcb); + return 
STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(fcb->ea_xattr.Buffer, &di->name[di->n], di->m); + + fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = di->m; + + fcb->ealen = 4; + + // calculate ealen + eainfo = (FILE_FULL_EA_INFORMATION*)&di->name[di->n]; + do { + fcb->ealen += 5 + eainfo->EaNameLength + eainfo->EaValueLength; + + if (eainfo->NextEntryOffset == 0) + break; + + eainfo = (FILE_FULL_EA_INFORMATION*)(((UINT8*)eainfo) + eainfo->NextEntryOffset); + } while (TRUE); + } + } + } else if (tp.item->key.offset == EA_DOSATTRIB_HASH && di->n == strlen(EA_DOSATTRIB) && RtlCompareMemory(EA_DOSATTRIB, di->name, di->n) == di->n) { + if (di->m > 0) { + if (get_file_attributes_from_xattr(&di->name[di->n], di->m, &fcb->atts)) { + atts_set = TRUE; + + if (fcb->type == BTRFS_TYPE_DIRECTORY) + fcb->atts |= FILE_ATTRIBUTE_DIRECTORY; + else if (fcb->type == BTRFS_TYPE_SYMLINK) + fcb->atts |= FILE_ATTRIBUTE_REPARSE_POINT; + + if (fcb->type != BTRFS_TYPE_DIRECTORY) + fcb->atts &= ~FILE_ATTRIBUTE_DIRECTORY; + + if (inode == SUBVOL_ROOT_INODE) { + if (subvol->root_item.flags & BTRFS_SUBVOL_READONLY) + fcb->atts |= FILE_ATTRIBUTE_READONLY; + else + fcb->atts &= ~FILE_ATTRIBUTE_READONLY; + } + } + } + } else if (tp.item->key.offset == EA_NTACL_HASH && di->n == strlen(EA_NTACL) && RtlCompareMemory(EA_NTACL, di->name, di->n) == di->n) { + if (di->m > 0) { + fcb->sd = ExAllocatePoolWithTag(PagedPool, di->m, ALLOC_TAG); + if (!fcb->sd) { + ERR("out of memory\n"); + free_fcb(Vcb, fcb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(fcb->sd, &di->name[di->n], di->m); + + // We have to test against our copy rather than the source, as RtlValidRelativeSecurityDescriptor + // will fail if the ACLs aren't 32-bit aligned. 
+ if (!RtlValidRelativeSecurityDescriptor(fcb->sd, di->m, 0)) + ExFreePool(fcb->sd); + else + sd_set = TRUE; + } + } else if (tp.item->key.offset == EA_PROP_COMPRESSION_HASH && di->n == strlen(EA_PROP_COMPRESSION) && RtlCompareMemory(EA_PROP_COMPRESSION, di->name, di->n) == di->n) { + if (di->m > 0) { + const char lzo[] = "lzo"; + const char zlib[] = "zlib"; + + if (di->m == strlen(lzo) && RtlCompareMemory(&di->name[di->n], lzo, di->m) == di->m) + fcb->prop_compression = PropCompression_LZO; + else if (di->m == strlen(zlib) && RtlCompareMemory(&di->name[di->n], zlib, di->m) == di->m) + fcb->prop_compression = PropCompression_Zlib; + else + fcb->prop_compression = PropCompression_None; + } + } else if (di->n > strlen(xapref) && RtlCompareMemory(xapref, di->name, strlen(xapref)) == strlen(xapref)) { + dir_child* dc; + ULONG utf16len; + + Status = RtlUTF8ToUnicodeN(NULL, 0, &utf16len, &di->name[strlen(xapref)], di->n - (ULONG)strlen(xapref)); if (!NT_SUCCESS(Status)) { - WARN("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, offset); - ExFreePool(eadata); - } else { - FILE_FULL_EA_INFORMATION* eainfo; - fcb->ea_xattr.Buffer = (char*)eadata; - fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = ealen; - - fcb->ealen = 4; - - // calculate ealen - eainfo = (FILE_FULL_EA_INFORMATION*)eadata; - do { - fcb->ealen += 5 + eainfo->EaNameLength + eainfo->EaValueLength; - - if (eainfo->NextEntryOffset == 0) - break; - - eainfo = (FILE_FULL_EA_INFORMATION*)(((UINT8*)eainfo) + eainfo->NextEntryOffset); - } while (TRUE); + ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); + free_fcb(Vcb, fcb); + return Status; } - } - } else if (tp.item->key.offset == EA_DOSATTRIB_HASH) { - UINT8* xattrdata; - UINT16 xattrlen; - - if (extract_xattr(tp.item->data, tp.item->size, EA_DOSATTRIB, &xattrdata, &xattrlen)) { - if (get_file_attributes_from_xattr((char*)xattrdata, xattrlen, &fcb->atts)) { - atts_set = TRUE; - - if (fcb->type == BTRFS_TYPE_DIRECTORY) - fcb->atts |= 
FILE_ATTRIBUTE_DIRECTORY; - else if (fcb->type == BTRFS_TYPE_SYMLINK) - fcb->atts |= FILE_ATTRIBUTE_REPARSE_POINT; + + dc = ExAllocatePoolWithTag(PagedPool, sizeof(dir_child), ALLOC_TAG); + if (!dc) { + ERR("out of memory\n"); + free_fcb(Vcb, fcb); + return STATUS_INSUFFICIENT_RESOURCES; } - - ExFreePool(xattrdata); - } - } else if (tp.item->key.offset == EA_NTACL_HASH) { - UINT16 buflen; - - if (extract_xattr(tp.item->data, tp.item->size, EA_NTACL, (UINT8**)&fcb->sd, &buflen)) { - if (get_sd_from_xattr(fcb, buflen)) { - sd_set = TRUE; - } else - ExFreePool(fcb->sd); + + RtlZeroMemory(dc, sizeof(dir_child)); + + dc->utf8.MaximumLength = dc->utf8.Length = di->n - (UINT16)strlen(xapref); + dc->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, dc->utf8.MaximumLength, ALLOC_TAG); + if (!dc->utf8.Buffer) { + ERR("out of memory\n"); + ExFreePool(dc); + free_fcb(Vcb, fcb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(dc->utf8.Buffer, &di->name[strlen(xapref)], dc->utf8.Length); + + dc->name.MaximumLength = dc->name.Length = (UINT16)utf16len; + dc->name.Buffer = ExAllocatePoolWithTag(PagedPool, dc->name.MaximumLength, ALLOC_TAG); + if (!dc->name.Buffer) { + ERR("out of memory\n"); + ExFreePool(dc->utf8.Buffer); + ExFreePool(dc); + free_fcb(Vcb, fcb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + Status = RtlUTF8ToUnicodeN(dc->name.Buffer, utf16len, &utf16len, dc->utf8.Buffer, dc->utf8.Length); + if (!NT_SUCCESS(Status)) { + ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); + ExFreePool(dc->utf8.Buffer); + ExFreePool(dc->name.Buffer); + ExFreePool(dc); + free_fcb(Vcb, fcb); + return Status; + } + + Status = RtlUpcaseUnicodeString(&dc->name_uc, &dc->name, TRUE); + if (!NT_SUCCESS(Status)) { + ERR("RtlUpcaseUnicodeString returned %08x\n", Status); + ExFreePool(dc->utf8.Buffer); + ExFreePool(dc->name.Buffer); + ExFreePool(dc); + free_fcb(Vcb, fcb); + return Status; + } + + dc->size = di->m; + + InsertTailList(&fcb->dir_children_index, &dc->list_entry_index); 
+ } else { + xattr* xa; + + xa = ExAllocatePoolWithTag(PagedPool, offsetof(xattr, data[0]) + di->m + di->n, ALLOC_TAG); + if (!xa) { + ERR("out of memory\n"); + free_fcb(Vcb, fcb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + xa->namelen = di->n; + xa->valuelen = di->m; + xa->dirty = FALSE; + RtlCopyMemory(xa->data, di->name, di->m + di->n); + + InsertTailList(&fcb->xattrs, &xa->list_entry); } - } + + len -= (ULONG)offsetof(DIR_ITEM, name[0]) + di->m + di->n; + + if (len < offsetof(DIR_ITEM, name[0])) + break; + + di = (DIR_ITEM*)&di->name[di->m + di->n]; + } while (TRUE); } else if (tp.item->key.obj_type == TYPE_EXTENT_DATA) { extent* ext; BOOL unique = FALSE; - + ed = (EXTENT_DATA*)tp.item->data; - + if (tp.item->size < sizeof(EXTENT_DATA)) { - ERR("(%llx,%x,%llx) was %llx bytes, expected at least %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA)); - - free_fcb(fcb); + + free_fcb(Vcb, fcb); return STATUS_INTERNAL_ERROR; } - + if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ed->data[0]; - + if (tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { - ERR("(%llx,%x,%llx) was %llx bytes, expected at least %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)); - - free_fcb(fcb); + + free_fcb(Vcb, fcb); return STATUS_INTERNAL_ERROR; } - - if (ed2->address == 0 && ed2->size == 0) // sparse + + if (ed2->address == 0 || ed2->size == 0) // sparse continue; - + if (ed2->size != 0 && is_tree_unique(Vcb, tp.tree, Irp)) unique = is_extent_unique(Vcb, ed2->address, ed2->size, Irp); } - - ext = 
ExAllocatePoolWithTag(pooltype, sizeof(extent), ALLOC_TAG); + + ext = ExAllocatePoolWithTag(pooltype, offsetof(extent, extent_data) + tp.item->size, ALLOC_TAG); if (!ext) { ERR("out of memory\n"); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INSUFFICIENT_RESOURCES; } - - ext->data = ExAllocatePoolWithTag(pooltype, tp.item->size, ALLOC_TAG); - if (!ext->data) { - ERR("out of memory\n"); - ExFreePool(ext); - free_fcb(fcb); - return STATUS_INSUFFICIENT_RESOURCES; - } - + ext->offset = tp.item->key.offset; - RtlCopyMemory(ext->data, tp.item->data, tp.item->size); + RtlCopyMemory(&ext->extent_data, tp.item->data, tp.item->size); ext->datalen = tp.item->size; ext->unique = unique; ext->ignore = FALSE; ext->inserted = FALSE; - - if (ed->type == EXTENT_TYPE_REGULAR && !(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) { - EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ed->data[0]; - UINT64 len; - - len = (ed->compression == BTRFS_COMPRESSION_NONE ? ed2->num_bytes : ed2->size) / Vcb->superblock.sector_size; - - ext->csum = ExAllocatePoolWithTag(NonPagedPool, len * sizeof(UINT32), ALLOC_TAG); - if (!ext->csum) { - ERR("out of memory\n"); - ExFreePool(ext); - free_fcb(fcb); - return STATUS_INSUFFICIENT_RESOURCES; - } - - Status = load_csum(Vcb, ext->csum, ed2->address + (ed->compression == BTRFS_COMPRESSION_NONE ? 
ed2->offset : 0), len, Irp); - - if (!NT_SUCCESS(Status)) { - ERR("load_csum returned %08x\n", Status); - ExFreePool(ext); - free_fcb(fcb); - return Status; - } - } else - ext->csum = NULL; - + ext->csum = NULL; + InsertTailList(&fcb->extents, &ext->list_entry); } } - + if (fcb->type == BTRFS_TYPE_DIRECTORY) { - Status = load_dir_children(fcb, FALSE, Irp); + Status = load_dir_children(Vcb, fcb, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("load_dir_children returned %08x\n", Status); - free_fcb(fcb); + free_fcb(Vcb, fcb); return Status; } } - + if (no_data) { fcb->Header.AllocationSize.QuadPart = 0; fcb->Header.FileSize.QuadPart = 0; @@ -1212,275 +1072,228 @@ NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type, fcb->Header.AllocationSize.QuadPart = fcb->inode_item.st_size; else fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size); - + fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size; fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size; } - + if (!atts_set) - fcb->atts = get_file_attributes(Vcb, &fcb->inode_item, fcb->subvol, fcb->inode, fcb->type, utf8 && utf8->Buffer[0] == '.', TRUE, Irp); - + fcb->atts = get_file_attributes(Vcb, fcb->subvol, fcb->inode, fcb->type, utf8 && utf8->Buffer[0] == '.', TRUE, Irp); + if (!sd_set) fcb_get_sd(fcb, parent, FALSE, Irp); - + if (fcb->type == BTRFS_TYPE_DIRECTORY && fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT && fcb->reparse_xattr.Length == 0) { fcb->atts &= ~FILE_ATTRIBUTE_REPARSE_POINT; - - if (!Vcb->readonly && !(subvol->root_item.flags & BTRFS_SUBVOL_READONLY)) { + + if (!Vcb->readonly && !is_subvol_readonly(subvol, Irp)) { fcb->atts_changed = TRUE; mark_fcb_dirty(fcb); } } - + if (lastle) InsertHeadList(lastle, &fcb->list_entry); else InsertTailList(&subvol->fcbs, &fcb->list_entry); - + InsertTailList(&Vcb->all_fcbs, &fcb->list_entry_all); - + fcb->Header.IsFastIoPossible = fast_io_possible(fcb); *pfcb = fcb; return 
STATUS_SUCCESS; } -NTSTATUS open_fcb_stream(device_extension* Vcb, root* subvol, UINT64 inode, ANSI_STRING* xattr, - UINT32 streamhash, fcb* parent, fcb** pfcb, PIRP Irp) { +static NTSTATUS open_fcb_stream(_Requires_lock_held_(_Curr_->tree_lock) _Requires_exclusive_lock_held_(_Curr_->fcb_lock) device_extension* Vcb, + dir_child* dc, fcb* parent, fcb** pfcb, PIRP Irp) { fcb* fcb; UINT8* xattrdata; UINT16 xattrlen, overhead; NTSTATUS Status; KEY searchkey; traverse_ptr tp; - LIST_ENTRY* lastle = NULL; - - if (!IsListEmpty(&subvol->fcbs)) { - LIST_ENTRY* le = subvol->fcbs.Flink; - - while (le != &subvol->fcbs) { - fcb = CONTAINING_RECORD(le, struct _fcb, list_entry); - - if (fcb->inode == inode) { - if (fcb->ads && fcb->adsxattr.Length == xattr->Length && - RtlCompareMemory(fcb->adsxattr.Buffer, xattr->Buffer, fcb->adsxattr.Length) == fcb->adsxattr.Length) { -#ifdef DEBUG_FCB_REFCOUNTS - LONG rc = InterlockedIncrement(&fcb->refcount); - - WARN("fcb %p: refcount now %i (subvol %llx, inode %llx)\n", fcb, rc, fcb->subvol->id, fcb->inode); -#else - InterlockedIncrement(&fcb->refcount); -#endif + static char xapref[] = "user."; + ANSI_STRING xattr; + UINT32 crc32; - *pfcb = fcb; - return STATUS_SUCCESS; - } - } else if (fcb->inode > inode) { - lastle = le->Blink; - break; - } - - le = le->Flink; - } + xattr.Length = (UINT16)strlen(xapref) + dc->utf8.Length; + xattr.MaximumLength = xattr.Length + 1; + xattr.Buffer = ExAllocatePoolWithTag(PagedPool, xattr.MaximumLength, ALLOC_TAG); + if (!xattr.Buffer) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; } - - fcb = create_fcb(PagedPool); + + RtlCopyMemory(xattr.Buffer, xapref, strlen(xapref)); + RtlCopyMemory(&xattr.Buffer[strlen(xapref)], dc->utf8.Buffer, dc->utf8.Length); + xattr.Buffer[xattr.Length] = 0; + + fcb = create_fcb(Vcb, PagedPool); if (!fcb) { ERR("out of memory\n"); + ExFreePool(xattr.Buffer); return STATUS_INSUFFICIENT_RESOURCES; } - - if (!get_xattr(Vcb, parent->subvol, parent->inode, 
xattr->Buffer, streamhash, &xattrdata, &xattrlen, Irp)) { + + fcb->Vcb = Vcb; + + crc32 = calc_crc32c(0xfffffffe, (UINT8*)xattr.Buffer, xattr.Length); + + if (!get_xattr(Vcb, parent->subvol, parent->inode, xattr.Buffer, crc32, &xattrdata, &xattrlen, Irp)) { ERR("get_xattr failed\n"); - free_fcb(fcb); + free_fcb(Vcb, fcb); + ExFreePool(xattr.Buffer); return STATUS_INTERNAL_ERROR; } - fcb->Vcb = Vcb; - fcb->subvol = parent->subvol; fcb->inode = parent->inode; fcb->type = parent->type; fcb->ads = TRUE; - fcb->adshash = streamhash; - fcb->adsxattr = *xattr; - + fcb->adshash = crc32; + fcb->adsxattr = xattr; + // find XATTR_ITEM overhead and hence calculate maximum length - + searchkey.obj_id = parent->inode; searchkey.obj_type = TYPE_XATTR_ITEM; - searchkey.offset = streamhash; + searchkey.offset = crc32; Status = find_item(Vcb, parent->subvol, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("find_item returned %08x\n", Status); - free_fcb(fcb); + free_fcb(Vcb, fcb); return Status; } - + if (keycmp(tp.item->key, searchkey)) { ERR("error - could not find key for xattr\n"); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INTERNAL_ERROR; } - + if (tp.item->size < xattrlen) { ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, xattrlen); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INTERNAL_ERROR; } - + overhead = tp.item->size - xattrlen; - + fcb->adsmaxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node) - overhead; - + fcb->adsdata.Buffer = (char*)xattrdata; fcb->adsdata.Length = fcb->adsdata.MaximumLength = xattrlen; - + fcb->Header.IsFastIoPossible = fast_io_possible(fcb); fcb->Header.AllocationSize.QuadPart = xattrlen; fcb->Header.FileSize.QuadPart = xattrlen; fcb->Header.ValidDataLength.QuadPart = xattrlen; - + TRACE("stream found: size = %x, hash = %08x\n", xattrlen, fcb->adshash); - - if (lastle) - InsertHeadList(lastle, 
&fcb->list_entry); - else - InsertTailList(&fcb->subvol->fcbs, &fcb->list_entry); - + + InsertHeadList(&parent->list_entry, &fcb->list_entry); + InsertTailList(&Vcb->all_fcbs, &fcb->list_entry_all); - + *pfcb = fcb; - - return STATUS_SUCCESS; -} -void insert_fileref_child(file_ref* parent, file_ref* child, BOOL do_lock) { - if (do_lock) - ExAcquireResourceExclusiveLite(&parent->nonpaged->children_lock, TRUE); - - if (IsListEmpty(&parent->children)) - InsertTailList(&parent->children, &child->list_entry); - else { - LIST_ENTRY* le = parent->children.Flink; - file_ref* fr1 = CONTAINING_RECORD(le, file_ref, list_entry); - - if (child->index < fr1->index) - InsertHeadList(&parent->children, &child->list_entry); - else { - while (le != &parent->children) { - file_ref* fr2 = (le->Flink == &parent->children) ? NULL : CONTAINING_RECORD(le->Flink, file_ref, list_entry); - - if (child->index >= fr1->index && (!fr2 || fr2->index > child->index)) { - InsertHeadList(&fr1->list_entry, &child->list_entry); - break; - } - - fr1 = fr2; - le = le->Flink; - } - } - } - - if (do_lock) - ExReleaseResourceLite(&parent->nonpaged->children_lock); + return STATUS_SUCCESS; } -static NTSTATUS open_fileref_child(device_extension* Vcb, file_ref* sf, PUNICODE_STRING name, BOOL case_sensitive, BOOL lastpart, BOOL streampart, - POOL_TYPE pooltype, file_ref** psf2, PIRP Irp) { +NTSTATUS open_fileref_child(_Requires_lock_held_(_Curr_->tree_lock) _Requires_exclusive_lock_held_(_Curr_->fcb_lock) _In_ device_extension* Vcb, + _In_ file_ref* sf, _In_ PUNICODE_STRING name, _In_ BOOL case_sensitive, _In_ BOOL lastpart, _In_ BOOL streampart, + _In_ POOL_TYPE pooltype, _Out_ file_ref** psf2, _In_opt_ PIRP Irp) { NTSTATUS Status; file_ref* sf2; - - if (streampart) { - UNICODE_STRING streamname; - ANSI_STRING xattr; - UINT32 streamhash; - - sf2 = search_fileref_children(sf, name, case_sensitive); - - if (sf2) { - if (sf2->deleted) { - TRACE("element in path has been deleted\n"); - free_fileref(sf2); - return 
lastpart ? STATUS_OBJECT_NAME_NOT_FOUND : STATUS_OBJECT_PATH_NOT_FOUND; - } - - *psf2 = sf2; - - return STATUS_SUCCESS; - } - - streamname.Buffer = NULL; - streamname.Length = streamname.MaximumLength = 0; - xattr.Buffer = NULL; - xattr.Length = xattr.MaximumLength = 0; - - // FIXME - check if already opened - - if (!find_stream(Vcb, sf->fcb, name, &streamname, &streamhash, &xattr, Irp)) { - TRACE("could not find stream %.*S\n", name->Length / sizeof(WCHAR), name->Buffer); - - return STATUS_OBJECT_NAME_NOT_FOUND; - } else { - fcb* fcb; - if (streamhash == EA_DOSATTRIB_HASH && xattr.Length == strlen(EA_DOSATTRIB) && - RtlCompareMemory(xattr.Buffer, EA_DOSATTRIB, xattr.Length) == xattr.Length) { - WARN("not allowing user.DOSATTRIB to be opened as stream\n"); - - return STATUS_OBJECT_NAME_NOT_FOUND; - } - - Status = open_fcb_stream(Vcb, sf->fcb->subvol, sf->fcb->inode, &xattr, streamhash, sf->fcb, &fcb, Irp); - if (!NT_SUCCESS(Status)) { - ERR("open_fcb_stream returned %08x\n", Status); - return Status; - } - - sf2 = create_fileref(); - if (!sf2) { - ERR("out of memory\n"); - free_fcb(fcb); - return STATUS_INSUFFICIENT_RESOURCES; - } - - sf2->fcb = fcb; + if (sf->fcb == Vcb->dummy_fcb) + return STATUS_OBJECT_NAME_NOT_FOUND; - if (streamname.Buffer) // case has changed - sf2->filepart = streamname; - else { - sf2->filepart.MaximumLength = sf2->filepart.Length = name->Length; - sf2->filepart.Buffer = ExAllocatePoolWithTag(PagedPool, sf2->filepart.MaximumLength, ALLOC_TAG); - if (!sf2->filepart.Buffer) { - ERR("out of memory\n"); - free_fileref(sf2); - return STATUS_INSUFFICIENT_RESOURCES; - } - - RtlCopyMemory(sf2->filepart.Buffer, name->Buffer, name->Length); - } - - Status = RtlUpcaseUnicodeString(&sf2->filepart_uc, &sf2->filepart, TRUE); + if (streampart) { + BOOL locked = FALSE; + LIST_ENTRY* le; + UNICODE_STRING name_uc; + dir_child* dc = NULL; + fcb* fcb; + + if (!case_sensitive) { + Status = RtlUpcaseUnicodeString(&name_uc, name, TRUE); if (!NT_SUCCESS(Status)) { 
ERR("RtlUpcaseUnicodeString returned %08x\n", Status); - free_fileref(sf2); return Status; } - - // FIXME - make sure all functions know that ADS FCBs won't have a valid SD or INODE_ITEM + } - sf2->parent = (struct _file_ref*)sf; - insert_fileref_child(sf, sf2, TRUE); - - increase_fileref_refcount(sf); + if (!ExIsResourceAcquiredSharedLite(&sf->fcb->nonpaged->dir_children_lock)) { + ExAcquireResourceSharedLite(&sf->fcb->nonpaged->dir_children_lock, TRUE); + locked = TRUE; } + + le = sf->fcb->dir_children_index.Flink; + while (le != &sf->fcb->dir_children_index) { + dir_child* dc2 = CONTAINING_RECORD(le, dir_child, list_entry_index); + + if (dc2->index == 0) { + if ((case_sensitive && dc2->name.Length == name->Length && RtlCompareMemory(dc2->name.Buffer, name->Buffer, dc2->name.Length) == dc2->name.Length) || + (!case_sensitive && dc2->name_uc.Length == name_uc.Length && RtlCompareMemory(dc2->name_uc.Buffer, name_uc.Buffer, dc2->name_uc.Length) == dc2->name_uc.Length) + ) { + dc = dc2; + break; + } + } else + break; + + le = le->Flink; + } + + if (!case_sensitive) + ExFreePool(name_uc.Buffer); + + if (locked) + ExReleaseResourceLite(&sf->fcb->nonpaged->dir_children_lock); + + if (!dc) + return STATUS_OBJECT_NAME_NOT_FOUND; + + if (dc->fileref) { + increase_fileref_refcount(dc->fileref); + *psf2 = dc->fileref; + return STATUS_SUCCESS; + } + + Status = open_fcb_stream(Vcb, dc, sf->fcb, &fcb, Irp); + if (!NT_SUCCESS(Status)) { + ERR("open_fcb_stream returned %08x\n", Status); + return Status; + } + + sf2 = create_fileref(Vcb); + if (!sf2) { + ERR("out of memory\n"); + free_fcb(Vcb, fcb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + sf2->fcb = fcb; + + sf2->parent = (struct _file_ref*)sf; + + sf2->dc = dc; + dc->fileref = sf2; + + ExAcquireResourceExclusiveLite(&sf->nonpaged->children_lock, TRUE); + InsertTailList(&sf->children, &sf2->list_entry); + ExReleaseResourceLite(&sf->nonpaged->children_lock); + + increase_fileref_refcount(sf); } else { root* subvol; UINT64 
inode; dir_child* dc; - - Status = find_file_in_dir(Vcb, name, sf->fcb, &subvol, &inode, &dc, case_sensitive, Irp); + + Status = find_file_in_dir(name, sf->fcb, &subvol, &inode, &dc, case_sensitive); if (Status == STATUS_OBJECT_NAME_NOT_FOUND) { TRACE("could not find %.*S\n", name->Length / sizeof(WCHAR), name->Buffer); @@ -1490,124 +1303,113 @@ static NTSTATUS open_fileref_child(device_extension* Vcb, file_ref* sf, PUNICODE return Status; } else { fcb* fcb; - +#ifdef DEBUG_STATS + LARGE_INTEGER time1, time2; +#endif + if (dc->fileref) { if (!lastpart && dc->type != BTRFS_TYPE_DIRECTORY) { - WARN("passed path including file as subdirectory\n"); + TRACE("passed path including file as subdirectory\n"); return STATUS_OBJECT_PATH_NOT_FOUND; } - + InterlockedIncrement(&dc->fileref->refcount); *psf2 = dc->fileref; return STATUS_SUCCESS; } - - Status = open_fcb(Vcb, subvol, inode, dc->type, &dc->utf8, sf->fcb, &fcb, pooltype, Irp); - if (!NT_SUCCESS(Status)) { - ERR("open_fcb returned %08x\n", Status); - return Status; + + if (!subvol || (subvol != Vcb->root_fileref->fcb->subvol && inode == SUBVOL_ROOT_INODE && subvol->parent != sf->fcb->subvol->id)) { + fcb = Vcb->dummy_fcb; + InterlockedIncrement(&fcb->refcount); + } else { +#ifdef DEBUG_STATS + time1 = KeQueryPerformanceCounter(NULL); +#endif + Status = open_fcb(Vcb, subvol, inode, dc->type, &dc->utf8, sf->fcb, &fcb, pooltype, Irp); +#ifdef DEBUG_STATS + time2 = KeQueryPerformanceCounter(NULL); + Vcb->stats.open_fcb_calls++; + Vcb->stats.open_fcb_time += time2.QuadPart - time1.QuadPart; +#endif + + if (!NT_SUCCESS(Status)) { + ERR("open_fcb returned %08x\n", Status); + return Status; + } } - + if (dc->type != BTRFS_TYPE_DIRECTORY && !lastpart && !(fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT)) { - WARN("passed path including file as subdirectory\n"); - free_fcb(fcb); + TRACE("passed path including file as subdirectory\n"); + free_fcb(Vcb, fcb); return STATUS_OBJECT_PATH_NOT_FOUND; } - sf2 = create_fileref(); + sf2 = 
create_fileref(Vcb); if (!sf2) { ERR("out of memory\n"); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INSUFFICIENT_RESOURCES; } - + sf2->fcb = fcb; - + if (dc->type == BTRFS_TYPE_DIRECTORY) fcb->fileref = sf2; - - sf2->index = dc->index; + sf2->dc = dc; dc->fileref = sf2; - - sf2->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, dc->utf8.Length, ALLOC_TAG); - if (!sf2->utf8.Buffer) { - ERR("out of memory\n"); - free_fileref(sf2); - return STATUS_INSUFFICIENT_RESOURCES; - } - - sf2->filepart.Buffer = ExAllocatePoolWithTag(PagedPool, dc->name.Length, ALLOC_TAG); - if (!sf2->filepart.Buffer) { - ERR("out of memory\n"); - free_fileref(sf2); - return STATUS_INSUFFICIENT_RESOURCES; - } - - sf2->filepart_uc.Buffer = ExAllocatePoolWithTag(PagedPool, dc->name_uc.Length, ALLOC_TAG); - if (!sf2->filepart_uc.Buffer) { - ERR("out of memory\n"); - free_fileref(sf2); - return STATUS_INSUFFICIENT_RESOURCES; - } - - sf2->utf8.Length = sf2->utf8.MaximumLength = dc->utf8.Length; - RtlCopyMemory(sf2->utf8.Buffer, dc->utf8.Buffer, dc->utf8.Length); - - sf2->filepart.Length = sf2->filepart.MaximumLength = dc->name.Length; - RtlCopyMemory(sf2->filepart.Buffer, dc->name.Buffer, dc->name.Length); - - sf2->filepart_uc.Length = sf2->filepart_uc.MaximumLength = dc->name_uc.Length; - RtlCopyMemory(sf2->filepart_uc.Buffer, dc->name_uc.Buffer, dc->name_uc.Length); - + sf2->parent = (struct _file_ref*)sf; - - insert_fileref_child(sf, sf2, TRUE); - + + ExAcquireResourceExclusiveLite(&sf->nonpaged->children_lock, TRUE); + InsertTailList(&sf->children, &sf2->list_entry); + ExReleaseResourceLite(&sf->nonpaged->children_lock); + increase_fileref_refcount(sf); } } - + *psf2 = sf2; - + return STATUS_SUCCESS; } -NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnus, file_ref* related, BOOL parent, USHORT* parsed, ULONG* fn_offset, - POOL_TYPE pooltype, BOOL case_sensitive, PIRP Irp) { +NTSTATUS open_fileref(_Requires_lock_held_(_Curr_->tree_lock) 
_Requires_exclusive_lock_held_(_Curr_->fcb_lock) _In_ device_extension* Vcb, _Out_ file_ref** pfr, + _In_ PUNICODE_STRING fnus, _In_opt_ file_ref* related, _In_ BOOL parent, _Out_opt_ USHORT* parsed, _Out_opt_ ULONG* fn_offset, _In_ POOL_TYPE pooltype, + _In_ BOOL case_sensitive, _In_opt_ PIRP Irp) { UNICODE_STRING fnus2; file_ref *dir, *sf, *sf2; - ULONG i, num_parts; - UNICODE_STRING* parts = NULL; + LIST_ENTRY parts; BOOL has_stream; NTSTATUS Status; - + LIST_ENTRY* le; + TRACE("(%p, %p, %p, %u, %p)\n", Vcb, pfr, related, parent, parsed); -#ifdef DEBUG +#ifdef DEBUG if (!ExIsResourceAcquiredExclusiveLite(&Vcb->fcb_lock) && !ExIsResourceAcquiredExclusiveLite(&Vcb->tree_lock)) { ERR("fcb_lock not acquired exclusively\n"); int3; } #endif - + if (Vcb->removing || Vcb->locked) return STATUS_ACCESS_DENIED; - + fnus2 = *fnus; - + if (fnus2.Length < sizeof(WCHAR) && !related) { ERR("error - fnus was too short\n"); return STATUS_INTERNAL_ERROR; } - + if (related && fnus->Length == 0) { increase_fileref_refcount(related); - + *pfr = related; return STATUS_SUCCESS; } - + if (related) { dir = related; } else { @@ -1615,185 +1417,151 @@ NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnu ERR("error - filename %.*S did not begin with \\\n", fnus2.Length / sizeof(WCHAR), fnus2.Buffer); return STATUS_OBJECT_PATH_NOT_FOUND; } - + if (fnus2.Length == sizeof(WCHAR)) { - if (Vcb->root_fileref->open_count == 0) { // don't allow root to be opened on unmounted FS - ULONG cc; - IO_STATUS_BLOCK iosb; - - Status = dev_ioctl(first_device(Vcb)->devobj, IOCTL_STORAGE_CHECK_VERIFY, NULL, 0, &cc, sizeof(ULONG), TRUE, &iosb); - - if (!NT_SUCCESS(Status)) - return Status; - } - + if (Vcb->root_fileref->open_count == 0 && !(Vcb->Vpb->Flags & VPB_MOUNTED)) // don't allow root to be opened on unmounted FS + return STATUS_DEVICE_NOT_READY; + increase_fileref_refcount(Vcb->root_fileref); *pfr = Vcb->root_fileref; - + if (fn_offset) *fn_offset = 0; - + return 
STATUS_SUCCESS; } - + dir = Vcb->root_fileref; - + fnus2.Buffer++; fnus2.Length -= sizeof(WCHAR); fnus2.MaximumLength -= sizeof(WCHAR); } - + if (dir->fcb->type != BTRFS_TYPE_DIRECTORY && (fnus->Length < sizeof(WCHAR) || fnus->Buffer[0] != ':')) { WARN("passed related fileref which isn't a directory (%S) (fnus = %.*S)\n", file_desc_fileref(related), fnus->Length / sizeof(WCHAR), fnus->Buffer); return STATUS_OBJECT_PATH_NOT_FOUND; } - - if (fnus->Length == 0) { - num_parts = 0; - } else if (fnus->Length == wcslen(datastring) * sizeof(WCHAR) && - RtlCompareMemory(fnus->Buffer, datastring, wcslen(datastring) * sizeof(WCHAR)) == wcslen(datastring) * sizeof(WCHAR)) { - num_parts = 0; - } else { - Status = split_path(&fnus2, &parts, &num_parts, &has_stream); + + InitializeListHead(&parts); + + if (fnus->Length != 0 && + (fnus->Length != wcslen(datastring) * sizeof(WCHAR) || RtlCompareMemory(fnus->Buffer, datastring, wcslen(datastring) * sizeof(WCHAR)) != wcslen(datastring) * sizeof(WCHAR))) { + Status = split_path(Vcb, &fnus2, &parts, &has_stream); if (!NT_SUCCESS(Status)) { ERR("split_path returned %08x\n", Status); return Status; } } - + sf = dir; increase_fileref_refcount(dir); - - if (parent) { - num_parts--; - - if (has_stream && num_parts > 0) { - num_parts--; + + if (parent && !IsListEmpty(&parts)) { + name_bit* nb; + + nb = CONTAINING_RECORD(RemoveTailList(&parts), name_bit, list_entry); + ExFreePool(nb); + + if (has_stream && !IsListEmpty(&parts)) { + nb = CONTAINING_RECORD(RemoveTailList(&parts), name_bit, list_entry); + ExFreePool(nb); + has_stream = FALSE; } } - - if (num_parts == 0) { + + if (IsListEmpty(&parts)) { Status = STATUS_SUCCESS; *pfr = dir; - + if (fn_offset) *fn_offset = 0; - + goto end2; } - - for (i = 0; i < num_parts; i++) { - BOOL lastpart = (i == num_parts-1) || (i == num_parts-2 && has_stream); - - Status = open_fileref_child(Vcb, sf, &parts[i], case_sensitive, lastpart, has_stream && i == num_parts - 1, pooltype, &sf2, Irp); + + le = 
parts.Flink; + do { + name_bit* nb = CONTAINING_RECORD(le, name_bit, list_entry); + BOOL lastpart = le->Flink == &parts || (has_stream && le->Flink->Flink == &parts); + BOOL streampart = has_stream && le->Flink == &parts; + + Status = open_fileref_child(Vcb, sf, &nb->us, case_sensitive, lastpart, streampart, pooltype, &sf2, Irp); if (!NT_SUCCESS(Status)) { if (Status == STATUS_OBJECT_PATH_NOT_FOUND || Status == STATUS_OBJECT_NAME_NOT_FOUND) TRACE("open_fileref_child returned %08x\n", Status); else ERR("open_fileref_child returned %08x\n", Status); - + goto end; } - - if (i == num_parts - 1) { - if (fn_offset) - *fn_offset = parts[has_stream ? (num_parts - 2) : (num_parts - 1)].Buffer - fnus->Buffer; - + + if (le->Flink == &parts) { // last entry + if (fn_offset) { + if (has_stream) + nb = CONTAINING_RECORD(le->Blink, name_bit, list_entry); + + *fn_offset = (ULONG)(nb->us.Buffer - fnus->Buffer); + } + break; } - + if (sf2->fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT) { Status = STATUS_REPARSE; - - if (parsed) - *parsed = (parts[i+1].Buffer - fnus->Buffer - 1) * sizeof(WCHAR); - + + if (parsed) { + name_bit* nb2 = CONTAINING_RECORD(le->Flink, name_bit, list_entry); + + *parsed = (USHORT)(nb2->us.Buffer - fnus->Buffer - 1) * sizeof(WCHAR); + } + break; } - - free_fileref(sf); + + free_fileref(Vcb, sf); sf = sf2; - } - + + le = le->Flink; + } while (le != &parts); + if (Status != STATUS_REPARSE) Status = STATUS_SUCCESS; *pfr = sf2; - + end: - free_fileref(sf); - + free_fileref(Vcb, sf); + + while (!IsListEmpty(&parts)) { + name_bit* nb = CONTAINING_RECORD(RemoveHeadList(&parts), name_bit, list_entry); + ExFreeToPagedLookasideList(&Vcb->name_bit_lookaside, nb); + } + end2: - if (parts) - ExFreePool(parts); - TRACE("returning %08x\n", Status); - - return Status; -} -NTSTATUS fcb_get_last_dir_index(fcb* fcb, UINT64* index, PIRP Irp) { - KEY searchkey; - traverse_ptr tp, prev_tp; - NTSTATUS Status; - - ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); - - if 
(fcb->last_dir_index != 0) { - *index = fcb->last_dir_index; - fcb->last_dir_index++; - Status = STATUS_SUCCESS; - goto end; - } - - searchkey.obj_id = fcb->inode; - searchkey.obj_type = TYPE_DIR_INDEX + 1; - searchkey.offset = 0; - - Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - goto end; - } - - if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type >= searchkey.obj_type)) { - if (find_prev_item(fcb->Vcb, &tp, &prev_tp, FALSE, Irp)) - tp = prev_tp; - } - - if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_DIR_INDEX) { - fcb->last_dir_index = tp.item->key.offset + 1; - } else - fcb->last_dir_index = 2; - - *index = fcb->last_dir_index; - fcb->last_dir_index++; - - Status = STATUS_SUCCESS; - -end: - ExReleaseResourceLite(fcb->Header.Resource); - return Status; } -NTSTATUS add_dir_child(fcb* fcb, UINT64 inode, BOOL subvol, UINT64 index, PANSI_STRING utf8, PUNICODE_STRING name, PUNICODE_STRING name_uc, UINT8 type, dir_child** pdc) { +NTSTATUS add_dir_child(fcb* fcb, UINT64 inode, BOOL subvol, PANSI_STRING utf8, PUNICODE_STRING name, UINT8 type, dir_child** pdc) { + NTSTATUS Status; dir_child* dc; - + dc = ExAllocatePoolWithTag(PagedPool, sizeof(dir_child), ALLOC_TAG); if (!dc) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + dc->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, utf8->Length, ALLOC_TAG); if (!dc->utf8.Buffer) { ERR("out of memory\n"); ExFreePool(dc); return STATUS_INSUFFICIENT_RESOURCES; } - + dc->name.Buffer = ExAllocatePoolWithTag(PagedPool, name->Length, ALLOC_TAG); if (!dc->name.Buffer) { ERR("out of memory\n"); @@ -1801,95 +1569,116 @@ NTSTATUS add_dir_child(fcb* fcb, UINT64 inode, BOOL subvol, UINT64 index, PANSI_ ExFreePool(dc); return STATUS_INSUFFICIENT_RESOURCES; } - - dc->name_uc.Buffer = ExAllocatePoolWithTag(PagedPool, 
name_uc->Length, ALLOC_TAG); - if (!dc->name_uc.Buffer) { - ERR("out of memory\n"); - ExFreePool(dc->utf8.Buffer); - ExFreePool(dc->name.Buffer); - ExFreePool(dc); - return STATUS_INSUFFICIENT_RESOURCES; - } - + dc->key.obj_id = inode; dc->key.obj_type = subvol ? TYPE_ROOT_ITEM : TYPE_INODE_ITEM; - dc->key.offset = 0; - dc->index = index; + dc->key.offset = subvol ? 0xffffffffffffffff : 0; dc->type = type; dc->fileref = NULL; - + dc->utf8.Length = dc->utf8.MaximumLength = utf8->Length; RtlCopyMemory(dc->utf8.Buffer, utf8->Buffer, utf8->Length); - + dc->name.Length = dc->name.MaximumLength = name->Length; RtlCopyMemory(dc->name.Buffer, name->Buffer, name->Length); - - dc->name_uc.Length = dc->name_uc.MaximumLength = name_uc->Length; - RtlCopyMemory(dc->name_uc.Buffer, name_uc->Buffer, name_uc->Length); - + + Status = RtlUpcaseUnicodeString(&dc->name_uc, name, TRUE); + if (!NT_SUCCESS(Status)) { + ERR("RtlUpcaseUnicodeString returned %08x\n", Status); + ExFreePool(dc->utf8.Buffer); + ExFreePool(dc->name.Buffer); + ExFreePool(dc); + return Status; + } + dc->hash = calc_crc32c(0xffffffff, (UINT8*)dc->name.Buffer, dc->name.Length); dc->hash_uc = calc_crc32c(0xffffffff, (UINT8*)dc->name_uc.Buffer, dc->name_uc.Length); - + ExAcquireResourceExclusiveLite(&fcb->nonpaged->dir_children_lock, TRUE); - + + if (IsListEmpty(&fcb->dir_children_index)) + dc->index = 2; + else { + dir_child* dc2 = CONTAINING_RECORD(fcb->dir_children_index.Blink, dir_child, list_entry_index); + + dc->index = max(2, dc2->index + 1); + } + InsertTailList(&fcb->dir_children_index, &dc->list_entry_index); - + insert_dir_child_into_hash_lists(fcb, dc); - + ExReleaseResourceLite(&fcb->nonpaged->dir_children_lock); - + *pdc = dc; - + return STATUS_SUCCESS; } -static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_STRING fpus, file_ref* parfileref, ULONG options, - FILE_FULL_EA_INFORMATION* ea, ULONG ealen, file_ref** pfr, LIST_ENTRY* rollback) { +UINT32 inherit_mode(fcb* parfcb, BOOL 
is_dir) { + UINT32 mode; + + if (!parfcb) + return 0755; + + mode = parfcb->inode_item.st_mode & ~S_IFDIR; + mode &= ~S_ISVTX; // clear sticky bit + mode &= ~S_ISUID; // clear setuid bit + + if (!is_dir) + mode &= ~S_ISGID; // if not directory, clear setgid bit + + return mode; +} + +static NTSTATUS file_create2(_In_ PIRP Irp, _Requires_exclusive_lock_held_(_Curr_->fcb_lock) _In_ device_extension* Vcb, _In_ PUNICODE_STRING fpus, + _In_ file_ref* parfileref, _In_ ULONG options, _In_reads_bytes_opt_(ealen) FILE_FULL_EA_INFORMATION* ea, _In_ ULONG ealen, + _Out_ file_ref** pfr, _In_ LIST_ENTRY* rollback) { NTSTATUS Status; fcb* fcb; ULONG utf8len; char* utf8 = NULL; - UINT64 dirpos, inode; + UINT64 inode; UINT8 type; LARGE_INTEGER time; BTRFS_TIME now; PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); POOL_TYPE pool_type = IrpSp->Flags & SL_OPEN_PAGING_FILE ? NonPagedPool : PagedPool; - ULONG defda; + USHORT defda; file_ref* fileref; dir_child* dc; + ANSI_STRING utf8as; #ifdef DEBUG_FCB_REFCOUNTS LONG rc; #endif - + + if (parfileref->fcb == Vcb->dummy_fcb) + return STATUS_ACCESS_DENIED; + Status = RtlUnicodeToUTF8N(NULL, 0, &utf8len, fpus->Buffer, fpus->Length); - if (!NT_SUCCESS(Status)) + if (!NT_SUCCESS(Status)) { + ERR("RtlUnicodeToUTF8N returned %08x\n", Status); return Status; - + } + utf8 = ExAllocatePoolWithTag(pool_type, utf8len + 1, ALLOC_TAG); if (!utf8) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + Status = RtlUnicodeToUTF8N(utf8, utf8len, &utf8len, fpus->Buffer, fpus->Length); if (!NT_SUCCESS(Status)) { + ERR("RtlUnicodeToUTF8N returned %08x\n", Status); ExFreePool(utf8); return Status; } - + utf8[utf8len] = 0; - - Status = fcb_get_last_dir_index(parfileref->fcb, &dirpos, Irp); - if (!NT_SUCCESS(Status)) { - ERR("fcb_get_last_dir_index returned %08x\n", Status); - ExFreePool(utf8); - return Status; - } - + KeQuerySystemTime(&time); win_time_to_unix(time, &now); - + TRACE("create file %.*S\n", fpus->Length / 
sizeof(WCHAR), fpus->Buffer); ExAcquireResourceExclusiveLite(parfileref->fcb->Header.Resource, TRUE); TRACE("parfileref->fcb->inode_item.st_size (inode %llx) was %llx\n", parfileref->fcb->inode, parfileref->fcb->inode_item.st_size); @@ -1900,39 +1689,45 @@ static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_S parfileref->fcb->inode_item.st_ctime = now; parfileref->fcb->inode_item.st_mtime = now; ExReleaseResourceLite(parfileref->fcb->Header.Resource); - + parfileref->fcb->inode_item_changed = TRUE; mark_fcb_dirty(parfileref->fcb); - + inode = InterlockedIncrement64(&parfileref->fcb->subvol->lastinode); - + type = options & FILE_DIRECTORY_FILE ? BTRFS_TYPE_DIRECTORY : BTRFS_TYPE_FILE; - + // FIXME - link FILE_ATTRIBUTE_READONLY to st_mode - + TRACE("requested attributes = %x\n", IrpSp->Parameters.Create.FileAttributes); - + IrpSp->Parameters.Create.FileAttributes |= FILE_ATTRIBUTE_ARCHIVE; - - defda = 0; - + + defda = FILE_ATTRIBUTE_ARCHIVE; + if (utf8[0] == '.') defda |= FILE_ATTRIBUTE_HIDDEN; - + if (options & FILE_DIRECTORY_FILE) { defda |= FILE_ATTRIBUTE_DIRECTORY; IrpSp->Parameters.Create.FileAttributes |= FILE_ATTRIBUTE_DIRECTORY; - } - + } else + IrpSp->Parameters.Create.FileAttributes &= ~FILE_ATTRIBUTE_DIRECTORY; + TRACE("defda = %x\n", defda); - + if (IrpSp->Parameters.Create.FileAttributes == FILE_ATTRIBUTE_NORMAL) IrpSp->Parameters.Create.FileAttributes = defda; - - fcb = create_fcb(pool_type); + + fcb = create_fcb(Vcb, pool_type); if (!fcb) { ERR("out of memory\n"); ExFreePool(utf8); + + ExAcquireResourceExclusiveLite(parfileref->fcb->Header.Resource, TRUE); + parfileref->fcb->inode_item.st_size -= utf8len * 2; + ExReleaseResourceLite(parfileref->fcb->Header.Resource); + return STATUS_INSUFFICIENT_RESOURCES; } @@ -1949,9 +1744,8 @@ static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_S fcb->inode_item.st_blocks = 0; fcb->inode_item.block_group = 0; fcb->inode_item.st_nlink = 1; -// 
fcb->inode_item.st_uid = UID_NOBODY; // FIXME? fcb->inode_item.st_gid = GID_NOBODY; // FIXME? - fcb->inode_item.st_mode = parfileref->fcb ? (parfileref->fcb->inode_item.st_mode & ~S_IFDIR) : 0755; // use parent's permissions by default + fcb->inode_item.st_mode = inherit_mode(parfileref->fcb, type == BTRFS_TYPE_DIRECTORY); // use parent's permissions by default fcb->inode_item.st_rdev = 0; fcb->inode_item.flags = 0; fcb->inode_item.sequence = 1; @@ -1959,39 +1753,42 @@ static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_S fcb->inode_item.st_ctime = now; fcb->inode_item.st_mtime = now; fcb->inode_item.otime = now; - + if (type == BTRFS_TYPE_DIRECTORY) fcb->inode_item.st_mode |= S_IFDIR; else { fcb->inode_item.st_mode |= S_IFREG; fcb->inode_item.st_mode &= ~(S_IXUSR | S_IXGRP | S_IXOTH); // remove executable bit if not directory } - + if (IrpSp->Flags & SL_OPEN_PAGING_FILE) { fcb->inode_item.flags = BTRFS_INODE_NODATACOW | BTRFS_INODE_NODATASUM | BTRFS_INODE_NOCOMPRESS; } else { // inherit nodatacow flag from parent directory if (parfileref->fcb->inode_item.flags & BTRFS_INODE_NODATACOW) { fcb->inode_item.flags |= BTRFS_INODE_NODATACOW; - + if (type != BTRFS_TYPE_DIRECTORY) fcb->inode_item.flags |= BTRFS_INODE_NODATASUM; } - + if (parfileref->fcb->inode_item.flags & BTRFS_INODE_COMPRESS) fcb->inode_item.flags |= BTRFS_INODE_COMPRESS; } - + + fcb->prop_compression = parfileref->fcb->prop_compression; + fcb->prop_compression_changed = fcb->prop_compression != PropCompression_None; + fcb->inode_item_changed = TRUE; - + fcb->Header.IsFastIoPossible = fast_io_possible(fcb); fcb->Header.AllocationSize.QuadPart = 0; fcb->Header.FileSize.QuadPart = 0; fcb->Header.ValidDataLength.QuadPart = 0; - - fcb->atts = IrpSp->Parameters.Create.FileAttributes; + + fcb->atts = IrpSp->Parameters.Create.FileAttributes & ~FILE_ATTRIBUTE_NORMAL; fcb->atts_changed = fcb->atts != defda; - + #ifdef DEBUG_FCB_REFCOUNTS rc = 
InterlockedIncrement(&parfileref->fcb->refcount); WARN("fcb %p: refcount now %i (%S)\n", parfileref->fcb, rc, file_desc_fileref(parfileref)); @@ -2001,243 +1798,289 @@ static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_S fcb->subvol = parfileref->fcb->subvol; fcb->inode = inode; fcb->type = type; - + fcb->created = TRUE; + fcb->deleted = TRUE; + + mark_fcb_dirty(fcb); + Status = fcb_get_new_sd(fcb, parfileref, IrpSp->Parameters.Create.SecurityContext->AccessState); - + if (!NT_SUCCESS(Status)) { ERR("fcb_get_new_sd returned %08x\n", Status); - free_fcb(fcb); + free_fcb(Vcb, fcb); + + ExAcquireResourceExclusiveLite(parfileref->fcb->Header.Resource, TRUE); + parfileref->fcb->inode_item.st_size -= utf8len * 2; + ExReleaseResourceLite(parfileref->fcb->Header.Resource); + return Status; } - + fcb->sd_dirty = TRUE; - + if (ea && ealen > 0) { FILE_FULL_EA_INFORMATION* eainfo; - + fcb->ealen = 4; - + // capitalize EA names eainfo = ea; do { STRING s; - + s.Length = s.MaximumLength = eainfo->EaNameLength; s.Buffer = eainfo->EaName; - + RtlUpperString(&s, &s); - + fcb->ealen += 5 + eainfo->EaNameLength + eainfo->EaValueLength; - + if (eainfo->NextEntryOffset == 0) break; - + eainfo = (FILE_FULL_EA_INFORMATION*)(((UINT8*)eainfo) + eainfo->NextEntryOffset); } while (TRUE); - + fcb->ea_xattr.Buffer = ExAllocatePoolWithTag(pool_type, ealen, ALLOC_TAG); if (!fcb->ea_xattr.Buffer) { ERR("out of memory\n"); - free_fcb(fcb); + free_fcb(Vcb, fcb); + + ExAcquireResourceExclusiveLite(parfileref->fcb->Header.Resource, TRUE); + parfileref->fcb->inode_item.st_size -= utf8len * 2; + ExReleaseResourceLite(parfileref->fcb->Header.Resource); + return STATUS_INSUFFICIENT_RESOURCES; } - - fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = ealen; - RtlCopyMemory(fcb->ea_xattr.Buffer, ea, ealen); - + + fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = (UINT16)ealen; + RtlCopyMemory(fcb->ea_xattr.Buffer, ea, fcb->ea_xattr.Length); + fcb->ea_changed = TRUE; } - - 
fileref = create_fileref(); + + fileref = create_fileref(Vcb); if (!fileref) { ERR("out of memory\n"); - free_fcb(fcb); + free_fcb(Vcb, fcb); + + ExAcquireResourceExclusiveLite(parfileref->fcb->Header.Resource, TRUE); + parfileref->fcb->inode_item.st_size -= utf8len * 2; + ExReleaseResourceLite(parfileref->fcb->Header.Resource); + return STATUS_INSUFFICIENT_RESOURCES; } - + fileref->fcb = fcb; - fileref->index = dirpos; - - fileref->utf8.MaximumLength = fileref->utf8.Length = utf8len; - fileref->utf8.Buffer = utf8; - - fileref->filepart.Length = fileref->filepart.MaximumLength = fpus->Length; - - if (fileref->filepart.Length == 0) - fileref->filepart.Buffer = NULL; - else { - fileref->filepart.Buffer = ExAllocatePoolWithTag(PagedPool, fileref->filepart.Length, ALLOC_TAG); - - if (!fileref->filepart.Buffer) { - ERR("out of memory\n"); - free_fcb(fcb); - return STATUS_INSUFFICIENT_RESOURCES; - } - - RtlCopyMemory(fileref->filepart.Buffer, fpus->Buffer, fpus->Length); - } - - Status = RtlUpcaseUnicodeString(&fileref->filepart_uc, &fileref->filepart, TRUE); - if (!NT_SUCCESS(Status)) { - ERR("RtlUpcaseUnicodeString returned %08x\n", Status); - free_fileref(fileref); - return Status; - } if (Irp->Overlay.AllocationSize.QuadPart > 0 && !write_fcb_compressed(fcb)) { Status = extend_file(fcb, fileref, Irp->Overlay.AllocationSize.QuadPart, TRUE, NULL, rollback); - + if (!NT_SUCCESS(Status)) { ERR("extend_file returned %08x\n", Status); - free_fileref(fileref); + free_fileref(Vcb, fileref); + + ExAcquireResourceExclusiveLite(parfileref->fcb->Header.Resource, TRUE); + parfileref->fcb->inode_item.st_size -= utf8len * 2; + ExReleaseResourceLite(parfileref->fcb->Header.Resource); + return Status; } } - - fcb->created = TRUE; - mark_fcb_dirty(fcb); - - fileref->created = TRUE; - mark_fileref_dirty(fileref); - - fcb->subvol->root_item.ctransid = Vcb->superblock.generation; - fcb->subvol->root_item.ctime = now; - - fileref->parent = parfileref; - insert_fileref_child(parfileref, 
fileref, TRUE); - - Status = add_dir_child(fileref->parent->fcb, fcb->inode, FALSE, fileref->index, &fileref->utf8, &fileref->filepart, &fileref->filepart_uc, fcb->type, &dc); - if (!NT_SUCCESS(Status)) - WARN("add_dir_child returned %08x\n", Status); - - fileref->dc = dc; - dc->fileref = fileref; - - increase_fileref_refcount(parfileref); - if (fcb->type == BTRFS_TYPE_DIRECTORY) { fcb->hash_ptrs = ExAllocatePoolWithTag(PagedPool, sizeof(LIST_ENTRY*) * 256, ALLOC_TAG); if (!fcb->hash_ptrs) { ERR("out of memory\n"); - free_fileref(fileref); + free_fileref(Vcb, fileref); + + ExAcquireResourceExclusiveLite(parfileref->fcb->Header.Resource, TRUE); + parfileref->fcb->inode_item.st_size -= utf8len * 2; + ExReleaseResourceLite(parfileref->fcb->Header.Resource); + return STATUS_INSUFFICIENT_RESOURCES; } - + RtlZeroMemory(fcb->hash_ptrs, sizeof(LIST_ENTRY*) * 256); - + fcb->hash_ptrs_uc = ExAllocatePoolWithTag(PagedPool, sizeof(LIST_ENTRY*) * 256, ALLOC_TAG); if (!fcb->hash_ptrs_uc) { ERR("out of memory\n"); - free_fileref(fileref); + free_fileref(Vcb, fileref); + + ExAcquireResourceExclusiveLite(parfileref->fcb->Header.Resource, TRUE); + parfileref->fcb->inode_item.st_size -= utf8len * 2; + ExReleaseResourceLite(parfileref->fcb->Header.Resource); + return STATUS_INSUFFICIENT_RESOURCES; } - + RtlZeroMemory(fcb->hash_ptrs_uc, sizeof(LIST_ENTRY*) * 256); } - + + fcb->deleted = FALSE; + + fileref->created = TRUE; + mark_fileref_dirty(fileref); + + fcb->subvol->root_item.ctransid = Vcb->superblock.generation; + fcb->subvol->root_item.ctime = now; + + fileref->parent = parfileref; + + utf8as.Buffer = utf8; + utf8as.Length = utf8as.MaximumLength = (UINT16)utf8len; + + Status = add_dir_child(fileref->parent->fcb, fcb->inode, FALSE, &utf8as, fpus, fcb->type, &dc); + if (!NT_SUCCESS(Status)) + WARN("add_dir_child returned %08x\n", Status); + + ExFreePool(utf8); + + fileref->dc = dc; + dc->fileref = fileref; + + ExAcquireResourceExclusiveLite(&parfileref->nonpaged->children_lock, 
TRUE); + InsertTailList(&parfileref->children, &fileref->list_entry); + ExReleaseResourceLite(&parfileref->nonpaged->children_lock); + + increase_fileref_refcount(parfileref); + InsertTailList(&fcb->subvol->fcbs, &fcb->list_entry); InsertTailList(&Vcb->all_fcbs, &fcb->list_entry_all); - + *pfr = fileref; - + if (type == BTRFS_TYPE_DIRECTORY) fileref->fcb->fileref = fileref; - + TRACE("created new file %S in subvol %llx, inode %llx\n", file_desc_fileref(fileref), fcb->subvol->id, fcb->inode); - + return STATUS_SUCCESS; } -static NTSTATUS create_stream(device_extension* Vcb, file_ref** pfileref, file_ref** pparfileref, PUNICODE_STRING fpus, PUNICODE_STRING stream, - PIRP Irp, ULONG options, POOL_TYPE pool_type, BOOL case_sensitive, LIST_ENTRY* rollback) { +static NTSTATUS create_stream(_Requires_lock_held_(_Curr_->tree_lock) _Requires_exclusive_lock_held_(_Curr_->fcb_lock) device_extension* Vcb, + file_ref** pfileref, file_ref** pparfileref, PUNICODE_STRING fpus, PUNICODE_STRING stream, PIRP Irp, + ULONG options, POOL_TYPE pool_type, BOOL case_sensitive, LIST_ENTRY* rollback) { + PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); file_ref *fileref, *newpar, *parfileref; fcb* fcb; static char xapref[] = "user."; static WCHAR DOSATTRIB[] = L"DOSATTRIB"; static WCHAR EA[] = L"EA"; - ULONG xapreflen = strlen(xapref), overhead; + static WCHAR reparse[] = L"reparse"; + UINT16 xapreflen = (UINT16)strlen(xapref); LARGE_INTEGER time; BTRFS_TIME now; - ULONG utf8len; + ULONG utf8len, overhead; NTSTATUS Status; KEY searchkey; traverse_ptr tp; + dir_child* dc; + ACCESS_MASK granted_access; #ifdef DEBUG_FCB_REFCOUNTS LONG rc; #endif - + TRACE("fpus = %.*S\n", fpus->Length / sizeof(WCHAR), fpus->Buffer); TRACE("stream = %.*S\n", stream->Length / sizeof(WCHAR), stream->Buffer); - + parfileref = *pparfileref; - + + if (parfileref->fcb == Vcb->dummy_fcb) + return STATUS_ACCESS_DENIED; + Status = open_fileref(Vcb, &newpar, fpus, parfileref, FALSE, NULL, NULL, PagedPool, 
case_sensitive, Irp); - + if (Status == STATUS_OBJECT_NAME_NOT_FOUND) { UNICODE_STRING fpus2; - - if (!is_file_name_valid(fpus)) + + if (!is_file_name_valid(fpus, FALSE)) return STATUS_OBJECT_NAME_INVALID; - + fpus2.Length = fpus2.MaximumLength = fpus->Length; fpus2.Buffer = ExAllocatePoolWithTag(pool_type, fpus2.MaximumLength, ALLOC_TAG); - + if (!fpus2.Buffer) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(fpus2.Buffer, fpus->Buffer, fpus2.Length); - + + SeLockSubjectContext(&IrpSp->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext); + + if (!SeAccessCheck(parfileref->fcb->sd, &IrpSp->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext, + TRUE, options & FILE_DIRECTORY_FILE ? FILE_ADD_SUBDIRECTORY : FILE_ADD_FILE, 0, NULL, + IoGetFileObjectGenericMapping(), IrpSp->Flags & SL_FORCE_ACCESS_CHECK ? UserMode : Irp->RequestorMode, + &granted_access, &Status)) { + SeUnlockSubjectContext(&IrpSp->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext); + return Status; + } + + SeUnlockSubjectContext(&IrpSp->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext); + Status = file_create2(Irp, Vcb, &fpus2, parfileref, options, NULL, 0, &newpar, rollback); - + if (!NT_SUCCESS(Status)) { ERR("file_create2 returned %08x\n", Status); ExFreePool(fpus2.Buffer); return Status; } - - send_notification_fileref(newpar, options & FILE_DIRECTORY_FILE ? FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_ADDED); - send_notification_fcb(newpar->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED); + + send_notification_fileref(newpar, options & FILE_DIRECTORY_FILE ? 
FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_ADDED, NULL); + send_notification_fcb(newpar->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED, NULL); } else if (!NT_SUCCESS(Status)) { ERR("open_fileref returned %08x\n", Status); return Status; } - - free_fileref(parfileref); - + parfileref = newpar; *pparfileref = parfileref; - + if (parfileref->fcb->type != BTRFS_TYPE_FILE && parfileref->fcb->type != BTRFS_TYPE_SYMLINK && parfileref->fcb->type != BTRFS_TYPE_DIRECTORY) { WARN("parent not file, directory, or symlink\n"); return STATUS_INVALID_PARAMETER; } - + if (options & FILE_DIRECTORY_FILE) { WARN("tried to create directory as stream\n"); return STATUS_INVALID_PARAMETER; } - - if ((stream->Length == wcslen(DOSATTRIB) * sizeof(WCHAR) && RtlCompareMemory(stream->Buffer, DOSATTRIB, stream->Length) == stream->Length) || - (stream->Length == wcslen(EA) * sizeof(WCHAR) && RtlCompareMemory(stream->Buffer, EA, stream->Length) == stream->Length)) { + + if (parfileref->fcb->atts & FILE_ATTRIBUTE_READONLY) + return STATUS_ACCESS_DENIED; + + SeLockSubjectContext(&IrpSp->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext); + + if (!SeAccessCheck(parfileref->fcb->sd, &IrpSp->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext, + TRUE, FILE_WRITE_DATA, 0, NULL, IoGetFileObjectGenericMapping(), IrpSp->Flags & SL_FORCE_ACCESS_CHECK ? 
UserMode : Irp->RequestorMode, + &granted_access, &Status)) { + SeUnlockSubjectContext(&IrpSp->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext); + return Status; + } + + SeUnlockSubjectContext(&IrpSp->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext); + + if ((stream->Length == wcslen(DOSATTRIB) * sizeof(WCHAR) && RtlCompareMemory(stream->Buffer, DOSATTRIB, stream->Length) == stream->Length) || + (stream->Length == wcslen(EA) * sizeof(WCHAR) && RtlCompareMemory(stream->Buffer, EA, stream->Length) == stream->Length) || + (stream->Length == wcslen(reparse) * sizeof(WCHAR) && RtlCompareMemory(stream->Buffer, reparse, stream->Length) == stream->Length)) { return STATUS_OBJECT_NAME_INVALID; } - - fcb = create_fcb(pool_type); + + fcb = create_fcb(Vcb, pool_type); if (!fcb) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + fcb->Vcb = Vcb; - + fcb->Header.IsFastIoPossible = fast_io_possible(fcb); fcb->Header.AllocationSize.QuadPart = 0; fcb->Header.FileSize.QuadPart = 0; fcb->Header.ValidDataLength.QuadPart = 0; - + #ifdef DEBUG_FCB_REFCOUNTS rc = InterlockedIncrement(&parfileref->fcb->refcount); WARN("fcb %p: refcount now %i (%S)\n", parfileref->fcb, rc, file_desc_fileref(parfileref)); @@ -2247,279 +2090,349 @@ static NTSTATUS create_stream(device_extension* Vcb, file_ref** pfileref, file_r fcb->subvol = parfileref->fcb->subvol; fcb->inode = parfileref->fcb->inode; fcb->type = parfileref->fcb->type; - + fcb->ads = TRUE; - + Status = RtlUnicodeToUTF8N(NULL, 0, &utf8len, stream->Buffer, stream->Length); if (!NT_SUCCESS(Status)) { ERR("RtlUnicodeToUTF8N 1 returned %08x\n", Status); - free_fcb(fcb); + free_fcb(Vcb, fcb); return Status; } - - fcb->adsxattr.Length = utf8len + xapreflen; + + fcb->adsxattr.Length = (UINT16)utf8len + xapreflen; fcb->adsxattr.MaximumLength = fcb->adsxattr.Length + 1; fcb->adsxattr.Buffer = ExAllocatePoolWithTag(pool_type, fcb->adsxattr.MaximumLength, ALLOC_TAG); if 
(!fcb->adsxattr.Buffer) { ERR("out of memory\n"); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(fcb->adsxattr.Buffer, xapref, xapreflen); - + Status = RtlUnicodeToUTF8N(&fcb->adsxattr.Buffer[xapreflen], utf8len, &utf8len, stream->Buffer, stream->Length); if (!NT_SUCCESS(Status)) { ERR("RtlUnicodeToUTF8N 2 returned %08x\n", Status); - free_fcb(fcb); + free_fcb(Vcb, fcb); return Status; } - + fcb->adsxattr.Buffer[fcb->adsxattr.Length] = 0; - + TRACE("adsxattr = %s\n", fcb->adsxattr.Buffer); - + fcb->adshash = calc_crc32c(0xfffffffe, (UINT8*)fcb->adsxattr.Buffer, fcb->adsxattr.Length); TRACE("adshash = %08x\n", fcb->adshash); - + searchkey.obj_id = parfileref->fcb->inode; searchkey.obj_type = TYPE_XATTR_ITEM; searchkey.offset = fcb->adshash; - + Status = find_item(Vcb, parfileref->fcb->subvol, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("find_item returned %08x\n", Status); - free_fcb(fcb); + free_fcb(Vcb, fcb); return Status; } - + if (!keycmp(tp.item->key, searchkey)) overhead = tp.item->size; else overhead = 0; - + fcb->adsmaxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node) - (sizeof(DIR_ITEM) - 1); - + if (utf8len + xapreflen + overhead > fcb->adsmaxlen) { WARN("not enough room for new DIR_ITEM (%u + %u > %u)", utf8len + xapreflen, overhead, fcb->adsmaxlen); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_DISK_FULL; } else fcb->adsmaxlen -= overhead + utf8len + xapreflen; - - fileref = create_fileref(); + + fileref = create_fileref(Vcb); if (!fileref) { ERR("out of memory\n"); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INSUFFICIENT_RESOURCES; } - + fileref->fcb = fcb; - fileref->filepart.MaximumLength = fileref->filepart.Length = stream->Length; - fileref->filepart.Buffer = ExAllocatePoolWithTag(pool_type, fileref->filepart.MaximumLength, ALLOC_TAG); - if (!fileref->filepart.Buffer) { + dc = ExAllocatePoolWithTag(PagedPool, sizeof(dir_child), ALLOC_TAG); + 
if (!dc) { + ERR("out of memory\n"); + free_fileref(Vcb, fileref); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(dc, sizeof(dir_child)); + + dc->utf8.MaximumLength = dc->utf8.Length = fcb->adsxattr.Length - xapreflen; + dc->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, dc->utf8.MaximumLength, ALLOC_TAG); + if (!dc->utf8.Buffer) { + ERR("out of memory\n"); + ExFreePool(dc); + free_fileref(Vcb, fileref); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(dc->utf8.Buffer, &fcb->adsxattr.Buffer[xapreflen], fcb->adsxattr.Length - xapreflen); + + dc->name.MaximumLength = dc->name.Length = stream->Length; + dc->name.Buffer = ExAllocatePoolWithTag(pool_type, dc->name.MaximumLength, ALLOC_TAG); + if (!dc->name.Buffer) { ERR("out of memory\n"); - free_fileref(fileref); + ExFreePool(dc->utf8.Buffer); + ExFreePool(dc); + free_fileref(Vcb, fileref); return STATUS_INSUFFICIENT_RESOURCES; } - - RtlCopyMemory(fileref->filepart.Buffer, stream->Buffer, stream->Length); - - Status = RtlUpcaseUnicodeString(&fileref->filepart_uc, &fileref->filepart, TRUE); + + RtlCopyMemory(dc->name.Buffer, stream->Buffer, stream->Length); + + Status = RtlUpcaseUnicodeString(&dc->name_uc, &dc->name, TRUE); if (!NT_SUCCESS(Status)) { ERR("RtlUpcaseUnicodeString returned %08x\n", Status); - free_fileref(fileref); + ExFreePool(dc->utf8.Buffer); + ExFreePool(dc->name.Buffer); + ExFreePool(dc); + free_fileref(Vcb, fileref); return Status; } - + + dc->fileref = fileref; + fileref->dc = dc; + + InsertHeadList(&parfileref->fcb->dir_children_index, &dc->list_entry_index); + mark_fcb_dirty(fcb); mark_fileref_dirty(fileref); - + InsertHeadList(&parfileref->fcb->list_entry, &fcb->list_entry); // insert in list after parent fcb InsertTailList(&Vcb->all_fcbs, &fcb->list_entry_all); - + KeQuerySystemTime(&time); win_time_to_unix(time, &now); - + parfileref->fcb->inode_item.transid = Vcb->superblock.generation; parfileref->fcb->inode_item.sequence++; parfileref->fcb->inode_item.st_ctime = 
now; parfileref->fcb->inode_item_changed = TRUE; - + mark_fcb_dirty(parfileref->fcb); - + parfileref->fcb->subvol->root_item.ctransid = Vcb->superblock.generation; parfileref->fcb->subvol->root_item.ctime = now; - + fileref->parent = (struct _file_ref*)parfileref; - - insert_fileref_child(parfileref, fileref, TRUE); - + + ExAcquireResourceExclusiveLite(&parfileref->nonpaged->children_lock, TRUE); + InsertTailList(&parfileref->children, &fileref->list_entry); + ExReleaseResourceLite(&parfileref->nonpaged->children_lock); + increase_fileref_refcount(parfileref); - + *pfileref = fileref; - + + send_notification_fileref(parfileref, options & FILE_DIRECTORY_FILE ? FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_ADDED, &fileref->dc->name); + return STATUS_SUCCESS; } -static NTSTATUS STDCALL file_create(PIRP Irp, device_extension* Vcb, PFILE_OBJECT FileObject, file_ref* related, BOOL loaded_related, - PUNICODE_STRING fnus, ULONG disposition, ULONG options, LIST_ENTRY* rollback) { +// LXSS programs can be distinguished by the fact they have a NULL PEB. 
+#ifdef _AMD64_ +static __inline BOOL called_from_lxss() { + NTSTATUS Status; + PROCESS_BASIC_INFORMATION pbi; + ULONG retlen; + + Status = ZwQueryInformationProcess(NtCurrentProcess(), ProcessBasicInformation, &pbi, sizeof(pbi), &retlen); + + if (!NT_SUCCESS(Status)) { + ERR("ZwQueryInformationProcess returned %08x\n", Status); + return FALSE; + } + + return !pbi.PebBaseAddress; +} +#else +#define called_from_lxss() FALSE +#endif + +static NTSTATUS file_create(PIRP Irp, _Requires_lock_held_(_Curr_->tree_lock) _Requires_exclusive_lock_held_(_Curr_->fcb_lock) device_extension* Vcb, + PFILE_OBJECT FileObject, file_ref* related, BOOL loaded_related, PUNICODE_STRING fnus, ULONG disposition, ULONG options, LIST_ENTRY* rollback) { NTSTATUS Status; -// fcb *fcb, *parfcb = NULL; file_ref *fileref, *parfileref = NULL; - ULONG i, j, fn_offset; -// ULONG utf8len; + ULONG i, j; ccb* ccb; static WCHAR datasuf[] = {':','$','D','A','T','A',0}; UNICODE_STRING dsus, fpus, stream; PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); POOL_TYPE pool_type = IrpSp->Flags & SL_OPEN_PAGING_FILE ? 
NonPagedPool : PagedPool; - PACCESS_STATE access_state = IrpSp->Parameters.Create.SecurityContext->AccessState; #ifdef DEBUG_FCB_REFCOUNTS LONG oc; #endif TRACE("(%p, %p, %p, %.*S, %x, %x)\n", Irp, Vcb, FileObject, fnus->Length / sizeof(WCHAR), fnus->Buffer, disposition, options); - + if (Vcb->readonly) return STATUS_MEDIA_WRITE_PROTECTED; - + dsus.Buffer = datasuf; - dsus.Length = dsus.MaximumLength = wcslen(datasuf) * sizeof(WCHAR); + dsus.Length = dsus.MaximumLength = (USHORT)wcslen(datasuf) * sizeof(WCHAR); fpus.Buffer = NULL; - + if (!loaded_related) { Status = open_fileref(Vcb, &parfileref, fnus, related, TRUE, NULL, NULL, pool_type, IrpSp->Flags & SL_CASE_SENSITIVE, Irp); - + if (!NT_SUCCESS(Status)) goto end; } else parfileref = related; - + if (parfileref->fcb->type != BTRFS_TYPE_DIRECTORY && (fnus->Length < sizeof(WCHAR) || fnus->Buffer[0] != ':')) { Status = STATUS_OBJECT_PATH_NOT_FOUND; goto end; } - - if (parfileref->fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY) { + + if (is_subvol_readonly(parfileref->fcb->subvol, Irp)) { Status = STATUS_ACCESS_DENIED; goto end; } - + i = (fnus->Length / sizeof(WCHAR))-1; while ((fnus->Buffer[i] == '\\' || fnus->Buffer[i] == '/') && i > 0) { i--; } - + j = i; - + while (i > 0 && fnus->Buffer[i-1] != '\\' && fnus->Buffer[i-1] != '/') { i--; } - - fpus.MaximumLength = (j - i + 2) * sizeof(WCHAR); + + fpus.MaximumLength = (USHORT)((j - i + 2) * sizeof(WCHAR)); fpus.Buffer = ExAllocatePoolWithTag(pool_type, fpus.MaximumLength, ALLOC_TAG); if (!fpus.Buffer) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - - fpus.Length = (j - i + 1) * sizeof(WCHAR); - + + fpus.Length = (USHORT)((j - i + 1) * sizeof(WCHAR)); + RtlCopyMemory(fpus.Buffer, &fnus->Buffer[i], (j - i + 1) * sizeof(WCHAR)); fpus.Buffer[j - i + 1] = 0; - - fn_offset = i; - + if (fpus.Length > dsus.Length) { // check for :$DATA suffix UNICODE_STRING lb; - + lb.Buffer = &fpus.Buffer[(fpus.Length - dsus.Length)/sizeof(WCHAR)]; 
lb.Length = lb.MaximumLength = dsus.Length; - + TRACE("lb = %.*S\n", lb.Length/sizeof(WCHAR), lb.Buffer); - + if (FsRtlAreNamesEqual(&dsus, &lb, TRUE, NULL)) { TRACE("ignoring :$DATA suffix\n"); - + fpus.Length -= lb.Length; - + if (fpus.Length > sizeof(WCHAR) && fpus.Buffer[(fpus.Length-1)/sizeof(WCHAR)] == ':') fpus.Length -= sizeof(WCHAR); - + TRACE("fpus = %.*S\n", fpus.Length / sizeof(WCHAR), fpus.Buffer); } } - + stream.Length = 0; - - for (i = 0; i < fpus.Length/sizeof(WCHAR); i++) { + + for (i = 0; i < fpus.Length / sizeof(WCHAR); i++) { if (fpus.Buffer[i] == ':') { - stream.Length = fpus.Length - (i*sizeof(WCHAR)) - sizeof(WCHAR); + stream.Length = (USHORT)(fpus.Length - (i * sizeof(WCHAR)) - sizeof(WCHAR)); stream.Buffer = &fpus.Buffer[i+1]; fpus.Buffer[i] = 0; - fpus.Length = i * sizeof(WCHAR); + fpus.Length = (USHORT)(i * sizeof(WCHAR)); break; } } - + if (stream.Length > 0) { Status = create_stream(Vcb, &fileref, &parfileref, &fpus, &stream, Irp, options, pool_type, IrpSp->Flags & SL_CASE_SENSITIVE, rollback); if (!NT_SUCCESS(Status)) { ERR("create_stream returned %08x\n", Status); goto end; } + + IoSetShareAccess(IrpSp->Parameters.Create.SecurityContext->DesiredAccess, IrpSp->Parameters.Create.ShareAccess, + FileObject, &fileref->fcb->share_access); } else { - if (!is_file_name_valid(&fpus)) { + ACCESS_MASK granted_access; + + if (!is_file_name_valid(&fpus, FALSE)) { Status = STATUS_OBJECT_NAME_INVALID; goto end; } - + + SeLockSubjectContext(&IrpSp->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext); + + if (!SeAccessCheck(parfileref->fcb->sd, &IrpSp->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext, + TRUE, options & FILE_DIRECTORY_FILE ? FILE_ADD_SUBDIRECTORY : FILE_ADD_FILE, 0, NULL, + IoGetFileObjectGenericMapping(), IrpSp->Flags & SL_FORCE_ACCESS_CHECK ? 
UserMode : Irp->RequestorMode, + &granted_access, &Status)) { + SeUnlockSubjectContext(&IrpSp->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext); + goto end; + } + + SeUnlockSubjectContext(&IrpSp->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext); + if (Irp->AssociatedIrp.SystemBuffer && IrpSp->Parameters.Create.EaLength > 0) { ULONG offset; - + Status = IoCheckEaBufferValidity(Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.Create.EaLength, &offset); if (!NT_SUCCESS(Status)) { ERR("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, offset); goto end; } } - + Status = file_create2(Irp, Vcb, &fpus, parfileref, options, Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.Create.EaLength, &fileref, rollback); - + if (!NT_SUCCESS(Status)) { ERR("file_create2 returned %08x\n", Status); goto end; } - - send_notification_fileref(fileref, options & FILE_DIRECTORY_FILE ? FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_ADDED); - send_notification_fcb(fileref->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED); + + IoSetShareAccess(IrpSp->Parameters.Create.SecurityContext->DesiredAccess, IrpSp->Parameters.Create.ShareAccess, FileObject, &fileref->fcb->share_access); + + send_notification_fileref(fileref, options & FILE_DIRECTORY_FILE ? 
FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_ADDED, NULL); + send_notification_fcb(fileref->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED, NULL); } - + FileObject->FsContext = fileref->fcb; - + ccb = ExAllocatePoolWithTag(NonPagedPool, sizeof(*ccb), ALLOC_TAG); if (!ccb) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; - free_fileref(fileref); + free_fileref(Vcb, fileref); goto end; } - + RtlZeroMemory(ccb, sizeof(*ccb)); - + ccb->fileref = fileref; - + ccb->NodeType = BTRFS_NODE_TYPE_CCB; - ccb->NodeSize = sizeof(ccb); + ccb->NodeSize = sizeof(*ccb); ccb->disposition = disposition; ccb->options = options; ccb->query_dir_offset = 0; RtlInitUnicodeString(&ccb->query_string, NULL); ccb->has_wildcard = FALSE; ccb->specific_file = FALSE; - ccb->access = access_state->OriginalDesiredAccess; + ccb->access = IrpSp->Parameters.Create.SecurityContext->DesiredAccess; ccb->case_sensitive = IrpSp->Flags & SL_CASE_SENSITIVE; - + ccb->reserving = FALSE; + ccb->lxss = called_from_lxss(); + #ifdef DEBUG_FCB_REFCOUNTS oc = InterlockedIncrement(&fileref->open_count); ERR("fileref %p: open_count now %i\n", fileref, oc); @@ -2527,61 +2440,30 @@ static NTSTATUS STDCALL file_create(PIRP Irp, device_extension* Vcb, PFILE_OBJEC InterlockedIncrement(&fileref->open_count); #endif InterlockedIncrement(&Vcb->open_files); - + FileObject->FsContext2 = ccb; - - if (fn_offset > 0) { - FileObject->FileName.Length -= fn_offset * sizeof(WCHAR); - RtlMoveMemory(&FileObject->FileName.Buffer[0], &FileObject->FileName.Buffer[fn_offset], FileObject->FileName.Length); - } FileObject->SectionObjectPointer = &fileref->fcb->nonpaged->segment_object; - -// TRACE("returning FCB %p with parent %p\n", fcb, parfcb); - -// ULONG fnlen; -// -// fcb->name_offset = fcb->par->full_filename.Length / sizeof(WCHAR); -// -// if (fcb->par != Vcb->root_fcb) -// fcb->name_offset++; -// -// fnlen = (fcb->name_offset * sizeof(WCHAR)) + fcb->filepart.Length; -// -// 
fcb->full_filename.Buffer = ExAllocatePoolWithTag(PagedPool, fnlen, ALLOC_TAG); -// if (!fcb->full_filename.Buffer) { -// ERR("out of memory\n"); -// Status = STATUS_INSUFFICIENT_RESOURCES; -// goto end; -// } -// -// fcb->full_filename.Length = fcb->full_filename.MaximumLength = fnlen; -// RtlCopyMemory(fcb->full_filename.Buffer, fcb->par->full_filename.Buffer, fcb->par->full_filename.Length); -// -// if (fcb->par != Vcb->root_fcb) -// fcb->full_filename.Buffer[fcb->par->full_filename.Length / sizeof(WCHAR)] = '\\'; -// -// RtlCopyMemory(&fcb->full_filename.Buffer[fcb->name_offset], fcb->filepart.Buffer, fcb->filepart.Length); - + goto end2; - -end: + +end: if (fpus.Buffer) ExFreePool(fpus.Buffer); - + end2: if (parfileref && !loaded_related) - free_fileref(parfileref); - + free_fileref(Vcb, parfileref); + return Status; } static __inline void debug_create_options(ULONG RequestedOptions) { if (RequestedOptions != 0) { ULONG options = RequestedOptions; - + TRACE("requested options:\n"); - + if (options & FILE_DIRECTORY_FILE) { TRACE(" FILE_DIRECTORY_FILE\n"); options &= ~FILE_DIRECTORY_FILE; @@ -2631,7 +2513,7 @@ static __inline void debug_create_options(ULONG RequestedOptions) { TRACE(" FILE_NO_EA_KNOWLEDGE\n"); options &= ~FILE_NO_EA_KNOWLEDGE; } - + if (options & FILE_OPEN_REMOTE_INSTANCE) { TRACE(" FILE_OPEN_REMOTE_INSTANCE\n"); options &= ~FILE_OPEN_REMOTE_INSTANCE; @@ -2656,7 +2538,7 @@ static __inline void debug_create_options(ULONG RequestedOptions) { TRACE(" FILE_OPEN_FOR_BACKUP_INTENT\n"); options &= ~FILE_OPEN_FOR_BACKUP_INTENT; } - + if (options & FILE_NO_COMPRESSION) { TRACE(" FILE_NO_COMPRESSION\n"); options &= ~FILE_NO_COMPRESSION; @@ -2667,7 +2549,7 @@ static __inline void debug_create_options(ULONG RequestedOptions) { TRACE(" FILE_OPEN_REQUIRING_OPLOCK\n"); options &= ~FILE_OPEN_REQUIRING_OPLOCK; } - + if (options & FILE_DISALLOW_EXCLUSIVE) { TRACE(" FILE_DISALLOW_EXCLUSIVE\n"); options &= ~FILE_DISALLOW_EXCLUSIVE; @@ -2683,17 +2565,17 @@ static 
__inline void debug_create_options(ULONG RequestedOptions) { TRACE(" FILE_OPEN_REPARSE_POINT\n"); options &= ~FILE_OPEN_REPARSE_POINT; } - + if (options & FILE_OPEN_NO_RECALL) { TRACE(" FILE_OPEN_NO_RECALL\n"); options &= ~FILE_OPEN_NO_RECALL; } - + if (options & FILE_OPEN_FOR_FREE_SPACE_QUERY) { TRACE(" FILE_OPEN_FOR_FREE_SPACE_QUERY\n"); options &= ~FILE_OPEN_FOR_FREE_SPACE_QUERY; } - + if (options) TRACE(" unknown options: %x\n", options); } else { @@ -2703,65 +2585,68 @@ static __inline void debug_create_options(ULONG RequestedOptions) { static NTSTATUS get_reparse_block(fcb* fcb, UINT8** data) { NTSTATUS Status; - + if (fcb->type == BTRFS_TYPE_FILE || fcb->type == BTRFS_TYPE_SYMLINK) { ULONG size, bytes_read, i; - + if (fcb->type == BTRFS_TYPE_FILE && fcb->inode_item.st_size < sizeof(ULONG)) { WARN("file was too short to be a reparse point\n"); return STATUS_INVALID_PARAMETER; } - + // 0x10007 = 0xffff (maximum length of data buffer) + 8 bytes header - size = min(0x10007, fcb->inode_item.st_size); - + size = (ULONG)min(0x10007, fcb->inode_item.st_size); + + if (size == 0) + return STATUS_INVALID_PARAMETER; + *data = ExAllocatePoolWithTag(PagedPool, size, ALLOC_TAG); if (!*data) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - - Status = read_file(fcb, *data, 0, size, &bytes_read, NULL, TRUE); + + Status = read_file(fcb, *data, 0, size, &bytes_read, NULL); if (!NT_SUCCESS(Status)) { ERR("read_file_fcb returned %08x\n", Status); ExFreePool(*data); return Status; } - + if (fcb->type == BTRFS_TYPE_SYMLINK) { - ULONG stringlen, subnamelen, printnamelen, reqlen; + ULONG stringlen, reqlen; + UINT16 subnamelen, printnamelen; REPARSE_DATA_BUFFER* rdb; - + Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, (char*)*data, bytes_read); if (!NT_SUCCESS(Status)) { ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); ExFreePool(*data); return Status; } - - subnamelen = stringlen; - printnamelen = stringlen; - + + subnamelen = printnamelen = (USHORT)stringlen; + 
reqlen = offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer.PathBuffer) + subnamelen + printnamelen; - + rdb = ExAllocatePoolWithTag(PagedPool, reqlen, ALLOC_TAG); - + if (!rdb) { ERR("out of memory\n"); ExFreePool(*data); return STATUS_INSUFFICIENT_RESOURCES; } - + rdb->ReparseTag = IO_REPARSE_TAG_SYMLINK; - rdb->ReparseDataLength = reqlen - offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer); + rdb->ReparseDataLength = (USHORT)(reqlen - offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer)); rdb->Reserved = 0; - + rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset = 0; rdb->SymbolicLinkReparseBuffer.SubstituteNameLength = subnamelen; rdb->SymbolicLinkReparseBuffer.PrintNameOffset = subnamelen; rdb->SymbolicLinkReparseBuffer.PrintNameLength = printnamelen; rdb->SymbolicLinkReparseBuffer.Flags = SYMLINK_FLAG_RELATIVE; - + Status = RtlUTF8ToUnicodeN(&rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)], stringlen, &stringlen, (char*)*data, size); @@ -2771,18 +2656,18 @@ static NTSTATUS get_reparse_block(fcb* fcb, UINT8** data) { ExFreePool(*data); return Status; } - + for (i = 0; i < stringlen / sizeof(WCHAR); i++) { if (rdb->SymbolicLinkReparseBuffer.PathBuffer[(rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)) + i] == '/') rdb->SymbolicLinkReparseBuffer.PathBuffer[(rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)) + i] = '\\'; } - + RtlCopyMemory(&rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.PrintNameOffset / sizeof(WCHAR)], &rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)], rdb->SymbolicLinkReparseBuffer.SubstituteNameLength); - + ExFreePool(*data); - + *data = (UINT8*)rdb; } else { Status = FsRtlValidateReparsePointBuffer(bytes_read, (REPARSE_DATA_BUFFER*)*data); @@ -2795,42 +2680,82 @@ static NTSTATUS get_reparse_block(fcb* fcb, UINT8** data) { } else if (fcb->type 
== BTRFS_TYPE_DIRECTORY) { if (!fcb->reparse_xattr.Buffer || fcb->reparse_xattr.Length == 0) return STATUS_INTERNAL_ERROR; - + if (fcb->reparse_xattr.Length < sizeof(ULONG)) { WARN("xattr was too short to be a reparse point\n"); return STATUS_INTERNAL_ERROR; } - + Status = FsRtlValidateReparsePointBuffer(fcb->reparse_xattr.Length, (REPARSE_DATA_BUFFER*)fcb->reparse_xattr.Buffer); if (!NT_SUCCESS(Status)) { ERR("FsRtlValidateReparsePointBuffer returned %08x\n", Status); return Status; } - + *data = ExAllocatePoolWithTag(PagedPool, fcb->reparse_xattr.Length, ALLOC_TAG); if (!*data) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(*data, fcb->reparse_xattr.Buffer, fcb->reparse_xattr.Length); } - + return STATUS_SUCCESS; } -static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_ENTRY* rollback) { - PFILE_OBJECT FileObject; +static void fcb_load_csums(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, fcb* fcb, PIRP Irp) { + LIST_ENTRY* le; + NTSTATUS Status; + + if (fcb->csum_loaded) + return; + + if (IsListEmpty(&fcb->extents) || fcb->inode_item.flags & BTRFS_INODE_NODATASUM) + goto end; + + le = fcb->extents.Flink; + while (le != &fcb->extents) { + extent* ext = CONTAINING_RECORD(le, extent, list_entry); + + if (ext->extent_data.type == EXTENT_TYPE_REGULAR) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ext->extent_data.data[0]; + UINT64 len; + + len = (ext->extent_data.compression == BTRFS_COMPRESSION_NONE ? ed2->num_bytes : ed2->size) / Vcb->superblock.sector_size; + + ext->csum = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(len * sizeof(UINT32)), ALLOC_TAG); + if (!ext->csum) { + ERR("out of memory\n"); + goto end; + } + + Status = load_csum(Vcb, ext->csum, ed2->address + (ext->extent_data.compression == BTRFS_COMPRESSION_NONE ? 
ed2->offset : 0), len, Irp); + + if (!NT_SUCCESS(Status)) { + ERR("load_csum returned %08x\n", Status); + goto end; + } + } + + le = le->Flink; + } + +end: + fcb->csum_loaded = TRUE; +} + +static NTSTATUS open_file(PDEVICE_OBJECT DeviceObject, _Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) { + PFILE_OBJECT FileObject = NULL; ULONG RequestedDisposition; ULONG options; NTSTATUS Status; ccb* ccb; - device_extension* Vcb = DeviceObject->DeviceExtension; - PIO_STACK_LOCATION Stack = IoGetCurrentIrpStackLocation(Irp); + PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); USHORT parsed; ULONG fn_offset = 0; - file_ref *related, *fileref; - POOL_TYPE pool_type = Stack->Flags & SL_OPEN_PAGING_FILE ? NonPagedPool : PagedPool; + file_ref *related, *fileref = NULL; + POOL_TYPE pool_type = IrpSp->Flags & SL_OPEN_PAGING_FILE ? NonPagedPool : PagedPool; ACCESS_MASK granted_access; BOOL loaded_related = FALSE; UNICODE_STRING fn; @@ -2840,90 +2765,88 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN #ifdef DEBUG_STATS LARGE_INTEGER time1, time2; UINT8 open_type = 0; - + time1 = KeQueryPerformanceCounter(NULL); #endif - + Irp->IoStatus.Information = 0; - - RequestedDisposition = ((Stack->Parameters.Create.Options >> 24) & 0xff); - options = Stack->Parameters.Create.Options & FILE_VALID_OPTION_FLAGS; - + + RequestedDisposition = ((IrpSp->Parameters.Create.Options >> 24) & 0xff); + options = IrpSp->Parameters.Create.Options & FILE_VALID_OPTION_FLAGS; + if (options & FILE_DIRECTORY_FILE && RequestedDisposition == FILE_SUPERSEDE) { WARN("error - supersede requested with FILE_DIRECTORY_FILE\n"); - Status = STATUS_INVALID_PARAMETER; - goto exit2; + return STATUS_INVALID_PARAMETER; + } + + FileObject = IrpSp->FileObject; + + if (!FileObject) { + ERR("FileObject was NULL\n"); + return STATUS_INVALID_PARAMETER; } - FileObject = Stack->FileObject; - if (FileObject->RelatedFileObject && 
FileObject->RelatedFileObject->FsContext2) { struct _ccb* relatedccb = FileObject->RelatedFileObject->FsContext2; - + related = relatedccb->fileref; } else related = NULL; - + debug_create_options(options); - + switch (RequestedDisposition) { case FILE_SUPERSEDE: TRACE("requested disposition: FILE_SUPERSEDE\n"); break; - + case FILE_CREATE: TRACE("requested disposition: FILE_CREATE\n"); break; - + case FILE_OPEN: TRACE("requested disposition: FILE_OPEN\n"); break; - + case FILE_OPEN_IF: TRACE("requested disposition: FILE_OPEN_IF\n"); break; - + case FILE_OVERWRITE: TRACE("requested disposition: FILE_OVERWRITE\n"); break; - + case FILE_OVERWRITE_IF: TRACE("requested disposition: FILE_OVERWRITE_IF\n"); break; - + default: ERR("unknown disposition: %x\n", RequestedDisposition); Status = STATUS_NOT_IMPLEMENTED; goto exit; } - + fn = FileObject->FileName; - + TRACE("(%.*S)\n", fn.Length / sizeof(WCHAR), fn.Buffer); TRACE("FileObject = %p\n", FileObject); - + if (Vcb->readonly && (RequestedDisposition == FILE_SUPERSEDE || RequestedDisposition == FILE_CREATE || RequestedDisposition == FILE_OVERWRITE)) { Status = STATUS_MEDIA_WRITE_PROTECTED; - goto exit2; - } - - if (Vcb->readonly && Stack->Parameters.Create.SecurityContext->DesiredAccess & - (FILE_WRITE_DATA | FILE_APPEND_DATA | FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES | DELETE | WRITE_OWNER | WRITE_DAC)) { - Status = STATUS_MEDIA_WRITE_PROTECTED; - goto exit2; + goto exit; } - + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); if (options & FILE_OPEN_BY_FILE_ID) { if (fn.Length == sizeof(UINT64) && related && RequestedDisposition == FILE_OPEN) { UINT64 inode; - + RtlCopyMemory(&inode, fn.Buffer, sizeof(UINT64)); - + if (related->fcb == Vcb->root_fileref->fcb && inode == 0) inode = Vcb->root_fileref->fcb->inode; - + if (inode == 0) { // we use 0 to mean the parent of a subvolume fileref = related->parent; increase_fileref_refcount(fileref); @@ -2934,415 +2857,539 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT 
DeviceObject, PIRP Irp, LIST_EN } else { WARN("FILE_OPEN_BY_FILE_ID only supported for inodes\n"); Status = STATUS_NOT_IMPLEMENTED; + ExReleaseResourceLite(&Vcb->fcb_lock); goto exit; } } else { if (related && fn.Length != 0 && fn.Buffer[0] == '\\') { Status = STATUS_OBJECT_NAME_INVALID; + ExReleaseResourceLite(&Vcb->fcb_lock); goto exit; } - - if (!related && RequestedDisposition != FILE_OPEN && !(Stack->Flags & SL_OPEN_TARGET_DIRECTORY)) { + + if (!related && RequestedDisposition != FILE_OPEN && !(IrpSp->Flags & SL_OPEN_TARGET_DIRECTORY)) { ULONG fnoff; - + Status = open_fileref(Vcb, &related, &fn, NULL, TRUE, &parsed, &fnoff, - pool_type, Stack->Flags & SL_CASE_SENSITIVE, Irp); - + pool_type, IrpSp->Flags & SL_CASE_SENSITIVE, Irp); + if (Status == STATUS_OBJECT_NAME_NOT_FOUND) Status = STATUS_OBJECT_PATH_NOT_FOUND; else if (Status == STATUS_REPARSE) fileref = related; else if (NT_SUCCESS(Status)) { fnoff *= sizeof(WCHAR); - fnoff += related->filepart.Length + sizeof(WCHAR); - + fnoff += (related->dc ? 
related->dc->name.Length : 0) + sizeof(WCHAR); + if (related->fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT) { Status = STATUS_REPARSE; fileref = related; - parsed = fnoff - sizeof(WCHAR); + parsed = (USHORT)fnoff - sizeof(WCHAR); } else { fn.Buffer = &fn.Buffer[fnoff / sizeof(WCHAR)]; - fn.Length -= fnoff; + fn.Length -= (USHORT)fnoff; + + Status = open_fileref(Vcb, &fileref, &fn, related, IrpSp->Flags & SL_OPEN_TARGET_DIRECTORY, &parsed, &fn_offset, + pool_type, IrpSp->Flags & SL_CASE_SENSITIVE, Irp); - Status = open_fileref(Vcb, &fileref, &fn, related, Stack->Flags & SL_OPEN_TARGET_DIRECTORY, &parsed, &fn_offset, - pool_type, Stack->Flags & SL_CASE_SENSITIVE, Irp); - loaded_related = TRUE; } - + } } else { - Status = open_fileref(Vcb, &fileref, &fn, related, Stack->Flags & SL_OPEN_TARGET_DIRECTORY, &parsed, &fn_offset, - pool_type, Stack->Flags & SL_CASE_SENSITIVE, Irp); + Status = open_fileref(Vcb, &fileref, &fn, related, IrpSp->Flags & SL_OPEN_TARGET_DIRECTORY, &parsed, &fn_offset, + pool_type, IrpSp->Flags & SL_CASE_SENSITIVE, Irp); } } - + if (Status == STATUS_REPARSE) { REPARSE_DATA_BUFFER* data; - + ExAcquireResourceSharedLite(fileref->fcb->Header.Resource, TRUE); Status = get_reparse_block(fileref->fcb, (UINT8**)&data); ExReleaseResourceLite(fileref->fcb->Header.Resource); - + if (!NT_SUCCESS(Status)) { ERR("get_reparse_block returned %08x\n", Status); - - free_fileref(fileref); + + Status = STATUS_SUCCESS; + } else { + Status = STATUS_REPARSE; + RtlCopyMemory(&Irp->IoStatus.Information, data, sizeof(ULONG)); + + data->Reserved = FileObject->FileName.Length - parsed; + + Irp->Tail.Overlay.AuxiliaryBuffer = (void*)data; + + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + goto exit; } - - Status = STATUS_REPARSE; - RtlCopyMemory(&Irp->IoStatus.Information, data, sizeof(ULONG)); - - data->Reserved = FileObject->FileName.Length - parsed; - - Irp->Tail.Overlay.AuxiliaryBuffer = (void*)data; - - free_fileref(fileref); - goto exit; } - - if 
(NT_SUCCESS(Status) && fileref->deleted) { - free_fileref(fileref); - + + if (NT_SUCCESS(Status) && fileref->deleted) Status = STATUS_OBJECT_NAME_NOT_FOUND; - } - + if (NT_SUCCESS(Status)) { if (RequestedDisposition == FILE_CREATE) { TRACE("file %S already exists, returning STATUS_OBJECT_NAME_COLLISION\n", file_desc_fileref(fileref)); Status = STATUS_OBJECT_NAME_COLLISION; - free_fileref(fileref); + + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + goto exit; } } else if (Status == STATUS_OBJECT_NAME_NOT_FOUND) { if (RequestedDisposition == FILE_OPEN || RequestedDisposition == FILE_OVERWRITE) { TRACE("file doesn't exist, returning STATUS_OBJECT_NAME_NOT_FOUND\n"); + ExReleaseResourceLite(&Vcb->fcb_lock); goto exit; } } else if (Status == STATUS_OBJECT_PATH_NOT_FOUND) { TRACE("open_fileref returned %08x\n", Status); + ExReleaseResourceLite(&Vcb->fcb_lock); goto exit; } else { ERR("open_fileref returned %08x\n", Status); + ExReleaseResourceLite(&Vcb->fcb_lock); goto exit; } - + if (NT_SUCCESS(Status)) { // file already exists file_ref* sf; - + BOOL readonly; + + ExReleaseResourceLite(&Vcb->fcb_lock); + if (RequestedDisposition == FILE_SUPERSEDE || RequestedDisposition == FILE_OVERWRITE || RequestedDisposition == FILE_OVERWRITE_IF) { LARGE_INTEGER zero; - + #ifdef DEBUG_STATS open_type = 1; #endif - if (fileref->fcb->type == BTRFS_TYPE_DIRECTORY || fileref->fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY) { + if (fileref->fcb->type == BTRFS_TYPE_DIRECTORY || is_subvol_readonly(fileref->fcb->subvol, Irp)) { Status = STATUS_ACCESS_DENIED; - free_fileref(fileref); + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + goto exit; } - + if (Vcb->readonly) { Status = STATUS_MEDIA_WRITE_PROTECTED; - free_fileref(fileref); + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + goto exit; } - + 
zero.QuadPart = 0; if (!MmCanFileBeTruncated(&fileref->fcb->nonpaged->segment_object, &zero)) { Status = STATUS_USER_MAPPED_FILE; - free_fileref(fileref); + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + goto exit; } } - - SeLockSubjectContext(&Stack->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext); - - if (!SeAccessCheck(fileref->fcb->ads ? fileref->parent->fcb->sd : fileref->fcb->sd, - &Stack->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext, - FALSE, Stack->Parameters.Create.SecurityContext->DesiredAccess, 0, NULL, - IoGetFileObjectGenericMapping(), Stack->Flags & SL_FORCE_ACCESS_CHECK ? UserMode : Irp->RequestorMode, - &granted_access, &Status)) { - SeUnlockSubjectContext(&Stack->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext); - WARN("SeAccessCheck failed, returning %08x\n", Status); - goto exit; - } - - SeUnlockSubjectContext(&Stack->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext); - - // We allow a subvolume root to be opened read-write even if its readonly flag is set, so it can be cleared - if (fileref->fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY && granted_access & - (FILE_WRITE_DATA | FILE_APPEND_DATA | FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES | DELETE | WRITE_OWNER | WRITE_DAC) && - fileref->fcb->inode != SUBVOL_ROOT_INODE) { - Status = STATUS_ACCESS_DENIED; - free_fileref(fileref); - goto exit; - } - + + if (IrpSp->Parameters.Create.SecurityContext->DesiredAccess != 0) { + SeLockSubjectContext(&IrpSp->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext); + + if (!SeAccessCheck((fileref->fcb->ads || fileref->fcb == Vcb->dummy_fcb) ? 
fileref->parent->fcb->sd : fileref->fcb->sd, + &IrpSp->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext, + TRUE, IrpSp->Parameters.Create.SecurityContext->DesiredAccess, 0, NULL, + IoGetFileObjectGenericMapping(), IrpSp->Flags & SL_FORCE_ACCESS_CHECK ? UserMode : Irp->RequestorMode, + &granted_access, &Status)) { + SeUnlockSubjectContext(&IrpSp->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext); + TRACE("SeAccessCheck failed, returning %08x\n", Status); + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + + goto exit; + } + + SeUnlockSubjectContext(&IrpSp->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext); + } else + granted_access = 0; + TRACE("deleted = %s\n", fileref->deleted ? "TRUE" : "FALSE"); - + sf = fileref; while (sf) { if (sf->delete_on_close) { - WARN("could not open as deletion pending\n"); + TRACE("could not open as deletion pending\n"); Status = STATUS_DELETE_PENDING; - - free_fileref(fileref); + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + goto exit; } sf = sf->parent; } - if (fileref->fcb->atts & FILE_ATTRIBUTE_READONLY) { - ACCESS_MASK allowed = DELETE | READ_CONTROL | WRITE_OWNER | WRITE_DAC | - SYNCHRONIZE | ACCESS_SYSTEM_SECURITY | FILE_READ_DATA | - FILE_READ_EA | FILE_WRITE_EA | FILE_READ_ATTRIBUTES | - FILE_WRITE_ATTRIBUTES | FILE_EXECUTE | FILE_LIST_DIRECTORY | - FILE_TRAVERSE; + readonly = (!fileref->fcb->ads && fileref->fcb->atts & FILE_ATTRIBUTE_READONLY) || (fileref->fcb->ads && fileref->parent->fcb->atts & FILE_ATTRIBUTE_READONLY) || + is_subvol_readonly(fileref->fcb->subvol, Irp) || fileref->fcb == Vcb->dummy_fcb || Vcb->readonly; + + if (readonly) { + ACCESS_MASK allowed; + + allowed = READ_CONTROL | SYNCHRONIZE | ACCESS_SYSTEM_SECURITY | FILE_READ_DATA | + FILE_READ_EA | FILE_READ_ATTRIBUTES | FILE_EXECUTE | 
FILE_LIST_DIRECTORY | + FILE_TRAVERSE; + + if (!Vcb->readonly && (fileref->fcb == Vcb->dummy_fcb || fileref->fcb->inode == SUBVOL_ROOT_INODE)) + allowed |= DELETE; + + if (fileref->fcb != Vcb->dummy_fcb && !is_subvol_readonly(fileref->fcb->subvol, Irp) && !Vcb->readonly) { + allowed |= WRITE_OWNER | WRITE_DAC | FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES; + + if (!fileref->fcb->ads && fileref->fcb->type == BTRFS_TYPE_DIRECTORY) + allowed |= FILE_ADD_SUBDIRECTORY | FILE_ADD_FILE | FILE_DELETE_CHILD; + } else if (fileref->fcb->inode == SUBVOL_ROOT_INODE && is_subvol_readonly(fileref->fcb->subvol, Irp) && !Vcb->readonly) { + // We allow a subvolume root to be opened read-write even if its readonly flag is set, so it can be cleared + + allowed |= FILE_WRITE_ATTRIBUTES; + } + + if (IrpSp->Parameters.Create.SecurityContext->DesiredAccess & MAXIMUM_ALLOWED) { + granted_access &= allowed; + IrpSp->Parameters.Create.SecurityContext->AccessState->PreviouslyGrantedAccess &= allowed; + } else if (granted_access & ~allowed) { + Status = Vcb->readonly ? 
STATUS_MEDIA_WRITE_PROTECTED : STATUS_ACCESS_DENIED; + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); - if (fileref->fcb->type == BTRFS_TYPE_DIRECTORY) - allowed |= FILE_ADD_SUBDIRECTORY | FILE_ADD_FILE | FILE_DELETE_CHILD; - - if (granted_access & ~allowed) { - Status = STATUS_ACCESS_DENIED; - free_fileref(fileref); goto exit; } } - + if (options & FILE_DELETE_ON_CLOSE && (fileref == Vcb->root_fileref || Vcb->readonly || - fileref->fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY || fileref->fcb->atts & FILE_ATTRIBUTE_READONLY)) { + is_subvol_readonly(fileref->fcb->subvol, Irp) || readonly)) { Status = STATUS_CANNOT_DELETE; - free_fileref(fileref); + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + goto exit; } - + if ((fileref->fcb->type == BTRFS_TYPE_SYMLINK || fileref->fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT) && !(options & FILE_OPEN_REPARSE_POINT)) { REPARSE_DATA_BUFFER* data; - + /* How reparse points work from the point of view of the filesystem appears to * undocumented. When returning STATUS_REPARSE, MSDN encourages us to return * IO_REPARSE in Irp->IoStatus.Information, but that means we have to do our own * translation. If we instead return the reparse tag in Information, and store * a pointer to the reparse data buffer in Irp->Tail.Overlay.AuxiliaryBuffer, * IopSymlinkProcessReparse will do the translation for us. 
*/ - + Status = get_reparse_block(fileref->fcb, (UINT8**)&data); if (!NT_SUCCESS(Status)) { ERR("get_reparse_block returned %08x\n", Status); - free_fileref(fileref); + Status = STATUS_SUCCESS; + } else { + Status = STATUS_REPARSE; + Irp->IoStatus.Information = data->ReparseTag; + + if (fn.Buffer[(fn.Length / sizeof(WCHAR)) - 1] == '\\') + data->Reserved = sizeof(WCHAR); + + Irp->Tail.Overlay.AuxiliaryBuffer = (void*)data; + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + goto exit; } - - Status = STATUS_REPARSE; - Irp->IoStatus.Information = data->ReparseTag; - - if (fn.Buffer[(fn.Length / sizeof(WCHAR)) - 1] == '\\') - data->Reserved = sizeof(WCHAR); - - Irp->Tail.Overlay.AuxiliaryBuffer = (void*)data; - - free_fileref(fileref); - goto exit; } - + if (fileref->fcb->type == BTRFS_TYPE_DIRECTORY && !fileref->fcb->ads) { if (options & FILE_NON_DIRECTORY_FILE && !(fileref->fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT)) { - free_fileref(fileref); Status = STATUS_FILE_IS_A_DIRECTORY; + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + goto exit; } } else if (options & FILE_DIRECTORY_FILE) { TRACE("returning STATUS_NOT_A_DIRECTORY (type = %u, %S)\n", fileref->fcb->type, file_desc_fileref(fileref)); - free_fileref(fileref); Status = STATUS_NOT_A_DIRECTORY; + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + goto exit; } - + if (fileref->open_count > 0) { - Status = IoCheckShareAccess(granted_access, Stack->Parameters.Create.ShareAccess, FileObject, &fileref->fcb->share_access, TRUE); - + Status = IoCheckShareAccess(granted_access, IrpSp->Parameters.Create.ShareAccess, FileObject, &fileref->fcb->share_access, FALSE); + if (!NT_SUCCESS(Status)) { - WARN("IoCheckShareAccess failed, returning %08x\n", Status); - - free_fileref(fileref); + if 
(Status == STATUS_SHARING_VIOLATION) + TRACE("IoCheckShareAccess failed, returning %08x\n", Status); + else + WARN("IoCheckShareAccess failed, returning %08x\n", Status); + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + goto exit; } - } else { - IoSetShareAccess(granted_access, Stack->Parameters.Create.ShareAccess, FileObject, &fileref->fcb->share_access); - } + + IoUpdateShareAccess(FileObject, &fileref->fcb->share_access); + } else + IoSetShareAccess(granted_access, IrpSp->Parameters.Create.ShareAccess, FileObject, &fileref->fcb->share_access); if (granted_access & FILE_WRITE_DATA || options & FILE_DELETE_ON_CLOSE) { if (!MmFlushImageSection(&fileref->fcb->nonpaged->segment_object, MmFlushForWrite)) { Status = (options & FILE_DELETE_ON_CLOSE) ? STATUS_CANNOT_DELETE : STATUS_SHARING_VIOLATION; - - free_fileref(fileref); + + IoRemoveShareAccess(FileObject, &fileref->fcb->share_access); + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + goto exit; } } - + if (RequestedDisposition == FILE_OVERWRITE || RequestedDisposition == FILE_OVERWRITE_IF || RequestedDisposition == FILE_SUPERSEDE) { ULONG defda, oldatts, filter; LARGE_INTEGER time; BTRFS_TIME now; - - if ((RequestedDisposition == FILE_OVERWRITE || RequestedDisposition == FILE_OVERWRITE_IF) && fileref->fcb->atts & FILE_ATTRIBUTE_READONLY) { + + if ((RequestedDisposition == FILE_OVERWRITE || RequestedDisposition == FILE_OVERWRITE_IF) && readonly) { WARN("cannot overwrite readonly file\n"); Status = STATUS_ACCESS_DENIED; - free_fileref(fileref); - goto exit; - } - - // FIXME - where did we get this from? -// if (fcb->refcount > 1) { -// WARN("cannot overwrite open file (fcb = %p, refcount = %u)\n", fcb, fcb->refcount); -// Status = STATUS_ACCESS_DENIED; -// free_fcb(fcb); -// goto exit; -// } - - // FIXME - make sure not ADS! 
- Status = truncate_file(fileref->fcb, 0, Irp, rollback); - if (!NT_SUCCESS(Status)) { - ERR("truncate_file returned %08x\n", Status); - free_fileref(fileref); + + IoRemoveShareAccess(FileObject, &fileref->fcb->share_access); + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + goto exit; } - - if (Irp->Overlay.AllocationSize.QuadPart > 0) { - Status = extend_file(fileref->fcb, fileref, Irp->Overlay.AllocationSize.QuadPart, TRUE, NULL, rollback); - + + if (fileref->fcb->ads) { + Status = stream_set_end_of_file_information(Vcb, 0, fileref->fcb, fileref, FALSE); if (!NT_SUCCESS(Status)) { - ERR("extend_file returned %08x\n", Status); - free_fileref(fileref); + ERR("stream_set_end_of_file_information returned %08x\n", Status); + + IoRemoveShareAccess(FileObject, &fileref->fcb->share_access); + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + goto exit; } - } - - if (Irp->AssociatedIrp.SystemBuffer && Stack->Parameters.Create.EaLength > 0) { - ULONG offset; - FILE_FULL_EA_INFORMATION* eainfo; - - Status = IoCheckEaBufferValidity(Irp->AssociatedIrp.SystemBuffer, Stack->Parameters.Create.EaLength, &offset); + } else { + Status = truncate_file(fileref->fcb, 0, Irp, rollback); if (!NT_SUCCESS(Status)) { - ERR("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, offset); - free_fileref(fileref); + ERR("truncate_file returned %08x\n", Status); + + IoRemoveShareAccess(FileObject, &fileref->fcb->share_access); + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + goto exit; } - - fileref->fcb->ealen = 4; - - // capitalize EA name - eainfo = Irp->AssociatedIrp.SystemBuffer; - do { - STRING s; - - s.Length = s.MaximumLength = eainfo->EaNameLength; - s.Buffer = eainfo->EaName; - - RtlUpperString(&s, &s); - - fileref->fcb->ealen += 
5 + eainfo->EaNameLength + eainfo->EaValueLength; - - if (eainfo->NextEntryOffset == 0) - break; - - eainfo = (FILE_FULL_EA_INFORMATION*)(((UINT8*)eainfo) + eainfo->NextEntryOffset); - } while (TRUE); - - if (fileref->fcb->ea_xattr.Buffer) - ExFreePool(fileref->fcb->ea_xattr.Buffer); - - fileref->fcb->ea_xattr.Buffer = ExAllocatePoolWithTag(pool_type, Stack->Parameters.Create.EaLength, ALLOC_TAG); - if (!fileref->fcb->ea_xattr.Buffer) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - - free_fileref(fileref); + } + + if (Irp->Overlay.AllocationSize.QuadPart > 0) { + Status = extend_file(fileref->fcb, fileref, Irp->Overlay.AllocationSize.QuadPart, TRUE, NULL, rollback); + + if (!NT_SUCCESS(Status)) { + ERR("extend_file returned %08x\n", Status); + + IoRemoveShareAccess(FileObject, &fileref->fcb->share_access); + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + goto exit; } - - fileref->fcb->ea_xattr.Length = fileref->fcb->ea_xattr.MaximumLength = Stack->Parameters.Create.EaLength; - RtlCopyMemory(fileref->fcb->ea_xattr.Buffer, Irp->AssociatedIrp.SystemBuffer, Stack->Parameters.Create.EaLength); - } else { - if (fileref->fcb->ea_xattr.Length > 0) { - ExFreePool(fileref->fcb->ea_xattr.Buffer); - fileref->fcb->ea_xattr.Buffer = NULL; - fileref->fcb->ea_xattr.Length = fileref->fcb->ea_xattr.MaximumLength = 0; - - fileref->fcb->ea_changed = TRUE; - fileref->fcb->ealen = 0; - } } - - filter = FILE_NOTIFY_CHANGE_SIZE | FILE_NOTIFY_CHANGE_LAST_WRITE; - - mark_fcb_dirty(fileref->fcb); - - oldatts = fileref->fcb->atts; - - defda = get_file_attributes(Vcb, &fileref->fcb->inode_item, fileref->fcb->subvol, fileref->fcb->inode, fileref->fcb->type, - fileref->filepart.Length > 0 && fileref->filepart.Buffer[0] == '.', TRUE, Irp); - - if (RequestedDisposition == FILE_SUPERSEDE) - fileref->fcb->atts = Stack->Parameters.Create.FileAttributes | FILE_ATTRIBUTE_ARCHIVE; - else - 
fileref->fcb->atts |= Stack->Parameters.Create.FileAttributes | FILE_ATTRIBUTE_ARCHIVE; - - if (fileref->fcb->atts != oldatts) { - fileref->fcb->atts_changed = TRUE; - fileref->fcb->atts_deleted = Stack->Parameters.Create.FileAttributes == defda; - filter |= FILE_NOTIFY_CHANGE_ATTRIBUTES; + + if (!fileref->fcb->ads) { + if (Irp->AssociatedIrp.SystemBuffer && IrpSp->Parameters.Create.EaLength > 0) { + ULONG offset; + FILE_FULL_EA_INFORMATION* eainfo; + + Status = IoCheckEaBufferValidity(Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.Create.EaLength, &offset); + if (!NT_SUCCESS(Status)) { + ERR("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, offset); + + IoRemoveShareAccess(FileObject, &fileref->fcb->share_access); + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + + goto exit; + } + + fileref->fcb->ealen = 4; + + // capitalize EA name + eainfo = Irp->AssociatedIrp.SystemBuffer; + do { + STRING s; + + s.Length = s.MaximumLength = eainfo->EaNameLength; + s.Buffer = eainfo->EaName; + + RtlUpperString(&s, &s); + + fileref->fcb->ealen += 5 + eainfo->EaNameLength + eainfo->EaValueLength; + + if (eainfo->NextEntryOffset == 0) + break; + + eainfo = (FILE_FULL_EA_INFORMATION*)(((UINT8*)eainfo) + eainfo->NextEntryOffset); + } while (TRUE); + + if (fileref->fcb->ea_xattr.Buffer) + ExFreePool(fileref->fcb->ea_xattr.Buffer); + + fileref->fcb->ea_xattr.Buffer = ExAllocatePoolWithTag(pool_type, IrpSp->Parameters.Create.EaLength, ALLOC_TAG); + if (!fileref->fcb->ea_xattr.Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + + IoRemoveShareAccess(FileObject, &fileref->fcb->share_access); + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + + goto exit; + } + + fileref->fcb->ea_xattr.Length = fileref->fcb->ea_xattr.MaximumLength = (USHORT)IrpSp->Parameters.Create.EaLength; + 
RtlCopyMemory(fileref->fcb->ea_xattr.Buffer, Irp->AssociatedIrp.SystemBuffer, fileref->fcb->ea_xattr.Length); + } else { + if (fileref->fcb->ea_xattr.Length > 0) { + ExFreePool(fileref->fcb->ea_xattr.Buffer); + fileref->fcb->ea_xattr.Buffer = NULL; + fileref->fcb->ea_xattr.Length = fileref->fcb->ea_xattr.MaximumLength = 0; + + fileref->fcb->ea_changed = TRUE; + fileref->fcb->ealen = 0; + } + } } - + KeQuerySystemTime(&time); win_time_to_unix(time, &now); - - fileref->fcb->inode_item.transid = Vcb->superblock.generation; - fileref->fcb->inode_item.sequence++; - fileref->fcb->inode_item.st_ctime = now; - fileref->fcb->inode_item.st_mtime = now; - fileref->fcb->inode_item_changed = TRUE; - - // FIXME - truncate streams - // FIXME - do we need to alter parent directory's times? - - send_notification_fcb(fileref, filter, FILE_ACTION_MODIFIED); + + filter = FILE_NOTIFY_CHANGE_SIZE | FILE_NOTIFY_CHANGE_LAST_WRITE; + + if (fileref->fcb->ads) { + fileref->parent->fcb->inode_item.st_mtime = now; + fileref->parent->fcb->inode_item_changed = TRUE; + mark_fcb_dirty(fileref->parent->fcb); + + send_notification_fcb(fileref->parent, filter, FILE_ACTION_MODIFIED, &fileref->dc->name); + } else { + mark_fcb_dirty(fileref->fcb); + + oldatts = fileref->fcb->atts; + + defda = get_file_attributes(Vcb, fileref->fcb->subvol, fileref->fcb->inode, fileref->fcb->type, + fileref->dc && fileref->dc->name.Length >= sizeof(WCHAR) && fileref->dc->name.Buffer[0] == '.', TRUE, Irp); + + if (RequestedDisposition == FILE_SUPERSEDE) + fileref->fcb->atts = IrpSp->Parameters.Create.FileAttributes | FILE_ATTRIBUTE_ARCHIVE; + else + fileref->fcb->atts |= IrpSp->Parameters.Create.FileAttributes | FILE_ATTRIBUTE_ARCHIVE; + + if (fileref->fcb->atts != oldatts) { + fileref->fcb->atts_changed = TRUE; + fileref->fcb->atts_deleted = IrpSp->Parameters.Create.FileAttributes == defda; + filter |= FILE_NOTIFY_CHANGE_ATTRIBUTES; + } + + fileref->fcb->inode_item.transid = Vcb->superblock.generation; + 
fileref->fcb->inode_item.sequence++; + fileref->fcb->inode_item.st_ctime = now; + fileref->fcb->inode_item.st_mtime = now; + fileref->fcb->inode_item_changed = TRUE; + + send_notification_fcb(fileref, filter, FILE_ACTION_MODIFIED, NULL); + } } else { if (options & FILE_NO_EA_KNOWLEDGE && fileref->fcb->ea_xattr.Length > 0) { FILE_FULL_EA_INFORMATION* ffei = (FILE_FULL_EA_INFORMATION*)fileref->fcb->ea_xattr.Buffer; - + do { if (ffei->Flags & FILE_NEED_EA) { WARN("returning STATUS_ACCESS_DENIED as no EA knowledge\n"); - free_fileref(fileref); Status = STATUS_ACCESS_DENIED; + + IoRemoveShareAccess(FileObject, &fileref->fcb->share_access); + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + goto exit; } - + if (ffei->NextEntryOffset == 0) break; - + ffei = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ffei) + ffei->NextEntryOffset); } while (TRUE); } } - + FileObject->FsContext = fileref->fcb; - + ccb = ExAllocatePoolWithTag(NonPagedPool, sizeof(*ccb), ALLOC_TAG); if (!ccb) { ERR("out of memory\n"); - free_fileref(fileref); Status = STATUS_INSUFFICIENT_RESOURCES; + + IoRemoveShareAccess(FileObject, &fileref->fcb->share_access); + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, fileref); + ExReleaseResourceLite(&Vcb->fcb_lock); + goto exit; } - + RtlZeroMemory(ccb, sizeof(*ccb)); - + ccb->NodeType = BTRFS_NODE_TYPE_CCB; - ccb->NodeSize = sizeof(ccb); + ccb->NodeSize = sizeof(*ccb); ccb->disposition = RequestedDisposition; ccb->options = options; ccb->query_dir_offset = 0; @@ -3350,69 +3397,65 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN ccb->has_wildcard = FALSE; ccb->specific_file = FALSE; ccb->access = granted_access; - ccb->case_sensitive = Stack->Flags & SL_CASE_SENSITIVE; - + ccb->case_sensitive = IrpSp->Flags & SL_CASE_SENSITIVE; + ccb->reserving = FALSE; + ccb->lxss = called_from_lxss(); + ccb->fileref = fileref; - + 
FileObject->FsContext2 = ccb; - - if (fn_offset > 0) { - fn.Length -= fn_offset * sizeof(WCHAR); - RtlMoveMemory(&fn.Buffer[0], &fn.Buffer[fn_offset], fn.Length); - } - FileObject->SectionObjectPointer = &fileref->fcb->nonpaged->segment_object; - + if (NT_SUCCESS(Status)) { switch (RequestedDisposition) { case FILE_SUPERSEDE: Irp->IoStatus.Information = FILE_SUPERSEDED; break; - + case FILE_OPEN: case FILE_OPEN_IF: Irp->IoStatus.Information = FILE_OPENED; break; - + case FILE_OVERWRITE: case FILE_OVERWRITE_IF: Irp->IoStatus.Information = FILE_OVERWRITTEN; break; } } - + // Make sure paging files don't have any extents marked as being prealloc, // as this would mean we'd have to lock exclusively when writing. - if (Stack->Flags & SL_OPEN_PAGING_FILE) { + if (IrpSp->Flags & SL_OPEN_PAGING_FILE) { LIST_ENTRY* le; BOOL changed = FALSE; - + ExAcquireResourceExclusiveLite(fileref->fcb->Header.Resource, TRUE); - + le = fileref->fcb->extents.Flink; - + while (le != &fileref->fcb->extents) { extent* ext = CONTAINING_RECORD(le, extent, list_entry); - - if (ext->data->type == EXTENT_TYPE_PREALLOC) { - ext->data->type = EXTENT_TYPE_REGULAR; + + if (ext->extent_data.type == EXTENT_TYPE_PREALLOC) { + ext->extent_data.type = EXTENT_TYPE_REGULAR; changed = TRUE; } - + le = le->Flink; } - + ExReleaseResourceLite(fileref->fcb->Header.Resource); - + if (changed) { fileref->fcb->extents_changed = TRUE; mark_fcb_dirty(fileref->fcb); } - + fileref->fcb->Header.Flags2 |= FSRTL_FLAG2_IS_PAGING_FILE; Vcb->disallow_dismount = TRUE; } - + #ifdef DEBUG_FCB_REFCOUNTS oc = InterlockedIncrement(&fileref->open_count); ERR("fileref %p: open_count now %i\n", fileref, oc); @@ -3425,30 +3468,47 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN open_type = 2; #endif Status = file_create(Irp, DeviceObject->DeviceExtension, FileObject, related, loaded_related, &fn, RequestedDisposition, options, rollback); + ExReleaseResourceLite(&Vcb->fcb_lock); + 
Irp->IoStatus.Information = NT_SUCCESS(Status) ? FILE_CREATED : 0; } - + if (NT_SUCCESS(Status) && !(options & FILE_NO_INTERMEDIATE_BUFFERING)) FileObject->Flags |= FO_CACHE_SUPPORTED; - + exit: - ExReleaseResourceLite(&Vcb->fcb_lock); - -exit2: - if (loaded_related) - free_fileref(related); - - if (NT_SUCCESS(Status)) { + if (loaded_related) { + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(Vcb, related); + ExReleaseResourceLite(&Vcb->fcb_lock); + } + + if (Status == STATUS_SUCCESS) { + fcb* fcb2; + + IrpSp->Parameters.Create.SecurityContext->AccessState->PreviouslyGrantedAccess |= granted_access; + IrpSp->Parameters.Create.SecurityContext->AccessState->RemainingDesiredAccess &= ~(granted_access | MAXIMUM_ALLOWED); + if (!FileObject->Vpb) FileObject->Vpb = DeviceObject->Vpb; - } else { - if (Status != STATUS_OBJECT_NAME_NOT_FOUND && Status != STATUS_OBJECT_PATH_NOT_FOUND) - TRACE("returning %08x\n", Status); - } - + + fcb2 = FileObject->FsContext; + + if (fcb2->ads) { + struct _ccb* ccb2 = FileObject->FsContext2; + + fcb2 = ccb2->fileref->parent->fcb; + } + + ExAcquireResourceExclusiveLite(fcb2->Header.Resource, TRUE); + fcb_load_csums(Vcb, fcb2, Irp); + ExReleaseResourceLite(fcb2->Header.Resource); + } else if (Status != STATUS_REPARSE && Status != STATUS_OBJECT_NAME_NOT_FOUND && Status != STATUS_OBJECT_PATH_NOT_FOUND) + TRACE("returning %08x\n", Status); + #ifdef DEBUG_STATS time2 = KeQueryPerformanceCounter(NULL); - + if (open_type == 0) { Vcb->stats.open_total_time += time2.QuadPart - time1.QuadPart; Vcb->stats.num_opens++; @@ -3460,68 +3520,69 @@ exit2: Vcb->stats.num_creates++; } #endif - + return Status; } -NTSTATUS verify_vcb(device_extension* Vcb, PIRP Irp) { +static NTSTATUS verify_vcb(device_extension* Vcb, PIRP Irp) { NTSTATUS Status; LIST_ENTRY* le; - + BOOL need_verify = FALSE; + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); - + le = Vcb->devices.Flink; while (le != &Vcb->devices) { device* dev = CONTAINING_RECORD(le, 
device, list_entry); - - if (dev->removable) { + + if (dev->devobj && dev->removable) { ULONG cc; IO_STATUS_BLOCK iosb; - + Status = dev_ioctl(dev->devobj, IOCTL_STORAGE_CHECK_VERIFY, NULL, 0, &cc, sizeof(ULONG), TRUE, &iosb); - - if (!NT_SUCCESS(Status)) { - ERR("dev_ioctl returned %08x\n", Status); + + if (IoIsErrorUserInduced(Status)) { + ERR("IOCTL_STORAGE_CHECK_VERIFY returned %08x (user-induced)\n", Status); + need_verify = TRUE; + } else if (!NT_SUCCESS(Status)) { + ERR("IOCTL_STORAGE_CHECK_VERIFY returned %08x\n", Status); goto end; - } - - if (iosb.Information < sizeof(ULONG)) { + } else if (iosb.Information < sizeof(ULONG)) { ERR("iosb.Information was too short\n"); Status = STATUS_INTERNAL_ERROR; - goto end; - } - - if (cc != dev->change_count) { - PDEVICE_OBJECT devobj; - + } else if (cc != dev->change_count) { dev->devobj->Flags |= DO_VERIFY_VOLUME; - - devobj = IoGetDeviceToVerify(Irp->Tail.Overlay.Thread); - IoSetDeviceToVerify(Irp->Tail.Overlay.Thread, NULL); - - if (!devobj) { - devobj = IoGetDeviceToVerify(PsGetCurrentThread()); - IoSetDeviceToVerify(PsGetCurrentThread(), NULL); - } - - devobj = Vcb->Vpb ? Vcb->Vpb->RealDevice : NULL; - - if (devobj) - IoVerifyVolume(devobj, FALSE); - - Status = STATUS_VERIFY_REQUIRED; - goto end; + need_verify = TRUE; } } - + le = le->Flink; } - + Status = STATUS_SUCCESS; - + end: ExReleaseResourceLite(&Vcb->tree_lock); - + + if (need_verify) { + PDEVICE_OBJECT devobj; + + devobj = IoGetDeviceToVerify(Irp->Tail.Overlay.Thread); + IoSetDeviceToVerify(Irp->Tail.Overlay.Thread, NULL); + + if (!devobj) { + devobj = IoGetDeviceToVerify(PsGetCurrentThread()); + IoSetDeviceToVerify(PsGetCurrentThread(), NULL); + } + + devobj = Vcb->Vpb ? 
Vcb->Vpb->RealDevice : NULL; + + if (devobj) + Status = IoVerifyVolume(devobj, FALSE); + else + Status = STATUS_VERIFY_REQUIRED; + } + return Status; } @@ -3536,92 +3597,118 @@ static BOOL has_manage_volume_privilege(ACCESS_STATE* access_state, KPROCESSOR_M return SePrivilegeCheck(&privset, &access_state->SubjectSecurityContext, processor_mode) ? TRUE : FALSE; } -NTSTATUS STDCALL drv_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { +_Dispatch_type_(IRP_MJ_CREATE) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { NTSTATUS Status; PIO_STACK_LOCATION IrpSp; device_extension* Vcb = DeviceObject->DeviceExtension; BOOL top_level, locked = FALSE; - LIST_ENTRY rollback; - - TRACE("create (flags = %x)\n", Irp->Flags); - - InitializeListHead(&rollback); - + FsRtlEnterFileSystem(); - + + TRACE("create (flags = %x)\n", Irp->Flags); + top_level = is_top_level(Irp); - + /* return success if just called for FS device object */ - if (DeviceObject == devobj || (Vcb && Vcb->type == VCB_TYPE_PARTITION0)) { + if (DeviceObject == master_devobj) { TRACE("create called for FS device object\n"); - + Irp->IoStatus.Information = FILE_OPENED; Status = STATUS_SUCCESS; + goto exit; + } else if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + Status = vol_create(DeviceObject, Irp); + goto exit; + } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { + Status = STATUS_INVALID_PARAMETER; + goto exit; + } + + if (!(Vcb->Vpb->Flags & VPB_MOUNTED)) { + Status = STATUS_DEVICE_NOT_READY; + goto exit; + } + + if (Vcb->removing) { + Status = STATUS_ACCESS_DENIED; goto exit; } - + Status = verify_vcb(Vcb, Irp); if (!NT_SUCCESS(Status)) { ERR("verify_vcb returned %08x\n", Status); goto exit; } - + ExAcquireResourceSharedLite(&Vcb->load_lock, TRUE); locked = TRUE; - + IrpSp = IoGetCurrentIrpStackLocation(Irp); - + if (IrpSp->Flags != 0) { UINT32 flags = IrpSp->Flags; - + TRACE("flags:\n"); - + if (flags & SL_CASE_SENSITIVE) { TRACE("SL_CASE_SENSITIVE\n"); flags 
&= ~SL_CASE_SENSITIVE; } - + if (flags & SL_FORCE_ACCESS_CHECK) { TRACE("SL_FORCE_ACCESS_CHECK\n"); flags &= ~SL_FORCE_ACCESS_CHECK; } - + if (flags & SL_OPEN_PAGING_FILE) { TRACE("SL_OPEN_PAGING_FILE\n"); flags &= ~SL_OPEN_PAGING_FILE; } - + if (flags & SL_OPEN_TARGET_DIRECTORY) { TRACE("SL_OPEN_TARGET_DIRECTORY\n"); flags &= ~SL_OPEN_TARGET_DIRECTORY; } - + if (flags & SL_STOP_ON_SYMLINK) { TRACE("SL_STOP_ON_SYMLINK\n"); flags &= ~SL_STOP_ON_SYMLINK; } - + if (flags) WARN("unknown flags: %x\n", flags); } else { TRACE("flags: (none)\n"); } - -// Vpb = DeviceObject->DeviceExtension; - -// TRACE("create called for something other than FS device object\n"); - + + if (!IrpSp->FileObject) { + ERR("FileObject was NULL\n"); + Status = STATUS_INVALID_PARAMETER; + goto exit; + } + + if (IrpSp->FileObject->RelatedFileObject) { + fcb* relatedfcb = IrpSp->FileObject->RelatedFileObject->FsContext; + + if (relatedfcb && relatedfcb->Vcb != Vcb) { + WARN("RelatedFileObject was for different device\n"); + Status = STATUS_INVALID_PARAMETER; + goto exit; + } + } + // opening volume - // FIXME - also check if RelatedFileObject is Vcb if (IrpSp->FileObject->FileName.Length == 0 && !IrpSp->FileObject->RelatedFileObject) { ULONG RequestedDisposition = ((IrpSp->Parameters.Create.Options >> 24) & 0xff); ULONG RequestedOptions = IrpSp->Parameters.Create.Options & FILE_VALID_OPTION_FLAGS; #ifdef DEBUG_FCB_REFCOUNTS - LONG rc, oc; + LONG rc; #endif ccb* ccb; - + TRACE("open operation for volume\n"); if (RequestedDisposition != FILE_OPEN && RequestedDisposition != FILE_OPEN_IF) { @@ -3633,28 +3720,25 @@ NTSTATUS STDCALL drv_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { Status = STATUS_NOT_A_DIRECTORY; goto exit; } - - if (Vcb->removing) { - Status = STATUS_ACCESS_DENIED; - goto exit; - } - + ccb = ExAllocatePoolWithTag(NonPagedPool, sizeof(*ccb), ALLOC_TAG); if (!ccb) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto exit; } - + RtlZeroMemory(ccb, sizeof(*ccb)); - 
+ ccb->NodeType = BTRFS_NODE_TYPE_CCB; - ccb->NodeSize = sizeof(ccb); + ccb->NodeSize = sizeof(*ccb); ccb->disposition = RequestedDisposition; ccb->options = RequestedOptions; ccb->access = IrpSp->Parameters.Create.SecurityContext->AccessState->PreviouslyGrantedAccess; ccb->manage_volume_privilege = has_manage_volume_privilege(IrpSp->Parameters.Create.SecurityContext->AccessState, IrpSp->Flags & SL_FORCE_ACCESS_CHECK ? UserMode : Irp->RequestorMode); + ccb->reserving = FALSE; + ccb->lxss = called_from_lxss(); #ifdef DEBUG_FCB_REFCOUNTS rc = InterlockedIncrement(&Vcb->volume_fcb->refcount); @@ -3664,56 +3748,56 @@ NTSTATUS STDCALL drv_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { #endif IrpSp->FileObject->FsContext = Vcb->volume_fcb; IrpSp->FileObject->FsContext2 = ccb; - + IrpSp->FileObject->SectionObjectPointer = &Vcb->volume_fcb->nonpaged->segment_object; if (!IrpSp->FileObject->Vpb) IrpSp->FileObject->Vpb = DeviceObject->Vpb; - + InterlockedIncrement(&Vcb->open_files); Irp->IoStatus.Information = FILE_OPENED; Status = STATUS_SUCCESS; } else { + LIST_ENTRY rollback; BOOL skip_lock; - + + InitializeListHead(&rollback); + TRACE("file name: %.*S\n", IrpSp->FileObject->FileName.Length / sizeof(WCHAR), IrpSp->FileObject->FileName.Buffer); - + if (IrpSp->FileObject->RelatedFileObject) TRACE("related file = %S\n", file_desc(IrpSp->FileObject->RelatedFileObject)); - + // Don't lock again if we're being called from within CcCopyRead etc. 
skip_lock = ExIsResourceAcquiredExclusiveLite(&Vcb->tree_lock); if (!skip_lock) ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); - - Status = open_file(DeviceObject, Irp, &rollback); - + + Status = open_file(DeviceObject, Vcb, Irp, &rollback); + if (!NT_SUCCESS(Status)) do_rollback(Vcb, &rollback); else - clear_rollback(Vcb, &rollback); - + clear_rollback(&rollback); + if (!skip_lock) ExReleaseResourceLite(&Vcb->tree_lock); - -// Status = STATUS_ACCESS_DENIED; } - + exit: Irp->IoStatus.Status = Status; IoCompleteRequest( Irp, NT_SUCCESS(Status) ? IO_DISK_INCREMENT : IO_NO_INCREMENT ); -// IoCompleteRequest( Irp, IO_DISK_INCREMENT ); - + TRACE("create returning %08x\n", Status); - + if (locked) ExReleaseResourceLite(&Vcb->load_lock); - - if (top_level) + + if (top_level) IoSetTopLevelIrp(NULL); - + FsRtlExitFileSystem(); return Status; diff --git a/reactos/drivers/filesystems/btrfs/devctrl.c b/reactos/drivers/filesystems/btrfs/devctrl.c index 10c02cb28b9..06e0c658f50 100644 --- a/reactos/drivers/filesystems/btrfs/devctrl.c +++ b/reactos/drivers/filesystems/btrfs/devctrl.c @@ -1,158 +1,48 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . 
*/ #include "btrfs_drv.h" -#ifndef __REACTOS__ -#include -#endif +#include #include -#include #include +extern PDRIVER_OBJECT drvobj; extern LIST_ENTRY VcbList; extern ERESOURCE global_loading_lock; -static NTSTATUS part0_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { - NTSTATUS Status; - part0_device_extension* p0de = DeviceObject->DeviceExtension; - PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); - - TRACE("control code = %x\n", IrpSp->Parameters.DeviceIoControl.IoControlCode); - - switch (IrpSp->Parameters.DeviceIoControl.IoControlCode) { - case IOCTL_MOUNTDEV_QUERY_UNIQUE_ID: - { - MOUNTDEV_UNIQUE_ID* mduid; - - if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_UNIQUE_ID)) { - Status = STATUS_BUFFER_TOO_SMALL; - Irp->IoStatus.Status = Status; - Irp->IoStatus.Information = sizeof(MOUNTDEV_UNIQUE_ID); - IoCompleteRequest(Irp, IO_NO_INCREMENT); - return Status; - } - - mduid = Irp->AssociatedIrp.SystemBuffer; - mduid->UniqueIdLength = sizeof(BTRFS_UUID); - - if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_UNIQUE_ID) - 1 + mduid->UniqueIdLength) { - Status = STATUS_BUFFER_OVERFLOW; - Irp->IoStatus.Status = Status; - Irp->IoStatus.Information = sizeof(MOUNTDEV_UNIQUE_ID); - IoCompleteRequest(Irp, IO_NO_INCREMENT); - return Status; - } - - RtlCopyMemory(mduid->UniqueId, &p0de->uuid, sizeof(BTRFS_UUID)); - - Status = STATUS_SUCCESS; - Irp->IoStatus.Status = Status; - Irp->IoStatus.Information = sizeof(MOUNTDEV_UNIQUE_ID) - 1 + mduid->UniqueIdLength; - IoCompleteRequest(Irp, IO_NO_INCREMENT); - - return Status; - } - - case IOCTL_MOUNTDEV_QUERY_DEVICE_NAME: - { - PMOUNTDEV_NAME name; - - if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_NAME)) { - Status = STATUS_BUFFER_TOO_SMALL; - Irp->IoStatus.Status = Status; - Irp->IoStatus.Information = sizeof(MOUNTDEV_NAME); - IoCompleteRequest(Irp, IO_NO_INCREMENT); - return Status; - } - - name = 
Irp->AssociatedIrp.SystemBuffer; - name->NameLength = p0de->name.Length; - - if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < offsetof(MOUNTDEV_NAME, Name[0]) + name->NameLength) { - Status = STATUS_BUFFER_OVERFLOW; - Irp->IoStatus.Status = Status; - Irp->IoStatus.Information = sizeof(MOUNTDEV_NAME); - IoCompleteRequest(Irp, IO_NO_INCREMENT); - return Status; - } - - RtlCopyMemory(name->Name, p0de->name.Buffer, p0de->name.Length); - - Status = STATUS_SUCCESS; - Irp->IoStatus.Status = Status; - Irp->IoStatus.Information = offsetof(MOUNTDEV_NAME, Name[0]) + name->NameLength; - IoCompleteRequest(Irp, IO_NO_INCREMENT); - - return Status; - } - } - - IoSkipCurrentIrpStackLocation(Irp); - - Status = IoCallDriver(p0de->devobj, Irp); - - TRACE("returning %08x\n", Status); - - return Status; -} - static NTSTATUS mountdev_query_stable_guid(device_extension* Vcb, PIRP Irp) { MOUNTDEV_STABLE_GUID* msg = Irp->UserBuffer; PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); - + TRACE("IOCTL_MOUNTDEV_QUERY_STABLE_GUID\n"); - + if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_STABLE_GUID)) return STATUS_INVALID_PARAMETER; RtlCopyMemory(&msg->StableGuid, &Vcb->superblock.uuid, sizeof(GUID)); - + Irp->IoStatus.Information = sizeof(MOUNTDEV_STABLE_GUID); - - return STATUS_SUCCESS; -} -static NTSTATUS get_partition_info_ex(device_extension* Vcb, PIRP Irp) { - NTSTATUS Status; - PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); - PARTITION_INFORMATION_EX* piex; - - TRACE("IOCTL_DISK_GET_PARTITION_INFO_EX\n"); - - Status = dev_ioctl(Vcb->Vpb->RealDevice, IOCTL_DISK_GET_PARTITION_INFO_EX, NULL, 0, - Irp->UserBuffer, IrpSp->Parameters.DeviceIoControl.OutputBufferLength, TRUE, &Irp->IoStatus); - if (!NT_SUCCESS(Status)) - return Status; - - piex = (PARTITION_INFORMATION_EX*)Irp->UserBuffer; - - if (piex->PartitionStyle == PARTITION_STYLE_MBR) { - piex->Mbr.PartitionType = PARTITION_IFS; - piex->Mbr.RecognizedPartition = TRUE; - } 
else if (piex->PartitionStyle == PARTITION_STYLE_GPT) { - piex->Gpt.PartitionType = PARTITION_BASIC_DATA_GUID; - } - return STATUS_SUCCESS; } -static NTSTATUS is_writable(device_extension* Vcb, PIRP Irp) { +static NTSTATUS is_writable(device_extension* Vcb) { TRACE("IOCTL_DISK_IS_WRITABLE\n"); - + return Vcb->readonly ? STATUS_MEDIA_WRITE_PROTECTED : STATUS_SUCCESS; } @@ -161,9 +51,9 @@ static NTSTATUS query_filesystems(void* data, ULONG length) { LIST_ENTRY *le, *le2; btrfs_filesystem* bfs = NULL; ULONG itemsize; - + ExAcquireResourceSharedLite(&global_loading_lock, TRUE); - + if (IsListEmpty(&VcbList)) { if (length < sizeof(btrfs_filesystem)) { Status = STATUS_BUFFER_OVERFLOW; @@ -176,113 +66,177 @@ static NTSTATUS query_filesystems(void* data, ULONG length) { } le = VcbList.Flink; - + while (le != &VcbList) { device_extension* Vcb = CONTAINING_RECORD(le, device_extension, list_entry); btrfs_filesystem_device* bfd; - + if (bfs) { bfs->next_entry = itemsize; bfs = (btrfs_filesystem*)((UINT8*)bfs + itemsize); } else bfs = data; - + if (length < offsetof(btrfs_filesystem, device)) { Status = STATUS_BUFFER_OVERFLOW; goto end; } - + itemsize = offsetof(btrfs_filesystem, device); length -= offsetof(btrfs_filesystem, device); - + bfs->next_entry = 0; RtlCopyMemory(&bfs->uuid, &Vcb->superblock.uuid, sizeof(BTRFS_UUID)); - + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); - - bfs->num_devices = Vcb->superblock.num_devices; - + + bfs->num_devices = (UINT32)Vcb->superblock.num_devices; + bfd = NULL; - + le2 = Vcb->devices.Flink; while (le2 != &Vcb->devices) { device* dev = CONTAINING_RECORD(le2, device, list_entry); MOUNTDEV_NAME mdn; - + if (bfd) bfd = (btrfs_filesystem_device*)((UINT8*)bfd + offsetof(btrfs_filesystem_device, name[0]) + bfd->name_length); else bfd = &bfs->device; - + if (length < offsetof(btrfs_filesystem_device, name[0])) { ExReleaseResourceLite(&Vcb->tree_lock); Status = STATUS_BUFFER_OVERFLOW; goto end; } - - itemsize += 
offsetof(btrfs_filesystem_device, name[0]); - length -= offsetof(btrfs_filesystem_device, name[0]); - + + itemsize += (ULONG)offsetof(btrfs_filesystem_device, name[0]); + length -= (ULONG)offsetof(btrfs_filesystem_device, name[0]); + RtlCopyMemory(&bfd->uuid, &dev->devitem.device_uuid, sizeof(BTRFS_UUID)); - - Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &mdn, sizeof(MOUNTDEV_NAME), TRUE, NULL); - if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) { - ExReleaseResourceLite(&Vcb->tree_lock); - ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); - goto end; - } - - if (mdn.NameLength > length) { - ExReleaseResourceLite(&Vcb->tree_lock); - Status = STATUS_BUFFER_OVERFLOW; - goto end; - } - - Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &bfd->name_length, offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength, TRUE, NULL); - if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) { - ExReleaseResourceLite(&Vcb->tree_lock); - ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); - goto end; + + if (dev->devobj) { + Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &mdn, sizeof(MOUNTDEV_NAME), TRUE, NULL); + if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) { + ExReleaseResourceLite(&Vcb->tree_lock); + ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); + goto end; + } + + if (mdn.NameLength > length) { + ExReleaseResourceLite(&Vcb->tree_lock); + Status = STATUS_BUFFER_OVERFLOW; + goto end; + } + + Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &bfd->name_length, (ULONG)offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength, TRUE, NULL); + if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) { + ExReleaseResourceLite(&Vcb->tree_lock); + ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); + goto end; + } + + itemsize += bfd->name_length; + length -= bfd->name_length; + } else { + 
bfd->missing = TRUE; + bfd->name_length = 0; } - - itemsize += bfd->name_length; - length -= bfd->name_length; - + le2 = le2->Flink; } - + ExReleaseResourceLite(&Vcb->tree_lock); - + le = le->Flink; } - + Status = STATUS_SUCCESS; - + end: ExReleaseResourceLite(&global_loading_lock); - + return Status; } +static NTSTATUS probe_volume(void* data, ULONG length, KPROCESSOR_MODE processor_mode) { + MOUNTDEV_NAME* mdn = (MOUNTDEV_NAME*)data; + UNICODE_STRING path, pnp_name; + NTSTATUS Status; + PDEVICE_OBJECT DeviceObject; + PFILE_OBJECT FileObject; + const GUID* guid; + + if (length < sizeof(MOUNTDEV_NAME)) + return STATUS_INVALID_PARAMETER; + + if (length < offsetof(MOUNTDEV_NAME, Name[0]) + mdn->NameLength) + return STATUS_INVALID_PARAMETER; + + TRACE("%.*S\n", mdn->NameLength / sizeof(WCHAR), mdn->Name); + + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) + return STATUS_PRIVILEGE_NOT_HELD; + + path.Buffer = mdn->Name; + path.Length = path.MaximumLength = mdn->NameLength; + + Status = IoGetDeviceObjectPointer(&path, FILE_READ_ATTRIBUTES, &FileObject, &DeviceObject); + if (!NT_SUCCESS(Status)) { + ERR("IoGetDeviceObjectPointer returned %08x\n", Status); + return Status; + } + + Status = get_device_pnp_name(DeviceObject, &pnp_name, &guid); + if (!NT_SUCCESS(Status)) { + ERR("get_device_pnp_name returned %08x\n", Status); + ObDereferenceObject(FileObject); + return Status; + } + + if (RtlCompareMemory(guid, &GUID_DEVINTERFACE_DISK, sizeof(GUID)) == sizeof(GUID)) { + Status = dev_ioctl(DeviceObject, IOCTL_DISK_UPDATE_PROPERTIES, NULL, 0, NULL, 0, TRUE, NULL); + if (!NT_SUCCESS(Status)) + WARN("IOCTL_DISK_UPDATE_PROPERTIES returned %08x\n", Status); + } + + ObDereferenceObject(FileObject); + + volume_removal(drvobj, &pnp_name); + + if (RtlCompareMemory(guid, &GUID_DEVINTERFACE_DISK, sizeof(GUID)) == sizeof(GUID)) + disk_arrival(drvobj, &pnp_name); + else + volume_arrival(drvobj, &pnp_name); + + return STATUS_SUCCESS; +} + 
static NTSTATUS control_ioctl(PIRP Irp) { PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); NTSTATUS Status; - + switch (IrpSp->Parameters.DeviceIoControl.IoControlCode) { case IOCTL_BTRFS_QUERY_FILESYSTEMS: - Status = query_filesystems(map_user_buffer(Irp), IrpSp->Parameters.FileSystemControl.OutputBufferLength); + Status = query_filesystems(map_user_buffer(Irp, NormalPagePriority), IrpSp->Parameters.FileSystemControl.OutputBufferLength); break; - + + case IOCTL_BTRFS_PROBE_VOLUME: + Status = probe_volume(Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.FileSystemControl.InputBufferLength, Irp->RequestorMode); + break; + default: TRACE("unhandled ioctl %x\n", IrpSp->Parameters.DeviceIoControl.IoControlCode); Status = STATUS_NOT_IMPLEMENTED; break; } - + return Status; } -NTSTATUS STDCALL drv_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { +_Dispatch_type_(IRP_MJ_DEVICE_CONTROL) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { NTSTATUS Status; PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); device_extension* Vcb = DeviceObject->DeviceExtension; @@ -291,61 +245,62 @@ NTSTATUS STDCALL drv_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) FsRtlEnterFileSystem(); top_level = is_top_level(Irp); - + Irp->IoStatus.Information = 0; - + if (Vcb) { - if (Vcb->type == VCB_TYPE_PARTITION0) { - Status = part0_device_control(DeviceObject, Irp); - goto end2; - } else if (Vcb->type == VCB_TYPE_CONTROL) { + if (Vcb->type == VCB_TYPE_CONTROL) { Status = control_ioctl(Irp); goto end; + } else if (Vcb->type == VCB_TYPE_VOLUME) { + Status = vol_device_control(DeviceObject, Irp); + goto end; + } else if (Vcb->type != VCB_TYPE_FS) { + Status = STATUS_INVALID_PARAMETER; + goto end; } } else { Status = STATUS_INVALID_PARAMETER; goto end; } - + if (!IrpSp->FileObject || IrpSp->FileObject->FsContext != Vcb->volume_fcb) { Status = STATUS_INVALID_PARAMETER; goto end; } - + switch 
(IrpSp->Parameters.DeviceIoControl.IoControlCode) { case IOCTL_MOUNTDEV_QUERY_STABLE_GUID: Status = mountdev_query_stable_guid(Vcb, Irp); goto end; - - case IOCTL_DISK_GET_PARTITION_INFO_EX: - Status = get_partition_info_ex(Vcb, Irp); - goto end; - + case IOCTL_DISK_IS_WRITABLE: - Status = is_writable(Vcb, Irp); + Status = is_writable(Vcb); goto end; - + default: TRACE("unhandled control code %x\n", IrpSp->Parameters.DeviceIoControl.IoControlCode); break; } - + IoSkipCurrentIrpStackLocation(Irp); - + Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp); - + goto end2; - + end: Irp->IoStatus.Status = Status; if (Status != STATUS_PENDING) IoCompleteRequest(Irp, IO_NO_INCREMENT); - + end2: - if (top_level) + TRACE("returning %08x\n", Status); + + if (top_level) IoSetTopLevelIrp(NULL); - + FsRtlExitFileSystem(); return Status; diff --git a/reactos/drivers/filesystems/btrfs/dirctrl.c b/reactos/drivers/filesystems/btrfs/dirctrl.c index aa23219a611..5c78146153a 100644 --- a/reactos/drivers/filesystems/btrfs/dirctrl.c +++ b/reactos/drivers/filesystems/btrfs/dirctrl.c @@ -1,17 +1,17 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . 
*/ @@ -28,68 +28,69 @@ typedef struct { UNICODE_STRING name; UINT8 type; enum DirEntryType dir_entry_type; + dir_child* dc; } dir_entry; -ULONG STDCALL get_reparse_tag(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type, ULONG atts, PIRP Irp) { +ULONG get_reparse_tag(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type, ULONG atts, BOOL lxss, PIRP Irp) { fcb* fcb; ULONG tag = 0, br; NTSTATUS Status; - + if (type == BTRFS_TYPE_SYMLINK) { - if (called_from_lxss()) + if (lxss) return IO_REPARSE_TAG_LXSS_SYMLINK; else return IO_REPARSE_TAG_SYMLINK; } - + if (type != BTRFS_TYPE_FILE && type != BTRFS_TYPE_DIRECTORY) return 0; - + if (!(atts & FILE_ATTRIBUTE_REPARSE_POINT)) return 0; - + Status = open_fcb(Vcb, subvol, inode, type, NULL, NULL, &fcb, PagedPool, Irp); if (!NT_SUCCESS(Status)) { ERR("open_fcb returned %08x\n", Status); return 0; } - + ExAcquireResourceSharedLite(fcb->Header.Resource, TRUE); - + if (type == BTRFS_TYPE_DIRECTORY) { if (!fcb->reparse_xattr.Buffer || fcb->reparse_xattr.Length < sizeof(ULONG)) goto end; - + RtlCopyMemory(&tag, fcb->reparse_xattr.Buffer, sizeof(ULONG)); } else { - Status = read_file(fcb, (UINT8*)&tag, 0, sizeof(ULONG), &br, NULL, TRUE); + Status = read_file(fcb, (UINT8*)&tag, 0, sizeof(ULONG), &br, NULL); if (!NT_SUCCESS(Status)) { ERR("read_file returned %08x\n", Status); goto end; } - + if (br < sizeof(ULONG)) goto end; } - + end: ExReleaseResourceLite(fcb->Header.Resource); - free_fcb(fcb); - + free_fcb(Vcb, fcb); + return tag; } static ULONG get_ea_len(device_extension* Vcb, root* subvol, UINT64 inode, PIRP Irp) { UINT8* eadata; UINT16 len; - + if (get_xattr(Vcb, subvol, inode, EA_EA, EA_EA_HASH, &eadata, &len, Irp)) { ULONG offset; NTSTATUS Status; - + Status = IoCheckEaBufferValidity((FILE_FULL_EA_INFORMATION*)eadata, len, &offset); - + if (!NT_SUCCESS(Status)) { WARN("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, offset); ExFreePool(eadata); @@ -97,134 +98,133 @@ static ULONG 
get_ea_len(device_extension* Vcb, root* subvol, UINT64 inode, PIRP } else { FILE_FULL_EA_INFORMATION* eainfo; ULONG ealen; - + ealen = 4; eainfo = (FILE_FULL_EA_INFORMATION*)eadata; do { ealen += 5 + eainfo->EaNameLength + eainfo->EaValueLength; - + if (eainfo->NextEntryOffset == 0) break; - + eainfo = (FILE_FULL_EA_INFORMATION*)(((UINT8*)eainfo) + eainfo->NextEntryOffset); } while (TRUE); - + ExFreePool(eadata); - + return ealen; } } else return 0; } -static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, LONG* len, PIRP Irp, dir_entry* de, root* r) { +static NTSTATUS query_dir_item(fcb* fcb, ccb* ccb, void* buf, LONG* len, PIRP Irp, dir_entry* de, root* r) { PIO_STACK_LOCATION IrpSp; - UINT32 needed; + LONG needed; UINT64 inode; INODE_ITEM ii; NTSTATUS Status; - ULONG atts, ealen; - + ULONG atts = 0, ealen = 0; + file_ref* fileref = ccb->fileref; + IrpSp = IoGetCurrentIrpStackLocation(Irp); - + if (de->key.obj_type == TYPE_ROOT_ITEM) { // subvol LIST_ENTRY* le; - + r = NULL; - + le = fcb->Vcb->roots.Flink; while (le != &fcb->Vcb->roots) { root* subvol = CONTAINING_RECORD(le, root, list_entry); - + if (subvol->id == de->key.obj_id) { r = subvol; break; } - + le = le->Flink; } - - if (!r) { - ERR("could not find root %llx\n", de->key.obj_id); - return STATUS_OBJECT_NAME_NOT_FOUND; - } - + + if (r && r->parent != fcb->subvol->id) + r = NULL; + inode = SUBVOL_ROOT_INODE; } else { inode = de->key.obj_id; } - + if (IrpSp->Parameters.QueryDirectory.FileInformationClass != FileNamesInformation) { // FIXME - object ID and reparse point classes too? 
switch (de->dir_entry_type) { case DirEntryType_File: { - LIST_ENTRY* le; - BOOL found = FALSE; - - if (!IsListEmpty(&r->fcbs)) { - le = r->fcbs.Flink; - while (le != &r->fcbs) { - struct _fcb* fcb2 = CONTAINING_RECORD(le, struct _fcb, list_entry); - - if (fcb2->inode == inode && !fcb2->ads) { - ii = fcb2->inode_item; - atts = fcb2->atts; - ealen = fcb2->ealen; - found = TRUE; - break; - } else if (fcb2->inode > inode) - break; - - le = le->Flink; - } - } - - if (!found) { - KEY searchkey; - traverse_ptr tp; - - searchkey.obj_id = inode; - searchkey.obj_type = TYPE_INODE_ITEM; - searchkey.offset = 0xffffffffffffffff; - - Status = find_item(fcb->Vcb, r, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - return Status; - } - - if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { - ERR("could not find inode item for inode %llx in root %llx\n", inode, r->id); - return STATUS_INTERNAL_ERROR; - } - - RtlZeroMemory(&ii, sizeof(INODE_ITEM)); - - if (tp.item->size > 0) - RtlCopyMemory(&ii, tp.item->data, min(sizeof(INODE_ITEM), tp.item->size)); - - if (IrpSp->Parameters.QueryDirectory.FileInformationClass == FileBothDirectoryInformation || - IrpSp->Parameters.QueryDirectory.FileInformationClass == FileDirectoryInformation || - IrpSp->Parameters.QueryDirectory.FileInformationClass == FileFullDirectoryInformation || - IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdBothDirectoryInformation || - IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdFullDirectoryInformation) { - - BOOL dotfile = de->name.Length > sizeof(WCHAR) && de->name.Buffer[0] == '.'; - - atts = get_file_attributes(fcb->Vcb, &ii, r, inode, de->type, dotfile, FALSE, Irp); + if (!r) { + LARGE_INTEGER time; + + ii = fcb->Vcb->dummy_fcb->inode_item; + atts = fcb->Vcb->dummy_fcb->atts; + ealen = fcb->Vcb->dummy_fcb->ealen; + + KeQuerySystemTime(&time); + win_time_to_unix(time, 
&ii.otime); + ii.st_atime = ii.st_mtime = ii.st_ctime = ii.otime; + } else { + BOOL found = FALSE; + + if (de->dc && de->dc->fileref && de->dc->fileref->fcb) { + ii = de->dc->fileref->fcb->inode_item; + atts = de->dc->fileref->fcb->atts; + ealen = de->dc->fileref->fcb->ealen; + found = TRUE; } - - if (IrpSp->Parameters.QueryDirectory.FileInformationClass == FileBothDirectoryInformation || - IrpSp->Parameters.QueryDirectory.FileInformationClass == FileFullDirectoryInformation || - IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdBothDirectoryInformation || - IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdFullDirectoryInformation) { - ealen = get_ea_len(fcb->Vcb, r, inode, Irp); + + if (!found) { + KEY searchkey; + traverse_ptr tp; + + searchkey.obj_id = inode; + searchkey.obj_type = TYPE_INODE_ITEM; + searchkey.offset = 0xffffffffffffffff; + + Status = find_item(fcb->Vcb, r, &tp, &searchkey, FALSE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("error - find_item returned %08x\n", Status); + return Status; + } + + if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { + ERR("could not find inode item for inode %llx in root %llx\n", inode, r->id); + return STATUS_INTERNAL_ERROR; + } + + RtlZeroMemory(&ii, sizeof(INODE_ITEM)); + + if (tp.item->size > 0) + RtlCopyMemory(&ii, tp.item->data, min(sizeof(INODE_ITEM), tp.item->size)); + + if (IrpSp->Parameters.QueryDirectory.FileInformationClass == FileBothDirectoryInformation || + IrpSp->Parameters.QueryDirectory.FileInformationClass == FileDirectoryInformation || + IrpSp->Parameters.QueryDirectory.FileInformationClass == FileFullDirectoryInformation || + IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdBothDirectoryInformation || + IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdFullDirectoryInformation) { + + BOOL dotfile = de->name.Length > sizeof(WCHAR) && de->name.Buffer[0] == '.'; + + atts = get_file_attributes(fcb->Vcb, r, 
inode, de->type, dotfile, FALSE, Irp); + } + + if (IrpSp->Parameters.QueryDirectory.FileInformationClass == FileBothDirectoryInformation || + IrpSp->Parameters.QueryDirectory.FileInformationClass == FileFullDirectoryInformation || + IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdBothDirectoryInformation || + IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdFullDirectoryInformation) { + ealen = get_ea_len(fcb->Vcb, r, inode, Irp); + } } } - + break; } - + case DirEntryType_Self: ii = fcb->inode_item; r = fcb->subvol; @@ -232,7 +232,7 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L atts = fcb->atts; ealen = fcb->ealen; break; - + case DirEntryType_Parent: if (fileref && fileref->parent) { ii = fileref->parent->fcb->inode_item; @@ -246,22 +246,25 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L } break; } + + if (atts == 0) + atts = FILE_ATTRIBUTE_NORMAL; } - + switch (IrpSp->Parameters.QueryDirectory.FileInformationClass) { case FileBothDirectoryInformation: { FILE_BOTH_DIR_INFORMATION* fbdi = buf; - + TRACE("FileBothDirectoryInformation\n"); - + needed = sizeof(FILE_BOTH_DIR_INFORMATION) - sizeof(WCHAR) + de->name.Length; - + if (needed > *len) { TRACE("buffer overflow - %u > %u\n", needed, *len); return STATUS_BUFFER_OVERFLOW; } - + fbdi->NextEntryOffset = 0; fbdi->FileIndex = 0; fbdi->CreationTime.QuadPart = unix_time_to_win(&ii.otime); @@ -269,33 +272,39 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L fbdi->LastWriteTime.QuadPart = unix_time_to_win(&ii.st_mtime); fbdi->ChangeTime.QuadPart = unix_time_to_win(&ii.st_ctime); fbdi->EndOfFile.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_size; - fbdi->AllocationSize.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 
0 : ii.st_blocks; + + if (de->type == BTRFS_TYPE_SYMLINK) + fbdi->AllocationSize.QuadPart = 0; + else if (atts & FILE_ATTRIBUTE_SPARSE_FILE) + fbdi->AllocationSize.QuadPart = ii.st_blocks; + else + fbdi->AllocationSize.QuadPart = sector_align(ii.st_size, fcb->Vcb->superblock.sector_size); + fbdi->FileAttributes = atts; fbdi->FileNameLength = de->name.Length; - fbdi->EaSize = atts & FILE_ATTRIBUTE_REPARSE_POINT ? get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp) : ealen; + fbdi->EaSize = (r && atts & FILE_ATTRIBUTE_REPARSE_POINT) ? get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, ccb->lxss, Irp) : ealen; fbdi->ShortNameLength = 0; -// fibdi->ShortName[12]; - + RtlCopyMemory(fbdi->FileName, de->name.Buffer, de->name.Length); - + *len -= needed; - + return STATUS_SUCCESS; } case FileDirectoryInformation: { FILE_DIRECTORY_INFORMATION* fdi = buf; - + TRACE("FileDirectoryInformation\n"); - + needed = sizeof(FILE_DIRECTORY_INFORMATION) - sizeof(WCHAR) + de->name.Length; - + if (needed > *len) { TRACE("buffer overflow - %u > %u\n", needed, *len); return STATUS_BUFFER_OVERFLOW; } - + fdi->NextEntryOffset = 0; fdi->FileIndex = 0; fdi->CreationTime.QuadPart = unix_time_to_win(&ii.otime); @@ -303,30 +312,37 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L fdi->LastWriteTime.QuadPart = unix_time_to_win(&ii.st_mtime); fdi->ChangeTime.QuadPart = unix_time_to_win(&ii.st_ctime); fdi->EndOfFile.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_size; - fdi->AllocationSize.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 
0 : ii.st_blocks; + + if (de->type == BTRFS_TYPE_SYMLINK) + fdi->AllocationSize.QuadPart = 0; + else if (atts & FILE_ATTRIBUTE_SPARSE_FILE) + fdi->AllocationSize.QuadPart = ii.st_blocks; + else + fdi->AllocationSize.QuadPart = sector_align(ii.st_size, fcb->Vcb->superblock.sector_size); + fdi->FileAttributes = atts; fdi->FileNameLength = de->name.Length; - + RtlCopyMemory(fdi->FileName, de->name.Buffer, de->name.Length); - + *len -= needed; - + return STATUS_SUCCESS; } - + case FileFullDirectoryInformation: { FILE_FULL_DIR_INFORMATION* ffdi = buf; - + TRACE("FileFullDirectoryInformation\n"); - + needed = sizeof(FILE_FULL_DIR_INFORMATION) - sizeof(WCHAR) + de->name.Length; - + if (needed > *len) { TRACE("buffer overflow - %u > %u\n", needed, *len); return STATUS_BUFFER_OVERFLOW; } - + ffdi->NextEntryOffset = 0; ffdi->FileIndex = 0; ffdi->CreationTime.QuadPart = unix_time_to_win(&ii.otime); @@ -334,34 +350,38 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L ffdi->LastWriteTime.QuadPart = unix_time_to_win(&ii.st_mtime); ffdi->ChangeTime.QuadPart = unix_time_to_win(&ii.st_ctime); ffdi->EndOfFile.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_size; - ffdi->AllocationSize.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_blocks; + + if (de->type == BTRFS_TYPE_SYMLINK) + ffdi->AllocationSize.QuadPart = 0; + else if (atts & FILE_ATTRIBUTE_SPARSE_FILE) + ffdi->AllocationSize.QuadPart = ii.st_blocks; + else + ffdi->AllocationSize.QuadPart = sector_align(ii.st_size, fcb->Vcb->superblock.sector_size); + ffdi->FileAttributes = atts; ffdi->FileNameLength = de->name.Length; - ffdi->EaSize = atts & FILE_ATTRIBUTE_REPARSE_POINT ? get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp) : ealen; - + ffdi->EaSize = (r && atts & FILE_ATTRIBUTE_REPARSE_POINT) ? 
get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, ccb->lxss, Irp) : ealen; + RtlCopyMemory(ffdi->FileName, de->name.Buffer, de->name.Length); - + *len -= needed; - + return STATUS_SUCCESS; } case FileIdBothDirectoryInformation: { FILE_ID_BOTH_DIR_INFORMATION* fibdi = buf; - + TRACE("FileIdBothDirectoryInformation\n"); - + needed = sizeof(FILE_ID_BOTH_DIR_INFORMATION) - sizeof(WCHAR) + de->name.Length; - + if (needed > *len) { TRACE("buffer overflow - %u > %u\n", needed, *len); return STATUS_BUFFER_OVERFLOW; } - -// if (!buf) -// return STATUS_INVALID_POINTER; - + fibdi->NextEntryOffset = 0; fibdi->FileIndex = 0; fibdi->CreationTime.QuadPart = unix_time_to_win(&ii.otime); @@ -369,37 +389,40 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L fibdi->LastWriteTime.QuadPart = unix_time_to_win(&ii.st_mtime); fibdi->ChangeTime.QuadPart = unix_time_to_win(&ii.st_ctime); fibdi->EndOfFile.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_size; - fibdi->AllocationSize.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_blocks; + + if (de->type == BTRFS_TYPE_SYMLINK) + fibdi->AllocationSize.QuadPart = 0; + else if (atts & FILE_ATTRIBUTE_SPARSE_FILE) + fibdi->AllocationSize.QuadPart = ii.st_blocks; + else + fibdi->AllocationSize.QuadPart = sector_align(ii.st_size, fcb->Vcb->superblock.sector_size); + fibdi->FileAttributes = atts; fibdi->FileNameLength = de->name.Length; - fibdi->EaSize = atts & FILE_ATTRIBUTE_REPARSE_POINT ? get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp) : ealen; + fibdi->EaSize = (r && atts & FILE_ATTRIBUTE_REPARSE_POINT) ? get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, ccb->lxss, Irp) : ealen; fibdi->ShortNameLength = 0; -// fibdi->ShortName[12]; - fibdi->FileId.QuadPart = make_file_id(r, inode); - + fibdi->FileId.QuadPart = r ? 
make_file_id(r, inode) : make_file_id(fcb->Vcb->dummy_fcb->subvol, fcb->Vcb->dummy_fcb->inode); + RtlCopyMemory(fibdi->FileName, de->name.Buffer, de->name.Length); - + *len -= needed; - + return STATUS_SUCCESS; } case FileIdFullDirectoryInformation: { FILE_ID_FULL_DIR_INFORMATION* fifdi = buf; - + TRACE("FileIdFullDirectoryInformation\n"); - + needed = sizeof(FILE_ID_FULL_DIR_INFORMATION) - sizeof(WCHAR) + de->name.Length; - + if (needed > *len) { TRACE("buffer overflow - %u > %u\n", needed, *len); return STATUS_BUFFER_OVERFLOW; } - -// if (!buf) -// return STATUS_INVALID_POINTER; - + fifdi->NextEntryOffset = 0; fifdi->FileIndex = 0; fifdi->CreationTime.QuadPart = unix_time_to_win(&ii.otime); @@ -407,40 +430,47 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L fifdi->LastWriteTime.QuadPart = unix_time_to_win(&ii.st_mtime); fifdi->ChangeTime.QuadPart = unix_time_to_win(&ii.st_ctime); fifdi->EndOfFile.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_size; - fifdi->AllocationSize.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_blocks; + + if (de->type == BTRFS_TYPE_SYMLINK) + fifdi->AllocationSize.QuadPart = 0; + else if (atts & FILE_ATTRIBUTE_SPARSE_FILE) + fifdi->AllocationSize.QuadPart = ii.st_blocks; + else + fifdi->AllocationSize.QuadPart = sector_align(ii.st_size, fcb->Vcb->superblock.sector_size); + fifdi->FileAttributes = atts; fifdi->FileNameLength = de->name.Length; - fifdi->EaSize = atts & FILE_ATTRIBUTE_REPARSE_POINT ? get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp) : ealen; - fifdi->FileId.QuadPart = make_file_id(r, inode); - + fifdi->EaSize = (r && atts & FILE_ATTRIBUTE_REPARSE_POINT) ? get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, ccb->lxss, Irp) : ealen; + fifdi->FileId.QuadPart = r ? 
make_file_id(r, inode) : make_file_id(fcb->Vcb->dummy_fcb->subvol, fcb->Vcb->dummy_fcb->inode); + RtlCopyMemory(fifdi->FileName, de->name.Buffer, de->name.Length); - + *len -= needed; - + return STATUS_SUCCESS; } case FileNamesInformation: { FILE_NAMES_INFORMATION* fni = buf; - + TRACE("FileNamesInformation\n"); - + needed = sizeof(FILE_NAMES_INFORMATION) - sizeof(WCHAR) + de->name.Length; - + if (needed > *len) { TRACE("buffer overflow - %u > %u\n", needed, *len); return STATUS_BUFFER_OVERFLOW; } - + fni->NextEntryOffset = 0; fni->FileIndex = 0; fni->FileNameLength = de->name.Length; - + RtlCopyMemory(fni->FileName, de->name.Buffer, de->name.Length); - + *len -= needed; - + return STATUS_SUCCESS; } @@ -460,25 +490,25 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L WARN("Unknown FileInformationClass %u\n", IrpSp->Parameters.QueryDirectory.FileInformationClass); return STATUS_NOT_IMPLEMENTED; } - + return STATUS_NO_MORE_FILES; } -static NTSTATUS STDCALL next_dir_entry(file_ref* fileref, UINT64* offset, dir_entry* de, dir_child** pdc, PIRP Irp) { +static NTSTATUS next_dir_entry(file_ref* fileref, UINT64* offset, dir_entry* de, dir_child** pdc) { LIST_ENTRY* le; dir_child* dc; - + if (*pdc) { dir_child* dc2 = *pdc; - + if (dc2->list_entry_index.Flink != &fileref->fcb->dir_children_index) dc = CONTAINING_RECORD(dc2->list_entry_index.Flink, dir_child, list_entry_index); else dc = NULL; - + goto next; } - + if (fileref->parent) { // don't return . and .. 
if root directory if (*offset == 0) { de->key.obj_id = fileref->fcb->inode; @@ -488,10 +518,10 @@ static NTSTATUS STDCALL next_dir_entry(file_ref* fileref, UINT64* offset, dir_en de->name.Buffer = L"."; de->name.Length = de->name.MaximumLength = sizeof(WCHAR); de->type = BTRFS_TYPE_DIRECTORY; - + *offset = 1; *pdc = NULL; - + return STATUS_SUCCESS; } else if (*offset == 1) { de->key.obj_id = fileref->parent->fcb->inode; @@ -501,77 +531,74 @@ static NTSTATUS STDCALL next_dir_entry(file_ref* fileref, UINT64* offset, dir_en de->name.Buffer = L".."; de->name.Length = de->name.MaximumLength = sizeof(WCHAR) * 2; de->type = BTRFS_TYPE_DIRECTORY; - + *offset = 2; *pdc = NULL; - + return STATUS_SUCCESS; } } - + if (*offset < 2) *offset = 2; - + dc = NULL; le = fileref->fcb->dir_children_index.Flink; - + // skip entries before offset while (le != &fileref->fcb->dir_children_index) { dir_child* dc2 = CONTAINING_RECORD(le, dir_child, list_entry_index); - + if (dc2->index >= *offset) { dc = dc2; break; } - + le = le->Flink; } - + next: if (!dc) return STATUS_NO_MORE_FILES; - + de->key = dc->key; de->name = dc->name; de->type = dc->type; de->dir_entry_type = DirEntryType_File; - + de->dc = dc; + *offset = dc->index + 1; *pdc = dc; - + return STATUS_SUCCESS; } -static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { +static NTSTATUS query_directory(PIRP Irp) { PIO_STACK_LOCATION IrpSp; NTSTATUS Status, status2; fcb* fcb; ccb* ccb; file_ref* fileref; + device_extension* Vcb; void* buf; UINT8 *curitem, *lastitem; LONG length; ULONG count; BOOL has_wildcard = FALSE, specific_file = FALSE, initial; -// UINT64 num_reads_orig; dir_entry de; UINT64 newoffset; ANSI_STRING utf8; dir_child* dc = NULL; - + TRACE("query directory\n"); - -// get_uid(); // TESTING - -// num_reads_orig = num_reads; - + IrpSp = IoGetCurrentIrpStackLocation(Irp); fcb = IrpSp->FileObject->FsContext; ccb = IrpSp->FileObject->FsContext2; fileref = ccb ? 
ccb->fileref : NULL; - + utf8.Buffer = NULL; - + if (!fileref) return STATUS_INVALID_PARAMETER; @@ -579,24 +606,39 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { ERR("ccb was NULL\n"); return STATUS_INVALID_PARAMETER; } - + + if (!fcb) { + ERR("fcb was NULL\n"); + return STATUS_INVALID_PARAMETER; + } + if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_LIST_DIRECTORY)) { WARN("insufficient privileges\n"); return STATUS_ACCESS_DENIED; } - - ExAcquireResourceSharedLite(&fcb->Vcb->tree_lock, TRUE); - ExAcquireResourceSharedLite(&fcb->Vcb->fcb_lock, TRUE); - + + Vcb = fcb->Vcb; + + if (!Vcb) { + ERR("Vcb was NULL\n"); + return STATUS_INVALID_PARAMETER; + } + + if (fileref->fcb == Vcb->dummy_fcb) + return STATUS_NO_MORE_FILES; + + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + ExAcquireResourceSharedLite(&Vcb->fcb_lock, TRUE); + TRACE("%S\n", file_desc(IrpSp->FileObject)); - + if (IrpSp->Flags == 0) { TRACE("QD flags: (none)\n"); } else { ULONG flags = IrpSp->Flags; - + TRACE("QD flags:\n"); - + if (flags & SL_INDEX_SPECIFIED) { TRACE(" SL_INDEX_SPECIFIED\n"); flags &= ~SL_INDEX_SPECIFIED; @@ -606,7 +648,7 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { TRACE(" SL_RESTART_SCAN\n"); flags &= ~SL_RESTART_SCAN; } - + if (flags & SL_RETURN_SINGLE_ENTRY) { TRACE(" SL_RETURN_SINGLE_ENTRY\n"); flags &= ~SL_RETURN_SINGLE_ENTRY; @@ -615,24 +657,25 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { if (flags != 0) TRACE(" unknown flags: %u\n", flags); } - + initial = !ccb->query_string.Buffer; - + if (IrpSp->Flags & SL_RESTART_SCAN) { ccb->query_dir_offset = 0; - + if (ccb->query_string.Buffer) { RtlFreeUnicodeString(&ccb->query_string); ccb->query_string.Buffer = NULL; } } - + if (IrpSp->Parameters.QueryDirectory.FileName && IrpSp->Parameters.QueryDirectory.FileName->Length > 1) { TRACE("QD filename: %.*S\n", IrpSp->Parameters.QueryDirectory.FileName->Length / 
sizeof(WCHAR), IrpSp->Parameters.QueryDirectory.FileName->Buffer); - + if (IrpSp->Parameters.QueryDirectory.FileName->Buffer[0] != '*') { specific_file = TRUE; - if (!ccb->case_sensitive || FsRtlDoesNameContainWildCards(IrpSp->Parameters.QueryDirectory.FileName)) { + + if (FsRtlDoesNameContainWildCards(IrpSp->Parameters.QueryDirectory.FileName)) { has_wildcard = TRUE; specific_file = FALSE; } @@ -640,7 +683,7 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { if (ccb->query_string.Buffer) RtlFreeUnicodeString(&ccb->query_string); - + if (has_wildcard) RtlUpcaseUnicodeString(&ccb->query_string, IrpSp->Parameters.QueryDirectory.FileName, TRUE); else { @@ -650,97 +693,98 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { Status = STATUS_INSUFFICIENT_RESOURCES; goto end2; } - + ccb->query_string.Length = ccb->query_string.MaximumLength = IrpSp->Parameters.QueryDirectory.FileName->Length; RtlCopyMemory(ccb->query_string.Buffer, IrpSp->Parameters.QueryDirectory.FileName->Buffer, IrpSp->Parameters.QueryDirectory.FileName->Length); } - + ccb->has_wildcard = has_wildcard; ccb->specific_file = specific_file; } else { has_wildcard = ccb->has_wildcard; specific_file = ccb->specific_file; - + if (!(IrpSp->Flags & SL_RESTART_SCAN)) { initial = FALSE; - + if (specific_file) { Status = STATUS_NO_MORE_FILES; goto end2; } } } - + if (ccb->query_string.Buffer) { TRACE("query string = %.*S\n", ccb->query_string.Length / sizeof(WCHAR), ccb->query_string.Buffer); } - + newoffset = ccb->query_dir_offset; - + ExAcquireResourceSharedLite(&fileref->fcb->nonpaged->dir_children_lock, TRUE); - - Status = next_dir_entry(fileref, &newoffset, &de, &dc, Irp); - + + Status = next_dir_entry(fileref, &newoffset, &de, &dc); + if (!NT_SUCCESS(Status)) { if (Status == STATUS_NO_MORE_FILES && initial) Status = STATUS_NO_SUCH_FILE; goto end; } - + ccb->query_dir_offset = newoffset; - buf = map_user_buffer(Irp); - + buf = 
map_user_buffer(Irp, NormalPagePriority); + if (Irp->MdlAddress && !buf) { ERR("MmGetSystemAddressForMdlSafe returned NULL\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + length = IrpSp->Parameters.QueryDirectory.Length; - + if (specific_file) { BOOL found = FALSE; UNICODE_STRING us; LIST_ENTRY* le; UINT32 hash; UINT8 c; - + us.Buffer = NULL; - + if (!ccb->case_sensitive) { Status = RtlUpcaseUnicodeString(&us, &ccb->query_string, TRUE); if (!NT_SUCCESS(Status)) { ERR("RtlUpcaseUnicodeString returned %08x\n", Status); goto end; } - + hash = calc_crc32c(0xffffffff, (UINT8*)us.Buffer, us.Length); } else hash = calc_crc32c(0xffffffff, (UINT8*)ccb->query_string.Buffer, ccb->query_string.Length); - + c = hash >> 24; - + if (ccb->case_sensitive) { if (fileref->fcb->hash_ptrs[c]) { le = fileref->fcb->hash_ptrs[c]; while (le != &fileref->fcb->dir_children_hash) { dir_child* dc2 = CONTAINING_RECORD(le, dir_child, list_entry_hash); - + if (dc2->hash == hash) { if (dc2->name.Length == ccb->query_string.Length && RtlCompareMemory(dc2->name.Buffer, ccb->query_string.Buffer, ccb->query_string.Length) == ccb->query_string.Length) { found = TRUE; - + de.key = dc2->key; de.name = dc2->name; de.type = dc2->type; de.dir_entry_type = DirEntryType_File; - + de.dc = dc2; + break; } } else if (dc2->hash > hash) break; - + le = le->Flink; } } @@ -749,29 +793,30 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { le = fileref->fcb->hash_ptrs_uc[c]; while (le != &fileref->fcb->dir_children_hash_uc) { dir_child* dc2 = CONTAINING_RECORD(le, dir_child, list_entry_hash_uc); - + if (dc2->hash_uc == hash) { if (dc2->name_uc.Length == us.Length && RtlCompareMemory(dc2->name_uc.Buffer, us.Buffer, us.Length) == us.Length) { found = TRUE; - + de.key = dc2->key; de.name = dc2->name; de.type = dc2->type; de.dir_entry_type = DirEntryType_File; - + de.dc = dc2; + break; } } else if (dc2->hash_uc > hash) break; - + le = le->Flink; } } } - + if (us.Buffer) 
ExFreePool(us.Buffer); - + if (!found) { Status = STATUS_NO_SUCH_FILE; goto end; @@ -779,28 +824,28 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { } else if (has_wildcard) { while (!FsRtlIsNameInExpression(&ccb->query_string, &de.name, !ccb->case_sensitive, NULL)) { newoffset = ccb->query_dir_offset; - Status = next_dir_entry(fileref, &newoffset, &de, &dc, Irp); - + Status = next_dir_entry(fileref, &newoffset, &de, &dc); + if (NT_SUCCESS(Status)) ccb->query_dir_offset = newoffset; else { if (Status == STATUS_NO_MORE_FILES && initial) Status = STATUS_NO_SUCH_FILE; - + goto end; } } } - + TRACE("file(0) = %.*S\n", de.name.Length / sizeof(WCHAR), de.name.Buffer); TRACE("offset = %u\n", ccb->query_dir_offset - 1); - Status = query_dir_item(fcb, fileref, buf, &length, Irp, &de, fcb->subvol); + Status = query_dir_item(fcb, ccb, buf, &length, Irp, &de, fcb->subvol); count = 0; if (NT_SUCCESS(Status) && !(IrpSp->Flags & SL_RETURN_SINGLE_ENTRY) && !specific_file) { lastitem = (UINT8*)buf; - + while (length > 0) { switch (IrpSp->Parameters.QueryDirectory.FileInformationClass) { case FileBothDirectoryInformation: @@ -810,35 +855,35 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { case FileIdFullDirectoryInformation: length -= length % 8; break; - + case FileNamesInformation: length -= length % 4; break; - + default: WARN("unhandled file information class %u\n", IrpSp->Parameters.QueryDirectory.FileInformationClass); break; } - + if (length > 0) { newoffset = ccb->query_dir_offset; - Status = next_dir_entry(fileref, &newoffset, &de, &dc, Irp); + Status = next_dir_entry(fileref, &newoffset, &de, &dc); if (NT_SUCCESS(Status)) { if (!has_wildcard || FsRtlIsNameInExpression(&ccb->query_string, &de.name, !ccb->case_sensitive, NULL)) { curitem = (UINT8*)buf + IrpSp->Parameters.QueryDirectory.Length - length; count++; - + TRACE("file(%u) %u = %.*S\n", count, curitem - (UINT8*)buf, de.name.Length / sizeof(WCHAR), 
de.name.Buffer); TRACE("offset = %u\n", ccb->query_dir_offset - 1); - - status2 = query_dir_item(fcb, fileref, curitem, &length, Irp, &de, fcb->subvol); - + + status2 = query_dir_item(fcb, ccb, curitem, &length, Irp, &de, fcb->subvol); + if (NT_SUCCESS(status2)) { ULONG* lastoffset = (ULONG*)lastitem; - + *lastoffset = (ULONG)(curitem - lastitem); ccb->query_dir_offset = newoffset; - + lastitem = curitem; } else break; @@ -847,143 +892,167 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { } else { if (Status == STATUS_NO_MORE_FILES) Status = STATUS_SUCCESS; - + break; } } else break; } } - + Irp->IoStatus.Information = IrpSp->Parameters.QueryDirectory.Length - length; - + end: ExReleaseResourceLite(&fileref->fcb->nonpaged->dir_children_lock); - + end2: - ExReleaseResourceLite(&fcb->Vcb->fcb_lock); - ExReleaseResourceLite(&fcb->Vcb->tree_lock); - + ExReleaseResourceLite(&Vcb->fcb_lock); + ExReleaseResourceLite(&Vcb->tree_lock); + TRACE("returning %08x\n", Status); - + if (utf8.Buffer) ExFreePool(utf8.Buffer); return Status; } -static NTSTATUS STDCALL notify_change_directory(device_extension* Vcb, PIRP Irp) { +static NTSTATUS notify_change_directory(device_extension* Vcb, PIRP Irp) { PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); PFILE_OBJECT FileObject = IrpSp->FileObject; fcb* fcb = FileObject->FsContext; ccb* ccb = FileObject->FsContext2; - file_ref* fileref = ccb->fileref; + file_ref* fileref = ccb ? 
ccb->fileref : NULL; NTSTATUS Status; - + TRACE("IRP_MN_NOTIFY_CHANGE_DIRECTORY\n"); - + if (!ccb) { ERR("ccb was NULL\n"); return STATUS_INVALID_PARAMETER; } - + if (!fileref) { ERR("no fileref\n"); return STATUS_INVALID_PARAMETER; } - + if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_LIST_DIRECTORY)) { WARN("insufficient privileges\n"); return STATUS_ACCESS_DENIED; } - + ExAcquireResourceSharedLite(&fcb->Vcb->tree_lock, TRUE); ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); - + if (fcb->type != BTRFS_TYPE_DIRECTORY) { Status = STATUS_INVALID_PARAMETER; goto end; } - + // FIXME - raise exception if FCB marked for deletion? - + TRACE("%S\n", file_desc(FileObject)); if (ccb->filename.Length == 0) { - Status = fileref_get_filename(fileref, &ccb->filename, NULL); - if (!NT_SUCCESS(Status)) { + ULONG reqlen; + + ccb->filename.MaximumLength = ccb->filename.Length = 0; + + Status = fileref_get_filename(fileref, &ccb->filename, NULL, &reqlen); + if (Status == STATUS_BUFFER_OVERFLOW) { + ccb->filename.Buffer = ExAllocatePoolWithTag(PagedPool, reqlen, ALLOC_TAG); + if (!ccb->filename.Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + ccb->filename.MaximumLength = (UINT16)reqlen; + + Status = fileref_get_filename(fileref, &ccb->filename, NULL, &reqlen); + if (!NT_SUCCESS(Status)) { + ERR("fileref_get_filename returned %08x\n", Status); + goto end; + } + } else { ERR("fileref_get_filename returned %08x\n", Status); goto end; } } - + FsRtlNotifyFilterChangeDirectory(Vcb->NotifySync, &Vcb->DirNotifyList, FileObject->FsContext2, (PSTRING)&ccb->filename, IrpSp->Flags & SL_WATCH_TREE, FALSE, IrpSp->Parameters.NotifyDirectory.CompletionFilter, Irp, NULL, NULL, NULL); - + Status = STATUS_PENDING; - + end: ExReleaseResourceLite(fcb->Header.Resource); ExReleaseResourceLite(&fcb->Vcb->tree_lock); - + return Status; } -NTSTATUS STDCALL drv_directory_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { 
+_Dispatch_type_(IRP_MJ_DIRECTORY_CONTROL) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_directory_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { PIO_STACK_LOCATION IrpSp; NTSTATUS Status; ULONG func; BOOL top_level; device_extension* Vcb = DeviceObject->DeviceExtension; - TRACE("directory control\n"); - FsRtlEnterFileSystem(); + TRACE("directory control\n"); + top_level = is_top_level(Irp); - - if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) { - Status = part0_passthrough(DeviceObject, Irp); - goto exit; + + if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + Status = vol_directory_control(DeviceObject, Irp); + goto end; + } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { + Status = STATUS_INVALID_PARAMETER; + goto end; } - + IrpSp = IoGetCurrentIrpStackLocation(Irp); - + Irp->IoStatus.Information = 0; - + func = IrpSp->MinorFunction; - + switch (func) { case IRP_MN_NOTIFY_CHANGE_DIRECTORY: Status = notify_change_directory(Vcb, Irp); break; - + case IRP_MN_QUERY_DIRECTORY: - Status = query_directory(DeviceObject, Irp); + Status = query_directory(Irp); break; - + default: WARN("unknown minor %u\n", func); Status = STATUS_NOT_IMPLEMENTED; Irp->IoStatus.Status = Status; break; } - + if (Status == STATUS_PENDING) goto exit; - + +end: Irp->IoStatus.Status = Status; -// if (Irp->UserIosb) -// *Irp->UserIosb = Irp->IoStatus; - IoCompleteRequest(Irp, IO_DISK_INCREMENT); - + exit: - if (top_level) + TRACE("returning %08x\n", Status); + + if (top_level) IoSetTopLevelIrp(NULL); - + FsRtlExitFileSystem(); return Status; diff --git a/reactos/drivers/filesystems/btrfs/extent-tree.c b/reactos/drivers/filesystems/btrfs/extent-tree.c index 19b92328613..8cd7000cece 100644 --- a/reactos/drivers/filesystems/btrfs/extent-tree.c +++ b/reactos/drivers/filesystems/btrfs/extent-tree.c @@ -1,17 +1,17 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. 
- * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . */ @@ -19,78 +19,25 @@ typedef struct { UINT8 type; - + union { EXTENT_DATA_REF edr; SHARED_DATA_REF sdr; TREE_BLOCK_REF tbr; SHARED_BLOCK_REF sbr; }; - + UINT64 hash; LIST_ENTRY list_entry; } extent_ref; -static __inline ULONG get_extent_data_len(UINT8 type) { - switch (type) { - case TYPE_TREE_BLOCK_REF: - return sizeof(TREE_BLOCK_REF); - - case TYPE_EXTENT_DATA_REF: - return sizeof(EXTENT_DATA_REF); - - case TYPE_EXTENT_REF_V0: - return sizeof(EXTENT_REF_V0); - - case TYPE_SHARED_BLOCK_REF: - return sizeof(SHARED_BLOCK_REF); - - case TYPE_SHARED_DATA_REF: - return sizeof(SHARED_DATA_REF); - - default: - return 0; - } -} - -static __inline UINT64 get_extent_data_refcount(UINT8 type, void* data) { - switch (type) { - case TYPE_TREE_BLOCK_REF: - return 1; - - case TYPE_EXTENT_DATA_REF: - { - EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)data; - return edr->count; - } - - case TYPE_EXTENT_REF_V0: - { - EXTENT_REF_V0* erv0 = (EXTENT_REF_V0*)data; - return erv0->count; - } - - case TYPE_SHARED_BLOCK_REF: - return 1; - - case TYPE_SHARED_DATA_REF: - { - SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)data; - return sdr->count; - } - - default: - return 0; - } -} - UINT64 get_extent_data_ref_hash2(UINT64 root, UINT64 objid, UINT64 offset) { UINT32 high_crc = 0xffffffff, low_crc = 0xffffffff; high_crc = calc_crc32c(high_crc, (UINT8*)&root, sizeof(UINT64)); low_crc 
= calc_crc32c(low_crc, (UINT8*)&objid, sizeof(UINT64)); low_crc = calc_crc32c(low_crc, (UINT8*)&offset, sizeof(UINT64)); - + return ((UINT64)high_crc << 31) ^ (UINT64)low_crc; } @@ -120,7 +67,7 @@ static void free_extent_refs(LIST_ENTRY* extent_refs) { while (!IsListEmpty(extent_refs)) { LIST_ENTRY* le = RemoveHeadList(extent_refs); extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry); - + ExFreePool(er); } } @@ -128,167 +75,206 @@ static void free_extent_refs(LIST_ENTRY* extent_refs) { static NTSTATUS add_shared_data_extent_ref(LIST_ENTRY* extent_refs, UINT64 parent, UINT32 count) { extent_ref* er2; LIST_ENTRY* le; - + if (!IsListEmpty(extent_refs)) { le = extent_refs->Flink; - + while (le != extent_refs) { extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry); - + if (er->type == TYPE_SHARED_DATA_REF && er->sdr.offset == parent) { er->sdr.count += count; return STATUS_SUCCESS; } - + le = le->Flink; } } - + er2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent_ref), ALLOC_TAG); if (!er2) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + er2->type = TYPE_SHARED_DATA_REF; er2->sdr.offset = parent; er2->sdr.count = count; - + InsertTailList(extent_refs, &er2->list_entry); - + return STATUS_SUCCESS; } static NTSTATUS add_shared_block_extent_ref(LIST_ENTRY* extent_refs, UINT64 parent) { extent_ref* er2; LIST_ENTRY* le; - + if (!IsListEmpty(extent_refs)) { le = extent_refs->Flink; - + while (le != extent_refs) { extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry); - + if (er->type == TYPE_SHARED_BLOCK_REF && er->sbr.offset == parent) return STATUS_SUCCESS; - + le = le->Flink; } } - + er2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent_ref), ALLOC_TAG); if (!er2) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + er2->type = TYPE_SHARED_BLOCK_REF; er2->sbr.offset = parent; - + InsertTailList(extent_refs, &er2->list_entry); - + return STATUS_SUCCESS; } static NTSTATUS 
add_tree_block_extent_ref(LIST_ENTRY* extent_refs, UINT64 root) { extent_ref* er2; LIST_ENTRY* le; - + if (!IsListEmpty(extent_refs)) { le = extent_refs->Flink; - + while (le != extent_refs) { extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry); - + if (er->type == TYPE_TREE_BLOCK_REF && er->tbr.offset == root) return STATUS_SUCCESS; - + le = le->Flink; } } - + er2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent_ref), ALLOC_TAG); if (!er2) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + er2->type = TYPE_TREE_BLOCK_REF; er2->tbr.offset = root; - + InsertTailList(extent_refs, &er2->list_entry); - + return STATUS_SUCCESS; } +static void sort_extent_refs(LIST_ENTRY* extent_refs) { + LIST_ENTRY newlist; + + if (IsListEmpty(extent_refs)) + return; + + // insertion sort + + InitializeListHead(&newlist); + + while (!IsListEmpty(extent_refs)) { + extent_ref* er = CONTAINING_RECORD(RemoveHeadList(extent_refs), extent_ref, list_entry); + LIST_ENTRY* le; + BOOL inserted = FALSE; + + le = newlist.Flink; + while (le != &newlist) { + extent_ref* er2 = CONTAINING_RECORD(le, extent_ref, list_entry); + + if (er->type < er2->type || (er->type == er2->type && er->hash > er2->hash)) { + InsertHeadList(le->Blink, &er->list_entry); + inserted = TRUE; + break; + } + + le = le->Flink; + } + + if (!inserted) + InsertTailList(&newlist, &er->list_entry); + } + + newlist.Flink->Blink = extent_refs; + newlist.Blink->Flink = extent_refs; + extent_refs->Flink = newlist.Flink; + extent_refs->Blink = newlist.Blink; +} + static NTSTATUS construct_extent_item(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 flags, LIST_ENTRY* extent_refs, - KEY* firstitem, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) { + KEY* firstitem, UINT8 level, PIRP Irp) { + NTSTATUS Status; LIST_ENTRY *le, *next_le; UINT64 refcount; - ULONG inline_len; + UINT16 inline_len; BOOL all_inline = TRUE; - extent_ref* first_noninline; + extent_ref* first_noninline = NULL; EXTENT_ITEM* ei; 
UINT8* siptr; - + // FIXME - write skinny extents if is tree and incompat flag set - + if (IsListEmpty(extent_refs)) { WARN("no extent refs found\n"); return STATUS_SUCCESS; } - + refcount = 0; inline_len = sizeof(EXTENT_ITEM); - + if (flags & EXTENT_ITEM_TREE_BLOCK) inline_len += sizeof(EXTENT_ITEM2); - + le = extent_refs->Flink; while (le != extent_refs) { extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry); UINT64 rc; - + next_le = le->Flink; - + rc = get_extent_data_refcount(er->type, &er->edr); - + if (rc == 0) { RemoveEntryList(&er->list_entry); - + ExFreePool(er); } else { - ULONG extlen = get_extent_data_len(er->type); - + UINT16 extlen = get_extent_data_len(er->type); + refcount += rc; - + er->hash = get_extent_hash(er->type, &er->edr); - + if (all_inline) { - if (inline_len + 1 + extlen > Vcb->superblock.node_size / 4) { + if ((UINT16)(inline_len + 1 + extlen) > Vcb->superblock.node_size >> 2) { all_inline = FALSE; first_noninline = er; } else inline_len += extlen + 1; } } - + le = next_le; } - + ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG); if (!ei) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + ei->refcount = refcount; ei->generation = Vcb->superblock.generation; ei->flags = flags; - + if (flags & EXTENT_ITEM_TREE_BLOCK) { EXTENT_ITEM2* ei2 = (EXTENT_ITEM2*)&ei[1]; - + if (firstitem) { ei2->firstitem.obj_id = firstitem->obj_id; ei2->firstitem.obj_type = firstitem->obj_type; @@ -298,106 +284,128 @@ static NTSTATUS construct_extent_item(device_extension* Vcb, UINT64 address, UIN ei2->firstitem.obj_type = 0; ei2->firstitem.offset = 0; } - + ei2->level = level; - + siptr = (UINT8*)&ei2[1]; } else siptr = (UINT8*)&ei[1]; - - // Do we need to sort the inline extent refs? The Linux driver doesn't seem to bother. 
- + + sort_extent_refs(extent_refs); + le = extent_refs->Flink; while (le != extent_refs) { extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry); ULONG extlen = get_extent_data_len(er->type); - + if (!all_inline && er == first_noninline) break; - + *siptr = er->type; siptr++; - + if (extlen > 0) { RtlCopyMemory(siptr, &er->edr, extlen); siptr += extlen; } - + le = le->Flink; } - - if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, size, ei, inline_len, NULL, Irp, rollback)) { - ERR("error - failed to insert item\n"); + + Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, size, ei, inline_len, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); ExFreePool(ei); - return STATUS_INTERNAL_ERROR; + return Status; } - + if (!all_inline) { le = &first_noninline->list_entry; - + while (le != extent_refs) { extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry); - ULONG len = get_extent_data_len(er->type); + UINT16 len; UINT8* data; - - if (len > 0) { + + if (er->type == TYPE_EXTENT_DATA_REF) { + len = sizeof(EXTENT_DATA_REF); + data = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG); - + if (!data) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(data, &er->edr, len); - } else + } else if (er->type == TYPE_SHARED_DATA_REF) { + len = sizeof(UINT32); + + data = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG); + + if (!data) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + *((UINT32*)data) = er->sdr.count; + } else { + len = 0; data = NULL; - - if (!insert_tree_item(Vcb, Vcb->extent_root, address, er->type, er->hash, data, len, NULL, Irp, rollback)) { - ERR("error - failed to insert item\n"); - return STATUS_INTERNAL_ERROR; } - + + Status = insert_tree_item(Vcb, Vcb->extent_root, address, er->type, er->hash, data, len, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + 
if (data) ExFreePool(data); + return Status; + } + le = le->Flink; } } - + return STATUS_SUCCESS; } -static NTSTATUS convert_old_extent(device_extension* Vcb, UINT64 address, BOOL tree, KEY* firstitem, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS convert_old_extent(device_extension* Vcb, UINT64 address, BOOL tree, KEY* firstitem, UINT8 level, PIRP Irp) { NTSTATUS Status; KEY searchkey; traverse_ptr tp, next_tp; LIST_ENTRY extent_refs; UINT64 size; - + InitializeListHead(&extent_refs); - + searchkey.obj_id = address; searchkey.obj_type = TYPE_EXTENT_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("find_item returned %08x\n", Status); return Status; } - + if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { ERR("old-style extent %llx not found\n", address); return STATUS_INTERNAL_ERROR; } - + size = tp.item->key.offset; - - delete_tree_item(Vcb, &tp, rollback); - + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + while (find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) { tp = next_tp; - + if (tp.item->key.obj_id == address && tp.item->key.obj_type == TYPE_EXTENT_REF_V0 && tp.item->size >= sizeof(EXTENT_REF_V0)) { EXTENT_REF_V0* erv0 = (EXTENT_REF_V0*)tp.item->data; - + if (tree) { if (tp.item->key.offset == tp.item->key.obj_id) { // top of the tree Status = add_tree_block_extent_ref(&extent_refs, erv0->root); @@ -419,8 +427,12 @@ static NTSTATUS convert_old_extent(device_extension* Vcb, UINT64 address, BOOL t goto end; } } - - delete_tree_item(Vcb, &tp, rollback); + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + goto end; + } } if (tp.item->key.obj_id > address || tp.item->key.obj_type > TYPE_EXTENT_REF_V0) @@ -428,21 +440,22 @@ static NTSTATUS 
convert_old_extent(device_extension* Vcb, UINT64 address, BOOL t } Status = construct_extent_item(Vcb, address, size, tree ? (EXTENT_ITEM_TREE_BLOCK | EXTENT_ITEM_SHARED_BACKREFS) : EXTENT_ITEM_DATA, - &extent_refs, firstitem, level, Irp, rollback); + &extent_refs, firstitem, level, Irp); if (!NT_SUCCESS(Status)) ERR("construct_extent_item returned %08x\n", Status); end: free_extent_refs(&extent_refs); - + return Status; } -NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) { +NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem, UINT8 level, PIRP Irp) { NTSTATUS Status; KEY searchkey; traverse_ptr tp; - ULONG datalen = get_extent_data_len(type), len, max_extent_item_size; + ULONG len, max_extent_item_size; + UINT16 datalen = get_extent_data_len(type); EXTENT_ITEM* ei; UINT8* ptr; UINT64 inline_rc, offset; @@ -450,65 +463,61 @@ NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 EXTENT_ITEM* newei; BOOL skinny; BOOL is_tree = type == TYPE_TREE_BLOCK_REF || type == TYPE_SHARED_BLOCK_REF; - + if (datalen == 0) { ERR("unrecognized extent type %x\n", type); return STATUS_INTERNAL_ERROR; } - + searchkey.obj_id = address; searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? 
TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + // If entry doesn't exist yet, create new inline extent item - + if (tp.item->key.obj_id != searchkey.obj_id || (tp.item->key.obj_type != TYPE_EXTENT_ITEM && tp.item->key.obj_type != TYPE_METADATA_ITEM)) { - ULONG eisize; - EXTENT_ITEM* ei; - UINT8* ptr; - + UINT16 eisize; + eisize = sizeof(EXTENT_ITEM); if (is_tree && !(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) eisize += sizeof(EXTENT_ITEM2); eisize += sizeof(UINT8); eisize += datalen; - + ei = ExAllocatePoolWithTag(PagedPool, eisize, ALLOC_TAG); if (!ei) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + ei->refcount = get_extent_data_refcount(type, data); ei->generation = Vcb->superblock.generation; ei->flags = is_tree ? EXTENT_ITEM_TREE_BLOCK : EXTENT_ITEM_DATA; ptr = (UINT8*)&ei[1]; - + if (is_tree && !(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) { EXTENT_ITEM2* ei2 = (EXTENT_ITEM2*)ptr; ei2->firstitem = *firstitem; ei2->level = level; ptr = (UINT8*)&ei2[1]; } - + *ptr = type; RtlCopyMemory(ptr + 1, data, datalen); - - if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA && is_tree) { - if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, ei, eisize, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; - } - } else { - if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, size, ei, eisize, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; - } + + if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA && is_tree) + Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, ei, eisize, NULL, Irp); + 
else + Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, size, ei, eisize, NULL, Irp); + + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; } return STATUS_SUCCESS; @@ -520,47 +529,47 @@ NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 skinny = tp.item->key.obj_type == TYPE_METADATA_ITEM; if (tp.item->size == sizeof(EXTENT_ITEM_V0) && !skinny) { - Status = convert_old_extent(Vcb, address, is_tree, firstitem, level, Irp, rollback); - + Status = convert_old_extent(Vcb, address, is_tree, firstitem, level, Irp); + if (!NT_SUCCESS(Status)) { ERR("convert_old_extent returned %08x\n", Status); return Status; } - return increase_extent_refcount(Vcb, address, size, type, data, firstitem, level, Irp, rollback); + return increase_extent_refcount(Vcb, address, size, type, data, firstitem, level, Irp); } - + if (tp.item->size < sizeof(EXTENT_ITEM)) { ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); return STATUS_INTERNAL_ERROR; } - + ei = (EXTENT_ITEM*)tp.item->data; - + len = tp.item->size - sizeof(EXTENT_ITEM); ptr = (UINT8*)&ei[1]; - + if (ei->flags & EXTENT_ITEM_TREE_BLOCK && !skinny) { if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) { ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)); return STATUS_INTERNAL_ERROR; } - + len -= sizeof(EXTENT_ITEM2); ptr += sizeof(EXTENT_ITEM2); } - + inline_rc = 0; - + // Loop through existing inline extent entries - + while (len > 0) { UINT8 secttype = *ptr; ULONG sectlen = get_extent_data_len(secttype); UINT64 sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8)); - + len--; - + if (sectlen > len) { ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", 
tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen); return STATUS_INTERNAL_ERROR; @@ -570,44 +579,49 @@ NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype); return STATUS_INTERNAL_ERROR; } - + // If inline extent already present, increase refcount and return - + if (secttype == type) { if (type == TYPE_EXTENT_DATA_REF) { EXTENT_DATA_REF* sectedr = (EXTENT_DATA_REF*)(ptr + sizeof(UINT8)); EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)data; - + if (sectedr->root == edr->root && sectedr->objid == edr->objid && sectedr->offset == edr->offset) { UINT32 rc = get_extent_data_refcount(type, data); EXTENT_DATA_REF* sectedr2; - + newei = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); if (!newei) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(newei, tp.item->data, tp.item->size); - + newei->refcount += rc; - + sectedr2 = (EXTENT_DATA_REF*)((UINT8*)newei + ((UINT8*)sectedr - tp.item->data)); sectedr2->count += rc; - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + + Status = insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; } - + return STATUS_SUCCESS; } } else if (type == TYPE_TREE_BLOCK_REF) { TREE_BLOCK_REF* secttbr = (TREE_BLOCK_REF*)(ptr + sizeof(UINT8)); TREE_BLOCK_REF* tbr = (TREE_BLOCK_REF*)data; - + if 
(secttbr->offset == tbr->offset) { TRACE("trying to increase refcount of non-shared tree extent\n"); return STATUS_SUCCESS; @@ -615,37 +629,42 @@ NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 } else if (type == TYPE_SHARED_BLOCK_REF) { SHARED_BLOCK_REF* sectsbr = (SHARED_BLOCK_REF*)(ptr + sizeof(UINT8)); SHARED_BLOCK_REF* sbr = (SHARED_BLOCK_REF*)data; - + if (sectsbr->offset == sbr->offset) return STATUS_SUCCESS; } else if (type == TYPE_SHARED_DATA_REF) { SHARED_DATA_REF* sectsdr = (SHARED_DATA_REF*)(ptr + sizeof(UINT8)); SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)data; - + if (sectsdr->offset == sdr->offset) { UINT32 rc = get_extent_data_refcount(type, data); SHARED_DATA_REF* sectsdr2; - + newei = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); if (!newei) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(newei, tp.item->data, tp.item->size); - + newei->refcount += rc; - + sectsdr2 = (SHARED_DATA_REF*)((UINT8*)newei + ((UINT8*)sectsdr - tp.item->data)); sectsdr2->count += rc; - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + + Status = insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; } - + return STATUS_SUCCESS; } } else { @@ -653,95 +672,111 @@ NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 return STATUS_INTERNAL_ERROR; } } - + len -= sectlen; ptr += sizeof(UINT8) + sectlen; inline_rc += sectcount; } - + offset = 
get_extent_hash(type, data); - + max_extent_item_size = (Vcb->superblock.node_size >> 4) - sizeof(leaf_node); - + // If we can, add entry as inline extent item - + if (inline_rc == ei->refcount && tp.item->size + sizeof(UINT8) + datalen < max_extent_item_size) { len = tp.item->size - sizeof(EXTENT_ITEM); ptr = (UINT8*)&ei[1]; - + if (ei->flags & EXTENT_ITEM_TREE_BLOCK && !skinny) { len -= sizeof(EXTENT_ITEM2); ptr += sizeof(EXTENT_ITEM2); } + // Confusingly, it appears that references are sorted forward by type (i.e. EXTENT_DATA_REFs before + // SHARED_DATA_REFs), but then backwards by hash... + while (len > 0) { UINT8 secttype = *ptr; ULONG sectlen = get_extent_data_len(secttype); - + if (secttype > type) break; - + if (secttype == type) { UINT64 sectoff = get_extent_hash(secttype, ptr + 1); - - if (sectoff > offset) + + if (sectoff < offset) break; } - + len -= sectlen + sizeof(UINT8); ptr += sizeof(UINT8) + sectlen; } - + newei = ExAllocatePoolWithTag(PagedPool, tp.item->size + sizeof(UINT8) + datalen, ALLOC_TAG); RtlCopyMemory(newei, tp.item->data, ptr - tp.item->data); - + newei->refcount += get_extent_data_refcount(type, data); - + if (len > 0) RtlCopyMemory((UINT8*)newei + (ptr - tp.item->data) + sizeof(UINT8) + datalen, ptr, len); - + ptr = (ptr - tp.item->data) + (UINT8*)newei; - + *ptr = type; RtlCopyMemory(ptr + 1, data, datalen); - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size + sizeof(UINT8) + datalen, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; } - + + Status = insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size + sizeof(UINT8) + datalen, NULL, Irp); + if 
(!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; + } + return STATUS_SUCCESS; } - + // Look for existing non-inline entry, and increase refcount if found - + if (inline_rc != ei->refcount) { traverse_ptr tp2; - + searchkey.obj_id = address; searchkey.obj_type = type; searchkey.offset = offset; - + Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (!keycmp(tp2.item->key, searchkey)) { - if (tp2.item->size < datalen) { + if (type == TYPE_SHARED_DATA_REF && tp2.item->size < sizeof(UINT32)) { + ERR("(%llx,%x,%llx) was %x bytes, expecting %x\n", tp2.item->key.obj_id, tp2.item->key.obj_type, tp2.item->key.offset, tp2.item->size, sizeof(UINT32)); + return STATUS_INTERNAL_ERROR; + } else if (type != TYPE_SHARED_DATA_REF && tp2.item->size < datalen) { ERR("(%llx,%x,%llx) was %x bytes, expecting %x\n", tp2.item->key.obj_id, tp2.item->key.obj_type, tp2.item->key.offset, tp2.item->size, datalen); return STATUS_INTERNAL_ERROR; } - + data2 = ExAllocatePoolWithTag(PagedPool, tp2.item->size, ALLOC_TAG); + if (!data2) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + RtlCopyMemory(data2, tp2.item->data, tp2.item->size); - + if (type == TYPE_EXTENT_DATA_REF) { EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)data2; - + edr->count += get_extent_data_refcount(type, data); } else if (type == TYPE_TREE_BLOCK_REF) { TRACE("trying to increase refcount of non-shared tree extent\n"); @@ -749,81 +784,123 @@ NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 } else if (type == TYPE_SHARED_BLOCK_REF) return STATUS_SUCCESS; else if (type == TYPE_SHARED_DATA_REF) { - SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)data2; - - sdr->count += get_extent_data_refcount(type, data); + UINT32* sdr = (UINT32*)data2; + + *sdr += get_extent_data_refcount(type, data); } else { ERR("unhandled extent type 
%x\n", type); return STATUS_INTERNAL_ERROR; } - - delete_tree_item(Vcb, &tp2, rollback); - - if (!insert_tree_item(Vcb, Vcb->extent_root, tp2.item->key.obj_id, tp2.item->key.obj_type, tp2.item->key.offset, data2, tp2.item->size, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = delete_tree_item(Vcb, &tp2); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; } - + + Status = insert_tree_item(Vcb, Vcb->extent_root, tp2.item->key.obj_id, tp2.item->key.obj_type, tp2.item->key.offset, data2, tp2.item->size, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; + } + newei = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); + if (!newei) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + RtlCopyMemory(newei, tp.item->data, tp.item->size); - + newei->refcount += get_extent_data_refcount(type, data); - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; } - + + Status = insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; + } + return STATUS_SUCCESS; } } - + // Otherwise, add new non-inline entry - - data2 = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG); - RtlCopyMemory(data2, data, datalen); - - if (!insert_tree_item(Vcb, Vcb->extent_root, address, type, offset, data2, datalen, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return 
STATUS_INTERNAL_ERROR; + + if (type == TYPE_SHARED_DATA_REF) { + SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)data; + + data2 = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32), ALLOC_TAG); + if (!data2) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + datalen = sizeof(UINT32); + + *((UINT32*)data2) = sdr->count; + } else if (type == TYPE_TREE_BLOCK_REF || type == TYPE_SHARED_BLOCK_REF) { + data2 = NULL; + datalen = 0; + } else { + data2 = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG); + if (!data2) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(data2, data, datalen); } - + + Status = insert_tree_item(Vcb, Vcb->extent_root, address, type, offset, data2, datalen, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; + } + newei = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); + if (!newei) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + RtlCopyMemory(newei, tp.item->data, tp.item->size); - + newei->refcount += get_extent_data_refcount(type, data); - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; } - + + Status = insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; + } + return STATUS_SUCCESS; } -NTSTATUS increase_extent_refcount_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 inode, UINT64 offset, UINT32 refcount, PIRP Irp, LIST_ENTRY* 
rollback) { +NTSTATUS increase_extent_refcount_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 inode, UINT64 offset, UINT32 refcount, PIRP Irp) { EXTENT_DATA_REF edr; - + edr.root = root; edr.objid = inode; edr.offset = offset; edr.count = refcount; - - return increase_extent_refcount(Vcb, address, size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp, rollback); -} -void decrease_chunk_usage(chunk* c, UINT64 delta) { - c->used -= delta; - - TRACE("decreasing size of chunk %llx by %llx\n", c->offset, delta); + return increase_extent_refcount(Vcb, address, size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp); } NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem, - UINT8 level, UINT64 parent, BOOL superseded, PIRP Irp, LIST_ENTRY* rollback) { + UINT8 level, UINT64 parent, BOOL superseded, PIRP Irp) { KEY searchkey; NTSTATUS Status; traverse_ptr tp, tp2; @@ -834,91 +911,91 @@ NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 UINT32 rc = data ? 
get_extent_data_refcount(type, data) : 1; ULONG datalen = get_extent_data_len(type); BOOL is_tree = (type == TYPE_TREE_BLOCK_REF || type == TYPE_SHARED_BLOCK_REF), skinny = FALSE; - + if (is_tree && Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) { searchkey.obj_id = address; searchkey.obj_type = TYPE_METADATA_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) skinny = TRUE; } - + if (!skinny) { searchkey.obj_id = address; searchkey.obj_type = TYPE_EXTENT_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { ERR("could not find EXTENT_ITEM for address %llx\n", address); return STATUS_INTERNAL_ERROR; } - + if (tp.item->key.offset != size) { ERR("extent %llx had length %llx, not %llx as expected\n", address, tp.item->key.offset, size); return STATUS_INTERNAL_ERROR; } - + if (tp.item->size == sizeof(EXTENT_ITEM_V0)) { - Status = convert_old_extent(Vcb, address, is_tree, firstitem, level, Irp, rollback); - + Status = convert_old_extent(Vcb, address, is_tree, firstitem, level, Irp); + if (!NT_SUCCESS(Status)) { ERR("convert_old_extent returned %08x\n", Status); return Status; } - return decrease_extent_refcount(Vcb, address, size, type, data, firstitem, level, parent, superseded, Irp, rollback); + return decrease_extent_refcount(Vcb, address, size, type, data, firstitem, level, parent, superseded, Irp); } } - + if (tp.item->size < sizeof(EXTENT_ITEM)) { ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, 
tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); return STATUS_INTERNAL_ERROR; } - + ei = (EXTENT_ITEM*)tp.item->data; - + len = tp.item->size - sizeof(EXTENT_ITEM); ptr = (UINT8*)&ei[1]; - + if (ei->flags & EXTENT_ITEM_TREE_BLOCK && !skinny) { if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) { ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)); return STATUS_INTERNAL_ERROR; } - + len -= sizeof(EXTENT_ITEM2); ptr += sizeof(EXTENT_ITEM2); } - + if (ei->refcount < rc) { ERR("error - extent has refcount %llx, trying to reduce by %x\n", ei->refcount, rc); return STATUS_INTERNAL_ERROR; } - + inline_rc = 0; - + // Loop through inline extent entries - + while (len > 0) { UINT8 secttype = *ptr; - ULONG sectlen = get_extent_data_len(secttype); + UINT16 sectlen = get_extent_data_len(secttype); UINT64 sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8)); - + len--; - + if (sectlen > len) { ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen); return STATUS_INTERNAL_ERROR; @@ -928,190 +1005,232 @@ NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype); return STATUS_INTERNAL_ERROR; } - + if (secttype == type) { if (type == TYPE_EXTENT_DATA_REF) { EXTENT_DATA_REF* sectedr = (EXTENT_DATA_REF*)(ptr + sizeof(UINT8)); EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)data; - ULONG neweilen; - EXTENT_ITEM* newei; - + if (sectedr->root == edr->root && sectedr->objid == edr->objid && sectedr->offset == edr->offset) { + UINT16 neweilen; + EXTENT_ITEM* newei; + if (ei->refcount == edr->count) { - delete_tree_item(Vcb, &tp, rollback); - + Status = delete_tree_item(Vcb, 
&tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + if (!superseded) - add_checksum_entry(Vcb, address, size / Vcb->superblock.sector_size, NULL, Irp, rollback); - + add_checksum_entry(Vcb, address, (ULONG)(size / Vcb->superblock.sector_size), NULL, Irp); + return STATUS_SUCCESS; } - + if (sectedr->count < edr->count) { ERR("error - extent section has refcount %x, trying to reduce by %x\n", sectedr->count, edr->count); return STATUS_INTERNAL_ERROR; } - + if (sectedr->count > edr->count) // reduce section refcount neweilen = tp.item->size; else // remove section entirely neweilen = tp.item->size - sizeof(UINT8) - sectlen; - + newei = ExAllocatePoolWithTag(PagedPool, neweilen, ALLOC_TAG); if (!newei) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + if (sectedr->count > edr->count) { EXTENT_DATA_REF* newedr = (EXTENT_DATA_REF*)((UINT8*)newei + ((UINT8*)sectedr - tp.item->data)); - + RtlCopyMemory(newei, ei, neweilen); - + newedr->count -= rc; } else { RtlCopyMemory(newei, ei, ptr - tp.item->data); - + if (len > sectlen) RtlCopyMemory((UINT8*)newei + (ptr - tp.item->data), ptr + sectlen + sizeof(UINT8), len - sectlen); } - + newei->refcount -= rc; - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, neweilen, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; } - + + Status = insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, neweilen, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; + } + return STATUS_SUCCESS; } } else if (type == TYPE_SHARED_DATA_REF) { SHARED_DATA_REF* sectsdr = 
(SHARED_DATA_REF*)(ptr + sizeof(UINT8)); SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)data; - ULONG neweilen; - EXTENT_ITEM* newei; - + if (sectsdr->offset == sdr->offset) { + EXTENT_ITEM* newei; + UINT16 neweilen; + if (ei->refcount == sectsdr->count) { - delete_tree_item(Vcb, &tp, rollback); - + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + if (!superseded) - add_checksum_entry(Vcb, address, size / Vcb->superblock.sector_size, NULL, Irp, rollback); - + add_checksum_entry(Vcb, address, (ULONG)(size / Vcb->superblock.sector_size), NULL, Irp); + return STATUS_SUCCESS; } - + if (sectsdr->count < sdr->count) { ERR("error - SHARED_DATA_REF has refcount %x, trying to reduce by %x\n", sectsdr->count, sdr->count); return STATUS_INTERNAL_ERROR; } - + if (sectsdr->count > sdr->count) // reduce section refcount neweilen = tp.item->size; else // remove section entirely neweilen = tp.item->size - sizeof(UINT8) - sectlen; - + newei = ExAllocatePoolWithTag(PagedPool, neweilen, ALLOC_TAG); if (!newei) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + if (sectsdr->count > sdr->count) { SHARED_DATA_REF* newsdr = (SHARED_DATA_REF*)((UINT8*)newei + ((UINT8*)sectsdr - tp.item->data)); - + RtlCopyMemory(newei, ei, neweilen); - + newsdr->count -= rc; } else { RtlCopyMemory(newei, ei, ptr - tp.item->data); - + if (len > sectlen) RtlCopyMemory((UINT8*)newei + (ptr - tp.item->data), ptr + sectlen + sizeof(UINT8), len - sectlen); } newei->refcount -= rc; - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, neweilen, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; } - + + Status = 
insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, neweilen, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; + } + return STATUS_SUCCESS; } } else if (type == TYPE_TREE_BLOCK_REF) { TREE_BLOCK_REF* secttbr = (TREE_BLOCK_REF*)(ptr + sizeof(UINT8)); TREE_BLOCK_REF* tbr = (TREE_BLOCK_REF*)data; - ULONG neweilen; - EXTENT_ITEM* newei; - + if (secttbr->offset == tbr->offset) { + EXTENT_ITEM* newei; + UINT16 neweilen; + if (ei->refcount == 1) { - delete_tree_item(Vcb, &tp, rollback); + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + return STATUS_SUCCESS; } neweilen = tp.item->size - sizeof(UINT8) - sectlen; - + newei = ExAllocatePoolWithTag(PagedPool, neweilen, ALLOC_TAG); if (!newei) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(newei, ei, ptr - tp.item->data); - + if (len > sectlen) RtlCopyMemory((UINT8*)newei + (ptr - tp.item->data), ptr + sectlen + sizeof(UINT8), len - sectlen); - + newei->refcount--; - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, neweilen, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; } - + + Status = insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, neweilen, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; + } + return STATUS_SUCCESS; } } else if (type == TYPE_SHARED_BLOCK_REF) { SHARED_BLOCK_REF* sectsbr = (SHARED_BLOCK_REF*)(ptr + sizeof(UINT8)); SHARED_BLOCK_REF* sbr = 
(SHARED_BLOCK_REF*)data; - ULONG neweilen; - EXTENT_ITEM* newei; - + if (sectsbr->offset == sbr->offset) { + EXTENT_ITEM* newei; + UINT16 neweilen; + if (ei->refcount == 1) { - delete_tree_item(Vcb, &tp, rollback); + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + return STATUS_SUCCESS; } - + neweilen = tp.item->size - sizeof(UINT8) - sectlen; - + newei = ExAllocatePoolWithTag(PagedPool, neweilen, ALLOC_TAG); if (!newei) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(newei, ei, ptr - tp.item->data); - + if (len > sectlen) RtlCopyMemory((UINT8*)newei + (ptr - tp.item->data), ptr + sectlen + sizeof(UINT8), len - sectlen); - + newei->refcount--; - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, neweilen, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + + Status = insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, neweilen, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; } - + return STATUS_SUCCESS; } } else { @@ -1119,268 +1238,305 @@ NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 return STATUS_INTERNAL_ERROR; } } - + len -= sectlen; ptr += sizeof(UINT8) + sectlen; inline_rc += sectcount; } - + if (inline_rc == ei->refcount) { ERR("entry not found in inline extent item for address %llx\n", address); return STATUS_INTERNAL_ERROR; } - + + if (type == TYPE_SHARED_DATA_REF) + datalen = sizeof(UINT32); + else if (type == TYPE_TREE_BLOCK_REF || type == TYPE_SHARED_BLOCK_REF) + datalen = 0; + 
searchkey.obj_id = address; searchkey.obj_type = type; searchkey.offset = (type == TYPE_SHARED_DATA_REF || type == TYPE_EXTENT_REF_V0) ? parent : get_extent_hash(type, data); - + Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (keycmp(tp2.item->key, searchkey)) { ERR("(%llx,%x,%llx) not found\n", tp2.item->key.obj_id, tp2.item->key.obj_type, tp2.item->key.offset); return STATUS_INTERNAL_ERROR; } - + if (tp2.item->size < datalen) { - ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, datalen); + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp2.item->key.obj_id, tp2.item->key.obj_type, tp2.item->key.offset, tp2.item->size, datalen); return STATUS_INTERNAL_ERROR; } - + if (type == TYPE_EXTENT_DATA_REF) { EXTENT_DATA_REF* sectedr = (EXTENT_DATA_REF*)tp2.item->data; EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)data; - EXTENT_ITEM* newei; - + if (sectedr->root == edr->root && sectedr->objid == edr->objid && sectedr->offset == edr->offset) { + EXTENT_ITEM* newei; + if (ei->refcount == edr->count) { - delete_tree_item(Vcb, &tp, rollback); - delete_tree_item(Vcb, &tp2, rollback); - + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + + Status = delete_tree_item(Vcb, &tp2); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + if (!superseded) - add_checksum_entry(Vcb, address, size / Vcb->superblock.sector_size, NULL, Irp, rollback); - + add_checksum_entry(Vcb, address, (ULONG)(size / Vcb->superblock.sector_size), NULL, Irp); + return STATUS_SUCCESS; } - + if (sectedr->count < edr->count) { ERR("error - extent section has refcount %x, trying to reduce by %x\n", sectedr->count, edr->count); return STATUS_INTERNAL_ERROR; 
} - - delete_tree_item(Vcb, &tp2, rollback); - + + Status = delete_tree_item(Vcb, &tp2); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + if (sectedr->count > edr->count) { EXTENT_DATA_REF* newedr = ExAllocatePoolWithTag(PagedPool, tp2.item->size, ALLOC_TAG); - + if (!newedr) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(newedr, sectedr, tp2.item->size); - + newedr->count -= edr->count; - - if (!insert_tree_item(Vcb, Vcb->extent_root, tp2.item->key.obj_id, tp2.item->key.obj_type, tp2.item->key.offset, newedr, tp2.item->size, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = insert_tree_item(Vcb, Vcb->extent_root, tp2.item->key.obj_id, tp2.item->key.obj_type, tp2.item->key.offset, newedr, tp2.item->size, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; } } - + newei = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); if (!newei) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(newei, tp.item->data, tp.item->size); newei->refcount -= rc; - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + + Status = insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; } - + return STATUS_SUCCESS; } else { ERR("error - hash collision?\n"); return STATUS_INTERNAL_ERROR; } } else if (type == 
TYPE_SHARED_DATA_REF) { - SHARED_DATA_REF* sectsdr = (SHARED_DATA_REF*)tp2.item->data; SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)data; - EXTENT_ITEM* newei; - - if (sectsdr->offset == sdr->offset) { + + if (tp2.item->key.offset == sdr->offset) { + UINT32* sectsdrcount = (UINT32*)tp2.item->data; + EXTENT_ITEM* newei; + if (ei->refcount == sdr->count) { - delete_tree_item(Vcb, &tp, rollback); - delete_tree_item(Vcb, &tp2, rollback); - + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + + Status = delete_tree_item(Vcb, &tp2); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + if (!superseded) - add_checksum_entry(Vcb, address, size / Vcb->superblock.sector_size, NULL, Irp, rollback); - + add_checksum_entry(Vcb, address, (ULONG)(size / Vcb->superblock.sector_size), NULL, Irp); + return STATUS_SUCCESS; } - - if (sectsdr->count < sdr->count) { - ERR("error - extent section has refcount %x, trying to reduce by %x\n", sectsdr->count, sdr->count); + + if (*sectsdrcount < sdr->count) { + ERR("error - extent section has refcount %x, trying to reduce by %x\n", *sectsdrcount, sdr->count); return STATUS_INTERNAL_ERROR; } - - delete_tree_item(Vcb, &tp2, rollback); - - if (sectsdr->count > sdr->count) { - SHARED_DATA_REF* newsdr = ExAllocatePoolWithTag(PagedPool, tp2.item->size, ALLOC_TAG); - + + Status = delete_tree_item(Vcb, &tp2); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + + if (*sectsdrcount > sdr->count) { + UINT32* newsdr = ExAllocatePoolWithTag(PagedPool, tp2.item->size, ALLOC_TAG); + if (!newsdr) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - - RtlCopyMemory(newsdr, sectsdr, tp2.item->size); - - newsdr->count -= sdr->count; - - if (!insert_tree_item(Vcb, Vcb->extent_root, tp2.item->key.obj_id, tp2.item->key.obj_type, tp2.item->key.offset, newsdr, 
tp2.item->size, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; - } - } - - newei = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); - if (!newei) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - RtlCopyMemory(newei, tp.item->data, tp.item->size); - newei->refcount -= rc; - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; - } - - return STATUS_SUCCESS; - } else { - ERR("error - collision?\n"); - return STATUS_INTERNAL_ERROR; - } - } else if (type == TYPE_SHARED_BLOCK_REF) { - SHARED_BLOCK_REF* sectsbr = (SHARED_BLOCK_REF*)tp2.item->data; - SHARED_BLOCK_REF* sbr = (SHARED_BLOCK_REF*)data; - EXTENT_ITEM* newei; - - if (sectsbr->offset == sbr->offset) { - if (ei->refcount == 1) { - delete_tree_item(Vcb, &tp, rollback); - delete_tree_item(Vcb, &tp2, rollback); - return STATUS_SUCCESS; + *newsdr = *sectsdrcount - sdr->count; + + Status = insert_tree_item(Vcb, Vcb->extent_root, tp2.item->key.obj_id, tp2.item->key.obj_type, tp2.item->key.offset, newsdr, tp2.item->size, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; + } } - - delete_tree_item(Vcb, &tp2, rollback); - + newei = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); if (!newei) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(newei, tp.item->data, tp.item->size); newei->refcount -= rc; - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = delete_tree_item(Vcb, &tp); + if 
(!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + + Status = insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; } - + return STATUS_SUCCESS; } else { ERR("error - collision?\n"); return STATUS_INTERNAL_ERROR; } - } else if (type == TYPE_TREE_BLOCK_REF) { - TREE_BLOCK_REF* secttbr = (TREE_BLOCK_REF*)tp2.item->data; - TREE_BLOCK_REF* tbr = (TREE_BLOCK_REF*)data; + } else if (type == TYPE_TREE_BLOCK_REF || type == TYPE_SHARED_BLOCK_REF) { EXTENT_ITEM* newei; - - if (secttbr->offset == tbr->offset) { - if (ei->refcount == 1) { - delete_tree_item(Vcb, &tp, rollback); - delete_tree_item(Vcb, &tp2, rollback); - return STATUS_SUCCESS; - } - - delete_tree_item(Vcb, &tp2, rollback); - - newei = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); - if (!newei) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; + + if (ei->refcount == 1) { + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; } - - RtlCopyMemory(newei, tp.item->data, tp.item->size); - newei->refcount -= rc; - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + Status = delete_tree_item(Vcb, &tp2); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; } - + return STATUS_SUCCESS; - } else { - ERR("error - collision?\n"); - return STATUS_INTERNAL_ERROR; } + + Status = delete_tree_item(Vcb, &tp2); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + + newei = 
ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); + if (!newei) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(newei, tp.item->data, tp.item->size); + + newei->refcount -= rc; + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + + Status = insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; + } + + return STATUS_SUCCESS; } else if (type == TYPE_EXTENT_REF_V0) { EXTENT_REF_V0* erv0 = (EXTENT_REF_V0*)tp2.item->data; EXTENT_ITEM* newei; - + if (ei->refcount == erv0->count) { - delete_tree_item(Vcb, &tp, rollback); - delete_tree_item(Vcb, &tp2, rollback); - + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + + Status = delete_tree_item(Vcb, &tp2); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + if (!superseded) - add_checksum_entry(Vcb, address, size / Vcb->superblock.sector_size, NULL, Irp, rollback); - + add_checksum_entry(Vcb, address, (ULONG)(size / Vcb->superblock.sector_size), NULL, Irp); + return STATUS_SUCCESS; } - - delete_tree_item(Vcb, &tp2, rollback); - + + Status = delete_tree_item(Vcb, &tp2); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + newei = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); if (!newei) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(newei, tp.item->data, tp.item->size); newei->refcount -= rc; - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, 
NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + + Status = insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; } - + return STATUS_SUCCESS; } else { ERR("unhandled extent type %x\n", type); @@ -1389,64 +1545,63 @@ NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 } NTSTATUS decrease_extent_refcount_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 inode, - UINT64 offset, UINT32 refcount, BOOL superseded, PIRP Irp, LIST_ENTRY* rollback) { + UINT64 offset, UINT32 refcount, BOOL superseded, PIRP Irp) { EXTENT_DATA_REF edr; - + edr.root = root; edr.objid = inode; edr.offset = offset; edr.count = refcount; - - return decrease_extent_refcount(Vcb, address, size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, 0, superseded, Irp, rollback); + + return decrease_extent_refcount(Vcb, address, size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, 0, superseded, Irp); } NTSTATUS decrease_extent_refcount_tree(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, - UINT8 level, PIRP Irp, LIST_ENTRY* rollback) { + UINT8 level, PIRP Irp) { TREE_BLOCK_REF tbr; - + tbr.offset = root; - - return decrease_extent_refcount(Vcb, address, size, TYPE_TREE_BLOCK_REF, &tbr, NULL/*FIXME*/, level, 0, FALSE, Irp, rollback); + + return decrease_extent_refcount(Vcb, address, size, TYPE_TREE_BLOCK_REF, &tbr, NULL/*FIXME*/, level, 0, FALSE, Irp); } -static UINT64 find_extent_data_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, PIRP Irp) { +static UINT32 find_extent_data_refcount(device_extension* Vcb, UINT64 address, UINT64 size, 
UINT64 root, UINT64 objid, UINT64 offset, PIRP Irp) { NTSTATUS Status; KEY searchkey; traverse_ptr tp; - EXTENT_DATA_REF* edr; - + searchkey.obj_id = address; searchkey.obj_type = TYPE_EXTENT_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return 0; } - + if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { TRACE("could not find address %llx in extent tree\n", address); return 0; } - + if (tp.item->key.offset != size) { ERR("extent %llx had size %llx, not %llx as expected\n", address, tp.item->key.offset, size); return 0; } - + if (tp.item->size >= sizeof(EXTENT_ITEM)) { EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data; UINT32 len = tp.item->size - sizeof(EXTENT_ITEM); UINT8* ptr = (UINT8*)&ei[1]; - + while (len > 0) { UINT8 secttype = *ptr; ULONG sectlen = get_extent_data_len(secttype); - UINT64 sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8)); - + UINT32 sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8)); + len--; - + if (sectlen > len) { ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen); return 0; @@ -1456,39 +1611,39 @@ static UINT64 find_extent_data_refcount(device_extension* Vcb, UINT64 address, U ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype); return 0; } - + if (secttype == TYPE_EXTENT_DATA_REF) { EXTENT_DATA_REF* sectedr = (EXTENT_DATA_REF*)(ptr + sizeof(UINT8)); - + if (sectedr->root == root && sectedr->objid == objid && sectedr->offset == offset) return sectcount; } - + len -= sectlen; ptr += sizeof(UINT8) + sectlen; } } - + searchkey.obj_id = address; searchkey.obj_type = TYPE_EXTENT_DATA_REF; searchkey.offset = get_extent_data_ref_hash2(root, objid, offset); - + 
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return 0; } - - if (!keycmp(searchkey, tp.item->key)) { + + if (!keycmp(searchkey, tp.item->key)) { if (tp.item->size < sizeof(EXTENT_DATA_REF)) ERR("(%llx,%x,%llx) has size %u, not %u as expected\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA_REF)); - else { - edr = (EXTENT_DATA_REF*)tp.item->data; - + else { + EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)tp.item->data; + return edr->count; } } - + return 0; } @@ -1497,24 +1652,24 @@ UINT64 get_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, P traverse_ptr tp; NTSTATUS Status; EXTENT_ITEM* ei; - + searchkey.obj_id = address; searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return 0; } - + if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA && tp.item->key.obj_id == address && tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) { ei = (EXTENT_ITEM*)tp.item->data; - + return ei->refcount; } - + if (tp.item->key.obj_id != address || tp.item->key.obj_type != TYPE_EXTENT_ITEM) { ERR("couldn't find (%llx,%x,%llx) in extent tree\n", address, TYPE_EXTENT_ITEM, size); return 0; @@ -1522,19 +1677,19 @@ UINT64 get_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, P ERR("extent %llx had size %llx, not %llx as expected\n", address, tp.item->key.offset, size); return 0; } - + if (tp.item->size == sizeof(EXTENT_ITEM_V0)) { EXTENT_ITEM_V0* eiv0 = (EXTENT_ITEM_V0*)tp.item->data; - + return eiv0->refcount; } else if (tp.item->size < sizeof(EXTENT_ITEM)) { 
ERR("(%llx,%x,%llx) was %x bytes, expected at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, - tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA)); + tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); return 0; } - + ei = (EXTENT_ITEM*)tp.item->data; - + return ei->refcount; } @@ -1542,69 +1697,69 @@ BOOL is_extent_unique(device_extension* Vcb, UINT64 address, UINT64 size, PIRP I KEY searchkey; traverse_ptr tp, next_tp; NTSTATUS Status; - UINT64 rc, rcrun, root = 0, inode = 0; + UINT64 rc, rcrun, root = 0, inode = 0, offset = 0; UINT32 len; EXTENT_ITEM* ei; UINT8* ptr; BOOL b; - + rc = get_extent_refcount(Vcb, address, size, Irp); if (rc == 1) return TRUE; - + if (rc == 0) return FALSE; - + searchkey.obj_id = address; searchkey.obj_type = TYPE_EXTENT_ITEM; searchkey.offset = size; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { WARN("error - find_item returned %08x\n", Status); return FALSE; } - + if (keycmp(tp.item->key, searchkey)) { WARN("could not find (%llx,%x,%llx)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); return FALSE; } - + if (tp.item->size == sizeof(EXTENT_ITEM_V0)) return FALSE; - + if (tp.item->size < sizeof(EXTENT_ITEM)) { WARN("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); return FALSE; } - + ei = (EXTENT_ITEM*)tp.item->data; - + len = tp.item->size - sizeof(EXTENT_ITEM); ptr = (UINT8*)&ei[1]; - + if (ei->flags & EXTENT_ITEM_TREE_BLOCK) { if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) { WARN("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)); return FALSE; } - + len -= sizeof(EXTENT_ITEM2); ptr += sizeof(EXTENT_ITEM2); } - + rcrun = 0; - + // Loop through inline extent entries - + while (len > 0) { UINT8 
secttype = *ptr; ULONG sectlen = get_extent_data_len(secttype); UINT64 sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8)); - + len--; - + if (sectlen > len) { WARN("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen); return FALSE; @@ -1614,63 +1769,65 @@ BOOL is_extent_unique(device_extension* Vcb, UINT64 address, UINT64 size, PIRP I WARN("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype); return FALSE; } - + if (secttype == TYPE_EXTENT_DATA_REF) { EXTENT_DATA_REF* sectedr = (EXTENT_DATA_REF*)(ptr + sizeof(UINT8)); - + if (root == 0 && inode == 0) { root = sectedr->root; inode = sectedr->objid; - } else if (root != sectedr->root || inode != sectedr->objid) + offset = sectedr->offset; + } else if (root != sectedr->root || inode != sectedr->objid || offset != sectedr->offset) return FALSE; } else return FALSE; - + len -= sectlen; ptr += sizeof(UINT8) + sectlen; rcrun += sectcount; } - + if (rcrun == rc) return TRUE; // Loop through non-inlines if some refs still unaccounted for - + do { b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp); - + if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_EXTENT_DATA_REF) { EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)tp.item->data; - + if (tp.item->size < sizeof(EXTENT_DATA_REF)) { WARN("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)); return FALSE; } - + if (root == 0 && inode == 0) { root = edr->root; inode = edr->objid; - } else if (root != edr->root || inode != edr->objid) + offset = edr->offset; + } else if (root != edr->root || inode != edr->objid || offset != edr->offset) return FALSE; - + rcrun += edr->count; } - + if (rcrun == rc) return TRUE; - + if (b) { tp = next_tp; - + if (tp.item->key.obj_id > 
searchkey.obj_id) break; } } while (b); - + // If we reach this point, there's still some refs unaccounted for somewhere. // Return FALSE in case we mess things up elsewhere. - + return FALSE; } @@ -1679,39 +1836,39 @@ UINT64 get_extent_flags(device_extension* Vcb, UINT64 address, PIRP Irp) { traverse_ptr tp; NTSTATUS Status; EXTENT_ITEM* ei; - + searchkey.obj_id = address; searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return 0; } - + if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA && tp.item->key.obj_id == address && tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) { ei = (EXTENT_ITEM*)tp.item->data; - + return ei->flags; } - + if (tp.item->key.obj_id != address || tp.item->key.obj_type != TYPE_EXTENT_ITEM) { ERR("couldn't find %llx in extent tree\n", address); return 0; } - + if (tp.item->size == sizeof(EXTENT_ITEM_V0)) return 0; else if (tp.item->size < sizeof(EXTENT_ITEM)) { ERR("(%llx,%x,%llx) was %x bytes, expected at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, - tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA)); + tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); return 0; } - + ei = (EXTENT_ITEM*)tp.item->data; - + return ei->flags; } @@ -1720,37 +1877,37 @@ void update_extent_flags(device_extension* Vcb, UINT64 address, UINT64 flags, PI traverse_ptr tp; NTSTATUS Status; EXTENT_ITEM* ei; - + searchkey.obj_id = address; searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? 
TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return; } - + if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA && tp.item->key.obj_id == address && tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) { ei = (EXTENT_ITEM*)tp.item->data; ei->flags = flags; return; } - + if (tp.item->key.obj_id != address || tp.item->key.obj_type != TYPE_EXTENT_ITEM) { ERR("couldn't find %llx in extent tree\n", address); return; } - + if (tp.item->size == sizeof(EXTENT_ITEM_V0)) return; else if (tp.item->size < sizeof(EXTENT_ITEM)) { ERR("(%llx,%x,%llx) was %x bytes, expected at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, - tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA)); + tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); return; } - + ei = (EXTENT_ITEM*)tp.item->data; ei->flags = flags; } @@ -1758,23 +1915,23 @@ void update_extent_flags(device_extension* Vcb, UINT64 address, UINT64 flags, PI static changed_extent* get_changed_extent_item(chunk* c, UINT64 address, UINT64 size, BOOL no_csum) { LIST_ENTRY* le; changed_extent* ce; - + le = c->changed_extents.Flink; while (le != &c->changed_extents) { ce = CONTAINING_RECORD(le, changed_extent, list_entry); - + if (ce->address == address && ce->size == size) return ce; - + le = le->Flink; } - + ce = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent), ALLOC_TAG); if (!ce) { ERR("out of memory\n"); return NULL; } - + ce->address = address; ce->size = size; ce->old_size = size; @@ -1784,13 +1941,13 @@ static changed_extent* get_changed_extent_item(chunk* c, UINT64 address, UINT64 ce->superseded = FALSE; InitializeListHead(&ce->refs); InitializeListHead(&ce->old_refs); - + InsertTailList(&c->changed_extents, &ce->list_entry); - + return ce; } -NTSTATUS 
update_changed_extent_ref(device_extension* Vcb, chunk* c, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, signed long long count, +NTSTATUS update_changed_extent_ref(device_extension* Vcb, chunk* c, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, INT32 count, BOOL no_csum, BOOL superseded, PIRP Irp) { LIST_ENTRY* le; changed_extent* ce; @@ -1798,48 +1955,48 @@ NTSTATUS update_changed_extent_ref(device_extension* Vcb, chunk* c, UINT64 addre NTSTATUS Status; KEY searchkey; traverse_ptr tp; - UINT64 old_count; - + UINT32 old_count; + ExAcquireResourceExclusiveLite(&c->changed_extents_lock, TRUE); - + ce = get_changed_extent_item(c, address, size, no_csum); - + if (!ce) { ERR("get_changed_extent_item failed\n"); Status = STATUS_INTERNAL_ERROR; goto end; } - + if (IsListEmpty(&ce->refs) && IsListEmpty(&ce->old_refs)) { // new entry searchkey.obj_id = address; searchkey.obj_type = TYPE_EXTENT_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); goto end; } - + if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { ERR("could not find address %llx in extent tree\n", address); Status = STATUS_INTERNAL_ERROR; goto end; } - + if (tp.item->key.offset != size) { ERR("extent %llx had size %llx, not %llx as expected\n", address, tp.item->key.offset, size); Status = STATUS_INTERNAL_ERROR; goto end; } - + if (tp.item->size == sizeof(EXTENT_ITEM_V0)) { EXTENT_ITEM_V0* eiv0 = (EXTENT_ITEM_V0*)tp.item->data; - + ce->count = ce->old_count = eiv0->refcount; } else if (tp.item->size >= sizeof(EXTENT_ITEM)) { EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data; - + ce->count = ce->old_count = ei->refcount; } else { ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, 
sizeof(EXTENT_ITEM)); @@ -1847,71 +2004,71 @@ NTSTATUS update_changed_extent_ref(device_extension* Vcb, chunk* c, UINT64 addre goto end; } } - + le = ce->refs.Flink; while (le != &ce->refs) { cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry); - + if (cer->type == TYPE_EXTENT_DATA_REF && cer->edr.root == root && cer->edr.objid == objid && cer->edr.offset == offset) { ce->count += count; cer->edr.count += count; Status = STATUS_SUCCESS; - + if (superseded) ce->superseded = TRUE; - + goto end; } - + le = le->Flink; } - + old_count = find_extent_data_refcount(Vcb, address, size, root, objid, offset, Irp); - + if (old_count > 0) { cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG); - + if (!cer) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + cer->type = TYPE_EXTENT_DATA_REF; cer->edr.root = root; cer->edr.objid = objid; cer->edr.offset = offset; cer->edr.count = old_count; - + InsertTailList(&ce->old_refs, &cer->list_entry); } - + cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG); - + if (!cer) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + cer->type = TYPE_EXTENT_DATA_REF; cer->edr.root = root; cer->edr.objid = objid; cer->edr.offset = offset; cer->edr.count = old_count + count; - + InsertTailList(&ce->refs, &cer->list_entry); - + ce->count += count; - + if (superseded) ce->superseded = TRUE; - + Status = STATUS_SUCCESS; - + end: ExReleaseResourceLite(&c->changed_extents_lock); - + return Status; } @@ -1919,42 +2076,42 @@ void add_changed_extent_ref(chunk* c, UINT64 address, UINT64 size, UINT64 root, changed_extent* ce; changed_extent_ref* cer; LIST_ENTRY* le; - + ce = get_changed_extent_item(c, address, size, no_csum); - + if (!ce) { ERR("get_changed_extent_item failed\n"); return; } - + le = ce->refs.Flink; while (le != &ce->refs) { cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry); - + if (cer->type == TYPE_EXTENT_DATA_REF 
&& cer->edr.root == root && cer->edr.objid == objid && cer->edr.offset == offset) { ce->count += count; cer->edr.count += count; return; } - + le = le->Flink; } - + cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG); - + if (!cer) { ERR("out of memory\n"); return; } - + cer->type = TYPE_EXTENT_DATA_REF; cer->edr.root = root; cer->edr.objid = objid; cer->edr.offset = offset; cer->edr.count = count; - + InsertTailList(&ce->refs, &cer->list_entry); - + ce->count += count; } @@ -1966,56 +2123,56 @@ UINT64 find_extent_shared_tree_refcount(device_extension* Vcb, UINT64 address, U EXTENT_ITEM* ei; UINT32 len; UINT8* ptr; - + searchkey.obj_id = address; searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return 0; } - + if (tp.item->key.obj_id != searchkey.obj_id || (tp.item->key.obj_type != TYPE_EXTENT_ITEM && tp.item->key.obj_type != TYPE_METADATA_ITEM)) { TRACE("could not find address %llx in extent tree\n", address); return 0; } - + if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset != Vcb->superblock.node_size) { ERR("extent %llx had size %llx, not %llx as expected\n", address, tp.item->key.offset, Vcb->superblock.node_size); return 0; } - + if (tp.item->size < sizeof(EXTENT_ITEM)) { ERR("(%llx,%x,%llx): size was %u, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); return 0; } - + ei = (EXTENT_ITEM*)tp.item->data; inline_rc = 0; - + len = tp.item->size - sizeof(EXTENT_ITEM); ptr = (UINT8*)&ei[1]; - + if (searchkey.obj_type == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) { if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) { ERR("(%llx,%x,%llx): size was %u, 
expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)); return 0; } - + len -= sizeof(EXTENT_ITEM2); ptr += sizeof(EXTENT_ITEM2); } - + while (len > 0) { UINT8 secttype = *ptr; ULONG sectlen = get_extent_data_len(secttype); UINT64 sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8)); - + len--; - + if (sectlen > len) { ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen); return 0; @@ -2025,45 +2182,41 @@ UINT64 find_extent_shared_tree_refcount(device_extension* Vcb, UINT64 address, U ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype); return 0; } - + if (secttype == TYPE_SHARED_BLOCK_REF) { SHARED_BLOCK_REF* sectsbr = (SHARED_BLOCK_REF*)(ptr + sizeof(UINT8)); - + if (sectsbr->offset == parent) return 1; } - + len -= sectlen; ptr += sizeof(UINT8) + sectlen; inline_rc += sectcount; } - + // FIXME - what if old? 
- + if (inline_rc == ei->refcount) return 0; - + searchkey.obj_id = address; searchkey.obj_type = TYPE_SHARED_BLOCK_REF; searchkey.offset = parent; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return 0; } - - if (!keycmp(searchkey, tp.item->key)) { - if (tp.item->size < sizeof(SHARED_BLOCK_REF)) - ERR("(%llx,%x,%llx) has size %u, not %u as expected\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(SHARED_BLOCK_REF)); - else - return 1; - } - + + if (!keycmp(searchkey, tp.item->key)) + return 1; + return 0; } -UINT64 find_extent_shared_data_refcount(device_extension* Vcb, UINT64 address, UINT64 parent, PIRP Irp) { +UINT32 find_extent_shared_data_refcount(device_extension* Vcb, UINT64 address, UINT64 parent, PIRP Irp) { NTSTATUS Status; KEY searchkey; traverse_ptr tp; @@ -2071,40 +2224,40 @@ UINT64 find_extent_shared_data_refcount(device_extension* Vcb, UINT64 address, U EXTENT_ITEM* ei; UINT32 len; UINT8* ptr; - + searchkey.obj_id = address; searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? 
TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return 0; } - + if (tp.item->key.obj_id != searchkey.obj_id || (tp.item->key.obj_type != TYPE_EXTENT_ITEM && tp.item->key.obj_type != TYPE_METADATA_ITEM)) { TRACE("could not find address %llx in extent tree\n", address); return 0; } - + if (tp.item->size < sizeof(EXTENT_ITEM)) { ERR("(%llx,%x,%llx): size was %u, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); return 0; } - + ei = (EXTENT_ITEM*)tp.item->data; inline_rc = 0; - + len = tp.item->size - sizeof(EXTENT_ITEM); ptr = (UINT8*)&ei[1]; - + while (len > 0) { UINT8 secttype = *ptr; ULONG sectlen = get_extent_data_len(secttype); UINT64 sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8)); - + len--; - + if (sectlen > len) { ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen); return 0; @@ -2114,42 +2267,42 @@ UINT64 find_extent_shared_data_refcount(device_extension* Vcb, UINT64 address, U ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype); return 0; } - + if (secttype == TYPE_SHARED_DATA_REF) { SHARED_DATA_REF* sectsdr = (SHARED_DATA_REF*)(ptr + sizeof(UINT8)); - + if (sectsdr->offset == parent) return sectsdr->count; } - + len -= sectlen; ptr += sizeof(UINT8) + sectlen; inline_rc += sectcount; } - + // FIXME - what if old? 
- + if (inline_rc == ei->refcount) return 0; - + searchkey.obj_id = address; searchkey.obj_type = TYPE_SHARED_DATA_REF; searchkey.offset = parent; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return 0; } - - if (!keycmp(searchkey, tp.item->key)) { - if (tp.item->size < sizeof(SHARED_DATA_REF)) - ERR("(%llx,%x,%llx) has size %u, not %u as expected\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(SHARED_DATA_REF)); + + if (!keycmp(searchkey, tp.item->key)) { + if (tp.item->size < sizeof(UINT32)) + ERR("(%llx,%x,%llx) has size %u, not %u as expected\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(UINT32)); else { - SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)tp.item->data; - return sdr->count; + UINT32* count = (UINT32*)tp.item->data; + return *count; } } - + return 0; } diff --git a/reactos/drivers/filesystems/btrfs/fastio.c b/reactos/drivers/filesystems/btrfs/fastio.c index 30b58384895..9f4c504a126 100644 --- a/reactos/drivers/filesystems/btrfs/fastio.c +++ b/reactos/drivers/filesystems/btrfs/fastio.c @@ -1,17 +1,17 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . 
*/ @@ -19,111 +19,137 @@ FAST_IO_DISPATCH FastIoDispatch; -static void STDCALL acquire_file_for_create_section(PFILE_OBJECT FileObject) { - TRACE("STUB: acquire_file_for_create_section\n"); -} - -static void STDCALL release_file_for_create_section(PFILE_OBJECT FileObject) { - TRACE("STUB: release_file_for_create_section\n"); -} - -static BOOLEAN STDCALL fast_query_basic_info(PFILE_OBJECT FileObject, BOOLEAN wait, PFILE_BASIC_INFORMATION fbi, - PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject) { +_Function_class_(FAST_IO_QUERY_BASIC_INFO) +#ifdef __REACTOS__ +static BOOLEAN NTAPI fast_query_basic_info(PFILE_OBJECT FileObject, BOOLEAN wait, PFILE_BASIC_INFORMATION fbi, + PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject) { +#else +static BOOLEAN fast_query_basic_info(PFILE_OBJECT FileObject, BOOLEAN wait, PFILE_BASIC_INFORMATION fbi, + PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject) { +#endif fcb* fcb; ccb* ccb; - + + FsRtlEnterFileSystem(); + TRACE("(%p, %u, %p, %p, %p)\n", FileObject, wait, fbi, IoStatus, DeviceObject); - - if (!FileObject) + + if (!FileObject) { + FsRtlExitFileSystem(); return FALSE; - + } + fcb = FileObject->FsContext; - - if (!fcb) + + if (!fcb) { + FsRtlExitFileSystem(); return FALSE; - + } + ccb = FileObject->FsContext2; - - if (!ccb) + + if (!ccb) { + FsRtlExitFileSystem(); return FALSE; - - if (!(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) + } + + if (!(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) { + FsRtlExitFileSystem(); return FALSE; - + } + if (fcb->ads) { - if (!ccb || !ccb->fileref || !ccb->fileref->parent || !ccb->fileref->parent->fcb) + if (!ccb->fileref || !ccb->fileref->parent || !ccb->fileref->parent->fcb) { + FsRtlExitFileSystem(); return FALSE; - + } + fcb = ccb->fileref->parent->fcb; } - - FsRtlEnterFileSystem(); - + if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) { FsRtlExitFileSystem(); return FALSE; } - fbi->CreationTime.QuadPart = 
unix_time_to_win(&fcb->inode_item.otime); - fbi->LastAccessTime.QuadPart = unix_time_to_win(&fcb->inode_item.st_atime); - fbi->LastWriteTime.QuadPart = unix_time_to_win(&fcb->inode_item.st_mtime); - fbi->ChangeTime.QuadPart = unix_time_to_win(&fcb->inode_item.st_ctime); - fbi->FileAttributes = fcb->atts; + if (fcb == fcb->Vcb->dummy_fcb) { + LARGE_INTEGER time; + + KeQuerySystemTime(&time); + fbi->CreationTime = fbi->LastAccessTime = fbi->LastWriteTime = fbi->ChangeTime = time; + } else { + fbi->CreationTime.QuadPart = unix_time_to_win(&fcb->inode_item.otime); + fbi->LastAccessTime.QuadPart = unix_time_to_win(&fcb->inode_item.st_atime); + fbi->LastWriteTime.QuadPart = unix_time_to_win(&fcb->inode_item.st_mtime); + fbi->ChangeTime.QuadPart = unix_time_to_win(&fcb->inode_item.st_ctime); + } + + fbi->FileAttributes = fcb->atts == 0 ? FILE_ATTRIBUTE_NORMAL : fcb->atts; IoStatus->Status = STATUS_SUCCESS; IoStatus->Information = sizeof(FILE_BASIC_INFORMATION); - + ExReleaseResourceLite(fcb->Header.Resource); FsRtlExitFileSystem(); - + return TRUE; } -static BOOLEAN STDCALL fast_query_standard_info(PFILE_OBJECT FileObject, BOOLEAN wait, PFILE_STANDARD_INFORMATION fsi, - PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject) { +_Function_class_(FAST_IO_QUERY_STANDARD_INFO) +#ifdef __REACTOS__ +static BOOLEAN NTAPI fast_query_standard_info(PFILE_OBJECT FileObject, BOOLEAN wait, PFILE_STANDARD_INFORMATION fsi, + PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject) { +#else +static BOOLEAN fast_query_standard_info(PFILE_OBJECT FileObject, BOOLEAN wait, PFILE_STANDARD_INFORMATION fsi, + PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject) { +#endif fcb* fcb; ccb* ccb; BOOL ads; ULONG adssize; - + + FsRtlEnterFileSystem(); + TRACE("(%p, %u, %p, %p, %p)\n", FileObject, wait, fsi, IoStatus, DeviceObject); - - if (!FileObject) + + if (!FileObject) { + FsRtlExitFileSystem(); return FALSE; - + } + fcb = FileObject->FsContext; ccb = FileObject->FsContext2; - - if (!fcb) + + if 
(!fcb) { + FsRtlExitFileSystem(); return FALSE; - - FsRtlEnterFileSystem(); - + } + if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) { FsRtlExitFileSystem(); return FALSE; } - + ads = fcb->ads; - + if (ads) { struct _fcb* fcb2; - + if (!ccb || !ccb->fileref || !ccb->fileref->parent || !ccb->fileref->parent->fcb) { ExReleaseResourceLite(fcb->Header.Resource); FsRtlExitFileSystem(); return FALSE; } - + adssize = fcb->adsdata.Length; - + fcb2 = ccb->fileref->parent->fcb; - + ExReleaseResourceLite(fcb->Header.Resource); - + fcb = fcb2; - + if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) { FsRtlExitFileSystem(); return FALSE; @@ -131,188 +157,214 @@ static BOOLEAN STDCALL fast_query_standard_info(PFILE_OBJECT FileObject, BOOLEAN fsi->AllocationSize.QuadPart = fsi->EndOfFile.QuadPart = adssize; fsi->NumberOfLinks = fcb->inode_item.st_nlink; - fsi->Directory = S_ISDIR(fcb->inode_item.st_mode); + fsi->Directory = FALSE; } else { - fsi->AllocationSize.QuadPart = S_ISDIR(fcb->inode_item.st_mode) ? 0 : sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size); + fsi->AllocationSize.QuadPart = fcb_alloc_size(fcb); fsi->EndOfFile.QuadPart = S_ISDIR(fcb->inode_item.st_mode) ? 0 : fcb->inode_item.st_size; fsi->NumberOfLinks = fcb->inode_item.st_nlink; fsi->Directory = S_ISDIR(fcb->inode_item.st_mode); } - + fsi->DeletePending = ccb->fileref ? 
ccb->fileref->delete_on_close : FALSE; IoStatus->Status = STATUS_SUCCESS; IoStatus->Information = sizeof(FILE_STANDARD_INFORMATION); - + ExReleaseResourceLite(fcb->Header.Resource); FsRtlExitFileSystem(); - - return TRUE; -} -static BOOLEAN STDCALL fast_io_query_open(PIRP Irp, PFILE_NETWORK_OPEN_INFORMATION NetworkInformation, PDEVICE_OBJECT DeviceObject) { - TRACE("STUB: fast_io_query_open\n"); - - return FALSE; + return TRUE; } -static BOOLEAN STDCALL fast_io_check_if_possible(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG Length, BOOLEAN Wait, +_Function_class_(FAST_IO_CHECK_IF_POSSIBLE) +#ifdef __REACTOS__ +static BOOLEAN NTAPI fast_io_check_if_possible(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG Length, BOOLEAN Wait, + ULONG LockKey, BOOLEAN CheckForReadOperation, PIO_STATUS_BLOCK IoStatus, + PDEVICE_OBJECT DeviceObject) { +#else +static BOOLEAN fast_io_check_if_possible(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG Length, BOOLEAN Wait, ULONG LockKey, BOOLEAN CheckForReadOperation, PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject) { +#endif fcb* fcb = FileObject->FsContext; LARGE_INTEGER len2; - - TRACE("(%p, %llx, %x, %x, %x, %x, %p, %p)\n", FileObject, FileOffset->QuadPart, Length, Wait, LockKey, CheckForReadOperation, IoStatus, DeviceObject); - + + UNUSED(Wait); + UNUSED(IoStatus); + UNUSED(DeviceObject); + len2.QuadPart = Length; - + if (CheckForReadOperation) { if (FsRtlFastCheckLockForRead(&fcb->lock, FileOffset, &len2, LockKey, FileObject, PsGetCurrentProcess())) return TRUE; } else { - if (!fcb->Vcb->readonly && !(fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY) && FsRtlFastCheckLockForWrite(&fcb->lock, FileOffset, &len2, LockKey, FileObject, PsGetCurrentProcess())) + if (!fcb->Vcb->readonly && !is_subvol_readonly(fcb->subvol, NULL) && FsRtlFastCheckLockForWrite(&fcb->lock, FileOffset, &len2, LockKey, FileObject, PsGetCurrentProcess())) return TRUE; } - - return FALSE; -} -static BOOLEAN STDCALL 
fast_io_lock(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, PLARGE_INTEGER Length, PEPROCESS ProcessId, ULONG Key, BOOLEAN FailImmediately, BOOLEAN ExclusiveLock, PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject){ - TRACE("STUB: fast_io_lock\n"); return FALSE; } -static BOOLEAN STDCALL fast_io_unlock_single(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, PLARGE_INTEGER Length, PEPROCESS ProcessId, ULONG Key, PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject){ - TRACE("STUB: fast_io_unlock_single\n"); - return FALSE; -} +_Function_class_(FAST_IO_QUERY_NETWORK_OPEN_INFO) +#ifdef __REACTOS__ +static BOOLEAN NTAPI fast_io_query_network_open_info(PFILE_OBJECT FileObject, BOOLEAN Wait, FILE_NETWORK_OPEN_INFORMATION* fnoi, + PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject) { +#else +static BOOLEAN fast_io_query_network_open_info(PFILE_OBJECT FileObject, BOOLEAN Wait, FILE_NETWORK_OPEN_INFORMATION* fnoi, + PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject) { +#endif + fcb* fcb; + ccb* ccb; + file_ref* fileref; -static BOOLEAN STDCALL fast_io_unlock_all(PFILE_OBJECT FileObject, PEPROCESS ProcessId, PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject){ - TRACE("STUB: fast_io_unlock_all\n"); - return FALSE; -} + FsRtlEnterFileSystem(); -static BOOLEAN STDCALL fast_io_unlock_all_by_key(PFILE_OBJECT FileObject, PVOID ProcessId, ULONG Key, PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject){ - TRACE("STUB: fast_io_unlock_all_by_key\n"); - return FALSE; -} + TRACE("(%p, %u, %p, %p, %p)\n", FileObject, Wait, fnoi, IoStatus, DeviceObject); -static BOOLEAN STDCALL fast_io_device_control(PFILE_OBJECT FileObject, BOOLEAN Wait, PVOID InputBuffer OPTIONAL, ULONG InputBufferLength, PVOID OutputBuffer OPTIONAL, ULONG OutputBufferLength, ULONG IoControlCode, PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject){ - TRACE("STUB: fast_io_device_control\n"); - return FALSE; -} + RtlZeroMemory(fnoi, sizeof(FILE_NETWORK_OPEN_INFORMATION)); -static VOID 
STDCALL fast_io_detach_device(PDEVICE_OBJECT SourceDevice, PDEVICE_OBJECT TargetDevice){ - TRACE("STUB: fast_io_detach_device\n"); -} + fcb = FileObject->FsContext; -static BOOLEAN STDCALL fast_io_query_network_open_info(PFILE_OBJECT FileObject, BOOLEAN Wait, struct _FILE_NETWORK_OPEN_INFORMATION *Buffer, struct _IO_STATUS_BLOCK *IoStatus, PDEVICE_OBJECT DeviceObject){ - TRACE("STUB: fast_io_query_network_open_info\n"); - return FALSE; + if (!fcb || fcb == fcb->Vcb->volume_fcb) { + FsRtlExitFileSystem(); + return FALSE; + } + + ccb = FileObject->FsContext2; + + if (!ccb) { + FsRtlExitFileSystem(); + return FALSE; + } + + fileref = ccb->fileref; + + if (fcb == fcb->Vcb->dummy_fcb) { + LARGE_INTEGER time; + + KeQuerySystemTime(&time); + fnoi->CreationTime = fnoi->LastAccessTime = fnoi->LastWriteTime = fnoi->ChangeTime = time; + } else { + INODE_ITEM* ii; + + if (fcb->ads) { + if (!fileref || !fileref->parent) { + ERR("no fileref for stream\n"); + FsRtlExitFileSystem(); + return FALSE; + } + + ii = &fileref->parent->fcb->inode_item; + } else + ii = &fcb->inode_item; + + fnoi->CreationTime.QuadPart = unix_time_to_win(&ii->otime); + fnoi->LastAccessTime.QuadPart = unix_time_to_win(&ii->st_atime); + fnoi->LastWriteTime.QuadPart = unix_time_to_win(&ii->st_mtime); + fnoi->ChangeTime.QuadPart = unix_time_to_win(&ii->st_ctime); + } + + if (fcb->ads) { + fnoi->AllocationSize.QuadPart = fnoi->EndOfFile.QuadPart = fcb->adsdata.Length; + fnoi->FileAttributes = fileref->parent->fcb->atts == 0 ? FILE_ATTRIBUTE_NORMAL : fileref->parent->fcb->atts; + } else { + fnoi->AllocationSize.QuadPart = fcb_alloc_size(fcb); + fnoi->EndOfFile.QuadPart = S_ISDIR(fcb->inode_item.st_mode) ? 0 : fcb->inode_item.st_size; + fnoi->FileAttributes = fcb->atts == 0 ? 
FILE_ATTRIBUTE_NORMAL : fcb->atts; + } + + FsRtlExitFileSystem(); + + return TRUE; } -static NTSTATUS STDCALL fast_io_acquire_for_mod_write(PFILE_OBJECT FileObject, PLARGE_INTEGER EndingOffset, struct _ERESOURCE **ResourceToRelease, PDEVICE_OBJECT DeviceObject) { +_Function_class_(FAST_IO_ACQUIRE_FOR_MOD_WRITE) +#ifdef __REACTOS__ +static NTSTATUS NTAPI fast_io_acquire_for_mod_write(PFILE_OBJECT FileObject, PLARGE_INTEGER EndingOffset, struct _ERESOURCE **ResourceToRelease, PDEVICE_OBJECT DeviceObject) { +#else +static NTSTATUS fast_io_acquire_for_mod_write(PFILE_OBJECT FileObject, PLARGE_INTEGER EndingOffset, struct _ERESOURCE **ResourceToRelease, PDEVICE_OBJECT DeviceObject) { +#endif fcb* fcb; - - TRACE("(%p, %llx, %p, %p)\n", FileObject, EndingOffset->QuadPart, ResourceToRelease, DeviceObject); - + + UNUSED(EndingOffset); + UNUSED(DeviceObject); + fcb = FileObject->FsContext; - + if (!fcb) return STATUS_INVALID_PARAMETER; - + *ResourceToRelease = fcb->Header.PagingIoResource; - + if (!ExAcquireResourceSharedLite(*ResourceToRelease, FALSE)) return STATUS_CANT_WAIT; - + return STATUS_SUCCESS; } -static BOOLEAN STDCALL fast_io_read_compressed(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG Length, ULONG LockKey, PVOID Buffer, PMDL *MdlChain, PIO_STATUS_BLOCK IoStatus, struct _COMPRESSED_DATA_INFO *CompressedDataInfo, ULONG CompressedDataInfoLength, PDEVICE_OBJECT DeviceObject){ - TRACE("STUB: fast_io_read_compressed\n"); - return FALSE; -} +_Function_class_(FAST_IO_RELEASE_FOR_MOD_WRITE) +#ifdef __REACTOS__ +static NTSTATUS NTAPI fast_io_release_for_mod_write(PFILE_OBJECT FileObject, struct _ERESOURCE *ResourceToRelease, PDEVICE_OBJECT DeviceObject) { +#else +static NTSTATUS fast_io_release_for_mod_write(PFILE_OBJECT FileObject, struct _ERESOURCE *ResourceToRelease, PDEVICE_OBJECT DeviceObject) { +#endif + UNUSED(FileObject); + UNUSED(DeviceObject); -static BOOLEAN STDCALL fast_io_write_compressed(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG 
Length, ULONG LockKey, PVOID Buffer, PMDL *MdlChain, PIO_STATUS_BLOCK IoStatus, struct _COMPRESSED_DATA_INFO *CompressedDataInfo, ULONG CompressedDataInfoLength, PDEVICE_OBJECT DeviceObject){ - TRACE("STUB: fast_io_write_compressed\n"); - return FALSE; -} + ExReleaseResourceLite(ResourceToRelease); -static BOOLEAN STDCALL fast_io_mdl_read_complete_compressed(PFILE_OBJECT FileObject, PMDL MdlChain, PDEVICE_OBJECT DeviceObject){ - TRACE("STUB: fast_io_mdl_read_complete_compressed\n"); - return FALSE; + return STATUS_SUCCESS; } -static BOOLEAN STDCALL fast_io_mdl_write_complete_compressed(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, PMDL MdlChain, PDEVICE_OBJECT DeviceObject){ - TRACE("STUB: fast_io_mdl_write_complete_compressed\n"); - return FALSE; -} +_Function_class_(FAST_IO_ACQUIRE_FOR_CCFLUSH) +#ifdef __REACTOS__ +static NTSTATUS NTAPI fast_io_acquire_for_ccflush(PFILE_OBJECT FileObject, PDEVICE_OBJECT DeviceObject) { +#else +static NTSTATUS fast_io_acquire_for_ccflush(PFILE_OBJECT FileObject, PDEVICE_OBJECT DeviceObject) { +#endif + UNUSED(FileObject); + UNUSED(DeviceObject); -static NTSTATUS STDCALL fast_io_release_for_mod_write(PFILE_OBJECT FileObject, struct _ERESOURCE *ResourceToRelease, PDEVICE_OBJECT DeviceObject){ - TRACE("STUB: fast_io_release_for_mod_write\n"); - return STATUS_NOT_IMPLEMENTED; -} + IoSetTopLevelIrp((PIRP)FSRTL_CACHE_TOP_LEVEL_IRP); -static NTSTATUS STDCALL fast_io_acquire_for_ccflush(PFILE_OBJECT FileObject, PDEVICE_OBJECT DeviceObject){ - TRACE("STUB: fast_io_acquire_for_ccflush\n"); return STATUS_SUCCESS; } -static NTSTATUS STDCALL fast_io_release_for_ccflush(PFILE_OBJECT FileObject, PDEVICE_OBJECT DeviceObject){ - TRACE("STUB: fast_io_release_for_ccflush\n"); +_Function_class_(FAST_IO_RELEASE_FOR_CCFLUSH) +#ifdef __REACTOS__ +static NTSTATUS NTAPI fast_io_release_for_ccflush(PFILE_OBJECT FileObject, PDEVICE_OBJECT DeviceObject) { +#else +static NTSTATUS fast_io_release_for_ccflush(PFILE_OBJECT FileObject, PDEVICE_OBJECT 
DeviceObject) { +#endif + UNUSED(FileObject); + UNUSED(DeviceObject); + + if (IoGetTopLevelIrp() == (PIRP)FSRTL_CACHE_TOP_LEVEL_IRP) + IoSetTopLevelIrp(NULL); + return STATUS_SUCCESS; } -static BOOLEAN STDCALL fast_io_write(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG Length, BOOLEAN Wait, ULONG LockKey, PVOID Buffer, PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject) { - TRACE("(%p (%.*S), %llx, %x, %x, %x, %p, %p, %p)\n", FileObject, FileObject->FileName.Length / sizeof(WCHAR), FileObject->FileName.Buffer, - *FileOffset, Length, Wait, LockKey, Buffer, IoStatus, DeviceObject); - +_Function_class_(FAST_IO_WRITE) +#ifdef __REACTOS__ +static BOOLEAN NTAPI fast_io_write(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG Length, BOOLEAN Wait, ULONG LockKey, PVOID Buffer, PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject) { +#else +static BOOLEAN fast_io_write(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG Length, BOOLEAN Wait, ULONG LockKey, PVOID Buffer, PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject) { +#endif if (FsRtlCopyWrite(FileObject, FileOffset, Length, Wait, LockKey, Buffer, IoStatus, DeviceObject)) { fcb* fcb = FileObject->FsContext; - + fcb->inode_item.st_size = fcb->Header.FileSize.QuadPart; - + return TRUE; } - - return FALSE; -} - -#ifdef _DEBUG -static BOOLEAN STDCALL fast_io_read(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG Length, BOOLEAN Wait, ULONG LockKey, PVOID Buffer, PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject) { - TRACE("(%p, %p, %x, %x, %x, %p, %p, %p)\n", FileObject, FileOffset, Length, Wait, LockKey, Buffer, IoStatus, DeviceObject); - - return FsRtlCopyRead(FileObject, FileOffset, Length, Wait, LockKey, Buffer, IoStatus, DeviceObject); -} - -static BOOLEAN STDCALL fast_io_mdl_read(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG Length, ULONG LockKey, PMDL* MdlChain, PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject) { - TRACE("(%p, %p, %x, %x, %p, %p, 
%p)\n", FileObject, FileOffset, Length, LockKey, MdlChain, IoStatus, DeviceObject); - - return FsRtlMdlReadDev(FileObject, FileOffset, Length, LockKey, MdlChain, IoStatus, DeviceObject); -} - -static BOOLEAN STDCALL fast_io_mdl_read_complete(PFILE_OBJECT FileObject, PMDL MdlChain, PDEVICE_OBJECT DeviceObject) { - TRACE("(%p, %p, %p)\n", FileObject, MdlChain, DeviceObject); - - return FsRtlMdlReadCompleteDev(FileObject, MdlChain, DeviceObject); -} - -static BOOLEAN STDCALL fast_io_prepare_mdl_write(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG Length, ULONG LockKey, PMDL* MdlChain, PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject) { - TRACE("(%p, %p, %x, %x, %p, %p, %p)\n", FileObject, FileOffset, Length, LockKey, MdlChain, IoStatus, DeviceObject); - - return FsRtlPrepareMdlWriteDev(FileObject, FileOffset, Length, LockKey, MdlChain, IoStatus, DeviceObject); -} - -static BOOLEAN STDCALL fast_io_mdl_write_complete(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, PMDL MdlChain, PDEVICE_OBJECT DeviceObject) { - TRACE("(%p, %p, %p, %p)\n", FileObject, FileOffset, MdlChain, DeviceObject); - return FsRtlMdlWriteCompleteDev(FileObject, FileOffset, MdlChain, DeviceObject); + return FALSE; } -#endif -void __stdcall init_fast_io_dispatch(FAST_IO_DISPATCH** fiod) { +void init_fast_io_dispatch(FAST_IO_DISPATCH** fiod) { RtlZeroMemory(&FastIoDispatch, sizeof(FastIoDispatch)); FastIoDispatch.SizeOfFastIoDispatch = sizeof(FAST_IO_DISPATCH); @@ -320,39 +372,17 @@ void __stdcall init_fast_io_dispatch(FAST_IO_DISPATCH** fiod) { FastIoDispatch.FastIoCheckIfPossible = fast_io_check_if_possible; FastIoDispatch.FastIoQueryBasicInfo = fast_query_basic_info; FastIoDispatch.FastIoQueryStandardInfo = fast_query_standard_info; - FastIoDispatch.FastIoLock = fast_io_lock; - FastIoDispatch.FastIoUnlockSingle = fast_io_unlock_single; - FastIoDispatch.FastIoUnlockAll = fast_io_unlock_all; - FastIoDispatch.FastIoUnlockAllByKey = fast_io_unlock_all_by_key; - 
FastIoDispatch.FastIoDeviceControl = fast_io_device_control; - FastIoDispatch.AcquireFileForNtCreateSection = acquire_file_for_create_section; - FastIoDispatch.ReleaseFileForNtCreateSection = release_file_for_create_section; - FastIoDispatch.FastIoDetachDevice = fast_io_detach_device; FastIoDispatch.FastIoQueryNetworkOpenInfo = fast_io_query_network_open_info; FastIoDispatch.AcquireForModWrite = fast_io_acquire_for_mod_write; - FastIoDispatch.FastIoReadCompressed = fast_io_read_compressed; - FastIoDispatch.FastIoWriteCompressed = fast_io_write_compressed; - FastIoDispatch.MdlReadCompleteCompressed = fast_io_mdl_read_complete_compressed; - FastIoDispatch.MdlWriteCompleteCompressed = fast_io_mdl_write_complete_compressed; - FastIoDispatch.FastIoQueryOpen = fast_io_query_open; FastIoDispatch.ReleaseForModWrite = fast_io_release_for_mod_write; FastIoDispatch.AcquireForCcFlush = fast_io_acquire_for_ccflush; FastIoDispatch.ReleaseForCcFlush = fast_io_release_for_ccflush; FastIoDispatch.FastIoWrite = fast_io_write; - -#ifdef _DEBUG - FastIoDispatch.FastIoRead = fast_io_read; - FastIoDispatch.MdlRead = fast_io_mdl_read; - FastIoDispatch.MdlReadComplete = fast_io_mdl_read_complete; - FastIoDispatch.PrepareMdlWrite = fast_io_prepare_mdl_write; - FastIoDispatch.MdlWriteComplete = fast_io_mdl_write_complete; -#else FastIoDispatch.FastIoRead = FsRtlCopyRead; FastIoDispatch.MdlRead = FsRtlMdlReadDev; FastIoDispatch.MdlReadComplete = FsRtlMdlReadCompleteDev; FastIoDispatch.PrepareMdlWrite = FsRtlPrepareMdlWriteDev; FastIoDispatch.MdlWriteComplete = FsRtlMdlWriteCompleteDev; -#endif - + *fiod = &FastIoDispatch; } diff --git a/reactos/drivers/filesystems/btrfs/fileinfo.c b/reactos/drivers/filesystems/btrfs/fileinfo.c index 0582d5aedf2..e79f4535119 100644 --- a/reactos/drivers/filesystems/btrfs/fileinfo.c +++ b/reactos/drivers/filesystems/btrfs/fileinfo.c @@ -1,17 +1,17 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of 
WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . */ @@ -24,9 +24,7 @@ #endif #endif -static NTSTATUS get_inode_dir_path(device_extension* Vcb, root* subvol, UINT64 inode, PUNICODE_STRING us, PIRP Irp); - -static NTSTATUS STDCALL set_basic_information(device_extension* Vcb, PIRP Irp, PFILE_OBJECT FileObject) { +static NTSTATUS set_basic_information(device_extension* Vcb, PIRP Irp, PFILE_OBJECT FileObject) { FILE_BASIC_INFORMATION* fbi = Irp->AssociatedIrp.SystemBuffer; fcb* fcb = FileObject->FsContext; ccb* ccb = FileObject->FsContext2; @@ -34,7 +32,7 @@ static NTSTATUS STDCALL set_basic_information(device_extension* Vcb, PIRP Irp, P ULONG defda, filter = 0; BOOL inode_item_changed = FALSE; NTSTATUS Status; - + if (fcb->ads) { if (fileref && fileref->parent) fcb = fileref->parent->fcb; @@ -43,103 +41,120 @@ static NTSTATUS STDCALL set_basic_information(device_extension* Vcb, PIRP Irp, P return STATUS_INTERNAL_ERROR; } } - + + if (!ccb) { + ERR("ccb was NULL\n"); + return STATUS_INVALID_PARAMETER; + } + TRACE("file = %S, attributes = %x\n", file_desc(FileObject), fbi->FileAttributes); - + ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); - + if (fbi->FileAttributes & FILE_ATTRIBUTE_DIRECTORY && fcb->type != BTRFS_TYPE_DIRECTORY) { WARN("attempted to set FILE_ATTRIBUTE_DIRECTORY on non-directory\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - - if (fcb->inode == 
SUBVOL_ROOT_INODE && fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY && + + if (fcb->inode == SUBVOL_ROOT_INODE && is_subvol_readonly(fcb->subvol, Irp) && (fbi->FileAttributes == 0 || fbi->FileAttributes & FILE_ATTRIBUTE_READONLY)) { Status = STATUS_ACCESS_DENIED; goto end; } - + + // don't allow readonly subvol to be made r/w if send operation running on it + if (fcb->inode == SUBVOL_ROOT_INODE && fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY && + fcb->subvol->send_ops > 0) { + Status = STATUS_DEVICE_NOT_READY; + goto end; + } + if (fbi->CreationTime.QuadPart == -1) ccb->user_set_creation_time = TRUE; else if (fbi->CreationTime.QuadPart != 0) { win_time_to_unix(fbi->CreationTime, &fcb->inode_item.otime); inode_item_changed = TRUE; filter |= FILE_NOTIFY_CHANGE_CREATION; - + ccb->user_set_creation_time = TRUE; } - + if (fbi->LastAccessTime.QuadPart == -1) ccb->user_set_access_time = TRUE; else if (fbi->LastAccessTime.QuadPart != 0) { win_time_to_unix(fbi->LastAccessTime, &fcb->inode_item.st_atime); inode_item_changed = TRUE; filter |= FILE_NOTIFY_CHANGE_LAST_ACCESS; - + ccb->user_set_access_time = TRUE; } - + if (fbi->LastWriteTime.QuadPart == -1) ccb->user_set_write_time = TRUE; else if (fbi->LastWriteTime.QuadPart != 0) { win_time_to_unix(fbi->LastWriteTime, &fcb->inode_item.st_mtime); inode_item_changed = TRUE; filter |= FILE_NOTIFY_CHANGE_LAST_WRITE; - + ccb->user_set_write_time = TRUE; } - + if (fbi->ChangeTime.QuadPart == -1) ccb->user_set_change_time = TRUE; else if (fbi->ChangeTime.QuadPart != 0) { win_time_to_unix(fbi->ChangeTime, &fcb->inode_item.st_ctime); inode_item_changed = TRUE; // no filter for this - + ccb->user_set_change_time = TRUE; } - + // FileAttributes == 0 means don't set - undocumented, but seen in fastfat if (fbi->FileAttributes != 0) { LARGE_INTEGER time; BTRFS_TIME now; - - defda = get_file_attributes(Vcb, &fcb->inode_item, fcb->subvol, fcb->inode, fcb->type, fileref->filepart.Length > 0 && fileref->filepart.Buffer[0] == '.', 
TRUE, Irp); - + + fbi->FileAttributes &= ~FILE_ATTRIBUTE_NORMAL; + + defda = get_file_attributes(Vcb, fcb->subvol, fcb->inode, fcb->type, fileref && fileref->dc && fileref->dc->name.Length >= sizeof(WCHAR) && fileref->dc->name.Buffer[0] == '.', + TRUE, Irp); + if (fcb->type == BTRFS_TYPE_DIRECTORY) fbi->FileAttributes |= FILE_ATTRIBUTE_DIRECTORY; else if (fcb->type == BTRFS_TYPE_SYMLINK) fbi->FileAttributes |= FILE_ATTRIBUTE_REPARSE_POINT; - + fcb->atts_changed = TRUE; - + if (fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT) fbi->FileAttributes |= FILE_ATTRIBUTE_REPARSE_POINT; - + if (defda == fbi->FileAttributes) fcb->atts_deleted = TRUE; - + else if (fcb->inode == SUBVOL_ROOT_INODE && (defda | FILE_ATTRIBUTE_READONLY) == (fbi->FileAttributes | FILE_ATTRIBUTE_READONLY)) + fcb->atts_deleted = TRUE; + fcb->atts = fbi->FileAttributes; - + KeQuerySystemTime(&time); win_time_to_unix(time, &now); - + if (!ccb->user_set_change_time) fcb->inode_item.st_ctime = now; - + fcb->subvol->root_item.ctransid = Vcb->superblock.generation; fcb->subvol->root_item.ctime = now; - + if (fcb->inode == SUBVOL_ROOT_INODE) { if (fbi->FileAttributes & FILE_ATTRIBUTE_READONLY) fcb->subvol->root_item.flags |= BTRFS_SUBVOL_READONLY; else fcb->subvol->root_item.flags &= ~BTRFS_SUBVOL_READONLY; } - + inode_item_changed = TRUE; - + filter |= FILE_NOTIFY_CHANGE_ATTRIBUTES; } @@ -147,38 +162,38 @@ static NTSTATUS STDCALL set_basic_information(device_extension* Vcb, PIRP Irp, P fcb->inode_item.transid = Vcb->superblock.generation; fcb->inode_item.sequence++; fcb->inode_item_changed = TRUE; - + mark_fcb_dirty(fcb); } - + if (filter != 0) - send_notification_fcb(fileref, filter, FILE_ACTION_MODIFIED); + send_notification_fcb(fileref, filter, FILE_ACTION_MODIFIED, NULL); Status = STATUS_SUCCESS; end: ExReleaseResourceLite(fcb->Header.Resource); - + return Status; } -static NTSTATUS STDCALL set_disposition_information(device_extension* Vcb, PIRP Irp, PFILE_OBJECT FileObject) { +static NTSTATUS 
set_disposition_information(device_extension* Vcb, PIRP Irp, PFILE_OBJECT FileObject) { FILE_DISPOSITION_INFORMATION* fdi = Irp->AssociatedIrp.SystemBuffer; fcb* fcb = FileObject->FsContext; ccb* ccb = FileObject->FsContext2; file_ref* fileref = ccb ? ccb->fileref : NULL; ULONG atts; NTSTATUS Status; - + if (!fileref) return STATUS_INVALID_PARAMETER; - + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); - + ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); - + TRACE("changing delete_on_close to %s for %S (fcb %p)\n", fdi->DeleteFile ? "TRUE" : "FALSE", file_desc(FileObject), fcb); - + if (fcb->ads) { if (fileref->parent) atts = fileref->parent->fcb->atts; @@ -189,58 +204,66 @@ static NTSTATUS STDCALL set_disposition_information(device_extension* Vcb, PIRP } } else atts = fcb->atts; - + TRACE("atts = %x\n", atts); - + if (atts & FILE_ATTRIBUTE_READONLY) { + TRACE("not allowing readonly file to be deleted\n"); Status = STATUS_CANNOT_DELETE; goto end; } - + // FIXME - can we skip this bit for subvols? 
- if (fcb->type == BTRFS_TYPE_DIRECTORY && fcb->inode_item.st_size > 0) { + if (fcb->type == BTRFS_TYPE_DIRECTORY && fcb->inode_item.st_size > 0 && (!fileref || fileref->fcb != Vcb->dummy_fcb)) { + TRACE("directory not empty\n"); Status = STATUS_DIRECTORY_NOT_EMPTY; goto end; } - + if (!MmFlushImageSection(&fcb->nonpaged->segment_object, MmFlushForDelete)) { - WARN("trying to delete file which is being mapped as an image\n"); + TRACE("trying to delete file which is being mapped as an image\n"); Status = STATUS_CANNOT_DELETE; goto end; } - + ccb->fileref->delete_on_close = fdi->DeleteFile; - + FileObject->DeletePending = fdi->DeleteFile; - + Status = STATUS_SUCCESS; - + end: ExReleaseResourceLite(fcb->Header.Resource); - + ExReleaseResourceLite(&Vcb->fcb_lock); + // send notification that directory is about to be deleted + if (NT_SUCCESS(Status) && fdi->DeleteFile && fcb->type == BTRFS_TYPE_DIRECTORY) { + FsRtlNotifyFullChangeDirectory(Vcb->NotifySync, &Vcb->DirNotifyList, FileObject->FsContext, + NULL, FALSE, FALSE, 0, NULL, NULL, NULL); + } + return Status; } BOOL has_open_children(file_ref* fileref) { LIST_ENTRY* le = fileref->children.Flink; - + if (IsListEmpty(&fileref->children)) return FALSE; - + while (le != &fileref->children) { file_ref* c = CONTAINING_RECORD(le, file_ref, list_entry); - + if (c->open_count > 0) return TRUE; - + if (has_open_children(c)) return TRUE; le = le->Flink; } - + return FALSE; } @@ -248,213 +271,229 @@ static NTSTATUS duplicate_fcb(fcb* oldfcb, fcb** pfcb) { device_extension* Vcb = oldfcb->Vcb; fcb* fcb; LIST_ENTRY* le; - + // FIXME - we can skip a lot of this if the inode is about to be deleted - - fcb = create_fcb(PagedPool); // FIXME - what if we duplicate the paging file? + + fcb = create_fcb(Vcb, PagedPool); // FIXME - what if we duplicate the paging file? 
if (!fcb) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + fcb->Vcb = Vcb; fcb->Header.IsFastIoPossible = fast_io_possible(fcb); fcb->Header.AllocationSize = oldfcb->Header.AllocationSize; fcb->Header.FileSize = oldfcb->Header.FileSize; fcb->Header.ValidDataLength = oldfcb->Header.ValidDataLength; - + fcb->type = oldfcb->type; - + if (oldfcb->ads) { fcb->ads = TRUE; fcb->adshash = oldfcb->adshash; fcb->adsmaxlen = oldfcb->adsmaxlen; - + if (oldfcb->adsxattr.Buffer && oldfcb->adsxattr.Length > 0) { fcb->adsxattr.Length = oldfcb->adsxattr.Length; fcb->adsxattr.MaximumLength = fcb->adsxattr.Length + 1; fcb->adsxattr.Buffer = ExAllocatePoolWithTag(PagedPool, fcb->adsxattr.MaximumLength, ALLOC_TAG); - + if (!fcb->adsxattr.Buffer) { ERR("out of memory\n"); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(fcb->adsxattr.Buffer, oldfcb->adsxattr.Buffer, fcb->adsxattr.Length); fcb->adsxattr.Buffer[fcb->adsxattr.Length] = 0; } - + if (oldfcb->adsdata.Buffer && oldfcb->adsdata.Length > 0) { fcb->adsdata.Length = fcb->adsdata.MaximumLength = oldfcb->adsdata.Length; fcb->adsdata.Buffer = ExAllocatePoolWithTag(PagedPool, fcb->adsdata.MaximumLength, ALLOC_TAG); - + if (!fcb->adsdata.Buffer) { ERR("out of memory\n"); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(fcb->adsdata.Buffer, oldfcb->adsdata.Buffer, fcb->adsdata.Length); } - + goto end; } - + RtlCopyMemory(&fcb->inode_item, &oldfcb->inode_item, sizeof(INODE_ITEM)); fcb->inode_item_changed = TRUE; - + if (oldfcb->sd && RtlLengthSecurityDescriptor(oldfcb->sd) > 0) { fcb->sd = ExAllocatePoolWithTag(PagedPool, RtlLengthSecurityDescriptor(oldfcb->sd), ALLOC_TAG); if (!fcb->sd) { ERR("out of memory\n"); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(fcb->sd, oldfcb->sd, RtlLengthSecurityDescriptor(oldfcb->sd)); } - + fcb->atts = oldfcb->atts; - + le = 
oldfcb->extents.Flink; while (le != &oldfcb->extents) { extent* ext = CONTAINING_RECORD(le, extent, list_entry); - + if (!ext->ignore) { - extent* ext2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG); - + extent* ext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG); + if (!ext2) { ERR("out of memory\n"); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INSUFFICIENT_RESOURCES; } - + ext2->offset = ext->offset; ext2->datalen = ext->datalen; - - if (ext2->datalen > 0) { - ext2->data = ExAllocatePoolWithTag(PagedPool, ext2->datalen, ALLOC_TAG); - - if (!ext2->data) { - ERR("out of memory\n"); - free_fcb(fcb); - return STATUS_INSUFFICIENT_RESOURCES; - } - - RtlCopyMemory(ext2->data, ext->data, ext2->datalen); - } else - ext2->data = NULL; - + + if (ext2->datalen > 0) + RtlCopyMemory(&ext2->extent_data, &ext->extent_data, ext2->datalen); + ext2->unique = FALSE; ext2->ignore = FALSE; ext2->inserted = TRUE; - + if (ext->csum) { ULONG len; - EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data; - - if (ext->data->compression == BTRFS_COMPRESSION_NONE) - len = ed2->num_bytes; + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; + + if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE) + len = (ULONG)ed2->num_bytes; else - len = ed2->size; - + len = (ULONG)ed2->size; + len = len * sizeof(UINT32) / Vcb->superblock.sector_size; - + ext2->csum = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG); if (!ext2->csum) { ERR("out of memory\n"); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(ext2->csum, ext->csum, len); } else ext2->csum = NULL; InsertTailList(&fcb->extents, &ext2->list_entry); } - + le = le->Flink; } - + le = oldfcb->hardlinks.Flink; while (le != &oldfcb->hardlinks) { hardlink *hl = CONTAINING_RECORD(le, hardlink, list_entry), *hl2; - + hl2 = ExAllocatePoolWithTag(PagedPool, sizeof(hardlink), ALLOC_TAG); - + if (!hl2) { ERR("out of memory\n"); - 
free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INSUFFICIENT_RESOURCES; } - + hl2->parent = hl->parent; hl2->index = hl->index; - + hl2->name.Length = hl2->name.MaximumLength = hl->name.Length; hl2->name.Buffer = ExAllocatePoolWithTag(PagedPool, hl2->name.MaximumLength, ALLOC_TAG); - + if (!hl2->name.Buffer) { ERR("out of memory\n"); ExFreePool(hl2); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(hl2->name.Buffer, hl->name.Buffer, hl->name.Length); - + hl2->utf8.Length = hl2->utf8.MaximumLength = hl->utf8.Length; hl2->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, hl2->utf8.MaximumLength, ALLOC_TAG); - + if (!hl2->utf8.Buffer) { ERR("out of memory\n"); ExFreePool(hl2->name.Buffer); ExFreePool(hl2); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(hl2->utf8.Buffer, hl->utf8.Buffer, hl->utf8.Length); - + InsertTailList(&fcb->hardlinks, &hl2->list_entry); - + le = le->Flink; } - - fcb->last_dir_index = oldfcb->last_dir_index; - + if (oldfcb->reparse_xattr.Buffer && oldfcb->reparse_xattr.Length > 0) { fcb->reparse_xattr.Length = fcb->reparse_xattr.MaximumLength = oldfcb->reparse_xattr.Length; - + fcb->reparse_xattr.Buffer = ExAllocatePoolWithTag(PagedPool, fcb->reparse_xattr.MaximumLength, ALLOC_TAG); if (!fcb->reparse_xattr.Buffer) { ERR("out of memory\n"); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(fcb->reparse_xattr.Buffer, oldfcb->reparse_xattr.Buffer, fcb->reparse_xattr.Length); } - + if (oldfcb->ea_xattr.Buffer && oldfcb->ea_xattr.Length > 0) { fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = oldfcb->ea_xattr.Length; - + fcb->ea_xattr.Buffer = ExAllocatePoolWithTag(PagedPool, fcb->ea_xattr.MaximumLength, ALLOC_TAG); if (!fcb->ea_xattr.Buffer) { ERR("out of memory\n"); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(fcb->ea_xattr.Buffer, 
oldfcb->ea_xattr.Buffer, fcb->ea_xattr.Length); } + fcb->prop_compression = oldfcb->prop_compression; + + le = oldfcb->xattrs.Flink; + while (le != &oldfcb->xattrs) { + xattr* xa = CONTAINING_RECORD(le, xattr, list_entry); + + if (xa->valuelen > 0) { + xattr* xa2; + + xa2 = ExAllocatePoolWithTag(PagedPool, offsetof(xattr, data[0]) + xa->namelen + xa->valuelen, ALLOC_TAG); + + if (!xa2) { + ERR("out of memory\n"); + free_fcb(Vcb, fcb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + xa2->namelen = xa->namelen; + xa2->valuelen = xa->valuelen; + xa2->dirty = xa->dirty; + memcpy(xa2->data, xa->data, xa->namelen + xa->valuelen); + + InsertTailList(&fcb->xattrs, &xa2->list_entry); + } + + le = le->Flink; + } + end: *pfcb = fcb; - + return STATUS_SUCCESS; } @@ -466,529 +505,263 @@ typedef struct _move_entry { LIST_ENTRY list_entry; } move_entry; -static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) { +static NTSTATUS add_children_to_move_list(device_extension* Vcb, move_entry* me, PIRP Irp) { NTSTATUS Status; - KEY searchkey; - traverse_ptr tp; - BOOL b; LIST_ENTRY* le; - move_entry* me2; - - static char xapref[] = "user."; - ULONG xapreflen = strlen(xapref); - - ExAcquireResourceSharedLite(&me->fileref->nonpaged->children_lock, TRUE); - - le = me->fileref->children.Flink; - while (le != &me->fileref->children) { - file_ref* fr = CONTAINING_RECORD(le, file_ref, list_entry); - - if (!fr->deleted) { - me2 = ExAllocatePoolWithTag(PagedPool, sizeof(move_entry), ALLOC_TAG); - if (!me2) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - me2->fileref = fr; - - increase_fileref_refcount(fr); - - me2->dummyfcb = NULL; - me2->dummyfileref = NULL; - me2->parent = me; - - InsertHeadList(&me->list_entry, &me2->list_entry); - } - - le = le->Flink; - } - - searchkey.obj_id = me->fileref->fcb->inode; - searchkey.obj_type = TYPE_XATTR_ITEM; - searchkey.offset = 0; - - Status = find_item(me->fileref->fcb->Vcb, me->fileref->fcb->subvol, 
&tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - goto end; - } - - do { - traverse_ptr next_tp; - - if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { - DIR_ITEM* xa = (DIR_ITEM*)tp.item->data; - ULONG len; - - if (tp.item->size < sizeof(DIR_ITEM)) { - ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM)); - Status = STATUS_INTERNAL_ERROR; - goto end; - } - - len = tp.item->size; - - do { - if (len < sizeof(DIR_ITEM) - 1 + xa->m + xa->n) { - ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - Status = STATUS_INTERNAL_ERROR; - goto end; - } - - if (xa->n > xapreflen && RtlCompareMemory(xa->name, xapref, xapreflen) == xapreflen && - (tp.item->key.offset != EA_DOSATTRIB_HASH || xa->n != strlen(EA_DOSATTRIB) || RtlCompareMemory(xa->name, EA_DOSATTRIB, xa->n) != xa->n) && - (tp.item->key.offset != EA_EA_HASH || xa->n != strlen(EA_EA) || RtlCompareMemory(xa->name, EA_EA, xa->n) != xa->n) - ) { - BOOL found = FALSE; - - le = me->fileref->children.Flink; - - while (le != &me->fileref->children) { - file_ref* fr = CONTAINING_RECORD(le, file_ref, list_entry); - - if (fr->fcb->ads && fr->fcb->adshash == tp.item->key.offset && fr->fcb->adsxattr.Length == xa->n && - RtlCompareMemory(fr->fcb->adsxattr.Buffer, xa->name, xa->n) == xa->n) { - found = TRUE; - break; - } - - le = le->Flink; - } - - if (!found) { - fcb* fcb; - file_ref* fr; - ANSI_STRING xattr; - ULONG stringlen; - - xattr.Length = xa->n; - xattr.MaximumLength = xattr.Length + 1; - xattr.Buffer = ExAllocatePoolWithTag(PagedPool, xattr.MaximumLength, ALLOC_TAG); - - if (!xattr.Buffer) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - RtlCopyMemory(xattr.Buffer, xa->name, xa->n); - xattr.Buffer[xa->n] = 0; - - Status = 
open_fcb_stream(me->fileref->fcb->Vcb, me->fileref->fcb->subvol, me->fileref->fcb->inode, &xattr, - tp.item->key.offset, me->fileref->fcb, &fcb, Irp); - - if (!NT_SUCCESS(Status)) { - ERR("open_fcb_stream returned %08x\n", Status); - ExFreePool(xattr.Buffer); - goto end; - } - - fr = create_fileref(); - if (!fr) { - ERR("out of memory\n"); - free_fcb(fcb); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - fr->fcb = fcb; - - Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, &xa->name[xapreflen], xa->n - xapreflen); - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); - free_fileref(fr); - goto end; - } - - fr->filepart.Buffer = ExAllocatePoolWithTag(PagedPool, stringlen, ALLOC_TAG); - if (!fr->filepart.Buffer) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - free_fileref(fr); - goto end; - } - - Status = RtlUTF8ToUnicodeN(fr->filepart.Buffer, stringlen, &stringlen, &xa->name[xapreflen], xa->n - xapreflen); - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); - free_fileref(fr); - goto end; - } - - fr->filepart.Length = fr->filepart.MaximumLength = stringlen; - - Status = RtlUpcaseUnicodeString(&fr->filepart_uc, &fr->filepart, TRUE); - if (!NT_SUCCESS(Status)) { - ERR("RtlUpcaseUnicodeString returned %08x\n", Status); - free_fileref(fr); - goto end; - } - fr->parent = (struct _file_ref*)me->fileref; - increase_fileref_refcount(fr->parent); - - insert_fileref_child(me->fileref, fr, FALSE); - - me2 = ExAllocatePoolWithTag(PagedPool, sizeof(move_entry), ALLOC_TAG); - if (!me2) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - free_fileref(fr); - goto end; - } - - me2->fileref = fr; - me2->dummyfcb = NULL; - me2->dummyfileref = NULL; - me2->parent = me; - - InsertHeadList(&me->list_entry, &me2->list_entry); - } - } - - len -= sizeof(DIR_ITEM) - 1 + xa->m + xa->n; - - if (len > 0) - xa = (DIR_ITEM*)&xa->name[xa->m + xa->n]; - } while (len > 0); - } - - b = 
find_next_item(me->fileref->fcb->Vcb, &tp, &next_tp, FALSE, Irp); - if (b) { - tp = next_tp; - - if (next_tp.item->key.obj_id > searchkey.obj_id || (next_tp.item->key.obj_id == searchkey.obj_id && next_tp.item->key.obj_type > searchkey.obj_type)) - break; - } - } while (b); - - if (me->fileref->fcb->type == BTRFS_TYPE_DIRECTORY && me->fileref->fcb->inode_item.st_size != 0) { - searchkey.obj_id = me->fileref->fcb->inode; - searchkey.obj_type = TYPE_DIR_INDEX; - searchkey.offset = 2; - - Status = find_item(me->fileref->fcb->Vcb, me->fileref->fcb->subvol, &tp, &searchkey, FALSE, Irp); + ExAcquireResourceSharedLite(&me->fileref->fcb->nonpaged->dir_children_lock, TRUE); + + le = me->fileref->fcb->dir_children_index.Flink; + + while (le != &me->fileref->fcb->dir_children_index) { + dir_child* dc = CONTAINING_RECORD(le, dir_child, list_entry_index); + file_ref* fr; + move_entry* me2; + + Status = open_fileref_child(Vcb, me->fileref, &dc->name, TRUE, TRUE, dc->index == 0 ? TRUE : FALSE, PagedPool, &fr, Irp); + if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - goto end; + ERR("open_fileref_child returned %08x\n", Status); + ExReleaseResourceLite(&me->fileref->fcb->nonpaged->dir_children_lock); + return Status; } - - do { - traverse_ptr next_tp; - - // FIXME - both lists are ordered; we can make this more efficient - - if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { - BOOL found = FALSE; - - le = me->fileref->children.Flink; - - while (le != &me->fileref->children) { - file_ref* fr = CONTAINING_RECORD(le, file_ref, list_entry); - - if (!fr->fcb->ads) { - if (fr->index == tp.item->key.offset) { - found = TRUE; - break; - } else if (fr->index > tp.item->key.offset) - break; - } - - le = le->Flink; - } - - if (!found) { - DIR_ITEM* di = (DIR_ITEM*)tp.item->data; - - if (tp.item->size < sizeof(DIR_ITEM)) { - ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, 
tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM)); - Status = STATUS_INTERNAL_ERROR; - goto end; - } - - if (tp.item->size < sizeof(DIR_ITEM) - 1 + di->m + di->n) { - ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM) - 1 + di->m + di->n); - Status = STATUS_INTERNAL_ERROR; - goto end; - } - - if (di->n == 0) { - ERR("(%llx,%x,%llx): filename length was 0\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - Status = STATUS_INTERNAL_ERROR; - goto end; - } - - if (di->key.obj_type == TYPE_INODE_ITEM || di->key.obj_type == TYPE_ROOT_ITEM) { - ANSI_STRING utf8; - fcb* fcb; - file_ref* fr; - ULONG stringlen; - root* subvol; - UINT64 inode; - dir_child* dc = NULL; - - utf8.Length = utf8.MaximumLength = di->n; - utf8.Buffer = ExAllocatePoolWithTag(PagedPool, utf8.MaximumLength, ALLOC_TAG); - if (!utf8.Buffer) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - RtlCopyMemory(utf8.Buffer, di->name, di->n); - - if (di->key.obj_type == TYPE_ROOT_ITEM) { - LIST_ENTRY* le2; - - subvol = NULL; - - le2 = me->fileref->fcb->Vcb->roots.Flink; - while (le2 != &me->fileref->fcb->Vcb->roots) { - root* r2 = CONTAINING_RECORD(le2, root, list_entry); - - if (r2->id == di->key.obj_id) { - subvol = r2; - break; - } - - le2 = le2->Flink; - } - - if (!subvol) { - ERR("could not find subvol %llx\n", di->key.obj_id); - Status = STATUS_INTERNAL_ERROR; - goto end; - } - inode = SUBVOL_ROOT_INODE; - } else { - subvol = me->fileref->fcb->subvol; - inode = di->key.obj_id; - } - - Status = open_fcb(me->fileref->fcb->Vcb, subvol, inode, di->type, &utf8, me->fileref->fcb, &fcb, PagedPool, Irp); - - if (!NT_SUCCESS(Status)) { - ERR("open_fcb returned %08x\n", Status); - ExFreePool(utf8.Buffer); - goto end; - } - - fr = create_fileref(); - if (!fr) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - 
ExFreePool(utf8.Buffer); - free_fcb(fcb); - goto end; - } - - fr->fcb = fcb; - fr->utf8 = utf8; - - Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, utf8.Buffer, utf8.Length); - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); - free_fileref(fr); - goto end; - } - - fr->filepart.Buffer = ExAllocatePoolWithTag(PagedPool, stringlen, ALLOC_TAG); - if (!fr->filepart.Buffer) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - free_fileref(fr); - goto end; - } - - Status = RtlUTF8ToUnicodeN(fr->filepart.Buffer, stringlen, &stringlen, utf8.Buffer, utf8.Length); - - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); - free_fileref(fr); - goto end; - } - - fr->filepart.Length = fr->filepart.MaximumLength = stringlen; - - Status = RtlUpcaseUnicodeString(&fr->filepart_uc, &fr->filepart, TRUE); - - if (!NT_SUCCESS(Status)) { - ERR("RtlUpcaseUnicodeString returned %08x\n", Status); - free_fileref(fr); - goto end; - } - - fr->parent = me->fileref; - - fr->index = tp.item->key.offset; - increase_fileref_refcount(me->fileref); - - Status = add_dir_child(me->fileref->fcb, di->key.obj_type == TYPE_ROOT_ITEM ? subvol->id : fr->fcb->inode, - di->key.obj_type == TYPE_ROOT_ITEM ? 
TRUE : FALSE, fr->index, &utf8, &fr->filepart, &fr->filepart_uc, BTRFS_TYPE_DIRECTORY, &dc); - if (!NT_SUCCESS(Status)) - WARN("add_dir_child returned %08x\n", Status); - - fr->dc = dc; - dc->fileref = fr; - - insert_fileref_child(fr->parent, fr, FALSE); - - if (fr->fcb->type == BTRFS_TYPE_DIRECTORY) - fr->fcb->fileref = fr; - - me2 = ExAllocatePoolWithTag(PagedPool, sizeof(move_entry), ALLOC_TAG); - if (!me2) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - free_fileref(fr); - goto end; - } - - me2->fileref = fr; - me2->dummyfcb = NULL; - me2->dummyfileref = NULL; - me2->parent = me; - - InsertHeadList(&me->list_entry, &me2->list_entry); - } else { - ERR("unrecognized key (%llx,%x,%llx)\n", di->key.obj_id, di->key.obj_type, di->key.offset); - Status = STATUS_INTERNAL_ERROR; - goto end; - } - } - } - - b = find_next_item(me->fileref->fcb->Vcb, &tp, &next_tp, FALSE, Irp); - if (b) { - tp = next_tp; - - if (next_tp.item->key.obj_id > searchkey.obj_id || (next_tp.item->key.obj_id == searchkey.obj_id && next_tp.item->key.obj_type > searchkey.obj_type)) - break; - } - } while (b); + me2 = ExAllocatePoolWithTag(PagedPool, sizeof(move_entry), ALLOC_TAG); + if (!me2) { + ERR("out of memory\n"); + ExReleaseResourceLite(&me->fileref->fcb->nonpaged->dir_children_lock); + return STATUS_INSUFFICIENT_RESOURCES; + } + + me2->fileref = fr; + me2->dummyfcb = NULL; + me2->dummyfileref = NULL; + me2->parent = me; + + InsertHeadList(&me->list_entry, &me2->list_entry); + + le = le->Flink; } - - Status = STATUS_SUCCESS; - -end: - ExReleaseResourceLite(&me->fileref->nonpaged->children_lock); - - return Status; + + ExReleaseResourceLite(&me->fileref->fcb->nonpaged->dir_children_lock); + + return STATUS_SUCCESS; } void remove_dir_child_from_hash_lists(fcb* fcb, dir_child* dc) { UINT8 c; - + c = dc->hash >> 24; - + if (fcb->hash_ptrs[c] == &dc->list_entry_hash) { if (dc->list_entry_hash.Flink == &fcb->dir_children_hash) fcb->hash_ptrs[c] = NULL; else { dir_child* dc2 
= CONTAINING_RECORD(dc->list_entry_hash.Flink, dir_child, list_entry_hash); - + if (dc2->hash >> 24 == c) fcb->hash_ptrs[c] = &dc2->list_entry_hash; else fcb->hash_ptrs[c] = NULL; } } - + RemoveEntryList(&dc->list_entry_hash); - + c = dc->hash_uc >> 24; - + if (fcb->hash_ptrs_uc[c] == &dc->list_entry_hash_uc) { if (dc->list_entry_hash_uc.Flink == &fcb->dir_children_hash_uc) fcb->hash_ptrs_uc[c] = NULL; else { dir_child* dc2 = CONTAINING_RECORD(dc->list_entry_hash_uc.Flink, dir_child, list_entry_hash_uc); - + if (dc2->hash_uc >> 24 == c) fcb->hash_ptrs_uc[c] = &dc2->list_entry_hash_uc; else fcb->hash_ptrs_uc[c] = NULL; } } - + RemoveEntryList(&dc->list_entry_hash_uc); } -static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_STRING utf8, PUNICODE_STRING fnus, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS create_directory_fcb(device_extension* Vcb, root* r, fcb* parfcb, fcb** pfcb) { NTSTATUS Status; - LIST_ENTRY move_list, *le; - move_entry* me; + fcb* fcb; + SECURITY_SUBJECT_CONTEXT subjcont; + PSID owner; + BOOLEAN defaulted; LARGE_INTEGER time; BTRFS_TIME now; - file_ref* origparent; - - InitializeListHead(&move_list); - - me = ExAllocatePoolWithTag(PagedPool, sizeof(move_entry), ALLOC_TAG); - - if (!me) { + + fcb = create_fcb(Vcb, PagedPool); + if (!fcb) { ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - origparent = fileref->parent; - - me->fileref = fileref; - increase_fileref_refcount(me->fileref); - me->dummyfcb = NULL; - me->dummyfileref = NULL; - me->parent = NULL; - - InsertTailList(&move_list, &me->list_entry); - - le = move_list.Flink; - while (le != &move_list) { - me = CONTAINING_RECORD(le, move_entry, list_entry); - - ExAcquireResourceSharedLite(me->fileref->fcb->Header.Resource, TRUE); - - if (!me->fileref->fcb->ads && me->fileref->fcb->subvol == origparent->fcb->subvol) { - Status = add_children_to_move_list(me, Irp); - - if (!NT_SUCCESS(Status)) { - ERR("add_children_to_move_list 
returned %08x\n", Status); - goto end; - } - } - - ExReleaseResourceLite(me->fileref->fcb->Header.Resource); - - le = le->Flink; + return STATUS_INSUFFICIENT_RESOURCES; } - - // loop through list and create new inodes - - le = move_list.Flink; - while (le != &move_list) { - me = CONTAINING_RECORD(le, move_entry, list_entry); - - if (me->fileref->fcb->inode != SUBVOL_ROOT_INODE) { + + KeQuerySystemTime(&time); + win_time_to_unix(time, &now); + + fcb->Vcb = Vcb; + + fcb->subvol = r; + fcb->inode = InterlockedIncrement64(&r->lastinode); + fcb->type = BTRFS_TYPE_DIRECTORY; + + fcb->inode_item.generation = Vcb->superblock.generation; + fcb->inode_item.transid = Vcb->superblock.generation; + fcb->inode_item.st_nlink = 1; + fcb->inode_item.st_mode = __S_IFDIR | inherit_mode(parfcb, TRUE); + fcb->inode_item.st_atime = fcb->inode_item.st_ctime = fcb->inode_item.st_mtime = fcb->inode_item.otime = now; + fcb->inode_item.st_gid = GID_NOBODY; + + fcb->atts = get_file_attributes(Vcb, fcb->subvol, fcb->inode, fcb->type, FALSE, TRUE, NULL); + + SeCaptureSubjectContext(&subjcont); + + Status = SeAssignSecurity(parfcb->sd, NULL, (void**)&fcb->sd, TRUE, &subjcont, IoGetFileObjectGenericMapping(), PagedPool); + + if (!NT_SUCCESS(Status)) { + ERR("SeAssignSecurity returned %08x\n", Status); + return Status; + } + + if (!fcb->sd) { + ERR("SeAssignSecurity returned NULL security descriptor\n"); + return STATUS_INTERNAL_ERROR; + } + + Status = RtlGetOwnerSecurityDescriptor(fcb->sd, &owner, &defaulted); + if (!NT_SUCCESS(Status)) { + ERR("RtlGetOwnerSecurityDescriptor returned %08x\n", Status); + fcb->inode_item.st_uid = UID_NOBODY; + fcb->sd_dirty = TRUE; + } else { + fcb->inode_item.st_uid = sid_to_uid(owner); + fcb->sd_dirty = fcb->inode_item.st_uid == UID_NOBODY; + } + + find_gid(fcb, parfcb, &subjcont); + + fcb->inode_item_changed = TRUE; + + InsertTailList(&r->fcbs, &fcb->list_entry); + InsertTailList(&Vcb->all_fcbs, &fcb->list_entry_all); + + fcb->Header.IsFastIoPossible = 
fast_io_possible(fcb); + fcb->Header.AllocationSize.QuadPart = 0; + fcb->Header.FileSize.QuadPart = 0; + fcb->Header.ValidDataLength.QuadPart = 0; + + fcb->created = TRUE; + mark_fcb_dirty(fcb); + + if (parfcb->inode_item.flags & BTRFS_INODE_COMPRESS) + fcb->inode_item.flags |= BTRFS_INODE_COMPRESS; + + fcb->prop_compression = parfcb->prop_compression; + fcb->prop_compression_changed = fcb->prop_compression != PropCompression_None; + + fcb->hash_ptrs = ExAllocatePoolWithTag(PagedPool, sizeof(LIST_ENTRY*) * 256, ALLOC_TAG); + if (!fcb->hash_ptrs) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(fcb->hash_ptrs, sizeof(LIST_ENTRY*) * 256); + + fcb->hash_ptrs_uc = ExAllocatePoolWithTag(PagedPool, sizeof(LIST_ENTRY*) * 256, ALLOC_TAG); + if (!fcb->hash_ptrs_uc) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(fcb->hash_ptrs_uc, sizeof(LIST_ENTRY*) * 256); + + *pfcb = fcb; + + return STATUS_SUCCESS; +} + +static NTSTATUS move_across_subvols(file_ref* fileref, ccb* ccb, file_ref* destdir, PANSI_STRING utf8, PUNICODE_STRING fnus, PIRP Irp, LIST_ENTRY* rollback) { + NTSTATUS Status; + LIST_ENTRY move_list, *le; + move_entry* me; + LARGE_INTEGER time; + BTRFS_TIME now; + file_ref* origparent; + + InitializeListHead(&move_list); + + KeQuerySystemTime(&time); + win_time_to_unix(time, &now); + + me = ExAllocatePoolWithTag(PagedPool, sizeof(move_entry), ALLOC_TAG); + + if (!me) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + origparent = fileref->parent; + + me->fileref = fileref; + increase_fileref_refcount(me->fileref); + me->dummyfcb = NULL; + me->dummyfileref = NULL; + me->parent = NULL; + + InsertTailList(&move_list, &me->list_entry); + + le = move_list.Flink; + while (le != &move_list) { + me = CONTAINING_RECORD(le, move_entry, list_entry); + + ExAcquireResourceSharedLite(me->fileref->fcb->Header.Resource, TRUE); + + if (!me->fileref->fcb->ads && 
me->fileref->fcb->subvol == origparent->fcb->subvol) { + Status = add_children_to_move_list(fileref->fcb->Vcb, me, Irp); + + if (!NT_SUCCESS(Status)) { + ERR("add_children_to_move_list returned %08x\n", Status); + goto end; + } + } + + ExReleaseResourceLite(me->fileref->fcb->Header.Resource); + + le = le->Flink; + } + + send_notification_fileref(fileref, fileref->fcb->type == BTRFS_TYPE_DIRECTORY ? FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_REMOVED, NULL); + + // loop through list and create new inodes + + le = move_list.Flink; + while (le != &move_list) { + me = CONTAINING_RECORD(le, move_entry, list_entry); + + if (me->fileref->fcb->inode != SUBVOL_ROOT_INODE && me->fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { if (!me->dummyfcb) { ULONG defda; BOOL inserted = FALSE; - LIST_ENTRY* le; - + LIST_ENTRY* le3; + ExAcquireResourceExclusiveLite(me->fileref->fcb->Header.Resource, TRUE); - + Status = duplicate_fcb(me->fileref->fcb, &me->dummyfcb); if (!NT_SUCCESS(Status)) { ERR("duplicate_fcb returned %08x\n", Status); ExReleaseResourceLite(me->fileref->fcb->Header.Resource); goto end; } - + me->dummyfcb->subvol = me->fileref->fcb->subvol; me->dummyfcb->inode = me->fileref->fcb->inode; - + if (!me->dummyfcb->ads) { me->dummyfcb->sd_dirty = me->fileref->fcb->sd_dirty; me->dummyfcb->atts_changed = me->fileref->fcb->atts_changed; @@ -997,116 +770,133 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_ me->dummyfcb->reparse_xattr_changed = me->fileref->fcb->reparse_xattr_changed; me->dummyfcb->ea_changed = me->fileref->fcb->ea_changed; } - + me->dummyfcb->created = me->fileref->fcb->created; me->dummyfcb->deleted = me->fileref->fcb->deleted; mark_fcb_dirty(me->dummyfcb); - + if (!me->fileref->fcb->ads) { LIST_ENTRY* le2; - + me->fileref->fcb->subvol = destdir->fcb->subvol; me->fileref->fcb->inode = InterlockedIncrement64(&destdir->fcb->subvol->lastinode); me->fileref->fcb->inode_item.st_nlink = 1; - - defda = 
get_file_attributes(me->fileref->fcb->Vcb, &me->fileref->fcb->inode_item, me->fileref->fcb->subvol, me->fileref->fcb->inode, - me->fileref->fcb->type, me->fileref->filepart.Length > 0 && me->fileref->filepart.Buffer[0] == '.', TRUE, Irp); - + + defda = get_file_attributes(me->fileref->fcb->Vcb, me->fileref->fcb->subvol, me->fileref->fcb->inode, + me->fileref->fcb->type, me->fileref->dc && me->fileref->dc->name.Length >= sizeof(WCHAR) && me->fileref->dc->name.Buffer[0] == '.', + TRUE, Irp); + me->fileref->fcb->sd_dirty = !!me->fileref->fcb->sd; me->fileref->fcb->atts_changed = defda != me->fileref->fcb->atts; me->fileref->fcb->extents_changed = !IsListEmpty(&me->fileref->fcb->extents); me->fileref->fcb->reparse_xattr_changed = !!me->fileref->fcb->reparse_xattr.Buffer; me->fileref->fcb->ea_changed = !!me->fileref->fcb->ea_xattr.Buffer; + me->fileref->fcb->xattrs_changed = !IsListEmpty(&me->fileref->fcb->xattrs); me->fileref->fcb->inode_item_changed = TRUE; - + + le2 = me->fileref->fcb->xattrs.Flink; + while (le2 != &me->fileref->fcb->xattrs) { + xattr* xa = CONTAINING_RECORD(le2, xattr, list_entry); + + xa->dirty = TRUE; + + le2 = le2->Flink; + } + + if (le == move_list.Flink) { // first entry + me->fileref->fcb->inode_item.transid = me->fileref->fcb->Vcb->superblock.generation; + me->fileref->fcb->inode_item.sequence++; + + if (!ccb->user_set_change_time) + me->fileref->fcb->inode_item.st_ctime = now; + } + le2 = me->fileref->fcb->extents.Flink; while (le2 != &me->fileref->fcb->extents) { extent* ext = CONTAINING_RECORD(le2, extent, list_entry); - - if (!ext->ignore && ext->datalen >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2) && - (ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC)) { - EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data; - + + if (!ext->ignore && (ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC)) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; + if 
(ed2->size != 0) { chunk* c = get_chunk_from_address(me->fileref->fcb->Vcb, ed2->address); - + if (!c) { ERR("get_chunk_from_address(%llx) failed\n", ed2->address); } else { Status = update_changed_extent_ref(me->fileref->fcb->Vcb, c, ed2->address, ed2->size, me->fileref->fcb->subvol->id, me->fileref->fcb->inode, ext->offset - ed2->offset, 1, me->fileref->fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp); - + if (!NT_SUCCESS(Status)) { ERR("update_changed_extent_ref returned %08x\n", Status); ExReleaseResourceLite(me->fileref->fcb->Header.Resource); goto end; } } - + } } - + le2 = le2->Flink; } } else { me->fileref->fcb->subvol = me->parent->fileref->fcb->subvol; me->fileref->fcb->inode = me->parent->fileref->fcb->inode; } - + me->fileref->fcb->created = TRUE; - + InsertHeadList(&me->fileref->fcb->list_entry, &me->dummyfcb->list_entry); RemoveEntryList(&me->fileref->fcb->list_entry); - - le = destdir->fcb->subvol->fcbs.Flink; - while (le != &destdir->fcb->subvol->fcbs) { - fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry); - + + le3 = destdir->fcb->subvol->fcbs.Flink; + while (le3 != &destdir->fcb->subvol->fcbs) { + fcb* fcb = CONTAINING_RECORD(le3, struct _fcb, list_entry); + if (fcb->inode > me->fileref->fcb->inode) { - InsertHeadList(le->Blink, &me->fileref->fcb->list_entry); + InsertHeadList(le3->Blink, &me->fileref->fcb->list_entry); inserted = TRUE; break; } - - le = le->Flink; + + le3 = le3->Flink; } - + if (!inserted) InsertTailList(&destdir->fcb->subvol->fcbs, &me->fileref->fcb->list_entry); - + InsertTailList(&me->fileref->fcb->Vcb->all_fcbs, &me->dummyfcb->list_entry_all); - + while (!IsListEmpty(&me->fileref->fcb->hardlinks)) { - LIST_ENTRY* le = RemoveHeadList(&me->fileref->fcb->hardlinks); - hardlink* hl = CONTAINING_RECORD(le, hardlink, list_entry); - + hardlink* hl = CONTAINING_RECORD(RemoveHeadList(&me->fileref->fcb->hardlinks), hardlink, list_entry); + if (hl->name.Buffer) ExFreePool(hl->name.Buffer); - + if (hl->utf8.Buffer) 
ExFreePool(hl->utf8.Buffer); ExFreePool(hl); } - + me->fileref->fcb->inode_item_changed = TRUE; mark_fcb_dirty(me->fileref->fcb); - + if ((!me->dummyfcb->ads && me->dummyfcb->inode_item.st_nlink > 1) || (me->dummyfcb->ads && me->parent->dummyfcb->inode_item.st_nlink > 1)) { LIST_ENTRY* le2 = le->Flink; - + while (le2 != &move_list) { move_entry* me2 = CONTAINING_RECORD(le2, move_entry, list_entry); - + if (me2->fileref->fcb == me->fileref->fcb && !me2->fileref->fcb->ads) { me2->dummyfcb = me->dummyfcb; InterlockedIncrement(&me->dummyfcb->refcount); } - + le2 = le2->Flink; } } - + ExReleaseResourceLite(me->fileref->fcb->Header.Resource); } else { ExAcquireResourceExclusiveLite(me->fileref->fcb->Header.Resource, TRUE); @@ -1115,175 +905,172 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_ ExReleaseResourceLite(me->fileref->fcb->Header.Resource); } } - + le = le->Flink; } - - KeQuerySystemTime(&time); - win_time_to_unix(time, &now); - + fileref->fcb->subvol->root_item.ctransid = fileref->fcb->Vcb->superblock.generation; fileref->fcb->subvol->root_item.ctime = now; - + // loop through list and create new filerefs - + le = move_list.Flink; while (le != &move_list) { hardlink* hl; BOOL name_changed = FALSE; - + me = CONTAINING_RECORD(le, move_entry, list_entry); - - me->dummyfileref = create_fileref(); + + me->dummyfileref = create_fileref(fileref->fcb->Vcb); if (!me->dummyfileref) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - - if (me->fileref->fcb->inode == SUBVOL_ROOT_INODE) + + if (me->fileref->fcb == me->fileref->fcb->Vcb->dummy_fcb) { + root* r = me->parent ? 
me->parent->fileref->fcb->subvol : destdir->fcb->subvol; + + Status = create_directory_fcb(me->fileref->fcb->Vcb, r, me->fileref->parent->fcb, &me->fileref->fcb); + if (!NT_SUCCESS(Status)) { + ERR("create_directory_fcb returnd %08x\n", Status); + goto end; + } + + me->fileref->dc->key.obj_id = me->fileref->fcb->inode; + me->fileref->dc->key.obj_type = TYPE_INODE_ITEM; + + me->dummyfileref->fcb = me->fileref->fcb->Vcb->dummy_fcb; + } else if (me->fileref->fcb->inode == SUBVOL_ROOT_INODE) { me->dummyfileref->fcb = me->fileref->fcb; - else + + me->fileref->fcb->subvol->parent = le == move_list.Flink ? destdir->fcb->subvol->id : me->parent->fileref->fcb->subvol->id; + } else me->dummyfileref->fcb = me->dummyfcb; - + InterlockedIncrement(&me->dummyfileref->fcb->refcount); - me->dummyfileref->filepart = me->fileref->filepart; - - if (le == move_list.Flink) // first item - me->fileref->filepart.Length = me->fileref->filepart.MaximumLength = fnus->Length; - else - me->fileref->filepart.MaximumLength = me->fileref->filepart.Length; - - me->fileref->filepart.Buffer = ExAllocatePoolWithTag(PagedPool, me->fileref->filepart.MaximumLength, ALLOC_TAG); - - if (!me->fileref->filepart.Buffer) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - RtlCopyMemory(me->fileref->filepart.Buffer, le == move_list.Flink ? 
fnus->Buffer : me->dummyfileref->filepart.Buffer, me->fileref->filepart.Length); - - Status = RtlUpcaseUnicodeString(&me->fileref->filepart_uc, &me->fileref->filepart, TRUE); - if (!NT_SUCCESS(Status)) { - ERR("RtlUpcaseUnicodeString returned %08x\n", Status); - goto end; - } - - me->dummyfileref->utf8 = me->fileref->utf8; me->dummyfileref->oldutf8 = me->fileref->oldutf8; - - if (le == move_list.Flink) { - if (me->fileref->utf8.Length != utf8->Length || RtlCompareMemory(me->fileref->utf8.Buffer, utf8->Buffer, utf8->Length) != utf8->Length) - name_changed = TRUE; - - me->fileref->utf8.Length = me->fileref->utf8.MaximumLength = utf8->Length; - } else - me->fileref->utf8.MaximumLength = me->fileref->utf8.Length; - - if (me->fileref->utf8.MaximumLength > 0) { - me->fileref->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, me->fileref->utf8.MaximumLength, ALLOC_TAG); - - if (!me->fileref->utf8.Buffer) { + me->dummyfileref->oldindex = me->fileref->dc->index; + + if (le == move_list.Flink && (me->fileref->dc->utf8.Length != utf8->Length || RtlCompareMemory(me->fileref->dc->utf8.Buffer, utf8->Buffer, utf8->Length) != utf8->Length)) + name_changed = TRUE; + + if ((le == move_list.Flink || me->fileref->fcb->inode == SUBVOL_ROOT_INODE) && !me->dummyfileref->oldutf8.Buffer) { + me->dummyfileref->oldutf8.Buffer = ExAllocatePoolWithTag(PagedPool, me->fileref->dc->utf8.Length, ALLOC_TAG); + if (!me->dummyfileref->oldutf8.Buffer) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - - RtlCopyMemory(me->fileref->utf8.Buffer, le == move_list.Flink ? 
utf8->Buffer : me->dummyfileref->utf8.Buffer, me->fileref->utf8.Length); + + RtlCopyMemory(me->dummyfileref->oldutf8.Buffer, me->fileref->dc->utf8.Buffer, me->fileref->dc->utf8.Length); + + me->dummyfileref->oldutf8.Length = me->dummyfileref->oldutf8.MaximumLength = me->fileref->dc->utf8.Length; } - + me->dummyfileref->delete_on_close = me->fileref->delete_on_close; me->dummyfileref->deleted = me->fileref->deleted; - + me->dummyfileref->created = me->fileref->created; me->fileref->created = TRUE; - + me->dummyfileref->parent = me->parent ? me->parent->dummyfileref : origparent; increase_fileref_refcount(me->dummyfileref->parent); - - me->dummyfileref->index = me->fileref->index; - insert_fileref_child(me->dummyfileref->parent, me->dummyfileref, TRUE); - + ExAcquireResourceExclusiveLite(&me->dummyfileref->parent->nonpaged->children_lock, TRUE); + InsertTailList(&me->dummyfileref->parent->children, &me->dummyfileref->list_entry); + ExReleaseResourceLite(&me->dummyfileref->parent->nonpaged->children_lock); + me->dummyfileref->debug_desc = me->fileref->debug_desc; - + if (me->dummyfileref->fcb->type == BTRFS_TYPE_DIRECTORY) me->dummyfileref->fcb->fileref = me->dummyfileref; - + if (!me->parent) { RemoveEntryList(&me->fileref->list_entry); - - free_fileref(me->fileref->parent); - + increase_fileref_refcount(destdir); - - Status = fcb_get_last_dir_index(destdir->fcb, &me->fileref->index, Irp); - if (!NT_SUCCESS(Status)) { - ERR("fcb_get_last_dir_index returned %08x\n", Status); - goto end; - } - + if (me->fileref->dc) { // remove from old parent ExAcquireResourceExclusiveLite(&me->fileref->parent->fcb->nonpaged->dir_children_lock, TRUE); RemoveEntryList(&me->fileref->dc->list_entry_index); remove_dir_child_from_hash_lists(me->fileref->parent->fcb, me->fileref->dc); ExReleaseResourceLite(&me->fileref->parent->fcb->nonpaged->dir_children_lock); - + + me->fileref->parent->fcb->inode_item.st_size -= me->fileref->dc->utf8.Length * 2; + 
me->fileref->parent->fcb->inode_item.transid = me->fileref->fcb->Vcb->superblock.generation; + me->fileref->parent->fcb->inode_item.sequence++; + me->fileref->parent->fcb->inode_item.st_ctime = now; + me->fileref->parent->fcb->inode_item.st_mtime = now; + me->fileref->parent->fcb->inode_item_changed = TRUE; + mark_fcb_dirty(me->fileref->parent->fcb); + if (name_changed) { ExFreePool(me->fileref->dc->utf8.Buffer); ExFreePool(me->fileref->dc->name.Buffer); ExFreePool(me->fileref->dc->name_uc.Buffer); - + me->fileref->dc->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, utf8->Length, ALLOC_TAG); if (!me->fileref->dc->utf8.Buffer) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - - me->fileref->dc->name.Buffer = ExAllocatePoolWithTag(PagedPool, me->fileref->filepart.Length, ALLOC_TAG); + + me->fileref->dc->utf8.Length = me->fileref->dc->utf8.MaximumLength = utf8->Length; + RtlCopyMemory(me->fileref->dc->utf8.Buffer, utf8->Buffer, utf8->Length); + + me->fileref->dc->name.Buffer = ExAllocatePoolWithTag(PagedPool, fnus->Length, ALLOC_TAG); if (!me->fileref->dc->name.Buffer) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - - me->fileref->dc->name_uc.Buffer = ExAllocatePoolWithTag(PagedPool, me->fileref->filepart_uc.Length, ALLOC_TAG); - if (!me->fileref->dc->name_uc.Buffer) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; + + me->fileref->dc->name.Length = me->fileref->dc->name.MaximumLength = fnus->Length; + RtlCopyMemory(me->fileref->dc->name.Buffer, fnus->Buffer, fnus->Length); + + Status = RtlUpcaseUnicodeString(&fileref->dc->name_uc, &fileref->dc->name, TRUE); + if (!NT_SUCCESS(Status)) { + ERR("RtlUpcaseUnicodeString returned %08x\n", Status); goto end; } - - me->fileref->dc->utf8.Length = me->fileref->dc->utf8.MaximumLength = utf8->Length; - RtlCopyMemory(me->fileref->dc->utf8.Buffer, utf8->Buffer, utf8->Length); - - me->fileref->dc->name.Length = me->fileref->dc->name.MaximumLength = 
me->fileref->filepart.Length; - RtlCopyMemory(me->fileref->dc->name.Buffer, me->fileref->filepart.Buffer, me->fileref->filepart.Length); - - me->fileref->dc->name_uc.Length = me->fileref->dc->name_uc.MaximumLength = me->fileref->filepart_uc.Length; - RtlCopyMemory(me->fileref->dc->name_uc.Buffer, me->fileref->filepart_uc.Buffer, me->fileref->filepart_uc.Length); - + me->fileref->dc->hash = calc_crc32c(0xffffffff, (UINT8*)me->fileref->dc->name.Buffer, me->fileref->dc->name.Length); me->fileref->dc->hash_uc = calc_crc32c(0xffffffff, (UINT8*)me->fileref->dc->name_uc.Buffer, me->fileref->dc->name_uc.Length); } - + + if (me->fileref->dc->key.obj_type == TYPE_INODE_ITEM) + me->fileref->dc->key.obj_id = me->fileref->fcb->inode; + // add to new parent + ExAcquireResourceExclusiveLite(&destdir->fcb->nonpaged->dir_children_lock, TRUE); + + if (IsListEmpty(&destdir->fcb->dir_children_index)) + me->fileref->dc->index = 2; + else { + dir_child* dc2 = CONTAINING_RECORD(destdir->fcb->dir_children_index.Blink, dir_child, list_entry_index); + + me->fileref->dc->index = max(2, dc2->index + 1); + } + InsertTailList(&destdir->fcb->dir_children_index, &me->fileref->dc->list_entry_index); insert_dir_child_into_hash_lists(destdir->fcb, me->fileref->dc); ExReleaseResourceLite(&destdir->fcb->nonpaged->dir_children_lock); } - + + free_fileref(fileref->fcb->Vcb, me->fileref->parent); me->fileref->parent = destdir; - - insert_fileref_child(me->fileref->parent, me->fileref, TRUE); - + + ExAcquireResourceExclusiveLite(&me->fileref->parent->nonpaged->children_lock, TRUE); + InsertTailList(&me->fileref->parent->children, &me->fileref->list_entry); + ExReleaseResourceLite(&me->fileref->parent->nonpaged->children_lock); + TRACE("me->fileref->parent->fcb->inode_item.st_size (inode %llx) was %llx\n", me->fileref->parent->fcb->inode, me->fileref->parent->fcb->inode_item.st_size); - me->fileref->parent->fcb->inode_item.st_size += me->fileref->utf8.Length * 2; + 
me->fileref->parent->fcb->inode_item.st_size += me->fileref->dc->utf8.Length * 2; TRACE("me->fileref->parent->fcb->inode_item.st_size (inode %llx) now %llx\n", me->fileref->parent->fcb->inode, me->fileref->parent->fcb->inode_item.st_size); me->fileref->parent->fcb->inode_item.transid = me->fileref->fcb->Vcb->superblock.generation; me->fileref->parent->fcb->inode_item.sequence++; @@ -1291,10 +1078,39 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_ me->fileref->parent->fcb->inode_item.st_mtime = now; me->fileref->parent->fcb->inode_item_changed = TRUE; mark_fcb_dirty(me->fileref->parent->fcb); - } + } else { + if (me->fileref->dc) { + ExAcquireResourceExclusiveLite(&me->fileref->parent->fcb->nonpaged->dir_children_lock, TRUE); + RemoveEntryList(&me->fileref->dc->list_entry_index); + + if (!me->fileref->fcb->ads) + remove_dir_child_from_hash_lists(me->fileref->parent->fcb, me->fileref->dc); + + ExReleaseResourceLite(&me->fileref->parent->fcb->nonpaged->dir_children_lock); + + ExAcquireResourceExclusiveLite(&me->parent->fileref->fcb->nonpaged->dir_children_lock, TRUE); + + if (me->fileref->fcb->ads) + InsertHeadList(&me->parent->fileref->fcb->dir_children_index, &me->fileref->dc->list_entry_index); + else { + if (me->fileref->fcb->inode != SUBVOL_ROOT_INODE) + me->fileref->dc->key.obj_id = me->fileref->fcb->inode; + + if (IsListEmpty(&me->parent->fileref->fcb->dir_children_index)) + me->fileref->dc->index = 2; + else { + dir_child* dc2 = CONTAINING_RECORD(me->parent->fileref->fcb->dir_children_index.Blink, dir_child, list_entry_index); + + me->fileref->dc->index = max(2, dc2->index + 1); + } - if (me->fileref->fcb->inode == SUBVOL_ROOT_INODE) - me->fileref->fcb->subvol->root_item.num_references++; + InsertTailList(&me->parent->fileref->fcb->dir_children_index, &me->fileref->dc->list_entry_index); + insert_dir_child_into_hash_lists(me->parent->fileref->fcb, me->fileref->dc); + } + + 
ExReleaseResourceLite(&me->parent->fileref->fcb->nonpaged->dir_children_lock); + } + } if (!me->dummyfileref->fcb->ads) { Status = delete_fileref(me->dummyfileref, NULL, Irp, rollback); @@ -1303,7 +1119,7 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_ goto end; } } - + if (me->fileref->fcb->inode_item.st_nlink > 1) { hl = ExAllocatePoolWithTag(PagedPool, sizeof(hardlink), ALLOC_TAG); if (!hl) { @@ -1311,11 +1127,11 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_ Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + hl->parent = me->fileref->parent->fcb->inode; - hl->index = me->fileref->index; - - hl->utf8.Length = hl->utf8.MaximumLength = me->fileref->utf8.Length; + hl->index = me->fileref->dc->index; + + hl->utf8.Length = hl->utf8.MaximumLength = me->fileref->dc->utf8.Length; hl->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, hl->utf8.MaximumLength, ALLOC_TAG); if (!hl->utf8.Buffer) { ERR("out of memory\n"); @@ -1323,10 +1139,10 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_ ExFreePool(hl); goto end; } - - RtlCopyMemory(hl->utf8.Buffer, me->fileref->utf8.Buffer, me->fileref->utf8.Length); - - hl->name.Length = hl->name.MaximumLength = me->fileref->filepart.Length; + + RtlCopyMemory(hl->utf8.Buffer, me->fileref->dc->utf8.Buffer, me->fileref->dc->utf8.Length); + + hl->name.Length = hl->name.MaximumLength = me->fileref->dc->name.Length; hl->name.Buffer = ExAllocatePoolWithTag(PagedPool, hl->name.MaximumLength, ALLOC_TAG); if (!hl->name.Buffer) { ERR("out of memory\n"); @@ -1335,23 +1151,23 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_ ExFreePool(hl); goto end; } - - RtlCopyMemory(hl->name.Buffer, me->fileref->filepart.Buffer, me->fileref->filepart.Length); - + + RtlCopyMemory(hl->name.Buffer, me->fileref->dc->name.Buffer, me->fileref->dc->name.Length); + InsertTailList(&me->fileref->fcb->hardlinks, &hl->list_entry); } 
- + mark_fileref_dirty(me->fileref); - + le = le->Flink; } - + // loop through, and only mark streams as deleted if their parent inodes are also deleted - + le = move_list.Flink; while (le != &move_list) { me = CONTAINING_RECORD(le, move_entry, list_entry); - + if (me->dummyfileref->fcb->ads && me->parent->dummyfileref->fcb->deleted) { Status = delete_fileref(me->dummyfileref, NULL, Irp, rollback); if (!NT_SUCCESS(Status)) { @@ -1359,37 +1175,36 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_ goto end; } } - + le = le->Flink; } - + destdir->fcb->subvol->root_item.ctransid = destdir->fcb->Vcb->superblock.generation; destdir->fcb->subvol->root_item.ctime = now; - + me = CONTAINING_RECORD(move_list.Flink, move_entry, list_entry); - send_notification_fileref(me->dummyfileref, fileref->fcb->type == BTRFS_TYPE_DIRECTORY ? FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_REMOVED); - send_notification_fileref(fileref, fileref->fcb->type == BTRFS_TYPE_DIRECTORY ? FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_ADDED); - send_notification_fileref(me->dummyfileref->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED); - send_notification_fileref(fileref->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED); + send_notification_fileref(fileref, fileref->fcb->type == BTRFS_TYPE_DIRECTORY ? 
FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_ADDED, NULL); + send_notification_fileref(me->dummyfileref->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED, NULL); + send_notification_fileref(fileref->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED, NULL); Status = STATUS_SUCCESS; - + end: while (!IsListEmpty(&move_list)) { le = RemoveHeadList(&move_list); me = CONTAINING_RECORD(le, move_entry, list_entry); - + if (me->dummyfcb) - free_fcb(me->dummyfcb); - + free_fcb(fileref->fcb->Vcb, me->dummyfcb); + if (me->dummyfileref) - free_fileref(me->dummyfileref); - - free_fileref(me->fileref); - + free_fileref(fileref->fcb->Vcb, me->dummyfileref); + + free_fileref(fileref->fcb->Vcb, me->fileref); + ExFreePool(me); } - + return Status; } @@ -1397,99 +1212,98 @@ void insert_dir_child_into_hash_lists(fcb* fcb, dir_child* dc) { BOOL inserted; LIST_ENTRY* le; UINT8 c, d; - + c = dc->hash >> 24; - + inserted = FALSE; - + d = c; do { le = fcb->hash_ptrs[d]; - + if (d == 0) break; - + d--; } while (!le); - + if (!le) le = fcb->dir_children_hash.Flink; - + while (le != &fcb->dir_children_hash) { dir_child* dc2 = CONTAINING_RECORD(le, dir_child, list_entry_hash); - + if (dc2->hash > dc->hash) { InsertHeadList(le->Blink, &dc->list_entry_hash); inserted = TRUE; break; } - + le = le->Flink; } - + if (!inserted) InsertTailList(&fcb->dir_children_hash, &dc->list_entry_hash); - + if (!fcb->hash_ptrs[c]) fcb->hash_ptrs[c] = &dc->list_entry_hash; else { dir_child* dc2 = CONTAINING_RECORD(fcb->hash_ptrs[c], dir_child, list_entry_hash); - + if (dc2->hash > dc->hash) fcb->hash_ptrs[c] = &dc->list_entry_hash; } - + c = dc->hash_uc >> 24; - + inserted = FALSE; - + d = c; do { le = fcb->hash_ptrs_uc[d]; - + if (d == 0) break; - + d--; } while (!le); - + if (!le) le = fcb->dir_children_hash_uc.Flink; - + while (le != &fcb->dir_children_hash_uc) { dir_child* dc2 = CONTAINING_RECORD(le, dir_child, list_entry_hash_uc); - + if (dc2->hash_uc > 
dc->hash_uc) { InsertHeadList(le->Blink, &dc->list_entry_hash_uc); inserted = TRUE; break; } - + le = le->Flink; } - + if (!inserted) InsertTailList(&fcb->dir_children_hash_uc, &dc->list_entry_hash_uc); - + if (!fcb->hash_ptrs_uc[c]) fcb->hash_ptrs_uc[c] = &dc->list_entry_hash_uc; else { dir_child* dc2 = CONTAINING_RECORD(fcb->hash_ptrs_uc[c], dir_child, list_entry_hash_uc); - + if (dc2->hash_uc > dc->hash_uc) fcb->hash_ptrs_uc[c] = &dc->list_entry_hash_uc; } } -static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp, PFILE_OBJECT FileObject, PFILE_OBJECT tfo) { +static NTSTATUS set_rename_information(device_extension* Vcb, PIRP Irp, PFILE_OBJECT FileObject, PFILE_OBJECT tfo) { PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); FILE_RENAME_INFORMATION* fri = Irp->AssociatedIrp.SystemBuffer; fcb *fcb = FileObject->FsContext; ccb* ccb = FileObject->FsContext2; file_ref *fileref = ccb ? ccb->fileref : NULL, *oldfileref = NULL, *related = NULL, *fr2 = NULL; - UINT64 index; WCHAR* fn; - ULONG fnlen, utf8len; + ULONG fnlen, utf8len, origutf8len; UNICODE_STRING fnus; ANSI_STRING utf8; NTSTATUS Status; @@ -1497,17 +1311,19 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp, BTRFS_TIME now; LIST_ENTRY rollback, *le; hardlink* hl; - + SECURITY_SUBJECT_CONTEXT subjcont; + ACCESS_MASK access; + InitializeListHead(&rollback); - + TRACE("tfo = %p\n", tfo); TRACE("ReplaceIfExists = %u\n", IrpSp->Parameters.SetFile.ReplaceIfExists); TRACE("RootDirectory = %p\n", fri->RootDirectory); TRACE("FileName = %.*S\n", fri->FileNameLength / sizeof(WCHAR), fri->FileName); - + fn = fri->FileName; fnlen = fri->FileNameLength / sizeof(WCHAR); - + if (!tfo) { if (!fileref || !fileref->parent) { ERR("no fileref set and no directory given\n"); @@ -1515,13 +1331,13 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp, } } else { LONG i; - + while (fnlen > 0 && (fri->FileName[fnlen - 1] == '/' || 
fri->FileName[fnlen - 1] == '\\')) fnlen--; - + if (fnlen == 0) return STATUS_INVALID_PARAMETER; - + for (i = fnlen - 1; i >= 0; i--) { if (fri->FileName[i] == '\\' || fri->FileName[i] == '/') { fn = &fri->FileName[i+1]; @@ -1530,41 +1346,45 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp, } } } - + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); - + if (fcb->ads) { - FIXME("FIXME - renaming streams\n"); // FIXME - Status = STATUS_NOT_IMPLEMENTED; + // MSDN says that NTFS data streams can be renamed (https://msdn.microsoft.com/en-us/library/windows/hardware/ff540344.aspx), + // but if you try it always seems to return STATUS_INVALID_PARAMETER. There is a function in ntfs.sys called NtfsStreamRename, + // but it never seems to get invoked... If you know what's going on here, I'd appreciate it if you let me know. + Status = STATUS_INVALID_PARAMETER; goto end; } - + fnus.Buffer = fn; - fnus.Length = fnus.MaximumLength = fnlen * sizeof(WCHAR); - + fnus.Length = fnus.MaximumLength = (UINT16)(fnlen * sizeof(WCHAR)); + TRACE("fnus = %.*S\n", fnus.Length / sizeof(WCHAR), fnus.Buffer); - + + origutf8len = fileref->dc->utf8.Length; + Status = RtlUnicodeToUTF8N(NULL, 0, &utf8len, fn, (ULONG)fnlen * sizeof(WCHAR)); if (!NT_SUCCESS(Status)) goto end; - - utf8.MaximumLength = utf8.Length = utf8len; + + utf8.MaximumLength = utf8.Length = (UINT16)utf8len; utf8.Buffer = ExAllocatePoolWithTag(PagedPool, utf8.MaximumLength, ALLOC_TAG); if (!utf8.Buffer) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + Status = RtlUnicodeToUTF8N(utf8.Buffer, utf8len, &utf8len, fn, (ULONG)fnlen * sizeof(WCHAR)); if (!NT_SUCCESS(Status)) goto end; - + if (tfo && tfo->FsContext2) { struct _ccb* relatedccb = tfo->FsContext2; - + related = relatedccb->fileref; increase_fileref_refcount(related); } else if (fnus.Length >= 
sizeof(WCHAR) && fnus.Buffer[0] != '\\') { @@ -1576,7 +1396,7 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp, if (NT_SUCCESS(Status)) { TRACE("destination file %S already exists\n", file_desc_fileref(oldfileref)); - + if (fileref != oldfileref && !oldfileref->deleted) { if (!IrpSp->Parameters.SetFile.ReplaceIfExists) { Status = STATUS_OBJECT_NAME_COLLISION; @@ -1586,20 +1406,20 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp, Status = STATUS_ACCESS_DENIED; goto end; } - + if (oldfileref->fcb->type == BTRFS_TYPE_DIRECTORY) { WARN("trying to overwrite directory\n"); Status = STATUS_ACCESS_DENIED; goto end; } } - + if (fileref == oldfileref || oldfileref->deleted) { - free_fileref(oldfileref); + free_fileref(Vcb, oldfileref); oldfileref = NULL; } } - + if (!related) { Status = open_fileref(Vcb, &related, &fnus, NULL, TRUE, NULL, NULL, PagedPool, ccb->case_sensitive, Irp); @@ -1608,160 +1428,195 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp, goto end; } } - + + if (related->fcb == Vcb->dummy_fcb) { + Status = STATUS_ACCESS_DENIED; + goto end; + } + + SeCaptureSubjectContext(&subjcont); + + if (!SeAccessCheck(related->fcb->sd, &subjcont, FALSE, fcb->type == BTRFS_TYPE_DIRECTORY ? 
FILE_ADD_SUBDIRECTORY : FILE_ADD_FILE, 0, NULL, + IoGetFileObjectGenericMapping(), Irp->RequestorMode, &access, &Status)) { + SeReleaseSubjectContext(&subjcont); + TRACE("SeAccessCheck failed, returning %08x\n", Status); + goto end; + } + + SeReleaseSubjectContext(&subjcont); + if (has_open_children(fileref)) { WARN("trying to rename file with open children\n"); Status = STATUS_ACCESS_DENIED; goto end; } - + if (oldfileref) { - ACCESS_MASK access; - SECURITY_SUBJECT_CONTEXT subjcont; - SeCaptureSubjectContext(&subjcont); if (!SeAccessCheck(oldfileref->fcb->sd, &subjcont, FALSE, DELETE, 0, NULL, IoGetFileObjectGenericMapping(), Irp->RequestorMode, &access, &Status)) { SeReleaseSubjectContext(&subjcont); - WARN("SeAccessCheck failed, returning %08x\n", Status); + TRACE("SeAccessCheck failed, returning %08x\n", Status); goto end; } SeReleaseSubjectContext(&subjcont); - + Status = delete_fileref(oldfileref, NULL, Irp, &rollback); if (!NT_SUCCESS(Status)) { ERR("delete_fileref returned %08x\n", Status); goto end; } } - - if (fileref->parent->fcb->subvol != related->fcb->subvol && fileref->fcb->subvol == fileref->parent->fcb->subvol) { - Status = move_across_subvols(fileref, related, &utf8, &fnus, Irp, &rollback); + + if (fileref->parent->fcb->subvol != related->fcb->subvol && (fileref->fcb->subvol == fileref->parent->fcb->subvol || fileref->fcb == Vcb->dummy_fcb)) { + Status = move_across_subvols(fileref, ccb, related, &utf8, &fnus, Irp, &rollback); if (!NT_SUCCESS(Status)) { ERR("move_across_subvols returned %08x\n", Status); } goto end; } - + if (related == fileref->parent) { // keeping file in same directory - UNICODE_STRING fnus2, oldfn, newfn; + UNICODE_STRING oldfn, newfn; USHORT name_offset; - ULONG oldutf8len; - - fnus2.Buffer = ExAllocatePoolWithTag(PagedPool, fnus.Length, ALLOC_TAG); - if (!fnus2.Buffer) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; + ULONG reqlen, oldutf8len; + + oldfn.Length = oldfn.MaximumLength = 0; + + Status = 
fileref_get_filename(fileref, &oldfn, &name_offset, &reqlen); + if (Status != STATUS_BUFFER_OVERFLOW) { + ERR("fileref_get_filename returned %08x\n", Status); goto end; } - - Status = fileref_get_filename(fileref, &oldfn, &name_offset); - if (!NT_SUCCESS(Status)) { - ERR("fileref_get_filename returned %08x\n", Status); + + oldfn.Buffer = ExAllocatePoolWithTag(PagedPool, reqlen, ALLOC_TAG); + if (!oldfn.Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - - fnus2.Length = fnus2.MaximumLength = fnus.Length; - RtlCopyMemory(fnus2.Buffer, fnus.Buffer, fnus.Length); - - oldutf8len = fileref->utf8.Length; - - if (!fileref->created && !fileref->oldutf8.Buffer) - fileref->oldutf8 = fileref->utf8; - else - ExFreePool(fileref->utf8.Buffer); - - TRACE("renaming %.*S to %.*S\n", fileref->filepart.Length / sizeof(WCHAR), fileref->filepart.Buffer, fnus2.Length / sizeof(WCHAR), fnus.Buffer); - - fileref->utf8 = utf8; - fileref->filepart = fnus2; - - Status = fileref_get_filename(fileref, &newfn, &name_offset); + + oldfn.MaximumLength = (UINT16)reqlen; + + Status = fileref_get_filename(fileref, &oldfn, &name_offset, &reqlen); if (!NT_SUCCESS(Status)) { ERR("fileref_get_filename returned %08x\n", Status); ExFreePool(oldfn.Buffer); goto end; } - - if (fileref->filepart_uc.Buffer) - ExFreePool(fileref->filepart_uc.Buffer); - - Status = RtlUpcaseUnicodeString(&fileref->filepart_uc, &fileref->filepart, TRUE); - if (!NT_SUCCESS(Status)) { - ERR("RtlUpcaseUnicodeString returned %08x\n", Status); - ExFreePool(oldfn.Buffer); - ExFreePool(newfn.Buffer); - goto end; + + oldutf8len = fileref->dc->utf8.Length; + + if (!fileref->created && !fileref->oldutf8.Buffer) { + fileref->oldutf8.Buffer = ExAllocatePoolWithTag(PagedPool, fileref->dc->utf8.Length, ALLOC_TAG); + if (!fileref->oldutf8.Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + fileref->oldutf8.Length = fileref->oldutf8.MaximumLength = 
fileref->dc->utf8.Length; + RtlCopyMemory(fileref->oldutf8.Buffer, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); } - + + TRACE("renaming %.*S to %.*S\n", fileref->dc->name.Length / sizeof(WCHAR), fileref->dc->name.Buffer, fnus.Length / sizeof(WCHAR), fnus.Buffer); + mark_fileref_dirty(fileref); - + if (fileref->dc) { ExAcquireResourceExclusiveLite(&fileref->parent->fcb->nonpaged->dir_children_lock, TRUE); - + ExFreePool(fileref->dc->utf8.Buffer); ExFreePool(fileref->dc->name.Buffer); ExFreePool(fileref->dc->name_uc.Buffer); - + fileref->dc->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, utf8.Length, ALLOC_TAG); if (!fileref->dc->utf8.Buffer) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; ExReleaseResourceLite(&fileref->parent->fcb->nonpaged->dir_children_lock); + ExFreePool(oldfn.Buffer); goto end; } - - fileref->dc->name.Buffer = ExAllocatePoolWithTag(PagedPool, fileref->filepart.Length, ALLOC_TAG); + + fileref->dc->utf8.Length = fileref->dc->utf8.MaximumLength = utf8.Length; + RtlCopyMemory(fileref->dc->utf8.Buffer, utf8.Buffer, utf8.Length); + + fileref->dc->name.Buffer = ExAllocatePoolWithTag(PagedPool, fnus.Length, ALLOC_TAG); if (!fileref->dc->name.Buffer) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; ExReleaseResourceLite(&fileref->parent->fcb->nonpaged->dir_children_lock); + ExFreePool(oldfn.Buffer); goto end; } - - fileref->dc->name_uc.Buffer = ExAllocatePoolWithTag(PagedPool, fileref->filepart_uc.Length, ALLOC_TAG); - if (!fileref->dc->name_uc.Buffer) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; + + fileref->dc->name.Length = fileref->dc->name.MaximumLength = fnus.Length; + RtlCopyMemory(fileref->dc->name.Buffer, fnus.Buffer, fnus.Length); + + Status = RtlUpcaseUnicodeString(&fileref->dc->name_uc, &fileref->dc->name, TRUE); + if (!NT_SUCCESS(Status)) { + ERR("RtlUpcaseUnicodeString returned %08x\n", Status); ExReleaseResourceLite(&fileref->parent->fcb->nonpaged->dir_children_lock); + 
ExFreePool(oldfn.Buffer); goto end; } - - fileref->dc->utf8.Length = fileref->dc->utf8.MaximumLength = utf8.Length; - RtlCopyMemory(fileref->dc->utf8.Buffer, utf8.Buffer, utf8.Length); - - fileref->dc->name.Length = fileref->dc->name.MaximumLength = fileref->filepart.Length; - RtlCopyMemory(fileref->dc->name.Buffer, fileref->filepart.Buffer, fileref->filepart.Length); - - fileref->dc->name_uc.Length = fileref->dc->name_uc.MaximumLength = fileref->filepart_uc.Length; - RtlCopyMemory(fileref->dc->name_uc.Buffer, fileref->filepart_uc.Buffer, fileref->filepart_uc.Length); - + remove_dir_child_from_hash_lists(fileref->parent->fcb, fileref->dc); - + fileref->dc->hash = calc_crc32c(0xffffffff, (UINT8*)fileref->dc->name.Buffer, fileref->dc->name.Length); fileref->dc->hash_uc = calc_crc32c(0xffffffff, (UINT8*)fileref->dc->name_uc.Buffer, fileref->dc->name_uc.Length); - + insert_dir_child_into_hash_lists(fileref->parent->fcb, fileref->dc); - + ExReleaseResourceLite(&fileref->parent->fcb->nonpaged->dir_children_lock); } - + + newfn.Length = newfn.MaximumLength = 0; + + Status = fileref_get_filename(fileref, &newfn, &name_offset, &reqlen); + if (Status != STATUS_BUFFER_OVERFLOW) { + ERR("fileref_get_filename returned %08x\n", Status); + ExFreePool(oldfn.Buffer); + goto end; + } + + newfn.Buffer = ExAllocatePoolWithTag(PagedPool, reqlen, ALLOC_TAG); + if (!newfn.Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + ExFreePool(oldfn.Buffer); + goto end; + } + + newfn.MaximumLength = (UINT16)reqlen; + + Status = fileref_get_filename(fileref, &newfn, &name_offset, &reqlen); + if (!NT_SUCCESS(Status)) { + ERR("fileref_get_filename returned %08x\n", Status); + ExFreePool(oldfn.Buffer); + ExFreePool(newfn.Buffer); + goto end; + } + KeQuerySystemTime(&time); win_time_to_unix(time, &now); - - fcb->inode_item.transid = Vcb->superblock.generation; - fcb->inode_item.sequence++; - - if (!ccb->user_set_change_time) - fcb->inode_item.st_ctime = now; - - 
fcb->inode_item_changed = TRUE; - mark_fcb_dirty(fcb); - + + if (fcb != Vcb->dummy_fcb && (fileref->parent->fcb->subvol == fcb->subvol || !is_subvol_readonly(fcb->subvol, Irp))) { + fcb->inode_item.transid = Vcb->superblock.generation; + fcb->inode_item.sequence++; + + if (!ccb->user_set_change_time) + fcb->inode_item.st_ctime = now; + + fcb->inode_item_changed = TRUE; + mark_fcb_dirty(fcb); + } + // update parent's INODE_ITEM - + related->fcb->inode_item.transid = Vcb->superblock.generation; TRACE("related->fcb->inode_item.st_size (inode %llx) was %llx\n", related->fcb->inode, related->fcb->inode_item.st_size); related->fcb->inode_item.st_size = related->fcb->inode_item.st_size + (2 * utf8.Length) - (2* oldutf8len); @@ -1769,70 +1624,62 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp, related->fcb->inode_item.sequence++; related->fcb->inode_item.st_ctime = now; related->fcb->inode_item.st_mtime = now; - + related->fcb->inode_item_changed = TRUE; mark_fcb_dirty(related->fcb); - send_notification_fileref(related, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED); - + send_notification_fileref(related, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED, NULL); + FsRtlNotifyFilterReportChange(fcb->Vcb->NotifySync, &fcb->Vcb->DirNotifyList, (PSTRING)&oldfn, name_offset, NULL, NULL, fcb->type == BTRFS_TYPE_DIRECTORY ? FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_RENAMED_OLD_NAME, NULL, NULL); FsRtlNotifyFilterReportChange(fcb->Vcb->NotifySync, &fcb->Vcb->DirNotifyList, (PSTRING)&newfn, name_offset, NULL, NULL, fcb->type == BTRFS_TYPE_DIRECTORY ? 
FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_RENAMED_NEW_NAME, NULL, NULL); - + ExFreePool(oldfn.Buffer); ExFreePool(newfn.Buffer); - + Status = STATUS_SUCCESS; goto end; } - + // We move files by moving the existing fileref to the new directory, and // replacing it with a dummy fileref with the same original values, but marked as deleted. - - fr2 = create_fileref(); - + + send_notification_fileref(fileref, fcb->type == BTRFS_TYPE_DIRECTORY ? FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_REMOVED, NULL); + + fr2 = create_fileref(Vcb); + fr2->fcb = fileref->fcb; fr2->fcb->refcount++; - - fr2->filepart = fileref->filepart; - fr2->filepart_uc = fileref->filepart_uc; - fr2->utf8 = fileref->utf8; + fr2->oldutf8 = fileref->oldutf8; - fr2->index = fileref->index; + fr2->oldindex = fileref->dc->index; fr2->delete_on_close = fileref->delete_on_close; fr2->deleted = TRUE; fr2->created = fileref->created; fr2->parent = fileref->parent; fr2->dc = NULL; - + + if (!fr2->oldutf8.Buffer) { + fr2->oldutf8.Buffer = ExAllocatePoolWithTag(PagedPool, fileref->dc->utf8.Length, ALLOC_TAG); + if (!fr2->oldutf8.Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlCopyMemory(fr2->oldutf8.Buffer, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); + + fr2->oldutf8.Length = fr2->oldutf8.MaximumLength = fileref->dc->utf8.Length; + } + if (fr2->fcb->type == BTRFS_TYPE_DIRECTORY) fr2->fcb->fileref = fr2; - Status = fcb_get_last_dir_index(related->fcb, &index, Irp); - if (!NT_SUCCESS(Status)) { - ERR("fcb_get_last_dir_index returned %08x\n", Status); - goto end; - } - - fileref->filepart.Buffer = ExAllocatePoolWithTag(PagedPool, fnus.Length, ALLOC_TAG); - if (!fileref->filepart.Buffer) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - fileref->filepart.Length = fileref->filepart.MaximumLength = fnus.Length; - RtlCopyMemory(fileref->filepart.Buffer, 
fnus.Buffer, fnus.Length); - - Status = RtlUpcaseUnicodeString(&fileref->filepart_uc, &fileref->filepart, TRUE); - if (!NT_SUCCESS(Status)) { - ERR("RtlUpcaseUnicodeString returned %08x\n", Status); - goto end; - } - - fileref->utf8 = utf8; + if (fileref->fcb->inode == SUBVOL_ROOT_INODE) + fileref->fcb->subvol->parent = related->fcb->subvol->id; + + fileref->oldutf8.Length = fileref->oldutf8.MaximumLength = 0; fileref->oldutf8.Buffer = NULL; - fileref->index = index; fileref->deleted = FALSE; fileref->created = TRUE; fileref->parent = related; @@ -1841,95 +1688,102 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp, InsertHeadList(&fileref->list_entry, &fr2->list_entry); RemoveEntryList(&fileref->list_entry); ExReleaseResourceLite(&fileref->parent->nonpaged->children_lock); - - insert_fileref_child(related, fileref, TRUE); - + mark_fileref_dirty(fr2); mark_fileref_dirty(fileref); - + if (fileref->dc) { // remove from old parent ExAcquireResourceExclusiveLite(&fr2->parent->fcb->nonpaged->dir_children_lock, TRUE); RemoveEntryList(&fileref->dc->list_entry_index); remove_dir_child_from_hash_lists(fr2->parent->fcb, fileref->dc); ExReleaseResourceLite(&fr2->parent->fcb->nonpaged->dir_children_lock); - - if (fileref->utf8.Length != fr2->utf8.Length || RtlCompareMemory(fileref->utf8.Buffer, fr2->utf8.Buffer, fr2->utf8.Length) != fr2->utf8.Length) { + + if (fileref->dc->utf8.Length != utf8.Length || RtlCompareMemory(fileref->dc->utf8.Buffer, utf8.Buffer, utf8.Length) != utf8.Length) { // handle changed name - + ExFreePool(fileref->dc->utf8.Buffer); ExFreePool(fileref->dc->name.Buffer); ExFreePool(fileref->dc->name_uc.Buffer); - + fileref->dc->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, utf8.Length, ALLOC_TAG); if (!fileref->dc->utf8.Buffer) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - - fileref->dc->name.Buffer = ExAllocatePoolWithTag(PagedPool, fileref->filepart.Length, ALLOC_TAG); + + 
fileref->dc->utf8.Length = fileref->dc->utf8.MaximumLength = utf8.Length; + RtlCopyMemory(fileref->dc->utf8.Buffer, utf8.Buffer, utf8.Length); + + fileref->dc->name.Buffer = ExAllocatePoolWithTag(PagedPool, fnus.Length, ALLOC_TAG); if (!fileref->dc->name.Buffer) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - - fileref->dc->name_uc.Buffer = ExAllocatePoolWithTag(PagedPool, fileref->filepart_uc.Length, ALLOC_TAG); - if (!fileref->dc->name_uc.Buffer) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; + + fileref->dc->name.Length = fileref->dc->name.MaximumLength = fnus.Length; + RtlCopyMemory(fileref->dc->name.Buffer, fnus.Buffer, fnus.Length); + + Status = RtlUpcaseUnicodeString(&fileref->dc->name_uc, &fileref->dc->name, TRUE); + if (!NT_SUCCESS(Status)) { + ERR("RtlUpcaseUnicodeString returned %08x\n", Status); goto end; } - - fileref->dc->utf8.Length = fileref->dc->utf8.MaximumLength = utf8.Length; - RtlCopyMemory(fileref->dc->utf8.Buffer, utf8.Buffer, utf8.Length); - - fileref->dc->name.Length = fileref->dc->name.MaximumLength = fileref->filepart.Length; - RtlCopyMemory(fileref->dc->name.Buffer, fileref->filepart.Buffer, fileref->filepart.Length); - - fileref->dc->name_uc.Length = fileref->dc->name_uc.MaximumLength = fileref->filepart_uc.Length; - RtlCopyMemory(fileref->dc->name_uc.Buffer, fileref->filepart_uc.Buffer, fileref->filepart_uc.Length); - + fileref->dc->hash = calc_crc32c(0xffffffff, (UINT8*)fileref->dc->name.Buffer, fileref->dc->name.Length); fileref->dc->hash_uc = calc_crc32c(0xffffffff, (UINT8*)fileref->dc->name_uc.Buffer, fileref->dc->name_uc.Length); } - + // add to new parent ExAcquireResourceExclusiveLite(&related->fcb->nonpaged->dir_children_lock, TRUE); + + if (IsListEmpty(&related->fcb->dir_children_index)) + fileref->dc->index = 2; + else { + dir_child* dc2 = CONTAINING_RECORD(related->fcb->dir_children_index.Blink, dir_child, list_entry_index); + + fileref->dc->index = max(2, dc2->index + 
1); + } + InsertTailList(&related->fcb->dir_children_index, &fileref->dc->list_entry_index); insert_dir_child_into_hash_lists(related->fcb, fileref->dc); ExReleaseResourceLite(&related->fcb->nonpaged->dir_children_lock); } - + + ExAcquireResourceExclusiveLite(&related->nonpaged->children_lock, TRUE); + InsertTailList(&related->children, &fileref->list_entry); + ExReleaseResourceLite(&related->nonpaged->children_lock); + if (fcb->inode_item.st_nlink > 1) { // add new hardlink entry to fcb - + hl = ExAllocatePoolWithTag(PagedPool, sizeof(hardlink), ALLOC_TAG); if (!hl) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + hl->parent = related->fcb->inode; - hl->index = index; - - hl->name.Length = hl->name.MaximumLength = fileref->filepart.Length; + hl->index = fileref->dc->index; + + hl->name.Length = hl->name.MaximumLength = fnus.Length; hl->name.Buffer = ExAllocatePoolWithTag(PagedPool, hl->name.MaximumLength, ALLOC_TAG); - + if (!hl->name.Buffer) { ERR("out of memory\n"); ExFreePool(hl); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - - RtlCopyMemory(hl->name.Buffer, fileref->filepart.Buffer, fileref->filepart.Length); - - hl->utf8.Length = hl->utf8.MaximumLength = fileref->utf8.Length; + + RtlCopyMemory(hl->name.Buffer, fnus.Buffer, fnus.Length); + + hl->utf8.Length = hl->utf8.MaximumLength = fileref->dc->utf8.Length; hl->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, hl->utf8.MaximumLength, ALLOC_TAG); - + if (!hl->utf8.Buffer) { ERR("out of memory\n"); ExFreePool(hl->name.Buffer); @@ -1937,50 +1791,52 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp, Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - - RtlCopyMemory(hl->utf8.Buffer, fileref->utf8.Buffer, fileref->utf8.Length); - + + RtlCopyMemory(hl->utf8.Buffer, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); + InsertTailList(&fcb->hardlinks, &hl->list_entry); } - + // delete old hardlink entry from fcb - + le = fcb->hardlinks.Flink; 
while (le != &fcb->hardlinks) { hl = CONTAINING_RECORD(le, hardlink, list_entry); - - if (hl->parent == fr2->parent->fcb->inode && hl->index == fr2->index) { + + if (hl->parent == fr2->parent->fcb->inode && hl->index == fr2->oldindex) { RemoveEntryList(&hl->list_entry); - + if (hl->utf8.Buffer) ExFreePool(hl->utf8.Buffer); - + if (hl->name.Buffer) ExFreePool(hl->name.Buffer); - + ExFreePool(hl); break; } - + le = le->Flink; } // update inode's INODE_ITEM - + KeQuerySystemTime(&time); win_time_to_unix(time, &now); - - fcb->inode_item.transid = Vcb->superblock.generation; - fcb->inode_item.sequence++; - - if (!ccb->user_set_change_time) - fcb->inode_item.st_ctime = now; - - fcb->inode_item_changed = TRUE; - mark_fcb_dirty(fcb); - + + if (fcb != Vcb->dummy_fcb && (fileref->parent->fcb->subvol == fcb->subvol || !is_subvol_readonly(fcb->subvol, Irp))) { + fcb->inode_item.transid = Vcb->superblock.generation; + fcb->inode_item.sequence++; + + if (!ccb->user_set_change_time) + fcb->inode_item.st_ctime = now; + + fcb->inode_item_changed = TRUE; + mark_fcb_dirty(fcb); + } + // update new parent's INODE_ITEM - + related->fcb->inode_item.transid = Vcb->superblock.generation; TRACE("related->fcb->inode_item.st_size (inode %llx) was %llx\n", related->fcb->inode, related->fcb->inode_item.st_size); related->fcb->inode_item.st_size += 2 * utf8len; @@ -1988,72 +1844,70 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp, related->fcb->inode_item.sequence++; related->fcb->inode_item.st_ctime = now; related->fcb->inode_item.st_mtime = now; - + related->fcb->inode_item_changed = TRUE; mark_fcb_dirty(related->fcb); - + // update old parent's INODE_ITEM - + fr2->parent->fcb->inode_item.transid = Vcb->superblock.generation; TRACE("fr2->parent->fcb->inode_item.st_size (inode %llx) was %llx\n", fr2->parent->fcb->inode, fr2->parent->fcb->inode_item.st_size); - fr2->parent->fcb->inode_item.st_size -= 2 * fr2->utf8.Length; + fr2->parent->fcb->inode_item.st_size 
-= 2 * origutf8len; TRACE("fr2->parent->fcb->inode_item.st_size (inode %llx) now %llx\n", fr2->parent->fcb->inode, fr2->parent->fcb->inode_item.st_size); fr2->parent->fcb->inode_item.sequence++; fr2->parent->fcb->inode_item.st_ctime = now; fr2->parent->fcb->inode_item.st_mtime = now; - - free_fileref(fr2); - + + free_fileref(Vcb, fr2); + fr2->parent->fcb->inode_item_changed = TRUE; mark_fcb_dirty(fr2->parent->fcb); - - send_notification_fileref(fr2, fcb->type == BTRFS_TYPE_DIRECTORY ? FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_REMOVED); - send_notification_fileref(fileref, fcb->type == BTRFS_TYPE_DIRECTORY ? FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_ADDED); - send_notification_fileref(related, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED); - send_notification_fileref(fr2->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED); + + send_notification_fileref(fileref, fcb->type == BTRFS_TYPE_DIRECTORY ? FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_ADDED, NULL); + send_notification_fileref(related, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED, NULL); + send_notification_fileref(fr2->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED, NULL); Status = STATUS_SUCCESS; end: if (oldfileref) - free_fileref(oldfileref); - + free_fileref(Vcb, oldfileref); + if (!NT_SUCCESS(Status) && related) - free_fileref(related); - + free_fileref(Vcb, related); + if (!NT_SUCCESS(Status) && fr2) - free_fileref(fr2); - + free_fileref(Vcb, fr2); + if (NT_SUCCESS(Status)) - clear_rollback(Vcb, &rollback); + clear_rollback(&rollback); else do_rollback(Vcb, &rollback); ExReleaseResourceLite(fcb->Header.Resource); ExReleaseResourceLite(&Vcb->fcb_lock); ExReleaseResourceLite(&Vcb->tree_lock); - + return Status; } -NTSTATUS STDCALL stream_set_end_of_file_information(device_extension* Vcb, UINT64 end, fcb* fcb, file_ref* fileref, PFILE_OBJECT FileObject, BOOL advance_only, LIST_ENTRY* 
rollback) { +NTSTATUS stream_set_end_of_file_information(device_extension* Vcb, UINT16 end, fcb* fcb, file_ref* fileref, BOOL advance_only) { LARGE_INTEGER time; BTRFS_TIME now; - CC_FILE_SIZES ccfs; - + TRACE("setting new end to %llx bytes (currently %x)\n", end, fcb->adsdata.Length); - + if (!fileref || !fileref->parent) { ERR("no fileref for stream\n"); return STATUS_INTERNAL_ERROR; } - + if (end < fcb->adsdata.Length) { if (advance_only) return STATUS_SUCCESS; - + TRACE("truncating stream to %llx bytes\n", end); - + fcb->adsdata.Length = end; } else if (end > fcb->adsdata.Length) { TRACE("extending stream to %llx bytes\n", end); @@ -2070,42 +1924,34 @@ NTSTATUS STDCALL stream_set_end_of_file_information(device_extension* Vcb, UINT6 ExFreePool(data); return STATUS_INSUFFICIENT_RESOURCES; } - + if (fcb->adsdata.Buffer) { RtlCopyMemory(data, fcb->adsdata.Buffer, fcb->adsdata.Length); ExFreePool(fcb->adsdata.Buffer); } - + fcb->adsdata.Buffer = data; fcb->adsdata.MaximumLength = end; } - + RtlZeroMemory(&fcb->adsdata.Buffer[fcb->adsdata.Length], end - fcb->adsdata.Length); - + fcb->adsdata.Length = end; } - + mark_fcb_dirty(fcb); - + fcb->Header.AllocationSize.QuadPart = end; fcb->Header.FileSize.QuadPart = end; fcb->Header.ValidDataLength.QuadPart = end; - if (FileObject) { - ccfs.AllocationSize = fcb->Header.AllocationSize; - ccfs.FileSize = fcb->Header.FileSize; - ccfs.ValidDataLength = fcb->Header.ValidDataLength; - - CcSetFileSizes(FileObject, &ccfs); - } - KeQuerySystemTime(&time); win_time_to_unix(time, &now); - + fileref->parent->fcb->inode_item.transid = Vcb->superblock.generation; fileref->parent->fcb->inode_item.sequence++; fileref->parent->fcb->inode_item.st_ctime = now; - + fileref->parent->fcb->inode_item_changed = TRUE; mark_fcb_dirty(fileref->parent->fcb); @@ -2115,7 +1961,7 @@ NTSTATUS STDCALL stream_set_end_of_file_information(device_extension* Vcb, UINT6 return STATUS_SUCCESS; } -static NTSTATUS STDCALL 
set_end_of_file_information(device_extension* Vcb, PIRP Irp, PFILE_OBJECT FileObject, BOOL advance_only) { +static NTSTATUS set_end_of_file_information(device_extension* Vcb, PIRP Irp, PFILE_OBJECT FileObject, BOOL advance_only, BOOL prealloc) { FILE_END_OF_FILE_INFORMATION* feofi = Irp->AssociatedIrp.SystemBuffer; fcb* fcb = FileObject->FsContext; ccb* ccb = FileObject->FsContext2; @@ -2125,134 +1971,161 @@ static NTSTATUS STDCALL set_end_of_file_information(device_extension* Vcb, PIRP CC_FILE_SIZES ccfs; LIST_ENTRY rollback; BOOL set_size = FALSE; - + ULONG filter; + if (!fileref) { ERR("fileref is NULL\n"); return STATUS_INVALID_PARAMETER; } - + InitializeListHead(&rollback); - + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); - + ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); - + if (fileref ? fileref->deleted : fcb->deleted) { Status = STATUS_FILE_CLOSED; goto end; } - + if (fcb->ads) { - Status = stream_set_end_of_file_information(Vcb, feofi->EndOfFile.QuadPart, fcb, fileref, FileObject, advance_only, &rollback); + if (feofi->EndOfFile.QuadPart > 0xffff) { + Status = STATUS_DISK_FULL; + goto end; + } + + if (feofi->EndOfFile.QuadPart < 0) { + Status = STATUS_INVALID_PARAMETER; + goto end; + } + + Status = stream_set_end_of_file_information(Vcb, (UINT16)feofi->EndOfFile.QuadPart, fcb, fileref, advance_only); + + if (NT_SUCCESS(Status)) { + ccfs.AllocationSize = fcb->Header.AllocationSize; + ccfs.FileSize = fcb->Header.FileSize; + ccfs.ValidDataLength = fcb->Header.ValidDataLength; + set_size = TRUE; + } + + filter = FILE_NOTIFY_CHANGE_SIZE; + + if (!ccb->user_set_write_time) { + KeQuerySystemTime(&time); + win_time_to_unix(time, &fileref->parent->fcb->inode_item.st_mtime); + filter |= FILE_NOTIFY_CHANGE_LAST_WRITE; + + fileref->parent->fcb->inode_item_changed = TRUE; + mark_fcb_dirty(fileref->parent->fcb); + } + + send_notification_fcb(fileref->parent, filter, FILE_ACTION_MODIFIED, &fileref->dc->name); + goto end; } - + TRACE("file: %S\n", 
file_desc(FileObject)); TRACE("paging IO: %s\n", Irp->Flags & IRP_PAGING_IO ? "TRUE" : "FALSE"); TRACE("FileObject: AllocationSize = %llx, FileSize = %llx, ValidDataLength = %llx\n", fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart); - -// int3; + TRACE("setting new end to %llx bytes (currently %llx)\n", feofi->EndOfFile.QuadPart, fcb->inode_item.st_size); - -// if (feofi->EndOfFile.QuadPart==0x36c000) -// int3; - - if (feofi->EndOfFile.QuadPart < fcb->inode_item.st_size) { + + if ((UINT64)feofi->EndOfFile.QuadPart < fcb->inode_item.st_size) { if (advance_only) { Status = STATUS_SUCCESS; goto end; } - + TRACE("truncating file to %llx bytes\n", feofi->EndOfFile.QuadPart); - + if (!MmCanFileBeTruncated(&fcb->nonpaged->segment_object, &feofi->EndOfFile)) { Status = STATUS_USER_MAPPED_FILE; goto end; } - + Status = truncate_file(fcb, feofi->EndOfFile.QuadPart, Irp, &rollback); if (!NT_SUCCESS(Status)) { ERR("error - truncate_file failed\n"); goto end; } - } else if (feofi->EndOfFile.QuadPart > fcb->inode_item.st_size) { + } else if ((UINT64)feofi->EndOfFile.QuadPart > fcb->inode_item.st_size) { if (Irp->Flags & IRP_PAGING_IO) { TRACE("paging IO tried to extend file size\n"); Status = STATUS_SUCCESS; goto end; } - + TRACE("extending file to %llx bytes\n", feofi->EndOfFile.QuadPart); - - Status = extend_file(fcb, fileref, feofi->EndOfFile.QuadPart, TRUE, NULL, &rollback); + + Status = extend_file(fcb, fileref, feofi->EndOfFile.QuadPart, prealloc, NULL, &rollback); if (!NT_SUCCESS(Status)) { ERR("error - extend_file failed\n"); goto end; } } - + ccfs.AllocationSize = fcb->Header.AllocationSize; ccfs.FileSize = fcb->Header.FileSize; ccfs.ValidDataLength = fcb->Header.ValidDataLength; set_size = TRUE; - + + filter = FILE_NOTIFY_CHANGE_SIZE; + if (!ccb->user_set_write_time) { KeQuerySystemTime(&time); win_time_to_unix(time, &fcb->inode_item.st_mtime); + filter |= FILE_NOTIFY_CHANGE_LAST_WRITE; } - + 
fcb->inode_item_changed = TRUE; mark_fcb_dirty(fcb); - send_notification_fcb(fileref, FILE_NOTIFY_CHANGE_LAST_WRITE | FILE_NOTIFY_CHANGE_SIZE, FILE_ACTION_MODIFIED); + send_notification_fcb(fileref, filter, FILE_ACTION_MODIFIED, NULL); Status = STATUS_SUCCESS; end: if (NT_SUCCESS(Status)) - clear_rollback(Vcb, &rollback); + clear_rollback(&rollback); else do_rollback(Vcb, &rollback); ExReleaseResourceLite(fcb->Header.Resource); - - if (set_size) - CcSetFileSizes(FileObject, &ccfs); - + + if (set_size) { + _SEH2_TRY { + CcSetFileSizes(FileObject, &ccfs); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + + if (!NT_SUCCESS(Status)) + ERR("CcSetFileSizes threw exception %08x\n", Status); + } + ExReleaseResourceLite(&Vcb->tree_lock); - + return Status; } -// static NTSTATUS STDCALL set_allocation_information(device_extension* Vcb, PIRP Irp, PFILE_OBJECT FileObject) { -// FILE_ALLOCATION_INFORMATION* fai = (FILE_ALLOCATION_INFORMATION*)Irp->AssociatedIrp.SystemBuffer; -// fcb* fcb = FileObject->FsContext; -// -// FIXME("FIXME\n"); -// ERR("fcb = %p (%.*S)\n", fcb, fcb->full_filename.Length / sizeof(WCHAR), fcb->full_filename.Buffer); -// ERR("AllocationSize = %llx\n", fai->AllocationSize.QuadPart); -// -// return STATUS_NOT_IMPLEMENTED; -// } - -static NTSTATUS STDCALL set_position_information(device_extension* Vcb, PIRP Irp, PFILE_OBJECT FileObject) { +static NTSTATUS set_position_information(PFILE_OBJECT FileObject, PIRP Irp) { FILE_POSITION_INFORMATION* fpi = (FILE_POSITION_INFORMATION*)Irp->AssociatedIrp.SystemBuffer; - + TRACE("setting the position on %S to %llx\n", file_desc(FileObject), fpi->CurrentByteOffset.QuadPart); - + // FIXME - make sure aligned for FO_NO_INTERMEDIATE_BUFFERING - + FileObject->CurrentByteOffset = fpi->CurrentByteOffset; - + return STATUS_SUCCESS; } -static NTSTATUS STDCALL set_link_information(device_extension* Vcb, PIRP Irp, PFILE_OBJECT FileObject, PFILE_OBJECT tfo) { +static NTSTATUS 
set_link_information(device_extension* Vcb, PIRP Irp, PFILE_OBJECT FileObject, PFILE_OBJECT tfo) { FILE_LINK_INFORMATION* fli = Irp->AssociatedIrp.SystemBuffer; fcb *fcb = FileObject->FsContext, *tfofcb, *parfcb; ccb* ccb = FileObject->FsContext2; file_ref *fileref = ccb ? ccb->fileref : NULL, *oldfileref = NULL, *related = NULL, *fr2 = NULL; - UINT64 index; WCHAR* fn; ULONG fnlen, utf8len; UNICODE_STRING fnus; @@ -2265,40 +2138,40 @@ static NTSTATUS STDCALL set_link_information(device_extension* Vcb, PIRP Irp, PF ACCESS_MASK access; SECURITY_SUBJECT_CONTEXT subjcont; dir_child* dc = NULL; - + InitializeListHead(&rollback); - + // FIXME - check fli length // FIXME - don't ignore fli->RootDirectory - + TRACE("ReplaceIfExists = %x\n", fli->ReplaceIfExists); TRACE("RootDirectory = %p\n", fli->RootDirectory); TRACE("FileNameLength = %x\n", fli->FileNameLength); TRACE("FileName = %.*S\n", fli->FileNameLength / sizeof(WCHAR), fli->FileName); - + fn = fli->FileName; fnlen = fli->FileNameLength / sizeof(WCHAR); - + if (!tfo) { if (!fileref || !fileref->parent) { ERR("no fileref set and no directory given\n"); return STATUS_INVALID_PARAMETER; } - + parfcb = fileref->parent->fcb; tfofcb = NULL; } else { LONG i; - + tfofcb = tfo->FsContext; parfcb = tfofcb; - + while (fnlen > 0 && (fli->FileName[fnlen - 1] == '/' || fli->FileName[fnlen - 1] == '\\')) fnlen--; - + if (fnlen == 0) return STATUS_INVALID_PARAMETER; - + for (i = fnlen - 1; i >= 0; i--) { if (fli->FileName[i] == '\\' || fli->FileName[i] == '/') { fn = &fli->FileName[i+1]; @@ -2307,47 +2180,52 @@ static NTSTATUS STDCALL set_link_information(device_extension* Vcb, PIRP Irp, PF } } } - + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); - + if (fcb->type == BTRFS_TYPE_DIRECTORY) { WARN("tried to create hard link on directory\n"); Status = STATUS_FILE_IS_A_DIRECTORY; goto end; } - + if (fcb->ads) { 
WARN("tried to create hard link on stream\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + + if (fcb->inode_item.st_nlink >= 65535) { + Status = STATUS_TOO_MANY_LINKS; + goto end; + } + fnus.Buffer = fn; - fnus.Length = fnus.MaximumLength = fnlen * sizeof(WCHAR); - + fnus.Length = fnus.MaximumLength = (UINT16)(fnlen * sizeof(WCHAR)); + TRACE("fnus = %.*S\n", fnus.Length / sizeof(WCHAR), fnus.Buffer); - + Status = RtlUnicodeToUTF8N(NULL, 0, &utf8len, fn, (ULONG)fnlen * sizeof(WCHAR)); if (!NT_SUCCESS(Status)) goto end; - - utf8.MaximumLength = utf8.Length = utf8len; + + utf8.MaximumLength = utf8.Length = (UINT16)utf8len; utf8.Buffer = ExAllocatePoolWithTag(PagedPool, utf8.MaximumLength, ALLOC_TAG); if (!utf8.Buffer) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + Status = RtlUnicodeToUTF8N(utf8.Buffer, utf8len, &utf8len, fn, (ULONG)fnlen * sizeof(WCHAR)); if (!NT_SUCCESS(Status)) goto end; - + if (tfo && tfo->FsContext2) { struct _ccb* relatedccb = tfo->FsContext2; - + related = relatedccb->fileref; increase_fileref_refcount(related); } @@ -2357,7 +2235,7 @@ static NTSTATUS STDCALL set_link_information(device_extension* Vcb, PIRP Irp, PF if (NT_SUCCESS(Status)) { if (!oldfileref->deleted) { WARN("destination file %S already exists\n", file_desc_fileref(oldfileref)); - + if (!fli->ReplaceIfExists) { Status = STATUS_OBJECT_NAME_COLLISION; goto end; @@ -2369,18 +2247,18 @@ static NTSTATUS STDCALL set_link_information(device_extension* Vcb, PIRP Irp, PF Status = STATUS_ACCESS_DENIED; goto end; } - + if (oldfileref->fcb->type == BTRFS_TYPE_DIRECTORY) { WARN("trying to overwrite directory\n"); Status = STATUS_ACCESS_DENIED; goto end; } } else { - free_fileref(oldfileref); + free_fileref(Vcb, oldfileref); oldfileref = NULL; } } - + if (!related) { Status = open_fileref(Vcb, &related, &fnus, NULL, TRUE, NULL, NULL, PagedPool, ccb->case_sensitive, Irp); @@ -2389,84 +2267,62 @@ static NTSTATUS STDCALL 
set_link_information(device_extension* Vcb, PIRP Irp, PF goto end; } } - + SeCaptureSubjectContext(&subjcont); if (!SeAccessCheck(related->fcb->sd, &subjcont, FALSE, FILE_ADD_FILE, 0, NULL, IoGetFileObjectGenericMapping(), Irp->RequestorMode, &access, &Status)) { SeReleaseSubjectContext(&subjcont); - WARN("SeAccessCheck failed, returning %08x\n", Status); + TRACE("SeAccessCheck failed, returning %08x\n", Status); goto end; } SeReleaseSubjectContext(&subjcont); - + if (fcb->subvol != parfcb->subvol) { WARN("can't create hard link over subvolume boundary\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + if (oldfileref) { SeCaptureSubjectContext(&subjcont); if (!SeAccessCheck(oldfileref->fcb->sd, &subjcont, FALSE, DELETE, 0, NULL, IoGetFileObjectGenericMapping(), Irp->RequestorMode, &access, &Status)) { SeReleaseSubjectContext(&subjcont); - WARN("SeAccessCheck failed, returning %08x\n", Status); + TRACE("SeAccessCheck failed, returning %08x\n", Status); goto end; } SeReleaseSubjectContext(&subjcont); - + Status = delete_fileref(oldfileref, NULL, Irp, &rollback); if (!NT_SUCCESS(Status)) { ERR("delete_fileref returned %08x\n", Status); goto end; } } - - Status = fcb_get_last_dir_index(related->fcb, &index, Irp); - if (!NT_SUCCESS(Status)) { - ERR("fcb_get_last_dir_index returned %08x\n", Status); - goto end; - } - - fr2 = create_fileref(); - + + fr2 = create_fileref(Vcb); + fr2->fcb = fcb; fcb->refcount++; - - fr2->utf8 = utf8; - fr2->index = index; + fr2->created = TRUE; fr2->parent = related; - - fr2->filepart.Buffer = ExAllocatePoolWithTag(PagedPool, fnus.Length, ALLOC_TAG); - if (!fr2->filepart.Buffer) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - fr2->filepart.Length = fr2->filepart.MaximumLength = fnus.Length; - RtlCopyMemory(fr2->filepart.Buffer, fnus.Buffer, fnus.Length); - - Status = RtlUpcaseUnicodeString(&fr2->filepart_uc, &fr2->filepart, TRUE); - if (!NT_SUCCESS(Status)) { - ERR("RtlUpcaseUnicodeString 
returned %08x\n", Status); - goto end; - } - - insert_fileref_child(related, fr2, TRUE); - - Status = add_dir_child(related->fcb, fcb->inode, FALSE, index, &utf8, &fr2->filepart, &fr2->filepart_uc, fcb->type, &dc); + + Status = add_dir_child(related->fcb, fcb->inode, FALSE, &utf8, &fnus, fcb->type, &dc); if (!NT_SUCCESS(Status)) WARN("add_dir_child returned %08x\n", Status); - + fr2->dc = dc; dc->fileref = fr2; + ExAcquireResourceExclusiveLite(&related->nonpaged->children_lock, TRUE); + InsertTailList(&related->children, &fr2->list_entry); + ExReleaseResourceLite(&related->nonpaged->children_lock); + // add hardlink for existing fileref, if it's not there already if (IsListEmpty(&fcb->hardlinks)) { hl = ExAllocatePoolWithTag(PagedPool, sizeof(hardlink), ALLOC_TAG); @@ -2475,25 +2331,25 @@ static NTSTATUS STDCALL set_link_information(device_extension* Vcb, PIRP Irp, PF Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + hl->parent = fileref->parent->fcb->inode; - hl->index = fileref->index; - - hl->name.Length = hl->name.MaximumLength = fileref->filepart.Length; - hl->name.Buffer = ExAllocatePoolWithTag(PagedPool, hl->name.MaximumLength, ALLOC_TAG); - + hl->index = fileref->dc->index; + + hl->name.Length = hl->name.MaximumLength = fnus.Length; + hl->name.Buffer = ExAllocatePoolWithTag(PagedPool, fnus.Length, ALLOC_TAG); + if (!hl->name.Buffer) { ERR("out of memory\n"); ExFreePool(hl); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - - RtlCopyMemory(hl->name.Buffer, fileref->filepart.Buffer, fileref->filepart.Length); - - hl->utf8.Length = hl->utf8.MaximumLength = fileref->utf8.Length; + + RtlCopyMemory(hl->name.Buffer, fnus.Buffer, fnus.Length); + + hl->utf8.Length = hl->utf8.MaximumLength = fileref->dc->utf8.Length; hl->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, hl->utf8.MaximumLength, ALLOC_TAG); - + if (!hl->utf8.Buffer) { ERR("out of memory\n"); ExFreePool(hl->name.Buffer); @@ -2501,37 +2357,37 @@ static NTSTATUS STDCALL 
set_link_information(device_extension* Vcb, PIRP Irp, PF Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - - RtlCopyMemory(hl->utf8.Buffer, fileref->utf8.Buffer, fileref->utf8.Length); - + + RtlCopyMemory(hl->utf8.Buffer, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); + InsertTailList(&fcb->hardlinks, &hl->list_entry); } - + hl = ExAllocatePoolWithTag(PagedPool, sizeof(hardlink), ALLOC_TAG); if (!hl) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + hl->parent = related->fcb->inode; - hl->index = index; - + hl->index = dc->index; + hl->name.Length = hl->name.MaximumLength = fnus.Length; hl->name.Buffer = ExAllocatePoolWithTag(PagedPool, hl->name.MaximumLength, ALLOC_TAG); - + if (!hl->name.Buffer) { ERR("out of memory\n"); ExFreePool(hl); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + RtlCopyMemory(hl->name.Buffer, fnus.Buffer, fnus.Length); - + hl->utf8.Length = hl->utf8.MaximumLength = utf8.Length; hl->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, hl->utf8.MaximumLength, ALLOC_TAG); - + if (!hl->utf8.Buffer) { ERR("out of memory\n"); ExFreePool(hl->name.Buffer); @@ -2539,68 +2395,166 @@ static NTSTATUS STDCALL set_link_information(device_extension* Vcb, PIRP Irp, PF Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + RtlCopyMemory(hl->utf8.Buffer, utf8.Buffer, utf8.Length); - + ExFreePool(utf8.Buffer); + InsertTailList(&fcb->hardlinks, &hl->list_entry); - + mark_fileref_dirty(fr2); - free_fileref(fr2); - + free_fileref(Vcb, fr2); + // update inode's INODE_ITEM - + KeQuerySystemTime(&time); win_time_to_unix(time, &now); - + fcb->inode_item.transid = Vcb->superblock.generation; fcb->inode_item.sequence++; fcb->inode_item.st_nlink++; - + if (!ccb->user_set_change_time) fcb->inode_item.st_ctime = now; - + fcb->inode_item_changed = TRUE; mark_fcb_dirty(fcb); - + // update parent's INODE_ITEM - + parfcb->inode_item.transid = Vcb->superblock.generation; TRACE("parfcb->inode_item.st_size (inode %llx) was %llx\n", 
parfcb->inode, parfcb->inode_item.st_size); parfcb->inode_item.st_size += 2 * utf8len; TRACE("parfcb->inode_item.st_size (inode %llx) now %llx\n", parfcb->inode, parfcb->inode_item.st_size); parfcb->inode_item.sequence++; parfcb->inode_item.st_ctime = now; - + parfcb->inode_item_changed = TRUE; mark_fcb_dirty(parfcb); - - send_notification_fileref(fr2, FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_ADDED); + + send_notification_fileref(fr2, FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_ADDED, NULL); Status = STATUS_SUCCESS; - + end: if (oldfileref) - free_fileref(oldfileref); - + free_fileref(Vcb, oldfileref); + if (!NT_SUCCESS(Status) && related) - free_fileref(related); - + free_fileref(Vcb, related); + if (!NT_SUCCESS(Status) && fr2) - free_fileref(fr2); - + free_fileref(Vcb, fr2); + if (NT_SUCCESS(Status)) - clear_rollback(Vcb, &rollback); + clear_rollback(&rollback); else do_rollback(Vcb, &rollback); ExReleaseResourceLite(fcb->Header.Resource); ExReleaseResourceLite(&Vcb->fcb_lock); ExReleaseResourceLite(&Vcb->tree_lock); - + + return Status; +} + +static NTSTATUS set_valid_data_length_information(device_extension* Vcb, PIRP Irp, PFILE_OBJECT FileObject) { + FILE_VALID_DATA_LENGTH_INFORMATION* fvdli = Irp->AssociatedIrp.SystemBuffer; + PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); + fcb* fcb = FileObject->FsContext; + ccb* ccb = FileObject->FsContext2; + file_ref* fileref = ccb ? 
ccb->fileref : NULL; + NTSTATUS Status; + LARGE_INTEGER time; + CC_FILE_SIZES ccfs; + LIST_ENTRY rollback; + BOOL set_size = FALSE; + ULONG filter; + + if (IrpSp->Parameters.SetFile.Length < sizeof(FILE_VALID_DATA_LENGTH_INFORMATION)) { + ERR("input buffer length was %u, expected %u\n", IrpSp->Parameters.SetFile.Length, sizeof(FILE_VALID_DATA_LENGTH_INFORMATION)); + return STATUS_INVALID_PARAMETER; + } + + if (!fileref) { + ERR("fileref is NULL\n"); + return STATUS_INVALID_PARAMETER; + } + + InitializeListHead(&rollback); + + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + + ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); + + if (fcb->atts & FILE_ATTRIBUTE_SPARSE_FILE) { + Status = STATUS_INVALID_PARAMETER; + goto end; + } + + if (fvdli->ValidDataLength.QuadPart <= fcb->Header.ValidDataLength.QuadPart || fvdli->ValidDataLength.QuadPart > fcb->Header.FileSize.QuadPart) { + TRACE("invalid VDL of %llu (current VDL = %llu, file size = %llu)\n", fvdli->ValidDataLength.QuadPart, + fcb->Header.ValidDataLength.QuadPart, fcb->Header.FileSize.QuadPart); + Status = STATUS_INVALID_PARAMETER; + goto end; + } + + if (fileref ? fileref->deleted : fcb->deleted) { + Status = STATUS_FILE_CLOSED; + goto end; + } + + // This function doesn't really do anything - the fsctl can only increase the value of ValidDataLength, + // and we set it to the max anyway. 
+ + ccfs.AllocationSize = fcb->Header.AllocationSize; + ccfs.FileSize = fcb->Header.FileSize; + ccfs.ValidDataLength = fvdli->ValidDataLength; + set_size = TRUE; + + filter = FILE_NOTIFY_CHANGE_SIZE; + + if (!ccb->user_set_write_time) { + KeQuerySystemTime(&time); + win_time_to_unix(time, &fcb->inode_item.st_mtime); + filter |= FILE_NOTIFY_CHANGE_LAST_WRITE; + } + + fcb->inode_item_changed = TRUE; + mark_fcb_dirty(fcb); + + send_notification_fcb(fileref, filter, FILE_ACTION_MODIFIED, NULL); + + Status = STATUS_SUCCESS; + +end: + if (NT_SUCCESS(Status)) + clear_rollback(&rollback); + else + do_rollback(Vcb, &rollback); + + ExReleaseResourceLite(fcb->Header.Resource); + + if (set_size) { + _SEH2_TRY { + CcSetFileSizes(FileObject, &ccfs); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + + if (!NT_SUCCESS(Status)) + ERR("CcSetFileSizes threw exception %08x\n", Status); + else + fcb->Header.AllocationSize = ccfs.AllocationSize; + } + + ExReleaseResourceLite(&Vcb->tree_lock); + return Status; } -NTSTATUS STDCALL drv_set_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { +_Dispatch_type_(IRP_MJ_SET_INFORMATION) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_set_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { NTSTATUS Status; PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); device_extension* Vcb = DeviceObject->DeviceExtension; @@ -2609,44 +2563,47 @@ NTSTATUS STDCALL drv_set_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp BOOL top_level; FsRtlEnterFileSystem(); - + top_level = is_top_level(Irp); - - if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) { - Status = part0_passthrough(DeviceObject, Irp); - goto exit; + + Irp->IoStatus.Information = 0; + + if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + Status = vol_set_information(DeviceObject, Irp); + goto end; + } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { + Status = STATUS_INVALID_PARAMETER; + goto end; } - + if (!(Vcb->Vpb->Flags & 
VPB_MOUNTED)) { Status = STATUS_ACCESS_DENIED; goto end; } - + if (Vcb->readonly && IrpSp->Parameters.SetFile.FileInformationClass != FilePositionInformation) { Status = STATUS_MEDIA_WRITE_PROTECTED; goto end; } - + if (!fcb) { ERR("no fcb\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + if (!ccb) { ERR("no ccb\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - - if (fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY && IrpSp->Parameters.SetFile.FileInformationClass != FilePositionInformation && - (fcb->inode != SUBVOL_ROOT_INODE || IrpSp->Parameters.SetFile.FileInformationClass != FileBasicInformation)) { + + if (fcb != Vcb->dummy_fcb && is_subvol_readonly(fcb->subvol, Irp) && IrpSp->Parameters.SetFile.FileInformationClass != FilePositionInformation && + (fcb->inode != SUBVOL_ROOT_INODE || (IrpSp->Parameters.SetFile.FileInformationClass != FileBasicInformation && IrpSp->Parameters.SetFile.FileInformationClass != FileRenameInformation))) { Status = STATUS_ACCESS_DENIED; goto end; } - Irp->IoStatus.Information = 0; - Status = STATUS_NOT_IMPLEMENTED; TRACE("set information\n"); @@ -2655,59 +2612,59 @@ NTSTATUS STDCALL drv_set_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp case FileAllocationInformation: { TRACE("FileAllocationInformation\n"); - + if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_WRITE_DATA)) { WARN("insufficient privileges\n"); Status = STATUS_ACCESS_DENIED; break; } - - Status = set_end_of_file_information(Vcb, Irp, IrpSp->FileObject, FALSE); + + Status = set_end_of_file_information(Vcb, Irp, IrpSp->FileObject, FALSE, TRUE); break; } case FileBasicInformation: { TRACE("FileBasicInformation\n"); - + if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_WRITE_ATTRIBUTES)) { WARN("insufficient privileges\n"); Status = STATUS_ACCESS_DENIED; break; } - + Status = set_basic_information(Vcb, Irp, IrpSp->FileObject); - + break; } case FileDispositionInformation: { TRACE("FileDispositionInformation\n"); - + if 
(Irp->RequestorMode == UserMode && !(ccb->access & DELETE)) { WARN("insufficient privileges\n"); Status = STATUS_ACCESS_DENIED; break; } - + Status = set_disposition_information(Vcb, Irp, IrpSp->FileObject); - + break; } case FileEndOfFileInformation: { TRACE("FileEndOfFileInformation\n"); - + if (Irp->RequestorMode == UserMode && !(ccb->access & (FILE_WRITE_DATA | FILE_APPEND_DATA))) { WARN("insufficient privileges\n"); Status = STATUS_ACCESS_DENIED; break; } - - Status = set_end_of_file_information(Vcb, Irp, IrpSp->FileObject, IrpSp->Parameters.SetFile.AdvanceOnly); - + + Status = set_end_of_file_information(Vcb, Irp, IrpSp->FileObject, IrpSp->Parameters.SetFile.AdvanceOnly, FALSE); + break; } @@ -2717,13 +2674,9 @@ NTSTATUS STDCALL drv_set_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp break; case FilePositionInformation: - { TRACE("FilePositionInformation\n"); - - Status = set_position_information(Vcb, Irp, IrpSp->FileObject); - + Status = set_position_information(IrpSp->FileObject, Irp); break; - } case FileRenameInformation: TRACE("FileRenameInformation\n"); @@ -2732,1003 +2685,501 @@ NTSTATUS STDCALL drv_set_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp break; case FileValidDataLengthInformation: - FIXME("STUB: FileValidDataLengthInformation\n"); - break; - -#if (NTDDI_VERSION >= NTDDI_VISTA) - case FileNormalizedNameInformation: - FIXME("STUB: FileNormalizedNameInformation\n"); - break; -#endif - -#if (NTDDI_VERSION >= NTDDI_WIN7) - case FileStandardLinkInformation: - FIXME("STUB: FileStandardLinkInformation\n"); - break; - - case FileRemoteProtocolInformation: - TRACE("FileRemoteProtocolInformation\n"); + { + TRACE("FileValidDataLengthInformation\n"); + + if (Irp->RequestorMode == UserMode && !(ccb->access & (FILE_WRITE_DATA | FILE_APPEND_DATA))) { + WARN("insufficient privileges\n"); + Status = STATUS_ACCESS_DENIED; + break; + } + + Status = set_valid_data_length_information(Vcb, Irp, IrpSp->FileObject); + break; -#endif - + } + 
default: WARN("unknown FileInformationClass %u\n", IrpSp->Parameters.SetFile.FileInformationClass); } - + end: Irp->IoStatus.Status = Status; + TRACE("returning %08x\n", Status); + IoCompleteRequest(Irp, IO_NO_INCREMENT); - -exit: - if (top_level) + + if (top_level) IoSetTopLevelIrp(NULL); - + FsRtlExitFileSystem(); return Status; } -static NTSTATUS STDCALL fill_in_file_basic_information(FILE_BASIC_INFORMATION* fbi, INODE_ITEM* ii, LONG* length, fcb* fcb, file_ref* fileref) { +static NTSTATUS fill_in_file_basic_information(FILE_BASIC_INFORMATION* fbi, INODE_ITEM* ii, LONG* length, fcb* fcb, file_ref* fileref) { RtlZeroMemory(fbi, sizeof(FILE_BASIC_INFORMATION)); - + *length -= sizeof(FILE_BASIC_INFORMATION); - - fbi->CreationTime.QuadPart = unix_time_to_win(&ii->otime); - fbi->LastAccessTime.QuadPart = unix_time_to_win(&ii->st_atime); - fbi->LastWriteTime.QuadPart = unix_time_to_win(&ii->st_mtime); - fbi->ChangeTime.QuadPart = unix_time_to_win(&ii->st_ctime); - + + if (fcb == fcb->Vcb->dummy_fcb) { + LARGE_INTEGER time; + + KeQuerySystemTime(&time); + fbi->CreationTime = fbi->LastAccessTime = fbi->LastWriteTime = fbi->ChangeTime = time; + } else { + fbi->CreationTime.QuadPart = unix_time_to_win(&ii->otime); + fbi->LastAccessTime.QuadPart = unix_time_to_win(&ii->st_atime); + fbi->LastWriteTime.QuadPart = unix_time_to_win(&ii->st_mtime); + fbi->ChangeTime.QuadPart = unix_time_to_win(&ii->st_ctime); + } + if (fcb->ads) { if (!fileref || !fileref->parent) { ERR("no fileref for stream\n"); return STATUS_INTERNAL_ERROR; } else - fbi->FileAttributes = fileref->parent->fcb->atts; + fbi->FileAttributes = fileref->parent->fcb->atts == 0 ? FILE_ATTRIBUTE_NORMAL : fileref->parent->fcb->atts; } else - fbi->FileAttributes = fcb->atts; - + fbi->FileAttributes = fcb->atts == 0 ? 
FILE_ATTRIBUTE_NORMAL : fcb->atts; + return STATUS_SUCCESS; } -static NTSTATUS STDCALL fill_in_file_network_open_information(FILE_NETWORK_OPEN_INFORMATION* fnoi, fcb* fcb, file_ref* fileref, LONG* length) { +static NTSTATUS fill_in_file_network_open_information(FILE_NETWORK_OPEN_INFORMATION* fnoi, fcb* fcb, file_ref* fileref, LONG* length) { INODE_ITEM* ii; - - if (*length < sizeof(FILE_NETWORK_OPEN_INFORMATION)) { + + if (*length < (LONG)sizeof(FILE_NETWORK_OPEN_INFORMATION)) { WARN("overflow\n"); return STATUS_BUFFER_OVERFLOW; } - + RtlZeroMemory(fnoi, sizeof(FILE_NETWORK_OPEN_INFORMATION)); - + *length -= sizeof(FILE_NETWORK_OPEN_INFORMATION); - + if (fcb->ads) { if (!fileref || !fileref->parent) { ERR("no fileref for stream\n"); return STATUS_INTERNAL_ERROR; } - + ii = &fileref->parent->fcb->inode_item; } else ii = &fcb->inode_item; - - fnoi->CreationTime.QuadPart = unix_time_to_win(&ii->otime); - fnoi->LastAccessTime.QuadPart = unix_time_to_win(&ii->st_atime); - fnoi->LastWriteTime.QuadPart = unix_time_to_win(&ii->st_mtime); - fnoi->ChangeTime.QuadPart = unix_time_to_win(&ii->st_ctime); - + + if (fcb == fcb->Vcb->dummy_fcb) { + LARGE_INTEGER time; + + KeQuerySystemTime(&time); + fnoi->CreationTime = fnoi->LastAccessTime = fnoi->LastWriteTime = fnoi->ChangeTime = time; + } else { + fnoi->CreationTime.QuadPart = unix_time_to_win(&ii->otime); + fnoi->LastAccessTime.QuadPart = unix_time_to_win(&ii->st_atime); + fnoi->LastWriteTime.QuadPart = unix_time_to_win(&ii->st_mtime); + fnoi->ChangeTime.QuadPart = unix_time_to_win(&ii->st_ctime); + } + if (fcb->ads) { fnoi->AllocationSize.QuadPart = fnoi->EndOfFile.QuadPart = fcb->adsdata.Length; - fnoi->FileAttributes = fileref->parent->fcb->atts; + fnoi->FileAttributes = fileref->parent->fcb->atts == 0 ? FILE_ATTRIBUTE_NORMAL : fileref->parent->fcb->atts; } else { - fnoi->AllocationSize.QuadPart = S_ISDIR(fcb->inode_item.st_mode) ? 
0 : sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size); + fnoi->AllocationSize.QuadPart = fcb_alloc_size(fcb); fnoi->EndOfFile.QuadPart = S_ISDIR(fcb->inode_item.st_mode) ? 0 : fcb->inode_item.st_size; - fnoi->FileAttributes = fcb->atts; + fnoi->FileAttributes = fcb->atts == 0 ? FILE_ATTRIBUTE_NORMAL : fcb->atts; } - + return STATUS_SUCCESS; } -static NTSTATUS STDCALL fill_in_file_standard_information(FILE_STANDARD_INFORMATION* fsi, fcb* fcb, file_ref* fileref, LONG* length) { +static NTSTATUS fill_in_file_standard_information(FILE_STANDARD_INFORMATION* fsi, fcb* fcb, file_ref* fileref, LONG* length) { RtlZeroMemory(fsi, sizeof(FILE_STANDARD_INFORMATION)); - + *length -= sizeof(FILE_STANDARD_INFORMATION); - + if (fcb->ads) { if (!fileref || !fileref->parent) { ERR("no fileref for stream\n"); return STATUS_INTERNAL_ERROR; } - + fsi->AllocationSize.QuadPart = fsi->EndOfFile.QuadPart = fcb->adsdata.Length; fsi->NumberOfLinks = fileref->parent->fcb->inode_item.st_nlink; - fsi->Directory = S_ISDIR(fileref->parent->fcb->inode_item.st_mode); + fsi->Directory = FALSE; } else { - fsi->AllocationSize.QuadPart = S_ISDIR(fcb->inode_item.st_mode) ? 0 : sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size); + fsi->AllocationSize.QuadPart = fcb_alloc_size(fcb); fsi->EndOfFile.QuadPart = S_ISDIR(fcb->inode_item.st_mode) ? 0 : fcb->inode_item.st_size; fsi->NumberOfLinks = fcb->inode_item.st_nlink; fsi->Directory = S_ISDIR(fcb->inode_item.st_mode); } - + TRACE("length = %llu\n", fsi->EndOfFile.QuadPart); - + fsi->DeletePending = fileref ? 
fileref->delete_on_close : FALSE; - + return STATUS_SUCCESS; } -static NTSTATUS STDCALL fill_in_file_internal_information(FILE_INTERNAL_INFORMATION* fii, fcb* fcb, LONG* length) { +static NTSTATUS fill_in_file_internal_information(FILE_INTERNAL_INFORMATION* fii, fcb* fcb, LONG* length) { *length -= sizeof(FILE_INTERNAL_INFORMATION); - + fii->IndexNumber.QuadPart = make_file_id(fcb->subvol, fcb->inode); - + return STATUS_SUCCESS; -} - -static NTSTATUS STDCALL fill_in_file_ea_information(FILE_EA_INFORMATION* eai, fcb* fcb, LONG* length) { +} + +static NTSTATUS fill_in_file_ea_information(FILE_EA_INFORMATION* eai, fcb* fcb, LONG* length) { *length -= sizeof(FILE_EA_INFORMATION); - + /* This value appears to be the size of the structure NTFS stores on disk, and not, * as might be expected, the size of FILE_FULL_EA_INFORMATION (which is what we store). * The formula is 4 bytes as a header, followed by 5 + NameLength + ValueLength for each * item. */ - + eai->EaSize = fcb->ealen; - - return STATUS_SUCCESS; -} -static NTSTATUS STDCALL fill_in_file_access_information(FILE_ACCESS_INFORMATION* fai, LONG* length) { - *length -= sizeof(FILE_ACCESS_INFORMATION); - - fai->AccessFlags = GENERIC_READ; - - return STATUS_NOT_IMPLEMENTED; + return STATUS_SUCCESS; } -static NTSTATUS STDCALL fill_in_file_position_information(FILE_POSITION_INFORMATION* fpi, PFILE_OBJECT FileObject, LONG* length) { +static NTSTATUS fill_in_file_position_information(FILE_POSITION_INFORMATION* fpi, PFILE_OBJECT FileObject, LONG* length) { RtlZeroMemory(fpi, sizeof(FILE_POSITION_INFORMATION)); - + *length -= sizeof(FILE_POSITION_INFORMATION); - - fpi->CurrentByteOffset = FileObject->CurrentByteOffset; - - return STATUS_SUCCESS; -} -static NTSTATUS STDCALL fill_in_file_mode_information(FILE_MODE_INFORMATION* fmi, ccb* ccb, LONG* length) { - RtlZeroMemory(fmi, sizeof(FILE_MODE_INFORMATION)); - - *length -= sizeof(FILE_MODE_INFORMATION); - - if (ccb->options & FILE_WRITE_THROUGH) - fmi->Mode |= 
FILE_WRITE_THROUGH; - - if (ccb->options & FILE_SEQUENTIAL_ONLY) - fmi->Mode |= FILE_SEQUENTIAL_ONLY; - - if (ccb->options & FILE_NO_INTERMEDIATE_BUFFERING) - fmi->Mode |= FILE_NO_INTERMEDIATE_BUFFERING; - - if (ccb->options & FILE_SYNCHRONOUS_IO_ALERT) - fmi->Mode |= FILE_SYNCHRONOUS_IO_ALERT; - - if (ccb->options & FILE_SYNCHRONOUS_IO_NONALERT) - fmi->Mode |= FILE_SYNCHRONOUS_IO_NONALERT; - - if (ccb->options & FILE_DELETE_ON_CLOSE) - fmi->Mode |= FILE_DELETE_ON_CLOSE; - - return STATUS_SUCCESS; -} + fpi->CurrentByteOffset = FileObject->CurrentByteOffset; -static NTSTATUS STDCALL fill_in_file_alignment_information(FILE_ALIGNMENT_INFORMATION* fai, device_extension* Vcb, LONG* length) { - RtlZeroMemory(fai, sizeof(FILE_ALIGNMENT_INFORMATION)); - - *length -= sizeof(FILE_ALIGNMENT_INFORMATION); - - fai->AlignmentRequirement = first_device(Vcb)->devobj->AlignmentRequirement; - return STATUS_SUCCESS; } -typedef struct { - file_ref* fileref; - LIST_ENTRY list_entry; -} fileref_list; - -NTSTATUS fileref_get_filename(file_ref* fileref, PUNICODE_STRING fn, USHORT* name_offset) { - LIST_ENTRY fr_list, *le; +NTSTATUS fileref_get_filename(file_ref* fileref, PUNICODE_STRING fn, USHORT* name_offset, ULONG* preqlen) { file_ref* fr; NTSTATUS Status; - ULONG len, i; - + ULONG reqlen = 0; + USHORT offset; + BOOL overflow = FALSE; + // FIXME - we need a lock on filerefs' filepart - + if (fileref == fileref->fcb->Vcb->root_fileref) { - fn->Buffer = ExAllocatePoolWithTag(PagedPool, sizeof(WCHAR), ALLOC_TAG); - if (!fn->Buffer) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - fn->Length = fn->MaximumLength = sizeof(WCHAR); - fn->Buffer[0] = '\\'; + if (fn->MaximumLength >= sizeof(WCHAR)) { + fn->Buffer[0] = '\\'; + fn->Length = sizeof(WCHAR); - if (name_offset) - *name_offset = 0; + if (name_offset) + *name_offset = 0; - return STATUS_SUCCESS; + return STATUS_SUCCESS; + } else { + if (preqlen) + *preqlen = sizeof(WCHAR); + fn->Length = 0; + + return 
STATUS_BUFFER_OVERFLOW; + } } - - InitializeListHead(&fr_list); - - len = 0; + fr = fileref; - - do { - fileref_list* frl; - - frl = ExAllocatePoolWithTag(PagedPool, sizeof(fileref_list), ALLOC_TAG); - if (!frl) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; + offset = 0; + + while (fr->parent) { + USHORT movelen; + + if (!fr->dc) + return STATUS_INTERNAL_ERROR; + + if (!overflow) { + if (fr->dc->name.Length + sizeof(WCHAR) + fn->Length > fn->MaximumLength) + overflow = TRUE; + } + + if (overflow) + movelen = fn->MaximumLength - fr->dc->name.Length - sizeof(WCHAR); + else + movelen = fn->Length; + + if ((!overflow || fn->MaximumLength > fr->dc->name.Length + sizeof(WCHAR)) && movelen > 0) { + RtlMoveMemory(&fn->Buffer[(fr->dc->name.Length / sizeof(WCHAR)) + 1], fn->Buffer, movelen); + offset += fr->dc->name.Length + sizeof(WCHAR); + } + + if (fn->MaximumLength >= sizeof(WCHAR)) { + fn->Buffer[0] = fr->fcb->ads ? ':' : '\\'; + fn->Length += sizeof(WCHAR); + + if (fn->MaximumLength > sizeof(WCHAR)) { + RtlCopyMemory(&fn->Buffer[1], fr->dc->name.Buffer, min(fr->dc->name.Length, fn->MaximumLength - sizeof(WCHAR))); + fn->Length += fr->dc->name.Length; + } + + if (fn->Length > fn->MaximumLength) { + fn->Length = fn->MaximumLength; + overflow = TRUE; + } } - - frl->fileref = fr; - InsertTailList(&fr_list, &frl->list_entry); - - len += fr->filepart.Length; - - if (fr != fileref->fcb->Vcb->root_fileref) - len += sizeof(WCHAR); - + + reqlen += sizeof(WCHAR) + fr->dc->name.Length; + fr = fr->parent; - } while (fr); - - fn->Buffer = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG); - if (!fn->Buffer) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; } - - fn->Length = fn->MaximumLength = len; - - i = 0; - - le = fr_list.Blink; - while (le != &fr_list) { - fileref_list* frl = CONTAINING_RECORD(le, fileref_list, list_entry); - - if (frl->fileref != fileref->fcb->Vcb->root_fileref) { - fn->Buffer[i] = 
frl->fileref->fcb->ads ? ':' : '\\'; - i++; - - if (name_offset && frl->fileref == fileref) - *name_offset = i * sizeof(WCHAR); - - RtlCopyMemory(&fn->Buffer[i], frl->fileref->filepart.Buffer, frl->fileref->filepart.Length); - i += frl->fileref->filepart.Length / sizeof(WCHAR); - } - - le = le->Blink; + + offset += sizeof(WCHAR); + + if (overflow) { + if (preqlen) + *preqlen = reqlen; + Status = STATUS_BUFFER_OVERFLOW; + } else { + if (name_offset) + *name_offset = offset; + + Status = STATUS_SUCCESS; } - - Status = STATUS_SUCCESS; - -end: - while (!IsListEmpty(&fr_list)) { - fileref_list* frl; - - le = RemoveHeadList(&fr_list); - frl = CONTAINING_RECORD(le, fileref_list, list_entry); - - ExFreePool(frl); - } - + return Status; } -static NTSTATUS STDCALL fill_in_file_name_information(FILE_NAME_INFORMATION* fni, fcb* fcb, file_ref* fileref, LONG* length) { -#ifdef _DEBUG - ULONG retlen = 0; -#endif +static NTSTATUS fill_in_file_name_information(FILE_NAME_INFORMATION* fni, fcb* fcb, file_ref* fileref, LONG* length) { + ULONG reqlen; UNICODE_STRING fn; NTSTATUS Status; static WCHAR datasuf[] = {':','$','D','A','T','A',0}; - ULONG datasuflen = wcslen(datasuf) * sizeof(WCHAR); - + UINT16 datasuflen = (UINT16)wcslen(datasuf) * sizeof(WCHAR); + if (!fileref) { ERR("called without fileref\n"); return STATUS_INVALID_PARAMETER; } - - RtlZeroMemory(fni, sizeof(FILE_NAME_INFORMATION)); - + *length -= (LONG)offsetof(FILE_NAME_INFORMATION, FileName[0]); - + TRACE("maximum length is %u\n", *length); fni->FileNameLength = 0; - + fni->FileName[0] = 0; - - Status = fileref_get_filename(fileref, &fn, NULL); - if (!NT_SUCCESS(Status)) { + + fn.Buffer = fni->FileName; + fn.Length = 0; + fn.MaximumLength = (UINT16)*length; + + Status = fileref_get_filename(fileref, &fn, NULL, &reqlen); + if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) { ERR("fileref_get_filename returned %08x\n", Status); return Status; } - - if (*length >= (LONG)fn.Length) { - RtlCopyMemory(fni->FileName, 
fn.Buffer, fn.Length); -#ifdef _DEBUG - retlen = fn.Length; -#endif - *length -= fn.Length; - } else { - if (*length > 0) { - RtlCopyMemory(fni->FileName, fn.Buffer, *length); -#ifdef _DEBUG - retlen = *length; -#endif - } - *length = -1; - } - - fni->FileNameLength = fn.Length; - + if (fcb->ads) { - if (*length >= (LONG)datasuflen) { - RtlCopyMemory(&fni->FileName[fn.Length / sizeof(WCHAR)], datasuf, datasuflen); -#ifdef _DEBUG - retlen += datasuflen; -#endif - *length -= datasuflen; - } else { - if (*length > 0) { - RtlCopyMemory(&fni->FileName[fn.Length / sizeof(WCHAR)], datasuf, *length); -#ifdef _DEBUG - retlen += *length; -#endif + if (Status == STATUS_BUFFER_OVERFLOW) + reqlen += datasuflen; + else { + if (fn.Length + datasuflen > fn.MaximumLength) { + RtlCopyMemory(&fn.Buffer[fn.Length / sizeof(WCHAR)], datasuf, fn.MaximumLength - fn.Length); + reqlen += datasuflen; + Status = STATUS_BUFFER_OVERFLOW; + } else { + RtlCopyMemory(&fn.Buffer[fn.Length / sizeof(WCHAR)], datasuf, datasuflen); + fn.Length += datasuflen; } - *length = -1; } } - - ExFreePool(fn.Buffer); - - TRACE("%.*S\n", retlen / sizeof(WCHAR), fni->FileName); - return STATUS_SUCCESS; + if (Status == STATUS_BUFFER_OVERFLOW) { + *length = -1; + fni->FileNameLength = reqlen; + TRACE("%.*S (truncated)\n", fn.Length / sizeof(WCHAR), fn.Buffer); + } else { + *length -= fn.Length; + fni->FileNameLength = fn.Length; + TRACE("%.*S\n", fn.Length / sizeof(WCHAR), fn.Buffer); + } + + return Status; } -static NTSTATUS STDCALL fill_in_file_attribute_information(FILE_ATTRIBUTE_TAG_INFORMATION* ati, fcb* fcb, file_ref* fileref, PIRP Irp, LONG* length) { +static NTSTATUS fill_in_file_attribute_information(FILE_ATTRIBUTE_TAG_INFORMATION* ati, fcb* fcb, ccb* ccb, PIRP Irp, LONG* length) { *length -= sizeof(FILE_ATTRIBUTE_TAG_INFORMATION); - + if (fcb->ads) { - if (!fileref || !fileref->parent) { + if (!ccb->fileref || !ccb->fileref->parent) { ERR("no fileref for stream\n"); return STATUS_INTERNAL_ERROR; } - - 
ati->FileAttributes = fileref->parent->fcb->atts; + + ati->FileAttributes = ccb->fileref->parent->fcb->atts; } else ati->FileAttributes = fcb->atts; - + if (!(ati->FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)) ati->ReparseTag = 0; else - ati->ReparseTag = get_reparse_tag(fcb->Vcb, fcb->subvol, fcb->inode, fcb->type, fcb->atts, Irp); - + ati->ReparseTag = get_reparse_tag(fcb->Vcb, fcb->subvol, fcb->inode, fcb->type, fcb->atts, ccb->lxss, Irp); + return STATUS_SUCCESS; } -typedef struct { - UNICODE_STRING name; - UINT64 size; - BOOL ignore; - LIST_ENTRY list_entry; -} stream_info; - -static NTSTATUS STDCALL fill_in_file_stream_information(FILE_STREAM_INFORMATION* fsi, file_ref* fileref, PIRP Irp, LONG* length) { - ULONG reqsize; - LIST_ENTRY streamlist, *le; +static NTSTATUS fill_in_file_stream_information(FILE_STREAM_INFORMATION* fsi, file_ref* fileref, LONG* length) { + LONG reqsize; + LIST_ENTRY* le; FILE_STREAM_INFORMATION *entry, *lastentry; NTSTATUS Status; - KEY searchkey; - traverse_ptr tp, next_tp; - BOOL b; - stream_info* si; - - static WCHAR datasuf[] = {':','$','D','A','T','A',0}; - static char xapref[] = "user."; + + static WCHAR datasuf[] = L":$DATA"; UNICODE_STRING suf; - + if (!fileref) { ERR("fileref was NULL\n"); return STATUS_INVALID_PARAMETER; } - - InitializeListHead(&streamlist); - - ExAcquireResourceSharedLite(&fileref->fcb->Vcb->tree_lock, TRUE); - ExAcquireResourceSharedLite(fileref->fcb->Header.Resource, TRUE); - + suf.Buffer = datasuf; - suf.Length = suf.MaximumLength = wcslen(datasuf) * sizeof(WCHAR); - - searchkey.obj_id = fileref->fcb->inode; - searchkey.obj_type = TYPE_XATTR_ITEM; - searchkey.offset = 0; + suf.Length = suf.MaximumLength = (UINT16)wcslen(datasuf) * sizeof(WCHAR); + + if (fileref->fcb->type != BTRFS_TYPE_DIRECTORY) + reqsize = sizeof(FILE_STREAM_INFORMATION) - sizeof(WCHAR) + suf.Length + sizeof(WCHAR); + else + reqsize = 0; + + ExAcquireResourceSharedLite(&fileref->fcb->nonpaged->dir_children_lock, TRUE); + + le = 
fileref->fcb->dir_children_index.Flink; + while (le != &fileref->fcb->dir_children_index) { + dir_child* dc = CONTAINING_RECORD(le, dir_child, list_entry_index); + + if (dc->index == 0) { + reqsize = (ULONG)sector_align(reqsize, sizeof(LONGLONG)); + reqsize += sizeof(FILE_STREAM_INFORMATION) - sizeof(WCHAR) + suf.Length + sizeof(WCHAR) + dc->name.Length; + } else + break; - Status = find_item(fileref->fcb->Vcb, fileref->fcb->subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - goto end; - } - - if (fileref->fcb->type != BTRFS_TYPE_DIRECTORY) { - si = ExAllocatePoolWithTag(PagedPool, sizeof(stream_info), ALLOC_TAG); - if (!si) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - si->name.Length = si->name.MaximumLength = 0; - si->name.Buffer = NULL; - si->size = fileref->fcb->inode_item.st_size; - si->ignore = FALSE; - - InsertTailList(&streamlist, &si->list_entry); - } - - do { - if (tp.item->key.obj_id == fileref->fcb->inode && tp.item->key.obj_type == TYPE_XATTR_ITEM) { - if (tp.item->size < sizeof(DIR_ITEM)) { - ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM)); - } else { - ULONG len = tp.item->size; - DIR_ITEM* xa = (DIR_ITEM*)tp.item->data; - ULONG stringlen; - - do { - if (len < sizeof(DIR_ITEM) || len < sizeof(DIR_ITEM) - 1 + xa->m + xa->n) { - ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - break; - } - - if (xa->n > strlen(xapref) && RtlCompareMemory(xa->name, xapref, strlen(xapref)) == strlen(xapref) && - (tp.item->key.offset != EA_DOSATTRIB_HASH || xa->n != strlen(EA_DOSATTRIB) || RtlCompareMemory(xa->name, EA_DOSATTRIB, xa->n) != xa->n) && - (tp.item->key.offset != EA_EA_HASH || xa->n != strlen(EA_EA) || RtlCompareMemory(xa->name, EA_EA, xa->n) != xa->n) - ) { - Status = 
RtlUTF8ToUnicodeN(NULL, 0, &stringlen, &xa->name[strlen(xapref)], xa->n - strlen(xapref)); - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); - goto end; - } - - si = ExAllocatePoolWithTag(PagedPool, sizeof(stream_info), ALLOC_TAG); - if (!si) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - si->name.Buffer = ExAllocatePoolWithTag(PagedPool, stringlen, ALLOC_TAG); - if (!si->name.Buffer) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(si); - goto end; - } - - Status = RtlUTF8ToUnicodeN(si->name.Buffer, stringlen, &stringlen, &xa->name[strlen(xapref)], xa->n - strlen(xapref)); - - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); - ExFreePool(si->name.Buffer); - ExFreePool(si); - goto end; - } - - si->name.Length = si->name.MaximumLength = stringlen; - - si->size = xa->m; - - si->ignore = FALSE; - - TRACE("stream name = %.*S (length = %u)\n", si->name.Length / sizeof(WCHAR), si->name.Buffer, si->name.Length / sizeof(WCHAR)); - - InsertTailList(&streamlist, &si->list_entry); - } - - len -= sizeof(DIR_ITEM) - sizeof(char) + xa->n + xa->m; - xa = (DIR_ITEM*)&xa->name[xa->n + xa->m]; // FIXME - test xattr hash collisions work - } while (len > 0); - } - } - - b = find_next_item(fileref->fcb->Vcb, &tp, &next_tp, FALSE, Irp); - if (b) { - tp = next_tp; - - if (next_tp.item->key.obj_id > fileref->fcb->inode || next_tp.item->key.obj_type > TYPE_XATTR_ITEM) - break; - } - } while (b); - - ExAcquireResourceSharedLite(&fileref->nonpaged->children_lock, TRUE); - - le = fileref->children.Flink; - while (le != &fileref->children) { - file_ref* fr = CONTAINING_RECORD(le, file_ref, list_entry); - - if (fr->fcb && fr->fcb->ads) { - LIST_ENTRY* le2 = streamlist.Flink; - BOOL found = FALSE; - - while (le2 != &streamlist) { - si = CONTAINING_RECORD(le2, stream_info, list_entry); - - if (si && si->name.Buffer && si->name.Length == fr->filepart.Length 
&& - RtlCompareMemory(si->name.Buffer, fr->filepart.Buffer, si->name.Length) == si->name.Length) { - - si->size = fr->fcb->adsdata.Length; - si->ignore = fr->fcb->deleted; - - found = TRUE; - break; - } - - le2 = le2->Flink; - } - - if (!found && !fr->fcb->deleted) { - si = ExAllocatePoolWithTag(PagedPool, sizeof(stream_info), ALLOC_TAG); - if (!si) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - si->name.Length = si->name.MaximumLength = fr->filepart.Length; - - si->name.Buffer = ExAllocatePoolWithTag(PagedPool, si->name.MaximumLength, ALLOC_TAG); - if (!si->name.Buffer) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(si); - goto end; - } - - RtlCopyMemory(si->name.Buffer, fr->filepart.Buffer, fr->filepart.Length); - - si->size = fr->fcb->adsdata.Length; - si->ignore = FALSE; - - InsertTailList(&streamlist, &si->list_entry); - } - } - le = le->Flink; } - - ExReleaseResourceLite(&fileref->nonpaged->children_lock); - - reqsize = 0; - - le = streamlist.Flink; - while (le != &streamlist) { - si = CONTAINING_RECORD(le, stream_info, list_entry); - - if (!si->ignore) { - reqsize = sector_align(reqsize, sizeof(LONGLONG)); - reqsize += sizeof(FILE_STREAM_INFORMATION) - sizeof(WCHAR) + suf.Length + sizeof(WCHAR) + si->name.Length; - } - le = le->Flink; - } - TRACE("length = %i, reqsize = %u\n", *length, reqsize); - + if (reqsize > *length) { Status = STATUS_BUFFER_OVERFLOW; goto end; } - + entry = fsi; lastentry = NULL; - - le = streamlist.Flink; - while (le != &streamlist) { - si = CONTAINING_RECORD(le, stream_info, list_entry); - - if (!si->ignore) { + + if (fileref->fcb->type != BTRFS_TYPE_DIRECTORY) { + ULONG off; + + entry->NextEntryOffset = 0; + entry->StreamNameLength = suf.Length + sizeof(WCHAR); + entry->StreamSize.QuadPart = fileref->fcb->inode_item.st_size; + entry->StreamAllocationSize.QuadPart = fcb_alloc_size(fileref->fcb); + + entry->StreamName[0] = ':'; + 
RtlCopyMemory(&entry->StreamName[1], suf.Buffer, suf.Length); + + off = (ULONG)sector_align(sizeof(FILE_STREAM_INFORMATION) - sizeof(WCHAR) + suf.Length + sizeof(WCHAR), sizeof(LONGLONG)); + + lastentry = entry; + entry = (FILE_STREAM_INFORMATION*)((UINT8*)entry + off); + } + + le = fileref->fcb->dir_children_index.Flink; + while (le != &fileref->fcb->dir_children_index) { + dir_child* dc = CONTAINING_RECORD(le, dir_child, list_entry_index); + + if (dc->index == 0) { ULONG off; - + entry->NextEntryOffset = 0; - entry->StreamNameLength = si->name.Length + suf.Length + sizeof(WCHAR); - entry->StreamSize.QuadPart = si->size; - - if (le == streamlist.Flink) - entry->StreamAllocationSize.QuadPart = sector_align(fileref->fcb->inode_item.st_size, fileref->fcb->Vcb->superblock.sector_size); + entry->StreamNameLength = dc->name.Length + suf.Length + sizeof(WCHAR); + + if (dc->fileref) + entry->StreamSize.QuadPart = dc->fileref->fcb->adsdata.Length; else - entry->StreamAllocationSize.QuadPart = si->size; - + entry->StreamSize.QuadPart = dc->size; + + entry->StreamAllocationSize.QuadPart = entry->StreamSize.QuadPart; + entry->StreamName[0] = ':'; - - if (si->name.Length > 0) - RtlCopyMemory(&entry->StreamName[1], si->name.Buffer, si->name.Length); - - RtlCopyMemory(&entry->StreamName[1 + (si->name.Length / sizeof(WCHAR))], suf.Buffer, suf.Length); - + + RtlCopyMemory(&entry->StreamName[1], dc->name.Buffer, dc->name.Length); + RtlCopyMemory(&entry->StreamName[1 + (dc->name.Length / sizeof(WCHAR))], suf.Buffer, suf.Length); + if (lastentry) - lastentry->NextEntryOffset = (UINT8*)entry - (UINT8*)lastentry; - - off = sector_align(sizeof(FILE_STREAM_INFORMATION) - sizeof(WCHAR) + suf.Length + sizeof(WCHAR) + si->name.Length, sizeof(LONGLONG)); + lastentry->NextEntryOffset = (UINT32)((UINT8*)entry - (UINT8*)lastentry); + + off = (ULONG)sector_align(sizeof(FILE_STREAM_INFORMATION) - sizeof(WCHAR) + suf.Length + sizeof(WCHAR) + dc->name.Length, sizeof(LONGLONG)); lastentry = entry; 
entry = (FILE_STREAM_INFORMATION*)((UINT8*)entry + off); - } - + } else + break; + le = le->Flink; } - + *length -= reqsize; - + Status = STATUS_SUCCESS; - + end: - while (!IsListEmpty(&streamlist)) { - le = RemoveHeadList(&streamlist); - si = CONTAINING_RECORD(le, stream_info, list_entry); - - if (si->name.Buffer) - ExFreePool(si->name.Buffer); - - ExFreePool(si); - } - - ExReleaseResourceLite(fileref->fcb->Header.Resource); - ExReleaseResourceLite(&fileref->fcb->Vcb->tree_lock); - + ExReleaseResourceLite(&fileref->fcb->nonpaged->dir_children_lock); + return Status; } #ifndef __REACTOS__ -static NTSTATUS STDCALL fill_in_file_standard_link_information(FILE_STANDARD_LINK_INFORMATION* fsli, fcb* fcb, file_ref* fileref, LONG* length) { +static NTSTATUS fill_in_file_standard_link_information(FILE_STANDARD_LINK_INFORMATION* fsli, fcb* fcb, file_ref* fileref, LONG* length) { TRACE("FileStandardLinkInformation\n"); - + // FIXME - NumberOfAccessibleLinks should subtract open links which have been marked as delete_on_close - + fsli->NumberOfAccessibleLinks = fcb->inode_item.st_nlink; fsli->TotalNumberOfLinks = fcb->inode_item.st_nlink; fsli->DeletePending = fileref ? fileref->delete_on_close : FALSE; - fsli->Directory = fcb->type == BTRFS_TYPE_DIRECTORY ? TRUE : FALSE; - + fsli->Directory = (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY) ? 
TRUE : FALSE; + *length -= sizeof(FILE_STANDARD_LINK_INFORMATION); - + return STATUS_SUCCESS; } #endif /* __REACTOS__ */ -typedef struct { - UNICODE_STRING name; - UINT64 inode; - LIST_ENTRY list_entry; -} name_bit; - -static NTSTATUS get_subvol_path(device_extension* Vcb, root* subvol, PIRP Irp) { - KEY searchkey; - traverse_ptr tp; - NTSTATUS Status; - LIST_ENTRY* le; - root* parsubvol; - UNICODE_STRING dirpath; - ROOT_REF* rr; - ULONG namelen; - - // FIXME - add subvol->parent field - - if (subvol == Vcb->root_fileref->fcb->subvol) { - subvol->path.Length = subvol->path.MaximumLength = sizeof(WCHAR); - subvol->path.Buffer = ExAllocatePoolWithTag(PagedPool, subvol->path.Length, ALLOC_TAG); - subvol->path.Buffer[0] = '\\'; - return STATUS_SUCCESS; - } - - searchkey.obj_id = subvol->id; - searchkey.obj_type = TYPE_ROOT_BACKREF; - searchkey.offset = 0xffffffffffffffff; - - Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - return Status; - } - - if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { // top subvol - subvol->path.Length = subvol->path.MaximumLength = sizeof(WCHAR); - subvol->path.Buffer = ExAllocatePoolWithTag(PagedPool, subvol->path.Length, ALLOC_TAG); - subvol->path.Buffer[0] = '\\'; - return STATUS_SUCCESS; - } - - if (tp.item->size < sizeof(ROOT_REF)) { - ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF)); - return STATUS_INTERNAL_ERROR; - } - - rr = (ROOT_REF*)tp.item->data; - - if (tp.item->size < sizeof(ROOT_REF) - 1 + rr->n) { - ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF) - 1 + rr->n); - return STATUS_INTERNAL_ERROR; - } - - le = Vcb->roots.Flink; - - parsubvol = NULL; - - while (le != 
&Vcb->roots) { - root* r2 = CONTAINING_RECORD(le, root, list_entry); - - if (r2->id == tp.item->key.offset) { - parsubvol = r2; - break; - } - - le = le->Flink; - } - - if (!parsubvol) { - ERR("unable to find subvol %llx\n", tp.item->key.offset); - return STATUS_INTERNAL_ERROR; - } - - // FIXME - recursion - - Status = get_inode_dir_path(Vcb, parsubvol, rr->dir, &dirpath, Irp); - if (!NT_SUCCESS(Status)) { - ERR("get_inode_dir_path returned %08x\n", Status); - return Status; - } - - Status = RtlUTF8ToUnicodeN(NULL, 0, &namelen, rr->name, rr->n); - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN returned %08x\n", Status); - goto end; - } - - if (namelen == 0) { - ERR("length was 0\n"); - Status = STATUS_INTERNAL_ERROR; - goto end; - } - - subvol->path.Length = subvol->path.MaximumLength = dirpath.Length + namelen; - subvol->path.Buffer = ExAllocatePoolWithTag(PagedPool, subvol->path.Length, ALLOC_TAG); - - if (!subvol->path.Buffer) { - ERR("out of memory\n"); - - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - RtlCopyMemory(subvol->path.Buffer, dirpath.Buffer, dirpath.Length); - - Status = RtlUTF8ToUnicodeN(&subvol->path.Buffer[dirpath.Length / sizeof(WCHAR)], namelen, &namelen, rr->name, rr->n); - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN returned %08x\n", Status); - goto end; - } - - Status = STATUS_SUCCESS; - -end: - if (dirpath.Buffer) - ExFreePool(dirpath.Buffer); - - if (!NT_SUCCESS(Status) && subvol->path.Buffer) { - ExFreePool(subvol->path.Buffer); - subvol->path.Buffer = NULL; - } - - return Status; -} - -static NTSTATUS get_inode_dir_path(device_extension* Vcb, root* subvol, UINT64 inode, PUNICODE_STRING us, PIRP Irp) { - KEY searchkey; - NTSTATUS Status; - UINT64 in; - traverse_ptr tp; - LIST_ENTRY name_trail, *le; - UINT16 levels = 0; - UINT32 namelen = 0; - WCHAR* usbuf; - - InitializeListHead(&name_trail); - - in = inode; - - // FIXME - start with subvol prefix - if (!subvol->path.Buffer) { - Status = get_subvol_path(Vcb, 
subvol, Irp); - if (!NT_SUCCESS(Status)) { - ERR("get_subvol_path returned %08x\n", Status); - return Status; - } - } - - while (in != subvol->root_item.objid) { - searchkey.obj_id = in; - searchkey.obj_type = TYPE_INODE_EXTREF; - searchkey.offset = 0xffffffffffffffff; - - Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - goto end; - } - - if (tp.item->key.obj_id != searchkey.obj_id) { - ERR("could not find INODE_REF for inode %llx in subvol %llx\n", searchkey.obj_id, subvol->id); - Status = STATUS_INTERNAL_ERROR; - goto end; - } - - if (tp.item->key.obj_type == TYPE_INODE_REF) { - INODE_REF* ir = (INODE_REF*)tp.item->data; - name_bit* nb; - ULONG len; - - if (tp.item->size < sizeof(INODE_REF)) { - ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_REF)); - Status = STATUS_INTERNAL_ERROR; - goto end; - } - - if (tp.item->size < sizeof(INODE_REF) - 1 + ir->n) { - ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_REF) - 1 + ir->n); - Status = STATUS_INTERNAL_ERROR; - goto end; - } - - nb = ExAllocatePoolWithTag(PagedPool, sizeof(name_bit), ALLOC_TAG); - if (!nb) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - nb->name.Buffer = NULL; - - InsertTailList(&name_trail, &nb->list_entry); - levels++; - - Status = RtlUTF8ToUnicodeN(NULL, 0, &len, ir->name, ir->n); - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN returned %08x\n", Status); - goto end; - } - - if (len == 0) { - ERR("length was 0\n"); - Status = STATUS_INTERNAL_ERROR; - goto end; - } - - nb->name.Length = nb->name.MaximumLength = len; - - nb->name.Buffer = ExAllocatePoolWithTag(PagedPool, nb->name.Length, ALLOC_TAG); - if (!nb->name.Buffer) { - ERR("out of memory\n"); - - 
Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - Status = RtlUTF8ToUnicodeN(nb->name.Buffer, len, &len, ir->name, ir->n); - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN returned %08x\n", Status); - goto end; - } - - in = tp.item->key.offset; - namelen += nb->name.Length; - -// } else if (tp.item->key.obj_type == TYPE_INODE_EXTREF) { -// // FIXME - } else { - ERR("could not find INODE_REF for inode %llx in subvol %llx\n", searchkey.obj_id, subvol->id); - Status = STATUS_INTERNAL_ERROR; - goto end; - } - } - - namelen += (levels + 1) * sizeof(WCHAR); - - us->Length = us->MaximumLength = namelen; - us->Buffer = ExAllocatePoolWithTag(PagedPool, us->Length, ALLOC_TAG); - - if (!us->Buffer) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - us->Buffer[0] = '\\'; - usbuf = &us->Buffer[1]; - - le = name_trail.Blink; - while (le != &name_trail) { - name_bit* nb = CONTAINING_RECORD(le, name_bit, list_entry); - - RtlCopyMemory(usbuf, nb->name.Buffer, nb->name.Length); - usbuf += nb->name.Length / sizeof(WCHAR); - - usbuf[0] = '\\'; - usbuf++; - - le = le->Blink; - } - - Status = STATUS_SUCCESS; - -end: - while (!IsListEmpty(&name_trail)) { - name_bit* nb = CONTAINING_RECORD(name_trail.Flink, name_bit, list_entry); - - if (nb->name.Buffer) - ExFreePool(nb->name.Buffer); - - RemoveEntryList(&nb->list_entry); - - ExFreePool(nb); - } - - return Status; -} - -NTSTATUS open_fileref_by_inode(device_extension* Vcb, root* subvol, UINT64 inode, file_ref** pfr, PIRP Irp) { +NTSTATUS open_fileref_by_inode(_Requires_exclusive_lock_held_(_Curr_->fcb_lock) device_extension* Vcb, + root* subvol, UINT64 inode, file_ref** pfr, PIRP Irp) { NTSTATUS Status; fcb* fcb; - hardlink* hl; + UINT64 parent = 0; + UNICODE_STRING name; + BOOL hl_alloc = FALSE; file_ref *parfr, *fr; - dir_child* dc = NULL; - + Status = open_fcb(Vcb, subvol, inode, 0, NULL, NULL, &fcb, PagedPool, Irp); if (!NT_SUCCESS(Status)) { ERR("open_fcb returned %08x\n", 
Status); return Status; } - + if (fcb->fileref) { *pfr = fcb->fileref; increase_fileref_refcount(fcb->fileref); return STATUS_SUCCESS; } - + // find hardlink if fcb doesn't have any loaded if (IsListEmpty(&fcb->hardlinks)) { KEY searchkey; traverse_ptr tp; - + searchkey.obj_id = fcb->inode; searchkey.obj_type = TYPE_INODE_EXTREF; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("find_item returned %08x\n", Status); - free_fcb(fcb); + free_fcb(Vcb, fcb); return Status; } - + if (tp.item->key.obj_id == fcb->inode) { if (tp.item->key.obj_type == TYPE_INODE_REF) { INODE_REF* ir; @@ -3736,274 +3187,309 @@ NTSTATUS open_fileref_by_inode(device_extension* Vcb, root* subvol, UINT64 inode ir = (INODE_REF*)tp.item->data; - hl = ExAllocatePoolWithTag(PagedPool, sizeof(hardlink), ALLOC_TAG); - if (!hl) { - ERR("out of memory\n"); - free_fcb(fcb); - return STATUS_INSUFFICIENT_RESOURCES; - } - - hl->parent = tp.item->key.offset; - hl->index = ir->index; - - hl->utf8.Length = hl->utf8.MaximumLength = ir->n; - - if (hl->utf8.Length > 0) { - hl->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, hl->utf8.MaximumLength, ALLOC_TAG); - RtlCopyMemory(hl->utf8.Buffer, ir->name, ir->n); - } - + parent = tp.item->key.offset; + Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, ir->name, ir->n); if (!NT_SUCCESS(Status)) { ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); - ExFreePool(hl); - free_fcb(fcb); + free_fcb(Vcb, fcb); return Status; } - - hl->name.Length = hl->name.MaximumLength = stringlen; - + + name.Length = name.MaximumLength = (UINT16)stringlen; + if (stringlen == 0) - hl->name.Buffer = NULL; + name.Buffer = NULL; else { - hl->name.Buffer = ExAllocatePoolWithTag(PagedPool, hl->name.MaximumLength, ALLOC_TAG); - - if (!hl->name.Buffer) { + name.Buffer = ExAllocatePoolWithTag(PagedPool, name.MaximumLength, ALLOC_TAG); + + if (!name.Buffer) { ERR("out of memory\n"); - ExFreePool(hl); - 
free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INSUFFICIENT_RESOURCES; } - - Status = RtlUTF8ToUnicodeN(hl->name.Buffer, stringlen, &stringlen, ir->name, ir->n); + + Status = RtlUTF8ToUnicodeN(name.Buffer, stringlen, &stringlen, ir->name, ir->n); if (!NT_SUCCESS(Status)) { ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); - ExFreePool(hl->name.Buffer); - ExFreePool(hl); - free_fcb(fcb); + ExFreePool(name.Buffer); + free_fcb(Vcb, fcb); return Status; } + + hl_alloc = TRUE; } - - InsertTailList(&fcb->hardlinks, &hl->list_entry); } else if (tp.item->key.obj_type == TYPE_INODE_EXTREF) { INODE_EXTREF* ier; - hardlink* hl; ULONG stringlen; ier = (INODE_EXTREF*)tp.item->data; - - hl = ExAllocatePoolWithTag(PagedPool, sizeof(hardlink), ALLOC_TAG); - if (!hl) { - ERR("out of memory\n"); - free_fcb(fcb); - return STATUS_INSUFFICIENT_RESOURCES; - } - - hl->parent = ier->dir; - hl->index = ier->index; - - hl->utf8.Length = hl->utf8.MaximumLength = ier->n; - - if (hl->utf8.Length > 0) { - hl->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, hl->utf8.MaximumLength, ALLOC_TAG); - RtlCopyMemory(hl->utf8.Buffer, ier->name, ier->n); - } - + + parent = ier->dir; + Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, ier->name, ier->n); if (!NT_SUCCESS(Status)) { ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); - ExFreePool(hl); - free_fcb(fcb); + free_fcb(Vcb, fcb); return Status; } - - hl->name.Length = hl->name.MaximumLength = stringlen; - + + name.Length = name.MaximumLength = (UINT16)stringlen; + if (stringlen == 0) - hl->name.Buffer = NULL; + name.Buffer = NULL; else { - hl->name.Buffer = ExAllocatePoolWithTag(PagedPool, hl->name.MaximumLength, ALLOC_TAG); - - if (!hl->name.Buffer) { + name.Buffer = ExAllocatePoolWithTag(PagedPool, name.MaximumLength, ALLOC_TAG); + + if (!name.Buffer) { ERR("out of memory\n"); - ExFreePool(hl); - free_fcb(fcb); + free_fcb(Vcb, fcb); return STATUS_INSUFFICIENT_RESOURCES; } - - Status = RtlUTF8ToUnicodeN(hl->name.Buffer, stringlen, &stringlen, 
ier->name, ier->n); + + Status = RtlUTF8ToUnicodeN(name.Buffer, stringlen, &stringlen, ier->name, ier->n); if (!NT_SUCCESS(Status)) { ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); - ExFreePool(hl->name.Buffer); - ExFreePool(hl); - free_fcb(fcb); + ExFreePool(name.Buffer); + free_fcb(Vcb, fcb); return Status; } + + hl_alloc = TRUE; } - - InsertTailList(&fcb->hardlinks, &hl->list_entry); + } } + } else { + hardlink* hl = CONTAINING_RECORD(fcb->hardlinks.Flink, hardlink, list_entry); + + name = hl->name; + parent = hl->parent; } - - if (IsListEmpty(&fcb->hardlinks)) { + + if (parent == 0) { ERR("subvol %llx, inode %llx has no hardlinks\n", subvol->id, inode); - free_fcb(fcb); - return STATUS_INTERNAL_ERROR; + free_fcb(Vcb, fcb); + if (hl_alloc) ExFreePool(name.Buffer); + return STATUS_INVALID_PARAMETER; } - - hl = CONTAINING_RECORD(fcb->hardlinks.Flink, hardlink, list_entry); - - // FIXME - does this work with subvols? - - if (hl->parent == inode) // root of subvol - parfr = NULL; - else { - Status = open_fileref_by_inode(Vcb, subvol, hl->parent, &parfr, Irp); + + if (parent == inode) { // subvolume root + KEY searchkey; + traverse_ptr tp; + + searchkey.obj_id = subvol->id; + searchkey.obj_type = TYPE_ROOT_BACKREF; + searchkey.offset = 0xffffffffffffffff; + + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { - ERR("open_fileref_by_inode returned %08x\n", Status); - free_fcb(fcb); + ERR("find_item returned %08x\n", Status); + free_fcb(Vcb, fcb); + if (hl_alloc) ExFreePool(name.Buffer); return Status; } - } - - fr = create_fileref(); - if (!fr) { - ERR("out of memory\n"); - free_fcb(fcb); - return STATUS_INSUFFICIENT_RESOURCES; - } - - fr->fcb = fcb; - fcb->fileref = fr; - - fr->index = hl->index; - - fr->utf8.Length = fr->utf8.MaximumLength = hl->utf8.Length; - if (fr->utf8.Length > 0) { - fr->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, fr->utf8.Length, ALLOC_TAG); - - if (!fr->utf8.Buffer) { - ERR("out of 
memory\n"); - free_fileref(fr); - return STATUS_INSUFFICIENT_RESOURCES; + + if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { + ROOT_REF* rr = (ROOT_REF*)tp.item->data; + LIST_ENTRY* le; + root* r = NULL; + ULONG stringlen; + + if (tp.item->size < sizeof(ROOT_REF)) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF)); + free_fcb(Vcb, fcb); + if (hl_alloc) ExFreePool(name.Buffer); + return STATUS_INTERNAL_ERROR; + } + + if (tp.item->size < offsetof(ROOT_REF, name[0]) + rr->n) { + ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, offsetof(ROOT_REF, name[0]) + rr->n); + free_fcb(Vcb, fcb); + if (hl_alloc) ExFreePool(name.Buffer); + return STATUS_INTERNAL_ERROR; + } + + le = Vcb->roots.Flink; + while (le != &Vcb->roots) { + root* r2 = CONTAINING_RECORD(le, root, list_entry); + + if (r2->id == tp.item->key.offset) { + r = r2; + break; + } + + le = le->Flink; + } + + if (!r) { + ERR("couldn't find subvol %llx\n", tp.item->key.offset); + free_fcb(Vcb, fcb); + if (hl_alloc) ExFreePool(name.Buffer); + return STATUS_INTERNAL_ERROR; + } + + Status = open_fileref_by_inode(Vcb, r, rr->dir, &parfr, Irp); + if (!NT_SUCCESS(Status)) { + ERR("open_fileref_by_inode returned %08x\n", Status); + free_fcb(Vcb, fcb); + if (hl_alloc) ExFreePool(name.Buffer); + return Status; + } + + if (hl_alloc) { + ExFreePool(name.Buffer); + hl_alloc = FALSE; + } + + Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, rr->name, rr->n); + if (!NT_SUCCESS(Status)) { + ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); + free_fcb(Vcb, fcb); + return Status; + } + + name.Length = name.MaximumLength = (UINT16)stringlen; + + if (stringlen == 0) + name.Buffer = NULL; + else { + name.Buffer = ExAllocatePoolWithTag(PagedPool, name.MaximumLength, ALLOC_TAG); + + if (!name.Buffer) { + 
ERR("out of memory\n"); + free_fcb(Vcb, fcb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + Status = RtlUTF8ToUnicodeN(name.Buffer, stringlen, &stringlen, rr->name, rr->n); + if (!NT_SUCCESS(Status)) { + ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); + ExFreePool(name.Buffer); + free_fcb(Vcb, fcb); + return Status; + } + + hl_alloc = TRUE; + } + } else { + ERR("couldn't find parent for subvol %llx\n", subvol->id); + free_fcb(Vcb, fcb); + if (hl_alloc) ExFreePool(name.Buffer); + return STATUS_INTERNAL_ERROR; } - - RtlCopyMemory(fr->utf8.Buffer, hl->utf8.Buffer, hl->utf8.Length); - } - - fr->filepart.MaximumLength = fr->filepart.Length = hl->name.Length; - - if (fr->filepart.Length > 0) { - fr->filepart.Buffer = ExAllocatePoolWithTag(PagedPool, fr->filepart.MaximumLength, ALLOC_TAG); - if (!fr->filepart.Buffer) { - ERR("out of memory\n"); - free_fileref(fr); - return STATUS_INSUFFICIENT_RESOURCES; + } else { + Status = open_fileref_by_inode(Vcb, subvol, parent, &parfr, Irp); + if (!NT_SUCCESS(Status)) { + ERR("open_fileref_by_inode returned %08x\n", Status); + free_fcb(Vcb, fcb); + + if (hl_alloc) + ExFreePool(name.Buffer); + + return Status; } - - RtlCopyMemory(fr->filepart.Buffer, hl->name.Buffer, hl->name.Length); } - - Status = RtlUpcaseUnicodeString(&fr->filepart_uc, &fr->filepart, TRUE); + + Status = open_fileref_child(Vcb, parfr, &name, TRUE, TRUE, FALSE, PagedPool, &fr, Irp); + if (!NT_SUCCESS(Status)) { - ERR("RtlUpcaseUnicodeString returned %08x\n", Status); - free_fileref(fr); + ERR("open_fileref_child returned %08x\n", Status); + + if (hl_alloc) + ExFreePool(name.Buffer); + + free_fcb(Vcb, fcb); + free_fileref(Vcb, parfr); + return Status; } - - fr->parent = parfr; - - Status = add_dir_child(parfr->fcb, fr->fcb->inode == SUBVOL_ROOT_INODE ? 
fr->fcb->subvol->id : fr->fcb->inode, fr->fcb->inode == SUBVOL_ROOT_INODE, - fr->index, &fr->utf8, &fr->filepart, &fr->filepart_uc, fr->fcb->type, &dc); - if (!NT_SUCCESS(Status)) - WARN("add_dir_child returned %08x\n", Status); - - fr->dc = dc; - dc->fileref = fr; - - insert_fileref_child(parfr, fr, TRUE); *pfr = fr; - + + if (hl_alloc) + ExFreePool(name.Buffer); + + free_fcb(Vcb, fcb); + free_fileref(Vcb, parfr); + return STATUS_SUCCESS; } #ifndef __REACTOS__ -static NTSTATUS STDCALL fill_in_hard_link_information(FILE_LINKS_INFORMATION* fli, file_ref* fileref, PIRP Irp, LONG* length) { +static NTSTATUS fill_in_hard_link_information(FILE_LINKS_INFORMATION* fli, file_ref* fileref, PIRP Irp, LONG* length) { NTSTATUS Status; LIST_ENTRY* le; - ULONG bytes_needed; + LONG bytes_needed; FILE_LINK_ENTRY_INFORMATION* feli; BOOL overflow = FALSE; fcb* fcb = fileref->fcb; ULONG len; - + if (fcb->ads) return STATUS_INVALID_PARAMETER; - - if (*length < offsetof(FILE_LINKS_INFORMATION, Entry)) + + if (*length < (LONG)offsetof(FILE_LINKS_INFORMATION, Entry)) return STATUS_INVALID_PARAMETER; - + RtlZeroMemory(fli, *length); - + bytes_needed = offsetof(FILE_LINKS_INFORMATION, Entry); len = bytes_needed; feli = NULL; - + ExAcquireResourceSharedLite(fcb->Header.Resource, TRUE); - + if (fcb->inode == SUBVOL_ROOT_INODE) { ULONG namelen; - + if (fcb == fcb->Vcb->root_fileref->fcb) namelen = sizeof(WCHAR); else - namelen = fileref->filepart.Length; - + namelen = fileref->dc->name.Length; + bytes_needed += sizeof(FILE_LINK_ENTRY_INFORMATION) - sizeof(WCHAR) + namelen; - + if (bytes_needed > *length) overflow = TRUE; - + if (!overflow) { feli = &fli->Entry; - + feli->NextEntryOffset = 0; feli->ParentFileId = 0; // we use an inode of 0 to mean the parent of a subvolume - + if (fcb == fcb->Vcb->root_fileref->fcb) { feli->FileNameLength = 1; feli->FileName[0] = '.'; } else { - feli->FileNameLength = fileref->filepart.Length / sizeof(WCHAR); - RtlCopyMemory(feli->FileName, 
fileref->filepart.Buffer, fileref->filepart.Length); + feli->FileNameLength = fileref->dc->name.Length / sizeof(WCHAR); + RtlCopyMemory(feli->FileName, fileref->dc->name.Buffer, fileref->dc->name.Length); } - + fli->EntriesReturned++; - + len = bytes_needed; } } else { ExAcquireResourceExclusiveLite(&fcb->Vcb->fcb_lock, TRUE); - + if (IsListEmpty(&fcb->hardlinks)) { - bytes_needed += sizeof(FILE_LINK_ENTRY_INFORMATION) + fileref->filepart.Length - sizeof(WCHAR); - + bytes_needed += sizeof(FILE_LINK_ENTRY_INFORMATION) + fileref->dc->name.Length - sizeof(WCHAR); + if (bytes_needed > *length) overflow = TRUE; - + if (!overflow) { feli = &fli->Entry; feli->NextEntryOffset = 0; feli->ParentFileId = fileref->parent->fcb->inode; - feli->FileNameLength = fileref->filepart.Length / sizeof(WCHAR); - RtlCopyMemory(feli->FileName, fileref->filepart.Buffer, fileref->filepart.Length); - + feli->FileNameLength = fileref->dc->name.Length / sizeof(WCHAR); + RtlCopyMemory(feli->FileName, fileref->dc->name.Buffer, fileref->dc->name.Length); + fli->EntriesReturned++; - + len = bytes_needed; } } else { @@ -4011,83 +3497,83 @@ static NTSTATUS STDCALL fill_in_hard_link_information(FILE_LINKS_INFORMATION* fl while (le != &fcb->hardlinks) { hardlink* hl = CONTAINING_RECORD(le, hardlink, list_entry); file_ref* parfr; - + TRACE("parent %llx, index %llx, name %.*S\n", hl->parent, hl->index, hl->name.Length / sizeof(WCHAR), hl->name.Buffer); - + Status = open_fileref_by_inode(fcb->Vcb, fcb->subvol, hl->parent, &parfr, Irp); - + if (!NT_SUCCESS(Status)) { ERR("open_fileref_by_inode returned %08x\n", Status); } else if (!parfr->deleted) { LIST_ENTRY* le2; BOOL found = FALSE, deleted = FALSE; - UNICODE_STRING* fn; - + UNICODE_STRING* fn = NULL; + le2 = parfr->children.Flink; while (le2 != &parfr->children) { file_ref* fr2 = CONTAINING_RECORD(le2, file_ref, list_entry); - - if (fr2->index == hl->index) { + + if (fr2->dc->index == hl->index) { found = TRUE; deleted = fr2->deleted; - + if (!deleted) 
- fn = &fr2->filepart; - + fn = &fr2->dc->name; + break; } - + le2 = le2->Flink; } - + if (!found) fn = &hl->name; - + if (!deleted) { TRACE("fn = %.*S (found = %u)\n", fn->Length / sizeof(WCHAR), fn->Buffer, found); - + if (feli) - bytes_needed = sector_align(bytes_needed, 8); - + bytes_needed = (LONG)sector_align(bytes_needed, 8); + bytes_needed += sizeof(FILE_LINK_ENTRY_INFORMATION) + fn->Length - sizeof(WCHAR); - + if (bytes_needed > *length) overflow = TRUE; - + if (!overflow) { if (feli) { - feli->NextEntryOffset = sector_align(sizeof(FILE_LINK_ENTRY_INFORMATION) + ((feli->FileNameLength - 1) * sizeof(WCHAR)), 8); + feli->NextEntryOffset = (ULONG)sector_align(sizeof(FILE_LINK_ENTRY_INFORMATION) + ((feli->FileNameLength - 1) * sizeof(WCHAR)), 8); feli = (FILE_LINK_ENTRY_INFORMATION*)((UINT8*)feli + feli->NextEntryOffset); } else feli = &fli->Entry; - + feli->NextEntryOffset = 0; feli->ParentFileId = parfr->fcb->inode; feli->FileNameLength = fn->Length / sizeof(WCHAR); RtlCopyMemory(feli->FileName, fn->Buffer, fn->Length); - + fli->EntriesReturned++; - + len = bytes_needed; } } - - free_fileref(parfr); + + free_fileref(fcb->Vcb, parfr); } - + le = le->Flink; } } - + ExReleaseResourceLite(&fcb->Vcb->fcb_lock); } - + fli->BytesNeeded = bytes_needed; - + *length -= len; Status = overflow ? 
STATUS_BUFFER_OVERFLOW : STATUS_SUCCESS; - + ExReleaseResourceLite(fcb->Header.Resource); return Status; @@ -4098,7 +3584,7 @@ static NTSTATUS STDCALL fill_in_hard_link_information(FILE_LINKS_INFORMATION* fl #ifdef __MINGW32__ typedef struct _FILE_ID_128 { UCHAR Identifier[16]; -} FILE_ID_128, *PFILE_ID_128; +} FILE_ID_128, *PFILE_ID_128; typedef struct _FILE_ID_INFORMATION { ULONGLONG VolumeSerialNumber; @@ -4110,84 +3596,83 @@ static NTSTATUS fill_in_file_id_information(FILE_ID_INFORMATION* fii, fcb* fcb, RtlCopyMemory(&fii->VolumeSerialNumber, &fcb->Vcb->superblock.uuid.uuid[8], sizeof(UINT64)); RtlCopyMemory(&fii->FileId.Identifier[0], &fcb->inode, sizeof(UINT64)); RtlCopyMemory(&fii->FileId.Identifier[sizeof(UINT64)], &fcb->subvol->id, sizeof(UINT64)); - + *length -= sizeof(FILE_ID_INFORMATION); - + return STATUS_SUCCESS; } #endif -static NTSTATUS STDCALL query_info(device_extension* Vcb, PFILE_OBJECT FileObject, PIRP Irp) { +static NTSTATUS query_info(device_extension* Vcb, PFILE_OBJECT FileObject, PIRP Irp) { PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); LONG length = IrpSp->Parameters.QueryFile.Length; fcb* fcb = FileObject->FsContext; ccb* ccb = FileObject->FsContext2; file_ref* fileref = ccb ? 
ccb->fileref : NULL; NTSTATUS Status; - + TRACE("(%p, %p, %p)\n", Vcb, FileObject, Irp); TRACE("fcb = %p\n", fcb); - + if (fcb == Vcb->volume_fcb) return STATUS_INVALID_PARAMETER; - + if (!ccb) { ERR("ccb is NULL\n"); return STATUS_INVALID_PARAMETER; } - + switch (IrpSp->Parameters.QueryFile.FileInformationClass) { case FileAllInformation: { FILE_ALL_INFORMATION* fai = Irp->AssociatedIrp.SystemBuffer; INODE_ITEM* ii; - + TRACE("FileAllInformation\n"); - + if (Irp->RequestorMode != KernelMode && !(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) { WARN("insufficient privileges\n"); Status = STATUS_ACCESS_DENIED; goto exit; } - + if (fcb->ads) { if (!fileref || !fileref->parent) { ERR("no fileref for stream\n"); Status = STATUS_INTERNAL_ERROR; goto exit; } - + ii = &fileref->parent->fcb->inode_item; } else ii = &fcb->inode_item; - + + // Access, mode, and alignment are all filled in by the kernel + if (length > 0) fill_in_file_basic_information(&fai->BasicInformation, ii, &length, fcb, fileref); - + if (length > 0) fill_in_file_standard_information(&fai->StandardInformation, fcb, fileref, &length); - + if (length > 0) fill_in_file_internal_information(&fai->InternalInformation, fcb, &length); - + if (length > 0) fill_in_file_ea_information(&fai->EaInformation, fcb, &length); - - if (length > 0) - fill_in_file_access_information(&fai->AccessInformation, &length); - + + length -= sizeof(FILE_ACCESS_INFORMATION); + if (length > 0) fill_in_file_position_information(&fai->PositionInformation, FileObject, &length); - - if (length > 0) - fill_in_file_mode_information(&fai->ModeInformation, ccb, &length); - - if (length > 0) - fill_in_file_alignment_information(&fai->AlignmentInformation, Vcb, &length); - + + length -= sizeof(FILE_MODE_INFORMATION); + + length -= sizeof(FILE_ALIGNMENT_INFORMATION); + if (length > 0) fill_in_file_name_information(&fai->NameInformation, fcb, fileref, &length); - + Status = STATUS_SUCCESS; break; @@ -4196,17 +3681,19 @@ static 
NTSTATUS STDCALL query_info(device_extension* Vcb, PFILE_OBJECT FileObjec case FileAttributeTagInformation: { FILE_ATTRIBUTE_TAG_INFORMATION* ati = Irp->AssociatedIrp.SystemBuffer; - + TRACE("FileAttributeTagInformation\n"); - + if (Irp->RequestorMode != KernelMode && !(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) { WARN("insufficient privileges\n"); Status = STATUS_ACCESS_DENIED; goto exit; } - - Status = fill_in_file_attribute_information(ati, fcb, fileref, Irp, &length); - + + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + Status = fill_in_file_attribute_information(ati, fcb, ccb, Irp, &length); + ExReleaseResourceLite(&Vcb->tree_lock); + break; } @@ -4214,32 +3701,32 @@ static NTSTATUS STDCALL query_info(device_extension* Vcb, PFILE_OBJECT FileObjec { FILE_BASIC_INFORMATION* fbi = Irp->AssociatedIrp.SystemBuffer; INODE_ITEM* ii; - + TRACE("FileBasicInformation\n"); - + if (Irp->RequestorMode != KernelMode && !(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) { WARN("insufficient privileges\n"); Status = STATUS_ACCESS_DENIED; goto exit; } - + if (IrpSp->Parameters.QueryFile.Length < sizeof(FILE_BASIC_INFORMATION)) { WARN("overflow\n"); Status = STATUS_BUFFER_OVERFLOW; goto exit; } - + if (fcb->ads) { if (!fileref || !fileref->parent) { ERR("no fileref for stream\n"); Status = STATUS_INTERNAL_ERROR; goto exit; } - + ii = &fileref->parent->fcb->inode_item; } else ii = &fcb->inode_item; - + Status = fill_in_file_basic_information(fbi, ii, &length, fcb, fileref); break; } @@ -4252,48 +3739,48 @@ static NTSTATUS STDCALL query_info(device_extension* Vcb, PFILE_OBJECT FileObjec case FileEaInformation: { FILE_EA_INFORMATION* eai = Irp->AssociatedIrp.SystemBuffer; - + TRACE("FileEaInformation\n"); - + Status = fill_in_file_ea_information(eai, fcb, &length); - + break; } case FileInternalInformation: { FILE_INTERNAL_INFORMATION* fii = Irp->AssociatedIrp.SystemBuffer; - + TRACE("FileInternalInformation\n"); - + Status = 
fill_in_file_internal_information(fii, fcb, &length); - + break; } case FileNameInformation: { FILE_NAME_INFORMATION* fni = Irp->AssociatedIrp.SystemBuffer; - + TRACE("FileNameInformation\n"); - + Status = fill_in_file_name_information(fni, fcb, fileref, &length); - + break; } case FileNetworkOpenInformation: { FILE_NETWORK_OPEN_INFORMATION* fnoi = Irp->AssociatedIrp.SystemBuffer; - + TRACE("FileNetworkOpenInformation\n"); - + if (Irp->RequestorMode != KernelMode && !(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) { WARN("insufficient privileges\n"); Status = STATUS_ACCESS_DENIED; goto exit; } - + Status = fill_in_file_network_open_information(fnoi, fcb, fileref, &length); break; @@ -4302,38 +3789,38 @@ static NTSTATUS STDCALL query_info(device_extension* Vcb, PFILE_OBJECT FileObjec case FilePositionInformation: { FILE_POSITION_INFORMATION* fpi = Irp->AssociatedIrp.SystemBuffer; - + TRACE("FilePositionInformation\n"); - + Status = fill_in_file_position_information(fpi, FileObject, &length); - + break; } case FileStandardInformation: { FILE_STANDARD_INFORMATION* fsi = Irp->AssociatedIrp.SystemBuffer; - + TRACE("FileStandardInformation\n"); - + if (IrpSp->Parameters.QueryFile.Length < sizeof(FILE_STANDARD_INFORMATION)) { WARN("overflow\n"); Status = STATUS_BUFFER_OVERFLOW; goto exit; } - + Status = fill_in_file_standard_information(fsi, fcb, ccb->fileref, &length); - + break; } case FileStreamInformation: { FILE_STREAM_INFORMATION* fsi = Irp->AssociatedIrp.SystemBuffer; - + TRACE("FileStreamInformation\n"); - - Status = fill_in_file_stream_information(fsi, fileref, Irp, &length); + + Status = fill_in_file_stream_information(fsi, fileref, &length); break; } @@ -4342,38 +3829,40 @@ static NTSTATUS STDCALL query_info(device_extension* Vcb, PFILE_OBJECT FileObjec case FileHardLinkInformation: { FILE_LINKS_INFORMATION* fli = Irp->AssociatedIrp.SystemBuffer; - + TRACE("FileHardLinkInformation\n"); - + + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); 
Status = fill_in_hard_link_information(fli, fileref, Irp, &length); - + ExReleaseResourceLite(&Vcb->tree_lock); + break; } - + case FileNormalizedNameInformation: { FILE_NAME_INFORMATION* fni = Irp->AssociatedIrp.SystemBuffer; - + TRACE("FileNormalizedNameInformation\n"); - + Status = fill_in_file_name_information(fni, fcb, fileref, &length); - + break; } #endif - + #if (NTDDI_VERSION >= NTDDI_WIN7) case FileStandardLinkInformation: { FILE_STANDARD_LINK_INFORMATION* fsli = Irp->AssociatedIrp.SystemBuffer; - + TRACE("FileStandardLinkInformation\n"); - + Status = fill_in_file_standard_link_information(fsli, fcb, ccb->fileref, &length); - + break; } - + case FileRemoteProtocolInformation: TRACE("FileRemoteProtocolInformation\n"); Status = STATUS_INVALID_PARAMETER; @@ -4381,94 +3870,101 @@ static NTSTATUS STDCALL query_info(device_extension* Vcb, PFILE_OBJECT FileObjec #endif #if (NTDDI_VERSION >= NTDDI_WIN10) +#ifndef _MSC_VER #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wswitch" +#endif case FileIdInformation: { FILE_ID_INFORMATION* fii = Irp->AssociatedIrp.SystemBuffer; - + if (IrpSp->Parameters.QueryFile.Length < sizeof(FILE_ID_INFORMATION)) { WARN("overflow\n"); Status = STATUS_BUFFER_OVERFLOW; goto exit; } - + TRACE("FileIdInformation\n"); - - Status = fill_in_file_id_information(fii, fcb, &length); - + + Status = fill_in_file_id_information(fii, fcb, &length); + break; } -#pragma GCC diagnostic pop +#ifndef _MSC_VER +#pragma GCC diagnostic pop +#endif #endif - + default: WARN("unknown FileInformationClass %u\n", IrpSp->Parameters.QueryFile.FileInformationClass); Status = STATUS_INVALID_PARAMETER; goto exit; } - + if (length < 0) { length = 0; Status = STATUS_BUFFER_OVERFLOW; } - + Irp->IoStatus.Information = IrpSp->Parameters.QueryFile.Length - length; -exit: +exit: TRACE("query_info returning %08x\n", Status); - + return Status; } -NTSTATUS STDCALL drv_query_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { 
+_Dispatch_type_(IRP_MJ_QUERY_INFORMATION) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_query_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { PIO_STACK_LOCATION IrpSp; NTSTATUS Status; fcb* fcb; device_extension* Vcb = DeviceObject->DeviceExtension; BOOL top_level; - + FsRtlEnterFileSystem(); - + top_level = is_top_level(Irp); - - if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) { - Status = part0_passthrough(DeviceObject, Irp); - goto exit; + + if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + Status = vol_query_information(DeviceObject, Irp); + goto end; + } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { + Status = STATUS_INVALID_PARAMETER; + goto end; } - + Irp->IoStatus.Information = 0; - + TRACE("query information\n"); - + IrpSp = IoGetCurrentIrpStackLocation(Irp); - - ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); - + fcb = IrpSp->FileObject->FsContext; TRACE("fcb = %p\n", fcb); TRACE("fcb->subvol = %p\n", fcb->subvol); - + Status = query_info(fcb->Vcb, IrpSp->FileObject, Irp); - + +end: TRACE("returning %08x\n", Status); - + Irp->IoStatus.Status = Status; - + IoCompleteRequest( Irp, IO_NO_INCREMENT ); - - ExReleaseResourceLite(&Vcb->tree_lock); - -exit: - if (top_level) + + if (top_level) IoSetTopLevelIrp(NULL); - + FsRtlExitFileSystem(); - + return Status; } -NTSTATUS STDCALL drv_query_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { +_Dispatch_type_(IRP_MJ_QUERY_EA) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_query_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { NTSTATUS Status; BOOL top_level; device_extension* Vcb = DeviceObject->DeviceExtension; @@ -4478,85 +3974,91 @@ NTSTATUS STDCALL drv_query_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { ccb* ccb; FILE_FULL_EA_INFORMATION* ffei; ULONG retlen = 0; - - TRACE("(%p, %p)\n", DeviceObject, Irp); FsRtlEnterFileSystem(); + TRACE("(%p, %p)\n", DeviceObject, Irp); + top_level = is_top_level(Irp); - - if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) { - Status = part0_passthrough(DeviceObject, 
Irp); - goto exit; + + if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + Status = vol_query_ea(DeviceObject, Irp); + goto end; + } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { + Status = STATUS_INVALID_PARAMETER; + goto end; } - - ffei = map_user_buffer(Irp); + + ffei = map_user_buffer(Irp, NormalPagePriority); if (!ffei) { ERR("could not get output buffer\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + if (!FileObject) { ERR("no file object\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + fcb = FileObject->FsContext; - + if (!fcb) { ERR("no fcb\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + ccb = FileObject->FsContext2; - + if (!ccb) { ERR("no ccb\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + if (Irp->RequestorMode == UserMode && !(ccb->access & (FILE_READ_EA | FILE_WRITE_EA))) { WARN("insufficient privileges\n"); Status = STATUS_ACCESS_DENIED; goto end; } - + + if (fcb->ads) + fcb = ccb->fileref->parent->fcb; + ExAcquireResourceSharedLite(fcb->Header.Resource, TRUE); - + Status = STATUS_SUCCESS; - + if (fcb->ea_xattr.Length == 0) goto end2; - + if (IrpSp->Parameters.QueryEa.EaList) { FILE_FULL_EA_INFORMATION *ea, *out; FILE_GET_EA_INFORMATION* in; - + in = IrpSp->Parameters.QueryEa.EaList; do { STRING s; - + s.Length = s.MaximumLength = in->EaNameLength; s.Buffer = in->EaName; - + RtlUpperString(&s, &s); - + if (in->NextEntryOffset == 0) break; - + in = (FILE_GET_EA_INFORMATION*)(((UINT8*)in) + in->NextEntryOffset); } while (TRUE); - + ea = (FILE_FULL_EA_INFORMATION*)fcb->ea_xattr.Buffer; out = NULL; - + do { BOOL found = FALSE; - + in = IrpSp->Parameters.QueryEa.EaList; do { if (in->EaNameLength == ea->EaNameLength && @@ -4564,125 +4066,126 @@ NTSTATUS STDCALL drv_query_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { found = TRUE; break; } - + if (in->NextEntryOffset == 0) break; - + in = (FILE_GET_EA_INFORMATION*)(((UINT8*)in) + in->NextEntryOffset); } while (TRUE); - + if (found) { UINT8 padding = retlen % 4 > 0 ? 
(4 - (retlen % 4)) : 0; - + if (offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + ea->EaNameLength + 1 + ea->EaValueLength > IrpSp->Parameters.QueryEa.Length - retlen - padding) { Status = STATUS_BUFFER_OVERFLOW; retlen = 0; goto end2; } - + retlen += padding; - + if (out) { - out->NextEntryOffset = offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + out->EaNameLength + 1 + out->EaValueLength + padding; + out->NextEntryOffset = (ULONG)offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + out->EaNameLength + 1 + out->EaValueLength + padding; out = (FILE_FULL_EA_INFORMATION*)(((UINT8*)out) + out->NextEntryOffset); } else out = ffei; - + out->NextEntryOffset = 0; out->Flags = ea->Flags; out->EaNameLength = ea->EaNameLength; out->EaValueLength = ea->EaValueLength; RtlCopyMemory(out->EaName, ea->EaName, ea->EaNameLength + ea->EaValueLength + 1); - - retlen += offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + ea->EaNameLength + 1 + ea->EaValueLength; - + + retlen += (ULONG)offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + ea->EaNameLength + 1 + ea->EaValueLength; + if (IrpSp->Flags & SL_RETURN_SINGLE_ENTRY) break; } - + if (ea->NextEntryOffset == 0) break; - + ea = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ea) + ea->NextEntryOffset); } while (TRUE); } else { FILE_FULL_EA_INFORMATION *ea, *out; ULONG index; - + if (IrpSp->Flags & SL_INDEX_SPECIFIED) { // The index is 1-based if (IrpSp->Parameters.QueryEa.EaIndex == 0) { Status = STATUS_NONEXISTENT_EA_ENTRY; - goto end; + goto end2; } else index = IrpSp->Parameters.QueryEa.EaIndex - 1; } else if (IrpSp->Flags & SL_RESTART_SCAN) index = ccb->ea_index = 0; else index = ccb->ea_index; - + ea = (FILE_FULL_EA_INFORMATION*)fcb->ea_xattr.Buffer; - + if (index > 0) { ULONG i; - + for (i = 0; i < index; i++) { if (ea->NextEntryOffset == 0) // last item goto end2; - + ea = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ea) + ea->NextEntryOffset); } } - + out = NULL; - + do { UINT8 padding = retlen % 4 > 0 ? 
(4 - (retlen % 4)) : 0; - + if (offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + ea->EaNameLength + 1 + ea->EaValueLength > IrpSp->Parameters.QueryEa.Length - retlen - padding) { Status = retlen == 0 ? STATUS_BUFFER_TOO_SMALL : STATUS_BUFFER_OVERFLOW; goto end2; } - + retlen += padding; - + if (out) { - out->NextEntryOffset = offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + out->EaNameLength + 1 + out->EaValueLength + padding; + out->NextEntryOffset = (ULONG)offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + out->EaNameLength + 1 + out->EaValueLength + padding; out = (FILE_FULL_EA_INFORMATION*)(((UINT8*)out) + out->NextEntryOffset); } else out = ffei; - + out->NextEntryOffset = 0; out->Flags = ea->Flags; out->EaNameLength = ea->EaNameLength; out->EaValueLength = ea->EaValueLength; RtlCopyMemory(out->EaName, ea->EaName, ea->EaNameLength + ea->EaValueLength + 1); - - retlen += offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + ea->EaNameLength + 1 + ea->EaValueLength; - + + retlen += (ULONG)offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + ea->EaNameLength + 1 + ea->EaValueLength; + if (!(IrpSp->Flags & SL_INDEX_SPECIFIED)) ccb->ea_index++; - + if (ea->NextEntryOffset == 0 || IrpSp->Flags & SL_RETURN_SINGLE_ENTRY) break; - + ea = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ea) + ea->NextEntryOffset); } while (TRUE); } - + end2: ExReleaseResourceLite(fcb->Header.Resource); - + end: + TRACE("returning %08x\n", Status); + Irp->IoStatus.Status = Status; Irp->IoStatus.Information = NT_SUCCESS(Status) || Status == STATUS_BUFFER_OVERFLOW ? 
retlen : 0; IoCompleteRequest( Irp, IO_NO_INCREMENT ); -exit: - if (top_level) + if (top_level) IoSetTopLevelIrp(NULL); - + FsRtlExitFileSystem(); return Status; @@ -4695,7 +4198,9 @@ typedef struct { LIST_ENTRY list_entry; } ea_item; -NTSTATUS STDCALL drv_set_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { +_Dispatch_type_(IRP_MJ_SET_EA) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_set_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { device_extension* Vcb = DeviceObject->DeviceExtension; NTSTATUS Status; BOOL top_level; @@ -4703,6 +4208,7 @@ NTSTATUS STDCALL drv_set_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { PFILE_OBJECT FileObject = IrpSp->FileObject; fcb* fcb; ccb* ccb; + file_ref* fileref; FILE_FULL_EA_INFORMATION* ffei; ULONG offset; LIST_ENTRY ealist; @@ -4711,71 +4217,80 @@ NTSTATUS STDCALL drv_set_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { LIST_ENTRY* le; LARGE_INTEGER time; BTRFS_TIME now; - - TRACE("(%p, %p)\n", DeviceObject, Irp); FsRtlEnterFileSystem(); + TRACE("(%p, %p)\n", DeviceObject, Irp); + top_level = is_top_level(Irp); - - if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) { - Status = part0_passthrough(DeviceObject, Irp); - goto exit; + + if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + Status = vol_set_ea(DeviceObject, Irp); + goto end; + } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { + Status = STATUS_INVALID_PARAMETER; + goto end; } - + if (Vcb->readonly) { Status = STATUS_MEDIA_WRITE_PROTECTED; goto end; } - - ffei = map_user_buffer(Irp); + + ffei = map_user_buffer(Irp, NormalPagePriority); if (!ffei) { ERR("could not get output buffer\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + Status = IoCheckEaBufferValidity(ffei, IrpSp->Parameters.SetEa.Length, &offset); if (!NT_SUCCESS(Status)) { ERR("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, offset); goto end; } - + if (!FileObject) { ERR("no file object\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + fcb = FileObject->FsContext; - + if 
(!fcb) { ERR("no fcb\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + ccb = FileObject->FsContext2; - + if (!ccb) { ERR("no ccb\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_WRITE_EA)) { WARN("insufficient privileges\n"); Status = STATUS_ACCESS_DENIED; goto end; } - + + if (fcb->ads) { + fileref = ccb->fileref->parent; + fcb = fileref->fcb; + } else + fileref = ccb->fileref; + InitializeListHead(&ealist); - + ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); - + if (fcb->ea_xattr.Length > 0) { ea = (FILE_FULL_EA_INFORMATION*)fcb->ea_xattr.Buffer; - + do { item = ExAllocatePoolWithTag(PagedPool, sizeof(ea_item), ALLOC_TAG); if (!item) { @@ -4783,39 +4298,39 @@ NTSTATUS STDCALL drv_set_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { Status = STATUS_INSUFFICIENT_RESOURCES; goto end2; } - + item->name.Length = item->name.MaximumLength = ea->EaNameLength; item->name.Buffer = ea->EaName; - + item->value.Length = item->value.MaximumLength = ea->EaValueLength; item->value.Buffer = &ea->EaName[ea->EaNameLength + 1]; - + item->flags = ea->Flags; - + InsertTailList(&ealist, &item->list_entry); - + if (ea->NextEntryOffset == 0) break; - + ea = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ea) + ea->NextEntryOffset); } while (TRUE); } - + ea = ffei; - + do { STRING s; BOOL found = FALSE; - + s.Length = s.MaximumLength = ea->EaNameLength; s.Buffer = ea->EaName; - + RtlUpperString(&s, &s); - + le = ealist.Flink; while (le != &ealist) { item = CONTAINING_RECORD(le, ea_item, list_entry); - + if (item->name.Length == s.Length && RtlCompareMemory(item->name.Buffer, s.Buffer, s.Length) == s.Length) { item->flags = ea->Flags; @@ -4824,10 +4339,10 @@ NTSTATUS STDCALL drv_set_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { found = TRUE; break; } - + le = le->Flink; } - + if (!found) { item = ExAllocatePoolWithTag(PagedPool, sizeof(ea_item), ALLOC_TAG); if (!item) { @@ -4835,149 +4350,150 @@ NTSTATUS STDCALL 
drv_set_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { Status = STATUS_INSUFFICIENT_RESOURCES; goto end2; } - + item->name.Length = item->name.MaximumLength = ea->EaNameLength; item->name.Buffer = ea->EaName; - + item->value.Length = item->value.MaximumLength = ea->EaValueLength; item->value.Buffer = &ea->EaName[ea->EaNameLength + 1]; - + item->flags = ea->Flags; - + InsertTailList(&ealist, &item->list_entry); } - + if (ea->NextEntryOffset == 0) break; - + ea = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ea) + ea->NextEntryOffset); } while (TRUE); - + // remove entries with zero-length value le = ealist.Flink; while (le != &ealist) { LIST_ENTRY* le2 = le->Flink; - + item = CONTAINING_RECORD(le, ea_item, list_entry); - + if (item->value.Length == 0) { RemoveEntryList(&item->list_entry); ExFreePool(item); } - + le = le2; } - + if (IsListEmpty(&ealist)) { fcb->ealen = 0; - + if (fcb->ea_xattr.Buffer) ExFreePool(fcb->ea_xattr.Buffer); - + fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = 0; fcb->ea_xattr.Buffer = NULL; } else { - ULONG size = 0; + UINT16 size = 0; char *buf, *oldbuf; - + le = ealist.Flink; while (le != &ealist) { item = CONTAINING_RECORD(le, ea_item, list_entry); - + if (size % 4 > 0) size += 4 - (size % 4); - - size += offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + item->name.Length + 1 + item->value.Length; - + + size += (UINT16)offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + item->name.Length + 1 + item->value.Length; + le = le->Flink; } - + buf = ExAllocatePoolWithTag(PagedPool, size, ALLOC_TAG); if (!buf) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end2; } - + oldbuf = fcb->ea_xattr.Buffer; - + fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = size; fcb->ea_xattr.Buffer = buf; - + fcb->ealen = 4; ea = NULL; - + le = ealist.Flink; while (le != &ealist) { item = CONTAINING_RECORD(le, ea_item, list_entry); - + if (ea) { - ea->NextEntryOffset = offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + ea->EaNameLength + 
ea->EaValueLength; - + ea->NextEntryOffset = (ULONG)offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + ea->EaNameLength + ea->EaValueLength; + if (ea->NextEntryOffset % 4 > 0) ea->NextEntryOffset += 4 - (ea->NextEntryOffset % 4); - + ea = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ea) + ea->NextEntryOffset); } else ea = (FILE_FULL_EA_INFORMATION*)fcb->ea_xattr.Buffer; - + ea->NextEntryOffset = 0; ea->Flags = item->flags; - ea->EaNameLength = item->name.Length; + ea->EaNameLength = (UCHAR)item->name.Length; ea->EaValueLength = item->value.Length; - + RtlCopyMemory(ea->EaName, item->name.Buffer, item->name.Length); ea->EaName[item->name.Length] = 0; RtlCopyMemory(&ea->EaName[item->name.Length + 1], item->value.Buffer, item->value.Length); - + fcb->ealen += 5 + item->name.Length + item->value.Length; - + le = le->Flink; } - + if (oldbuf) ExFreePool(oldbuf); } - + fcb->ea_changed = TRUE; - + KeQuerySystemTime(&time); win_time_to_unix(time, &now); fcb->inode_item.transid = Vcb->superblock.generation; fcb->inode_item.sequence++; - + if (!ccb->user_set_change_time) fcb->inode_item.st_ctime = now; - + fcb->inode_item_changed = TRUE; mark_fcb_dirty(fcb); - - send_notification_fileref(ccb->fileref, FILE_NOTIFY_CHANGE_EA, FILE_ACTION_MODIFIED); - + + send_notification_fileref(fileref, FILE_NOTIFY_CHANGE_EA, FILE_ACTION_MODIFIED, NULL); + Status = STATUS_SUCCESS; - + end2: ExReleaseResourceLite(fcb->Header.Resource); - + while (!IsListEmpty(&ealist)) { le = RemoveHeadList(&ealist); - + item = CONTAINING_RECORD(le, ea_item, list_entry); - + ExFreePool(item); } - + end: + TRACE("returning %08x\n", Status); + Irp->IoStatus.Status = Status; Irp->IoStatus.Information = 0; IoCompleteRequest(Irp, IO_NO_INCREMENT); - -exit: - if (top_level) + + if (top_level) IoSetTopLevelIrp(NULL); - + FsRtlExitFileSystem(); return Status; diff --git a/reactos/drivers/filesystems/btrfs/flushthread.c b/reactos/drivers/filesystems/btrfs/flushthread.c index 69ba21e051f..aa3d9b6b0f8 100644 --- 
a/reactos/drivers/filesystems/btrfs/flushthread.c +++ b/reactos/drivers/filesystems/btrfs/flushthread.c @@ -1,21 +1,24 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */ #include "btrfs_drv.h" +#include <ata.h> +#include <ntddscsi.h> +#include <ntddstor.h> #define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - sizeof(leaf_node)) @@ -38,55 +41,54 @@ typedef struct { TREE_BLOCK_REF tbr; } EXTENT_ITEM_SKINNY_METADATA; -static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp, LIST_ENTRY* rollback); +static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp); static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback); -static BOOL insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, root* r, UINT64 objid, UINT64 objtype, UINT64 offset, - void* data, UINT16 datalen, enum batch_operation operation, PIRP Irp, LIST_ENTRY* rollback); -static NTSTATUS STDCALL write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#ifndef _MSC_VER // not in mingw yet +#define DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED 0x80000000 +#endif + +_Function_class_(IO_COMPLETION_ROUTINE) +#ifdef __REACTOS__ +static NTSTATUS NTAPI write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#else +static NTSTATUS
write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#endif write_context* context = conptr; - + + UNUSED(DeviceObject); + context->iosb = Irp->IoStatus; KeSetEvent(&context->Event, 0, FALSE); - -// return STATUS_SUCCESS; + return STATUS_MORE_PROCESSING_REQUIRED; } -NTSTATUS STDCALL write_data_phys(PDEVICE_OBJECT device, UINT64 address, void* data, UINT32 length) { +NTSTATUS write_data_phys(_In_ PDEVICE_OBJECT device, _In_ UINT64 address, _In_reads_bytes_(length) void* data, _In_ UINT32 length) { NTSTATUS Status; LARGE_INTEGER offset; PIRP Irp; PIO_STACK_LOCATION IrpSp; - write_context* context = NULL; - + write_context context; + TRACE("(%p, %llx, %p, %x)\n", device, address, data, length); - - context = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_context), ALLOC_TAG); - if (!context) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - RtlZeroMemory(context, sizeof(write_context)); - - KeInitializeEvent(&context->Event, NotificationEvent, FALSE); - + + RtlZeroMemory(&context, sizeof(write_context)); + + KeInitializeEvent(&context.Event, NotificationEvent, FALSE); + offset.QuadPart = address; - -// Irp = IoBuildSynchronousFsdRequest(IRP_MJ_WRITE, Vcb->device, data, length, &offset, NULL, &context->iosb); - + Irp = IoAllocateIrp(device->StackSize, FALSE); - + if (!Irp) { ERR("IoAllocateIrp failed\n"); - Status = STATUS_INTERNAL_ERROR; - goto exit2; + return STATUS_INSUFFICIENT_RESOURCES; } - + IrpSp = IoGetNextIrpStackLocation(Irp); IrpSp->MajorFunction = IRP_MJ_WRITE; - + if (device->Flags & DO_BUFFERED_IO) { Irp->AssociatedIrp.SystemBuffer = data; @@ -95,87 +97,368 @@ NTSTATUS STDCALL write_data_phys(PDEVICE_OBJECT device, UINT64 address, void* da Irp->MdlAddress = IoAllocateMdl(data, length, FALSE, FALSE, NULL); if (!Irp->MdlAddress) { DbgPrint("IoAllocateMdl failed\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + Status = STATUS_SUCCESS; + + _SEH2_TRY { + MmProbeAndLockPages(Irp->MdlAddress, 
KernelMode, IoReadAccess); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + + if (!NT_SUCCESS(Status)) { + ERR("MmProbeAndLockPages threw exception %08x\n", Status); + IoFreeMdl(Irp->MdlAddress); goto exit; } - - MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoWriteAccess); } else { Irp->UserBuffer = data; } IrpSp->Parameters.Write.Length = length; IrpSp->Parameters.Write.ByteOffset = offset; - - Irp->UserIosb = &context->iosb; - Irp->UserEvent = &context->Event; + Irp->UserIosb = &context.iosb; - IoSetCompletionRoutine(Irp, write_completion, context, TRUE, TRUE, TRUE); + Irp->UserEvent = &context.Event; + + IoSetCompletionRoutine(Irp, write_completion, &context, TRUE, TRUE, TRUE); Status = IoCallDriver(device, Irp); - + if (Status == STATUS_PENDING) { - KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL); - Status = context->iosb.Status; + KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL); + Status = context.iosb.Status; } - + if (!NT_SUCCESS(Status)) { ERR("IoCallDriver returned %08x\n", Status); } - + if (device->Flags & DO_DIRECT_IO) { MmUnlockPages(Irp->MdlAddress); IoFreeMdl(Irp->MdlAddress); } - + exit: IoFreeIrp(Irp); - -exit2: - if (context) - ExFreePool(context); - + return Status; } +static void add_trim_entry(device* dev, UINT64 address, UINT64 size) { + space* s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG); + if (!s) { + ERR("out of memory\n"); + return; + } + + s->address = address; + s->size = size; + dev->num_trim_entries++; + + InsertTailList(&dev->trim_list, &s->list_entry); +} + static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) { - // FIXME - loop through c->deleting and do TRIM if device supports it - // FIXME - also find way of doing TRIM of dropped chunks - + ULONG type; + + if (Vcb->trim && !Vcb->options.no_trim) { + if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE) + type = BLOCK_FLAG_DUPLICATE; + else if 
(c->chunk_item->type & BLOCK_FLAG_RAID0) + type = BLOCK_FLAG_RAID0; + else if (c->chunk_item->type & BLOCK_FLAG_RAID1) + type = BLOCK_FLAG_DUPLICATE; + else if (c->chunk_item->type & BLOCK_FLAG_RAID10) + type = BLOCK_FLAG_RAID10; + else if (c->chunk_item->type & BLOCK_FLAG_RAID5) + type = BLOCK_FLAG_RAID5; + else if (c->chunk_item->type & BLOCK_FLAG_RAID6) + type = BLOCK_FLAG_RAID6; + else // SINGLE + type = BLOCK_FLAG_DUPLICATE; + } + while (!IsListEmpty(&c->deleting)) { space* s = CONTAINING_RECORD(c->deleting.Flink, space, list_entry); - + + if (Vcb->trim && !Vcb->options.no_trim && (!Vcb->options.no_barrier || !(c->chunk_item->type & BLOCK_FLAG_METADATA))) { + CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; + + if (type == BLOCK_FLAG_DUPLICATE) { + UINT16 i; + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim) + add_trim_entry(c->devices[i], s->address - c->offset + cis[i].offset, s->size); + } + } else if (type == BLOCK_FLAG_RAID0) { + UINT64 startoff, endoff; + UINT16 startoffstripe, endoffstripe, i; + + get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe); + get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe); + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim) { + UINT64 stripestart, stripeend; + + if (startoffstripe > i) + stripestart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; + else if (startoffstripe == i) + stripestart = startoff; + else + stripestart = startoff - (startoff % c->chunk_item->stripe_length); + + if (endoffstripe > i) + stripeend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; + else if 
(endoffstripe == i) + stripeend = endoff + 1; + else + stripeend = endoff - (endoff % c->chunk_item->stripe_length); + + if (stripestart != stripeend) + add_trim_entry(c->devices[i], stripestart + cis[i].offset, stripeend - stripestart); + } + } + } else if (type == BLOCK_FLAG_RAID10) { + UINT64 startoff, endoff; + UINT16 sub_stripes, startoffstripe, endoffstripe, i; + + sub_stripes = max(1, c->chunk_item->sub_stripes); + + get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe); + get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe); + + startoffstripe *= sub_stripes; + endoffstripe *= sub_stripes; + + for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) { + ULONG j; + UINT64 stripestart, stripeend; + + if (startoffstripe > i) + stripestart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; + else if (startoffstripe == i) + stripestart = startoff; + else + stripestart = startoff - (startoff % c->chunk_item->stripe_length); + + if (endoffstripe > i) + stripeend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; + else if (endoffstripe == i) + stripeend = endoff + 1; + else + stripeend = endoff - (endoff % c->chunk_item->stripe_length); + + if (stripestart != stripeend) { + for (j = 0; j < sub_stripes; j++) { + if (c->devices[i+j] && c->devices[i+j]->devobj && !c->devices[i+j]->readonly && c->devices[i+j]->trim) + add_trim_entry(c->devices[i+j], stripestart + cis[i+j].offset, stripeend - stripestart); + } + } + } + } + // FIXME - RAID5(?), RAID6(?) 
+ } + RemoveEntryList(&s->list_entry); ExFreePool(s); } } +typedef struct { + DEVICE_MANAGE_DATA_SET_ATTRIBUTES* dmdsa; + ATA_PASS_THROUGH_EX apte; + PIRP Irp; + IO_STATUS_BLOCK iosb; +} ioctl_context_stripe; + +typedef struct { + KEVENT Event; + LONG left; + ioctl_context_stripe* stripes; +} ioctl_context; + +_Function_class_(IO_COMPLETION_ROUTINE) +#ifdef __REACTOS__ +static NTSTATUS NTAPI ioctl_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#else +static NTSTATUS ioctl_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#endif + ioctl_context* context = (ioctl_context*)conptr; + LONG left2 = InterlockedDecrement(&context->left); + + UNUSED(DeviceObject); + UNUSED(Irp); + + if (left2 == 0) + KeSetEvent(&context->Event, 0, FALSE); + + return STATUS_MORE_PROCESSING_REQUIRED; +} + static void clean_space_cache(device_extension* Vcb) { + LIST_ENTRY* le; chunk* c; - + ULONG num; + TRACE("(%p)\n", Vcb); - - while (!IsListEmpty(&Vcb->chunks_changed)) { - c = CONTAINING_RECORD(Vcb->chunks_changed.Flink, chunk, list_entry_changed); - - ExAcquireResourceExclusiveLite(&c->lock, TRUE); - - clean_space_cache_chunk(Vcb, c); - RemoveEntryList(&c->list_entry_changed); - c->list_entry_changed.Flink = NULL; - - ExReleaseResourceLite(&c->lock); + + ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE); + + le = Vcb->chunks.Flink; + while (le != &Vcb->chunks) { + c = CONTAINING_RECORD(le, chunk, list_entry); + + if (c->space_changed) { + ExAcquireResourceExclusiveLite(&c->lock, TRUE); + + if (c->space_changed) + clean_space_cache_chunk(Vcb, c); + + c->space_changed = FALSE; + + ExReleaseResourceLite(&c->lock); + } + + le = le->Flink; + } + + ExReleaseResourceLite(&Vcb->chunk_lock); + + if (Vcb->trim && !Vcb->options.no_trim) { + ioctl_context context; + ULONG total_num; + + context.left = 0; + + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (dev->devobj && !dev->readonly && 
dev->trim && dev->num_trim_entries > 0) + context.left++; + + le = le->Flink; + } + + if (context.left == 0) + return; + + total_num = context.left; + num = 0; + + KeInitializeEvent(&context.Event, NotificationEvent, FALSE); + + context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG); + if (!context.stripes) { + ERR("out of memory\n"); + return; + } + + RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left); + + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0) { + LIST_ENTRY* le2; + ioctl_context_stripe* stripe = &context.stripes[num]; + DEVICE_DATA_SET_RANGE* ranges; + ULONG datalen = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(UINT64)) + (dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE)), i; + PIO_STACK_LOCATION IrpSp; + + stripe->dmdsa = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG); + if (!stripe->dmdsa) { + ERR("out of memory\n"); + goto nextdev; + } + + stripe->dmdsa->Size = sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES); + stripe->dmdsa->Action = DeviceDsmAction_Trim; + stripe->dmdsa->Flags = DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED; + stripe->dmdsa->ParameterBlockOffset = 0; + stripe->dmdsa->ParameterBlockLength = 0; + stripe->dmdsa->DataSetRangesOffset = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(UINT64)); + stripe->dmdsa->DataSetRangesLength = dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE); + + ranges = (DEVICE_DATA_SET_RANGE*)((UINT8*)stripe->dmdsa + stripe->dmdsa->DataSetRangesOffset); + + i = 0; + + le2 = dev->trim_list.Flink; + while (le2 != &dev->trim_list) { + space* s = CONTAINING_RECORD(le2, space, list_entry); + + ranges[i].StartingOffset = s->address; + ranges[i].LengthInBytes = s->size; + i++; + + le2 = le2->Flink; + } + + stripe->Irp = 
IoAllocateIrp(dev->devobj->StackSize, FALSE); + + if (!stripe->Irp) { + ERR("IoAllocateIrp failed\n"); + goto nextdev; + } + + IrpSp = IoGetNextIrpStackLocation(stripe->Irp); + IrpSp->MajorFunction = IRP_MJ_DEVICE_CONTROL; + + IrpSp->Parameters.DeviceIoControl.IoControlCode = IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES; + IrpSp->Parameters.DeviceIoControl.InputBufferLength = datalen; + IrpSp->Parameters.DeviceIoControl.OutputBufferLength = 0; + + stripe->Irp->AssociatedIrp.SystemBuffer = stripe->dmdsa; + stripe->Irp->Flags |= IRP_BUFFERED_IO; + stripe->Irp->UserBuffer = NULL; + stripe->Irp->UserIosb = &stripe->iosb; + + IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, TRUE, TRUE, TRUE); + + IoCallDriver(dev->devobj, stripe->Irp); + +nextdev: + while (!IsListEmpty(&dev->trim_list)) { + space* s = CONTAINING_RECORD(RemoveHeadList(&dev->trim_list), space, list_entry); + ExFreePool(s); + } + + dev->num_trim_entries = 0; + + num++; + } + + le = le->Flink; + } + + KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL); + + for (num = 0; num < total_num; num++) { + if (context.stripes[num].dmdsa) + ExFreePool(context.stripes[num].dmdsa); + } + + ExFreePool(context.stripes); } } -static BOOL trees_consistent(device_extension* Vcb, LIST_ENTRY* rollback) { +static BOOL trees_consistent(device_extension* Vcb) { ULONG maxsize = Vcb->superblock.node_size - sizeof(tree_header); LIST_ENTRY* le; - + le = Vcb->trees.Flink; while (le != &Vcb->trees) { tree* t = CONTAINING_RECORD(le, tree, list_entry); - + if (t->write) { if (t->header.num_items == 0 && t->parent) { #ifdef DEBUG_WRITE_LOOPS @@ -183,14 +466,14 @@ static BOOL trees_consistent(device_extension* Vcb, LIST_ENTRY* rollback) { #endif return FALSE; } - + if (t->size > maxsize) { #ifdef DEBUG_WRITE_LOOPS ERR("overlarge tree found (%u > %u), looping again\n", t->size, maxsize); #endif return FALSE; } - + if (!t->has_new_address) { #ifdef DEBUG_WRITE_LOOPS ERR("tree found without new address, 
looping again\n"); @@ -198,79 +481,86 @@ static BOOL trees_consistent(device_extension* Vcb, LIST_ENTRY* rollback) { return FALSE; } } - + le = le->Flink; } - + return TRUE; } -static NTSTATUS add_parents(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) { - UINT8 level; +static NTSTATUS add_parents(device_extension* Vcb, PIRP Irp) { + ULONG level; LIST_ENTRY* le; - + for (level = 0; level <= 255; level++) { BOOL nothing_found = TRUE; - + TRACE("level = %u\n", level); - + le = Vcb->trees.Flink; while (le != &Vcb->trees) { tree* t = CONTAINING_RECORD(le, tree, list_entry); - + if (t->write && t->header.level == level) { TRACE("tree %p: root = %llx, level = %x, parent = %p\n", t, t->header.tree_id, t->header.level, t->parent); - + nothing_found = FALSE; - + if (t->parent) { if (!t->parent->write) TRACE("adding tree %p (level %x)\n", t->parent, t->header.level); - + t->parent->write = TRUE; } else if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) { KEY searchkey; traverse_ptr tp; NTSTATUS Status; - + searchkey.obj_id = t->root->id; searchkey.obj_type = TYPE_ROOT_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id); return STATUS_INTERNAL_ERROR; } - + if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG); - + if (!ri) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(ri, &t->root->root_item, sizeof(ROOT_ITEM)); - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), 
NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + ExFreePool(ri); + return Status; + } + + Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + ExFreePool(ri); + return Status; } } } } - + le = le->Flink; } - + if (nothing_found) break; } @@ -278,7 +568,7 @@ static NTSTATUS add_parents(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollbac return STATUS_SUCCESS; } -static void add_parents_to_cache(device_extension* Vcb, tree* t) { +static void add_parents_to_cache(tree* t) { while (t->parent) { t = t->parent; t->write = TRUE; @@ -286,77 +576,91 @@ static void add_parents_to_cache(device_extension* Vcb, tree* t) { } static BOOL insert_tree_extent_skinny(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64 address, PIRP Irp, LIST_ENTRY* rollback) { + NTSTATUS Status; EXTENT_ITEM_SKINNY_METADATA* eism; traverse_ptr insert_tp; - + eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG); if (!eism) { ERR("out of memory\n"); return FALSE; } - + eism->ei.refcount = 1; eism->ei.generation = Vcb->superblock.generation; eism->ei.flags = EXTENT_ITEM_TREE_BLOCK; eism->type = TYPE_TREE_BLOCK_REF; eism->tbr.offset = root_id; - - if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, Irp, rollback)) { - ERR("insert_tree_item failed\n"); + + Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); ExFreePool(eism); return FALSE; } - + 
ExAcquireResourceExclusiveLite(&c->lock, TRUE); - - space_list_subtract(Vcb, c, FALSE, address, Vcb->superblock.node_size, rollback); + + space_list_subtract(c, FALSE, address, Vcb->superblock.node_size, rollback); ExReleaseResourceLite(&c->lock); - - add_parents_to_cache(Vcb, insert_tp.tree); - + + add_parents_to_cache(insert_tp.tree); + return TRUE; } BOOL find_metadata_address_in_chunk(device_extension* Vcb, chunk* c, UINT64* address) { LIST_ENTRY* le; space* s; - + TRACE("(%p, %llx, %p)\n", Vcb, c->offset, address); - + + if (Vcb->superblock.node_size > c->chunk_item->size - c->used) + return FALSE; + + if (!c->cache_loaded) { + NTSTATUS Status = load_cache_chunk(Vcb, c, NULL); + + if (!NT_SUCCESS(Status)) { + ERR("load_cache_chunk returned %08x\n", Status); + return FALSE; + } + } + if (IsListEmpty(&c->space_size)) return FALSE; - + if (!c->last_alloc_set) { s = CONTAINING_RECORD(c->space.Blink, space, list_entry); - + c->last_alloc = s->address; c->last_alloc_set = TRUE; - + if (s->size >= Vcb->superblock.node_size) { *address = s->address; c->last_alloc += Vcb->superblock.node_size; return TRUE; } } - + le = c->space.Flink; while (le != &c->space) { s = CONTAINING_RECORD(le, space, list_entry); - + if (s->address <= c->last_alloc && s->address + s->size >= c->last_alloc + Vcb->superblock.node_size) { *address = c->last_alloc; c->last_alloc += Vcb->superblock.node_size; return TRUE; } - + le = le->Flink; } - + le = c->space_size.Flink; while (le != &c->space_size) { s = CONTAINING_RECORD(le, space, list_entry_size); - + if (s->size == Vcb->superblock.node_size) { *address = s->address; c->last_alloc = s->address + Vcb->superblock.node_size; @@ -364,48 +668,49 @@ BOOL find_metadata_address_in_chunk(device_extension* Vcb, chunk* c, UINT64* add } else if (s->size < Vcb->superblock.node_size) { if (le == c->space_size.Flink) return FALSE; - + s = CONTAINING_RECORD(le->Blink, space, list_entry_size); - + *address = s->address; c->last_alloc = s->address + 
Vcb->superblock.node_size; - + return TRUE; } - + le = le->Flink; } - + s = CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size); - + if (s->size > Vcb->superblock.node_size) { *address = s->address; c->last_alloc = s->address + Vcb->superblock.node_size; return TRUE; } - + return FALSE; } static BOOL insert_tree_extent(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64* new_address, PIRP Irp, LIST_ENTRY* rollback) { + NTSTATUS Status; UINT64 address; EXTENT_ITEM_TREE2* eit2; traverse_ptr insert_tp; - + TRACE("(%p, %x, %llx, %p, %p, %p, %p)\n", Vcb, level, root_id, c, new_address, rollback); - + if (!find_metadata_address_in_chunk(Vcb, c, &address)) return FALSE; - + if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) { BOOL b = insert_tree_extent_skinny(Vcb, level, root_id, c, address, Irp, rollback); - + if (b) *new_address = address; - + return b; } - + eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG); if (!eit2) { ERR("out of memory\n"); @@ -415,81 +720,61 @@ static BOOL insert_tree_extent(device_extension* Vcb, UINT8 level, UINT64 root_i eit2->eit.extent_item.refcount = 1; eit2->eit.extent_item.generation = Vcb->superblock.generation; eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK; -// eit2->eit.firstitem = wt->firstitem; eit2->eit.level = level; eit2->type = TYPE_TREE_BLOCK_REF; eit2->tbr.offset = root_id; - -// #ifdef DEBUG_PARANOID -// if (wt->firstitem.obj_type == 0xcc) { // TESTING -// ERR("error - firstitem not set (wt = %p, tree = %p, address = %x)\n", wt, wt->tree, (UINT32)address); -// ERR("num_items = %u, level = %u, root = %x, delete = %u\n", wt->tree->header.num_items, wt->tree->header.level, (UINT32)wt->tree->root->id, wt->delete); -// int3; -// } -// #endif - - if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, Irp, rollback)) { - ERR("insert_tree_item failed\n"); + + 
Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); ExFreePool(eit2); return FALSE; } - + ExAcquireResourceExclusiveLite(&c->lock, TRUE); - - space_list_subtract(Vcb, c, FALSE, address, Vcb->superblock.node_size, rollback); - + + space_list_subtract(c, FALSE, address, Vcb->superblock.node_size, rollback); + ExReleaseResourceLite(&c->lock); - add_parents_to_cache(Vcb, insert_tp.tree); - + add_parents_to_cache(insert_tp.tree); + *new_address = address; - + return TRUE; } NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) { + NTSTATUS Status; chunk *origchunk = NULL, *c; LIST_ENTRY* le; UINT64 flags, addr; - + if (t->root->id == BTRFS_ROOT_CHUNK) flags = Vcb->system_flags; else flags = Vcb->metadata_flags; - -// TRACE("flags = %x\n", (UINT32)wt->flags); - -// if (!chunk_test) { // TESTING -// if ((c = alloc_chunk(Vcb, flags))) { -// if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) { -// if (insert_tree_extent(Vcb, t, c)) { -// chunk_test = TRUE; -// return STATUS_SUCCESS; -// } -// } -// } -// } - + if (t->has_address) { origchunk = get_chunk_from_address(Vcb, t->header.address); - - if (!origchunk->readonly && !origchunk->reloc && origchunk->chunk_item->type == flags && + + if (origchunk && !origchunk->readonly && !origchunk->reloc && origchunk->chunk_item->type == flags && insert_tree_extent(Vcb, t->header.level, t->root->id, origchunk, &addr, Irp, rollback)) { t->new_address = addr; t->has_new_address = TRUE; return STATUS_SUCCESS; } } - + ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); - + le = Vcb->chunks.Flink; while (le != &Vcb->chunks) { c = CONTAINING_RECORD(le, chunk, list_entry); - + if (!c->readonly && !c->reloc) { ExAcquireResourceExclusiveLite(&c->lock, TRUE); - + if (c != origchunk && c->chunk_item->type == 
flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) { if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) { ExReleaseResourceLite(&c->lock); @@ -499,88 +784,62 @@ NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENT return STATUS_SUCCESS; } } - + ExReleaseResourceLite(&c->lock); } le = le->Flink; } - + // allocate new chunk if necessary - if ((c = alloc_chunk(Vcb, flags))) { - ExAcquireResourceExclusiveLite(&c->lock, TRUE); - - if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) { - if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) { - ExReleaseResourceLite(&c->lock); - ExReleaseResourceLite(&Vcb->chunk_lock); - t->new_address = addr; - t->has_new_address = TRUE; - return STATUS_SUCCESS; - } - } - - ExReleaseResourceLite(&c->lock); - } - - ExReleaseResourceLite(&Vcb->chunk_lock); - - ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size); - return STATUS_DISK_FULL; -} + Status = alloc_chunk(Vcb, flags, &c, FALSE); -// TESTING -// static void check_tree_num_items(tree* t) { -// LIST_ENTRY* le2; -// UINT32 ni; -// -// le2 = t->itemlist.Flink; -// ni = 0; -// while (le2 != &t->itemlist) { -// tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry); -// if (!td->ignore) -// ni++; -// le2 = le2->Flink; -// } -// -// if (t->header.num_items != ni) { -// ERR("tree %p not okay: num_items was %x, expecting %x\n", t, ni, t->header.num_items); -// int3; -// } else { -// ERR("tree %p okay\n", t); -// } -// } -// -// static void check_trees_num_items(LIST_ENTRY* tc) { -// LIST_ENTRY* le = tc->Flink; -// while (le != tc) { -// tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry); -// -// check_tree_num_items(tc2->tree); -// -// le = le->Flink; -// } -// } + if (!NT_SUCCESS(Status)) { + ERR("alloc_chunk returned %08x\n", Status); + ExReleaseResourceLite(&Vcb->chunk_lock); + return Status; + } -static 
NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* t, UINT64 parent_root, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) { - NTSTATUS Status; - UINT64 rc, root; - - TRACE("(%p, %llx, %p)\n", Vcb, address, t); + ExAcquireResourceExclusiveLite(&c->lock, TRUE); + + if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) { + if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) { + ExReleaseResourceLite(&c->lock); + ExReleaseResourceLite(&Vcb->chunk_lock); + t->new_address = addr; + t->has_new_address = TRUE; + return STATUS_SUCCESS; + } + } + + ExReleaseResourceLite(&c->lock); + + ExReleaseResourceLite(&Vcb->chunk_lock); + + ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size); + + return STATUS_DISK_FULL; +} + +static NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* t, UINT64 parent_root, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) { + NTSTATUS Status; + UINT64 rc, root; + + TRACE("(%p, %llx, %p)\n", Vcb, address, t); rc = get_extent_refcount(Vcb, address, Vcb->superblock.node_size, Irp); if (rc == 0) { ERR("error - refcount for extent %llx was 0\n", address); return STATUS_INTERNAL_ERROR; } - + if (!t || t->parent) root = parent_root; else root = t->header.tree_id; - - Status = decrease_extent_refcount_tree(Vcb, address, Vcb->superblock.node_size, root, level, Irp, rollback); + + Status = decrease_extent_refcount_tree(Vcb, address, Vcb->superblock.node_size, root, level, Irp); if (!NT_SUCCESS(Status)) { ERR("decrease_extent_refcount_tree returned %08x\n", Status); return Status; @@ -588,87 +847,97 @@ static NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* if (rc == 1) { chunk* c = get_chunk_from_address(Vcb, address); - + if (c) { ExAcquireResourceExclusiveLite(&c->lock, TRUE); - - decrease_chunk_usage(c, Vcb->superblock.node_size); - - space_list_add(Vcb, c, TRUE, address, Vcb->superblock.node_size, rollback); - + + if 
(!c->cache_loaded) { + Status = load_cache_chunk(Vcb, c, NULL); + + if (!NT_SUCCESS(Status)) { + ERR("load_cache_chunk returned %08x\n", Status); + ExReleaseResourceLite(&c->lock); + return Status; + } + } + + c->used -= Vcb->superblock.node_size; + + space_list_add(c, address, Vcb->superblock.node_size, rollback); + ExReleaseResourceLite(&c->lock); } else ERR("could not find chunk for address %llx\n", address); } - + return STATUS_SUCCESS; } static NTSTATUS add_changed_extent_ref_edr(changed_extent* ce, EXTENT_DATA_REF* edr, BOOL old) { LIST_ENTRY *le2, *list; changed_extent_ref* cer; - + list = old ? &ce->old_refs : &ce->refs; - + le2 = list->Flink; while (le2 != list) { cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); - + if (cer->type == TYPE_EXTENT_DATA_REF && cer->edr.root == edr->root && cer->edr.objid == edr->objid && cer->edr.offset == edr->offset) { cer->edr.count += edr->count; goto end; } - + le2 = le2->Flink; } - + cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG); if (!cer) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + cer->type = TYPE_EXTENT_DATA_REF; RtlCopyMemory(&cer->edr, edr, sizeof(EXTENT_DATA_REF)); InsertTailList(list, &cer->list_entry); - + end: if (old) ce->old_count += edr->count; else ce->count += edr->count; - + return STATUS_SUCCESS; } static NTSTATUS add_changed_extent_ref_sdr(changed_extent* ce, SHARED_DATA_REF* sdr, BOOL old) { LIST_ENTRY *le2, *list; changed_extent_ref* cer; - + list = old ? 
&ce->old_refs : &ce->refs; - + le2 = list->Flink; while (le2 != list) { cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); - + if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr->offset) { cer->sdr.count += sdr->count; goto end; } - + le2 = le2->Flink; } - + cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG); if (!cer) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + cer->type = TYPE_SHARED_DATA_REF; RtlCopyMemory(&cer->sdr, sdr, sizeof(SHARED_DATA_REF)); InsertTailList(list, &cer->list_entry); - + end: if (old) ce->old_count += sdr->count; @@ -682,7 +951,7 @@ static BOOL shared_tree_is_unique(device_extension* Vcb, tree* t, PIRP Irp, LIST KEY searchkey; traverse_ptr tp; NTSTATUS Status; - + if (!t->updated_extents && t->has_address) { Status = update_tree_extents(Vcb, t, Irp, rollback); if (!NT_SUCCESS(Status)) { @@ -690,17 +959,17 @@ static BOOL shared_tree_is_unique(device_extension* Vcb, tree* t, PIRP Irp, LIST return FALSE; } } - + searchkey.obj_id = t->header.address; searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? 
TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return FALSE; } - + if (tp.item->key.obj_id == t->header.address && (tp.item->key.obj_type == TYPE_METADATA_ITEM || tp.item->key.obj_type == TYPE_EXTENT_ITEM)) return FALSE; else @@ -711,41 +980,41 @@ static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LI NTSTATUS Status; UINT64 rc = get_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, Irp); UINT64 flags = get_extent_flags(Vcb, t->header.address, Irp); - + if (rc == 0) { ERR("refcount for extent %llx was 0\n", t->header.address); return STATUS_INTERNAL_ERROR; } - + if (flags & EXTENT_ITEM_SHARED_BACKREFS || t->header.flags & HEADER_FLAG_SHARED_BACKREF || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) { TREE_BLOCK_REF tbr; BOOL unique = rc > 1 ? FALSE : (t->parent ? shared_tree_is_unique(Vcb, t->parent, Irp, rollback) : FALSE); - + if (t->header.level == 0) { LIST_ENTRY* le; - + le = t->itemlist.Flink; while (le != &t->itemlist) { tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); - + if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { EXTENT_DATA* ed = (EXTENT_DATA*)td->data; - + if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; - + if (ed2->size > 0) { EXTENT_DATA_REF edr; changed_extent* ce = NULL; chunk* c = get_chunk_from_address(Vcb, ed2->address); - + if (c) { LIST_ENTRY* le2; - + le2 = c->changed_extents.Flink; while (le2 != &c->changed_extents) { changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry); - + if (ce2->address == ed2->address) { ce = ce2; break; @@ -754,240 +1023,240 @@ static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LI le2 = 
le2->Flink; } } - + edr.root = t->root->id; edr.objid = td->key.obj_id; edr.offset = td->key.offset - ed2->offset; edr.count = 1; - + if (ce) { Status = add_changed_extent_ref_edr(ce, &edr, TRUE); if (!NT_SUCCESS(Status)) { ERR("add_changed_extent_ref_edr returned %08x\n", Status); return Status; } - + Status = add_changed_extent_ref_edr(ce, &edr, FALSE); if (!NT_SUCCESS(Status)) { ERR("add_changed_extent_ref_edr returned %08x\n", Status); return Status; } } - - Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp, rollback); + + Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp); if (!NT_SUCCESS(Status)) { ERR("increase_extent_refcount returned %08x\n", Status); return Status; } - + if ((flags & EXTENT_ITEM_SHARED_BACKREFS && unique) || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) { UINT64 sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, t->header.address, Irp); if (sdrrc > 0) { SHARED_DATA_REF sdr; - + sdr.offset = t->header.address; sdr.count = 1; - + Status = decrease_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, - t->header.address, ce->superseded, Irp, rollback); + t->header.address, ce ? 
ce->superseded : FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("decrease_extent_refcount returned %08x\n", Status); return Status; } - + if (ce) { LIST_ENTRY* le2; - + le2 = ce->refs.Flink; while (le2 != &ce->refs) { changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); - + if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) { ce->count--; cer->sdr.count--; break; } - + le2 = le2->Flink; } - + le2 = ce->old_refs.Flink; while (le2 != &ce->old_refs) { changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); - + if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) { ce->old_count--; - + if (cer->sdr.count > 1) cer->sdr.count--; else { RemoveEntryList(&cer->list_entry); ExFreePool(cer); } - + break; } - + le2 = le2->Flink; } } } } - + // FIXME - clear shared flag if unique? } } } - + le = le->Flink; } } else { LIST_ENTRY* le; - + le = t->itemlist.Flink; while (le != &t->itemlist) { tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); - + if (!td->inserted) { - TREE_BLOCK_REF tbr; - tbr.offset = t->root->id; - + Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, - &tbr, &td->key, t->header.level - 1, Irp, rollback); + &tbr, &td->key, t->header.level - 1, Irp); if (!NT_SUCCESS(Status)) { ERR("increase_extent_refcount returned %08x\n", Status); return Status; } - + if (unique || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) { UINT64 sbrrc = find_extent_shared_tree_refcount(Vcb, td->treeholder.address, t->header.address, Irp); if (sbrrc > 0) { SHARED_BLOCK_REF sbr; - + sbr.offset = t->header.address; - + Status = decrease_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0, - t->header.address, FALSE, Irp, rollback); + t->header.address, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("decrease_extent_refcount returned %08x\n", Status); return Status; } } } - + 
// FIXME - clear shared flag if unique? } - + le = le->Flink; } } - + if (unique) { UINT64 sbrrc = find_extent_shared_tree_refcount(Vcb, t->header.address, t->parent->header.address, Irp); - + if (sbrrc == 1) { SHARED_BLOCK_REF sbr; - + sbr.offset = t->parent->header.address; - + Status = decrease_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0, - t->parent->header.address, FALSE, Irp, rollback); + t->parent->header.address, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("decrease_extent_refcount returned %08x\n", Status); return Status; } } } - + if (t->parent) tbr.offset = t->parent->header.tree_id; else tbr.offset = t->header.tree_id; - + Status = increase_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr, - t->parent ? &t->paritem->key : NULL, t->header.level, Irp, rollback); + t->parent ? &t->paritem->key : NULL, t->header.level, Irp); if (!NT_SUCCESS(Status)) { ERR("increase_extent_refcount returned %08x\n", Status); return Status; } - + // FIXME - clear shared flag if unique? - + t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF; } - - Status = reduce_tree_extent(Vcb, t->header.address, t, t->parent ? t->parent->header.tree_id : t->header.tree_id, t->header.level, Irp, rollback); - - if (!NT_SUCCESS(Status)) { - ERR("reduce_tree_extent returned %08x\n", Status); - return Status; + + if (rc > 1 || t->header.tree_id == t->root->id) { + Status = reduce_tree_extent(Vcb, t->header.address, t, t->parent ? 
t->parent->header.tree_id : t->header.tree_id, t->header.level, Irp, rollback); + + if (!NT_SUCCESS(Status)) { + ERR("reduce_tree_extent returned %08x\n", Status); + return Status; + } } - + t->has_address = FALSE; - - if (rc > 1 && !(flags & EXTENT_ITEM_SHARED_BACKREFS)) { + + if ((rc > 1 || t->header.tree_id != t->root->id) && !(flags & EXTENT_ITEM_SHARED_BACKREFS)) { if (t->header.tree_id == t->root->id) { flags |= EXTENT_ITEM_SHARED_BACKREFS; update_extent_flags(Vcb, t->header.address, flags, Irp); } - + if (t->header.level > 0) { LIST_ENTRY* le; - + le = t->itemlist.Flink; while (le != &t->itemlist) { tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); - + if (!td->inserted) { if (t->header.tree_id == t->root->id) { SHARED_BLOCK_REF sbr; - + sbr.offset = t->header.address; - - Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, &td->key, t->header.level - 1, Irp, rollback); + + Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, &td->key, t->header.level - 1, Irp); } else { TREE_BLOCK_REF tbr; - + tbr.offset = t->root->id; - - Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr, &td->key, t->header.level - 1, Irp, rollback); + + Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr, &td->key, t->header.level - 1, Irp); } - + if (!NT_SUCCESS(Status)) { ERR("increase_extent_refcount returned %08x\n", Status); return Status; } } - + le = le->Flink; } } else { LIST_ENTRY* le; - + le = t->itemlist.Flink; while (le != &t->itemlist) { tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); - + if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { EXTENT_DATA* ed = (EXTENT_DATA*)td->data; - + if (ed->type == EXTENT_TYPE_REGULAR 
|| ed->type == EXTENT_TYPE_PREALLOC) { EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; - + if (ed2->size > 0) { changed_extent* ce = NULL; chunk* c = get_chunk_from_address(Vcb, ed2->address); - + if (c) { LIST_ENTRY* le2; - + le2 = c->changed_extents.Flink; while (le2 != &c->changed_extents) { changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry); - + if (ce2->address == ed2->address) { ce = ce2; break; @@ -996,53 +1265,53 @@ static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LI le2 = le2->Flink; } } - + if (t->header.tree_id == t->root->id) { SHARED_DATA_REF sdr; - + sdr.offset = t->header.address; sdr.count = 1; - + if (ce) { Status = add_changed_extent_ref_sdr(ce, &sdr, TRUE); if (!NT_SUCCESS(Status)) { ERR("add_changed_extent_ref_edr returned %08x\n", Status); return Status; } - + Status = add_changed_extent_ref_sdr(ce, &sdr, FALSE); if (!NT_SUCCESS(Status)) { ERR("add_changed_extent_ref_edr returned %08x\n", Status); return Status; } } - - Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, Irp, rollback); + + Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, Irp); } else { EXTENT_DATA_REF edr; - + edr.root = t->root->id; edr.objid = td->key.obj_id; edr.offset = td->key.offset - ed2->offset; edr.count = 1; - + if (ce) { Status = add_changed_extent_ref_edr(ce, &edr, TRUE); if (!NT_SUCCESS(Status)) { ERR("add_changed_extent_ref_edr returned %08x\n", Status); return Status; } - + Status = add_changed_extent_ref_edr(ce, &edr, FALSE); if (!NT_SUCCESS(Status)) { ERR("add_changed_extent_ref_edr returned %08x\n", Status); return Status; } } - - Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp, rollback); + + Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp); } - + if (!NT_SUCCESS(Status)) { 
ERR("increase_extent_refcount returned %08x\n", Status); return Status; @@ -1050,15 +1319,15 @@ static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LI } } } - + le = le->Flink; } } } - + t->updated_extents = TRUE; t->header.tree_id = t->root->id; - + return STATUS_SUCCESS; } @@ -1067,51 +1336,73 @@ static NTSTATUS allocate_tree_extents(device_extension* Vcb, PIRP Irp, LIST_ENTR NTSTATUS Status; BOOL changed = FALSE; UINT8 max_level = 0, level; - + TRACE("(%p)\n", Vcb); - + le = Vcb->trees.Flink; while (le != &Vcb->trees) { tree* t = CONTAINING_RECORD(le, tree, list_entry); - + if (t->write && !t->has_new_address) { chunk* c; - + + if (t->has_address) { + c = get_chunk_from_address(Vcb, t->header.address); + + if (c) { + if (!c->cache_loaded) { + ExAcquireResourceExclusiveLite(&c->lock, TRUE); + + if (!c->cache_loaded) { + Status = load_cache_chunk(Vcb, c, NULL); + + if (!NT_SUCCESS(Status)) { + ERR("load_cache_chunk returned %08x\n", Status); + ExReleaseResourceLite(&c->lock); + return Status; + } + } + + ExReleaseResourceLite(&c->lock); + } + } + } + Status = get_tree_new_address(Vcb, t, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("get_tree_new_address returned %08x\n", Status); return Status; } - + TRACE("allocated extent %llx\n", t->new_address); - + c = get_chunk_from_address(Vcb, t->new_address); - - if (c) { - increase_chunk_usage(c, Vcb->superblock.node_size); - } else { + + if (c) + c->used += Vcb->superblock.node_size; + else { ERR("could not find chunk for address %llx\n", t->new_address); return STATUS_INTERNAL_ERROR; } - + changed = TRUE; - + if (t->header.level > max_level) max_level = t->header.level; } - + le = le->Flink; } - + if (!changed) return STATUS_SUCCESS; - + level = max_level; do { le = Vcb->trees.Flink; while (le != &Vcb->trees) { tree* t = CONTAINING_RECORD(le, tree, list_entry); - + if (t->write && !t->updated_extents && t->has_address && t->header.level == level) { Status = update_tree_extents(Vcb, t, Irp, 
rollback); if (!NT_SUCCESS(Status)) { @@ -1119,266 +1410,299 @@ static NTSTATUS allocate_tree_extents(device_extension* Vcb, PIRP Irp, LIST_ENTR return Status; } } - + le = le->Flink; } - + if (level == 0) break; - + level--; } while (TRUE); - + return STATUS_SUCCESS; } -static NTSTATUS update_root_root(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS update_root_root(device_extension* Vcb, BOOL no_cache, PIRP Irp, LIST_ENTRY* rollback) { LIST_ENTRY* le; NTSTATUS Status; - + TRACE("(%p)\n", Vcb); - + le = Vcb->trees.Flink; while (le != &Vcb->trees) { tree* t = CONTAINING_RECORD(le, tree, list_entry); - + if (t->write && !t->parent) { if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) { KEY searchkey; traverse_ptr tp; - + searchkey.obj_id = t->root->id; searchkey.obj_type = TYPE_ROOT_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id); - int3; return STATUS_INTERNAL_ERROR; } - + TRACE("updating the address for root %llx to %llx\n", searchkey.obj_id, t->new_address); - + t->root->root_item.block_number = t->new_address; t->root->root_item.root_level = t->header.level; t->root->root_item.generation = Vcb->superblock.generation; t->root->root_item.generation2 = Vcb->superblock.generation; - + // item is guaranteed to be at least sizeof(ROOT_ITEM), due to add_parents RtlCopyMemory(tp.item->data, &t->root->root_item, sizeof(ROOT_ITEM)); } - + t->root->treeholder.address = t->new_address; + t->root->treeholder.generation = Vcb->superblock.generation; } - + le = le->Flink; } - - Status = update_chunk_caches(Vcb, Irp, rollback); - if (!NT_SUCCESS(Status)) { - ERR("update_chunk_caches returned %08x\n", Status); - return 
Status; + + if (!no_cache && !(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE)) { + ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE); + Status = update_chunk_caches(Vcb, Irp, rollback); + ExReleaseResourceLite(&Vcb->chunk_lock); + + if (!NT_SUCCESS(Status)) { + ERR("update_chunk_caches returned %08x\n", Status); + return Status; + } } - + return STATUS_SUCCESS; } -NTSTATUS do_tree_writes(device_extension* Vcb, LIST_ENTRY* tree_writes, PIRP Irp) { +NTSTATUS do_tree_writes(device_extension* Vcb, LIST_ENTRY* tree_writes, BOOL no_free) { chunk* c; LIST_ENTRY* le; tree_write* tw; NTSTATUS Status; + ULONG i, num_bits; write_data_context* wtc; - - wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context), ALLOC_TAG); - if (!wtc) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE); - InitializeListHead(&wtc->stripes); - wtc->tree = TRUE; - wtc->stripes_left = 0; - + ULONG bit_num = 0; + BOOL raid56 = FALSE; + // merge together runs c = NULL; le = tree_writes->Flink; while (le != tree_writes) { tw = CONTAINING_RECORD(le, tree_write, list_entry); - + if (!c || tw->address < c->offset || tw->address >= c->offset + c->chunk_item->size) c = get_chunk_from_address(Vcb, tw->address); else { tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry); - + if (tw->address == tw2->address + tw2->length) { UINT8* data = ExAllocatePoolWithTag(NonPagedPool, tw2->length + tw->length, ALLOC_TAG); - + if (!data) { ERR("out of memory\n"); - ExFreePool(wtc); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(data, tw2->data, tw2->length); RtlCopyMemory(&data[tw2->length], tw->data, tw->length); - - ExFreePool(tw2->data); + + if (!no_free) + ExFreePool(tw2->data); + tw2->data = data; tw2->length += tw->length; - - ExFreePool(tw->data); + + if (!no_free) // FIXME - what if we allocated this just now? 
+ ExFreePool(tw->data); + RemoveEntryList(&tw->list_entry); ExFreePool(tw); - + le = tw2->list_entry.Flink; continue; } } - + + tw->c = c; + + if (c->chunk_item->type & (BLOCK_FLAG_RAID5 | BLOCK_FLAG_RAID6)) + raid56 = TRUE; + le = le->Flink; } - - // mark RAID5/6 overlaps so we can do them one by one - c = NULL; + + num_bits = 0; + le = tree_writes->Flink; while (le != tree_writes) { tw = CONTAINING_RECORD(le, tree_write, list_entry); - - if (!c || tw->address < c->offset || tw->address >= c->offset + c->chunk_item->size) - c = get_chunk_from_address(Vcb, tw->address); - else if (c->chunk_item->type & BLOCK_FLAG_RAID5) { - tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry); - UINT64 last_stripe, this_stripe; - - last_stripe = (tw2->address + tw2->length - 1 - c->offset) / (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 1)); - this_stripe = (tw->address - c->offset) / (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 1)); - - if (last_stripe == this_stripe) - tw->overlap = TRUE; - } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) { - tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry); - UINT64 last_stripe, this_stripe; - - last_stripe = (tw2->address + tw2->length - 1 - c->offset) / (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 2)); - this_stripe = (tw->address - c->offset) / (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 2)); - - if (last_stripe == this_stripe) - tw->overlap = TRUE; - } - + + num_bits++; + le = le->Flink; } - + + wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context) * num_bits, ALLOC_TAG); + if (!wtc) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + le = tree_writes->Flink; + while (le != tree_writes) { tw = CONTAINING_RECORD(le, tree_write, list_entry); - - if (!tw->overlap) { - TRACE("address: %llx, size: %x, overlap = %u\n", tw->address, tw->length, tw->overlap); - - Status = write_data(Vcb, tw->address, 
tw->data, TRUE, tw->length, wtc, NULL, NULL); - if (!NT_SUCCESS(Status)) { - ERR("write_data returned %08x\n", Status); - ExFreePool(wtc); - return Status; + + TRACE("address: %llx, size: %x\n", tw->address, tw->length); + + KeInitializeEvent(&wtc[bit_num].Event, NotificationEvent, FALSE); + InitializeListHead(&wtc[bit_num].stripes); + wtc[bit_num].need_wait = FALSE; + wtc[bit_num].stripes_left = 0; + wtc[bit_num].parity1 = wtc[bit_num].parity2 = wtc[bit_num].scratch = NULL; + wtc[bit_num].mdl = wtc[bit_num].parity1_mdl = wtc[bit_num].parity2_mdl = NULL; + + Status = write_data(Vcb, tw->address, tw->data, tw->length, &wtc[bit_num], NULL, NULL, FALSE, 0, HighPagePriority); + if (!NT_SUCCESS(Status)) { + ERR("write_data returned %08x\n", Status); + + for (i = 0; i < num_bits; i++) { + free_write_data_stripes(&wtc[i]); } + ExFreePool(wtc); + + return Status; } - + + bit_num++; + le = le->Flink; } - - if (wtc->stripes.Flink != &wtc->stripes) { - // launch writes and wait - le = wtc->stripes.Flink; - while (le != &wtc->stripes) { - write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry); - - if (stripe->status != WriteDataStatus_Ignore) - IoCallDriver(stripe->device->devobj, stripe->Irp); - - le = le->Flink; + + for (i = 0; i < num_bits; i++) { + if (wtc[i].stripes.Flink != &wtc[i].stripes) { + // launch writes and wait + le = wtc[i].stripes.Flink; + while (le != &wtc[i].stripes) { + write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry); + + if (stripe->status != WriteDataStatus_Ignore) { + wtc[i].need_wait = TRUE; + IoCallDriver(stripe->device->devobj, stripe->Irp); + } + + le = le->Flink; + } } - - KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL); - - le = wtc->stripes.Flink; - while (le != &wtc->stripes) { + } + + for (i = 0; i < num_bits; i++) { + if (wtc[i].need_wait) + KeWaitForSingleObject(&wtc[i].Event, Executive, KernelMode, FALSE, NULL); + } + + for (i = 0; i < num_bits; i++) { + le = 
wtc[i].stripes.Flink; + while (le != &wtc[i].stripes) { write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry); - + if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) { Status = stripe->iosb.Status; + log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS); break; } - + le = le->Flink; } - - free_write_data_stripes(wtc); - } - - le = tree_writes->Flink; - while (le != tree_writes) { - tw = CONTAINING_RECORD(le, tree_write, list_entry); - - if (tw->overlap) { - TRACE("address: %llx, size: %x, overlap = %u\n", tw->address, tw->length, tw->overlap); - - Status = write_data_complete(Vcb, tw->address, tw->data, tw->length, Irp, NULL); - if (!NT_SUCCESS(Status)) { - ERR("write_data_complete returned %08x\n", Status); - ExFreePool(wtc); - return Status; - } - } - - le = le->Flink; + + free_write_data_stripes(&wtc[i]); } - - return STATUS_SUCCESS; + + ExFreePool(wtc); + + if (raid56) { + c = NULL; + + le = tree_writes->Flink; + while (le != tree_writes) { + tw = CONTAINING_RECORD(le, tree_write, list_entry); + + if (tw->c != c) { + c = tw->c; + + ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, TRUE); + + while (!IsListEmpty(&c->partial_stripes)) { + partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry); + + Status = flush_partial_stripe(Vcb, c, ps); + + if (ps->bmparr) + ExFreePool(ps->bmparr); + + ExFreePool(ps); + + if (!NT_SUCCESS(Status)) { + ERR("flush_partial_stripe returned %08x\n", Status); + ExReleaseResourceLite(&c->partial_stripes_lock); + return Status; + } + } + + ExReleaseResourceLite(&c->partial_stripes_lock); + } + + le = le->Flink; + } + } + + return STATUS_SUCCESS; } static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) { - UINT8 level; + ULONG level; UINT8 *data, *body; UINT32 crc32; NTSTATUS Status; LIST_ENTRY* le; LIST_ENTRY tree_writes; tree_write* tw; - + TRACE("(%p)\n", Vcb); - + InitializeListHead(&tree_writes); for 
(level = 0; level <= 255; level++) { BOOL nothing_found = TRUE; - + TRACE("level = %u\n", level); - + le = Vcb->trees.Flink; while (le != &Vcb->trees) { tree* t = CONTAINING_RECORD(le, tree, list_entry); - + if (t->write && t->header.level == level) { KEY firstitem, searchkey; LIST_ENTRY* le2; traverse_ptr tp; - EXTENT_ITEM_TREE* eit; - + if (!t->has_new_address) { ERR("error - tried to write tree with no new address\n"); - int3; + return STATUS_INTERNAL_ERROR; } - + le2 = t->itemlist.Flink; while (le2 != &t->itemlist) { tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry); @@ -1388,127 +1712,116 @@ static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) { } le2 = le2->Flink; } - + if (t->parent) { t->paritem->key = firstitem; t->paritem->treeholder.address = t->new_address; t->paritem->treeholder.generation = Vcb->superblock.generation; } - + if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) { + EXTENT_ITEM_TREE* eit; + searchkey.obj_id = t->new_address; searchkey.obj_type = TYPE_EXTENT_ITEM; searchkey.offset = Vcb->superblock.node_size; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (keycmp(searchkey, tp.item->key)) { -// traverse_ptr next_tp; -// BOOL b; -// tree_data* paritem; - ERR("could not find %llx,%x,%llx in extent_root (found %llx,%x,%llx instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - -// searchkey.obj_id = 0; -// searchkey.obj_type = 0; -// searchkey.offset = 0; -// -// find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE); -// -// paritem = NULL; -// do { -// if (tp.tree->paritem != paritem) { -// paritem = tp.tree->paritem; -// ERR("paritem: %llx,%x,%llx\n", paritem->key.obj_id, paritem->key.obj_type, paritem->key.offset); -// } -// -// ERR("%llx,%x,%llx\n", tp.item->key.obj_id, 
tp.item->key.obj_type, tp.item->key.offset); -// -// b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE); -// if (b) { -// free_traverse_ptr(&tp); -// tp = next_tp; -// } -// } while (b); -// -// free_traverse_ptr(&tp); - return STATUS_INTERNAL_ERROR; } - + if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) { ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE)); return STATUS_INTERNAL_ERROR; } - + eit = (EXTENT_ITEM_TREE*)tp.item->data; eit->firstitem = firstitem; } - + nothing_found = FALSE; } - + le = le->Flink; } - + if (nothing_found) break; } - + TRACE("allocated tree extents\n"); - + le = Vcb->trees.Flink; while (le != &Vcb->trees) { tree* t = CONTAINING_RECORD(le, tree, list_entry); + LIST_ENTRY* le2; #ifdef DEBUG_PARANOID UINT32 num_items = 0, size = 0; - LIST_ENTRY* le2; BOOL crash = FALSE; #endif if (t->write) { #ifdef DEBUG_PARANOID + BOOL first = TRUE; + KEY lastkey; + le2 = t->itemlist.Flink; while (le2 != &t->itemlist) { tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry); if (!td->ignore) { num_items++; - + + if (!first) { + if (keycmp(td->key, lastkey) == 0) { + ERR("(%llx,%x,%llx): duplicate key\n", td->key.obj_id, td->key.obj_type, td->key.offset); + crash = TRUE; + } else if (keycmp(td->key, lastkey) == -1) { + ERR("(%llx,%x,%llx): key out of order\n", td->key.obj_id, td->key.obj_type, td->key.offset); + crash = TRUE; + } + } else + first = FALSE; + + lastkey = td->key; + if (t->header.level == 0) size += td->size; } le2 = le2->Flink; } - + if (t->header.level == 0) size += num_items * sizeof(leaf_node); else size += num_items * sizeof(internal_node); - + if (num_items != t->header.num_items) { ERR("tree %llx, level %x: num_items was %x, expected %x\n", t->root->id, t->header.level, num_items, t->header.num_items); crash = TRUE; } - + if (size != t->size) { ERR("tree %llx, level %x: size was %x, expected %x\n", t->root->id, 
t->header.level, size, t->size); crash = TRUE; } - + if (t->header.num_items == 0 && t->parent) { ERR("tree %llx, level %x: tried to write empty tree with parent\n", t->root->id, t->header.level); crash = TRUE; } - + if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) { ERR("tree %llx, level %x: tried to write overlarge tree (%x > %x)\n", t->root->id, t->header.level, t->size, Vcb->superblock.node_size - sizeof(tree_header)); crash = TRUE; } - + if (crash) { ERR("tree %p\n", t); le2 = t->itemlist.Flink; @@ -1528,47 +1841,45 @@ static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) { t->header.flags |= HEADER_FLAG_MIXED_BACKREF; t->header.fs_uuid = Vcb->superblock.uuid; t->has_address = TRUE; - + data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG); if (!data) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + body = data + sizeof(tree_header); - + RtlCopyMemory(data, &t->header, sizeof(tree_header)); RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header)); - + if (t->header.level == 0) { leaf_node* itemptr = (leaf_node*)body; int i = 0; - LIST_ENTRY* le2; UINT8* dataptr = data + Vcb->superblock.node_size; - + le2 = t->itemlist.Flink; while (le2 != &t->itemlist) { tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry); if (!td->ignore) { dataptr = dataptr - td->size; - + itemptr[i].key = td->key; - itemptr[i].offset = (UINT8*)dataptr - (UINT8*)body; + itemptr[i].offset = (UINT32)((UINT8*)dataptr - (UINT8*)body); itemptr[i].size = td->size; i++; - + if (td->size > 0) RtlCopyMemory(dataptr, td->data, td->size); } - + le2 = le2->Flink; } } else { internal_node* itemptr = (internal_node*)body; int i = 0; - LIST_ENTRY* le2; - + le2 = t->itemlist.Flink; while (le2 != &t->itemlist) { tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry); @@ -1578,46 +1889,46 @@ static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) { itemptr[i].generation = 
td->treeholder.generation; i++; } - + le2 = le2->Flink; } } - + crc32 = calc_crc32c(0xffffffff, (UINT8*)&((tree_header*)data)->fs_uuid, Vcb->superblock.node_size - sizeof(((tree_header*)data)->csum)); crc32 = ~crc32; *((UINT32*)data) = crc32; TRACE("setting crc32 to %08x\n", crc32); - + tw = ExAllocatePoolWithTag(PagedPool, sizeof(tree_write), ALLOC_TAG); if (!tw) { ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; + ExFreePool(data); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; } - + tw->address = t->new_address; tw->length = Vcb->superblock.node_size; tw->data = data; - tw->overlap = FALSE; - + if (IsListEmpty(&tree_writes)) InsertTailList(&tree_writes, &tw->list_entry); else { - LIST_ENTRY* le2; BOOL inserted = FALSE; - + le2 = tree_writes.Flink; while (le2 != &tree_writes) { tree_write* tw2 = CONTAINING_RECORD(le2, tree_write, list_entry); - + if (tw2->address > tw->address) { InsertHeadList(le2->Blink, &tw->list_entry); inserted = TRUE; break; } - + le2 = le2->Flink; } - + if (!inserted) InsertTailList(&tree_writes, &tw->list_entry); } @@ -1625,32 +1936,35 @@ static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) { le = le->Flink; } - - Status = do_tree_writes(Vcb, &tree_writes, Irp); + + Status = do_tree_writes(Vcb, &tree_writes, FALSE); if (!NT_SUCCESS(Status)) { ERR("do_tree_writes returned %08x\n", Status); goto end; } - + Status = STATUS_SUCCESS; end: while (!IsListEmpty(&tree_writes)) { le = RemoveHeadList(&tree_writes); tw = CONTAINING_RECORD(le, tree_write, list_entry); - + + if (tw->data) + ExFreePool(tw->data); + ExFreePool(tw); } - + return Status; } static void update_backup_superblock(device_extension* Vcb, superblock_backup* sb, PIRP Irp) { KEY searchkey; traverse_ptr tp; - + RtlZeroMemory(sb, sizeof(superblock_backup)); - + sb->root_tree_addr = Vcb->superblock.root_tree_addr; sb->root_tree_generation = Vcb->superblock.generation; sb->root_level = Vcb->superblock.root_level; @@ -1662,11 +1976,11 @@ static void 
update_backup_superblock(device_extension* Vcb, superblock_backup* s searchkey.obj_id = BTRFS_ROOT_EXTENT; searchkey.obj_type = TYPE_ROOT_ITEM; searchkey.offset = 0xffffffffffffffff; - + if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp))) { if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) { ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data; - + sb->extent_tree_addr = ri->block_number; sb->extent_tree_generation = ri->generation; sb->extent_root_level = ri->root_level; @@ -1674,23 +1988,23 @@ static void update_backup_superblock(device_extension* Vcb, superblock_backup* s } searchkey.obj_id = BTRFS_ROOT_FSTREE; - + if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp))) { if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) { ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data; - + sb->fs_tree_addr = ri->block_number; sb->fs_tree_generation = ri->generation; sb->fs_root_level = ri->root_level; } } - + searchkey.obj_id = BTRFS_ROOT_DEVTREE; - + if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp))) { if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) { ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data; - + sb->dev_root_addr = ri->block_number; sb->dev_root_generation = ri->generation; sb->dev_root_level = ri->root_level; @@ -1698,11 +2012,11 @@ static void update_backup_superblock(device_extension* Vcb, superblock_backup* s } searchkey.obj_id = BTRFS_ROOT_CHECKSUM; - + if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp))) { if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) { ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data; - + sb->csum_root_addr = ri->block_number; sb->csum_root_generation = 
ri->generation; sb->csum_root_level = ri->root_level; @@ -1714,55 +2028,149 @@ static void update_backup_superblock(device_extension* Vcb, superblock_backup* s sb->num_devices = Vcb->superblock.num_devices; } -static NTSTATUS STDCALL write_superblock(device_extension* Vcb, device* device) { +typedef struct { + void* context; + UINT8* buf; + PMDL mdl; + device* device; NTSTATUS Status; - unsigned int i = 0; - UINT32 crc32; + PIRP Irp; + LIST_ENTRY list_entry; +} write_superblocks_stripe; + +typedef struct _write_superblocks_context { + KEVENT Event; + LIST_ENTRY stripes; + LONG left; +} write_superblocks_context; + +_Function_class_(IO_COMPLETION_ROUTINE) #ifdef __REACTOS__ - Status = STATUS_INTERNAL_ERROR; +static NTSTATUS NTAPI write_superblock_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#else +static NTSTATUS write_superblock_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { #endif - - RtlCopyMemory(&Vcb->superblock.dev_item, &device->devitem, sizeof(DEV_ITEM)); - + write_superblocks_stripe* stripe = conptr; + write_superblocks_context* context = stripe->context; + + UNUSED(DeviceObject); + + stripe->Status = Irp->IoStatus.Status; + + if (InterlockedDecrement(&context->left) == 0) + KeSetEvent(&context->Event, 0, FALSE); + + return STATUS_MORE_PROCESSING_REQUIRED; +} + +static NTSTATUS write_superblock(device_extension* Vcb, device* device, write_superblocks_context* context) { + unsigned int i = 0; + // All the documentation says that the Linux driver only writes one superblock // if it thinks a disk is an SSD, but this doesn't seem to be the case! 
- - while (superblock_addrs[i] > 0 && device->length >= superblock_addrs[i] + sizeof(superblock)) { - TRACE("writing superblock %u\n", i); - - Vcb->superblock.sb_phys_addr = superblock_addrs[i]; - - crc32 = calc_crc32c(0xffffffff, (UINT8*)&Vcb->superblock.uuid, (ULONG)sizeof(superblock) - sizeof(Vcb->superblock.checksum)); - crc32 = ~crc32; - TRACE("crc32 is %08x\n", crc32); - RtlCopyMemory(&Vcb->superblock.checksum, &crc32, sizeof(UINT32)); - - Status = write_data_phys(device->devobj, superblock_addrs[i], &Vcb->superblock, sizeof(superblock)); - - if (!NT_SUCCESS(Status)) - break; - + + while (superblock_addrs[i] > 0 && device->devitem.num_bytes >= superblock_addrs[i] + sizeof(superblock)) { + ULONG sblen = (ULONG)sector_align(sizeof(superblock), Vcb->superblock.sector_size); + superblock* sb; + UINT32 crc32; + write_superblocks_stripe* stripe; + PIO_STACK_LOCATION IrpSp; + + sb = ExAllocatePoolWithTag(NonPagedPool, sblen, ALLOC_TAG); + if (!sb) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(sb, &Vcb->superblock, sizeof(superblock)); + + if (sblen > sizeof(superblock)) + RtlZeroMemory((UINT8*)sb + sizeof(superblock), sblen - sizeof(superblock)); + + RtlCopyMemory(&sb->dev_item, &device->devitem, sizeof(DEV_ITEM)); + sb->sb_phys_addr = superblock_addrs[i]; + + crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum)); + RtlCopyMemory(&sb->checksum, &crc32, sizeof(UINT32)); + + stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_superblocks_stripe), ALLOC_TAG); + if (!stripe) { + ERR("out of memory\n"); + ExFreePool(sb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + stripe->buf = (UINT8*)sb; + + stripe->Irp = IoAllocateIrp(device->devobj->StackSize, FALSE); + if (!stripe->Irp) { + ERR("IoAllocateIrp failed\n"); + ExFreePool(stripe); + ExFreePool(sb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + IrpSp = IoGetNextIrpStackLocation(stripe->Irp); + IrpSp->MajorFunction = 
IRP_MJ_WRITE; + + if (i == 0) + IrpSp->Flags |= SL_WRITE_THROUGH; + + if (device->devobj->Flags & DO_BUFFERED_IO) { + stripe->Irp->AssociatedIrp.SystemBuffer = sb; + stripe->mdl = NULL; + + stripe->Irp->Flags = IRP_BUFFERED_IO; + } else if (device->devobj->Flags & DO_DIRECT_IO) { + stripe->mdl = IoAllocateMdl(sb, sblen, FALSE, FALSE, NULL); + if (!stripe->mdl) { + ERR("IoAllocateMdl failed\n"); + IoFreeIrp(stripe->Irp); + ExFreePool(stripe); + ExFreePool(sb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + stripe->Irp->MdlAddress = stripe->mdl; + + MmBuildMdlForNonPagedPool(stripe->mdl); + } else { + stripe->Irp->UserBuffer = sb; + stripe->mdl = NULL; + } + + IrpSp->Parameters.Write.Length = sblen; + IrpSp->Parameters.Write.ByteOffset.QuadPart = superblock_addrs[i]; + + IoSetCompletionRoutine(stripe->Irp, write_superblock_completion, stripe, TRUE, TRUE, TRUE); + + stripe->context = context; + stripe->device = device; + InsertTailList(&context->stripes, &stripe->list_entry); + + context->left++; + i++; } - - if (i == 0) { + + if (i == 0) ERR("no superblocks written!\n"); - } - return Status; + return STATUS_SUCCESS; } static NTSTATUS write_superblocks(device_extension* Vcb, PIRP Irp) { UINT64 i; NTSTATUS Status; LIST_ENTRY* le; - + write_superblocks_context context; + TRACE("(%p)\n", Vcb); - + le = Vcb->trees.Flink; while (le != &Vcb->trees) { tree* t = CONTAINING_RECORD(le, tree, list_entry); - + if (t->write && !t->parent) { if (t->root == Vcb->root_root) { Vcb->superblock.root_tree_addr = t->new_address; @@ -1773,177 +2181,263 @@ static NTSTATUS write_superblocks(device_extension* Vcb, PIRP Irp) { Vcb->superblock.chunk_root_level = t->header.level; } } - + le = le->Flink; } - + for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS - 1; i++) { RtlCopyMemory(&Vcb->superblock.backup[i], &Vcb->superblock.backup[i+1], sizeof(superblock_backup)); } - + update_backup_superblock(Vcb, &Vcb->superblock.backup[BTRFS_NUM_BACKUP_ROOTS - 1], Irp); - + + KeInitializeEvent(&context.Event, 
NotificationEvent, FALSE); + InitializeListHead(&context.stripes); + context.left = 0; + le = Vcb->devices.Flink; while (le != &Vcb->devices) { device* dev = CONTAINING_RECORD(le, device, list_entry); - + if (dev->devobj && !dev->readonly) { - Status = write_superblock(Vcb, dev); + Status = write_superblock(Vcb, dev, &context); if (!NT_SUCCESS(Status)) { ERR("write_superblock returned %08x\n", Status); - return Status; + goto end; } } - + le = le->Flink; } - - return STATUS_SUCCESS; + + if (IsListEmpty(&context.stripes)) { + ERR("error - not writing any superblocks\n"); + Status = STATUS_INTERNAL_ERROR; + goto end; + } + + le = context.stripes.Flink; + while (le != &context.stripes) { + write_superblocks_stripe* stripe = CONTAINING_RECORD(le, write_superblocks_stripe, list_entry); + + IoCallDriver(stripe->device->devobj, stripe->Irp); + + le = le->Flink; + } + + KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL); + + le = context.stripes.Flink; + while (le != &context.stripes) { + write_superblocks_stripe* stripe = CONTAINING_RECORD(le, write_superblocks_stripe, list_entry); + + if (!NT_SUCCESS(stripe->Status)) { + ERR("device %llx returned %08x\n", stripe->device->devitem.dev_id, stripe->Status); + log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS); + Status = stripe->Status; + goto end; + } + + le = le->Flink; + } + + Status = STATUS_SUCCESS; + +end: + while (!IsListEmpty(&context.stripes)) { + write_superblocks_stripe* stripe = CONTAINING_RECORD(RemoveHeadList(&context.stripes), write_superblocks_stripe, list_entry); + + if (stripe->mdl) { + if (stripe->mdl->MdlFlags & MDL_PAGES_LOCKED) + MmUnlockPages(stripe->mdl); + + IoFreeMdl(stripe->mdl); + } + + if (stripe->Irp) + IoFreeIrp(stripe->Irp); + + if (stripe->buf) + ExFreePool(stripe->buf); + + ExFreePool(stripe); + } + + return Status; } static NTSTATUS flush_changed_extent(device_extension* Vcb, chunk* c, changed_extent* ce, PIRP Irp, LIST_ENTRY* rollback) { LIST_ENTRY 
*le, *le2; NTSTATUS Status; UINT64 old_size; - + if (ce->count == 0 && ce->old_count == 0) { while (!IsListEmpty(&ce->refs)) { changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->refs), changed_extent_ref, list_entry); ExFreePool(cer); } - + while (!IsListEmpty(&ce->old_refs)) { changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->old_refs), changed_extent_ref, list_entry); ExFreePool(cer); } - + goto end; } - + le = ce->refs.Flink; while (le != &ce->refs) { changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry); - LIST_ENTRY* le3 = le->Flink; - UINT64 old_count = 0; - + UINT32 old_count = 0; + if (cer->type == TYPE_EXTENT_DATA_REF) { le2 = ce->old_refs.Flink; while (le2 != &ce->old_refs) { changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); - + if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) { old_count = cer2->edr.count; - - RemoveEntryList(&cer2->list_entry); - ExFreePool(cer2); break; } - + le2 = le2->Flink; } - + old_size = ce->old_count > 0 ? 
ce->old_size : ce->size; - + if (cer->edr.count > old_count) { - Status = increase_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset, cer->edr.count - old_count, Irp, rollback); - + Status = increase_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset, cer->edr.count - old_count, Irp); + if (!NT_SUCCESS(Status)) { ERR("increase_extent_refcount_data returned %08x\n", Status); return Status; } - } else if (cer->edr.count < old_count) { + } + } else if (cer->type == TYPE_SHARED_DATA_REF) { + le2 = ce->old_refs.Flink; + while (le2 != &ce->old_refs) { + changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); + + if (cer2->type == TYPE_SHARED_DATA_REF && cer2->sdr.offset == cer->sdr.offset) { + RemoveEntryList(&cer2->list_entry); + ExFreePool(cer2); + break; + } + + le2 = le2->Flink; + } + } + + le = le->Flink; + } + + le = ce->refs.Flink; + while (le != &ce->refs) { + changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry); + LIST_ENTRY* le3 = le->Flink; + UINT32 old_count = 0; + + if (cer->type == TYPE_EXTENT_DATA_REF) { + le2 = ce->old_refs.Flink; + while (le2 != &ce->old_refs) { + changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); + + if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) { + old_count = cer2->edr.count; + + RemoveEntryList(&cer2->list_entry); + ExFreePool(cer2); + break; + } + + le2 = le2->Flink; + } + + old_size = ce->old_count > 0 ? 
ce->old_size : ce->size; + + if (cer->edr.count < old_count) { Status = decrease_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset, - old_count - cer->edr.count, ce->superseded, Irp, rollback); - + old_count - cer->edr.count, ce->superseded, Irp); + if (!NT_SUCCESS(Status)) { ERR("decrease_extent_refcount_data returned %08x\n", Status); return Status; } } - + if (ce->size != ce->old_size && ce->old_count > 0) { KEY searchkey; traverse_ptr tp; void* data; - + searchkey.obj_id = ce->address; searchkey.obj_type = TYPE_EXTENT_ITEM; searchkey.offset = ce->old_size; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (keycmp(searchkey, tp.item->key)) { ERR("could not find (%llx,%x,%llx) in extent tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); return STATUS_INTERNAL_ERROR; } - + if (tp.item->size > 0) { data = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); - + if (!data) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(data, tp.item->data, tp.item->size); } else data = NULL; - - if (!insert_tree_item(Vcb, Vcb->extent_root, ce->address, TYPE_EXTENT_ITEM, ce->size, data, tp.item->size, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = insert_tree_item(Vcb, Vcb->extent_root, ce->address, TYPE_EXTENT_ITEM, ce->size, data, tp.item->size, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + if (data) ExFreePool(data); + return Status; } - - delete_tree_item(Vcb, &tp, rollback); - } - } else if (cer->type == TYPE_SHARED_DATA_REF) { - le2 = ce->old_refs.Flink; - while (le2 != &ce->old_refs) { - changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); - - if (cer2->type == TYPE_SHARED_DATA_REF && cer2->sdr.offset == 
cer->sdr.offset) { -// old_count = cer2->edr.count; - - RemoveEntryList(&cer2->list_entry); - ExFreePool(cer2); - break; + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; } - - le2 = le2->Flink; - } + } } - + RemoveEntryList(&cer->list_entry); ExFreePool(cer); - + le = le3; } - + #ifdef DEBUG_PARANOID if (!IsListEmpty(&ce->old_refs)) WARN("old_refs not empty\n"); #endif - + end: if (ce->count == 0 && !ce->superseded) { - decrease_chunk_usage(c, ce->size); - space_list_add(Vcb, c, TRUE, ce->address, ce->size, rollback); + c->used -= ce->size; + space_list_add(c, ce->address, ce->size, rollback); } RemoveEntryList(&ce->list_entry); ExFreePool(ce); - + return STATUS_SUCCESS; } -void add_checksum_entry(device_extension* Vcb, UINT64 address, ULONG length, UINT32* csum, PIRP Irp, LIST_ENTRY* rollback) { +void add_checksum_entry(device_extension* Vcb, UINT64 address, ULONG length, UINT32* csum, PIRP Irp) { KEY searchkey; traverse_ptr tp, next_tp; - UINT32* data; NTSTATUS Status; UINT64 startaddr, endaddr; ULONG len; @@ -1951,40 +2445,41 @@ void add_checksum_entry(device_extension* Vcb, UINT64 address, ULONG length, UIN RTL_BITMAP bmp; ULONG* bmparr; ULONG runlength, index; - + searchkey.obj_id = EXTENT_CSUM_ID; searchkey.obj_type = TYPE_EXTENT_CSUM; searchkey.offset = address; - + // FIXME - create checksum_root if it doesn't exist at all - + Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp); if (Status == STATUS_NOT_FOUND) { // tree is completely empty if (csum) { // not deleted ULONG length2 = length; UINT64 off = address; UINT32* data = csum; - + do { - ULONG il = min(length2, MAX_CSUM_SIZE / sizeof(UINT32)); - + UINT16 il = (UINT16)min(length2, MAX_CSUM_SIZE / sizeof(UINT32)); + checksums = ExAllocatePoolWithTag(PagedPool, il * sizeof(UINT32), ALLOC_TAG); if (!checksums) { ERR("out of memory\n"); return; } - + RtlCopyMemory(checksums, data, il * 
sizeof(UINT32)); - - if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, checksums, - il * sizeof(UINT32), NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); + + Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, checksums, + il * sizeof(UINT32), NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); ExFreePool(checksums); return; } - + length2 -= il; - + if (length2 > 0) { off += il * Vcb->superblock.sector_size; data += il; @@ -1996,102 +2491,111 @@ void add_checksum_entry(device_extension* Vcb, UINT64 address, ULONG length, UIN return; } else { UINT32 tplen; - + // FIXME - check entry is TYPE_EXTENT_CSUM? - + if (tp.item->key.offset < address && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= address) startaddr = tp.item->key.offset; else startaddr = address; - + searchkey.obj_id = EXTENT_CSUM_ID; searchkey.obj_type = TYPE_EXTENT_CSUM; searchkey.offset = address + (length * Vcb->superblock.sector_size); - + Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("find_item returned %08x\n", Status); return; } - + tplen = tp.item->size / sizeof(UINT32); - + if (tp.item->key.offset + (tplen * Vcb->superblock.sector_size) >= address + (length * Vcb->superblock.sector_size)) endaddr = tp.item->key.offset + (tplen * Vcb->superblock.sector_size); else endaddr = address + (length * Vcb->superblock.sector_size); - + TRACE("cs starts at %llx (%x sectors)\n", address, length); TRACE("startaddr = %llx\n", startaddr); TRACE("endaddr = %llx\n", endaddr); - - len = (endaddr - startaddr) / Vcb->superblock.sector_size; - + + len = (ULONG)((endaddr - startaddr) / Vcb->superblock.sector_size); + checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * len, ALLOC_TAG); if (!checksums) { ERR("out of memory\n"); return; } - + bmparr = ExAllocatePoolWithTag(PagedPool, 
sizeof(ULONG) * ((len/8)+1), ALLOC_TAG); if (!bmparr) { ERR("out of memory\n"); ExFreePool(checksums); return; } - + RtlInitializeBitMap(&bmp, bmparr, len); RtlSetAllBits(&bmp); - + searchkey.obj_id = EXTENT_CSUM_ID; searchkey.obj_type = TYPE_EXTENT_CSUM; searchkey.offset = address; - + Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("find_item returned %08x\n", Status); + ExFreePool(checksums); + ExFreePool(bmparr); return; } - + // set bit = free space, cleared bit = allocated sector - + while (tp.item->key.offset < endaddr) { if (tp.item->key.offset >= startaddr) { if (tp.item->size > 0) { - ULONG itemlen = min((len - (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size) * sizeof(UINT32), tp.item->size); - + ULONG itemlen = (ULONG)min((len - (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size) * sizeof(UINT32), tp.item->size); + RtlCopyMemory(&checksums[(tp.item->key.offset - startaddr) / Vcb->superblock.sector_size], tp.item->data, itemlen); - RtlClearBits(&bmp, (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size, itemlen / sizeof(UINT32)); + RtlClearBits(&bmp, (ULONG)((tp.item->key.offset - startaddr) / Vcb->superblock.sector_size), itemlen / sizeof(UINT32)); + } + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + ExFreePool(checksums); + ExFreePool(bmparr); + return; } - - delete_tree_item(Vcb, &tp, rollback); } - + if (find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) { tp = next_tp; } else break; } - + if (!csum) { // deleted - RtlSetBits(&bmp, (address - startaddr) / Vcb->superblock.sector_size, length); + RtlSetBits(&bmp, (ULONG)((address - startaddr) / Vcb->superblock.sector_size), length); } else { RtlCopyMemory(&checksums[(address - startaddr) / Vcb->superblock.sector_size], csum, length * sizeof(UINT32)); - RtlClearBits(&bmp, (address - startaddr) / Vcb->superblock.sector_size, length); + 
RtlClearBits(&bmp, (ULONG)((address - startaddr) / Vcb->superblock.sector_size), length); } - + runlength = RtlFindFirstRunClear(&bmp, &index); - + while (runlength != 0) { do { - ULONG rl; + UINT16 rl; UINT64 off; - + UINT32* data; + if (runlength * sizeof(UINT32) > MAX_CSUM_SIZE) rl = MAX_CSUM_SIZE / sizeof(UINT32); else - rl = runlength; - + rl = (UINT16)runlength; + data = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * rl, ALLOC_TAG); if (!data) { ERR("out of memory\n"); @@ -2099,26 +2603,27 @@ void add_checksum_entry(device_extension* Vcb, UINT64 address, ULONG length, UIN ExFreePool(checksums); return; } - + RtlCopyMemory(data, &checksums[index], sizeof(UINT32) * rl); - + off = startaddr + UInt32x32To64(index, Vcb->superblock.sector_size); - - if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, data, sizeof(UINT32) * rl, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); + + Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, data, sizeof(UINT32) * rl, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); ExFreePool(data); ExFreePool(bmparr); ExFreePool(checksums); return; } - + runlength -= rl; index += rl; } while (runlength > 0); - + runlength = RtlFindNextForwardRunClear(&bmp, index, &index); } - + ExFreePool(bmparr); ExFreePool(checksums); } @@ -2131,35 +2636,45 @@ static NTSTATUS update_chunk_usage(device_extension* Vcb, PIRP Irp, LIST_ENTRY* traverse_ptr tp; BLOCK_GROUP_ITEM* bgi; NTSTATUS Status; - + TRACE("(%p)\n", Vcb); - + ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE); - + while (le != &Vcb->chunks) { c = CONTAINING_RECORD(le, chunk, list_entry); - + ExAcquireResourceExclusiveLite(&c->lock, TRUE); - + + if (!c->cache_loaded && (!IsListEmpty(&c->changed_extents) || c->used != c->oldused)) { + Status = load_cache_chunk(Vcb, c, NULL); + + if (!NT_SUCCESS(Status)) { + ERR("load_cache_chunk returned %08x\n", Status); + 
ExReleaseResourceLite(&c->lock); + goto end; + } + } + le2 = c->changed_extents.Flink; while (le2 != &c->changed_extents) { LIST_ENTRY* le3 = le2->Flink; changed_extent* ce = CONTAINING_RECORD(le2, changed_extent, list_entry); - + Status = flush_changed_extent(Vcb, c, ce, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("flush_changed_extent returned %08x\n", Status); ExReleaseResourceLite(&c->lock); goto end; } - + le2 = le3; } - + // This is usually done by update_chunks, but we have to check again in case any new chunks // have been allocated since. if (c->created) { - Status = create_chunk(Vcb, c, Irp, rollback); + Status = create_chunk(Vcb, c, Irp); if (!NT_SUCCESS(Status)) { ERR("create_chunk returned %08x\n", Status); ExReleaseResourceLite(&c->lock); @@ -2167,33 +2682,58 @@ static NTSTATUS update_chunk_usage(device_extension* Vcb, PIRP Irp, LIST_ENTRY* } } + if (c->old_cache) { + if (c->old_cache->dirty) { + LIST_ENTRY batchlist; + + InitializeListHead(&batchlist); + + Status = flush_fcb(c->old_cache, FALSE, &batchlist, Irp); + if (!NT_SUCCESS(Status)) { + ERR("flush_fcb returned %08x\n", Status); + ExReleaseResourceLite(&c->lock); + clear_batch_list(Vcb, &batchlist); + goto end; + } + + Status = commit_batch_list(Vcb, &batchlist, Irp); + if (!NT_SUCCESS(Status)) { + ERR("commit_batch_list returned %08x\n", Status); + ExReleaseResourceLite(&c->lock); + goto end; + } + } + + free_fcb(Vcb, c->old_cache); + c->old_cache = NULL; + } + if (c->used != c->oldused) { searchkey.obj_id = c->offset; searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM; searchkey.offset = c->chunk_item->size; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); ExReleaseResourceLite(&c->lock); goto end; } - + if (keycmp(searchkey, tp.item->key)) { ERR("could not find (%llx,%x,%llx) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); - int3; Status = STATUS_INTERNAL_ERROR; 
ExReleaseResourceLite(&c->lock); goto end; } - + if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) { ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM)); Status = STATUS_INTERNAL_ERROR; ExReleaseResourceLite(&c->lock); goto end; } - + bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); if (!bgi) { ERR("out of memory\n"); @@ -2201,47 +2741,53 @@ static NTSTATUS update_chunk_usage(device_extension* Vcb, PIRP Irp, LIST_ENTRY* ExReleaseResourceLite(&c->lock); goto end; } - + RtlCopyMemory(bgi, tp.item->data, tp.item->size); bgi->used = c->used; - + TRACE("adjusting usage of chunk %llx to %llx\n", c->offset, c->used); - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + ExFreePool(bgi); + ExReleaseResourceLite(&c->lock); + goto end; + } + + Status = insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); ExFreePool(bgi); - Status = STATUS_INTERNAL_ERROR; ExReleaseResourceLite(&c->lock); goto end; } - + TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used); - + Vcb->superblock.bytes_used += c->used - c->oldused; - + TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used); - + c->oldused = c->used; } - + ExReleaseResourceLite(&c->lock); - + le = le->Flink; } - + Status = STATUS_SUCCESS; - + end: ExReleaseResourceLite(&Vcb->chunk_lock); - + return Status; } static void get_first_item(tree* t, KEY* key) { LIST_ENTRY* le; - + le = t->itemlist.Flink; while (le != &t->itemlist) { tree_data* td = CONTAINING_RECORD(le, 
tree_data, list_entry); @@ -2251,189 +2797,139 @@ static void get_first_item(tree* t, KEY* key) { } } -static NTSTATUS STDCALL split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, UINT32 numitems, UINT32 size) { +static NTSTATUS split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, UINT32 numitems, UINT32 size) { tree *nt, *pt; tree_data* td; tree_data* oldlastitem; -// write_tree* wt2; -// // tree_data *firsttd, *lasttd; -// // LIST_ENTRY* le; -// #ifdef DEBUG_PARANOID -// KEY lastkey1, lastkey2; -// traverse_ptr tp, next_tp; -// ULONG numitems1, numitems2; -// #endif - + TRACE("splitting tree in %llx at (%llx,%x,%llx)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset); - -// #ifdef DEBUG_PARANOID -// lastkey1.obj_id = 0xffffffffffffffff; -// lastkey1.obj_type = 0xff; -// lastkey1.offset = 0xffffffffffffffff; -// -// if (!find_item(Vcb, t->root, &tp, &lastkey1, NULL, FALSE)) -// ERR("error - find_item failed\n"); -// else { -// lastkey1 = tp.item->key; -// numitems1 = 0; -// while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) { -// free_traverse_ptr(&tp); -// tp = next_tp; -// numitems1++; -// } -// free_traverse_ptr(&tp); -// } -// #endif - + nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG); if (!nt) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header)); nt->header.address = 0; nt->header.generation = Vcb->superblock.generation; nt->header.num_items = t->header.num_items - numitems; nt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN; - + nt->has_address = FALSE; nt->Vcb = Vcb; nt->parent = t->parent; - + #ifdef DEBUG_PARANOID if (nt->parent && nt->parent->header.level <= nt->header.level) int3; #endif - + nt->root = t->root; -// nt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG); nt->new_address = 0; nt->has_new_address = FALSE; 
nt->updated_extents = FALSE; + nt->uniqueness_determined = TRUE; + nt->is_unique = TRUE; nt->list_entry_hash.Flink = NULL; + nt->buf = NULL; InitializeListHead(&nt->itemlist); - -// ExInitializeResourceLite(&nt->nonpaged->load_tree_lock); - + oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry); -// // firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry); -// // lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry); -// // -// // TRACE("old tree in %x was from (%x,%x,%x) to (%x,%x,%x)\n", -// // (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset, -// // (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset); -// // -// // le = wt->tree->itemlist.Flink; -// // while (le != &wt->tree->itemlist) { -// // td = CONTAINING_RECORD(le, tree_data, list_entry); -// // TRACE("old tree item was (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset); -// // le = le->Flink; -// // } - nt->itemlist.Flink = &newfirstitem->list_entry; nt->itemlist.Blink = t->itemlist.Blink; nt->itemlist.Flink->Blink = &nt->itemlist; nt->itemlist.Blink->Flink = &nt->itemlist; - + t->itemlist.Blink = &oldlastitem->list_entry; t->itemlist.Blink->Flink = &t->itemlist; - -// // le = wt->tree->itemlist.Flink; -// // while (le != &wt->tree->itemlist) { -// // td = CONTAINING_RECORD(le, tree_data, list_entry); -// // TRACE("old tree item now (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset); -// // le = le->Flink; -// // } -// // -// // firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry); -// // lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry); -// // -// // TRACE("old tree in %x is now from (%x,%x,%x) to (%x,%x,%x)\n", -// // (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset, -// // 
(UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset); - + nt->size = t->size - size; t->size = size; t->header.num_items = numitems; nt->write = TRUE; - - InterlockedIncrement(&Vcb->open_trees); + InsertTailList(&Vcb->trees, &nt->list_entry); - -// // // TESTING -// // td = wt->tree->items; -// // while (td) { -// // if (!td->ignore) { -// // TRACE("old tree item: (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset); -// // } -// // td = td->next; -// // } - -// // oldlastitem->next = NULL; -// // wt->tree->lastitem = oldlastitem; - -// // TRACE("last item is now (%x,%x,%x)\n", (UINT32)oldlastitem->key.obj_id, oldlastitem->key.obj_type, (UINT32)oldlastitem->key.offset); - + if (nt->header.level > 0) { LIST_ENTRY* le = nt->itemlist.Flink; - + while (le != &nt->itemlist) { tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry); - + if (td2->treeholder.tree) { td2->treeholder.tree->parent = nt; #ifdef DEBUG_PARANOID if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3; #endif } - + + le = le->Flink; + } + } else { + LIST_ENTRY* le = nt->itemlist.Flink; + + while (le != &nt->itemlist) { + tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry); + + if (!td2->inserted && td2->data) { + UINT8* data = ExAllocatePoolWithTag(PagedPool, td2->size, ALLOC_TAG); + + if (!data) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(data, td2->data, td2->size); + td2->data = data; + td2->inserted = TRUE; + } + le = le->Flink; } } - + if (nt->parent) { td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); if (!td) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + td->key = newfirstitem->key; - + InsertHeadList(&t->paritem->list_entry, &td->list_entry); - + td->ignore = FALSE; td->inserted = TRUE; td->treeholder.tree = nt; -// td->treeholder.nonpaged->status = tree_holder_loaded; 
nt->paritem = td; - + nt->parent->header.num_items++; nt->parent->size += sizeof(internal_node); goto end; } - + TRACE("adding new tree parent\n"); - + if (nt->header.level == 255) { ERR("cannot add parent to tree at level 255\n"); return STATUS_INTERNAL_ERROR; } - + pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG); if (!pt) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header)); pt->header.address = 0; pt->header.num_items = 2; pt->header.level = nt->header.level + 1; pt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN; - + pt->has_address = FALSE; pt->Vcb = Vcb; pt->parent = NULL; @@ -2442,117 +2938,89 @@ static NTSTATUS STDCALL split_tree_at(device_extension* Vcb, tree* t, tree_data* pt->new_address = 0; pt->has_new_address = FALSE; pt->updated_extents = FALSE; -// pt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG); pt->size = pt->header.num_items * sizeof(internal_node); + pt->uniqueness_determined = TRUE; + pt->is_unique = TRUE; pt->list_entry_hash.Flink = NULL; + pt->buf = NULL; InitializeListHead(&pt->itemlist); - -// ExInitializeResourceLite(&pt->nonpaged->load_tree_lock); - - InterlockedIncrement(&Vcb->open_trees); + InsertTailList(&Vcb->trees, &pt->list_entry); - + td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); if (!td) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + get_first_item(t, &td->key); td->ignore = FALSE; td->inserted = FALSE; td->treeholder.address = 0; td->treeholder.generation = Vcb->superblock.generation; td->treeholder.tree = t; -// td->treeholder.nonpaged->status = tree_holder_loaded; InsertTailList(&pt->itemlist, &td->list_entry); t->paritem = td; - + td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); if (!td) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + td->key = newfirstitem->key; td->ignore = FALSE; td->inserted = 
FALSE; td->treeholder.address = 0; td->treeholder.generation = Vcb->superblock.generation; td->treeholder.tree = nt; -// td->treeholder.nonpaged->status = tree_holder_loaded; InsertTailList(&pt->itemlist, &td->list_entry); nt->paritem = td; - + pt->write = TRUE; t->root->treeholder.tree = pt; - + t->parent = pt; nt->parent = pt; - + #ifdef DEBUG_PARANOID if (t->parent && t->parent->header.level <= t->header.level) int3; if (nt->parent && nt->parent->header.level <= nt->header.level) int3; #endif - + end: t->root->root_item.bytes_used += Vcb->superblock.node_size; -// #ifdef DEBUG_PARANOID -// lastkey2.obj_id = 0xffffffffffffffff; -// lastkey2.obj_type = 0xff; -// lastkey2.offset = 0xffffffffffffffff; -// -// if (!find_item(Vcb, wt->tree->root, &tp, &lastkey2, NULL, FALSE)) -// ERR("error - find_item failed\n"); -// else { -// lastkey2 = tp.item->key; -// -// numitems2 = 0; -// while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) { -// free_traverse_ptr(&tp); -// tp = next_tp; -// numitems2++; -// } -// free_traverse_ptr(&tp); -// } -// -// ERR("lastkey1 = %llx,%x,%llx\n", lastkey1.obj_id, lastkey1.obj_type, lastkey1.offset); -// ERR("lastkey2 = %llx,%x,%llx\n", lastkey2.obj_id, lastkey2.obj_type, lastkey2.offset); -// ERR("numitems1 = %u\n", numitems1); -// ERR("numitems2 = %u\n", numitems2); -// #endif - return STATUS_SUCCESS; } -static NTSTATUS STDCALL split_tree(device_extension* Vcb, tree* t) { +static NTSTATUS split_tree(device_extension* Vcb, tree* t) { LIST_ENTRY* le; UINT32 size, ds, numitems; - + size = 0; numitems = 0; - + // FIXME - naïve implementation: maximizes number of filled trees - + le = t->itemlist.Flink; while (le != &t->itemlist) { tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); - + if (!td->ignore) { if (t->header.level == 0) ds = sizeof(leaf_node) + td->size; else ds = sizeof(internal_node); - + if (numitems == 0 && ds > Vcb->superblock.node_size - sizeof(tree_header)) { ERR("(%llx,%x,%llx) in tree %llx is too large (%x > 
%x)\n", td->key.obj_id, td->key.obj_type, td->key.offset, t->root->id, ds, Vcb->superblock.node_size - sizeof(tree_header)); - int3; + return STATUS_INTERNAL_ERROR; } - + // FIXME - move back if previous item was deleted item with same key if (size + ds > Vcb->superblock.node_size - sizeof(tree_header)) return split_tree_at(Vcb, t, td, numitems, size); @@ -2560,10 +3028,10 @@ static NTSTATUS STDCALL split_tree(device_extension* Vcb, tree* t) { size += ds; numitems++; } - + le = le->Flink; } - + return STATUS_SUCCESS; } @@ -2571,102 +3039,105 @@ BOOL is_tree_unique(device_extension* Vcb, tree* t, PIRP Irp) { KEY searchkey; traverse_ptr tp; NTSTATUS Status; - - do { - EXTENT_ITEM* ei; - UINT8* type; - - if (t->has_address) { - searchkey.obj_id = t->header.address; - searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM; - searchkey.offset = 0xffffffffffffffff; - - Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - return FALSE; - } - - if (tp.item->key.obj_id != t->header.address || (tp.item->key.obj_type != TYPE_METADATA_ITEM && tp.item->key.obj_type != TYPE_EXTENT_ITEM)) - return FALSE; - - if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size == sizeof(EXTENT_ITEM_V0)) - return FALSE; - - if (tp.item->size < sizeof(EXTENT_ITEM)) - return FALSE; - - ei = (EXTENT_ITEM*)tp.item->data; - - if (ei->refcount > 1) - return FALSE; - - if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) { - EXTENT_ITEM2* ei2; - - if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) - return FALSE; - - ei2 = (EXTENT_ITEM2*)&ei[1]; - type = (UINT8*)&ei2[1]; - } else - type = (UINT8*)&ei[1]; - - if (type >= tp.item->data + tp.item->size || *type != TYPE_TREE_BLOCK_REF) - return FALSE; + BOOL ret = FALSE; + EXTENT_ITEM* ei; + UINT8* type; + + if 
(t->uniqueness_determined) + return t->is_unique; + + if (t->parent && !is_tree_unique(Vcb, t->parent, Irp)) + goto end; + + if (t->has_address) { + searchkey.obj_id = t->header.address; + searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM; + searchkey.offset = 0xffffffffffffffff; + + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("error - find_item returned %08x\n", Status); + goto end; } - - t = t->parent; - } while (t); - - return TRUE; + + if (tp.item->key.obj_id != t->header.address || (tp.item->key.obj_type != TYPE_METADATA_ITEM && tp.item->key.obj_type != TYPE_EXTENT_ITEM)) + goto end; + + if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size == sizeof(EXTENT_ITEM_V0)) + goto end; + + if (tp.item->size < sizeof(EXTENT_ITEM)) + goto end; + + ei = (EXTENT_ITEM*)tp.item->data; + + if (ei->refcount > 1) + goto end; + + if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) { + EXTENT_ITEM2* ei2; + + if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) + goto end; + + ei2 = (EXTENT_ITEM2*)&ei[1]; + type = (UINT8*)&ei2[1]; + } else + type = (UINT8*)&ei[1]; + + if (type >= tp.item->data + tp.item->size || *type != TYPE_TREE_BLOCK_REF) + goto end; + } + + ret = TRUE; + +end: + t->is_unique = ret; + t->uniqueness_determined = TRUE; + + return ret; } -static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, BOOL* done, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, BOOL* done, BOOL* done_deletions, PIRP Irp, LIST_ENTRY* rollback) { LIST_ENTRY* le; tree_data* nextparitem = NULL; NTSTATUS Status; tree *next_tree, *par; BOOL loaded; - + *done = FALSE; - + TRACE("trying to amalgamate tree in root %llx, level %x (size %u)\n", t->root->id, t->header.level, t->size); - + // FIXME - doesn't capture everything, as it doesn't 
ascend le = t->paritem->list_entry.Flink; while (le != &t->parent->itemlist) { tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); - + if (!td->ignore) { nextparitem = td; break; } - + le = le->Flink; } - + if (!nextparitem) return STATUS_SUCCESS; - + TRACE("nextparitem: key = %llx,%x,%llx\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset); -// nextparitem = t->paritem; - -// ExAcquireResourceExclusiveLite(&t->parent->nonpaged->load_tree_lock, TRUE); - + Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, &loaded, NULL); if (!NT_SUCCESS(Status)) { ERR("do_load_tree returned %08x\n", Status); return Status; } - + if (!is_tree_unique(Vcb, nextparitem->treeholder.tree, Irp)) return STATUS_SUCCESS; - -// ExReleaseResourceLite(&t->parent->nonpaged->load_tree_lock); - + next_tree = nextparitem->treeholder.tree; - + if (!next_tree->updated_extents && next_tree->has_address) { Status = update_tree_extents(Vcb, next_tree, Irp, rollback); if (!NT_SUCCESS(Status)) { @@ -2674,99 +3145,114 @@ static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, BOOL* done, return Status; } } - + if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) { // merge two trees into one - + t->header.num_items += next_tree->header.num_items; t->size += next_tree->size; - + if (next_tree->header.level > 0) { le = next_tree->itemlist.Flink; - + while (le != &next_tree->itemlist) { tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry); - + if (td2->treeholder.tree) { td2->treeholder.tree->parent = t; #ifdef DEBUG_PARANOID if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3; #endif } - + + td2->inserted = TRUE; + le = le->Flink; + } + } else { + le = next_tree->itemlist.Flink; + + while (le != &next_tree->itemlist) { + tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry); + + if (!td2->inserted && 
td2->data) { + UINT8* data = ExAllocatePoolWithTag(PagedPool, td2->size, ALLOC_TAG); + + if (!data) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(data, td2->data, td2->size); + td2->data = data; + td2->inserted = TRUE; + } + le = le->Flink; } } - - le = next_tree->itemlist.Flink; - while (le != &next_tree->itemlist) { - tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); - - td->inserted = TRUE; - - le = le->Flink; - } - + t->itemlist.Blink->Flink = next_tree->itemlist.Flink; t->itemlist.Blink->Flink->Blink = t->itemlist.Blink; t->itemlist.Blink = next_tree->itemlist.Blink; t->itemlist.Blink->Flink = &t->itemlist; - + next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist; - + next_tree->header.num_items = 0; next_tree->size = 0; - + if (next_tree->has_new_address) { // delete associated EXTENT_ITEM Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback); - + if (!NT_SUCCESS(Status)) { ERR("reduce_tree_extent returned %08x\n", Status); return Status; } } else if (next_tree->has_address) { Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback); - + if (!NT_SUCCESS(Status)) { ERR("reduce_tree_extent returned %08x\n", Status); return Status; } } - + if (!nextparitem->ignore) { nextparitem->ignore = TRUE; next_tree->parent->header.num_items--; next_tree->parent->size -= sizeof(internal_node); + + *done_deletions = TRUE; } - + par = next_tree->parent; while (par) { par->write = TRUE; par = par->parent; } - + RemoveEntryList(&nextparitem->list_entry); ExFreePool(next_tree->paritem); next_tree->paritem = NULL; - + next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size; - + free_tree(next_tree); - + *done = TRUE; } else { // rebalance by moving items from second tree into first ULONG avg_size = (t->size + 
next_tree->size) / 2; KEY firstitem = {0, 0, 0}; BOOL changed = FALSE; - + TRACE("attempting rebalance\n"); - + le = next_tree->itemlist.Flink; while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) { tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); ULONG size; - + if (!td->ignore) { if (next_tree->header.level == 0) size = sizeof(leaf_node) + td->size; @@ -2774,263 +3260,273 @@ static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, BOOL* done, size = sizeof(internal_node); } else size = 0; - + if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) { RemoveEntryList(&td->list_entry); InsertTailList(&t->itemlist, &td->list_entry); - td->inserted = TRUE; - + if (next_tree->header.level > 0 && td->treeholder.tree) { td->treeholder.tree->parent = t; #ifdef DEBUG_PARANOID if (td->treeholder.tree->parent && td->treeholder.tree->parent->header.level <= td->treeholder.tree->header.level) int3; #endif + } else if (next_tree->header.level == 0 && !td->inserted && td->size > 0) { + UINT8* data = ExAllocatePoolWithTag(PagedPool, td->size, ALLOC_TAG); + + if (!data) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(data, td->data, td->size); + td->data = data; } - + + td->inserted = TRUE; + if (!td->ignore) { next_tree->size -= size; t->size += size; next_tree->header.num_items--; t->header.num_items++; } - + changed = TRUE; } else break; - + le = next_tree->itemlist.Flink; } - + le = next_tree->itemlist.Flink; while (le != &next_tree->itemlist) { tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); - + if (!td->ignore) { firstitem = td->key; break; } - + le = le->Flink; } - -// ERR("firstitem = %llx,%x,%llx\n", firstitem.obj_id, firstitem.obj_type, firstitem.offset); - + // FIXME - once ascension is working, make this work with parent's parent, etc. 
if (next_tree->paritem) next_tree->paritem->key = firstitem; - + par = next_tree; while (par) { par->write = TRUE; par = par->parent; } - + if (changed) *done = TRUE; } - + return STATUS_SUCCESS; } -static NTSTATUS update_extent_level(device_extension* Vcb, UINT64 address, tree* t, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS update_extent_level(device_extension* Vcb, UINT64 address, tree* t, UINT8 level, PIRP Irp) { KEY searchkey; traverse_ptr tp; NTSTATUS Status; - + if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) { searchkey.obj_id = address; searchkey.obj_type = TYPE_METADATA_ITEM; searchkey.offset = t->header.level; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (!keycmp(tp.item->key, searchkey)) { EXTENT_ITEM_SKINNY_METADATA* eism; - + if (tp.item->size > 0) { eism = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); - + if (!eism) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(eism, tp.item->data, tp.item->size); } else eism = NULL; - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, tp.item->size, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - ExFreePool(eism); - return STATUS_INTERNAL_ERROR; + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + if (eism) ExFreePool(eism); + return Status; + } + + Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, tp.item->size, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + if (eism) ExFreePool(eism); + return Status; } - + return STATUS_SUCCESS; } } - + searchkey.obj_id = address; searchkey.obj_type = TYPE_EXTENT_ITEM; searchkey.offset = 0xffffffffffffffff; 
- + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { EXTENT_ITEM_TREE* eit; - + if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) { ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE)); return STATUS_INTERNAL_ERROR; } - + eit = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); - + if (!eit) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(eit, tp.item->data, tp.item->size); - - delete_tree_item(Vcb, &tp, rollback); - + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + ExFreePool(eit); + return Status; + } + eit->level = level; - - if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, eit, tp.item->size, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); + + Status = insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, eit, tp.item->size, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); ExFreePool(eit); - return STATUS_INTERNAL_ERROR; + return Status; } - + return STATUS_SUCCESS; } - + ERR("could not find EXTENT_ITEM for address %llx\n", address); - + return STATUS_INTERNAL_ERROR; } static NTSTATUS update_tree_extents_recursive(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) { NTSTATUS Status; - + if (t->parent && !t->parent->updated_extents && t->parent->has_address) { Status = update_tree_extents_recursive(Vcb, t->parent, Irp, rollback); if (!NT_SUCCESS(Status)) return Status; } - + Status = update_tree_extents(Vcb, t, Irp, rollback); if (!NT_SUCCESS(Status)) { 
ERR("update_tree_extents returned %08x\n", Status); return Status; } - + return STATUS_SUCCESS; } -static NTSTATUS STDCALL do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) { -// LIST_ENTRY *le, *le2; -// write_tree* wt; -// tree_data* td; - UINT8 level, max_level; +static NTSTATUS do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) { + ULONG level, max_level; UINT32 min_size; BOOL empty, done_deletions = FALSE; NTSTATUS Status; tree* t; - + TRACE("(%p)\n", Vcb); - + max_level = 0; - + for (level = 0; level <= 255; level++) { LIST_ENTRY *le, *nextle; - + empty = TRUE; - + TRACE("doing level %u\n", level); - + le = Vcb->trees.Flink; - + while (le != &Vcb->trees) { t = CONTAINING_RECORD(le, tree, list_entry); - + nextle = le->Flink; - + if (t->write && t->header.level == level) { empty = FALSE; - - if (t->header.num_items == 0) { - if (!t->updated_extents && t->has_address) { - Status = update_tree_extents(Vcb, t, Irp, rollback); - if (!NT_SUCCESS(Status)) { - ERR("update_tree_extents returned %08x\n", Status); - return Status; - } - } + if (t->header.num_items == 0) { if (t->parent) { done_deletions = TRUE; - + TRACE("deleting tree in root %llx\n", t->root->id); - + t->root->root_item.bytes_used -= Vcb->superblock.node_size; - + if (t->has_new_address) { // delete associated EXTENT_ITEM Status = reduce_tree_extent(Vcb, t->new_address, t, t->parent->header.tree_id, t->header.level, Irp, rollback); - + if (!NT_SUCCESS(Status)) { ERR("reduce_tree_extent returned %08x\n", Status); return Status; } - + t->has_new_address = FALSE; } else if (t->has_address) { Status = reduce_tree_extent(Vcb,t->header.address, t, t->parent->header.tree_id, t->header.level, Irp, rollback); - + if (!NT_SUCCESS(Status)) { ERR("reduce_tree_extent returned %08x\n", Status); return Status; } - + t->has_address = FALSE; } - + if (!t->paritem->ignore) { t->paritem->ignore = TRUE; t->parent->header.num_items--; t->parent->size -= sizeof(internal_node); } - + 
RemoveEntryList(&t->paritem->list_entry); ExFreePool(t->paritem); t->paritem = NULL; - + free_tree(t); } else if (t->header.level != 0) { if (t->has_new_address) { - Status = update_extent_level(Vcb, t->new_address, t, 0, Irp, rollback); - + Status = update_extent_level(Vcb, t->new_address, t, 0, Irp); + if (!NT_SUCCESS(Status)) { ERR("update_extent_level returned %08x\n", Status); return Status; } } - + t->header.level = 0; } } else if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) { TRACE("splitting overlarge tree (%x > %x)\n", t->size, Vcb->superblock.node_size - sizeof(tree_header)); - + if (!t->updated_extents && t->has_address) { Status = update_tree_extents_recursive(Vcb, t, Irp, rollback); if (!NT_SUCCESS(Status)) { @@ -3038,7 +3534,7 @@ static NTSTATUS STDCALL do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* r return Status; } } - + Status = split_tree(Vcb, t); if (!NT_SUCCESS(Status)) { @@ -3047,10 +3543,10 @@ static NTSTATUS STDCALL do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* r } } } - + le = nextle; } - + if (!empty) { max_level = level; } else { @@ -3058,48 +3554,49 @@ static NTSTATUS STDCALL do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* r break; } } - + min_size = (Vcb->superblock.node_size - sizeof(tree_header)) / 2; - + for (level = 0; level <= max_level; level++) { LIST_ENTRY* le; - + le = Vcb->trees.Flink; - + while (le != &Vcb->trees) { t = CONTAINING_RECORD(le, tree, list_entry); - - if (t->write && t->header.level == level && t->header.num_items > 0 && t->parent && t->size < min_size && is_tree_unique(Vcb, t, Irp)) { + + if (t->write && t->header.level == level && t->header.num_items > 0 && t->parent && t->size < min_size && + t->root->id != BTRFS_ROOT_FREE_SPACE && is_tree_unique(Vcb, t, Irp)) { BOOL done; - + do { - Status = try_tree_amalgamate(Vcb, t, &done, Irp, rollback); + Status = try_tree_amalgamate(Vcb, t, &done, &done_deletions, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("try_tree_amalgamate 
returned %08x\n", Status); return Status; } } while (done && t->size < min_size); } - + le = le->Flink; } } - + // simplify trees if top tree only has one entry - + if (done_deletions) { for (level = max_level; level > 0; level--) { LIST_ENTRY *le, *nextle; - + le = Vcb->trees.Flink; while (le != &Vcb->trees) { nextle = le->Flink; t = CONTAINING_RECORD(le, tree, list_entry); - + if (t->write && t->header.level == level) { if (!t->parent && t->header.num_items == 1) { LIST_ENTRY* le2 = t->itemlist.Flink; - tree_data* td; + tree_data* td = NULL; tree* child_tree = NULL; while (le2 != &t->itemlist) { @@ -3108,95 +3605,95 @@ static NTSTATUS STDCALL do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* r break; le2 = le2->Flink; } - + TRACE("deleting top-level tree in root %llx with one item\n", t->root->id); - + if (t->has_new_address) { // delete associated EXTENT_ITEM Status = reduce_tree_extent(Vcb, t->new_address, t, t->header.tree_id, t->header.level, Irp, rollback); - + if (!NT_SUCCESS(Status)) { ERR("reduce_tree_extent returned %08x\n", Status); return Status; } - + t->has_new_address = FALSE; } else if (t->has_address) { Status = reduce_tree_extent(Vcb,t->header.address, t, t->header.tree_id, t->header.level, Irp, rollback); - + if (!NT_SUCCESS(Status)) { ERR("reduce_tree_extent returned %08x\n", Status); return Status; } - + t->has_address = FALSE; } - + if (!td->treeholder.tree) { // load first item if not already loaded KEY searchkey = {0,0,0}; traverse_ptr tp; - + Status = find_item(Vcb, t->root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } } - + child_tree = td->treeholder.tree; - + if (child_tree) { child_tree->parent = NULL; child_tree->paritem = NULL; } - + t->root->root_item.bytes_used -= Vcb->superblock.node_size; free_tree(t); - + if (child_tree) child_tree->root->treeholder.tree = child_tree; } } - + le = nextle; } } } - + return STATUS_SUCCESS; } static NTSTATUS 
remove_root_extents(device_extension* Vcb, root* r, tree_holder* th, UINT8 level, tree* parent, PIRP Irp, LIST_ENTRY* rollback) { NTSTATUS Status; - + if (!th->tree) { - Status = load_tree(Vcb, th->address, r, &th->tree, NULL, NULL); - + Status = load_tree(Vcb, th->address, r, &th->tree, th->generation, NULL); + if (!NT_SUCCESS(Status)) { ERR("load_tree(%llx) returned %08x\n", th->address, Status); return Status; } } - + if (level > 0) { LIST_ENTRY* le = th->tree->itemlist.Flink; - + while (le != &th->tree->itemlist) { tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); - + if (!td->ignore) { Status = remove_root_extents(Vcb, r, &td->treeholder, th->tree->header.level - 1, th->tree, Irp, rollback); - + if (!NT_SUCCESS(Status)) { ERR("remove_root_extents returned %08x\n", Status); return Status; } } - + le = le->Flink; } } - + if (th->tree && !th->tree->updated_extents && th->tree->has_address) { Status = update_tree_extents(Vcb, th->tree, Irp, rollback); if (!NT_SUCCESS(Status)) { @@ -3204,16 +3701,16 @@ static NTSTATUS remove_root_extents(device_extension* Vcb, root* r, tree_holder* return Status; } } - + if (!th->tree || th->tree->has_address) { Status = reduce_tree_extent(Vcb, th->address, NULL, parent ? 
parent->header.tree_id : r->id, level, Irp, rollback); - + if (!NT_SUCCESS(Status)) { ERR("reduce_tree_extent(%llx) returned %08x\n", th->address, Status); return Status; } } - + return STATUS_SUCCESS; } @@ -3221,113 +3718,192 @@ static NTSTATUS drop_root(device_extension* Vcb, root* r, PIRP Irp, LIST_ENTRY* NTSTATUS Status; KEY searchkey; traverse_ptr tp; - + Status = remove_root_extents(Vcb, r, &r->treeholder, r->root_item.root_level, NULL, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("remove_root_extents returned %08x\n", Status); return Status; } - - // remove entry in uuid root (tree 9) + + // remove entries in uuid root (tree 9) if (Vcb->uuid_root) { RtlCopyMemory(&searchkey.obj_id, &r->root_item.uuid.uuid[0], sizeof(UINT64)); searchkey.obj_type = TYPE_SUBVOL_UUID; RtlCopyMemory(&searchkey.offset, &r->root_item.uuid.uuid[sizeof(UINT64)], sizeof(UINT64)); - + if (searchkey.obj_id != 0 || searchkey.offset != 0) { Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { WARN("find_item returned %08x\n", Status); } else { - if (!keycmp(tp.item->key, searchkey)) - delete_tree_item(Vcb, &tp, rollback); - else + if (!keycmp(tp.item->key, searchkey)) { + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + } else + WARN("could not find (%llx,%x,%llx) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); + } + } + + if (r->root_item.rtransid > 0) { + RtlCopyMemory(&searchkey.obj_id, &r->root_item.received_uuid.uuid[0], sizeof(UINT64)); + searchkey.obj_type = TYPE_SUBVOL_REC_UUID; + RtlCopyMemory(&searchkey.offset, &r->root_item.received_uuid.uuid[sizeof(UINT64)], sizeof(UINT64)); + + Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp); + if (!NT_SUCCESS(Status)) + WARN("find_item returned %08x\n", Status); + else { + if (!keycmp(tp.item->key, searchkey)) { + if (tp.item->size == sizeof(UINT64)) { + 
UINT64* id = (UINT64*)tp.item->data; + + if (*id == r->id) { + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + } + } else if (tp.item->size > sizeof(UINT64)) { + ULONG i; + UINT64* ids = (UINT64*)tp.item->data; + + for (i = 0; i < tp.item->size / sizeof(UINT64); i++) { + if (ids[i] == r->id) { + UINT64* ne; + + ne = ExAllocatePoolWithTag(PagedPool, tp.item->size - sizeof(UINT64), ALLOC_TAG); + if (!ne) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + if (i > 0) + RtlCopyMemory(ne, ids, sizeof(UINT64) * i); + + if ((i + 1) * sizeof(UINT64) < tp.item->size) + RtlCopyMemory(&ne[i], &ids[i + 1], tp.item->size - ((i + 1) * sizeof(UINT64))); + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + ExFreePool(ne); + return Status; + } + + Status = insert_tree_item(Vcb, Vcb->uuid_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, + ne, tp.item->size - sizeof(UINT64), NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + ExFreePool(ne); + return Status; + } + + break; + } + } + } + } else WARN("could not find (%llx,%x,%llx) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); } } } - + // delete ROOT_ITEM - + searchkey.obj_id = r->id; searchkey.obj_type = TYPE_ROOT_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("find_item returned %08x\n", Status); return Status; } - - if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) - delete_tree_item(Vcb, &tp, rollback); - else + + if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { + Status = delete_tree_item(Vcb, &tp); + + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", 
Status); + return Status; + } + } else WARN("could not find (%llx,%x,%llx) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); - + // delete items in tree cache - + free_trees_root(Vcb, r); - + return STATUS_SUCCESS; } static NTSTATUS drop_roots(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) { LIST_ENTRY *le = Vcb->drop_roots.Flink, *le2; NTSTATUS Status; - + while (le != &Vcb->drop_roots) { root* r = CONTAINING_RECORD(le, root, list_entry); - + le2 = le->Flink; - + Status = drop_root(Vcb, r, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("drop_root(%llx) returned %08x\n", r->id, Status); return Status; } - + le = le2; } - + return STATUS_SUCCESS; } -static NTSTATUS update_dev_item(device_extension* Vcb, device* device, PIRP Irp, LIST_ENTRY* rollback) { +NTSTATUS update_dev_item(device_extension* Vcb, device* device, PIRP Irp) { KEY searchkey; traverse_ptr tp; DEV_ITEM* di; NTSTATUS Status; - + searchkey.obj_id = 1; searchkey.obj_type = TYPE_DEV_ITEM; searchkey.offset = device->devitem.dev_id; - + Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (keycmp(tp.item->key, searchkey)) { ERR("error - could not find DEV_ITEM for device %llx\n", device->devitem.dev_id); return STATUS_INTERNAL_ERROR; } - - delete_tree_item(Vcb, &tp, rollback); - + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG); if (!di) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM)); - - if (!insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = insert_tree_item(Vcb, 
Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + ExFreePool(di); + return Status; } - + return STATUS_SUCCESS; } @@ -3335,33 +3911,33 @@ static void regen_bootstrap(device_extension* Vcb) { sys_chunk* sc2; USHORT i = 0; LIST_ENTRY* le; - + i = 0; le = Vcb->sys_chunks.Flink; while (le != &Vcb->sys_chunks) { sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry); - + TRACE("%llx,%x,%llx\n", sc2->key.obj_id, sc2->key.obj_type, sc2->key.offset); - + RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], &sc2->key, sizeof(KEY)); i += sizeof(KEY); - + RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], sc2->data, sc2->size); i += sc2->size; - + le = le->Flink; } } -static NTSTATUS add_to_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset, void* data, ULONG size) { - sys_chunk *sc, *sc2; +static NTSTATUS add_to_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset, void* data, UINT16 size) { + sys_chunk* sc; LIST_ENTRY* le; - + if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) { ERR("error - bootstrap is full\n"); return STATUS_INTERNAL_ERROR; } - + sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG); if (!sc) { ERR("out of memory\n"); @@ -3378,46 +3954,47 @@ static NTSTATUS add_to_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj ExFreePool(sc); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(sc->data, data, sc->size); - + le = Vcb->sys_chunks.Flink; while (le != &Vcb->sys_chunks) { - sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry); - + sys_chunk* sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry); + if (keycmp(sc2->key, sc->key) == 1) break; - + le = le->Flink; } InsertTailList(le, &sc->list_entry); - + Vcb->superblock.n += sizeof(KEY) + size; - + regen_bootstrap(Vcb); - + return STATUS_SUCCESS; } -static NTSTATUS create_chunk(device_extension* Vcb, 
chunk* c, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp) { CHUNK_ITEM* ci; CHUNK_ITEM_STRIPE* cis; BLOCK_GROUP_ITEM* bgi; UINT16 i, factor; NTSTATUS Status; - + ci = ExAllocatePoolWithTag(PagedPool, c->size, ALLOC_TAG); if (!ci) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(ci, c->chunk_item, c->size); - - if (!insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size, NULL, Irp, rollback)) { + + Status = insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size, NULL, Irp); + if (!NT_SUCCESS(Status)) { ERR("insert_tree_item failed\n"); ExFreePool(ci); - return STATUS_INTERNAL_ERROR; + return Status; } if (c->chunk_item->type & BLOCK_FLAG_SYSTEM) { @@ -3429,7 +4006,7 @@ static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp, LIST_ENT } // add BLOCK_GROUP_ITEM to tree 2 - + bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG); if (!bgi) { ERR("out of memory\n"); @@ -3439,13 +4016,14 @@ static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp, LIST_ENT bgi->used = c->used; bgi->chunk_tree = 0x100; bgi->flags = c->chunk_item->type; - - if (!insert_tree_item(Vcb, Vcb->extent_root, c->offset, TYPE_BLOCK_GROUP_ITEM, c->chunk_item->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, Irp, rollback)) { + + Status = insert_tree_item(Vcb, Vcb->extent_root, c->offset, TYPE_BLOCK_GROUP_ITEM, c->chunk_item->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, Irp); + if (!NT_SUCCESS(Status)) { ERR("insert_tree_item failed\n"); ExFreePool(bgi); - return STATUS_INSUFFICIENT_RESOURCES; + return Status; } - + if (c->chunk_item->type & BLOCK_FLAG_RAID0) factor = c->chunk_item->num_stripes; else if (c->chunk_item->type & BLOCK_FLAG_RAID10) @@ -3458,40 +4036,41 @@ static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp, LIST_ENT factor = 1; // add DEV_EXTENTs to tree 4 - + cis = 
(CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; - + for (i = 0; i < c->chunk_item->num_stripes; i++) { DEV_EXTENT* de; - + de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG); if (!de) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + de->chunktree = Vcb->chunk_root->id; de->objid = 0x100; de->address = c->offset; de->length = c->chunk_item->size / factor; de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid; - if (!insert_tree_item(Vcb, Vcb->dev_root, c->devices[i]->devitem.dev_id, TYPE_DEV_EXTENT, cis[i].offset, de, sizeof(DEV_EXTENT), NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); + Status = insert_tree_item(Vcb, Vcb->dev_root, c->devices[i]->devitem.dev_id, TYPE_DEV_EXTENT, cis[i].offset, de, sizeof(DEV_EXTENT), NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); ExFreePool(de); - return STATUS_INTERNAL_ERROR; + return Status; } - + // FIXME - no point in calling this twice for the same device - Status = update_dev_item(Vcb, c->devices[i], Irp, rollback); + Status = update_dev_item(Vcb, c->devices[i], Irp); if (!NT_SUCCESS(Status)) { ERR("update_dev_item returned %08x\n", Status); return Status; } } - + c->created = FALSE; - + return STATUS_SUCCESS; } @@ -3502,238 +4081,195 @@ static void remove_from_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 ob le = Vcb->sys_chunks.Flink; while (le != &Vcb->sys_chunks) { sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry); - + if (sc2->key.obj_id == obj_id && sc2->key.obj_type == obj_type && sc2->key.offset == offset) { RemoveEntryList(&sc2->list_entry); - + Vcb->superblock.n -= sizeof(KEY) + sc2->size; - + ExFreePool(sc2->data); ExFreePool(sc2); regen_bootstrap(Vcb); return; } - + le = le->Flink; } } -static NTSTATUS STDCALL set_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, UINT64 inode, char* name, UINT32 crc32, - UINT8* data, UINT16 datalen, PIRP Irp, LIST_ENTRY* rollback) { - ULONG xasize; 
+static NTSTATUS set_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, UINT64 inode, char* name, UINT16 namelen, + UINT32 crc32, UINT8* data, UINT16 datalen) { + NTSTATUS Status; + UINT16 xasize; DIR_ITEM* xa; - - TRACE("(%p, %llx, %llx, %s, %08x, %p, %u)\n", Vcb, subvol->id, inode, name, crc32, data, datalen); - - xasize = sizeof(DIR_ITEM) - 1 + (ULONG)strlen(name) + datalen; - + + TRACE("(%p, %llx, %llx, %.*s, %08x, %p, %u)\n", Vcb, subvol->id, inode, namelen, name, crc32, data, datalen); + + xasize = (UINT16)offsetof(DIR_ITEM, name[0]) + namelen + datalen; + xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG); if (!xa) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + xa->key.obj_id = 0; xa->key.obj_type = 0; xa->key.offset = 0; xa->transid = Vcb->superblock.generation; xa->m = datalen; - xa->n = (UINT16)strlen(name); + xa->n = namelen; xa->type = BTRFS_TYPE_EA; - RtlCopyMemory(xa->name, name, strlen(name)); - RtlCopyMemory(xa->name + strlen(name), data, datalen); - - if (!insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_SetXattr, Irp, rollback)) - return STATUS_INTERNAL_ERROR; - + RtlCopyMemory(xa->name, name, namelen); + RtlCopyMemory(xa->name + namelen, data, datalen); + + Status = insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_SetXattr); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); + ExFreePool(xa); + return Status; + } + return STATUS_SUCCESS; } -static BOOL STDCALL delete_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char* name, UINT32 crc32, PIRP Irp, LIST_ENTRY* rollback) { - KEY searchkey; - traverse_ptr tp; - DIR_ITEM* xa; +static NTSTATUS delete_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, UINT64 inode, char* name, + UINT16 namelen, UINT32 crc32) { NTSTATUS Status; - - TRACE("(%p, %llx, %llx, %s, %08x)\n", Vcb, subvol->id, inode, name, crc32); - 
- searchkey.obj_id = inode; - searchkey.obj_type = TYPE_XATTR_ITEM; - searchkey.offset = crc32; - - Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - return FALSE; - } - - if (!keycmp(tp.item->key, searchkey)) { // key exists - ULONG size = tp.item->size; - - if (tp.item->size < sizeof(DIR_ITEM)) { - ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM)); - - return FALSE; - } else { - xa = (DIR_ITEM*)tp.item->data; - - while (TRUE) { - ULONG oldxasize; - - if (size < sizeof(DIR_ITEM) || size < sizeof(DIR_ITEM) - 1 + xa->m + xa->n) { - ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - - return FALSE; - } - - oldxasize = sizeof(DIR_ITEM) - 1 + xa->m + xa->n; - - if (xa->n == strlen(name) && RtlCompareMemory(name, xa->name, xa->n) == xa->n) { - ULONG newsize; - UINT8 *newdata, *dioff; - - newsize = tp.item->size - (sizeof(DIR_ITEM) - 1 + xa->n + xa->m); - - delete_tree_item(Vcb, &tp, rollback); - - if (newsize == 0) { - TRACE("xattr %s deleted\n", name); - - return TRUE; - } + UINT16 xasize; + DIR_ITEM* xa; - // FIXME - deleting collisions almost certainly works, but we should test it properly anyway - newdata = ExAllocatePoolWithTag(PagedPool, newsize, ALLOC_TAG); - if (!newdata) { - ERR("out of memory\n"); - return FALSE; - } + TRACE("(%p, %llx, %llx, %.*s, %08x)\n", Vcb, subvol->id, inode, namelen, name, crc32); - if ((UINT8*)xa > tp.item->data) { - RtlCopyMemory(newdata, tp.item->data, (UINT8*)xa - tp.item->data); - dioff = newdata + ((UINT8*)xa - tp.item->data); - } else { - dioff = newdata; - } - - if ((UINT8*)&xa->name[xa->n+xa->m] - tp.item->data < tp.item->size) - RtlCopyMemory(dioff, &xa->name[xa->n+xa->m], tp.item->size - ((UINT8*)&xa->name[xa->n+xa->m] - tp.item->data)); - - insert_tree_item(Vcb, subvol, 
inode, TYPE_XATTR_ITEM, crc32, newdata, newsize, NULL, Irp, rollback); - - - return TRUE; - } - - if (xa->m + xa->n >= size) { // FIXME - test this works - WARN("xattr %s not found\n", name); + xasize = (UINT16)offsetof(DIR_ITEM, name[0]) + namelen; - return FALSE; - } else { - xa = (DIR_ITEM*)&xa->name[xa->m + xa->n]; - size -= oldxasize; - } - } - } - } else { - WARN("xattr %s not found\n", name); - - return FALSE; + xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG); + if (!xa) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + xa->key.obj_id = 0; + xa->key.obj_type = 0; + xa->key.offset = 0; + xa->transid = Vcb->superblock.generation; + xa->m = 0; + xa->n = namelen; + xa->type = BTRFS_TYPE_EA; + RtlCopyMemory(xa->name, name, namelen); + + Status = insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_DeleteXattr); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); + ExFreePool(xa); + return Status; } + + return STATUS_SUCCESS; } -static NTSTATUS insert_sparse_extent(fcb* fcb, UINT64 start, UINT64 length, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS insert_sparse_extent(fcb* fcb, LIST_ENTRY* batchlist, UINT64 start, UINT64 length) { + NTSTATUS Status; EXTENT_DATA* ed; EXTENT_DATA2* ed2; - + TRACE("((%llx, %llx), %llx, %llx)\n", fcb->subvol->id, fcb->inode, start, length); - + ed = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG); if (!ed) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + ed->generation = fcb->Vcb->superblock.generation; ed->decoded_size = length; ed->compression = BTRFS_COMPRESSION_NONE; ed->encryption = BTRFS_ENCRYPTION_NONE; ed->encoding = BTRFS_ENCODING_NONE; ed->type = EXTENT_TYPE_REGULAR; - + ed2 = (EXTENT_DATA2*)ed->data; ed2->address = 0; ed2->size = 0; ed2->offset = 0; ed2->num_bytes = length; - - if (!insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, 
TYPE_EXTENT_DATA, start, ed, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, start, ed, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), Batch_Insert); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); + ExFreePool(ed); + return Status; } return STATUS_SUCCESS; } -static BOOL insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, root* r, UINT64 objid, UINT64 objtype, UINT64 offset, - void* data, UINT16 datalen, enum batch_operation operation, PIRP Irp, LIST_ENTRY* rollback) { +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(suppress: 28194) +#endif +NTSTATUS insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, root* r, UINT64 objid, UINT8 objtype, UINT64 offset, + _In_opt_ _When_(return >= 0, __drv_aliasesMem) void* data, UINT16 datalen, enum batch_operation operation) { LIST_ENTRY* le; batch_root* br = NULL; batch_item* bi; - + le = batchlist->Flink; while (le != batchlist) { batch_root* br2 = CONTAINING_RECORD(le, batch_root, list_entry); - + if (br2->r == r) { br = br2; break; } - + le = le->Flink; } - + if (!br) { br = ExAllocatePoolWithTag(PagedPool, sizeof(batch_root), ALLOC_TAG); if (!br) { ERR("out of memory\n"); - return FALSE; + return STATUS_INSUFFICIENT_RESOURCES; } - + br->r = r; InitializeListHead(&br->items); InsertTailList(batchlist, &br->list_entry); } - + bi = ExAllocateFromPagedLookasideList(&Vcb->batch_item_lookaside); if (!bi) { ERR("out of memory\n"); - return FALSE; + return STATUS_INSUFFICIENT_RESOURCES; } - + bi->key.obj_id = objid; bi->key.obj_type = objtype; bi->key.offset = offset; bi->data = data; bi->datalen = datalen; bi->operation = operation; - + le = br->items.Blink; while (le != &br->items) { batch_item* bi2 = CONTAINING_RECORD(le, batch_item, list_entry); - + if 
(keycmp(bi2->key, bi->key) != 1) { InsertHeadList(&bi2->list_entry, &bi->list_entry); - return TRUE; + return STATUS_SUCCESS; } - + le = le->Blink; } - + InsertHeadList(&br->items, &bi->list_entry); - - return TRUE; + + return STATUS_SUCCESS; } +#ifdef _MSC_VER +#pragma warning(pop) +#endif typedef struct { UINT64 address; @@ -3746,45 +4282,45 @@ typedef struct { LIST_ENTRY list_entry; } extent_range; -static void rationalize_extents(fcb* fcb, PIRP Irp, LIST_ENTRY* rollback) { +static void rationalize_extents(fcb* fcb, PIRP Irp) { LIST_ENTRY* le; LIST_ENTRY extent_ranges; extent_range* er; BOOL changed = FALSE, truncating = FALSE; UINT32 num_extents = 0; - + InitializeListHead(&extent_ranges); - + le = fcb->extents.Flink; while (le != &fcb->extents) { extent* ext = CONTAINING_RECORD(le, extent, list_entry); - - if ((ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) && ext->data->compression == BTRFS_COMPRESSION_NONE && ext->unique) { - EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data; - + + if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; + if (ed2->size != 0) { LIST_ENTRY* le2; - + le2 = extent_ranges.Flink; while (le2 != &extent_ranges) { extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry); - + if (er2->address == ed2->address) { er2->skip_start = min(er2->skip_start, ed2->offset); er2->skip_end = min(er2->skip_end, ed2->size - ed2->offset - ed2->num_bytes); goto cont; } else if (er2->address > ed2->address) break; - + le2 = le2->Flink; } - + er = ExAllocatePoolWithTag(PagedPool, sizeof(extent_range), ALLOC_TAG); // FIXME - should be from lookaside? 
if (!er) { ERR("out of memory\n"); goto end; } - + er->address = ed2->address; er->length = ed2->size; er->offset = ext->offset - ed2->offset; @@ -3792,232 +4328,256 @@ static void rationalize_extents(fcb* fcb, PIRP Irp, LIST_ENTRY* rollback) { er->chunk = NULL; er->skip_start = ed2->offset; er->skip_end = ed2->size - ed2->offset - ed2->num_bytes; - + if (er->skip_start != 0 || er->skip_end != 0) truncating = TRUE; - + InsertHeadList(le2->Blink, &er->list_entry); num_extents++; } } - + cont: le = le->Flink; } - + if (num_extents == 0 || (num_extents == 1 && !truncating)) goto end; - + le = extent_ranges.Flink; while (le != &extent_ranges) { er = CONTAINING_RECORD(le, extent_range, list_entry); - + if (!er->chunk) { LIST_ENTRY* le2; - + er->chunk = get_chunk_from_address(fcb->Vcb, er->address); - + if (!er->chunk) { ERR("get_chunk_from_address(%llx) failed\n", er->address); goto end; } - + le2 = le->Flink; while (le2 != &extent_ranges) { extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry); - + if (!er2->chunk && er2->address >= er->chunk->offset && er2->address < er->chunk->offset + er->chunk->chunk_item->size) er2->chunk = er->chunk; - + le2 = le2->Flink; } } - + le = le->Flink; } - + if (truncating) { // truncate beginning or end of extent if unused - + le = extent_ranges.Flink; while (le != &extent_ranges) { er = CONTAINING_RECORD(le, extent_range, list_entry); - + if (er->skip_start > 0) { LIST_ENTRY* le2 = fcb->extents.Flink; while (le2 != &fcb->extents) { extent* ext = CONTAINING_RECORD(le2, extent, list_entry); - - if ((ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) && ext->data->compression == BTRFS_COMPRESSION_NONE && ext->unique) { - EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data; - + + if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) { + EXTENT_DATA2* ed2 = 
(EXTENT_DATA2*)ext->extent_data.data; + if (ed2->size != 0 && ed2->address == er->address) { NTSTATUS Status; - + Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, TRUE, Irp); if (!NT_SUCCESS(Status)) { ERR("update_changed_extent_ref returned %08x\n", Status); goto end; } - - ext->data->decoded_size -= er->skip_start; + + ext->extent_data.decoded_size -= er->skip_start; ed2->size -= er->skip_start; ed2->address += er->skip_start; ed2->offset -= er->skip_start; - + add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM); } } - + le2 = le2->Flink; } - + if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) - add_checksum_entry(fcb->Vcb, er->address, er->skip_start / fcb->Vcb->superblock.sector_size, NULL, NULL, rollback); - - decrease_chunk_usage(er->chunk, er->skip_start); - - space_list_add(fcb->Vcb, er->chunk, TRUE, er->address, er->skip_start, NULL); - + add_checksum_entry(fcb->Vcb, er->address, (ULONG)(er->skip_start / fcb->Vcb->superblock.sector_size), NULL, NULL); + + ExAcquireResourceExclusiveLite(&er->chunk->lock, TRUE); + + if (!er->chunk->cache_loaded) { + NTSTATUS Status = load_cache_chunk(fcb->Vcb, er->chunk, NULL); + + if (!NT_SUCCESS(Status)) { + ERR("load_cache_chunk returned %08x\n", Status); + ExReleaseResourceLite(&er->chunk->lock); + goto end; + } + } + + er->chunk->used -= er->skip_start; + + space_list_add(er->chunk, er->address, er->skip_start, NULL); + + ExReleaseResourceLite(&er->chunk->lock); + er->address += er->skip_start; er->length -= er->skip_start; } - + if (er->skip_end > 0) { LIST_ENTRY* le2 = fcb->extents.Flink; while (le2 != &fcb->extents) { extent* ext = CONTAINING_RECORD(le2, extent, list_entry); - - if ((ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) 
&& ext->data->compression == BTRFS_COMPRESSION_NONE && ext->unique) { - EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data; - + + if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; + if (ed2->size != 0 && ed2->address == er->address) { NTSTATUS Status; - + Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, TRUE, Irp); if (!NT_SUCCESS(Status)) { ERR("update_changed_extent_ref returned %08x\n", Status); goto end; } - - ext->data->decoded_size -= er->skip_end; + + ext->extent_data.decoded_size -= er->skip_end; ed2->size -= er->skip_end; - + add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM); } } - + le2 = le2->Flink; } - + if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) - add_checksum_entry(fcb->Vcb, er->address + er->length - er->skip_end, er->skip_end / fcb->Vcb->superblock.sector_size, NULL, NULL, rollback); - - decrease_chunk_usage(er->chunk, er->skip_end); - - space_list_add(fcb->Vcb, er->chunk, TRUE, er->address + er->length - er->skip_end, er->skip_end, NULL); - + add_checksum_entry(fcb->Vcb, er->address + er->length - er->skip_end, (ULONG)(er->skip_end / fcb->Vcb->superblock.sector_size), NULL, NULL); + + ExAcquireResourceExclusiveLite(&er->chunk->lock, TRUE); + + if (!er->chunk->cache_loaded) { + NTSTATUS Status = load_cache_chunk(fcb->Vcb, er->chunk, NULL); + + if (!NT_SUCCESS(Status)) { + ERR("load_cache_chunk returned %08x\n", Status); + ExReleaseResourceLite(&er->chunk->lock); + goto end; + } + } + + er->chunk->used -= er->skip_end; + + space_list_add(er->chunk, er->address + er->length - er->skip_end, er->skip_end, 
NULL); + + ExReleaseResourceLite(&er->chunk->lock); + er->length -= er->skip_end; } - + le = le->Flink; } } - + if (num_extents < 2) goto end; - + // merge together adjacent extents le = extent_ranges.Flink; while (le != &extent_ranges) { er = CONTAINING_RECORD(le, extent_range, list_entry); - + if (le->Flink != &extent_ranges && er->length < MAX_EXTENT_SIZE) { extent_range* er2 = CONTAINING_RECORD(le->Flink, extent_range, list_entry); - + if (er->chunk == er2->chunk) { if (er2->address == er->address + er->length && er2->offset >= er->offset + er->length) { if (er->length + er2->length <= MAX_EXTENT_SIZE) { er->length += er2->length; er->changed = TRUE; - + RemoveEntryList(&er2->list_entry); ExFreePool(er2); - + changed = TRUE; continue; -// } else { // FIXME - make changing of beginning of offset work -// er2->length = er2->address + er->length - er->address - MAX_EXTENT_SIZE; -// er2->address = er->address + MAX_EXTENT_SIZE; -// er->length = MAX_EXTENT_SIZE; } } } } - + le = le->Flink; } - + if (!changed) goto end; - + le = fcb->extents.Flink; while (le != &fcb->extents) { extent* ext = CONTAINING_RECORD(le, extent, list_entry); - - if ((ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) && ext->data->compression == BTRFS_COMPRESSION_NONE && ext->unique) { - EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data; - + + if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->unique) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; + if (ed2->size != 0) { LIST_ENTRY* le2; - + le2 = extent_ranges.Flink; while (le2 != &extent_ranges) { extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry); - + if (ed2->address >= er2->address && ed2->address + ed2->size <= er2->address + er2->length && er2->changed) { NTSTATUS Status; - + Status = update_changed_extent_ref(fcb->Vcb, er2->chunk, ed2->address, ed2->size, 
fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, TRUE, Irp); if (!NT_SUCCESS(Status)) { ERR("update_changed_extent_ref returned %08x\n", Status); goto end; } - + ed2->offset += ed2->address - er2->address; ed2->address = er2->address; ed2->size = er2->length; - ext->data->decoded_size = ed2->size; - + ext->extent_data.decoded_size = ed2->size; + add_changed_extent_ref(er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM); - + break; } - + le2 = le2->Flink; } } } - + le = le->Flink; } - + end: while (!IsListEmpty(&extent_ranges)) { le = RemoveHeadList(&extent_ranges); er = CONTAINING_RECORD(le, extent_range, list_entry); - + ExFreePool(er); } } -void flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) { +NTSTATUS flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp) { traverse_ptr tp; KEY searchkey; NTSTATUS Status; @@ -4027,127 +4587,135 @@ void flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY UINT64 old_size = 0; BOOL extents_changed; #endif - + if (fcb->ads) { - if (fcb->deleted) - delete_xattr(fcb->Vcb, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, Irp, rollback); - else { - Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, (UINT8*)fcb->adsdata.Buffer, fcb->adsdata.Length, Irp, rollback); + if (fcb->deleted) { + Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adsxattr.Length, fcb->adshash); + if (!NT_SUCCESS(Status)) { + ERR("delete_xattr returned %08x\n", Status); + goto end; + } + } else { + Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adsxattr.Length, + fcb->adshash, (UINT8*)fcb->adsdata.Buffer, fcb->adsdata.Length); if (!NT_SUCCESS(Status)) { ERR("set_xattr returned %08x\n", Status); 
goto end; } } + + Status = STATUS_SUCCESS; goto end; } - + if (fcb->deleted) { - if (!insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0xffffffffffffffff, NULL, 0, Batch_DeleteInode, Irp, rollback)) - ERR("insert_tree_item_batch failed\n"); - + Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0xffffffffffffffff, NULL, 0, Batch_DeleteInode); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); + goto end; + } + + Status = STATUS_SUCCESS; goto end; } - + #ifdef DEBUG_PARANOID extents_changed = fcb->extents_changed; #endif - + if (fcb->extents_changed) { - BOOL b; - traverse_ptr next_tp; LIST_ENTRY* le; BOOL prealloc = FALSE, extents_inline = FALSE; UINT64 last_end; - + // delete ignored extent items le = fcb->extents.Flink; while (le != &fcb->extents) { LIST_ENTRY* le2 = le->Flink; extent* ext = CONTAINING_RECORD(le, extent, list_entry); - + if (ext->ignore) { RemoveEntryList(&ext->list_entry); - + if (ext->csum) ExFreePool(ext->csum); - - ExFreePool(ext->data); + ExFreePool(ext); } - + le = le2; } - + le = fcb->extents.Flink; while (le != &fcb->extents) { extent* ext = CONTAINING_RECORD(le, extent, list_entry); - - if (ext->inserted && ext->csum && ext->data->type == EXTENT_TYPE_REGULAR) { - EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data; - + + if (ext->inserted && ext->csum && ext->extent_data.type == EXTENT_TYPE_REGULAR) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; + if (ed2->size > 0) { // not sparse - if (ext->data->compression == BTRFS_COMPRESSION_NONE) - add_checksum_entry(fcb->Vcb, ed2->address + ed2->offset, ed2->num_bytes / fcb->Vcb->superblock.sector_size, ext->csum, Irp, rollback); + if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE) + add_checksum_entry(fcb->Vcb, ed2->address + ed2->offset, (ULONG)(ed2->num_bytes / fcb->Vcb->superblock.sector_size), ext->csum, Irp); else - add_checksum_entry(fcb->Vcb, 
ed2->address, ed2->size / fcb->Vcb->superblock.sector_size, ext->csum, Irp, rollback); + add_checksum_entry(fcb->Vcb, ed2->address, (ULONG)(ed2->size / fcb->Vcb->superblock.sector_size), ext->csum, Irp); } } - + le = le->Flink; } - + if (!IsListEmpty(&fcb->extents)) { - rationalize_extents(fcb, Irp, rollback); - + rationalize_extents(fcb, Irp); + // merge together adjacent EXTENT_DATAs pointing to same extent - + le = fcb->extents.Flink; while (le != &fcb->extents) { LIST_ENTRY* le2 = le->Flink; extent* ext = CONTAINING_RECORD(le, extent, list_entry); - - if ((ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) && le->Flink != &fcb->extents) { + + if ((ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) && le->Flink != &fcb->extents) { extent* nextext = CONTAINING_RECORD(le->Flink, extent, list_entry); - - if (ext->data->type == nextext->data->type) { - EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data; - EXTENT_DATA2* ned2 = (EXTENT_DATA2*)nextext->data->data; - + + if (ext->extent_data.type == nextext->extent_data.type) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; + EXTENT_DATA2* ned2 = (EXTENT_DATA2*)nextext->extent_data.data; + if (ed2->size != 0 && ed2->address == ned2->address && ed2->size == ned2->size && nextext->offset == ext->offset + ed2->num_bytes && ned2->offset == ed2->offset + ed2->num_bytes) { chunk* c; - - if (ext->data->compression == BTRFS_COMPRESSION_NONE && ext->csum) { - ULONG len = (ed2->num_bytes + ned2->num_bytes) / fcb->Vcb->superblock.sector_size; + + if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE && ext->csum) { + ULONG len = (ULONG)((ed2->num_bytes + ned2->num_bytes) / fcb->Vcb->superblock.sector_size); UINT32* csum; - + csum = ExAllocatePoolWithTag(NonPagedPool, len * sizeof(UINT32), ALLOC_TAG); if (!csum) { ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - - RtlCopyMemory(csum, ext->csum, ed2->num_bytes 
* sizeof(UINT32) / fcb->Vcb->superblock.sector_size); + + RtlCopyMemory(csum, ext->csum, (ULONG)(ed2->num_bytes * sizeof(UINT32) / fcb->Vcb->superblock.sector_size)); RtlCopyMemory(&csum[ed2->num_bytes / fcb->Vcb->superblock.sector_size], nextext->csum, - ned2->num_bytes * sizeof(UINT32) / fcb->Vcb->superblock.sector_size); - + (ULONG)(ned2->num_bytes * sizeof(UINT32) / fcb->Vcb->superblock.sector_size)); + ExFreePool(ext->csum); ext->csum = csum; } - - ext->data->generation = fcb->Vcb->superblock.generation; + + ext->extent_data.generation = fcb->Vcb->superblock.generation; ed2->num_bytes += ned2->num_bytes; - + RemoveEntryList(&nextext->list_entry); - + if (nextext->csum) ExFreePool(nextext->csum); - - ExFreePool(nextext->data); + ExFreePool(nextext); - + c = get_chunk_from_address(fcb->Vcb, ed2->address); - + if (!c) { ERR("get_chunk_from_address(%llx) failed\n", ed2->address); } else { @@ -4158,281 +4726,413 @@ void flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY goto end; } } - + le2 = le; } } } - + le = le2; } } - + if (!fcb->created) { // delete existing EXTENT_DATA items - - searchkey.obj_id = fcb->inode; - searchkey.obj_type = TYPE_EXTENT_DATA; - searchkey.offset = 0; - - Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp); + + Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, 0, NULL, 0, Batch_DeleteExtentData); if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); + ERR("insert_tree_item_batch returned %08x\n", Status); goto end; } - - do { - if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) - delete_tree_item(fcb->Vcb, &tp, rollback); - - b = find_next_item(fcb->Vcb, &tp, &next_tp, FALSE, Irp); - - if (b) { - tp = next_tp; - - if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type)) - break; - } - } while (b); } - + // 
add new EXTENT_DATAs - + last_end = 0; - + le = fcb->extents.Flink; while (le != &fcb->extents) { extent* ext = CONTAINING_RECORD(le, extent, list_entry); EXTENT_DATA* ed; - + ext->inserted = FALSE; - + if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && ext->offset > last_end) { - Status = insert_sparse_extent(fcb, last_end, ext->offset - last_end, Irp, rollback); + Status = insert_sparse_extent(fcb, batchlist, last_end, ext->offset - last_end); if (!NT_SUCCESS(Status)) { ERR("insert_sparse_extent returned %08x\n", Status); goto end; } } - + ed = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG); if (!ed) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - - RtlCopyMemory(ed, ext->data, ext->datalen); - - if (!insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, ext->offset, - ed, ext->datalen, Batch_Insert, Irp, rollback)) { - ERR("insert_tree_item_batch failed\n"); - Status = STATUS_INTERNAL_ERROR; + + RtlCopyMemory(ed, &ext->extent_data, ext->datalen); + + Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, ext->offset, + ed, ext->datalen, Batch_Insert); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); goto end; } - - if (ext->datalen >= sizeof(EXTENT_DATA) && ed->type == EXTENT_TYPE_PREALLOC) + + if (ed->type == EXTENT_TYPE_PREALLOC) prealloc = TRUE; - - if (ext->datalen >= sizeof(EXTENT_DATA) && ed->type == EXTENT_TYPE_INLINE) + + if (ed->type == EXTENT_TYPE_INLINE) extents_inline = TRUE; - + if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES)) { if (ed->type == EXTENT_TYPE_INLINE) last_end = ext->offset + ed->decoded_size; else { EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; - + last_end = ext->offset + ed2->num_bytes; } } - + le = le->Flink; } - + if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && !extents_inline && 
sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) > last_end) { - Status = insert_sparse_extent(fcb, last_end, sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) - last_end, Irp, rollback); + Status = insert_sparse_extent(fcb, batchlist, last_end, sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) - last_end); if (!NT_SUCCESS(Status)) { ERR("insert_sparse_extent returned %08x\n", Status); goto end; } } - + // update prealloc flag in INODE_ITEM - + if (!prealloc) fcb->inode_item.flags &= ~BTRFS_INODE_PREALLOC; else fcb->inode_item.flags |= BTRFS_INODE_PREALLOC; - + fcb->inode_item_changed = TRUE; - + fcb->extents_changed = FALSE; } - + if ((!fcb->created && fcb->inode_item_changed) || cache) { searchkey.obj_id = fcb->inode; searchkey.obj_type = TYPE_INODE_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); goto end; } - + if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { if (cache) { ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG); if (!ii) { ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM)); - - if (!insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); + + Status = insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); goto end; } - + ii_offset = 0; } else { ERR("could not find INODE_ITEM for inode %llx in subvol %llx\n", fcb->inode, fcb->subvol->id); - int3; + Status = STATUS_INTERNAL_ERROR; goto end; } } else { #ifdef DEBUG_PARANOID INODE_ITEM* ii2 = 
(INODE_ITEM*)tp.item->data; - + old_size = ii2->st_size; #endif - + ii_offset = tp.item->key.offset; } - - if (!cache) - delete_tree_item(fcb->Vcb, &tp, rollback); - else { + + if (!cache) { + Status = delete_tree_item(fcb->Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + goto end; + } + } else { searchkey.obj_id = fcb->inode; searchkey.obj_type = TYPE_INODE_ITEM; searchkey.offset = ii_offset; - + Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); goto end; } - + if (keycmp(tp.item->key, searchkey)) { ERR("could not find INODE_ITEM for inode %llx in subvol %llx\n", fcb->inode, fcb->subvol->id); - int3; + Status = STATUS_INTERNAL_ERROR; goto end; } else RtlCopyMemory(tp.item->data, &fcb->inode_item, min(tp.item->size, sizeof(INODE_ITEM))); } - } else - ii_offset = 0; - + #ifdef DEBUG_PARANOID - if (!extents_changed && fcb->type != BTRFS_TYPE_DIRECTORY && old_size != fcb->inode_item.st_size) { - ERR("error - size has changed but extents not marked as changed\n"); - int3; - } + if (!extents_changed && fcb->type != BTRFS_TYPE_DIRECTORY && old_size != fcb->inode_item.st_size) { + ERR("error - size has changed but extents not marked as changed\n"); + int3; + } #endif - + } else + ii_offset = 0; + fcb->created = FALSE; - + if (!cache && fcb->inode_item_changed) { ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG); if (!ii) { ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM)); - - if (!insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, ii_offset, ii, sizeof(INODE_ITEM), - Batch_Insert, Irp, rollback)) { - ERR("insert_tree_item_batch failed\n"); + + Status = insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, ii_offset, ii, sizeof(INODE_ITEM), + Batch_Insert); + 
if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); goto end; } - + fcb->inode_item_changed = FALSE; } - + if (fcb->sd_dirty) { - Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, EA_NTACL_HASH, (UINT8*)fcb->sd, RtlLengthSecurityDescriptor(fcb->sd), Irp, rollback); - if (!NT_SUCCESS(Status)) { - ERR("set_xattr returned %08x\n", Status); + if (!fcb->sd_deleted) { + Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, (UINT16)strlen(EA_NTACL), + EA_NTACL_HASH, (UINT8*)fcb->sd, (UINT16)RtlLengthSecurityDescriptor(fcb->sd)); + if (!NT_SUCCESS(Status)) { + ERR("set_xattr returned %08x\n", Status); + goto end; + } + } else { + Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, (UINT16)strlen(EA_NTACL), EA_NTACL_HASH); + if (!NT_SUCCESS(Status)) { + ERR("delete_xattr returned %08x\n", Status); + goto end; + } } - + + fcb->sd_deleted = FALSE; fcb->sd_dirty = FALSE; } - + if (fcb->atts_changed) { if (!fcb->atts_deleted) { UINT8 val[16], *val2; ULONG atts = fcb->atts; - + TRACE("inserting new DOSATTRIB xattr\n"); - + + if (fcb->inode == SUBVOL_ROOT_INODE) + atts &= ~FILE_ATTRIBUTE_READONLY; + val2 = &val[sizeof(val) - 1]; - + do { UINT8 c = atts % 16; - *val2 = (c >= 0 && c <= 9) ? (c + '0') : (c - 0xa + 'a'); - + *val2 = c <= 9 ? 
(c + '0') : (c - 0xa + 'a'); + val2--; atts >>= 4; } while (atts != 0); - + *val2 = 'x'; val2--; *val2 = '0'; - - Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, EA_DOSATTRIB_HASH, val2, val + sizeof(val) - val2, Irp, rollback); + + Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, (UINT16)strlen(EA_DOSATTRIB), + EA_DOSATTRIB_HASH, val2, (UINT16)(val + sizeof(val) - val2)); if (!NT_SUCCESS(Status)) { ERR("set_xattr returned %08x\n", Status); goto end; } - } else - delete_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_DOSATTRIB, EA_DOSATTRIB_HASH, Irp, rollback); - + } else { + Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, (UINT16)strlen(EA_DOSATTRIB), EA_DOSATTRIB_HASH); + if (!NT_SUCCESS(Status)) { + ERR("delete_xattr returned %08x\n", Status); + goto end; + } + } + fcb->atts_changed = FALSE; fcb->atts_deleted = FALSE; } - + if (fcb->reparse_xattr_changed) { if (fcb->reparse_xattr.Buffer && fcb->reparse_xattr.Length > 0) { - Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, EA_REPARSE_HASH, (UINT8*)fcb->reparse_xattr.Buffer, fcb->reparse_xattr.Length, Irp, rollback); + Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, (UINT16)strlen(EA_REPARSE), + EA_REPARSE_HASH, (UINT8*)fcb->reparse_xattr.Buffer, (UINT16)fcb->reparse_xattr.Length); if (!NT_SUCCESS(Status)) { ERR("set_xattr returned %08x\n", Status); goto end; } - } else - delete_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_REPARSE, EA_REPARSE_HASH, Irp, rollback); - + } else { + Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, (UINT16)strlen(EA_REPARSE), EA_REPARSE_HASH); + if (!NT_SUCCESS(Status)) { + ERR("delete_xattr returned %08x\n", Status); + goto end; + } + } + fcb->reparse_xattr_changed = FALSE; } - + if (fcb->ea_changed) { if (fcb->ea_xattr.Buffer && fcb->ea_xattr.Length > 0) { - Status = set_xattr(fcb->Vcb, batchlist, 
fcb->subvol, fcb->inode, EA_EA, EA_EA_HASH, (UINT8*)fcb->ea_xattr.Buffer, fcb->ea_xattr.Length, Irp, rollback); + Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_EA, (UINT16)strlen(EA_EA), + EA_EA_HASH, (UINT8*)fcb->ea_xattr.Buffer, (UINT16)fcb->ea_xattr.Length); if (!NT_SUCCESS(Status)) { ERR("set_xattr returned %08x\n", Status); goto end; } - } else - delete_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_EA, EA_EA_HASH, Irp, rollback); - + } else { + Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_EA, (UINT16)strlen(EA_EA), EA_EA_HASH); + if (!NT_SUCCESS(Status)) { + ERR("delete_xattr returned %08x\n", Status); + goto end; + } + } + fcb->ea_changed = FALSE; } - + + if (fcb->prop_compression_changed) { + if (fcb->prop_compression == PropCompression_None) { + Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, (UINT16)strlen(EA_PROP_COMPRESSION), EA_PROP_COMPRESSION_HASH); + if (!NT_SUCCESS(Status)) { + ERR("delete_xattr returned %08x\n", Status); + goto end; + } + } else if (fcb->prop_compression == PropCompression_Zlib) { + const char zlib[] = "zlib"; + + Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, (UINT16)strlen(EA_PROP_COMPRESSION), + EA_PROP_COMPRESSION_HASH, (UINT8*)zlib, (UINT16)strlen(zlib)); + if (!NT_SUCCESS(Status)) { + ERR("set_xattr returned %08x\n", Status); + goto end; + } + } else if (fcb->prop_compression == PropCompression_LZO) { + const char lzo[] = "lzo"; + + Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_PROP_COMPRESSION, (UINT16)strlen(EA_PROP_COMPRESSION), + EA_PROP_COMPRESSION_HASH, (UINT8*)lzo, (UINT16)strlen(lzo)); + if (!NT_SUCCESS(Status)) { + ERR("set_xattr returned %08x\n", Status); + goto end; + } + } + + fcb->prop_compression_changed = FALSE; + } + + if (fcb->xattrs_changed) { + LIST_ENTRY* le; + + le = fcb->xattrs.Flink; + while (le != &fcb->xattrs) { + xattr* xa = CONTAINING_RECORD(le, xattr, 
list_entry); + LIST_ENTRY* le2 = le->Flink; + + if (xa->dirty) { + UINT32 hash = calc_crc32c(0xfffffffe, (UINT8*)xa->data, xa->namelen); + + if (xa->valuelen == 0) { + Status = delete_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, xa->data, xa->namelen, hash); + if (!NT_SUCCESS(Status)) { + ERR("delete_xattr returned %08x\n", Status); + goto end; + } + + RemoveEntryList(&xa->list_entry); + ExFreePool(xa); + } else { + Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, xa->data, xa->namelen, + hash, (UINT8*)&xa->data[xa->namelen], xa->valuelen); + if (!NT_SUCCESS(Status)) { + ERR("set_xattr returned %08x\n", Status); + goto end; + } + + xa->dirty = FALSE; + } + } + + le = le2; + } + + fcb->xattrs_changed = FALSE; + } + + Status = STATUS_SUCCESS; + end: - fcb->dirty = FALSE; + if (fcb->dirty) { + BOOL lock = FALSE; + + fcb->dirty = FALSE; + + if (!ExIsResourceAcquiredExclusiveLite(&fcb->Vcb->dirty_fcbs_lock)) { + ExAcquireResourceExclusiveLite(&fcb->Vcb->dirty_fcbs_lock, TRUE); + lock = TRUE; + } + + RemoveEntryList(&fcb->list_entry_dirty); + + if (lock) + ExReleaseResourceLite(&fcb->Vcb->dirty_fcbs_lock); + } + + return Status; +} + +void add_trim_entry_avoid_sb(device_extension* Vcb, device* dev, UINT64 address, UINT64 size) { + int i; + ULONG sblen = (ULONG)sector_align(sizeof(superblock), Vcb->superblock.sector_size); + + i = 0; + while (superblock_addrs[i] != 0) { + if (superblock_addrs[i] + sblen >= address && superblock_addrs[i] < address + size) { + if (superblock_addrs[i] > address) + add_trim_entry(dev, address, superblock_addrs[i] - address); + + if (size <= superblock_addrs[i] + sblen - address) + return; + + size -= superblock_addrs[i] + sblen - address; + address = superblock_addrs[i] + sblen; + } else if (superblock_addrs[i] > address + size) + break; + + i++; + } + + add_trim_entry(dev, address, size); } static NTSTATUS drop_chunk(device_extension* Vcb, chunk* c, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) { @@ -4440,28 
+5140,61 @@ static NTSTATUS drop_chunk(device_extension* Vcb, chunk* c, LIST_ENTRY* batchlis KEY searchkey; traverse_ptr tp; UINT64 i, factor; - CHUNK_ITEM_STRIPE* cis; - + CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];; + TRACE("dropping chunk %llx\n", c->offset); - + + if (c->chunk_item->type & BLOCK_FLAG_RAID0) + factor = c->chunk_item->num_stripes; + else if (c->chunk_item->type & BLOCK_FLAG_RAID10) + factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes; + else if (c->chunk_item->type & BLOCK_FLAG_RAID5) + factor = c->chunk_item->num_stripes - 1; + else if (c->chunk_item->type & BLOCK_FLAG_RAID6) + factor = c->chunk_item->num_stripes - 2; + else // SINGLE, DUPLICATE, RAID1 + factor = 1; + + // do TRIM + if (Vcb->trim && !Vcb->options.no_trim) { + UINT64 len = c->chunk_item->size / factor; + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim) + add_trim_entry_avoid_sb(Vcb, c->devices[i], cis[i].offset, len); + } + } + + if (!c->cache) { + Status = load_stored_free_space_cache(Vcb, c, TRUE, Irp); + + if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) + WARN("load_stored_free_space_cache returned %08x\n", Status); + } + // remove free space cache if (c->cache) { c->cache->deleted = TRUE; - + Status = excise_extents(Vcb, c->cache, 0, c->cache->inode_item.st_size, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("excise_extents returned %08x\n", Status); return Status; } - - flush_fcb(c->cache, TRUE, batchlist, Irp, rollback); - - free_fcb(c->cache); - + + Status = flush_fcb(c->cache, TRUE, batchlist, Irp); + + free_fcb(Vcb, c->cache); + + if (!NT_SUCCESS(Status)) { + ERR("flush_fcb returned %08x\n", Status); + return Status; + } + searchkey.obj_id = FREE_SPACE_CACHE_ID; searchkey.obj_type = 0; searchkey.offset = c->offset; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - 
find_item returned %08x\n", Status); @@ -4469,286 +5202,626 @@ static NTSTATUS drop_chunk(device_extension* Vcb, chunk* c, LIST_ENTRY* batchlis } if (!keycmp(tp.item->key, searchkey)) { - delete_tree_item(Vcb, &tp, rollback); + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } } } - - if (c->chunk_item->type & BLOCK_FLAG_RAID0) - factor = c->chunk_item->num_stripes; - else if (c->chunk_item->type & BLOCK_FLAG_RAID10) - factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes; - else // SINGLE, DUPLICATE, RAID1 - factor = 1; - - cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; + + if (Vcb->space_root) { + Status = insert_tree_item_batch(batchlist, Vcb, Vcb->space_root, c->offset, TYPE_FREE_SPACE_INFO, c->chunk_item->size, + NULL, 0, Batch_DeleteFreeSpace); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); + return Status; + } + } + for (i = 0; i < c->chunk_item->num_stripes; i++) { if (!c->created) { // remove DEV_EXTENTs from tree 4 searchkey.obj_id = cis[i].dev_id; searchkey.obj_type = TYPE_DEV_EXTENT; searchkey.offset = cis[i].offset; - + Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (!keycmp(tp.item->key, searchkey)) { - delete_tree_item(Vcb, &tp, rollback); - + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + if (tp.item->size >= sizeof(DEV_EXTENT)) { DEV_EXTENT* de = (DEV_EXTENT*)tp.item->data; - + c->devices[i]->devitem.bytes_used -= de->length; - - space_list_add2(Vcb, &c->devices[i]->space, NULL, cis[i].offset, de->length, rollback); + + if (Vcb->balance.thread && Vcb->balance.shrinking && Vcb->balance.opts[0].devid == c->devices[i]->devitem.dev_id) { + if (cis[i].offset < Vcb->balance.opts[0].drange_start && 
cis[i].offset + de->length > Vcb->balance.opts[0].drange_start) + space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, Vcb->balance.opts[0].drange_start - cis[i].offset, NULL, rollback); + } else + space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, de->length, NULL, rollback); } } else WARN("could not find (%llx,%x,%llx) in dev tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); } else { UINT64 len = c->chunk_item->size / factor; - + c->devices[i]->devitem.bytes_used -= len; - space_list_add2(Vcb, &c->devices[i]->space, NULL, cis[i].offset, len, rollback); + + if (Vcb->balance.thread && Vcb->balance.shrinking && Vcb->balance.opts[0].devid == c->devices[i]->devitem.dev_id) { + if (cis[i].offset < Vcb->balance.opts[0].drange_start && cis[i].offset + len > Vcb->balance.opts[0].drange_start) + space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, Vcb->balance.opts[0].drange_start - cis[i].offset, NULL, rollback); + } else + space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, len, NULL, rollback); } } - + // modify DEV_ITEMs in chunk tree for (i = 0; i < c->chunk_item->num_stripes; i++) { if (c->devices[i]) { UINT64 j; DEV_ITEM* di; - + searchkey.obj_id = 1; searchkey.obj_type = TYPE_DEV_ITEM; searchkey.offset = c->devices[i]->devitem.dev_id; - + Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (keycmp(tp.item->key, searchkey)) { ERR("error - could not find DEV_ITEM for device %llx\n", searchkey.offset); return STATUS_INTERNAL_ERROR; } - - delete_tree_item(Vcb, &tp, rollback); - + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG); if (!di) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(di, &c->devices[i]->devitem, 
sizeof(DEV_ITEM)); - - if (!insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, c->devices[i]->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, c->devices[i]->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; } - + for (j = i + 1; j < c->chunk_item->num_stripes; j++) { if (c->devices[j] == c->devices[i]) c->devices[j] = NULL; } } } - + if (!c->created) { // remove CHUNK_ITEM from chunk tree searchkey.obj_id = 0x100; searchkey.obj_type = TYPE_CHUNK_ITEM; searchkey.offset = c->offset; - + Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - - if (!keycmp(tp.item->key, searchkey)) - delete_tree_item(Vcb, &tp, rollback); - else + + if (!keycmp(tp.item->key, searchkey)) { + Status = delete_tree_item(Vcb, &tp); + + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + } else WARN("could not find CHUNK_ITEM for chunk %llx\n", c->offset); - + // remove BLOCK_GROUP_ITEM from extent tree searchkey.obj_id = c->offset; searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - - if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) - delete_tree_item(Vcb, &tp, rollback); - else + + if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { + Status = delete_tree_item(Vcb, &tp); + + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + } else WARN("could not 
find BLOCK_GROUP_ITEM for chunk %llx\n", c->offset); } - + if (c->chunk_item->type & BLOCK_FLAG_SYSTEM) remove_from_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset); - + RemoveEntryList(&c->list_entry); - + // clear raid56 incompat flag if dropping last RAID5/6 chunk - + if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) { LIST_ENTRY* le; BOOL clear_flag = TRUE; - + le = Vcb->chunks.Flink; while (le != &Vcb->chunks) { chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry); - + if (c2->chunk_item->type & BLOCK_FLAG_RAID5 || c2->chunk_item->type & BLOCK_FLAG_RAID6) { clear_flag = FALSE; break; } - + le = le->Flink; } - + if (clear_flag) Vcb->superblock.incompat_flags &= ~BTRFS_INCOMPAT_FLAGS_RAID56; } - + Vcb->superblock.bytes_used -= c->oldused; - - if (c->list_entry_changed.Flink) - RemoveEntryList(&c->list_entry_changed); - + ExFreePool(c->chunk_item); ExFreePool(c->devices); - + while (!IsListEmpty(&c->space)) { space* s = CONTAINING_RECORD(c->space.Flink, space, list_entry); - + RemoveEntryList(&s->list_entry); ExFreePool(s); } - + while (!IsListEmpty(&c->deleting)) { space* s = CONTAINING_RECORD(c->deleting.Flink, space, list_entry); - + RemoveEntryList(&s->list_entry); ExFreePool(s); } - + + ExDeleteResourceLite(&c->partial_stripes_lock); + ExDeleteResourceLite(&c->range_locks_lock); ExDeleteResourceLite(&c->lock); ExDeleteResourceLite(&c->changed_extents_lock); ExFreePool(c); - + return STATUS_SUCCESS; } -static NTSTATUS update_chunks(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) { - LIST_ENTRY *le = Vcb->chunks_changed.Flink, *le2; +static NTSTATUS partial_stripe_read(device_extension* Vcb, chunk* c, partial_stripe* ps, UINT64 startoff, UINT16 parity, ULONG offset, ULONG len) { NTSTATUS Status; - UINT64 used_minus_cache; - - ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); - - // FIXME - do tree chunks before data chunks - - while (le != &Vcb->chunks_changed) { - chunk* c = 
CONTAINING_RECORD(le, chunk, list_entry_changed); - - le2 = le->Flink; - - ExAcquireResourceExclusiveLite(&c->lock, TRUE); - - if (c->list_entry_balance.Flink) { - ExReleaseResourceLite(&c->lock); - le = le2; - continue; - } - - used_minus_cache = c->used; - - // subtract self-hosted cache - if (used_minus_cache > 0 && c->chunk_item->type & BLOCK_FLAG_DATA && c->cache && c->cache->inode_item.st_size == c->used) { - LIST_ENTRY* le3; - - le3 = c->cache->extents.Flink; - while (le3 != &c->cache->extents) { - extent* ext = CONTAINING_RECORD(le3, extent, list_entry); - EXTENT_DATA* ed = ext->data; - - if (!ext->ignore) { - if (ext->datalen < sizeof(EXTENT_DATA)) { - ERR("extent %llx was %u bytes, expected at least %u\n", ext->offset, ext->datalen, sizeof(EXTENT_DATA)); - break; - } - - if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { - EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; - - if (ext->datalen < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { - ERR("extent %llx was %u bytes, expected at least %u\n", ext->offset, ext->datalen, - sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)); - break; - } - - if (ed2->size != 0 && ed2->address >= c->offset && ed2->address + ed2->size <= c->offset + c->chunk_item->size) - used_minus_cache -= ed2->size; - } - } - - le3 = le3->Flink; - } + ULONG sl = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size); + CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; + + while (len > 0) { + ULONG readlen = min(offset + len, offset + (sl - (offset % sl))) - offset; + UINT16 stripe; + + stripe = (parity + (offset / sl) + 1) % c->chunk_item->num_stripes; + + if (c->devices[stripe]->devobj) { + Status = sync_read_phys(c->devices[stripe]->devobj, cis[stripe].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size), + readlen * Vcb->superblock.sector_size, ps->data + (offset * Vcb->superblock.sector_size), FALSE); + if (!NT_SUCCESS(Status)) { + ERR("sync_read_phys returned %08x\n", 
Status); + return Status; + } + } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) { + UINT16 i; + UINT8* scratch; + + scratch = ExAllocatePoolWithTag(NonPagedPool, readlen * Vcb->superblock.sector_size, ALLOC_TAG); + if (!scratch) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (i != stripe) { + if (!c->devices[i]->devobj) { + ExFreePool(scratch); + return STATUS_UNEXPECTED_IO_ERROR; + } + + if (i == 0 || (stripe == 0 && i == 1)) { + Status = sync_read_phys(c->devices[i]->devobj, cis[i].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size), + readlen * Vcb->superblock.sector_size, ps->data + (offset * Vcb->superblock.sector_size), FALSE); + if (!NT_SUCCESS(Status)) { + ERR("sync_read_phys returned %08x\n", Status); + ExFreePool(scratch); + return Status; + } + } else { + Status = sync_read_phys(c->devices[i]->devobj, cis[i].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size), + readlen * Vcb->superblock.sector_size, scratch, FALSE); + if (!NT_SUCCESS(Status)) { + ERR("sync_read_phys returned %08x\n", Status); + ExFreePool(scratch); + return Status; + } + + do_xor(ps->data + (offset * Vcb->superblock.sector_size), scratch, readlen * Vcb->superblock.sector_size); + } + } + } + + ExFreePool(scratch); + } else { + UINT8* scratch; + UINT16 k, i, logstripe, error_stripe, num_errors = 0; + + scratch = ExAllocatePoolWithTag(NonPagedPool, (c->chunk_item->num_stripes + 2) * readlen * Vcb->superblock.sector_size, ALLOC_TAG); + if (!scratch) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + i = (parity + 1) % c->chunk_item->num_stripes; + for (k = 0; k < c->chunk_item->num_stripes; k++) { + if (i != stripe) { + if (c->devices[i]->devobj) { + Status = sync_read_phys(c->devices[i]->devobj, cis[i].offset + startoff + ((offset % sl) * Vcb->superblock.sector_size), + readlen * Vcb->superblock.sector_size, scratch + (k * readlen * 
Vcb->superblock.sector_size), FALSE); + if (!NT_SUCCESS(Status)) { + ERR("sync_read_phys returned %08x\n", Status); + num_errors++; + error_stripe = k; + } + } else { + num_errors++; + error_stripe = k; + } + + if (num_errors > 1) { + ExFreePool(scratch); + return STATUS_UNEXPECTED_IO_ERROR; + } + } else + logstripe = k; + + i = (i + 1) % c->chunk_item->num_stripes; + } + + if (num_errors == 0 || error_stripe == c->chunk_item->num_stripes - 1) { + for (k = 0; k < c->chunk_item->num_stripes - 1; k++) { + if (k != logstripe) { + if (k == 0 || (k == 1 && logstripe == 0)) { + RtlCopyMemory(ps->data + (offset * Vcb->superblock.sector_size), scratch + (k * readlen * Vcb->superblock.sector_size), + readlen * Vcb->superblock.sector_size); + } else { + do_xor(ps->data + (offset * Vcb->superblock.sector_size), scratch + (k * readlen * Vcb->superblock.sector_size), + readlen * Vcb->superblock.sector_size); + } + } + } + } else { + raid6_recover2(scratch, c->chunk_item->num_stripes, readlen * Vcb->superblock.sector_size, logstripe, + error_stripe, scratch + (c->chunk_item->num_stripes * readlen * Vcb->superblock.sector_size)); + + RtlCopyMemory(ps->data + (offset * Vcb->superblock.sector_size), scratch + (c->chunk_item->num_stripes * readlen * Vcb->superblock.sector_size), + readlen * Vcb->superblock.sector_size); + } + + ExFreePool(scratch); } - - if (used_minus_cache == 0) { - Status = drop_chunk(Vcb, c, batchlist, Irp, rollback); + + offset += readlen; + len -= readlen; + } + + return STATUS_SUCCESS; +} + +NTSTATUS flush_partial_stripe(device_extension* Vcb, chunk* c, partial_stripe* ps) { + NTSTATUS Status; + UINT16 parity2, stripe, startoffstripe; + UINT8* data; + UINT64 startoff; + ULONG runlength, index, last1; + CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; + LIST_ENTRY* le; + UINT16 k, num_data_stripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 
1 : 2); + UINT64 ps_length = num_data_stripes * c->chunk_item->stripe_length; + ULONG stripe_length = (ULONG)c->chunk_item->stripe_length; + + // FIXME - do writes asynchronously? + + get_raid0_offset(ps->address - c->offset, stripe_length, num_data_stripes, &startoff, &startoffstripe); + + parity2 = (((ps->address - c->offset) / ps_length) + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes; + + // read data (or reconstruct if degraded) + + runlength = RtlFindFirstRunClear(&ps->bmp, &index); + last1 = 0; + + while (runlength != 0) { + if (index > last1) { + Status = partial_stripe_read(Vcb, c, ps, startoff, parity2, last1, index - last1); if (!NT_SUCCESS(Status)) { - ERR("drop_chunk returned %08x\n", Status); - ExReleaseResourceLite(&c->lock); - ExReleaseResourceLite(&Vcb->chunk_lock); + ERR("partial_stripe_read returned %08x\n", Status); return Status; } - } else if (c->created) { - Status = create_chunk(Vcb, c, Irp, rollback); + } + + last1 = index + runlength; + + runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index); + } + + if (last1 < ps_length / Vcb->superblock.sector_size) { + Status = partial_stripe_read(Vcb, c, ps, startoff, parity2, last1, (ULONG)((ps_length / Vcb->superblock.sector_size) - last1)); + if (!NT_SUCCESS(Status)) { + ERR("partial_stripe_read returned %08x\n", Status); + return Status; + } + } + + // set unallocated data to 0 + le = c->space.Flink; + while (le != &c->space) { + space* s = CONTAINING_RECORD(le, space, list_entry); + + if (s->address + s->size > ps->address && s->address < ps->address + ps_length) { + UINT64 start = max(ps->address, s->address); + UINT64 end = min(ps->address + ps_length, s->address + s->size); + + RtlZeroMemory(ps->data + start - ps->address, (ULONG)(end - start)); + } else if (s->address >= ps->address + ps_length) + break; + + le = le->Flink; + } + + le = c->deleting.Flink; + while (le != &c->deleting) { + space* s = CONTAINING_RECORD(le, space, list_entry); + + if 
(s->address + s->size > ps->address && s->address < ps->address + ps_length) { + UINT64 start = max(ps->address, s->address); + UINT64 end = min(ps->address + ps_length, s->address + s->size); + + RtlZeroMemory(ps->data + start - ps->address, (ULONG)(end - start)); + } else if (s->address >= ps->address + ps_length) + break; + + le = le->Flink; + } + + stripe = (parity2 + 1) % c->chunk_item->num_stripes; + + data = ps->data; + for (k = 0; k < num_data_stripes; k++) { + if (c->devices[stripe]->devobj) { + Status = write_data_phys(c->devices[stripe]->devobj, cis[stripe].offset + startoff, data, stripe_length); if (!NT_SUCCESS(Status)) { - ERR("create_chunk returned %08x\n", Status); - ExReleaseResourceLite(&c->lock); - ExReleaseResourceLite(&Vcb->chunk_lock); + ERR("write_data_phys returned %08x\n", Status); return Status; } } - - if (used_minus_cache > 0) - ExReleaseResourceLite(&c->lock); + + data += stripe_length; + stripe = (stripe + 1) % c->chunk_item->num_stripes; + } + + // write parity + if (c->chunk_item->type & BLOCK_FLAG_RAID5) { + if (c->devices[parity2]->devobj) { + UINT16 i; + + for (i = 1; i < c->chunk_item->num_stripes - 1; i++) { + do_xor(ps->data, ps->data + (i * stripe_length), stripe_length); + } + + Status = write_data_phys(c->devices[parity2]->devobj, cis[parity2].offset + startoff, ps->data, stripe_length); + if (!NT_SUCCESS(Status)) { + ERR("write_data_phys returned %08x\n", Status); + return Status; + } + } + } else { + UINT16 parity1 = (parity2 + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes; + + if (c->devices[parity1]->devobj || c->devices[parity2]->devobj) { + UINT8* scratch; + UINT16 i; + + scratch = ExAllocatePoolWithTag(NonPagedPool, stripe_length * 2, ALLOC_TAG); + if (!scratch) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + i = c->chunk_item->num_stripes - 3; + + while (TRUE) { + if (i == c->chunk_item->num_stripes - 3) { + RtlCopyMemory(scratch, ps->data + (i * stripe_length), 
stripe_length); + RtlCopyMemory(scratch + stripe_length, ps->data + (i * stripe_length), stripe_length); + } else { + do_xor(scratch, ps->data + (i * stripe_length), stripe_length); + + galois_double(scratch + stripe_length, stripe_length); + do_xor(scratch + stripe_length, ps->data + (i * stripe_length), stripe_length); + } + + if (i == 0) + break; + + i--; + } + + if (c->devices[parity1]->devobj) { + Status = write_data_phys(c->devices[parity1]->devobj, cis[parity1].offset + startoff, scratch, stripe_length); + if (!NT_SUCCESS(Status)) { + ERR("write_data_phys returned %08x\n", Status); + ExFreePool(scratch); + return Status; + } + } + + if (c->devices[parity2]->devobj) { + Status = write_data_phys(c->devices[parity2]->devobj, cis[parity2].offset + startoff, scratch + stripe_length, stripe_length); + if (!NT_SUCCESS(Status)) { + ERR("write_data_phys returned %08x\n", Status); + ExFreePool(scratch); + return Status; + } + } + + ExFreePool(scratch); + } + } + + return STATUS_SUCCESS; +} + +static NTSTATUS update_chunks(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) { + LIST_ENTRY *le, *le2; + NTSTATUS Status; + UINT64 used_minus_cache; + + ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); + + // FIXME - do tree chunks before data chunks + + le = Vcb->chunks.Flink; + while (le != &Vcb->chunks) { + chunk* c = CONTAINING_RECORD(le, chunk, list_entry); + + le2 = le->Flink; + + if (c->changed) { + ExAcquireResourceExclusiveLite(&c->lock, TRUE); + + // flush partial stripes + if (!Vcb->readonly && (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)) { + ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, TRUE); + + while (!IsListEmpty(&c->partial_stripes)) { + partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry); + + Status = flush_partial_stripe(Vcb, c, ps); + + if (ps->bmparr) + ExFreePool(ps->bmparr); + + ExFreePool(ps); + + if 
(!NT_SUCCESS(Status)) { + ERR("flush_partial_stripe returned %08x\n", Status); + ExReleaseResourceLite(&c->partial_stripes_lock); + ExReleaseResourceLite(&c->lock); + ExReleaseResourceLite(&Vcb->chunk_lock); + return Status; + } + } + + ExReleaseResourceLite(&c->partial_stripes_lock); + } + + if (c->list_entry_balance.Flink) { + ExReleaseResourceLite(&c->lock); + le = le2; + continue; + } + + if (c->space_changed || c->created) { + used_minus_cache = c->used; + + // subtract self-hosted cache + if (used_minus_cache > 0 && c->chunk_item->type & BLOCK_FLAG_DATA && c->cache && c->cache->inode_item.st_size == c->used) { + LIST_ENTRY* le3; + + le3 = c->cache->extents.Flink; + while (le3 != &c->cache->extents) { + extent* ext = CONTAINING_RECORD(le3, extent, list_entry); + EXTENT_DATA* ed = &ext->extent_data; + + if (!ext->ignore) { + if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; + + if (ed2->size != 0 && ed2->address >= c->offset && ed2->address + ed2->size <= c->offset + c->chunk_item->size) + used_minus_cache -= ed2->size; + } + } + + le3 = le3->Flink; + } + } + + if (used_minus_cache == 0) { + Status = drop_chunk(Vcb, c, batchlist, Irp, rollback); + if (!NT_SUCCESS(Status)) { + ERR("drop_chunk returned %08x\n", Status); + ExReleaseResourceLite(&c->lock); + ExReleaseResourceLite(&Vcb->chunk_lock); + return Status; + } + } else if (c->created) { + Status = create_chunk(Vcb, c, Irp); + if (!NT_SUCCESS(Status)) { + ERR("create_chunk returned %08x\n", Status); + ExReleaseResourceLite(&c->lock); + ExReleaseResourceLite(&Vcb->chunk_lock); + return Status; + } + } + + if (used_minus_cache > 0) + ExReleaseResourceLite(&c->lock); + } + } le = le2; } - + ExReleaseResourceLite(&Vcb->chunk_lock); - + return STATUS_SUCCESS; } -static NTSTATUS delete_root_ref(device_extension* Vcb, UINT64 subvolid, UINT64 parsubvolid, UINT64 parinode, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS 
delete_root_ref(device_extension* Vcb, UINT64 subvolid, UINT64 parsubvolid, UINT64 parinode, PANSI_STRING utf8, PIRP Irp) { KEY searchkey; traverse_ptr tp; NTSTATUS Status; - + searchkey.obj_id = parsubvolid; searchkey.obj_type = TYPE_ROOT_REF; searchkey.offset = subvolid; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (!keycmp(searchkey, tp.item->key)) { if (tp.item->size < sizeof(ROOT_REF)) { ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF)); @@ -4756,35 +5829,39 @@ static NTSTATUS delete_root_ref(device_extension* Vcb, UINT64 subvolid, UINT64 p } else { ROOT_REF* rr; ULONG len; - + rr = (ROOT_REF*)tp.item->data; len = tp.item->size; - + do { - ULONG itemlen; - - if (len < sizeof(ROOT_REF) || len < sizeof(ROOT_REF) - 1 + rr->n) { + UINT16 itemlen; + + if (len < sizeof(ROOT_REF) || len < offsetof(ROOT_REF, name[0]) + rr->n) { ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); break; } - - itemlen = sizeof(ROOT_REF) - sizeof(char) + rr->n; - + + itemlen = (UINT16)offsetof(ROOT_REF, name[0]) + rr->n; + if (rr->dir == parinode && rr->n == utf8->Length && RtlCompareMemory(rr->name, utf8->Buffer, rr->n) == rr->n) { - ULONG newlen = tp.item->size - itemlen; - - delete_tree_item(Vcb, &tp, rollback); - + UINT16 newlen = tp.item->size - itemlen; + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + if (newlen == 0) { TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); } else { UINT8 *newrr = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *rroff; - + if (!newrr) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + TRACE("modifying 
(%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); if ((UINT8*)rr > tp.item->data) { @@ -4793,16 +5870,21 @@ static NTSTATUS delete_root_ref(device_extension* Vcb, UINT64 subvolid, UINT64 p } else { rroff = newrr; } - - if ((UINT8*)&rr->name[rr->n] - tp.item->data < tp.item->size) + + if ((UINT8*)&rr->name[rr->n] < tp.item->data + tp.item->size) RtlCopyMemory(rroff, &rr->name[rr->n], tp.item->size - ((UINT8*)&rr->name[rr->n] - tp.item->data)); - - insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newrr, newlen, NULL, Irp, rollback); + + Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newrr, newlen, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + ExFreePool(newrr); + return Status; + } } - + break; } - + if (len > itemlen) { len -= itemlen; rr = (ROOT_REF*)&rr->name[rr->n]; @@ -4814,192 +5896,223 @@ static NTSTATUS delete_root_ref(device_extension* Vcb, UINT64 subvolid, UINT64 p WARN("could not find ROOT_REF entry for subvol %llx in %llx\n", searchkey.offset, searchkey.obj_id); return STATUS_NOT_FOUND; } - + return STATUS_SUCCESS; } -static NTSTATUS add_root_ref(device_extension* Vcb, UINT64 subvolid, UINT64 parsubvolid, ROOT_REF* rr, PIRP Irp, LIST_ENTRY* rollback) { +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(suppress: 28194) +#endif +static NTSTATUS add_root_ref(_In_ device_extension* Vcb, _In_ UINT64 subvolid, _In_ UINT64 parsubvolid, _In_ __drv_aliasesMem ROOT_REF* rr, _In_opt_ PIRP Irp) { KEY searchkey; traverse_ptr tp; NTSTATUS Status; - + searchkey.obj_id = parsubvolid; searchkey.obj_type = TYPE_ROOT_REF; searchkey.offset = subvolid; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (!keycmp(searchkey, tp.item->key)) { - ULONG 
rrsize = tp.item->size + sizeof(ROOT_REF) - 1 + rr->n; + UINT16 rrsize = tp.item->size + (UINT16)offsetof(ROOT_REF, name[0]) + rr->n; UINT8* rr2; - + rr2 = ExAllocatePoolWithTag(PagedPool, rrsize, ALLOC_TAG); if (!rr2) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + if (tp.item->size > 0) RtlCopyMemory(rr2, tp.item->data, tp.item->size); - - RtlCopyMemory(rr2 + tp.item->size, rr, sizeof(ROOT_REF) - 1 + rr->n); + + RtlCopyMemory(rr2 + tp.item->size, rr, offsetof(ROOT_REF, name[0]) + rr->n); ExFreePool(rr); - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr2, rrsize, NULL, Irp, rollback)) { - ERR("error - failed to insert item\n"); + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); ExFreePool(rr2); - return STATUS_INTERNAL_ERROR; + return Status; + } + + Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr2, rrsize, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + ExFreePool(rr2); + return Status; } } else { - if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr, sizeof(ROOT_REF) - 1 + rr->n, NULL, Irp, rollback)) { - ERR("error - failed to insert item\n"); + Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr, (UINT16)offsetof(ROOT_REF, name[0]) + rr->n, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); ExFreePool(rr); - return STATUS_INTERNAL_ERROR; + return Status; } } - + return STATUS_SUCCESS; } +#ifdef _MSC_VER +#pragma warning(pop) +#endif -static NTSTATUS STDCALL update_root_backref(device_extension* Vcb, UINT64 subvolid, UINT64 parsubvolid, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS update_root_backref(device_extension* Vcb, 
UINT64 subvolid, UINT64 parsubvolid, PIRP Irp) { KEY searchkey; traverse_ptr tp; UINT8* data; - ULONG datalen; + UINT16 datalen; NTSTATUS Status; - + searchkey.obj_id = parsubvolid; searchkey.obj_type = TYPE_ROOT_REF; searchkey.offset = subvolid; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (!keycmp(tp.item->key, searchkey) && tp.item->size > 0) { datalen = tp.item->size; - + data = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG); if (!data) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(data, tp.item->data, datalen); } else { datalen = 0; + data = NULL; } - + searchkey.obj_id = subvolid; searchkey.obj_type = TYPE_ROOT_BACKREF; searchkey.offset = parsubvolid; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); - + if (datalen > 0) ExFreePool(data); - + return Status; } - - if (!keycmp(tp.item->key, searchkey)) - delete_tree_item(Vcb, &tp, rollback); - + + if (!keycmp(tp.item->key, searchkey)) { + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + + if (datalen > 0) + ExFreePool(data); + + return Status; + } + } + if (datalen > 0) { - if (!insert_tree_item(Vcb, Vcb->root_root, subvolid, TYPE_ROOT_BACKREF, parsubvolid, data, datalen, NULL, Irp, rollback)) { - ERR("error - failed to insert item\n"); + Status = insert_tree_item(Vcb, Vcb->root_root, subvolid, TYPE_ROOT_BACKREF, parsubvolid, data, datalen, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); ExFreePool(data); - return STATUS_INTERNAL_ERROR; + return Status; } } - + return STATUS_SUCCESS; } -static NTSTATUS add_root_item_to_cache(device_extension* Vcb, UINT64 root, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS 
add_root_item_to_cache(device_extension* Vcb, UINT64 root, PIRP Irp) { KEY searchkey; traverse_ptr tp; NTSTATUS Status; - + searchkey.obj_id = root; searchkey.obj_type = TYPE_ROOT_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id); - int3; return STATUS_INTERNAL_ERROR; } - + if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG); if (!ri) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + if (tp.item->size > 0) RtlCopyMemory(ri, tp.item->data, tp.item->size); - + RtlZeroMemory(((UINT8*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size); - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + ExFreePool(ri); + return Status; + } + + Status = insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + ExFreePool(ri); + return Status; } } else { tp.tree->write = TRUE; } - + return STATUS_SUCCESS; } -static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp) { NTSTATUS Status; - + // if fileref 
created and then immediately deleted, do nothing if (fileref->created && fileref->deleted) { fileref->dirty = FALSE; return STATUS_SUCCESS; } - + if (fileref->fcb->ads) { fileref->dirty = FALSE; return STATUS_SUCCESS; } - + if (fileref->created) { - ULONG disize; + UINT16 disize; DIR_ITEM *di, *di2; UINT32 crc32; - - crc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->utf8.Buffer, fileref->utf8.Length); - - disize = sizeof(DIR_ITEM) - 1 + fileref->utf8.Length; + + crc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); + + disize = (UINT16)(offsetof(DIR_ITEM, name[0]) + fileref->dc->utf8.Length); di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG); if (!di) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + if (fileref->parent->fcb->subvol == fileref->fcb->subvol) { di->key.obj_id = fileref->fcb->inode; di->key.obj_type = TYPE_INODE_ITEM; @@ -5012,205 +6125,212 @@ static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp di->transid = fileref->fcb->Vcb->superblock.generation; di->m = 0; - di->n = (UINT16)fileref->utf8.Length; + di->n = (UINT16)fileref->dc->utf8.Length; di->type = fileref->fcb->type; - RtlCopyMemory(di->name, fileref->utf8.Buffer, fileref->utf8.Length); - + RtlCopyMemory(di->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); + di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG); if (!di2) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(di2, di, disize); - if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, fileref->index, - di, disize, Batch_Insert, Irp, rollback)) { - ERR("insert_tree_item_batch failed\n"); - return STATUS_INTERNAL_ERROR; + Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, + fileref->dc->index, di, disize, Batch_Insert); + if 
(!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); + return Status; } - - if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32, - di2, disize, Batch_DirItem, Irp, rollback)) { - ERR("insert_tree_item_batch failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32, + di2, disize, Batch_DirItem); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); + return Status; } - + if (fileref->parent->fcb->subvol == fileref->fcb->subvol) { INODE_REF* ir; - - ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->utf8.Length, ALLOC_TAG); + + ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->dc->utf8.Length, ALLOC_TAG); if (!ir) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - ir->index = fileref->index; - ir->n = fileref->utf8.Length; - RtlCopyMemory(ir->name, fileref->utf8.Buffer, ir->n); - - if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode, - ir, sizeof(INODE_REF) - 1 + ir->n, Batch_InodeRef, Irp, rollback)) { - ERR("insert_tree_item_batch failed\n"); - return STATUS_INTERNAL_ERROR; + ir->index = fileref->dc->index; + ir->n = fileref->dc->utf8.Length; + RtlCopyMemory(ir->name, fileref->dc->utf8.Buffer, ir->n); + + Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode, + ir, sizeof(INODE_REF) - 1 + ir->n, Batch_InodeRef); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); + return Status; } - } else { + } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { ULONG rrlen; ROOT_REF* rr; - rrlen = sizeof(ROOT_REF) - 1 + 
fileref->utf8.Length; - + rrlen = sizeof(ROOT_REF) - 1 + fileref->dc->utf8.Length; + rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG); if (!rr) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + rr->dir = fileref->parent->fcb->inode; - rr->index = fileref->index; - rr->n = fileref->utf8.Length; - RtlCopyMemory(rr->name, fileref->utf8.Buffer, fileref->utf8.Length); - - Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp, rollback); + rr->index = fileref->dc->index; + rr->n = fileref->dc->utf8.Length; + RtlCopyMemory(rr->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); + + Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp); if (!NT_SUCCESS(Status)) { ERR("add_root_ref returned %08x\n", Status); return Status; } - - Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp, rollback); + + Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp); if (!NT_SUCCESS(Status)) { ERR("update_root_backref returned %08x\n", Status); return Status; } } - + fileref->created = FALSE; } else if (fileref->deleted) { UINT32 crc32; ANSI_STRING* name; DIR_ITEM* di; - - if (fileref->oldutf8.Buffer) - name = &fileref->oldutf8; - else - name = &fileref->utf8; + + name = &fileref->oldutf8; crc32 = calc_crc32c(0xfffffffe, (UINT8*)name->Buffer, name->Length); TRACE("deleting %.*S\n", file_desc_fileref(fileref)); - + di = ExAllocatePoolWithTag(PagedPool, sizeof(DIR_ITEM) - 1 + name->Length, ALLOC_TAG); if (!di) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + di->m = 0; di->n = name->Length; RtlCopyMemory(di->name, name->Buffer, name->Length); - + // delete DIR_ITEM (0x54) - - if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, - crc32, 
di, sizeof(DIR_ITEM) - 1 + name->Length, Batch_DeleteDirItem, Irp, rollback)) { - ERR("insert_tree_item_batch failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, + crc32, di, sizeof(DIR_ITEM) - 1 + name->Length, Batch_DeleteDirItem); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); + return Status; } - + if (fileref->parent->fcb->subvol == fileref->fcb->subvol) { INODE_REF* ir; - + // delete INODE_REF (0xc) - + ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + name->Length, ALLOC_TAG); if (!ir) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - - ir->index = fileref->index; + + ir->index = fileref->oldindex; ir->n = name->Length; RtlCopyMemory(ir->name, name->Buffer, name->Length); - if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, - fileref->parent->fcb->inode, ir, sizeof(INODE_REF) - 1 + name->Length, Batch_DeleteInodeRef, Irp, rollback)) { - ERR("insert_tree_item_batch failed\n"); - return STATUS_INTERNAL_ERROR; + Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, + fileref->parent->fcb->inode, ir, sizeof(INODE_REF) - 1 + name->Length, Batch_DeleteInodeRef); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); + return Status; } - } else { // subvolume - Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, name, Irp, rollback); + } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { // subvolume + Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, name, Irp); if (!NT_SUCCESS(Status)) { ERR("delete_root_ref returned 
%08x\n", Status); return Status; } - - Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp, rollback); + + Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp); if (!NT_SUCCESS(Status)) { ERR("update_root_backref returned %08x\n", Status); return Status; } } - + // delete DIR_INDEX (0x60) - - if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, - fileref->index, NULL, 0, Batch_Delete, Irp, rollback)) { - ERR("insert_tree_item_batch failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, + fileref->oldindex, NULL, 0, Batch_Delete); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); + return Status; } - + if (fileref->oldutf8.Buffer) { ExFreePool(fileref->oldutf8.Buffer); fileref->oldutf8.Buffer = NULL; } } else { // rename or change type - PANSI_STRING oldutf8 = fileref->oldutf8.Buffer ? &fileref->oldutf8 : &fileref->utf8; + PANSI_STRING oldutf8 = fileref->oldutf8.Buffer ? 
&fileref->oldutf8 : &fileref->dc->utf8; UINT32 crc32, oldcrc32; - ULONG disize; + UINT16 disize; DIR_ITEM *olddi, *di, *di2; - - crc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->utf8.Buffer, fileref->utf8.Length); - + + crc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); + if (!fileref->oldutf8.Buffer) oldcrc32 = crc32; else oldcrc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->oldutf8.Buffer, fileref->oldutf8.Length); - + olddi = ExAllocatePoolWithTag(PagedPool, sizeof(DIR_ITEM) - 1 + oldutf8->Length, ALLOC_TAG); if (!olddi) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + olddi->m = 0; olddi->n = (UINT16)oldutf8->Length; RtlCopyMemory(olddi->name, oldutf8->Buffer, oldutf8->Length); - + // delete DIR_ITEM (0x54) - - if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, - oldcrc32, olddi, sizeof(DIR_ITEM) - 1 + oldutf8->Length, Batch_DeleteDirItem, Irp, rollback)) { - ERR("insert_tree_item_batch failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, + oldcrc32, olddi, sizeof(DIR_ITEM) - 1 + oldutf8->Length, Batch_DeleteDirItem); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); + ExFreePool(olddi); + return Status; } // add DIR_ITEM (0x54) - - disize = sizeof(DIR_ITEM) - 1 + fileref->utf8.Length; + + disize = (UINT16)(offsetof(DIR_ITEM, name[0]) + fileref->dc->utf8.Length); di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG); if (!di) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG); if (!di2) { ERR("out of memory\n"); ExFreePool(di); return STATUS_INSUFFICIENT_RESOURCES; } - - if (fileref->parent->fcb->subvol == fileref->fcb->subvol) { + + if (fileref->dc) + di->key = 
fileref->dc->key; + else if (fileref->parent->fcb->subvol == fileref->fcb->subvol) { di->key.obj_id = fileref->fcb->inode; di->key.obj_type = TYPE_INODE_ITEM; di->key.offset = 0; @@ -5219,127 +6339,550 @@ static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp di->key.obj_type = TYPE_ROOT_ITEM; di->key.offset = 0xffffffffffffffff; } - + di->transid = fileref->fcb->Vcb->superblock.generation; di->m = 0; - di->n = (UINT16)fileref->utf8.Length; + di->n = (UINT16)fileref->dc->utf8.Length; di->type = fileref->fcb->type; - RtlCopyMemory(di->name, fileref->utf8.Buffer, fileref->utf8.Length); - + RtlCopyMemory(di->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); + RtlCopyMemory(di2, di, disize); - - if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32, - di, disize, Batch_DirItem, Irp, rollback)) { - ERR("insert_tree_item_batch failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32, + di, disize, Batch_DirItem); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); + ExFreePool(di2); + ExFreePool(di); + return Status; } - + if (fileref->parent->fcb->subvol == fileref->fcb->subvol) { INODE_REF *ir, *ir2; - + // delete INODE_REF (0xc) - + ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + oldutf8->Length, ALLOC_TAG); if (!ir) { ERR("out of memory\n"); + ExFreePool(di2); return STATUS_INSUFFICIENT_RESOURCES; } - ir->index = fileref->index; + ir->index = fileref->dc->index; ir->n = oldutf8->Length; RtlCopyMemory(ir->name, oldutf8->Buffer, ir->n); - - if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode, - ir, sizeof(INODE_REF) - 1 + ir->n, Batch_DeleteInodeRef, Irp, rollback)) { - 
ERR("insert_tree_item_batch failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode, + ir, sizeof(INODE_REF) - 1 + ir->n, Batch_DeleteInodeRef); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); + ExFreePool(ir); + ExFreePool(di2); + return Status; } - + // add INODE_REF (0xc) - - ir2 = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->utf8.Length, ALLOC_TAG); + + ir2 = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->dc->utf8.Length, ALLOC_TAG); if (!ir2) { ERR("out of memory\n"); + ExFreePool(di2); return STATUS_INSUFFICIENT_RESOURCES; } - ir2->index = fileref->index; - ir2->n = fileref->utf8.Length; - RtlCopyMemory(ir2->name, fileref->utf8.Buffer, ir2->n); - - if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode, - ir2, sizeof(INODE_REF) - 1 + ir2->n, Batch_InodeRef, Irp, rollback)) { - ERR("insert_tree_item_batch failed\n"); - return STATUS_INTERNAL_ERROR; + ir2->index = fileref->dc->index; + ir2->n = fileref->dc->utf8.Length; + RtlCopyMemory(ir2->name, fileref->dc->utf8.Buffer, ir2->n); + + Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode, + ir2, sizeof(INODE_REF) - 1 + ir2->n, Batch_InodeRef); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); + ExFreePool(ir2); + ExFreePool(di2); + return Status; } - } else { // subvolume + } else if (fileref->fcb != fileref->fcb->Vcb->dummy_fcb) { // subvolume ULONG rrlen; ROOT_REF* rr; - - // FIXME - make sure this works with duff subvols within snapshots - - Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, 
oldutf8, Irp, rollback); + + Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, oldutf8, Irp); if (!NT_SUCCESS(Status)) { ERR("delete_root_ref returned %08x\n", Status); + ExFreePool(di2); + return Status; + } + + rrlen = sizeof(ROOT_REF) - 1 + fileref->dc->utf8.Length; + + rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG); + if (!rr) { + ERR("out of memory\n"); + ExFreePool(di2); + return STATUS_INSUFFICIENT_RESOURCES; + } + + rr->dir = fileref->parent->fcb->inode; + rr->index = fileref->dc->index; + rr->n = fileref->dc->utf8.Length; + RtlCopyMemory(rr->name, fileref->dc->utf8.Buffer, fileref->dc->utf8.Length); + + Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp); + if (!NT_SUCCESS(Status)) { + ERR("add_root_ref returned %08x\n", Status); + ExFreePool(di2); + return Status; + } + + Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp); + if (!NT_SUCCESS(Status)) { + ERR("update_root_backref returned %08x\n", Status); + ExFreePool(di2); + return Status; + } + } + + // delete DIR_INDEX (0x60) + + Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, + fileref->dc->index, NULL, 0, Batch_Delete); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); + ExFreePool(di2); + return Status; + } + + // add DIR_INDEX (0x60) + + Status = insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, + fileref->dc->index, di2, disize, Batch_Insert); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); + ExFreePool(di2); + return Status; + } + + if (fileref->oldutf8.Buffer) { + ExFreePool(fileref->oldutf8.Buffer); + fileref->oldutf8.Buffer = NULL; + } + } + + 
fileref->dirty = FALSE; + + return STATUS_SUCCESS; +} + +static void flush_disk_caches(device_extension* Vcb) { + LIST_ENTRY* le; + ioctl_context context; + ULONG num; + + context.left = 0; + + le = Vcb->devices.Flink; + + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (dev->devobj && !dev->readonly && dev->can_flush) + context.left++; + + le = le->Flink; + } + + if (context.left == 0) + return; + + num = 0; + + KeInitializeEvent(&context.Event, NotificationEvent, FALSE); + + context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(ioctl_context_stripe) * context.left, ALLOC_TAG); + if (!context.stripes) { + ERR("out of memory\n"); + return; + } + + RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left); + + le = Vcb->devices.Flink; + + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (dev->devobj && !dev->readonly && dev->can_flush) { + PIO_STACK_LOCATION IrpSp; + ioctl_context_stripe* stripe = &context.stripes[num]; + + RtlZeroMemory(&stripe->apte, sizeof(ATA_PASS_THROUGH_EX)); + + stripe->apte.Length = sizeof(ATA_PASS_THROUGH_EX); + stripe->apte.TimeOutValue = 5; + stripe->apte.CurrentTaskFile[6] = IDE_COMMAND_FLUSH_CACHE; + + stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, FALSE); + + if (!stripe->Irp) { + ERR("IoAllocateIrp failed\n"); + goto nextdev; + } + + IrpSp = IoGetNextIrpStackLocation(stripe->Irp); + IrpSp->MajorFunction = IRP_MJ_DEVICE_CONTROL; + + IrpSp->Parameters.DeviceIoControl.IoControlCode = IOCTL_ATA_PASS_THROUGH; + IrpSp->Parameters.DeviceIoControl.InputBufferLength = sizeof(ATA_PASS_THROUGH_EX); + IrpSp->Parameters.DeviceIoControl.OutputBufferLength = sizeof(ATA_PASS_THROUGH_EX); + + stripe->Irp->AssociatedIrp.SystemBuffer = &stripe->apte; + stripe->Irp->Flags |= IRP_BUFFERED_IO | IRP_INPUT_OPERATION; + stripe->Irp->UserBuffer = &stripe->apte; + stripe->Irp->UserIosb = &stripe->iosb; + + 
IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, TRUE, TRUE, TRUE); + + IoCallDriver(dev->devobj, stripe->Irp); + +nextdev: + num++; + } + + le = le->Flink; + } + + KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL); + + ExFreePool(context.stripes); +} + +static NTSTATUS flush_changed_dev_stats(device_extension* Vcb, device* dev, PIRP Irp) { + NTSTATUS Status; + KEY searchkey; + traverse_ptr tp; + UINT16 statslen; + UINT64* stats; + + searchkey.obj_id = 0; + searchkey.obj_type = TYPE_DEV_STATS; + searchkey.offset = dev->devitem.dev_id; + + Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + if (!keycmp(tp.item->key, searchkey)) { + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + } + + statslen = sizeof(UINT64) * 5; + stats = ExAllocatePoolWithTag(PagedPool, statslen, ALLOC_TAG); + if (!stats) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(stats, dev->stats, statslen); + + Status = insert_tree_item(Vcb, Vcb->dev_root, 0, TYPE_DEV_STATS, dev->devitem.dev_id, stats, statslen, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + ExFreePool(stats); + return Status; + } + + return STATUS_SUCCESS; +} + +static NTSTATUS flush_subvol(device_extension* Vcb, root* r, PIRP Irp) { + NTSTATUS Status; + + if (r != Vcb->root_root && r != Vcb->chunk_root) { + KEY searchkey; + traverse_ptr tp; + ROOT_ITEM* ri; + + searchkey.obj_id = r->id; + searchkey.obj_type = TYPE_ROOT_ITEM; + searchkey.offset = 0xffffffffffffffff; + + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("error - find_item returned %08x\n", Status); + return Status; + } + + if (tp.item->key.obj_id != searchkey.obj_id || 
tp.item->key.obj_type != searchkey.obj_type) { + ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id); + return STATUS_INTERNAL_ERROR; + } + + ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG); + if (!ri) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(ri, &r->root_item, sizeof(ROOT_ITEM)); + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + + Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + return Status; + } + } + + if (r->received) { + KEY searchkey; + traverse_ptr tp; + + if (!Vcb->uuid_root) { + root* uuid_root; + + TRACE("uuid root doesn't exist, creating it\n"); + + Status = create_root(Vcb, BTRFS_ROOT_UUID, &uuid_root, FALSE, 0, Irp); + + if (!NT_SUCCESS(Status)) { + ERR("create_root returned %08x\n", Status); + return Status; + } + + Vcb->uuid_root = uuid_root; + } + + RtlCopyMemory(&searchkey.obj_id, &r->root_item.received_uuid, sizeof(UINT64)); + searchkey.obj_type = TYPE_SUBVOL_REC_UUID; + RtlCopyMemory(&searchkey.offset, &r->root_item.received_uuid.uuid[sizeof(UINT64)], sizeof(UINT64)); + + Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + if (!keycmp(tp.item->key, searchkey)) { + if (tp.item->size + sizeof(UINT64) <= Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node)) { + UINT64* ids; + + ids = ExAllocatePoolWithTag(PagedPool, tp.item->size + sizeof(UINT64), ALLOC_TAG); + if (!ids) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(ids, tp.item->data, tp.item->size); + RtlCopyMemory((UINT8*)ids + tp.item->size, &r->id, sizeof(UINT64)); + + 
Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + ExFreePool(ids); + return Status; + } + + Status = insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ids, tp.item->size + sizeof(UINT64), NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + ExFreePool(ids); + return Status; + } } - - rrlen = sizeof(ROOT_REF) - 1 + fileref->utf8.Length; - - rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG); - if (!rr) { + } else { + UINT64* root_num; + + root_num = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64), ALLOC_TAG); + if (!root_num) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - - rr->dir = fileref->parent->fcb->inode; - rr->index = fileref->index; - rr->n = fileref->utf8.Length; - RtlCopyMemory(rr->name, fileref->utf8.Buffer, fileref->utf8.Length); - - Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp, rollback); + + *root_num = r->id; + + Status = insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, root_num, sizeof(UINT64), NULL, Irp); if (!NT_SUCCESS(Status)) { - ERR("add_root_ref returned %08x\n", Status); + ERR("insert_tree_item returned %08x\n", Status); + ExFreePool(root_num); return Status; } - - Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp, rollback); - if (!NT_SUCCESS(Status)) { - ERR("update_root_backref returned %08x\n", Status); - return Status; + } + + r->received = FALSE; + } + + r->dirty = FALSE; + + return STATUS_SUCCESS; +} + +static NTSTATUS test_not_full(device_extension* Vcb) { + UINT64 reserve, could_alloc, free_space; + LIST_ENTRY* le; + + // This function ensures we drop into readonly mode if we're about to leave very little + // space for metadata - this is similar to the "global reserve" of the Linux 
driver. + // Otherwise we might completely fill our space, at which point due to COW we can't + // delete anything in order to fix this. + + reserve = Vcb->extent_root->root_item.bytes_used; + reserve += Vcb->root_root->root_item.bytes_used; + if (Vcb->checksum_root) reserve += Vcb->checksum_root->root_item.bytes_used; + + reserve = max(reserve, 0x1000000); // 16 M + reserve = min(reserve, 0x20000000); // 512 M + + // Find out how much space would be available for new metadata chunks + + could_alloc = 0; + + if (Vcb->metadata_flags & BLOCK_FLAG_RAID5) { + UINT64 s1 = 0, s2 = 0, s3 = 0; + + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (!dev->readonly) { + UINT64 space = dev->devitem.num_bytes - dev->devitem.bytes_used; + + if (space >= s1) { + s3 = s2; + s2 = s1; + s1 = space; + } else if (space >= s2) { + s3 = s2; + s2 = space; + } else if (space >= s3) + s3 = space; } + + le = le->Flink; } - - // delete DIR_INDEX (0x60) - - if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, - fileref->index, NULL, 0, Batch_Delete, Irp, rollback)) { - ERR("insert_tree_item_batch failed\n"); - return STATUS_INTERNAL_ERROR; + + could_alloc = s3 * 2; + } else if (Vcb->metadata_flags & (BLOCK_FLAG_RAID10 | BLOCK_FLAG_RAID6)) { + UINT64 s1 = 0, s2 = 0, s3 = 0, s4 = 0; + + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (!dev->readonly) { + UINT64 space = dev->devitem.num_bytes - dev->devitem.bytes_used; + + if (space >= s1) { + s4 = s3; + s3 = s2; + s2 = s1; + s1 = space; + } else if (space >= s2) { + s4 = s3; + s3 = s2; + s2 = space; + } else if (space >= s3) { + s4 = s3; + s3 = space; + } else if (space >= s4) + s4 = space; + } + + le = le->Flink; } - - // add DIR_INDEX (0x60) - - if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, 
fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, - fileref->index, di2, disize, Batch_Insert, Irp, rollback)) { - ERR("insert_tree_item_batch failed\n"); - return STATUS_INTERNAL_ERROR; + + could_alloc = s4 * 2; + } else if (Vcb->metadata_flags & (BLOCK_FLAG_RAID0 | BLOCK_FLAG_RAID1)) { + UINT64 s1 = 0, s2 = 0; + + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (!dev->readonly) { + UINT64 space = dev->devitem.num_bytes - dev->devitem.bytes_used; + + if (space >= s1) { + s2 = s1; + s1 = space; + } else if (space >= s2) + s2 = space; + } + + le = le->Flink; } - if (fileref->oldutf8.Buffer) { - ExFreePool(fileref->oldutf8.Buffer); - fileref->oldutf8.Buffer = NULL; + if (Vcb->metadata_flags & BLOCK_FLAG_RAID1) + could_alloc = s2; + else // RAID0 + could_alloc = s2 * 2; + } else if (Vcb->metadata_flags & BLOCK_FLAG_DUPLICATE) { + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (!dev->readonly) { + UINT64 space = (dev->devitem.num_bytes - dev->devitem.bytes_used) / 2; + + could_alloc = max(could_alloc, space); + } + + le = le->Flink; + } + } else { // SINGLE + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (!dev->readonly) { + UINT64 space = dev->devitem.num_bytes - dev->devitem.bytes_used; + + could_alloc = max(could_alloc, space); + } + + le = le->Flink; } } - fileref->dirty = FALSE; - - return STATUS_SUCCESS; + if (could_alloc >= reserve) + return STATUS_SUCCESS; + + free_space = 0; + + le = Vcb->chunks.Flink; + while (le != &Vcb->chunks) { + chunk* c = CONTAINING_RECORD(le, chunk, list_entry); + + if (!c->reloc && !c->readonly && c->chunk_item->type & BLOCK_FLAG_METADATA) { + free_space += c->chunk_item->size - c->used; + + if (free_space + could_alloc >= reserve) + return STATUS_SUCCESS; + } + + le = le->Flink; + } + 
+ return STATUS_DISK_FULL; } -NTSTATUS STDCALL do_write(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS do_write2(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) { NTSTATUS Status; LIST_ENTRY *le, batchlist; BOOL cache_changed = FALSE; + volume_device_extension* vde; + BOOL no_cache = FALSE; #ifdef DEBUG_FLUSH_TIMES UINT64 filerefs = 0, fcbs = 0; LARGE_INTEGER freq, time1, time2; @@ -5347,33 +6890,36 @@ NTSTATUS STDCALL do_write(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) #ifdef DEBUG_WRITE_LOOPS UINT loops = 0; #endif - + TRACE("(%p)\n", Vcb); - + InitializeListHead(&batchlist); #ifdef DEBUG_FLUSH_TIMES time1 = KeQueryPerformanceCounter(&freq); #endif - + + ExAcquireResourceExclusiveLite(&Vcb->dirty_filerefs_lock, TRUE); + while (!IsListEmpty(&Vcb->dirty_filerefs)) { - dirty_fileref* dirt; - - le = RemoveHeadList(&Vcb->dirty_filerefs); - - dirt = CONTAINING_RECORD(le, dirty_fileref, list_entry); - - flush_fileref(dirt->fileref, &batchlist, Irp, rollback); - free_fileref(dirt->fileref); - ExFreePool(dirt); + file_ref* fr = CONTAINING_RECORD(RemoveHeadList(&Vcb->dirty_filerefs), file_ref, list_entry_dirty); + + flush_fileref(fr, &batchlist, Irp); + free_fileref(Vcb, fr); #ifdef DEBUG_FLUSH_TIMES filerefs++; #endif } - - commit_batch_list(Vcb, &batchlist, Irp, rollback); - + + ExReleaseResourceLite(&Vcb->dirty_filerefs_lock); + + Status = commit_batch_list(Vcb, &batchlist, Irp); + if (!NT_SUCCESS(Status)) { + ERR("commit_batch_list returned %08x\n", Status); + return Status; + } + #ifdef DEBUG_FLUSH_TIMES time2 = KeQueryPerformanceCounter(NULL); @@ -5384,243 +6930,360 @@ NTSTATUS STDCALL do_write(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) // We process deleted streams first, so we don't run over our xattr // limit unless we absolutely have to. - + // We also process deleted normal files, to avoid any problems + // caused by inode collisions. 
+ + ExAcquireResourceExclusiveLite(&Vcb->dirty_fcbs_lock, TRUE); + le = Vcb->dirty_fcbs.Flink; while (le != &Vcb->dirty_fcbs) { - dirty_fcb* dirt; + fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_dirty); LIST_ENTRY* le2 = le->Flink; - - dirt = CONTAINING_RECORD(le, dirty_fcb, list_entry); - - if (dirt->fcb->deleted && dirt->fcb->ads) { - RemoveEntryList(le); - - ExAcquireResourceExclusiveLite(dirt->fcb->Header.Resource, TRUE); - flush_fcb(dirt->fcb, FALSE, &batchlist, Irp, rollback); - ExReleaseResourceLite(dirt->fcb->Header.Resource); - - free_fcb(dirt->fcb); - ExFreePool(dirt); + + if (fcb->deleted) { + ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); + Status = flush_fcb(fcb, FALSE, &batchlist, Irp); + ExReleaseResourceLite(fcb->Header.Resource); + + free_fcb(Vcb, fcb); + + if (!NT_SUCCESS(Status)) { + ERR("flush_fcb returned %08x\n", Status); + clear_batch_list(Vcb, &batchlist); + ExReleaseResourceLite(&Vcb->dirty_fcbs_lock); + return Status; + } #ifdef DEBUG_FLUSH_TIMES fcbs++; #endif } - + le = le2; } - + + Status = commit_batch_list(Vcb, &batchlist, Irp); + if (!NT_SUCCESS(Status)) { + ERR("commit_batch_list returned %08x\n", Status); + ExReleaseResourceLite(&Vcb->dirty_fcbs_lock); + return Status; + } + le = Vcb->dirty_fcbs.Flink; while (le != &Vcb->dirty_fcbs) { - dirty_fcb* dirt; + fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_dirty); LIST_ENTRY* le2 = le->Flink; - - dirt = CONTAINING_RECORD(le, dirty_fcb, list_entry); - - if (dirt->fcb->subvol != Vcb->root_root || dirt->fcb->deleted) { - RemoveEntryList(le); - - ExAcquireResourceExclusiveLite(dirt->fcb->Header.Resource, TRUE); - flush_fcb(dirt->fcb, FALSE, &batchlist, Irp, rollback); - ExReleaseResourceLite(dirt->fcb->Header.Resource); - free_fcb(dirt->fcb); - ExFreePool(dirt); + + if (fcb->subvol != Vcb->root_root) { + ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); + Status = flush_fcb(fcb, FALSE, &batchlist, Irp); + 
ExReleaseResourceLite(fcb->Header.Resource); + free_fcb(Vcb, fcb); + + if (!NT_SUCCESS(Status)) { + ERR("flush_fcb returned %08x\n", Status); + ExReleaseResourceLite(&Vcb->dirty_fcbs_lock); + return Status; + } #ifdef DEBUG_FLUSH_TIMES fcbs++; #endif } - + le = le2; } - - commit_batch_list(Vcb, &batchlist, Irp, rollback); - + + ExReleaseResourceLite(&Vcb->dirty_fcbs_lock); + + Status = commit_batch_list(Vcb, &batchlist, Irp); + if (!NT_SUCCESS(Status)) { + ERR("commit_batch_list returned %08x\n", Status); + return Status; + } + #ifdef DEBUG_FLUSH_TIMES time2 = KeQueryPerformanceCounter(NULL); ERR("flushed %llu fcbs in %llu (freq = %llu)\n", filerefs, time2.QuadPart - time1.QuadPart, freq.QuadPart); #endif - if (!IsListEmpty(&Vcb->drop_roots)) { - Status = drop_roots(Vcb, Irp, rollback); - + // no need to get dirty_subvols_lock here, as we have tree_lock exclusively + while (!IsListEmpty(&Vcb->dirty_subvols)) { + root* r = CONTAINING_RECORD(RemoveHeadList(&Vcb->dirty_subvols), root, list_entry_dirty); + + Status = flush_subvol(Vcb, r, Irp); if (!NT_SUCCESS(Status)) { - ERR("drop_roots returned %08x\n", Status); + ERR("flush_subvol returned %08x\n", Status); return Status; } } - - if (!IsListEmpty(&Vcb->chunks_changed)) { - Status = update_chunks(Vcb, &batchlist, Irp, rollback); - + + if (!IsListEmpty(&Vcb->drop_roots)) { + Status = drop_roots(Vcb, Irp, rollback); + if (!NT_SUCCESS(Status)) { - ERR("update_chunks returned %08x\n", Status); + ERR("drop_roots returned %08x\n", Status); return Status; } } - - commit_batch_list(Vcb, &batchlist, Irp, rollback); - + + Status = update_chunks(Vcb, &batchlist, Irp, rollback); + + if (!NT_SUCCESS(Status)) { + ERR("update_chunks returned %08x\n", Status); + return Status; + } + + Status = commit_batch_list(Vcb, &batchlist, Irp); + // If only changing superblock, e.g. changing label, we still need to rewrite // the root tree so the generations match, otherwise you won't be able to mount on Linux. 
if (!Vcb->root_root->treeholder.tree || !Vcb->root_root->treeholder.tree->write) { KEY searchkey; - + traverse_ptr tp; - + searchkey.obj_id = 0; searchkey.obj_type = 0; searchkey.offset = 0; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + Vcb->root_root->treeholder.tree->write = TRUE; } - + // make sure we always update the extent tree - Status = add_root_item_to_cache(Vcb, BTRFS_ROOT_EXTENT, Irp, rollback); + Status = add_root_item_to_cache(Vcb, BTRFS_ROOT_EXTENT, Irp); if (!NT_SUCCESS(Status)) { ERR("add_root_item_to_cache returned %08x\n", Status); return Status; } - + + if (Vcb->stats_changed) { + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (dev->stats_changed) { + Status = flush_changed_dev_stats(Vcb, dev, Irp); + if (!NT_SUCCESS(Status)) { + ERR("flush_changed_dev_stats returned %08x\n", Status); + return Status; + } + dev->stats_changed = FALSE; + } + + le = le->Flink; + } + + Vcb->stats_changed = FALSE; + } + do { - Status = add_parents(Vcb, Irp, rollback); + Status = add_parents(Vcb, Irp); if (!NT_SUCCESS(Status)) { ERR("add_parents returned %08x\n", Status); goto end; } - - Status = do_splits(Vcb, Irp, rollback); + + Status = allocate_tree_extents(Vcb, Irp, rollback); if (!NT_SUCCESS(Status)) { - ERR("do_splits returned %08x\n", Status); + ERR("allocate_tree_extents returned %08x\n", Status); goto end; } - - Status = allocate_tree_extents(Vcb, Irp, rollback); + + Status = do_splits(Vcb, Irp, rollback); if (!NT_SUCCESS(Status)) { - ERR("add_parents returned %08x\n", Status); + ERR("do_splits returned %08x\n", Status); goto end; } - + Status = update_chunk_usage(Vcb, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("update_chunk_usage returned %08x\n", Status); goto end; } - - Status = allocate_cache(Vcb, &cache_changed, Irp, rollback); - if 
(!NT_SUCCESS(Status)) { - ERR("allocate_cache returned %08x\n", Status); - goto end; + + if (!(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE)) { + if (!no_cache) { + Status = allocate_cache(Vcb, &cache_changed, Irp, rollback); + if (!NT_SUCCESS(Status)) { + WARN("allocate_cache returned %08x\n", Status); + no_cache = TRUE; + cache_changed = FALSE; + } + } + } else { + Status = update_chunk_caches_tree(Vcb, Irp); + if (!NT_SUCCESS(Status)) { + ERR("update_chunk_caches_tree returned %08x\n", Status); + goto end; + } } #ifdef DEBUG_WRITE_LOOPS loops++; - + if (cache_changed) ERR("cache has changed, looping again\n"); #endif - } while (cache_changed || !trees_consistent(Vcb, rollback)); - + } while (cache_changed || !trees_consistent(Vcb)); + #ifdef DEBUG_WRITE_LOOPS ERR("%u loops\n", loops); #endif - + TRACE("trees consistent\n"); - - Status = update_root_root(Vcb, Irp, rollback); + + Status = update_root_root(Vcb, no_cache, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("update_root_root returned %08x\n", Status); goto end; } - + Status = write_trees(Vcb, Irp); if (!NT_SUCCESS(Status)) { ERR("write_trees returned %08x\n", Status); goto end; } - + + Status = test_not_full(Vcb); + if (!NT_SUCCESS(Status)) { + ERR("test_not_full returned %08x\n", Status); + goto end; + } + #ifdef DEBUG_PARANOID le = Vcb->trees.Flink; while (le != &Vcb->trees) { tree* t = CONTAINING_RECORD(le, tree, list_entry); KEY searchkey; traverse_ptr tp; - + searchkey.obj_id = t->header.address; searchkey.obj_type = TYPE_METADATA_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); - int3; + goto end; } - + if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { searchkey.obj_id = t->header.address; searchkey.obj_type = TYPE_EXTENT_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = 
find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); - int3; + goto end; } - + if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { ERR("error - could not find entry in extent tree for tree at %llx\n", t->header.address); - int3; + Status = STATUS_INTERNAL_ERROR; + goto end; } } - + le = le->Flink; } #endif - + Vcb->superblock.cache_generation = Vcb->superblock.generation; - + + if (!Vcb->options.no_barrier) + flush_disk_caches(Vcb); + Status = write_superblocks(Vcb, Irp); if (!NT_SUCCESS(Status)) { ERR("write_superblocks returned %08x\n", Status); goto end; } - + + vde = Vcb->vde; + + if (vde) { + pdo_device_extension* pdode = vde->pdode; + + ExAcquireResourceSharedLite(&pdode->child_lock, TRUE); + + le = pdode->children.Flink; + + while (le != &pdode->children) { + volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry); + + vc->generation = Vcb->superblock.generation; + le = le->Flink; + } + + ExReleaseResourceLite(&pdode->child_lock); + } + clean_space_cache(Vcb); - + + le = Vcb->chunks.Flink; + while (le != &Vcb->chunks) { + chunk* c = CONTAINING_RECORD(le, chunk, list_entry); + + c->changed = FALSE; + c->space_changed = FALSE; + + le = le->Flink; + } + Vcb->superblock.generation++; - + Status = STATUS_SUCCESS; - + le = Vcb->trees.Flink; while (le != &Vcb->trees) { tree* t = CONTAINING_RECORD(le, tree, list_entry); t->write = FALSE; - + le = le->Flink; } - + Vcb->need_write = FALSE; - + while (!IsListEmpty(&Vcb->drop_roots)) { - LIST_ENTRY* le = RemoveHeadList(&Vcb->drop_roots); - root* r = CONTAINING_RECORD(le, root, list_entry); + root* r = CONTAINING_RECORD(RemoveHeadList(&Vcb->drop_roots), root, list_entry); ExDeleteResourceLite(&r->nonpaged->load_tree_lock); ExFreePool(r->nonpaged); ExFreePool(r); } - + end: TRACE("do_write returning %08x\n", Status); - + + return Status; +} + +NTSTATUS do_write(device_extension* Vcb, 
PIRP Irp) { + LIST_ENTRY rollback; + NTSTATUS Status; + + InitializeListHead(&rollback); + + Status = do_write2(Vcb, Irp, &rollback); + + if (!NT_SUCCESS(Status)) { + ERR("do_write2 returned %08x, dropping into readonly mode\n", Status); + Vcb->readonly = TRUE; + FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_FORCED_CLOSED); + do_rollback(Vcb, &rollback); + } else + clear_rollback(&rollback); + return Status; } @@ -5633,25 +7296,23 @@ static void print_stats(device_extension* Vcb) { ERR("csum time taken: %llu\n", Vcb->stats.read_csum_time); ERR("disk time taken: %llu\n", Vcb->stats.read_disk_time); ERR("other time taken: %llu\n", Vcb->stats.read_total_time - Vcb->stats.read_csum_time - Vcb->stats.read_disk_time); - + ERR("OPEN STATS:\n"); ERR("number of opens: %llu\n", Vcb->stats.num_opens); ERR("total time taken: %llu\n", Vcb->stats.open_total_time); ERR("number of overwrites: %llu\n", Vcb->stats.num_overwrites); ERR("total time taken: %llu\n", Vcb->stats.overwrite_total_time); ERR("number of creates: %llu\n", Vcb->stats.num_creates); + ERR("calls to open_fcb: %llu\n", Vcb->stats.open_fcb_calls); + ERR("time spent in open_fcb: %llu\n", Vcb->stats.open_fcb_time); ERR("total time taken: %llu\n", Vcb->stats.create_total_time); - + RtlZeroMemory(&Vcb->stats, sizeof(debug_stats)); } #endif static void do_flush(device_extension* Vcb) { - LIST_ENTRY rollback; - - InitializeListHead(&rollback); - - FsRtlEnterFileSystem(); + NTSTATUS Status; ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); @@ -5660,46 +7321,52 @@ static void do_flush(device_extension* Vcb) { #endif if (Vcb->need_write && !Vcb->readonly) - do_write(Vcb, NULL, &rollback); - + Status = do_write(Vcb, NULL); + else + Status = STATUS_SUCCESS; + free_trees(Vcb); - - clear_rollback(Vcb, &rollback); - ExReleaseResourceLite(&Vcb->tree_lock); + if (!NT_SUCCESS(Status)) + ERR("do_write returned %08x\n", Status); - FsRtlExitFileSystem(); + ExReleaseResourceLite(&Vcb->tree_lock); } -void STDCALL 
flush_thread(void* context) { +_Function_class_(KSTART_ROUTINE) +#ifdef __REACTOS__ +void NTAPI flush_thread(void* context) { +#else +void flush_thread(void* context) { +#endif DEVICE_OBJECT* devobj = context; device_extension* Vcb = devobj->DeviceExtension; LARGE_INTEGER due_time; - + ObReferenceObject(devobj); - + KeInitializeTimer(&Vcb->flush_thread_timer); - + due_time.QuadPart = (UINT64)Vcb->options.flush_interval * -10000000; - + KeSetTimer(&Vcb->flush_thread_timer, due_time, NULL); - + while (TRUE) { KeWaitForSingleObject(&Vcb->flush_thread_timer, Executive, KernelMode, FALSE, NULL); if (!(devobj->Vpb->Flags & VPB_MOUNTED) || Vcb->removing) break; - + if (!Vcb->locked) do_flush(Vcb); - + KeSetTimer(&Vcb->flush_thread_timer, due_time, NULL); } - + ObDereferenceObject(devobj); KeCancelTimer(&Vcb->flush_thread_timer); - + KeSetEvent(&Vcb->flush_thread_finished, 0, FALSE); - + PsTerminateSystemThread(STATUS_SUCCESS); } diff --git a/reactos/drivers/filesystems/btrfs/free-space.c b/reactos/drivers/filesystems/btrfs/free-space.c index 9c631301717..1ed2da93615 100644 --- a/reactos/drivers/filesystems/btrfs/free-space.c +++ b/reactos/drivers/filesystems/btrfs/free-space.c @@ -1,17 +1,17 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . 
*/ @@ -21,20 +21,18 @@ // this be a constant number of sectors, a constant 256 KB, or what? #define CACHE_INCREMENTS 64 -// #define DEBUG_SPACE_LISTS - static NTSTATUS remove_free_space_inode(device_extension* Vcb, UINT64 inode, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) { NTSTATUS Status; fcb* fcb; - + Status = open_fcb(Vcb, Vcb->root_root, inode, BTRFS_TYPE_FILE, NULL, NULL, &fcb, PagedPool, Irp); if (!NT_SUCCESS(Status)) { ERR("open_fcb returned %08x\n", Status); return Status; } - - fcb->dirty = TRUE; - + + mark_fcb_dirty(fcb); + if (fcb->inode_item.st_size > 0) { Status = excise_extents(fcb->Vcb, fcb, 0, sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size), Irp, rollback); if (!NT_SUCCESS(Status)) { @@ -42,12 +40,17 @@ static NTSTATUS remove_free_space_inode(device_extension* Vcb, UINT64 inode, LIS return Status; } } - + fcb->deleted = TRUE; - - flush_fcb(fcb, FALSE, batchlist, Irp, rollback); - - free_fcb(fcb); + + Status = flush_fcb(fcb, FALSE, batchlist, Irp); + if (!NT_SUCCESS(Status)) { + ERR("flush_fcb returned %08x\n", Status); + free_fcb(Vcb, fcb); + return Status; + } + + free_fcb(Vcb, fcb); return STATUS_SUCCESS; } @@ -58,73 +61,134 @@ NTSTATUS clear_free_space_cache(device_extension* Vcb, LIST_ENTRY* batchlist, PI NTSTATUS Status; BOOL b; LIST_ENTRY rollback; - + InitializeListHead(&rollback); - + searchkey.obj_id = FREE_SPACE_CACHE_ID; searchkey.obj_type = 0; searchkey.offset = 0; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + do { if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type)) break; - + if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { - delete_tree_item(Vcb, &tp, &rollback); - + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + 
ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + if (tp.item->size >= sizeof(FREE_SPACE_ITEM)) { FREE_SPACE_ITEM* fsi = (FREE_SPACE_ITEM*)tp.item->data; - + if (fsi->key.obj_type != TYPE_INODE_ITEM) WARN("key (%llx,%x,%llx) does not point to an INODE_ITEM\n", fsi->key.obj_id, fsi->key.obj_type, fsi->key.offset); else { LIST_ENTRY* le; - + Status = remove_free_space_inode(Vcb, fsi->key.obj_id, batchlist, Irp, &rollback); - + if (!NT_SUCCESS(Status)) ERR("remove_free_space_inode for (%llx,%x,%llx) returned %08x\n", fsi->key.obj_id, fsi->key.obj_type, fsi->key.offset, Status); - + le = Vcb->chunks.Flink; while (le != &Vcb->chunks) { chunk* c = CONTAINING_RECORD(le, chunk, list_entry); - + if (c->offset == tp.item->key.offset && c->cache) { - free_fcb(c->cache); + free_fcb(Vcb, c->cache); c->cache = NULL; } - + le = le->Flink; } } } else WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(FREE_SPACE_ITEM)); } - + b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp); if (b) tp = next_tp; } while (b); - + Status = STATUS_SUCCESS; - + if (NT_SUCCESS(Status)) - clear_rollback(Vcb, &rollback); + clear_rollback(&rollback); else do_rollback(Vcb, &rollback); - + + if (Vcb->space_root) { + searchkey.obj_id = 0; + searchkey.obj_type = 0; + searchkey.offset = 0; + + Status = find_item(Vcb, Vcb->space_root, &tp, &searchkey, FALSE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + do { + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + + b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp); + if (b) + tp = next_tp; + } while (b); + } + + // regenerate free space tree + if (Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE) { + LIST_ENTRY* le; + + ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE); + + le = 
Vcb->chunks.Flink; + while (le != &Vcb->chunks) { + chunk* c = CONTAINING_RECORD(le, chunk, list_entry); + + if (!c->cache_loaded) { + ExAcquireResourceExclusiveLite(&c->lock, TRUE); + + Status = load_cache_chunk(Vcb, c, NULL); + if (!NT_SUCCESS(Status)) { + ERR("load_cache_chunk(%llx) returned %08x\n", c->offset, Status); + ExReleaseResourceLite(&c->lock); + ExReleaseResourceLite(&Vcb->chunk_lock); + return Status; + } + + c->changed = TRUE; + c->space_changed = TRUE; + + ExReleaseResourceLite(&c->lock); + } + + le = le->Flink; + } + + ExReleaseResourceLite(&Vcb->chunk_lock); + } + return Status; } NTSTATUS add_space_entry(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 offset, UINT64 size) { space* s; - + s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG); if (!s) { @@ -134,59 +198,59 @@ NTSTATUS add_space_entry(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 offset, s->address = offset; s->size = size; - + if (IsListEmpty(list)) InsertTailList(list, &s->list_entry); else { space* s2 = CONTAINING_RECORD(list->Blink, space, list_entry); - + if (s2->address < offset) InsertTailList(list, &s->list_entry); else { LIST_ENTRY* le; - + le = list->Flink; while (le != list) { s2 = CONTAINING_RECORD(le, space, list_entry); - + if (s2->address > offset) { InsertTailList(le, &s->list_entry); goto size; } - + le = le->Flink; } } } - + size: if (!list_size) return STATUS_SUCCESS; - + if (IsListEmpty(list_size)) InsertTailList(list_size, &s->list_entry_size); else { space* s2 = CONTAINING_RECORD(list_size->Blink, space, list_entry_size); - + if (s2->size >= size) InsertTailList(list_size, &s->list_entry_size); else { LIST_ENTRY* le; - + le = list_size->Flink; while (le != list_size) { s2 = CONTAINING_RECORD(le, space, list_entry_size); - + if (s2->size <= size) { InsertHeadList(le->Blink, &s->list_entry_size); return STATUS_SUCCESS; } - + le = le->Flink; } } } - + return STATUS_SUCCESS; } @@ -194,7 +258,7 @@ static void load_free_space_bitmap(device_extension* Vcb, 
chunk* c, UINT64 offse RTL_BITMAP bmph; UINT32 i, *dwords = data; ULONG runlength, index; - + // flip bits for (i = 0; i < Vcb->superblock.sector_size / sizeof(UINT32); i++) { dwords[i] = ~dwords[i]; @@ -204,42 +268,42 @@ static void load_free_space_bitmap(device_extension* Vcb, chunk* c, UINT64 offse index = 0; runlength = RtlFindFirstRunClear(&bmph, &index); - + while (runlength != 0) { UINT64 addr, length; - + addr = offset + (index * Vcb->superblock.sector_size); length = Vcb->superblock.sector_size * runlength; - + add_space_entry(&c->space, &c->space_size, addr, length); index += runlength; *total_space += length; - + runlength = RtlFindNextForwardRunClear(&bmph, index, &index); } } static void order_space_entry(space* s, LIST_ENTRY* list_size) { LIST_ENTRY* le; - + if (IsListEmpty(list_size)) { InsertHeadList(list_size, &s->list_entry_size); return; } - + le = list_size->Flink; - + while (le != list_size) { space* s2 = CONTAINING_RECORD(le, space, list_entry_size); - + if (s2->size <= s->size) { InsertHeadList(le->Blink, &s->list_entry_size); return; } - + le = le->Flink; } - + InsertTailList(list_size, &s->list_entry_size); } @@ -250,37 +314,37 @@ typedef struct { static NTSTATUS add_superblock_stripe(LIST_ENTRY* stripes, UINT64 off, UINT64 len) { UINT64 i; - + for (i = 0; i < len; i++) { LIST_ENTRY* le; superblock_stripe* ss; BOOL ignore = FALSE; - + le = stripes->Flink; while (le != stripes) { ss = CONTAINING_RECORD(le, superblock_stripe, list_entry); - + if (ss->stripe == off + i) { ignore = TRUE; break; } - + le = le->Flink; } - + if (ignore) continue; - + ss = ExAllocatePoolWithTag(PagedPool, sizeof(superblock_stripe), ALLOC_TAG); if (!ss) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + ss->stripe = off + i; InsertTailList(stripes, &ss->list_entry); } - + return STATUS_SUCCESS; } @@ -288,17 +352,17 @@ static NTSTATUS get_superblock_size(chunk* c, UINT64* size) { NTSTATUS Status; CHUNK_ITEM* ci = c->chunk_item; CHUNK_ITEM_STRIPE* 
cis = (CHUNK_ITEM_STRIPE*)&ci[1]; - UINT64 off_start, off_end, space; + UINT64 off_start, off_end, space = 0; UINT16 i = 0, j; LIST_ENTRY stripes; - + InitializeListHead(&stripes); - + while (superblock_addrs[i] != 0) { if (ci->type & BLOCK_FLAG_RAID0 || ci->type & BLOCK_FLAG_RAID10) { for (j = 0; j < ci->num_stripes; j++) { ULONG sub_stripes = max(ci->sub_stripes, 1); - + if (cis[j].offset + (ci->size * ci->num_stripes / sub_stripes) > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) { off_start = superblock_addrs[i] - cis[j].offset; off_start -= off_start % ci->stripe_length; @@ -306,7 +370,7 @@ static NTSTATUS get_superblock_size(chunk* c, UINT64* size) { off_start += (j / sub_stripes) * ci->stripe_length; off_end = off_start + ci->stripe_length; - + Status = add_superblock_stripe(&stripes, off_start / ci->stripe_length, 1); if (!NT_SUCCESS(Status)) { ERR("add_superblock_stripe returned %08x\n", Status); @@ -317,7 +381,7 @@ static NTSTATUS get_superblock_size(chunk* c, UINT64* size) { } else if (ci->type & BLOCK_FLAG_RAID5) { for (j = 0; j < ci->num_stripes; j++) { UINT64 stripe_size = ci->size / (ci->num_stripes - 1); - + if (cis[j].offset + stripe_size > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) { off_start = superblock_addrs[i] - cis[j].offset; off_start -= off_start % (ci->stripe_length * (ci->num_stripes - 1)); @@ -335,7 +399,7 @@ static NTSTATUS get_superblock_size(chunk* c, UINT64* size) { } else if (ci->type & BLOCK_FLAG_RAID6) { for (j = 0; j < ci->num_stripes; j++) { UINT64 stripe_size = ci->size / (ci->num_stripes - 2); - + if (cis[j].offset + stripe_size > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) { off_start = superblock_addrs[i] - cis[j].offset; off_start -= off_start % (ci->stripe_length * (ci->num_stripes - 2)); @@ -355,7 +419,7 @@ static NTSTATUS get_superblock_size(chunk* c, UINT64* size) { if (cis[j].offset + ci->size > 
superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) { off_start = ((superblock_addrs[i] - cis[j].offset) / c->chunk_item->stripe_length) * c->chunk_item->stripe_length; off_end = sector_align(superblock_addrs[i] - cis[j].offset + sizeof(superblock), c->chunk_item->stripe_length); - + Status = add_superblock_stripe(&stripes, off_start / ci->stripe_length, (off_end - off_start) / ci->stripe_length); if (!NT_SUCCESS(Status)) { ERR("add_superblock_stripe returned %08x\n", Status); @@ -364,142 +428,143 @@ static NTSTATUS get_superblock_size(chunk* c, UINT64* size) { } } } - + i++; } - - space = 0; - + Status = STATUS_SUCCESS; - + end: while (!IsListEmpty(&stripes)) { LIST_ENTRY* le = RemoveHeadList(&stripes); superblock_stripe* ss = CONTAINING_RECORD(le, superblock_stripe, list_entry); - + space++; - + ExFreePool(ss); } - + if (NT_SUCCESS(Status)) *size = space * ci->stripe_length; - + return Status; } -static NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, PIRP Irp) { +NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, BOOL load_only, PIRP Irp) { KEY searchkey; traverse_ptr tp; FREE_SPACE_ITEM* fsi; - UINT64 inode, num_sectors, num_valid_sectors, i, *generation; + UINT64 inode, *generation; UINT8* data; NTSTATUS Status; - UINT32 *checksums, crc32; + UINT32 *checksums, crc32, i, num_sectors, num_valid_sectors, size; FREE_SPACE_ENTRY* fse; - UINT64 size, num_entries, num_bitmaps, extent_length, bmpnum, off, total_space = 0, superblock_size; + UINT64 num_entries, num_bitmaps, extent_length, bmpnum, off, total_space = 0, superblock_size; LIST_ENTRY *le, rollback; - + // FIXME - does this break if Vcb->superblock.sector_size is not 4096? 
- + TRACE("(%p, %llx)\n", Vcb, c->offset); - + searchkey.obj_id = FREE_SPACE_CACHE_ID; searchkey.obj_type = 0; searchkey.offset = c->offset; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (keycmp(tp.item->key, searchkey)) { TRACE("(%llx,%x,%llx) not found\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); return STATUS_NOT_FOUND; } - + if (tp.item->size < sizeof(FREE_SPACE_ITEM)) { WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(FREE_SPACE_ITEM)); return STATUS_NOT_FOUND; } - + fsi = (FREE_SPACE_ITEM*)tp.item->data; - + if (fsi->key.obj_type != TYPE_INODE_ITEM) { WARN("cache pointed to something other than an INODE_ITEM\n"); return STATUS_NOT_FOUND; } - + inode = fsi->key.obj_id; num_entries = fsi->num_entries; num_bitmaps = fsi->num_bitmaps; - + Status = open_fcb(Vcb, Vcb->root_root, inode, BTRFS_TYPE_FILE, NULL, NULL, &c->cache, PagedPool, Irp); if (!NT_SUCCESS(Status)) { ERR("open_fcb returned %08x\n", Status); return STATUS_NOT_FOUND; } - + + if (load_only) + return STATUS_SUCCESS; + if (c->cache->inode_item.st_size == 0) { WARN("cache had zero length\n"); - free_fcb(c->cache); + free_fcb(Vcb, c->cache); c->cache = NULL; return STATUS_NOT_FOUND; } - + c->cache->inode_item.flags |= BTRFS_INODE_NODATACOW; - + if (num_entries == 0 && num_bitmaps == 0) return STATUS_SUCCESS; - - size = sector_align(c->cache->inode_item.st_size, Vcb->superblock.sector_size); - + + size = (UINT32)sector_align(c->cache->inode_item.st_size, Vcb->superblock.sector_size); + data = ExAllocatePoolWithTag(PagedPool, size, ALLOC_TAG); - + if (!data) { ERR("out of memory\n"); - free_fcb(c->cache); + free_fcb(Vcb, c->cache); c->cache = NULL; return STATUS_INSUFFICIENT_RESOURCES; } - - Status = read_file(c->cache, data, 0, c->cache->inode_item.st_size, NULL, NULL, 
FALSE); + + Status = read_file(c->cache, data, 0, c->cache->inode_item.st_size, NULL, NULL); if (!NT_SUCCESS(Status)) { ERR("read_file returned %08x\n", Status); ExFreePool(data); - + c->cache->deleted = TRUE; mark_fcb_dirty(c->cache); - - free_fcb(c->cache); + + free_fcb(Vcb, c->cache); c->cache = NULL; return STATUS_NOT_FOUND; } - + if (size > c->cache->inode_item.st_size) - RtlZeroMemory(&data[c->cache->inode_item.st_size], size - c->cache->inode_item.st_size); - + RtlZeroMemory(&data[c->cache->inode_item.st_size], (ULONG)(size - c->cache->inode_item.st_size)); + num_sectors = size / Vcb->superblock.sector_size; - + generation = (UINT64*)(data + (num_sectors * sizeof(UINT32))); - + if (*generation != fsi->generation) { WARN("free space cache generation for %llx was %llx, expected %llx\n", c->offset, *generation, fsi->generation); goto clearcache; } - + extent_length = (num_sectors * sizeof(UINT32)) + sizeof(UINT64) + (num_entries * sizeof(FREE_SPACE_ENTRY)); - - num_valid_sectors = (sector_align(extent_length, Vcb->superblock.sector_size) / Vcb->superblock.sector_size) + num_bitmaps; - + + num_valid_sectors = (ULONG)((sector_align(extent_length, Vcb->superblock.sector_size) / Vcb->superblock.sector_size) + num_bitmaps); + if (num_valid_sectors > num_sectors) { ERR("free space cache for %llx was %llx sectors, expected at least %llx\n", c->offset, num_sectors, num_valid_sectors); goto clearcache; } - + checksums = (UINT32*)data; - + for (i = 0; i < num_valid_sectors; i++) { if (i * Vcb->superblock.sector_size > sizeof(UINT32) * num_sectors) crc32 = ~calc_crc32c(0xffffffff, &data[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size); @@ -507,22 +572,22 @@ static NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, PI crc32 = 0; // FIXME - test this else crc32 = ~calc_crc32c(0xffffffff, &data[sizeof(UINT32) * num_sectors], ((i + 1) * Vcb->superblock.sector_size) - (sizeof(UINT32) * num_sectors)); - + if (crc32 != checksums[i]) { 
WARN("checksum %llu was %08x, expected %08x\n", i, crc32, checksums[i]); goto clearcache; } } - + off = (sizeof(UINT32) * num_sectors) + sizeof(UINT64); bmpnum = 0; for (i = 0; i < num_entries; i++) { if ((off + sizeof(FREE_SPACE_ENTRY)) / Vcb->superblock.sector_size != off / Vcb->superblock.sector_size) off = sector_align(off, Vcb->superblock.sector_size); - + fse = (FREE_SPACE_ENTRY*)&data[off]; - + if (fse->type == FREE_SPACE_EXTENT) { Status = add_space_entry(&c->space, &c->space_size, fse->offset, fse->size); if (!NT_SUCCESS(Status)) { @@ -530,35 +595,35 @@ static NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, PI ExFreePool(data); return Status; } - + total_space += fse->size; } else if (fse->type != FREE_SPACE_BITMAP) { ERR("unknown free-space type %x\n", fse->type); } - + off += sizeof(FREE_SPACE_ENTRY); } - + if (num_bitmaps > 0) { bmpnum = sector_align(off, Vcb->superblock.sector_size) / Vcb->superblock.sector_size; off = (sizeof(UINT32) * num_sectors) + sizeof(UINT64); - + for (i = 0; i < num_entries; i++) { if ((off + sizeof(FREE_SPACE_ENTRY)) / Vcb->superblock.sector_size != off / Vcb->superblock.sector_size) off = sector_align(off, Vcb->superblock.sector_size); - + fse = (FREE_SPACE_ENTRY*)&data[off]; - + if (fse->type == FREE_SPACE_BITMAP) { // FIXME - make sure we don't overflow the buffer here load_free_space_bitmap(Vcb, c, fse->offset, &data[bmpnum * Vcb->superblock.sector_size], &total_space); bmpnum++; } - + off += sizeof(FREE_SPACE_ENTRY); } } - + // do sanity check Status = get_superblock_size(c, &superblock_size); @@ -567,210 +632,377 @@ static NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, PI ExFreePool(data); return Status; } - + if (c->chunk_item->size - c->used != total_space + superblock_size) { WARN("invalidating cache for chunk %llx: space was %llx, expected %llx\n", c->offset, total_space + superblock_size, c->chunk_item->size - c->used); goto clearcache; } - + le = c->space.Flink; while 
(le != &c->space) { space* s = CONTAINING_RECORD(le, space, list_entry); LIST_ENTRY* le2 = le->Flink; - + if (le2 != &c->space) { space* s2 = CONTAINING_RECORD(le2, space, list_entry); - + if (s2->address == s->address + s->size) { s->size += s2->size; - + RemoveEntryList(&s2->list_entry); RemoveEntryList(&s2->list_entry_size); ExFreePool(s2); - + RemoveEntryList(&s->list_entry_size); order_space_entry(s, &c->space_size); - + le2 = le; } } - + le = le2; } - + ExFreePool(data); - + return STATUS_SUCCESS; - + clearcache: ExFreePool(data); - + InitializeListHead(&rollback); - - delete_tree_item(Vcb, &tp, &rollback); - + + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + return Status; + } + Status = excise_extents(Vcb, c->cache, 0, c->cache->inode_item.st_size, Irp, &rollback); if (!NT_SUCCESS(Status)) { ERR("excise_extents returned %08x\n", Status); do_rollback(Vcb, &rollback); return Status; } - - clear_rollback(Vcb, &rollback); - + + clear_rollback(&rollback); + c->cache->deleted = TRUE; mark_fcb_dirty(c->cache); - - free_fcb(c->cache); + + c->old_cache = c->cache; c->cache = NULL; + + le = c->space.Flink; + while (le != &c->space) { + space* s = CONTAINING_RECORD(le, space, list_entry); + LIST_ENTRY* le2 = le->Flink; + + RemoveEntryList(&s->list_entry); + RemoveEntryList(&s->list_entry_size); + ExFreePool(s); + + le = le2; + } + return STATUS_NOT_FOUND; } -NTSTATUS load_free_space_cache(device_extension* Vcb, chunk* c, PIRP Irp) { - traverse_ptr tp, next_tp; +static NTSTATUS load_stored_free_space_tree(device_extension* Vcb, chunk* c, PIRP Irp) { KEY searchkey; - UINT64 lastaddr; - BOOL b; - space* s; + traverse_ptr tp, next_tp; NTSTATUS Status; -// LIST_ENTRY* le; - - if (Vcb->superblock.generation - 1 == Vcb->superblock.cache_generation) { - Status = load_stored_free_space_cache(Vcb, c, Irp); - - if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) { - ERR("load_stored_free_space_cache 
returned %08x\n", Status); - return Status; - } - } else - Status = STATUS_NOT_FOUND; - - if (Status == STATUS_NOT_FOUND) { - TRACE("generating free space cache for chunk %llx\n", c->offset); - - searchkey.obj_id = c->offset; - searchkey.obj_type = TYPE_EXTENT_ITEM; - searchkey.offset = 0; - - Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - return Status; - } - - lastaddr = c->offset; - - do { - if (tp.item->key.obj_id >= c->offset + c->chunk_item->size) - break; - - if (tp.item->key.obj_id >= c->offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) { - if (tp.item->key.obj_id > lastaddr) { - s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG); - - if (!s) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - s->address = lastaddr; - s->size = tp.item->key.obj_id - lastaddr; - InsertTailList(&c->space, &s->list_entry); - - order_space_entry(s, &c->space_size); - - TRACE("(%llx,%llx)\n", s->address, s->size); - } - - if (tp.item->key.obj_type == TYPE_METADATA_ITEM) - lastaddr = tp.item->key.obj_id + Vcb->superblock.node_size; - else - lastaddr = tp.item->key.obj_id + tp.item->key.offset; - } - - b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp); - if (b) - tp = next_tp; - } while (b); - - if (lastaddr < c->offset + c->chunk_item->size) { - s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG); - - if (!s) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - s->address = lastaddr; - s->size = c->offset + c->chunk_item->size - lastaddr; - InsertTailList(&c->space, &s->list_entry); - - order_space_entry(s, &c->space_size); - - TRACE("(%llx,%llx)\n", s->address, s->size); - } + ULONG* bmparr = NULL; + ULONG bmplen = 0; + LIST_ENTRY* le; + + TRACE("(%p, %llx)\n", Vcb, c->offset); + + if (!Vcb->space_root) + return STATUS_NOT_FOUND; + + searchkey.obj_id = 
c->offset; + searchkey.obj_type = TYPE_FREE_SPACE_INFO; + searchkey.offset = c->chunk_item->size; + + Status = find_item(Vcb, Vcb->space_root, &tp, &searchkey, FALSE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; } - -// le = c->space_size.Flink; -// while (le != &c->space_size) { -// space* s = CONTAINING_RECORD(le, space, list_entry_size); -// -// ERR("(%llx, %llx)\n", s->address, s->size); -// -// le = le->Flink; -// } -// ERR("---\n"); - return STATUS_SUCCESS; -} + if (keycmp(tp.item->key, searchkey)) { + TRACE("(%llx,%x,%llx) not found\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); + return STATUS_NOT_FOUND; + } -static NTSTATUS insert_cache_extent(fcb* fcb, UINT64 start, UINT64 length, LIST_ENTRY* rollback) { - LIST_ENTRY* le = fcb->Vcb->chunks.Flink; + if (tp.item->size < sizeof(FREE_SPACE_INFO)) { + WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(FREE_SPACE_INFO)); + return STATUS_NOT_FOUND; + } + + while (find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) { + tp = next_tp; + + if (tp.item->key.obj_id >= c->offset + c->chunk_item->size) + break; + + if (tp.item->key.obj_type == TYPE_FREE_SPACE_EXTENT) { + Status = add_space_entry(&c->space, &c->space_size, tp.item->key.obj_id, tp.item->key.offset); + if (!NT_SUCCESS(Status)) { + ERR("add_space_entry returned %08x\n", Status); + if (bmparr) ExFreePool(bmparr); + return Status; + } + } else if (tp.item->key.obj_type == TYPE_FREE_SPACE_BITMAP) { + ULONG explen, index, runlength; + RTL_BITMAP bmp; + UINT64 lastoff; + + explen = (ULONG)(tp.item->key.offset / (Vcb->superblock.sector_size * 8)); + + if (tp.item->size < explen) { + WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, explen); + return STATUS_NOT_FOUND; + } else if (tp.item->size == 0) { + WARN("(%llx,%x,%llx) has size of 
0\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); + return STATUS_NOT_FOUND; + } + + if (bmplen < tp.item->size) { + if (bmparr) + ExFreePool(bmparr); + + bmplen = (ULONG)sector_align(tp.item->size, sizeof(ULONG)); + bmparr = ExAllocatePoolWithTag(PagedPool, bmplen, ALLOC_TAG); + if (!bmparr) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + } + + // We copy the bitmap because it supposedly has to be ULONG-aligned + RtlCopyMemory(bmparr, tp.item->data, tp.item->size); + + RtlInitializeBitMap(&bmp, bmparr, (ULONG)(tp.item->key.offset / Vcb->superblock.sector_size)); + + lastoff = tp.item->key.obj_id; + + runlength = RtlFindFirstRunClear(&bmp, &index); + + while (runlength != 0) { + UINT64 runstart = tp.item->key.obj_id + (index * Vcb->superblock.sector_size); + UINT64 runend = runstart + (runlength * Vcb->superblock.sector_size); + + if (runstart > lastoff) { + Status = add_space_entry(&c->space, &c->space_size, lastoff, runstart - lastoff); + if (!NT_SUCCESS(Status)) { + ERR("add_space_entry returned %08x\n", Status); + if (bmparr) ExFreePool(bmparr); + return Status; + } + } + + lastoff = runend; + + runlength = RtlFindNextForwardRunClear(&bmp, index + runlength, &index); + } + + if (lastoff < tp.item->key.obj_id + tp.item->key.offset) { + Status = add_space_entry(&c->space, &c->space_size, lastoff, tp.item->key.obj_id + tp.item->key.offset - lastoff); + if (!NT_SUCCESS(Status)) { + ERR("add_space_entry returned %08x\n", Status); + if (bmparr) ExFreePool(bmparr); + return Status; + } + } + } + } + + if (bmparr) + ExFreePool(bmparr); + + le = c->space.Flink; + while (le != &c->space) { + space* s = CONTAINING_RECORD(le, space, list_entry); + LIST_ENTRY* le2 = le->Flink; + + if (le2 != &c->space) { + space* s2 = CONTAINING_RECORD(le2, space, list_entry); + + if (s2->address == s->address + s->size) { + s->size += s2->size; + + RemoveEntryList(&s2->list_entry); + RemoveEntryList(&s2->list_entry_size); + ExFreePool(s2); + 
+ RemoveEntryList(&s->list_entry_size); + order_space_entry(s, &c->space_size); + + le2 = le; + } + } + + le = le2; + } + + return STATUS_SUCCESS; +} + +static NTSTATUS load_free_space_cache(device_extension* Vcb, chunk* c, PIRP Irp) { + traverse_ptr tp, next_tp; + KEY searchkey; + UINT64 lastaddr; + BOOL b; + space* s; + NTSTATUS Status; + + if (Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE && Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE_VALID) { + Status = load_stored_free_space_tree(Vcb, c, Irp); + + if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) { + ERR("load_stored_free_space_tree returned %08x\n", Status); + return Status; + } + } else if (Vcb->superblock.generation - 1 == Vcb->superblock.cache_generation) { + Status = load_stored_free_space_cache(Vcb, c, FALSE, Irp); + + if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) { + ERR("load_stored_free_space_cache returned %08x\n", Status); + return Status; + } + } else + Status = STATUS_NOT_FOUND; + + if (Status == STATUS_NOT_FOUND) { + TRACE("generating free space cache for chunk %llx\n", c->offset); + + searchkey.obj_id = c->offset; + searchkey.obj_type = TYPE_EXTENT_ITEM; + searchkey.offset = 0; + + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("error - find_item returned %08x\n", Status); + return Status; + } + + lastaddr = c->offset; + + do { + if (tp.item->key.obj_id >= c->offset + c->chunk_item->size) + break; + + if (tp.item->key.obj_id >= c->offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) { + if (tp.item->key.obj_id > lastaddr) { + s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG); + + if (!s) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + s->address = lastaddr; + s->size = tp.item->key.obj_id - lastaddr; + InsertTailList(&c->space, &s->list_entry); + + order_space_entry(s, 
&c->space_size); + + TRACE("(%llx,%llx)\n", s->address, s->size); + } + + if (tp.item->key.obj_type == TYPE_METADATA_ITEM) + lastaddr = tp.item->key.obj_id + Vcb->superblock.node_size; + else + lastaddr = tp.item->key.obj_id + tp.item->key.offset; + } + + b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp); + if (b) + tp = next_tp; + } while (b); + + if (lastaddr < c->offset + c->chunk_item->size) { + s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG); + + if (!s) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + s->address = lastaddr; + s->size = c->offset + c->chunk_item->size - lastaddr; + InsertTailList(&c->space, &s->list_entry); + + order_space_entry(s, &c->space_size); + + TRACE("(%llx,%llx)\n", s->address, s->size); + } + } + + return STATUS_SUCCESS; +} + +NTSTATUS load_cache_chunk(device_extension* Vcb, chunk* c, PIRP Irp) { + NTSTATUS Status; + + if (c->cache_loaded) + return STATUS_SUCCESS; + + Status = load_free_space_cache(Vcb, c, Irp); + if (!NT_SUCCESS(Status)) { + ERR("load_free_space_cache returned %08x\n", Status); + return Status; + } + + protect_superblocks(c); + + c->cache_loaded = TRUE; + + return STATUS_SUCCESS; +} + +static NTSTATUS insert_cache_extent(fcb* fcb, UINT64 start, UINT64 length, LIST_ENTRY* rollback) { + NTSTATUS Status; + LIST_ENTRY* le = fcb->Vcb->chunks.Flink; chunk* c; UINT64 flags; - + flags = fcb->Vcb->data_flags; - - ExAcquireResourceSharedLite(&fcb->Vcb->chunk_lock, TRUE); - + while (le != &fcb->Vcb->chunks) { c = CONTAINING_RECORD(le, chunk, list_entry); - + if (!c->readonly && !c->reloc) { ExAcquireResourceExclusiveLite(&c->lock, TRUE); - + if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) { - if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, FALSE, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, length)) { - ExReleaseResourceLite(&fcb->Vcb->chunk_lock); + if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, FALSE, NULL, NULL, rollback, 
BTRFS_COMPRESSION_NONE, length, FALSE, 0)) return STATUS_SUCCESS; - } } - + ExReleaseResourceLite(&c->lock); } - + le = le->Flink; } - - ExReleaseResourceLite(&fcb->Vcb->chunk_lock); - - ExAcquireResourceExclusiveLite(&fcb->Vcb->chunk_lock, TRUE); - - if ((c = alloc_chunk(fcb->Vcb, flags))) { - ExReleaseResourceLite(&fcb->Vcb->chunk_lock); - - ExAcquireResourceExclusiveLite(&c->lock, TRUE); - - if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) { - if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, FALSE, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, length)) - return STATUS_SUCCESS; - } - - ExReleaseResourceLite(&c->lock); - } else - ExReleaseResourceLite(&fcb->Vcb->chunk_lock); - - WARN("couldn't find any data chunks with %llx bytes free\n", length); + + Status = alloc_chunk(fcb->Vcb, flags, &c, FALSE); + + if (!NT_SUCCESS(Status)) { + ERR("alloc_chunk returned %08x\n", Status); + return Status; + } + + ExAcquireResourceExclusiveLite(&c->lock, TRUE); + + if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) { + if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, FALSE, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, length, FALSE, 0)) + return STATUS_SUCCESS; + } + + ExReleaseResourceLite(&c->lock); return STATUS_DISK_FULL; } @@ -781,14 +1013,14 @@ static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, BOOL* chan UINT64 num_entries, new_cache_size, i; UINT32 num_sectors; BOOL realloc_extents = FALSE; - + // FIXME - also do bitmaps // FIXME - make sure this works when sector_size is not 4096 - + *changed = FALSE; - + num_entries = 0; - + // num_entries is the number of entries in c->space and c->deleting - it might // be slightly higher then what we end up writing, but doing it this way is much // quicker and simpler. 
@@ -800,7 +1032,7 @@ static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, BOOL* chan le = le->Flink; } } - + if (!IsListEmpty(&c->deleting)) { le = c->deleting.Flink; while (le != &c->deleting) { @@ -809,339 +1041,376 @@ static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, BOOL* chan le = le->Flink; } } - + new_cache_size = sizeof(UINT64) + (num_entries * sizeof(FREE_SPACE_ENTRY)); - - num_sectors = sector_align(new_cache_size, Vcb->superblock.sector_size) / Vcb->superblock.sector_size; - num_sectors = sector_align(num_sectors, CACHE_INCREMENTS); - + + num_sectors = (UINT32)sector_align(new_cache_size, Vcb->superblock.sector_size) / Vcb->superblock.sector_size; + num_sectors = (UINT32)sector_align(num_sectors, CACHE_INCREMENTS); + // adjust for padding // FIXME - there must be a more efficient way of doing this new_cache_size = sizeof(UINT64) + (sizeof(UINT32) * num_sectors); for (i = 0; i < num_entries; i++) { if ((new_cache_size / Vcb->superblock.sector_size) != ((new_cache_size + sizeof(FREE_SPACE_ENTRY)) / Vcb->superblock.sector_size)) new_cache_size = sector_align(new_cache_size, Vcb->superblock.sector_size); - + new_cache_size += sizeof(FREE_SPACE_ENTRY); } - + new_cache_size = sector_align(new_cache_size, CACHE_INCREMENTS * Vcb->superblock.sector_size); - + TRACE("chunk %llx: cache_size = %llx, new_cache_size = %llx\n", c->offset, c->cache ? 
c->cache->inode_item.st_size : 0, new_cache_size); - + if (c->cache) { if (new_cache_size > c->cache->inode_item.st_size) realloc_extents = TRUE; else { le = c->cache->extents.Flink; - + while (le != &c->cache->extents) { extent* ext = CONTAINING_RECORD(le, extent, list_entry); - - if (!ext->ignore && (ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC)) { - EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ext->data->data[0]; - + + if (!ext->ignore && (ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC)) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ext->extent_data.data[0]; + if (ed2->size != 0) { chunk* c2 = get_chunk_from_address(Vcb, ed2->address); - + if (c2 && (c2->readonly || c2->reloc)) { realloc_extents = TRUE; break; } } } - + le = le->Flink; } } } - + if (!c->cache) { FREE_SPACE_ITEM* fsi; KEY searchkey; traverse_ptr tp; - + // create new inode - - c->cache = create_fcb(PagedPool); + + c->cache = create_fcb(Vcb, PagedPool); if (!c->cache) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + c->cache->Vcb = Vcb; - + c->cache->inode_item.st_size = new_cache_size; c->cache->inode_item.st_blocks = new_cache_size; c->cache->inode_item.st_nlink = 1; c->cache->inode_item.st_mode = S_IRUSR | S_IWUSR | __S_IFREG; c->cache->inode_item.flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW | BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; - + c->cache->Header.IsFastIoPossible = fast_io_possible(c->cache); c->cache->Header.AllocationSize.QuadPart = 0; c->cache->Header.FileSize.QuadPart = 0; c->cache->Header.ValidDataLength.QuadPart = 0; - + c->cache->subvol = Vcb->root_root; - + c->cache->inode = InterlockedIncrement64(&Vcb->root_root->lastinode); - + c->cache->type = BTRFS_TYPE_FILE; c->cache->created = TRUE; - + // create new free space entry - + fsi = ExAllocatePoolWithTag(PagedPool, sizeof(FREE_SPACE_ITEM), ALLOC_TAG); if (!fsi) { ERR("out of memory\n"); - free_fcb(c->cache); + free_fcb(Vcb, 
c->cache); c->cache = NULL; return STATUS_INSUFFICIENT_RESOURCES; } - + searchkey.obj_id = FREE_SPACE_CACHE_ID; searchkey.obj_type = 0; searchkey.offset = c->offset; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); ExFreePool(fsi); - free_fcb(c->cache); + free_fcb(Vcb, c->cache); c->cache = NULL; return Status; } - - if (!keycmp(searchkey, tp.item->key)) - delete_tree_item(Vcb, &tp, rollback); - + + if (!keycmp(searchkey, tp.item->key)) { + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + ExFreePool(fsi); + free_fcb(Vcb, c->cache); + c->cache = NULL; + return Status; + } + } + fsi->key.obj_id = c->cache->inode; fsi->key.obj_type = TYPE_INODE_ITEM; fsi->key.offset = 0; - - if (!insert_tree_item(Vcb, Vcb->root_root, FREE_SPACE_CACHE_ID, 0, c->offset, fsi, sizeof(FREE_SPACE_ITEM), NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - free_fcb(c->cache); + + Status = insert_tree_item(Vcb, Vcb->root_root, FREE_SPACE_CACHE_ID, 0, c->offset, fsi, sizeof(FREE_SPACE_ITEM), NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + ExFreePool(fsi); + free_fcb(Vcb, c->cache); c->cache = NULL; - return STATUS_INTERNAL_ERROR; + return Status; } - + // allocate space - + Status = insert_cache_extent(c->cache, 0, new_cache_size, rollback); if (!NT_SUCCESS(Status)) { ERR("insert_cache_extent returned %08x\n", Status); - free_fcb(c->cache); + free_fcb(Vcb, c->cache); c->cache = NULL; return Status; } - + c->cache->extents_changed = TRUE; InsertTailList(&Vcb->all_fcbs, &c->cache->list_entry_all); - - flush_fcb(c->cache, TRUE, batchlist, Irp, rollback); - + + Status = flush_fcb(c->cache, TRUE, batchlist, Irp); + if (!NT_SUCCESS(Status)) { + ERR("flush_fcb returned %08x\n", Status); + free_fcb(Vcb, c->cache); + c->cache = NULL; + return Status; + } + *changed = TRUE; } else 
if (realloc_extents) { KEY searchkey; traverse_ptr tp; - + TRACE("reallocating extents\n"); - + // add free_space entry to tree cache - + searchkey.obj_id = FREE_SPACE_CACHE_ID; searchkey.obj_type = 0; searchkey.offset = c->offset; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (keycmp(searchkey, tp.item->key)) { ERR("could not find (%llx,%x,%llx) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); return STATUS_INTERNAL_ERROR; } - + if (tp.item->size < sizeof(FREE_SPACE_ITEM)) { ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(FREE_SPACE_ITEM)); return STATUS_INTERNAL_ERROR; } - + tp.tree->write = TRUE; // remove existing extents - + if (c->cache->inode_item.st_size > 0) { le = c->cache->extents.Flink; - + while (le != &c->cache->extents) { extent* ext = CONTAINING_RECORD(le, extent, list_entry); - - if (!ext->ignore && (ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC)) { - EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ext->data->data[0]; - + + if (!ext->ignore && (ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC)) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ext->extent_data.data[0]; + if (ed2->size != 0) { chunk* c2 = get_chunk_from_address(Vcb, ed2->address); - - if (!c2->list_entry_changed.Flink) - InsertTailList(&Vcb->chunks_changed, &c2->list_entry_changed); + + if (c2) { + c2->changed = TRUE; + c2->space_changed = TRUE; + } } } - + le = le->Flink; } - + Status = excise_extents(Vcb, c->cache, 0, c->cache->inode_item.st_size, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("excise_extents returned %08x\n", Status); return Status; } } - + // add new extent - + Status = insert_cache_extent(c->cache, 0, new_cache_size, rollback); if (!NT_SUCCESS(Status)) { 
ERR("insert_cache_extent returned %08x\n", Status); return Status; } - + // modify INODE_ITEM - + c->cache->inode_item.st_size = new_cache_size; c->cache->inode_item.st_blocks = new_cache_size; - - flush_fcb(c->cache, TRUE, batchlist, Irp, rollback); - + + Status = flush_fcb(c->cache, TRUE, batchlist, Irp); + if (!NT_SUCCESS(Status)) { + ERR("flush_fcb returned %08x\n", Status); + return Status; + } + *changed = TRUE; } else { KEY searchkey; traverse_ptr tp; - + // add INODE_ITEM and free_space entry to tree cache, for writing later - + searchkey.obj_id = c->cache->inode; searchkey.obj_type = TYPE_INODE_ITEM; searchkey.offset = 0; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (keycmp(searchkey, tp.item->key)) { INODE_ITEM* ii; - + ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG); + if (!ii) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + RtlCopyMemory(ii, &c->cache->inode_item, sizeof(INODE_ITEM)); - - if (!insert_tree_item(Vcb, Vcb->root_root, c->cache->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - return STATUS_INTERNAL_ERROR; + + Status = insert_tree_item(Vcb, Vcb->root_root, c->cache->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + ExFreePool(ii); + return Status; } - + *changed = TRUE; - } else { + } else { if (tp.item->size < sizeof(INODE_ITEM)) { ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_ITEM)); return STATUS_INTERNAL_ERROR; } - + tp.tree->write = TRUE; } searchkey.obj_id = FREE_SPACE_CACHE_ID; searchkey.obj_type = 0; searchkey.offset = c->offset; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if 
(!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); return Status; } - + if (keycmp(searchkey, tp.item->key)) { ERR("could not find (%llx,%x,%llx) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); - int3; return STATUS_INTERNAL_ERROR; } - + if (tp.item->size < sizeof(FREE_SPACE_ITEM)) { ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(FREE_SPACE_ITEM)); return STATUS_INTERNAL_ERROR; } - + tp.tree->write = TRUE; } - + // FIXME - reduce inode allocation if cache is shrinking. Make sure to avoid infinite write loops - + return STATUS_SUCCESS; } NTSTATUS allocate_cache(device_extension* Vcb, BOOL* changed, PIRP Irp, LIST_ENTRY* rollback) { - LIST_ENTRY *le = Vcb->chunks_changed.Flink, batchlist; + LIST_ENTRY *le, batchlist; NTSTATUS Status; *changed = FALSE; - + InitializeListHead(&batchlist); - - while (le != &Vcb->chunks_changed) { - BOOL b; - chunk* c = CONTAINING_RECORD(le, chunk, list_entry_changed); - - ExAcquireResourceExclusiveLite(&c->lock, TRUE); - Status = allocate_cache_chunk(Vcb, c, &b, &batchlist, Irp, rollback); - ExReleaseResourceLite(&c->lock); - - if (b) - *changed = TRUE; - - if (!NT_SUCCESS(Status)) { - ERR("allocate_cache_chunk(%llx) returned %08x\n", c->offset, Status); - clear_batch_list(Vcb, &batchlist); - return Status; + + ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); + + le = Vcb->chunks.Flink; + while (le != &Vcb->chunks) { + chunk* c = CONTAINING_RECORD(le, chunk, list_entry); + + if (c->space_changed) { + BOOL b; + + ExAcquireResourceExclusiveLite(&c->lock, TRUE); + Status = allocate_cache_chunk(Vcb, c, &b, &batchlist, Irp, rollback); + ExReleaseResourceLite(&c->lock); + + if (b) + *changed = TRUE; + + if (!NT_SUCCESS(Status)) { + ERR("allocate_cache_chunk(%llx) returned %08x\n", c->offset, Status); + ExReleaseResourceLite(&Vcb->chunk_lock); + clear_batch_list(Vcb, &batchlist); + return Status; + } 
} - + le = le->Flink; } - - commit_batch_list(Vcb, &batchlist, Irp, rollback); - + + ExReleaseResourceLite(&Vcb->chunk_lock); + + Status = commit_batch_list(Vcb, &batchlist, Irp); + if (!NT_SUCCESS(Status)) { + ERR("commit_batch_list returned %08x\n", Status); + return Status; + } + return STATUS_SUCCESS; } -static void add_rollback_space(device_extension* Vcb, LIST_ENTRY* rollback, BOOL add, LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c) { +static void add_rollback_space(LIST_ENTRY* rollback, BOOL add, LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c) { rollback_space* rs; - + rs = ExAllocatePoolWithTag(PagedPool, sizeof(rollback_space), ALLOC_TAG); if (!rs) { ERR("out of memory\n"); return; } - + rs->list = list; rs->list_size = list_size; rs->address = address; rs->length = length; rs->chunk = c; - - add_rollback(Vcb, rollback, add ? ROLLBACK_ADD_SPACE : ROLLBACK_SUBTRACT_SPACE, rs); + + add_rollback(rollback, add ? ROLLBACK_ADD_SPACE : ROLLBACK_SUBTRACT_SPACE, rs); } -void _space_list_add2(device_extension* Vcb, LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback, const char* func) { +void space_list_add2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback) { LIST_ENTRY* le; space *s, *s2; - -#ifdef DEBUG_SPACE_LISTS - _debug_message(func, "called space_list_add (%p, %llx, %llx, %p)\n", list, address, length, rollback); -#endif - + if (IsListEmpty(list)) { s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG); @@ -1149,150 +1418,150 @@ void _space_list_add2(device_extension* Vcb, LIST_ENTRY* list, LIST_ENTRY* list_ ERR("out of memory\n"); return; } - + s->address = address; s->size = length; InsertTailList(list, &s->list_entry); - + if (list_size) InsertTailList(list_size, &s->list_entry_size); - + if (rollback) - add_rollback_space(Vcb, rollback, TRUE, list, list_size, address, length, 
c); - + add_rollback_space(rollback, TRUE, list, list_size, address, length, c); + return; } - + le = list->Flink; - while (le != list) { + do { s2 = CONTAINING_RECORD(le, space, list_entry); - + // old entry envelops new one completely if (s2->address <= address && s2->address + s2->size >= address + length) return; - + // new entry envelops old one completely if (address <= s2->address && address + length >= s2->address + s2->size) { if (address < s2->address) { if (rollback) - add_rollback_space(Vcb, rollback, TRUE, list, list_size, address, s2->address - address, c); - + add_rollback_space(rollback, TRUE, list, list_size, address, s2->address - address, c); + s2->size += s2->address - address; s2->address = address; - + while (s2->list_entry.Blink != list) { space* s3 = CONTAINING_RECORD(s2->list_entry.Blink, space, list_entry); - + if (s3->address + s3->size == s2->address) { s2->address = s3->address; s2->size += s3->size; - + RemoveEntryList(&s3->list_entry); - + if (list_size) RemoveEntryList(&s3->list_entry_size); - + ExFreePool(s3); } else break; } } - + if (length > s2->size) { if (rollback) - add_rollback_space(Vcb, rollback, TRUE, list, list_size, s2->address + s2->size, address + length - s2->address - s2->size, c); - + add_rollback_space(rollback, TRUE, list, list_size, s2->address + s2->size, address + length - s2->address - s2->size, c); + s2->size = length; - + while (s2->list_entry.Flink != list) { space* s3 = CONTAINING_RECORD(s2->list_entry.Flink, space, list_entry); - + if (s3->address <= s2->address + s2->size) { s2->size = max(s2->size, s3->address + s3->size - s2->address); - + RemoveEntryList(&s3->list_entry); - + if (list_size) RemoveEntryList(&s3->list_entry_size); - + ExFreePool(s3); } else break; } } - + if (list_size) { RemoveEntryList(&s2->list_entry_size); order_space_entry(s2, list_size); } - + return; } - + // new entry overlaps start of old one if (address < s2->address && address + length >= s2->address) { if (rollback) - 
add_rollback_space(Vcb, rollback, TRUE, list, list_size, address, s2->address - address, c); - + add_rollback_space(rollback, TRUE, list, list_size, address, s2->address - address, c); + s2->size += s2->address - address; s2->address = address; - + while (s2->list_entry.Blink != list) { space* s3 = CONTAINING_RECORD(s2->list_entry.Blink, space, list_entry); - + if (s3->address + s3->size == s2->address) { s2->address = s3->address; s2->size += s3->size; - + RemoveEntryList(&s3->list_entry); - + if (list_size) RemoveEntryList(&s3->list_entry_size); - + ExFreePool(s3); } else break; } - + if (list_size) { RemoveEntryList(&s2->list_entry_size); order_space_entry(s2, list_size); } - + return; } - + // new entry overlaps end of old one if (address <= s2->address + s2->size && address + length > s2->address + s2->size) { if (rollback) - add_rollback_space(Vcb, rollback, TRUE, list, list_size, address, s2->address + s2->size - address, c); - + add_rollback_space(rollback, TRUE, list, list_size, address, s2->address + s2->size - address, c); + s2->size = address + length - s2->address; - + while (s2->list_entry.Flink != list) { space* s3 = CONTAINING_RECORD(s2->list_entry.Flink, space, list_entry); - + if (s3->address <= s2->address + s2->size) { s2->size = max(s2->size, s3->address + s3->size - s2->address); - + RemoveEntryList(&s3->list_entry); - + if (list_size) RemoveEntryList(&s3->list_entry_size); - + ExFreePool(s3); } else break; } - + if (list_size) { RemoveEntryList(&s2->list_entry_size); order_space_entry(s2, list_size); } - + return; } - + // add completely separate entry if (s2->address > address + length) { s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG); @@ -1301,35 +1570,35 @@ void _space_list_add2(device_extension* Vcb, LIST_ENTRY* list, LIST_ENTRY* list_ ERR("out of memory\n"); return; } - + if (rollback) - add_rollback_space(Vcb, rollback, TRUE, list, list_size, address, length, c); - + add_rollback_space(rollback, TRUE, list, list_size, 
address, length, c); + s->address = address; s->size = length; InsertHeadList(s2->list_entry.Blink, &s->list_entry); - + if (list_size) order_space_entry(s, list_size); - + return; } - + le = le->Flink; - } - + } while (le != list); + // check if contiguous with last entry if (s2->address + s2->size == address) { s2->size += length; - + if (list_size) { RemoveEntryList(&s2->list_entry_size); order_space_entry(s2, list_size); } - + return; } - + // otherwise, insert at end s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG); @@ -1337,28 +1606,28 @@ void _space_list_add2(device_extension* Vcb, LIST_ENTRY* list, LIST_ENTRY* list_ ERR("out of memory\n"); return; } - + s->address = address; s->size = length; InsertTailList(list, &s->list_entry); - + if (list_size) order_space_entry(s, list_size); - + if (rollback) - add_rollback_space(Vcb, rollback, TRUE, list, list_size, address, length, c); + add_rollback_space(rollback, TRUE, list, list_size, address, length, c); } -static void space_list_merge(device_extension* Vcb, LIST_ENTRY* spacelist, LIST_ENTRY* spacelist_size, LIST_ENTRY* deleting) { +static void space_list_merge(LIST_ENTRY* spacelist, LIST_ENTRY* spacelist_size, LIST_ENTRY* deleting) { LIST_ENTRY* le; - + if (!IsListEmpty(deleting)) { le = deleting->Flink; while (le != deleting) { space* s = CONTAINING_RECORD(le, space, list_entry); - - space_list_add2(Vcb, spacelist, spacelist_size, s->address, s->size, NULL); - + + space_list_add2(spacelist, spacelist_size, s->address, s->size, NULL, NULL); + le = le->Flink; } } @@ -1370,92 +1639,99 @@ static NTSTATUS update_chunk_cache(device_extension* Vcb, chunk* c, BTRFS_TIME* traverse_ptr tp; FREE_SPACE_ITEM* fsi; void* data; - FREE_SPACE_ENTRY* fse; - UINT64 num_entries, num_sectors, *cachegen, i, off; - UINT32* checksums; + UINT64 num_entries, *cachegen, off; + UINT32 *checksums, num_sectors, i; LIST_ENTRY* le; - - space_list_merge(Vcb, &c->space, &c->space_size, &c->deleting); - - data = 
ExAllocatePoolWithTag(NonPagedPool, c->cache->inode_item.st_size, ALLOC_TAG); + + space_list_merge(&c->space, &c->space_size, &c->deleting); + + data = ExAllocatePoolWithTag(NonPagedPool, (ULONG)c->cache->inode_item.st_size, ALLOC_TAG); if (!data) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - - RtlZeroMemory(data, c->cache->inode_item.st_size); - + + RtlZeroMemory(data, (ULONG)c->cache->inode_item.st_size); + num_entries = 0; - num_sectors = c->cache->inode_item.st_size / Vcb->superblock.sector_size; + num_sectors = (UINT32)(c->cache->inode_item.st_size / Vcb->superblock.sector_size); off = (sizeof(UINT32) * num_sectors) + sizeof(UINT64); - + le = c->space.Flink; while (le != &c->space) { + FREE_SPACE_ENTRY* fse; + space* s = CONTAINING_RECORD(le, space, list_entry); if ((off + sizeof(FREE_SPACE_ENTRY)) / Vcb->superblock.sector_size != off / Vcb->superblock.sector_size) off = sector_align(off, Vcb->superblock.sector_size); - + fse = (FREE_SPACE_ENTRY*)((UINT8*)data + off); - + fse->offset = s->address; fse->size = s->size; fse->type = FREE_SPACE_EXTENT; num_entries++; - + off += sizeof(FREE_SPACE_ENTRY); - + le = le->Flink; } // update INODE_ITEM - + c->cache->inode_item.generation = Vcb->superblock.generation; c->cache->inode_item.transid = Vcb->superblock.generation; c->cache->inode_item.sequence++; c->cache->inode_item.st_ctime = *now; - - flush_fcb(c->cache, TRUE, batchlist, Irp, rollback); - + + Status = flush_fcb(c->cache, TRUE, batchlist, Irp); + if (!NT_SUCCESS(Status)) { + ERR("flush_fcb returned %08x\n", Status); + goto end; + } + // update free_space item - + searchkey.obj_id = FREE_SPACE_CACHE_ID; searchkey.obj_type = 0; searchkey.offset = c->offset; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); - return Status; + goto end; } - + if (keycmp(searchkey, tp.item->key)) { ERR("could not find (%llx,%x,%llx) in root_root\n", 
searchkey.obj_id, searchkey.obj_type, searchkey.offset); - return STATUS_INTERNAL_ERROR; + Status = STATUS_INTERNAL_ERROR; + goto end; } - + if (tp.item->size < sizeof(FREE_SPACE_ITEM)) { ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(FREE_SPACE_ITEM)); - return STATUS_INTERNAL_ERROR; + Status = STATUS_INTERNAL_ERROR; + goto end; } - + fsi = (FREE_SPACE_ITEM*)tp.item->data; - + fsi->generation = Vcb->superblock.generation; fsi->num_entries = num_entries; fsi->num_bitmaps = 0; - + // set cache generation - + cachegen = (UINT64*)((UINT8*)data + (sizeof(UINT32) * num_sectors)); *cachegen = Vcb->superblock.generation; - + // calculate cache checksums - + checksums = (UINT32*)data; - + // FIXME - if we know sector is fully zeroed, use cached checksum - + for (i = 0; i < num_sectors; i++) { if (i * Vcb->superblock.sector_size > sizeof(UINT32) * num_sectors) checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); @@ -1464,128 +1740,256 @@ static NTSTATUS update_chunk_cache(device_extension* Vcb, chunk* c, BTRFS_TIME* else checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + (sizeof(UINT32) * num_sectors), ((i + 1) * Vcb->superblock.sector_size) - (sizeof(UINT32) * num_sectors)); } - + // write cache - - Status = do_write_file(c->cache, 0, c->cache->inode_item.st_size, data, NULL, rollback); + + Status = do_write_file(c->cache, 0, c->cache->inode_item.st_size, data, NULL, FALSE, 0, rollback); if (!NT_SUCCESS(Status)) { ERR("do_write_file returned %08x\n", Status); - return Status; + + // Writing the cache isn't critical, so we don't return an error if writing fails. This means + // we can still flush on a degraded mount if metadata is RAID1 but data is RAID0. 
} + Status = STATUS_SUCCESS; + +end: ExFreePool(data); - + + return Status; +} + +static NTSTATUS update_chunk_cache_tree(device_extension* Vcb, chunk* c, LIST_ENTRY* batchlist) { + NTSTATUS Status; + LIST_ENTRY* le; + FREE_SPACE_INFO* fsi; + + fsi = ExAllocatePoolWithTag(PagedPool, sizeof(FREE_SPACE_INFO), ALLOC_TAG); + if (!fsi) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + space_list_merge(&c->space, &c->space_size, &c->deleting); + + fsi->count = 0; + fsi->flags = 0; + + le = c->space.Flink; + while (le != &c->space) { + space* s = CONTAINING_RECORD(le, space, list_entry); + + fsi->count++; + + Status = insert_tree_item_batch(batchlist, Vcb, Vcb->space_root, s->address, TYPE_FREE_SPACE_EXTENT, s->size, + NULL, 0, Batch_Insert); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); + ExFreePool(fsi); + return Status; + } + + le = le->Flink; + } + + Status = insert_tree_item_batch(batchlist, Vcb, Vcb->space_root, c->offset, TYPE_FREE_SPACE_INFO, c->chunk_item->size, + NULL, 0, Batch_DeleteFreeSpace); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); + ExFreePool(fsi); + return Status; + } + + Status = insert_tree_item_batch(batchlist, Vcb, Vcb->space_root, c->offset, TYPE_FREE_SPACE_INFO, c->chunk_item->size, + fsi, sizeof(FREE_SPACE_INFO), Batch_Insert); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item_batch returned %08x\n", Status); + ExFreePool(fsi); + return Status; + } + return STATUS_SUCCESS; } NTSTATUS update_chunk_caches(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) { - LIST_ENTRY *le = Vcb->chunks_changed.Flink, batchlist; + LIST_ENTRY *le, batchlist; NTSTATUS Status; chunk* c; LARGE_INTEGER time; BTRFS_TIME now; - + KeQuerySystemTime(&time); win_time_to_unix(time, &now); - + InitializeListHead(&batchlist); - - while (le != &Vcb->chunks_changed) { - c = CONTAINING_RECORD(le, chunk, list_entry_changed); - - 
ExAcquireResourceExclusiveLite(&c->lock, TRUE); - Status = update_chunk_cache(Vcb, c, &now, &batchlist, Irp, rollback); - ExReleaseResourceLite(&c->lock); - if (!NT_SUCCESS(Status)) { - ERR("update_chunk_cache(%llx) returned %08x\n", c->offset, Status); - clear_batch_list(Vcb, &batchlist); - return Status; + le = Vcb->chunks.Flink; + while (le != &Vcb->chunks) { + c = CONTAINING_RECORD(le, chunk, list_entry); + + if (c->space_changed) { + ExAcquireResourceExclusiveLite(&c->lock, TRUE); + Status = update_chunk_cache(Vcb, c, &now, &batchlist, Irp, rollback); + ExReleaseResourceLite(&c->lock); + + if (!NT_SUCCESS(Status)) { + ERR("update_chunk_cache(%llx) returned %08x\n", c->offset, Status); + clear_batch_list(Vcb, &batchlist); + return Status; + } + } + + le = le->Flink; + } + + Status = commit_batch_list(Vcb, &batchlist, Irp); + if (!NT_SUCCESS(Status)) { + ERR("commit_batch_list returned %08x\n", Status); + return Status; + } + + le = Vcb->chunks.Flink; + while (le != &Vcb->chunks) { + c = CONTAINING_RECORD(le, chunk, list_entry); + + if (c->changed && (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)) { + ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, TRUE); + + while (!IsListEmpty(&c->partial_stripes)) { + partial_stripe* ps = CONTAINING_RECORD(RemoveHeadList(&c->partial_stripes), partial_stripe, list_entry); + + Status = flush_partial_stripe(Vcb, c, ps); + + if (ps->bmparr) + ExFreePool(ps->bmparr); + + ExFreePool(ps); + + if (!NT_SUCCESS(Status)) { + ERR("flush_partial_stripe returned %08x\n", Status); + ExReleaseResourceLite(&c->partial_stripes_lock); + return Status; + } + } + + ExReleaseResourceLite(&c->partial_stripes_lock); } - + le = le->Flink; } - - commit_batch_list(Vcb, &batchlist, Irp, rollback); - + return STATUS_SUCCESS; } -void _space_list_add(device_extension* Vcb, chunk* c, BOOL deleting, UINT64 address, UINT64 length, LIST_ENTRY* rollback, const char* func) { - LIST_ENTRY* list; - - TRACE("(%p, %p, %u, 
%llx, %llx, %p)\n", Vcb, c, deleting, address, length, rollback); - - list = deleting ? &c->deleting : &c->space; - - if (!c->list_entry_changed.Flink) - InsertTailList(&Vcb->chunks_changed, &c->list_entry_changed); - - _space_list_add2(Vcb, list, deleting ? NULL : &c->space_size, address, length, c, rollback, func); +NTSTATUS update_chunk_caches_tree(device_extension* Vcb, PIRP Irp) { + LIST_ENTRY *le, batchlist; + NTSTATUS Status; + chunk* c; + + Vcb->superblock.compat_ro_flags |= BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE_VALID; + + InitializeListHead(&batchlist); + + ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE); + + le = Vcb->chunks.Flink; + while (le != &Vcb->chunks) { + c = CONTAINING_RECORD(le, chunk, list_entry); + + if (c->space_changed) { + ExAcquireResourceExclusiveLite(&c->lock, TRUE); + Status = update_chunk_cache_tree(Vcb, c, &batchlist); + ExReleaseResourceLite(&c->lock); + + if (!NT_SUCCESS(Status)) { + ERR("update_chunk_cache_tree(%llx) returned %08x\n", c->offset, Status); + ExReleaseResourceLite(&Vcb->chunk_lock); + clear_batch_list(Vcb, &batchlist); + return Status; + } + } + + le = le->Flink; + } + + ExReleaseResourceLite(&Vcb->chunk_lock); + + Status = commit_batch_list(Vcb, &batchlist, Irp); + if (!NT_SUCCESS(Status)) { + ERR("commit_batch_list returned %08x\n", Status); + return Status; + } + + return STATUS_SUCCESS; } -void _space_list_subtract2(device_extension* Vcb, LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback, const char* func) { +void space_list_add(chunk* c, UINT64 address, UINT64 length, LIST_ENTRY* rollback) { + TRACE("(%p, %llx, %llx, %p)\n", c, address, length, rollback); + + c->changed = TRUE; + c->space_changed = TRUE; + + space_list_add2(&c->deleting, NULL, address, length, c, rollback); +} + +void space_list_subtract2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback) { LIST_ENTRY *le, *le2; space *s, *s2; - -#ifdef 
DEBUG_SPACE_LISTS - _debug_message(func, "called space_list_subtract (%p, %llx, %llx, %p)\n", list, address, length, rollback); -#endif - + if (IsListEmpty(list)) return; - + le = list->Flink; while (le != list) { s2 = CONTAINING_RECORD(le, space, list_entry); le2 = le->Flink; - + if (s2->address >= address + length) return; - + if (s2->address >= address && s2->address + s2->size <= address + length) { // remove entry entirely if (rollback) - add_rollback_space(Vcb, rollback, FALSE, list, list_size, s2->address, s2->size, c); - + add_rollback_space(rollback, FALSE, list, list_size, s2->address, s2->size, c); + RemoveEntryList(&s2->list_entry); - + if (list_size) RemoveEntryList(&s2->list_entry_size); - + ExFreePool(s2); } else if (address + length > s2->address && address + length < s2->address + s2->size) { if (address > s2->address) { // cut out hole if (rollback) - add_rollback_space(Vcb, rollback, FALSE, list, list_size, address, length, c); - + add_rollback_space(rollback, FALSE, list, list_size, address, length, c); + s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG); if (!s) { ERR("out of memory\n"); return; } - + s->address = s2->address; s->size = address - s2->address; InsertHeadList(s2->list_entry.Blink, &s->list_entry); - + s2->size = s2->address + s2->size - address - length; s2->address = address + length; - + if (list_size) { RemoveEntryList(&s2->list_entry_size); order_space_entry(s2, list_size); order_space_entry(s, list_size); } - + return; } else { // remove start of entry if (rollback) - add_rollback_space(Vcb, rollback, FALSE, list, list_size, s2->address, address + length - s2->address, c); - + add_rollback_space(rollback, FALSE, list, list_size, s2->address, address + length - s2->address, c); + s2->size -= address + length - s2->address; s2->address = address + length; - + if (list_size) { RemoveEntryList(&s2->list_entry_size); order_space_entry(s2, list_size); @@ -1593,27 +1997,27 @@ void 
_space_list_subtract2(device_extension* Vcb, LIST_ENTRY* list, LIST_ENTRY* } } else if (address > s2->address && address < s2->address + s2->size) { // remove end of entry if (rollback) - add_rollback_space(Vcb, rollback, FALSE, list, list_size, address, s2->address + s2->size - address, c); - + add_rollback_space(rollback, FALSE, list, list_size, address, s2->address + s2->size - address, c); + s2->size = address - s2->address; - + if (list_size) { RemoveEntryList(&s2->list_entry_size); order_space_entry(s2, list_size); } } - + le = le2; } } -void _space_list_subtract(device_extension* Vcb, chunk* c, BOOL deleting, UINT64 address, UINT64 length, LIST_ENTRY* rollback, const char* func) { +void space_list_subtract(chunk* c, BOOL deleting, UINT64 address, UINT64 length, LIST_ENTRY* rollback) { LIST_ENTRY* list; - + list = deleting ? &c->deleting : &c->space; - - if (!c->list_entry_changed.Flink) - InsertTailList(&Vcb->chunks_changed, &c->list_entry_changed); - - _space_list_subtract2(Vcb, list, deleting ? NULL : &c->space_size, address, length, c, rollback, func); + + c->changed = TRUE; + c->space_changed = TRUE; + + space_list_subtract2(list, deleting ? NULL : &c->space_size, address, length, c, rollback); } diff --git a/reactos/drivers/filesystems/btrfs/fsctl.c b/reactos/drivers/filesystems/btrfs/fsctl.c index 4de4e4e733f..8e0fb3437b8 100644 --- a/reactos/drivers/filesystems/btrfs/fsctl.c +++ b/reactos/drivers/filesystems/btrfs/fsctl.c @@ -1,72 +1,85 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. 
- * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . */ #include "btrfs_drv.h" #include "btrfsioctl.h" +#include +#include #ifndef __REACTOS__ -#include +#include #endif #ifndef FSCTL_CSV_CONTROL #define FSCTL_CSV_CONTROL CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 181, METHOD_BUFFERED, FILE_ANY_ACCESS) #endif +#ifndef FSCTL_QUERY_VOLUME_CONTAINER_STATE +#define FSCTL_QUERY_VOLUME_CONTAINER_STATE CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 228, METHOD_BUFFERED, FILE_ANY_ACCESS) +#endif + #define DOTDOT ".." #define SEF_AVOID_PRIVILEGE_CHECK 0x08 // on MSDN but not in any header files(?) +#ifndef _MSC_VER // not in mingw yet +#define DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED 0x80000000 +#endif + +#define SEF_SACL_AUTO_INHERIT 0x02 + extern LIST_ENTRY VcbList; extern ERESOURCE global_loading_lock; -extern LIST_ENTRY volumes; -extern ERESOURCE volumes_lock; +extern PDRIVER_OBJECT drvobj; + +static void mark_subvol_dirty(device_extension* Vcb, root* r); static NTSTATUS get_file_ids(PFILE_OBJECT FileObject, void* data, ULONG length) { btrfs_get_file_ids* bgfi; fcb* fcb; - + if (length < sizeof(btrfs_get_file_ids)) return STATUS_BUFFER_OVERFLOW; - + if (!FileObject) return STATUS_INVALID_PARAMETER; - + fcb = FileObject->FsContext; - + if (!fcb) return STATUS_INVALID_PARAMETER; - + bgfi = data; - + bgfi->subvol = fcb->subvol->id; bgfi->inode = fcb->inode; bgfi->top = fcb->Vcb->root_fileref->fcb == fcb ? 
TRUE : FALSE; - + return STATUS_SUCCESS; } static void get_uuid(BTRFS_UUID* uuid) { LARGE_INTEGER seed; UINT8 i; - + seed = KeQueryPerformanceCounter(NULL); for (i = 0; i < 16; i+=2) { ULONG rand = RtlRandomEx(&seed.LowPart); - + uuid->uuid[i] = (rand & 0xff00) >> 8; uuid->uuid[i+1] = rand & 0xff; } @@ -75,79 +88,75 @@ static void get_uuid(BTRFS_UUID* uuid) { static NTSTATUS snapshot_tree_copy(device_extension* Vcb, UINT64 addr, root* subvol, UINT64* newaddr, PIRP Irp, LIST_ENTRY* rollback) { UINT8* buf; NTSTATUS Status; - write_data_context* wtc; + write_data_context wtc; LIST_ENTRY* le; tree t; tree_header* th; chunk* c; - + buf = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG); if (!buf) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - - wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context), ALLOC_TAG); - if (!wtc) { - ERR("out of memory\n"); - ExFreePool(buf); - return STATUS_INSUFFICIENT_RESOURCES; - } - - Status = read_data(Vcb, addr, Vcb->superblock.node_size, NULL, TRUE, buf, NULL, NULL, Irp, FALSE); + + wtc.parity1 = wtc.parity2 = wtc.scratch = NULL; + wtc.mdl = wtc.parity1_mdl = wtc.parity2_mdl = NULL; + + Status = read_data(Vcb, addr, Vcb->superblock.node_size, NULL, TRUE, buf, NULL, NULL, Irp, 0, FALSE, NormalPagePriority); if (!NT_SUCCESS(Status)) { ERR("read_data returned %08x\n", Status); goto end; } - + th = (tree_header*)buf; - + RtlZeroMemory(&t, sizeof(tree)); t.root = subvol; t.header.level = th->level; t.header.tree_id = t.root->id; - + Status = get_tree_new_address(Vcb, &t, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("get_tree_new_address returned %08x\n", Status); goto end; } - + if (!t.has_new_address) { ERR("tree new address not set\n"); Status = STATUS_INTERNAL_ERROR; goto end; } - + c = get_chunk_from_address(Vcb, t.new_address); - - if (c) { - increase_chunk_usage(c, Vcb->superblock.node_size); - } else { + + if (c) + c->used += Vcb->superblock.node_size; + else { ERR("could 
not find chunk for address %llx\n", t.new_address); Status = STATUS_INTERNAL_ERROR; goto end; } - + th->address = t.new_address; th->tree_id = subvol->id; th->generation = Vcb->superblock.generation; th->fs_uuid = Vcb->superblock.uuid; - + if (th->level == 0) { UINT32 i; leaf_node* ln = (leaf_node*)&th[1]; - + for (i = 0; i < th->num_items; i++) { if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) && ln[i].offset + ln[i].size <= Vcb->superblock.node_size - sizeof(tree_header)) { EXTENT_DATA* ed = (EXTENT_DATA*)(((UINT8*)&th[1]) + ln[i].offset); - + if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ed->data[0]; - + if (ed2->size != 0) { // not sparse - Status = increase_extent_refcount_data(Vcb, ed2->address, ed2->size, subvol->id, ln[i].key.obj_id, ln[i].key.offset - ed2->offset, 1, Irp, rollback); - + Status = increase_extent_refcount_data(Vcb, ed2->address, ed2->size, subvol->id, ln[i].key.obj_id, ln[i].key.offset - ed2->offset, 1, Irp); + if (!NT_SUCCESS(Status)) { ERR("increase_extent_refcount_data returned %08x\n", Status); goto end; @@ -159,100 +168,104 @@ static NTSTATUS snapshot_tree_copy(device_extension* Vcb, UINT64 addr, root* sub } else { UINT32 i; internal_node* in = (internal_node*)&th[1]; - + for (i = 0; i < th->num_items; i++) { TREE_BLOCK_REF tbr; - + tbr.offset = subvol->id; - - Status = increase_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr, NULL, th->level - 1, Irp, rollback); + + Status = increase_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr, NULL, th->level - 1, Irp); if (!NT_SUCCESS(Status)) { ERR("increase_extent_refcount returned %08x\n", Status); goto end; } } } - + *((UINT32*)buf) = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); - - 
KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE); - InitializeListHead(&wtc->stripes); - wtc->tree = TRUE; - wtc->stripes_left = 0; - - Status = write_data(Vcb, t.new_address, buf, FALSE, Vcb->superblock.node_size, wtc, NULL, NULL); + + KeInitializeEvent(&wtc.Event, NotificationEvent, FALSE); + InitializeListHead(&wtc.stripes); + wtc.stripes_left = 0; + + Status = write_data(Vcb, t.new_address, buf, Vcb->superblock.node_size, &wtc, NULL, NULL, FALSE, 0, NormalPagePriority); if (!NT_SUCCESS(Status)) { ERR("write_data returned %08x\n", Status); goto end; } - - if (wtc->stripes.Flink != &wtc->stripes) { + + if (wtc.stripes.Flink != &wtc.stripes) { + BOOL need_wait = FALSE; + // launch writes and wait - le = wtc->stripes.Flink; - while (le != &wtc->stripes) { + le = wtc.stripes.Flink; + while (le != &wtc.stripes) { write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry); - - if (stripe->status != WriteDataStatus_Ignore) + + if (stripe->status != WriteDataStatus_Ignore) { + need_wait = TRUE; IoCallDriver(stripe->device->devobj, stripe->Irp); - + } + le = le->Flink; } - - KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL); - - le = wtc->stripes.Flink; - while (le != &wtc->stripes) { + + if (need_wait) + KeWaitForSingleObject(&wtc.Event, Executive, KernelMode, FALSE, NULL); + + le = wtc.stripes.Flink; + while (le != &wtc.stripes) { write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry); - + if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) { Status = stripe->iosb.Status; + log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS); break; } - + le = le->Flink; } - - free_write_data_stripes(wtc); + + free_write_data_stripes(&wtc); buf = NULL; } - + if (NT_SUCCESS(Status)) *newaddr = t.new_address; - + end: - ExFreePool(wtc); - + if (buf) ExFreePool(buf); - + return Status; } -static void flush_subvol_fcbs(root* subvol, LIST_ENTRY* rollback) { +void 
flush_subvol_fcbs(root* subvol) { LIST_ENTRY* le = subvol->fcbs.Flink; - + if (IsListEmpty(&subvol->fcbs)) return; - + while (le != &subvol->fcbs) { struct _fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry); IO_STATUS_BLOCK iosb; - + if (fcb->type != BTRFS_TYPE_DIRECTORY && !fcb->deleted) CcFlushCache(&fcb->nonpaged->segment_object, NULL, 0, &iosb); - + le = le->Flink; } } -static NTSTATUS do_create_snapshot(device_extension* Vcb, PFILE_OBJECT parent, fcb* subvol_fcb, PANSI_STRING utf8, PUNICODE_STRING name, PIRP Irp) { +static NTSTATUS do_create_snapshot(device_extension* Vcb, PFILE_OBJECT parent, fcb* subvol_fcb, PANSI_STRING utf8, PUNICODE_STRING name, BOOL readonly, PIRP Irp) { LIST_ENTRY rollback; UINT64 id; NTSTATUS Status; root *r, *subvol = subvol_fcb->subvol; KEY searchkey; traverse_ptr tp; - UINT64 address, dirpos, *root_num; + UINT64 address, *root_num; LARGE_INTEGER time; BTRFS_TIME now; fcb* fcb = parent->FsContext; @@ -260,110 +273,116 @@ static NTSTATUS do_create_snapshot(device_extension* Vcb, PFILE_OBJECT parent, f LIST_ENTRY* le; file_ref *fileref, *fr; dir_child* dc = NULL; - + if (!ccb) { ERR("error - ccb was NULL\n"); return STATUS_INTERNAL_ERROR; } - + if (!(ccb->access & FILE_ADD_SUBDIRECTORY)) { WARN("insufficient privileges\n"); return STATUS_ACCESS_DENIED; } - + fileref = ccb->fileref; - - InitializeListHead(&rollback); - + + if (fileref->fcb == Vcb->dummy_fcb) + return STATUS_ACCESS_DENIED; + // flush open files on this subvol - - flush_subvol_fcbs(subvol, &rollback); + + flush_subvol_fcbs(subvol); // flush metadata - + if (Vcb->need_write) - do_write(Vcb, Irp, &rollback); - + Status = do_write(Vcb, Irp); + else + Status = STATUS_SUCCESS; + free_trees(Vcb); - - clear_rollback(Vcb, &rollback); - + + if (!NT_SUCCESS(Status)) { + ERR("do_write returned %08x\n", Status); + return Status; + } + InitializeListHead(&rollback); - + // create new root - + id = InterlockedIncrement64(&Vcb->root_root->lastinode); - Status = create_root(Vcb, 
id, &r, TRUE, Vcb->superblock.generation, Irp, &rollback); - + Status = create_root(Vcb, id, &r, TRUE, Vcb->superblock.generation, Irp); + if (!NT_SUCCESS(Status)) { ERR("create_root returned %08x\n", Status); goto end; } - + r->lastinode = subvol->lastinode; - + if (!Vcb->uuid_root) { root* uuid_root; - + TRACE("uuid root doesn't exist, creating it\n"); - - Status = create_root(Vcb, BTRFS_ROOT_UUID, &uuid_root, FALSE, 0, Irp, &rollback); - + + Status = create_root(Vcb, BTRFS_ROOT_UUID, &uuid_root, FALSE, 0, Irp); + if (!NT_SUCCESS(Status)) { ERR("create_root returned %08x\n", Status); goto end; } - + Vcb->uuid_root = uuid_root; } - + root_num = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64), ALLOC_TAG); if (!root_num) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + tp.tree = NULL; - + do { get_uuid(&r->root_item.uuid); - + RtlCopyMemory(&searchkey.obj_id, &r->root_item.uuid, sizeof(UINT64)); searchkey.obj_type = TYPE_SUBVOL_UUID; RtlCopyMemory(&searchkey.offset, &r->root_item.uuid.uuid[sizeof(UINT64)], sizeof(UINT64)); - + Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp); } while (NT_SUCCESS(Status) && !keycmp(searchkey, tp.item->key)); - + *root_num = r->id; - - if (!insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, root_num, sizeof(UINT64), NULL, Irp, &rollback)) { - ERR("insert_tree_item failed\n"); + + Status = insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, root_num, sizeof(UINT64), NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); ExFreePool(root_num); - Status = STATUS_INTERNAL_ERROR; goto end; } - + searchkey.obj_id = r->id; searchkey.obj_type = TYPE_ROOT_ITEM; searchkey.offset = 0xffffffffffffffff; - + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { ERR("error - find_item returned %08x\n", Status); goto end; } - + Status = 
snapshot_tree_copy(Vcb, subvol->root_item.block_number, r, &address, Irp, &rollback); if (!NT_SUCCESS(Status)) { ERR("snapshot_tree_copy returned %08x\n", Status); goto end; } - + KeQuerySystemTime(&time); win_time_to_unix(time, &now); - + r->root_item.inode.generation = 1; r->root_item.inode.st_size = 3; r->root_item.inode.st_blocks = subvol->root_item.inode.st_blocks; @@ -382,158 +401,117 @@ static NTSTATUS do_create_snapshot(device_extension* Vcb, PFILE_OBJECT parent, f r->root_item.otransid = Vcb->superblock.generation; r->root_item.ctime = subvol->root_item.ctime; r->root_item.otime = now; - + + if (readonly) + r->root_item.flags |= BTRFS_SUBVOL_READONLY; + r->treeholder.address = address; - + // FIXME - do we need to copy over the send and receive fields too? - + if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { ERR("error - could not find ROOT_ITEM for subvol %llx\n", r->id); Status = STATUS_INTERNAL_ERROR; goto end; } - + RtlCopyMemory(tp.item->data, &r->root_item, sizeof(ROOT_ITEM)); - + // update ROOT_ITEM of original subvol - + subvol->root_item.last_snapshot_generation = Vcb->superblock.generation; - - // We also rewrite the top of the old subvolume tree, for some reason - searchkey.obj_id = 0; - searchkey.obj_type = 0; - searchkey.offset = 0; - - Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - goto end; - } - - subvol->treeholder.tree->write = TRUE; - + + mark_subvol_dirty(Vcb, subvol); + // create fileref for entry in other subvolume - - fr = create_fileref(); + + fr = create_fileref(Vcb); if (!fr) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - - fr->utf8.Length = fr->utf8.MaximumLength = utf8->Length; - fr->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, fr->utf8.MaximumLength, ALLOC_TAG); - if (!fr->utf8.Buffer) { - ERR("out of memory\n"); - free_fileref(fr); - Status = 
STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - RtlCopyMemory(fr->utf8.Buffer, utf8->Buffer, utf8->Length); - + Status = open_fcb(Vcb, r, r->root_item.objid, BTRFS_TYPE_DIRECTORY, utf8, fcb, &fr->fcb, PagedPool, Irp); if (!NT_SUCCESS(Status)) { ERR("open_fcb returned %08x\n", Status); - free_fileref(fr); - goto end; - } - - Status = fcb_get_last_dir_index(fcb, &dirpos, Irp); - if (!NT_SUCCESS(Status)) { - ERR("fcb_get_last_dir_index returned %08x\n", Status); - free_fileref(fr); - goto end; - } - - fr->index = dirpos; - - fr->filepart.MaximumLength = fr->filepart.Length = name->Length; - - fr->filepart.Buffer = ExAllocatePoolWithTag(PagedPool, fr->filepart.MaximumLength, ALLOC_TAG); - if (!fr->filepart.Buffer) { - ERR("out of memory\n"); - free_fileref(fr); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - RtlCopyMemory(fr->filepart.Buffer, name->Buffer, name->Length); - - Status = RtlUpcaseUnicodeString(&fr->filepart_uc, &fr->filepart, TRUE); - if (!NT_SUCCESS(Status)) { - ERR("RtlUpcaseUnicodeString returned %08x\n", Status); - free_fileref(fr); + free_fileref(Vcb, fr); goto end; } - + fr->parent = fileref; - - Status = add_dir_child(fileref->fcb, r->id, TRUE, dirpos, utf8, &fr->filepart, &fr->filepart_uc, BTRFS_TYPE_DIRECTORY, &dc); + + Status = add_dir_child(fileref->fcb, r->id, TRUE, utf8, name, BTRFS_TYPE_DIRECTORY, &dc); if (!NT_SUCCESS(Status)) WARN("add_dir_child returned %08x\n", Status); - + fr->dc = dc; dc->fileref = fr; - - insert_fileref_child(fileref, fr, TRUE); + + ExAcquireResourceExclusiveLite(&fileref->nonpaged->children_lock, TRUE); + InsertTailList(&fileref->children, &fr->list_entry); + ExReleaseResourceLite(&fileref->nonpaged->children_lock); + increase_fileref_refcount(fileref); - + fr->created = TRUE; mark_fileref_dirty(fr); - + if (fr->fcb->type == BTRFS_TYPE_DIRECTORY) fr->fcb->fileref = fr; - - free_fileref(fr); + + fr->fcb->subvol->parent = fileref->fcb->subvol->id; + + free_fileref(Vcb, fr); // change fcb's INODE_ITEM - 
+ fcb->inode_item.transid = Vcb->superblock.generation; fcb->inode_item.sequence++; fcb->inode_item.st_size += utf8->Length * 2; - + if (!ccb->user_set_change_time) fcb->inode_item.st_ctime = now; - + if (!ccb->user_set_write_time) fcb->inode_item.st_mtime = now; - + fcb->inode_item_changed = TRUE; mark_fcb_dirty(fcb); - + fcb->subvol->root_item.ctime = now; fcb->subvol->root_item.ctransid = Vcb->superblock.generation; - - send_notification_fileref(fr, FILE_NOTIFY_CHANGE_DIR_NAME, FILE_ACTION_ADDED); - send_notification_fileref(fr->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED); - + + send_notification_fileref(fr, FILE_NOTIFY_CHANGE_DIR_NAME, FILE_ACTION_ADDED, NULL); + send_notification_fileref(fr->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED, NULL); + le = subvol->fcbs.Flink; while (le != &subvol->fcbs) { struct _fcb* fcb2 = CONTAINING_RECORD(le, struct _fcb, list_entry); LIST_ENTRY* le2 = fcb2->extents.Flink; - + while (le2 != &fcb2->extents) { extent* ext = CONTAINING_RECORD(le2, extent, list_entry); - + if (!ext->ignore) ext->unique = FALSE; - + le2 = le2->Flink; } - + le = le->Flink; } - - do_write(Vcb, Irp, &rollback); - + + Status = do_write(Vcb, Irp); + free_trees(Vcb); - - Status = STATUS_SUCCESS; - + + if (!NT_SUCCESS(Status)) + ERR("do_write returned %08x\n", Status); + end: if (NT_SUCCESS(Status)) - clear_rollback(Vcb, &rollback); + clear_rollback(&rollback); else do_rollback(Vcb, &rollback); @@ -545,74 +523,111 @@ static NTSTATUS create_snapshot(device_extension* Vcb, PFILE_OBJECT FileObject, NTSTATUS Status; btrfs_create_snapshot* bcs = data; fcb* subvol_fcb; + HANDLE subvolh; + BOOL readonly, posix; ANSI_STRING utf8; UNICODE_STRING nameus; ULONG len; fcb* fcb; ccb* ccb; file_ref *fileref, *fr2; - - if (length < offsetof(btrfs_create_snapshot, name)) - return STATUS_INVALID_PARAMETER; - - if (length < offsetof(btrfs_create_snapshot, name) + bcs->namelen) - return STATUS_INVALID_PARAMETER; - - if (!bcs->subvol) + +#if 
defined(_WIN64) + if (IoIs32bitProcess(Irp)) { + btrfs_create_snapshot32* bcs32 = data; + + if (length < offsetof(btrfs_create_snapshot32, name)) + return STATUS_INVALID_PARAMETER; + + if (length < offsetof(btrfs_create_snapshot32, name) + bcs32->namelen) + return STATUS_INVALID_PARAMETER; + + subvolh = Handle32ToHandle(bcs32->subvol); + + nameus.Buffer = bcs32->name; + nameus.Length = nameus.MaximumLength = bcs32->namelen; + + readonly = bcs32->readonly; + posix = bcs32->posix; + } else { +#endif + if (length < offsetof(btrfs_create_snapshot, name)) + return STATUS_INVALID_PARAMETER; + + if (length < offsetof(btrfs_create_snapshot, name) + bcs->namelen) + return STATUS_INVALID_PARAMETER; + + subvolh = bcs->subvol; + + nameus.Buffer = bcs->name; + nameus.Length = nameus.MaximumLength = bcs->namelen; + + readonly = bcs->readonly; + posix = bcs->posix; +#if defined(_WIN64) + } +#endif + + if (!subvolh) return STATUS_INVALID_PARAMETER; - + if (!FileObject || !FileObject->FsContext) return STATUS_INVALID_PARAMETER; - + fcb = FileObject->FsContext; ccb = FileObject->FsContext2; - + if (!fcb || !ccb || fcb->type != BTRFS_TYPE_DIRECTORY) return STATUS_INVALID_PARAMETER; - + fileref = ccb->fileref; - + if (!fileref) { ERR("fileref was NULL\n"); return STATUS_INVALID_PARAMETER; } - + if (!(ccb->access & FILE_ADD_SUBDIRECTORY)) { WARN("insufficient privileges\n"); return STATUS_ACCESS_DENIED; } - - if (fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY) + + if (Vcb->readonly) + return STATUS_MEDIA_WRITE_PROTECTED; + + if (is_subvol_readonly(fcb->subvol, Irp)) return STATUS_ACCESS_DENIED; - - nameus.Buffer = bcs->name; - nameus.Length = nameus.MaximumLength = bcs->namelen; - - if (!is_file_name_valid(&nameus)) + + if (!is_file_name_valid(&nameus, posix)) return STATUS_OBJECT_NAME_INVALID; - + utf8.Buffer = NULL; - - Status = RtlUnicodeToUTF8N(NULL, 0, &len, bcs->name, bcs->namelen); + + Status = RtlUnicodeToUTF8N(NULL, 0, &len, nameus.Buffer, nameus.Length); if 
(!NT_SUCCESS(Status)) { ERR("RtlUnicodeToUTF8N failed with error %08x\n", Status); return Status; } - + if (len == 0) { ERR("RtlUnicodeToUTF8N returned a length of 0\n"); return STATUS_INTERNAL_ERROR; } - - utf8.MaximumLength = utf8.Length = len; + + if (len > 0xffff) { + ERR("len was too long\n"); + return STATUS_INVALID_PARAMETER; + } + + utf8.MaximumLength = utf8.Length = (USHORT)len; utf8.Buffer = ExAllocatePoolWithTag(PagedPool, utf8.Length, ALLOC_TAG); - + if (!utf8.Buffer) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - - Status = RtlUnicodeToUTF8N(utf8.Buffer, len, &len, bcs->name, bcs->namelen); + + Status = RtlUnicodeToUTF8N(utf8.Buffer, len, &len, nameus.Buffer, nameus.Length); if (!NT_SUCCESS(Status)) { ERR("RtlUnicodeToUTF8N failed with error %08x\n", Status); goto end2; @@ -621,116 +636,126 @@ static NTSTATUS create_snapshot(device_extension* Vcb, PFILE_OBJECT FileObject, ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); // no need for fcb_lock as we have tree_lock exclusively - Status = open_fileref(fcb->Vcb, &fr2, &nameus, fileref, FALSE, NULL, NULL, PagedPool, FALSE, Irp); - + Status = open_fileref(fcb->Vcb, &fr2, &nameus, fileref, FALSE, NULL, NULL, PagedPool, ccb->case_sensitive || posix, Irp); + if (NT_SUCCESS(Status)) { if (!fr2->deleted) { WARN("file already exists\n"); - free_fileref(fr2); + free_fileref(Vcb, fr2); Status = STATUS_OBJECT_NAME_COLLISION; goto end3; } else - free_fileref(fr2); + free_fileref(Vcb, fr2); } else if (!NT_SUCCESS(Status) && Status != STATUS_OBJECT_NAME_NOT_FOUND) { ERR("open_fileref returned %08x\n", Status); goto end3; } - - Status = ObReferenceObjectByHandle(bcs->subvol, 0, *IoFileObjectType, UserMode, (void**)&subvol_obj, NULL); + + Status = ObReferenceObjectByHandle(subvolh, 0, *IoFileObjectType, Irp->RequestorMode, (void**)&subvol_obj, NULL); if (!NT_SUCCESS(Status)) { ERR("ObReferenceObjectByHandle returned %08x\n", Status); goto end3; } - + + if (subvol_obj->DeviceObject != 
FileObject->DeviceObject) { + Status = STATUS_INVALID_PARAMETER; + goto end; + } + subvol_fcb = subvol_obj->FsContext; if (!subvol_fcb) { Status = STATUS_INVALID_PARAMETER; goto end; } - + if (subvol_fcb->inode != subvol_fcb->subvol->root_item.objid) { WARN("handle inode was %llx, expected %llx\n", subvol_fcb->inode, subvol_fcb->subvol->root_item.objid); Status = STATUS_INVALID_PARAMETER; goto end; } - + ccb = subvol_obj->FsContext2; - + if (!ccb) { Status = STATUS_INVALID_PARAMETER; goto end; } - + if (!(ccb->access & FILE_TRAVERSE)) { WARN("insufficient privileges\n"); Status = STATUS_ACCESS_DENIED; goto end; } - + + if (fcb == Vcb->dummy_fcb) { + Status = STATUS_ACCESS_DENIED; + goto end; + } + // clear unique flag on extents of open files in subvol if (!IsListEmpty(&subvol_fcb->subvol->fcbs)) { LIST_ENTRY* le = subvol_fcb->subvol->fcbs.Flink; - + while (le != &subvol_fcb->subvol->fcbs) { struct _fcb* openfcb = CONTAINING_RECORD(le, struct _fcb, list_entry); LIST_ENTRY* le2; - + le2 = openfcb->extents.Flink; - + while (le2 != &openfcb->extents) { extent* ext = CONTAINING_RECORD(le2, extent, list_entry); - + ext->unique = FALSE; - + le2 = le2->Flink; } - + le = le->Flink; } } - - Status = do_create_snapshot(Vcb, FileObject, subvol_fcb, &utf8, &nameus, Irp); - + + Status = do_create_snapshot(Vcb, FileObject, subvol_fcb, &utf8, &nameus, readonly, Irp); + if (NT_SUCCESS(Status)) { file_ref* fr; Status = open_fileref(Vcb, &fr, &nameus, fileref, FALSE, NULL, NULL, PagedPool, FALSE, Irp); - + if (!NT_SUCCESS(Status)) { ERR("open_fileref returned %08x\n", Status); Status = STATUS_SUCCESS; } else { - send_notification_fileref(fr, FILE_NOTIFY_CHANGE_DIR_NAME, FILE_ACTION_ADDED); - free_fileref(fr); + send_notification_fileref(fr, FILE_NOTIFY_CHANGE_DIR_NAME, FILE_ACTION_ADDED, NULL); + free_fileref(Vcb, fr); } } - + end: ObDereferenceObject(subvol_obj); - + end3: ExReleaseResourceLite(&Vcb->tree_lock); - + end2: ExFreePool(utf8.Buffer); - + return Status; } -static 
NTSTATUS create_subvol(device_extension* Vcb, PFILE_OBJECT FileObject, WCHAR* name, ULONG length, PIRP Irp) { - fcb *fcb, *rootfcb; +static NTSTATUS create_subvol(device_extension* Vcb, PFILE_OBJECT FileObject, void* data, ULONG datalen, PIRP Irp) { + btrfs_create_subvol* bcs; + fcb *fcb, *rootfcb = NULL; ccb* ccb; file_ref* fileref; NTSTATUS Status; - LIST_ENTRY rollback; UINT64 id; - root* r; + root* r = NULL; LARGE_INTEGER time; BTRFS_TIME now; - ULONG len, irsize; + ULONG len; + UINT16 irsize; UNICODE_STRING nameus; ANSI_STRING utf8; - UINT64 dirpos; INODE_REF* ir; KEY searchkey; traverse_ptr tp; @@ -740,388 +765,401 @@ static NTSTATUS create_subvol(device_extension* Vcb, PFILE_OBJECT FileObject, WC UINT64* root_num; file_ref *fr = NULL, *fr2; dir_child* dc = NULL; - + fcb = FileObject->FsContext; if (!fcb) { ERR("error - fcb was NULL\n"); return STATUS_INTERNAL_ERROR; } - + ccb = FileObject->FsContext2; if (!ccb) { ERR("error - ccb was NULL\n"); return STATUS_INTERNAL_ERROR; } - + fileref = ccb->fileref; - + if (fcb->type != BTRFS_TYPE_DIRECTORY) { ERR("parent FCB was not a directory\n"); return STATUS_NOT_A_DIRECTORY; } - + if (!fileref) { ERR("fileref was NULL\n"); return STATUS_INVALID_PARAMETER; } - + if (fileref->deleted || fcb->deleted) { ERR("parent has been deleted\n"); return STATUS_FILE_DELETED; } - + if (!(ccb->access & FILE_ADD_SUBDIRECTORY)) { WARN("insufficient privileges\n"); return STATUS_ACCESS_DENIED; } - - if (fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY) + + if (Vcb->readonly) + return STATUS_MEDIA_WRITE_PROTECTED; + + if (is_subvol_readonly(fcb->subvol, Irp)) + return STATUS_ACCESS_DENIED; + + if (fcb == Vcb->dummy_fcb) return STATUS_ACCESS_DENIED; - - nameus.Length = nameus.MaximumLength = length; - nameus.Buffer = name; - - if (!is_file_name_valid(&nameus)) + + if (!data || datalen < sizeof(btrfs_create_subvol)) + return STATUS_INVALID_PARAMETER; + + bcs = (btrfs_create_subvol*)data; + + if (offsetof(btrfs_create_subvol, name[0]) 
+ bcs->namelen > datalen) + return STATUS_INVALID_PARAMETER; + + nameus.Length = nameus.MaximumLength = bcs->namelen; + nameus.Buffer = bcs->name; + + if (!is_file_name_valid(&nameus, bcs->posix)) return STATUS_OBJECT_NAME_INVALID; - + utf8.Buffer = NULL; - - Status = RtlUnicodeToUTF8N(NULL, 0, &len, name, length); + + Status = RtlUnicodeToUTF8N(NULL, 0, &len, nameus.Buffer, nameus.Length); if (!NT_SUCCESS(Status)) { ERR("RtlUnicodeToUTF8N failed with error %08x\n", Status); return Status; } - + if (len == 0) { ERR("RtlUnicodeToUTF8N returned a length of 0\n"); return STATUS_INTERNAL_ERROR; } - - utf8.MaximumLength = utf8.Length = len; + + if (len > 0xffff) { + ERR("len was too long\n"); + return STATUS_INVALID_PARAMETER; + } + + utf8.MaximumLength = utf8.Length = (USHORT)len; utf8.Buffer = ExAllocatePoolWithTag(PagedPool, utf8.Length, ALLOC_TAG); - + if (!utf8.Buffer) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - - Status = RtlUnicodeToUTF8N(utf8.Buffer, len, &len, name, length); + + Status = RtlUnicodeToUTF8N(utf8.Buffer, len, &len, nameus.Buffer, nameus.Length); if (!NT_SUCCESS(Status)) { ERR("RtlUnicodeToUTF8N failed with error %08x\n", Status); goto end2; } - + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); - + KeQuerySystemTime(&time); win_time_to_unix(time, &now); - - InitializeListHead(&rollback); - + // no need for fcb_lock as we have tree_lock exclusively - Status = open_fileref(fcb->Vcb, &fr2, &nameus, fileref, FALSE, NULL, NULL, PagedPool, FALSE, Irp); - + Status = open_fileref(fcb->Vcb, &fr2, &nameus, fileref, FALSE, NULL, NULL, PagedPool, ccb->case_sensitive || bcs->posix, Irp); + if (NT_SUCCESS(Status)) { if (!fr2->deleted) { WARN("file already exists\n"); - free_fileref(fr2); + free_fileref(Vcb, fr2); Status = STATUS_OBJECT_NAME_COLLISION; goto end; } else - free_fileref(fr2); + free_fileref(Vcb, fr2); } else if (!NT_SUCCESS(Status) && Status != STATUS_OBJECT_NAME_NOT_FOUND) { ERR("open_fileref returned %08x\n", 
Status); goto end; } - - // FIXME - make sure rollback removes new roots from internal structures - + id = InterlockedIncrement64(&Vcb->root_root->lastinode); - Status = create_root(Vcb, id, &r, FALSE, 0, Irp, &rollback); - + Status = create_root(Vcb, id, &r, FALSE, 0, Irp); + if (!NT_SUCCESS(Status)) { ERR("create_root returned %08x\n", Status); goto end; } - + TRACE("created root %llx\n", id); - + if (!Vcb->uuid_root) { root* uuid_root; - + TRACE("uuid root doesn't exist, creating it\n"); - - Status = create_root(Vcb, BTRFS_ROOT_UUID, &uuid_root, FALSE, 0, Irp, &rollback); - + + Status = create_root(Vcb, BTRFS_ROOT_UUID, &uuid_root, FALSE, 0, Irp); + if (!NT_SUCCESS(Status)) { ERR("create_root returned %08x\n", Status); goto end; } - + Vcb->uuid_root = uuid_root; } - + root_num = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64), ALLOC_TAG); if (!root_num) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + tp.tree = NULL; - + do { get_uuid(&r->root_item.uuid); - + RtlCopyMemory(&searchkey.obj_id, &r->root_item.uuid, sizeof(UINT64)); searchkey.obj_type = TYPE_SUBVOL_UUID; RtlCopyMemory(&searchkey.offset, &r->root_item.uuid.uuid[sizeof(UINT64)], sizeof(UINT64)); - + Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp); } while (NT_SUCCESS(Status) && !keycmp(searchkey, tp.item->key)); - + *root_num = r->id; - - if (!insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, root_num, sizeof(UINT64), NULL, Irp, &rollback)) { - ERR("insert_tree_item failed\n"); - Status = STATUS_INTERNAL_ERROR; + + Status = insert_tree_item(Vcb, Vcb->uuid_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, root_num, sizeof(UINT64), NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + ExFreePool(root_num); goto end; } - + r->root_item.inode.generation = 1; r->root_item.inode.st_size = 3; r->root_item.inode.st_blocks = Vcb->superblock.node_size; 
r->root_item.inode.st_nlink = 1; r->root_item.inode.st_mode = __S_IFDIR | S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; // 40755 r->root_item.inode.flags = 0xffffffff80000000; // FIXME - find out what these mean - + + if (bcs->readonly) + r->root_item.flags |= BTRFS_SUBVOL_READONLY; + r->root_item.objid = SUBVOL_ROOT_INODE; r->root_item.bytes_used = Vcb->superblock.node_size; r->root_item.ctransid = Vcb->superblock.generation; r->root_item.otransid = Vcb->superblock.generation; r->root_item.ctime = now; r->root_item.otime = now; - + // add .. inode to new subvol - - rootfcb = create_fcb(PagedPool); + + rootfcb = create_fcb(Vcb, PagedPool); if (!rootfcb) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + rootfcb->Vcb = Vcb; - + rootfcb->subvol = r; rootfcb->inode = SUBVOL_ROOT_INODE; rootfcb->type = BTRFS_TYPE_DIRECTORY; - + rootfcb->inode_item.generation = Vcb->superblock.generation; rootfcb->inode_item.transid = Vcb->superblock.generation; rootfcb->inode_item.st_nlink = 1; - rootfcb->inode_item.st_mode = __S_IFDIR | S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; // 40755 + rootfcb->inode_item.st_mode = __S_IFDIR | inherit_mode(fileref->fcb, TRUE); rootfcb->inode_item.st_atime = rootfcb->inode_item.st_ctime = rootfcb->inode_item.st_mtime = rootfcb->inode_item.otime = now; - rootfcb->inode_item.st_gid = GID_NOBODY; // FIXME? 
- - rootfcb->atts = get_file_attributes(Vcb, &rootfcb->inode_item, rootfcb->subvol, rootfcb->inode, rootfcb->type, FALSE, TRUE, Irp); - + rootfcb->inode_item.st_gid = GID_NOBODY; + + rootfcb->atts = get_file_attributes(Vcb, rootfcb->subvol, rootfcb->inode, rootfcb->type, FALSE, TRUE, Irp); + + if (r->root_item.flags & BTRFS_SUBVOL_READONLY) + rootfcb->atts |= FILE_ATTRIBUTE_READONLY; + SeCaptureSubjectContext(&subjcont); - + Status = SeAssignSecurity(fcb->sd, NULL, (void**)&rootfcb->sd, TRUE, &subjcont, IoGetFileObjectGenericMapping(), PagedPool); - + if (!NT_SUCCESS(Status)) { ERR("SeAssignSecurity returned %08x\n", Status); goto end; } - + if (!rootfcb->sd) { ERR("SeAssignSecurity returned NULL security descriptor\n"); Status = STATUS_INTERNAL_ERROR; goto end; } - + Status = RtlGetOwnerSecurityDescriptor(rootfcb->sd, &owner, &defaulted); if (!NT_SUCCESS(Status)) { ERR("RtlGetOwnerSecurityDescriptor returned %08x\n", Status); rootfcb->inode_item.st_uid = UID_NOBODY; + rootfcb->sd_dirty = TRUE; } else { rootfcb->inode_item.st_uid = sid_to_uid(owner); + rootfcb->sd_dirty = rootfcb->inode_item.st_uid == UID_NOBODY; } - - rootfcb->sd_dirty = TRUE; + + find_gid(rootfcb, fileref->fcb, &subjcont); + rootfcb->inode_item_changed = TRUE; ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); InsertTailList(&r->fcbs, &rootfcb->list_entry); InsertTailList(&Vcb->all_fcbs, &rootfcb->list_entry_all); ExReleaseResourceLite(&Vcb->fcb_lock); - + rootfcb->Header.IsFastIoPossible = fast_io_possible(rootfcb); rootfcb->Header.AllocationSize.QuadPart = 0; rootfcb->Header.FileSize.QuadPart = 0; rootfcb->Header.ValidDataLength.QuadPart = 0; - + rootfcb->created = TRUE; - + + if (fileref->fcb->inode_item.flags & BTRFS_INODE_COMPRESS) + rootfcb->inode_item.flags |= BTRFS_INODE_COMPRESS; + + rootfcb->prop_compression = fileref->fcb->prop_compression; + rootfcb->prop_compression_changed = rootfcb->prop_compression != PropCompression_None; + r->lastinode = rootfcb->inode; - + // add INODE_REF - 
- irsize = sizeof(INODE_REF) - 1 + strlen(DOTDOT); + + irsize = (UINT16)(offsetof(INODE_REF, name[0]) + strlen(DOTDOT)); ir = ExAllocatePoolWithTag(PagedPool, irsize, ALLOC_TAG); if (!ir) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + ir->index = 0; - ir->n = strlen(DOTDOT); + ir->n = (USHORT)strlen(DOTDOT); RtlCopyMemory(ir->name, DOTDOT, ir->n); - if (!insert_tree_item(Vcb, r, r->root_item.objid, TYPE_INODE_REF, r->root_item.objid, ir, irsize, NULL, Irp, &rollback)) { - ERR("insert_tree_item failed\n"); - Status = STATUS_INTERNAL_ERROR; + Status = insert_tree_item(Vcb, r, r->root_item.objid, TYPE_INODE_REF, r->root_item.objid, ir, irsize, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + ExFreePool(ir); goto end; } - + // create fileref for entry in other subvolume - - fr = create_fileref(); + + fr = create_fileref(Vcb); if (!fr) { ERR("out of memory\n"); - + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); - free_fcb(rootfcb); + free_fcb(Vcb, rootfcb); ExReleaseResourceLite(&Vcb->fcb_lock); - + Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + fr->fcb = rootfcb; - + mark_fcb_dirty(rootfcb); - - Status = fcb_get_last_dir_index(fcb, &dirpos, Irp); - if (!NT_SUCCESS(Status)) { - ERR("fcb_get_last_dir_index returned %08x\n", Status); - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); - free_fileref(fr); - ExReleaseResourceLite(&Vcb->fcb_lock); - goto end; - } - - fr->index = dirpos; - fr->utf8 = utf8; - - fr->filepart.MaximumLength = fr->filepart.Length = nameus.Length; - fr->filepart.Buffer = ExAllocatePoolWithTag(PagedPool, fr->filepart.MaximumLength, ALLOC_TAG); - if (!fr->filepart.Buffer) { - ERR("out of memory\n"); - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); - free_fileref(fr); - ExReleaseResourceLite(&Vcb->fcb_lock); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - RtlCopyMemory(fr->filepart.Buffer, nameus.Buffer, nameus.Length); - - Status = 
RtlUpcaseUnicodeString(&fr->filepart_uc, &fr->filepart, TRUE); - if (!NT_SUCCESS(Status)) { - ERR("RtlUpcaseUnicodeString returned %08x\n", Status); - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); - free_fileref(fr); - ExReleaseResourceLite(&Vcb->fcb_lock); - goto end; - } - + fr->parent = fileref; - - Status = add_dir_child(fileref->fcb, r->id, TRUE, dirpos, &utf8, &fr->filepart, &fr->filepart_uc, BTRFS_TYPE_DIRECTORY, &dc); + + Status = add_dir_child(fileref->fcb, r->id, TRUE, &utf8, &nameus, BTRFS_TYPE_DIRECTORY, &dc); if (!NT_SUCCESS(Status)) WARN("add_dir_child returned %08x\n", Status); - + fr->dc = dc; dc->fileref = fr; - + fr->fcb->hash_ptrs = ExAllocatePoolWithTag(PagedPool, sizeof(LIST_ENTRY*) * 256, ALLOC_TAG); if (!fr->fcb->hash_ptrs) { ERR("out of memory\n"); ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); - free_fileref(fr); + free_fileref(Vcb, fr); ExReleaseResourceLite(&Vcb->fcb_lock); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + RtlZeroMemory(fr->fcb->hash_ptrs, sizeof(LIST_ENTRY*) * 256); - + fr->fcb->hash_ptrs_uc = ExAllocatePoolWithTag(PagedPool, sizeof(LIST_ENTRY*) * 256, ALLOC_TAG); - if (!fcb->hash_ptrs_uc) { + if (!fr->fcb->hash_ptrs_uc) { ERR("out of memory\n"); ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); - free_fileref(fr); + free_fileref(Vcb, fr); ExReleaseResourceLite(&Vcb->fcb_lock); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + RtlZeroMemory(fr->fcb->hash_ptrs_uc, sizeof(LIST_ENTRY*) * 256); - - insert_fileref_child(fileref, fr, TRUE); + + ExAcquireResourceExclusiveLite(&fileref->nonpaged->children_lock, TRUE); + InsertTailList(&fileref->children, &fr->list_entry); + ExReleaseResourceLite(&fileref->nonpaged->children_lock); + increase_fileref_refcount(fileref); - + if (fr->fcb->type == BTRFS_TYPE_DIRECTORY) fr->fcb->fileref = fr; - + fr->created = TRUE; mark_fileref_dirty(fr); - + // change fcb->subvol's ROOT_ITEM - + fcb->subvol->root_item.ctransid = Vcb->superblock.generation; 
fcb->subvol->root_item.ctime = now; - + // change fcb's INODE_ITEM - + fcb->inode_item.transid = Vcb->superblock.generation; fcb->inode_item.st_size += utf8.Length * 2; fcb->inode_item.sequence++; - + if (!ccb->user_set_change_time) fcb->inode_item.st_ctime = now; - + if (!ccb->user_set_write_time) fcb->inode_item.st_mtime = now; - + fcb->inode_item_changed = TRUE; mark_fcb_dirty(fcb); - - Status = STATUS_SUCCESS; - + + fr->fcb->subvol->parent = fcb->subvol->id; + + Status = STATUS_SUCCESS; + end: - if (!NT_SUCCESS(Status)) - do_rollback(Vcb, &rollback); - else - clear_rollback(Vcb, &rollback); - + if (!NT_SUCCESS(Status)) { + if (fr) { + fr->deleted = TRUE; + mark_fileref_dirty(fr); + } else if (rootfcb) { + rootfcb->deleted = TRUE; + mark_fcb_dirty(rootfcb); + } + + if (r) { + RemoveEntryList(&r->list_entry); + InsertTailList(&Vcb->drop_roots, &r->list_entry); + } + } + ExReleaseResourceLite(&Vcb->tree_lock); - + if (NT_SUCCESS(Status)) { - send_notification_fileref(fr, FILE_NOTIFY_CHANGE_DIR_NAME, FILE_ACTION_ADDED); - send_notification_fileref(fr->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED); + send_notification_fileref(fr, FILE_NOTIFY_CHANGE_DIR_NAME, FILE_ACTION_ADDED, NULL); + send_notification_fileref(fr->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED, NULL); } - + end2: if (fr) { ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); - free_fileref(fr); + free_fileref(Vcb, fr); ExReleaseResourceLite(&Vcb->fcb_lock); } - + return Status; } @@ -1129,30 +1167,33 @@ static NTSTATUS get_inode_info(PFILE_OBJECT FileObject, void* data, ULONG length btrfs_inode_info* bii = data; fcb* fcb; ccb* ccb; - + if (length < sizeof(btrfs_inode_info)) return STATUS_BUFFER_OVERFLOW; - + if (!FileObject) return STATUS_INVALID_PARAMETER; - + fcb = FileObject->FsContext; - + if (!fcb) return STATUS_INVALID_PARAMETER; - + ccb = FileObject->FsContext2; - + if (!ccb) return STATUS_INVALID_PARAMETER; - + if (!(ccb->access & FILE_READ_ATTRIBUTES)) { 
WARN("insufficient privileges\n"); return STATUS_ACCESS_DENIED; } - + + if (fcb->ads) + fcb = ccb->fileref->parent->fcb; + ExAcquireResourceSharedLite(fcb->Header.Resource, TRUE); - + bii->subvol = fcb->subvol->id; bii->inode = fcb->inode; bii->top = fcb->Vcb->root_fileref->fcb == fcb ? TRUE : FALSE; @@ -1160,88 +1201,113 @@ static NTSTATUS get_inode_info(PFILE_OBJECT FileObject, void* data, ULONG length bii->st_uid = fcb->inode_item.st_uid; bii->st_gid = fcb->inode_item.st_gid; bii->st_mode = fcb->inode_item.st_mode; - bii->st_rdev = fcb->inode_item.st_rdev; + + if (fcb->inode_item.st_rdev == 0) + bii->st_rdev = 0; + else + bii->st_rdev = makedev((fcb->inode_item.st_rdev & 0xFFFFFFFFFFF) >> 20, fcb->inode_item.st_rdev & 0xFFFFF); + bii->flags = fcb->inode_item.flags; - + bii->inline_length = 0; bii->disk_size[0] = 0; bii->disk_size[1] = 0; bii->disk_size[2] = 0; - + if (fcb->type != BTRFS_TYPE_DIRECTORY) { LIST_ENTRY* le; - + le = fcb->extents.Flink; while (le != &fcb->extents) { extent* ext = CONTAINING_RECORD(le, extent, list_entry); - + if (!ext->ignore) { - if (ext->data->type == EXTENT_TYPE_INLINE) { - bii->inline_length += ext->data->decoded_size; + if (ext->extent_data.type == EXTENT_TYPE_INLINE) { + bii->inline_length += ext->datalen - (UINT16)offsetof(EXTENT_DATA, data[0]); } else { - EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data; - + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data; + // FIXME - compressed extents with a hole in them are counted more than once if (ed2->size != 0) { - if (ext->data->compression == BTRFS_COMPRESSION_NONE) { + if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE) { bii->disk_size[0] += ed2->num_bytes; - } else if (ext->data->compression == BTRFS_COMPRESSION_ZLIB) { + } else if (ext->extent_data.compression == BTRFS_COMPRESSION_ZLIB) { bii->disk_size[1] += ed2->size; - } else if (ext->data->compression == BTRFS_COMPRESSION_LZO) { + } else if (ext->extent_data.compression == BTRFS_COMPRESSION_LZO) { 
bii->disk_size[2] += ed2->size; } } } } - + le = le->Flink; } } - + + switch (fcb->prop_compression) { + case PropCompression_Zlib: + bii->compression_type = BTRFS_COMPRESSION_ZLIB; + break; + + case PropCompression_LZO: + bii->compression_type = BTRFS_COMPRESSION_LZO; + break; + + default: + bii->compression_type = BTRFS_COMPRESSION_ANY; + break; + } + ExReleaseResourceLite(fcb->Header.Resource); - + return STATUS_SUCCESS; } -static NTSTATUS set_inode_info(PFILE_OBJECT FileObject, void* data, ULONG length) { +static NTSTATUS set_inode_info(PFILE_OBJECT FileObject, void* data, ULONG length, PIRP Irp) { btrfs_set_inode_info* bsii = data; NTSTATUS Status; fcb* fcb; ccb* ccb; - + if (length < sizeof(btrfs_set_inode_info)) - return STATUS_BUFFER_OVERFLOW; - + return STATUS_INVALID_PARAMETER; + if (!FileObject) return STATUS_INVALID_PARAMETER; - + fcb = FileObject->FsContext; - + if (!fcb) return STATUS_INVALID_PARAMETER; - + ccb = FileObject->FsContext2; - + if (!ccb) return STATUS_INVALID_PARAMETER; - + if (bsii->flags_changed && !(ccb->access & FILE_WRITE_ATTRIBUTES)) { WARN("insufficient privileges\n"); return STATUS_ACCESS_DENIED; } - - if (bsii->mode_changed && !(ccb->access & WRITE_DAC)) { + + if ((bsii->mode_changed || bsii->uid_changed || bsii->gid_changed) && !(ccb->access & WRITE_DAC)) { WARN("insufficient privileges\n"); return STATUS_ACCESS_DENIED; } - - if ((bsii->uid_changed || bsii->gid_changed) && !(ccb->access & WRITE_OWNER)) { - WARN("insufficient privileges\n"); + + if (bsii->compression_type_changed && bsii->compression_type > BTRFS_COMPRESSION_LZO) + return STATUS_INVALID_PARAMETER; + + if (fcb->ads) + fcb = ccb->fileref->parent->fcb; + + if (is_subvol_readonly(fcb->subvol, Irp)) { + WARN("trying to change inode on readonly subvolume\n"); return STATUS_ACCESS_DENIED; } - + ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); - + if (bsii->flags_changed) { if (fcb->type != BTRFS_TYPE_DIRECTORY && fcb->inode_item.st_size > 0 && (bsii->flags & 
BTRFS_INODE_NODATACOW) != (fcb->inode_item.flags & BTRFS_INODE_NODATACOW)) { @@ -1249,74 +1315,64 @@ static NTSTATUS set_inode_info(PFILE_OBJECT FileObject, void* data, ULONG length Status = STATUS_INVALID_PARAMETER; goto end; } - + fcb->inode_item.flags = bsii->flags; - + if (fcb->inode_item.flags & BTRFS_INODE_NODATACOW) fcb->inode_item.flags |= BTRFS_INODE_NODATASUM; - else + else fcb->inode_item.flags &= ~(UINT64)BTRFS_INODE_NODATASUM; } - + if (bsii->mode_changed) { - UINT32 allowed = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IWGRP | S_IXGRP | S_IROTH | S_IWOTH | S_IXOTH; - + UINT32 allowed = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IWGRP | S_IXGRP | S_IROTH | S_IWOTH | S_IXOTH | + S_ISGID | S_ISVTX; + + if (ccb->access & WRITE_OWNER) + allowed |= S_ISUID; + fcb->inode_item.st_mode &= ~allowed; fcb->inode_item.st_mode |= bsii->st_mode & allowed; } - - if (bsii->uid_changed) { - PSID sid; - SECURITY_INFORMATION secinfo; - SECURITY_DESCRIPTOR sd; - void* oldsd; - + + if (bsii->uid_changed && fcb->inode_item.st_uid != bsii->st_uid) { fcb->inode_item.st_uid = bsii->st_uid; - - uid_to_sid(bsii->st_uid, &sid); - - Status = RtlCreateSecurityDescriptor(&sd, SECURITY_DESCRIPTOR_REVISION); - if (!NT_SUCCESS(Status)) { - ERR("RtlCreateSecurityDescriptor returned %08x\n", Status); - goto end; - } - - Status = RtlSetOwnerSecurityDescriptor(&sd, sid, FALSE); - if (!NT_SUCCESS(Status)) { - ERR("RtlSetOwnerSecurityDescriptor returned %08x\n", Status); - goto end; - } - - oldsd = fcb->sd; - - secinfo = OWNER_SECURITY_INFORMATION; - Status = SeSetSecurityDescriptorInfoEx(NULL, &secinfo, &sd, (void**)&fcb->sd, SEF_AVOID_PRIVILEGE_CHECK, PagedPool, IoGetFileObjectGenericMapping()); - - if (!NT_SUCCESS(Status)) { - ERR("SeSetSecurityDescriptorInfo returned %08x\n", Status); - goto end; - } - - ExFreePool(oldsd); - + fcb->sd_dirty = TRUE; - - send_notification_fcb(ccb->fileref, FILE_NOTIFY_CHANGE_SECURITY, FILE_ACTION_MODIFIED); + fcb->sd_deleted = FALSE; } - + if 
(bsii->gid_changed) fcb->inode_item.st_gid = bsii->st_gid; - - if (bsii->flags_changed || bsii->mode_changed || bsii->uid_changed || bsii->gid_changed) { + + if (bsii->compression_type_changed) { + switch (bsii->compression_type) { + case BTRFS_COMPRESSION_ANY: + fcb->prop_compression = PropCompression_None; + break; + + case BTRFS_COMPRESSION_ZLIB: + fcb->prop_compression = PropCompression_Zlib; + break; + + case BTRFS_COMPRESSION_LZO: + fcb->prop_compression = PropCompression_LZO; + break; + } + + fcb->prop_compression_changed = TRUE; + } + + if (bsii->flags_changed || bsii->mode_changed || bsii->uid_changed || bsii->gid_changed || bsii->compression_type_changed) { fcb->inode_item_changed = TRUE; mark_fcb_dirty(fcb); } - + Status = STATUS_SUCCESS; - + end: ExReleaseResourceLite(fcb->Header.Resource); - + return Status; } @@ -1324,94 +1380,130 @@ static NTSTATUS get_devices(device_extension* Vcb, void* data, ULONG length) { btrfs_device* dev = NULL; NTSTATUS Status; LIST_ENTRY* le; - + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); - + le = Vcb->devices.Flink; while (le != &Vcb->devices) { device* dev2 = CONTAINING_RECORD(le, device, list_entry); ULONG structlen; - + if (length < sizeof(btrfs_device) - sizeof(WCHAR)) { Status = STATUS_BUFFER_OVERFLOW; goto end; } - + if (!dev) dev = data; else { dev->next_entry = sizeof(btrfs_device) - sizeof(WCHAR) + dev->namelen; dev = (btrfs_device*)((UINT8*)dev + dev->next_entry); } - + structlen = length - offsetof(btrfs_device, namelen); - - Status = dev_ioctl(dev2->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &dev->namelen, structlen, TRUE, NULL); - if (!NT_SUCCESS(Status)) - goto end; - + + if (dev2->devobj) { + Status = dev_ioctl(dev2->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &dev->namelen, structlen, TRUE, NULL); + if (!NT_SUCCESS(Status)) + goto end; + + dev->missing = FALSE; + } else { + dev->namelen = 0; + dev->missing = TRUE; + } + dev->next_entry = 0; dev->dev_id = dev2->devitem.dev_id; - 
dev->size = dev2->length; dev->readonly = (Vcb->readonly || dev2->readonly) ? TRUE : FALSE; dev->device_number = dev2->disk_num; dev->partition_number = dev2->part_num; - + dev->size = dev2->devitem.num_bytes; + + if (dev2->devobj) { + GET_LENGTH_INFORMATION gli; + + Status = dev_ioctl(dev2->devobj, IOCTL_DISK_GET_LENGTH_INFO, NULL, 0, &gli, sizeof(gli), TRUE, NULL); + if (!NT_SUCCESS(Status)) + goto end; + + dev->max_size = gli.Length.QuadPart; + } else + dev->max_size = dev->size; + + RtlCopyMemory(dev->stats, dev2->stats, sizeof(UINT64) * 5); + length -= sizeof(btrfs_device) - sizeof(WCHAR) + dev->namelen; - + le = le->Flink; } end: ExReleaseResourceLite(&Vcb->tree_lock); - + return Status; } -static NTSTATUS get_usage(device_extension* Vcb, void* data, ULONG length) { +static NTSTATUS get_usage(device_extension* Vcb, void* data, ULONG length, PIRP Irp) { btrfs_usage* usage = (btrfs_usage*)data; btrfs_usage* lastbue = NULL; NTSTATUS Status; LIST_ENTRY* le; - + if (length < sizeof(btrfs_usage)) return STATUS_BUFFER_OVERFLOW; - + + if (!Vcb->chunk_usage_found) { + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + + if (!Vcb->chunk_usage_found) + Status = find_chunk_usage(Vcb, Irp); + else + Status = STATUS_SUCCESS; + + ExReleaseResourceLite(&Vcb->tree_lock); + + if (!NT_SUCCESS(Status)) { + ERR("find_chunk_usage returned %08x\n", Status); + return Status; + } + } + length -= offsetof(btrfs_usage, devices); - + ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE); - + le = Vcb->chunks.Flink; while (le != &Vcb->chunks) { BOOL addnew = FALSE; - + chunk* c = CONTAINING_RECORD(le, chunk, list_entry); - + if (!lastbue) // first entry addnew = TRUE; else { btrfs_usage* bue = usage; - + addnew = TRUE; - + while (TRUE) { if (bue->type == c->chunk_item->type) { addnew = FALSE; break; } - + if (bue->next_entry == 0) break; else bue = (btrfs_usage*)((UINT8*)bue + bue->next_entry); } } - + if (addnew) { btrfs_usage* bue; LIST_ENTRY* le2; UINT64 factor; - + if (!lastbue) 
{ bue = usage; } else { @@ -1419,20 +1511,20 @@ static NTSTATUS get_usage(device_extension* Vcb, void* data, ULONG length) { Status = STATUS_BUFFER_OVERFLOW; goto end; } - + length -= offsetof(btrfs_usage, devices); - - lastbue->next_entry = offsetof(btrfs_usage, devices) + (lastbue->num_devices * sizeof(btrfs_usage_device)); - + + lastbue->next_entry = offsetof(btrfs_usage, devices) + (ULONG)(lastbue->num_devices * sizeof(btrfs_usage_device)); + bue = (btrfs_usage*)((UINT8*)lastbue + lastbue->next_entry); } - + bue->next_entry = 0; bue->type = c->chunk_item->type; bue->size = 0; bue->used = 0; bue->num_devices = 0; - + if (c->chunk_item->type & BLOCK_FLAG_RAID0) factor = c->chunk_item->num_stripes; else if (c->chunk_item->type & BLOCK_FLAG_RAID10) @@ -1443,25 +1535,25 @@ static NTSTATUS get_usage(device_extension* Vcb, void* data, ULONG length) { factor = c->chunk_item->num_stripes - 2; else factor = 1; - + le2 = le; while (le2 != &Vcb->chunks) { chunk* c2 = CONTAINING_RECORD(le2, chunk, list_entry); - + if (c2->chunk_item->type == c->chunk_item->type) { UINT16 i; CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c2->chunk_item[1]; UINT64 stripesize; - + bue->size += c2->chunk_item->size; bue->used += c2->used; - + stripesize = c2->chunk_item->size / factor; - + for (i = 0; i < c2->chunk_item->num_stripes; i++) { UINT64 j; BOOL found = FALSE; - + for (j = 0; j < bue->num_devices; j++) { if (bue->devices[j].dev_id == cis[i].dev_id) { bue->devices[j].alloc += stripesize; @@ -1469,36 +1561,36 @@ static NTSTATUS get_usage(device_extension* Vcb, void* data, ULONG length) { break; } } - + if (!found) { if (length < sizeof(btrfs_usage_device)) { Status = STATUS_BUFFER_OVERFLOW; goto end; } - + length -= sizeof(btrfs_usage_device); - + bue->devices[bue->num_devices].dev_id = cis[i].dev_id; bue->devices[bue->num_devices].alloc = stripesize; bue->num_devices++; } } } - + le2 = le2->Flink; } - + lastbue = bue; } le = le->Flink; } - + Status = STATUS_SUCCESS; - + end: 
ExReleaseResourceLite(&Vcb->chunk_lock); - + return Status; } @@ -1508,62 +1600,62 @@ static NTSTATUS is_volume_mounted(device_extension* Vcb, PIRP Irp) { IO_STATUS_BLOCK iosb; BOOL verify = FALSE; LIST_ENTRY* le; - + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); - + le = Vcb->devices.Flink; while (le != &Vcb->devices) { device* dev = CONTAINING_RECORD(le, device, list_entry); - + if (dev->devobj && dev->removable) { Status = dev_ioctl(dev->devobj, IOCTL_STORAGE_CHECK_VERIFY, NULL, 0, &cc, sizeof(ULONG), FALSE, &iosb); - + if (iosb.Information != sizeof(ULONG)) cc = 0; - + if (Status == STATUS_VERIFY_REQUIRED || (NT_SUCCESS(Status) && cc != dev->change_count)) { dev->devobj->Flags |= DO_VERIFY_VOLUME; verify = TRUE; } - + if (NT_SUCCESS(Status) && iosb.Information == sizeof(ULONG)) dev->change_count = cc; - + if (!NT_SUCCESS(Status) || verify) { IoSetHardErrorOrVerifyDevice(Irp, dev->devobj); ExReleaseResourceLite(&Vcb->tree_lock); - + return verify ? STATUS_VERIFY_REQUIRED : Status; } } - + le = le->Flink; } - + ExReleaseResourceLite(&Vcb->tree_lock); - + return STATUS_SUCCESS; } -static NTSTATUS fs_get_statistics(PDEVICE_OBJECT DeviceObject, PFILE_OBJECT FileObject, void* buffer, DWORD buflen, ULONG_PTR* retlen) { +static NTSTATUS fs_get_statistics(void* buffer, DWORD buflen, ULONG_PTR* retlen) { FILESYSTEM_STATISTICS* fss; - + WARN("STUB: FSCTL_FILESYSTEM_GET_STATISTICS\n"); - + // This is hideously wrong, but at least it stops SMB from breaking - + if (buflen < sizeof(FILESYSTEM_STATISTICS)) return STATUS_BUFFER_TOO_SMALL; - + fss = buffer; RtlZeroMemory(fss, sizeof(FILESYSTEM_STATISTICS)); - + fss->Version = 1; fss->FileSystemType = FILESYSTEM_STATISTICS_TYPE_NTFS; fss->SizeOfCompleteStructure = sizeof(FILESYSTEM_STATISTICS); - + *retlen = sizeof(FILESYSTEM_STATISTICS); - + return STATUS_SUCCESS; } @@ -1574,133 +1666,180 @@ static NTSTATUS set_sparse(device_extension* Vcb, PFILE_OBJECT FileObject, void* fcb* fcb; ccb* ccb = FileObject->FsContext2; 
file_ref* fileref = ccb ? ccb->fileref : NULL; - + if (data && length < sizeof(FILE_SET_SPARSE_BUFFER)) return STATUS_INVALID_PARAMETER; - + if (!FileObject) { ERR("FileObject was NULL\n"); return STATUS_INVALID_PARAMETER; } - + fcb = FileObject->FsContext; - + if (!fcb) { ERR("FCB was NULL\n"); return STATUS_INVALID_PARAMETER; } - + if (!ccb) { ERR("CCB was NULL\n"); return STATUS_INVALID_PARAMETER; } - + if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_WRITE_ATTRIBUTES)) { WARN("insufficient privileges\n"); return STATUS_ACCESS_DENIED; } - + if (!fileref) { ERR("no fileref\n"); return STATUS_INVALID_PARAMETER; } - + + if (fcb->ads) { + fileref = fileref->parent; + fcb = fileref->fcb; + } + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); - + if (fcb->type != BTRFS_TYPE_FILE) { WARN("FileObject did not point to a file\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + if (fssb) set = fssb->SetSparse; else set = TRUE; - + if (set) { fcb->atts |= FILE_ATTRIBUTE_SPARSE_FILE; fcb->atts_changed = TRUE; } else { ULONG defda; - + fcb->atts &= ~FILE_ATTRIBUTE_SPARSE_FILE; fcb->atts_changed = TRUE; - - defda = get_file_attributes(Vcb, &fcb->inode_item, fcb->subvol, fcb->inode, fcb->type, - fileref && fileref->filepart.Length > 0 && fileref->filepart.Buffer[0] == '.', TRUE, Irp); - + + defda = get_file_attributes(Vcb, fcb->subvol, fcb->inode, fcb->type, + fileref && fileref->dc && fileref->dc->name.Length >= sizeof(WCHAR) && fileref->dc->name.Buffer[0] == '.', TRUE, Irp); + fcb->atts_deleted = defda == fcb->atts; } - + mark_fcb_dirty(fcb); - send_notification_fcb(fileref, FILE_NOTIFY_CHANGE_ATTRIBUTES, FILE_ACTION_MODIFIED); - + send_notification_fcb(fileref, FILE_NOTIFY_CHANGE_ATTRIBUTES, FILE_ACTION_MODIFIED, NULL); + Status = STATUS_SUCCESS; - + end: ExReleaseResourceLite(fcb->Header.Resource); ExReleaseResourceLite(&Vcb->tree_lock); - + return Status; } static NTSTATUS 
zero_data(device_extension* Vcb, fcb* fcb, UINT64 start, UINT64 length, PIRP Irp, LIST_ENTRY* rollback) { NTSTATUS Status; - BOOL compress = write_fcb_compressed(fcb); + BOOL make_inline, compress; UINT64 start_data, end_data; + ULONG buf_head; UINT8* data; - - if (compress) { + + make_inline = fcb->inode_item.st_size <= Vcb->options.max_inline || fcb_is_inline(fcb); + + if (!make_inline) + compress = write_fcb_compressed(fcb); + + if (make_inline) { + start_data = 0; + end_data = fcb->inode_item.st_size; + buf_head = (ULONG)offsetof(EXTENT_DATA, data[0]); + } else if (compress) { start_data = start & ~(UINT64)(COMPRESSED_EXTENT_SIZE - 1); end_data = min(sector_align(start + length, COMPRESSED_EXTENT_SIZE), - sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size)); + sector_align(fcb->inode_item.st_size, Vcb->superblock.sector_size)); + buf_head = 0; } else { - start_data = start & ~(UINT64)(fcb->Vcb->superblock.sector_size - 1); - end_data = sector_align(start + length, fcb->Vcb->superblock.sector_size); + start_data = start & ~(UINT64)(Vcb->superblock.sector_size - 1); + end_data = sector_align(start + length, Vcb->superblock.sector_size); + buf_head = 0; } - data = ExAllocatePoolWithTag(PagedPool, end_data - start_data, ALLOC_TAG); + data = ExAllocatePoolWithTag(PagedPool, (ULONG)(buf_head + end_data - start_data), ALLOC_TAG); if (!data) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - - RtlZeroMemory(data, end_data - start_data); - + + RtlZeroMemory(data + buf_head, (ULONG)(end_data - start_data)); + if (start > start_data || start + length < end_data) { - Status = read_file(fcb, data, start_data, end_data - start_data, NULL, Irp, TRUE); - + Status = read_file(fcb, data + buf_head, start_data, end_data - start_data, NULL, Irp); + if (!NT_SUCCESS(Status)) { ERR("read_file returned %08x\n", Status); ExFreePool(data); return Status; } } - - RtlZeroMemory(data + start - start_data, length); - - if (compress) { + + 
RtlZeroMemory(data + buf_head + start - start_data, (ULONG)length); + + if (make_inline) { + UINT16 edsize; + EXTENT_DATA* ed = (EXTENT_DATA*)data; + + Status = excise_extents(Vcb, fcb, 0, sector_align(end_data, Vcb->superblock.sector_size), Irp, rollback); + if (!NT_SUCCESS(Status)) { + ERR("excise_extents returned %08x\n", Status); + ExFreePool(data); + return Status; + } + + edsize = (UINT16)(offsetof(EXTENT_DATA, data[0]) + end_data); + + ed->generation = Vcb->superblock.generation; + ed->decoded_size = end_data; + ed->compression = BTRFS_COMPRESSION_NONE; + ed->encryption = BTRFS_ENCRYPTION_NONE; + ed->encoding = BTRFS_ENCODING_NONE; + ed->type = EXTENT_TYPE_INLINE; + + Status = add_extent_to_fcb(fcb, 0, ed, edsize, FALSE, NULL, rollback); + if (!NT_SUCCESS(Status)) { + ERR("add_extent_to_fcb returned %08x\n", Status); + ExFreePool(data); + return Status; + } + + ExFreePool(data); + + fcb->inode_item.st_blocks += end_data; + } else if (compress) { Status = write_compressed(fcb, start_data, end_data, data, Irp, rollback); - + ExFreePool(data); - + if (!NT_SUCCESS(Status)) { ERR("write_compressed returned %08x\n", Status); return Status; } } else { - Status = do_write_file(fcb, start_data, end_data, data, Irp, rollback); - + Status = do_write_file(fcb, start_data, end_data, data, Irp, FALSE, 0, rollback); + ExFreePool(data); - + if (!NT_SUCCESS(Status)) { ERR("do_write_file returned %08x\n", Status); return Status; } } - + return STATUS_SUCCESS; } @@ -1716,75 +1855,75 @@ static NTSTATUS set_zero_data(device_extension* Vcb, PFILE_OBJECT FileObject, vo UINT64 start, end; extent* ext; IO_STATUS_BLOCK iosb; - + if (!data || length < sizeof(FILE_ZERO_DATA_INFORMATION)) return STATUS_INVALID_PARAMETER; - + if (!FileObject) { ERR("FileObject was NULL\n"); return STATUS_INVALID_PARAMETER; } - + if (fzdi->BeyondFinalZero.QuadPart <= fzdi->FileOffset.QuadPart) { WARN("BeyondFinalZero was less than or equal to FileOffset (%llx <= %llx)\n", fzdi->BeyondFinalZero.QuadPart, 
fzdi->FileOffset.QuadPart); return STATUS_INVALID_PARAMETER; } - + fcb = FileObject->FsContext; - + if (!fcb) { ERR("FCB was NULL\n"); return STATUS_INVALID_PARAMETER; } - + ccb = FileObject->FsContext2; - + if (!ccb) { ERR("ccb was NULL\n"); return STATUS_INVALID_PARAMETER; } - + if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_WRITE_DATA)) { WARN("insufficient privileges\n"); return STATUS_ACCESS_DENIED; } - + fileref = ccb->fileref; - + if (!fileref) { ERR("fileref was NULL\n"); return STATUS_INVALID_PARAMETER; } - + InitializeListHead(&rollback); - + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); - + CcFlushCache(&fcb->nonpaged->segment_object, NULL, 0, &iosb); - + if (fcb->type != BTRFS_TYPE_FILE) { WARN("FileObject did not point to a file\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + if (fcb->ads) { ERR("FileObject is stream\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - - if (fzdi->FileOffset.QuadPart >= fcb->inode_item.st_size) { + + if ((UINT64)fzdi->FileOffset.QuadPart >= fcb->inode_item.st_size) { Status = STATUS_SUCCESS; goto end; } - + ext = NULL; le = fcb->extents.Flink; while (le != &fcb->extents) { extent* ext2 = CONTAINING_RECORD(le, extent, list_entry); - + if (!ext2->ignore) { ext = ext2; break; @@ -1792,13 +1931,13 @@ static NTSTATUS set_zero_data(device_extension* Vcb, PFILE_OBJECT FileObject, vo le = le->Flink; } - + if (!ext) { Status = STATUS_SUCCESS; goto end; } - - if (ext->datalen >= sizeof(EXTENT_DATA) && ext->data->type == EXTENT_TYPE_INLINE) { + + if (ext->extent_data.type == EXTENT_TYPE_INLINE) { Status = zero_data(Vcb, fcb, fzdi->FileOffset.QuadPart, fzdi->BeyondFinalZero.QuadPart - fzdi->FileOffset.QuadPart, Irp, &rollback); if (!NT_SUCCESS(Status)) { ERR("zero_data returned %08x\n", Status); @@ -1806,12 +1945,12 @@ static NTSTATUS set_zero_data(device_extension* Vcb, PFILE_OBJECT FileObject, vo } } else { start = 
sector_align(fzdi->FileOffset.QuadPart, Vcb->superblock.sector_size); - - if (fzdi->BeyondFinalZero.QuadPart > fcb->inode_item.st_size) + + if ((UINT64)fzdi->BeyondFinalZero.QuadPart > fcb->inode_item.st_size) end = sector_align(fcb->inode_item.st_size, Vcb->superblock.sector_size); else end = (fzdi->BeyondFinalZero.QuadPart / Vcb->superblock.sector_size) * Vcb->superblock.sector_size; - + if (end <= start) { Status = zero_data(Vcb, fcb, fzdi->FileOffset.QuadPart, fzdi->BeyondFinalZero.QuadPart - fzdi->FileOffset.QuadPart, Irp, &rollback); if (!NT_SUCCESS(Status)) { @@ -1819,22 +1958,22 @@ static NTSTATUS set_zero_data(device_extension* Vcb, PFILE_OBJECT FileObject, vo goto end; } } else { - if (start > fzdi->FileOffset.QuadPart) { + if (start > (UINT64)fzdi->FileOffset.QuadPart) { Status = zero_data(Vcb, fcb, fzdi->FileOffset.QuadPart, start - fzdi->FileOffset.QuadPart, Irp, &rollback); if (!NT_SUCCESS(Status)) { ERR("zero_data returned %08x\n", Status); goto end; } } - - if (end < fzdi->BeyondFinalZero.QuadPart) { + + if (end < (UINT64)fzdi->BeyondFinalZero.QuadPart) { Status = zero_data(Vcb, fcb, end, fzdi->BeyondFinalZero.QuadPart - end, Irp, &rollback); if (!NT_SUCCESS(Status)) { ERR("zero_data returned %08x\n", Status); goto end; } } - + if (end > start) { Status = excise_extents(Vcb, fcb, start, end, Irp, &rollback); if (!NT_SUCCESS(Status)) { @@ -1844,73 +1983,73 @@ static NTSTATUS set_zero_data(device_extension* Vcb, PFILE_OBJECT FileObject, vo } } } - - CcPurgeCacheSection(&fcb->nonpaged->segment_object, &fzdi->FileOffset, fzdi->BeyondFinalZero.QuadPart - fzdi->FileOffset.QuadPart, FALSE); - + + CcPurgeCacheSection(&fcb->nonpaged->segment_object, &fzdi->FileOffset, (ULONG)(fzdi->BeyondFinalZero.QuadPart - fzdi->FileOffset.QuadPart), FALSE); + KeQuerySystemTime(&time); win_time_to_unix(time, &now); - + fcb->inode_item.transid = Vcb->superblock.generation; fcb->inode_item.sequence++; - + if (!ccb->user_set_change_time) fcb->inode_item.st_ctime = now; - + if 
(!ccb->user_set_write_time) fcb->inode_item.st_mtime = now; - + fcb->extents_changed = TRUE; fcb->inode_item_changed = TRUE; mark_fcb_dirty(fcb); - - send_notification_fcb(fileref, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED); - + + send_notification_fcb(fileref, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED, NULL); + fcb->subvol->root_item.ctransid = Vcb->superblock.generation; fcb->subvol->root_item.ctime = now; Status = STATUS_SUCCESS; - + end: if (!NT_SUCCESS(Status)) do_rollback(Vcb, &rollback); else - clear_rollback(Vcb, &rollback); - + clear_rollback(&rollback); + ExReleaseResourceLite(fcb->Header.Resource); ExReleaseResourceLite(&Vcb->tree_lock); - + return Status; } -static NTSTATUS query_ranges(device_extension* Vcb, PFILE_OBJECT FileObject, FILE_ALLOCATED_RANGE_BUFFER* inbuf, ULONG inbuflen, void* outbuf, ULONG outbuflen, ULONG_PTR* retlen) { +static NTSTATUS query_ranges(PFILE_OBJECT FileObject, FILE_ALLOCATED_RANGE_BUFFER* inbuf, ULONG inbuflen, void* outbuf, ULONG outbuflen, ULONG_PTR* retlen) { NTSTATUS Status; fcb* fcb; LIST_ENTRY* le; FILE_ALLOCATED_RANGE_BUFFER* ranges = outbuf; ULONG i = 0; UINT64 last_start, last_end; - + TRACE("FSCTL_QUERY_ALLOCATED_RANGES\n"); - + if (!FileObject) { ERR("FileObject was NULL\n"); return STATUS_INVALID_PARAMETER; } - + if (!inbuf || inbuflen < sizeof(FILE_ALLOCATED_RANGE_BUFFER) || !outbuf) return STATUS_INVALID_PARAMETER; - + fcb = FileObject->FsContext; - + if (!fcb) { ERR("FCB was NULL\n"); return STATUS_INVALID_PARAMETER; } - + ExAcquireResourceSharedLite(fcb->Header.Resource, TRUE); - + // If file is not marked as sparse, claim the whole thing as an allocated range - + if (!(fcb->atts & FILE_ATTRIBUTE_SPARSE_FILE)) { if (fcb->inode_item.st_size == 0) Status = STATUS_SUCCESS; @@ -1922,23 +2061,23 @@ static NTSTATUS query_ranges(device_extension* Vcb, PFILE_OBJECT FileObject, FIL i++; Status = STATUS_SUCCESS; } - + goto end; - + } - + le = fcb->extents.Flink; - + last_start = 0; last_end = 0; - 
+ while (le != &fcb->extents) { extent* ext = CONTAINING_RECORD(le, extent, list_entry); - + if (!ext->ignore) { - EXTENT_DATA2* ed2 = (ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) ? (EXTENT_DATA2*)ext->data->data : NULL; - UINT64 len = ed2 ? ed2->num_bytes : ext->data->decoded_size; - + EXTENT_DATA2* ed2 = (ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) ? (EXTENT_DATA2*)ext->extent_data.data : NULL; + UINT64 len = ed2 ? ed2->num_bytes : ext->extent_data.decoded_size; + if (ext->offset > last_end) { // first extent after a hole if (last_end > last_start) { if ((i + 1) * sizeof(FILE_ALLOCATED_RANGE_BUFFER) <= outbuflen) { @@ -1950,16 +2089,16 @@ static NTSTATUS query_ranges(device_extension* Vcb, PFILE_OBJECT FileObject, FIL goto end; } } - + last_start = ext->offset; } - + last_end = ext->offset + len; } - + le = le->Flink; } - + if (last_end > last_start) { if ((i + 1) * sizeof(FILE_ALLOCATED_RANGE_BUFFER) <= outbuflen) { ranges[i].FileOffset.QuadPart = last_start; @@ -1972,60 +2111,60 @@ static NTSTATUS query_ranges(device_extension* Vcb, PFILE_OBJECT FileObject, FIL } Status = STATUS_SUCCESS; - + end: *retlen = i * sizeof(FILE_ALLOCATED_RANGE_BUFFER); - + ExReleaseResourceLite(fcb->Header.Resource); - + return Status; } static NTSTATUS get_object_id(device_extension* Vcb, PFILE_OBJECT FileObject, FILE_OBJECTID_BUFFER* buf, ULONG buflen, ULONG_PTR* retlen) { fcb* fcb; - + TRACE("(%p, %p, %p, %x, %p)\n", Vcb, FileObject, buf, buflen, retlen); - + if (!FileObject) { ERR("FileObject was NULL\n"); return STATUS_INVALID_PARAMETER; } - + if (!buf || buflen < sizeof(FILE_OBJECTID_BUFFER)) return STATUS_INVALID_PARAMETER; - + fcb = FileObject->FsContext; - + if (!fcb) { ERR("FCB was NULL\n"); return STATUS_INVALID_PARAMETER; } - + ExAcquireResourceSharedLite(fcb->Header.Resource, TRUE); - + RtlCopyMemory(&buf->ObjectId[0], &fcb->inode, sizeof(UINT64)); 
RtlCopyMemory(&buf->ObjectId[sizeof(UINT64)], &fcb->subvol->id, sizeof(UINT64)); - + ExReleaseResourceLite(fcb->Header.Resource); - + RtlZeroMemory(&buf->ExtendedInfo, sizeof(buf->ExtendedInfo)); - + *retlen = sizeof(FILE_OBJECTID_BUFFER); - + return STATUS_SUCCESS; } static void flush_fcb_caches(device_extension* Vcb) { LIST_ENTRY* le; - + le = Vcb->all_fcbs.Flink; while (le != &Vcb->all_fcbs) { struct _fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_all); IO_STATUS_BLOCK iosb; - + if (fcb->type != BTRFS_TYPE_DIRECTORY && !fcb->deleted) CcFlushCache(&fcb->nonpaged->segment_object, NULL, 0, &iosb); - + le = le->Flink; } } @@ -2033,55 +2172,67 @@ static void flush_fcb_caches(device_extension* Vcb) { static NTSTATUS lock_volume(device_extension* Vcb, PIRP Irp) { PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); NTSTATUS Status; - LIST_ENTRY rollback; KIRQL irql; BOOL lock_paused_balance = FALSE; - + TRACE("FSCTL_LOCK_VOLUME\n"); - + + if (Vcb->scrub.thread) { + WARN("cannot lock while scrub running\n"); + return STATUS_DEVICE_NOT_READY; + } + + if (Vcb->balance.thread) { + WARN("cannot lock while balance running\n"); + return STATUS_DEVICE_NOT_READY; + } + TRACE("locking volume\n"); - + FsRtlNotifyVolumeEvent(IrpSp->FileObject, FSRTL_VOLUME_LOCK); - + if (Vcb->locked) return STATUS_SUCCESS; - + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); - + if (Vcb->root_fileref && Vcb->root_fileref->fcb && (Vcb->root_fileref->open_count > 0 || has_open_children(Vcb->root_fileref))) { Status = STATUS_ACCESS_DENIED; ExReleaseResourceLite(&Vcb->fcb_lock); goto end; } - + ExReleaseResourceLite(&Vcb->fcb_lock); - - InitializeListHead(&rollback); - + if (Vcb->balance.thread && KeReadStateEvent(&Vcb->balance.event)) { ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); KeClearEvent(&Vcb->balance.event); ExReleaseResourceLite(&Vcb->tree_lock); - + lock_paused_balance = TRUE; } - + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); - + 
flush_fcb_caches(Vcb); - + if (Vcb->need_write && !Vcb->readonly) - do_write(Vcb, Irp, &rollback); - + Status = do_write(Vcb, Irp); + else + Status = STATUS_SUCCESS; + free_trees(Vcb); - - clear_rollback(Vcb, &rollback); - + ExReleaseResourceLite(&Vcb->tree_lock); - + + if (!NT_SUCCESS(Status)) { + ERR("do_write returned %08x\n", Status); + goto end; + } + IoAcquireVpbSpinLock(&irql); - if (!(Vcb->Vpb->Flags & VPB_LOCKED)) { + if (!(Vcb->Vpb->Flags & VPB_LOCKED)) { Vcb->Vpb->Flags |= VPB_LOCKED; Vcb->locked = TRUE; Vcb->locked_fileobj = IrpSp->FileObject; @@ -2089,21 +2240,21 @@ static NTSTATUS lock_volume(device_extension* Vcb, PIRP Irp) { } else { Status = STATUS_ACCESS_DENIED; IoReleaseVpbSpinLock(irql); - + if (lock_paused_balance) KeSetEvent(&Vcb->balance.event, 0, FALSE); - + goto end; } IoReleaseVpbSpinLock(irql); - + Status = STATUS_SUCCESS; - + end: if (!NT_SUCCESS(Status)) FsRtlNotifyVolumeEvent(IrpSp->FileObject, FSRTL_VOLUME_LOCK_FAILED); - + return Status; } @@ -2117,23 +2268,23 @@ void do_unlock_volume(device_extension* Vcb) { Vcb->locked_fileobj = NULL; IoReleaseVpbSpinLock(irql); - + if (Vcb->lock_paused_balance) KeSetEvent(&Vcb->balance.event, 0, FALSE); } static NTSTATUS unlock_volume(device_extension* Vcb, PIRP Irp) { PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); - + TRACE("FSCTL_UNLOCK_VOLUME\n"); - + if (!Vcb->locked || IrpSp->FileObject != Vcb->locked_fileobj) return STATUS_NOT_LOCKED; - + TRACE("unlocking volume\n"); - + do_unlock_volume(Vcb); - + FsRtlNotifyVolumeEvent(IrpSp->FileObject, FSRTL_VOLUME_UNLOCK); return STATUS_SUCCESS; @@ -2147,9 +2298,9 @@ static NTSTATUS invalidate_volumes(PIRP Irp) { PFILE_OBJECT fileobj; PDEVICE_OBJECT devobj; LIST_ENTRY* le; - + TRACE("FSCTL_INVALIDATE_VOLUMES\n"); - + if (!SeSinglePrivilegeCheck(TcbPrivilege, Irp->RequestorMode)) return STATUS_PRIVILEGE_NOT_HELD; @@ -2169,68 +2320,70 @@ static NTSTATUS invalidate_volumes(PIRP Irp) { } #endif - Status = ObReferenceObjectByHandle(h, 0, 
*IoFileObjectType, KernelMode, (void**)&fileobj, NULL); + Status = ObReferenceObjectByHandle(h, 0, *IoFileObjectType, Irp->RequestorMode, (void**)&fileobj, NULL); if (!NT_SUCCESS(Status)) { ERR("ObReferenceObjectByHandle returned %08x\n", Status); return Status; } - + devobj = fileobj->DeviceObject; - ObDereferenceObject(fileobj); ExAcquireResourceSharedLite(&global_loading_lock, TRUE); - + le = VcbList.Flink; - + while (le != &VcbList) { device_extension* Vcb = CONTAINING_RECORD(le, device_extension, list_entry); - + if (Vcb->Vpb && Vcb->Vpb->RealDevice == devobj) { if (Vcb->Vpb == devobj->Vpb) { KIRQL irql; PVPB newvpb; BOOL free_newvpb = FALSE; - LIST_ENTRY rollback; - + newvpb = ExAllocatePoolWithTag(NonPagedPool, sizeof(VPB), ALLOC_TAG); if (!newvpb) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + RtlZeroMemory(newvpb, sizeof(VPB)); - + IoAcquireVpbSpinLock(&irql); devobj->Vpb->Flags &= ~VPB_MOUNTED; IoReleaseVpbSpinLock(irql); - - InitializeListHead(&rollback); - + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); - + Vcb->removing = TRUE; - + ExReleaseResourceLite(&Vcb->tree_lock); - + CcWaitForCurrentLazyWriterActivity(); - + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); - + flush_fcb_caches(Vcb); - + if (Vcb->need_write && !Vcb->readonly) - do_write(Vcb, Irp, &rollback); - + Status = do_write(Vcb, Irp); + else + Status = STATUS_SUCCESS; + free_trees(Vcb); - - clear_rollback(Vcb, &rollback); - + + if (!NT_SUCCESS(Status)) { + ERR("do_write returned %08x\n", Status); + ExReleaseResourceLite(&Vcb->tree_lock); + goto end; + } + flush_fcb_caches(Vcb); - + ExReleaseResourceLite(&Vcb->tree_lock); - + IoAcquireVpbSpinLock(&irql); if (devobj->Vpb->Flags & VPB_MOUNTED) { @@ -2238,30 +2391,33 @@ static NTSTATUS invalidate_volumes(PIRP Irp) { newvpb->Size = sizeof(VPB); newvpb->RealDevice = devobj; newvpb->Flags = devobj->Vpb->Flags & VPB_REMOVE_PENDING; - + devobj->Vpb = newvpb; } else free_newvpb = TRUE; 
IoReleaseVpbSpinLock(irql); - + if (free_newvpb) ExFreePool(newvpb); - - uninit(Vcb, FALSE); + + if (Vcb->open_files == 0) + uninit(Vcb, FALSE); } - + break; } - + le = le->Flink; } - + Status = STATUS_SUCCESS; - + end: ExReleaseResourceLite(&global_loading_lock); - + + ObDereferenceObject(fileobj); + return Status; } @@ -2283,7 +2439,7 @@ static NTSTATUS is_volume_dirty(device_extension* Vcb, PIRP Irp) { return STATUS_INVALID_PARAMETER; *volstate = 0; - + if (IrpSp->FileObject->FsContext != Vcb->volume_fcb) return STATUS_INVALID_PARAMETER; @@ -2292,10 +2448,10 @@ static NTSTATUS is_volume_dirty(device_extension* Vcb, PIRP Irp) { return STATUS_SUCCESS; } -static NTSTATUS get_compression(device_extension* Vcb, PIRP Irp) { +static NTSTATUS get_compression(PIRP Irp) { PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); USHORT* compression; - + TRACE("FSCTL_GET_COMPRESSION\n"); if (Irp->AssociatedIrp.SystemBuffer) { @@ -2318,83 +2474,94 @@ static NTSTATUS get_compression(device_extension* Vcb, PIRP Irp) { return STATUS_SUCCESS; } +static NTSTATUS set_compression(PIRP Irp) { + PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); + USHORT* compression; + + TRACE("FSCTL_SET_COMPRESSION\n"); + + if (IrpSp->Parameters.FileSystemControl.InputBufferLength < sizeof(USHORT)) + return STATUS_INVALID_PARAMETER; + + compression = Irp->AssociatedIrp.SystemBuffer; + + if (*compression != COMPRESSION_FORMAT_NONE) + return STATUS_INVALID_PARAMETER; + + return STATUS_SUCCESS; +} + static void update_volumes(device_extension* Vcb) { LIST_ENTRY* le; - + volume_device_extension* vde = Vcb->vde; + pdo_device_extension* pdode = vde->pdode; + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); - ExAcquireResourceExclusiveLite(&volumes_lock, TRUE); - - le = volumes.Flink; - - while (le != &volumes) { - volume* v = CONTAINING_RECORD(le, volume, list_entry); - - if (RtlCompareMemory(&Vcb->superblock.uuid, &v->fsuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { - LIST_ENTRY* 
le; - - le = Vcb->devices.Flink; - while (le != &Vcb->devices) { - device* dev = CONTAINING_RECORD(le, device, list_entry); - - if (RtlCompareMemory(&dev->devitem.device_uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { - v->gen1 = v->gen2 = Vcb->superblock.generation - 1; - break; - } - - le = le->Flink; - } - } - + + ExAcquireResourceExclusiveLite(&pdode->child_lock, TRUE); + + le = pdode->children.Flink; + while (le != &pdode->children) { + volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry); + + vc->generation = Vcb->superblock.generation - 1; + le = le->Flink; } - - ExReleaseResourceLite(&volumes_lock); + + ExReleaseResourceLite(&pdode->child_lock); + ExReleaseResourceLite(&Vcb->tree_lock); } static NTSTATUS dismount_volume(device_extension* Vcb, PIRP Irp) { NTSTATUS Status; KIRQL irql; - LIST_ENTRY rollback; - + TRACE("FSCTL_DISMOUNT_VOLUME\n"); - + if (!(Vcb->Vpb->Flags & VPB_MOUNTED)) return STATUS_SUCCESS; - + if (Vcb->disallow_dismount) { WARN("attempting to dismount boot volume or one containing a pagefile\n"); return STATUS_ACCESS_DENIED; } - - InitializeListHead(&rollback); - + Status = FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_DISMOUNT); if (!NT_SUCCESS(Status)) { WARN("FsRtlNotifyVolumeEvent returned %08x\n", Status); } - + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); - - flush_fcb_caches(Vcb); - - if (Vcb->need_write && !Vcb->readonly) - do_write(Vcb, Irp, &rollback); - + + if (!Vcb->locked) { + flush_fcb_caches(Vcb); + + if (Vcb->need_write && !Vcb->readonly) { + Status = do_write(Vcb, Irp); + + if (!NT_SUCCESS(Status)) + ERR("do_write returned %08x\n", Status); + } + } + free_trees(Vcb); - - clear_rollback(Vcb, &rollback); - + Vcb->removing = TRUE; - update_volumes(Vcb); - + + if (Vcb->vde) { + update_volumes(Vcb); + Vcb->vde->mounted_device = NULL; + } + ExReleaseResourceLite(&Vcb->tree_lock); - + IoAcquireVpbSpinLock(&irql); Vcb->Vpb->Flags &= ~VPB_MOUNTED; Vcb->Vpb->Flags |= 
VPB_DIRECT_WRITES_ALLOWED; IoReleaseVpbSpinLock(irql); - + return STATUS_SUCCESS; } @@ -2405,28 +2572,28 @@ static NTSTATUS is_device_part_of_mounted_btrfs_raid(PDEVICE_OBJECT devobj) { UINT32 crc32; BTRFS_UUID fsuuid, devuuid; LIST_ENTRY* le; - - to_read = devobj->SectorSize == 0 ? sizeof(superblock) : sector_align(sizeof(superblock), devobj->SectorSize); - + + to_read = devobj->SectorSize == 0 ? sizeof(superblock) : (ULONG)sector_align(sizeof(superblock), devobj->SectorSize); + sb = ExAllocatePoolWithTag(PagedPool, to_read, ALLOC_TAG); if (!sb) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + Status = sync_read_phys(devobj, superblock_addrs[0], to_read, (UINT8*)sb, TRUE); if (!NT_SUCCESS(Status)) { ERR("sync_read_phys returned %08x\n", Status); ExFreePool(sb); return Status; } - + if (sb->magic != BTRFS_MAGIC) { TRACE("device is not Btrfs\n"); ExFreePool(sb); return STATUS_SUCCESS; } - + crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum)); if (crc32 != *((UINT32*)sb->checksum)) { @@ -2434,80 +2601,107 @@ static NTSTATUS is_device_part_of_mounted_btrfs_raid(PDEVICE_OBJECT devobj) { ExFreePool(sb); return STATUS_SUCCESS; } - + fsuuid = sb->uuid; devuuid = sb->dev_item.device_uuid; - + ExFreePool(sb); - + ExAcquireResourceSharedLite(&global_loading_lock, TRUE); - + le = VcbList.Flink; - + while (le != &VcbList) { device_extension* Vcb = CONTAINING_RECORD(le, device_extension, list_entry); - + if (RtlCompareMemory(&Vcb->superblock.uuid, &fsuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { LIST_ENTRY* le2; - + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); - + if (Vcb->superblock.num_devices > 1) { le2 = Vcb->devices.Flink; while (le2 != &Vcb->devices) { device* dev = CONTAINING_RECORD(le2, device, list_entry); - + if (RtlCompareMemory(&dev->devitem.device_uuid, &devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { ExReleaseResourceLite(&Vcb->tree_lock); 
ExReleaseResourceLite(&global_loading_lock); return STATUS_DEVICE_NOT_READY; } - + le2 = le2->Flink; } } - + ExReleaseResourceLite(&Vcb->tree_lock); ExReleaseResourceLite(&global_loading_lock); return STATUS_SUCCESS; } - + le = le->Flink; } - + ExReleaseResourceLite(&global_loading_lock); - + return STATUS_SUCCESS; } -static NTSTATUS add_device(device_extension* Vcb, PIRP Irp, void* data, ULONG length, KPROCESSOR_MODE processor_mode) { +void trim_whole_device(device* dev) { + DEVICE_MANAGE_DATA_SET_ATTRIBUTES dmdsa; + NTSTATUS Status; + + // FIXME - avoid "bootloader area"?? + + dmdsa.Size = sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES); + dmdsa.Action = DeviceDsmAction_Trim; + dmdsa.Flags = DEVICE_DSM_FLAG_ENTIRE_DATA_SET_RANGE | DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED; + dmdsa.ParameterBlockOffset = 0; + dmdsa.ParameterBlockLength = 0; + dmdsa.DataSetRangesOffset = 0; + dmdsa.DataSetRangesLength = 0; + + Status = dev_ioctl(dev->devobj, IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES, &dmdsa, sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), NULL, 0, TRUE, NULL); + if (!NT_SUCCESS(Status)) + WARN("IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES returned %08x\n", Status); +} + +static NTSTATUS add_device(device_extension* Vcb, PIRP Irp, KPROCESSOR_MODE processor_mode) { PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); NTSTATUS Status; PFILE_OBJECT fileobj, mountmgrfo; + PDEVICE_OBJECT DeviceObject; HANDLE h; - LIST_ENTRY rollback, *le; - GET_LENGTH_INFORMATION gli; + LIST_ENTRY* le; device* dev; DEV_ITEM* di; - UINT64 dev_id; + UINT64 dev_id, size; UINT8* mb; UINT64* stats; - MOUNTDEV_NAME mdn1, *mdn2; - UNICODE_STRING volname, mmdevpath; - volume* v; + UNICODE_STRING mmdevpath, pnp_name, pnp_name2; + volume_child* vc; PDEVICE_OBJECT mountmgr; KEY searchkey; traverse_ptr tp; STORAGE_DEVICE_NUMBER sdn; - - volname.Buffer = NULL; - + volume_device_extension* vde; + pdo_device_extension* pdode; + const GUID* pnp_guid; + GET_LENGTH_INFORMATION gli; + + pnp_name.Buffer = NULL; + if 
(!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) return STATUS_PRIVILEGE_NOT_HELD; - + + if (!Vcb->vde) { + WARN("not allowing second device to be added to non-PNP device\n"); + return STATUS_NOT_SUPPORTED; + } + if (Vcb->readonly) // FIXME - handle adding R/W device to seeding device return STATUS_MEDIA_WRITE_PROTECTED; - + #if defined(_WIN64) if (IoIs32bitProcess(Irp)) { if (IrpSp->Parameters.FileSystemControl.InputBufferLength != sizeof(UINT32)) @@ -2530,329 +2724,2057 @@ static NTSTATUS add_device(device_extension* Vcb, PIRP Irp, void* data, ULONG le ERR("ObReferenceObjectByHandle returned %08x\n", Status); return Status; } - - Status = is_device_part_of_mounted_btrfs_raid(fileobj->DeviceObject); + + DeviceObject = fileobj->DeviceObject; + + Status = get_device_pnp_name(DeviceObject, &pnp_name, &pnp_guid); if (!NT_SUCCESS(Status)) { - ERR("is_device_part_of_mounted_btrfs_raid returned %08x\n", Status); + ERR("get_device_pnp_name returned %08x\n", Status); ObDereferenceObject(fileobj); return Status; } - - Status = dev_ioctl(fileobj->DeviceObject, IOCTL_DISK_IS_WRITABLE, NULL, 0, NULL, 0, TRUE, NULL); + + // If this is a disk, we have been handed the PDO, so need to go up to find something we can use + if (RtlCompareMemory(pnp_guid, &GUID_DEVINTERFACE_DISK, sizeof(GUID)) == sizeof(GUID) && DeviceObject->AttachedDevice) + DeviceObject = DeviceObject->AttachedDevice; + + Status = dev_ioctl(DeviceObject, IOCTL_DISK_IS_WRITABLE, NULL, 0, NULL, 0, TRUE, NULL); if (!NT_SUCCESS(Status)) { ERR("IOCTL_DISK_IS_WRITABLE returned %08x\n", Status); ObDereferenceObject(fileobj); return Status; } - - Status = dev_ioctl(fileobj->DeviceObject, IOCTL_DISK_GET_LENGTH_INFO, NULL, 0, - &gli, sizeof(gli), TRUE, NULL); + + Status = is_device_part_of_mounted_btrfs_raid(DeviceObject); if (!NT_SUCCESS(Status)) { - ERR("error reading length information: %08x\n", Status); + ERR("is_device_part_of_mounted_btrfs_raid returned %08x\n", Status); 
ObDereferenceObject(fileobj); return Status; } - - if (gli.Length.QuadPart < 0x100000) { - ERR("device was not large enough to hold FS (%llx bytes, need at least 1 MB)\n", gli.Length.QuadPart); - ObDereferenceObject(fileobj); - return STATUS_INTERNAL_ERROR; - } - - Status = dev_ioctl(fileobj->DeviceObject, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, - &mdn1, sizeof(MOUNTDEV_NAME), TRUE, NULL); - if (Status == STATUS_BUFFER_OVERFLOW) { - mdn2 = ExAllocatePoolWithTag(PagedPool, offsetof(MOUNTDEV_NAME, Name[0]) + mdn1.NameLength, ALLOC_TAG); - if (!mdn2) { - ERR("out of memory\n"); - ObDereferenceObject(fileobj); - return STATUS_INTERNAL_ERROR; - } - - Status = dev_ioctl(fileobj->DeviceObject, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, - mdn2, offsetof(MOUNTDEV_NAME, Name[0]) + mdn1.NameLength, TRUE, NULL); - - if (!NT_SUCCESS(Status)) { - ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); - ObDereferenceObject(fileobj); - return Status; + + // if disk, check it has no partitions + if (RtlCompareMemory(pnp_guid, &GUID_DEVINTERFACE_DISK, sizeof(GUID)) == sizeof(GUID)) { + ULONG dlisize; + DRIVE_LAYOUT_INFORMATION_EX* dli = NULL; + + dlisize = 0; + + do { + dlisize += 1024; + + if (dli) + ExFreePool(dli); + + dli = ExAllocatePoolWithTag(PagedPool, dlisize, ALLOC_TAG); + if (!dli) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end2; + } + + Status = dev_ioctl(DeviceObject, IOCTL_DISK_GET_DRIVE_LAYOUT_EX, NULL, 0, dli, dlisize, TRUE, NULL); + } while (Status == STATUS_BUFFER_TOO_SMALL); + + if (NT_SUCCESS(Status) && dli->PartitionCount > 0) { + ExFreePool(dli); + ERR("not adding disk which has partitions\n"); + Status = STATUS_DEVICE_NOT_READY; + goto end2; } - } else if (NT_SUCCESS(Status)) { - mdn2 = ExAllocatePoolWithTag(PagedPool, sizeof(MOUNTDEV_NAME), ALLOC_TAG); - if (!mdn2) { - ERR("out of memory\n"); + + ExFreePool(dli); + } + + Status = dev_ioctl(DeviceObject, IOCTL_STORAGE_GET_DEVICE_NUMBER, NULL, 0, + &sdn, 
sizeof(STORAGE_DEVICE_NUMBER), TRUE, NULL); + if (NT_SUCCESS(Status)) { + if (sdn.DeviceType != FILE_DEVICE_DISK) { // FIXME - accept floppies and CDs? + WARN("device was not disk\n"); ObDereferenceObject(fileobj); - return STATUS_INTERNAL_ERROR; + return STATUS_INVALID_PARAMETER; } - - RtlCopyMemory(mdn2, &mdn1, sizeof(MOUNTDEV_NAME)); } else { - ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); + sdn.DeviceNumber = 0xffffffff; + sdn.PartitionNumber = 0xffffffff; + } + + Status = dev_ioctl(DeviceObject, IOCTL_DISK_GET_LENGTH_INFO, NULL, 0, + &gli, sizeof(gli), TRUE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("error reading length information: %08x\n", Status); ObDereferenceObject(fileobj); return Status; } - - if (mdn2->NameLength == 0) { - ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned zero-length name\n"); + + size = gli.Length.QuadPart; + + if (size < 0x100000) { + ERR("device was not large enough to hold FS (%llx bytes, need at least 1 MB)\n", size); ObDereferenceObject(fileobj); - ExFreePool(mdn2); return STATUS_INTERNAL_ERROR; } - - volname.Length = volname.MaximumLength = mdn2->NameLength; - volname.Buffer = ExAllocatePoolWithTag(PagedPool, volname.MaximumLength, ALLOC_TAG); - if (!volname.Buffer) { - ERR("out of memory\n"); - ObDereferenceObject(fileobj); - ExFreePool(mdn2); - return STATUS_INSUFFICIENT_RESOURCES; - } - - RtlCopyMemory(volname.Buffer, mdn2->Name, volname.Length); - ExFreePool(mdn2); - - InitializeListHead(&rollback); - + + volume_removal(drvobj, &pnp_name); + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); - - if (Vcb->need_write) { - Status = do_write(Vcb, Irp, &rollback); - if (!NT_SUCCESS(Status)) { - ERR("do_write returned %08x\n", Status); - do_rollback(Vcb, &rollback); - goto end; - } - } - + + if (Vcb->need_write) + Status = do_write(Vcb, Irp); + else + Status = STATUS_SUCCESS; + free_trees(Vcb); - - clear_rollback(Vcb, &rollback); - + + if (!NT_SUCCESS(Status)) { + ERR("do_write returned %08x\n", Status); + 
goto end; + } + dev = ExAllocatePoolWithTag(NonPagedPool, sizeof(device), ALLOC_TAG); if (!dev) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + RtlZeroMemory(dev, sizeof(device)); - dev->devobj = fileobj->DeviceObject; + dev->devobj = DeviceObject; dev->seeding = FALSE; - dev->length = gli.Length.QuadPart; - init_device(Vcb, dev, FALSE, TRUE); - + init_device(Vcb, dev, TRUE); + InitializeListHead(&dev->space); - - if (gli.Length.QuadPart > 0x100000) { // add disk hole - the first MB is marked as used - Status = add_space_entry(&dev->space, NULL, 0x100000, gli.Length.QuadPart - 0x100000); + + if (size > 0x100000) { // add disk hole - the first MB is marked as used + Status = add_space_entry(&dev->space, NULL, 0x100000, size - 0x100000); if (!NT_SUCCESS(Status)) { ERR("add_space_entry returned %08x\n", Status); - Status = STATUS_INTERNAL_ERROR; goto end; } } - + dev_id = 0; - + + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev2 = CONTAINING_RECORD(le, device, list_entry); + + if (dev2->devitem.dev_id > dev_id) + dev_id = dev2->devitem.dev_id; + + le = le->Flink; + } + + dev_id++; + + dev->devitem.dev_id = dev_id; + dev->devitem.num_bytes = size; + dev->devitem.bytes_used = 0; + dev->devitem.optimal_io_align = Vcb->superblock.sector_size; + dev->devitem.optimal_io_width = Vcb->superblock.sector_size; + dev->devitem.minimal_io_size = Vcb->superblock.sector_size; + dev->devitem.type = 0; + dev->devitem.generation = 0; + dev->devitem.start_offset = 0; + dev->devitem.dev_group = 0; + dev->devitem.seek_speed = 0; + dev->devitem.bandwidth = 0; + get_uuid(&dev->devitem.device_uuid); + dev->devitem.fs_uuid = Vcb->superblock.uuid; + + di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG); + if (!di) { + ERR("out of memory\n"); + goto end; + } + + RtlCopyMemory(di, &dev->devitem, sizeof(DEV_ITEM)); + + Status = insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, di->dev_id, di, sizeof(DEV_ITEM), NULL, 
Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + ExFreePool(di); + goto end; + } + + // add stats entry to dev tree + stats = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * 5, ALLOC_TAG); + if (!stats) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlZeroMemory(stats, sizeof(UINT64) * 5); + + searchkey.obj_id = 0; + searchkey.obj_type = TYPE_DEV_STATS; + searchkey.offset = di->dev_id; + + Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("error - find_item returned %08x\n", Status); + ExFreePool(stats); + goto end; + } + + if (!keycmp(tp.item->key, searchkey)) { + Status = delete_tree_item(Vcb, &tp); + if (!NT_SUCCESS(Status)) { + ERR("delete_tree_item returned %08x\n", Status); + ExFreePool(stats); + goto end; + } + } + + Status = insert_tree_item(Vcb, Vcb->dev_root, 0, TYPE_DEV_STATS, di->dev_id, stats, sizeof(UINT64) * 5, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("insert_tree_item returned %08x\n", Status); + ExFreePool(stats); + goto end; + } + + if (dev->trim && !dev->readonly && !Vcb->options.no_trim) + trim_whole_device(dev); + + // We clear the first megabyte of the device, so Windows doesn't identify it as another FS + mb = ExAllocatePoolWithTag(PagedPool, 0x100000, ALLOC_TAG); + if (!mb) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlZeroMemory(mb, 0x100000); + + Status = write_data_phys(DeviceObject, 0, mb, 0x100000); + if (!NT_SUCCESS(Status)) { + ERR("write_data_phys returned %08x\n", Status); + ExFreePool(mb); + goto end; + } + + ExFreePool(mb); + + vde = Vcb->vde; + pdode = vde->pdode; + + vc = ExAllocatePoolWithTag(NonPagedPool, sizeof(volume_child), ALLOC_TAG); + if (!vc) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + vc->uuid = dev->devitem.device_uuid; + vc->devid = dev_id; + vc->generation = 
Vcb->superblock.generation; + vc->devobj = DeviceObject; + vc->fileobj = fileobj; + vc->notification_entry = NULL; + + Status = IoRegisterPlugPlayNotification(EventCategoryTargetDeviceChange, 0, fileobj, + drvobj, pnp_removal, vde->pdode, &vc->notification_entry); + if (!NT_SUCCESS(Status)) + WARN("IoRegisterPlugPlayNotification returned %08x\n", Status); + + pnp_name2 = pnp_name; + + if (pnp_name.Length > 4 * sizeof(WCHAR) && pnp_name.Buffer[0] == '\\' && (pnp_name.Buffer[1] == '\\' || pnp_name.Buffer[1] == '?') && + pnp_name.Buffer[2] == '?' && pnp_name.Buffer[3] == '\\') { + pnp_name2.Buffer = &pnp_name2.Buffer[3]; + pnp_name2.Length -= 3 * sizeof(WCHAR); + pnp_name2.MaximumLength -= 3 * sizeof(WCHAR); + } + + vc->pnp_name.Length = vc->pnp_name.MaximumLength = pnp_name2.Length; + + if (pnp_name2.Length == 0) + vc->pnp_name.Buffer = NULL; + else { + vc->pnp_name.Buffer = ExAllocatePoolWithTag(PagedPool, pnp_name2.Length, ALLOC_TAG); + if (!vc->pnp_name.Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlCopyMemory(vc->pnp_name.Buffer, pnp_name2.Buffer, pnp_name2.Length); + } + + vc->size = size; + vc->seeding = FALSE; + vc->disk_num = sdn.DeviceNumber; + vc->part_num = sdn.PartitionNumber; + vc->had_drive_letter = FALSE; + + ExAcquireResourceExclusiveLite(&pdode->child_lock, TRUE); + InsertTailList(&pdode->children, &vc->list_entry); + pdode->num_children++; + pdode->children_loaded++; + ExReleaseResourceLite(&pdode->child_lock); + + RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME); + Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &mountmgrfo, &mountmgr); + if (!NT_SUCCESS(Status)) + ERR("IoGetDeviceObjectPointer returned %08x\n", Status); + else { + Status = remove_drive_letter(mountmgr, &pnp_name); + if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) + WARN("remove_drive_letter returned %08x\n", Status); + + vc->had_drive_letter = NT_SUCCESS(Status); + + 
ObDereferenceObject(mountmgrfo); + } + + Vcb->superblock.num_devices++; + Vcb->superblock.total_bytes += size; + Vcb->devices_loaded++; + InsertTailList(&Vcb->devices, &dev->list_entry); + + // FIXME - send notification that volume size has increased + + ObReferenceObject(DeviceObject); // for Vcb + + Status = do_write(Vcb, Irp); + if (!NT_SUCCESS(Status)) + ERR("do_write returned %08x\n", Status); + + ObReferenceObject(fileobj); + +end: + free_trees(Vcb); + + ExReleaseResourceLite(&Vcb->tree_lock); + +end2: + ObDereferenceObject(fileobj); + + if (pnp_name.Buffer) + ExFreePool(pnp_name.Buffer); + + if (NT_SUCCESS(Status)) + FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_CHANGE_SIZE); + + return Status; +} + +static NTSTATUS allow_extended_dasd_io(device_extension* Vcb, PFILE_OBJECT FileObject) { + fcb* fcb; + ccb* ccb; + + TRACE("FSCTL_ALLOW_EXTENDED_DASD_IO\n"); + + if (!FileObject) + return STATUS_INVALID_PARAMETER; + + fcb = FileObject->FsContext; + ccb = FileObject->FsContext2; + + if (!fcb) + return STATUS_INVALID_PARAMETER; + + if (fcb != Vcb->volume_fcb) + return STATUS_INVALID_PARAMETER; + + if (!ccb) + return STATUS_INVALID_PARAMETER; + + ccb->allow_extended_dasd_io = TRUE; + + return STATUS_SUCCESS; +} + +static NTSTATUS query_uuid(device_extension* Vcb, void* data, ULONG length) { + if (length < sizeof(BTRFS_UUID)) + return STATUS_BUFFER_OVERFLOW; + + RtlCopyMemory(data, &Vcb->superblock.uuid, sizeof(BTRFS_UUID)); + + return STATUS_SUCCESS; +} + +static NTSTATUS reset_stats(device_extension* Vcb, void* data, ULONG length, KPROCESSOR_MODE processor_mode) { + UINT64 devid; + NTSTATUS Status; + LIST_ENTRY* le; + + if (length < sizeof(UINT64)) + return STATUS_INVALID_PARAMETER; + + if (Vcb->readonly) + return STATUS_MEDIA_WRITE_PROTECTED; + + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) + return STATUS_PRIVILEGE_NOT_HELD; + + devid = *((UINT64*)data); + + ExAcquireResourceSharedLite(&Vcb->tree_lock, 
TRUE); + + le = Vcb->devices.Flink; + + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (dev->devitem.dev_id == devid) { + RtlZeroMemory(dev->stats, sizeof(UINT64) * 5); + dev->stats_changed = TRUE; + Vcb->stats_changed = TRUE; + Vcb->need_write = TRUE; + Status = STATUS_SUCCESS; + goto end; + } + + le = le->Flink; + } + + WARN("device %llx not found\n", devid); + Status = STATUS_INVALID_PARAMETER; + +end: + ExReleaseResourceLite(&Vcb->tree_lock); + + return Status; +} + +static NTSTATUS get_integrity_information(device_extension* Vcb, PFILE_OBJECT FileObject, void* data, ULONG datalen) { + FSCTL_GET_INTEGRITY_INFORMATION_BUFFER* fgiib = (FSCTL_GET_INTEGRITY_INFORMATION_BUFFER*)data; + + TRACE("FSCTL_GET_INTEGRITY_INFORMATION\n"); + + // STUB + + if (!FileObject) + return STATUS_INVALID_PARAMETER; + + if (!data || datalen < sizeof(FSCTL_GET_INTEGRITY_INFORMATION_BUFFER)) + return STATUS_INVALID_PARAMETER; + + fgiib->ChecksumAlgorithm = 0; + fgiib->Reserved = 0; + fgiib->Flags = 0; + fgiib->ChecksumChunkSizeInBytes = Vcb->superblock.sector_size; + fgiib->ClusterSizeInBytes = Vcb->superblock.sector_size; + + return STATUS_SUCCESS; +} + +static NTSTATUS set_integrity_information(PFILE_OBJECT FileObject, void* data, ULONG datalen) { + TRACE("FSCTL_SET_INTEGRITY_INFORMATION\n"); + + // STUB + + if (!FileObject) + return STATUS_INVALID_PARAMETER; + + if (!data || datalen < sizeof(FSCTL_SET_INTEGRITY_INFORMATION_BUFFER)) + return STATUS_INVALID_PARAMETER; + + return STATUS_SUCCESS; +} + +BOOL fcb_is_inline(fcb* fcb) { + LIST_ENTRY* le; + + le = fcb->extents.Flink; + while (le != &fcb->extents) { + extent* ext = CONTAINING_RECORD(le, extent, list_entry); + + if (!ext->ignore) + return ext->extent_data.type == EXTENT_TYPE_INLINE; + + le = le->Flink; + } + + return FALSE; +} + +static NTSTATUS duplicate_extents(device_extension* Vcb, PFILE_OBJECT FileObject, void* data, ULONG datalen, PIRP Irp) { + DUPLICATE_EXTENTS_DATA* ded = 
(DUPLICATE_EXTENTS_DATA*)data; + fcb *fcb = FileObject ? FileObject->FsContext : NULL, *sourcefcb; + ccb *ccb = FileObject ? FileObject->FsContext2 : NULL, *sourceccb; + NTSTATUS Status; + PFILE_OBJECT sourcefo; + UINT64 sourcelen, nbytes = 0; + LIST_ENTRY rollback, *le, newexts; + LARGE_INTEGER time; + BTRFS_TIME now; + BOOL make_inline; + + if (!ded || datalen < sizeof(DUPLICATE_EXTENTS_DATA)) + return STATUS_BUFFER_TOO_SMALL; + + if (Vcb->readonly) + return STATUS_MEDIA_WRITE_PROTECTED; + + if (ded->ByteCount.QuadPart == 0) + return STATUS_SUCCESS; + + if (!fcb || !ccb || fcb == Vcb->volume_fcb) + return STATUS_INVALID_PARAMETER; + + if (is_subvol_readonly(fcb->subvol, Irp)) + return STATUS_ACCESS_DENIED; + + if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_WRITE_DATA)) { + WARN("insufficient privileges\n"); + return STATUS_ACCESS_DENIED; + } + + if (!fcb->ads && fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK) + return STATUS_INVALID_PARAMETER; + + Status = ObReferenceObjectByHandle(ded->FileHandle, 0, *IoFileObjectType, Irp->RequestorMode, (void**)&sourcefo, NULL); + if (!NT_SUCCESS(Status)) { + ERR("ObReferenceObjectByHandle returned %08x\n", Status); + return Status; + } + + if (sourcefo->DeviceObject != FileObject->DeviceObject) { + WARN("source and destination are on different volumes\n"); + ObDereferenceObject(sourcefo); + return STATUS_INVALID_PARAMETER; + } + + sourcefcb = sourcefo->FsContext; + sourceccb = sourcefo->FsContext2; + + if (!sourcefcb || !sourceccb || sourcefcb == Vcb->volume_fcb) { + ObDereferenceObject(sourcefo); + return STATUS_INVALID_PARAMETER; + } + + if (!sourcefcb->ads && !fcb->ads) { + if ((ded->SourceFileOffset.QuadPart & (Vcb->superblock.sector_size - 1)) || (ded->TargetFileOffset.QuadPart & (Vcb->superblock.sector_size - 1))) { + ObDereferenceObject(sourcefo); + return STATUS_INVALID_PARAMETER; + } + + if (ded->ByteCount.QuadPart & (Vcb->superblock.sector_size - 1)) { + ObDereferenceObject(sourcefo); + 
return STATUS_INVALID_PARAMETER; + } + } + + if (Irp->RequestorMode == UserMode && (!(sourceccb->access & FILE_READ_DATA) || !(sourceccb->access & FILE_READ_ATTRIBUTES))) { + WARN("insufficient privileges\n"); + ObDereferenceObject(sourcefo); + return STATUS_ACCESS_DENIED; + } + + if (!sourcefcb->ads && sourcefcb->type != BTRFS_TYPE_FILE && sourcefcb->type != BTRFS_TYPE_SYMLINK) { + ObDereferenceObject(sourcefo); + return STATUS_INVALID_PARAMETER; + } + + sourcelen = sourcefcb->ads ? sourcefcb->adsdata.Length : sourcefcb->inode_item.st_size; + + if (sector_align(sourcelen, Vcb->superblock.sector_size) < (UINT64)ded->SourceFileOffset.QuadPart + (UINT64)ded->ByteCount.QuadPart) { + ObDereferenceObject(sourcefo); + return STATUS_NOT_SUPPORTED; + } + + if (fcb == sourcefcb && + ((ded->SourceFileOffset.QuadPart >= ded->TargetFileOffset.QuadPart && ded->SourceFileOffset.QuadPart < ded->TargetFileOffset.QuadPart + ded->ByteCount.QuadPart) || + (ded->TargetFileOffset.QuadPart >= ded->SourceFileOffset.QuadPart && ded->TargetFileOffset.QuadPart < ded->SourceFileOffset.QuadPart + ded->ByteCount.QuadPart))) { + WARN("source and destination are the same, and the ranges overlap\n"); + ObDereferenceObject(sourcefo); + return STATUS_INVALID_PARAMETER; + } + + // fail if nocsum flag set on one file but not the other + if (!fcb->ads && !sourcefcb->ads && (fcb->inode_item.flags & BTRFS_INODE_NODATASUM) != (sourcefcb->inode_item.flags & BTRFS_INODE_NODATASUM)) { + ObDereferenceObject(sourcefo); + return STATUS_INVALID_PARAMETER; + } + + InitializeListHead(&rollback); + InitializeListHead(&newexts); + + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + + ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); + + if (fcb != sourcefcb) + ExAcquireResourceSharedLite(sourcefcb->Header.Resource, TRUE); + + if (!FsRtlFastCheckLockForWrite(&fcb->lock, &ded->TargetFileOffset, &ded->ByteCount, 0, FileObject, PsGetCurrentProcess())) { + Status = STATUS_FILE_LOCK_CONFLICT; + goto end; + 
} + + if (!FsRtlFastCheckLockForRead(&sourcefcb->lock, &ded->SourceFileOffset, &ded->ByteCount, 0, FileObject, PsGetCurrentProcess())) { + Status = STATUS_FILE_LOCK_CONFLICT; + goto end; + } + + make_inline = fcb->ads ? FALSE : (fcb->inode_item.st_size <= Vcb->options.max_inline || fcb_is_inline(fcb)); + + if (fcb->ads || sourcefcb->ads || make_inline || fcb_is_inline(sourcefcb)) { + UINT8* data2; + ULONG bytes_read, dataoff, datalen2; + + if (make_inline) { + dataoff = (ULONG)ded->TargetFileOffset.QuadPart; + datalen2 = (ULONG)fcb->inode_item.st_size; + } else if (fcb->ads) { + dataoff = 0; + datalen2 = (ULONG)ded->ByteCount.QuadPart; + } else { + dataoff = ded->TargetFileOffset.QuadPart % Vcb->superblock.sector_size; + datalen2 = (ULONG)sector_align(ded->ByteCount.QuadPart + dataoff, Vcb->superblock.sector_size); + } + + data2 = ExAllocatePoolWithTag(PagedPool, datalen2, ALLOC_TAG); + if (!data2) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + if (dataoff > 0) { + if (make_inline) + Status = read_file(fcb, data2, 0, datalen2, NULL, Irp); + else + Status = read_file(fcb, data2, ded->TargetFileOffset.QuadPart - dataoff, dataoff, NULL, Irp); + + if (!NT_SUCCESS(Status)) { + ERR("read_file returned %08x\n", Status); + ExFreePool(data2); + goto end; + } + } + + if (sourcefcb->ads) { + Status = read_stream(sourcefcb, data2 + dataoff, ded->SourceFileOffset.QuadPart, (ULONG)ded->ByteCount.QuadPart, &bytes_read); + if (!NT_SUCCESS(Status)) { + ERR("read_stream returned %08x\n", Status); + ExFreePool(data2); + goto end; + } + } else { + Status = read_file(sourcefcb, data2 + dataoff, ded->SourceFileOffset.QuadPart, ded->ByteCount.QuadPart, &bytes_read, Irp); + if (!NT_SUCCESS(Status)) { + ERR("read_file returned %08x\n", Status); + ExFreePool(data2); + goto end; + } + } + + if (dataoff + bytes_read < datalen2) + RtlZeroMemory(data2 + dataoff + bytes_read, datalen2 - bytes_read); + + if (fcb->ads) + 
RtlCopyMemory(&fcb->adsdata.Buffer[ded->TargetFileOffset.QuadPart], data2, (USHORT)min(ded->ByteCount.QuadPart, fcb->adsdata.Length - ded->TargetFileOffset.QuadPart)); + else if (make_inline) { + UINT16 edsize; + EXTENT_DATA* ed; + + Status = excise_extents(Vcb, fcb, 0, sector_align(fcb->inode_item.st_size, Vcb->superblock.sector_size), Irp, &rollback); + if (!NT_SUCCESS(Status)) { + ERR("excise_extents returned %08x\n", Status); + ExFreePool(data2); + goto end; + } + + edsize = (UINT16)(offsetof(EXTENT_DATA, data[0]) + datalen2); + + ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG); + if (!ed) { + ERR("out of memory\n"); + ExFreePool(data2); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + ed->generation = Vcb->superblock.generation; + ed->decoded_size = fcb->inode_item.st_size; + ed->compression = BTRFS_COMPRESSION_NONE; + ed->encryption = BTRFS_ENCRYPTION_NONE; + ed->encoding = BTRFS_ENCODING_NONE; + ed->type = EXTENT_TYPE_INLINE; + + RtlCopyMemory(ed->data, data2, datalen2); + + Status = add_extent_to_fcb(fcb, 0, ed, edsize, FALSE, NULL, &rollback); + if (!NT_SUCCESS(Status)) { + ERR("add_extent_to_fcb returned %08x\n", Status); + ExFreePool(data2); + goto end; + } + + fcb->inode_item.st_blocks += datalen2; + } else { + UINT64 start = ded->TargetFileOffset.QuadPart - (ded->TargetFileOffset.QuadPart % Vcb->superblock.sector_size); + + Status = do_write_file(fcb, start, start + datalen2, data2, Irp, FALSE, 0, &rollback); + if (!NT_SUCCESS(Status)) { + ERR("do_write_file returned %08x\n", Status); + ExFreePool(data2); + goto end; + } + } + + ExFreePool(data2); + } else { + LIST_ENTRY* lastextle; + + le = sourcefcb->extents.Flink; + while (le != &sourcefcb->extents) { + extent* ext = CONTAINING_RECORD(le, extent, list_entry); + + if (!ext->ignore) { + if (ext->offset >= (UINT64)ded->SourceFileOffset.QuadPart + (UINT64)ded->ByteCount.QuadPart) + break; + + if (ext->extent_data.type != EXTENT_TYPE_INLINE) { + ULONG extlen = offsetof(extent, 
extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2); + extent* ext2; + EXTENT_DATA2 *ed2s, *ed2d; + chunk* c; + + ed2s = (EXTENT_DATA2*)ext->extent_data.data; + + if (ext->offset + ed2s->num_bytes <= (UINT64)ded->SourceFileOffset.QuadPart) { + le = le->Flink; + continue; + } + + ext2 = ExAllocatePoolWithTag(PagedPool, extlen, ALLOC_TAG); + if (!ext2) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + if (ext->offset < (UINT64)ded->SourceFileOffset.QuadPart) + ext2->offset = ded->TargetFileOffset.QuadPart; + else + ext2->offset = ext->offset - ded->SourceFileOffset.QuadPart + ded->TargetFileOffset.QuadPart; + + ext2->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2); + ext2->unique = FALSE; + ext2->ignore = FALSE; + ext2->inserted = TRUE; + + ext2->extent_data.generation = Vcb->superblock.generation; + ext2->extent_data.decoded_size = ext->extent_data.decoded_size; + ext2->extent_data.compression = ext->extent_data.compression; + ext2->extent_data.encryption = ext->extent_data.encryption; + ext2->extent_data.encoding = ext->extent_data.encoding; + ext2->extent_data.type = ext->extent_data.type; + + ed2d = (EXTENT_DATA2*)ext2->extent_data.data; + + ed2d->address = ed2s->address; + ed2d->size = ed2s->size; + + if (ext->offset < (UINT64)ded->SourceFileOffset.QuadPart) { + ed2d->offset = ed2s->offset + ded->SourceFileOffset.QuadPart - ext->offset; + ed2d->num_bytes = min((UINT64)ded->ByteCount.QuadPart, ed2s->num_bytes + ext->offset - ded->SourceFileOffset.QuadPart); + } else { + ed2d->offset = ed2s->offset; + ed2d->num_bytes = min(ded->SourceFileOffset.QuadPart + ded->ByteCount.QuadPart - ext->offset, ed2s->num_bytes); + } + + if (ext->csum) { + if (ext->extent_data.compression == BTRFS_COMPRESSION_NONE) { + ext2->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2d->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG); + if (!ext2->csum) { + ERR("out of memory\n"); + Status = 
STATUS_INSUFFICIENT_RESOURCES; + ExFreePool(ext2); + goto end; + } + + RtlCopyMemory(ext2->csum, &ext->csum[(ed2d->offset - ed2s->offset) / Vcb->superblock.sector_size], + (ULONG)(ed2d->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size)); + } else { + ext2->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2d->size * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG); + if (!ext2->csum) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + ExFreePool(ext2); + goto end; + } + + RtlCopyMemory(ext2->csum, ext->csum, (ULONG)(ed2s->size * sizeof(UINT32) / Vcb->superblock.sector_size)); + } + } else + ext2->csum = NULL; + + InsertTailList(&newexts, &ext2->list_entry); + + c = get_chunk_from_address(Vcb, ed2s->address); + if (!c) { + ERR("get_chunk_from_address(%llx) failed\n", ed2s->address); + Status = STATUS_INTERNAL_ERROR; + goto end; + } + + Status = update_changed_extent_ref(Vcb, c, ed2s->address, ed2s->size, fcb->subvol->id, fcb->inode, ext2->offset - ed2d->offset, + 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("update_changed_extent_ref returned %08x\n", Status); + goto end; + } + + nbytes += ed2d->num_bytes; + } + } + + le = le->Flink; + } + + Status = excise_extents(Vcb, fcb, ded->TargetFileOffset.QuadPart, ded->TargetFileOffset.QuadPart + ded->ByteCount.QuadPart, Irp, &rollback); + if (!NT_SUCCESS(Status)) { + ERR("excise_extents returned %08x\n", Status); + + while (!IsListEmpty(&newexts)) { + extent* ext = CONTAINING_RECORD(RemoveHeadList(&newexts), extent, list_entry); + ExFreePool(ext); + } + + goto end; + } + + // clear unique flags in source fcb + le = sourcefcb->extents.Flink; + while (le != &sourcefcb->extents) { + extent* ext = CONTAINING_RECORD(le, extent, list_entry); + + if (!ext->ignore && ext->unique && (ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC)) { + EXTENT_DATA2* ed2s = (EXTENT_DATA2*)ext->extent_data.data; + 
LIST_ENTRY* le2; + + le2 = newexts.Flink; + while (le2 != &newexts) { + extent* ext2 = CONTAINING_RECORD(le2, extent, list_entry); + + if (ext2->extent_data.type == EXTENT_TYPE_REGULAR || ext2->extent_data.type == EXTENT_TYPE_PREALLOC) { + EXTENT_DATA2* ed2d = (EXTENT_DATA2*)ext2->extent_data.data; + + if (ed2d->address == ed2s->address && ed2d->size == ed2s->size) { + ext->unique = FALSE; + break; + } + } + + le2 = le2->Flink; + } + } + + le = le->Flink; + } + + lastextle = &fcb->extents; + while (!IsListEmpty(&newexts)) { + extent* ext = CONTAINING_RECORD(RemoveHeadList(&newexts), extent, list_entry); + + add_extent(fcb, lastextle, ext); + lastextle = &ext->list_entry; + } + } + + KeQuerySystemTime(&time); + win_time_to_unix(time, &now); + + if (fcb->ads) { + ccb->fileref->parent->fcb->inode_item.sequence++; + + if (!ccb->user_set_change_time) + ccb->fileref->parent->fcb->inode_item.st_ctime = now; + + ccb->fileref->parent->fcb->inode_item_changed = TRUE; + mark_fcb_dirty(ccb->fileref->parent->fcb); + } else { + fcb->inode_item.st_blocks += nbytes; + fcb->inode_item.sequence++; + + if (!ccb->user_set_change_time) + fcb->inode_item.st_ctime = now; + + if (!ccb->user_set_write_time) { + fcb->inode_item.st_mtime = now; + send_notification_fcb(ccb->fileref, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED, NULL); + } + + fcb->inode_item_changed = TRUE; + fcb->extents_changed = TRUE; + } + + mark_fcb_dirty(fcb); + + if (fcb->nonpaged->segment_object.DataSectionObject) + CcPurgeCacheSection(&fcb->nonpaged->segment_object, &ded->TargetFileOffset, (ULONG)ded->ByteCount.QuadPart, FALSE); + + Status = STATUS_SUCCESS; + +end: + ObDereferenceObject(sourcefo); + + if (NT_SUCCESS(Status)) + clear_rollback(&rollback); + else + do_rollback(Vcb, &rollback); + + if (fcb != sourcefcb) + ExReleaseResourceLite(sourcefcb->Header.Resource); + + ExReleaseResourceLite(fcb->Header.Resource); + + ExReleaseResourceLite(&Vcb->tree_lock); + + return Status; +} + +// based on functions in 
sys/sysmacros.h +#define major(rdev) ((((rdev) >> 8) & 0xFFF) | ((UINT32)((rdev) >> 32) & ~0xFFF)) +#define minor(rdev) (((rdev) & 0xFF) | ((UINT32)((rdev) >> 12) & ~0xFF)) + +static NTSTATUS mknod(device_extension* Vcb, PFILE_OBJECT FileObject, void* data, ULONG datalen, PIRP Irp) { + NTSTATUS Status; + btrfs_mknod* bmn; + fcb *parfcb, *fcb; + ccb* parccb; + file_ref *parfileref, *fileref; + UNICODE_STRING name; + root* subvol; + UINT64 inode; + dir_child* dc; + LARGE_INTEGER time; + BTRFS_TIME now; + LIST_ENTRY* lastle; + ANSI_STRING utf8; + ULONG len, i; + SECURITY_SUBJECT_CONTEXT subjcont; + PSID owner; + BOOLEAN defaulted; + + TRACE("(%p, %p, %p, %u)\n", Vcb, FileObject, data, datalen); + + if (!FileObject || !FileObject->FsContext || !FileObject->FsContext2 || FileObject->FsContext == Vcb->volume_fcb) + return STATUS_INVALID_PARAMETER; + + if (Vcb->readonly) + return STATUS_MEDIA_WRITE_PROTECTED; + + parfcb = FileObject->FsContext; + + if (parfcb->type != BTRFS_TYPE_DIRECTORY) { + WARN("trying to create file in something other than a directory\n"); + return STATUS_INVALID_PARAMETER; + } + + if (is_subvol_readonly(parfcb->subvol, Irp)) + return STATUS_ACCESS_DENIED; + + parccb = FileObject->FsContext2; + parfileref = parccb->fileref; + + if (!parfileref) + return STATUS_INVALID_PARAMETER; + + if (datalen < sizeof(btrfs_mknod)) + return STATUS_INVALID_PARAMETER; + + bmn = (btrfs_mknod*)data; + + if (datalen < offsetof(btrfs_mknod, name[0]) + bmn->namelen || bmn->namelen < sizeof(WCHAR)) + return STATUS_INVALID_PARAMETER; + + if (bmn->type == BTRFS_TYPE_UNKNOWN || bmn->type > BTRFS_TYPE_SYMLINK) + return STATUS_INVALID_PARAMETER; + + if ((bmn->type == BTRFS_TYPE_DIRECTORY && !(parccb->access & FILE_ADD_SUBDIRECTORY)) || + (bmn->type != BTRFS_TYPE_DIRECTORY && !(parccb->access & FILE_ADD_FILE))) { + WARN("insufficient privileges\n"); + return STATUS_ACCESS_DENIED; + } + + if (bmn->inode != 0) { + if 
(!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), Irp->RequestorMode)) + return STATUS_PRIVILEGE_NOT_HELD; + } + + for (i = 0; i < bmn->namelen / sizeof(WCHAR); i++) { + if (bmn->name[i] == 0 || bmn->name[i] == '/') + return STATUS_OBJECT_NAME_INVALID; + } + + // don't allow files called . or .. + if (bmn->name[0] == '.' && (bmn->namelen == sizeof(WCHAR) || (bmn->namelen == 2 * sizeof(WCHAR) && bmn->name[1] == '.'))) + return STATUS_OBJECT_NAME_INVALID; + + Status = RtlUnicodeToUTF8N(NULL, 0, &len, bmn->name, bmn->namelen); + if (!NT_SUCCESS(Status)) { + ERR("RtlUnicodeToUTF8N return %08x\n", Status); + return Status; + } + + if (len == 0) { + ERR("RtlUnicodeToUTF8N returned a length of 0\n"); + return STATUS_INTERNAL_ERROR; + } + + if (len > 0xffff) { + ERR("len was too long (%x)\n", len); + return STATUS_INVALID_PARAMETER; + } + + utf8.MaximumLength = utf8.Length = (USHORT)len; + utf8.Buffer = ExAllocatePoolWithTag(PagedPool, utf8.MaximumLength, ALLOC_TAG); + + if (!utf8.Buffer) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + Status = RtlUnicodeToUTF8N(utf8.Buffer, len, &len, bmn->name, bmn->namelen); + if (!NT_SUCCESS(Status)) { + ERR("RtlUnicodeToUTF8N failed with error %08x\n", Status); + ExFreePool(utf8.Buffer); + return Status; + } + + name.Length = name.MaximumLength = bmn->namelen; + name.Buffer = bmn->name; + + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + + Status = find_file_in_dir(&name, parfcb, &subvol, &inode, &dc, TRUE); + if (!NT_SUCCESS(Status) && Status != STATUS_OBJECT_NAME_NOT_FOUND) { + ERR("find_file_in_dir returned %08x\n", Status); + goto end; + } + + if (NT_SUCCESS(Status)) { + WARN("filename already exists\n"); + Status = STATUS_OBJECT_NAME_COLLISION; + goto end; + } + + if (bmn->inode == 0) { + inode = InterlockedIncrement64(&parfcb->subvol->lastinode); + lastle = parfcb->subvol->fcbs.Blink; + } else { + if (bmn->inode > (UINT64)parfcb->subvol->lastinode) { + inode = 
parfcb->subvol->lastinode = bmn->inode; + lastle = parfcb->subvol->fcbs.Blink; + } else { + LIST_ENTRY* le = parfcb->subvol->fcbs.Flink; + + lastle = parfcb->subvol->fcbs.Blink;; + while (le != &parfcb->subvol->fcbs) { + struct _fcb* fcb2 = CONTAINING_RECORD(le, struct _fcb, list_entry); + + if (fcb2->inode == bmn->inode && !fcb2->deleted) { + WARN("inode collision\n"); + Status = STATUS_INVALID_PARAMETER; + goto end; + } else if (fcb2->inode > bmn->inode) { + lastle = fcb2->list_entry.Blink; + break; + } + + le = le->Flink; + } + + inode = bmn->inode; + } + } + + KeQuerySystemTime(&time); + win_time_to_unix(time, &now); + + fcb = create_fcb(Vcb, PagedPool); + if (!fcb) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + fcb->Vcb = Vcb; + + fcb->inode_item.generation = Vcb->superblock.generation; + fcb->inode_item.transid = Vcb->superblock.generation; + fcb->inode_item.st_size = 0; + fcb->inode_item.st_blocks = 0; + fcb->inode_item.block_group = 0; + fcb->inode_item.st_nlink = 1; + fcb->inode_item.st_uid = UID_NOBODY; + fcb->inode_item.st_gid = GID_NOBODY; + fcb->inode_item.st_mode = inherit_mode(parfcb, bmn->type == BTRFS_TYPE_DIRECTORY); + + if (bmn->type == BTRFS_TYPE_BLOCKDEV || bmn->type == BTRFS_TYPE_CHARDEV) + fcb->inode_item.st_rdev = (minor(bmn->st_rdev) & 0xFFFFF) | ((major(bmn->st_rdev) & 0xFFFFFFFFFFF) << 20); + else + fcb->inode_item.st_rdev = 0; + + fcb->inode_item.flags = 0; + fcb->inode_item.sequence = 1; + fcb->inode_item.st_atime = now; + fcb->inode_item.st_ctime = now; + fcb->inode_item.st_mtime = now; + fcb->inode_item.otime = now; + + if (bmn->type == BTRFS_TYPE_DIRECTORY) + fcb->inode_item.st_mode |= __S_IFDIR; + else if (bmn->type == BTRFS_TYPE_CHARDEV) + fcb->inode_item.st_mode |= __S_IFCHR; + else if (bmn->type == BTRFS_TYPE_BLOCKDEV) + fcb->inode_item.st_mode |= __S_IFBLK; + else if (bmn->type == BTRFS_TYPE_FIFO) + fcb->inode_item.st_mode |= __S_IFIFO; + else if (bmn->type == BTRFS_TYPE_SOCKET) + 
fcb->inode_item.st_mode |= __S_IFSOCK; + else if (bmn->type == BTRFS_TYPE_SYMLINK) + fcb->inode_item.st_mode |= __S_IFLNK; + else + fcb->inode_item.st_mode |= __S_IFREG; + + if (bmn->type != BTRFS_TYPE_DIRECTORY) + fcb->inode_item.st_mode &= ~(S_IXUSR | S_IXGRP | S_IXOTH); // remove executable bit if not directory + + // inherit nodatacow flag from parent directory + if (parfcb->inode_item.flags & BTRFS_INODE_NODATACOW) { + fcb->inode_item.flags |= BTRFS_INODE_NODATACOW; + + if (bmn->type != BTRFS_TYPE_DIRECTORY) + fcb->inode_item.flags |= BTRFS_INODE_NODATASUM; + } + + if (parfcb->inode_item.flags & BTRFS_INODE_COMPRESS) + fcb->inode_item.flags |= BTRFS_INODE_COMPRESS; + + fcb->prop_compression = parfcb->prop_compression; + fcb->prop_compression_changed = fcb->prop_compression != PropCompression_None; + + fcb->inode_item_changed = TRUE; + + fcb->Header.IsFastIoPossible = fast_io_possible(fcb); + fcb->Header.AllocationSize.QuadPart = 0; + fcb->Header.FileSize.QuadPart = 0; + fcb->Header.ValidDataLength.QuadPart = 0; + + fcb->atts = 0; + + if (bmn->name[0] == '.') + fcb->atts |= FILE_ATTRIBUTE_HIDDEN; + + if (bmn->type == BTRFS_TYPE_DIRECTORY) + fcb->atts |= FILE_ATTRIBUTE_DIRECTORY; + + fcb->atts_changed = FALSE; + + InterlockedIncrement(&parfcb->refcount); + fcb->subvol = parfcb->subvol; + fcb->inode = inode; + fcb->type = bmn->type; + + SeCaptureSubjectContext(&subjcont); + + Status = SeAssignSecurityEx(parfileref ? 
parfileref->fcb->sd : NULL, NULL, (void**)&fcb->sd, NULL, fcb->type == BTRFS_TYPE_DIRECTORY, + SEF_SACL_AUTO_INHERIT, &subjcont, IoGetFileObjectGenericMapping(), PagedPool); + + if (!NT_SUCCESS(Status)) { + ERR("SeAssignSecurityEx returned %08x\n", Status); + free_fcb(Vcb, fcb); + goto end; + } + + Status = RtlGetOwnerSecurityDescriptor(fcb->sd, &owner, &defaulted); + if (!NT_SUCCESS(Status)) { + WARN("RtlGetOwnerSecurityDescriptor returned %08x\n", Status); + fcb->sd_dirty = TRUE; + } else { + fcb->inode_item.st_uid = sid_to_uid(owner); + fcb->sd_dirty = fcb->inode_item.st_uid == UID_NOBODY; + } + + find_gid(fcb, parfcb, &subjcont); + + fileref = create_fileref(Vcb); + if (!fileref) { + ERR("out of memory\n"); + free_fcb(Vcb, fcb); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + fileref->fcb = fcb; + + fcb->created = TRUE; + mark_fcb_dirty(fcb); + + fileref->created = TRUE; + mark_fileref_dirty(fileref); + + fcb->subvol->root_item.ctransid = Vcb->superblock.generation; + fcb->subvol->root_item.ctime = now; + + fileref->parent = parfileref; + + Status = add_dir_child(fileref->parent->fcb, fcb->inode, FALSE, &utf8, &name, fcb->type, &dc); + if (!NT_SUCCESS(Status)) + WARN("add_dir_child returned %08x\n", Status); + + fileref->dc = dc; + dc->fileref = fileref; + + ExAcquireResourceExclusiveLite(&parfileref->nonpaged->children_lock, TRUE); + InsertTailList(&parfileref->children, &fileref->list_entry); + ExReleaseResourceLite(&parfileref->nonpaged->children_lock); + + increase_fileref_refcount(parfileref); + + if (fcb->type == BTRFS_TYPE_DIRECTORY) { + fcb->hash_ptrs = ExAllocatePoolWithTag(PagedPool, sizeof(LIST_ENTRY*) * 256, ALLOC_TAG); + if (!fcb->hash_ptrs) { + ERR("out of memory\n"); + free_fileref(Vcb, fileref); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlZeroMemory(fcb->hash_ptrs, sizeof(LIST_ENTRY*) * 256); + + fcb->hash_ptrs_uc = ExAllocatePoolWithTag(PagedPool, sizeof(LIST_ENTRY*) * 256, ALLOC_TAG); + if 
(!fcb->hash_ptrs_uc) { + ERR("out of memory\n"); + free_fileref(Vcb, fileref); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlZeroMemory(fcb->hash_ptrs_uc, sizeof(LIST_ENTRY*) * 256); + } + + InsertHeadList(lastle, &fcb->list_entry); + InsertTailList(&Vcb->all_fcbs, &fcb->list_entry_all); + + if (bmn->type == BTRFS_TYPE_DIRECTORY) + fileref->fcb->fileref = fileref; + + ExAcquireResourceExclusiveLite(parfcb->Header.Resource, TRUE); + parfcb->inode_item.st_size += utf8.Length * 2; + parfcb->inode_item.transid = Vcb->superblock.generation; + parfcb->inode_item.sequence++; + + if (!parccb->user_set_change_time) + parfcb->inode_item.st_ctime = now; + + if (!parccb->user_set_write_time) + parfcb->inode_item.st_mtime = now; + + ExReleaseResourceLite(parfcb->Header.Resource); + + parfcb->inode_item_changed = TRUE; + mark_fcb_dirty(parfcb); + + send_notification_fileref(fileref, bmn->type == BTRFS_TYPE_DIRECTORY ? FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_ADDED, NULL); + + if (!parccb->user_set_write_time) + send_notification_fcb(parfileref, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED, NULL); + + Status = STATUS_SUCCESS; + +end: + ExReleaseResourceLite(&Vcb->fcb_lock); + + ExFreePool(utf8.Buffer); + + return Status; +} + +static void mark_subvol_dirty(device_extension* Vcb, root* r) { + if (!r->dirty) { + r->dirty = TRUE; + + ExAcquireResourceExclusiveLite(&Vcb->dirty_subvols_lock, TRUE); + InsertTailList(&Vcb->dirty_subvols, &r->list_entry_dirty); + ExReleaseResourceLite(&Vcb->dirty_subvols_lock); + } + + Vcb->need_write = TRUE; +} + +static NTSTATUS recvd_subvol(device_extension* Vcb, PFILE_OBJECT FileObject, void* data, ULONG datalen, KPROCESSOR_MODE processor_mode) { + btrfs_received_subvol* brs = (btrfs_received_subvol*)data; + fcb* fcb; + NTSTATUS Status; + LARGE_INTEGER time; + BTRFS_TIME now; + + TRACE("(%p, %p, %p, %u)\n", Vcb, FileObject, data, datalen); + + if (!data || datalen < 
sizeof(btrfs_received_subvol)) + return STATUS_INVALID_PARAMETER; + + if (!FileObject || !FileObject->FsContext || FileObject->FsContext == Vcb->volume_fcb) + return STATUS_INVALID_PARAMETER; + + fcb = FileObject->FsContext; + + if (!fcb->subvol) + return STATUS_INVALID_PARAMETER; + + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) + return STATUS_PRIVILEGE_NOT_HELD; + + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + + if (fcb->subvol->root_item.rtransid != 0) { + WARN("subvol already has received information set\n"); + Status = STATUS_INVALID_PARAMETER; + goto end; + } + + KeQuerySystemTime(&time); + win_time_to_unix(time, &now); + + RtlCopyMemory(&fcb->subvol->root_item.received_uuid, &brs->uuid, sizeof(BTRFS_UUID)); + fcb->subvol->root_item.stransid = brs->generation; + fcb->subvol->root_item.rtransid = Vcb->superblock.generation; + fcb->subvol->root_item.rtime = now; + + fcb->subvol->received = TRUE; + mark_subvol_dirty(Vcb, fcb->subvol); + + Status = STATUS_SUCCESS; + +end: + ExReleaseResourceLite(&Vcb->tree_lock); + + return Status; +} + +static NTSTATUS fsctl_get_xattrs(device_extension* Vcb, PFILE_OBJECT FileObject, void* data, ULONG datalen, KPROCESSOR_MODE processor_mode) { + LIST_ENTRY* le; + btrfs_set_xattr* bsxa; + ULONG reqlen = (ULONG)offsetof(btrfs_set_xattr, data[0]); + fcb* fcb; + ccb* ccb; + + if (!data || datalen < reqlen) + return STATUS_INVALID_PARAMETER; + + if (!FileObject || !FileObject->FsContext || !FileObject->FsContext2 || FileObject->FsContext == Vcb->volume_fcb) + return STATUS_INVALID_PARAMETER; + + fcb = FileObject->FsContext; + ccb = FileObject->FsContext2; + + if (!(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES)) && processor_mode == UserMode) { + WARN("insufficient privileges\n"); + return STATUS_ACCESS_DENIED; + } + + ExAcquireResourceSharedLite(fcb->Header.Resource, TRUE); + + le = fcb->xattrs.Flink; + while (le != &fcb->xattrs) { + xattr* xa = 
CONTAINING_RECORD(le, xattr, list_entry); + + if (xa->valuelen > 0) + reqlen += (ULONG)offsetof(btrfs_set_xattr, data[0]) + xa->namelen + xa->valuelen; + + le = le->Flink; + } + + if (datalen < reqlen) { + ExReleaseResourceLite(fcb->Header.Resource); + return STATUS_BUFFER_OVERFLOW; + } + + bsxa = (btrfs_set_xattr*)data; + + if (reqlen > 0) { + le = fcb->xattrs.Flink; + while (le != &fcb->xattrs) { + xattr* xa = CONTAINING_RECORD(le, xattr, list_entry); + + if (xa->valuelen > 0) { + bsxa->namelen = xa->namelen; + bsxa->valuelen = xa->valuelen; + memcpy(bsxa->data, xa->data, xa->namelen + xa->valuelen); + + bsxa = (btrfs_set_xattr*)&bsxa->data[xa->namelen + xa->valuelen]; + } + + le = le->Flink; + } + } + + bsxa->namelen = 0; + bsxa->valuelen = 0; + + ExReleaseResourceLite(fcb->Header.Resource); + + return STATUS_SUCCESS; +} + +static NTSTATUS fsctl_set_xattr(device_extension* Vcb, PFILE_OBJECT FileObject, void* data, ULONG datalen, PIRP Irp) { + NTSTATUS Status; + btrfs_set_xattr* bsxa; + xattr* xa; + fcb* fcb; + ccb* ccb; + LIST_ENTRY* le; + + static const char stream_pref[] = "user."; + + TRACE("(%p, %p, %p, %u)\n", Vcb, FileObject, data, datalen); + + if (!data || datalen < sizeof(btrfs_set_xattr)) + return STATUS_INVALID_PARAMETER; + + bsxa = (btrfs_set_xattr*)data; + + if (datalen < offsetof(btrfs_set_xattr, data[0]) + bsxa->namelen + bsxa->valuelen) + return STATUS_INVALID_PARAMETER; + + if (bsxa->namelen + bsxa->valuelen + sizeof(tree_header) + sizeof(leaf_node) + offsetof(DIR_ITEM, name[0]) > Vcb->superblock.node_size) + return STATUS_INVALID_PARAMETER; + + if (!FileObject || !FileObject->FsContext || !FileObject->FsContext2 || FileObject->FsContext == Vcb->volume_fcb) + return STATUS_INVALID_PARAMETER; + + if (Vcb->readonly) + return STATUS_MEDIA_WRITE_PROTECTED; + + fcb = FileObject->FsContext; + ccb = FileObject->FsContext2; + + if (is_subvol_readonly(fcb->subvol, Irp)) + return STATUS_ACCESS_DENIED; + + if (!(ccb->access & FILE_WRITE_ATTRIBUTES) && 
Irp->RequestorMode == UserMode) { + WARN("insufficient privileges\n"); + return STATUS_ACCESS_DENIED; + } + + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + + ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); + + if (bsxa->namelen == strlen(EA_NTACL) && RtlCompareMemory(bsxa->data, EA_NTACL, strlen(EA_NTACL)) == strlen(EA_NTACL)) { + if ((!(ccb->access & WRITE_DAC) || !(ccb->access & WRITE_OWNER)) && Irp->RequestorMode == UserMode) { + WARN("insufficient privileges\n"); + Status = STATUS_ACCESS_DENIED; + goto end; + } + + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), Irp->RequestorMode)) { + Status = STATUS_PRIVILEGE_NOT_HELD; + goto end; + } + + if (fcb->sd) + ExFreePool(fcb->sd); + + if (bsxa->valuelen > 0 && RtlValidRelativeSecurityDescriptor(bsxa->data + bsxa->namelen, bsxa->valuelen, 0)) { + fcb->sd = ExAllocatePoolWithTag(PagedPool, bsxa->valuelen, ALLOC_TAG); + if (!fcb->sd) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlCopyMemory(fcb->sd, bsxa->data + bsxa->namelen, bsxa->valuelen); + } else if (fcb->sd) + fcb->sd = NULL; + + fcb->sd_dirty = TRUE; + + if (!fcb->sd) { + fcb_get_sd(fcb, ccb->fileref->parent->fcb, FALSE, Irp); + fcb->sd_deleted = TRUE; + } + + mark_fcb_dirty(fcb); + + Status = STATUS_SUCCESS; + goto end; + } else if (bsxa->namelen == strlen(EA_DOSATTRIB) && RtlCompareMemory(bsxa->data, EA_DOSATTRIB, strlen(EA_DOSATTRIB)) == strlen(EA_DOSATTRIB)) { + ULONG atts; + + if (bsxa->valuelen > 0 && get_file_attributes_from_xattr(bsxa->data + bsxa->namelen, bsxa->valuelen, &atts)) { + fcb->atts = atts; + + if (fcb->type == BTRFS_TYPE_DIRECTORY) + fcb->atts |= FILE_ATTRIBUTE_DIRECTORY; + else if (fcb->type == BTRFS_TYPE_SYMLINK) + fcb->atts |= FILE_ATTRIBUTE_REPARSE_POINT; + + if (fcb->inode == SUBVOL_ROOT_INODE) { + if (fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY) + fcb->atts |= FILE_ATTRIBUTE_READONLY; + else + fcb->atts &= 
~FILE_ATTRIBUTE_READONLY; + } + + fcb->atts_deleted = FALSE; + } else { + BOOL hidden = ccb->fileref && ccb->fileref->dc && ccb->fileref->dc->utf8.Buffer && ccb->fileref->dc->utf8.Buffer[0] == '.'; + + fcb->atts = get_file_attributes(Vcb, fcb->subvol, fcb->inode, fcb->type, hidden, TRUE, Irp); + fcb->atts_deleted = TRUE; + } + + fcb->atts_changed = TRUE; + mark_fcb_dirty(fcb); + + Status = STATUS_SUCCESS; + goto end; + } else if (bsxa->namelen == strlen(EA_REPARSE) && RtlCompareMemory(bsxa->data, EA_REPARSE, strlen(EA_REPARSE)) == strlen(EA_REPARSE)) { + if (fcb->reparse_xattr.Buffer) { + ExFreePool(fcb->reparse_xattr.Buffer); + fcb->reparse_xattr.Buffer = NULL; + fcb->reparse_xattr.Length = fcb->reparse_xattr.MaximumLength = 0; + } + + if (bsxa->valuelen > 0) { + fcb->reparse_xattr.Buffer = ExAllocatePoolWithTag(PagedPool, bsxa->valuelen, ALLOC_TAG); + if (!fcb->reparse_xattr.Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlCopyMemory(fcb->reparse_xattr.Buffer, bsxa->data + bsxa->namelen, bsxa->valuelen); + fcb->reparse_xattr.Length = fcb->reparse_xattr.MaximumLength = bsxa->valuelen; + } + + fcb->reparse_xattr_changed = TRUE; + mark_fcb_dirty(fcb); + + Status = STATUS_SUCCESS; + goto end; + } else if (bsxa->namelen == strlen(EA_EA) && RtlCompareMemory(bsxa->data, EA_EA, strlen(EA_EA)) == strlen(EA_EA)) { + if (!(ccb->access & FILE_WRITE_EA) && Irp->RequestorMode == UserMode) { + WARN("insufficient privileges\n"); + Status = STATUS_ACCESS_DENIED; + goto end; + } + + if (fcb->ea_xattr.Buffer) { + ExFreePool(fcb->ea_xattr.Buffer); + fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = 0; + fcb->ea_xattr.Buffer = NULL; + } + + fcb->ealen = 0; + + if (bsxa->valuelen > 0) { + ULONG offset; + + Status = IoCheckEaBufferValidity((FILE_FULL_EA_INFORMATION*)(bsxa->data + bsxa->namelen), bsxa->valuelen, &offset); + + if (!NT_SUCCESS(Status)) + WARN("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, 
offset); + else { + FILE_FULL_EA_INFORMATION* eainfo; + + fcb->ea_xattr.Buffer = ExAllocatePoolWithTag(PagedPool, bsxa->valuelen, ALLOC_TAG); + if (!fcb->ea_xattr.Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlCopyMemory(fcb->ea_xattr.Buffer, bsxa->data + bsxa->namelen, bsxa->valuelen); + + fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = bsxa->valuelen; + + fcb->ealen = 4; + + // calculate ealen + eainfo = (FILE_FULL_EA_INFORMATION*)(bsxa->data + bsxa->namelen); + do { + fcb->ealen += 5 + eainfo->EaNameLength + eainfo->EaValueLength; + + if (eainfo->NextEntryOffset == 0) + break; + + eainfo = (FILE_FULL_EA_INFORMATION*)(((UINT8*)eainfo) + eainfo->NextEntryOffset); + } while (TRUE); + } + } + + fcb->ea_changed = TRUE; + mark_fcb_dirty(fcb); + + Status = STATUS_SUCCESS; + goto end; + } else if (bsxa->namelen == strlen(EA_PROP_COMPRESSION) && RtlCompareMemory(bsxa->data, EA_PROP_COMPRESSION, strlen(EA_PROP_COMPRESSION)) == strlen(EA_PROP_COMPRESSION)) { + const char lzo[] = "lzo"; + const char zlib[] = "zlib"; + + if (bsxa->valuelen == strlen(lzo) && RtlCompareMemory(bsxa->data + bsxa->namelen, lzo, bsxa->valuelen) == bsxa->valuelen) + fcb->prop_compression = PropCompression_LZO; + else if (bsxa->valuelen == strlen(zlib) && RtlCompareMemory(bsxa->data + bsxa->namelen, zlib, bsxa->valuelen) == bsxa->valuelen) + fcb->prop_compression = PropCompression_Zlib; + else + fcb->prop_compression = PropCompression_None; + + if (fcb->prop_compression != PropCompression_None) { + fcb->inode_item.flags |= BTRFS_INODE_COMPRESS; + fcb->inode_item_changed = TRUE; + } + + fcb->prop_compression_changed = TRUE; + mark_fcb_dirty(fcb); + + Status = STATUS_SUCCESS; + goto end; + } else if (bsxa->namelen >= strlen(stream_pref) && RtlCompareMemory(bsxa->data, stream_pref, strlen(stream_pref)) == strlen(stream_pref)) { + // don't allow xattrs beginning with user., as these appear as streams instead + Status = 
STATUS_OBJECT_NAME_INVALID; + goto end; + } + + xa = ExAllocatePoolWithTag(PagedPool, offsetof(xattr, data[0]) + bsxa->namelen + bsxa->valuelen, ALLOC_TAG); + if (!xa) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + le = fcb->xattrs.Flink; + while (le != &fcb->xattrs) { + xattr* xa2 = CONTAINING_RECORD(le, xattr, list_entry); + + if (xa2->namelen == bsxa->namelen && RtlCompareMemory(xa2->data, bsxa->data, xa2->namelen) == xa2->namelen) { + RemoveEntryList(&xa2->list_entry); + ExFreePool(xa2); + break; + } + + le = le->Flink; + } + + xa->namelen = bsxa->namelen; + xa->valuelen = bsxa->valuelen; + xa->dirty = TRUE; + RtlCopyMemory(xa->data, bsxa->data, bsxa->namelen + bsxa->valuelen); + + InsertTailList(&fcb->xattrs, &xa->list_entry); + + fcb->xattrs_changed = TRUE; + mark_fcb_dirty(fcb); + + Status = STATUS_SUCCESS; + +end: + ExReleaseResourceLite(fcb->Header.Resource); + + ExReleaseResourceLite(&Vcb->tree_lock); + + return Status; +} + +static NTSTATUS reserve_subvol(device_extension* Vcb, PFILE_OBJECT FileObject, PIRP Irp) { + fcb* fcb; + ccb* ccb; + + TRACE("(%p, %p)\n", Vcb, FileObject); + + // "Reserving" a readonly subvol allows the calling process to write into it until the handle is closed. 
+ + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), Irp->RequestorMode)) + return STATUS_PRIVILEGE_NOT_HELD; + + if (!FileObject || !FileObject->FsContext || !FileObject->FsContext2 || FileObject->FsContext == Vcb->volume_fcb) + return STATUS_INVALID_PARAMETER; + + fcb = FileObject->FsContext; + ccb = FileObject->FsContext2; + + if (!(fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY)) + return STATUS_INVALID_PARAMETER; + + if (fcb->subvol->reserved) + return STATUS_INVALID_PARAMETER; + + fcb->subvol->reserved = PsGetCurrentProcess(); + ccb->reserving = TRUE; + + return STATUS_SUCCESS; +} + +static NTSTATUS get_subvol_path(device_extension* Vcb, UINT64 id, WCHAR* out, ULONG outlen, PIRP Irp) { + LIST_ENTRY* le; + root* r = NULL; + NTSTATUS Status; + file_ref* fr; + UNICODE_STRING us; + + le = Vcb->roots.Flink; + while (le != &Vcb->roots) { + root* r2 = CONTAINING_RECORD(le, root, list_entry); + + if (r2->id == id) { + r = r2; + break; + } + + le = le->Flink; + } + + if (!r) { + ERR("couldn't find subvol %llx\n", id); + return STATUS_INTERNAL_ERROR; + } + + ExAcquireResourceSharedLite(&Vcb->fcb_lock, TRUE); + + Status = open_fileref_by_inode(Vcb, r, r->root_item.objid, &fr, Irp); + if (!NT_SUCCESS(Status)) { + ExReleaseResourceLite(&Vcb->fcb_lock); + ERR("open_fileref_by_inode returned %08x\n", Status); + return Status; + } + + us.Buffer = out; + us.Length = 0; + us.MaximumLength = (USHORT)min(0xffff, outlen) - sizeof(WCHAR); + + Status = fileref_get_filename(fr, &us, NULL, NULL); + + if (NT_SUCCESS(Status) || Status == STATUS_BUFFER_OVERFLOW) + out[us.Length / sizeof(WCHAR)] = 0; + else + ERR("fileref_get_filename returned %08x\n", Status); + + free_fileref(Vcb, fr); + + ExReleaseResourceLite(&Vcb->fcb_lock); + + return Status; +} + +static NTSTATUS find_subvol(device_extension* Vcb, void* in, ULONG inlen, void* out, ULONG outlen, PIRP Irp) { + btrfs_find_subvol* bfs; + NTSTATUS Status; + traverse_ptr tp; + KEY searchkey; + + if 
(!in || inlen < sizeof(btrfs_find_subvol)) + return STATUS_INVALID_PARAMETER; + + if (!out || outlen < sizeof(WCHAR)) + return STATUS_INVALID_PARAMETER; + + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), Irp->RequestorMode)) + return STATUS_PRIVILEGE_NOT_HELD; + + bfs = (btrfs_find_subvol*)in; + + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + + if (!Vcb->uuid_root) { + ERR("couldn't find uuid root\n"); + Status = STATUS_NOT_FOUND; + goto end; + } + + RtlCopyMemory(&searchkey.obj_id, &bfs->uuid, sizeof(UINT64)); + searchkey.obj_type = TYPE_SUBVOL_UUID; + RtlCopyMemory(&searchkey.offset, &bfs->uuid.uuid[sizeof(UINT64)], sizeof(UINT64)); + + Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp); + + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + goto end; + } + + if (!keycmp(searchkey, tp.item->key) && tp.item->size >= sizeof(UINT64)) { + UINT64* id = (UINT64*)tp.item->data; + + if (bfs->ctransid != 0) { + KEY searchkey2; + traverse_ptr tp2; + + searchkey2.obj_id = *id; + searchkey2.obj_type = TYPE_ROOT_ITEM; + searchkey2.offset = 0xffffffffffffffff; + + Status = find_item(Vcb, Vcb->root_root, &tp2, &searchkey2, FALSE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + goto end; + } + + if (tp2.item->key.obj_id == searchkey2.obj_id && tp2.item->key.obj_type == searchkey2.obj_type && + tp2.item->size >= offsetof(ROOT_ITEM, otransid)) { + ROOT_ITEM* ri = (ROOT_ITEM*)tp2.item->data; + + if (ri->ctransid == bfs->ctransid) { + TRACE("found subvol %llx\n", *id); + Status = get_subvol_path(Vcb, *id, out, outlen, Irp); + goto end; + } + } + } else { + TRACE("found subvol %llx\n", *id); + Status = get_subvol_path(Vcb, *id, out, outlen, Irp); + goto end; + } + } + + searchkey.obj_type = TYPE_SUBVOL_REC_UUID; + + Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp); + + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + goto end; + } 
+ + if (!keycmp(searchkey, tp.item->key) && tp.item->size >= sizeof(UINT64)) { + UINT64* ids = (UINT64*)tp.item->data; + ULONG i; + + for (i = 0; i < tp.item->size / sizeof(UINT64); i++) { + if (bfs->ctransid != 0) { + KEY searchkey2; + traverse_ptr tp2; + + searchkey2.obj_id = ids[i]; + searchkey2.obj_type = TYPE_ROOT_ITEM; + searchkey2.offset = 0xffffffffffffffff; + + Status = find_item(Vcb, Vcb->root_root, &tp2, &searchkey2, FALSE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + goto end; + } + + if (tp2.item->key.obj_id == searchkey2.obj_id && tp2.item->key.obj_type == searchkey2.obj_type && + tp2.item->size >= offsetof(ROOT_ITEM, otransid)) { + ROOT_ITEM* ri = (ROOT_ITEM*)tp2.item->data; + + if (ri->ctransid == bfs->ctransid) { + TRACE("found subvol %llx\n", ids[i]); + Status = get_subvol_path(Vcb, ids[i], out, outlen, Irp); + goto end; + } + } + } else { + TRACE("found subvol %llx\n", ids[i]); + Status = get_subvol_path(Vcb, ids[i], out, outlen, Irp); + goto end; + } + } + } + + Status = STATUS_NOT_FOUND; + +end: + ExReleaseResourceLite(&Vcb->tree_lock); + + return Status; +} + +static NTSTATUS resize_device(device_extension* Vcb, void* data, ULONG len, PIRP Irp) { + btrfs_resize* br = (btrfs_resize*)data; + NTSTATUS Status; + LIST_ENTRY* le; + device* dev = NULL; + + TRACE("(%p, %p, %u)\n", Vcb, data, len); + + if (!data || len < sizeof(btrfs_resize) || (br->size % Vcb->superblock.sector_size) != 0) + return STATUS_INVALID_PARAMETER; + + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), Irp->RequestorMode)) + return STATUS_PRIVILEGE_NOT_HELD; + + if (Vcb->readonly) + return STATUS_MEDIA_WRITE_PROTECTED; + + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + le = Vcb->devices.Flink; while (le != &Vcb->devices) { - device* dev = CONTAINING_RECORD(le, device, list_entry); - - if (dev->devitem.dev_id > dev_id) - dev_id = dev->devitem.dev_id; - + device* dev2 = CONTAINING_RECORD(le, device, 
list_entry); + + if (dev2->devitem.dev_id == br->device) { + dev = dev2; + break; + } + le = le->Flink; } - - dev_id++; - - dev->devitem.dev_id = dev_id; - dev->devitem.num_bytes = gli.Length.QuadPart; - dev->devitem.bytes_used = 0; - dev->devitem.optimal_io_align = Vcb->superblock.sector_size; - dev->devitem.optimal_io_width = Vcb->superblock.sector_size; - dev->devitem.minimal_io_size = Vcb->superblock.sector_size; - dev->devitem.type = 0; - dev->devitem.generation = 0; - dev->devitem.start_offset = 0; - dev->devitem.dev_group = 0; - dev->devitem.seek_speed = 0; - dev->devitem.bandwidth = 0; - get_uuid(&dev->devitem.device_uuid); - dev->devitem.fs_uuid = Vcb->superblock.uuid; - - di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG); - if (!di) { - ERR("out of memory\n"); - goto end; - } - - RtlCopyMemory(di, &dev->devitem, sizeof(DEV_ITEM)); - - if (!insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, di->dev_id, di, sizeof(DEV_ITEM), NULL, Irp, &rollback)) { - ERR("insert_tree_item failed\n"); - ExFreePool(di); - Status = STATUS_INTERNAL_ERROR; - goto end; - } - - // add stats entry to dev tree - stats = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * 5, ALLOC_TAG); - if (!stats) { - ERR("out of memory\n"); - Status = STATUS_INTERNAL_ERROR; - goto end; - } - - RtlZeroMemory(stats, sizeof(UINT64) * 5); - - searchkey.obj_id = 0; - searchkey.obj_type = TYPE_DEV_STATS; - searchkey.offset = di->dev_id; - - Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - goto end; - } - - if (!keycmp(tp.item->key, searchkey)) - delete_tree_item(Vcb, &tp, &rollback); - - if (!insert_tree_item(Vcb, Vcb->dev_root, 0, TYPE_DEV_STATS, di->dev_id, stats, sizeof(UINT64) * 5, NULL, Irp, &rollback)) { - ERR("insert_tree_item failed\n"); - ExFreePool(stats); - Status = STATUS_INTERNAL_ERROR; + + if (!dev) { + ERR("could not find device %llx\n", br->device); + Status = 
STATUS_INVALID_PARAMETER; goto end; } - - // We clear the first megabyte of the device, so Windows doesn't identify it as another FS - mb = ExAllocatePoolWithTag(PagedPool, 0x100000, ALLOC_TAG); - if (!mb) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; + + if (!dev->devobj) { + ERR("trying to resize missing device\n"); + Status = STATUS_INVALID_PARAMETER; goto end; } - - RtlZeroMemory(mb, 0x100000); - - Status = write_data_phys(fileobj->DeviceObject, 0, mb, 0x100000); - if (!NT_SUCCESS(Status)) { - ERR("write_data_phys returned %08x\n", Status); + + if (dev->readonly) { + ERR("trying to resize readonly device\n"); + Status = STATUS_INVALID_PARAMETER; goto end; } - - ExFreePool(mb); - - v = ExAllocatePoolWithTag(PagedPool, sizeof(volume), ALLOC_TAG); - if (!v) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; + + if (br->size > 0 && dev->devitem.num_bytes == br->size) { + TRACE("size unchanged, returning STATUS_SUCCESS\n"); + Status = STATUS_SUCCESS; goto end; } - - v->fsuuid = Vcb->superblock.uuid; - v->devuuid = dev->devitem.device_uuid; - v->devnum = dev_id; - v->devpath = volname; - v->length = gli.Length.QuadPart; - v->gen1 = v->gen2 = Vcb->superblock.generation; - v->seeding = FALSE; - v->processed = TRUE; - - ExAcquireResourceExclusiveLite(&volumes_lock, TRUE); - InsertTailList(&volumes, &v->list_entry); - ExReleaseResourceLite(&volumes_lock); - - volname.Buffer = NULL; - - Status = dev_ioctl(fileobj->DeviceObject, IOCTL_STORAGE_GET_DEVICE_NUMBER, NULL, 0, - &sdn, sizeof(STORAGE_DEVICE_NUMBER), TRUE, NULL); - if (!NT_SUCCESS(Status)) { - WARN("IOCTL_STORAGE_GET_DEVICE_NUMBER returned %08x\n", Status); - v->disk_num = 0; - v->part_num = 0; - } else { - v->disk_num = sdn.DeviceNumber; - v->part_num = sdn.PartitionNumber; - } - - RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME); - Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &mountmgrfo, &mountmgr); - if (!NT_SUCCESS(Status)) - 
ERR("IoGetDeviceObjectPointer returned %08x\n", Status); - else { - remove_drive_letter(mountmgr, v); - - ObDereferenceObject(mountmgrfo); + + if (br->size > 0 && dev->devitem.num_bytes > br->size) { // shrink device + BOOL need_balance = TRUE; + UINT64 old_size, delta; + + le = dev->space.Flink; + while (le != &dev->space) { + space* s = CONTAINING_RECORD(le, space, list_entry); + + if (s->address <= br->size && s->address + s->size >= dev->devitem.num_bytes) { + need_balance = FALSE; + break; + } + + le = le->Flink; + } + + delta = dev->devitem.num_bytes - br->size; + + if (need_balance) { + int i; + + if (Vcb->balance.thread) { + WARN("balance already running\n"); + Status = STATUS_DEVICE_NOT_READY; + goto end; + } + + RtlZeroMemory(Vcb->balance.opts, sizeof(btrfs_balance_opts) * 3); + + for (i = 0; i < 3; i++) { + Vcb->balance.opts[i].flags = BTRFS_BALANCE_OPTS_ENABLED | BTRFS_BALANCE_OPTS_DEVID | BTRFS_BALANCE_OPTS_DRANGE; + Vcb->balance.opts[i].devid = dev->devitem.dev_id; + Vcb->balance.opts[i].drange_start = br->size; + Vcb->balance.opts[i].drange_end = dev->devitem.num_bytes; + } + + Vcb->balance.paused = FALSE; + Vcb->balance.shrinking = TRUE; + Vcb->balance.status = STATUS_SUCCESS; + KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused); + + space_list_subtract2(&dev->space, NULL, br->size, delta, NULL, NULL); + + Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb); + if (!NT_SUCCESS(Status)) { + ERR("PsCreateSystemThread returned %08x\n", Status); + goto end; + } + + Status = STATUS_MORE_PROCESSING_REQUIRED; + + goto end; + } + + old_size = dev->devitem.num_bytes; + dev->devitem.num_bytes = br->size; + + Status = update_dev_item(Vcb, dev, Irp); + if (!NT_SUCCESS(Status)) { + ERR("update_dev_item returned %08x\n", Status); + dev->devitem.num_bytes = old_size; + goto end; + } + + space_list_subtract2(&dev->space, NULL, br->size, delta, NULL, NULL); + + Vcb->superblock.total_bytes -= 
delta; + } else { // extend device + GET_LENGTH_INFORMATION gli; + UINT64 old_size, delta; + + Status = dev_ioctl(dev->devobj, IOCTL_DISK_GET_LENGTH_INFO, NULL, 0, + &gli, sizeof(gli), TRUE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("IOCTL_DISK_GET_LENGTH_INFO returned %08x\n", Status); + goto end; + } + + if (br->size == 0) { + br->size = gli.Length.QuadPart; + + if (dev->devitem.num_bytes == br->size) { + TRACE("size unchanged, returning STATUS_SUCCESS\n"); + Status = STATUS_SUCCESS; + goto end; + } + + if (br->size == 0) { + ERR("IOCTL_DISK_GET_LENGTH_INFO returned 0 length\n"); + Status = STATUS_INTERNAL_ERROR; + goto end; + } + } else if ((UINT64)gli.Length.QuadPart < br->size) { + ERR("device was %llx bytes, trying to extend to %llx\n", gli.Length.QuadPart, br->size); + Status = STATUS_INVALID_PARAMETER; + goto end; + } + + delta = br->size - dev->devitem.num_bytes; + + old_size = dev->devitem.num_bytes; + dev->devitem.num_bytes = br->size; + + Status = update_dev_item(Vcb, dev, Irp); + if (!NT_SUCCESS(Status)) { + ERR("update_dev_item returned %08x\n", Status); + dev->devitem.num_bytes = old_size; + goto end; + } + + space_list_add2(&dev->space, NULL, dev->devitem.num_bytes, delta, NULL, NULL); + + Vcb->superblock.total_bytes += delta; } - - Vcb->superblock.num_devices++; - Vcb->superblock.total_bytes += gli.Length.QuadPart; - Vcb->devices_loaded++; - InsertTailList(&Vcb->devices, &dev->list_entry); - - ObReferenceObject(fileobj->DeviceObject); - - do_write(Vcb, Irp, &rollback); - free_trees(Vcb); - - clear_rollback(Vcb, &rollback); - Status = STATUS_SUCCESS; - + Vcb->need_write = TRUE; + end: ExReleaseResourceLite(&Vcb->tree_lock); - ObDereferenceObject(fileobj); - - if (volname.Buffer) - ExFreePool(volname.Buffer); - - return Status; -} -static NTSTATUS allow_extended_dasd_io(device_extension* Vcb, PFILE_OBJECT FileObject) { - fcb* fcb; - ccb* ccb; - - TRACE("FSCTL_ALLOW_EXTENDED_DASD_IO\n"); - - if (!FileObject) - return STATUS_INVALID_PARAMETER; - - fcb 
= FileObject->FsContext; - ccb = FileObject->FsContext2; - - if (!fcb) - return STATUS_INVALID_PARAMETER; - - if (fcb != Vcb->volume_fcb) - return STATUS_INVALID_PARAMETER; - - if (!ccb) - return STATUS_INVALID_PARAMETER; - - ccb->allow_extended_dasd_io = TRUE; - - return STATUS_SUCCESS; -} + if (NT_SUCCESS(Status)) + FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_CHANGE_SIZE); -static NTSTATUS query_uuid(device_extension* Vcb, void* data, ULONG length) { - if (length < sizeof(BTRFS_UUID)) - return STATUS_BUFFER_OVERFLOW; - - RtlCopyMemory(data, &Vcb->superblock.uuid, sizeof(BTRFS_UUID)); - - return STATUS_SUCCESS; + return Status; } -NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL user) { +NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP* Pirp, UINT32 type) { + PIRP Irp = *Pirp; PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); NTSTATUS Status; - + switch (type) { +#if (NTDDI_VERSION >= NTDDI_WIN7) + case FSCTL_REQUEST_OPLOCK: + WARN("STUB: FSCTL_REQUEST_OPLOCK\n"); + Status = STATUS_INVALID_DEVICE_REQUEST; + break; +#endif + case FSCTL_REQUEST_OPLOCK_LEVEL_1: WARN("STUB: FSCTL_REQUEST_OPLOCK_LEVEL_1\n"); Status = STATUS_INVALID_DEVICE_REQUEST; @@ -2873,6 +4795,11 @@ NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL Status = STATUS_INVALID_DEVICE_REQUEST; break; + case FSCTL_OPLOCK_BREAK_ACK_NO_2: + WARN("STUB: FSCTL_OPLOCK_BREAK_ACK_NO_2\n"); + Status = STATUS_INVALID_DEVICE_REQUEST; + break; + case FSCTL_OPBATCH_ACK_CLOSE_PENDING: WARN("STUB: FSCTL_OPBATCH_ACK_CLOSE_PENDING\n"); Status = STATUS_INVALID_DEVICE_REQUEST; @@ -2883,6 +4810,11 @@ NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL Status = STATUS_INVALID_DEVICE_REQUEST; break; + case FSCTL_REQUEST_FILTER_OPLOCK: + WARN("STUB: FSCTL_REQUEST_FILTER_OPLOCK\n"); + Status = STATUS_INVALID_DEVICE_REQUEST; + break; + case FSCTL_LOCK_VOLUME: Status = lock_volume(DeviceObject->DeviceExtension, 
Irp); break; @@ -2915,12 +4847,11 @@ NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL break; case FSCTL_GET_COMPRESSION: - Status = get_compression(DeviceObject->DeviceExtension, Irp); + Status = get_compression(Irp); break; case FSCTL_SET_COMPRESSION: - WARN("STUB: FSCTL_SET_COMPRESSION\n"); - Status = STATUS_INVALID_DEVICE_REQUEST; + Status = set_compression(Irp); break; case FSCTL_SET_BOOTLOADER_ACCESSED: @@ -2928,11 +4859,6 @@ NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL Status = STATUS_INVALID_DEVICE_REQUEST; break; - case FSCTL_OPLOCK_BREAK_ACK_NO_2: - WARN("STUB: FSCTL_OPLOCK_BREAK_ACK_NO_2\n"); - Status = STATUS_INVALID_DEVICE_REQUEST; - break; - case FSCTL_INVALIDATE_VOLUMES: Status = invalidate_volumes(Irp); break; @@ -2942,14 +4868,8 @@ NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL Status = STATUS_INVALID_DEVICE_REQUEST; break; - case FSCTL_REQUEST_FILTER_OPLOCK: - WARN("STUB: FSCTL_REQUEST_FILTER_OPLOCK\n"); - Status = STATUS_INVALID_DEVICE_REQUEST; - break; - case FSCTL_FILESYSTEM_GET_STATISTICS: - Status = fs_get_statistics(DeviceObject, IrpSp->FileObject, Irp->AssociatedIrp.SystemBuffer, - IrpSp->Parameters.FileSystemControl.OutputBufferLength, &Irp->IoStatus.Information); + Status = fs_get_statistics(Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.FileSystemControl.OutputBufferLength, &Irp->IoStatus.Information); break; case FSCTL_GET_NTFS_VOLUME_DATA: @@ -3054,7 +4974,7 @@ NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL break; case FSCTL_QUERY_ALLOCATED_RANGES: - Status = query_ranges(DeviceObject->DeviceExtension, IrpSp->FileObject, IrpSp->Parameters.FileSystemControl.Type3InputBuffer, + Status = query_ranges(IrpSp->FileObject, IrpSp->Parameters.FileSystemControl.Type3InputBuffer, IrpSp->Parameters.FileSystemControl.InputBufferLength, Irp->UserBuffer, IrpSp->Parameters.FileSystemControl.OutputBufferLength, 
&Irp->IoStatus.Information); break; @@ -3309,46 +5229,66 @@ NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL WARN("STUB: FSCTL_TXFS_READ_BACKUP_INFORMATION2\n"); Status = STATUS_INVALID_DEVICE_REQUEST; break; - + case FSCTL_CSV_CONTROL: WARN("STUB: FSCTL_CSV_CONTROL\n"); Status = STATUS_INVALID_DEVICE_REQUEST; break; #endif + // TRACE rather than WARN because Windows 10 spams this undocumented fsctl + case FSCTL_QUERY_VOLUME_CONTAINER_STATE: + TRACE("STUB: FSCTL_QUERY_VOLUME_CONTAINER_STATE\n"); + Status = STATUS_INVALID_DEVICE_REQUEST; + break; + + case FSCTL_GET_INTEGRITY_INFORMATION: + Status = get_integrity_information(DeviceObject->DeviceExtension, IrpSp->FileObject, map_user_buffer(Irp, NormalPagePriority), + IrpSp->Parameters.FileSystemControl.OutputBufferLength); + break; + + case FSCTL_SET_INTEGRITY_INFORMATION: + Status = set_integrity_information(IrpSp->FileObject, Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.FileSystemControl.InputBufferLength); + break; + + case FSCTL_DUPLICATE_EXTENTS_TO_FILE: + Status = duplicate_extents(DeviceObject->DeviceExtension, IrpSp->FileObject, Irp->AssociatedIrp.SystemBuffer, + IrpSp->Parameters.FileSystemControl.InputBufferLength, Irp); + break; + case FSCTL_BTRFS_GET_FILE_IDS: - Status = get_file_ids(IrpSp->FileObject, map_user_buffer(Irp), IrpSp->Parameters.FileSystemControl.OutputBufferLength); + Status = get_file_ids(IrpSp->FileObject, map_user_buffer(Irp, NormalPagePriority), IrpSp->Parameters.FileSystemControl.OutputBufferLength); break; - + case FSCTL_BTRFS_CREATE_SUBVOL: - Status = create_subvol(DeviceObject->DeviceExtension, IrpSp->FileObject, map_user_buffer(Irp), IrpSp->Parameters.FileSystemControl.OutputBufferLength, Irp); + Status = create_subvol(DeviceObject->DeviceExtension, IrpSp->FileObject, Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.FileSystemControl.InputBufferLength, Irp); break; - + case FSCTL_BTRFS_CREATE_SNAPSHOT: - Status = 
create_snapshot(DeviceObject->DeviceExtension, IrpSp->FileObject, map_user_buffer(Irp), IrpSp->Parameters.FileSystemControl.OutputBufferLength, Irp); + Status = create_snapshot(DeviceObject->DeviceExtension, IrpSp->FileObject, Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.FileSystemControl.InputBufferLength, Irp); break; - + case FSCTL_BTRFS_GET_INODE_INFO: - Status = get_inode_info(IrpSp->FileObject, map_user_buffer(Irp), IrpSp->Parameters.FileSystemControl.OutputBufferLength); + Status = get_inode_info(IrpSp->FileObject, map_user_buffer(Irp, NormalPagePriority), IrpSp->Parameters.FileSystemControl.OutputBufferLength); break; - + case FSCTL_BTRFS_SET_INODE_INFO: - Status = set_inode_info(IrpSp->FileObject, map_user_buffer(Irp), IrpSp->Parameters.FileSystemControl.OutputBufferLength); + Status = set_inode_info(IrpSp->FileObject, Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.FileSystemControl.InputBufferLength, Irp); break; - + case FSCTL_BTRFS_GET_DEVICES: - Status = get_devices(DeviceObject->DeviceExtension, map_user_buffer(Irp), IrpSp->Parameters.FileSystemControl.OutputBufferLength); + Status = get_devices(DeviceObject->DeviceExtension, map_user_buffer(Irp, NormalPagePriority), IrpSp->Parameters.FileSystemControl.OutputBufferLength); break; - + case FSCTL_BTRFS_GET_USAGE: - Status = get_usage(DeviceObject->DeviceExtension, map_user_buffer(Irp), IrpSp->Parameters.FileSystemControl.OutputBufferLength); + Status = get_usage(DeviceObject->DeviceExtension, map_user_buffer(Irp, NormalPagePriority), IrpSp->Parameters.FileSystemControl.OutputBufferLength, Irp); break; - + case FSCTL_BTRFS_START_BALANCE: Status = start_balance(DeviceObject->DeviceExtension, Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.FileSystemControl.InputBufferLength, Irp->RequestorMode); break; - + case FSCTL_BTRFS_QUERY_BALANCE: - Status = query_balance(DeviceObject->DeviceExtension, map_user_buffer(Irp), IrpSp->Parameters.FileSystemControl.OutputBufferLength); + Status = 
query_balance(DeviceObject->DeviceExtension, map_user_buffer(Irp, NormalPagePriority), IrpSp->Parameters.FileSystemControl.OutputBufferLength); break; case FSCTL_BTRFS_PAUSE_BALANCE: @@ -3358,31 +5298,97 @@ NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL case FSCTL_BTRFS_RESUME_BALANCE: Status = resume_balance(DeviceObject->DeviceExtension, Irp->RequestorMode); break; - + case FSCTL_BTRFS_STOP_BALANCE: Status = stop_balance(DeviceObject->DeviceExtension, Irp->RequestorMode); break; - + case FSCTL_BTRFS_ADD_DEVICE: - Status = add_device(DeviceObject->DeviceExtension, Irp, Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.FileSystemControl.InputBufferLength, Irp->RequestorMode); + Status = add_device(DeviceObject->DeviceExtension, Irp, Irp->RequestorMode); break; - + case FSCTL_BTRFS_REMOVE_DEVICE: Status = remove_device(DeviceObject->DeviceExtension, Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.FileSystemControl.InputBufferLength, Irp->RequestorMode); - break; - + break; + case FSCTL_BTRFS_GET_UUID: - Status = query_uuid(DeviceObject->DeviceExtension, map_user_buffer(Irp), IrpSp->Parameters.FileSystemControl.OutputBufferLength); - break; + Status = query_uuid(DeviceObject->DeviceExtension, map_user_buffer(Irp, NormalPagePriority), IrpSp->Parameters.FileSystemControl.OutputBufferLength); + break; + + case FSCTL_BTRFS_START_SCRUB: + Status = start_scrub(DeviceObject->DeviceExtension, Irp->RequestorMode); + break; + + case FSCTL_BTRFS_QUERY_SCRUB: + Status = query_scrub(DeviceObject->DeviceExtension, Irp->RequestorMode, map_user_buffer(Irp, NormalPagePriority), IrpSp->Parameters.FileSystemControl.OutputBufferLength); + break; + + case FSCTL_BTRFS_PAUSE_SCRUB: + Status = pause_scrub(DeviceObject->DeviceExtension, Irp->RequestorMode); + break; + + case FSCTL_BTRFS_RESUME_SCRUB: + Status = resume_scrub(DeviceObject->DeviceExtension, Irp->RequestorMode); + break; + + case FSCTL_BTRFS_STOP_SCRUB: + Status = 
stop_scrub(DeviceObject->DeviceExtension, Irp->RequestorMode); + break; + + case FSCTL_BTRFS_RESET_STATS: + Status = reset_stats(DeviceObject->DeviceExtension, Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.FileSystemControl.InputBufferLength, Irp->RequestorMode); + break; + + case FSCTL_BTRFS_MKNOD: + Status = mknod(DeviceObject->DeviceExtension, IrpSp->FileObject, Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.FileSystemControl.InputBufferLength, Irp); + break; + + case FSCTL_BTRFS_RECEIVED_SUBVOL: + Status = recvd_subvol(DeviceObject->DeviceExtension, IrpSp->FileObject, Irp->AssociatedIrp.SystemBuffer, + IrpSp->Parameters.FileSystemControl.InputBufferLength, Irp->RequestorMode); + break; + + case FSCTL_BTRFS_GET_XATTRS: + Status = fsctl_get_xattrs(DeviceObject->DeviceExtension, IrpSp->FileObject, Irp->UserBuffer, IrpSp->Parameters.FileSystemControl.OutputBufferLength, Irp->RequestorMode); + break; + + case FSCTL_BTRFS_SET_XATTR: + Status = fsctl_set_xattr(DeviceObject->DeviceExtension, IrpSp->FileObject, Irp->AssociatedIrp.SystemBuffer, + IrpSp->Parameters.FileSystemControl.InputBufferLength, Irp); + break; + + case FSCTL_BTRFS_RESERVE_SUBVOL: + Status = reserve_subvol(DeviceObject->DeviceExtension, IrpSp->FileObject, Irp); + break; + + case FSCTL_BTRFS_FIND_SUBVOL: + Status = find_subvol(DeviceObject->DeviceExtension, Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.FileSystemControl.InputBufferLength, + Irp->UserBuffer, IrpSp->Parameters.FileSystemControl.OutputBufferLength, Irp); + break; + + case FSCTL_BTRFS_SEND_SUBVOL: + Status = send_subvol(DeviceObject->DeviceExtension, Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.FileSystemControl.InputBufferLength, + IrpSp->FileObject, Irp); + break; + + case FSCTL_BTRFS_READ_SEND_BUFFER: + Status = read_send_buffer(DeviceObject->DeviceExtension, IrpSp->FileObject, map_user_buffer(Irp, NormalPagePriority), IrpSp->Parameters.FileSystemControl.OutputBufferLength, + &Irp->IoStatus.Information, 
Irp->RequestorMode); + break; + + case FSCTL_BTRFS_RESIZE: + Status = resize_device(DeviceObject->DeviceExtension, Irp->AssociatedIrp.SystemBuffer, + IrpSp->Parameters.FileSystemControl.InputBufferLength, Irp); + break; default: - TRACE("unknown control code %x (DeviceType = %x, Access = %x, Function = %x, Method = %x)\n", + WARN("unknown control code %x (DeviceType = %x, Access = %x, Function = %x, Method = %x)\n", IrpSp->Parameters.FileSystemControl.FsControlCode, (IrpSp->Parameters.FileSystemControl.FsControlCode & 0xff0000) >> 16, (IrpSp->Parameters.FileSystemControl.FsControlCode & 0xc000) >> 14, (IrpSp->Parameters.FileSystemControl.FsControlCode & 0x3ffc) >> 2, IrpSp->Parameters.FileSystemControl.FsControlCode & 0x3); Status = STATUS_INVALID_DEVICE_REQUEST; break; } - + return Status; } diff --git a/reactos/drivers/filesystems/btrfs/galois.c b/reactos/drivers/filesystems/btrfs/galois.c index b8e933daca5..baa26fc78fe 100644 --- a/reactos/drivers/filesystems/btrfs/galois.c +++ b/reactos/drivers/filesystems/btrfs/galois.c @@ -1,17 +1,17 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . 
*/ @@ -95,20 +95,12 @@ UINT8 gdiv(UINT8 a, UINT8 b) { // https://www.kernel.org/pub/linux/kernel/people/hpa/raid6.pdf #ifdef _AMD64_ -#ifdef __REACTOS__ -static __inline UINT64 galois_double_mask64(UINT64 v) { -#else -static UINT64 __inline galois_double_mask64(UINT64 v) { -#endif +__inline static UINT64 galois_double_mask64(UINT64 v) { v &= 0x8080808080808080; return (v << 1) - (v >> 7); } #else -#ifdef __REACTOS__ -static __inline UINT32 galois_double_mask32(UINT32 v) { -#else -static UINT32 __inline galois_double_mask32(UINT32 v) { -#endif +__inline static UINT32 galois_double_mask32(UINT32 v) { v &= 0x80808080; return (v << 1) - (v >> 7); } @@ -116,31 +108,31 @@ static UINT32 __inline galois_double_mask32(UINT32 v) { void galois_double(UINT8* data, UINT32 len) { // FIXME - SIMD? - + #ifdef _AMD64_ while (len > sizeof(UINT64)) { UINT64 v = *((UINT64*)data), vv; - + vv = (v << 1) & 0xfefefefefefefefe; vv ^= galois_double_mask64(v) & 0x1d1d1d1d1d1d1d1d; *((UINT64*)data) = vv; - + data += sizeof(UINT64); len -= sizeof(UINT64); } #else while (len > sizeof(UINT32)) { UINT32 v = *((UINT32*)data), vv; - + vv = (v << 1) & 0xfefefefe; vv ^= galois_double_mask32(v) & 0x1d1d1d1d; *((UINT32*)data) = vv; - + data += sizeof(UINT32); len -= sizeof(UINT32); } #endif - + while (len > 0) { data[0] = (data[0] << 1) ^ ((data[0] & 0x80) ? 
0x1d : 0); data++; diff --git a/reactos/drivers/filesystems/btrfs/guid.c b/reactos/drivers/filesystems/btrfs/guid.c new file mode 100644 index 00000000000..da214464c41 --- /dev/null +++ b/reactos/drivers/filesystems/btrfs/guid.c @@ -0,0 +1,11 @@ +/* + * COPYRIGHT: See COPYING in the top level directory + * PROJECT: BtrFS FSD for ReactOS + * FILE: drivers/filesystems/btrfs/guid.c + * PURPOSE: GUID glue + * PROGRAMMERS: Pierre Schweitzer + */ + +#include +#include +#include diff --git a/reactos/drivers/filesystems/btrfs/pnp.c b/reactos/drivers/filesystems/btrfs/pnp.c index 6c67e089b45..aac3ea3a54e 100644 --- a/reactos/drivers/filesystems/btrfs/pnp.c +++ b/reactos/drivers/filesystems/btrfs/pnp.c @@ -1,17 +1,17 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . 
*/ @@ -34,14 +34,24 @@ typedef struct { pnp_stripe* stripes; } pnp_context; -static NTSTATUS STDCALL pnp_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +extern ERESOURCE pdo_list_lock; +extern LIST_ENTRY pdo_list; + +_Function_class_(IO_COMPLETION_ROUTINE) +#ifdef __REACTOS__ +static NTSTATUS NTAPI pnp_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#else +static NTSTATUS pnp_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#endif pnp_stripe* stripe = conptr; pnp_context* context = (pnp_context*)stripe->context; - + + UNUSED(DeviceObject); + stripe->Status = Irp->IoStatus.Status; - + InterlockedDecrement(&context->left); - + if (context->left == 0) KeSetEvent(&context->Event, 0, FALSE); @@ -49,154 +59,141 @@ static NTSTATUS STDCALL pnp_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PV } static NTSTATUS send_disks_pnp_message(device_extension* Vcb, UCHAR minor) { - pnp_context* context; - UINT64 num_devices, i; + pnp_context context; + ULONG num_devices, i; NTSTATUS Status; LIST_ENTRY* le; - - context = ExAllocatePoolWithTag(NonPagedPool, sizeof(pnp_context), ALLOC_TAG); - if (!context) { + + RtlZeroMemory(&context, sizeof(pnp_context)); + KeInitializeEvent(&context.Event, NotificationEvent, FALSE); + + num_devices = (ULONG)min(0xffffffff, Vcb->superblock.num_devices); + + context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(pnp_stripe) * num_devices, ALLOC_TAG); + if (!context.stripes) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - - ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); - - RtlZeroMemory(context, sizeof(pnp_context)); - KeInitializeEvent(&context->Event, NotificationEvent, FALSE); - - num_devices = Vcb->superblock.num_devices; - - context->stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(pnp_stripe) * num_devices, ALLOC_TAG); - if (!context->stripes) { - ERR("out of memory\n"); - ExFreePool(context); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end2; - } - 
- RtlZeroMemory(context->stripes, sizeof(pnp_stripe) * num_devices); - + + RtlZeroMemory(context.stripes, sizeof(pnp_stripe) * num_devices); + i = 0; le = Vcb->devices.Flink; - + while (le != &Vcb->devices) { PIO_STACK_LOCATION IrpSp; device* dev = CONTAINING_RECORD(le, device, list_entry); - + if (dev->devobj) { - context->stripes[i].context = (struct pnp_context*)context; + context.stripes[i].context = (struct pnp_context*)&context; + + context.stripes[i].Irp = IoAllocateIrp(dev->devobj->StackSize, FALSE); - context->stripes[i].Irp = IoAllocateIrp(dev->devobj->StackSize, FALSE); - - if (!context->stripes[i].Irp) { + if (!context.stripes[i].Irp) { UINT64 j; - + ERR("IoAllocateIrp failed\n"); - + for (j = 0; j < i; j++) { - if (context->stripes[j].dev->devobj) { - IoFreeIrp(context->stripes[j].Irp); + if (context.stripes[j].dev->devobj) { + IoFreeIrp(context.stripes[j].Irp); } } - ExFreePool(context->stripes); - ExFreePool(context); - - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end2; + ExFreePool(context.stripes); + + return STATUS_INSUFFICIENT_RESOURCES; } - - IrpSp = IoGetNextIrpStackLocation(context->stripes[i].Irp); + + IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp); IrpSp->MajorFunction = IRP_MJ_PNP; IrpSp->MinorFunction = minor; - context->stripes[i].Irp->UserIosb = &context->stripes[i].iosb; - - IoSetCompletionRoutine(context->stripes[i].Irp, pnp_completion, &context->stripes[i], TRUE, TRUE, TRUE); - - context->stripes[i].Irp->IoStatus.Status = STATUS_NOT_SUPPORTED; - context->stripes[i].dev = dev; - - context->left++; + context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb; + + IoSetCompletionRoutine(context.stripes[i].Irp, pnp_completion, &context.stripes[i], TRUE, TRUE, TRUE); + + context.stripes[i].Irp->IoStatus.Status = STATUS_NOT_SUPPORTED; + context.stripes[i].dev = dev; + + context.left++; } - + le = le->Flink; } - - if (context->left == 0) { + + if (context.left == 0) { Status = STATUS_SUCCESS; goto end; } - + for (i = 0; i < 
num_devices; i++) { - if (context->stripes[i].Irp) { - IoCallDriver(context->stripes[i].dev->devobj, context->stripes[i].Irp); + if (context.stripes[i].Irp) { + IoCallDriver(context.stripes[i].dev->devobj, context.stripes[i].Irp); } } - - KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL); - + + KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL); + Status = STATUS_SUCCESS; - + for (i = 0; i < num_devices; i++) { - if (context->stripes[i].Irp) { - if (context->stripes[i].Status != STATUS_SUCCESS) - Status = context->stripes[i].Status; + if (context.stripes[i].Irp) { + if (context.stripes[i].Status != STATUS_SUCCESS) + Status = context.stripes[i].Status; } } - + end: for (i = 0; i < num_devices; i++) { - if (context->stripes[i].Irp) { - IoFreeIrp(context->stripes[i].Irp); + if (context.stripes[i].Irp) { + IoFreeIrp(context.stripes[i].Irp); } } - ExFreePool(context->stripes); - ExFreePool(context); - -end2: - ExReleaseResourceLite(&Vcb->tree_lock); + ExFreePool(context.stripes); return Status; } -static NTSTATUS pnp_cancel_remove_device(PDEVICE_OBJECT DeviceObject, PIRP Irp) { -device_extension* Vcb = DeviceObject->DeviceExtension; +static NTSTATUS pnp_cancel_remove_device(PDEVICE_OBJECT DeviceObject) { + device_extension* Vcb = DeviceObject->DeviceExtension; NTSTATUS Status; - + + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); if (Vcb->root_fileref && Vcb->root_fileref->fcb && (Vcb->root_fileref->open_count > 0 || has_open_children(Vcb->root_fileref))) { Status = STATUS_ACCESS_DENIED; goto end; } - + Status = send_disks_pnp_message(Vcb, IRP_MN_CANCEL_REMOVE_DEVICE); if (!NT_SUCCESS(Status)) { WARN("send_disks_pnp_message returned %08x\n", Status); goto end; } - Vcb->removing = FALSE; end: ExReleaseResourceLite(&Vcb->fcb_lock); - + ExReleaseResourceLite(&Vcb->tree_lock); + return STATUS_SUCCESS; } -static NTSTATUS pnp_query_remove_device(PDEVICE_OBJECT 
DeviceObject, PIRP Irp) { +NTSTATUS pnp_query_remove_device(PDEVICE_OBJECT DeviceObject, PIRP Irp) { device_extension* Vcb = DeviceObject->DeviceExtension; NTSTATUS Status; - LIST_ENTRY rollback; - + + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); if (Vcb->root_fileref && Vcb->root_fileref->fcb && (Vcb->root_fileref->open_count > 0 || has_open_children(Vcb->root_fileref))) { Status = STATUS_ACCESS_DENIED; goto end; } - + Status = send_disks_pnp_message(Vcb, IRP_MN_QUERY_REMOVE_DEVICE); if (!NT_SUCCESS(Status)) { WARN("send_disks_pnp_message returned %08x\n", Status); @@ -204,73 +201,297 @@ static NTSTATUS pnp_query_remove_device(PDEVICE_OBJECT DeviceObject, PIRP Irp) { } Vcb->removing = TRUE; - - InitializeListHead(&rollback); - - ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); - if (Vcb->need_write && !Vcb->readonly) - do_write(Vcb, Irp, &rollback); - - clear_rollback(Vcb, &rollback); + if (Vcb->need_write && !Vcb->readonly) { + Status = do_write(Vcb, Irp); + + free_trees(Vcb); + + if (!NT_SUCCESS(Status)) { + ERR("do_write returned %08x\n", Status); + goto end; + } + } - ExReleaseResourceLite(&Vcb->tree_lock); Status = STATUS_SUCCESS; end: ExReleaseResourceLite(&Vcb->fcb_lock); - + + ExReleaseResourceLite(&Vcb->tree_lock); + return Status; } -static NTSTATUS pnp_remove_device(PDEVICE_OBJECT DeviceObject, PIRP Irp) { +static NTSTATUS pnp_remove_device(PDEVICE_OBJECT DeviceObject) { device_extension* Vcb = DeviceObject->DeviceExtension; NTSTATUS Status; - + + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + Status = send_disks_pnp_message(Vcb, IRP_MN_REMOVE_DEVICE); - if (!NT_SUCCESS(Status)) { + + if (!NT_SUCCESS(Status)) WARN("send_disks_pnp_message returned %08x\n", Status); - } - + + ExReleaseResourceLite(&Vcb->tree_lock); + if (DeviceObject->Vpb->Flags & VPB_MOUNTED) { Status = FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_DISMOUNT); if (!NT_SUCCESS(Status)) { 
WARN("FsRtlNotifyVolumeEvent returned %08x\n", Status); } - - if (Vcb->open_files > 0) { - Vcb->removing = TRUE; - Vcb->Vpb->Flags &= ~VPB_MOUNTED; - } else + + if (Vcb->vde) + Vcb->vde->mounted_device = NULL; + + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + Vcb->removing = TRUE; + Vcb->Vpb->Flags &= ~VPB_MOUNTED; + Vcb->Vpb->Flags |= VPB_DIRECT_WRITES_ALLOWED; + ExReleaseResourceLite(&Vcb->tree_lock); + + if (Vcb->open_files == 0) uninit(Vcb, FALSE); } return STATUS_SUCCESS; } -static NTSTATUS pnp_start_device(PDEVICE_OBJECT DeviceObject, PIRP Irp) { - FIXME("STUB\n"); - - return STATUS_NOT_IMPLEMENTED; -} - -static NTSTATUS pnp_surprise_removal(PDEVICE_OBJECT DeviceObject, PIRP Irp) { +NTSTATUS pnp_surprise_removal(PDEVICE_OBJECT DeviceObject, PIRP Irp) { device_extension* Vcb = DeviceObject->DeviceExtension; - + TRACE("(%p, %p)\n", DeviceObject, Irp); - + if (DeviceObject->Vpb->Flags & VPB_MOUNTED) { - if (Vcb->open_files > 0) { - Vcb->removing = TRUE; - Vcb->Vpb->Flags &= ~VPB_MOUNTED; - } else + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + + if (Vcb->vde) + Vcb->vde->mounted_device = NULL; + + Vcb->removing = TRUE; + Vcb->Vpb->Flags &= ~VPB_MOUNTED; + Vcb->Vpb->Flags |= VPB_DIRECT_WRITES_ALLOWED; + + ExReleaseResourceLite(&Vcb->tree_lock); + + if (Vcb->open_files == 0) uninit(Vcb, FALSE); } return STATUS_SUCCESS; } -NTSTATUS STDCALL drv_pnp(PDEVICE_OBJECT DeviceObject, PIRP Irp) { +static void bus_query_capabilities(PIRP Irp) { + PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); + PDEVICE_CAPABILITIES dc = IrpSp->Parameters.DeviceCapabilities.Capabilities; + + dc->UniqueID = TRUE; + dc->SilentInstall = TRUE; + + Irp->IoStatus.Status = STATUS_SUCCESS; +} + +static NTSTATUS bus_query_device_relations(PIRP Irp) { + NTSTATUS Status; + ULONG num_children; + LIST_ENTRY* le; + ULONG drsize, i; + DEVICE_RELATIONS* dr; + + ExAcquireResourceSharedLite(&pdo_list_lock, TRUE); + + num_children = 0; + + le = pdo_list.Flink; + while (le != 
&pdo_list) { + num_children++; + + le = le->Flink; + } + + drsize = offsetof(DEVICE_RELATIONS, Objects[0]) + (num_children * sizeof(PDEVICE_OBJECT)); + dr = ExAllocatePoolWithTag(PagedPool, drsize, ALLOC_TAG); + + if (!dr) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + dr->Count = num_children; + + i = 0; + le = pdo_list.Flink; + while (le != &pdo_list) { + pdo_device_extension* pdode = CONTAINING_RECORD(le, pdo_device_extension, list_entry); + + ObReferenceObject(pdode->pdo); + dr->Objects[i] = pdode->pdo; + i++; + + le = le->Flink; + } + + Irp->IoStatus.Information = (ULONG_PTR)dr; + + Status = STATUS_SUCCESS; + +end: + ExReleaseResourceLite(&pdo_list_lock); + + Irp->IoStatus.Status = Status; + IoCompleteRequest(Irp, IO_NO_INCREMENT); + + return Status; +} + +static NTSTATUS bus_query_hardware_ids(PIRP Irp) { + WCHAR* out; + + static WCHAR ids[] = L"ROOT\\btrfs\0"; + + out = ExAllocatePoolWithTag(PagedPool, sizeof(ids), ALLOC_TAG); + if (!out) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(out, ids, sizeof(ids)); + + Irp->IoStatus.Information = (ULONG_PTR)out; + + return STATUS_SUCCESS; +} + +static NTSTATUS bus_pnp(control_device_extension* cde, PIRP Irp) { + PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); + + switch (IrpSp->MinorFunction) { + case IRP_MN_QUERY_CAPABILITIES: + bus_query_capabilities(Irp); + break; + + case IRP_MN_QUERY_DEVICE_RELATIONS: + if (IrpSp->Parameters.QueryDeviceRelations.Type != BusRelations || no_pnp) + break; + + return bus_query_device_relations(Irp); + + case IRP_MN_QUERY_ID: + { + NTSTATUS Status; + + if (IrpSp->Parameters.QueryId.IdType != BusQueryHardwareIDs) + break; + + Status = bus_query_hardware_ids(Irp); + + Irp->IoStatus.Status = Status; + IoCompleteRequest(Irp, IO_NO_INCREMENT); + + return Status; + } + } + + IoSkipCurrentIrpStackLocation(Irp); + return IoCallDriver(cde->attached_device, Irp); +} + +static NTSTATUS 
pdo_query_device_id(pdo_device_extension* pdode, PIRP Irp) { + WCHAR name[100], *noff, *out; + int i; + + static WCHAR pref[] = L"Btrfs\\"; + + RtlCopyMemory(name, pref, wcslen(pref) * sizeof(WCHAR)); + + noff = &name[wcslen(pref)]; + for (i = 0; i < 16; i++) { + *noff = hex_digit(pdode->uuid.uuid[i] >> 4); noff++; + *noff = hex_digit(pdode->uuid.uuid[i] & 0xf); noff++; + + if (i == 3 || i == 5 || i == 7 || i == 9) { + *noff = '-'; + noff++; + } + } + *noff = 0; + + out = ExAllocatePoolWithTag(PagedPool, (wcslen(name) + 1) * sizeof(WCHAR), ALLOC_TAG); + if (!out) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(out, name, (wcslen(name) + 1) * sizeof(WCHAR)); + + Irp->IoStatus.Information = (ULONG_PTR)out; + + return STATUS_SUCCESS; +} + +static NTSTATUS pdo_query_hardware_ids(PIRP Irp) { + WCHAR* out; + + static WCHAR ids[] = L"BtrfsVolume\0"; + + out = ExAllocatePoolWithTag(PagedPool, sizeof(ids), ALLOC_TAG); + if (!out) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(out, ids, sizeof(ids)); + + Irp->IoStatus.Information = (ULONG_PTR)out; + + return STATUS_SUCCESS; +} + +static NTSTATUS pdo_query_id(pdo_device_extension* pdode, PIRP Irp) { + PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); + + switch (IrpSp->Parameters.QueryId.IdType) { + case BusQueryDeviceID: + TRACE("BusQueryDeviceID\n"); + return pdo_query_device_id(pdode, Irp); + + case BusQueryHardwareIDs: + TRACE("BusQueryHardwareIDs\n"); + return pdo_query_hardware_ids(Irp); + + default: + break; + } + + return Irp->IoStatus.Status; +} + +static NTSTATUS pdo_pnp(PDEVICE_OBJECT pdo, PIRP Irp) { + PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); + pdo_device_extension* pdode = pdo->DeviceExtension; + + switch (IrpSp->MinorFunction) { + case IRP_MN_QUERY_ID: + return pdo_query_id(pdode, Irp); + + case IRP_MN_START_DEVICE: + case IRP_MN_CANCEL_REMOVE_DEVICE: + case IRP_MN_SURPRISE_REMOVAL: + case 
IRP_MN_REMOVE_DEVICE: + return STATUS_SUCCESS; + + case IRP_MN_QUERY_REMOVE_DEVICE: + return STATUS_UNSUCCESSFUL; + } + + return Irp->IoStatus.Status; +} + +_Dispatch_type_(IRP_MJ_PNP) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_pnp(PDEVICE_OBJECT DeviceObject, PIRP Irp) { PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); device_extension* Vcb = DeviceObject->DeviceExtension; NTSTATUS Status; @@ -279,17 +500,28 @@ NTSTATUS STDCALL drv_pnp(PDEVICE_OBJECT DeviceObject, PIRP Irp) { FsRtlEnterFileSystem(); top_level = is_top_level(Irp); - - if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) { - Status = part0_passthrough(DeviceObject, Irp); + + if (Vcb && Vcb->type == VCB_TYPE_CONTROL) { + Status = bus_pnp(DeviceObject->DeviceExtension, Irp); + goto exit; + } else if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + volume_device_extension* vde = DeviceObject->DeviceExtension; + IoSkipCurrentIrpStackLocation(Irp); + Status = IoCallDriver(vde->pdo, Irp); + goto exit; + } else if (Vcb && Vcb->type == VCB_TYPE_PDO) { + Status = pdo_pnp(DeviceObject, Irp); + goto end; + } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { + Status = STATUS_INVALID_PARAMETER; goto end; } - + Status = STATUS_NOT_IMPLEMENTED; - + switch (IrpSp->MinorFunction) { case IRP_MN_CANCEL_REMOVE_DEVICE: - Status = pnp_cancel_remove_device(DeviceObject, Irp); + Status = pnp_cancel_remove_device(DeviceObject); break; case IRP_MN_QUERY_REMOVE_DEVICE: @@ -297,11 +529,7 @@ NTSTATUS STDCALL drv_pnp(PDEVICE_OBJECT DeviceObject, PIRP Irp) { break; case IRP_MN_REMOVE_DEVICE: - Status = pnp_remove_device(DeviceObject, Irp); - break; - - case IRP_MN_START_DEVICE: - Status = pnp_start_device(DeviceObject, Irp); + Status = pnp_remove_device(DeviceObject); break; case IRP_MN_SURPRISE_REMOVAL: @@ -310,29 +538,23 @@ NTSTATUS STDCALL drv_pnp(PDEVICE_OBJECT DeviceObject, PIRP Irp) { default: TRACE("passing minor function 0x%x on\n", IrpSp->MinorFunction); - + IoSkipCurrentIrpStackLocation(Irp); Status = 
IoCallDriver(Vcb->Vpb->RealDevice, Irp); - goto end; + goto exit; } -// // Irp->IoStatus.Status = Status; -// // Irp->IoStatus.Information = 0; -// -// IoSkipCurrentIrpStackLocation(Irp); -// -// Status = IoCallDriver(first_device(Vcb)->devobj, Irp); -// -// // IoCompleteRequest(Irp, IO_NO_INCREMENT); - +end: Irp->IoStatus.Status = Status; IoCompleteRequest(Irp, IO_NO_INCREMENT); - -end: - if (top_level) + +exit: + TRACE("returning %08x\n", Status); + + if (top_level) IoSetTopLevelIrp(NULL); - + FsRtlExitFileSystem(); return Status; diff --git a/reactos/drivers/filesystems/btrfs/read.c b/reactos/drivers/filesystems/btrfs/read.c index f2739cdf939..4fda6504029 100644 --- a/reactos/drivers/filesystems/btrfs/read.c +++ b/reactos/drivers/filesystems/btrfs/read.c @@ -1,17 +1,17 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . 
*/ @@ -20,23 +20,23 @@ enum read_data_status { ReadDataStatus_Pending, ReadDataStatus_Success, - ReadDataStatus_Cancelling, - ReadDataStatus_Cancelled, ReadDataStatus_Error, - ReadDataStatus_CRCError, - ReadDataStatus_MissingDevice + ReadDataStatus_MissingDevice, + ReadDataStatus_Skip }; struct read_data_context; typedef struct { struct read_data_context* context; - UINT8* buf; UINT16 stripenum; BOOL rewrite; PIRP Irp; IO_STATUS_BLOCK iosb; enum read_data_status status; + PMDL mdl; + UINT64 stripestart; + UINT64 stripeend; } read_data_stripe; typedef struct { @@ -45,1462 +45,334 @@ typedef struct { chunk* c; UINT64 address; UINT32 buflen; - UINT64 num_stripes; - LONG stripes_left; + LONG num_stripes, stripes_left; UINT64 type; UINT32 sector_size; - UINT16 firstoff, startoffstripe, sectors_per_stripe, stripes_cancel; + UINT16 firstoff, startoffstripe, sectors_per_stripe; UINT32* csum; BOOL tree; - BOOL check_nocsum_parity; read_data_stripe* stripes; - KSPIN_LOCK spin_lock; + UINT8* va; } read_data_context; extern BOOL diskacc; -extern tPsUpdateDiskCounters PsUpdateDiskCounters; -extern tCcCopyReadEx CcCopyReadEx; +extern tPsUpdateDiskCounters fPsUpdateDiskCounters; +extern tCcCopyReadEx fCcCopyReadEx; +extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters; -static NTSTATUS STDCALL read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#define LINUX_PAGE_SIZE 4096 + +_Function_class_(IO_COMPLETION_ROUTINE) +#ifdef __REACTOS__ +static NTSTATUS NTAPI read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#else +static NTSTATUS read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#endif read_data_stripe* stripe = conptr; read_data_context* context = (read_data_context*)stripe->context; - UINT64 i; - LONG stripes_left; - KIRQL irql; - - KeAcquireSpinLock(&context->spin_lock, &irql); - - stripes_left = InterlockedDecrement(&context->stripes_left); - - if (stripe->status == ReadDataStatus_Cancelling) { 
- stripe->status = ReadDataStatus_Cancelled; - goto end; - } - - stripe->iosb = Irp->IoStatus; - - if (NT_SUCCESS(Irp->IoStatus.Status)) { - if (context->type == BLOCK_FLAG_DUPLICATE) { - stripe->status = ReadDataStatus_Success; - - if (stripes_left > 0 && stripes_left == context->stripes_cancel) { - for (i = 0; i < context->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Pending) { - context->stripes[i].status = ReadDataStatus_Cancelling; - IoCancelIrp(context->stripes[i].Irp); - } - } - } - } else if (context->type == BLOCK_FLAG_RAID0) { - stripe->status = ReadDataStatus_Success; - } else if (context->type == BLOCK_FLAG_RAID10) { - stripe->status = ReadDataStatus_Success; - - if (stripes_left > 0 && context->stripes_cancel != 0) { - for (i = 0; i < context->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Pending && context->stripes[i].stripenum == stripe->stripenum) { - context->stripes[i].status = ReadDataStatus_Cancelling; - IoCancelIrp(context->stripes[i].Irp); - break; - } - } - } - } else if (context->type == BLOCK_FLAG_RAID5) { - stripe->status = ReadDataStatus_Success; - - if (stripes_left > 0 && stripes_left == context->stripes_cancel && (context->csum || context->tree || !context->check_nocsum_parity)) { - for (i = 0; i < context->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Pending) { - context->stripes[i].status = ReadDataStatus_Cancelling; - IoCancelIrp(context->stripes[i].Irp); - break; - } - } - } - } else if (context->type == BLOCK_FLAG_RAID6) { - stripe->status = ReadDataStatus_Success; - - if (stripes_left > 0 && stripes_left == context->stripes_cancel && (context->csum || context->tree || !context->check_nocsum_parity)) { - for (i = 0; i < context->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Pending) { - context->stripes[i].status = ReadDataStatus_Cancelling; - IoCancelIrp(context->stripes[i].Irp); - } - } - } - } - - goto end; - } else { - 
stripe->status = ReadDataStatus_Error; - } - -end: - KeReleaseSpinLock(&context->spin_lock, irql); - - if (stripes_left == 0) - KeSetEvent(&context->Event, 0, FALSE); - - return STATUS_MORE_PROCESSING_REQUIRED; -} - -static void raid5_reconstruct(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT64 maxsize, - BOOL first, UINT32 firststripesize, UINT16 missing) { - UINT16 parity, stripe; - UINT32 stripelen = first ? firststripesize : ci->stripe_length; - UINT32 readlen; - - TRACE("(%llx, %x, %p, %p, %llx, %llx, %u, %x, %x)\n", off, skip, context, ci, *stripeoff, maxsize, first, firststripesize, missing); - - parity = ((off / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; - - readlen = min(min(ci->stripe_length - (skip % ci->stripe_length), stripelen), maxsize - *stripeoff); - - if (missing != parity) { - UINT16 firststripe = missing == 0 ? 1 : 0; - - RtlCopyMemory(&context->stripes[missing].buf[*stripeoff], &context->stripes[firststripe].buf[*stripeoff], readlen); - - for (stripe = firststripe + 1; stripe < context->num_stripes; stripe++) { - if (stripe != missing) - do_xor(&context->stripes[missing].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen); - } - } else - TRACE("parity == missing == %x, skipping\n", parity); - - *stripeoff += stripelen; -} - -static void raid5_decode(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT8* buf, - UINT32* pos, UINT32 length, UINT32 firststripesize) { - UINT16 parity, stripe; - BOOL first = *pos == 0; - UINT32 stripelen = first ? 
firststripesize : ci->stripe_length; - - parity = ((off / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; - - stripe = (parity + 1) % ci->num_stripes; - - while (TRUE) { - if (stripe == parity) { - *stripeoff += stripelen; - return; - } - - if (skip >= ci->stripe_length) { - skip -= ci->stripe_length; - } else { - UINT32 copylen = min(ci->stripe_length - skip, length - *pos); - - RtlCopyMemory(buf + *pos, &context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen); - - *pos += copylen; - - if (*pos == length) - return; - - skip = 0; - } - - stripe = (stripe + 1) % ci->num_stripes; - } -} - -static BOOL raid5_decode_with_checksum(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT8* buf, - UINT32* pos, UINT32 length, UINT32 firststripesize, UINT32* csum, UINT32 sector_size) { - UINT16 parity, stripe; - BOOL first = *pos == 0; - UINT32 stripelen = first ? firststripesize : ci->stripe_length; - - parity = ((off / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; - - stripe = (parity + 1) % ci->num_stripes; - - while (TRUE) { - if (stripe == parity) { - *stripeoff += stripelen; - return TRUE; - } - - if (skip >= ci->stripe_length) { - skip -= ci->stripe_length; - } else { - UINT32 i; - UINT32 copylen = min(ci->stripe_length - skip, length - *pos); - - RtlCopyMemory(buf + *pos, &context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen); - - for (i = 0; i < copylen / sector_size; i ++) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + *pos + (i * sector_size), sector_size); - - if (crc32 != csum[i]) { - UINT16 j, firststripe = stripe == 0 ? 
1 : 0; - - RtlCopyMemory(buf + *pos + (i * sector_size), - &context->stripes[firststripe].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], sector_size); - - for (j = firststripe + 1; j < ci->num_stripes; j++) { - if (j != stripe) { - do_xor(buf + *pos + (i * sector_size), &context->stripes[j].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], sector_size); - } - } - - crc32 = ~calc_crc32c(0xffffffff, buf + *pos + (i * sector_size), sector_size); - - if (crc32 != csum[i]) { - ERR("unrecoverable checksum error\n"); - return FALSE; - } - - RtlCopyMemory(&context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], buf + *pos + (i * sector_size), sector_size); - context->stripes[stripe].rewrite = TRUE; - } - } - - *pos += copylen; - - if (*pos == length) - return TRUE; - - skip = 0; - } - - stripe = (stripe + 1) % ci->num_stripes; - } - - return FALSE; -} - -static BOOL raid5_decode_with_checksum_metadata(UINT64 addr, UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT8* buf, - UINT32* pos, UINT32 length, UINT32 firststripesize, UINT32 node_size) { - UINT16 parity, stripe; - BOOL first = *pos == 0; - UINT32 stripelen = first ? 
firststripesize : ci->stripe_length; - - parity = ((off / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; - - stripe = (parity + 1) % ci->num_stripes; - - while (TRUE) { - if (stripe == parity) { - *stripeoff += stripelen; - return TRUE; - } - - if (skip >= ci->stripe_length) { - skip -= ci->stripe_length; - } else { - UINT32 copylen = min(ci->stripe_length - skip, length - *pos); - tree_header* th = (tree_header*)buf; - UINT32 crc32; - - RtlCopyMemory(buf + *pos, &context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen); - - crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, node_size - sizeof(th->csum)); - - if (addr != th->address || crc32 != *((UINT32*)th->csum)) { - UINT16 j, firststripe = stripe == 0 ? 1 : 0; - - RtlCopyMemory(buf + *pos, &context->stripes[firststripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen); - - for (j = firststripe + 1; j < ci->num_stripes; j++) { - if (j != stripe) { - do_xor(buf + *pos, &context->stripes[j].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen); - } - } - - crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, node_size - sizeof(th->csum)); - - if (addr != th->address || crc32 != *((UINT32*)th->csum)) { - ERR("unrecoverable checksum error\n"); - return FALSE; - } - } - - RtlCopyMemory(&context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], buf + *pos, copylen); - context->stripes[stripe].rewrite = TRUE; - - *pos += copylen; - - if (*pos == length) - return TRUE; - - skip = 0; - } - - stripe = (stripe + 1) % ci->num_stripes; - } - - return FALSE; -} - -static void raid6_reconstruct1(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT64 maxsize, - BOOL first, UINT32 firststripesize, UINT16 missing) { - UINT16 parity1, parity2, stripe; - UINT32 stripelen = first ? 
firststripesize : ci->stripe_length; - UINT32 readlen; - - TRACE("(%llx, %x, %p, %p, %llx, %llx, %u, %x, %x)\n", off, skip, context, ci, *stripeoff, maxsize, first, firststripesize, missing); - - parity1 = ((off / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; - parity2 = (parity1 + 1) % ci->num_stripes; - - readlen = min(min(ci->stripe_length - (skip % ci->stripe_length), stripelen), maxsize - *stripeoff); - - if (missing != parity1 && missing != parity2) { - RtlCopyMemory(&context->stripes[missing].buf[*stripeoff], &context->stripes[parity1].buf[*stripeoff], readlen); - stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1); - - do { - if (stripe != missing) - do_xor(&context->stripes[missing].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen); - - stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1); - } while (stripe != parity2); - } else - TRACE("skipping parity stripe\n"); - - *stripeoff += stripelen; -} - -static void raid6_reconstruct2(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT64 maxsize, - BOOL first, UINT32 firststripesize, UINT16 missing1, UINT16 missing2) { - UINT16 parity1, parity2, stripe; - UINT32 stripelen = first ? 
firststripesize : ci->stripe_length; - UINT32 readlen = min(min(ci->stripe_length - (skip % ci->stripe_length), stripelen), maxsize - *stripeoff); - - TRACE("(%llx, %x, %p, %p, %llx, %llx, %u, %x, %x, %x)\n", off, skip, context, ci, *stripeoff, maxsize, - first, firststripesize, missing1, missing2); - - parity1 = ((off / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; - parity2 = (parity1 + 1) % ci->num_stripes; - - // skip if missing stripes are p and q - if ((parity1 == missing1 && parity2 == missing2) || (parity1 == missing2 && parity2 == missing1)) { - *stripeoff += stripelen; - return; - } - - if (missing1 == parity2 || missing2 == parity2) { // reconstruct from p and data - UINT16 missing = missing1 == parity2 ? missing2 : missing1; - - RtlCopyMemory(&context->stripes[missing].buf[*stripeoff], &context->stripes[parity1].buf[*stripeoff], readlen); - stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1); - - do { - if (stripe != missing) - do_xor(&context->stripes[missing].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen); - - stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1); - } while (stripe != parity2); - } else if (missing1 == parity1 || missing2 == parity1) { // reconstruct from q and data - UINT16 missing = missing1 == parity1 ? missing2 : missing1; - UINT16 i, div; - - stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1); - - i = ci->num_stripes - 3; - - if (stripe == missing) { - RtlZeroMemory(&context->stripes[missing].buf[*stripeoff], readlen); - div = i; - } else - RtlCopyMemory(&context->stripes[missing].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen); - - stripe = stripe == 0 ? 
(ci->num_stripes - 1) : (stripe - 1); - - i--; - do { - galois_double(&context->stripes[missing].buf[*stripeoff], readlen); - - if (stripe != missing) - do_xor(&context->stripes[missing].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen); - else - div = i; - - stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1); - i--; - } while (stripe != parity2); - - do_xor(&context->stripes[missing].buf[*stripeoff], &context->stripes[parity2].buf[*stripeoff], readlen); - - if (div != 0) - galois_divpower(&context->stripes[missing].buf[*stripeoff], div, readlen); - } else { // reconstruct from p and q - UINT16 x, y, i; - UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy; - UINT32 j; - - stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1); - - // put qxy in missing1 - // put pxy in missing2 - - i = ci->num_stripes - 3; - if (stripe == missing1 || stripe == missing2) { - RtlZeroMemory(&context->stripes[missing1].buf[*stripeoff], readlen); - RtlZeroMemory(&context->stripes[missing2].buf[*stripeoff], readlen); - - if (stripe == missing1) - x = i; - else - y = i; - } else { - RtlCopyMemory(&context->stripes[missing1].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen); - RtlCopyMemory(&context->stripes[missing2].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen); - } - - stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1); - - i--; - do { - galois_double(&context->stripes[missing1].buf[*stripeoff], readlen); - - if (stripe != missing1 && stripe != missing2) { - do_xor(&context->stripes[missing1].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen); - do_xor(&context->stripes[missing2].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen); - } else if (stripe == missing1) - x = i; - else if (stripe == missing2) - y = i; - - stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1); - i--; - } while (stripe != parity2); - - gyx = gpow2(y > x ? 
(y-x) : (255-x+y)); - gx = gpow2(255-x); - denom = gdiv(1, gyx ^ 1); - a = gmul(gyx, denom); - b = gmul(gx, denom); - - p = &context->stripes[parity1].buf[*stripeoff]; - q = &context->stripes[parity2].buf[*stripeoff]; - pxy = &context->stripes[missing2].buf[*stripeoff]; - qxy = &context->stripes[missing1].buf[*stripeoff]; - - for (j = 0; j < readlen; j++) { - *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy); - - p++; - q++; - pxy++; - qxy++; - } - - do_xor(&context->stripes[missing2].buf[*stripeoff], &context->stripes[missing1].buf[*stripeoff], readlen); - do_xor(&context->stripes[missing2].buf[*stripeoff], &context->stripes[parity1].buf[*stripeoff], readlen); - } - - *stripeoff += stripelen; -} + UNUSED(DeviceObject); -static void raid6_decode(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT8* buf, - UINT32* pos, UINT32 length, UINT32 firststripesize) { - UINT16 parity1, stripe; - BOOL first = *pos == 0; - UINT32 stripelen = first ? firststripesize : ci->stripe_length; - - parity1 = ((off / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; - - stripe = (parity1 + 2) % ci->num_stripes; - - while (TRUE) { - if (stripe == parity1) { - *stripeoff += stripelen; - return; - } - - if (skip >= ci->stripe_length) { - skip -= ci->stripe_length; - } else { - UINT32 copylen = min(ci->stripe_length - skip, length - *pos); - - RtlCopyMemory(buf + *pos, &context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen); - - *pos += copylen; - - if (*pos == length) - return; - - skip = 0; - } - - stripe = (stripe + 1) % ci->num_stripes; - } -} + stripe->iosb = Irp->IoStatus; -static BOOL raid6_decode_with_checksum(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT8* buf, - UINT32* pos, UINT32 length, UINT32 firststripesize, UINT32* csum, UINT32 sector_size) { - UINT16 parity1, parity2, stripe; - BOOL first = *pos == 0; - UINT32 stripelen 
= first ? firststripesize : ci->stripe_length; - - parity1 = ((off / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; - parity2 = (parity1 + 1) % ci->num_stripes; - stripe = (parity1 + 2) % ci->num_stripes; - - while (TRUE) { - if (stripe == parity1) { - *stripeoff += stripelen; - return TRUE; - } - - if (skip >= ci->stripe_length) { - skip -= ci->stripe_length; - } else { - UINT32 i; - UINT32 copylen = min(ci->stripe_length - skip, length - *pos); - - RtlCopyMemory(buf + *pos, &context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen); - - for (i = 0; i < copylen / sector_size; i ++) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + *pos + (i * sector_size), sector_size); - - if (crc32 != csum[i]) { - UINT16 j, firststripe; - - if (parity2 == 0 && stripe == 1) - firststripe = 2; - else if (parity2 == 0 || stripe == 0) - firststripe = 1; - else - firststripe = 0; - - RtlCopyMemory(buf + *pos + (i * sector_size), - &context->stripes[firststripe].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], sector_size); - - for (j = firststripe + 1; j < ci->num_stripes; j++) { - if (j != stripe && j != parity2) { - do_xor(buf + *pos + (i * sector_size), &context->stripes[j].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], sector_size); - } - } - - crc32 = ~calc_crc32c(0xffffffff, buf + *pos + (i * sector_size), sector_size); - - if (crc32 != csum[i]) { - UINT8 *parity, *buf2; - UINT16 rs, div; - - // assume p is wrong - - parity = ExAllocatePoolWithTag(NonPagedPool, sector_size, ALLOC_TAG); - if (!parity) { - ERR("out of memory\n"); - return FALSE; - } - - rs = (parity1 + ci->num_stripes - 1) % ci->num_stripes; - j = ci->num_stripes - 3; - - if (rs == stripe) { - RtlZeroMemory(parity, sector_size); - div = j; - } else - RtlCopyMemory(parity, &context->stripes[rs].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], sector_size); - - 
rs = (rs + ci->num_stripes - 1) % ci->num_stripes; - j--; - while (rs != parity2) { - galois_double(parity, sector_size); - - if (rs != stripe) - do_xor(parity, &context->stripes[rs].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], sector_size); - else - div = j; - - rs = (rs + ci->num_stripes - 1) % ci->num_stripes; - j--; - } - - do_xor(parity, &context->stripes[parity2].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], sector_size); - - if (div != 0) - galois_divpower(parity, div, sector_size); - - crc32 = ~calc_crc32c(0xffffffff, parity, sector_size); - if (crc32 == csum[i]) { - RtlCopyMemory(buf + *pos + (i * sector_size), parity, sector_size); - - // recalculate p - RtlCopyMemory(&context->stripes[parity1].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], parity, sector_size); - - for (j = 0; j < ci->num_stripes; j++) { - if (j != stripe && j != parity1 && j != parity2) { - do_xor(&context->stripes[parity1].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], - &context->stripes[j].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], sector_size); - } - } - - context->stripes[parity1].rewrite = TRUE; - - ExFreePool(parity); - goto success; - } - - // assume another of the data stripes is wrong - - buf2 = ExAllocatePoolWithTag(NonPagedPool, sector_size, ALLOC_TAG); - if (!buf2) { - ERR("out of memory\n"); - ExFreePool(parity); - return FALSE; - } - - j = (parity2 + 1) % ci->num_stripes; - - while (j != parity1) { - if (j != stripe) { - UINT16 curstripe, k; - UINT32 bufoff = *stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size); - UINT16 x, y; - UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy; - - curstripe = parity1 == 0 ? 
(ci->num_stripes - 1) : (parity1 - 1); - - // put qxy in parity - // put pxy in buf2 - - k = ci->num_stripes - 3; - if (curstripe == stripe || curstripe == j) { - RtlZeroMemory(parity, sector_size); - RtlZeroMemory(buf2, sector_size); - - if (curstripe == stripe) - x = k; - else - y = k; - } else { - RtlCopyMemory(parity, &context->stripes[curstripe].buf[bufoff], sector_size); - RtlCopyMemory(buf2, &context->stripes[curstripe].buf[bufoff], sector_size); - } - - curstripe = curstripe == 0 ? (ci->num_stripes - 1) : (curstripe - 1); - - k--; - do { - galois_double(parity, sector_size); - - if (curstripe != stripe && curstripe != j) { - do_xor(parity, &context->stripes[curstripe].buf[bufoff], sector_size); - do_xor(buf2, &context->stripes[curstripe].buf[bufoff], sector_size); - } else if (curstripe == stripe) - x = k; - else if (curstripe == j) - y = k; - - curstripe = curstripe == 0 ? (ci->num_stripes - 1) : (curstripe - 1); - k--; - } while (curstripe != parity2); - - gyx = gpow2(y > x ? (y-x) : (255-x+y)); - gx = gpow2(255-x); - - denom = gdiv(1, gyx ^ 1); - a = gmul(gyx, denom); - b = gmul(gx, denom); - - p = &context->stripes[parity1].buf[bufoff]; - q = &context->stripes[parity2].buf[bufoff]; - pxy = buf2; - qxy = parity; - - for (k = 0; k < sector_size; k++) { - *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy); - - p++; - q++; - pxy++; - qxy++; - } - - crc32 = ~calc_crc32c(0xffffffff, parity, sector_size); - - if (crc32 == csum[i]) { - do_xor(buf2, parity, sector_size); - do_xor(buf2, &context->stripes[parity1].buf[bufoff], sector_size); - - RtlCopyMemory(&context->stripes[j].buf[bufoff], buf2, sector_size); - context->stripes[j].rewrite = TRUE; - - RtlCopyMemory(buf + *pos + (i * sector_size), parity, sector_size); - ExFreePool(parity); - ExFreePool(buf2); - goto success; - } - } - - j = (j + 1) % ci->num_stripes; - } - - ExFreePool(parity); - ExFreePool(buf2); - - ERR("unrecoverable checksum error\n"); - return FALSE; - } - -success: - 
RtlCopyMemory(&context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], buf + *pos + (i * sector_size), sector_size); - context->stripes[stripe].rewrite = TRUE; - } - } - - *pos += copylen; - - if (*pos == length) - return TRUE; - - skip = 0; - } - - stripe = (stripe + 1) % ci->num_stripes; - } -} + if (NT_SUCCESS(Irp->IoStatus.Status)) + stripe->status = ReadDataStatus_Success; + else + stripe->status = ReadDataStatus_Error; -static BOOL raid6_decode_with_checksum_metadata(UINT64 addr, UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT8* buf, - UINT32* pos, UINT32 length, UINT32 firststripesize, UINT32 node_size) { - UINT16 parity1, parity2, stripe; - BOOL first = *pos == 0; - UINT32 stripelen = first ? firststripesize : ci->stripe_length; - - parity1 = ((off / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; - parity2 = (parity1 + 1) % ci->num_stripes; - stripe = (parity1 + 2) % ci->num_stripes; - - while (TRUE) { - if (stripe == parity1) { - *stripeoff += stripelen; - return TRUE; - } - - if (skip >= ci->stripe_length) { - skip -= ci->stripe_length; - } else { - UINT32 copylen = min(ci->stripe_length - skip, length - *pos); - tree_header* th = (tree_header*)buf; - UINT32 crc32; - - RtlCopyMemory(buf + *pos, &context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen); - - crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, node_size - sizeof(th->csum)); - - if (addr != th->address || crc32 != *((UINT32*)th->csum)) { - UINT16 j, firststripe; - - if (parity2 == 0 && stripe == 1) - firststripe = 2; - else if (parity2 == 0 || stripe == 0) - firststripe = 1; - else - firststripe = 0; - - RtlCopyMemory(buf + *pos, &context->stripes[firststripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], node_size); - - for (j = firststripe + 1; j < ci->num_stripes; j++) { - if (j != stripe && j != parity2) { - 
do_xor(buf + *pos, &context->stripes[j].buf[*stripeoff + skip - ci->stripe_length + stripelen], node_size); - } - } - - crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, node_size - sizeof(th->csum)); - - if (addr != th->address || crc32 != *((UINT32*)th->csum)) { - UINT8 *parity, *buf2; - UINT16 rs, div; - tree_header* th2; - - // assume p is wrong - - parity = ExAllocatePoolWithTag(NonPagedPool, node_size, ALLOC_TAG); - if (!parity) { - ERR("out of memory\n"); - return FALSE; - } - - rs = (parity1 + ci->num_stripes - 1) % ci->num_stripes; - j = ci->num_stripes - 3; - - if (rs == stripe) { - RtlZeroMemory(parity, node_size); - div = j; - } else - RtlCopyMemory(parity, &context->stripes[rs].buf[*stripeoff + skip - ci->stripe_length + stripelen], node_size); - - rs = (rs + ci->num_stripes - 1) % ci->num_stripes; - j--; - while (rs != parity2) { - galois_double(parity, node_size); - - if (rs != stripe) - do_xor(parity, &context->stripes[rs].buf[*stripeoff + skip - ci->stripe_length + stripelen], node_size); - else - div = j; - - rs = (rs + ci->num_stripes - 1) % ci->num_stripes; - j--; - } - - do_xor(parity, &context->stripes[parity2].buf[*stripeoff + skip - ci->stripe_length + stripelen], node_size); - - if (div != 0) - galois_divpower(parity, div, node_size); - - th2 = (tree_header*)parity; - - crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th2->fs_uuid, node_size - sizeof(th2->csum)); - - if (addr != th2->address || crc32 == *((UINT32*)th2->csum)) { - RtlCopyMemory(buf + *pos, parity, node_size); - - // recalculate p - RtlCopyMemory(&context->stripes[parity1].buf[*stripeoff + skip - ci->stripe_length + stripelen], parity, node_size); - - for (j = 0; j < ci->num_stripes; j++) { - if (j != stripe && j != parity1 && j != parity2) { - do_xor(&context->stripes[parity1].buf[*stripeoff + skip - ci->stripe_length + stripelen], - &context->stripes[j].buf[*stripeoff + skip - ci->stripe_length + stripelen], node_size); - } - } - - context->stripes[parity1].rewrite = TRUE; - 
- ExFreePool(parity); - goto success; - } - - // assume another of the data stripes is wrong - - buf2 = ExAllocatePoolWithTag(NonPagedPool, node_size, ALLOC_TAG); - if (!buf2) { - ERR("out of memory\n"); - ExFreePool(parity); - return FALSE; - } - - j = (parity2 + 1) % ci->num_stripes; - - while (j != parity1) { - if (j != stripe) { - UINT16 curstripe, k; - UINT32 bufoff = *stripeoff + skip - ci->stripe_length + stripelen; - UINT16 x, y; - UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy; - - curstripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1); - - // put qxy in parity - // put pxy in buf2 - - k = ci->num_stripes - 3; - if (curstripe == stripe || curstripe == j) { - RtlZeroMemory(parity, node_size); - RtlZeroMemory(buf2, node_size); - - if (curstripe == stripe) - x = k; - else - y = k; - } else { - RtlCopyMemory(parity, &context->stripes[curstripe].buf[bufoff], node_size); - RtlCopyMemory(buf2, &context->stripes[curstripe].buf[bufoff], node_size); - } - - curstripe = curstripe == 0 ? (ci->num_stripes - 1) : (curstripe - 1); - - k--; - do { - galois_double(parity, node_size); - - if (curstripe != stripe && curstripe != j) { - do_xor(parity, &context->stripes[curstripe].buf[bufoff], node_size); - do_xor(buf2, &context->stripes[curstripe].buf[bufoff], node_size); - } else if (curstripe == stripe) - x = k; - else if (curstripe == j) - y = k; - - curstripe = curstripe == 0 ? (ci->num_stripes - 1) : (curstripe - 1); - k--; - } while (curstripe != parity2); - - gyx = gpow2(y > x ? 
(y-x) : (255-x+y)); - gx = gpow2(255-x); - - denom = gdiv(1, gyx ^ 1); - a = gmul(gyx, denom); - b = gmul(gx, denom); - - p = &context->stripes[parity1].buf[bufoff]; - q = &context->stripes[parity2].buf[bufoff]; - pxy = buf2; - qxy = parity; - - for (k = 0; k < node_size; k++) { - *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy); - - p++; - q++; - pxy++; - qxy++; - } - - th2 = (tree_header*)parity; - - crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th2->fs_uuid, node_size - sizeof(th2->csum)); - - if (addr != th2->address || crc32 == *((UINT32*)th2->csum)) { - do_xor(buf2, parity, node_size); - do_xor(buf2, &context->stripes[parity1].buf[bufoff], node_size); - - RtlCopyMemory(&context->stripes[j].buf[bufoff], buf2, node_size); - context->stripes[j].rewrite = TRUE; - - RtlCopyMemory(buf + *pos, parity, node_size); - ExFreePool(parity); - ExFreePool(buf2); - goto success; - } - } - - j = (j + 1) % ci->num_stripes; - } - - ExFreePool(parity); - ExFreePool(buf2); - - ERR("unrecoverable checksum error\n"); - return FALSE; - } - -success: - RtlCopyMemory(&context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], buf + *pos, node_size); - context->stripes[stripe].rewrite = TRUE; - } - - *pos += copylen; - - if (*pos == length) - return TRUE; - - skip = 0; - } - - stripe = (stripe + 1) % ci->num_stripes; - } -} + if (InterlockedDecrement(&context->stripes_left) == 0) + KeSetEvent(&context->Event, 0, FALSE); -static NTSTATUS check_raid6_nocsum_parity(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT64 maxsize, - BOOL first, UINT32 firststripesize, UINT8* scratch) { - UINT16 parity1, parity2, stripe; - UINT32 stripelen = first ? 
firststripesize : ci->stripe_length; - UINT32 readlen, i; - BOOL bad = FALSE; - - TRACE("(%llx, %x, %p, %p, %llx, %llx, %u, %x, %p)\n", off, skip, context, ci, *stripeoff, maxsize, first, firststripesize, scratch); - - parity1 = ((off / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; - parity2 = (parity1 + 1) % ci->num_stripes; - - readlen = min(min(ci->stripe_length - (skip % ci->stripe_length), stripelen), maxsize - *stripeoff); - - RtlCopyMemory(scratch, &context->stripes[parity1].buf[*stripeoff], readlen); - stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1); - - do { - do_xor(scratch, &context->stripes[stripe].buf[*stripeoff], readlen); - - stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1); - } while (stripe != parity2); - - for (i = 0; i < readlen; i++) { - if (scratch[i] != 0) { - bad = TRUE; - break; - } - } - - if (bad) { - UINT16 missing; - UINT8* buf2; - - // assume parity is bad - stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1); - RtlCopyMemory(scratch, &context->stripes[stripe].buf[*stripeoff], readlen); - stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1); - - do { - galois_double(scratch, readlen); - - do_xor(scratch, &context->stripes[stripe].buf[*stripeoff], readlen); - - stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1); - } while (stripe != parity2); - - if (RtlCompareMemory(scratch, &context->stripes[parity2].buf[*stripeoff], readlen) == readlen) { - WARN("recovering from invalid parity stripe\n"); - - // recalc p - stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1); - RtlCopyMemory(&context->stripes[parity1].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen); - stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1); - - do { - do_xor(&context->stripes[parity1].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen); - - stripe = stripe == 0 ? 
(ci->num_stripes - 1) : (stripe - 1); - } while (stripe != parity2); - - context->stripes[parity1].rewrite = TRUE; - goto end; - } - - // assume one of the data stripes is bad - - buf2 = ExAllocatePoolWithTag(NonPagedPool, readlen, ALLOC_TAG); - if (!buf2) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - missing = (parity2 + 1) % ci->num_stripes; - while (missing != parity1) { - RtlCopyMemory(scratch, &context->stripes[parity1].buf[*stripeoff], readlen); - for (i = 0; i < ci->num_stripes; i++) { - if (i != parity1 && i != parity2 && i != missing) { - do_xor(scratch, &context->stripes[i].buf[*stripeoff], readlen); - } - } - - stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1); - RtlCopyMemory(buf2, stripe == missing ? scratch : &context->stripes[stripe].buf[*stripeoff], readlen); - stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1); - - do { - galois_double(buf2, readlen); - - do_xor(buf2, stripe == missing ? scratch : &context->stripes[stripe].buf[*stripeoff], readlen); - - stripe = stripe == 0 ? 
(ci->num_stripes - 1) : (stripe - 1); - } while (stripe != parity2); - - if (RtlCompareMemory(buf2, &context->stripes[parity2].buf[*stripeoff], readlen) == readlen) { - WARN("recovering from invalid data stripe\n"); - - RtlCopyMemory(&context->stripes[missing].buf[*stripeoff], scratch, readlen); - ExFreePool(buf2); - - context->stripes[missing].rewrite = TRUE; - goto end; - } - - missing = (missing + 1) % ci->num_stripes; - } - - ExFreePool(buf2); - - ERR("unrecoverable checksum error\n"); - return STATUS_CRC_ERROR; - } - -end: - *stripeoff += stripelen; - - return STATUS_SUCCESS; + return STATUS_MORE_PROCESSING_REQUIRED; } -static NTSTATUS check_csum(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32* csum) { +NTSTATUS check_csum(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32* csum) { NTSTATUS Status; calc_job* cj; UINT32* csum2; - + // From experimenting, it seems that 40 sectors is roughly the crossover // point where offloading the crc32 calculation becomes worth it. 
- - if (sectors < 40) { + + if (sectors < 40 || KeQueryActiveProcessorCount(NULL) < 2) { ULONG j; - + for (j = 0; j < sectors; j++) { UINT32 crc32 = ~calc_crc32c(0xffffffff, data + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - + if (crc32 != csum[j]) { return STATUS_CRC_ERROR; } } - + return STATUS_SUCCESS; } - + csum2 = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sectors, ALLOC_TAG); if (!csum2) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + Status = add_calc_job(Vcb, data, sectors, csum2, &cj); if (!NT_SUCCESS(Status)) { ERR("add_calc_job returned %08x\n", Status); + ExFreePool(csum2); return Status; } - + KeWaitForSingleObject(&cj->event, Executive, KernelMode, FALSE, NULL); - + if (RtlCompareMemory(csum2, csum, sectors * sizeof(UINT32)) != sectors * sizeof(UINT32)) { free_calc_job(cj); ExFreePool(csum2); return STATUS_CRC_ERROR; } - + free_calc_job(cj); ExFreePool(csum2); - + return STATUS_SUCCESS; } -static NTSTATUS read_data_dup(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, PIRP Irp, read_data_context* context, - CHUNK_ITEM* ci, device** devices, UINT64 *stripestart, UINT64 *stripeend) { - UINT64 i; +static NTSTATUS read_data_dup(device_extension* Vcb, UINT8* buf, UINT64 addr, read_data_context* context, CHUNK_ITEM* ci, + device** devices, UINT64 generation) { + ULONG i; BOOL checksum_error = FALSE; - UINT16 cancelled = 0; + UINT16 j, stripe = 0; NTSTATUS Status; - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Success) { - if (context->tree) { - tree_header* th = (tree_header*)context->stripes[i].buf; - UINT32 crc32; - - crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum)); - - if (th->address != context->address || crc32 != *((UINT32*)th->csum)) { - context->stripes[i].status = ReadDataStatus_CRCError; - checksum_error = TRUE; - } - } else if (context->csum) { -#ifdef DEBUG_STATS - LARGE_INTEGER time1, time2; 
- - time1 = KeQueryPerformanceCounter(NULL); -#endif - Status = check_csum(Vcb, context->stripes[i].buf, context->stripes[i].Irp->IoStatus.Information / context->sector_size, context->csum); - - if (Status == STATUS_CRC_ERROR) { - context->stripes[i].status = ReadDataStatus_CRCError; - checksum_error = TRUE; - break; - } else if (!NT_SUCCESS(Status)) { - ERR("check_csum returned %08x\n", Status); - return Status; - } -#ifdef DEBUG_STATS - time2 = KeQueryPerformanceCounter(NULL); - - Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; -#endif - } - } else if (context->stripes[i].status == ReadDataStatus_Cancelled) { - cancelled++; + CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; + + for (j = 0; j < ci->num_stripes; j++) { + if (context->stripes[j].status == ReadDataStatus_Error) { + WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status); + log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); + return context->stripes[j].iosb.Status; + } else if (context->stripes[j].status == ReadDataStatus_Success) { + stripe = j; + break; } } - - if (checksum_error) { - CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; - - // FIXME - update dev stats - - if (cancelled > 0) { -#ifdef DEBUG_STATS - LARGE_INTEGER time1, time2; -#endif - context->stripes_left = 0; - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Cancelled) { - PIO_STACK_LOCATION IrpSp; - - // re-run Irp that we cancelled - - if (context->stripes[i].Irp) { - if (devices[i]->devobj->Flags & DO_DIRECT_IO) { - MmUnlockPages(context->stripes[i].Irp->MdlAddress); - IoFreeMdl(context->stripes[i].Irp->MdlAddress); - } - IoFreeIrp(context->stripes[i].Irp); - } - - if (!Irp) { - context->stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE); - - if (!context->stripes[i].Irp) { - ERR("IoAllocateIrp failed\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - } else { - context->stripes[i].Irp = IoMakeAssociatedIrp(Irp, 
devices[i]->devobj->StackSize); - - if (!context->stripes[i].Irp) { - ERR("IoMakeAssociatedIrp failed\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - } - - IrpSp = IoGetNextIrpStackLocation(context->stripes[i].Irp); - IrpSp->MajorFunction = IRP_MJ_READ; - - if (devices[i]->devobj->Flags & DO_BUFFERED_IO) { - FIXME("FIXME - buffered IO\n"); - } else if (devices[i]->devobj->Flags & DO_DIRECT_IO) { - context->stripes[i].Irp->MdlAddress = IoAllocateMdl(context->stripes[i].buf, stripeend[i] - stripestart[i], FALSE, FALSE, NULL); - if (!context->stripes[i].Irp->MdlAddress) { - ERR("IoAllocateMdl failed\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - MmProbeAndLockPages(context->stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess); - } else { - context->stripes[i].Irp->UserBuffer = context->stripes[i].buf; - } - IrpSp->Parameters.Read.Length = stripeend[i] - stripestart[i]; - IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[i] + cis[i].offset; - - context->stripes[i].Irp->UserIosb = &context->stripes[i].iosb; - - IoSetCompletionRoutine(context->stripes[i].Irp, read_data_completion, &context->stripes[i], TRUE, TRUE, TRUE); - - context->stripes_left++; - context->stripes[i].status = ReadDataStatus_Pending; - } - } - - context->stripes_cancel = 0; - KeClearEvent(&context->Event); - -#ifdef DEBUG_STATS - if (!context->tree) - time1 = KeQueryPerformanceCounter(NULL); -#endif + if (context->stripes[stripe].status != ReadDataStatus_Success) + return STATUS_INTERNAL_ERROR; - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Pending) { - IoCallDriver(devices[i]->devobj, context->stripes[i].Irp); - } - } - - KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL); - + if (context->tree) { + tree_header* th = (tree_header*)buf; + UINT32 crc32; + + crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum)); + + if (th->address != context->address || crc32 != 
*((UINT32*)th->csum)) { + checksum_error = TRUE; + log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } else if (generation != 0 && th->generation != generation) { + checksum_error = TRUE; + log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); + } + } else if (context->csum) { #ifdef DEBUG_STATS - if (!context->tree) { - time2 = KeQueryPerformanceCounter(NULL); - - Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart; - } + LARGE_INTEGER time1, time2; + + time1 = KeQueryPerformanceCounter(NULL); #endif - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Success) { - if (context->tree) { - tree_header* th = (tree_header*)context->stripes[i].buf; - UINT32 crc32; - - crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum)); - - if (th->address != context->address || crc32 != *((UINT32*)th->csum)) - context->stripes[i].status = ReadDataStatus_CRCError; - } else if (context->csum) { - NTSTATUS Status; + Status = check_csum(Vcb, buf, (ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum); + + if (Status == STATUS_CRC_ERROR) { + checksum_error = TRUE; + log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } else if (!NT_SUCCESS(Status)) { + ERR("check_csum returned %08x\n", Status); + return Status; + } #ifdef DEBUG_STATS - time1 = KeQueryPerformanceCounter(NULL); + time2 = KeQueryPerformanceCounter(NULL); + + Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; #endif - Status = check_csum(Vcb, context->stripes[i].buf, context->stripes[i].Irp->IoStatus.Information / Vcb->superblock.sector_size, context->csum); - - if (Status == STATUS_CRC_ERROR) - context->stripes[i].status = ReadDataStatus_CRCError; - else if (!NT_SUCCESS(Status)) { - ERR("check_csum returned %08x\n", Status); - return Status; + } + + if (!checksum_error) + return STATUS_SUCCESS; + + if (ci->num_stripes 
== 1) + return STATUS_CRC_ERROR; + + if (context->tree) { + tree_header* t2; + BOOL recovered = FALSE; + + t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG); + if (!t2) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + for (j = 0; j < ci->num_stripes; j++) { + if (j != stripe && devices[j] && devices[j]->devobj) { + Status = sync_read_phys(devices[j]->devobj, cis[j].offset + context->stripes[stripe].stripestart, Vcb->superblock.node_size, (UINT8*)t2, FALSE); + if (!NT_SUCCESS(Status)) { + WARN("sync_read_phys returned %08x\n", Status); + log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); + } else { + UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum)); + + if (t2->address == addr && crc32 == *((UINT32*)t2->csum) && (generation == 0 || t2->generation == generation)) { + RtlCopyMemory(buf, t2, Vcb->superblock.node_size); + ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id); + recovered = TRUE; + + if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad + Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + context->stripes[stripe].stripestart, + t2, Vcb->superblock.node_size); + if (!NT_SUCCESS(Status)) { + WARN("write_data_phys returned %08x\n", Status); + log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS); + } } -#ifdef DEBUG_STATS - time2 = KeQueryPerformanceCounter(NULL); - - Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; -#endif - } + + break; + } else if (t2->address != addr || crc32 != *((UINT32*)t2->csum)) + log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + else + log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_GENERATION_ERRORS); } } } - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Success) { - RtlCopyMemory(buf, context->stripes[i].buf, length); - 
goto raid1write; - } - } - - if (context->tree || ci->num_stripes == 1) { // unable to recover from checksum error + + if (!recovered) { ERR("unrecoverable checksum error at %llx\n", addr); - -#ifdef _DEBUG - if (context->tree) { - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_CRCError) { - tree_header* th = (tree_header*)context->stripes[i].buf; - UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum)); - - if (crc32 != *((UINT32*)th->csum)) { - WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum)); - return STATUS_CRC_ERROR; - } else if (addr != th->address) { - WARN("address of tree was %llx, not %llx as expected\n", th->address, addr); - return STATUS_CRC_ERROR; - } - } - } - } -#endif + ExFreePool(t2); return STATUS_CRC_ERROR; } - - // checksum errors on both stripes - we need to check sector by sector - - for (i = 0; i < (stripeend[0] - stripestart[0]) / context->sector_size; i++) { - UINT16 j; - BOOL success = FALSE; -#ifdef DEBUG_STATS - LARGE_INTEGER time1, time2; - - time1 = KeQueryPerformanceCounter(NULL); -#endif - - for (j = 0; j < ci->num_stripes; j++) { - if (context->stripes[j].status == ReadDataStatus_CRCError) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[j].buf + (i * context->sector_size), context->sector_size); - - if (crc32 == context->csum[i]) { - RtlCopyMemory(buf + (i * context->sector_size), context->stripes[j].buf + (i * context->sector_size), context->sector_size); - success = TRUE; - break; + + ExFreePool(t2); + } else { + ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information / Vcb->superblock.sector_size; + UINT8* sector; + + sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG); + if (!sector) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + for (i = 0; i < sectors; i++) { + UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * 
Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + if (context->csum[i] != crc32) { + BOOL recovered = FALSE; + + for (j = 0; j < ci->num_stripes; j++) { + if (j != stripe && devices[j] && devices[j]->devobj) { + Status = sync_read_phys(devices[j]->devobj, cis[j].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size), + Vcb->superblock.sector_size, sector, FALSE); + if (!NT_SUCCESS(Status)) { + WARN("sync_read_phys returned %08x\n", Status); + log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); + } else { + UINT32 crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size); + + if (crc32b == context->csum[i]) { + RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size); + ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id); + recovered = TRUE; + + if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad + Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size), + sector, Vcb->superblock.sector_size); + if (!NT_SUCCESS(Status)) { + WARN("write_data_phys returned %08x\n", Status); + log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS); + } + } + + break; + } else + log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } } } - } - -#ifdef DEBUG_STATS - time2 = KeQueryPerformanceCounter(NULL); - Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; -#endif - if (!success) { - ERR("unrecoverable checksum error at %llx\n", addr + (i * context->sector_size)); - return STATUS_CRC_ERROR; - } - } - -raid1write: - // write good data over bad - - if (!Vcb->readonly) { - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_CRCError && devices[i] && !devices[i]->readonly) { - 
Status = write_data_phys(devices[i]->devobj, cis[i].offset + stripestart[i], buf, length); - - if (!NT_SUCCESS(Status)) - WARN("write_data_phys returned %08x\n", Status); + if (!recovered) { + ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size)); + ExFreePool(sector); + return STATUS_CRC_ERROR; } } } - - return STATUS_SUCCESS; - } - - // check if any of the stripes succeeded - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Success) { - RtlCopyMemory(buf, context->stripes[i].buf, length); - return STATUS_SUCCESS; - } - } - - // failing that, return the first error we encountered - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Error) - return context->stripes[i].iosb.Status; + + ExFreePool(sector); } - - // if we somehow get here, return STATUS_INTERNAL_ERROR - - return STATUS_INTERNAL_ERROR; + + return STATUS_SUCCESS; } static NTSTATUS read_data_raid0(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, - CHUNK_ITEM* ci, UINT64* stripestart, UINT64* stripeend, UINT16 startoffstripe) { + CHUNK_ITEM* ci, device** devices, UINT64 generation, UINT64 offset) { UINT64 i; - UINT32 pos, *stripeoff; - UINT8 stripe; - + for (i = 0; i < ci->num_stripes; i++) { if (context->stripes[i].status == ReadDataStatus_Error) { - WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status); + WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status); + log_device_error(Vcb, devices[i], BTRFS_DEV_STAT_READ_ERRORS); return context->stripes[i].iosb.Status; } } - - pos = 0; - stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG); - if (!stripeoff) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes); - - stripe = startoffstripe; - while (pos < length) { - if 
(pos == 0) { - UINT32 readlen = min(stripeend[stripe] - stripestart[stripe], ci->stripe_length - (stripestart[stripe] % ci->stripe_length)); - - RtlCopyMemory(buf, context->stripes[stripe].buf, readlen); - stripeoff[stripe] += readlen; - pos += readlen; - } else if (length - pos < ci->stripe_length) { - RtlCopyMemory(buf + pos, &context->stripes[stripe].buf[stripeoff[stripe]], length - pos); - pos = length; - } else { - RtlCopyMemory(buf + pos, &context->stripes[stripe].buf[stripeoff[stripe]], ci->stripe_length); - stripeoff[stripe] += ci->stripe_length; - pos += ci->stripe_length; - } - - stripe = (stripe + 1) % ci->num_stripes; - } - - ExFreePool(stripeoff); - - // FIXME - handle the case where one of the stripes doesn't read everything, i.e. Irp->IoStatus.Information is short - + if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries tree_header* th = (tree_header*)buf; UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); - - if (crc32 != *((UINT32*)th->csum)) { - WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum)); - return STATUS_CRC_ERROR; - } else if (addr != th->address) { - WARN("address of tree was %llx, not %llx as expected\n", th->address, addr); - return STATUS_CRC_ERROR; + + if (crc32 != *((UINT32*)th->csum) || addr != th->address || (generation != 0 && generation != th->generation)) { + UINT64 off; + UINT16 stripe; + + get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &off, &stripe); + + ERR("unrecoverable checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id); + + if (crc32 != *((UINT32*)th->csum)) { + WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum)); + log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + return STATUS_CRC_ERROR; + } else if (addr != th->address) { + WARN("address of tree was %llx, not %llx as expected\n", th->address, addr); + 
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + return STATUS_CRC_ERROR; + } else if (generation != 0 && generation != th->generation) { + WARN("generation of tree was %llx, not %llx as expected\n", th->generation, generation); + log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); + return STATUS_CRC_ERROR; + } } } else if (context->csum) { NTSTATUS Status; #ifdef DEBUG_STATS LARGE_INTEGER time1, time2; - + time1 = KeQueryPerformanceCounter(NULL); #endif Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); - + if (Status == STATUS_CRC_ERROR) { - WARN("checksum error\n"); + for (i = 0; i < length / Vcb->superblock.sector_size; i++) { + UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + if (context->csum[i] != crc32) { + UINT64 off; + UINT16 stripe; + + get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, ci->num_stripes, &off, &stripe); + + ERR("unrecoverable checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id); + + log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + + return Status; + } + } + return Status; } else if (!NT_SUCCESS(Status)) { ERR("check_csum returned %08x\n", Status); @@ -1508,107 +380,56 @@ static NTSTATUS read_data_raid0(device_extension* Vcb, UINT8* buf, UINT64 addr, } #ifdef DEBUG_STATS time2 = KeQueryPerformanceCounter(NULL); - + Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; #endif } - - return STATUS_SUCCESS; + + return STATUS_SUCCESS; } -static NTSTATUS read_data_raid10(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, PIRP Irp, read_data_context* context, - CHUNK_ITEM* ci, device** devices, UINT64* stripestart, UINT64* stripeend, UINT16 startoffstripe) { +static NTSTATUS read_data_raid10(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, 
+ CHUNK_ITEM* ci, device** devices, UINT64 generation, UINT64 offset) { UINT64 i; + UINT16 j, stripe; NTSTATUS Status; BOOL checksum_error = FALSE; - UINT32 pos, *stripeoff; - UINT8 stripe; - read_data_stripe** stripes; + CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; - stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG); - if (!stripes) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes); - - for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) { - UINT16 j; - - for (j = 0; j < ci->sub_stripes; j++) { - if (context->stripes[i+j].status == ReadDataStatus_Success) { - stripes[i / ci->sub_stripes] = &context->stripes[i+j]; - break; - } - } - - if (!stripes[i / ci->sub_stripes]) { - for (j = 0; j < ci->sub_stripes; j++) { - if (context->stripes[i+j].status == ReadDataStatus_Error) { - // both stripes must have errored if we get here - WARN("stripe %llu returned error %08x\n", i+j, context->stripes[i+j].iosb.Status); - ExFreePool(stripes); - return context->stripes[i].iosb.Status; - } - } - } - } - - pos = 0; - stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG); - if (!stripeoff) { - ERR("out of memory\n"); - ExFreePool(stripes); - return STATUS_INSUFFICIENT_RESOURCES; + for (j = 0; j < ci->num_stripes; j++) { + if (context->stripes[j].status == ReadDataStatus_Error) { + WARN("stripe %llu returned error %08x\n", j, context->stripes[j].iosb.Status); + log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); + return context->stripes[j].iosb.Status; + } else if (context->stripes[j].status == ReadDataStatus_Success) + stripe = j; } - - RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes); - - stripe = startoffstripe / ci->sub_stripes; - while (pos < length) { - if (pos == 0) { - UINT32 readlen = 
min(stripeend[stripe * ci->sub_stripes] - stripestart[stripe * ci->sub_stripes], ci->stripe_length - (stripestart[stripe * ci->sub_stripes] % ci->stripe_length)); - - RtlCopyMemory(buf, stripes[stripe]->buf, readlen); - stripeoff[stripe] += readlen; - pos += readlen; - } else if (length - pos < ci->stripe_length) { - RtlCopyMemory(buf + pos, &stripes[stripe]->buf[stripeoff[stripe]], length - pos); - - pos = length; - } else { - RtlCopyMemory(buf + pos, &stripes[stripe]->buf[stripeoff[stripe]], ci->stripe_length); - stripeoff[stripe] += ci->stripe_length; - pos += ci->stripe_length; - } - - stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes); - } - if (context->tree) { tree_header* th = (tree_header*)buf; UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); - + if (crc32 != *((UINT32*)th->csum)) { WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum)); checksum_error = TRUE; - stripes[startoffstripe]->status = ReadDataStatus_CRCError; + log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); } else if (addr != th->address) { WARN("address of tree was %llx, not %llx as expected\n", th->address, addr); checksum_error = TRUE; - stripes[startoffstripe]->status = ReadDataStatus_CRCError; + log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } else if (generation != 0 && generation != th->generation) { + WARN("generation of tree was %llx, not %llx as expected\n", th->generation, generation); + checksum_error = TRUE; + log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); } } else if (context->csum) { - NTSTATUS Status; #ifdef DEBUG_STATS LARGE_INTEGER time1, time2; - + time1 = KeQueryPerformanceCounter(NULL); #endif Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); - + if (Status == STATUS_CRC_ERROR) checksum_error = TRUE; else if (!NT_SUCCESS(Status)) { @@ -1617,395 +438,233 @@ static NTSTATUS 
read_data_raid10(device_extension* Vcb, UINT8* buf, UINT64 addr, } #ifdef DEBUG_STATS time2 = KeQueryPerformanceCounter(NULL); - + Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; #endif } - - if (checksum_error) { - CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; -#ifdef DEBUG_STATS - LARGE_INTEGER time1, time2; -#endif - - // FIXME - update dev stats - - WARN("checksum error\n"); - - if (!context->tree) { - RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes); - - // find out which stripe the error was on - pos = 0; - stripe = startoffstripe / ci->sub_stripes; - while (pos < length) { - if (pos == 0) { - UINT32 readlen = min(stripeend[stripe * ci->sub_stripes] - stripestart[stripe * ci->sub_stripes], ci->stripe_length - (stripestart[stripe * ci->sub_stripes] % ci->stripe_length)); - - stripeoff[stripe] += readlen; - pos += readlen; - for (i = 0; i < readlen / Vcb->superblock.sector_size; i++) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - - if (crc32 != context->csum[i]) - stripes[stripe]->status = ReadDataStatus_CRCError; - } - } else if (length - pos < ci->stripe_length) { - for (i = 0; i < (length - pos) / Vcb->superblock.sector_size; i++) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + pos + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - - if (crc32 != context->csum[(pos / Vcb->superblock.sector_size) + i]) - stripes[stripe]->status = ReadDataStatus_CRCError; - } - - pos = length; - } else { - stripeoff[stripe] += ci->stripe_length; - - for (i = 0; i < ci->stripe_length / Vcb->superblock.sector_size; i++) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + pos + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - - if (crc32 != context->csum[(pos / Vcb->superblock.sector_size) + i]) - stripes[stripe]->status = ReadDataStatus_CRCError; - } - - pos += ci->stripe_length; - } - - stripe = (stripe + 1) % 
(ci->num_stripes / ci->sub_stripes); + if (!checksum_error) + return STATUS_SUCCESS; + + if (context->tree) { + tree_header* t2; + UINT64 off; + UINT16 badsubstripe = 0; + BOOL recovered = FALSE; + + t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG); + if (!t2) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &off, &stripe); + + stripe *= ci->sub_stripes; + + for (j = 0; j < ci->sub_stripes; j++) { + if (context->stripes[stripe + j].status == ReadDataStatus_Success) { + badsubstripe = j; + break; } } - - context->stripes_left = 0; - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_CRCError) { - UINT16 other_stripe = (i % 1) ? (i - 1) : (i + 1); - - if (context->stripes[other_stripe].status == ReadDataStatus_Cancelled) { - PIO_STACK_LOCATION IrpSp; - - // re-run Irp that we cancelled - - if (context->stripes[other_stripe].Irp) { - if (devices[other_stripe]->devobj->Flags & DO_DIRECT_IO) { - MmUnlockPages(context->stripes[other_stripe].Irp->MdlAddress); - IoFreeMdl(context->stripes[other_stripe].Irp->MdlAddress); - } - IoFreeIrp(context->stripes[other_stripe].Irp); - } - - if (!Irp) { - context->stripes[other_stripe].Irp = IoAllocateIrp(devices[other_stripe]->devobj->StackSize, FALSE); - - if (!context->stripes[other_stripe].Irp) { - ERR("IoAllocateIrp failed\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - } else { - context->stripes[other_stripe].Irp = IoMakeAssociatedIrp(Irp, devices[other_stripe]->devobj->StackSize); - - if (!context->stripes[other_stripe].Irp) { - ERR("IoMakeAssociatedIrp failed\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - } - - IrpSp = IoGetNextIrpStackLocation(context->stripes[other_stripe].Irp); - IrpSp->MajorFunction = IRP_MJ_READ; - - if (devices[other_stripe]->devobj->Flags & DO_BUFFERED_IO) { - FIXME("FIXME - buffered IO\n"); - } else if 
(devices[other_stripe]->devobj->Flags & DO_DIRECT_IO) { - context->stripes[other_stripe].Irp->MdlAddress = IoAllocateMdl(context->stripes[other_stripe].buf, stripeend[other_stripe] - stripestart[other_stripe], FALSE, FALSE, NULL); - if (!context->stripes[other_stripe].Irp->MdlAddress) { - ERR("IoAllocateMdl failed\n"); - return STATUS_INSUFFICIENT_RESOURCES; + + for (j = 0; j < ci->sub_stripes; j++) { + if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] && devices[stripe + j]->devobj) { + Status = sync_read_phys(devices[stripe + j]->devobj, cis[stripe + j].offset + off, + Vcb->superblock.node_size, (UINT8*)t2, FALSE); + if (!NT_SUCCESS(Status)) { + WARN("sync_read_phys returned %08x\n", Status); + log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_READ_ERRORS); + } else { + UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum)); + + if (t2->address == addr && crc32 == *((UINT32*)t2->csum) && (generation == 0 || t2->generation == generation)) { + RtlCopyMemory(buf, t2, Vcb->superblock.node_size); + ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe + j]->devitem.dev_id); + recovered = TRUE; + + if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad + Status = write_data_phys(devices[stripe + badsubstripe]->devobj, cis[stripe + badsubstripe].offset + off, + t2, Vcb->superblock.node_size); + if (!NT_SUCCESS(Status)) { + WARN("write_data_phys returned %08x\n", Status); + log_device_error(Vcb, devices[stripe + badsubstripe], BTRFS_DEV_STAT_WRITE_ERRORS); + } } - - MmProbeAndLockPages(context->stripes[other_stripe].Irp->MdlAddress, KernelMode, IoWriteAccess); - } else { - context->stripes[other_stripe].Irp->UserBuffer = context->stripes[other_stripe].buf; - } - IrpSp->Parameters.Read.Length = stripeend[other_stripe] - stripestart[other_stripe]; - 
IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[other_stripe] + cis[other_stripe].offset; - - context->stripes[other_stripe].Irp->UserIosb = &context->stripes[other_stripe].iosb; - - IoSetCompletionRoutine(context->stripes[other_stripe].Irp, read_data_completion, &context->stripes[other_stripe], TRUE, TRUE, TRUE); - - context->stripes_left++; - context->stripes[other_stripe].status = ReadDataStatus_Pending; + break; + } else if (t2->address != addr || crc32 != *((UINT32*)t2->csum)) + log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + else + log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_GENERATION_ERRORS); } } } - - if (context->stripes_left == 0) { - WARN("could not recover from checksum error\n"); - ExFreePool(stripes); - ExFreePool(stripeoff); + + if (!recovered) { + ERR("unrecoverable checksum error at %llx\n", addr); + ExFreePool(t2); return STATUS_CRC_ERROR; } - - context->stripes_cancel = 0; - KeClearEvent(&context->Event); - -#ifdef DEBUG_STATS - if (!context->tree) - time1 = KeQueryPerformanceCounter(NULL); -#endif - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Pending) { - IoCallDriver(devices[i]->devobj, context->stripes[i].Irp); - } - } - - KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL); - -#ifdef DEBUG_STATS - if (!context->tree) { - time2 = KeQueryPerformanceCounter(NULL); - - Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart; - } -#endif + ExFreePool(t2); + } else { + ULONG sectors = length / Vcb->superblock.sector_size; + UINT8* sector; - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_CRCError) { - UINT16 other_stripe = (i % 1) ? 
(i - 1) : (i + 1); - - if (context->stripes[other_stripe].status != ReadDataStatus_Success) { - WARN("could not recover from checksum error\n"); - ExFreePool(stripes); - ExFreePool(stripeoff); - return STATUS_CRC_ERROR; - } - } + sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG); + if (!sector) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; } - RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes); - - pos = 0; - stripe = startoffstripe / ci->sub_stripes; - while (pos < length) { - if (pos == 0) { - UINT32 readlen = min(stripeend[stripe * ci->sub_stripes] - stripestart[stripe * ci->sub_stripes], ci->stripe_length - (stripestart[stripe * ci->sub_stripes] % ci->stripe_length)); - - stripeoff[stripe] += readlen; - pos += readlen; - - if (context->csum && stripes[stripe]->status == ReadDataStatus_CRCError) { - for (i = 0; i < readlen / Vcb->superblock.sector_size; i++) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - - if (crc32 != context->csum[i]) { - UINT16 other_stripe = (stripe * ci->sub_stripes) + (context->stripes[stripe * ci->sub_stripes].status == ReadDataStatus_CRCError ? 
1 : 0); - UINT32 crc32b = ~calc_crc32c(0xffffffff, context->stripes[other_stripe].buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - - if (crc32b == context->csum[i]) { - RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), context->stripes[other_stripe].buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - RtlCopyMemory(stripes[stripe]->buf + (i * Vcb->superblock.sector_size), context->stripes[other_stripe].buf + (i * Vcb->superblock.sector_size), - Vcb->superblock.sector_size); - stripes[stripe]->rewrite = TRUE; - } else { - WARN("could not recover from checksum error\n"); - ExFreePool(stripes); - ExFreePool(stripeoff); - return STATUS_CRC_ERROR; - } - } - } - } else if (context->tree) { - UINT16 other_stripe = (stripe * ci->sub_stripes) + (context->stripes[stripe * ci->sub_stripes].status == ReadDataStatus_CRCError ? 1 : 0); - tree_header* th = (tree_header*)buf; - UINT32 crc32; - - RtlCopyMemory(buf, context->stripes[other_stripe].buf, readlen); - - crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); - - if (addr != th->address || crc32 != *((UINT32*)th->csum)) { - WARN("could not recover from checksum error\n"); - ExFreePool(stripes); - ExFreePool(stripeoff); - return STATUS_CRC_ERROR; - } - - RtlCopyMemory(stripes[stripe]->buf, buf, readlen); - stripes[stripe]->rewrite = TRUE; - } - } else if (length - pos < ci->stripe_length) { - if (context->csum && stripes[stripe]->status == ReadDataStatus_CRCError) { - for (i = 0; i < (length - pos) / Vcb->superblock.sector_size; i++) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + pos + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - - if (crc32 != context->csum[(pos / Vcb->superblock.sector_size) + i]) { - UINT16 other_stripe = (stripe * ci->sub_stripes) + (context->stripes[stripe * ci->sub_stripes].status == ReadDataStatus_CRCError ? 
1 : 0); - UINT32 crc32b = ~calc_crc32c(0xffffffff, &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], - Vcb->superblock.sector_size); - - if (crc32b == context->csum[i]) { - RtlCopyMemory(buf + pos + (i * Vcb->superblock.sector_size), - &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], Vcb->superblock.sector_size); - RtlCopyMemory(&stripes[stripe]->buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], - &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], - Vcb->superblock.sector_size); - stripes[stripe]->rewrite = TRUE; - } else { - WARN("could not recover from checksum error\n"); - ExFreePool(stripes); - ExFreePool(stripeoff); - return STATUS_CRC_ERROR; - } - } + for (i = 0; i < sectors; i++) { + UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + if (context->csum[i] != crc32) { + UINT64 off; + UINT16 stripe2, badsubstripe = 0; + BOOL recovered = FALSE; + + get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, + ci->num_stripes / ci->sub_stripes, &off, &stripe2); + + stripe2 *= ci->sub_stripes; + + for (j = 0; j < ci->sub_stripes; j++) { + if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) { + badsubstripe = j; + break; } } - - pos = length; - } else { - if (context->csum && stripes[stripe]->status == ReadDataStatus_CRCError) { - for (i = 0; i < ci->stripe_length / Vcb->superblock.sector_size; i++) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + pos + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - - if (crc32 != context->csum[(pos / Vcb->superblock.sector_size) + i]) { - UINT16 other_stripe = (stripe * ci->sub_stripes) + (context->stripes[stripe * ci->sub_stripes].status == ReadDataStatus_CRCError ? 
1 : 0); - UINT32 crc32b = ~calc_crc32c(0xffffffff, &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], - Vcb->superblock.sector_size); - + + log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + + for (j = 0; j < ci->sub_stripes; j++) { + if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) { + Status = sync_read_phys(devices[stripe2 + j]->devobj, cis[stripe2 + j].offset + off, + Vcb->superblock.sector_size, sector, FALSE); + if (!NT_SUCCESS(Status)) { + WARN("sync_read_phys returned %08x\n", Status); + log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_READ_ERRORS); + } else { + UINT32 crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size); + if (crc32b == context->csum[i]) { - RtlCopyMemory(buf + pos + (i * Vcb->superblock.sector_size), - &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], Vcb->superblock.sector_size); - RtlCopyMemory(&stripes[stripe]->buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], - &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], - Vcb->superblock.sector_size); - stripes[stripe]->rewrite = TRUE; - } else { - WARN("could not recover from checksum error\n"); - ExFreePool(stripes); - ExFreePool(stripeoff); - return STATUS_CRC_ERROR; - } + RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size); + ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe2 + j]->devitem.dev_id); + recovered = TRUE; + + if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad + Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, cis[stripe2 + badsubstripe].offset + off, + sector, 
Vcb->superblock.sector_size); + if (!NT_SUCCESS(Status)) { + WARN("write_data_phys returned %08x\n", Status); + log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_READ_ERRORS); + } + } + + break; + } else + log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); } } } - - stripeoff[stripe] += ci->stripe_length; - pos += ci->stripe_length; - } - - stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes); - } - - // write good data over bad - - if (!Vcb->readonly) { - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].rewrite && devices[i] && !devices[i]->readonly) { - Status = write_data_phys(devices[i]->devobj, cis[i].offset + stripestart[i], context->stripes[i].buf, stripeend[i] - stripestart[i]); - - if (!NT_SUCCESS(Status)) - WARN("write_data_phys returned %08x\n", Status); + + if (!recovered) { + ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size)); + ExFreePool(sector); + return STATUS_CRC_ERROR; } } } + + ExFreePool(sector); } - - ExFreePool(stripes); - ExFreePool(stripeoff); - - // FIXME - handle the case where one of the stripes doesn't read everything, i.e. 
Irp->IoStatus.Information is short - + return STATUS_SUCCESS; } -static NTSTATUS read_data_raid5(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, PIRP Irp, read_data_context* context, CHUNK_ITEM* ci, - device** devices, UINT64* stripestart, UINT64* stripeend, UINT64 offset, UINT32 firststripesize, BOOL check_nocsum_parity) { - UINT32 pos, skip; +static NTSTATUS read_data_raid5(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, CHUNK_ITEM* ci, + device** devices, UINT64 offset, UINT64 generation, chunk* c, BOOL degraded) { + ULONG i; NTSTATUS Status; - int num_errors = 0; - UINT64 i, off, stripeoff, origoff; - BOOL needs_reconstruct = FALSE; - UINT64 reconstruct_stripe; BOOL checksum_error = FALSE; - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Error) { - num_errors++; - if (num_errors > 1) - break; - } - } - - if (num_errors > 1) { - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Error) { - WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status); - return context->stripes[i].iosb.Status; - } + CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; + UINT16 j, stripe; + BOOL no_success = TRUE; + + for (j = 0; j < ci->num_stripes; j++) { + if (context->stripes[j].status == ReadDataStatus_Error) { + WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status); + log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); + return context->stripes[j].iosb.Status; + } else if (context->stripes[j].status == ReadDataStatus_Success) { + stripe = j; + no_success = FALSE; } } - - off = addr - offset; - off -= off % ((ci->num_stripes - 1) * ci->stripe_length); - skip = addr - offset - off; - origoff = off; - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Cancelled) { - if (needs_reconstruct) { - ERR("more than one stripe needs reconstruction\n"); - 
return STATUS_INTERNAL_ERROR; - } else { - needs_reconstruct = TRUE; - reconstruct_stripe = i; - } + + if (c) { // check partial stripes + LIST_ENTRY* le; + UINT64 ps_length = (ci->num_stripes - 1) * ci->stripe_length; + + ExAcquireResourceSharedLite(&c->partial_stripes_lock, TRUE); + + le = c->partial_stripes.Flink; + while (le != &c->partial_stripes) { + partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry); + + if (ps->address + ps_length > addr && ps->address < addr + length) { + ULONG runlength, index; + + runlength = RtlFindFirstRunClear(&ps->bmp, &index); + + while (runlength != 0) { + UINT64 runstart = ps->address + (index * Vcb->superblock.sector_size); + UINT64 runend = runstart + (runlength * Vcb->superblock.sector_size); + UINT64 start = max(runstart, addr); + UINT64 end = min(runend, addr + length); + + if (end > start) + RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start)); + + runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index); + } + } else if (ps->address >= addr + length) + break; + + le = le->Flink; } + + ExReleaseResourceLite(&c->partial_stripes_lock); } - - if (needs_reconstruct) { - TRACE("reconstructing stripe %u\n", reconstruct_stripe); - - stripeoff = 0; - - raid5_reconstruct(off, skip, context, ci, &stripeoff, stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe], TRUE, firststripesize, reconstruct_stripe); - - while (stripeoff < stripeend[0] - stripestart[0]) { - off += (ci->num_stripes - 1) * ci->stripe_length; - raid5_reconstruct(off, 0, context, ci, &stripeoff, stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe], FALSE, 0, reconstruct_stripe); - } - - off = addr - offset; - off -= off % ((ci->num_stripes - 1) * ci->stripe_length); - } - - pos = 0; - stripeoff = 0; - raid5_decode(off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize); - - while (pos < length) { - off += (ci->num_stripes - 1) * ci->stripe_length; - 
raid5_decode(off, 0, context, ci, &stripeoff, buf, &pos, length, 0); - } - + if (context->tree) { tree_header* th = (tree_header*)buf; UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); - - if (addr != th->address || crc32 != *((UINT32*)th->csum)) + + if (addr != th->address || crc32 != *((UINT32*)th->csum)) { + checksum_error = TRUE; + if (!no_success && !degraded) + log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } else if (generation != 0 && generation != th->generation) { checksum_error = TRUE; + if (!no_success && !degraded) + log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); + } } else if (context->csum) { #ifdef DEBUG_STATS LARGE_INTEGER time1, time2; - + time1 = KeQueryPerformanceCounter(NULL); #endif Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); - + if (Status == STATUS_CRC_ERROR) { - WARN("checksum error\n"); + if (!degraded) + WARN("checksum error\n"); checksum_error = TRUE; } else if (!NT_SUCCESS(Status)) { ERR("check_csum returned %08x\n", Status); @@ -2014,301 +673,359 @@ static NTSTATUS read_data_raid5(device_extension* Vcb, UINT8* buf, UINT64 addr, #ifdef DEBUG_STATS time2 = KeQueryPerformanceCounter(NULL); - + Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; #endif - } - - if (checksum_error) { - CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; - - if (needs_reconstruct) { - PIO_STACK_LOCATION IrpSp; -#ifdef DEBUG_STATS - LARGE_INTEGER time1, time2; -#endif - - // re-run Irp that we cancelled - - if (context->stripes[reconstruct_stripe].Irp) { - if (devices[reconstruct_stripe]->devobj->Flags & DO_DIRECT_IO) { - MmUnlockPages(context->stripes[reconstruct_stripe].Irp->MdlAddress); - IoFreeMdl(context->stripes[reconstruct_stripe].Irp->MdlAddress); - } - IoFreeIrp(context->stripes[reconstruct_stripe].Irp); - } - - if (!Irp) { - context->stripes[reconstruct_stripe].Irp = 
IoAllocateIrp(devices[reconstruct_stripe]->devobj->StackSize, FALSE); - - if (!context->stripes[reconstruct_stripe].Irp) { - ERR("IoAllocateIrp failed\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - } else { - context->stripes[reconstruct_stripe].Irp = IoMakeAssociatedIrp(Irp, devices[reconstruct_stripe]->devobj->StackSize); - - if (!context->stripes[reconstruct_stripe].Irp) { - ERR("IoMakeAssociatedIrp failed\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - } - - IrpSp = IoGetNextIrpStackLocation(context->stripes[reconstruct_stripe].Irp); - IrpSp->MajorFunction = IRP_MJ_READ; - - if (devices[reconstruct_stripe]->devobj->Flags & DO_BUFFERED_IO) { - FIXME("FIXME - buffered IO\n"); - } else if (devices[reconstruct_stripe]->devobj->Flags & DO_DIRECT_IO) { - context->stripes[reconstruct_stripe].Irp->MdlAddress = IoAllocateMdl(context->stripes[reconstruct_stripe].buf, - stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe], FALSE, FALSE, NULL); - if (!context->stripes[reconstruct_stripe].Irp->MdlAddress) { - ERR("IoAllocateMdl failed\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - MmProbeAndLockPages(context->stripes[reconstruct_stripe].Irp->MdlAddress, KernelMode, IoWriteAccess); - } else { - context->stripes[reconstruct_stripe].Irp->UserBuffer = context->stripes[reconstruct_stripe].buf; - } + } else if (degraded) + checksum_error = TRUE; - IrpSp->Parameters.Read.Length = stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe]; - IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[reconstruct_stripe] + cis[reconstruct_stripe].offset; - - context->stripes[reconstruct_stripe].Irp->UserIosb = &context->stripes[reconstruct_stripe].iosb; - - IoSetCompletionRoutine(context->stripes[reconstruct_stripe].Irp, read_data_completion, &context->stripes[reconstruct_stripe], TRUE, TRUE, TRUE); - - context->stripes[reconstruct_stripe].status = ReadDataStatus_Pending; - - context->stripes_left = 1; - KeClearEvent(&context->Event); - -#ifdef DEBUG_STATS 
- if (!context->tree) - time1 = KeQueryPerformanceCounter(NULL); -#endif + if (!checksum_error) + return STATUS_SUCCESS; - IoCallDriver(devices[reconstruct_stripe]->devobj, context->stripes[reconstruct_stripe].Irp); - - KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL); - -#ifdef DEBUG_STATS - if (!context->tree) { - time2 = KeQueryPerformanceCounter(NULL); - - Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart; - } -#endif + if (context->tree) { + UINT16 parity; + UINT64 off; + BOOL recovered = FALSE, first = TRUE, failed = FALSE; + UINT8* t2; - if (context->stripes[reconstruct_stripe].status != ReadDataStatus_Success) { - ERR("unrecoverable checksum error\n"); - return STATUS_CRC_ERROR; - } + t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG); + if (!t2) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; } - - if (context->tree) { - off = origoff; - pos = 0; - stripeoff = 0; - if (!raid5_decode_with_checksum_metadata(addr, off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize, Vcb->superblock.node_size)) { - ERR("unrecoverable metadata checksum error\n"); - return STATUS_CRC_ERROR; - } - } else { - off = origoff; - pos = 0; - stripeoff = 0; - if (!raid5_decode_with_checksum(off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize, context->csum, Vcb->superblock.sector_size)) - return STATUS_CRC_ERROR; - - while (pos < length) { - off += (ci->num_stripes - 1) * ci->stripe_length; - if (!raid5_decode_with_checksum(off, 0, context, ci, &stripeoff, buf, &pos, length, 0, context->csum, Vcb->superblock.sector_size)) - return STATUS_CRC_ERROR; + + get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &off, &stripe); + + parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; + + stripe = (parity + stripe + 1) % ci->num_stripes; + + for (j = 0; j < ci->num_stripes; j++) { + if (j 
!= stripe) { + if (devices[j] && devices[j]->devobj) { + if (first) { + Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, t2, FALSE); + if (!NT_SUCCESS(Status)) { + ERR("sync_read_phys returned %08x\n", Status); + log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); + failed = TRUE; + break; + } + + first = FALSE; + } else { + Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, FALSE); + if (!NT_SUCCESS(Status)) { + ERR("sync_read_phys returned %08x\n", Status); + log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); + failed = TRUE; + break; + } + + do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size); + } + } else { + failed = TRUE; + break; + } } } - - // write good data over bad - - if (!Vcb->readonly) { - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].rewrite && devices[i] && !devices[i]->readonly) { - Status = write_data_phys(devices[i]->devobj, cis[i].offset + stripestart[i], context->stripes[i].buf, stripeend[i] - stripestart[i]); - - if (!NT_SUCCESS(Status)) + + if (!failed) { + tree_header* t3 = (tree_header*)t2; + UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t3->fs_uuid, Vcb->superblock.node_size - sizeof(t3->csum)); + + if (t3->address == addr && crc32 == *((UINT32*)t3->csum) && (generation == 0 || t3->generation == generation)) { + RtlCopyMemory(buf, t2, Vcb->superblock.node_size); + + if (!degraded) + ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id); + + recovered = TRUE; + + if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad + Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size); + if (!NT_SUCCESS(Status)) { WARN("write_data_phys returned %08x\n", Status); + log_device_error(Vcb, devices[stripe], 
BTRFS_DEV_STAT_WRITE_ERRORS); + } } } } - } - - if (check_nocsum_parity && !context->tree && !context->csum) { - UINT32* parity_buf; - - // We are reading a nodatacsum extent. Even though there's no checksum, we - // can still identify errors by checking if the parity is consistent. - - parity_buf = ExAllocatePoolWithTag(NonPagedPool, stripeend[0] - stripestart[0], ALLOC_TAG); - - if (!parity_buf) { + + if (!recovered) { + ERR("unrecoverable checksum error at %llx\n", addr); + ExFreePool(t2); + return STATUS_CRC_ERROR; + } + + ExFreePool(t2); + } else { + ULONG sectors = length / Vcb->superblock.sector_size; + UINT8* sector; + + sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG); + if (!sector) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - - RtlCopyMemory(parity_buf, context->stripes[0].buf, stripeend[0] - stripestart[0]); - - for (i = 0; i < ci->num_stripes; i++) { - do_xor((UINT8*)parity_buf, context->stripes[i].buf, stripeend[0] - stripestart[0]); - } - - for (i = 0; i < (stripeend[0] - stripestart[0]) / sizeof(UINT32); i++) { - if (parity_buf[i] != 0) { - ERR("parity error on nodatacsum inode\n"); - ExFreePool(parity_buf); - return STATUS_CRC_ERROR; + + for (i = 0; i < sectors; i++) { + UINT16 parity; + UINT64 off; + UINT32 crc32; + + if (context->csum) + crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, + ci->num_stripes - 1, &off, &stripe); + + parity = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; + + stripe = (parity + stripe + 1) % ci->num_stripes; + + if (!devices[stripe] || !devices[stripe]->devobj || (context->csum && context->csum[i] != crc32)) { + BOOL recovered = FALSE, first = TRUE, failed = FALSE; + + if (devices[stripe] && 
devices[stripe]->devobj) + log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_READ_ERRORS); + + for (j = 0; j < ci->num_stripes; j++) { + if (j != stripe) { + if (devices[j] && devices[j]->devobj) { + if (first) { + Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, FALSE); + if (!NT_SUCCESS(Status)) { + ERR("sync_read_phys returned %08x\n", Status); + failed = TRUE; + log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); + break; + } + + first = FALSE; + } else { + Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector + Vcb->superblock.sector_size, FALSE); + if (!NT_SUCCESS(Status)) { + ERR("sync_read_phys returned %08x\n", Status); + failed = TRUE; + log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); + break; + } + + do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size); + } + } else { + failed = TRUE; + break; + } + } + } + + if (!failed) { + if (context->csum) + crc32 = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size); + + if (!context->csum || crc32 == context->csum[i]) { + RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size); + + if (!degraded) + ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id); + + recovered = TRUE; + + if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad + Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + off, + sector, Vcb->superblock.sector_size); + if (!NT_SUCCESS(Status)) { + WARN("write_data_phys returned %08x\n", Status); + log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS); + } + } + } + } + + if (!recovered) { + ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size)); + 
ExFreePool(sector); + return STATUS_CRC_ERROR; + } } } - - ExFreePool(parity_buf); + + ExFreePool(sector); } - + return STATUS_SUCCESS; } -static NTSTATUS read_data_raid6(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, PIRP Irp, read_data_context* context, CHUNK_ITEM* ci, - device** devices, UINT64* stripestart, UINT64* stripeend, UINT64 offset, UINT32 firststripesize, BOOL check_nocsum_parity) { - NTSTATUS Status; - UINT32 pos, skip; - int num_errors = 0; - UINT64 i, off, stripeoff, origoff; - UINT8 needs_reconstruct = 0; - UINT16 missing1, missing2; - BOOL checksum_error = FALSE; - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Error) { - num_errors++; - if (num_errors > 2) - break; - } - } - - if (num_errors > 2) { - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Error) { - WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status); - return context->stripes[i].iosb.Status; - } +void raid6_recover2(UINT8* sectors, UINT16 num_stripes, ULONG sector_size, UINT16 missing1, UINT16 missing2, UINT8* out) { + if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data + UINT16 missing = missing1 == (num_stripes - 2) ? 
missing2 : missing1; + UINT16 stripe; + + stripe = num_stripes - 3; + + if (stripe == missing) + RtlZeroMemory(out, sector_size); + else + RtlCopyMemory(out, sectors + (stripe * sector_size), sector_size); + + do { + stripe--; + + galois_double(out, sector_size); + + if (stripe != missing) + do_xor(out, sectors + (stripe * sector_size), sector_size); + } while (stripe > 0); + + do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size); + + if (missing != 0) + galois_divpower(out, (UINT8)missing, sector_size); + } else { // reconstruct from p and q + UINT16 x, y, stripe; + UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy; + UINT32 j; + + stripe = num_stripes - 3; + + pxy = out + sector_size; + qxy = out; + + if (stripe == missing1 || stripe == missing2) { + RtlZeroMemory(qxy, sector_size); + RtlZeroMemory(pxy, sector_size); + + if (stripe == missing1) + x = stripe; + else + y = stripe; + } else { + RtlCopyMemory(qxy, sectors + (stripe * sector_size), sector_size); + RtlCopyMemory(pxy, sectors + (stripe * sector_size), sector_size); } - } - - off = addr - offset; - off -= off % ((ci->num_stripes - 2) * ci->stripe_length); - skip = addr - offset - off; - origoff = off; - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Cancelled) { - if (needs_reconstruct == 2) { - ERR("more than two stripes need reconstruction\n"); - return STATUS_INTERNAL_ERROR; - } else if (needs_reconstruct == 1) { - needs_reconstruct++; - missing2 = i; - } else { - needs_reconstruct++; - missing1 = i; - } + + do { + stripe--; + + galois_double(qxy, sector_size); + + if (stripe != missing1 && stripe != missing2) { + do_xor(qxy, sectors + (stripe * sector_size), sector_size); + do_xor(pxy, sectors + (stripe * sector_size), sector_size); + } else if (stripe == missing1) + x = stripe; + else if (stripe == missing2) + y = stripe; + } while (stripe > 0); + + gyx = gpow2(y > x ? 
(y-x) : (255-x+y)); + gx = gpow2(255-x); + + denom = gdiv(1, gyx ^ 1); + a = gmul(gyx, denom); + b = gmul(gx, denom); + + p = sectors + ((num_stripes - 2) * sector_size); + q = sectors + ((num_stripes - 1) * sector_size); + + for (j = 0; j < sector_size; j++) { + *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy); + + p++; + q++; + pxy++; + qxy++; } + + do_xor(out + sector_size, out, sector_size); + do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size); } - - if (needs_reconstruct > 0) { - stripeoff = 0; - - if (needs_reconstruct == 2) { - TRACE("reconstructing stripes %u and %u\n", missing1, missing2); - - raid6_reconstruct2(off, skip, context, ci, &stripeoff, stripeend[missing1] - stripestart[missing1], - TRUE, firststripesize, missing1, missing2); - - while (stripeoff < stripeend[0] - stripestart[0]) { - off += (ci->num_stripes - 2) * ci->stripe_length; - raid6_reconstruct2(off, 0, context, ci, &stripeoff, stripeend[missing1] - stripestart[missing1], - FALSE, 0, missing1, missing2); - } - } else { - TRACE("reconstructing stripe %u\n", missing1); - - raid6_reconstruct1(off, skip, context, ci, &stripeoff, stripeend[missing1] - stripestart[missing1], TRUE, firststripesize, missing1); - - while (stripeoff < stripeend[0] - stripestart[0]) { - off += (ci->num_stripes - 2) * ci->stripe_length; - raid6_reconstruct1(off, 0, context, ci, &stripeoff, stripeend[missing1] - stripestart[missing1], FALSE, 0, missing1); - } +} + +static NTSTATUS read_data_raid6(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, CHUNK_ITEM* ci, + device** devices, UINT64 offset, UINT64 generation, chunk* c, BOOL degraded) { + NTSTATUS Status; + ULONG i; + BOOL checksum_error = FALSE; + CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; + UINT16 stripe, j; + BOOL no_success = TRUE; + + for (j = 0; j < ci->num_stripes; j++) { + if (context->stripes[j].status == ReadDataStatus_Error) { + WARN("stripe %u returned error %08x\n", j, 
context->stripes[j].iosb.Status); + + if (devices[j]) + log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); + return context->stripes[j].iosb.Status; + } else if (context->stripes[j].status == ReadDataStatus_Success) { + stripe = j; + no_success = FALSE; } - - off = origoff; } - - if (check_nocsum_parity && !context->tree && !context->csum) { - UINT8* scratch; - - scratch = ExAllocatePoolWithTag(NonPagedPool, ci->stripe_length, ALLOC_TAG); - if (!scratch) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - stripeoff = 0; - Status = check_raid6_nocsum_parity(off, skip, context, ci, &stripeoff, stripeend[0] - stripestart[0], TRUE, firststripesize, scratch); - if (!NT_SUCCESS(Status)) { - ERR("check_raid6_nocsum_parity returned %08x\n", Status); - ExFreePool(scratch); - return Status; - } - - while (stripeoff < stripeend[0] - stripestart[0]) { - off += (ci->num_stripes - 2) * ci->stripe_length; - Status = check_raid6_nocsum_parity(off, 0, context, ci, &stripeoff, stripeend[0] - stripestart[0], FALSE, 0, scratch); - - if (!NT_SUCCESS(Status)) { - ERR("check_raid6_nocsum_parity returned %08x\n", Status); - ExFreePool(scratch); - return Status; - } + + if (c) { // check partial stripes + LIST_ENTRY* le; + UINT64 ps_length = (ci->num_stripes - 2) * ci->stripe_length; + + ExAcquireResourceSharedLite(&c->partial_stripes_lock, TRUE); + + le = c->partial_stripes.Flink; + while (le != &c->partial_stripes) { + partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry); + + if (ps->address + ps_length > addr && ps->address < addr + length) { + ULONG runlength, index; + + runlength = RtlFindFirstRunClear(&ps->bmp, &index); + + while (runlength != 0) { + UINT64 runstart = ps->address + (index * Vcb->superblock.sector_size); + UINT64 runend = runstart + (runlength * Vcb->superblock.sector_size); + UINT64 start = max(runstart, addr); + UINT64 end = min(runend, addr + length); + + if (end > start) + RtlCopyMemory(buf + start - addr, 
&ps->data[start - ps->address], (ULONG)(end - start)); + + runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index); + } + } else if (ps->address >= addr + length) + break; + + le = le->Flink; } - - ExFreePool(scratch); - - off = origoff; - } - - pos = 0; - stripeoff = 0; - raid6_decode(off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize); - - while (pos < length) { - off += (ci->num_stripes - 2) * ci->stripe_length; - raid6_decode(off, 0, context, ci, &stripeoff, buf, &pos, length, 0); + + ExReleaseResourceLite(&c->partial_stripes_lock); } - + if (context->tree) { tree_header* th = (tree_header*)buf; UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); - - if (addr != th->address || crc32 != *((UINT32*)th->csum)) + + if (addr != th->address || crc32 != *((UINT32*)th->csum)) { checksum_error = TRUE; + if (!no_success && !degraded && devices[stripe]) + log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } else if (generation != 0 && generation != th->generation) { + checksum_error = TRUE; + if (!no_success && !degraded && devices[stripe]) + log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); + } } else if (context->csum) { #ifdef DEBUG_STATS LARGE_INTEGER time1, time2; - + time1 = KeQueryPerformanceCounter(NULL); #endif Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); - + if (Status == STATUS_CRC_ERROR) { - WARN("checksum error\n"); + if (!degraded) + WARN("checksum error\n"); checksum_error = TRUE; } else if (!NT_SUCCESS(Status)) { ERR("check_csum returned %08x\n", Status); @@ -2316,227 +1033,497 @@ static NTSTATUS read_data_raid6(device_extension* Vcb, UINT8* buf, UINT64 addr, } #ifdef DEBUG_STATS time2 = KeQueryPerformanceCounter(NULL); - + Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; #endif - } - - if (checksum_error) { - CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 
- - for (i = 0; i < needs_reconstruct; i++) { - PIO_STACK_LOCATION IrpSp; - UINT16 reconstruct_stripe = i == 0 ? missing1 : missing2; - - // re-run Irps that we cancelled - - if (context->stripes[reconstruct_stripe].Irp) { - if (devices[reconstruct_stripe]->devobj->Flags & DO_DIRECT_IO) { - MmUnlockPages(context->stripes[reconstruct_stripe].Irp->MdlAddress); - IoFreeMdl(context->stripes[reconstruct_stripe].Irp->MdlAddress); + } else if (degraded) + checksum_error = TRUE; + + if (!checksum_error) + return STATUS_SUCCESS; + + if (context->tree) { + UINT8* sector; + UINT16 k, physstripe, parity1, parity2, error_stripe; + UINT64 off; + BOOL recovered = FALSE, failed = FALSE; + ULONG num_errors = 0; + + sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG); + if (!sector) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &off, &stripe); + + parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; + parity2 = (parity1 + 1) % ci->num_stripes; + + physstripe = (parity2 + stripe + 1) % ci->num_stripes; + + j = (parity2 + 1) % ci->num_stripes; + + for (k = 0; k < ci->num_stripes - 1; k++) { + if (j != physstripe) { + if (devices[j] && devices[j]->devobj) { + Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, sector + (k * Vcb->superblock.node_size), FALSE); + if (!NT_SUCCESS(Status)) { + ERR("sync_read_phys returned %08x\n", Status); + log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); + num_errors++; + error_stripe = k; + + if (num_errors > 1) { + failed = TRUE; + break; + } + } + } else { + num_errors++; + error_stripe = k; + + if (num_errors > 1) { + failed = TRUE; + break; + } } - IoFreeIrp(context->stripes[reconstruct_stripe].Irp); } - - if (!Irp) { - context->stripes[reconstruct_stripe].Irp = 
IoAllocateIrp(devices[reconstruct_stripe]->devobj->StackSize, FALSE); - - if (!context->stripes[reconstruct_stripe].Irp) { - ERR("IoAllocateIrp failed\n"); - return STATUS_INSUFFICIENT_RESOURCES; + + j = (j + 1) % ci->num_stripes; + } + + if (!failed) { + if (num_errors == 0) { + tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size)); + UINT32 crc32; + + RtlCopyMemory(sector + (stripe * Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), + Vcb->superblock.node_size); + + for (j = 0; j < ci->num_stripes - 2; j++) { + if (j != stripe) + do_xor(sector + (stripe * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size); } - } else { - context->stripes[reconstruct_stripe].Irp = IoMakeAssociatedIrp(Irp, devices[reconstruct_stripe]->devobj->StackSize); - - if (!context->stripes[reconstruct_stripe].Irp) { - ERR("IoMakeAssociatedIrp failed\n"); - return STATUS_INSUFFICIENT_RESOURCES; + + crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); + + if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) { + RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size); + + if (devices[physstripe] && devices[physstripe]->devobj) + ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[physstripe]->devitem.dev_id); + + recovered = TRUE; + + if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad + Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off, + sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size); + if (!NT_SUCCESS(Status)) { + WARN("write_data_phys returned %08x\n", Status); + log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); + } + } } } - - IrpSp = 
IoGetNextIrpStackLocation(context->stripes[reconstruct_stripe].Irp); - IrpSp->MajorFunction = IRP_MJ_READ; - - if (devices[reconstruct_stripe]->devobj->Flags & DO_BUFFERED_IO) { - FIXME("FIXME - buffered IO\n"); - } else if (devices[reconstruct_stripe]->devobj->Flags & DO_DIRECT_IO) { - context->stripes[reconstruct_stripe].Irp->MdlAddress = IoAllocateMdl(context->stripes[reconstruct_stripe].buf, - stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe], FALSE, FALSE, NULL); - if (!context->stripes[reconstruct_stripe].Irp->MdlAddress) { - ERR("IoAllocateMdl failed\n"); - return STATUS_INSUFFICIENT_RESOURCES; + + if (!recovered) { + UINT32 crc32; + tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size)); + BOOL read_q = FALSE; + + if (devices[parity2] && devices[parity2]->devobj) { + Status = sync_read_phys(devices[parity2]->devobj, cis[parity2].offset + off, + Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), FALSE); + if (!NT_SUCCESS(Status)) { + ERR("sync_read_phys returned %08x\n", Status); + log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); + } else + read_q = TRUE; } - - MmProbeAndLockPages(context->stripes[reconstruct_stripe].Irp->MdlAddress, KernelMode, IoWriteAccess); - } else { - context->stripes[reconstruct_stripe].Irp->UserBuffer = context->stripes[reconstruct_stripe].buf; - } - IrpSp->Parameters.Read.Length = stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe]; - IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[reconstruct_stripe] + cis[reconstruct_stripe].offset; - - context->stripes[reconstruct_stripe].Irp->UserIosb = &context->stripes[reconstruct_stripe].iosb; - - IoSetCompletionRoutine(context->stripes[reconstruct_stripe].Irp, read_data_completion, &context->stripes[reconstruct_stripe], TRUE, TRUE, TRUE); + if (read_q) { + if (num_errors == 1) { + raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, 
sector + (ci->num_stripes * Vcb->superblock.node_size)); - context->stripes[reconstruct_stripe].status = ReadDataStatus_Pending; - } - - if (needs_reconstruct > 0) { -#ifdef DEBUG_STATS - LARGE_INTEGER time1, time2; -#endif - context->stripes_left = needs_reconstruct; - KeClearEvent(&context->Event); - -#ifdef DEBUG_STATS - if (!context->tree) - time1 = KeQueryPerformanceCounter(NULL); -#endif - - for (i = 0; i < needs_reconstruct; i++) { - UINT16 reconstruct_stripe = i == 0 ? missing1 : missing2; - - IoCallDriver(devices[reconstruct_stripe]->devobj, context->stripes[reconstruct_stripe].Irp); - } - - KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL); - -#ifdef DEBUG_STATS - if (!context->tree) { - time2 = KeQueryPerformanceCounter(NULL); - - Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart; - } -#endif + crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); - for (i = 0; i < needs_reconstruct; i++) { - UINT16 reconstruct_stripe = i == 0 ? 
missing1 : missing2; - - if (context->stripes[reconstruct_stripe].status != ReadDataStatus_Success) { - ERR("unrecoverable checksum error\n"); - return STATUS_CRC_ERROR; + if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) + recovered = TRUE; + } else { + for (j = 0; j < ci->num_stripes - 1; j++) { + if (j != stripe) { + raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.node_size)); + + crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); + + if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) { + recovered = TRUE; + error_stripe = j; + break; + } + } + } + } } - } - } - - off = origoff; - - if (context->tree) { - pos = 0; - stripeoff = 0; - if (!raid6_decode_with_checksum_metadata(addr, off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize, Vcb->superblock.node_size)) { - ERR("unrecoverable metadata checksum error\n"); - return STATUS_CRC_ERROR; - } - } else { - pos = 0; - stripeoff = 0; - if (!raid6_decode_with_checksum(off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize, context->csum, Vcb->superblock.sector_size)) - return STATUS_CRC_ERROR; - - while (pos < length) { - off += (ci->num_stripes - 1) * ci->stripe_length; - if (!raid6_decode_with_checksum(off, 0, context, ci, &stripeoff, buf, &pos, length, 0, context->csum, Vcb->superblock.sector_size)) - return STATUS_CRC_ERROR; - } - } - } - - // write good data over bad - - if (!Vcb->readonly) { - CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].rewrite && devices[i] && !devices[i]->readonly) { - Status = write_data_phys(devices[i]->devobj, cis[i].offset + stripestart[i], context->stripes[i].buf, stripeend[i] - stripestart[i]); - - if (!NT_SUCCESS(Status)) - 
WARN("write_data_phys returned %08x\n", Status); - } - } - } - - return STATUS_SUCCESS; -} -NTSTATUS STDCALL read_data(device_extension* Vcb, UINT64 addr, UINT32 length, UINT32* csum, BOOL is_tree, UINT8* buf, chunk* c, chunk** pc, - PIRP Irp, BOOL check_nocsum_parity) { - CHUNK_ITEM* ci; - CHUNK_ITEM_STRIPE* cis; - read_data_context* context; - UINT64 i, type, offset; + if (recovered) { + UINT16 error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes; + + if (devices[physstripe] && devices[physstripe]->devobj) + ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[physstripe]->devitem.dev_id); + + RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size); + + if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad + Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off, + sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size); + if (!NT_SUCCESS(Status)) { + WARN("write_data_phys returned %08x\n", Status); + log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); + } + } + + if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) { + if (error_stripe == ci->num_stripes - 2) { + ERR("recovering from parity error at %llx, device %llx\n", addr, devices[error_stripe_phys]->devitem.dev_id); + + log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + + RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), Vcb->superblock.node_size); + + for (j = 0; j < ci->num_stripes - 2; j++) { + if (j == stripe) { + do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size), + Vcb->superblock.node_size); + } else { + do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), 
+ Vcb->superblock.node_size); + } + } + } else { + ERR("recovering from checksum error at %llx, device %llx\n", addr + ((error_stripe - stripe) * ci->stripe_length), + devices[error_stripe_phys]->devitem.dev_id); + + log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + + RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size), + sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size); + } + } + + if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad + Status = write_data_phys(devices[error_stripe_phys]->devobj, cis[error_stripe_phys].offset + off, + sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size); + if (!NT_SUCCESS(Status)) { + WARN("write_data_phys returned %08x\n", Status); + log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS); + } + } + } + } + } + + if (!recovered) { + ERR("unrecoverable checksum error at %llx\n", addr); + ExFreePool(sector); + return STATUS_CRC_ERROR; + } + + ExFreePool(sector); + } else { + ULONG sectors = length / Vcb->superblock.sector_size; + UINT8* sector; + + sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * (ci->num_stripes + 2), ALLOC_TAG); + if (!sector) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + for (i = 0; i < sectors; i++) { + UINT64 off; + UINT16 physstripe, parity1, parity2; + UINT32 crc32; + + if (context->csum) + crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, + ci->num_stripes - 2, &off, &stripe); + + parity1 = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; + parity2 = 
(parity1 + 1) % ci->num_stripes; + + physstripe = (parity2 + stripe + 1) % ci->num_stripes; + + if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && context->csum[i] != crc32)) { + UINT16 k, error_stripe; + BOOL recovered = FALSE, failed = FALSE; + ULONG num_errors = 0; + + if (devices[physstripe] && devices[physstripe]->devobj) + log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_READ_ERRORS); + + j = (parity2 + 1) % ci->num_stripes; + + for (k = 0; k < ci->num_stripes - 1; k++) { + if (j != physstripe) { + if (devices[j] && devices[j]->devobj) { + Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector + (k * Vcb->superblock.sector_size), FALSE); + if (!NT_SUCCESS(Status)) { + ERR("sync_read_phys returned %08x\n", Status); + log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); + num_errors++; + error_stripe = k; + + if (num_errors > 1) { + failed = TRUE; + break; + } + } + } else { + num_errors++; + error_stripe = k; + + if (num_errors > 1) { + failed = TRUE; + break; + } + } + } + + j = (j + 1) % ci->num_stripes; + } + + if (!failed) { + if (num_errors == 0) { + RtlCopyMemory(sector + (stripe * Vcb->superblock.sector_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + for (j = 0; j < ci->num_stripes - 2; j++) { + if (j != stripe) + do_xor(sector + (stripe * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + } + + if (context->csum) + crc32 = ~calc_crc32c(0xffffffff, sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + if (!context->csum || crc32 == context->csum[i]) { + RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + if (devices[physstripe] && devices[physstripe]->devobj) + ERR("recovering from checksum error at %llx, device %llx\n", addr + 
UInt32x32To64(i, Vcb->superblock.sector_size), + devices[physstripe]->devitem.dev_id); + + recovered = TRUE; + + if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad + Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off, + sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + if (!NT_SUCCESS(Status)) { + WARN("write_data_phys returned %08x\n", Status); + log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); + } + } + } + } + + if (!recovered) { + BOOL read_q = FALSE; + + if (devices[parity2] && devices[parity2]->devobj) { + Status = sync_read_phys(devices[parity2]->devobj, cis[parity2].offset + off, + Vcb->superblock.sector_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.sector_size), FALSE); + if (!NT_SUCCESS(Status)) { + ERR("sync_read_phys returned %08x\n", Status); + log_device_error(Vcb, devices[parity2], BTRFS_DEV_STAT_READ_ERRORS); + } else + read_q = TRUE; + } + + if (read_q) { + if (num_errors == 1) { + raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.sector_size)); + + if (!devices[physstripe] || !devices[physstripe]->devobj) + recovered = TRUE; + else { + crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + if (crc32 == context->csum[i]) + recovered = TRUE; + } + } else { + for (j = 0; j < ci->num_stripes - 1; j++) { + if (j != stripe) { + raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.sector_size)); + + crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + if (crc32 == context->csum[i]) { + recovered = TRUE; + error_stripe = j; + break; + } + } + } + } + } + + if (recovered) { + UINT16 
error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes; + + if (devices[physstripe] && devices[physstripe]->devobj) + ERR("recovering from checksum error at %llx, device %llx\n", + addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[physstripe]->devitem.dev_id); + + RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad + Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off, + sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + if (!NT_SUCCESS(Status)) { + WARN("write_data_phys returned %08x\n", Status); + log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); + } + } + + if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) { + if (error_stripe == ci->num_stripes - 2) { + ERR("recovering from parity error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), + devices[error_stripe_phys]->devitem.dev_id); + + log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + + RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + for (j = 0; j < ci->num_stripes - 2; j++) { + if (j == stripe) { + do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size), + Vcb->superblock.sector_size); + } else { + do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size), + Vcb->superblock.sector_size); + } + } + } else { + ERR("recovering from checksum error at %llx, device %llx\n", + addr + UInt32x32To64(i, Vcb->superblock.sector_size) + ((error_stripe - stripe) * ci->stripe_length), + 
devices[error_stripe_phys]->devitem.dev_id); + + log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + + RtlCopyMemory(sector + (error_stripe * Vcb->superblock.sector_size), + sector + ((ci->num_stripes + 1) * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + } + } + + if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad + Status = write_data_phys(devices[error_stripe_phys]->devobj, cis[error_stripe_phys].offset + off, + sector + (error_stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + if (!NT_SUCCESS(Status)) { + WARN("write_data_phys returned %08x\n", Status); + log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS); + } + } + } + } + } + + if (!recovered) { + ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size)); + ExFreePool(sector); + return STATUS_CRC_ERROR; + } + } + } + + ExFreePool(sector); + } + + return STATUS_SUCCESS; +} + +NTSTATUS read_data(_In_ device_extension* Vcb, _In_ UINT64 addr, _In_ UINT32 length, _In_reads_bytes_opt_(length*sizeof(UINT32)/Vcb->superblock.sector_size) UINT32* csum, + _In_ BOOL is_tree, _Out_writes_bytes_(length) UINT8* buf, _In_opt_ chunk* c, _Out_opt_ chunk** pc, _In_opt_ PIRP Irp, _In_ UINT64 generation, _In_ BOOL file_read, + _In_ ULONG priority) { + CHUNK_ITEM* ci; + CHUNK_ITEM_STRIPE* cis; + read_data_context context; + UINT64 type, offset, total_reading = 0; NTSTATUS Status; - device** devices; - UINT64 *stripestart = NULL, *stripeend = NULL; - UINT32 firststripesize; - UINT16 startoffstripe, allowed_missing, missing_devices = 0; + device** devices = NULL; + UINT16 i, startoffstripe, allowed_missing, missing_devices = 0; + UINT8* dummypage = NULL; + PMDL dummy_mdl = NULL; + BOOL need_to_wait; + UINT64 lockaddr, locklen; #ifdef DEBUG_STATS LARGE_INTEGER time1, time2; #endif - + if 
(Vcb->log_to_phys_loaded) { if (!c) { c = get_chunk_from_address(Vcb, addr); - + if (!c) { ERR("get_chunk_from_address failed\n"); return STATUS_INTERNAL_ERROR; } } - + ci = c->chunk_item; offset = c->offset; devices = c->devices; - + if (pc) *pc = c; } else { LIST_ENTRY* le = Vcb->sys_chunks.Flink; - + ci = NULL; - + + c = NULL; while (le != &Vcb->sys_chunks) { sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry); - + if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) { CHUNK_ITEM* chunk_item = sc->data; - + if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) { ci = chunk_item; offset = sc->key.offset; cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1]; - + devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG); if (!devices) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + for (i = 0; i < ci->num_stripes; i++) { devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid); } - + break; } } - + le = le->Flink; } - + if (!ci) { ERR("could not find chunk for %llx in bootstrap\n", addr); return STATUS_INTERNAL_ERROR; } - + if (pc) *pc = NULL; } - + if (ci->type & BLOCK_FLAG_DUPLICATE) { type = BLOCK_FLAG_DUPLICATE; - allowed_missing = 0; + allowed_missing = ci->num_stripes - 1; } else if (ci->type & BLOCK_FLAG_RAID0) { type = BLOCK_FLAG_RAID0; allowed_missing = 0; @@ -2559,458 +1546,1189 @@ NTSTATUS STDCALL read_data(device_extension* Vcb, UINT64 addr, UINT32 length, UI cis = (CHUNK_ITEM_STRIPE*)&ci[1]; - context = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_context), ALLOC_TAG); - if (!context) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - RtlZeroMemory(context, sizeof(read_data_context)); - KeInitializeEvent(&context->Event, NotificationEvent, FALSE); - - context->stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG); - if (!context->stripes) { - ERR("out of 
memory\n"); - ExFreePool(context); - return STATUS_INSUFFICIENT_RESOURCES; - } - - RtlZeroMemory(context->stripes, sizeof(read_data_stripe) * ci->num_stripes); - - context->buflen = length; - context->num_stripes = ci->num_stripes; - context->stripes_left = context->num_stripes; - context->sector_size = Vcb->superblock.sector_size; - context->csum = csum; - context->tree = is_tree; - context->type = type; - context->check_nocsum_parity = check_nocsum_parity; - - stripestart = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT64) * ci->num_stripes, ALLOC_TAG); - if (!stripestart) { + RtlZeroMemory(&context, sizeof(read_data_context)); + KeInitializeEvent(&context.Event, NotificationEvent, FALSE); + + context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG); + if (!context.stripes) { ERR("out of memory\n"); - ExFreePool(context); return STATUS_INSUFFICIENT_RESOURCES; } - - stripeend = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT64) * ci->num_stripes, ALLOC_TAG); - if (!stripeend) { - ERR("out of memory\n"); - ExFreePool(stripestart); - ExFreePool(context); - return STATUS_INSUFFICIENT_RESOURCES; + + if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) { + get_raid56_lock_range(c, addr, length, &lockaddr, &locklen); + chunk_lock_range(Vcb, c, lockaddr, locklen); } - + + RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * ci->num_stripes); + + context.buflen = length; + context.num_stripes = ci->num_stripes; + context.stripes_left = context.num_stripes; + context.sector_size = Vcb->superblock.sector_size; + context.csum = csum; + context.tree = is_tree; + context.type = type; + if (type == BLOCK_FLAG_RAID0) { UINT64 startoff, endoff; - UINT16 endoffstripe; - + UINT16 endoffstripe, stripe; + UINT32 *stripeoff, pos; + PMDL master_mdl; + PFN_NUMBER* pfns; + + // FIXME - test this still works if page size isn't the same as sector size + + // This relies on the fact that MDLs are followed in memory by the 
page file numbers, + // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0 + // data for you without doing a memcpy yourself. + // MDLs are officially opaque, so this might very well break in future versions of Windows. + get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe); get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe); - + + if (file_read) { + // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL + // with duplicated dummy PFNs, which confuse check_csum. Ah well. + // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested. + + context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); + + if (!context.va) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + } else + context.va = buf; + + master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL); + if (!master_mdl) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + Status = STATUS_SUCCESS; + + _SEH2_TRY { + MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + + if (!NT_SUCCESS(Status)) { + ERR("MmProbeAndLockPages threw exception %08x\n", Status); + IoFreeMdl(master_mdl); + goto exit; + } + + pfns = (PFN_NUMBER*)(master_mdl + 1); + for (i = 0; i < ci->num_stripes; i++) { - if (startoffstripe > i) { - stripestart[i] = startoff - (startoff % ci->stripe_length) + ci->stripe_length; - } else if (startoffstripe == i) { - stripestart[i] = startoff; - } else { - stripestart[i] = startoff - (startoff % ci->stripe_length); + if (startoffstripe > i) + context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length) + ci->stripe_length; + else if (startoffstripe == i) + context.stripes[i].stripestart = 
startoff; + else + context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length); + + if (endoffstripe > i) + context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; + else if (endoffstripe == i) + context.stripes[i].stripeend = endoff + 1; + else + context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length); + + if (context.stripes[i].stripestart != context.stripes[i].stripeend) { + context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), FALSE, FALSE, NULL); + + if (!context.stripes[i].mdl) { + ERR("IoAllocateMdl failed\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } } - - if (endoffstripe > i) { - stripeend[i] = endoff - (endoff % ci->stripe_length) + ci->stripe_length; - } else if (endoffstripe == i) { - stripeend[i] = endoff + 1; + } + + stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG); + if (!stripeoff) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes); + + pos = 0; + stripe = startoffstripe; + while (pos < length) { + PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); + + if (pos == 0) { + UINT32 readlen = (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)); + + RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + + stripeoff[stripe] += readlen; + pos += readlen; + } else if (length - pos < ci->stripe_length) { + RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + + pos = length; } else { - stripeend[i] = endoff - (endoff % ci->stripe_length); + RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], 
(ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); + + stripeoff[stripe] += (UINT32)ci->stripe_length; + pos += (UINT32)ci->stripe_length; } + + stripe = (stripe + 1) % ci->num_stripes; } + + MmUnlockPages(master_mdl); + IoFreeMdl(master_mdl); + + ExFreePool(stripeoff); } else if (type == BLOCK_FLAG_RAID10) { UINT64 startoff, endoff; - UINT16 endoffstripe, j; - + UINT16 endoffstripe, j, stripe; + ULONG orig_ls; + PMDL master_mdl; + PFN_NUMBER* pfns; + UINT32* stripeoff, pos; + read_data_stripe** stripes; + + if (c) + orig_ls = c->last_stripe; + else + orig_ls = 0; + get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe); get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe); - + if ((ci->num_stripes % ci->sub_stripes) != 0) { ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes); Status = STATUS_INTERNAL_ERROR; goto exit; } - - context->firstoff = (startoff % ci->stripe_length) / Vcb->superblock.sector_size; - context->startoffstripe = startoffstripe; - context->sectors_per_stripe = ci->stripe_length / Vcb->superblock.sector_size; - + + if (file_read) { + context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); + + if (!context.va) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + } else + context.va = buf; + + context.firstoff = (UINT16)((startoff % ci->stripe_length) / Vcb->superblock.sector_size); + context.startoffstripe = startoffstripe; + context.sectors_per_stripe = (UINT16)(ci->stripe_length / Vcb->superblock.sector_size); + startoffstripe *= ci->sub_stripes; endoffstripe *= ci->sub_stripes; - - for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) { - if (startoffstripe > i) { - stripestart[i] = startoff - (startoff % ci->stripe_length) + ci->stripe_length; - } else if (startoffstripe == i) { - 
stripestart[i] = startoff; - } else { - stripestart[i] = startoff - (startoff % ci->stripe_length); - } - - if (endoffstripe > i) { - stripeend[i] = endoff - (endoff % ci->stripe_length) + ci->stripe_length; - } else if (endoffstripe == i) { - stripeend[i] = endoff + 1; - } else { - stripeend[i] = endoff - (endoff % ci->stripe_length); - } - - for (j = 1; j < ci->sub_stripes; j++) { - stripestart[i+j] = stripestart[i]; - stripeend[i+j] = stripeend[i]; - } - } - - context->stripes_cancel = 1; - } else if (type == BLOCK_FLAG_DUPLICATE) { - for (i = 0; i < ci->num_stripes; i++) { - stripestart[i] = addr - offset; - stripeend[i] = stripestart[i] + length; + + if (c) + c->last_stripe = (orig_ls + 1) % ci->sub_stripes; + + master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL); + if (!master_mdl) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; } - - context->stripes_cancel = ci->num_stripes - 1; - } else if (type == BLOCK_FLAG_RAID5) { - UINT64 startoff, endoff; - UINT16 endoffstripe; - UINT64 start = 0xffffffffffffffff, end = 0; - - get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe); - get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe); - - for (i = 0; i < ci->num_stripes - 1; i++) { - UINT64 ststart, stend; - - if (startoffstripe > i) { - ststart = startoff - (startoff % ci->stripe_length) + ci->stripe_length; - } else if (startoffstripe == i) { - ststart = startoff; - } else { - ststart = startoff - (startoff % ci->stripe_length); - } - - if (endoffstripe > i) { - stend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; - } else if (endoffstripe == i) { - stend = endoff + 1; - } else { - stend = endoff - (endoff % ci->stripe_length); - } - - if (ststart != stend) { - if (ststart < start) { - start = ststart; - firststripesize = ci->stripe_length - (ststart % ci->stripe_length); - } - - if (stend > end) - 
end = stend; - } + + Status = STATUS_SUCCESS; + + _SEH2_TRY { + MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + + if (!NT_SUCCESS(Status)) { + ERR("MmProbeAndLockPages threw exception %08x\n", Status); + IoFreeMdl(master_mdl); + goto exit; } - - for (i = 0; i < ci->num_stripes; i++) { - stripestart[i] = start; - stripeend[i] = end; + + pfns = (PFN_NUMBER*)(master_mdl + 1); + + stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG); + if (!stripes) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; } - - context->stripes_cancel = Vcb->options.raid5_recalculation; - } else if (type == BLOCK_FLAG_RAID6) { - UINT64 startoff, endoff; - UINT16 endoffstripe; - UINT64 start = 0xffffffffffffffff, end = 0; - - get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe); - get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe); - - for (i = 0; i < ci->num_stripes - 2; i++) { - UINT64 ststart, stend; - - if (startoffstripe > i) { - ststart = startoff - (startoff % ci->stripe_length) + ci->stripe_length; - } else if (startoffstripe == i) { - ststart = startoff; - } else { - ststart = startoff - (startoff % ci->stripe_length); - } - - if (endoffstripe > i) { - stend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; - } else if (endoffstripe == i) { - stend = endoff + 1; - } else { - stend = endoff - (endoff % ci->stripe_length); - } - - if (ststart != stend) { - if (ststart < start) { - start = ststart; - firststripesize = ci->stripe_length - (ststart % ci->stripe_length); + + RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes); + + for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) { + UINT64 sstart, send; + BOOL stripeset = FALSE; 
+ + if (startoffstripe > i) + sstart = startoff - (startoff % ci->stripe_length) + ci->stripe_length; + else if (startoffstripe == i) + sstart = startoff; + else + sstart = startoff - (startoff % ci->stripe_length); + + if (endoffstripe > i) + send = endoff - (endoff % ci->stripe_length) + ci->stripe_length; + else if (endoffstripe == i) + send = endoff + 1; + else + send = endoff - (endoff % ci->stripe_length); + + for (j = 0; j < ci->sub_stripes; j++) { + if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) { + context.stripes[i+j].stripestart = sstart; + context.stripes[i+j].stripeend = send; + stripes[i / ci->sub_stripes] = &context.stripes[i+j]; + + if (sstart != send) { + context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), FALSE, FALSE, NULL); + + if (!context.stripes[i+j].mdl) { + ERR("IoAllocateMdl failed\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + } + + stripeset = TRUE; + } else + context.stripes[i+j].status = ReadDataStatus_Skip; + } + + if (!stripeset) { + for (j = 0; j < ci->sub_stripes; j++) { + if (devices[i+j] && devices[i+j]->devobj) { + context.stripes[i+j].stripestart = sstart; + context.stripes[i+j].stripeend = send; + context.stripes[i+j].status = ReadDataStatus_Pending; + stripes[i / ci->sub_stripes] = &context.stripes[i+j]; + + if (sstart != send) { + context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), FALSE, FALSE, NULL); + + if (!context.stripes[i+j].mdl) { + ERR("IoAllocateMdl failed\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + } + + stripeset = TRUE; + break; + } + } + + if (!stripeset) { + ERR("could not find stripe to read\n"); + Status = STATUS_DEVICE_NOT_READY; + goto exit; + } + } + } + + stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG); + if (!stripeoff) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + RtlZeroMemory(stripeoff, 
sizeof(UINT32) * ci->num_stripes / ci->sub_stripes); + + pos = 0; + stripe = startoffstripe / ci->sub_stripes; + while (pos < length) { + PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1); + + if (pos == 0) { + UINT32 readlen = (UINT32)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart, + ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length)); + + RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + + stripeoff[stripe] += readlen; + pos += readlen; + } else if (length - pos < ci->stripe_length) { + RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + + pos = length; + } else { + RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); + + stripeoff[stripe] += (ULONG)ci->stripe_length; + pos += (ULONG)ci->stripe_length; + } + + stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes); + } + + MmUnlockPages(master_mdl); + IoFreeMdl(master_mdl); + + ExFreePool(stripeoff); + ExFreePool(stripes); + } else if (type == BLOCK_FLAG_DUPLICATE) { + UINT64 orig_ls; + + if (c) + orig_ls = i = c->last_stripe; + else + orig_ls = i = 0; + + while (!devices[i] || !devices[i]->devobj) { + i = (i + 1) % ci->num_stripes; + + if (i == orig_ls) { + ERR("no devices available to service request\n"); + Status = STATUS_DEVICE_NOT_READY; + goto exit; + } + } + + if (c) + c->last_stripe = (i + 1) % ci->num_stripes; + + context.stripes[i].stripestart = addr - offset; + context.stripes[i].stripeend = context.stripes[i].stripestart + length; + + if (file_read) { + context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); + + if (!context.va) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + context.stripes[i].mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL); + if 
(!context.stripes[i].mdl) { + ERR("IoAllocateMdl failed\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + MmBuildMdlForNonPagedPool(context.stripes[i].mdl); + } else { + context.stripes[i].mdl = IoAllocateMdl(buf, length, FALSE, FALSE, NULL); + + if (!context.stripes[i].mdl) { + ERR("IoAllocateMdl failed\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + Status = STATUS_SUCCESS; + + _SEH2_TRY { + MmProbeAndLockPages(context.stripes[i].mdl, KernelMode, IoWriteAccess); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + + if (!NT_SUCCESS(Status)) { + ERR("MmProbeAndLockPages threw exception %08x\n", Status); + goto exit; + } + } + } else if (type == BLOCK_FLAG_RAID5) { + UINT64 startoff, endoff; + UINT16 endoffstripe, parity; + UINT32 *stripeoff, pos; + PMDL master_mdl; + PFN_NUMBER *pfns, dummy; + BOOL need_dummy = FALSE; + + get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe); + get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe); + + if (file_read) { + context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); + + if (!context.va) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + } else + context.va = buf; + + master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL); + if (!master_mdl) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + Status = STATUS_SUCCESS; + + _SEH2_TRY { + MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + + if (!NT_SUCCESS(Status)) { + ERR("MmProbeAndLockPages threw exception %08x\n", Status); + IoFreeMdl(master_mdl); + goto exit; + } + + pfns = (PFN_NUMBER*)(master_mdl + 1); + + pos = 0; + while (pos < length) { + parity = (((addr - offset + pos) / 
((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; + + if (pos == 0) { + UINT16 stripe = (parity + startoffstripe + 1) % ci->num_stripes; + ULONG skip, readlen; + + i = startoffstripe; + while (stripe != parity) { + if (i == startoffstripe) { + readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length))); + + context.stripes[stripe].stripestart = startoff; + context.stripes[stripe].stripeend = startoff + readlen; + + pos += readlen; + + if (pos == length) + break; + } else { + readlen = min(length - pos, (ULONG)ci->stripe_length); + + context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length); + context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen; + + pos += readlen; + + if (pos == length) + break; + } + + i++; + stripe = (stripe + 1) % ci->num_stripes; + } + + if (pos == length) + break; + + for (i = 0; i < startoffstripe; i++) { + UINT16 stripe2 = (parity + i + 1) % ci->num_stripes; + + context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; + } + + context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; + + if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) { + skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1); + + for (i = 0; i < ci->num_stripes; i++) { + context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length; + } + + pos += (UINT32)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length); + need_dummy = TRUE; + } + } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) { + for (i = 0; i < ci->num_stripes; i++) { + context.stripes[i].stripeend += ci->stripe_length; + } + + pos += (UINT32)(ci->stripe_length * (ci->num_stripes - 1)); + need_dummy = TRUE; + } else { + 
UINT16 stripe = (parity + 1) % ci->num_stripes; + + i = 0; + while (stripe != parity) { + if (endoffstripe == i) { + context.stripes[stripe].stripeend = endoff + 1; + break; + } else if (endoffstripe > i) + context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; + + i++; + stripe = (stripe + 1) % ci->num_stripes; + } + + break; + } + } + + for (i = 0; i < ci->num_stripes; i++) { + if (context.stripes[i].stripestart != context.stripes[i].stripeend) { + context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), + FALSE, FALSE, NULL); + + if (!context.stripes[i].mdl) { + ERR("IoAllocateMdl failed\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + } + } + + if (need_dummy) { + dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG); + if (!dummypage) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, FALSE, FALSE, NULL); + if (!dummy_mdl) { + ERR("IoAllocateMdl failed\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + ExFreePool(dummypage); + goto exit; + } + + MmBuildMdlForNonPagedPool(dummy_mdl); + + dummy = *(PFN_NUMBER*)(dummy_mdl + 1); + } + + stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG); + if (!stripeoff) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes); + + pos = 0; + + while (pos < length) { + PFN_NUMBER* stripe_pfns; + + parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; + + if (pos == 0) { + UINT16 stripe = (parity + startoffstripe + 1) % ci->num_stripes; + UINT32 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, + ci->stripe_length - 
(context.stripes[stripe].stripestart % ci->stripe_length))); + + stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); + + RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + + stripeoff[stripe] = readlen; + pos += readlen; + + stripe = (stripe + 1) % ci->num_stripes; + + while (stripe != parity) { + stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); + readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); + + if (readlen == 0) + break; + + RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + + stripeoff[stripe] = readlen; + pos += readlen; + + stripe = (stripe + 1) % ci->num_stripes; + } + } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) { + UINT16 stripe = (parity + 1) % ci->num_stripes; + ULONG k; + + while (stripe != parity) { + stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); + + RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); + + stripeoff[stripe] += (UINT32)ci->stripe_length; + pos += (UINT32)ci->stripe_length; + + stripe = (stripe + 1) % ci->num_stripes; + } + + stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1); + + for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) { + stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy; + stripeoff[parity] += PAGE_SIZE; + } + } else { + UINT16 stripe = (parity + 1) % ci->num_stripes; + UINT32 readlen; + + while (pos < length) { + stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); + readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); + + if (readlen == 0) + break; + + RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + + 
stripeoff[stripe] += readlen; + pos += readlen; + + stripe = (stripe + 1) % ci->num_stripes; + } + } + } + + MmUnlockPages(master_mdl); + IoFreeMdl(master_mdl); + + ExFreePool(stripeoff); + } else if (type == BLOCK_FLAG_RAID6) { + UINT64 startoff, endoff; + UINT16 endoffstripe, parity1; + UINT32 *stripeoff, pos; + PMDL master_mdl; + PFN_NUMBER *pfns, dummy; + BOOL need_dummy = FALSE; + + get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe); + get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe); + + if (file_read) { + context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); + + if (!context.va) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + } else + context.va = buf; + + master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL); + if (!master_mdl) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + Status = STATUS_SUCCESS; + + _SEH2_TRY { + MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + + if (!NT_SUCCESS(Status)) { + ERR("MmProbeAndLockPages threw exception %08x\n", Status); + IoFreeMdl(master_mdl); + goto exit; + } + + pfns = (PFN_NUMBER*)(master_mdl + 1); + + pos = 0; + while (pos < length) { + parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; + + if (pos == 0) { + UINT16 stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2; + ULONG skip, readlen; + + i = startoffstripe; + while (stripe != parity1) { + if (i == startoffstripe) { + readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length)); + + context.stripes[stripe].stripestart = startoff; + context.stripes[stripe].stripeend = startoff + readlen; + + pos += readlen; + + if (pos == length) + 
break; + } else { + readlen = min(length - pos, (ULONG)ci->stripe_length); + + context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length); + context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen; + + pos += readlen; + + if (pos == length) + break; + } + + i++; + stripe = (stripe + 1) % ci->num_stripes; + } + + if (pos == length) + break; + + for (i = 0; i < startoffstripe; i++) { + UINT16 stripe2 = (parity1 + i + 2) % ci->num_stripes; + + context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; + } + + context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; + + parity2 = (parity1 + 1) % ci->num_stripes; + context.stripes[parity2].stripestart = context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; + + if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) { + skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1); + + for (i = 0; i < ci->num_stripes; i++) { + context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length; + } + + pos += (UINT32)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length); + need_dummy = TRUE; + } + } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) { + for (i = 0; i < ci->num_stripes; i++) { + context.stripes[i].stripeend += ci->stripe_length; + } + + pos += (UINT32)(ci->stripe_length * (ci->num_stripes - 2)); + need_dummy = TRUE; + } else { + UINT16 stripe = (parity1 + 2) % ci->num_stripes; + + i = 0; + while (stripe != parity1) { + if (endoffstripe == i) { + context.stripes[stripe].stripeend = endoff + 1; + break; + } else if (endoffstripe > i) + context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; + + i++; + stripe = (stripe 
+ 1) % ci->num_stripes; + } + + break; + } + } + + for (i = 0; i < ci->num_stripes; i++) { + if (context.stripes[i].stripestart != context.stripes[i].stripeend) { + context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), FALSE, FALSE, NULL); + + if (!context.stripes[i].mdl) { + ERR("IoAllocateMdl failed\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + } + } + + if (need_dummy) { + dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG); + if (!dummypage) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, FALSE, FALSE, NULL); + if (!dummy_mdl) { + ERR("IoAllocateMdl failed\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + ExFreePool(dummypage); + goto exit; + } + + MmBuildMdlForNonPagedPool(dummy_mdl); + + dummy = *(PFN_NUMBER*)(dummy_mdl + 1); + } + + stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG); + if (!stripeoff) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes); + + pos = 0; + + while (pos < length) { + PFN_NUMBER* stripe_pfns; + + parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; + + if (pos == 0) { + UINT16 stripe = (parity1 + startoffstripe + 2) % ci->num_stripes; + UINT32 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, + ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length))); + + stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); + + RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + + stripeoff[stripe] = readlen; + pos += readlen; + + stripe = (stripe + 1) % ci->num_stripes; + + while (stripe != parity1) { + stripe_pfns = 
(PFN_NUMBER*)(context.stripes[stripe].mdl + 1); + readlen = (UINT32)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); + + if (readlen == 0) + break; + + RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + + stripeoff[stripe] = readlen; + pos += readlen; + + stripe = (stripe + 1) % ci->num_stripes; + } + } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) { + UINT16 stripe = (parity1 + 2) % ci->num_stripes; + UINT16 parity2 = (parity1 + 1) % ci->num_stripes; + ULONG k; + + while (stripe != parity1) { + stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); + + RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); + + stripeoff[stripe] += (UINT32)ci->stripe_length; + pos += (UINT32)ci->stripe_length; + + stripe = (stripe + 1) % ci->num_stripes; + } + + stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1); + + for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) { + stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy; + stripeoff[parity1] += PAGE_SIZE; + } + + stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1); + + for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) { + stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy; + stripeoff[parity2] += PAGE_SIZE; + } + } else { + UINT16 stripe = (parity1 + 2) % ci->num_stripes; + UINT32 readlen; + + while (pos < length) { + stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); + readlen = (UINT32)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); + + if (readlen == 0) + break; + + RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + + stripeoff[stripe] += readlen; + pos += readlen; + + stripe = (stripe + 1) % 
ci->num_stripes; } - - if (stend > end) - end = stend; } } - - for (i = 0; i < ci->num_stripes; i++) { - stripestart[i] = start; - stripeend[i] = end; - } - - context->stripes_cancel = Vcb->options.raid6_recalculation; + + MmUnlockPages(master_mdl); + IoFreeMdl(master_mdl); + + ExFreePool(stripeoff); } - - KeInitializeSpinLock(&context->spin_lock); - - context->address = addr; - + + context.address = addr; + for (i = 0; i < ci->num_stripes; i++) { - if (!devices[i] || stripestart[i] == stripeend[i]) { - context->stripes[i].status = ReadDataStatus_MissingDevice; - context->stripes[i].buf = NULL; - context->stripes_left--; - - if (!devices[i]) + if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) { + context.stripes[i].status = ReadDataStatus_MissingDevice; + context.stripes_left--; + + if (!devices[i] || !devices[i]->devobj) missing_devices++; } } - + if (missing_devices > allowed_missing) { ERR("not enough devices to service request (%u missing)\n", missing_devices); Status = STATUS_UNEXPECTED_IO_ERROR; goto exit; } - + for (i = 0; i < ci->num_stripes; i++) { PIO_STACK_LOCATION IrpSp; - - if (devices[i] && stripestart[i] != stripeend[i]) { - context->stripes[i].context = (struct read_data_context*)context; - context->stripes[i].buf = ExAllocatePoolWithTag(NonPagedPool, stripeend[i] - stripestart[i], ALLOC_TAG); - - if (!context->stripes[i].buf) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; - } - + + if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) { + context.stripes[i].context = (struct read_data_context*)&context; + if (type == BLOCK_FLAG_RAID10) { - context->stripes[i].stripenum = i / ci->sub_stripes; + context.stripes[i].stripenum = i / ci->sub_stripes; } if (!Irp) { - context->stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE); - - if 
(!context->stripes[i].Irp) { + context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE); + + if (!context.stripes[i].Irp) { ERR("IoAllocateIrp failed\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto exit; } } else { - context->stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize); - - if (!context->stripes[i].Irp) { + context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize); + + if (!context.stripes[i].Irp) { ERR("IoMakeAssociatedIrp failed\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto exit; } } - - IrpSp = IoGetNextIrpStackLocation(context->stripes[i].Irp); + + IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp); IrpSp->MajorFunction = IRP_MJ_READ; - + if (devices[i]->devobj->Flags & DO_BUFFERED_IO) { - FIXME("FIXME - buffered IO\n"); - } else if (devices[i]->devobj->Flags & DO_DIRECT_IO) { - context->stripes[i].Irp->MdlAddress = IoAllocateMdl(context->stripes[i].buf, stripeend[i] - stripestart[i], FALSE, FALSE, NULL); - if (!context->stripes[i].Irp->MdlAddress) { - ERR("IoAllocateMdl failed\n"); + context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG); + if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) { + ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto exit; } - - MmProbeAndLockPages(context->stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess); - } else { - context->stripes[i].Irp->UserBuffer = context->stripes[i].buf; - } - IrpSp->Parameters.Read.Length = stripeend[i] - stripestart[i]; - IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[i] + cis[i].offset; - - context->stripes[i].Irp->UserIosb = &context->stripes[i].iosb; - - IoSetCompletionRoutine(context->stripes[i].Irp, read_data_completion, &context->stripes[i], TRUE, TRUE, TRUE); + context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION; + + 
context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority); + } else if (devices[i]->devobj->Flags & DO_DIRECT_IO) + context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl; + else + context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority); + + IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart); + IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset; + + total_reading += IrpSp->Parameters.Read.Length; - context->stripes[i].status = ReadDataStatus_Pending; + context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb; + + IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], TRUE, TRUE, TRUE); + + context.stripes[i].status = ReadDataStatus_Pending; } } - + #ifdef DEBUG_STATS if (!is_tree) time1 = KeQueryPerformanceCounter(NULL); #endif - + + need_to_wait = FALSE; for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status != ReadDataStatus_MissingDevice) { - IoCallDriver(devices[i]->devobj, context->stripes[i].Irp); + if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) { + IoCallDriver(devices[i]->devobj, context.stripes[i].Irp); + need_to_wait = TRUE; } } - KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL); - + if (need_to_wait) + KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL); + #ifdef DEBUG_STATS if (!is_tree) { time2 = KeQueryPerformanceCounter(NULL); - + Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart; } #endif - + + if (diskacc) + fFsRtlUpdateDiskCounters(total_reading, 0); + // check if any of the devices return a "user-induced" error - + for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context->stripes[i].iosb.Status)) { - if (Irp && 
context->stripes[i].iosb.Status == STATUS_VERIFY_REQUIRED) { - PDEVICE_OBJECT dev; - - dev = IoGetDeviceToVerify(Irp->Tail.Overlay.Thread); - IoSetDeviceToVerify(Irp->Tail.Overlay.Thread, NULL); - - if (!dev) { - dev = IoGetDeviceToVerify(PsGetCurrentThread()); - IoSetDeviceToVerify(PsGetCurrentThread(), NULL); - } - - dev = Vcb->Vpb ? Vcb->Vpb->RealDevice : NULL; - - if (dev) - IoVerifyVolume(dev, FALSE); - } -// IoSetHardErrorOrVerifyDevice(context->stripes[i].Irp, devices[i]->devobj); - - Status = context->stripes[i].iosb.Status; + if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) { + Status = context.stripes[i].iosb.Status; goto exit; } } - + if (type == BLOCK_FLAG_RAID0) { - Status = read_data_raid0(Vcb, buf, addr, length, context, ci, stripestart, stripeend, startoffstripe); + Status = read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset); if (!NT_SUCCESS(Status)) { ERR("read_data_raid0 returned %08x\n", Status); + + if (file_read) + ExFreePool(context.va); + goto exit; } + + if (file_read) { + RtlCopyMemory(buf, context.va, length); + ExFreePool(context.va); + } } else if (type == BLOCK_FLAG_RAID10) { - Status = read_data_raid10(Vcb, buf, addr, length, Irp, context, ci, devices, stripestart, stripeend, startoffstripe); + Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset); + if (!NT_SUCCESS(Status)) { ERR("read_data_raid10 returned %08x\n", Status); + + if (file_read) + ExFreePool(context.va); + goto exit; } + + if (file_read) { + RtlCopyMemory(buf, context.va, length); + ExFreePool(context.va); + } } else if (type == BLOCK_FLAG_DUPLICATE) { - Status = read_data_dup(Vcb, buf, addr, length, Irp, context, ci, devices, stripestart, stripeend); + Status = read_data_dup(Vcb, file_read ? 
context.va : buf, addr, &context, ci, devices, generation); if (!NT_SUCCESS(Status)) { ERR("read_data_dup returned %08x\n", Status); + + if (file_read) + ExFreePool(context.va); + goto exit; } + + if (file_read) { + RtlCopyMemory(buf, context.va, length); + ExFreePool(context.va); + } } else if (type == BLOCK_FLAG_RAID5) { - Status = read_data_raid5(Vcb, buf, addr, length, Irp, context, ci, devices, stripestart, stripeend, offset, firststripesize, check_nocsum_parity); + Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? TRUE : FALSE); if (!NT_SUCCESS(Status)) { ERR("read_data_raid5 returned %08x\n", Status); + + if (file_read) + ExFreePool(context.va); + goto exit; } + + if (file_read) { + RtlCopyMemory(buf, context.va, length); + ExFreePool(context.va); + } } else if (type == BLOCK_FLAG_RAID6) { - Status = read_data_raid6(Vcb, buf, addr, length, Irp, context, ci, devices, stripestart, stripeend, offset, firststripesize, check_nocsum_parity); + Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? 
TRUE : FALSE); if (!NT_SUCCESS(Status)) { ERR("read_data_raid6 returned %08x\n", Status); + + if (file_read) + ExFreePool(context.va); + goto exit; } + + if (file_read) { + RtlCopyMemory(buf, context.va, length); + ExFreePool(context.va); + } } exit: - if (stripestart) ExFreePool(stripestart); - if (stripeend) ExFreePool(stripeend); + if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) + chunk_unlock_range(Vcb, c, lockaddr, locklen); + + if (dummy_mdl) + IoFreeMdl(dummy_mdl); + + if (dummypage) + ExFreePool(dummypage); for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].Irp) { - if (devices[i]->devobj->Flags & DO_DIRECT_IO) { - MmUnlockPages(context->stripes[i].Irp->MdlAddress); - IoFreeMdl(context->stripes[i].Irp->MdlAddress); - } - IoFreeIrp(context->stripes[i].Irp); + if (context.stripes[i].mdl) { + if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED) + MmUnlockPages(context.stripes[i].mdl); + + IoFreeMdl(context.stripes[i].mdl); } - - if (context->stripes[i].buf) - ExFreePool(context->stripes[i].buf); + + if (context.stripes[i].Irp) + IoFreeIrp(context.stripes[i].Irp); } - ExFreePool(context->stripes); - ExFreePool(context); - + ExFreePool(context.stripes); + if (!Vcb->log_to_phys_loaded) ExFreePool(devices); - + return Status; } -static NTSTATUS STDCALL read_stream(fcb* fcb, UINT8* data, UINT64 start, ULONG length, ULONG* pbr) { +NTSTATUS read_stream(fcb* fcb, UINT8* data, UINT64 start, ULONG length, ULONG* pbr) { ULONG readlen; - NTSTATUS Status; - + TRACE("(%p, %p, %llx, %llx, %p)\n", fcb, data, start, length, pbr); - + if (pbr) *pbr = 0; - + if (start >= fcb->adsdata.Length) { TRACE("tried to read beyond end of stream\n"); return STATUS_END_OF_FILE; } - + if (length == 0) { WARN("tried to read zero bytes\n"); return STATUS_SUCCESS; } - + if (start + length < fcb->adsdata.Length) readlen = length; else readlen = fcb->adsdata.Length - (ULONG)start; - + if (readlen > 0) RtlCopyMemory(data + start, fcb->adsdata.Buffer, readlen); - 
+ if (pbr) *pbr = readlen; - - Status = STATUS_SUCCESS; - - return Status; + + return STATUS_SUCCESS; } -NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, ULONG* pbr, PIRP Irp, BOOL check_nocsum_parity) { +NTSTATUS read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, ULONG* pbr, PIRP Irp) { NTSTATUS Status; EXTENT_DATA* ed; - UINT64 bytes_read = 0; + UINT32 bytes_read = 0; UINT64 last_end; LIST_ENTRY* le; #ifdef DEBUG_STATS LARGE_INTEGER time1, time2; #endif - + TRACE("(%p, %p, %llx, %llx, %p)\n", fcb, data, start, length, pbr); - + if (pbr) *pbr = 0; - + if (start >= fcb->inode_item.st_size) { WARN("Tried to read beyond end of file\n"); Status = STATUS_END_OF_FILE; - goto exit; + goto exit; } - + #ifdef DEBUG_STATS time1 = KeQueryPerformanceCounter(NULL); #endif @@ -3023,86 +2741,147 @@ NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, U UINT64 len; extent* ext = CONTAINING_RECORD(le, extent, list_entry); EXTENT_DATA2* ed2; - + if (!ext->ignore) { - ed = ext->data; - + ed = &ext->extent_data; + ed2 = (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) ? (EXTENT_DATA2*)ed->data : NULL; - + len = ed2 ? 
ed2->num_bytes : ed->decoded_size; - + if (ext->offset + len <= start) { last_end = ext->offset + len; goto nextitem; } - + if (ext->offset > last_end && ext->offset > start + bytes_read) { - UINT32 read = min(length, ext->offset - max(start, last_end)); - + UINT32 read = (UINT32)min(length, ext->offset - max(start, last_end)); + RtlZeroMemory(data + bytes_read, read); bytes_read += read; length -= read; } - + if (length == 0 || ext->offset > start + bytes_read + length) break; - + if (ed->encryption != BTRFS_ENCRYPTION_NONE) { WARN("Encryption not supported\n"); Status = STATUS_NOT_IMPLEMENTED; goto exit; } - + if (ed->encoding != BTRFS_ENCODING_NONE) { WARN("Other encodings not supported\n"); Status = STATUS_NOT_IMPLEMENTED; goto exit; } - + switch (ed->type) { case EXTENT_TYPE_INLINE: { UINT64 off = start + bytes_read - ext->offset; - UINT64 read = len - off; - - if (read > length) read = length; - - RtlCopyMemory(data + bytes_read, &ed->data[off], read); - - // FIXME - can we have compressed inline extents? 
- + UINT32 read; + + if (ed->compression == BTRFS_COMPRESSION_NONE) { + read = (UINT32)min(min(len, ext->datalen) - off, length); + + RtlCopyMemory(data + bytes_read, &ed->data[off], read); + } else if (ed->compression == BTRFS_COMPRESSION_ZLIB || ed->compression == BTRFS_COMPRESSION_LZO) { + UINT8* decomp; + BOOL decomp_alloc; + UINT16 inlen = ext->datalen - (UINT16)offsetof(EXTENT_DATA, data[0]); + + if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) { + ERR("ed->decoded_size was invalid (%llx)\n", ed->decoded_size); + Status = STATUS_INTERNAL_ERROR; + goto exit; + } + + read = (UINT32)min(ed->decoded_size - off, length); + + if (off > 0) { + decomp = ExAllocatePoolWithTag(NonPagedPool, (UINT32)ed->decoded_size, ALLOC_TAG); + if (!decomp) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + decomp_alloc = TRUE; + } else { + decomp = data + bytes_read; + decomp_alloc = FALSE; + } + + if (ed->compression == BTRFS_COMPRESSION_ZLIB) { + Status = zlib_decompress(ed->data, inlen, decomp, (UINT32)(read + off)); + if (!NT_SUCCESS(Status)) { + ERR("zlib_decompress returned %08x\n", Status); + if (decomp_alloc) ExFreePool(decomp); + goto exit; + } + } else if (ed->compression == BTRFS_COMPRESSION_LZO) { + if (inlen < sizeof(UINT32)) { + ERR("extent data was truncated\n"); + Status = STATUS_INTERNAL_ERROR; + if (decomp_alloc) ExFreePool(decomp); + goto exit; + } else + inlen -= sizeof(UINT32); + + Status = lzo_decompress(ed->data + sizeof(UINT32), inlen, decomp, (UINT32)(read + off), sizeof(UINT32)); + if (!NT_SUCCESS(Status)) { + ERR("lzo_decompress returned %08x\n", Status); + if (decomp_alloc) ExFreePool(decomp); + goto exit; + } + } + + if (decomp_alloc) { + RtlCopyMemory(data + bytes_read, decomp + off, read); + ExFreePool(decomp); + } + } else { + ERR("unhandled compression type %x\n", ed->compression); + Status = STATUS_NOT_IMPLEMENTED; + goto exit; + } + bytes_read += read; length -= read; + break; } - + case 
EXTENT_TYPE_REGULAR: { UINT64 off = start + bytes_read - ext->offset; UINT32 to_read, read; UINT8* buf; + BOOL mdl = (Irp && Irp->MdlAddress) ? TRUE : FALSE; BOOL buf_free; - UINT32 bumpoff = 0; - UINT64 addr, lockaddr, locklen; + UINT32 bumpoff = 0, *csum; + UINT64 addr; chunk* c; - - read = len - off; - if (read > length) read = length; - + + read = (UINT32)(len - off); + if (read > length) read = (UINT32)length; + if (ed->compression == BTRFS_COMPRESSION_NONE) { addr = ed2->address + ed2->offset + off; - to_read = sector_align(read, fcb->Vcb->superblock.sector_size); - + to_read = (UINT32)sector_align(read, fcb->Vcb->superblock.sector_size); + if (addr % fcb->Vcb->superblock.sector_size > 0) { bumpoff = addr % fcb->Vcb->superblock.sector_size; addr -= bumpoff; - to_read = sector_align(read + bumpoff, fcb->Vcb->superblock.sector_size); + to_read = (UINT32)sector_align(read + bumpoff, fcb->Vcb->superblock.sector_size); } } else { addr = ed2->address; - to_read = sector_align(ed2->size, fcb->Vcb->superblock.sector_size); + to_read = (UINT32)sector_align(ed2->size, fcb->Vcb->superblock.sector_size); } - + if (ed->compression == BTRFS_COMPRESSION_NONE && start % fcb->Vcb->superblock.sector_size == 0 && length % fcb->Vcb->superblock.sector_size == 0) { buf = data + bytes_read; @@ -3110,101 +2889,166 @@ NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, U } else { buf = ExAllocatePoolWithTag(PagedPool, to_read, ALLOC_TAG); buf_free = TRUE; - + if (!buf) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto exit; } + + mdl = FALSE; } - + c = get_chunk_from_address(fcb->Vcb, addr); - + if (!c) { ERR("get_chunk_from_address(%llx) failed\n", addr); - + if (buf_free) ExFreePool(buf); - + goto exit; } - - if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) { - get_raid56_lock_range(c, addr, to_read, &lockaddr, &locklen); - chunk_lock_range(fcb->Vcb, c, lockaddr, locklen); - } - - Status = 
read_data(fcb->Vcb, addr, to_read, ext->csum ? &ext->csum[off / fcb->Vcb->superblock.sector_size] : NULL, FALSE, - buf, c, NULL, Irp, check_nocsum_parity); + + if (ext->csum) { + if (ed->compression == BTRFS_COMPRESSION_NONE) + csum = &ext->csum[off / fcb->Vcb->superblock.sector_size]; + else + csum = ext->csum; + } else + csum = NULL; + + Status = read_data(fcb->Vcb, addr, to_read, csum, FALSE, buf, c, NULL, Irp, 0, mdl, + fcb && fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority); if (!NT_SUCCESS(Status)) { ERR("read_data returned %08x\n", Status); - - if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) - chunk_unlock_range(fcb->Vcb, c, lockaddr, locklen); - + if (buf_free) ExFreePool(buf); - + goto exit; } - - if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) - chunk_unlock_range(fcb->Vcb, c, lockaddr, locklen); - + if (ed->compression == BTRFS_COMPRESSION_NONE) { if (buf_free) RtlCopyMemory(data + bytes_read, buf + bumpoff, read); } else { - UINT8* decomp = NULL; - - // FIXME - don't mess around with decomp if we're reading the whole extent - - decomp = ExAllocatePoolWithTag(PagedPool, ed->decoded_size, ALLOC_TAG); - if (!decomp) { - ERR("out of memory\n"); + UINT8 *decomp = NULL, *buf2; + ULONG outlen, inlen, off2; + UINT32 inpageoff = 0; + + off2 = (ULONG)(ed2->offset + off); + buf2 = buf; + inlen = (ULONG)ed2->size; + + if (ed->compression == BTRFS_COMPRESSION_LZO) { + ULONG inoff = sizeof(UINT32); + + inlen -= sizeof(UINT32); + + // If reading a few sectors in, skip to the interesting bit + while (off2 > LINUX_PAGE_SIZE) { + UINT32 partlen; + + if (inlen < sizeof(UINT32)) + break; + + partlen = *(UINT32*)(buf2 + inoff); + + if (partlen < inlen) { + off2 -= LINUX_PAGE_SIZE; + inoff += partlen + sizeof(UINT32); + inlen -= partlen + sizeof(UINT32); + + if (LINUX_PAGE_SIZE - (inoff % LINUX_PAGE_SIZE) < sizeof(UINT32)) + inoff = ((inoff / LINUX_PAGE_SIZE) + 
1) * LINUX_PAGE_SIZE; + } else + break; + } + + buf2 = &buf2[inoff]; + inpageoff = inoff % LINUX_PAGE_SIZE; + } + + if (off2 != 0) { + outlen = off2 + min(read, (UINT32)(ed2->num_bytes - off)); + + decomp = ExAllocatePoolWithTag(PagedPool, outlen, ALLOC_TAG); + if (!decomp) { + ERR("out of memory\n"); + ExFreePool(buf); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + } else + outlen = min(read, (UINT32)(ed2->num_bytes - off)); + + if (ed->compression == BTRFS_COMPRESSION_ZLIB) { + Status = zlib_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen); + + if (!NT_SUCCESS(Status)) { + ERR("zlib_decompress returned %08x\n", Status); + ExFreePool(buf); + + if (decomp) + ExFreePool(decomp); + + goto exit; + } + } else if (ed->compression == BTRFS_COMPRESSION_LZO) { + Status = lzo_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen, inpageoff); + + if (!NT_SUCCESS(Status)) { + ERR("lzo_decompress returned %08x\n", Status); + ExFreePool(buf); + + if (decomp) + ExFreePool(decomp); + + goto exit; + } + } else { + ERR("unsupported compression type %x\n", ed->compression); + Status = STATUS_NOT_SUPPORTED; + ExFreePool(buf); - Status = STATUS_INSUFFICIENT_RESOURCES; + + if (decomp) + ExFreePool(decomp); + goto exit; } - - Status = decompress(ed->compression, buf, ed2->size, decomp, ed->decoded_size); - - if (!NT_SUCCESS(Status)) { - ERR("decompress returned %08x\n", Status); - ExFreePool(buf); + + if (decomp) { + RtlCopyMemory(data + bytes_read, decomp + off2, (size_t)min(read, ed2->num_bytes - off)); ExFreePool(decomp); - goto exit; } - - RtlCopyMemory(data + bytes_read, decomp + ed2->offset + off, min(read, ed2->num_bytes - off)); - - ExFreePool(decomp); } - + if (buf_free) ExFreePool(buf); - + bytes_read += read; length -= read; - + break; } - + case EXTENT_TYPE_PREALLOC: { UINT64 off = start + bytes_read - ext->offset; - UINT32 read = len - off; - - if (read > length) read = length; + UINT32 read = (UINT32)(len - off); + + if 
(read > length) read = (UINT32)length; RtlZeroMemory(data + bytes_read, read); bytes_read += read; length -= read; - + break; } - + default: WARN("Unsupported extent data type %u\n", ed->type); Status = STATUS_NOT_IMPLEMENTED; @@ -3212,7 +3056,7 @@ NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, U } last_end = ext->offset + len; - + if (length == 0) break; } @@ -3220,116 +3064,115 @@ NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, U nextitem: le = le->Flink; } - + if (length > 0 && start + bytes_read < fcb->inode_item.st_size) { - UINT32 read = min(fcb->inode_item.st_size - start - bytes_read, length); - + UINT32 read = (UINT32)min(fcb->inode_item.st_size - start - bytes_read, length); + RtlZeroMemory(data + bytes_read, read); - + bytes_read += read; length -= read; } - + Status = STATUS_SUCCESS; if (pbr) *pbr = bytes_read; - + #ifdef DEBUG_STATS time2 = KeQueryPerformanceCounter(NULL); - + fcb->Vcb->stats.num_reads++; fcb->Vcb->stats.data_read += bytes_read; fcb->Vcb->stats.read_total_time += time2.QuadPart - time1.QuadPart; #endif - + exit: return Status; } -NTSTATUS do_read(PIRP Irp, BOOL wait, ULONG* bytes_read) { +NTSTATUS do_read(PIRP Irp, BOOLEAN wait, ULONG* bytes_read) { PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); PFILE_OBJECT FileObject = IrpSp->FileObject; fcb* fcb = FileObject->FsContext; - UINT8* data; - ULONG length, addon = 0; + UINT8* data = NULL; + ULONG length = IrpSp->Parameters.Read.Length, addon = 0; UINT64 start = IrpSp->Parameters.Read.ByteOffset.QuadPart; - length = IrpSp->Parameters.Read.Length; + *bytes_read = 0; - + if (!fcb || !fcb->Vcb || !fcb->subvol) return STATUS_INTERNAL_ERROR; - + TRACE("file = %S (fcb = %p)\n", file_desc(FileObject), fcb); TRACE("offset = %llx, length = %x\n", start, length); TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "TRUE" : "FALSE", Irp->Flags & IRP_NOCACHE ? 
"TRUE" : "FALSE"); if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY) return STATUS_INVALID_DEVICE_REQUEST; - + if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) { WARN("tried to read locked region\n"); return STATUS_FILE_LOCK_CONFLICT; } - + if (length == 0) { TRACE("tried to read zero bytes\n"); return STATUS_SUCCESS; } - - if (start >= fcb->Header.FileSize.QuadPart) { + + if (start >= (UINT64)fcb->Header.FileSize.QuadPart) { TRACE("tried to read with offset after file end (%llx >= %llx)\n", start, fcb->Header.FileSize.QuadPart); return STATUS_END_OF_FILE; } - + TRACE("FileObject %p fcb %p FileSize = %llx st_size = %llx (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size); -// int3; - + if (Irp->Flags & IRP_NOCACHE || !(IrpSp->MinorFunction & IRP_MN_MDL)) { - data = map_user_buffer(Irp); - + data = map_user_buffer(Irp, fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority); + if (Irp->MdlAddress && !data) { ERR("MmGetSystemAddressForMdlSafe returned NULL\n"); return STATUS_INSUFFICIENT_RESOURCES; } - - if (start >= fcb->Header.ValidDataLength.QuadPart) { - length = min(length, min(start + length, fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart); + + if (start >= (UINT64)fcb->Header.ValidDataLength.QuadPart) { + length = (ULONG)min(length, min(start + length, (UINT64)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart); RtlZeroMemory(data, length); Irp->IoStatus.Information = *bytes_read = length; return STATUS_SUCCESS; } - - if (length + start > fcb->Header.ValidDataLength.QuadPart) { - addon = min(start + length, fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart; + + if (length + start > (UINT64)fcb->Header.ValidDataLength.QuadPart) { + addon = (ULONG)(min(start + length, (UINT64)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart); RtlZeroMemory(data + 
(fcb->Header.ValidDataLength.QuadPart - start), addon); - length = fcb->Header.ValidDataLength.QuadPart - start; + length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start); } } - + if (!(Irp->Flags & IRP_NOCACHE)) { NTSTATUS Status = STATUS_SUCCESS; - + _SEH2_TRY { if (!FileObject->PrivateCacheMap) { CC_FILE_SIZES ccfs; - + ccfs.AllocationSize = fcb->Header.AllocationSize; ccfs.FileSize = fcb->Header.FileSize; ccfs.ValidDataLength = fcb->Header.ValidDataLength; - + init_file_cache(FileObject, &ccfs); } - + if (IrpSp->MinorFunction & IRP_MN_MDL) { CcMdlRead(FileObject,&IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus); } else { - if (CcCopyReadEx) { + if (fCcCopyReadEx) { TRACE("CcCopyReadEx(%p, %llx, %x, %u, %p, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread); TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength); - if (!CcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) { + if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) { TRACE("CcCopyReadEx could not wait\n"); - + IoMarkIrpPending(Irp); return STATUS_PENDING; } @@ -3339,7 +3182,7 @@ NTSTATUS do_read(PIRP Irp, BOOL wait, ULONG* bytes_read) { TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength); if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) { TRACE("CcCopyRead could not wait\n"); - + IoMarkIrpPending(Irp); return STATUS_PENDING; } @@ -3349,176 +3192,177 @@ NTSTATUS do_read(PIRP Irp, BOOL wait, ULONG* bytes_read) { } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { Status = _SEH2_GetExceptionCode(); } _SEH2_END; - + if (NT_SUCCESS(Status)) { Status = Irp->IoStatus.Status; 
Irp->IoStatus.Information += addon; - *bytes_read = Irp->IoStatus.Information; + *bytes_read = (ULONG)Irp->IoStatus.Information; } else ERR("EXCEPTION - %08x\n", Status); - + return Status; } else { NTSTATUS Status; - + if (!wait) { IoMarkIrpPending(Irp); return STATUS_PENDING; } - + if (!(Irp->Flags & IRP_PAGING_IO) && FileObject->SectionObjectPointer->DataSectionObject) { IO_STATUS_BLOCK iosb; - + CcFlushCache(FileObject->SectionObjectPointer, &IrpSp->Parameters.Read.ByteOffset, length, &iosb); - + if (!NT_SUCCESS(iosb.Status)) { ERR("CcFlushCache returned %08x\n", iosb.Status); return iosb.Status; } } - + if (fcb->ads) Status = read_stream(fcb, data, start, length, bytes_read); else - Status = read_file(fcb, data, start, length, bytes_read, Irp, TRUE); - + Status = read_file(fcb, data, start, length, bytes_read, Irp); + *bytes_read += addon; TRACE("read %u bytes\n", *bytes_read); - + Irp->IoStatus.Information = *bytes_read; - + if (diskacc && Status != STATUS_PENDING) { PETHREAD thread = NULL; - + if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread)) thread = Irp->Tail.Overlay.Thread; else if (!IoIsSystemThread(PsGetCurrentThread())) thread = PsGetCurrentThread(); else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp) thread = PsGetCurrentThread(); - + if (thread) - PsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0); + fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0); } - + return Status; } } -NTSTATUS STDCALL drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) { +_Dispatch_type_(IRP_MJ_READ) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) { device_extension* Vcb = DeviceObject->DeviceExtension; PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); PFILE_OBJECT FileObject = IrpSp->FileObject; - ULONG bytes_read; + ULONG bytes_read = 0; NTSTATUS Status; BOOL top_level; fcb* fcb; ccb* ccb; - BOOL fcb_lock = FALSE, wait; - 
+ BOOLEAN fcb_lock = FALSE, wait; + FsRtlEnterFileSystem(); - + top_level = is_top_level(Irp); - + TRACE("read\n"); - - if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) { - Status = part0_passthrough(DeviceObject, Irp); + + if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + Status = vol_read(DeviceObject, Irp); goto exit2; + } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { + Status = STATUS_INVALID_PARAMETER; + goto end; } - + Irp->IoStatus.Information = 0; - + if (IrpSp->MinorFunction & IRP_MN_COMPLETE) { CcMdlReadComplete(IrpSp->FileObject, Irp->MdlAddress); - + Irp->MdlAddress = NULL; Status = STATUS_SUCCESS; - bytes_read = 0; - + goto exit; } - + fcb = FileObject->FsContext; - + if (!fcb) { ERR("fcb was NULL\n"); Status = STATUS_INVALID_PARAMETER; goto exit; } - + ccb = FileObject->FsContext2; - + if (!ccb) { ERR("ccb was NULL\n"); Status = STATUS_INVALID_PARAMETER; goto exit; } - + if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_READ_DATA)) { WARN("insufficient privileges\n"); Status = STATUS_ACCESS_DENIED; goto exit; } - + if (fcb == Vcb->volume_fcb) { TRACE("reading volume FCB\n"); - + IoSkipCurrentIrpStackLocation(Irp); - + Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp); - + goto exit2; } - + wait = IoIsOperationSynchronous(Irp); - + // Don't offload jobs when doing paging IO - otherwise this can lead to // deadlocks in CcCopyRead. if (Irp->Flags & IRP_PAGING_IO) wait = TRUE; - + if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) { if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) { Status = STATUS_PENDING; IoMarkIrpPending(Irp); goto exit; } - + fcb_lock = TRUE; } - + Status = do_read(Irp, wait, &bytes_read); - -exit: + if (fcb_lock) ExReleaseResourceLite(fcb->Header.Resource); - Irp->IoStatus.Status = Status; - +exit: if (FileObject->Flags & FO_SYNCHRONOUS_IO && !(Irp->Flags & IRP_PAGING_IO)) FileObject->CurrentByteOffset.QuadPart = IrpSp->Parameters.Read.ByteOffset.QuadPart + (NT_SUCCESS(Status) ? 
bytes_read : 0); - - // fastfat doesn't do this, but the Wine ntdll file test seems to think we ought to - if (Irp->UserIosb) - *Irp->UserIosb = Irp->IoStatus; - + +end: + Irp->IoStatus.Status = Status; + TRACE("Irp->IoStatus.Status = %08x\n", Irp->IoStatus.Status); TRACE("Irp->IoStatus.Information = %lu\n", Irp->IoStatus.Information); TRACE("returning %08x\n", Status); - + if (Status != STATUS_PENDING) IoCompleteRequest(Irp, IO_NO_INCREMENT); else { if (!add_thread_job(Vcb, Irp)) do_read_job(Irp); } - + exit2: - if (top_level) + if (top_level) IoSetTopLevelIrp(NULL); - + FsRtlExitFileSystem(); - + return Status; } diff --git a/reactos/drivers/filesystems/btrfs/registry.c b/reactos/drivers/filesystems/btrfs/registry.c index 402f30d9211..9e4d9ea08d2 100644 --- a/reactos/drivers/filesystems/btrfs/registry.c +++ b/reactos/drivers/filesystems/btrfs/registry.c @@ -1,77 +1,88 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. 
*/ #include "btrfs_drv.h" extern UNICODE_STRING log_device, log_file, registry_path; +extern LIST_ENTRY uid_map_list, gid_map_list; +extern ERESOURCE mapping_lock; -static WCHAR option_mounted[] = L"Mounted"; +#ifdef _DEBUG +extern HANDLE log_handle; +extern ERESOURCE log_lock; +extern PFILE_OBJECT comfo; +extern PDEVICE_OBJECT comdo; +#endif -#define hex_digit(c) ((c) >= 0 && (c) <= 9) ? ((c) + '0') : ((c) - 10 + 'a') +WORK_QUEUE_ITEM wqi; + +static WCHAR option_mounted[] = L"Mounted"; NTSTATUS registry_load_volume_options(device_extension* Vcb) { BTRFS_UUID* uuid = &Vcb->superblock.uuid; mount_options* options = &Vcb->options; UNICODE_STRING path, ignoreus, compressus, compressforceus, compresstypeus, readonlyus, zliblevelus, flushintervalus, - maxinlineus, subvolidus, raid5recalcus, raid6recalcus, skipbalanceus; + maxinlineus, subvolidus, skipbalanceus, nobarrierus, notrimus, clearcacheus, allowdegradedus; OBJECT_ATTRIBUTES oa; NTSTATUS Status; ULONG i, j, kvfilen, index, retlen; KEY_VALUE_FULL_INFORMATION* kvfi = NULL; HANDLE h; - + options->compress = mount_compress; options->compress_force = mount_compress_force; options->compress_type = mount_compress_type > BTRFS_COMPRESSION_LZO ? 
0 : mount_compress_type; - options->readonly = FALSE; + options->readonly = mount_readonly; options->zlib_level = mount_zlib_level; options->flush_interval = mount_flush_interval; options->max_inline = min(mount_max_inline, Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node) - sizeof(EXTENT_DATA) + 1); - options->raid5_recalculation = mount_raid5_recalculation; - options->raid6_recalculation = mount_raid6_recalculation; options->skip_balance = mount_skip_balance; + options->no_barrier = mount_no_barrier; + options->no_trim = mount_no_trim; + options->clear_cache = mount_clear_cache; + options->allow_degraded = mount_allow_degraded; options->subvol_id = 0; - + path.Length = path.MaximumLength = registry_path.Length + (37 * sizeof(WCHAR)); path.Buffer = ExAllocatePoolWithTag(PagedPool, path.Length, ALLOC_TAG); - + if (!path.Buffer) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(path.Buffer, registry_path.Buffer, registry_path.Length); i = registry_path.Length / sizeof(WCHAR); - + path.Buffer[i] = '\\'; i++; - + for (j = 0; j < 16; j++) { path.Buffer[i] = hex_digit((uuid->uuid[j] & 0xF0) >> 4); path.Buffer[i+1] = hex_digit(uuid->uuid[j] & 0xF); - + i += 2; - + if (j == 3 || j == 5 || j == 7 || j == 9) { path.Buffer[i] = '-'; i++; } } - + kvfilen = sizeof(KEY_VALUE_FULL_INFORMATION) - sizeof(WCHAR) + (255 * sizeof(WCHAR)); kvfi = ExAllocatePoolWithTag(PagedPool, kvfilen, ALLOC_TAG); if (!kvfi) { @@ -79,9 +90,9 @@ NTSTATUS registry_load_volume_options(device_extension* Vcb) { Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + InitializeObjectAttributes(&oa, &path, OBJ_CASE_INSENSITIVE | OBJ_KERNEL_HANDLE, NULL, NULL); - + Status = ZwOpenKey(&h, KEY_QUERY_VALUE, &oa); if (Status == STATUS_OBJECT_NAME_NOT_FOUND) { Status = STATUS_SUCCESS; @@ -90,9 +101,9 @@ NTSTATUS registry_load_volume_options(device_extension* Vcb) { ERR("ZwOpenKey returned %08x\n", Status); goto end; } - + index = 0; - + 
RtlInitUnicodeString(&ignoreus, L"Ignore"); RtlInitUnicodeString(&compressus, L"Compress"); RtlInitUnicodeString(&compressforceus, L"CompressForce"); @@ -102,99 +113,103 @@ NTSTATUS registry_load_volume_options(device_extension* Vcb) { RtlInitUnicodeString(&flushintervalus, L"FlushInterval"); RtlInitUnicodeString(&maxinlineus, L"MaxInline"); RtlInitUnicodeString(&subvolidus, L"SubvolId"); - RtlInitUnicodeString(&raid5recalcus, L"Raid5Recalculation"); - RtlInitUnicodeString(&raid6recalcus, L"Raid6Recalculation"); RtlInitUnicodeString(&skipbalanceus, L"SkipBalance"); - + RtlInitUnicodeString(&nobarrierus, L"NoBarrier"); + RtlInitUnicodeString(&notrimus, L"NoTrim"); + RtlInitUnicodeString(&clearcacheus, L"ClearCache"); + RtlInitUnicodeString(&allowdegradedus, L"AllowDegraded"); + do { Status = ZwEnumerateValueKey(h, index, KeyValueFullInformation, kvfi, kvfilen, &retlen); - + index++; - + if (NT_SUCCESS(Status)) { UNICODE_STRING us; - - us.Length = us.MaximumLength = kvfi->NameLength; + + us.Length = us.MaximumLength = (USHORT)kvfi->NameLength; us.Buffer = kvfi->Name; - + if (FsRtlAreNamesEqual(&ignoreus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) { DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset); - + options->ignore = *val != 0 ? TRUE : FALSE; } else if (FsRtlAreNamesEqual(&compressus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) { DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset); - + options->compress = *val != 0 ? TRUE : FALSE; } else if (FsRtlAreNamesEqual(&compressforceus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) { DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset); - + options->compress_force = *val != 0 ? 
TRUE : FALSE; } else if (FsRtlAreNamesEqual(&compresstypeus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) { DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset); - - options->compress_type = *val > BTRFS_COMPRESSION_LZO ? 0 : *val; + + options->compress_type = (UINT8)(*val > BTRFS_COMPRESSION_LZO ? 0 : *val); } else if (FsRtlAreNamesEqual(&readonlyus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) { DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset); - + options->readonly = *val != 0 ? TRUE : FALSE; } else if (FsRtlAreNamesEqual(&zliblevelus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) { DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset); - + options->zlib_level = *val; } else if (FsRtlAreNamesEqual(&flushintervalus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) { DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset); - + options->flush_interval = *val; } else if (FsRtlAreNamesEqual(&maxinlineus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) { DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset); - + options->max_inline = min(*val, Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node) - sizeof(EXTENT_DATA) + 1); } else if (FsRtlAreNamesEqual(&subvolidus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == REG_QWORD) { UINT64* val = (UINT64*)((UINT8*)kvfi + kvfi->DataOffset); - + options->subvol_id = *val; - } else if (FsRtlAreNamesEqual(&raid5recalcus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) { - DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset); - - options->raid5_recalculation = *val; - } else if (FsRtlAreNamesEqual(&raid6recalcus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == 
REG_DWORD) { - DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset); - - options->raid6_recalculation = *val; } else if (FsRtlAreNamesEqual(&skipbalanceus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) { DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset); - + options->skip_balance = *val; + } else if (FsRtlAreNamesEqual(&nobarrierus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) { + DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset); + + options->no_barrier = *val; + } else if (FsRtlAreNamesEqual(&notrimus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) { + DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset); + + options->no_trim = *val; + } else if (FsRtlAreNamesEqual(&clearcacheus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) { + DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset); + + options->clear_cache = *val; + } else if (FsRtlAreNamesEqual(&allowdegradedus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) { + DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset); + + options->allow_degraded = *val; } } else if (Status != STATUS_NO_MORE_ENTRIES) { ERR("ZwEnumerateValueKey returned %08x\n", Status); goto end2; } } while (NT_SUCCESS(Status)); - + if (!options->compress && options->compress_force) options->compress = TRUE; - + if (options->zlib_level > 9) options->zlib_level = 9; - + if (options->flush_interval == 0) options->flush_interval = mount_flush_interval; - - if (options->raid5_recalculation > 1) - options->raid5_recalculation = 1; - - if (options->raid6_recalculation > 2) - options->raid6_recalculation = 2; Status = STATUS_SUCCESS; - + end2: ZwClose(h); end: ExFreePool(path.Buffer); - + if (kvfi) ExFreePool(kvfi); @@ -208,27 +223,27 @@ NTSTATUS registry_mark_volume_mounted(BTRFS_UUID* uuid) { OBJECT_ATTRIBUTES 
oa; HANDLE h; DWORD data; - + path.Length = path.MaximumLength = registry_path.Length + (37 * sizeof(WCHAR)); path.Buffer = ExAllocatePoolWithTag(PagedPool, path.Length, ALLOC_TAG); - + if (!path.Buffer) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(path.Buffer, registry_path.Buffer, registry_path.Length); i = registry_path.Length / sizeof(WCHAR); - + path.Buffer[i] = '\\'; i++; - + for (j = 0; j < 16; j++) { path.Buffer[i] = hex_digit((uuid->uuid[j] & 0xF0) >> 4); path.Buffer[i+1] = hex_digit(uuid->uuid[j] & 0xF); - + i += 2; - + if (j == 3 || j == 5 || j == 7 || j == 9) { path.Buffer[i] = '-'; i++; @@ -236,32 +251,32 @@ NTSTATUS registry_mark_volume_mounted(BTRFS_UUID* uuid) { } InitializeObjectAttributes(&oa, &path, OBJ_CASE_INSENSITIVE | OBJ_KERNEL_HANDLE, NULL, NULL); - + Status = ZwCreateKey(&h, KEY_SET_VALUE, &oa, 0, NULL, REG_OPTION_NON_VOLATILE, NULL); if (!NT_SUCCESS(Status)) { ERR("ZwCreateKey returned %08x\n", Status); goto end; } - + mountedus.Buffer = option_mounted; - mountedus.Length = mountedus.MaximumLength = wcslen(option_mounted) * sizeof(WCHAR); - + mountedus.Length = mountedus.MaximumLength = (USHORT)wcslen(option_mounted) * sizeof(WCHAR); + data = 1; - + Status = ZwSetValueKey(h, &mountedus, 0, REG_DWORD, &data, sizeof(DWORD)); if (!NT_SUCCESS(Status)) { ERR("ZwSetValueKey returned %08x\n", Status); goto end2; } - + Status = STATUS_SUCCESS; end2: ZwClose(h); - + end: ExFreePool(path.Buffer); - + return Status; } @@ -273,40 +288,40 @@ static NTSTATUS registry_mark_volume_unmounted_path(PUNICODE_STRING path) { KEY_VALUE_BASIC_INFORMATION* kvbi; BOOL has_options = FALSE; UNICODE_STRING mountedus; - + // If a volume key has any options in it, we set Mounted to 0 and return. Otherwise, // we delete the whole thing. 
- + kvbi = ExAllocatePoolWithTag(PagedPool, kvbilen, ALLOC_TAG); if (!kvbi) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + InitializeObjectAttributes(&oa, path, OBJ_CASE_INSENSITIVE | OBJ_KERNEL_HANDLE, NULL, NULL); - + Status = ZwOpenKey(&h, KEY_QUERY_VALUE | KEY_SET_VALUE | DELETE, &oa); if (!NT_SUCCESS(Status)) { ERR("ZwOpenKey returned %08x\n", Status); goto end; } - + index = 0; - + mountedus.Buffer = option_mounted; - mountedus.Length = mountedus.MaximumLength = wcslen(option_mounted) * sizeof(WCHAR); - + mountedus.Length = mountedus.MaximumLength = (USHORT)wcslen(option_mounted) * sizeof(WCHAR); + do { Status = ZwEnumerateValueKey(h, index, KeyValueBasicInformation, kvbi, kvbilen, &retlen); - + index++; - + if (NT_SUCCESS(Status)) { UNICODE_STRING us; - - us.Length = us.MaximumLength = kvbi->NameLength; + + us.Length = us.MaximumLength = (USHORT)kvbi->NameLength; us.Buffer = kvbi->Name; - + if (!FsRtlAreNamesEqual(&mountedus, &us, TRUE, NULL)) { has_options = TRUE; break; @@ -316,10 +331,10 @@ static NTSTATUS registry_mark_volume_unmounted_path(PUNICODE_STRING path) { goto end2; } } while (NT_SUCCESS(Status)); - + if (has_options) { DWORD data = 0; - + Status = ZwSetValueKey(h, &mountedus, 0, REG_DWORD, &data, sizeof(DWORD)); if (!NT_SUCCESS(Status)) { ERR("ZwSetValueKey returned %08x\n", Status); @@ -332,15 +347,15 @@ static NTSTATUS registry_mark_volume_unmounted_path(PUNICODE_STRING path) { goto end2; } } - + Status = STATUS_SUCCESS; end2: ZwClose(h); - + end: ExFreePool(kvbi); - + return Status; } @@ -348,44 +363,44 @@ NTSTATUS registry_mark_volume_unmounted(BTRFS_UUID* uuid) { UNICODE_STRING path; NTSTATUS Status; ULONG i, j; - + path.Length = path.MaximumLength = registry_path.Length + (37 * sizeof(WCHAR)); path.Buffer = ExAllocatePoolWithTag(PagedPool, path.Length, ALLOC_TAG); - + if (!path.Buffer) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(path.Buffer, registry_path.Buffer, 
registry_path.Length); i = registry_path.Length / sizeof(WCHAR); - + path.Buffer[i] = '\\'; i++; - + for (j = 0; j < 16; j++) { path.Buffer[i] = hex_digit((uuid->uuid[j] & 0xF0) >> 4); path.Buffer[i+1] = hex_digit(uuid->uuid[j] & 0xF); - + i += 2; - + if (j == 3 || j == 5 || j == 7 || j == 9) { path.Buffer[i] = '-'; i++; } } - + Status = registry_mark_volume_unmounted_path(&path); if (!NT_SUCCESS(Status)) { ERR("registry_mark_volume_unmounted_path returned %08x\n", Status); goto end; } - + Status = STATUS_SUCCESS; - + end: ExFreePool(path.Buffer); - + return Status; } @@ -393,10 +408,10 @@ end: static BOOL is_uuid(ULONG namelen, WCHAR* name) { ULONG i; - + if (namelen != 36 * sizeof(WCHAR)) return FALSE; - + for (i = 0; i < 36; i++) { if (i == 8 || i == 13 || i == 18 || i == 23) { if (name[i] != '-') @@ -404,7 +419,7 @@ static BOOL is_uuid(ULONG namelen, WCHAR* name) { } else if (!is_hex(name[i])) return FALSE; } - + return TRUE; } @@ -418,85 +433,85 @@ static void reset_subkeys(HANDLE h, PUNICODE_STRING reg_path) { KEY_BASIC_INFORMATION* kbi; ULONG kbilen = sizeof(KEY_BASIC_INFORMATION) - sizeof(WCHAR) + (255 * sizeof(WCHAR)), retlen, index = 0; LIST_ENTRY key_names, *le; - + InitializeListHead(&key_names); - + kbi = ExAllocatePoolWithTag(PagedPool, kbilen, ALLOC_TAG); if (!kbi) { ERR("out of memory\n"); return; } - + do { Status = ZwEnumerateKey(h, index, KeyBasicInformation, kbi, kbilen, &retlen); - + index++; - + if (NT_SUCCESS(Status)) { key_name* kn; - - ERR("key: %.*S\n", kbi->NameLength / sizeof(WCHAR), kbi->Name); - + + TRACE("key: %.*S\n", kbi->NameLength / sizeof(WCHAR), kbi->Name); + if (is_uuid(kbi->NameLength, kbi->Name)) { kn = ExAllocatePoolWithTag(PagedPool, sizeof(key_name), ALLOC_TAG); if (!kn) { ERR("out of memory\n"); goto end; } - - kn->name.Length = kn->name.MaximumLength = kbi->NameLength; - kn->name.Buffer = ExAllocatePoolWithTag(PagedPool, kn->name.Length, ALLOC_TAG); - + + kn->name.Length = kn->name.MaximumLength = (USHORT)min(0xffff, 
kbi->NameLength); + kn->name.Buffer = ExAllocatePoolWithTag(PagedPool, kn->name.MaximumLength, ALLOC_TAG); + if (!kn->name.Buffer) { ERR("out of memory\n"); ExFreePool(kn); goto end; } - - RtlCopyMemory(kn->name.Buffer, kbi->Name, kbi->NameLength); - + + RtlCopyMemory(kn->name.Buffer, kbi->Name, kn->name.Length); + InsertTailList(&key_names, &kn->list_entry); } } else if (Status != STATUS_NO_MORE_ENTRIES) ERR("ZwEnumerateKey returned %08x\n", Status); } while (NT_SUCCESS(Status)); - + le = key_names.Flink; while (le != &key_names) { key_name* kn = CONTAINING_RECORD(le, key_name, list_entry); UNICODE_STRING path; - + path.Length = path.MaximumLength = reg_path->Length + sizeof(WCHAR) + kn->name.Length; path.Buffer = ExAllocatePoolWithTag(PagedPool, path.Length, ALLOC_TAG); - + if (!path.Buffer) { ERR("out of memory\n"); goto end; } - + RtlCopyMemory(path.Buffer, reg_path->Buffer, reg_path->Length); path.Buffer[reg_path->Length / sizeof(WCHAR)] = '\\'; RtlCopyMemory(&path.Buffer[(reg_path->Length / sizeof(WCHAR)) + 1], kn->name.Buffer, kn->name.Length); - + Status = registry_mark_volume_unmounted_path(&path); if (!NT_SUCCESS(Status)) WARN("registry_mark_volume_unmounted_path returned %08x\n", Status); - + ExFreePool(path.Buffer); - + le = le->Flink; } - + end: while (!IsListEmpty(&key_names)) { key_name* kn; - + le = RemoveHeadList(&key_names); kn = CONTAINING_RECORD(le, key_name, list_entry); - + if (kn->name.Buffer) ExFreePool(kn->name.Buffer); - + ExFreePool(kn); } @@ -510,28 +525,32 @@ static void read_mappings(PUNICODE_STRING regpath) { OBJECT_ATTRIBUTES oa; ULONG dispos; NTSTATUS Status; - ULONG kvfilen, retlen, i; - KEY_VALUE_FULL_INFORMATION* kvfi; - + const WCHAR mappings[] = L"\\Mappings"; - + + while (!IsListEmpty(&uid_map_list)) { + uid_map* um = CONTAINING_RECORD(RemoveHeadList(&uid_map_list), uid_map, listentry); + + if (um->sid) ExFreePool(um->sid); + ExFreePool(um); + } + path = ExAllocatePoolWithTag(PagedPool, regpath->Length + (wcslen(mappings) * 
sizeof(WCHAR)), ALLOC_TAG); if (!path) { ERR("out of memory\n"); return; } - + RtlCopyMemory(path, regpath->Buffer, regpath->Length); RtlCopyMemory((UINT8*)path + regpath->Length, mappings, wcslen(mappings) * sizeof(WCHAR)); - + us.Buffer = path; us.Length = us.MaximumLength = regpath->Length + ((USHORT)wcslen(mappings) * sizeof(WCHAR)); - + InitializeObjectAttributes(&oa, &us, OBJ_CASE_INSENSITIVE | OBJ_KERNEL_HANDLE, NULL, NULL); - - // FIXME - keep open and do notify for changes + Status = ZwCreateKey(&h, KEY_QUERY_VALUE, &oa, 0, NULL, REG_OPTION_NON_VOLATILE, &dispos); - + if (!NT_SUCCESS(Status)) { ERR("ZwCreateKey returned %08x\n", Status); ExFreePool(path); @@ -539,62 +558,170 @@ static void read_mappings(PUNICODE_STRING regpath) { } if (dispos == REG_OPENED_EXISTING_KEY) { + KEY_VALUE_FULL_INFORMATION* kvfi; + ULONG kvfilen, retlen, i; + kvfilen = sizeof(KEY_VALUE_FULL_INFORMATION) + 256; kvfi = ExAllocatePoolWithTag(PagedPool, kvfilen, ALLOC_TAG); - + if (!kvfi) { ERR("out of memory\n"); ExFreePool(path); ZwClose(h); return; } - + i = 0; do { Status = ZwEnumerateValueKey(h, i, KeyValueFullInformation, kvfi, kvfilen, &retlen); - - if (NT_SUCCESS(Status) && kvfi->DataLength > 0) { + + if (NT_SUCCESS(Status) && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) { UINT32 val = 0; - + RtlCopyMemory(&val, (UINT8*)kvfi + kvfi->DataOffset, min(kvfi->DataLength, sizeof(UINT32))); - + TRACE("entry %u = %.*S = %u\n", i, kvfi->NameLength / sizeof(WCHAR), kvfi->Name, val); - + add_user_mapping(kvfi->Name, kvfi->NameLength / sizeof(WCHAR), val); } - + i = i + 1; } while (Status != STATUS_NO_MORE_ENTRIES); + + ExFreePool(kvfi); } - + ZwClose(h); ExFreePool(path); } +static void read_group_mappings(PUNICODE_STRING regpath) { + WCHAR* path; + UNICODE_STRING us; + HANDLE h; + OBJECT_ATTRIBUTES oa; + ULONG dispos; + NTSTATUS Status; + + const WCHAR mappings[] = L"\\GroupMappings"; + + while (!IsListEmpty(&gid_map_list)) { + gid_map* gm = 
CONTAINING_RECORD(RemoveHeadList(&gid_map_list), gid_map, listentry); + + if (gm->sid) ExFreePool(gm->sid); + ExFreePool(gm); + } + + path = ExAllocatePoolWithTag(PagedPool, regpath->Length + (wcslen(mappings) * sizeof(WCHAR)), ALLOC_TAG); + if (!path) { + ERR("out of memory\n"); + return; + } + + RtlCopyMemory(path, regpath->Buffer, regpath->Length); + RtlCopyMemory((UINT8*)path + regpath->Length, mappings, wcslen(mappings) * sizeof(WCHAR)); + + us.Buffer = path; + us.Length = us.MaximumLength = regpath->Length + ((USHORT)wcslen(mappings) * sizeof(WCHAR)); + + InitializeObjectAttributes(&oa, &us, OBJ_CASE_INSENSITIVE | OBJ_KERNEL_HANDLE, NULL, NULL); + + Status = ZwCreateKey(&h, KEY_QUERY_VALUE, &oa, 0, NULL, REG_OPTION_NON_VOLATILE, &dispos); + + if (!NT_SUCCESS(Status)) { + ERR("ZwCreateKey returned %08x\n", Status); + ExFreePool(path); + return; + } + + ExFreePool(path); + + if (dispos == REG_OPENED_EXISTING_KEY) { + KEY_VALUE_FULL_INFORMATION* kvfi; + ULONG kvfilen, retlen, i; + + kvfilen = sizeof(KEY_VALUE_FULL_INFORMATION) + 256; + kvfi = ExAllocatePoolWithTag(PagedPool, kvfilen, ALLOC_TAG); + + if (!kvfi) { + ERR("out of memory\n"); + ZwClose(h); + return; + } + + i = 0; + do { + Status = ZwEnumerateValueKey(h, i, KeyValueFullInformation, kvfi, kvfilen, &retlen); + + if (NT_SUCCESS(Status) && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) { + UINT32 val = 0; + + RtlCopyMemory(&val, (UINT8*)kvfi + kvfi->DataOffset, min(kvfi->DataLength, sizeof(UINT32))); + + TRACE("entry %u = %.*S = %u\n", i, kvfi->NameLength / sizeof(WCHAR), kvfi->Name, val); + + add_group_mapping(kvfi->Name, kvfi->NameLength / sizeof(WCHAR), val); + } + + i = i + 1; + } while (Status != STATUS_NO_MORE_ENTRIES); + + ExFreePool(kvfi); + } else if (dispos == REG_CREATED_NEW_KEY) { + WCHAR* builtin_users = L"S-1-5-32-545"; + UNICODE_STRING us2; + DWORD val; + + // If we're creating the key for the first time, we add a default mapping of + // BUILTIN\Users to gid 100, which ought to 
correspond to the "users" group on Linux. + + us2.Length = us2.MaximumLength = (USHORT)wcslen(builtin_users) * sizeof(WCHAR); + us2.Buffer = ExAllocatePoolWithTag(PagedPool, us2.MaximumLength, ALLOC_TAG); + + if (us2.Buffer) { + RtlCopyMemory(us2.Buffer, builtin_users, us2.Length); + + val = 100; + Status = ZwSetValueKey(h, &us2, 0, REG_DWORD, &val, sizeof(DWORD)); + if (!NT_SUCCESS(Status)) { + ERR("ZwSetValueKey returned %08x\n", Status); + ZwClose(h); + return; + } + + add_group_mapping(us2.Buffer, us2.Length / sizeof(WCHAR), val); + + ExFreePool(us2.Buffer); + } + } + + ZwClose(h); +} + static void get_registry_value(HANDLE h, WCHAR* string, ULONG type, void* val, ULONG size) { ULONG kvfilen; KEY_VALUE_FULL_INFORMATION* kvfi; UNICODE_STRING us; NTSTATUS Status; - + RtlInitUnicodeString(&us, string); - + kvfi = NULL; kvfilen = 0; Status = ZwQueryValueKey(h, &us, KeyValueFullInformation, kvfi, kvfilen, &kvfilen); - + if ((Status == STATUS_BUFFER_TOO_SMALL || Status == STATUS_BUFFER_OVERFLOW) && kvfilen > 0) { kvfi = ExAllocatePoolWithTag(PagedPool, kvfilen, ALLOC_TAG); - + if (!kvfi) { ERR("out of memory\n"); ZwClose(h); return; } - + Status = ZwQueryValueKey(h, &us, KeyValueFullInformation, kvfi, kvfilen, &kvfilen); - + if (NT_SUCCESS(Status)) { if (kvfi->Type == type && kvfi->DataLength >= size) { RtlCopyMemory(val, ((UINT8*)kvfi) + kvfi->DataOffset, size); @@ -610,11 +737,11 @@ static void get_registry_value(HANDLE h, WCHAR* string, ULONG type, void* val, U } } } - + ExFreePool(kvfi); } else if (Status == STATUS_OBJECT_NAME_NOT_FOUND) { Status = ZwSetValueKey(h, &us, 0, type, val, size); - + if (!NT_SUCCESS(Status)) { ERR("ZwSetValueKey returned %08x\n", Status); } @@ -623,80 +750,87 @@ static void get_registry_value(HANDLE h, WCHAR* string, ULONG type, void* val, U } } -void STDCALL read_registry(PUNICODE_STRING regpath) { -#ifndef __REACTOS__ - UNICODE_STRING us; -#endif +void read_registry(PUNICODE_STRING regpath, BOOL refresh) { OBJECT_ATTRIBUTES oa; 
NTSTATUS Status; HANDLE h; ULONG dispos; -#ifndef __REACTOS__ - ULONG kvfilen; +#ifdef _DEBUG KEY_VALUE_FULL_INFORMATION* kvfi; -#endif - -#ifndef __REACTOS__ + ULONG kvfilen, old_debug_log_level = debug_log_level; + UNICODE_STRING us, old_log_file, old_log_device; + static WCHAR def_log_file[] = L"\\??\\C:\\btrfs.log"; #endif - + + ExAcquireResourceExclusiveLite(&mapping_lock, TRUE); + read_mappings(regpath); - + read_group_mappings(regpath); + + ExReleaseResourceLite(&mapping_lock); + InitializeObjectAttributes(&oa, regpath, OBJ_CASE_INSENSITIVE | OBJ_KERNEL_HANDLE, NULL, NULL); - + Status = ZwCreateKey(&h, KEY_QUERY_VALUE | KEY_ENUMERATE_SUB_KEYS, &oa, 0, NULL, REG_OPTION_NON_VOLATILE, &dispos); - + if (!NT_SUCCESS(Status)) { ERR("ZwCreateKey returned %08x\n", Status); return; } - - reset_subkeys(h, regpath); - + + if (!refresh) + reset_subkeys(h, regpath); + get_registry_value(h, L"Compress", REG_DWORD, &mount_compress, sizeof(mount_compress)); get_registry_value(h, L"CompressForce", REG_DWORD, &mount_compress_force, sizeof(mount_compress_force)); get_registry_value(h, L"CompressType", REG_DWORD, &mount_compress_type, sizeof(mount_compress_type)); get_registry_value(h, L"ZlibLevel", REG_DWORD, &mount_zlib_level, sizeof(mount_zlib_level)); get_registry_value(h, L"FlushInterval", REG_DWORD, &mount_flush_interval, sizeof(mount_flush_interval)); get_registry_value(h, L"MaxInline", REG_DWORD, &mount_max_inline, sizeof(mount_max_inline)); - get_registry_value(h, L"Raid5Recalculation", REG_DWORD, &mount_raid5_recalculation, sizeof(mount_raid5_recalculation)); - get_registry_value(h, L"Raid6Recalculation", REG_DWORD, &mount_raid6_recalculation, sizeof(mount_raid6_recalculation)); get_registry_value(h, L"SkipBalance", REG_DWORD, &mount_skip_balance, sizeof(mount_skip_balance)); - + get_registry_value(h, L"NoBarrier", REG_DWORD, &mount_no_barrier, sizeof(mount_no_barrier)); + get_registry_value(h, L"NoTrim", REG_DWORD, &mount_no_trim, sizeof(mount_no_trim)); + 
get_registry_value(h, L"ClearCache", REG_DWORD, &mount_clear_cache, sizeof(mount_clear_cache)); + get_registry_value(h, L"AllowDegraded", REG_DWORD, &mount_allow_degraded, sizeof(mount_allow_degraded)); + get_registry_value(h, L"Readonly", REG_DWORD, &mount_readonly, sizeof(mount_readonly)); + + if (!refresh) + get_registry_value(h, L"NoPNP", REG_DWORD, &no_pnp, sizeof(no_pnp)); + if (mount_flush_interval == 0) mount_flush_interval = 1; - - if (mount_raid5_recalculation > 1) - mount_raid5_recalculation = 1; - - if (mount_raid6_recalculation > 2) - mount_raid6_recalculation = 2; - + #ifdef _DEBUG get_registry_value(h, L"DebugLogLevel", REG_DWORD, &debug_log_level, sizeof(debug_log_level)); - + RtlInitUnicodeString(&us, L"LogDevice"); - + kvfi = NULL; kvfilen = 0; Status = ZwQueryValueKey(h, &us, KeyValueFullInformation, kvfi, kvfilen, &kvfilen); - + + old_log_device = log_device; + + log_device.Length = log_device.MaximumLength = 0; + log_device.Buffer = NULL; + if ((Status == STATUS_BUFFER_TOO_SMALL || Status == STATUS_BUFFER_OVERFLOW) && kvfilen > 0) { kvfi = ExAllocatePoolWithTag(PagedPool, kvfilen, ALLOC_TAG); - + if (!kvfi) { ERR("out of memory\n"); ZwClose(h); return; } - + Status = ZwQueryValueKey(h, &us, KeyValueFullInformation, kvfi, kvfilen, &kvfilen); - + if (NT_SUCCESS(Status)) { if ((kvfi->Type == REG_SZ || kvfi->Type == REG_EXPAND_SZ) && kvfi->DataLength >= sizeof(WCHAR)) { - log_device.Length = log_device.MaximumLength = kvfi->DataLength; - log_device.Buffer = ExAllocatePoolWithTag(PagedPool, kvfi->DataLength, ALLOC_TAG); - + log_device.Length = log_device.MaximumLength = (USHORT)min(0xffff, kvfi->DataLength); + log_device.Buffer = ExAllocatePoolWithTag(PagedPool, log_device.MaximumLength, ALLOC_TAG); + if (!log_device.Buffer) { ERR("out of memory\n"); ExFreePool(kvfi); @@ -704,47 +838,76 @@ void STDCALL read_registry(PUNICODE_STRING regpath) { return; } - RtlCopyMemory(log_device.Buffer, ((UINT8*)kvfi) + kvfi->DataOffset, kvfi->DataLength); - + 
RtlCopyMemory(log_device.Buffer, ((UINT8*)kvfi) + kvfi->DataOffset, log_device.Length); + if (log_device.Buffer[(log_device.Length / sizeof(WCHAR)) - 1] == 0) log_device.Length -= sizeof(WCHAR); } else { ERR("LogDevice was type %u, length %u\n", kvfi->Type, kvfi->DataLength); - + Status = ZwDeleteValueKey(h, &us); if (!NT_SUCCESS(Status)) { ERR("ZwDeleteValueKey returned %08x\n", Status); } } } - + ExFreePool(kvfi); } else if (Status != STATUS_OBJECT_NAME_NOT_FOUND) { ERR("ZwQueryValueKey returned %08x\n", Status); } - + + ExAcquireResourceExclusiveLite(&log_lock, TRUE); + + if (refresh && (log_device.Length != old_log_device.Length || RtlCompareMemory(log_device.Buffer, old_log_device.Buffer, log_device.Length) != log_device.Length || + (!comfo && log_device.Length > 0) || (old_debug_log_level == 0 && debug_log_level > 0) || (old_debug_log_level > 0 && debug_log_level == 0))) { + if (comfo) + ObDereferenceObject(comfo); + + if (log_handle) { + ZwClose(log_handle); + log_handle = NULL; + } + + comfo = NULL; + comdo = NULL; + + if (log_device.Length > 0 && debug_log_level > 0) { + Status = IoGetDeviceObjectPointer(&log_device, FILE_WRITE_DATA, &comfo, &comdo); + if (!NT_SUCCESS(Status)) + DbgPrint("IoGetDeviceObjectPointer returned %08x\n", Status); + } + } + + ExReleaseResourceLite(&log_lock); + + if (old_log_device.Buffer) + ExFreePool(old_log_device.Buffer); + RtlInitUnicodeString(&us, L"LogFile"); - + kvfi = NULL; kvfilen = 0; Status = ZwQueryValueKey(h, &us, KeyValueFullInformation, kvfi, kvfilen, &kvfilen); - + + old_log_file = log_file; + if ((Status == STATUS_BUFFER_TOO_SMALL || Status == STATUS_BUFFER_OVERFLOW) && kvfilen > 0) { kvfi = ExAllocatePoolWithTag(PagedPool, kvfilen, ALLOC_TAG); - + if (!kvfi) { ERR("out of memory\n"); ZwClose(h); return; } - + Status = ZwQueryValueKey(h, &us, KeyValueFullInformation, kvfi, kvfilen, &kvfilen); - + if (NT_SUCCESS(Status)) { if ((kvfi->Type == REG_SZ || kvfi->Type == REG_EXPAND_SZ) && kvfi->DataLength >= 
sizeof(WCHAR)) { - log_file.Length = log_file.MaximumLength = kvfi->DataLength; - log_file.Buffer = ExAllocatePoolWithTag(PagedPool, kvfi->DataLength, ALLOC_TAG); - + log_file.Length = log_file.MaximumLength = (USHORT)min(0xffff, kvfi->DataLength); + log_file.Buffer = ExAllocatePoolWithTag(PagedPool, log_file.MaximumLength, ALLOC_TAG); + if (!log_file.Buffer) { ERR("out of memory\n"); ExFreePool(kvfi); @@ -752,44 +915,102 @@ void STDCALL read_registry(PUNICODE_STRING regpath) { return; } - RtlCopyMemory(log_file.Buffer, ((UINT8*)kvfi) + kvfi->DataOffset, kvfi->DataLength); - + RtlCopyMemory(log_file.Buffer, ((UINT8*)kvfi) + kvfi->DataOffset, log_file.Length); + if (log_file.Buffer[(log_file.Length / sizeof(WCHAR)) - 1] == 0) log_file.Length -= sizeof(WCHAR); } else { ERR("LogFile was type %u, length %u\n", kvfi->Type, kvfi->DataLength); - + Status = ZwDeleteValueKey(h, &us); if (!NT_SUCCESS(Status)) { ERR("ZwDeleteValueKey returned %08x\n", Status); } } } - + ExFreePool(kvfi); } else if (Status == STATUS_OBJECT_NAME_NOT_FOUND) { - Status = ZwSetValueKey(h, &us, 0, REG_SZ, def_log_file, (wcslen(def_log_file) + 1) * sizeof(WCHAR)); - + Status = ZwSetValueKey(h, &us, 0, REG_SZ, def_log_file, (ULONG)(wcslen(def_log_file) + 1) * sizeof(WCHAR)); + if (!NT_SUCCESS(Status)) { ERR("ZwSetValueKey returned %08x\n", Status); } } else { ERR("ZwQueryValueKey returned %08x\n", Status); } - + if (log_file.Length == 0) { - log_file.Length = log_file.MaximumLength = wcslen(def_log_file) * sizeof(WCHAR); + log_file.Length = log_file.MaximumLength = (UINT16)wcslen(def_log_file) * sizeof(WCHAR); log_file.Buffer = ExAllocatePoolWithTag(PagedPool, log_file.MaximumLength, ALLOC_TAG); - + if (!log_file.Buffer) { ERR("out of memory\n"); ZwClose(h); return; } - + RtlCopyMemory(log_file.Buffer, def_log_file, log_file.Length); } + + ExAcquireResourceExclusiveLite(&log_lock, TRUE); + + if (refresh && (log_file.Length != old_log_file.Length || RtlCompareMemory(log_file.Buffer, 
old_log_file.Buffer, log_file.Length) != log_file.Length || + (!log_handle && log_file.Length > 0) || (old_debug_log_level == 0 && debug_log_level > 0) || (old_debug_log_level > 0 && debug_log_level == 0))) { + if (log_handle) { + ZwClose(log_handle); + log_handle = NULL; + } + + if (!comfo && log_file.Length > 0 && refresh && debug_log_level > 0) { + IO_STATUS_BLOCK iosb; + + InitializeObjectAttributes(&oa, &log_file, OBJ_CASE_INSENSITIVE | OBJ_KERNEL_HANDLE, NULL, NULL); + + Status = ZwCreateFile(&log_handle, FILE_WRITE_DATA, &oa, &iosb, NULL, FILE_ATTRIBUTE_NORMAL, FILE_SHARE_READ, + FILE_OPEN_IF, FILE_NON_DIRECTORY_FILE | FILE_WRITE_THROUGH | FILE_SYNCHRONOUS_IO_ALERT, NULL, 0); + if (!NT_SUCCESS(Status)) { + DbgPrint("ZwCreateFile returned %08x\n", Status); + log_handle = NULL; + } + } + } + + ExReleaseResourceLite(&log_lock); + + if (old_log_file.Buffer) + ExFreePool(old_log_file.Buffer); #endif - + ZwClose(h); } + +_Function_class_(WORKER_THREAD_ROUTINE) +#ifdef __REACTOS__ +static void NTAPI registry_work_item(PVOID Parameter) { +#else +static void registry_work_item(PVOID Parameter) { +#endif + NTSTATUS Status; + HANDLE regh = (HANDLE)Parameter; + IO_STATUS_BLOCK iosb; + + TRACE("registry changed\n"); + + read_registry(&registry_path, TRUE); + + Status = ZwNotifyChangeKey(regh, NULL, (PVOID)&wqi, (PVOID)DelayedWorkQueue, &iosb, REG_NOTIFY_CHANGE_LAST_SET, TRUE, NULL, 0, TRUE); + if (!NT_SUCCESS(Status)) + ERR("ZwNotifyChangeKey returned %08x\n", Status); +} + +void watch_registry(HANDLE regh) { + NTSTATUS Status; + IO_STATUS_BLOCK iosb; + + ExInitializeWorkItem(&wqi, registry_work_item, regh); + + Status = ZwNotifyChangeKey(regh, NULL, (PVOID)&wqi, (PVOID)DelayedWorkQueue, &iosb, REG_NOTIFY_CHANGE_LAST_SET, TRUE, NULL, 0, TRUE); + if (!NT_SUCCESS(Status)) + ERR("ZwNotifyChangeKey returned %08x\n", Status); +} diff --git a/reactos/drivers/filesystems/btrfs/reparse.c b/reactos/drivers/filesystems/btrfs/reparse.c index b495d78a5fb..440894323c7 100644 ---
a/reactos/drivers/filesystems/btrfs/reparse.c +++ b/reactos/drivers/filesystems/btrfs/reparse.c @@ -1,17 +1,17 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */ @@ -23,126 +23,145 @@ NTSTATUS get_reparse_point(PDEVICE_OBJECT DeviceObject, PFILE_OBJECT FileObject, DWORD reqlen; REPARSE_DATA_BUFFER* rdb = buffer; fcb* fcb = FileObject->FsContext; - char* data; + ccb* ccb = FileObject->FsContext2; NTSTATUS Status; - + TRACE("(%p, %p, %p, %x, %p)\n", DeviceObject, FileObject, buffer, buflen, retlen); - + + if (!ccb) + return STATUS_INVALID_PARAMETER; + ExAcquireResourceSharedLite(&fcb->Vcb->tree_lock, TRUE); ExAcquireResourceSharedLite(fcb->Header.Resource, TRUE); - + if (fcb->type == BTRFS_TYPE_SYMLINK) { - if (called_from_lxss()) { + if (ccb->lxss) { reqlen = offsetof(REPARSE_DATA_BUFFER, GenericReparseBuffer.DataBuffer) + sizeof(UINT32); - + if (buflen < reqlen) { Status = STATUS_BUFFER_OVERFLOW; goto end; } - + rdb->ReparseTag = IO_REPARSE_TAG_LXSS_SYMLINK; rdb->ReparseDataLength = offsetof(REPARSE_DATA_BUFFER, GenericReparseBuffer.DataBuffer) + sizeof(UINT32); rdb->Reserved = 0; - + *((UINT32*)rdb->GenericReparseBuffer.DataBuffer) = 1; - + *retlen = reqlen; } else { - data = ExAllocatePoolWithTag(PagedPool, fcb->inode_item.st_size, ALLOC_TAG); + char* data; + + if
(fcb->inode_item.st_size == 0 || fcb->inode_item.st_size > 0xffff) { + Status = STATUS_INVALID_PARAMETER; + goto end; + } + + data = ExAllocatePoolWithTag(PagedPool, (ULONG)fcb->inode_item.st_size, ALLOC_TAG); if (!data) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + TRACE("data = %p, size = %x\n", data, fcb->inode_item.st_size); - Status = read_file(fcb, (UINT8*)data, 0, fcb->inode_item.st_size, NULL, NULL, TRUE); - + Status = read_file(fcb, (UINT8*)data, 0, fcb->inode_item.st_size, NULL, NULL); + if (!NT_SUCCESS(Status)) { ERR("read_file returned %08x\n", Status); ExFreePool(data); goto end; } - - Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, data, fcb->inode_item.st_size); + + Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, data, (ULONG)fcb->inode_item.st_size); if (!NT_SUCCESS(Status)) { ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); ExFreePool(data); goto end; } - - subnamelen = stringlen; - printnamelen = stringlen; - + + subnamelen = (UINT16)stringlen; + printnamelen = (UINT16)stringlen; + reqlen = offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer.PathBuffer) + subnamelen + printnamelen; - + + if (buflen >= offsetof(REPARSE_DATA_BUFFER, ReparseDataLength)) + rdb->ReparseTag = IO_REPARSE_TAG_SYMLINK; + + if (buflen >= offsetof(REPARSE_DATA_BUFFER, Reserved)) + rdb->ReparseDataLength = (USHORT)(reqlen - offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer)); + + if (buflen >= offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer.SubstituteNameOffset)) + rdb->Reserved = 0; + if (buflen < reqlen) { + ExFreePool(data); Status = STATUS_BUFFER_OVERFLOW; + *retlen = min(buflen, offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer.SubstituteNameOffset)); goto end; } - - rdb->ReparseTag = IO_REPARSE_TAG_SYMLINK; - rdb->ReparseDataLength = reqlen - offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer); - rdb->Reserved = 0; - + rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset = 0; 
rdb->SymbolicLinkReparseBuffer.SubstituteNameLength = subnamelen; rdb->SymbolicLinkReparseBuffer.PrintNameOffset = subnamelen; rdb->SymbolicLinkReparseBuffer.PrintNameLength = printnamelen; rdb->SymbolicLinkReparseBuffer.Flags = SYMLINK_FLAG_RELATIVE; - + Status = RtlUTF8ToUnicodeN(&rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)], - stringlen, &stringlen, data, fcb->inode_item.st_size); + stringlen, &stringlen, data, (ULONG)fcb->inode_item.st_size); if (!NT_SUCCESS(Status)) { ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); ExFreePool(data); goto end; } - + for (i = 0; i < stringlen / sizeof(WCHAR); i++) { if (rdb->SymbolicLinkReparseBuffer.PathBuffer[(rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)) + i] == '/') rdb->SymbolicLinkReparseBuffer.PathBuffer[(rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)) + i] = '\\'; } - + RtlCopyMemory(&rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.PrintNameOffset / sizeof(WCHAR)], &rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)], rdb->SymbolicLinkReparseBuffer.SubstituteNameLength); - + *retlen = reqlen; - + ExFreePool(data); } - + Status = STATUS_SUCCESS; } else if (fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT) { if (fcb->type == BTRFS_TYPE_FILE) { ULONG len; - - Status = read_file(fcb, buffer, 0, buflen, &len, NULL, TRUE); - + + Status = read_file(fcb, buffer, 0, buflen, &len, NULL); + if (!NT_SUCCESS(Status)) { ERR("read_file returned %08x\n", Status); } - + *retlen = len; } else if (fcb->type == BTRFS_TYPE_DIRECTORY) { if (!fcb->reparse_xattr.Buffer || fcb->reparse_xattr.Length < sizeof(ULONG)) { Status = STATUS_NOT_A_REPARSE_POINT; goto end; } - + if (buflen > 0) { *retlen = min(buflen, fcb->reparse_xattr.Length); RtlCopyMemory(buffer, fcb->reparse_xattr.Buffer, *retlen); } else *retlen = 0; + + Status = *retlen == 
fcb->reparse_xattr.Length ? STATUS_SUCCESS : STATUS_BUFFER_OVERFLOW; } else Status = STATUS_NOT_A_REPARSE_POINT; } else { Status = STATUS_NOT_A_REPARSE_POINT; } - + end: ExReleaseResourceLite(fcb->Header.Resource); ExReleaseResourceLite(&fcb->Vcb->tree_lock); @@ -159,87 +178,92 @@ static NTSTATUS set_symlink(PIRP Irp, file_ref* fileref, ccb* ccb, REPARSE_DATA_ LARGE_INTEGER offset, time; BTRFS_TIME now; USHORT i; - + if (write) { minlen = offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer.PathBuffer) + sizeof(WCHAR); if (buflen < minlen) { WARN("buffer was less than minimum length (%u < %u)\n", buflen, minlen); return STATUS_INVALID_PARAMETER; } - + + if (rdb->SymbolicLinkReparseBuffer.SubstituteNameLength < sizeof(WCHAR)) { + WARN("rdb->SymbolicLinkReparseBuffer.SubstituteNameLength was too short\n"); + return STATUS_INVALID_PARAMETER; + } + subname.Buffer = &rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)]; subname.MaximumLength = subname.Length = rdb->SymbolicLinkReparseBuffer.SubstituteNameLength; - + TRACE("substitute name = %.*S\n", subname.Length / sizeof(WCHAR), subname.Buffer); } - + fileref->fcb->type = BTRFS_TYPE_SYMLINK; - fileref->fcb->inode_item.st_mode |= __S_IFLNK; - + fileref->fcb->inode_item.generation = fileref->fcb->Vcb->superblock.generation; // so we don't confuse btrfs send on Linux + if (fileref->dc) fileref->dc->type = fileref->fcb->type; - + if (write) { Status = truncate_file(fileref->fcb, 0, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("truncate_file returned %08x\n", Status); return Status; } - + Status = RtlUnicodeToUTF8N(NULL, 0, (PULONG)&target.Length, subname.Buffer, subname.Length); if (!NT_SUCCESS(Status)) { ERR("RtlUnicodeToUTF8N 1 failed with error %08x\n", Status); return Status; } - + target.MaximumLength = target.Length; target.Buffer = ExAllocatePoolWithTag(PagedPool, target.MaximumLength, ALLOC_TAG); if (!target.Buffer) { ERR("out of memory\n"); return 
STATUS_INSUFFICIENT_RESOURCES; } - + Status = RtlUnicodeToUTF8N(target.Buffer, target.Length, (PULONG)&target.Length, subname.Buffer, subname.Length); if (!NT_SUCCESS(Status)) { ERR("RtlUnicodeToUTF8N 2 failed with error %08x\n", Status); ExFreePool(target.Buffer); return Status; } - - for (i = 0; i < target.Length; i++) { + + for (i = 0; i < target.MaximumLength; i++) { if (target.Buffer[i] == '\\') target.Buffer[i] = '/'; } - + offset.QuadPart = 0; tlength = target.Length; Status = write_file2(fileref->fcb->Vcb, Irp, offset, target.Buffer, &tlength, FALSE, TRUE, - TRUE, FALSE, rollback); + TRUE, FALSE, FALSE, rollback); ExFreePool(target.Buffer); } else Status = STATUS_SUCCESS; - + KeQuerySystemTime(&time); win_time_to_unix(time, &now); fileref->fcb->inode_item.transid = fileref->fcb->Vcb->superblock.generation; fileref->fcb->inode_item.sequence++; - + if (!ccb->user_set_change_time) fileref->fcb->inode_item.st_ctime = now; - + if (!ccb->user_set_write_time) fileref->fcb->inode_item.st_mtime = now; - + fileref->fcb->subvol->root_item.ctransid = fileref->fcb->Vcb->superblock.generation; fileref->fcb->subvol->root_item.ctime = now; - + fileref->fcb->inode_item_changed = TRUE; mark_fcb_dirty(fileref->fcb); - + mark_fileref_dirty(fileref); - + return Status; } @@ -255,62 +279,66 @@ NTSTATUS set_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp) { file_ref* fileref; ULONG tag; LIST_ENTRY rollback; - + TRACE("(%p, %p)\n", DeviceObject, Irp); - + InitializeListHead(&rollback); - + if (!FileObject) { ERR("FileObject was NULL\n"); return STATUS_INVALID_PARAMETER; } - + fcb = FileObject->FsContext; ccb = FileObject->FsContext2; - + if (!ccb) { ERR("ccb was NULL\n"); return STATUS_INVALID_PARAMETER; } - + // It isn't documented what permissions FSCTL_SET_REPARSE_POINT needs, but CreateSymbolicLinkW // creates a file with FILE_WRITE_ATTRIBUTES | DELETE | SYNCHRONIZE. 
if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_WRITE_ATTRIBUTES)) { WARN("insufficient privileges\n"); return STATUS_ACCESS_DENIED; } - + fileref = ccb->fileref; - + if (!fileref) { ERR("fileref was NULL\n"); return STATUS_INVALID_PARAMETER; } - + + if (fcb->ads) { + fileref = fileref->parent; + fcb = fileref->fcb; + } + TRACE("%S\n", file_desc(FileObject)); - + ExAcquireResourceSharedLite(&fcb->Vcb->tree_lock, TRUE); ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); - + if (fcb->type == BTRFS_TYPE_SYMLINK) { WARN("tried to set a reparse point on an existing symlink\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + // FIXME - fail if we already have the attribute FILE_ATTRIBUTE_REPARSE_POINT - + // FIXME - die if not file or directory - // FIXME - die if ADS - + if (buflen < sizeof(ULONG)) { WARN("buffer was not long enough to hold tag\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + RtlCopyMemory(&tag, buffer, sizeof(ULONG)); - + if (fcb->type == BTRFS_TYPE_FILE && ((tag == IO_REPARSE_TAG_SYMLINK && rdb->SymbolicLinkReparseBuffer.Flags & SYMLINK_FLAG_RELATIVE) || tag == IO_REPARSE_TAG_LXSS_SYMLINK)) { Status = set_symlink(Irp, fileref, ccb, rdb, buflen, tag == IO_REPARSE_TAG_SYMLINK, &rollback); @@ -318,26 +346,26 @@ NTSTATUS set_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp) { } else { LARGE_INTEGER offset, time; BTRFS_TIME now; - + if (fcb->type == BTRFS_TYPE_DIRECTORY) { // for directories, store as xattr ANSI_STRING buf; - + buf.Buffer = ExAllocatePoolWithTag(PagedPool, buflen, ALLOC_TAG); if (!buf.Buffer) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - buf.Length = buf.MaximumLength = buflen; - + buf.Length = buf.MaximumLength = (UINT16)buflen; + if (fcb->reparse_xattr.Buffer) ExFreePool(fcb->reparse_xattr.Buffer); - + fcb->reparse_xattr = buf; - RtlCopyMemory(fcb->reparse_xattr.Buffer, buffer, buflen); - + RtlCopyMemory(buf.Buffer, buffer, buflen); + fcb->reparse_xattr_changed = TRUE; - 
+ Status = STATUS_SUCCESS; } else { // otherwise, store as file data Status = truncate_file(fcb, 0, Irp, &rollback); @@ -345,49 +373,49 @@ NTSTATUS set_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp) { ERR("truncate_file returned %08x\n", Status); goto end; } - + offset.QuadPart = 0; - - Status = write_file2(fcb->Vcb, Irp, offset, buffer, &buflen, FALSE, TRUE, TRUE, FALSE, &rollback); + + Status = write_file2(fcb->Vcb, Irp, offset, buffer, &buflen, FALSE, TRUE, TRUE, FALSE, FALSE, &rollback); if (!NT_SUCCESS(Status)) { ERR("write_file2 returned %08x\n", Status); goto end; } } - + KeQuerySystemTime(&time); win_time_to_unix(time, &now); fcb->inode_item.transid = fcb->Vcb->superblock.generation; fcb->inode_item.sequence++; - + if (!ccb->user_set_change_time) fcb->inode_item.st_ctime = now; - + if (!ccb->user_set_write_time) fcb->inode_item.st_mtime = now; - + fcb->atts |= FILE_ATTRIBUTE_REPARSE_POINT; fcb->atts_changed = TRUE; - + fcb->subvol->root_item.ctransid = fcb->Vcb->superblock.generation; fcb->subvol->root_item.ctime = now; - + fcb->inode_item_changed = TRUE; mark_fcb_dirty(fcb); } - - send_notification_fcb(fileref, FILE_NOTIFY_CHANGE_LAST_WRITE | FILE_NOTIFY_CHANGE_ATTRIBUTES, FILE_ACTION_MODIFIED); - + + send_notification_fcb(fileref, FILE_NOTIFY_CHANGE_LAST_WRITE | FILE_NOTIFY_CHANGE_ATTRIBUTES, FILE_ACTION_MODIFIED, NULL); + end: if (NT_SUCCESS(Status)) - clear_rollback(fcb->Vcb, &rollback); + clear_rollback(&rollback); else do_rollback(fcb->Vcb, &rollback); ExReleaseResourceLite(fcb->Header.Resource); ExReleaseResourceLite(&fcb->Vcb->tree_lock); - + return Status; } @@ -401,98 +429,98 @@ NTSTATUS delete_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp) { ccb* ccb; file_ref* fileref; LIST_ENTRY rollback; - + TRACE("(%p, %p)\n", DeviceObject, Irp); - + InitializeListHead(&rollback); - + if (!FileObject) { ERR("FileObject was NULL\n"); return STATUS_INVALID_PARAMETER; } - + fcb = FileObject->FsContext; - + if (!fcb) { ERR("fcb was NULL\n"); return 
STATUS_INVALID_PARAMETER; } - + ccb = FileObject->FsContext2; - + if (!ccb) { ERR("ccb was NULL\n"); return STATUS_INVALID_PARAMETER; } - + if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_WRITE_ATTRIBUTES)) { WARN("insufficient privileges\n"); return STATUS_ACCESS_DENIED; } - + fileref = ccb->fileref; - + if (!fileref) { ERR("fileref was NULL\n"); - Status = STATUS_INVALID_PARAMETER; - goto end; + return STATUS_INVALID_PARAMETER; } - + ExAcquireResourceSharedLite(&fcb->Vcb->tree_lock, TRUE); ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); - + TRACE("%S\n", file_desc(FileObject)); - + if (buflen < offsetof(REPARSE_DATA_BUFFER, GenericReparseBuffer.DataBuffer)) { ERR("buffer was too short\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + if (rdb->ReparseDataLength > 0) { WARN("rdb->ReparseDataLength was not zero\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + if (fcb->ads) { WARN("tried to delete reparse point on ADS\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + if (fcb->type == BTRFS_TYPE_SYMLINK) { LARGE_INTEGER time; BTRFS_TIME now; - + if (rdb->ReparseTag != IO_REPARSE_TAG_SYMLINK) { WARN("reparse tag was not IO_REPARSE_TAG_SYMLINK\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + KeQuerySystemTime(&time); win_time_to_unix(time, &now); - + fileref->fcb->type = BTRFS_TYPE_FILE; fileref->fcb->inode_item.st_mode &= ~__S_IFLNK; fileref->fcb->inode_item.st_mode |= __S_IFREG; + fileref->fcb->inode_item.generation = fileref->fcb->Vcb->superblock.generation; // so we don't confuse btrfs send on Linux fileref->fcb->inode_item.transid = fileref->fcb->Vcb->superblock.generation; fileref->fcb->inode_item.sequence++; - + if (!ccb->user_set_change_time) fileref->fcb->inode_item.st_ctime = now; - + if (!ccb->user_set_write_time) fileref->fcb->inode_item.st_mtime = now; - + fileref->fcb->atts &= ~FILE_ATTRIBUTE_REPARSE_POINT; - + if (fileref->dc) fileref->dc->type = fileref->fcb->type; - + mark_fileref_dirty(fileref); - + 
fileref->fcb->inode_item_changed = TRUE; mark_fcb_dirty(fileref->fcb); @@ -501,27 +529,27 @@ NTSTATUS delete_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp) { } else if (fcb->type == BTRFS_TYPE_FILE) { LARGE_INTEGER time; BTRFS_TIME now; - + // FIXME - do we need to check that the reparse tags match? - + Status = truncate_file(fcb, 0, Irp, &rollback); if (!NT_SUCCESS(Status)) { ERR("truncate_file returned %08x\n", Status); goto end; } - + fcb->atts &= ~FILE_ATTRIBUTE_REPARSE_POINT; fcb->atts_changed = TRUE; - + KeQuerySystemTime(&time); win_time_to_unix(time, &now); - + fcb->inode_item.transid = fcb->Vcb->superblock.generation; fcb->inode_item.sequence++; - + if (!ccb->user_set_change_time) fcb->inode_item.st_ctime = now; - + if (!ccb->user_set_write_time) fcb->inode_item.st_mtime = now; @@ -533,28 +561,28 @@ NTSTATUS delete_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp) { } else if (fcb->type == BTRFS_TYPE_DIRECTORY) { LARGE_INTEGER time; BTRFS_TIME now; - + // FIXME - do we need to check that the reparse tags match? 
- + fcb->atts &= ~FILE_ATTRIBUTE_REPARSE_POINT; fcb->atts_changed = TRUE; - + if (fcb->reparse_xattr.Buffer) { ExFreePool(fcb->reparse_xattr.Buffer); fcb->reparse_xattr.Buffer = NULL; } - + fcb->reparse_xattr_changed = TRUE; - + KeQuerySystemTime(&time); win_time_to_unix(time, &now); fcb->inode_item.transid = fcb->Vcb->superblock.generation; fcb->inode_item.sequence++; - + if (!ccb->user_set_change_time) fcb->inode_item.st_ctime = now; - + if (!ccb->user_set_write_time) fcb->inode_item.st_mtime = now; @@ -568,19 +596,19 @@ NTSTATUS delete_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp) { Status = STATUS_INVALID_PARAMETER; goto end; } - + Status = STATUS_SUCCESS; - - send_notification_fcb(fileref, FILE_NOTIFY_CHANGE_LAST_WRITE | FILE_NOTIFY_CHANGE_ATTRIBUTES, FILE_ACTION_MODIFIED); - + + send_notification_fcb(fileref, FILE_NOTIFY_CHANGE_LAST_WRITE | FILE_NOTIFY_CHANGE_ATTRIBUTES, FILE_ACTION_MODIFIED, NULL); + end: if (NT_SUCCESS(Status)) - clear_rollback(fcb->Vcb, &rollback); + clear_rollback(&rollback); else do_rollback(fcb->Vcb, &rollback); - + ExReleaseResourceLite(fcb->Header.Resource); ExReleaseResourceLite(&fcb->Vcb->tree_lock); - + return Status; } diff --git a/reactos/drivers/filesystems/btrfs/scrub.c b/reactos/drivers/filesystems/btrfs/scrub.c new file mode 100644 index 00000000000..bab1241579b --- /dev/null +++ b/reactos/drivers/filesystems/btrfs/scrub.c @@ -0,0 +1,3453 @@ +/* Copyright (c) Mark Harmstone 2017 + * + * This file is part of WinBtrfs. + * + * WinBtrfs is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public Licence as published by + * the Free Software Foundation, either version 3 of the Licence, or + * (at your option) any later version. + * + * WinBtrfs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public Licence for more details. + * + * You should have received a copy of the GNU Lesser General Public Licence + * along with WinBtrfs. If not, see . */ + +#include "btrfs_drv.h" + +#define SCRUB_UNIT 0x100000 // 1 MB + +struct _scrub_context; + +typedef struct { + struct _scrub_context* context; + PIRP Irp; + UINT64 start; + UINT32 length; + IO_STATUS_BLOCK iosb; + UINT8* buf; + BOOL csum_error; + UINT32* bad_csums; +} scrub_context_stripe; + +typedef struct _scrub_context { + KEVENT Event; + scrub_context_stripe* stripes; + LONG stripes_left; +} scrub_context; + +typedef struct { + ANSI_STRING name; + BOOL orig_subvol; + LIST_ENTRY list_entry; +} path_part; + +static void log_file_checksum_error(device_extension* Vcb, UINT64 addr, UINT64 devid, UINT64 subvol, UINT64 inode, UINT64 offset) { + LIST_ENTRY *le, parts; + root* r = NULL; + KEY searchkey; + traverse_ptr tp; + UINT64 dir; + BOOL orig_subvol = TRUE, not_in_tree = FALSE; + ANSI_STRING fn; + scrub_error* err; + NTSTATUS Status; + ULONG utf16len; + + le = Vcb->roots.Flink; + while (le != &Vcb->roots) { + root* r2 = CONTAINING_RECORD(le, root, list_entry); + + if (r2->id == subvol) { + r = r2; + break; + } + + le = le->Flink; + } + + if (!r) { + ERR("could not find subvol %llx\n", subvol); + return; + } + + InitializeListHead(&parts); + + dir = inode; + + while (TRUE) { + if (dir == r->root_item.objid) { + if (r == Vcb->root_fileref->fcb->subvol) + break; + + searchkey.obj_id = r->id; + searchkey.obj_type = TYPE_ROOT_BACKREF; + searchkey.offset = 0xffffffffffffffff; + + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + goto end; + } + + if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { + ROOT_REF* rr = (ROOT_REF*)tp.item->data; + path_part* pp; + + if (tp.item->size < sizeof(ROOT_REF)) { + ERR("(%llx,%x,%llx) was %u bytes, expected at 
least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF)); + goto end; + } + + if (tp.item->size < offsetof(ROOT_REF, name[0]) + rr->n) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, + tp.item->size, offsetof(ROOT_REF, name[0]) + rr->n); + goto end; + } + + pp = ExAllocatePoolWithTag(PagedPool, sizeof(path_part), ALLOC_TAG); + if (!pp) { + ERR("out of memory\n"); + goto end; + } + + pp->name.Buffer = rr->name; + pp->name.Length = pp->name.MaximumLength = rr->n; + pp->orig_subvol = FALSE; + + InsertTailList(&parts, &pp->list_entry); + + r = NULL; + + le = Vcb->roots.Flink; + while (le != &Vcb->roots) { + root* r2 = CONTAINING_RECORD(le, root, list_entry); + + if (r2->id == tp.item->key.offset) { + r = r2; + break; + } + + le = le->Flink; + } + + if (!r) { + ERR("could not find subvol %llx\n", tp.item->key.offset); + goto end; + } + + dir = rr->dir; + orig_subvol = FALSE; + } else { + not_in_tree = TRUE; + break; + } + } else { + searchkey.obj_id = dir; + searchkey.obj_type = TYPE_INODE_EXTREF; + searchkey.offset = 0xffffffffffffffff; + + Status = find_item(Vcb, r, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + goto end; + } + + if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_REF) { + INODE_REF* ir = (INODE_REF*)tp.item->data; + path_part* pp; + + if (tp.item->size < sizeof(INODE_REF)) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_REF)); + goto end; + } + + if (tp.item->size < offsetof(INODE_REF, name[0]) + ir->n) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, + tp.item->size, offsetof(INODE_REF, name[0]) + ir->n); + goto end; + } + + pp = 
ExAllocatePoolWithTag(PagedPool, sizeof(path_part), ALLOC_TAG); + if (!pp) { + ERR("out of memory\n"); + goto end; + } + + pp->name.Buffer = ir->name; + pp->name.Length = pp->name.MaximumLength = ir->n; + pp->orig_subvol = orig_subvol; + + InsertTailList(&parts, &pp->list_entry); + + if (dir == tp.item->key.offset) + break; + + dir = tp.item->key.offset; + } else if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_EXTREF) { + INODE_EXTREF* ier = (INODE_EXTREF*)tp.item->data; + path_part* pp; + + if (tp.item->size < sizeof(INODE_EXTREF)) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, + tp.item->size, sizeof(INODE_EXTREF)); + goto end; + } + + if (tp.item->size < offsetof(INODE_EXTREF, name[0]) + ier->n) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, + tp.item->size, offsetof(INODE_EXTREF, name[0]) + ier->n); + goto end; + } + + pp = ExAllocatePoolWithTag(PagedPool, sizeof(path_part), ALLOC_TAG); + if (!pp) { + ERR("out of memory\n"); + goto end; + } + + pp->name.Buffer = ier->name; + pp->name.Length = pp->name.MaximumLength = ier->n; + pp->orig_subvol = orig_subvol; + + InsertTailList(&parts, &pp->list_entry); + + if (dir == ier->dir) + break; + + dir = ier->dir; + } else { + ERR("could not find INODE_REF for inode %llx in subvol %llx\n", dir, r->id); + goto end; + } + } + } + + fn.MaximumLength = 0; + + if (not_in_tree) { + le = parts.Blink; + while (le != &parts) { + path_part* pp = CONTAINING_RECORD(le, path_part, list_entry); + LIST_ENTRY* le2 = le->Blink; + + if (pp->orig_subvol) + break; + + RemoveTailList(&parts); + ExFreePool(pp); + + le = le2; + } + } + + le = parts.Flink; + while (le != &parts) { + path_part* pp = CONTAINING_RECORD(le, path_part, list_entry); + + fn.MaximumLength += pp->name.Length + 1; + + le = le->Flink; + } + + fn.Buffer = 
ExAllocatePoolWithTag(PagedPool, fn.MaximumLength, ALLOC_TAG); + if (!fn.Buffer) { + ERR("out of memory\n"); + goto end; + } + + fn.Length = 0; + + le = parts.Blink; + while (le != &parts) { + path_part* pp = CONTAINING_RECORD(le, path_part, list_entry); + + fn.Buffer[fn.Length] = '\\'; + fn.Length++; + + RtlCopyMemory(&fn.Buffer[fn.Length], pp->name.Buffer, pp->name.Length); + fn.Length += pp->name.Length; + + le = le->Blink; + } + + if (not_in_tree) + ERR("subvol %llx, %.*s, offset %llx\n", subvol, fn.Length, fn.Buffer, offset); + else + ERR("%.*s, offset %llx\n", fn.Length, fn.Buffer, offset); + + Status = RtlUTF8ToUnicodeN(NULL, 0, &utf16len, fn.Buffer, fn.Length); + if (!NT_SUCCESS(Status)) { + ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); + ExFreePool(fn.Buffer); + goto end; + } + + err = ExAllocatePoolWithTag(PagedPool, offsetof(scrub_error, data.filename[0]) + utf16len, ALLOC_TAG); + if (!err) { + ERR("out of memory\n"); + ExFreePool(fn.Buffer); + goto end; + } + + err->address = addr; + err->device = devid; + err->recovered = FALSE; + err->is_metadata = FALSE; + err->parity = FALSE; + + err->data.subvol = not_in_tree ? 
subvol : 0; + err->data.offset = offset; + err->data.filename_length = (UINT16)utf16len; + + Status = RtlUTF8ToUnicodeN(err->data.filename, utf16len, &utf16len, fn.Buffer, fn.Length); + if (!NT_SUCCESS(Status)) { + ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); + ExFreePool(fn.Buffer); + ExFreePool(err); + goto end; + } + + ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE); + + Vcb->scrub.num_errors++; + InsertTailList(&Vcb->scrub.errors, &err->list_entry); + + ExReleaseResourceLite(&Vcb->scrub.stats_lock); + + ExFreePool(fn.Buffer); + +end: + while (!IsListEmpty(&parts)) { + path_part* pp = CONTAINING_RECORD(RemoveHeadList(&parts), path_part, list_entry); + + ExFreePool(pp); + } +} + +static void log_file_checksum_error_shared(device_extension* Vcb, UINT64 treeaddr, UINT64 addr, UINT64 devid, UINT64 extent) { + tree_header* tree; + NTSTATUS Status; + leaf_node* ln; + ULONG i; + + tree = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG); + if (!tree) { + ERR("out of memory\n"); + return; + } + + Status = read_data(Vcb, treeaddr, Vcb->superblock.node_size, NULL, TRUE, (UINT8*)tree, NULL, NULL, NULL, 0, FALSE, NormalPagePriority); + if (!NT_SUCCESS(Status)) { + ERR("read_data returned %08x\n", Status); + goto end; + } + + if (tree->level != 0) { + ERR("tree level was %x, expected 0\n", tree->level); + goto end; + } + + ln = (leaf_node*)&tree[1]; + + for (i = 0; i < tree->num_items; i++) { + if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { + EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)tree + sizeof(tree_header) + ln[i].offset); + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; + + if (ed->type == EXTENT_TYPE_REGULAR && ed2->size != 0 && ed2->address == addr) + log_file_checksum_error(Vcb, addr, devid, tree->tree_id, ln[i].key.obj_id, ln[i].key.offset + addr - extent); + } + } + +end: + ExFreePool(tree); +} + +static void log_tree_checksum_error(device_extension* Vcb, UINT64 
addr, UINT64 devid, UINT64 root, UINT8 level, KEY* firstitem) { + scrub_error* err; + + err = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_error), ALLOC_TAG); + if (!err) { + ERR("out of memory\n"); + return; + } + + err->address = addr; + err->device = devid; + err->recovered = FALSE; + err->is_metadata = TRUE; + err->parity = FALSE; + + err->metadata.root = root; + err->metadata.level = level; + + if (firstitem) { + ERR("root %llx, level %u, first item (%llx,%x,%llx)\n", root, level, firstitem->obj_id, + firstitem->obj_type, firstitem->offset); + + err->metadata.firstitem = *firstitem; + } else { + ERR("root %llx, level %u\n", root, level); + + RtlZeroMemory(&err->metadata.firstitem, sizeof(KEY)); + } + + ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE); + + Vcb->scrub.num_errors++; + InsertTailList(&Vcb->scrub.errors, &err->list_entry); + + ExReleaseResourceLite(&Vcb->scrub.stats_lock); +} + +static void log_tree_checksum_error_shared(device_extension* Vcb, UINT64 offset, UINT64 address, UINT64 devid) { + tree_header* tree; + NTSTATUS Status; + internal_node* in; + ULONG i; + + tree = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG); + if (!tree) { + ERR("out of memory\n"); + return; + } + + Status = read_data(Vcb, offset, Vcb->superblock.node_size, NULL, TRUE, (UINT8*)tree, NULL, NULL, NULL, 0, FALSE, NormalPagePriority); + if (!NT_SUCCESS(Status)) { + ERR("read_data returned %08x\n", Status); + goto end; + } + + if (tree->level == 0) { + ERR("tree level was 0\n"); + goto end; + } + + in = (internal_node*)&tree[1]; + + for (i = 0; i < tree->num_items; i++) { + if (in[i].address == address) { + log_tree_checksum_error(Vcb, address, devid, tree->tree_id, tree->level - 1, &in[i].key); + break; + } + } + +end: + ExFreePool(tree); +} + +static void log_unrecoverable_error(device_extension* Vcb, UINT64 address, UINT64 devid) { + KEY searchkey; + traverse_ptr tp; + NTSTATUS Status; + EXTENT_ITEM* ei; + EXTENT_ITEM2* ei2 = NULL; + 
UINT8* ptr; + ULONG len; + UINT64 rc; + + // FIXME - still log even if rest of this function fails + + searchkey.obj_id = address; + searchkey.obj_type = TYPE_METADATA_ITEM; + searchkey.offset = 0xffffffffffffffff; + + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return; + } + + if ((tp.item->key.obj_type != TYPE_EXTENT_ITEM && tp.item->key.obj_type != TYPE_METADATA_ITEM) || + tp.item->key.obj_id >= address + Vcb->superblock.sector_size || + (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.obj_id + tp.item->key.offset <= address) || + (tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->key.obj_id + Vcb->superblock.node_size <= address) + ) + return; + + if (tp.item->size < sizeof(EXTENT_ITEM)) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); + return; + } + + ei = (EXTENT_ITEM*)tp.item->data; + ptr = (UINT8*)&ei[1]; + len = tp.item->size - sizeof(EXTENT_ITEM); + + if (tp.item->key.obj_id == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) { + if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, + tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)); + return; + } + + ei2 = (EXTENT_ITEM2*)ptr; + + ptr += sizeof(EXTENT_ITEM2); + len -= sizeof(EXTENT_ITEM2); + } + + rc = 0; + + while (len > 0) { + UINT8 type = *ptr; + + ptr++; + len--; + + if (type == TYPE_TREE_BLOCK_REF) { + TREE_BLOCK_REF* tbr; + + if (len < sizeof(TREE_BLOCK_REF)) { + ERR("TREE_BLOCK_REF takes up %u bytes, but only %u remaining\n", sizeof(TREE_BLOCK_REF), len); + break; + } + + tbr = (TREE_BLOCK_REF*)ptr; + + log_tree_checksum_error(Vcb, address, devid, tbr->offset, ei2 ? ei2->level : (UINT8)tp.item->key.offset, ei2 ? 
&ei2->firstitem : NULL); + + rc++; + + ptr += sizeof(TREE_BLOCK_REF); + len -= sizeof(TREE_BLOCK_REF); + } else if (type == TYPE_EXTENT_DATA_REF) { + EXTENT_DATA_REF* edr; + + if (len < sizeof(EXTENT_DATA_REF)) { + ERR("EXTENT_DATA_REF takes up %u bytes, but only %u remaining\n", sizeof(EXTENT_DATA_REF), len); + break; + } + + edr = (EXTENT_DATA_REF*)ptr; + + log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id); + + rc += edr->count; + + ptr += sizeof(EXTENT_DATA_REF); + len -= sizeof(EXTENT_DATA_REF); + } else if (type == TYPE_SHARED_BLOCK_REF) { + SHARED_BLOCK_REF* sbr; + + if (len < sizeof(SHARED_BLOCK_REF)) { + ERR("SHARED_BLOCK_REF takes up %u bytes, but only %u remaining\n", sizeof(SHARED_BLOCK_REF), len); + break; + } + + sbr = (SHARED_BLOCK_REF*)ptr; + + log_tree_checksum_error_shared(Vcb, sbr->offset, address, devid); + + rc++; + + ptr += sizeof(SHARED_BLOCK_REF); + len -= sizeof(SHARED_BLOCK_REF); + } else if (type == TYPE_SHARED_DATA_REF) { + SHARED_DATA_REF* sdr; + + if (len < sizeof(SHARED_DATA_REF)) { + ERR("SHARED_DATA_REF takes up %u bytes, but only %u remaining\n", sizeof(SHARED_DATA_REF), len); + break; + } + + sdr = (SHARED_DATA_REF*)ptr; + + log_file_checksum_error_shared(Vcb, sdr->offset, address, devid, tp.item->key.obj_id); + + rc += sdr->count; + + ptr += sizeof(SHARED_DATA_REF); + len -= sizeof(SHARED_DATA_REF); + } else { + ERR("unknown extent type %x\n", type); + break; + } + } + + if (rc < ei->refcount) { + do { + traverse_ptr next_tp; + + if (find_next_item(Vcb, &tp, &next_tp, FALSE, NULL)) + tp = next_tp; + else + break; + + if (tp.item->key.obj_id == address) { + if (tp.item->key.obj_type == TYPE_TREE_BLOCK_REF) + log_tree_checksum_error(Vcb, address, devid, tp.item->key.offset, ei2 ? ei2->level : (UINT8)tp.item->key.offset, ei2 ? 
&ei2->firstitem : NULL); + else if (tp.item->key.obj_type == TYPE_EXTENT_DATA_REF) { + EXTENT_DATA_REF* edr; + + if (tp.item->size < sizeof(EXTENT_DATA_REF)) { + ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, + tp.item->size, sizeof(EXTENT_DATA_REF)); + break; + } + + edr = (EXTENT_DATA_REF*)tp.item->data; + + log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id); + } else if (tp.item->key.obj_type == TYPE_SHARED_BLOCK_REF) + log_tree_checksum_error_shared(Vcb, tp.item->key.offset, address, devid); + else if (tp.item->key.obj_type == TYPE_SHARED_DATA_REF) + log_file_checksum_error_shared(Vcb, tp.item->key.offset, address, devid, tp.item->key.obj_id); + } else + break; + } while (TRUE); + } +} + +static void log_error(device_extension* Vcb, UINT64 addr, UINT64 devid, BOOL metadata, BOOL recoverable, BOOL parity) { + if (recoverable) { + scrub_error* err; + + if (parity) { + ERR("recovering from parity error at %llx on device %llx\n", addr, devid); + } else { + if (metadata) + ERR("recovering from metadata checksum error at %llx on device %llx\n", addr, devid); + else + ERR("recovering from data checksum error at %llx on device %llx\n", addr, devid); + } + + err = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_error), ALLOC_TAG); + if (!err) { + ERR("out of memory\n"); + return; + } + + err->address = addr; + err->device = devid; + err->recovered = TRUE; + err->is_metadata = metadata; + err->parity = parity; + + if (metadata) + RtlZeroMemory(&err->metadata, sizeof(err->metadata)); + else + RtlZeroMemory(&err->data, sizeof(err->data)); + + ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE); + + Vcb->scrub.num_errors++; + InsertTailList(&Vcb->scrub.errors, &err->list_entry); + + ExReleaseResourceLite(&Vcb->scrub.stats_lock); + } else { + if (metadata) + ERR("unrecoverable metadata checksum error at %llx\n", addr); + else + 
ERR("unrecoverable data checksum error at %llx\n", addr); + + log_unrecoverable_error(Vcb, addr, devid); + } +} + +_Function_class_(IO_COMPLETION_ROUTINE) +#ifdef __REACTOS__ +static NTSTATUS NTAPI scrub_read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#else +static NTSTATUS scrub_read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#endif + scrub_context_stripe* stripe = conptr; + scrub_context* context = (scrub_context*)stripe->context; + ULONG left = InterlockedDecrement(&context->stripes_left); + + UNUSED(DeviceObject); + + stripe->iosb = Irp->IoStatus; + + if (left == 0) + KeSetEvent(&context->Event, 0, FALSE); + + return STATUS_MORE_PROCESSING_REQUIRED; +} + +static NTSTATUS scrub_extent_dup(device_extension* Vcb, chunk* c, UINT64 offset, UINT32* csum, scrub_context* context) { + NTSTATUS Status; + BOOL csum_error = FALSE; + ULONG i; + CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; + UINT16 present_devices = 0; + + if (csum) { + ULONG good_stripe = 0xffffffff; + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (c->devices[i]->devobj) { + present_devices++; + + // if first stripe is okay, we only need to check that the others are identical to it + if (good_stripe != 0xffffffff) { + if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf, + context->stripes[good_stripe].length) != context->stripes[i].length) { + context->stripes[i].csum_error = TRUE; + csum_error = TRUE; + log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } + } else { + Status = check_csum(Vcb, context->stripes[i].buf, context->stripes[i].length / Vcb->superblock.sector_size, csum); + if (Status == STATUS_CRC_ERROR) { + context->stripes[i].csum_error = TRUE; + csum_error = TRUE; + log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } else if (!NT_SUCCESS(Status)) { + ERR("check_csum returned %08x\n", Status); + return Status; + } else + good_stripe = i; + } + 
} + } + } else { + ULONG good_stripe = 0xffffffff; + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + ULONG j; + + if (c->devices[i]->devobj) { + // if first stripe is okay, we only need to check that the others are identical to it + if (good_stripe != 0xffffffff) { + if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf, + context->stripes[good_stripe].length) != context->stripes[i].length) { + context->stripes[i].csum_error = TRUE; + csum_error = TRUE; + log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } + } else { + for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { + tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; + UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); + + if (crc32 != *((UINT32*)th->csum) || th->address != offset + UInt32x32To64(j, Vcb->superblock.node_size)) { + context->stripes[i].csum_error = TRUE; + csum_error = TRUE; + log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } + } + + if (!context->stripes[i].csum_error) + good_stripe = i; + } + } + } + } + + if (!csum_error) + return STATUS_SUCCESS; + + // handle checksum error + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (context->stripes[i].csum_error) { + if (csum) { + context->stripes[i].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[i].length * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); + if (!context->stripes[i].bad_csums) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + Status = calc_csum(Vcb, context->stripes[i].buf, context->stripes[i].length / Vcb->superblock.sector_size, context->stripes[i].bad_csums); + if (!NT_SUCCESS(Status)) { + ERR("calc_csum returned %08x\n", Status); + return Status; + } + } else { + ULONG j; + + context->stripes[i].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[i].length * 
sizeof(UINT32) / Vcb->superblock.node_size, ALLOC_TAG); + if (!context->stripes[i].bad_csums) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { + tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; + UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); + + context->stripes[i].bad_csums[j] = crc32; + } + } + } + } + + if (present_devices > 1) { + ULONG good_stripe = 0xffffffff; + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (c->devices[i]->devobj && !context->stripes[i].csum_error) { + good_stripe = i; + break; + } + } + + if (good_stripe != 0xffffffff) { + // log + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (context->stripes[i].csum_error) { + ULONG j; + + if (csum) { + for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) { + if (context->stripes[i].bad_csums[j] != csum[j]) { + UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size); + + log_error(Vcb, addr, c->devices[i]->devitem.dev_id, FALSE, TRUE, FALSE); + log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } + } + } else { + for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { + tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; + UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.node_size); + + if (context->stripes[i].bad_csums[j] != *((UINT32*)th->csum) || th->address != addr) { + log_error(Vcb, addr, c->devices[i]->devitem.dev_id, TRUE, TRUE, FALSE); + log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } + } + } + } + } + + // write good data over bad + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (context->stripes[i].csum_error && !c->devices[i]->readonly) { + Status = write_data_phys(c->devices[i]->devobj, 
cis[i].offset + offset - c->offset, + context->stripes[good_stripe].buf, context->stripes[i].length); + + if (!NT_SUCCESS(Status)) { + ERR("write_data_phys returned %08x\n", Status); + log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_WRITE_ERRORS); + return Status; + } + } + } + + return STATUS_SUCCESS; + } + + // if csum errors on all stripes, check sector by sector + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + ULONG j; + + if (c->devices[i]->devobj) { + if (csum) { + for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) { + if (context->stripes[i].bad_csums[j] != csum[j]) { + ULONG k; + UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size); + BOOL recovered = FALSE; + + for (k = 0; k < c->chunk_item->num_stripes; k++) { + if (i != k && c->devices[k]->devobj && context->stripes[k].bad_csums[j] == csum[j]) { + log_error(Vcb, addr, c->devices[i]->devitem.dev_id, FALSE, TRUE, FALSE); + log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + + RtlCopyMemory(context->stripes[i].buf + (j * Vcb->superblock.sector_size), + context->stripes[k].buf + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + recovered = TRUE; + break; + } + } + + if (!recovered) { + log_error(Vcb, addr, c->devices[i]->devitem.dev_id, FALSE, FALSE, FALSE); + log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } + } + } + } else { + for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { + tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; + UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.node_size); + + if (context->stripes[i].bad_csums[j] != *((UINT32*)th->csum) || th->address != addr) { + ULONG k; + BOOL recovered = FALSE; + + for (k = 0; k < c->chunk_item->num_stripes; k++) { + if (i != k && c->devices[k]->devobj) { + tree_header* th2 = (tree_header*)&context->stripes[k].buf[j * Vcb->superblock.node_size]; + + if 
(context->stripes[k].bad_csums[j] == *((UINT32*)th2->csum) && th2->address == addr) { + log_error(Vcb, addr, c->devices[i]->devitem.dev_id, TRUE, TRUE, FALSE); + log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + + RtlCopyMemory(th, th2, Vcb->superblock.node_size); + + recovered = TRUE; + break; + } + } + } + + if (!recovered) { + log_error(Vcb, addr, c->devices[i]->devitem.dev_id, TRUE, FALSE, FALSE); + log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } + } + } + } + } + } + + // write good data over bad + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (c->devices[i]->devobj && !c->devices[i]->readonly) { + Status = write_data_phys(c->devices[i]->devobj, cis[i].offset + offset - c->offset, + context->stripes[i].buf, context->stripes[i].length); + if (!NT_SUCCESS(Status)) { + ERR("write_data_phys returned %08x\n", Status); + log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + return Status; + } + } + } + + return STATUS_SUCCESS; + } + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (c->devices[i]->devobj) { + ULONG j; + + if (csum) { + for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) { + if (context->stripes[i].bad_csums[j] != csum[j]) { + UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size); + + log_error(Vcb, addr, c->devices[i]->devitem.dev_id, FALSE, FALSE, FALSE); + } + } + } else { + for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { + tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; + UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.node_size); + + if (context->stripes[i].bad_csums[j] != *((UINT32*)th->csum) || th->address != addr) + log_error(Vcb, addr, c->devices[i]->devitem.dev_id, TRUE, FALSE, FALSE); + } + } + } + } + + return STATUS_SUCCESS; +} + +static NTSTATUS scrub_extent_raid0(device_extension* Vcb, chunk* c, UINT64 offset, UINT32 length, 
UINT16 startoffstripe, UINT32* csum, scrub_context* context) { + ULONG j; + UINT16 stripe; + UINT32 pos, *stripeoff; + + pos = 0; + stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * c->chunk_item->num_stripes, ALLOC_TAG); + if (!stripeoff) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(stripeoff, sizeof(UINT32) * c->chunk_item->num_stripes); + + stripe = startoffstripe; + while (pos < length) { + UINT32 readlen; + + if (pos == 0) + readlen = (UINT32)min(context->stripes[stripe].length, c->chunk_item->stripe_length - (context->stripes[stripe].start % c->chunk_item->stripe_length)); + else + readlen = min(length - pos, (UINT32)c->chunk_item->stripe_length); + + if (csum) { + for (j = 0; j < readlen; j += Vcb->superblock.sector_size) { + UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + stripeoff[stripe], Vcb->superblock.sector_size); + + if (crc32 != csum[pos / Vcb->superblock.sector_size]) { + UINT64 addr = offset + pos; + + log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, FALSE, FALSE, FALSE); + log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } + + pos += Vcb->superblock.sector_size; + stripeoff[stripe] += Vcb->superblock.sector_size; + } + } else { + for (j = 0; j < readlen; j += Vcb->superblock.node_size) { + tree_header* th = (tree_header*)(context->stripes[stripe].buf + stripeoff[stripe]); + UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); + UINT64 addr = offset + pos; + + if (crc32 != *((UINT32*)th->csum) || th->address != addr) { + log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, TRUE, FALSE, FALSE); + log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } + + pos += Vcb->superblock.node_size; + stripeoff[stripe] += Vcb->superblock.node_size; + } + } + + stripe = (stripe + 1) % c->chunk_item->num_stripes; + } + + ExFreePool(stripeoff); + + return 
STATUS_SUCCESS; +} + +static NTSTATUS scrub_extent_raid10(device_extension* Vcb, chunk* c, UINT64 offset, UINT32 length, UINT16 startoffstripe, UINT32* csum, scrub_context* context) { + ULONG j; + UINT16 stripe, sub_stripes = max(c->chunk_item->sub_stripes, 1); + UINT32 pos, *stripeoff; + BOOL csum_error = FALSE; + NTSTATUS Status; + + pos = 0; + stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * c->chunk_item->num_stripes / sub_stripes, ALLOC_TAG); + if (!stripeoff) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(stripeoff, sizeof(UINT32) * c->chunk_item->num_stripes / sub_stripes); + + stripe = startoffstripe; + while (pos < length) { + UINT32 readlen; + + if (pos == 0) + readlen = (UINT32)min(context->stripes[stripe * sub_stripes].length, + c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length)); + else + readlen = min(length - pos, (UINT32)c->chunk_item->stripe_length); + + if (csum) { + ULONG good_stripe = 0xffffffff; + UINT16 k; + + for (k = 0; k < sub_stripes; k++) { + if (c->devices[(stripe * sub_stripes) + k]->devobj) { + // if first stripe is okay, we only need to check that the others are identical to it + if (good_stripe != 0xffffffff) { + if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe], + context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe], + readlen) != readlen) { + context->stripes[(stripe * sub_stripes) + k].csum_error = TRUE; + csum_error = TRUE; + log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } + } else { + for (j = 0; j < readlen; j += Vcb->superblock.sector_size) { + UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j, Vcb->superblock.sector_size); + + if (crc32 != csum[(pos + j) / Vcb->superblock.sector_size]) { + csum_error = TRUE; + 
context->stripes[(stripe * sub_stripes) + k].csum_error = TRUE; + log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + break; + } + } + + if (!context->stripes[(stripe * sub_stripes) + k].csum_error) + good_stripe = k; + } + } + } + + pos += readlen; + stripeoff[stripe] += readlen; + } else { + ULONG good_stripe = 0xffffffff; + UINT16 k; + + for (k = 0; k < sub_stripes; k++) { + if (c->devices[(stripe * sub_stripes) + k]->devobj) { + // if first stripe is okay, we only need to check that the others are identical to it + if (good_stripe != 0xffffffff) { + if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe], + context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe], + readlen) != readlen) { + context->stripes[(stripe * sub_stripes) + k].csum_error = TRUE; + csum_error = TRUE; + log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } + } else { + for (j = 0; j < readlen; j += Vcb->superblock.node_size) { + tree_header* th = (tree_header*)(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j); + UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); + UINT64 addr = offset + pos + j; + + if (crc32 != *((UINT32*)th->csum) || th->address != addr) { + csum_error = TRUE; + context->stripes[(stripe * sub_stripes) + k].csum_error = TRUE; + log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + break; + } + } + + if (!context->stripes[(stripe * sub_stripes) + k].csum_error) + good_stripe = k; + } + } + } + + pos += readlen; + stripeoff[stripe] += readlen; + } + + stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes); + } + + if (!csum_error) { + Status = STATUS_SUCCESS; + goto end; + } + + for (j = 0; j < c->chunk_item->num_stripes; j += sub_stripes) { + ULONG goodstripe = 0xffffffff; + UINT16 k; + 
BOOL hasbadstripe = FALSE; + + if (context->stripes[j].length == 0) + continue; + + for (k = 0; k < sub_stripes; k++) { + if (c->devices[j + k]->devobj) { + if (!context->stripes[j + k].csum_error) + goodstripe = k; + else + hasbadstripe = TRUE; + } + } + + if (hasbadstripe) { + if (goodstripe != 0xffffffff) { + for (k = 0; k < sub_stripes; k++) { + if (c->devices[j + k]->devobj && context->stripes[j + k].csum_error) { + UINT32 so = 0; + BOOL recovered = FALSE; + + pos = 0; + + stripe = startoffstripe; + while (pos < length) { + UINT32 readlen; + + if (pos == 0) + readlen = (UINT32)min(context->stripes[stripe * sub_stripes].length, + c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length)); + else + readlen = min(length - pos, (UINT32)c->chunk_item->stripe_length); + + if (stripe == j / sub_stripes) { + if (csum) { + ULONG l; + + for (l = 0; l < readlen; l += Vcb->superblock.sector_size) { + if (RtlCompareMemory(context->stripes[j + k].buf + so, + context->stripes[j + goodstripe].buf + so, + Vcb->superblock.sector_size) != Vcb->superblock.sector_size) { + UINT64 addr = offset + pos; + + log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, FALSE, TRUE, FALSE); + + recovered = TRUE; + } + + pos += Vcb->superblock.sector_size; + so += Vcb->superblock.sector_size; + } + } else { + ULONG l; + + for (l = 0; l < readlen; l += Vcb->superblock.node_size) { + if (RtlCompareMemory(context->stripes[j + k].buf + so, + context->stripes[j + goodstripe].buf + so, + Vcb->superblock.node_size) != Vcb->superblock.node_size) { + UINT64 addr = offset + pos; + + log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, TRUE, TRUE, FALSE); + + recovered = TRUE; + } + + pos += Vcb->superblock.node_size; + so += Vcb->superblock.node_size; + } + } + } else + pos += readlen; + + stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes); + } + + if (recovered) { + // write good data over bad + + if (!c->devices[j + k]->readonly) { 
+ CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; + + Status = write_data_phys(c->devices[j + k]->devobj, cis[j + k].offset + offset - c->offset, + context->stripes[j + goodstripe].buf, context->stripes[j + goodstripe].length); + + if (!NT_SUCCESS(Status)) { + ERR("write_data_phys returned %08x\n", Status); + log_device_error(Vcb, c->devices[j + k], BTRFS_DEV_STAT_WRITE_ERRORS); + goto end; + } + } + } + } + } + } else { + UINT32 so = 0; + BOOL recovered = FALSE; + + if (csum) { + for (k = 0; k < sub_stripes; k++) { + if (c->devices[j + k]->devobj) { + context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[j + k].length * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); + if (!context->stripes[j + k].bad_csums) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + Status = calc_csum(Vcb, context->stripes[j + k].buf, context->stripes[j + k].length / Vcb->superblock.sector_size, context->stripes[j + k].bad_csums); + if (!NT_SUCCESS(Status)) { + ERR("calc_csum returned %08x\n", Status); + goto end; + } + } + } + } else { + for (k = 0; k < sub_stripes; k++) { + if (c->devices[j + k]->devobj) { + ULONG l; + + context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[j + k].length * sizeof(UINT32) / Vcb->superblock.node_size, ALLOC_TAG); + if (!context->stripes[j + k].bad_csums) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + for (l = 0; l < context->stripes[j + k].length / Vcb->superblock.node_size; l++) { + tree_header* th = (tree_header*)&context->stripes[j + k].buf[l * Vcb->superblock.node_size]; + UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); + + context->stripes[j + k].bad_csums[l] = crc32; + } + } + } + } + + pos = 0; + + stripe = startoffstripe; + while (pos < length) { + UINT32 readlen; + + if (pos == 0) + readlen = 
(UINT32)min(context->stripes[stripe * sub_stripes].length, + c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length)); + else + readlen = min(length - pos, (UINT32)c->chunk_item->stripe_length); + + if (stripe == j / sub_stripes) { + ULONG l; + + if (csum) { + for (l = 0; l < readlen; l += Vcb->superblock.sector_size) { + UINT32 crc32 = csum[pos / Vcb->superblock.sector_size]; + BOOL has_error = FALSE; + + goodstripe = 0xffffffff; + for (k = 0; k < sub_stripes; k++) { + if (c->devices[j + k]->devobj) { + if (context->stripes[j + k].bad_csums[so / Vcb->superblock.sector_size] != crc32) + has_error = TRUE; + else + goodstripe = k; + } + } + + if (has_error) { + if (goodstripe != 0xffffffff) { + for (k = 0; k < sub_stripes; k++) { + if (c->devices[j + k]->devobj && context->stripes[j + k].bad_csums[so / Vcb->superblock.sector_size] != crc32) { + UINT64 addr = offset + pos; + + log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, FALSE, TRUE, FALSE); + + recovered = TRUE; + + RtlCopyMemory(context->stripes[j + k].buf + so, context->stripes[j + goodstripe].buf + so, + Vcb->superblock.sector_size); + } + } + } else { + UINT64 addr = offset + pos; + + for (k = 0; k < sub_stripes; k++) { + if (c->devices[j + j]->devobj) { + log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, FALSE, FALSE, FALSE); + log_device_error(Vcb, c->devices[j + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } + } + } + } + + pos += Vcb->superblock.sector_size; + so += Vcb->superblock.sector_size; + } + } else { + for (l = 0; l < readlen; l += Vcb->superblock.node_size) { + for (k = 0; k < sub_stripes; k++) { + if (c->devices[j + k]->devobj) { + tree_header* th = (tree_header*)&context->stripes[j + k].buf[so]; + UINT64 addr = offset + pos; + + if (context->stripes[j + k].bad_csums[so / Vcb->superblock.node_size] != *((UINT32*)th->csum) || th->address != addr) { + ULONG m; + + recovered = FALSE; + + for (m = 0; m < sub_stripes; m++) { + if (m != k) 
{ + tree_header* th2 = (tree_header*)&context->stripes[j + m].buf[so]; + + if (context->stripes[j + m].bad_csums[so / Vcb->superblock.node_size] == *((UINT32*)th2->csum) && th2->address == addr) { + log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, TRUE, TRUE, FALSE); + + RtlCopyMemory(th, th2, Vcb->superblock.node_size); + + recovered = TRUE; + break; + } else + log_device_error(Vcb, c->devices[j + m], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } + } + + if (!recovered) + log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, TRUE, FALSE, FALSE); + } + } + } + + pos += Vcb->superblock.node_size; + so += Vcb->superblock.node_size; + } + } + } else + pos += readlen; + + stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes); + } + + if (recovered) { + // write good data over bad + + for (k = 0; k < sub_stripes; k++) { + if (c->devices[j + k]->devobj && !c->devices[j + k]->readonly) { + CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; + + Status = write_data_phys(c->devices[j + k]->devobj, cis[j + k].offset + offset - c->offset, + context->stripes[j + k].buf, context->stripes[j + k].length); + + if (!NT_SUCCESS(Status)) { + ERR("write_data_phys returned %08x\n", Status); + log_device_error(Vcb, c->devices[j + k], BTRFS_DEV_STAT_WRITE_ERRORS); + goto end; + } + } + } + } + } + } + } + + Status = STATUS_SUCCESS; + +end: + ExFreePool(stripeoff); + + return Status; +} + +static NTSTATUS scrub_extent(device_extension* Vcb, chunk* c, ULONG type, UINT64 offset, UINT32 size, UINT32* csum) { + ULONG i; + scrub_context context; + CHUNK_ITEM_STRIPE* cis; + NTSTATUS Status; + UINT16 startoffstripe, num_missing, allowed_missing; + + TRACE("(%p, %p, %llx, %llx, %p)\n", Vcb, c, offset, size, csum); + + context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(scrub_context_stripe) * c->chunk_item->num_stripes, ALLOC_TAG); + if (!context.stripes) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + 
RtlZeroMemory(context.stripes, sizeof(scrub_context_stripe) * c->chunk_item->num_stripes); + + context.stripes_left = 0; + + cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; + + if (type == BLOCK_FLAG_RAID0) { + UINT64 startoff, endoff; + UINT16 endoffstripe; + + get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe); + get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe); + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (startoffstripe > i) + context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; + else if (startoffstripe == i) + context.stripes[i].start = startoff; + else + context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length); + + if (endoffstripe > i) + context.stripes[i].length = (UINT32)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start); + else if (endoffstripe == i) + context.stripes[i].length = (UINT32)(endoff + 1 - context.stripes[i].start); + else + context.stripes[i].length = (UINT32)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start); + } + + allowed_missing = 0; + } else if (type == BLOCK_FLAG_RAID10) { + UINT64 startoff, endoff; + UINT16 endoffstripe, j, sub_stripes = max(c->chunk_item->sub_stripes, 1); + + get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe); + get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe); + + if ((c->chunk_item->num_stripes % sub_stripes) != 0) { + ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", c->offset, c->chunk_item->num_stripes, sub_stripes); + Status = STATUS_INTERNAL_ERROR; + goto end; + } + + startoffstripe 
*= sub_stripes; + endoffstripe *= sub_stripes; + + for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) { + if (startoffstripe > i) + context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; + else if (startoffstripe == i) + context.stripes[i].start = startoff; + else + context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length); + + if (endoffstripe > i) + context.stripes[i].length = (UINT32)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start); + else if (endoffstripe == i) + context.stripes[i].length = (UINT32)(endoff + 1 - context.stripes[i].start); + else + context.stripes[i].length = (UINT32)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start); + + for (j = 1; j < sub_stripes; j++) { + context.stripes[i+j].start = context.stripes[i].start; + context.stripes[i+j].length = context.stripes[i].length; + } + } + + startoffstripe /= sub_stripes; + allowed_missing = 1; + } else + allowed_missing = c->chunk_item->num_stripes - 1; + + num_missing = 0; + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + PIO_STACK_LOCATION IrpSp; + + context.stripes[i].context = (struct _scrub_context*)&context; + + if (type == BLOCK_FLAG_DUPLICATE) { + context.stripes[i].start = offset - c->offset; + context.stripes[i].length = size; + } else if (type != BLOCK_FLAG_RAID0 && type != BLOCK_FLAG_RAID10) { + ERR("unexpected chunk type %x\n", type); + Status = STATUS_INTERNAL_ERROR; + goto end; + } + + if (!c->devices[i]->devobj) { + num_missing++; + + if (num_missing > allowed_missing) { + ERR("too many missing devices (at least %u, maximum allowed %u)\n", num_missing, allowed_missing); + Status = STATUS_INTERNAL_ERROR; + goto end; + } + } else if (context.stripes[i].length > 0) { + context.stripes[i].buf = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG); + + if (!context.stripes[i].buf) { + 
ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, FALSE); + + if (!context.stripes[i].Irp) { + ERR("IoAllocateIrp failed\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp); + IrpSp->MajorFunction = IRP_MJ_READ; + + if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) { + context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG); + if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION; + + context.stripes[i].Irp->UserBuffer = context.stripes[i].buf; + } else if (c->devices[i]->devobj->Flags & DO_DIRECT_IO) { + context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, context.stripes[i].length, FALSE, FALSE, NULL); + if (!context.stripes[i].Irp->MdlAddress) { + ERR("IoAllocateMdl failed\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + Status = STATUS_SUCCESS; + + _SEH2_TRY { + MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + + if (!NT_SUCCESS(Status)) { + ERR("MmProbeAndLockPages threw exception %08x\n", Status); + IoFreeMdl(context.stripes[i].Irp->MdlAddress); + context.stripes[i].Irp->MdlAddress = NULL; + goto end; + } + } else + context.stripes[i].Irp->UserBuffer = context.stripes[i].buf; + + IrpSp->Parameters.Read.Length = context.stripes[i].length; + IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].start + cis[i].offset; + + context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb; + + IoSetCompletionRoutine(context.stripes[i].Irp, 
scrub_read_completion, &context.stripes[i], TRUE, TRUE, TRUE); + + context.stripes_left++; + + Vcb->scrub.data_scrubbed += context.stripes[i].length; + } + } + + if (context.stripes_left == 0) { + ERR("error - not reading any stripes\n"); + Status = STATUS_INTERNAL_ERROR; + goto end; + } + + KeInitializeEvent(&context.Event, NotificationEvent, FALSE); + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (c->devices[i]->devobj && context.stripes[i].length > 0) + IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp); + } + + KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL); + + // return an error if any of the stripes returned an error + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (!NT_SUCCESS(context.stripes[i].iosb.Status)) { + Status = context.stripes[i].iosb.Status; + log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_READ_ERRORS); + goto end; + } + } + + if (type == BLOCK_FLAG_DUPLICATE) { + Status = scrub_extent_dup(Vcb, c, offset, csum, &context); + if (!NT_SUCCESS(Status)) { + ERR("scrub_extent_dup returned %08x\n", Status); + goto end; + } + } else if (type == BLOCK_FLAG_RAID0) { + Status = scrub_extent_raid0(Vcb, c, offset, size, startoffstripe, csum, &context); + if (!NT_SUCCESS(Status)) { + ERR("scrub_extent_raid0 returned %08x\n", Status); + goto end; + } + } else if (type == BLOCK_FLAG_RAID10) { + Status = scrub_extent_raid10(Vcb, c, offset, size, startoffstripe, csum, &context); + if (!NT_SUCCESS(Status)) { + ERR("scrub_extent_raid10 returned %08x\n", Status); + goto end; + } + } + +end: + if (context.stripes) { + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (context.stripes[i].Irp) { + if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) { + MmUnlockPages(context.stripes[i].Irp->MdlAddress); + IoFreeMdl(context.stripes[i].Irp->MdlAddress); + } + IoFreeIrp(context.stripes[i].Irp); + } + + if (context.stripes[i].buf) + ExFreePool(context.stripes[i].buf); + + 
if (context.stripes[i].bad_csums) + ExFreePool(context.stripes[i].bad_csums); + } + + ExFreePool(context.stripes); + } + + return Status; +} + +static NTSTATUS scrub_data_extent(device_extension* Vcb, chunk* c, UINT64 offset, ULONG type, UINT32* csum, RTL_BITMAP* bmp) { + NTSTATUS Status; + ULONG runlength, index; + + runlength = RtlFindFirstRunClear(bmp, &index); + + while (runlength != 0) { + do { + ULONG rl; + + if (runlength * Vcb->superblock.sector_size > SCRUB_UNIT) + rl = SCRUB_UNIT / Vcb->superblock.sector_size; + else + rl = runlength; + + Status = scrub_extent(Vcb, c, type, offset + UInt32x32To64(index, Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, &csum[index]); + if (!NT_SUCCESS(Status)) { + ERR("scrub_data_extent_dup returned %08x\n", Status); + return Status; + } + + runlength -= rl; + index += rl; + } while (runlength > 0); + + runlength = RtlFindNextForwardRunClear(bmp, index, &index); + } + + return STATUS_SUCCESS; +} + +typedef struct { + UINT8* buf; + PIRP Irp; + void* context; + IO_STATUS_BLOCK iosb; + UINT64 offset; + BOOL rewrite, missing; + RTL_BITMAP error; + ULONG* errorarr; +} scrub_context_raid56_stripe; + +typedef struct { + scrub_context_raid56_stripe* stripes; + LONG stripes_left; + KEVENT Event; + RTL_BITMAP alloc; + RTL_BITMAP has_csum; + RTL_BITMAP is_tree; + UINT32* csum; + UINT8* parity_scratch; + UINT8* parity_scratch2; +} scrub_context_raid56; + +_Function_class_(IO_COMPLETION_ROUTINE) +#ifdef __REACTOS__ +static NTSTATUS NTAPI scrub_read_completion_raid56(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#else +static NTSTATUS scrub_read_completion_raid56(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#endif + scrub_context_raid56_stripe* stripe = conptr; + scrub_context_raid56* context = (scrub_context_raid56*)stripe->context; + LONG left = InterlockedDecrement(&context->stripes_left); + + UNUSED(DeviceObject); + + stripe->iosb = Irp->IoStatus; + + if (left == 0) + KeSetEvent(&context->Event, 
0, FALSE); + + return STATUS_MORE_PROCESSING_REQUIRED; +} + +static void scrub_raid5_stripe(device_extension* Vcb, chunk* c, scrub_context_raid56* context, UINT64 stripe_start, UINT64 bit_start, + UINT64 num, UINT16 missing_devices) { + ULONG sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size), i, off; + UINT16 stripe, parity = (bit_start + num + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes; + UINT64 stripeoff; + + stripe = (parity + 1) % c->chunk_item->num_stripes; + off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1); + stripeoff = num * sectors_per_stripe; + + if (missing_devices == 0) + RtlCopyMemory(context->parity_scratch, &context->stripes[parity].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length); + + while (stripe != parity) { + RtlClearAllBits(&context->stripes[stripe].error); + + for (i = 0; i < sectors_per_stripe; i++) { + if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) { + if (RtlCheckBit(&context->is_tree, off)) { + tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff * Vcb->superblock.sector_size]; + UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size); + UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); + + if (crc32 != *((UINT32*)th->csum) || th->address != addr) { + RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size / Vcb->superblock.sector_size); + log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + + if (missing_devices > 0) + log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, TRUE, FALSE, FALSE); + } + + off += Vcb->superblock.node_size / Vcb->superblock.sector_size; + stripeoff += Vcb->superblock.node_size / Vcb->superblock.sector_size; + i += (Vcb->superblock.node_size / 
Vcb->superblock.sector_size) - 1; + + continue; + } else if (RtlCheckBit(&context->has_csum, off)) { + UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + (stripeoff * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + if (crc32 != context->csum[off]) { + RtlSetBit(&context->stripes[stripe].error, i); + log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + + if (missing_devices > 0) { + UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size); + + log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, FALSE, FALSE, FALSE); + } + } + } + } + + off++; + stripeoff++; + } + + if (missing_devices == 0) + do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length); + + stripe = (stripe + 1) % c->chunk_item->num_stripes; + stripeoff = num * sectors_per_stripe; + } + + // check parity + + if (missing_devices == 0) { + RtlClearAllBits(&context->stripes[parity].error); + + for (i = 0; i < sectors_per_stripe; i++) { + ULONG o, j; + + o = i * Vcb->superblock.sector_size; + for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE + if (context->parity_scratch[o] != 0) { + RtlSetBit(&context->stripes[parity].error, i); + break; + } + o++; + } + } + } + + // log and fix errors + + if (missing_devices > 0) + return; + + for (i = 0; i < sectors_per_stripe; i++) { + ULONG num_errors = 0, bad_off; + UINT64 bad_stripe; + BOOL alloc = FALSE; + + stripe = (parity + 1) % c->chunk_item->num_stripes; + off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i; + + while (stripe != parity) { + if (RtlCheckBit(&context->alloc, off)) { + alloc = TRUE; + + if (RtlCheckBit(&context->stripes[stripe].error, i)) { + bad_stripe = stripe; + bad_off = off; + num_errors++; + } + } + + off += sectors_per_stripe; + stripe 
= (stripe + 1) % c->chunk_item->num_stripes; + } + + if (!alloc) + continue; + + if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity].error, i)) // everything fine + continue; + + if (num_errors == 0 && RtlCheckBit(&context->stripes[parity].error, i)) { // parity error + UINT64 addr; + + do_xor(&context->stripes[parity].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + &context->parity_scratch[i * Vcb->superblock.sector_size], + Vcb->superblock.sector_size); + + bad_off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i; + addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off * Vcb->superblock.sector_size); + + context->stripes[parity].rewrite = TRUE; + + log_error(Vcb, addr, c->devices[parity]->devitem.dev_id, FALSE, TRUE, TRUE); + log_device_error(Vcb, c->devices[parity], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } else if (num_errors == 1) { + UINT32 crc32; + UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off * Vcb->superblock.sector_size); + + if (RtlCheckBit(&context->is_tree, bad_off)) { + tree_header* th; + + do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size], + &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + Vcb->superblock.node_size); + + th = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size]; + crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); + + if (crc32 == *((UINT32*)th->csum) && th->address == addr) { + RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size); + + context->stripes[bad_stripe].rewrite = TRUE; + + 
RtlClearBits(&context->stripes[bad_stripe].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1); + + log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, TRUE, TRUE, FALSE); + } else + log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, TRUE, FALSE, FALSE); + } else { + do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size], + &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + Vcb->superblock.sector_size); + + crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size); + + if (crc32 == context->csum[bad_off]) { + RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size); + + context->stripes[bad_stripe].rewrite = TRUE; + + log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, FALSE, TRUE, FALSE); + } else + log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, FALSE, FALSE, FALSE); + } + } else { + stripe = (parity + 1) % c->chunk_item->num_stripes; + off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i; + + while (stripe != parity) { + if (RtlCheckBit(&context->alloc, off)) { + if (RtlCheckBit(&context->stripes[stripe].error, i)) { + UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size); + + log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, RtlCheckBit(&context->is_tree, off), FALSE, FALSE); + } + } + + off += sectors_per_stripe; + stripe = (stripe + 1) % c->chunk_item->num_stripes; + } + } + } +} + +static void scrub_raid6_stripe(device_extension* Vcb, chunk* c, scrub_context_raid56* context, UINT64 stripe_start, UINT64 bit_start, + UINT64 num, UINT16 missing_devices) { + ULONG 
sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size), i, off; + UINT16 stripe, parity1 = (bit_start + num + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes; + UINT16 parity2 = (parity1 + 1) % c->chunk_item->num_stripes; + UINT64 stripeoff; + + stripe = (parity1 + 2) % c->chunk_item->num_stripes; + off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2); + stripeoff = num * sectors_per_stripe; + + if (c->devices[parity1]->devobj) + RtlCopyMemory(context->parity_scratch, &context->stripes[parity1].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length); + + if (c->devices[parity2]->devobj) + RtlZeroMemory(context->parity_scratch2, (ULONG)c->chunk_item->stripe_length); + + while (stripe != parity1) { + RtlClearAllBits(&context->stripes[stripe].error); + + for (i = 0; i < sectors_per_stripe; i++) { + if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) { + if (RtlCheckBit(&context->is_tree, off)) { + tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff * Vcb->superblock.sector_size]; + UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size); + UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); + + if (crc32 != *((UINT32*)th->csum) || th->address != addr) { + RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size / Vcb->superblock.sector_size); + log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + + if (missing_devices == 2) + log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, TRUE, FALSE, FALSE); + } + + off += Vcb->superblock.node_size / Vcb->superblock.sector_size; + stripeoff += Vcb->superblock.node_size / Vcb->superblock.sector_size; + i += (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1; + + continue; + } 
else if (RtlCheckBit(&context->has_csum, off)) { + UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + (stripeoff * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + if (crc32 != context->csum[off]) { + UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size); + + RtlSetBit(&context->stripes[stripe].error, i); + log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + + if (missing_devices == 2) + log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, FALSE, FALSE, FALSE); + } + } + } + + off++; + stripeoff++; + } + + if (c->devices[parity1]->devobj) + do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (UINT32)c->chunk_item->stripe_length); + + stripe = (stripe + 1) % c->chunk_item->num_stripes; + stripeoff = num * sectors_per_stripe; + } + + RtlClearAllBits(&context->stripes[parity1].error); + + if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity2]->devobj)) { + // check parity 1 + + for (i = 0; i < sectors_per_stripe; i++) { + ULONG o, j; + + o = i * Vcb->superblock.sector_size; + for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE + if (context->parity_scratch[o] != 0) { + RtlSetBit(&context->stripes[parity1].error, i); + break; + } + o++; + } + } + } + + RtlClearAllBits(&context->stripes[parity2].error); + + if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity1]->devobj)) { + // check parity 2 + + stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1); + + while (stripe != parity2) { + galois_double(context->parity_scratch2, (UINT32)c->chunk_item->stripe_length); + do_xor(context->parity_scratch2, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (UINT32)c->chunk_item->stripe_length); + + stripe = stripe == 0 ? 
(c->chunk_item->num_stripes - 1) : (stripe - 1); + } + + for (i = 0; i < sectors_per_stripe; i++) { + if (RtlCompareMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size) != Vcb->superblock.sector_size) + RtlSetBit(&context->stripes[parity2].error, i); + } + } + + if (missing_devices == 2) + return; + + // log and fix errors + + for (i = 0; i < sectors_per_stripe; i++) { + ULONG num_errors = 0; + UINT64 bad_stripe1, bad_stripe2; + ULONG bad_off1, bad_off2; + BOOL alloc = FALSE; + + stripe = (parity1 + 2) % c->chunk_item->num_stripes; + off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i; + + while (stripe != parity1) { + if (RtlCheckBit(&context->alloc, off)) { + alloc = TRUE; + + if (!c->devices[stripe]->devobj || RtlCheckBit(&context->stripes[stripe].error, i)) { + if (num_errors == 0) { + bad_stripe1 = stripe; + bad_off1 = off; + } else if (num_errors == 1) { + bad_stripe2 = stripe; + bad_off2 = off; + } + num_errors++; + } + } + + off += sectors_per_stripe; + stripe = (stripe + 1) % c->chunk_item->num_stripes; + } + + if (!alloc) + continue; + + if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity1].error, i) && !RtlCheckBit(&context->stripes[parity2].error, i)) // everything fine + continue; + + if (num_errors == 0) { // parity error + UINT64 addr; + + if (RtlCheckBit(&context->stripes[parity1].error, i)) { + do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + &context->parity_scratch[i * Vcb->superblock.sector_size], + Vcb->superblock.sector_size); + + bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i; + addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * 
Vcb->superblock.sector_size); + + context->stripes[parity1].rewrite = TRUE; + + log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, FALSE, TRUE, TRUE); + log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } + + if (RtlCheckBit(&context->stripes[parity2].error, i)) { + RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + &context->parity_scratch2[i * Vcb->superblock.sector_size], + Vcb->superblock.sector_size); + + bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i; + addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size); + + context->stripes[parity2].rewrite = TRUE; + + log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, FALSE, TRUE, TRUE); + log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } + } else if (num_errors == 1) { + UINT32 crc32a, crc32b, len; + UINT16 stripe_num, bad_stripe_num; + UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size); + UINT8* scratch; + + len = RtlCheckBit(&context->is_tree, bad_off1)? Vcb->superblock.node_size : Vcb->superblock.sector_size; + + scratch = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG); + if (!scratch) { + ERR("out of memory\n"); + return; + } + + RtlZeroMemory(scratch, len); + + do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size], + &context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len); + + stripe = parity1 == 0 ? 
(c->chunk_item->num_stripes - 1) : (parity1 - 1); + + if (c->devices[parity2]->devobj) { + stripe_num = c->chunk_item->num_stripes - 3; + while (stripe != parity2) { + galois_double(scratch, len); + + if (stripe != bad_stripe1) + do_xor(scratch, &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len); + else + bad_stripe_num = stripe_num; + + stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1); + stripe_num--; + } + + do_xor(scratch, &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len); + } + + if (bad_stripe_num != 0) + galois_divpower(scratch, (UINT8)bad_stripe_num, len); + + if (RtlCheckBit(&context->is_tree, bad_off1)) { + tree_header *th1 = NULL, *th2 = NULL; + + if (c->devices[parity1]->devobj) { + th1 = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size]; + crc32a = ~calc_crc32c(0xffffffff, (UINT8*)&th1->fs_uuid, Vcb->superblock.node_size - sizeof(th1->csum)); + } + + if (c->devices[parity2]->devobj) { + th2 = (tree_header*)scratch; + crc32b = ~calc_crc32c(0xffffffff, (UINT8*)&th2->fs_uuid, Vcb->superblock.node_size - sizeof(th2->csum)); + } + + if ((c->devices[parity1]->devobj && crc32a == *((UINT32*)th1->csum) && th1->address == addr) || + (c->devices[parity2]->devobj && crc32b == *((UINT32*)th2->csum) && th2->address == addr)) { + if (!c->devices[parity1]->devobj || crc32a != *((UINT32*)th1->csum) || th1->address != addr) { + RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + scratch, Vcb->superblock.node_size); + + if (c->devices[parity1]->devobj) { + // fix parity 1 + + stripe = (parity1 + 2) % c->chunk_item->num_stripes; + + RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * 
Vcb->superblock.sector_size)], + Vcb->superblock.node_size); + + stripe = (stripe + 1) % c->chunk_item->num_stripes; + + while (stripe != parity1) { + do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + Vcb->superblock.node_size); + + stripe = (stripe + 1) % c->chunk_item->num_stripes; + } + + context->stripes[parity1].rewrite = TRUE; + + log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, FALSE, TRUE, TRUE); + log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } + } else { + RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size); + + if (!c->devices[parity2]->devobj || crc32b != *((UINT32*)th2->csum) || th2->address != addr) { + // fix parity 2 + stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1); + + if (c->devices[parity2]->devobj) { + RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + Vcb->superblock.node_size); + + stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1); + + while (stripe != parity2) { + galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], Vcb->superblock.node_size); + + do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + Vcb->superblock.node_size); + + stripe = stripe == 0 ? 
(c->chunk_item->num_stripes - 1) : (stripe - 1); + } + + context->stripes[parity2].rewrite = TRUE; + + log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, FALSE, TRUE, TRUE); + log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } + } + } + + context->stripes[bad_stripe1].rewrite = TRUE; + + RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1); + + log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, TRUE, TRUE, FALSE); + } else + log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, TRUE, FALSE, FALSE); + } else { + if (c->devices[parity1]->devobj) + crc32a = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size); + + if (c->devices[parity2]->devobj) + crc32b = ~calc_crc32c(0xffffffff, scratch, Vcb->superblock.sector_size); + + if ((c->devices[parity1]->devobj && crc32a == context->csum[bad_off1]) || (c->devices[parity2]->devobj && crc32b == context->csum[bad_off1])) { + if (c->devices[parity2]->devobj && crc32b == context->csum[bad_off1]) { + RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + scratch, Vcb->superblock.sector_size); + + if (c->devices[parity1]->devobj && crc32a != context->csum[bad_off1]) { + // fix parity 1 + + stripe = (parity1 + 2) % c->chunk_item->num_stripes; + + RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + Vcb->superblock.sector_size); + + stripe = (stripe + 1) % c->chunk_item->num_stripes; + + while (stripe != parity1) { + do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * 
Vcb->superblock.sector_size)], + Vcb->superblock.sector_size); + + stripe = (stripe + 1) % c->chunk_item->num_stripes; + } + + context->stripes[parity1].rewrite = TRUE; + + log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, FALSE, TRUE, TRUE); + log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } + } else { + RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size); + + if (c->devices[parity2]->devobj && crc32b != context->csum[bad_off1]) { + // fix parity 2 + stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1); + + RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + Vcb->superblock.sector_size); + + stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1); + + while (stripe != parity2) { + galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], Vcb->superblock.sector_size); + + do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + Vcb->superblock.sector_size); + + stripe = stripe == 0 ? 
(c->chunk_item->num_stripes - 1) : (stripe - 1); + } + + context->stripes[parity2].rewrite = TRUE; + + log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, FALSE, TRUE, TRUE); + log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS); + } + } + + context->stripes[bad_stripe1].rewrite = TRUE; + + log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, FALSE, TRUE, FALSE); + } else + log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, FALSE, FALSE, FALSE); + } + + ExFreePool(scratch); + } else if (num_errors == 2 && missing_devices == 0) { + UINT16 x, y, k; + UINT64 addr; + UINT32 len = (RtlCheckBit(&context->is_tree, bad_off1) || RtlCheckBit(&context->is_tree, bad_off2)) ? Vcb->superblock.node_size : Vcb->superblock.sector_size; + UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy; + UINT32 j; + + stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1); + + // put qxy in parity_scratch + // put pxy in parity_scratch2 + + k = c->chunk_item->num_stripes - 3; + if (stripe == bad_stripe1 || stripe == bad_stripe2) { + RtlZeroMemory(&context->parity_scratch[i * Vcb->superblock.sector_size], len); + RtlZeroMemory(&context->parity_scratch2[i * Vcb->superblock.sector_size], len); + + if (stripe == bad_stripe1) + x = k; + else + y = k; + } else { + RtlCopyMemory(&context->parity_scratch[i * Vcb->superblock.sector_size], + &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len); + RtlCopyMemory(&context->parity_scratch2[i * Vcb->superblock.sector_size], + &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len); + } + + stripe = stripe == 0 ? 
(c->chunk_item->num_stripes - 1) : (stripe - 1); + + k--; + do { + galois_double(&context->parity_scratch[i * Vcb->superblock.sector_size], len); + + if (stripe != bad_stripe1 && stripe != bad_stripe2) { + do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size], + &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len); + do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size], + &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len); + } else if (stripe == bad_stripe1) + x = k; + else if (stripe == bad_stripe2) + y = k; + + stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1); + k--; + } while (stripe != parity2); + + gyx = gpow2(y > x ? (y-x) : (255-x+y)); + gx = gpow2(255-x); + + denom = gdiv(1, gyx ^ 1); + a = gmul(gyx, denom); + b = gmul(gx, denom); + + p = &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)]; + q = &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)]; + pxy = &context->parity_scratch2[i * Vcb->superblock.sector_size]; + qxy = &context->parity_scratch[i * Vcb->superblock.sector_size]; + + for (j = 0; j < len; j++) { + *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy); + + p++; + q++; + pxy++; + qxy++; + } + + do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size], &context->parity_scratch[i * Vcb->superblock.sector_size], len); + do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size], &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len); + + addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size); + + if (RtlCheckBit(&context->is_tree, bad_off1)) { + tree_header* th = (tree_header*)&context->parity_scratch[i * 
Vcb->superblock.sector_size]; + UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); + + if (crc32 == *((UINT32*)th->csum) && th->address == addr) { + RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size); + + context->stripes[bad_stripe1].rewrite = TRUE; + + RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1); + + log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, TRUE, TRUE, FALSE); + } else + log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, TRUE, FALSE, FALSE); + } else { + UINT32 crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size); + + if (crc32 == context->csum[bad_off1]) { + RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size); + + context->stripes[bad_stripe1].rewrite = TRUE; + + log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, FALSE, TRUE, FALSE); + } else + log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, FALSE, FALSE, FALSE); + } + + addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off2 * Vcb->superblock.sector_size); + + if (RtlCheckBit(&context->is_tree, bad_off2)) { + tree_header* th = (tree_header*)&context->parity_scratch2[i * Vcb->superblock.sector_size]; + UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); + + if (crc32 == *((UINT32*)th->csum) && th->address == addr) { + RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i * 
Vcb->superblock.sector_size)], + &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.node_size); + + context->stripes[bad_stripe2].rewrite = TRUE; + + RtlClearBits(&context->stripes[bad_stripe2].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1); + + log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, TRUE, TRUE, FALSE); + } else + log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, TRUE, FALSE, FALSE); + } else { + UINT32 crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size); + + if (crc32 == context->csum[bad_off2]) { + RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], + &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size); + + context->stripes[bad_stripe2].rewrite = TRUE; + + log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, FALSE, TRUE, FALSE); + } else + log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, FALSE, FALSE, FALSE); + } + } else { + stripe = (parity2 + 1) % c->chunk_item->num_stripes; + off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i; + + while (stripe != parity1) { + if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) { + if (RtlCheckBit(&context->stripes[stripe].error, i)) { + UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size); + + log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, RtlCheckBit(&context->is_tree, off), FALSE, FALSE); + } + } + + off += sectors_per_stripe; + stripe = (stripe + 1) % c->chunk_item->num_stripes; + } + } + } +} + +static NTSTATUS scrub_chunk_raid56_stripe_run(device_extension* Vcb, chunk* c, UINT64 stripe_start, UINT64 stripe_end) { + NTSTATUS Status; + KEY searchkey; + 
traverse_ptr tp; + BOOL b; + UINT64 run_start, run_end, full_stripe_len, stripe; + UINT32 max_read, num_sectors; + ULONG arrlen, *allocarr, *csumarr = NULL, *treearr, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1; + scrub_context_raid56 context; + UINT16 i; + CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; + + TRACE("(%p, %p, %llx, %llx)\n", Vcb, c, stripe_start, stripe_end); + + full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length; + run_start = c->offset + (stripe_start * full_stripe_len); + run_end = c->offset + ((stripe_end + 1) * full_stripe_len); + + searchkey.obj_id = run_start; + searchkey.obj_type = TYPE_METADATA_ITEM; + searchkey.offset = 0xffffffffffffffff; + + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + num_sectors = (UINT32)((stripe_end - stripe_start + 1) * full_stripe_len / Vcb->superblock.sector_size); + arrlen = (ULONG)sector_align((num_sectors / 8) + 1, sizeof(ULONG)); + + allocarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG); + if (!allocarr) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + treearr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG); + if (!treearr) { + ERR("out of memory\n"); + ExFreePool(allocarr); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlInitializeBitMap(&context.alloc, allocarr, num_sectors); + RtlClearAllBits(&context.alloc); + + RtlInitializeBitMap(&context.is_tree, treearr, num_sectors); + RtlClearAllBits(&context.is_tree); + + context.parity_scratch = ExAllocatePoolWithTag(PagedPool, (ULONG)c->chunk_item->stripe_length, ALLOC_TAG); + if (!context.parity_scratch) { + ERR("out of memory\n"); + ExFreePool(allocarr); + ExFreePool(treearr); + return STATUS_INSUFFICIENT_RESOURCES; + } + + if (c->chunk_item->type & BLOCK_FLAG_DATA) { + csumarr = 
ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG); + if (!csumarr) { + ERR("out of memory\n"); + ExFreePool(allocarr); + ExFreePool(treearr); + ExFreePool(context.parity_scratch); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlInitializeBitMap(&context.has_csum, csumarr, num_sectors); + RtlClearAllBits(&context.has_csum); + + context.csum = ExAllocatePoolWithTag(PagedPool, num_sectors * sizeof(UINT32), ALLOC_TAG); + if (!context.csum) { + ERR("out of memory\n"); + ExFreePool(allocarr); + ExFreePool(treearr); + ExFreePool(context.parity_scratch); + ExFreePool(csumarr); + return STATUS_INSUFFICIENT_RESOURCES; + } + } + + if (c->chunk_item->type & BLOCK_FLAG_RAID6) { + context.parity_scratch2 = ExAllocatePoolWithTag(PagedPool, (ULONG)c->chunk_item->stripe_length, ALLOC_TAG); + if (!context.parity_scratch2) { + ERR("out of memory\n"); + ExFreePool(allocarr); + ExFreePool(treearr); + ExFreePool(context.parity_scratch); + + if (c->chunk_item->type & BLOCK_FLAG_DATA) { + ExFreePool(csumarr); + ExFreePool(context.csum); + } + + return STATUS_INSUFFICIENT_RESOURCES; + } + } + + do { + traverse_ptr next_tp; + + if (tp.item->key.obj_id >= run_end) + break; + + if (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM) { + UINT64 size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? 
Vcb->superblock.node_size : tp.item->key.offset; + + if (tp.item->key.obj_id + size > run_start) { + UINT64 extent_start = max(run_start, tp.item->key.obj_id); + UINT64 extent_end = min(tp.item->key.obj_id + size, run_end); + BOOL extent_is_tree = FALSE; + + RtlSetBits(&context.alloc, (ULONG)((extent_start - run_start) / Vcb->superblock.sector_size), (ULONG)((extent_end - extent_start) / Vcb->superblock.sector_size)); + + if (tp.item->key.obj_type == TYPE_METADATA_ITEM) + extent_is_tree = TRUE; + else { + EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data; + + if (tp.item->size < sizeof(EXTENT_ITEM)) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); + Status = STATUS_INTERNAL_ERROR; + goto end; + } + + if (ei->flags & EXTENT_ITEM_TREE_BLOCK) + extent_is_tree = TRUE; + } + + if (extent_is_tree) + RtlSetBits(&context.is_tree, (ULONG)((extent_start - run_start) / Vcb->superblock.sector_size), (ULONG)((extent_end - extent_start) / Vcb->superblock.sector_size)); + else if (c->chunk_item->type & BLOCK_FLAG_DATA) { + traverse_ptr tp2; + BOOL b2; + + searchkey.obj_id = EXTENT_CSUM_ID; + searchkey.obj_type = TYPE_EXTENT_CSUM; + searchkey.offset = extent_start; + + Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) { + ERR("find_item returned %08x\n", Status); + goto end; + } + + do { + traverse_ptr next_tp2; + + if (tp2.item->key.offset >= extent_end) + break; + + if (tp2.item->key.offset >= extent_start) { + UINT64 csum_start = max(extent_start, tp2.item->key.offset); + UINT64 csum_end = min(extent_end, tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(UINT32))); + + RtlSetBits(&context.has_csum, (ULONG)((csum_start - run_start) / Vcb->superblock.sector_size), (ULONG)((csum_end - csum_start) / Vcb->superblock.sector_size)); + + 
RtlCopyMemory(&context.csum[(csum_start - run_start) / Vcb->superblock.sector_size], + tp2.item->data + ((csum_start - tp2.item->key.offset) * sizeof(UINT32) / Vcb->superblock.sector_size), + (ULONG)((csum_end - csum_start) * sizeof(UINT32) / Vcb->superblock.sector_size)); + } + + b2 = find_next_item(Vcb, &tp2, &next_tp2, FALSE, NULL); + + if (b2) + tp2 = next_tp2; + } while (b2); + } + } + } + + b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL); + + if (b) + tp = next_tp; + } while (b); + + context.stripes = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_context_raid56_stripe) * c->chunk_item->num_stripes, ALLOC_TAG); + if (!context.stripes) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + max_read = (UINT32)min(1048576 / c->chunk_item->stripe_length, stripe_end - stripe_start + 1); // only process 1 MB of data at a time + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + context.stripes[i].buf = ExAllocatePoolWithTag(PagedPool, (ULONG)(max_read * c->chunk_item->stripe_length), ALLOC_TAG); + if (!context.stripes[i].buf) { + UINT64 j; + + ERR("out of memory\n"); + + for (j = 0; j < i; j++) { + ExFreePool(context.stripes[j].buf); + } + ExFreePool(context.stripes); + + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + context.stripes[i].errorarr = ExAllocatePoolWithTag(PagedPool, (ULONG)sector_align(((c->chunk_item->stripe_length / Vcb->superblock.sector_size) / 8) + 1, sizeof(ULONG)), ALLOC_TAG); + if (!context.stripes[i].errorarr) { + UINT64 j; + + ERR("out of memory\n"); + + ExFreePool(context.stripes[i].buf); + + for (j = 0; j < i; j++) { + ExFreePool(context.stripes[j].buf); + } + ExFreePool(context.stripes); + + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlInitializeBitMap(&context.stripes[i].error, context.stripes[i].errorarr, (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size)); + + context.stripes[i].context = &context; + context.stripes[i].rewrite = FALSE; + } + + 
stripe = stripe_start; + + Status = STATUS_SUCCESS; + + chunk_lock_range(Vcb, c, run_start, run_end - run_start); + + do { + ULONG read_stripes; + UINT16 missing_devices = 0; + BOOL need_wait = FALSE; + + if (max_read < stripe_end + 1 - stripe) + read_stripes = max_read; + else + read_stripes = (ULONG)(stripe_end + 1 - stripe); + + context.stripes_left = c->chunk_item->num_stripes; + + // read megabyte by megabyte + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (c->devices[i]->devobj) { + PIO_STACK_LOCATION IrpSp; + + context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, FALSE); + + if (!context.stripes[i].Irp) { + ERR("IoAllocateIrp failed\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end3; + } + + context.stripes[i].Irp->MdlAddress = NULL; + + IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp); + IrpSp->MajorFunction = IRP_MJ_READ; + + if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) { + context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(read_stripes * c->chunk_item->stripe_length), ALLOC_TAG); + if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end3; + } + + context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION; + + context.stripes[i].Irp->UserBuffer = context.stripes[i].buf; + } else if (c->devices[i]->devobj->Flags & DO_DIRECT_IO) { + context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, (ULONG)(read_stripes * c->chunk_item->stripe_length), FALSE, FALSE, NULL); + if (!context.stripes[i].Irp->MdlAddress) { + ERR("IoAllocateMdl failed\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end3; + } + + Status = STATUS_SUCCESS; + + _SEH2_TRY { + MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + + 
if (!NT_SUCCESS(Status)) { + ERR("MmProbeAndLockPages threw exception %08x\n", Status); + IoFreeMdl(context.stripes[i].Irp->MdlAddress); + goto end3; + } + } else + context.stripes[i].Irp->UserBuffer = context.stripes[i].buf; + + context.stripes[i].offset = stripe * c->chunk_item->stripe_length; + + IrpSp->Parameters.Read.Length = (ULONG)(read_stripes * c->chunk_item->stripe_length); + IrpSp->Parameters.Read.ByteOffset.QuadPart = cis[i].offset + context.stripes[i].offset; + + context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb; + context.stripes[i].missing = FALSE; + + IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion_raid56, &context.stripes[i], TRUE, TRUE, TRUE); + + Vcb->scrub.data_scrubbed += read_stripes * c->chunk_item->stripe_length; + need_wait = TRUE; + } else { + context.stripes[i].Irp = NULL; + context.stripes[i].missing = TRUE; + missing_devices++; + InterlockedDecrement(&context.stripes_left); + } + } + + if (c->chunk_item->type & BLOCK_FLAG_RAID5 && missing_devices > 1) { + ERR("too many missing devices (%u, maximum 1)\n", missing_devices); + Status = STATUS_UNEXPECTED_IO_ERROR; + goto end3; + } else if (c->chunk_item->type & BLOCK_FLAG_RAID6 && missing_devices > 2) { + ERR("too many missing devices (%u, maximum 2)\n", missing_devices); + Status = STATUS_UNEXPECTED_IO_ERROR; + goto end3; + } + + if (need_wait) { + KeInitializeEvent(&context.Event, NotificationEvent, FALSE); + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (c->devices[i]->devobj) + IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp); + } + + KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL); + } + + // return an error if any of the stripes returned an error + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (!context.stripes[i].missing && !NT_SUCCESS(context.stripes[i].iosb.Status)) { + Status = context.stripes[i].iosb.Status; + log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_READ_ERRORS); + goto end3; + } 
+ } + + if (c->chunk_item->type & BLOCK_FLAG_RAID6) { + for (i = 0; i < read_stripes; i++) { + scrub_raid6_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices); + } + } else { + for (i = 0; i < read_stripes; i++) { + scrub_raid5_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices); + } + } + stripe += read_stripes; + +end3: + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (context.stripes[i].Irp) { + if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) { + MmUnlockPages(context.stripes[i].Irp->MdlAddress); + IoFreeMdl(context.stripes[i].Irp->MdlAddress); + } + IoFreeIrp(context.stripes[i].Irp); + context.stripes[i].Irp = NULL; + + if (context.stripes[i].rewrite) { + Status = write_data_phys(c->devices[i]->devobj, cis[i].offset + context.stripes[i].offset, + context.stripes[i].buf, (UINT32)(read_stripes * c->chunk_item->stripe_length)); + + if (!NT_SUCCESS(Status)) { + ERR("write_data_phys returned %08x\n", Status); + log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_WRITE_ERRORS); + goto end2; + } + } + } + } + + if (!NT_SUCCESS(Status)) + break; + } while (stripe < stripe_end); + +end2: + chunk_unlock_range(Vcb, c, run_start, run_end - run_start); + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + ExFreePool(context.stripes[i].buf); + ExFreePool(context.stripes[i].errorarr); + } + ExFreePool(context.stripes); + +end: + ExFreePool(treearr); + ExFreePool(allocarr); + ExFreePool(context.parity_scratch); + + if (c->chunk_item->type & BLOCK_FLAG_RAID6) + ExFreePool(context.parity_scratch2); + + if (c->chunk_item->type & BLOCK_FLAG_DATA) { + ExFreePool(csumarr); + ExFreePool(context.csum); + } + + return Status; +} + +static NTSTATUS scrub_chunk_raid56(device_extension* Vcb, chunk* c, UINT64* offset, BOOL* changed) { + NTSTATUS Status; + KEY searchkey; + traverse_ptr tp; + BOOL b; + UINT64 full_stripe_len, stripe, stripe_start, stripe_end, total_data = 0; + ULONG num_extents = 0, 
num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1; + + full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length; + stripe = (*offset - c->offset) / full_stripe_len; + + *offset = c->offset + (stripe * full_stripe_len); + + searchkey.obj_id = *offset; + searchkey.obj_type = TYPE_METADATA_ITEM; + searchkey.offset = 0xffffffffffffffff; + + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + *changed = FALSE; + + do { + traverse_ptr next_tp; + + if (tp.item->key.obj_id >= c->offset + c->chunk_item->size) + break; + + if (tp.item->key.obj_id >= *offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) { + UINT64 size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset; + + TRACE("%llx\n", tp.item->key.obj_id); + + if (size < Vcb->superblock.sector_size) { + ERR("extent %llx has size less than sector_size (%llx < %x)\n", tp.item->key.obj_id, Vcb->superblock.sector_size); + return STATUS_INTERNAL_ERROR; + } + + stripe = (tp.item->key.obj_id - c->offset) / full_stripe_len; + + if (*changed) { + if (stripe > stripe_end + 1) { + Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end); + if (!NT_SUCCESS(Status)) { + ERR("scrub_chunk_raid56_stripe_run returned %08x\n", Status); + return Status; + } + + stripe_start = stripe; + } + } else + stripe_start = stripe; + + stripe_end = (tp.item->key.obj_id + size - 1 - c->offset) / full_stripe_len; + + *changed = TRUE; + + total_data += size; + num_extents++; + + // only do so much at a time + if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB + break; + } + + b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL); + + if (b) + tp = next_tp; + } while (b); + + if (*changed) { + Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, 
stripe_end); + if (!NT_SUCCESS(Status)) { + ERR("scrub_chunk_raid56_stripe_run returned %08x\n", Status); + return Status; + } + + *offset = c->offset + ((stripe_end + 1) * full_stripe_len); + } + + return STATUS_SUCCESS; +} + +static NTSTATUS scrub_chunk(device_extension* Vcb, chunk* c, UINT64* offset, BOOL* changed) { + NTSTATUS Status; + KEY searchkey; + traverse_ptr tp; + BOOL b = FALSE, tree_run = FALSE; + ULONG type, num_extents = 0; + UINT64 total_data = 0, tree_run_start, tree_run_end; + + TRACE("chunk %llx\n", c->offset); + + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + + if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE) + type = BLOCK_FLAG_DUPLICATE; + else if (c->chunk_item->type & BLOCK_FLAG_RAID0) + type = BLOCK_FLAG_RAID0; + else if (c->chunk_item->type & BLOCK_FLAG_RAID1) + type = BLOCK_FLAG_DUPLICATE; + else if (c->chunk_item->type & BLOCK_FLAG_RAID10) + type = BLOCK_FLAG_RAID10; + else if (c->chunk_item->type & BLOCK_FLAG_RAID5) { + Status = scrub_chunk_raid56(Vcb, c, offset, changed); + goto end; + } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) { + Status = scrub_chunk_raid56(Vcb, c, offset, changed); + goto end; + } else // SINGLE + type = BLOCK_FLAG_DUPLICATE; + + searchkey.obj_id = *offset; + searchkey.obj_type = TYPE_METADATA_ITEM; + searchkey.offset = 0xffffffffffffffff; + + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("error - find_item returned %08x\n", Status); + goto end; + } + + do { + traverse_ptr next_tp; + + if (tp.item->key.obj_id >= c->offset + c->chunk_item->size) + break; + + if (tp.item->key.obj_id >= *offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) { + UINT64 size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? 
Vcb->superblock.node_size : tp.item->key.offset; + BOOL is_tree; + UINT32* csum = NULL; + RTL_BITMAP bmp; + ULONG* bmparr = NULL; + + TRACE("%llx\n", tp.item->key.obj_id); + + is_tree = FALSE; + + if (tp.item->key.obj_type == TYPE_METADATA_ITEM) + is_tree = TRUE; + else { + EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data; + + if (tp.item->size < sizeof(EXTENT_ITEM)) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); + Status = STATUS_INTERNAL_ERROR; + goto end; + } + + if (ei->flags & EXTENT_ITEM_TREE_BLOCK) + is_tree = TRUE; + } + + if (size < Vcb->superblock.sector_size) { + ERR("extent %llx has size less than sector_size (%llx < %x)\n", tp.item->key.obj_id, Vcb->superblock.sector_size); + Status = STATUS_INTERNAL_ERROR; + goto end; + } + + // load csum + if (!is_tree) { + traverse_ptr tp2; + + csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(sizeof(UINT32) * size / Vcb->superblock.sector_size), ALLOC_TAG); + if (!csum) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + bmparr = ExAllocatePoolWithTag(PagedPool, (ULONG)(sector_align(((size / Vcb->superblock.sector_size) >> 3) + 1, sizeof(ULONG))), ALLOC_TAG); + if (!bmparr) { + ERR("out of memory\n"); + ExFreePool(csum); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlInitializeBitMap(&bmp, bmparr, (ULONG)(size / Vcb->superblock.sector_size)); + RtlSetAllBits(&bmp); // 1 = no csum, 0 = csum + + searchkey.obj_id = EXTENT_CSUM_ID; + searchkey.obj_type = TYPE_EXTENT_CSUM; + searchkey.offset = tp.item->key.obj_id; + + Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) { + ERR("find_item returned %08x\n", Status); + ExFreePool(csum); + ExFreePool(bmparr); + goto end; + } + + if (Status != STATUS_NOT_FOUND) { + do { + traverse_ptr next_tp2; + + if (tp2.item->key.obj_type == 
TYPE_EXTENT_CSUM) { + if (tp2.item->key.offset >= tp.item->key.obj_id + size) + break; + else if (tp2.item->size >= sizeof(UINT32) && tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= tp.item->key.obj_id) { + UINT64 cs = max(tp.item->key.obj_id, tp2.item->key.offset); + UINT64 ce = min(tp.item->key.obj_id + size, tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(UINT32))); + + RtlCopyMemory(csum + ((cs - tp.item->key.obj_id) / Vcb->superblock.sector_size), + tp2.item->data + ((cs - tp2.item->key.offset) * sizeof(UINT32) / Vcb->superblock.sector_size), + (ULONG)((ce - cs) * sizeof(UINT32) / Vcb->superblock.sector_size)); + + RtlClearBits(&bmp, (ULONG)((cs - tp.item->key.obj_id) / Vcb->superblock.sector_size), (ULONG)((ce - cs) / Vcb->superblock.sector_size)); + + if (ce == tp.item->key.obj_id + size) + break; + } + } + + if (find_next_item(Vcb, &tp2, &next_tp2, FALSE, NULL)) + tp2 = next_tp2; + else + break; + } while (TRUE); + } + } + + if (tree_run) { + if (!is_tree || tp.item->key.obj_id > tree_run_end) { + Status = scrub_extent(Vcb, c, type, tree_run_start, (UINT32)(tree_run_end - tree_run_start), NULL); + if (!NT_SUCCESS(Status)) { + ERR("scrub_extent returned %08x\n", Status); + goto end; + } + + if (!is_tree) + tree_run = FALSE; + else { + tree_run_start = tp.item->key.obj_id; + tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size; + } + } else + tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size; + } else if (is_tree) { + tree_run = TRUE; + tree_run_start = tp.item->key.obj_id; + tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size; + } + + if (!is_tree) { + Status = scrub_data_extent(Vcb, c, tp.item->key.obj_id, type, csum, &bmp); + if (!NT_SUCCESS(Status)) { + ERR("scrub_data_extent returned %08x\n", Status); + ExFreePool(csum); + ExFreePool(bmparr); + goto end; + } + + ExFreePool(csum); + ExFreePool(bmparr); + } + + *offset = tp.item->key.obj_id + size; + 
*changed = TRUE; + + total_data += size; + num_extents++; + + // only do so much at a time + if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB + break; + } + + b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL); + + if (b) + tp = next_tp; + } while (b); + + if (tree_run) { + Status = scrub_extent(Vcb, c, type, tree_run_start, (UINT32)(tree_run_end - tree_run_start), NULL); + if (!NT_SUCCESS(Status)) { + ERR("scrub_extent returned %08x\n", Status); + goto end; + } + } + + Status = STATUS_SUCCESS; + +end: + ExReleaseResourceLite(&Vcb->tree_lock); + + return Status; +} + +_Function_class_(KSTART_ROUTINE) +#ifdef __REACTOS__ +static void NTAPI scrub_thread(void* context) { +#else +static void scrub_thread(void* context) { +#endif + device_extension* Vcb = context; + LIST_ENTRY chunks, *le; + NTSTATUS Status; + LARGE_INTEGER time; + + KeInitializeEvent(&Vcb->scrub.finished, NotificationEvent, FALSE); + + InitializeListHead(&chunks); + + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + + if (Vcb->need_write && !Vcb->readonly) + Status = do_write(Vcb, NULL); + else + Status = STATUS_SUCCESS; + + free_trees(Vcb); + + if (!NT_SUCCESS(Status)) { + ExReleaseResourceLite(&Vcb->tree_lock); + ERR("do_write returned %08x\n", Status); + Vcb->scrub.error = Status; + goto end; + } + + ExConvertExclusiveToSharedLite(&Vcb->tree_lock); + + ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE); + + KeQuerySystemTime(&Vcb->scrub.start_time); + Vcb->scrub.finish_time.QuadPart = 0; + Vcb->scrub.resume_time.QuadPart = Vcb->scrub.start_time.QuadPart; + Vcb->scrub.duration.QuadPart = 0; + Vcb->scrub.total_chunks = 0; + Vcb->scrub.chunks_left = 0; + Vcb->scrub.data_scrubbed = 0; + Vcb->scrub.num_errors = 0; + + while (!IsListEmpty(&Vcb->scrub.errors)) { + scrub_error* err = CONTAINING_RECORD(RemoveHeadList(&Vcb->scrub.errors), scrub_error, list_entry); + ExFreePool(err); + } + + ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE); + + le = Vcb->chunks.Flink; + while 
(le != &Vcb->chunks) { + chunk* c = CONTAINING_RECORD(le, chunk, list_entry); + + ExAcquireResourceExclusiveLite(&c->lock, TRUE); + + if (!c->readonly) { + InsertTailList(&chunks, &c->list_entry_balance); + Vcb->scrub.total_chunks++; + Vcb->scrub.chunks_left++; + } + + ExReleaseResourceLite(&c->lock); + + le = le->Flink; + } + + ExReleaseResourceLite(&Vcb->chunk_lock); + + ExReleaseResource(&Vcb->scrub.stats_lock); + + ExReleaseResourceLite(&Vcb->tree_lock); + + while (!IsListEmpty(&chunks)) { + chunk* c = CONTAINING_RECORD(RemoveHeadList(&chunks), chunk, list_entry_balance); + UINT64 offset = c->offset; + BOOL changed; + + c->reloc = TRUE; + + KeWaitForSingleObject(&Vcb->scrub.event, Executive, KernelMode, FALSE, NULL); + + if (!Vcb->scrub.stopping) { + do { + changed = FALSE; + + Status = scrub_chunk(Vcb, c, &offset, &changed); + if (!NT_SUCCESS(Status)) { + ERR("scrub_chunk returned %08x\n", Status); + Vcb->scrub.stopping = TRUE; + Vcb->scrub.error = Status; + break; + } + + if (offset == c->offset + c->chunk_item->size || Vcb->scrub.stopping) + break; + + KeWaitForSingleObject(&Vcb->scrub.event, Executive, KernelMode, FALSE, NULL); + } while (changed); + } + + ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE); + + if (!Vcb->scrub.stopping) + Vcb->scrub.chunks_left--; + + if (IsListEmpty(&chunks)) + KeQuerySystemTime(&Vcb->scrub.finish_time); + + ExReleaseResource(&Vcb->scrub.stats_lock); + + c->reloc = FALSE; + c->list_entry_balance.Flink = NULL; + } + + KeQuerySystemTime(&time); + Vcb->scrub.duration.QuadPart += time.QuadPart - Vcb->scrub.resume_time.QuadPart; + +end: + ZwClose(Vcb->scrub.thread); + Vcb->scrub.thread = NULL; + + KeSetEvent(&Vcb->scrub.finished, 0, FALSE); +} + +NTSTATUS start_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { + NTSTATUS Status; + + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) + return STATUS_PRIVILEGE_NOT_HELD; + + if (Vcb->locked) { + WARN("cannot start 
scrub while locked\n"); + return STATUS_DEVICE_NOT_READY; + } + + if (Vcb->balance.thread) { + WARN("cannot start scrub while balance running\n"); + return STATUS_DEVICE_NOT_READY; + } + + if (Vcb->scrub.thread) { + WARN("scrub already running\n"); + return STATUS_DEVICE_NOT_READY; + } + + if (Vcb->readonly) + return STATUS_MEDIA_WRITE_PROTECTED; + + Vcb->scrub.stopping = FALSE; + Vcb->scrub.paused = FALSE; + Vcb->scrub.error = STATUS_SUCCESS; + KeInitializeEvent(&Vcb->scrub.event, NotificationEvent, !Vcb->scrub.paused); + + Status = PsCreateSystemThread(&Vcb->scrub.thread, 0, NULL, NULL, NULL, scrub_thread, Vcb); + if (!NT_SUCCESS(Status)) { + ERR("PsCreateSystemThread returned %08x\n", Status); + return Status; + } + + return STATUS_SUCCESS; +} + +NTSTATUS query_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode, void* data, ULONG length) { + btrfs_query_scrub* bqs = (btrfs_query_scrub*)data; + ULONG len; + NTSTATUS Status; + LIST_ENTRY* le; + btrfs_scrub_error* bse = NULL; + + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) + return STATUS_PRIVILEGE_NOT_HELD; + + if (length < offsetof(btrfs_query_scrub, errors)) + return STATUS_BUFFER_TOO_SMALL; + + ExAcquireResourceSharedLite(&Vcb->scrub.stats_lock, TRUE); + + if (Vcb->scrub.thread && Vcb->scrub.chunks_left > 0) + bqs->status = Vcb->scrub.paused ? 
BTRFS_SCRUB_PAUSED : BTRFS_SCRUB_RUNNING; + else + bqs->status = BTRFS_SCRUB_STOPPED; + + bqs->start_time.QuadPart = Vcb->scrub.start_time.QuadPart; + bqs->finish_time.QuadPart = Vcb->scrub.finish_time.QuadPart; + bqs->chunks_left = Vcb->scrub.chunks_left; + bqs->total_chunks = Vcb->scrub.total_chunks; + bqs->data_scrubbed = Vcb->scrub.data_scrubbed; + + bqs->duration = Vcb->scrub.duration.QuadPart; + + if (bqs->status == BTRFS_SCRUB_RUNNING) { + LARGE_INTEGER time; + + KeQuerySystemTime(&time); + bqs->duration += time.QuadPart - Vcb->scrub.resume_time.QuadPart; + } + + bqs->error = Vcb->scrub.error; + + bqs->num_errors = Vcb->scrub.num_errors; + + len = length - offsetof(btrfs_query_scrub, errors); + + le = Vcb->scrub.errors.Flink; + while (le != &Vcb->scrub.errors) { + scrub_error* err = CONTAINING_RECORD(le, scrub_error, list_entry); + ULONG errlen; + + if (err->is_metadata) + errlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY); + else + errlen = offsetof(btrfs_scrub_error, data.filename) + err->data.filename_length; + + if (len < errlen) { + Status = STATUS_BUFFER_OVERFLOW; + goto end; + } + + if (!bse) + bse = &bqs->errors; + else { + ULONG lastlen; + + if (bse->is_metadata) + lastlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY); + else + lastlen = offsetof(btrfs_scrub_error, data.filename) + bse->data.filename_length; + + bse->next_entry = lastlen; + bse = (btrfs_scrub_error*)(((UINT8*)bse) + lastlen); + } + + bse->next_entry = 0; + bse->address = err->address; + bse->device = err->device; + bse->recovered = err->recovered; + bse->is_metadata = err->is_metadata; + bse->parity = err->parity; + + if (err->is_metadata) { + bse->metadata.root = err->metadata.root; + bse->metadata.level = err->metadata.level; + bse->metadata.firstitem = err->metadata.firstitem; + } else { + bse->data.subvol = err->data.subvol; + bse->data.offset = err->data.offset; + bse->data.filename_length = err->data.filename_length; + 
RtlCopyMemory(bse->data.filename, err->data.filename, err->data.filename_length); + } + + len -= errlen; + le = le->Flink; + } + + Status = STATUS_SUCCESS; + +end: + ExReleaseResourceLite(&Vcb->scrub.stats_lock); + + return Status; +} + +NTSTATUS pause_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { + LARGE_INTEGER time; + + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) + return STATUS_PRIVILEGE_NOT_HELD; + + if (!Vcb->scrub.thread) + return STATUS_DEVICE_NOT_READY; + + if (Vcb->scrub.paused) + return STATUS_DEVICE_NOT_READY; + + Vcb->scrub.paused = TRUE; + KeClearEvent(&Vcb->scrub.event); + + KeQuerySystemTime(&time); + Vcb->scrub.duration.QuadPart += time.QuadPart - Vcb->scrub.resume_time.QuadPart; + + return STATUS_SUCCESS; +} + +NTSTATUS resume_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) + return STATUS_PRIVILEGE_NOT_HELD; + + if (!Vcb->scrub.thread) + return STATUS_DEVICE_NOT_READY; + + if (!Vcb->scrub.paused) + return STATUS_DEVICE_NOT_READY; + + Vcb->scrub.paused = FALSE; + KeSetEvent(&Vcb->scrub.event, 0, FALSE); + + KeQuerySystemTime(&Vcb->scrub.resume_time); + + return STATUS_SUCCESS; +} + +NTSTATUS stop_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) + return STATUS_PRIVILEGE_NOT_HELD; + + if (!Vcb->scrub.thread) + return STATUS_DEVICE_NOT_READY; + + Vcb->scrub.paused = FALSE; + Vcb->scrub.stopping = TRUE; + KeSetEvent(&Vcb->scrub.event, 0, FALSE); + + return STATUS_SUCCESS; +} diff --git a/reactos/drivers/filesystems/btrfs/search.c b/reactos/drivers/filesystems/btrfs/search.c index b0cd875062e..6f3dd62a2bb 100644 --- a/reactos/drivers/filesystems/btrfs/search.c +++ b/reactos/drivers/filesystems/btrfs/search.c @@ -1,17 +1,17 @@ -/* Copyright (c) Mark Harmstone 2016 - 
* +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . */ @@ -23,659 +23,719 @@ #include #include #endif +#include +#include #include -#ifndef __REACTOS__ -#include -#endif #include -extern LIST_ENTRY volumes; -extern ERESOURCE volumes_lock; -extern LIST_ENTRY pnp_disks; +extern ERESOURCE pdo_list_lock; +extern LIST_ENTRY pdo_list; +extern UNICODE_STRING registry_path; +extern KEVENT mountmgr_thread_event; +extern HANDLE mountmgr_thread_handle; +extern BOOL shutting_down; -static NTSTATUS create_part0(PDRIVER_OBJECT DriverObject, PDEVICE_OBJECT DeviceObject, PUNICODE_STRING devpath, - PUNICODE_STRING nameus, BTRFS_UUID* uuid) { - PDEVICE_OBJECT newdevobj; - UNICODE_STRING name; - NTSTATUS Status; - part0_device_extension* p0de; - - static const WCHAR part0_suffix[] = L"Btrfs"; - - name.Length = name.MaximumLength = devpath->Length + (wcslen(part0_suffix) * sizeof(WCHAR)); - name.Buffer = ExAllocatePoolWithTag(PagedPool, name.Length, ALLOC_TAG); - if (!name.Buffer) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - RtlCopyMemory(name.Buffer, devpath->Buffer, devpath->Length); - RtlCopyMemory(&name.Buffer[devpath->Length / sizeof(WCHAR)], part0_suffix, wcslen(part0_suffix) * sizeof(WCHAR)); - - Status = IoCreateDevice(DriverObject, sizeof(part0_device_extension), &name, FILE_DEVICE_DISK, 
FILE_DEVICE_SECURE_OPEN, FALSE, &newdevobj); - if (!NT_SUCCESS(Status)) { - ERR("IoCreateDevice returned %08x\n", Status); - ExFreePool(name.Buffer); - return Status; - } - - p0de = newdevobj->DeviceExtension; - p0de->type = VCB_TYPE_PARTITION0; - p0de->devobj = DeviceObject; - RtlCopyMemory(&p0de->uuid, uuid, sizeof(BTRFS_UUID)); - - p0de->name.Length = name.Length; - p0de->name.MaximumLength = name.MaximumLength; - p0de->name.Buffer = ExAllocatePoolWithTag(PagedPool, p0de->name.MaximumLength, ALLOC_TAG); - - if (!p0de->name.Buffer) { - ERR("out of memory\b"); - ExFreePool(name.Buffer); - ExFreePool(p0de->name.Buffer); - IoDeleteDevice(newdevobj); - return STATUS_INSUFFICIENT_RESOURCES; - } - - RtlCopyMemory(p0de->name.Buffer, name.Buffer, name.Length); - - ObReferenceObject(DeviceObject); - - newdevobj->StackSize = DeviceObject->StackSize + 1; - newdevobj->SectorSize = DeviceObject->SectorSize; - - newdevobj->Flags |= DO_DIRECT_IO; - newdevobj->Flags &= ~DO_DEVICE_INITIALIZING; - - *nameus = name; - - return STATUS_SUCCESS; -} +typedef void (*pnp_callback)(PDRIVER_OBJECT DriverObject, PUNICODE_STRING devpath); + +extern PDEVICE_OBJECT master_devobj; -void add_volume(PDEVICE_OBJECT mountmgr, PUNICODE_STRING us) { - ULONG tnsize; - MOUNTMGR_TARGET_NAME* tn; - KEVENT Event; - IO_STATUS_BLOCK IoStatusBlock; - PIRP Irp; +static BOOL fs_ignored(BTRFS_UUID* uuid) { + UNICODE_STRING path, ignoreus; NTSTATUS Status; - ULONG mmdltsize; - MOUNTMGR_DRIVE_LETTER_TARGET* mmdlt; - MOUNTMGR_DRIVE_LETTER_INFORMATION mmdli; - - TRACE("found BTRFS volume\n"); - - tnsize = sizeof(MOUNTMGR_TARGET_NAME) - sizeof(WCHAR) + us->Length; - tn = ExAllocatePoolWithTag(NonPagedPool, tnsize, ALLOC_TAG); - if (!tn) { + OBJECT_ATTRIBUTES oa; + KEY_VALUE_FULL_INFORMATION* kvfi; + ULONG dispos, retlen, kvfilen, i, j; + HANDLE h; + BOOL ret = FALSE; + + path.Length = path.MaximumLength = registry_path.Length + (37 * sizeof(WCHAR)); + + path.Buffer = ExAllocatePoolWithTag(PagedPool, path.Length, 
ALLOC_TAG); + if (!path.Buffer) { ERR("out of memory\n"); - return; - } - - tn->DeviceNameLength = us->Length; - RtlCopyMemory(tn->DeviceName, us->Buffer, tn->DeviceNameLength); - - KeInitializeEvent(&Event, NotificationEvent, FALSE); - Irp = IoBuildDeviceIoControlRequest(IOCTL_MOUNTMGR_VOLUME_ARRIVAL_NOTIFICATION, - mountmgr, tn, tnsize, - NULL, 0, FALSE, &Event, &IoStatusBlock); - if (!Irp) { - ERR("%.*S: IoBuildDeviceIoControlRequest 1 failed\n", us->Length / sizeof(WCHAR), us->Buffer); - ExFreePool(tn); - return; + return FALSE; } - Status = IoCallDriver(mountmgr, Irp); - if (Status == STATUS_PENDING) { - KeWaitForSingleObject(&Event, Executive, KernelMode, FALSE, NULL); - Status = IoStatusBlock.Status; + RtlCopyMemory(path.Buffer, registry_path.Buffer, registry_path.Length); + + i = registry_path.Length / sizeof(WCHAR); + + path.Buffer[i] = '\\'; + i++; + + for (j = 0; j < 16; j++) { + path.Buffer[i] = hex_digit((uuid->uuid[j] & 0xF0) >> 4); + path.Buffer[i+1] = hex_digit(uuid->uuid[j] & 0xF); + + i += 2; + + if (j == 3 || j == 5 || j == 7 || j == 9) { + path.Buffer[i] = '-'; + i++; + } } + InitializeObjectAttributes(&oa, &path, OBJ_CASE_INSENSITIVE | OBJ_KERNEL_HANDLE, NULL, NULL); + + Status = ZwCreateKey(&h, KEY_QUERY_VALUE, &oa, 0, NULL, REG_OPTION_NON_VOLATILE, &dispos); + if (!NT_SUCCESS(Status)) { - ERR("%.*S: IoCallDriver 1 returned %08x\n", us->Length / sizeof(WCHAR), us->Buffer, Status); - return; + TRACE("ZwCreateKey returned %08x\n", Status); + ExFreePool(path.Buffer); + return FALSE; } - - ExFreePool(tn); - - mmdltsize = offsetof(MOUNTMGR_DRIVE_LETTER_TARGET, DeviceName[0]) + us->Length; - - mmdlt = ExAllocatePoolWithTag(NonPagedPool, mmdltsize, ALLOC_TAG); - if (!mmdlt) { + + RtlInitUnicodeString(&ignoreus, L"Ignore"); + + kvfilen = (ULONG)offsetof(KEY_VALUE_FULL_INFORMATION, Name[0]) + (255 * sizeof(WCHAR)); + kvfi = ExAllocatePoolWithTag(PagedPool, kvfilen, ALLOC_TAG); + if (!kvfi) { ERR("out of memory\n"); - return; - } - - 
mmdlt->DeviceNameLength = us->Length; - RtlCopyMemory(&mmdlt->DeviceName, us->Buffer, us->Length); - TRACE("mmdlt = %.*S\n", mmdlt->DeviceNameLength / sizeof(WCHAR), mmdlt->DeviceName); - - KeInitializeEvent(&Event, NotificationEvent, FALSE); - Irp = IoBuildDeviceIoControlRequest(IOCTL_MOUNTMGR_NEXT_DRIVE_LETTER, - mountmgr, mmdlt, mmdltsize, - &mmdli, sizeof(MOUNTMGR_DRIVE_LETTER_INFORMATION), FALSE, &Event, &IoStatusBlock); - if (!Irp) { - ERR("%.*S: IoBuildDeviceIoControlRequest 2 failed\n", us->Length / sizeof(WCHAR), us->Buffer); - return; + ZwClose(h); + ExFreePool(path.Buffer); + return FALSE; } - Status = IoCallDriver(mountmgr, Irp); - if (Status == STATUS_PENDING) { - KeWaitForSingleObject(&Event, Executive, KernelMode, FALSE, NULL); - Status = IoStatusBlock.Status; + Status = ZwQueryValueKey(h, &ignoreus, KeyValueFullInformation, kvfi, kvfilen, &retlen); + if (NT_SUCCESS(Status)) { + if (kvfi->Type == REG_DWORD && kvfi->DataLength >= sizeof(UINT32)) { + UINT32* pr = (UINT32*)((UINT8*)kvfi + kvfi->DataOffset); + + ret = *pr; + } } - if (!NT_SUCCESS(Status)) { - ERR("%.*S: IoCallDriver 2 returned %08x\n", us->Length / sizeof(WCHAR), us->Buffer, Status); - } else - TRACE("DriveLetterWasAssigned = %u, CurrentDriveLetter = %c\n", mmdli.DriveLetterWasAssigned, mmdli.CurrentDriveLetter); - - ExFreePool(mmdlt); + ZwClose(h); + ExFreePool(kvfi); + ExFreePool(path.Buffer); + + return ret; } -static void STDCALL test_vol(PDRIVER_OBJECT DriverObject, PDEVICE_OBJECT mountmgr, PUNICODE_STRING devpath, DWORD disk_num, DWORD part_num, LIST_ENTRY* volumes) { - KEVENT Event; - PIRP Irp; - IO_STATUS_BLOCK IoStatusBlock; +static void test_vol(PDEVICE_OBJECT mountmgr, PDEVICE_OBJECT DeviceObject, PUNICODE_STRING devpath, + DWORD disk_num, DWORD part_num, UINT64 length) { NTSTATUS Status; - PFILE_OBJECT FileObject; - PDEVICE_OBJECT DeviceObject; - LARGE_INTEGER Offset; ULONG toread; UINT8* data = NULL; UINT32 sector_size; - + TRACE("%.*S\n", devpath->Length / sizeof(WCHAR), 
devpath->Buffer); - - Status = IoGetDeviceObjectPointer(devpath, FILE_READ_ATTRIBUTES, &FileObject, &DeviceObject); - if (!NT_SUCCESS(Status)) { - ERR("IoGetDeviceObjectPointer returned %08x\n", Status); - return; - } sector_size = DeviceObject->SectorSize; - + if (sector_size == 0) { DISK_GEOMETRY geometry; IO_STATUS_BLOCK iosb; - + Status = dev_ioctl(DeviceObject, IOCTL_DISK_GET_DRIVE_GEOMETRY, NULL, 0, - &geometry, sizeof(DISK_GEOMETRY), TRUE, &iosb); - + &geometry, sizeof(DISK_GEOMETRY), TRUE, &iosb); + if (!NT_SUCCESS(Status)) { ERR("%.*S had a sector size of 0, and IOCTL_DISK_GET_DRIVE_GEOMETRY returned %08x\n", devpath->Length / sizeof(WCHAR), devpath->Buffer, Status); goto deref; } - + if (iosb.Information < sizeof(DISK_GEOMETRY)) { ERR("%.*S: IOCTL_DISK_GET_DRIVE_GEOMETRY returned %u bytes, expected %u\n", devpath->Length / sizeof(WCHAR), devpath->Buffer, iosb.Information, sizeof(DISK_GEOMETRY)); } - + sector_size = geometry.BytesPerSector; - + if (sector_size == 0) { ERR("%.*S had a sector size of 0\n", devpath->Length / sizeof(WCHAR), devpath->Buffer); goto deref; } } - - KeInitializeEvent(&Event, NotificationEvent, FALSE); - Offset.QuadPart = superblock_addrs[0]; - - toread = sector_align(sizeof(superblock), sector_size); + toread = (ULONG)sector_align(sizeof(superblock), sector_size); data = ExAllocatePoolWithTag(NonPagedPool, toread, ALLOC_TAG); if (!data) { ERR("out of memory\n"); goto deref; } - Irp = IoBuildSynchronousFsdRequest(IRP_MJ_READ, DeviceObject, data, toread, &Offset, &Event, &IoStatusBlock); - - if (!Irp) { - ERR("IoBuildSynchronousFsdRequest failed\n"); - goto deref; - } + Status = sync_read_phys(DeviceObject, superblock_addrs[0], toread, data, TRUE); - Status = IoCallDriver(DeviceObject, Irp); + if (NT_SUCCESS(Status) && ((superblock*)data)->magic == BTRFS_MAGIC) { + superblock* sb = (superblock*)data; + UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum)); - if (Status == 
STATUS_PENDING) { - KeWaitForSingleObject(&Event, Executive, KernelMode, FALSE, NULL); - Status = IoStatusBlock.Status; - } + if (crc32 != *((UINT32*)sb->checksum)) + ERR("checksum error on superblock\n"); + else { + TRACE("volume found\n"); - if (NT_SUCCESS(Status) && IoStatusBlock.Information > 0 && ((superblock*)data)->magic == BTRFS_MAGIC) { - int i; - GET_LENGTH_INFORMATION gli; - superblock* sb = (superblock*)data; - volume* v = ExAllocatePoolWithTag(PagedPool, sizeof(volume), ALLOC_TAG); - - if (!v) { - ERR("out of memory\n"); - goto deref; - } - - Status = dev_ioctl(DeviceObject, IOCTL_DISK_GET_LENGTH_INFO, NULL, 0, - &gli, sizeof(gli), TRUE, NULL); - if (!NT_SUCCESS(Status)) { - ERR("error reading length information: %08x\n", Status); - ExFreePool(v); - goto deref; - } - - if (part_num == 0) { - UNICODE_STRING us3; - - Status = create_part0(DriverObject, DeviceObject, devpath, &us3, &sb->dev_item.device_uuid); - - if (!NT_SUCCESS(Status)) { - ERR("create_part0 returned %08x\n", Status); - ExFreePool(v); - goto deref; - } - - v->devpath = us3; - } else { - v->devpath.Length = v->devpath.MaximumLength = devpath->Length; - v->devpath.Buffer = ExAllocatePoolWithTag(PagedPool, v->devpath.Length, ALLOC_TAG); - - if (!v->devpath.Buffer) { - ERR("out of memory\n"); - ExFreePool(v); - goto deref; - } - - RtlCopyMemory(v->devpath.Buffer, devpath->Buffer, v->devpath.Length); - } - - RtlCopyMemory(&v->fsuuid, &sb->uuid, sizeof(BTRFS_UUID)); - RtlCopyMemory(&v->devuuid, &sb->dev_item.device_uuid, sizeof(BTRFS_UUID)); - v->devnum = sb->dev_item.dev_id; - v->processed = FALSE; - v->length = gli.Length.QuadPart; - v->gen1 = sb->generation; - v->gen2 = 0; - v->seeding = sb->flags & BTRFS_SUPERBLOCK_FLAGS_SEEDING ? 
TRUE : FALSE; - v->disk_num = disk_num; - v->part_num = part_num; - - ExAcquireResourceExclusiveLite(&volumes_lock, TRUE); - InsertTailList(volumes, &v->list_entry); - ExReleaseResourceLite(&volumes_lock); - - i = 1; - while (superblock_addrs[i] != 0 && superblock_addrs[i] + toread <= v->length) { - KeInitializeEvent(&Event, NotificationEvent, FALSE); - - Offset.QuadPart = superblock_addrs[i]; - - Irp = IoBuildSynchronousFsdRequest(IRP_MJ_READ, DeviceObject, data, toread, &Offset, &Event, &IoStatusBlock); - - if (!Irp) { - ERR("IoBuildSynchronousFsdRequest failed\n"); - goto deref; - } + if (length >= superblock_addrs[1] + toread) { + ULONG i = 1; - Status = IoCallDriver(DeviceObject, Irp); + superblock* sb2 = ExAllocatePoolWithTag(NonPagedPool, toread, ALLOC_TAG); + if (!sb2) { + ERR("out of memory\n"); + goto deref; + } - if (Status == STATUS_PENDING) { - KeWaitForSingleObject(&Event, Executive, KernelMode, FALSE, NULL); - Status = IoStatusBlock.Status; - } - - if (NT_SUCCESS(Status)) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum)); - - if (crc32 != *((UINT32*)sb->checksum)) - WARN("superblock %u CRC error\n", i); - else if (sb->generation > v->gen1) { - v->gen2 = v->gen1; - v->gen1 = sb->generation; + while (superblock_addrs[i] > 0 && length >= superblock_addrs[i] + toread) { + Status = sync_read_phys(DeviceObject, superblock_addrs[i], toread, (PUCHAR)sb2, TRUE); + + if (NT_SUCCESS(Status) && sb2->magic == BTRFS_MAGIC) { + crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&sb2->uuid, (ULONG)sizeof(superblock) - sizeof(sb2->checksum)); + + if (crc32 == *((UINT32*)sb2->checksum) && sb2->generation > sb->generation) + RtlCopyMemory(sb, sb2, toread); + } + + i++; } - } else { - ERR("got error %08x while reading superblock %u\n", Status, i); + + ExFreePool(sb2); + } + + if (!fs_ignored(&sb->uuid)) { + DeviceObject->Flags &= ~DO_VERIFY_VOLUME; + add_volume_device(sb, mountmgr, devpath, length, disk_num, 
part_num); } - - i++; } - - TRACE("volume found\n"); - TRACE("gen1 = %llx, gen2 = %llx\n", v->gen1, v->gen2); - TRACE("FS uuid %02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x\n", - v->fsuuid.uuid[0], v->fsuuid.uuid[1], v->fsuuid.uuid[2], v->fsuuid.uuid[3], v->fsuuid.uuid[4], v->fsuuid.uuid[5], v->fsuuid.uuid[6], v->fsuuid.uuid[7], - v->fsuuid.uuid[8], v->fsuuid.uuid[9], v->fsuuid.uuid[10], v->fsuuid.uuid[11], v->fsuuid.uuid[12], v->fsuuid.uuid[13], v->fsuuid.uuid[14], v->fsuuid.uuid[15]); - TRACE("device uuid %02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x\n", - v->devuuid.uuid[0], v->devuuid.uuid[1], v->devuuid.uuid[2], v->devuuid.uuid[3], v->devuuid.uuid[4], v->devuuid.uuid[5], v->devuuid.uuid[6], v->devuuid.uuid[7], - v->devuuid.uuid[8], v->devuuid.uuid[9], v->devuuid.uuid[10], v->devuuid.uuid[11], v->devuuid.uuid[12], v->devuuid.uuid[13], v->devuuid.uuid[14], v->devuuid.uuid[15]); - TRACE("device number %llx\n", v->devnum); } - + deref: if (data) ExFreePool(data); - - ObDereferenceObject(FileObject); } -void remove_drive_letter(PDEVICE_OBJECT mountmgr, volume* v) { +NTSTATUS remove_drive_letter(PDEVICE_OBJECT mountmgr, PUNICODE_STRING devpath) { NTSTATUS Status; - KEVENT Event; - PIRP Irp; MOUNTMGR_MOUNT_POINT* mmp; ULONG mmpsize; MOUNTMGR_MOUNT_POINTS mmps1, *mmps2; - IO_STATUS_BLOCK IoStatusBlock; - - mmpsize = sizeof(MOUNTMGR_MOUNT_POINT) + v->devpath.Length; - + + TRACE("removing drive letter\n"); + + mmpsize = sizeof(MOUNTMGR_MOUNT_POINT) + devpath->Length; + mmp = ExAllocatePoolWithTag(PagedPool, mmpsize, ALLOC_TAG); if (!mmp) { ERR("out of memory\n"); - return; + return STATUS_INSUFFICIENT_RESOURCES; } - + RtlZeroMemory(mmp, mmpsize); - + mmp->DeviceNameOffset = sizeof(MOUNTMGR_MOUNT_POINT); - mmp->DeviceNameLength = v->devpath.Length; - RtlCopyMemory(&mmp[1], v->devpath.Buffer, v->devpath.Length); - - KeInitializeEvent(&Event, NotificationEvent, FALSE); - Irp = 
IoBuildDeviceIoControlRequest(IOCTL_MOUNTMGR_DELETE_POINTS, - mountmgr, mmp, mmpsize, - &mmps1, sizeof(MOUNTMGR_MOUNT_POINTS), FALSE, &Event, &IoStatusBlock); - if (!Irp) { - ERR("%.*S: IoBuildDeviceIoControlRequest 1 failed\n", v->devpath.Length / sizeof(WCHAR), v->devpath.Buffer); - ExFreePool(mmp); - return; - } + mmp->DeviceNameLength = devpath->Length; + RtlCopyMemory(&mmp[1], devpath->Buffer, devpath->Length); - Status = IoCallDriver(mountmgr, Irp); - if (Status == STATUS_PENDING) { - KeWaitForSingleObject(&Event, Executive, KernelMode, FALSE, NULL); - Status = IoStatusBlock.Status; - } + Status = dev_ioctl(mountmgr, IOCTL_MOUNTMGR_DELETE_POINTS, mmp, mmpsize, &mmps1, sizeof(MOUNTMGR_MOUNT_POINTS), FALSE, NULL); if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) { - ERR("%.*S: IoCallDriver 1 returned %08x\n", v->devpath.Length / sizeof(WCHAR), v->devpath.Buffer, Status); + ERR("IOCTL_MOUNTMGR_DELETE_POINTS 1 returned %08x\n", Status); ExFreePool(mmp); - return; + return Status; } - + if (Status != STATUS_BUFFER_OVERFLOW || mmps1.Size == 0) { ExFreePool(mmp); - return; + return STATUS_NOT_FOUND; } - + mmps2 = ExAllocatePoolWithTag(PagedPool, mmps1.Size, ALLOC_TAG); if (!mmps2) { ERR("out of memory\n"); ExFreePool(mmp); - return; - } - - KeInitializeEvent(&Event, NotificationEvent, FALSE); - Irp = IoBuildDeviceIoControlRequest(IOCTL_MOUNTMGR_DELETE_POINTS, - mountmgr, mmp, mmpsize, - mmps2, mmps1.Size, FALSE, &Event, &IoStatusBlock); - if (!Irp) { - ERR("%.*S: IoBuildDeviceIoControlRequest 2 failed\n", v->devpath.Length / sizeof(WCHAR), v->devpath.Buffer); - ExFreePool(mmps2); - ExFreePool(mmp); - return; + return STATUS_INSUFFICIENT_RESOURCES; } - Status = IoCallDriver(mountmgr, Irp); - if (Status == STATUS_PENDING) { - KeWaitForSingleObject(&Event, Executive, KernelMode, FALSE, NULL); - Status = IoStatusBlock.Status; - } + Status = dev_ioctl(mountmgr, IOCTL_MOUNTMGR_DELETE_POINTS, mmp, mmpsize, mmps2, mmps1.Size, FALSE, NULL); + + if 
(!NT_SUCCESS(Status)) + ERR("IOCTL_MOUNTMGR_DELETE_POINTS 2 returned %08x\n", Status); - if (!NT_SUCCESS(Status)) { - ERR("%.*S: IoCallDriver 2 returned %08x\n", v->devpath.Length / sizeof(WCHAR), v->devpath.Buffer, Status); - ExFreePool(mmps2); - ExFreePool(mmp); - return; - } - ExFreePool(mmps2); ExFreePool(mmp); -} -static void refresh_mountmgr(PDEVICE_OBJECT mountmgr, LIST_ENTRY* volumes) { - LIST_ENTRY* le; - - ExAcquireResourceExclusiveLite(&volumes_lock, TRUE); - - le = volumes->Flink; - while (le != volumes) { - volume* v = CONTAINING_RECORD(le, volume, list_entry); - - if (!v->processed) { - LIST_ENTRY* le2 = le; - volume* mountvol = v; - - while (le2 != volumes) { - volume* v2 = CONTAINING_RECORD(le2, volume, list_entry); - - if (RtlCompareMemory(&v2->fsuuid, &v->fsuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { - v2->processed = TRUE; - - if (v2->devnum < mountvol->devnum) { - remove_drive_letter(mountmgr, mountvol); - mountvol = v2; - } else if (v2->devnum > mountvol->devnum) - remove_drive_letter(mountmgr, v2); - } - - le2 = le2->Flink; - } - - add_volume(mountmgr, &mountvol->devpath); - } - - le = le->Flink; - } - - ExReleaseResourceLite(&volumes_lock); -} - -static void add_pnp_disk(ULONG disk_num, PUNICODE_STRING devpath) { - LIST_ENTRY* le; - pnp_disk* disk; - - le = pnp_disks.Flink; - while (le != &pnp_disks) { - disk = CONTAINING_RECORD(le, pnp_disk, list_entry); - - if (disk->devpath.Length == devpath->Length && - RtlCompareMemory(disk->devpath.Buffer, devpath->Buffer, devpath->Length) == devpath->Length) - return; - - le = le->Flink; - } - - disk = ExAllocatePoolWithTag(PagedPool, sizeof(pnp_disk), ALLOC_TAG); - if (!disk) { - ERR("out of memory\n"); - return; - } - - disk->devpath.Length = disk->devpath.MaximumLength = devpath->Length; - disk->devpath.Buffer = ExAllocatePoolWithTag(PagedPool, devpath->Length, ALLOC_TAG); - - if (!disk->devpath.Buffer) { - ERR("out of memory\n"); - ExFreePool(disk); - return; - } - - 
RtlCopyMemory(disk->devpath.Buffer, devpath->Buffer, devpath->Length); - - disk->disk_num = disk_num; - - InsertTailList(&pnp_disks, &disk->list_entry); + return Status; } -static void disk_arrival(PDRIVER_OBJECT DriverObject, PUNICODE_STRING devpath) { - PFILE_OBJECT FileObject, FileObject2; +void disk_arrival(PDRIVER_OBJECT DriverObject, PUNICODE_STRING devpath) { + PFILE_OBJECT FileObject, mountmgrfo; PDEVICE_OBJECT devobj, mountmgr; NTSTATUS Status; STORAGE_DEVICE_NUMBER sdn; ULONG dlisize; - DRIVE_LAYOUT_INFORMATION_EX* dli; + DRIVE_LAYOUT_INFORMATION_EX* dli = NULL; IO_STATUS_BLOCK iosb; - int i, num_parts = 0; - UNICODE_STRING devname, num, bspus, mmdevpath; - WCHAR devnamew[255], numw[20]; - USHORT preflen; - - static WCHAR device_harddisk[] = L"\\Device\\Harddisk"; - static WCHAR bs_partition[] = L"\\Partition"; - - // FIXME - work with CD-ROMs and floppies(?) - + GET_LENGTH_INFORMATION gli; + UNICODE_STRING mmdevpath; + + UNUSED(DriverObject); + Status = IoGetDeviceObjectPointer(devpath, FILE_READ_ATTRIBUTES, &FileObject, &devobj); if (!NT_SUCCESS(Status)) { ERR("IoGetDeviceObjectPointer returned %08x\n", Status); return; } - + RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME); - Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &FileObject2, &mountmgr); + Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &mountmgrfo, &mountmgr); if (!NT_SUCCESS(Status)) { ERR("IoGetDeviceObjectPointer returned %08x\n", Status); ObDereferenceObject(FileObject); return; } - - Status = dev_ioctl(devobj, IOCTL_STORAGE_GET_DEVICE_NUMBER, NULL, 0, - &sdn, sizeof(STORAGE_DEVICE_NUMBER), TRUE, &iosb); - if (!NT_SUCCESS(Status)) { - ERR("IOCTL_STORAGE_GET_DEVICE_NUMBER returned %08x\n", Status); - goto end; - } - - ExAcquireResourceExclusiveLite(&volumes_lock, TRUE); - add_pnp_disk(sdn.DeviceNumber, devpath); - ExReleaseResourceLite(&volumes_lock); - + dlisize = 0; - + do { dlisize += 1024; + + if (dli) + ExFreePool(dli); + dli = 
ExAllocatePoolWithTag(PagedPool, dlisize, ALLOC_TAG); - + if (!dli) { + ERR("out of memory\n"); + goto end; + } + Status = dev_ioctl(devobj, IOCTL_DISK_GET_DRIVE_LAYOUT_EX, NULL, 0, dli, dlisize, TRUE, &iosb); } while (Status == STATUS_BUFFER_TOO_SMALL); - - if (!NT_SUCCESS(Status)) { + + // only consider disk as a potential filesystem if it has no partitions + if (NT_SUCCESS(Status) && dli->PartitionCount > 0) { ExFreePool(dli); - goto no_parts; - } - - wcscpy(devnamew, device_harddisk); - devname.Buffer = devnamew; - devname.MaximumLength = sizeof(devnamew); - devname.Length = wcslen(device_harddisk) * sizeof(WCHAR); - - num.Buffer = numw; - num.MaximumLength = sizeof(numw); - RtlIntegerToUnicodeString(sdn.DeviceNumber, 10, &num); - RtlAppendUnicodeStringToString(&devname, &num); - - bspus.Buffer = bs_partition; - bspus.Length = bspus.MaximumLength = wcslen(bs_partition) * sizeof(WCHAR); - RtlAppendUnicodeStringToString(&devname, &bspus); - - preflen = devname.Length; - - for (i = 0; i < dli->PartitionCount; i++) { - if (dli->PartitionEntry[i].PartitionLength.QuadPart != 0 && dli->PartitionEntry[i].PartitionNumber != 0) { - devname.Length = preflen; - RtlIntegerToUnicodeString(dli->PartitionEntry[i].PartitionNumber, 10, &num); - RtlAppendUnicodeStringToString(&devname, &num); - - test_vol(DriverObject, mountmgr, &devname, sdn.DeviceNumber, dli->PartitionEntry[i].PartitionNumber, &volumes); - - num_parts++; - } + goto end; } - + ExFreePool(dli); - -no_parts: - if (num_parts == 0) { - devname.Length = preflen; - devname.Buffer[devname.Length / sizeof(WCHAR)] = '0'; - devname.Length += sizeof(WCHAR); - - test_vol(DriverObject, mountmgr, &devname, sdn.DeviceNumber, 0, &volumes); + + Status = dev_ioctl(devobj, IOCTL_DISK_GET_LENGTH_INFO, NULL, 0, + &gli, sizeof(gli), TRUE, NULL); + + if (!NT_SUCCESS(Status)) { + ERR("error reading length information: %08x\n", Status); + goto end; } - + + Status = dev_ioctl(devobj, IOCTL_STORAGE_GET_DEVICE_NUMBER, NULL, 0, + &sdn, 
sizeof(STORAGE_DEVICE_NUMBER), TRUE, NULL); + if (!NT_SUCCESS(Status)) { + TRACE("IOCTL_STORAGE_GET_DEVICE_NUMBER returned %08x\n", Status); + sdn.DeviceNumber = 0xffffffff; + sdn.PartitionNumber = 0; + } else + TRACE("DeviceType = %u, DeviceNumber = %u, PartitionNumber = %u\n", sdn.DeviceType, sdn.DeviceNumber, sdn.PartitionNumber); + + test_vol(mountmgr, devobj, devpath, sdn.DeviceNumber, sdn.PartitionNumber, gli.Length.QuadPart); + end: - refresh_mountmgr(mountmgr, &volumes); + ObDereferenceObject(FileObject); + ObDereferenceObject(mountmgrfo); +} + +void remove_volume_child(_Inout_ _Requires_exclusive_lock_held_(_Curr_->child_lock) _Releases_exclusive_lock_(_Curr_->child_lock) _In_ volume_device_extension* vde, + _In_ volume_child* vc, _In_ BOOL skip_dev) { + NTSTATUS Status; + pdo_device_extension* pdode = vde->pdode; + device_extension* Vcb = vde->mounted_device ? vde->mounted_device->DeviceExtension : NULL; + + if (vc->notification_entry) +#ifdef __REACTOS__ + IoUnregisterPlugPlayNotification(vc->notification_entry); +#else + IoUnregisterPlugPlayNotificationEx(vc->notification_entry); +#endif + + if (vde->mounted_device && (!Vcb || !Vcb->options.allow_degraded)) { + Status = pnp_surprise_removal(vde->mounted_device, NULL); + if (!NT_SUCCESS(Status)) + ERR("pnp_surprise_removal returned %08x\n", Status); + } + + if (!Vcb || !Vcb->options.allow_degraded) { + Status = IoSetDeviceInterfaceState(&vde->bus_name, FALSE); + if (!NT_SUCCESS(Status)) + WARN("IoSetDeviceInterfaceState returned %08x\n", Status); + } + + if (pdode->children_loaded > 0) { + UNICODE_STRING mmdevpath; + PFILE_OBJECT FileObject; + PDEVICE_OBJECT mountmgr; + LIST_ENTRY* le; + + if (!Vcb || !Vcb->options.allow_degraded) { + RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME); + Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &FileObject, &mountmgr); + if (!NT_SUCCESS(Status)) + ERR("IoGetDeviceObjectPointer returned %08x\n", Status); + else { + le = 
pdode->children.Flink; + + while (le != &pdode->children) { + volume_child* vc2 = CONTAINING_RECORD(le, volume_child, list_entry); + + if (vc2->had_drive_letter) { // re-add entry to mountmgr + MOUNTDEV_NAME mdn; + + Status = dev_ioctl(vc2->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &mdn, sizeof(MOUNTDEV_NAME), TRUE, NULL); + if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) + ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); + else { + MOUNTDEV_NAME* mdn2; + ULONG mdnsize = (ULONG)offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength; + + mdn2 = ExAllocatePoolWithTag(PagedPool, mdnsize, ALLOC_TAG); + if (!mdn2) + ERR("out of memory\n"); + else { + Status = dev_ioctl(vc2->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, mdn2, mdnsize, TRUE, NULL); + if (!NT_SUCCESS(Status)) + ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); + else { + UNICODE_STRING name; + + name.Buffer = mdn2->Name; + name.Length = name.MaximumLength = mdn2->NameLength; + + Status = mountmgr_add_drive_letter(mountmgr, &name); + if (!NT_SUCCESS(Status)) + WARN("mountmgr_add_drive_letter returned %08x\n", Status); + } + + ExFreePool(mdn2); + } + } + } + + le = le->Flink; + } + + ObDereferenceObject(FileObject); + } + } else if (!skip_dev) { + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (dev->devobj == vc->devobj) { + dev->devobj = NULL; // mark as missing + break; + } + + le = le->Flink; + } + + ExReleaseResourceLite(&Vcb->tree_lock); + } + + if (vde->device->Characteristics & FILE_REMOVABLE_MEDIA) { + vde->device->Characteristics &= ~FILE_REMOVABLE_MEDIA; + + le = pdode->children.Flink; + while (le != &pdode->children) { + volume_child* vc2 = CONTAINING_RECORD(le, volume_child, list_entry); + + if (vc2 != vc && vc2->devobj->Characteristics & FILE_REMOVABLE_MEDIA) { + vde->device->Characteristics |= 
FILE_REMOVABLE_MEDIA; + break; + } + + le = le->Flink; + } + } + } + + ObDereferenceObject(vc->fileobj); + ExFreePool(vc->pnp_name.Buffer); + RemoveEntryList(&vc->list_entry); + ExFreePool(vc); + + pdode->children_loaded--; + + if (pdode->children_loaded == 0) { // remove volume device + BOOL remove = FALSE; + + RemoveEntryList(&pdode->list_entry); + + vde->removing = TRUE; + + Status = IoSetDeviceInterfaceState(&vde->bus_name, FALSE); + if (!NT_SUCCESS(Status)) + WARN("IoSetDeviceInterfaceState returned %08x\n", Status); + + if (vde->pdo->AttachedDevice) + IoDetachDevice(vde->pdo); + + if (vde->open_count == 0) + remove = TRUE; + + ExReleaseResourceLite(&pdode->child_lock); + if (!no_pnp) { + control_device_extension* cde = master_devobj->DeviceExtension; + + IoInvalidateDeviceRelations(cde->buspdo, BusRelations); + } + + if (remove) { + PDEVICE_OBJECT pdo; + + if (vde->name.Buffer) + ExFreePool(vde->name.Buffer); + + if (Vcb) + Vcb->vde = NULL; + + ExDeleteResourceLite(&pdode->child_lock); + + pdo = vde->pdo; + IoDeleteDevice(vde->device); + + if (no_pnp) + IoDeleteDevice(pdo); + } + } else + ExReleaseResourceLite(&pdode->child_lock); +} + +void volume_arrival(PDRIVER_OBJECT DriverObject, PUNICODE_STRING devpath) { + STORAGE_DEVICE_NUMBER sdn; + PFILE_OBJECT FileObject, mountmgrfo; + UNICODE_STRING mmdevpath; + PDEVICE_OBJECT devobj, mountmgr; + GET_LENGTH_INFORMATION gli; + NTSTATUS Status; + + TRACE("%.*S\n", devpath->Length / sizeof(WCHAR), devpath->Buffer); + + Status = IoGetDeviceObjectPointer(devpath, FILE_READ_ATTRIBUTES, &FileObject, &devobj); + if (!NT_SUCCESS(Status)) { + ERR("IoGetDeviceObjectPointer returned %08x\n", Status); + return; + } + + // make sure we're not processing devices we've created ourselves + + if (devobj->DriverObject == DriverObject) + goto end; + + Status = dev_ioctl(devobj, IOCTL_DISK_GET_LENGTH_INFO, NULL, 0, &gli, sizeof(gli), TRUE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("IOCTL_DISK_GET_LENGTH_INFO returned %08x\n", Status); 
+ goto end; + } + + Status = dev_ioctl(devobj, IOCTL_STORAGE_GET_DEVICE_NUMBER, NULL, 0, + &sdn, sizeof(STORAGE_DEVICE_NUMBER), TRUE, NULL); + if (!NT_SUCCESS(Status)) { + TRACE("IOCTL_STORAGE_GET_DEVICE_NUMBER returned %08x\n", Status); + sdn.DeviceNumber = 0xffffffff; + sdn.PartitionNumber = 0; + } else + TRACE("DeviceType = %u, DeviceNumber = %u, PartitionNumber = %u\n", sdn.DeviceType, sdn.DeviceNumber, sdn.PartitionNumber); + + // If we've just added a partition to a whole-disk filesystem, unmount it + if (sdn.DeviceNumber != 0xffffffff) { + LIST_ENTRY* le; + + ExAcquireResourceExclusiveLite(&pdo_list_lock, TRUE); + + le = pdo_list.Flink; + while (le != &pdo_list) { + pdo_device_extension* pdode = CONTAINING_RECORD(le, pdo_device_extension, list_entry); + LIST_ENTRY* le2; + BOOL changed = FALSE; + + if (pdode->vde) { + ExAcquireResourceExclusiveLite(&pdode->child_lock, TRUE); + + le2 = pdode->children.Flink; + while (le2 != &pdode->children) { + volume_child* vc = CONTAINING_RECORD(le2, volume_child, list_entry); + LIST_ENTRY* le3 = le2->Flink; + + if (vc->disk_num == sdn.DeviceNumber && vc->part_num == 0) { + TRACE("removing device\n"); + + remove_volume_child(pdode->vde, vc, FALSE); + changed = TRUE; + + break; + } + + le2 = le3; + } + + if (!changed) + ExReleaseResourceLite(&pdode->child_lock); + else + break; + } + + le = le->Flink; + } + + ExReleaseResourceLite(&pdo_list_lock); + } + + RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME); + Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &mountmgrfo, &mountmgr); + if (!NT_SUCCESS(Status)) { + ERR("IoGetDeviceObjectPointer returned %08x\n", Status); + goto end; + } + + test_vol(mountmgr, devobj, devpath, sdn.DeviceNumber, sdn.PartitionNumber, gli.Length.QuadPart); + + ObDereferenceObject(mountmgrfo); + +end: ObDereferenceObject(FileObject); - ObDereferenceObject(FileObject2); } -static void disk_removal(PDRIVER_OBJECT DriverObject, PUNICODE_STRING devpath) { +void 
volume_removal(PDRIVER_OBJECT DriverObject, PUNICODE_STRING devpath) { LIST_ENTRY* le; - pnp_disk* disk = NULL; - - // FIXME - remove Partition0Btrfs devices and unlink from mountmgr - // FIXME - emergency unmount of RAIDed volumes - - ExAcquireResourceExclusiveLite(&volumes_lock, TRUE); - - le = pnp_disks.Flink; - while (le != &pnp_disks) { - pnp_disk* disk2 = CONTAINING_RECORD(le, pnp_disk, list_entry); - - if (disk2->devpath.Length == devpath->Length && - RtlCompareMemory(disk2->devpath.Buffer, devpath->Buffer, devpath->Length) == devpath->Length) { - disk = disk2; - break; + UNICODE_STRING devpath2; + + TRACE("%.*S\n", devpath->Length / sizeof(WCHAR), devpath->Buffer); + + UNUSED(DriverObject); + + devpath2 = *devpath; + + if (devpath->Length > 4 * sizeof(WCHAR) && devpath->Buffer[0] == '\\' && (devpath->Buffer[1] == '\\' || devpath->Buffer[1] == '?') && + devpath->Buffer[2] == '?' && devpath->Buffer[3] == '\\') { + devpath2.Buffer = &devpath2.Buffer[3]; + devpath2.Length -= 3 * sizeof(WCHAR); + devpath2.MaximumLength -= 3 * sizeof(WCHAR); + } + + ExAcquireResourceExclusiveLite(&pdo_list_lock, TRUE); + + le = pdo_list.Flink; + while (le != &pdo_list) { + pdo_device_extension* pdode = CONTAINING_RECORD(le, pdo_device_extension, list_entry); + LIST_ENTRY* le2; + BOOL changed = FALSE; + + if (pdode->vde) { + ExAcquireResourceExclusiveLite(&pdode->child_lock, TRUE); + + le2 = pdode->children.Flink; + while (le2 != &pdode->children) { + volume_child* vc = CONTAINING_RECORD(le2, volume_child, list_entry); + LIST_ENTRY* le3 = le2->Flink; + + if (vc->pnp_name.Length == devpath2.Length && RtlCompareMemory(vc->pnp_name.Buffer, devpath2.Buffer, devpath2.Length) == devpath2.Length) { + TRACE("removing device\n"); + + remove_volume_child(pdode->vde, vc, FALSE); + changed = TRUE; + + break; + } + + le2 = le3; + } + + if (!changed) + ExReleaseResourceLite(&pdode->child_lock); + else + break; } - + le = le->Flink; } - - if (!disk) { - ExReleaseResourceLite(&volumes_lock); + + 
ExReleaseResourceLite(&pdo_list_lock); +} + +typedef struct { + PDRIVER_OBJECT DriverObject; + UNICODE_STRING name; + pnp_callback func; + PIO_WORKITEM work_item; +} pnp_callback_context; + +_Function_class_(IO_WORKITEM_ROUTINE) +#ifdef __REACTOS__ +static void NTAPI do_pnp_callback(PDEVICE_OBJECT DeviceObject, PVOID con) { +#else +static void do_pnp_callback(PDEVICE_OBJECT DeviceObject, PVOID con) { +#endif + pnp_callback_context* context = con; + + UNUSED(DeviceObject); + + context->func(context->DriverObject, &context->name); + + if (context->name.Buffer) + ExFreePool(context->name.Buffer); + + IoFreeWorkItem(context->work_item); +} + +static void enqueue_pnp_callback(PDRIVER_OBJECT DriverObject, PUNICODE_STRING name, pnp_callback func) { + PIO_WORKITEM work_item; + pnp_callback_context* context; + + work_item = IoAllocateWorkItem(master_devobj); + + context = ExAllocatePoolWithTag(PagedPool, sizeof(pnp_callback_context), ALLOC_TAG); + + if (!context) { + ERR("out of memory\n"); + IoFreeWorkItem(work_item); return; } - le = volumes.Flink; - while (le != &volumes) { - volume* v = CONTAINING_RECORD(le, volume, list_entry); - LIST_ENTRY* le2 = le->Flink; - - if (v->disk_num == disk->disk_num) { - if (v->devpath.Buffer) - ExFreePool(v->devpath.Buffer); - - RemoveEntryList(&v->list_entry); - - ExFreePool(v); + context->DriverObject = DriverObject; + + if (name->Length > 0) { + context->name.Buffer = ExAllocatePoolWithTag(PagedPool, name->Length, ALLOC_TAG); + if (!context->name.Buffer) { + ERR("out of memory\n"); + ExFreePool(context); + IoFreeWorkItem(work_item); + return; } - - le = le2; + + RtlCopyMemory(context->name.Buffer, name->Buffer, name->Length); + context->name.Length = context->name.MaximumLength = name->Length; + } else { + context->name.Length = context->name.MaximumLength = 0; + context->name.Buffer = NULL; } - - ExReleaseResourceLite(&volumes_lock); - - ExFreePool(disk->devpath.Buffer); - - RemoveEntryList(&disk->list_entry); - - ExFreePool(disk); + 
+ context->func = func; + context->work_item = work_item; + + IoQueueWorkItem(work_item, do_pnp_callback, DelayedWorkQueue, context); +} + +_Function_class_(DRIVER_NOTIFICATION_CALLBACK_ROUTINE) +#ifdef __REACTOS__ +NTSTATUS NTAPI volume_notification(PVOID NotificationStructure, PVOID Context) { +#else +NTSTATUS volume_notification(PVOID NotificationStructure, PVOID Context) { +#endif + DEVICE_INTERFACE_CHANGE_NOTIFICATION* dicn = (DEVICE_INTERFACE_CHANGE_NOTIFICATION*)NotificationStructure; + PDRIVER_OBJECT DriverObject = (PDRIVER_OBJECT)Context; + + if (RtlCompareMemory(&dicn->Event, &GUID_DEVICE_INTERFACE_ARRIVAL, sizeof(GUID)) == sizeof(GUID)) + enqueue_pnp_callback(DriverObject, dicn->SymbolicLinkName, volume_arrival); + else if (RtlCompareMemory(&dicn->Event, &GUID_DEVICE_INTERFACE_REMOVAL, sizeof(GUID)) == sizeof(GUID)) + enqueue_pnp_callback(DriverObject, dicn->SymbolicLinkName, volume_removal); + + return STATUS_SUCCESS; } +_Function_class_(DRIVER_NOTIFICATION_CALLBACK_ROUTINE) #ifdef __REACTOS__ NTSTATUS NTAPI pnp_notification(PVOID NotificationStructure, PVOID Context) { #else @@ -683,11 +743,200 @@ NTSTATUS pnp_notification(PVOID NotificationStructure, PVOID Context) { #endif DEVICE_INTERFACE_CHANGE_NOTIFICATION* dicn = (DEVICE_INTERFACE_CHANGE_NOTIFICATION*)NotificationStructure; PDRIVER_OBJECT DriverObject = (PDRIVER_OBJECT)Context; - + if (RtlCompareMemory(&dicn->Event, &GUID_DEVICE_INTERFACE_ARRIVAL, sizeof(GUID)) == sizeof(GUID)) - disk_arrival(DriverObject, dicn->SymbolicLinkName); + enqueue_pnp_callback(DriverObject, dicn->SymbolicLinkName, disk_arrival); else if (RtlCompareMemory(&dicn->Event, &GUID_DEVICE_INTERFACE_REMOVAL, sizeof(GUID)) == sizeof(GUID)) - disk_removal(DriverObject, dicn->SymbolicLinkName); - + enqueue_pnp_callback(DriverObject, dicn->SymbolicLinkName, volume_removal); + return STATUS_SUCCESS; } + +static void mountmgr_process_drive(PDEVICE_OBJECT mountmgr, PUNICODE_STRING device_name) { + NTSTATUS Status; + LIST_ENTRY* le; + 
BOOL done = FALSE; + + ExAcquireResourceSharedLite(&pdo_list_lock, TRUE); + + le = pdo_list.Flink; + while (le != &pdo_list) { + pdo_device_extension* pdode = CONTAINING_RECORD(le, pdo_device_extension, list_entry); + LIST_ENTRY* le2; + + ExAcquireResourceSharedLite(&pdode->child_lock, TRUE); + + le2 = pdode->children.Flink; + + while (le2 != &pdode->children) { + volume_child* vc = CONTAINING_RECORD(le2, volume_child, list_entry); + + if (vc->devobj) { + MOUNTDEV_NAME mdn; + + Status = dev_ioctl(vc->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &mdn, sizeof(MOUNTDEV_NAME), TRUE, NULL); + if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) + ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); + else { + MOUNTDEV_NAME* mdn2; + ULONG mdnsize = (ULONG)offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength; + + mdn2 = ExAllocatePoolWithTag(NonPagedPool, mdnsize, ALLOC_TAG); + if (!mdn2) + ERR("out of memory\n"); + else { + Status = dev_ioctl(vc->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, mdn2, mdnsize, TRUE, NULL); + if (!NT_SUCCESS(Status)) + ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); + else { + if (mdn2->NameLength == device_name->Length && RtlCompareMemory(mdn2->Name, device_name->Buffer, device_name->Length) == device_name->Length) { + Status = remove_drive_letter(mountmgr, device_name); + if (!NT_SUCCESS(Status)) + ERR("remove_drive_letter returned %08x\n", Status); + else + vc->had_drive_letter = TRUE; + + done = TRUE; + break; + } + } + + ExFreePool(mdn2); + } + } + } + + le2 = le2->Flink; + } + + ExReleaseResourceLite(&pdode->child_lock); + + if (done) + break; + + le = le->Flink; + } + + ExReleaseResourceLite(&pdo_list_lock); +} + +static void mountmgr_updated(PDEVICE_OBJECT mountmgr, MOUNTMGR_MOUNT_POINTS* mmps) { + ULONG i; + + static WCHAR pref[] = L"\\DosDevices\\"; + + for (i = 0; i < mmps->NumberOfMountPoints; i++) { + UNICODE_STRING symlink, device_name; + + if 
(mmps->MountPoints[i].SymbolicLinkNameOffset != 0) { + symlink.Buffer = (WCHAR*)(((UINT8*)mmps) + mmps->MountPoints[i].SymbolicLinkNameOffset); + symlink.Length = symlink.MaximumLength = mmps->MountPoints[i].SymbolicLinkNameLength; + } else { + symlink.Buffer = NULL; + symlink.Length = symlink.MaximumLength = 0; + } + + if (mmps->MountPoints[i].DeviceNameOffset != 0) { + device_name.Buffer = (WCHAR*)(((UINT8*)mmps) + mmps->MountPoints[i].DeviceNameOffset); + device_name.Length = device_name.MaximumLength = mmps->MountPoints[i].DeviceNameLength; + } else { + device_name.Buffer = NULL; + device_name.Length = device_name.MaximumLength = 0; + } + + if (symlink.Length > wcslen(pref) * sizeof(WCHAR) && + RtlCompareMemory(symlink.Buffer, pref, wcslen(pref) * sizeof(WCHAR)) == wcslen(pref) * sizeof(WCHAR)) + mountmgr_process_drive(mountmgr, &device_name); + } +} + +_Function_class_(KSTART_ROUTINE) +#ifdef __REACTOS__ +void NTAPI mountmgr_thread(_In_ void* context) { +#else +void mountmgr_thread(_In_ void* context) { +#endif + UNICODE_STRING mmdevpath; + NTSTATUS Status; + PFILE_OBJECT FileObject; + PDEVICE_OBJECT mountmgr; + MOUNTMGR_CHANGE_NOTIFY_INFO mcni; + + UNUSED(context); + + RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME); + Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &FileObject, &mountmgr); + if (!NT_SUCCESS(Status)) { + ERR("IoGetDeviceObjectPointer returned %08x\n", Status); + return; + } + + mcni.EpicNumber = 0; + + while (TRUE) { + PIRP Irp; + MOUNTMGR_MOUNT_POINT mmp; + MOUNTMGR_MOUNT_POINTS mmps; + IO_STATUS_BLOCK iosb; + + KeClearEvent(&mountmgr_thread_event); + + Irp = IoBuildDeviceIoControlRequest(IOCTL_MOUNTMGR_CHANGE_NOTIFY, mountmgr, &mcni, sizeof(MOUNTMGR_CHANGE_NOTIFY_INFO), + &mcni, sizeof(MOUNTMGR_CHANGE_NOTIFY_INFO), FALSE, &mountmgr_thread_event, &iosb); + + if (!Irp) { + ERR("out of memory\n"); + break; + } + + Status = IoCallDriver(mountmgr, Irp); + + if (Status == STATUS_PENDING) { + 
KeWaitForSingleObject(&mountmgr_thread_event, Executive, KernelMode, FALSE, NULL); + Status = iosb.Status; + } + + if (shutting_down) + break; + + if (!NT_SUCCESS(Status)) { + ERR("IOCTL_MOUNTMGR_CHANGE_NOTIFY returned %08x\n", Status); + break; + } + + TRACE("mountmgr changed\n"); + + RtlZeroMemory(&mmp, sizeof(MOUNTMGR_MOUNT_POINT)); + + Status = dev_ioctl(mountmgr, IOCTL_MOUNTMGR_QUERY_POINTS, &mmp, sizeof(MOUNTMGR_MOUNT_POINT), &mmps, sizeof(MOUNTMGR_MOUNT_POINTS), + FALSE, NULL); + + if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) + ERR("IOCTL_MOUNTMGR_QUERY_POINTS 1 returned %08x\n", Status); + + if (mmps.Size > 0) { + MOUNTMGR_MOUNT_POINTS* mmps2; + + mmps2 = ExAllocatePoolWithTag(NonPagedPool, mmps.Size, ALLOC_TAG); + if (!mmps2) { + ERR("out of memory\n"); + break; + } + + Status = dev_ioctl(mountmgr, IOCTL_MOUNTMGR_QUERY_POINTS, &mmp, sizeof(MOUNTMGR_MOUNT_POINTS), mmps2, mmps.Size, + FALSE, NULL); + if (!NT_SUCCESS(Status)) + ERR("IOCTL_MOUNTMGR_QUERY_POINTS returned %08x\n", Status); + else + mountmgr_updated(mountmgr, mmps2); + + ExFreePool(mmps2); + } + } + + ObDereferenceObject(FileObject); + + mountmgr_thread_handle = NULL; + + PsTerminateSystemThread(STATUS_SUCCESS); +} diff --git a/reactos/drivers/filesystems/btrfs/security.c b/reactos/drivers/filesystems/btrfs/security.c index 57dd7d135c3..8ba772f1e4c 100644 --- a/reactos/drivers/filesystems/btrfs/security.c +++ b/reactos/drivers/filesystems/btrfs/security.c @@ -1,17 +1,17 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. 
- * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . */ @@ -45,78 +45,13 @@ static dacl def_dacls[] = { { OBJECT_INHERIT_ACE | CONTAINER_INHERIT_ACE | INHERIT_ONLY_ACE, FILE_ALL_ACCESS, &sid_SY }, { OBJECT_INHERIT_ACE | CONTAINER_INHERIT_ACE, FILE_GENERIC_READ | FILE_GENERIC_EXECUTE, &sid_BU }, { OBJECT_INHERIT_ACE | CONTAINER_INHERIT_ACE | INHERIT_ONLY_ACE, FILE_GENERIC_READ | FILE_GENERIC_WRITE | FILE_GENERIC_EXECUTE | DELETE, &sid_AU }, - { 0, FILE_ADD_SUBDIRECTORY, &sid_AU }, + { 0, FILE_GENERIC_READ | FILE_GENERIC_WRITE | FILE_GENERIC_EXECUTE | DELETE, &sid_AU }, // FIXME - Mandatory Label\High Mandatory Level:(OI)(NP)(IO)(NW) { 0, 0, NULL } }; -extern LIST_ENTRY uid_map_list; - -// UINT32 STDCALL get_uid() { -// PACCESS_TOKEN at = PsReferencePrimaryToken(PsGetCurrentProcess()); -// HANDLE h; -// ULONG len, size; -// TOKEN_USER* tu; -// NTSTATUS Status; -// UINT32 uid = UID_NOBODY; -// LIST_ENTRY* le; -// uid_map* um; -// // char s[256]; -// -// Status = ObOpenObjectByPointer(at, OBJ_KERNEL_HANDLE, NULL, GENERIC_READ, NULL, KernelMode, &h); -// if (!NT_SUCCESS(Status)) { -// ERR("ObOpenObjectByPointer returned %08x\n", Status); -// goto exit; -// } -// -// Status = ZwQueryInformationToken(h, TokenUser, NULL, 0, &len); -// if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_TOO_SMALL) { -// ERR("ZwQueryInformationToken(1) returned %08x (len = %u)\n", Status, len); -// goto exit2; -// } -// -// // TRACE("len = %u\n", len); -// -// tu = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG); -// -// Status = ZwQueryInformationToken(h, TokenUser, tu, len, &len); -// -// if (!NT_SUCCESS(Status)) { -// ERR("ZwQueryInformationToken(2) returned %08x\n", Status); 
-// goto exit3; -// } -// -// size = 8 + (4 * ((sid_header*)tu->User.Sid)->elements); -// -// le = uid_map_list.Flink; -// while (le != &uid_map_list) { -// um = CONTAINING_RECORD(le, uid_map, listentry); -// -// if (((sid_header*)um->sid)->elements == ((sid_header*)tu->User.Sid)->elements && -// RtlCompareMemory(um->sid, tu->User.Sid, size) == size) { -// uid = um->uid; -// break; -// } -// -// le = le->Flink; -// } -// -// // sid_to_string(tu->User.Sid, s); -// -// // TRACE("got SID: %s\n", s); -// TRACE("uid = %u\n", uid); -// -// exit3: -// ExFreePool(tu); -// -// exit2: -// ZwClose(h); -// -// exit: -// PsDereferencePrimaryToken(at); -// -// return uid; -// } +extern LIST_ENTRY uid_map_list, gid_map_list; +extern ERESOURCE mapping_lock; void add_user_mapping(WCHAR* sidstring, ULONG sidstringlength, UINT32 uid) { unsigned int i, np; @@ -125,8 +60,7 @@ void add_user_mapping(WCHAR* sidstring, ULONG sidstringlength, UINT32 uid) { ULONG sidsize; sid_header* sid; uid_map* um; -// char s[255]; - + if (sidstringlength < 4 || sidstring[0] != 'S' || sidstring[1] != '-' || @@ -135,10 +69,10 @@ void add_user_mapping(WCHAR* sidstring, ULONG sidstringlength, UINT32 uid) { ERR("invalid SID\n"); return; } - + sidstring = &sidstring[4]; sidstringlength -= 4; - + numdashes = 0; for (i = 0; i < sidstringlength; i++) { if (sidstring[i] == '-') { @@ -146,17 +80,17 @@ void add_user_mapping(WCHAR* sidstring, ULONG sidstringlength, UINT32 uid) { sidstring[i] = 0; } } - + sidsize = 8 + (numdashes * 4); sid = ExAllocatePoolWithTag(PagedPool, sidsize, ALLOC_TAG); if (!sid) { ERR("out of memory\n"); return; } - + sid->revision = 0x01; sid->elements = numdashes; - + np = 0; while (sidstringlength > 0) { val = 0; @@ -167,26 +101,26 @@ void add_user_mapping(WCHAR* sidstring, ULONG sidstringlength, UINT32 uid) { val += sidstring[i] - '0'; } else break; - + i++; } - + i++; TRACE("val = %u, i = %u, ssl = %u\n", (UINT32)val, i, sidstringlength); - + if (np == 0) { - sid->auth[0] = (val & 
0xff0000000000) >> 40; - sid->auth[1] = (val & 0xff00000000) >> 32; - sid->auth[2] = (val & 0xff000000) >> 24; - sid->auth[3] = (val & 0xff0000) >> 16; - sid->auth[4] = (val & 0xff00) >> 8; + sid->auth[0] = (UINT8)((val & 0xff0000000000) >> 40); + sid->auth[1] = (UINT8)((val & 0xff00000000) >> 32); + sid->auth[2] = (UINT8)((val & 0xff000000) >> 24); + sid->auth[3] = (UINT8)((val & 0xff0000) >> 16); + sid->auth[4] = (UINT8)((val & 0xff00) >> 8); sid->auth[5] = val & 0xff; } else { sid->nums[np-1] = (UINT32)val; } - + np++; - + if (sidstringlength > i) { sidstringlength -= i; @@ -194,115 +128,208 @@ void add_user_mapping(WCHAR* sidstring, ULONG sidstringlength, UINT32 uid) { } else break; } - -// sid_to_string(sid, s); - -// TRACE("%s\n", s); + um = ExAllocatePoolWithTag(PagedPool, sizeof(uid_map), ALLOC_TAG); if (!um) { ERR("out of memory\n"); ExFreePool(sid); return; } - + um->sid = sid; um->uid = uid; - + InsertTailList(&uid_map_list, &um->listentry); } -void uid_to_sid(UINT32 uid, PSID* sid) { +void add_group_mapping(WCHAR* sidstring, ULONG sidstringlength, UINT32 gid) { + unsigned int i, np; + UINT8 numdashes; + UINT64 val; + ULONG sidsize; + sid_header* sid; + gid_map* gm; + + if (sidstringlength < 4 || sidstring[0] != 'S' || sidstring[1] != '-' || sidstring[2] != '1' || sidstring[3] != '-') { + ERR("invalid SID\n"); + return; + } + + sidstring = &sidstring[4]; + sidstringlength -= 4; + + numdashes = 0; + for (i = 0; i < sidstringlength; i++) { + if (sidstring[i] == '-') { + numdashes++; + sidstring[i] = 0; + } + } + + sidsize = 8 + (numdashes * 4); + sid = ExAllocatePoolWithTag(PagedPool, sidsize, ALLOC_TAG); + if (!sid) { + ERR("out of memory\n"); + return; + } + + sid->revision = 0x01; + sid->elements = numdashes; + + np = 0; + while (sidstringlength > 0) { + val = 0; + i = 0; + while (sidstring[i] != '-' && i < sidstringlength) { + if (sidstring[i] >= '0' && sidstring[i] <= '9') { + val *= 10; + val += sidstring[i] - '0'; + } else + break; + + i++; + } + + 
i++; + TRACE("val = %u, i = %u, ssl = %u\n", (UINT32)val, i, sidstringlength); + + if (np == 0) { + sid->auth[0] = (UINT8)((val & 0xff0000000000) >> 40); + sid->auth[1] = (UINT8)((val & 0xff00000000) >> 32); + sid->auth[2] = (UINT8)((val & 0xff000000) >> 24); + sid->auth[3] = (UINT8)((val & 0xff0000) >> 16); + sid->auth[4] = (UINT8)((val & 0xff00) >> 8); + sid->auth[5] = val & 0xff; + } else + sid->nums[np-1] = (UINT32)val; + + np++; + + if (sidstringlength > i) { + sidstringlength -= i; + + sidstring = &sidstring[i]; + } else + break; + } + + gm = ExAllocatePoolWithTag(PagedPool, sizeof(gid_map), ALLOC_TAG); + if (!gm) { + ERR("out of memory\n"); + ExFreePool(sid); + return; + } + + gm->sid = sid; + gm->gid = gid; + + InsertTailList(&gid_map_list, &gm->listentry); +} + +NTSTATUS uid_to_sid(UINT32 uid, PSID* sid) { LIST_ENTRY* le; - uid_map* um; sid_header* sh; UCHAR els; - + + ExAcquireResourceSharedLite(&mapping_lock, TRUE); + le = uid_map_list.Flink; while (le != &uid_map_list) { - um = CONTAINING_RECORD(le, uid_map, listentry); - + uid_map* um = CONTAINING_RECORD(le, uid_map, listentry); + if (um->uid == uid) { *sid = ExAllocatePoolWithTag(PagedPool, RtlLengthSid(um->sid), ALLOC_TAG); if (!*sid) { ERR("out of memory\n"); - return; + ExReleaseResourceLite(&mapping_lock); + return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(*sid, um->sid, RtlLengthSid(um->sid)); - return; + ExReleaseResourceLite(&mapping_lock); + return STATUS_SUCCESS; } - + le = le->Flink; } - + + ExReleaseResourceLite(&mapping_lock); + if (uid == 0) { // root // FIXME - find actual Administrator account, rather than SYSTEM (S-1-5-18) // (of form S-1-5-21-...-500) - + els = 1; - + sh = ExAllocatePoolWithTag(PagedPool, sizeof(sid_header) + ((els - 1) * sizeof(UINT32)), ALLOC_TAG); if (!sh) { ERR("out of memory\n"); *sid = NULL; - return; + return STATUS_INSUFFICIENT_RESOURCES; } - + sh->revision = 1; sh->elements = els; - + sh->auth[0] = 0; sh->auth[1] = 0; sh->auth[2] = 0; sh->auth[3] = 
0; sh->auth[4] = 0; sh->auth[5] = 5; - + sh->nums[0] = 18; - } else { + } else { // fallback to S-1-22-1-X, Samba's SID scheme sh = ExAllocatePoolWithTag(PagedPool, sizeof(sid_header), ALLOC_TAG); if (!sh) { ERR("out of memory\n"); *sid = NULL; - return; + return STATUS_INSUFFICIENT_RESOURCES; } - + sh->revision = 1; sh->elements = 2; - + sh->auth[0] = 0; sh->auth[1] = 0; sh->auth[2] = 0; sh->auth[3] = 0; sh->auth[4] = 0; sh->auth[5] = 22; - + sh->nums[0] = 1; sh->nums[1] = uid; } *sid = sh; + + return STATUS_SUCCESS; } UINT32 sid_to_uid(PSID sid) { LIST_ENTRY* le; - uid_map* um; sid_header* sh = sid; + ExAcquireResourceSharedLite(&mapping_lock, TRUE); + le = uid_map_list.Flink; while (le != &uid_map_list) { - um = CONTAINING_RECORD(le, uid_map, listentry); - - if (RtlEqualSid(sid, um->sid)) + uid_map* um = CONTAINING_RECORD(le, uid_map, listentry); + + if (RtlEqualSid(sid, um->sid)) { + ExReleaseResourceLite(&mapping_lock); return um->uid; - + } + le = le->Flink; } - + + ExReleaseResourceLite(&mapping_lock); + if (RtlEqualSid(sid, &sid_SY)) return 0; // root - + // Samba's SID scheme: S-1-22-1-X if (sh->revision == 1 && sh->elements == 2 && sh->auth[0] == 0 && sh->auth[1] == 0 && sh->auth[2] == 0 && sh->auth[3] == 0 && sh->auth[4] == 0 && sh->auth[5] == 22 && sh->nums[0] == 1) @@ -314,9 +341,9 @@ UINT32 sid_to_uid(PSID sid) { static void gid_to_sid(UINT32 gid, PSID* sid) { sid_header* sh; UCHAR els; - + // FIXME - do this properly? 
- + // fallback to S-1-22-2-X, Samba's SID scheme els = 2; sh = ExAllocatePoolWithTag(PagedPool, sizeof(sid_header) + ((els - 1) * sizeof(UINT32)), ALLOC_TAG); @@ -325,17 +352,17 @@ static void gid_to_sid(UINT32 gid, PSID* sid) { *sid = NULL; return; } - + sh->revision = 1; sh->elements = els; - + sh->auth[0] = 0; sh->auth[1] = 0; sh->auth[2] = 0; sh->auth[3] = 0; sh->auth[4] = 0; sh->auth[5] = 22; - + sh->nums[0] = 2; sh->nums[1] = gid; @@ -343,11 +370,10 @@ static void gid_to_sid(UINT32 gid, PSID* sid) { } static ACL* load_default_acl() { - ULONG size; + UINT16 size, i; ACL* acl; ACCESS_ALLOWED_ACE* aaa; - UINT32 i; - + size = sizeof(ACL); i = 0; while (def_dacls[i].sid) { @@ -355,19 +381,19 @@ static ACL* load_default_acl() { size += 8 + (def_dacls[i].sid->elements * sizeof(UINT32)) - sizeof(ULONG); i++; } - + acl = ExAllocatePoolWithTag(PagedPool, size, ALLOC_TAG); if (!acl) { ERR("out of memory\n"); return NULL; } - + acl->AclRevision = ACL_REVISION; acl->Sbz1 = 0; acl->AclSize = size; acl->AceCount = i; acl->Sbz2 = 0; - + aaa = (ACCESS_ALLOWED_ACE*)&acl[1]; i = 0; while (def_dacls[i].sid) { @@ -375,178 +401,15 @@ static ACL* load_default_acl() { aaa->Header.AceFlags = def_dacls[i].flags; aaa->Header.AceSize = sizeof(ACCESS_ALLOWED_ACE) - sizeof(ULONG) + 8 + (def_dacls[i].sid->elements * sizeof(UINT32)); aaa->Mask = def_dacls[i].mask; - + RtlCopyMemory(&aaa->SidStart, def_dacls[i].sid, 8 + (def_dacls[i].sid->elements * sizeof(UINT32))); - + aaa = (ACCESS_ALLOWED_ACE*)((UINT8*)aaa + aaa->Header.AceSize); - + i++; } - - return acl; -} -// static void STDCALL sid_to_string(PSID sid, char* s) { -// sid_header* sh = (sid_header*)sid; -// LARGE_INTEGER authnum; -// UINT8 i; -// -// authnum.LowPart = sh->auth[5] | (sh->auth[4] << 8) | (sh->auth[3] << 16) | (sh->auth[2] << 24); -// authnum.HighPart = sh->auth[1] | (sh->auth[0] << 8); -// -// sprintf(s, "S-%u-%u", sh->revision, (UINT32)authnum.QuadPart); -// -// for (i = 0; i < sh->elements; i++) { -// sprintf(s, 
"%s-%u", s, sh->nums[i]); -// } -// } - -BOOL get_sd_from_xattr(fcb* fcb, ULONG buflen) { - NTSTATUS Status; - PSID sid, usersid; - - TRACE("using xattr " EA_NTACL " for security descriptor\n"); - - if (fcb->inode_item.st_uid != UID_NOBODY) { - BOOLEAN defaulted; - - Status = RtlGetOwnerSecurityDescriptor(fcb->sd, &sid, &defaulted); - if (!NT_SUCCESS(Status)) { - ERR("RtlGetOwnerSecurityDescriptor returned %08x\n", Status); - } else { - uid_to_sid(fcb->inode_item.st_uid, &usersid); - - if (!usersid) { - ERR("out of memory\n"); - return FALSE; - } - - if (!RtlEqualSid(sid, usersid)) { - SECURITY_DESCRIPTOR *newsd, *newsd2; - ULONG sdsize, daclsize, saclsize, ownersize, groupsize; - ACL *dacl, *sacl; - PSID owner, group; - - sdsize = daclsize = saclsize = ownersize = groupsize = 0; - - Status = RtlSelfRelativeToAbsoluteSD(fcb->sd, NULL, &sdsize, NULL, &daclsize, NULL, &saclsize, NULL, &ownersize, NULL, &groupsize); - - if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_TOO_SMALL) { - ERR("RtlSelfRelativeToAbsoluteSD 1 returned %08x\n", Status); - } - - TRACE("sdsize = %u, daclsize = %u, saclsize = %u, ownersize = %u, groupsize = %u\n", sdsize, daclsize, saclsize, ownersize, groupsize); - - newsd2 = sdsize == 0 ? NULL : ExAllocatePoolWithTag(PagedPool, sdsize, ALLOC_TAG); - dacl = daclsize == 0 ? NULL : ExAllocatePoolWithTag(PagedPool, daclsize, ALLOC_TAG); - sacl = saclsize == 0 ? NULL : ExAllocatePoolWithTag(PagedPool, saclsize, ALLOC_TAG); - owner = ownersize == 0 ? NULL : ExAllocatePoolWithTag(PagedPool, ownersize, ALLOC_TAG); - group = groupsize == 0 ? 
NULL : ExAllocatePoolWithTag(PagedPool, groupsize, ALLOC_TAG); - - if ((sdsize > 0 && !newsd2) || (daclsize > 0 && !dacl) || (saclsize > 0 && !sacl) || (ownersize > 0 && !owner) || (groupsize > 0 && !group)) { - ERR("out of memory\n"); - if (newsd2) ExFreePool(newsd2); - if (dacl) ExFreePool(dacl); - if (sacl) ExFreePool(sacl); - if (owner) ExFreePool(owner); - if (group) ExFreePool(group); - ExFreePool(usersid); - return FALSE; - } - - Status = RtlSelfRelativeToAbsoluteSD(fcb->sd, newsd2, &sdsize, dacl, &daclsize, sacl, &saclsize, owner, &ownersize, group, &groupsize); - - if (!NT_SUCCESS(Status)) { - ERR("RtlSelfRelativeToAbsoluteSD returned %08x\n", Status); - if (newsd2) ExFreePool(newsd2); - if (dacl) ExFreePool(dacl); - if (sacl) ExFreePool(sacl); - if (owner) ExFreePool(owner); - if (group) ExFreePool(group); - ExFreePool(usersid); - return FALSE; - } - - Status = RtlSetOwnerSecurityDescriptor(newsd2, usersid, FALSE); - if (!NT_SUCCESS(Status)) { - ERR("RtlSetOwnerSecurityDescriptor returned %08x\n", Status); - if (newsd2) ExFreePool(newsd2); - if (dacl) ExFreePool(dacl); - if (sacl) ExFreePool(sacl); - if (owner) ExFreePool(owner); - if (group) ExFreePool(group); - ExFreePool(usersid); - return FALSE; - } - - buflen = 0; - Status = RtlAbsoluteToSelfRelativeSD(newsd2, NULL, &buflen); - if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_TOO_SMALL) { - ERR("RtlAbsoluteToSelfRelativeSD 1 returned %08x\n", Status); - if (newsd2) ExFreePool(newsd2); - if (dacl) ExFreePool(dacl); - if (sacl) ExFreePool(sacl); - if (owner) ExFreePool(owner); - if (group) ExFreePool(group); - ExFreePool(usersid); - return FALSE; - } - - if (buflen == 0 || NT_SUCCESS(Status)) { - ERR("RtlAbsoluteToSelfRelativeSD said SD is zero-length\n"); - if (newsd2) ExFreePool(newsd2); - if (dacl) ExFreePool(dacl); - if (sacl) ExFreePool(sacl); - if (owner) ExFreePool(owner); - if (group) ExFreePool(group); - ExFreePool(usersid); - return FALSE; - } - - newsd = ExAllocatePoolWithTag(PagedPool, 
buflen, ALLOC_TAG); - if (!newsd) { - ERR("out of memory\n"); - if (newsd2) ExFreePool(newsd2); - if (dacl) ExFreePool(dacl); - if (sacl) ExFreePool(sacl); - if (owner) ExFreePool(owner); - if (group) ExFreePool(group); - ExFreePool(usersid); - return FALSE; - } - - Status = RtlAbsoluteToSelfRelativeSD(newsd2, newsd, &buflen); - - if (!NT_SUCCESS(Status)) { - ERR("RtlAbsoluteToSelfRelativeSD 2 returned %08x\n", Status); - if (newsd2) ExFreePool(newsd2); - if (dacl) ExFreePool(dacl); - if (sacl) ExFreePool(sacl); - if (owner) ExFreePool(owner); - if (group) ExFreePool(group); - ExFreePool(usersid); - ExFreePool(newsd); - return FALSE; - } - - ExFreePool(fcb->sd); - - fcb->sd = newsd; - - if (newsd2) ExFreePool(newsd2); - if (dacl) ExFreePool(dacl); - if (sacl) ExFreePool(sacl); - if (owner) ExFreePool(owner); - if (group) ExFreePool(group); - } - - ExFreePool(usersid); - } - } - - // FIXME - check GID here if not GID_NOBODY - - return TRUE; + return acl; } static void get_top_level_sd(fcb* fcb) { @@ -555,98 +418,92 @@ static void get_top_level_sd(fcb* fcb) { ULONG buflen; ACL* acl = NULL; PSID usersid = NULL, groupsid = NULL; - + Status = RtlCreateSecurityDescriptor(&sd, SECURITY_DESCRIPTOR_REVISION); - + if (!NT_SUCCESS(Status)) { ERR("RtlCreateSecurityDescriptor returned %08x\n", Status); goto end; } - -// if (fcb->inode_item.st_uid != UID_NOBODY) { - uid_to_sid(fcb->inode_item.st_uid, &usersid); - if (!usersid) { - ERR("out of memory\n"); - goto end; - } - - RtlSetOwnerSecurityDescriptor(&sd, usersid, FALSE); - - if (!NT_SUCCESS(Status)) { - ERR("RtlSetOwnerSecurityDescriptor returned %08x\n", Status); - goto end; - } -// } - -// if (fcb->inode_item.st_gid != GID_NOBODY) { - gid_to_sid(fcb->inode_item.st_gid, &groupsid); - if (!groupsid) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - RtlSetGroupSecurityDescriptor(&sd, groupsid, FALSE); - - if (!NT_SUCCESS(Status)) { - ERR("RtlSetGroupSecurityDescriptor returned %08x\n", 
Status); - goto end; - } -// } - + + Status = uid_to_sid(fcb->inode_item.st_uid, &usersid); + if (!NT_SUCCESS(Status)) { + ERR("uid_to_sid returned %08x\n", Status); + goto end; + } + + RtlSetOwnerSecurityDescriptor(&sd, usersid, FALSE); + + if (!NT_SUCCESS(Status)) { + ERR("RtlSetOwnerSecurityDescriptor returned %08x\n", Status); + goto end; + } + + gid_to_sid(fcb->inode_item.st_gid, &groupsid); + if (!groupsid) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlSetGroupSecurityDescriptor(&sd, groupsid, FALSE); + + if (!NT_SUCCESS(Status)) { + ERR("RtlSetGroupSecurityDescriptor returned %08x\n", Status); + goto end; + } + acl = load_default_acl(); - + if (!acl) { ERR("out of memory\n"); goto end; } Status = RtlSetDaclSecurityDescriptor(&sd, TRUE, acl, FALSE); - + if (!NT_SUCCESS(Status)) { ERR("RtlSetDaclSecurityDescriptor returned %08x\n", Status); goto end; } - + // FIXME - SACL_SECURITY_INFORMATION - + buflen = 0; - + // get sd size Status = RtlAbsoluteToSelfRelativeSD(&sd, NULL, &buflen); if (Status != STATUS_SUCCESS && Status != STATUS_BUFFER_TOO_SMALL) { ERR("RtlAbsoluteToSelfRelativeSD 1 returned %08x\n", Status); goto end; } - -// fcb->sdlen = buflen; - + if (buflen == 0 || Status == STATUS_SUCCESS) { TRACE("RtlAbsoluteToSelfRelativeSD said SD is zero-length\n"); goto end; } - + fcb->sd = ExAllocatePoolWithTag(PagedPool, buflen, ALLOC_TAG); if (!fcb->sd) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + Status = RtlAbsoluteToSelfRelativeSD(&sd, fcb->sd, &buflen); - + if (!NT_SUCCESS(Status)) { ERR("RtlAbsoluteToSelfRelativeSD 2 returned %08x\n", Status); goto end; } - + end: if (acl) ExFreePool(acl); - + if (usersid) ExFreePool(usersid); - + if (groupsid) ExFreePool(groupsid); } @@ -656,51 +513,49 @@ void fcb_get_sd(fcb* fcb, struct _fcb* parent, BOOL look_for_xattr, PIRP Irp) { PSID usersid = NULL, groupsid = NULL; SECURITY_SUBJECT_CONTEXT subjcont; ULONG buflen; - - if 
(look_for_xattr && get_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_NTACL, EA_NTACL_HASH, (UINT8**)&fcb->sd, (UINT16*)&buflen, Irp)) { - if (get_sd_from_xattr(fcb, buflen)) - return; - } - + + if (look_for_xattr && get_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_NTACL, EA_NTACL_HASH, (UINT8**)&fcb->sd, (UINT16*)&buflen, Irp)) + return; + if (!parent) { get_top_level_sd(fcb); return; } - + SeCaptureSubjectContext(&subjcont); - + Status = SeAssignSecurityEx(parent->sd, NULL, (void**)&fcb->sd, NULL, fcb->type == BTRFS_TYPE_DIRECTORY, SEF_DACL_AUTO_INHERIT, &subjcont, IoGetFileObjectGenericMapping(), PagedPool); if (!NT_SUCCESS(Status)) { ERR("SeAssignSecurityEx returned %08x\n", Status); } - - uid_to_sid(fcb->inode_item.st_uid, &usersid); - if (!usersid) { - ERR("out of memory\n"); + + Status = uid_to_sid(fcb->inode_item.st_uid, &usersid); + if (!NT_SUCCESS(Status)) { + ERR("uid_to_sid returned %08x\n", Status); return; } - + RtlSetOwnerSecurityDescriptor(&fcb->sd, usersid, FALSE); - + gid_to_sid(fcb->inode_item.st_gid, &groupsid); if (!groupsid) { ERR("out of memory\n"); return; } - + RtlSetGroupSecurityDescriptor(&fcb->sd, groupsid, FALSE); - + ExFreePool(usersid); ExFreePool(groupsid); } -static NTSTATUS STDCALL get_file_security(device_extension* Vcb, PFILE_OBJECT FileObject, SECURITY_DESCRIPTOR* relsd, ULONG* buflen, SECURITY_INFORMATION flags) { +static NTSTATUS get_file_security(PFILE_OBJECT FileObject, SECURITY_DESCRIPTOR* relsd, ULONG* buflen, SECURITY_INFORMATION flags) { NTSTATUS Status; fcb* fcb = FileObject->FsContext; ccb* ccb = FileObject->FsContext2; file_ref* fileref = ccb ? ccb->fileref : NULL; - + if (fcb->ads) { if (fileref && fileref->parent) fcb = fileref->parent->fcb; @@ -709,21 +564,21 @@ static NTSTATUS STDCALL get_file_security(device_extension* Vcb, PFILE_OBJECT Fi return STATUS_INTERNAL_ERROR; } } - -// TRACE("buflen = %u, fcb->sdlen = %u\n", *buflen, fcb->sdlen); // Why (void**)? Is this a bug in mingw? 
Status = SeQuerySecurityDescriptorInfo(&flags, relsd, buflen, (void**)&fcb->sd); - + if (Status == STATUS_BUFFER_TOO_SMALL) TRACE("SeQuerySecurityDescriptorInfo returned %08x\n", Status); else if (!NT_SUCCESS(Status)) ERR("SeQuerySecurityDescriptorInfo returned %08x\n", Status); - + return Status; } -NTSTATUS STDCALL drv_query_security(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { +_Dispatch_type_(IRP_MJ_QUERY_SECURITY) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_query_security(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { NTSTATUS Status; SECURITY_DESCRIPTOR* sd; PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); @@ -733,33 +588,36 @@ NTSTATUS STDCALL drv_query_security(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) PFILE_OBJECT FileObject = IrpSp->FileObject; ccb* ccb = FileObject ? FileObject->FsContext2 : NULL; - TRACE("query security\n"); - FsRtlEnterFileSystem(); + TRACE("query security\n"); + top_level = is_top_level(Irp); - - if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) { - Status = part0_passthrough(DeviceObject, Irp); - goto exit; + + if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + Status = vol_query_security(DeviceObject, Irp); + goto end; + } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { + Status = STATUS_INVALID_PARAMETER; + goto end; } - + if (!ccb) { ERR("no ccb\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + if (Irp->RequestorMode == UserMode && !(ccb->access & READ_CONTROL)) { WARN("insufficient permissions\n"); Status = STATUS_ACCESS_DENIED; goto end; } - + Status = STATUS_SUCCESS; - + Irp->IoStatus.Information = 0; - + if (IrpSp->Parameters.QuerySecurity.SecurityInformation & OWNER_SECURITY_INFORMATION) TRACE("OWNER_SECURITY_INFORMATION\n"); @@ -771,23 +629,22 @@ NTSTATUS STDCALL drv_query_security(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) if (IrpSp->Parameters.QuerySecurity.SecurityInformation & SACL_SECURITY_INFORMATION) TRACE("SACL_SECURITY_INFORMATION\n"); - + TRACE("length = %u\n", 
IrpSp->Parameters.QuerySecurity.Length); - - sd = map_user_buffer(Irp); -// sd = Irp->AssociatedIrp.SystemBuffer; + + sd = map_user_buffer(Irp, NormalPagePriority); TRACE("sd = %p\n", sd); - + if (Irp->MdlAddress && !sd) { ERR("MmGetSystemAddressForMdlSafe returned NULL\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + buflen = IrpSp->Parameters.QuerySecurity.Length; - - Status = get_file_security(Vcb, IrpSp->FileObject, sd, &buflen, IrpSp->Parameters.QuerySecurity.SecurityInformation); - + + Status = get_file_security(IrpSp->FileObject, sd, &buflen, IrpSp->Parameters.QuerySecurity.SecurityInformation); + if (NT_SUCCESS(Status)) Irp->IoStatus.Information = IrpSp->Parameters.QuerySecurity.Length; else if (Status == STATUS_BUFFER_TOO_SMALL) { @@ -795,26 +652,25 @@ NTSTATUS STDCALL drv_query_security(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) Status = STATUS_BUFFER_OVERFLOW; } else Irp->IoStatus.Information = 0; - + end: TRACE("Irp->IoStatus.Information = %u\n", Irp->IoStatus.Information); - + Irp->IoStatus.Status = Status; IoCompleteRequest(Irp, IO_NO_INCREMENT); - -exit: - if (top_level) - IoSetTopLevelIrp(NULL); - - FsRtlExitFileSystem(); + + if (top_level) + IoSetTopLevelIrp(NULL); TRACE("returning %08x\n", Status); - + + FsRtlExitFileSystem(); + return Status; } -static NTSTATUS STDCALL set_file_security(device_extension* Vcb, PFILE_OBJECT FileObject, SECURITY_DESCRIPTOR* sd, SECURITY_INFORMATION flags) { +static NTSTATUS set_file_security(device_extension* Vcb, PFILE_OBJECT FileObject, SECURITY_DESCRIPTOR* sd, PSECURITY_INFORMATION flags, PIRP Irp) { NTSTATUS Status; fcb* fcb = FileObject->FsContext; ccb* ccb = FileObject->FsContext2; @@ -822,12 +678,12 @@ static NTSTATUS STDCALL set_file_security(device_extension* Vcb, PFILE_OBJECT Fi SECURITY_DESCRIPTOR* oldsd; LARGE_INTEGER time; BTRFS_TIME now; - - TRACE("(%p, %p, %p, %x)\n", Vcb, FileObject, sd, flags); - + + TRACE("(%p, %p, %p, %x)\n", Vcb, FileObject, sd, *flags); + if (Vcb->readonly) return 
STATUS_MEDIA_WRITE_PROTECTED; - + if (fcb->ads) { if (fileref && fileref->parent) fcb = fileref->parent->fcb; @@ -836,66 +692,58 @@ static NTSTATUS STDCALL set_file_security(device_extension* Vcb, PFILE_OBJECT Fi return STATUS_INTERNAL_ERROR; } } - + + if (!fcb || !ccb) + return STATUS_INVALID_PARAMETER; + ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); - - if (fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY) { + + if (is_subvol_readonly(fcb->subvol, Irp)) { Status = STATUS_ACCESS_DENIED; goto end; } - + oldsd = fcb->sd; - - Status = SeSetSecurityDescriptorInfo(NULL, &flags, sd, (void**)&fcb->sd, PagedPool, IoGetFileObjectGenericMapping()); - + + Status = SeSetSecurityDescriptorInfo(NULL, flags, sd, (void**)&fcb->sd, PagedPool, IoGetFileObjectGenericMapping()); + if (!NT_SUCCESS(Status)) { ERR("SeSetSecurityDescriptorInfo returned %08x\n", Status); goto end; } - + ExFreePool(oldsd); - + KeQuerySystemTime(&time); win_time_to_unix(time, &now); - + fcb->inode_item.transid = Vcb->superblock.generation; - + if (!ccb->user_set_change_time) fcb->inode_item.st_ctime = now; - + fcb->inode_item.sequence++; - - if (flags & OWNER_SECURITY_INFORMATION) { - PSID owner; - BOOLEAN defaulted; - - Status = RtlGetOwnerSecurityDescriptor(sd, &owner, &defaulted); - - if (!NT_SUCCESS(Status)) { - ERR("RtlGetOwnerSecurityDescriptor returned %08x\n", Status); - goto end; - } - - fcb->inode_item.st_uid = sid_to_uid(owner); - } - + fcb->sd_dirty = TRUE; + fcb->sd_deleted = FALSE; fcb->inode_item_changed = TRUE; - + fcb->subvol->root_item.ctransid = Vcb->superblock.generation; fcb->subvol->root_item.ctime = now; - + mark_fcb_dirty(fcb); - - send_notification_fcb(fileref, FILE_NOTIFY_CHANGE_SECURITY, FILE_ACTION_MODIFIED); - + + send_notification_fcb(fileref, FILE_NOTIFY_CHANGE_SECURITY, FILE_ACTION_MODIFIED, NULL); + end: ExReleaseResourceLite(fcb->Header.Resource); return Status; } -NTSTATUS STDCALL drv_set_security(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { 
+_Dispatch_type_(IRP_MJ_SET_SECURITY) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_set_security(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { NTSTATUS Status; PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); PFILE_OBJECT FileObject = IrpSp->FileObject; @@ -904,77 +752,161 @@ NTSTATUS STDCALL drv_set_security(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { ULONG access_req = 0; BOOL top_level; - TRACE("set security\n"); - FsRtlEnterFileSystem(); + TRACE("set security\n"); + top_level = is_top_level(Irp); - - if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) { - Status = part0_passthrough(DeviceObject, Irp); - goto exit; + + if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + Status = vol_set_security(DeviceObject, Irp); + goto end; + } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { + Status = STATUS_INVALID_PARAMETER; + goto end; } - + if (!ccb) { ERR("no ccb\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + Status = STATUS_SUCCESS; - + Irp->IoStatus.Information = 0; - - if (IrpSp->Parameters.QuerySecurity.SecurityInformation & OWNER_SECURITY_INFORMATION) { + + if (IrpSp->Parameters.SetSecurity.SecurityInformation & OWNER_SECURITY_INFORMATION) { TRACE("OWNER_SECURITY_INFORMATION\n"); access_req |= WRITE_OWNER; } - if (IrpSp->Parameters.QuerySecurity.SecurityInformation & GROUP_SECURITY_INFORMATION) { + if (IrpSp->Parameters.SetSecurity.SecurityInformation & GROUP_SECURITY_INFORMATION) { TRACE("GROUP_SECURITY_INFORMATION\n"); access_req |= WRITE_OWNER; } - if (IrpSp->Parameters.QuerySecurity.SecurityInformation & DACL_SECURITY_INFORMATION) { + if (IrpSp->Parameters.SetSecurity.SecurityInformation & DACL_SECURITY_INFORMATION) { TRACE("DACL_SECURITY_INFORMATION\n"); access_req |= WRITE_DAC; } - if (IrpSp->Parameters.QuerySecurity.SecurityInformation & SACL_SECURITY_INFORMATION) { + if (IrpSp->Parameters.SetSecurity.SecurityInformation & SACL_SECURITY_INFORMATION) { TRACE("SACL_SECURITY_INFORMATION\n"); access_req |= ACCESS_SYSTEM_SECURITY; } - - if 
((ccb->access & access_req) != access_req) { + + if (Irp->RequestorMode == UserMode && (ccb->access & access_req) != access_req) { Status = STATUS_ACCESS_DENIED; WARN("insufficient privileges\n"); goto end; } - + Status = set_file_security(DeviceObject->DeviceExtension, FileObject, IrpSp->Parameters.SetSecurity.SecurityDescriptor, - IrpSp->Parameters.SetSecurity.SecurityInformation); - + &IrpSp->Parameters.SetSecurity.SecurityInformation, Irp); + end: Irp->IoStatus.Status = Status; IoCompleteRequest(Irp, IO_NO_INCREMENT); - + TRACE("returning %08x\n", Status); -exit: - if (top_level) + if (top_level) IoSetTopLevelIrp(NULL); FsRtlExitFileSystem(); - + return Status; } +static BOOL search_for_gid(fcb* fcb, PSID sid) { + LIST_ENTRY* le; + + le = gid_map_list.Flink; + while (le != &gid_map_list) { + gid_map* gm = CONTAINING_RECORD(le, gid_map, listentry); + + if (RtlEqualSid(sid, gm->sid)) { + fcb->inode_item.st_gid = gm->gid; + return TRUE; + } + + le = le->Flink; + } + + return FALSE; +} + +void find_gid(struct _fcb* fcb, struct _fcb* parfcb, PSECURITY_SUBJECT_CONTEXT subjcont) { + NTSTATUS Status; + TOKEN_OWNER* to; + TOKEN_PRIMARY_GROUP* tpg; + TOKEN_GROUPS* tg; + + if (parfcb && parfcb->inode_item.st_mode & S_ISGID) { + fcb->inode_item.st_gid = parfcb->inode_item.st_gid; + return; + } + + ExAcquireResourceSharedLite(&mapping_lock, TRUE); + + if (!subjcont || !subjcont->PrimaryToken || IsListEmpty(&gid_map_list)) { + ExReleaseResourceLite(&mapping_lock); + return; + } + + Status = SeQueryInformationToken(subjcont->PrimaryToken, TokenOwner, (void**)&to); + if (!NT_SUCCESS(Status)) + ERR("SeQueryInformationToken returned %08x\n", Status); + else { + if (search_for_gid(fcb, to->Owner)) { + ExReleaseResourceLite(&mapping_lock); + ExFreePool(to); + return; + } + + ExFreePool(to); + } + + Status = SeQueryInformationToken(subjcont->PrimaryToken, TokenPrimaryGroup, (void**)&tpg); + if (!NT_SUCCESS(Status)) + ERR("SeQueryInformationToken returned %08x\n", Status); + else { 
+ if (search_for_gid(fcb, tpg->PrimaryGroup)) { + ExReleaseResourceLite(&mapping_lock); + ExFreePool(tpg); + return; + } + + ExFreePool(tpg); + } + + Status = SeQueryInformationToken(subjcont->PrimaryToken, TokenGroups, (void**)&tg); + if (!NT_SUCCESS(Status)) + ERR("SeQueryInformationToken returned %08x\n", Status); + else { + ULONG i; + + for (i = 0; i < tg->GroupCount; i++) { + if (search_for_gid(fcb, tg->Groups[i].Sid)) { + ExReleaseResourceLite(&mapping_lock); + ExFreePool(tg); + return; + } + } + + ExFreePool(tg); + } + + ExReleaseResourceLite(&mapping_lock); +} + NTSTATUS fcb_get_new_sd(fcb* fcb, file_ref* parfileref, ACCESS_STATE* as) { NTSTATUS Status; PSID owner; BOOLEAN defaulted; - + Status = SeAssignSecurityEx(parfileref ? parfileref->fcb->sd : NULL, as->SecurityDescriptor, (void**)&fcb->sd, NULL, fcb->type == BTRFS_TYPE_DIRECTORY, SEF_SACL_AUTO_INHERIT, &as->SubjectSecurityContext, IoGetFileObjectGenericMapping(), PagedPool); @@ -982,7 +914,7 @@ NTSTATUS fcb_get_new_sd(fcb* fcb, file_ref* parfileref, ACCESS_STATE* as) { ERR("SeAssignSecurityEx returned %08x\n", Status); return Status; } - + Status = RtlGetOwnerSecurityDescriptor(fcb->sd, &owner, &defaulted); if (!NT_SUCCESS(Status)) { ERR("RtlGetOwnerSecurityDescriptor returned %08x\n", Status); @@ -990,6 +922,8 @@ NTSTATUS fcb_get_new_sd(fcb* fcb, file_ref* parfileref, ACCESS_STATE* as) { } else { fcb->inode_item.st_uid = sid_to_uid(owner); } - + + find_gid(fcb, parfileref ? parfileref->fcb : NULL, &as->SubjectSecurityContext); + return STATUS_SUCCESS; } diff --git a/reactos/drivers/filesystems/btrfs/send.c b/reactos/drivers/filesystems/btrfs/send.c new file mode 100644 index 00000000000..a865c62c3e2 --- /dev/null +++ b/reactos/drivers/filesystems/btrfs/send.c @@ -0,0 +1,3851 @@ +/* Copyright (c) Mark Harmstone 2017 + * + * This file is part of WinBtrfs. 
+ * + * WinBtrfs is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public Licence as published by + * the Free Software Foundation, either version 3 of the Licence, or + * (at your option) any later version. + * + * WinBtrfs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public Licence for more details. + * + * You should have received a copy of the GNU Lesser General Public Licence + * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */ + +#include "btrfs_drv.h" + +typedef struct send_dir { + LIST_ENTRY list_entry; + UINT64 inode; + BOOL dummy; + BTRFS_TIME atime; + BTRFS_TIME mtime; + BTRFS_TIME ctime; + struct send_dir* parent; + UINT16 namelen; + char* name; + LIST_ENTRY deleted_children; +} send_dir; + +typedef struct { + LIST_ENTRY list_entry; + UINT64 inode; + BOOL dir; + send_dir* sd; + char tmpname[64]; +} orphan; + +typedef struct { + LIST_ENTRY list_entry; + ULONG namelen; + char name[1]; +} deleted_child; + +typedef struct { + LIST_ENTRY list_entry; + send_dir* sd; + UINT16 namelen; + char name[1]; +} ref; + +typedef struct { + send_dir* sd; + UINT64 last_child_inode; + LIST_ENTRY list_entry; +} pending_rmdir; + +typedef struct { + UINT64 offset; + LIST_ENTRY list_entry; + ULONG datalen; + EXTENT_DATA data; +} send_ext; + +typedef struct { + device_extension* Vcb; + root* root; + root* parent; + UINT8* data; + ULONG datalen; + ULONG num_clones; + root** clones; + LIST_ENTRY orphans; + LIST_ENTRY dirs; + LIST_ENTRY pending_rmdirs; + KEVENT buffer_event; + send_dir* root_dir; + send_info* send; + + struct { + UINT64 inode; + BOOL deleting; + BOOL new; + UINT64 gen; + UINT64 uid; + UINT64 olduid; + UINT64 gid; + UINT64 oldgid; + UINT64 mode; + UINT64 oldmode; + UINT64 size; + UINT64 flags; + BTRFS_TIME atime; + BTRFS_TIME mtime; + BTRFS_TIME ctime; + BOOL
file; + char* path; + orphan* o; + send_dir* sd; + LIST_ENTRY refs; + LIST_ENTRY oldrefs; + LIST_ENTRY exts; + LIST_ENTRY oldexts; + } lastinode; +} send_context; + +#define MAX_SEND_WRITE 0xc000 // 48 KB +#define SEND_BUFFER_LENGTH 0x100000 // 1 MB + +static NTSTATUS find_send_dir(send_context* context, UINT64 dir, UINT64 generation, send_dir** psd, BOOL* added_dummy); +static NTSTATUS wait_for_flush(send_context* context, traverse_ptr* tp1, traverse_ptr* tp2); + +static void send_command(send_context* context, UINT16 cmd) { + btrfs_send_command* bsc = (btrfs_send_command*)&context->data[context->datalen]; + + bsc->cmd = cmd; + bsc->csum = 0; + + context->datalen += sizeof(btrfs_send_command); +} + +static void send_command_finish(send_context* context, ULONG pos) { + btrfs_send_command* bsc = (btrfs_send_command*)&context->data[pos]; + + bsc->length = context->datalen - pos - sizeof(btrfs_send_command); + bsc->csum = calc_crc32c(0, (UINT8*)bsc, context->datalen - pos); +} + +static void send_add_tlv(send_context* context, UINT16 type, void* data, UINT16 length) { + btrfs_send_tlv* tlv = (btrfs_send_tlv*)&context->data[context->datalen]; + + tlv->type = type; + tlv->length = length; + + if (length > 0 && data) + RtlCopyMemory(&tlv[1], data, length); + + context->datalen += sizeof(btrfs_send_tlv) + length; +} + +static char* uint64_to_char(UINT64 num, char* buf) { + char *tmp, tmp2[20]; + + if (num == 0) { + buf[0] = '0'; + return buf + 1; + } + + tmp = &tmp2[20]; + while (num > 0) { + tmp--; + *tmp = (num % 10) + '0'; + num /= 10; + } + + RtlCopyMemory(buf, tmp, tmp2 + sizeof(tmp2) - tmp); + + return &buf[tmp2 + sizeof(tmp2) - tmp]; +} + +static NTSTATUS get_orphan_name(send_context* context, UINT64 inode, UINT64 generation, char* name) { + char *ptr, *ptr2; + UINT64 index = 0; + KEY searchkey; + + name[0] = 'o'; + + ptr = uint64_to_char(inode, &name[1]); + *ptr = '-'; ptr++; + ptr = uint64_to_char(generation, ptr); + *ptr = '-'; ptr++; + ptr2 = ptr; + + 
searchkey.obj_id = SUBVOL_ROOT_INODE; + searchkey.obj_type = TYPE_DIR_ITEM; + + do { + NTSTATUS Status; + traverse_ptr tp; + + ptr = uint64_to_char(index, ptr); + *ptr = 0; + + searchkey.offset = calc_crc32c(0xfffffffe, (UINT8*)name, (ULONG)(ptr - name)); + + Status = find_item(context->Vcb, context->root, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + if (!keycmp(searchkey, tp.item->key)) + goto cont; + + if (context->parent) { + Status = find_item(context->Vcb, context->parent, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + if (!keycmp(searchkey, tp.item->key)) + goto cont; + } + + return STATUS_SUCCESS; + +cont: + index++; + ptr = ptr2; + } while (TRUE); +} + +static void add_orphan(send_context* context, orphan* o) { + LIST_ENTRY* le; + + le = context->orphans.Flink; + while (le != &context->orphans) { + orphan* o2 = CONTAINING_RECORD(le, orphan, list_entry); + + if (o2->inode > o->inode) { + InsertHeadList(o2->list_entry.Blink, &o->list_entry); + return; + } + + le = le->Flink; + } + + InsertTailList(&context->orphans, &o->list_entry); +} + +static NTSTATUS send_read_symlink(send_context* context, UINT64 inode, char** link, UINT16* linklen) { + NTSTATUS Status; + KEY searchkey; + traverse_ptr tp; + EXTENT_DATA* ed; + + searchkey.obj_id = inode; + searchkey.obj_type = TYPE_EXTENT_DATA; + searchkey.offset = 0; + + Status = find_item(context->Vcb, context->root, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + if (keycmp(tp.item->key, searchkey)) { + ERR("could not find (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); + return STATUS_INTERNAL_ERROR; + } + + if (tp.item->size < sizeof(EXTENT_DATA)) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, 
tp.item->key.obj_type, tp.item->key.offset, + tp.item->size, sizeof(EXTENT_DATA)); + return STATUS_INTERNAL_ERROR; + } + + ed = (EXTENT_DATA*)tp.item->data; + + if (ed->type != EXTENT_TYPE_INLINE) { + WARN("symlink data was not inline, returning blank string\n"); + *link = NULL; + *linklen = 0; + return STATUS_SUCCESS; + } + + if (tp.item->size < offsetof(EXTENT_DATA, data[0]) + ed->decoded_size) { + ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, + tp.item->size, offsetof(EXTENT_DATA, data[0]) + ed->decoded_size); + return STATUS_INTERNAL_ERROR; + } + + *link = (char*)ed->data; + *linklen = (UINT16)ed->decoded_size; + + return STATUS_SUCCESS; +} + +static NTSTATUS send_inode(send_context* context, traverse_ptr* tp, traverse_ptr* tp2) { + NTSTATUS Status; + INODE_ITEM* ii; + + if (tp2 && !tp) { + INODE_ITEM* ii2 = (INODE_ITEM*)tp2->item->data; + + if (tp2->item->size < sizeof(INODE_ITEM)) { + ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp2->item->key.obj_id, tp2->item->key.obj_type, tp2->item->key.offset, + tp2->item->size, sizeof(INODE_ITEM)); + return STATUS_INTERNAL_ERROR; + } + + context->lastinode.inode = tp2->item->key.obj_id; + context->lastinode.deleting = TRUE; + context->lastinode.gen = ii2->generation; + context->lastinode.mode = ii2->st_mode; + context->lastinode.flags = ii2->flags; + context->lastinode.o = NULL; + context->lastinode.sd = NULL; + + return STATUS_SUCCESS; + } + + ii = (INODE_ITEM*)tp->item->data; + + if (tp->item->size < sizeof(INODE_ITEM)) { + ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, + tp->item->size, sizeof(INODE_ITEM)); + return STATUS_INTERNAL_ERROR; + } + + context->lastinode.inode = tp->item->key.obj_id; + context->lastinode.deleting = FALSE; + context->lastinode.gen = ii->generation; + context->lastinode.uid = ii->st_uid; + context->lastinode.gid = ii->st_gid; + 
context->lastinode.mode = ii->st_mode; + context->lastinode.size = ii->st_size; + context->lastinode.atime = ii->st_atime; + context->lastinode.mtime = ii->st_mtime; + context->lastinode.ctime = ii->st_ctime; + context->lastinode.flags = ii->flags; + context->lastinode.file = FALSE; + context->lastinode.o = NULL; + context->lastinode.sd = NULL; + + if (context->lastinode.path) { + ExFreePool(context->lastinode.path); + context->lastinode.path = NULL; + } + + if (tp2) { + INODE_ITEM* ii2 = (INODE_ITEM*)tp2->item->data; + LIST_ENTRY* le; + + if (tp2->item->size < sizeof(INODE_ITEM)) { + ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp2->item->key.obj_id, tp2->item->key.obj_type, tp2->item->key.offset, + tp2->item->size, sizeof(INODE_ITEM)); + return STATUS_INTERNAL_ERROR; + } + + context->lastinode.oldmode = ii2->st_mode; + context->lastinode.olduid = ii2->st_uid; + context->lastinode.oldgid = ii2->st_gid; + + if ((ii2->st_mode & __S_IFREG) == __S_IFREG && (ii2->st_mode & __S_IFLNK) != __S_IFLNK && (ii2->st_mode & __S_IFSOCK) != __S_IFSOCK) + context->lastinode.file = TRUE; + + context->lastinode.new = FALSE; + + le = context->orphans.Flink; + while (le != &context->orphans) { + orphan* o2 = CONTAINING_RECORD(le, orphan, list_entry); + + if (o2->inode == tp->item->key.obj_id) { + context->lastinode.o = o2; + break; + } else if (o2->inode > tp->item->key.obj_id) + break; + + le = le->Flink; + } + } else + context->lastinode.new = TRUE; + + if (tp->item->key.obj_id == SUBVOL_ROOT_INODE) { + send_dir* sd; + + Status = find_send_dir(context, tp->item->key.obj_id, ii->generation, &sd, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_send_dir returned %08x\n", Status); + return Status; + } + + sd->atime = ii->st_atime; + sd->mtime = ii->st_mtime; + sd->ctime = ii->st_ctime; + context->root_dir = sd; + } else if (!tp2) { + ULONG pos = context->datalen; + UINT16 cmd; + send_dir* sd; + + char name[64]; + orphan* o; + + // skip creating orphan directory if we've already 
done so + if (ii->st_mode & __S_IFDIR) { + LIST_ENTRY* le; + + le = context->orphans.Flink; + while (le != &context->orphans) { + orphan* o2 = CONTAINING_RECORD(le, orphan, list_entry); + + if (o2->inode == tp->item->key.obj_id) { + context->lastinode.o = o2; + o2->sd->atime = ii->st_atime; + o2->sd->mtime = ii->st_mtime; + o2->sd->ctime = ii->st_ctime; + o2->sd->dummy = FALSE; + return STATUS_SUCCESS; + } else if (o2->inode > tp->item->key.obj_id) + break; + + le = le->Flink; + } + } + + if ((ii->st_mode & __S_IFSOCK) == __S_IFSOCK) + cmd = BTRFS_SEND_CMD_MKSOCK; + else if ((ii->st_mode & __S_IFLNK) == __S_IFLNK) + cmd = BTRFS_SEND_CMD_SYMLINK; + else if ((ii->st_mode & __S_IFCHR) == __S_IFCHR || (ii->st_mode & __S_IFBLK) == __S_IFBLK) + cmd = BTRFS_SEND_CMD_MKNOD; + else if ((ii->st_mode & __S_IFDIR) == __S_IFDIR) + cmd = BTRFS_SEND_CMD_MKDIR; + else if ((ii->st_mode & __S_IFIFO) == __S_IFIFO) + cmd = BTRFS_SEND_CMD_MKFIFO; + else { + cmd = BTRFS_SEND_CMD_MKFILE; + context->lastinode.file = TRUE; + } + + send_command(context, cmd); + + Status = get_orphan_name(context, tp->item->key.obj_id, ii->generation, name); + if (!NT_SUCCESS(Status)) { + ERR("get_orphan_name returned %08x\n", Status); + return Status; + } + + send_add_tlv(context, BTRFS_SEND_TLV_PATH, name, (UINT16)strlen(name)); + send_add_tlv(context, BTRFS_SEND_TLV_INODE, &tp->item->key.obj_id, sizeof(UINT64)); + + if (cmd == BTRFS_SEND_CMD_MKNOD || cmd == BTRFS_SEND_CMD_MKFIFO || cmd == BTRFS_SEND_CMD_MKSOCK) { + UINT64 rdev = makedev((ii->st_rdev & 0xFFFFFFFFFFF) >> 20, ii->st_rdev & 0xFFFFF), mode = ii->st_mode; + + send_add_tlv(context, BTRFS_SEND_TLV_RDEV, &rdev, sizeof(UINT64)); + send_add_tlv(context, BTRFS_SEND_TLV_MODE, &mode, sizeof(UINT64)); + } else if (cmd == BTRFS_SEND_CMD_SYMLINK && ii->st_size > 0) { + char* link; + UINT16 linklen; + + Status = send_read_symlink(context, tp->item->key.obj_id, &link, &linklen); + if (!NT_SUCCESS(Status)) { + ERR("send_read_symlink returned %08x\n", 
Status); + return Status; + } + + send_add_tlv(context, BTRFS_SEND_TLV_PATH_LINK, link, linklen); + } + + send_command_finish(context, pos); + + if (ii->st_mode & __S_IFDIR) { + Status = find_send_dir(context, tp->item->key.obj_id, ii->generation, &sd, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_send_dir returned %08x\n", Status); + return Status; + } + + sd->dummy = FALSE; + } else + sd = NULL; + + context->lastinode.sd = sd; + + o = ExAllocatePoolWithTag(PagedPool, sizeof(orphan), ALLOC_TAG); + if (!o) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + o->inode = tp->item->key.obj_id; + o->dir = (ii->st_mode & __S_IFDIR && ii->st_size > 0) ? TRUE : FALSE; + strcpy(o->tmpname, name); + o->sd = sd; + add_orphan(context, o); + + context->lastinode.path = ExAllocatePoolWithTag(PagedPool, strlen(o->tmpname) + 1, ALLOC_TAG); + if (!context->lastinode.path) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + strcpy(context->lastinode.path, o->tmpname); + + context->lastinode.o = o; + } + + return STATUS_SUCCESS; +} + +static NTSTATUS send_add_dir(send_context* context, UINT64 inode, send_dir* parent, char* name, UINT16 namelen, BOOL dummy, LIST_ENTRY* lastentry, send_dir** psd) { + LIST_ENTRY* le; + send_dir* sd = ExAllocatePoolWithTag(PagedPool, sizeof(send_dir), ALLOC_TAG); + + if (!sd) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + sd->inode = inode; + sd->dummy = dummy; + sd->parent = parent; + + if (!dummy) { + sd->atime = context->lastinode.atime; + sd->mtime = context->lastinode.mtime; + sd->ctime = context->lastinode.ctime; + } + + if (namelen > 0) { + sd->name = ExAllocatePoolWithTag(PagedPool, namelen, ALLOC_TAG); + if (!sd->name) { + ERR("out of memory\n"); + ExFreePool(sd); + return STATUS_INSUFFICIENT_RESOURCES; + } + + memcpy(sd->name, name, namelen); + } else + sd->name = NULL; + + sd->namelen = namelen; + + InitializeListHead(&sd->deleted_children); + + if (lastentry) + 
InsertHeadList(lastentry, &sd->list_entry); + else { + le = context->dirs.Flink; + while (le != &context->dirs) { + send_dir* sd2 = CONTAINING_RECORD(le, send_dir, list_entry); + + if (sd2->inode > sd->inode) { + InsertHeadList(sd2->list_entry.Blink, &sd->list_entry); + + if (psd) + *psd = sd; + + return STATUS_SUCCESS; + } + + le = le->Flink; + } + + InsertTailList(&context->dirs, &sd->list_entry); + } + + if (psd) + *psd = sd; + + return STATUS_SUCCESS; +} + +static __inline UINT16 find_path_len(send_dir* parent, UINT16 namelen) { + UINT16 len = namelen; + + while (parent && parent->namelen > 0) { + len += parent->namelen + 1; + parent = parent->parent; + } + + return len; +} + +static void find_path(char* path, send_dir* parent, char* name, ULONG namelen) { + ULONG len = namelen; + + RtlCopyMemory(path, name, namelen); + + while (parent && parent->namelen > 0) { + RtlMoveMemory(path + parent->namelen + 1, path, len); + RtlCopyMemory(path, parent->name, parent->namelen); + path[parent->namelen] = '/'; + len += parent->namelen + 1; + + parent = parent->parent; + } +} + +static void send_add_tlv_path(send_context* context, UINT16 type, send_dir* parent, char* name, UINT16 namelen) { + UINT16 len = find_path_len(parent, namelen); + + send_add_tlv(context, type, NULL, len); + + if (len > 0) + find_path((char*)&context->data[context->datalen - len], parent, name, namelen); +} + +static NTSTATUS found_path(send_context* context, send_dir* parent, char* name, UINT16 namelen) { + ULONG pos = context->datalen; + + if (context->lastinode.o) { + send_command(context, BTRFS_SEND_CMD_RENAME); + + send_add_tlv_path(context, BTRFS_SEND_TLV_PATH, context->root_dir, context->lastinode.o->tmpname, (UINT16)strlen(context->lastinode.o->tmpname)); + + send_add_tlv_path(context, BTRFS_SEND_TLV_PATH_TO, parent, name, namelen); + + send_command_finish(context, pos); + } else { + send_command(context, BTRFS_SEND_CMD_LINK); + + send_add_tlv_path(context, BTRFS_SEND_TLV_PATH, parent, name, 
namelen); + + send_add_tlv(context, BTRFS_SEND_TLV_PATH_LINK, context->lastinode.path, context->lastinode.path ? (UINT16)strlen(context->lastinode.path) : 0); + + send_command_finish(context, pos); + } + + if (context->lastinode.o) { + UINT16 pathlen; + + if (context->lastinode.o->sd) { + if (context->lastinode.o->sd->name) + ExFreePool(context->lastinode.o->sd->name); + + context->lastinode.o->sd->name = ExAllocatePoolWithTag(PagedPool, namelen, ALLOC_TAG); + if (!context->lastinode.o->sd->name) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(context->lastinode.o->sd->name, name, namelen); + context->lastinode.o->sd->namelen = namelen; + context->lastinode.o->sd->parent = parent; + } + + if (context->lastinode.path) + ExFreePool(context->lastinode.path); + + pathlen = find_path_len(parent, namelen); + context->lastinode.path = ExAllocatePoolWithTag(PagedPool, pathlen + 1, ALLOC_TAG); + if (!context->lastinode.path) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + find_path(context->lastinode.path, parent, name, namelen); + context->lastinode.path[pathlen] = 0; + + RemoveEntryList(&context->lastinode.o->list_entry); + ExFreePool(context->lastinode.o); + + context->lastinode.o = NULL; + } + + return STATUS_SUCCESS; +} + +static void send_utimes_command_dir(send_context* context, send_dir* sd, BTRFS_TIME* atime, BTRFS_TIME* mtime, BTRFS_TIME* ctime) { + ULONG pos = context->datalen; + + send_command(context, BTRFS_SEND_CMD_UTIMES); + + send_add_tlv_path(context, BTRFS_SEND_TLV_PATH, sd->parent, sd->name, sd->namelen); + + send_add_tlv(context, BTRFS_SEND_TLV_ATIME, atime, sizeof(BTRFS_TIME)); + send_add_tlv(context, BTRFS_SEND_TLV_MTIME, mtime, sizeof(BTRFS_TIME)); + send_add_tlv(context, BTRFS_SEND_TLV_CTIME, ctime, sizeof(BTRFS_TIME)); + + send_command_finish(context, pos); +} + +static NTSTATUS find_send_dir(send_context* context, UINT64 dir, UINT64 generation, send_dir** psd, BOOL* added_dummy) 
{ + NTSTATUS Status; + LIST_ENTRY* le; + char name[64]; + + le = context->dirs.Flink; + while (le != &context->dirs) { + send_dir* sd2 = CONTAINING_RECORD(le, send_dir, list_entry); + + if (sd2->inode > dir) + break; + else if (sd2->inode == dir) { + *psd = sd2; + + if (added_dummy) + *added_dummy = FALSE; + + return STATUS_SUCCESS; + } + + le = le->Flink; + } + + if (dir == SUBVOL_ROOT_INODE) { + Status = send_add_dir(context, dir, NULL, NULL, 0, FALSE, le, psd); + if (!NT_SUCCESS(Status)) { + ERR("send_add_dir returned %08x\n", Status); + return Status; + } + + if (added_dummy) + *added_dummy = FALSE; + + return STATUS_SUCCESS; + } + + if (context->parent) { + KEY searchkey; + traverse_ptr tp; + + searchkey.obj_id = dir; + searchkey.obj_type = TYPE_INODE_REF; // directories should never have an extiref + searchkey.offset = 0xffffffffffffffff; + + Status = find_item(context->Vcb, context->parent, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { + INODE_REF* ir = (INODE_REF*)tp.item->data; + send_dir* parent; + + if (tp.item->size < sizeof(INODE_REF) || tp.item->size < offsetof(INODE_REF, name[0]) + ir->n) { + ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); + return STATUS_INTERNAL_ERROR; + } + + if (tp.item->key.offset == SUBVOL_ROOT_INODE) + parent = context->root_dir; + else { + Status = find_send_dir(context, tp.item->key.offset, generation, &parent, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_send_dir returned %08x\n", Status); + return Status; + } + } + + Status = send_add_dir(context, dir, parent, ir->name, ir->n, TRUE, NULL, psd); + if (!NT_SUCCESS(Status)) { + ERR("send_add_dir returned %08x\n", Status); + return Status; + } + + if (added_dummy) + *added_dummy = FALSE; + + return STATUS_SUCCESS; + } + } + + Status = 
get_orphan_name(context, dir, generation, name); + if (!NT_SUCCESS(Status)) { + ERR("get_orphan_name returned %08x\n", Status); + return Status; + } + + Status = send_add_dir(context, dir, NULL, name, (UINT16)strlen(name), TRUE, le, psd); + if (!NT_SUCCESS(Status)) { + ERR("send_add_dir returned %08x\n", Status); + return Status; + } + + if (added_dummy) + *added_dummy = TRUE; + + return STATUS_SUCCESS; +} + +static NTSTATUS send_inode_ref(send_context* context, traverse_ptr* tp, BOOL tree2) { + NTSTATUS Status; + UINT64 inode = tp ? tp->item->key.obj_id : 0, dir = tp ? tp->item->key.offset : 0; + LIST_ENTRY* le; + INODE_REF* ir; + UINT16 len; + send_dir* sd = NULL; + orphan* o2 = NULL; + + if (inode == dir) // root + return STATUS_SUCCESS; + + if (tp->item->size < sizeof(INODE_REF)) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, + tp->item->size, sizeof(INODE_REF)); + return STATUS_INTERNAL_ERROR; + } + + if (dir != SUBVOL_ROOT_INODE) { + BOOL added_dummy; + + Status = find_send_dir(context, dir, context->root->root_item.ctransid, &sd, &added_dummy); + if (!NT_SUCCESS(Status)) { + ERR("find_send_dir returned %08x\n", Status); + return Status; + } + + // directory has higher inode number than file, so might need to be created + if (added_dummy) { + BOOL found = FALSE; + + le = context->orphans.Flink; + while (le != &context->orphans) { + o2 = CONTAINING_RECORD(le, orphan, list_entry); + + if (o2->inode == dir) { + found = TRUE; + break; + } else if (o2->inode > dir) + break; + + le = le->Flink; + } + + if (!found) { + ULONG pos = context->datalen; + + send_command(context, BTRFS_SEND_CMD_MKDIR); + + send_add_tlv_path(context, BTRFS_SEND_TLV_PATH, NULL, sd->name, sd->namelen); + + send_add_tlv(context, BTRFS_SEND_TLV_INODE, &dir, sizeof(UINT64)); + + send_command_finish(context, pos); + + o2 = ExAllocatePoolWithTag(PagedPool, sizeof(orphan), ALLOC_TAG); + if (!o2) { + ERR("out of 
memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + o2->inode = dir; + o2->dir = TRUE; + memcpy(o2->tmpname, sd->name, sd->namelen); + o2->tmpname[sd->namelen] = 0; + o2->sd = sd; + add_orphan(context, o2); + } + } + } else + sd = context->root_dir; + + len = tp->item->size; + ir = (INODE_REF*)tp->item->data; + + while (len > 0) { + ref* r; + + if (len < sizeof(INODE_REF) || len < offsetof(INODE_REF, name[0]) + ir->n) { + ERR("(%llx,%x,%llx) was truncated\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset); + return STATUS_INTERNAL_ERROR; + } + + r = ExAllocatePoolWithTag(PagedPool, offsetof(ref, name[0]) + ir->n, ALLOC_TAG); + if (!r) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + r->sd = sd; + r->namelen = ir->n; + RtlCopyMemory(r->name, ir->name, ir->n); + + InsertTailList(tree2 ? &context->lastinode.oldrefs : &context->lastinode.refs, &r->list_entry); + + len -= (UINT16)offsetof(INODE_REF, name[0]) + ir->n; + ir = (INODE_REF*)&ir->name[ir->n]; + } + + return STATUS_SUCCESS; +} + +static NTSTATUS send_inode_extref(send_context* context, traverse_ptr* tp, BOOL tree2) { + INODE_EXTREF* ier; + UINT16 len; + + if (tp->item->size < sizeof(INODE_EXTREF)) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, + tp->item->size, sizeof(INODE_EXTREF)); + return STATUS_INTERNAL_ERROR; + } + + len = tp->item->size; + ier = (INODE_EXTREF*)tp->item->data; + + while (len > 0) { + NTSTATUS Status; + send_dir* sd = NULL; + orphan* o2 = NULL; + ref* r; + + if (len < sizeof(INODE_EXTREF) || len < offsetof(INODE_EXTREF, name[0]) + ier->n) { + ERR("(%llx,%x,%llx) was truncated\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset); + return STATUS_INTERNAL_ERROR; + } + + if (ier->dir != SUBVOL_ROOT_INODE) { + LIST_ENTRY* le; + BOOL added_dummy; + + Status = find_send_dir(context, ier->dir, context->root->root_item.ctransid, &sd, 
&added_dummy); + if (!NT_SUCCESS(Status)) { + ERR("find_send_dir returned %08x\n", Status); + return Status; + } + + // directory has higher inode number than file, so might need to be created + if (added_dummy) { + BOOL found = FALSE; + + le = context->orphans.Flink; + while (le != &context->orphans) { + o2 = CONTAINING_RECORD(le, orphan, list_entry); + + if (o2->inode == ier->dir) { + found = TRUE; + break; + } else if (o2->inode > ier->dir) + break; + + le = le->Flink; + } + + if (!found) { + ULONG pos = context->datalen; + + send_command(context, BTRFS_SEND_CMD_MKDIR); + + send_add_tlv_path(context, BTRFS_SEND_TLV_PATH, NULL, sd->name, sd->namelen); + send_add_tlv(context, BTRFS_SEND_TLV_INODE, &ier->dir, sizeof(UINT64)); + + send_command_finish(context, pos); + + o2 = ExAllocatePoolWithTag(PagedPool, sizeof(orphan), ALLOC_TAG); + if (!o2) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + o2->inode = ier->dir; + o2->dir = TRUE; + memcpy(o2->tmpname, sd->name, sd->namelen); + o2->tmpname[sd->namelen] = 0; + o2->sd = sd; + add_orphan(context, o2); + } + } + } else + sd = context->root_dir; + + r = ExAllocatePoolWithTag(PagedPool, offsetof(ref, name[0]) + ier->n, ALLOC_TAG); + if (!r) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + r->sd = sd; + r->namelen = ier->n; + RtlCopyMemory(r->name, ier->name, ier->n); + + InsertTailList(tree2 ? &context->lastinode.oldrefs : &context->lastinode.refs, &r->list_entry); + + len -= (UINT16)offsetof(INODE_EXTREF, name[0]) + ier->n; + ier = (INODE_EXTREF*)&ier->name[ier->n]; + } + + return STATUS_SUCCESS; +} + +static void send_subvol_header(send_context* context, root* r, file_ref* fr) { + ULONG pos = context->datalen; + + send_command(context, context->parent ? BTRFS_SEND_CMD_SNAPSHOT : BTRFS_SEND_CMD_SUBVOL); + + send_add_tlv(context, BTRFS_SEND_TLV_PATH, fr->dc->utf8.Buffer, fr->dc->utf8.Length); + + send_add_tlv(context, BTRFS_SEND_TLV_UUID, r->root_item.rtransid == 0 ? 
&r->root_item.uuid : &r->root_item.received_uuid, sizeof(BTRFS_UUID)); + send_add_tlv(context, BTRFS_SEND_TLV_TRANSID, &r->root_item.ctransid, sizeof(UINT64)); + + if (context->parent) { + send_add_tlv(context, BTRFS_SEND_TLV_CLONE_UUID, + context->parent->root_item.rtransid == 0 ? &context->parent->root_item.uuid : &context->parent->root_item.received_uuid, sizeof(BTRFS_UUID)); + send_add_tlv(context, BTRFS_SEND_TLV_CLONE_CTRANSID, &context->parent->root_item.ctransid, sizeof(UINT64)); + } + + send_command_finish(context, pos); +} + +static void send_chown_command(send_context* context, char* path, UINT64 uid, UINT64 gid) { + ULONG pos = context->datalen; + + send_command(context, BTRFS_SEND_CMD_CHOWN); + + send_add_tlv(context, BTRFS_SEND_TLV_PATH, path, path ? (UINT16)strlen(path) : 0); + send_add_tlv(context, BTRFS_SEND_TLV_UID, &uid, sizeof(UINT64)); + send_add_tlv(context, BTRFS_SEND_TLV_GID, &gid, sizeof(UINT64)); + + send_command_finish(context, pos); +} + +static void send_chmod_command(send_context* context, char* path, UINT64 mode) { + ULONG pos = context->datalen; + + send_command(context, BTRFS_SEND_CMD_CHMOD); + + mode &= 07777; + + send_add_tlv(context, BTRFS_SEND_TLV_PATH, path, path ? (UINT16)strlen(path) : 0); + send_add_tlv(context, BTRFS_SEND_TLV_MODE, &mode, sizeof(UINT64)); + + send_command_finish(context, pos); +} + +static void send_utimes_command(send_context* context, char* path, BTRFS_TIME* atime, BTRFS_TIME* mtime, BTRFS_TIME* ctime) { + ULONG pos = context->datalen; + + send_command(context, BTRFS_SEND_CMD_UTIMES); + + send_add_tlv(context, BTRFS_SEND_TLV_PATH, path, path ? 
(UINT16)strlen(path) : 0); + send_add_tlv(context, BTRFS_SEND_TLV_ATIME, atime, sizeof(BTRFS_TIME)); + send_add_tlv(context, BTRFS_SEND_TLV_MTIME, mtime, sizeof(BTRFS_TIME)); + send_add_tlv(context, BTRFS_SEND_TLV_CTIME, ctime, sizeof(BTRFS_TIME)); + + send_command_finish(context, pos); +} + +static void send_truncate_command(send_context* context, char* path, UINT64 size) { + ULONG pos = context->datalen; + + send_command(context, BTRFS_SEND_CMD_TRUNCATE); + + send_add_tlv(context, BTRFS_SEND_TLV_PATH, path, path ? (UINT16)strlen(path) : 0); + send_add_tlv(context, BTRFS_SEND_TLV_SIZE, &size, sizeof(UINT64)); + + send_command_finish(context, pos); +} + +static NTSTATUS send_unlink_command(send_context* context, send_dir* parent, UINT16 namelen, char* name) { + ULONG pos = context->datalen; + UINT16 pathlen; + + send_command(context, BTRFS_SEND_CMD_UNLINK); + + pathlen = find_path_len(parent, namelen); + send_add_tlv(context, BTRFS_SEND_TLV_PATH, NULL, pathlen); + + find_path((char*)&context->data[context->datalen - pathlen], parent, name, namelen); + + send_command_finish(context, pos); + + return STATUS_SUCCESS; +} + +static void send_rmdir_command(send_context* context, UINT16 pathlen, char* path) { + ULONG pos = context->datalen; + + send_command(context, BTRFS_SEND_CMD_RMDIR); + send_add_tlv(context, BTRFS_SEND_TLV_PATH, path, pathlen); + send_command_finish(context, pos); +} + +static NTSTATUS get_dir_last_child(send_context* context, UINT64* last_inode) { + NTSTATUS Status; + KEY searchkey; + traverse_ptr tp; + + *last_inode = 0; + + searchkey.obj_id = context->lastinode.inode; + searchkey.obj_type = TYPE_DIR_INDEX; + searchkey.offset = 2; + + Status = find_item(context->Vcb, context->parent, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + do { + traverse_ptr next_tp; + + if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { + DIR_ITEM* di 
= (DIR_ITEM*)tp.item->data; + + if (tp.item->size < sizeof(DIR_ITEM) || tp.item->size < offsetof(DIR_ITEM, name[0]) + di->m + di->n) { + ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); + return STATUS_INTERNAL_ERROR; + } + + if (di->key.obj_type == TYPE_INODE_ITEM) + *last_inode = max(*last_inode, di->key.obj_id); + } else + break; + + if (find_next_item(context->Vcb, &tp, &next_tp, FALSE, NULL)) + tp = next_tp; + else + break; + } while (TRUE); + + return STATUS_SUCCESS; +} + +static NTSTATUS add_pending_rmdir(send_context* context, UINT64 last_inode) { + pending_rmdir* pr; + LIST_ENTRY* le; + + pr = ExAllocatePoolWithTag(PagedPool, sizeof(pending_rmdir), ALLOC_TAG); + if (!pr) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + pr->sd = context->lastinode.sd; + pr->last_child_inode = last_inode; + + le = context->pending_rmdirs.Flink; + while (le != &context->pending_rmdirs) { + pending_rmdir* pr2 = CONTAINING_RECORD(le, pending_rmdir, list_entry); + + if (pr2->last_child_inode > pr->last_child_inode) { + InsertHeadList(pr2->list_entry.Blink, &pr->list_entry); + return STATUS_SUCCESS; + } + + le = le->Flink; + } + + InsertTailList(&context->pending_rmdirs, &pr->list_entry); + + return STATUS_SUCCESS; +} + +static NTSTATUS look_for_collision(send_context* context, send_dir* sd, char* name, ULONG namelen, UINT64* inode, BOOL* dir) { + NTSTATUS Status; + KEY searchkey; + traverse_ptr tp; + DIR_ITEM* di; + UINT16 len; + + searchkey.obj_id = sd->inode; + searchkey.obj_type = TYPE_DIR_ITEM; + searchkey.offset = calc_crc32c(0xfffffffe, (UINT8*)name, namelen); + + Status = find_item(context->Vcb, context->parent, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + if (keycmp(tp.item->key, searchkey)) + return STATUS_SUCCESS; + + di = (DIR_ITEM*)tp.item->data; + len = tp.item->size; + + do { + if (len < 
sizeof(DIR_ITEM) || len < offsetof(DIR_ITEM, name[0]) + di->m + di->n) { + ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); + return STATUS_INTERNAL_ERROR; + } + + if (di->n == namelen && RtlCompareMemory(di->name, name, namelen) == namelen) { + *inode = di->key.obj_type == TYPE_INODE_ITEM ? di->key.obj_id : 0; + *dir = di->type == BTRFS_TYPE_DIRECTORY ? TRUE: FALSE; + return STATUS_OBJECT_NAME_COLLISION; + } + + di = (DIR_ITEM*)&di->name[di->m + di->n]; + len -= (UINT16)offsetof(DIR_ITEM, name[0]) + di->m + di->n; + } while (len > 0); + + return STATUS_SUCCESS; +} + +static NTSTATUS make_file_orphan(send_context* context, UINT64 inode, BOOL dir, UINT64 generation, ref* r) { + NTSTATUS Status; + ULONG pos = context->datalen; + send_dir* sd = NULL; + orphan* o; + LIST_ENTRY* le; + char name[64]; + + if (!dir) { + deleted_child* dc; + + dc = ExAllocatePoolWithTag(PagedPool, offsetof(deleted_child, name[0]) + r->namelen, ALLOC_TAG); + if (!dc) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + dc->namelen = r->namelen; + RtlCopyMemory(dc->name, r->name, r->namelen); + InsertTailList(&r->sd->deleted_children, &dc->list_entry); + } + + le = context->orphans.Flink; + while (le != &context->orphans) { + orphan* o2 = CONTAINING_RECORD(le, orphan, list_entry); + + if (o2->inode == inode) { + send_command(context, BTRFS_SEND_CMD_UNLINK); + + send_add_tlv_path(context, BTRFS_SEND_TLV_PATH, r->sd, r->name, r->namelen); + + send_command_finish(context, pos); + + return STATUS_SUCCESS; + } else if (o2->inode > inode) + break; + + le = le->Flink; + } + + Status = get_orphan_name(context, inode, generation, name); + if (!NT_SUCCESS(Status)) { + ERR("get_orphan_name returned %08x\n", Status); + return Status; + } + + if (dir) { + Status = find_send_dir(context, inode, generation, &sd, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_send_dir returned %08x\n", Status); + return Status; + } + + sd->dummy = 
TRUE; + + send_command(context, BTRFS_SEND_CMD_RENAME); + + send_add_tlv_path(context, BTRFS_SEND_TLV_PATH, r->sd, r->name, r->namelen); + send_add_tlv_path(context, BTRFS_SEND_TLV_PATH_TO, context->root_dir, name, (UINT16)strlen(name)); + + send_command_finish(context, pos); + + if (sd->name) + ExFreePool(sd->name); + + sd->namelen = (UINT16)strlen(name); + sd->name = ExAllocatePoolWithTag(PagedPool, sd->namelen, ALLOC_TAG); + if (!sd->name) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(sd->name, name, sd->namelen); + sd->parent = context->root_dir; + } else { + send_command(context, BTRFS_SEND_CMD_RENAME); + + send_add_tlv_path(context, BTRFS_SEND_TLV_PATH, r->sd, r->name, r->namelen); + + send_add_tlv_path(context, BTRFS_SEND_TLV_PATH_TO, context->root_dir, name, (UINT16)strlen(name)); + + send_command_finish(context, pos); + } + + o = ExAllocatePoolWithTag(PagedPool, sizeof(orphan), ALLOC_TAG); + if (!o) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + o->inode = inode; + o->dir = TRUE; + strcpy(o->tmpname, name); + o->sd = sd; + add_orphan(context, o); + + return STATUS_SUCCESS; +} + +static NTSTATUS flush_refs(send_context* context, traverse_ptr* tp1, traverse_ptr* tp2) { + NTSTATUS Status; + LIST_ENTRY* le; + ref *nameref = NULL, *nameref2 = NULL; + + if (context->lastinode.mode & __S_IFDIR) { // directory + ref* r = IsListEmpty(&context->lastinode.refs) ? NULL : CONTAINING_RECORD(context->lastinode.refs.Flink, ref, list_entry); + ref* or = IsListEmpty(&context->lastinode.oldrefs) ? 
NULL : CONTAINING_RECORD(context->lastinode.oldrefs.Flink, ref, list_entry); + + if (or && !context->lastinode.o) { + ULONG len = find_path_len(or->sd, or->namelen); + + context->lastinode.path = ExAllocatePoolWithTag(PagedPool, len + 1, ALLOC_TAG); + if (!context->lastinode.path) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + find_path(context->lastinode.path, or->sd, or->name, or->namelen); + context->lastinode.path[len] = 0; + + if (!context->lastinode.sd) { + Status = find_send_dir(context, context->lastinode.inode, context->lastinode.gen, &context->lastinode.sd, FALSE); + if (!NT_SUCCESS(Status)) { + ERR("find_send_dir returned %08x\n", Status); + return Status; + } + } + } + + if (r && or) { + UINT64 inode; + BOOL dir; + + Status = look_for_collision(context, r->sd, r->name, r->namelen, &inode, &dir); + if (!NT_SUCCESS(Status) && Status != STATUS_OBJECT_NAME_COLLISION) { + ERR("look_for_collision returned %08x\n", Status); + return Status; + } + + if (Status == STATUS_OBJECT_NAME_COLLISION && inode > context->lastinode.inode) { + Status = make_file_orphan(context, inode, dir, context->parent->root_item.ctransid, r); + if (!NT_SUCCESS(Status)) { + ERR("make_file_orphan returned %08x\n", Status); + return Status; + } + } + + if (context->lastinode.o) { + Status = found_path(context, r->sd, r->name, r->namelen); + if (!NT_SUCCESS(Status)) { + ERR("found_path returned %08x\n", Status); + return Status; + } + + if (!r->sd->dummy) + send_utimes_command_dir(context, r->sd, &r->sd->atime, &r->sd->mtime, &r->sd->ctime); + } else if (r->sd != or->sd || r->namelen != or->namelen || RtlCompareMemory(r->name, or->name, r->namelen) != r->namelen) { // moved or renamed + ULONG pos = context->datalen, len; + + send_command(context, BTRFS_SEND_CMD_RENAME); + + send_add_tlv(context, BTRFS_SEND_TLV_PATH, context->lastinode.path, (UINT16)strlen(context->lastinode.path)); + + send_add_tlv_path(context, BTRFS_SEND_TLV_PATH_TO, r->sd, r->name, 
r->namelen); + + send_command_finish(context, pos); + + if (!r->sd->dummy) + send_utimes_command_dir(context, r->sd, &r->sd->atime, &r->sd->mtime, &r->sd->ctime); + + if (context->lastinode.sd->name) + ExFreePool(context->lastinode.sd->name); + + context->lastinode.sd->name = ExAllocatePoolWithTag(PagedPool, r->namelen, ALLOC_TAG); + if (!context->lastinode.sd->name) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(context->lastinode.sd->name, r->name, r->namelen); + context->lastinode.sd->parent = r->sd; + + if (context->lastinode.path) + ExFreePool(context->lastinode.path); + + len = find_path_len(r->sd, r->namelen); + context->lastinode.path = ExAllocatePoolWithTag(PagedPool, len + 1, ALLOC_TAG); + if (!context->lastinode.path) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + find_path(context->lastinode.path, r->sd, r->name, r->namelen); + context->lastinode.path[len] = 0; + } + } else if (r && !or) { // new + Status = found_path(context, r->sd, r->name, r->namelen); + if (!NT_SUCCESS(Status)) { + ERR("found_path returned %08x\n", Status); + return Status; + } + + if (!r->sd->dummy) + send_utimes_command_dir(context, r->sd, &r->sd->atime, &r->sd->mtime, &r->sd->ctime); + } else { // deleted + UINT64 last_inode; + + Status = get_dir_last_child(context, &last_inode); + if (!NT_SUCCESS(Status)) { + ERR("get_dir_last_child returned %08x\n", Status); + return Status; + } + + if (last_inode <= context->lastinode.inode) { + send_rmdir_command(context, (UINT16)strlen(context->lastinode.path), context->lastinode.path); + + if (!or->sd->dummy) + send_utimes_command_dir(context, or->sd, &or->sd->atime, &or->sd->mtime, &or->sd->ctime); + } else { + char name[64]; + ULONG pos = context->datalen; + + Status = get_orphan_name(context, context->lastinode.inode, context->lastinode.gen, name); + if (!NT_SUCCESS(Status)) { + ERR("get_orphan_name returned %08x\n", Status); + return Status; + } + + 
send_command(context, BTRFS_SEND_CMD_RENAME); + send_add_tlv(context, BTRFS_SEND_TLV_PATH, context->lastinode.path, (UINT16)strlen(context->lastinode.path)); + send_add_tlv(context, BTRFS_SEND_TLV_PATH_TO, name, (UINT16)strlen(name)); + send_command_finish(context, pos); + + if (context->lastinode.sd->name) + ExFreePool(context->lastinode.sd->name); + + context->lastinode.sd->name = ExAllocatePoolWithTag(PagedPool, strlen(name), ALLOC_TAG); + if (!context->lastinode.sd->name) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(context->lastinode.sd->name, name, strlen(name)); + context->lastinode.sd->namelen = (UINT16)strlen(name); + context->lastinode.sd->dummy = TRUE; + context->lastinode.sd->parent = NULL; + + send_utimes_command(context, NULL, &context->root_dir->atime, &context->root_dir->mtime, &context->root_dir->ctime); + + Status = add_pending_rmdir(context, last_inode); + if (!NT_SUCCESS(Status)) { + ERR("add_pending_rmdir returned %08x\n", Status); + return Status; + } + } + } + + while (!IsListEmpty(&context->lastinode.refs)) { + r = CONTAINING_RECORD(RemoveHeadList(&context->lastinode.refs), ref, list_entry); + ExFreePool(r); + } + + while (!IsListEmpty(&context->lastinode.oldrefs)) { + or = CONTAINING_RECORD(RemoveHeadList(&context->lastinode.oldrefs), ref, list_entry); + ExFreePool(or); + } + + return STATUS_SUCCESS; + } else { + if (!IsListEmpty(&context->lastinode.oldrefs)) { + ref* or = CONTAINING_RECORD(context->lastinode.oldrefs.Flink, ref, list_entry); + ULONG len = find_path_len(or->sd, or->namelen); + + context->lastinode.path = ExAllocatePoolWithTag(PagedPool, len + 1, ALLOC_TAG); + if (!context->lastinode.path) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + find_path(context->lastinode.path, or->sd, or->name, or->namelen); + context->lastinode.path[len] = 0; + nameref = or; + } + + // remove unchanged refs + le = context->lastinode.oldrefs.Flink; + while (le != 
&context->lastinode.oldrefs) { + ref* or = CONTAINING_RECORD(le, ref, list_entry); + LIST_ENTRY* le2; + BOOL matched = FALSE; + + le2 = context->lastinode.refs.Flink; + while (le2 != &context->lastinode.refs) { + ref* r = CONTAINING_RECORD(le2, ref, list_entry); + + if (r->sd == or->sd && r->namelen == or->namelen && RtlCompareMemory(r->name, or->name, r->namelen) == r->namelen) { + RemoveEntryList(&r->list_entry); + ExFreePool(r); + matched = TRUE; + break; + } + + le2 = le2->Flink; + } + + if (matched) { + le = le->Flink; + RemoveEntryList(&or->list_entry); + ExFreePool(or); + continue; + } + + le = le->Flink; + } + + while (!IsListEmpty(&context->lastinode.refs)) { + ref* r = CONTAINING_RECORD(RemoveHeadList(&context->lastinode.refs), ref, list_entry); + UINT64 inode; + BOOL dir; + + if (context->parent) { + Status = look_for_collision(context, r->sd, r->name, r->namelen, &inode, &dir); + if (!NT_SUCCESS(Status) && Status != STATUS_OBJECT_NAME_COLLISION) { + ERR("look_for_collision returned %08x\n", Status); + return Status; + } + + if (Status == STATUS_OBJECT_NAME_COLLISION && inode > context->lastinode.inode) { + Status = make_file_orphan(context, inode, dir, context->lastinode.gen, r); + if (!NT_SUCCESS(Status)) { + ERR("make_file_orphan returned %08x\n", Status); + return Status; + } + } + } + + if (context->datalen > SEND_BUFFER_LENGTH) { + Status = wait_for_flush(context, tp1, tp2); + if (!NT_SUCCESS(Status)) { + ERR("wait_for_flush returned %08x\n", Status); + return Status; + } + + if (context->send->cancelling) + return STATUS_SUCCESS; + } + + Status = found_path(context, r->sd, r->name, r->namelen); + if (!NT_SUCCESS(Status)) { + ERR("found_path returned %08x\n", Status); + return Status; + } + + if (!r->sd->dummy) + send_utimes_command_dir(context, r->sd, &r->sd->atime, &r->sd->mtime, &r->sd->ctime); + + if (nameref && !nameref2) + nameref2 = r; + else + ExFreePool(r); + } + + while (!IsListEmpty(&context->lastinode.oldrefs)) { + ref* or = 
CONTAINING_RECORD(RemoveHeadList(&context->lastinode.oldrefs), ref, list_entry); + BOOL deleted = FALSE; + + le = or->sd->deleted_children.Flink; + while (le != &or->sd->deleted_children) { + deleted_child* dc = CONTAINING_RECORD(le, deleted_child, list_entry); + + if (dc->namelen == or->namelen && RtlCompareMemory(dc->name, or->name, or->namelen) == or->namelen) { + RemoveEntryList(&dc->list_entry); + ExFreePool(dc); + deleted = TRUE; + break; + } + + le = le->Flink; + } + + if (!deleted) { + if (context->datalen > SEND_BUFFER_LENGTH) { + Status = wait_for_flush(context, tp1, tp2); + if (!NT_SUCCESS(Status)) { + ERR("wait_for_flush returned %08x\n", Status); + return Status; + } + + if (context->send->cancelling) + return STATUS_SUCCESS; + } + + Status = send_unlink_command(context, or->sd, or->namelen, or->name); + if (!NT_SUCCESS(Status)) { + ERR("send_unlink_command returned %08x\n", Status); + return Status; + } + + if (!or->sd->dummy) + send_utimes_command_dir(context, or->sd, &or->sd->atime, &or->sd->mtime, &or->sd->ctime); + } + + if (or == nameref && nameref2) { + UINT16 len = find_path_len(nameref2->sd, nameref2->namelen); + + if (context->lastinode.path) + ExFreePool(context->lastinode.path); + + context->lastinode.path = ExAllocatePoolWithTag(PagedPool, len + 1, ALLOC_TAG); + if (!context->lastinode.path) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + find_path(context->lastinode.path, nameref2->sd, nameref2->name, nameref2->namelen); + context->lastinode.path[len] = 0; + + ExFreePool(nameref2); + } + + ExFreePool(or); + } + } + + return STATUS_SUCCESS; +} + +static NTSTATUS wait_for_flush(send_context* context, traverse_ptr* tp1, traverse_ptr* tp2) { + NTSTATUS Status; + KEY key1, key2; + + if (tp1) + key1 = tp1->item->key; + + if (tp2) + key2 = tp2->item->key; + + ExReleaseResourceLite(&context->Vcb->tree_lock); + + KeClearEvent(&context->send->cleared_event); + KeSetEvent(&context->buffer_event, 0, TRUE); + 
KeWaitForSingleObject(&context->send->cleared_event, Executive, KernelMode, FALSE, NULL); + + ExAcquireResourceSharedLite(&context->Vcb->tree_lock, TRUE); + + if (context->send->cancelling) + return STATUS_SUCCESS; + + if (tp1) { + Status = find_item(context->Vcb, context->root, tp1, &key1, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + if (keycmp(tp1->item->key, key1)) { + ERR("readonly subvolume changed\n"); + return STATUS_INTERNAL_ERROR; + } + } + + if (tp2) { + Status = find_item(context->Vcb, context->parent, tp2, &key2, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + if (keycmp(tp2->item->key, key2)) { + ERR("readonly subvolume changed\n"); + return STATUS_INTERNAL_ERROR; + } + } + + return STATUS_SUCCESS; +} + +static NTSTATUS add_ext_holes(LIST_ENTRY* exts, UINT64 size) { + UINT64 lastoff = 0; + LIST_ENTRY* le; + + le = exts->Flink; + while (le != exts) { + send_ext* ext = CONTAINING_RECORD(le, send_ext, list_entry); + + if (ext->offset > lastoff) { + send_ext* ext2 = ExAllocatePoolWithTag(PagedPool, offsetof(send_ext, data.data) + sizeof(EXTENT_DATA2), ALLOC_TAG); + EXTENT_DATA2* ed2; + + if (!ext2) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + ed2 = (EXTENT_DATA2*)ext2->data.data; + + ext2->offset = lastoff; + ext2->datalen = offsetof(EXTENT_DATA, data) + sizeof(EXTENT_DATA2); + ext2->data.decoded_size = ed2->num_bytes = ext->offset - lastoff; + ext2->data.type = EXTENT_TYPE_REGULAR; + ed2->address = ed2->size = ed2->offset = 0; + + InsertHeadList(le->Blink, &ext2->list_entry); + } + + if (ext->data.type == EXTENT_TYPE_INLINE) + lastoff = ext->offset + ext->data.decoded_size; + else { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data.data; + lastoff = ext->offset + ed2->num_bytes; + } + + le = le->Flink; + } + + if (size > lastoff) { + send_ext* ext2 = ExAllocatePoolWithTag(PagedPool, 
offsetof(send_ext, data.data) + sizeof(EXTENT_DATA2), ALLOC_TAG); + EXTENT_DATA2* ed2; + + if (!ext2) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + ed2 = (EXTENT_DATA2*)ext2->data.data; + + ext2->offset = lastoff; + ext2->datalen = offsetof(EXTENT_DATA, data) + sizeof(EXTENT_DATA2); + ext2->data.decoded_size = ed2->num_bytes = size - lastoff; + ext2->data.type = EXTENT_TYPE_REGULAR; + ed2->address = ed2->size = ed2->offset = 0; + + InsertTailList(exts, &ext2->list_entry); + } + + return STATUS_SUCCESS; +} + +static NTSTATUS divide_ext(send_ext* ext, UINT64 len, BOOL trunc) { + send_ext* ext2; + EXTENT_DATA2 *ed2a, *ed2b; + + if (ext->data.type == EXTENT_TYPE_INLINE) { + if (!trunc) { + ext2 = ExAllocatePoolWithTag(PagedPool, (ULONG)(offsetof(send_ext, data.data) + ext->data.decoded_size - len), ALLOC_TAG); + + if (!ext2) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + ext2->offset = ext->offset + len; + ext2->datalen = (ULONG)(ext->data.decoded_size - len); + ext2->data.decoded_size = ext->data.decoded_size - len; + ext2->data.compression = ext->data.compression; + ext2->data.encryption = ext->data.encryption; + ext2->data.encoding = ext->data.encoding; + ext2->data.type = ext->data.type; + RtlCopyMemory(ext2->data.data, ext->data.data + len, (ULONG)(ext->data.decoded_size - len)); + + InsertHeadList(&ext->list_entry, &ext2->list_entry); + } + + ext->data.decoded_size = len; + + return STATUS_SUCCESS; + } + + ed2a = (EXTENT_DATA2*)ext->data.data; + + if (!trunc) { + ext2 = ExAllocatePoolWithTag(PagedPool, offsetof(send_ext, data.data) + sizeof(EXTENT_DATA2), ALLOC_TAG); + + if (!ext2) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + ed2b = (EXTENT_DATA2*)ext2->data.data; + + ext2->offset = ext->offset + len; + ext2->datalen = offsetof(EXTENT_DATA, data) + sizeof(EXTENT_DATA2); + + ext2->data.compression = ext->data.compression; + ext2->data.encryption = ext->data.encryption; + 
ext2->data.encoding = ext->data.encoding; + ext2->data.type = ext->data.type; + ed2b->num_bytes = ed2a->num_bytes - len; + + if (ed2a->size == 0) { + ext2->data.decoded_size = ed2b->num_bytes; + ext->data.decoded_size = len; + + ed2b->address = ed2b->size = ed2b->offset = 0; + } else { + ext2->data.decoded_size = ext->data.decoded_size; + + ed2b->address = ed2a->address; + ed2b->size = ed2a->size; + ed2b->offset = ed2a->offset + len; + } + + InsertHeadList(&ext->list_entry, &ext2->list_entry); + } + + ed2a->num_bytes = len; + + return STATUS_SUCCESS; +} + +static NTSTATUS sync_ext_cutoff_points(send_context* context) { + NTSTATUS Status; + send_ext *ext1, *ext2; + + ext1 = CONTAINING_RECORD(context->lastinode.exts.Flink, send_ext, list_entry); + ext2 = CONTAINING_RECORD(context->lastinode.oldexts.Flink, send_ext, list_entry); + + do { + UINT64 len1, len2; + EXTENT_DATA2 *ed2a, *ed2b; + + ed2a = ext1->data.type == EXTENT_TYPE_INLINE ? NULL : (EXTENT_DATA2*)ext1->data.data; + ed2b = ext2->data.type == EXTENT_TYPE_INLINE ? NULL : (EXTENT_DATA2*)ext2->data.data; + + len1 = ed2a ? ed2a->num_bytes : ext1->data.decoded_size; + len2 = ed2b ? 
ed2b->num_bytes : ext2->data.decoded_size; + + if (len1 < len2) { + Status = divide_ext(ext2, len1, FALSE); + if (!NT_SUCCESS(Status)) { + ERR("divide_ext returned %08x\n", Status); + return Status; + } + } else if (len2 < len1) { + Status = divide_ext(ext1, len2, FALSE); + if (!NT_SUCCESS(Status)) { + ERR("divide_ext returned %08x\n", Status); + return Status; + } + } + + if (ext1->list_entry.Flink == &context->lastinode.exts || ext2->list_entry.Flink == &context->lastinode.oldexts) + break; + + ext1 = CONTAINING_RECORD(ext1->list_entry.Flink, send_ext, list_entry); + ext2 = CONTAINING_RECORD(ext2->list_entry.Flink, send_ext, list_entry); + } while (TRUE); + + ext1 = CONTAINING_RECORD(context->lastinode.exts.Blink, send_ext, list_entry); + ext2 = CONTAINING_RECORD(context->lastinode.oldexts.Blink, send_ext, list_entry); + + Status = divide_ext(ext1, context->lastinode.size - ext1->offset, TRUE); + if (!NT_SUCCESS(Status)) { + ERR("divide_ext returned %08x\n", Status); + return Status; + } + + Status = divide_ext(ext2, context->lastinode.size - ext2->offset, TRUE); + if (!NT_SUCCESS(Status)) { + ERR("divide_ext returned %08x\n", Status); + return Status; + } + + return STATUS_SUCCESS; +} + +static BOOL send_add_tlv_clone_path(send_context* context, root* r, UINT64 inode) { + NTSTATUS Status; + KEY searchkey; + traverse_ptr tp; + UINT16 len = 0; + UINT64 num; + UINT8* ptr; + + num = inode; + + while (num != SUBVOL_ROOT_INODE) { + searchkey.obj_id = num; + searchkey.obj_type = TYPE_INODE_EXTREF; + searchkey.offset = 0xffffffffffffffff; + + Status = find_item(context->Vcb, r, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return FALSE; + } + + if (tp.item->key.obj_id != searchkey.obj_id || (tp.item->key.obj_type != TYPE_INODE_REF && tp.item->key.obj_type != TYPE_INODE_EXTREF)) { + ERR("could not find INODE_REF for inode %llx\n", searchkey.obj_id); + return FALSE; + } + + if (len > 0) + len++; + + if 
(tp.item->key.obj_type == TYPE_INODE_REF) { + INODE_REF* ir = (INODE_REF*)tp.item->data; + + if (tp.item->size < sizeof(INODE_REF) || tp.item->size < offsetof(INODE_REF, name[0]) + ir->n) { + ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); + return FALSE; + } + + len += ir->n; + num = tp.item->key.offset; + } else { + INODE_EXTREF* ier = (INODE_EXTREF*)tp.item->data; + + if (tp.item->size < sizeof(INODE_EXTREF) || tp.item->size < offsetof(INODE_EXTREF, name[0]) + ier->n) { + ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); + return FALSE; + } + + len += ier->n; + num = ier->dir; + } + } + + send_add_tlv(context, BTRFS_SEND_TLV_CLONE_PATH, NULL, len); + ptr = &context->data[context->datalen]; + + num = inode; + + while (num != SUBVOL_ROOT_INODE) { + searchkey.obj_id = num; + searchkey.obj_type = TYPE_INODE_EXTREF; + searchkey.offset = 0xffffffffffffffff; + + Status = find_item(context->Vcb, r, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return FALSE; + } + + if (tp.item->key.obj_id != searchkey.obj_id || (tp.item->key.obj_type != TYPE_INODE_REF && tp.item->key.obj_type != TYPE_INODE_EXTREF)) { + ERR("could not find INODE_REF for inode %llx\n", searchkey.obj_id); + return FALSE; + } + + if (num != inode) { + ptr--; + *ptr = '/'; + } + + if (tp.item->key.obj_type == TYPE_INODE_REF) { + INODE_REF* ir = (INODE_REF*)tp.item->data; + + RtlCopyMemory(ptr - ir->n, ir->name, ir->n); + ptr -= ir->n; + num = tp.item->key.offset; + } else { + INODE_EXTREF* ier = (INODE_EXTREF*)tp.item->data; + + RtlCopyMemory(ptr - ier->n, ier->name, ier->n); + ptr -= ier->n; + num = ier->dir; + } + } + + return TRUE; +} + +static BOOL try_clone_edr(send_context* context, send_ext* se, EXTENT_DATA_REF* edr) { + NTSTATUS Status; + root* r = NULL; + KEY searchkey; + traverse_ptr tp; + EXTENT_DATA2* seed2 = 
(EXTENT_DATA2*)se->data.data; + + if (context->parent && edr->root == context->parent->id) + r = context->parent; + + if (!r && context->num_clones > 0) { + ULONG i; + + for (i = 0; i < context->num_clones; i++) { + if (context->clones[i]->id == edr->root && context->clones[i] != context->root) { + r = context->clones[i]; + break; + } + } + } + + if (!r) + return FALSE; + + searchkey.obj_id = edr->objid; + searchkey.obj_type = TYPE_EXTENT_DATA; + searchkey.offset = 0; + + Status = find_item(context->Vcb, r, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return FALSE; + } + + while (TRUE) { + traverse_ptr next_tp; + + if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { + if (tp.item->size < sizeof(EXTENT_DATA)) + ERR("(%llx,%x,%llx) has size %u, not at least %u as expected\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA)); + else { + EXTENT_DATA* ed = (EXTENT_DATA*)tp.item->data; + + if (ed->type == EXTENT_TYPE_REGULAR) { + if (tp.item->size < offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2)) + ERR("(%llx,%x,%llx) has size %u, not %u as expected\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, + tp.item->size, offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2)); + else { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; + + if (ed2->address == seed2->address && ed2->size == seed2->size && seed2->offset <= ed2->offset && seed2->offset + seed2->num_bytes >= ed2->offset + ed2->num_bytes) { + UINT64 clone_offset = tp.item->key.offset + ed2->offset - seed2->offset; + UINT64 clone_len = min(context->lastinode.size - se->offset, ed2->num_bytes); + + if (clone_offset % context->Vcb->superblock.sector_size == 0 && clone_len % context->Vcb->superblock.sector_size == 0) { + ULONG pos = context->datalen; + + send_command(context, BTRFS_SEND_CMD_CLONE); + + send_add_tlv(context, BTRFS_SEND_TLV_OFFSET, 
&se->offset, sizeof(UINT64)); + send_add_tlv(context, BTRFS_SEND_TLV_CLONE_LENGTH, &clone_len, sizeof(UINT64)); + send_add_tlv(context, BTRFS_SEND_TLV_PATH, context->lastinode.path, context->lastinode.path ? (UINT16)strlen(context->lastinode.path) : 0); + send_add_tlv(context, BTRFS_SEND_TLV_CLONE_UUID, r->root_item.rtransid == 0 ? &r->root_item.uuid : &r->root_item.received_uuid, sizeof(BTRFS_UUID)); + send_add_tlv(context, BTRFS_SEND_TLV_CLONE_CTRANSID, &r->root_item.ctransid, sizeof(UINT64)); + + if (!send_add_tlv_clone_path(context, r, tp.item->key.obj_id)) + context->datalen = pos; + else { + send_add_tlv(context, BTRFS_SEND_TLV_CLONE_OFFSET, &clone_offset, sizeof(UINT64)); + + send_command_finish(context, pos); + + return TRUE; + } + } + } + } + } + } + } else if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type)) + break; + + if (find_next_item(context->Vcb, &tp, &next_tp, FALSE, NULL)) + tp = next_tp; + else + break; + } + + return FALSE; +} + +static BOOL try_clone(send_context* context, send_ext* se) { + NTSTATUS Status; + KEY searchkey; + traverse_ptr tp; + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)se->data.data; + EXTENT_ITEM* ei; + UINT64 rc = 0; + + searchkey.obj_id = ed2->address; + searchkey.obj_type = TYPE_EXTENT_ITEM; + searchkey.offset = ed2->size; + + Status = find_item(context->Vcb, context->Vcb->extent_root, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return FALSE; + } + + if (keycmp(tp.item->key, searchkey)) { + ERR("(%llx,%x,%llx) not found\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); + return FALSE; + } + + if (tp.item->size < sizeof(EXTENT_ITEM)) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); + return FALSE; + } + + ei = (EXTENT_ITEM*)tp.item->data; + + if (tp.item->size > 
sizeof(EXTENT_ITEM)) { + UINT32 len = tp.item->size - sizeof(EXTENT_ITEM); + UINT8* ptr = (UINT8*)&ei[1]; + + while (len > 0) { + UINT8 secttype = *ptr; + ULONG sectlen = get_extent_data_len(secttype); + UINT64 sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8)); + + len--; + + if (sectlen > len) { + ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen); + return FALSE; + } + + if (sectlen == 0) { + ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype); + return FALSE; + } + + rc += sectcount; + + if (secttype == TYPE_EXTENT_DATA_REF) { + EXTENT_DATA_REF* sectedr = (EXTENT_DATA_REF*)(ptr + sizeof(UINT8)); + + if (try_clone_edr(context, se, sectedr)) + return TRUE; + } + + len -= sectlen; + ptr += sizeof(UINT8) + sectlen; + } + } + + if (rc >= ei->refcount) + return FALSE; + + searchkey.obj_type = TYPE_EXTENT_DATA_REF; + searchkey.offset = 0; + + Status = find_item(context->Vcb, context->Vcb->extent_root, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return FALSE; + } + + while (TRUE) { + traverse_ptr next_tp; + + if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { + if (tp.item->size < sizeof(EXTENT_DATA_REF)) + ERR("(%llx,%x,%llx) has size %u, not %u as expected\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA_REF)); + else { + if (try_clone_edr(context, se, (EXTENT_DATA_REF*)tp.item->data)) + return TRUE; + } + } else if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type)) + break; + + if (find_next_item(context->Vcb, &tp, &next_tp, FALSE, NULL)) + tp = next_tp; + else + break; + } + + return FALSE; +} + +static NTSTATUS flush_extents(send_context* context, 
traverse_ptr* tp1, traverse_ptr* tp2) { + NTSTATUS Status; + + if ((IsListEmpty(&context->lastinode.exts) && IsListEmpty(&context->lastinode.oldexts)) || context->lastinode.size == 0) + return STATUS_SUCCESS; + + if (context->parent) { + Status = add_ext_holes(&context->lastinode.exts, context->lastinode.size); + if (!NT_SUCCESS(Status)) { + ERR("add_ext_holes returned %08x\n", Status); + return Status; + } + + Status = add_ext_holes(&context->lastinode.oldexts, context->lastinode.size); + if (!NT_SUCCESS(Status)) { + ERR("add_ext_holes returned %08x\n", Status); + return Status; + } + + Status = sync_ext_cutoff_points(context); + if (!NT_SUCCESS(Status)) { + ERR("sync_ext_cutoff_points returned %08x\n", Status); + return Status; + } + } + + while (!IsListEmpty(&context->lastinode.exts)) { + send_ext* se = CONTAINING_RECORD(RemoveHeadList(&context->lastinode.exts), send_ext, list_entry); + send_ext* se2 = context->parent ? CONTAINING_RECORD(RemoveHeadList(&context->lastinode.oldexts), send_ext, list_entry) : NULL; + ULONG pos; + EXTENT_DATA2* ed2; + + if (se2) { + if (se->data.type == EXTENT_TYPE_INLINE && se2->data.type == EXTENT_TYPE_INLINE && + RtlCompareMemory(se->data.data, se2->data.data, (ULONG)se->data.decoded_size) == (ULONG)se->data.decoded_size) { + ExFreePool(se); + ExFreePool(se2); + continue; + } + + if (se->data.type == EXTENT_TYPE_REGULAR && se2->data.type == EXTENT_TYPE_REGULAR) { + EXTENT_DATA2 *ed2a, *ed2b; + + ed2a = (EXTENT_DATA2*)se->data.data; + ed2b = (EXTENT_DATA2*)se2->data.data; + + if (RtlCompareMemory(ed2a, ed2b, sizeof(EXTENT_DATA2)) == sizeof(EXTENT_DATA2)) { + ExFreePool(se); + ExFreePool(se2); + continue; + } + } + } + + if (se->data.type == EXTENT_TYPE_INLINE) { + pos = context->datalen; + + send_command(context, BTRFS_SEND_CMD_WRITE); + + send_add_tlv(context, BTRFS_SEND_TLV_PATH, context->lastinode.path, context->lastinode.path ? 
(UINT16)strlen(context->lastinode.path) : 0); + send_add_tlv(context, BTRFS_SEND_TLV_OFFSET, &se->offset, sizeof(UINT64)); + + if (se->data.compression == BTRFS_COMPRESSION_NONE) + send_add_tlv(context, BTRFS_SEND_TLV_DATA, se->data.data, (UINT16)se->data.decoded_size); + else if (se->data.compression == BTRFS_COMPRESSION_ZLIB || se->data.compression == BTRFS_COMPRESSION_LZO) { + ULONG inlen = se->datalen - (ULONG)offsetof(EXTENT_DATA, data[0]); + + send_add_tlv(context, BTRFS_SEND_TLV_DATA, NULL, (UINT16)se->data.decoded_size); + RtlZeroMemory(&context->data[context->datalen - se->data.decoded_size], (ULONG)se->data.decoded_size); + + if (se->data.compression == BTRFS_COMPRESSION_ZLIB) { + Status = zlib_decompress(se->data.data, inlen, &context->data[context->datalen - se->data.decoded_size], (UINT32)se->data.decoded_size); + if (!NT_SUCCESS(Status)) { + ERR("zlib_decompress returned %08x\n", Status); + ExFreePool(se); + if (se2) ExFreePool(se2); + return Status; + } + } else if (se->data.compression == BTRFS_COMPRESSION_LZO) { + if (inlen < sizeof(UINT32)) { + ERR("extent data was truncated\n"); + ExFreePool(se); + if (se2) ExFreePool(se2); + return STATUS_INTERNAL_ERROR; + } else + inlen -= sizeof(UINT32); + + Status = lzo_decompress(se->data.data + sizeof(UINT32), inlen, &context->data[context->datalen - se->data.decoded_size], (UINT32)se->data.decoded_size, sizeof(UINT32)); + if (!NT_SUCCESS(Status)) { + ERR("lzo_decompress returned %08x\n", Status); + ExFreePool(se); + if (se2) ExFreePool(se2); + return Status; + } + } + } else { + ERR("unhandled compression type %x\n", se->data.compression); + ExFreePool(se); + if (se2) ExFreePool(se2); + return STATUS_NOT_IMPLEMENTED; + } + + send_command_finish(context, pos); + + ExFreePool(se); + if (se2) ExFreePool(se2); + continue; + } + + ed2 = (EXTENT_DATA2*)se->data.data; + + if (ed2->size != 0 && (context->parent || context->num_clones > 0)) { + if (try_clone(context, se)) { + ExFreePool(se); + if (se2) 
ExFreePool(se2); + continue; + } + } + + if (ed2->size == 0) { // write sparse + UINT64 off, offset; + + for (off = ed2->offset; off < ed2->offset + ed2->num_bytes; off += MAX_SEND_WRITE) { + UINT16 length = (UINT16)min(min(ed2->offset + ed2->num_bytes - off, MAX_SEND_WRITE), context->lastinode.size - se->offset - off); + + if (context->datalen > SEND_BUFFER_LENGTH) { + Status = wait_for_flush(context, tp1, tp2); + if (!NT_SUCCESS(Status)) { + ERR("wait_for_flush returned %08x\n", Status); + ExFreePool(se); + if (se2) ExFreePool(se2); + return Status; + } + + if (context->send->cancelling) { + ExFreePool(se); + if (se2) ExFreePool(se2); + return STATUS_SUCCESS; + } + } + + pos = context->datalen; + + send_command(context, BTRFS_SEND_CMD_WRITE); + + send_add_tlv(context, BTRFS_SEND_TLV_PATH, context->lastinode.path, context->lastinode.path ? (UINT16)strlen(context->lastinode.path) : 0); + + offset = se->offset + off; + send_add_tlv(context, BTRFS_SEND_TLV_OFFSET, &offset, sizeof(UINT64)); + + send_add_tlv(context, BTRFS_SEND_TLV_DATA, NULL, length); + RtlZeroMemory(&context->data[context->datalen - length], length); + + send_command_finish(context, pos); + } + } else if (se->data.compression == BTRFS_COMPRESSION_NONE) { + UINT64 off, offset; + UINT8* buf; + + buf = ExAllocatePoolWithTag(NonPagedPool, MAX_SEND_WRITE + (2 * context->Vcb->superblock.sector_size), ALLOC_TAG); + if (!buf) { + ERR("out of memory\n"); + ExFreePool(se); + if (se2) ExFreePool(se2); + return STATUS_INSUFFICIENT_RESOURCES; + } + + for (off = ed2->offset; off < ed2->offset + ed2->num_bytes; off += MAX_SEND_WRITE) { + UINT16 length = (UINT16)min(ed2->offset + ed2->num_bytes - off, MAX_SEND_WRITE); + ULONG skip_start; + UINT64 addr = ed2->address + off; + UINT32* csum; + + if (context->datalen > SEND_BUFFER_LENGTH) { + Status = wait_for_flush(context, tp1, tp2); + if (!NT_SUCCESS(Status)) { + ERR("wait_for_flush returned %08x\n", Status); + ExFreePool(buf); + ExFreePool(se); + if (se2) 
ExFreePool(se2); + return Status; + } + + if (context->send->cancelling) { + ExFreePool(buf); + ExFreePool(se); + if (se2) ExFreePool(se2); + return STATUS_SUCCESS; + } + } + + skip_start = addr % context->Vcb->superblock.sector_size; + addr -= skip_start; + + if (context->lastinode.flags & BTRFS_INODE_NODATASUM) + csum = NULL; + else { + UINT32 len; + + len = (UINT32)sector_align(length + skip_start, context->Vcb->superblock.sector_size) / context->Vcb->superblock.sector_size; + + csum = ExAllocatePoolWithTag(PagedPool, len * sizeof(UINT32), ALLOC_TAG); + if (!csum) { + ERR("out of memory\n"); + ExFreePool(buf); + ExFreePool(se); + if (se2) ExFreePool(se2); + return STATUS_INSUFFICIENT_RESOURCES; + } + + Status = load_csum(context->Vcb, csum, addr, len, NULL); + if (!NT_SUCCESS(Status)) { + ERR("load_csum returned %08x\n", Status); + ExFreePool(csum); + ExFreePool(buf); + ExFreePool(se); + if (se2) ExFreePool(se2); + return STATUS_INSUFFICIENT_RESOURCES; + } + } + + Status = read_data(context->Vcb, addr, (UINT32)sector_align(length + skip_start, context->Vcb->superblock.sector_size), + csum, FALSE, buf, NULL, NULL, NULL, 0, FALSE, NormalPagePriority); + if (!NT_SUCCESS(Status)) { + ERR("read_data returned %08x\n", Status); + ExFreePool(buf); + ExFreePool(se); + if (se2) ExFreePool(se2); + if (csum) ExFreePool(csum); + return Status; + } + + if (csum) + ExFreePool(csum); + + pos = context->datalen; + + send_command(context, BTRFS_SEND_CMD_WRITE); + + send_add_tlv(context, BTRFS_SEND_TLV_PATH, context->lastinode.path, context->lastinode.path ? 
(UINT16)strlen(context->lastinode.path) : 0); + + offset = se->offset + off; + send_add_tlv(context, BTRFS_SEND_TLV_OFFSET, &offset, sizeof(UINT64)); + + length = min((UINT16)(context->lastinode.size - se->offset - off), length); + send_add_tlv(context, BTRFS_SEND_TLV_DATA, buf + skip_start, length); + + send_command_finish(context, pos); + } + + ExFreePool(buf); + } else { + UINT8 *buf, *compbuf; + UINT64 off; + UINT32* csum; + + buf = ExAllocatePoolWithTag(PagedPool, (ULONG)se->data.decoded_size, ALLOC_TAG); + if (!buf) { + ERR("out of memory\n"); + ExFreePool(se); + if (se2) ExFreePool(se2); + return STATUS_INSUFFICIENT_RESOURCES; + } + + compbuf = ExAllocatePoolWithTag(PagedPool, (ULONG)ed2->size, ALLOC_TAG); + if (!compbuf) { + ERR("out of memory\n"); + ExFreePool(buf); + ExFreePool(se); + if (se2) ExFreePool(se2); + return STATUS_INSUFFICIENT_RESOURCES; + } + + if (context->lastinode.flags & BTRFS_INODE_NODATASUM) + csum = NULL; + else { + UINT32 len; + + len = (UINT32)(ed2->size / context->Vcb->superblock.sector_size); + + csum = ExAllocatePoolWithTag(PagedPool, len * sizeof(UINT32), ALLOC_TAG); + if (!csum) { + ERR("out of memory\n"); + ExFreePool(compbuf); + ExFreePool(buf); + ExFreePool(se); + if (se2) ExFreePool(se2); + return STATUS_INSUFFICIENT_RESOURCES; + } + + Status = load_csum(context->Vcb, csum, ed2->address, len, NULL); + if (!NT_SUCCESS(Status)) { + ERR("load_csum returned %08x\n", Status); + ExFreePool(csum); + ExFreePool(compbuf); + ExFreePool(buf); + ExFreePool(se); + if (se2) ExFreePool(se2); + return Status; + } + } + + Status = read_data(context->Vcb, ed2->address, (UINT32)ed2->size, csum, FALSE, compbuf, NULL, NULL, NULL, 0, FALSE, NormalPagePriority); + if (!NT_SUCCESS(Status)) { + ERR("read_data returned %08x\n", Status); + ExFreePool(compbuf); + ExFreePool(buf); + ExFreePool(se); + if (se2) ExFreePool(se2); + if (csum) ExFreePool(csum); + return Status; + } + + if (csum) + ExFreePool(csum); + + if (se->data.compression == 
BTRFS_COMPRESSION_ZLIB) { + Status = zlib_decompress(compbuf, (UINT32)ed2->size, buf, (UINT32)se->data.decoded_size); + if (!NT_SUCCESS(Status)) { + ERR("zlib_decompress returned %08x\n", Status); + ExFreePool(compbuf); + ExFreePool(buf); + ExFreePool(se); + if (se2) ExFreePool(se2); + return Status; + } + } else if (se->data.compression == BTRFS_COMPRESSION_LZO) { + Status = lzo_decompress(&compbuf[sizeof(UINT32)], (UINT32)ed2->size, buf, (UINT32)se->data.decoded_size, sizeof(UINT32)); + if (!NT_SUCCESS(Status)) { + ERR("lzo_decompress returned %08x\n", Status); + ExFreePool(compbuf); + ExFreePool(buf); + ExFreePool(se); + if (se2) ExFreePool(se2); + return Status; + } + } + + ExFreePool(compbuf); + + for (off = ed2->offset; off < ed2->offset + ed2->num_bytes; off += MAX_SEND_WRITE) { + UINT16 length = (UINT16)min(ed2->offset + ed2->num_bytes - off, MAX_SEND_WRITE); + UINT64 offset; + + if (context->datalen > SEND_BUFFER_LENGTH) { + Status = wait_for_flush(context, tp1, tp2); + if (!NT_SUCCESS(Status)) { + ERR("wait_for_flush returned %08x\n", Status); + ExFreePool(buf); + ExFreePool(se); + if (se2) ExFreePool(se2); + return Status; + } + + if (context->send->cancelling) { + ExFreePool(buf); + ExFreePool(se); + if (se2) ExFreePool(se2); + return STATUS_SUCCESS; + } + } + + pos = context->datalen; + + send_command(context, BTRFS_SEND_CMD_WRITE); + + send_add_tlv(context, BTRFS_SEND_TLV_PATH, context->lastinode.path, context->lastinode.path ? 
(UINT16)strlen(context->lastinode.path) : 0); + + offset = se->offset + off; + send_add_tlv(context, BTRFS_SEND_TLV_OFFSET, &offset, sizeof(UINT64)); + + length = min((UINT16)(context->lastinode.size - se->offset - off), length); + send_add_tlv(context, BTRFS_SEND_TLV_DATA, &buf[off], length); + + send_command_finish(context, pos); + } + + ExFreePool(buf); + } + + ExFreePool(se); + if (se2) ExFreePool(se2); + } + + return STATUS_SUCCESS; +} + +static NTSTATUS finish_inode(send_context* context, traverse_ptr* tp1, traverse_ptr* tp2) { + LIST_ENTRY* le; + + if (!IsListEmpty(&context->lastinode.refs) || !IsListEmpty(&context->lastinode.oldrefs)) { + NTSTATUS Status = flush_refs(context, tp1, tp2); + if (!NT_SUCCESS(Status)) { + ERR("flush_refs returned %08x\n", Status); + return Status; + } + + if (context->send->cancelling) + return STATUS_SUCCESS; + } + + if (!context->lastinode.deleting) { + if (context->lastinode.file) { + NTSTATUS Status = flush_extents(context, tp1, tp2); + if (!NT_SUCCESS(Status)) { + ERR("flush_extents returned %08x\n", Status); + return Status; + } + + if (context->send->cancelling) + return STATUS_SUCCESS; + + send_truncate_command(context, context->lastinode.path, context->lastinode.size); + } + + if (context->lastinode.new || context->lastinode.uid != context->lastinode.olduid || context->lastinode.gid != context->lastinode.oldgid) + send_chown_command(context, context->lastinode.path, context->lastinode.uid, context->lastinode.gid); + + if (((context->lastinode.mode & __S_IFLNK) != __S_IFLNK || ((context->lastinode.mode & 07777) != 0777)) && + (context->lastinode.new || context->lastinode.mode != context->lastinode.oldmode)) + send_chmod_command(context, context->lastinode.path, context->lastinode.mode); + + send_utimes_command(context, context->lastinode.path, &context->lastinode.atime, &context->lastinode.mtime, &context->lastinode.ctime); + } + + while (!IsListEmpty(&context->lastinode.exts)) { + 
ExFreePool(CONTAINING_RECORD(RemoveHeadList(&context->lastinode.exts), send_ext, list_entry)); + } + + while (!IsListEmpty(&context->lastinode.oldexts)) { + ExFreePool(CONTAINING_RECORD(RemoveHeadList(&context->lastinode.oldexts), send_ext, list_entry)); + } + + if (context->parent) { + le = context->pending_rmdirs.Flink; + + while (le != &context->pending_rmdirs) { + pending_rmdir* pr = CONTAINING_RECORD(le, pending_rmdir, list_entry); + + if (pr->last_child_inode <= context->lastinode.inode) { + le = le->Flink; + + send_rmdir_command(context, pr->sd->namelen, pr->sd->name); + + RemoveEntryList(&pr->sd->list_entry); + + if (pr->sd->name) + ExFreePool(pr->sd->name); + + while (!IsListEmpty(&pr->sd->deleted_children)) { + deleted_child* dc = CONTAINING_RECORD(RemoveHeadList(&pr->sd->deleted_children), deleted_child, list_entry); + ExFreePool(dc); + } + + ExFreePool(pr->sd); + + RemoveEntryList(&pr->list_entry); + ExFreePool(pr); + } else + break; + } + } + + context->lastinode.inode = 0; + context->lastinode.o = NULL; + + if (context->lastinode.path) { + ExFreePool(context->lastinode.path); + context->lastinode.path = NULL; + } + + return STATUS_SUCCESS; +} + +static NTSTATUS send_extent_data(send_context* context, traverse_ptr* tp, traverse_ptr* tp2) { + NTSTATUS Status; + + if (tp && tp2 && tp->item->size == tp2->item->size && RtlCompareMemory(tp->item->data, tp2->item->data, tp->item->size) == tp->item->size) + return STATUS_SUCCESS; + + if (!IsListEmpty(&context->lastinode.refs) || !IsListEmpty(&context->lastinode.oldrefs)) { + Status = flush_refs(context, tp, tp2); + if (!NT_SUCCESS(Status)) { + ERR("flush_refs returned %08x\n", Status); + return Status; + } + + if (context->send->cancelling) + return STATUS_SUCCESS; + } + + if ((context->lastinode.mode & __S_IFLNK) == __S_IFLNK) + return STATUS_SUCCESS; + + if (tp) { + EXTENT_DATA* ed; + EXTENT_DATA2* ed2 = NULL; + + if (tp->item->size < sizeof(EXTENT_DATA)) { + ERR("(%llx,%x,%llx) was %u bytes, expected at 
least %u\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, + tp->item->size, sizeof(EXTENT_DATA)); + return STATUS_INTERNAL_ERROR; + } + + ed = (EXTENT_DATA*)tp->item->data; + + if (ed->encryption != BTRFS_ENCRYPTION_NONE) { + ERR("unknown encryption type %u\n", ed->encryption); + return STATUS_INTERNAL_ERROR; + } + + if (ed->encoding != BTRFS_ENCODING_NONE) { + ERR("unknown encoding type %u\n", ed->encoding); + return STATUS_INTERNAL_ERROR; + } + + if (ed->compression != BTRFS_COMPRESSION_NONE && ed->compression != BTRFS_COMPRESSION_ZLIB && ed->compression != BTRFS_COMPRESSION_LZO) { + ERR("unknown compression type %u\n", ed->compression); + return STATUS_INTERNAL_ERROR; + } + + if (ed->type == EXTENT_TYPE_REGULAR) { + if (tp->item->size < offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2)) { + ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, + tp->item->size, offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2)); + return STATUS_INTERNAL_ERROR; + } + + ed2 = (EXTENT_DATA2*)ed->data; + } else if (ed->type == EXTENT_TYPE_INLINE) { + if (tp->item->size < offsetof(EXTENT_DATA, data[0]) + ed->decoded_size && ed->compression == BTRFS_COMPRESSION_NONE) { + ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, + tp->item->size, offsetof(EXTENT_DATA, data[0]) + ed->decoded_size); + return STATUS_INTERNAL_ERROR; + } + } + + if ((ed->type == EXTENT_TYPE_INLINE || (ed->type == EXTENT_TYPE_REGULAR && ed2->size != 0)) && ed->decoded_size != 0) { + send_ext* se = ExAllocatePoolWithTag(PagedPool, offsetof(send_ext, data) + tp->item->size, ALLOC_TAG); + + if (!se) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + se->offset = tp->item->key.offset; + se->datalen = tp->item->size; + RtlCopyMemory(&se->data, tp->item->data, tp->item->size); + InsertTailList(&context->lastinode.exts, &se->list_entry); 
+ } + } + + if (tp2) { + EXTENT_DATA* ed; + EXTENT_DATA2* ed2 = NULL; + + if (tp2->item->size < sizeof(EXTENT_DATA)) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp2->item->key.obj_id, tp2->item->key.obj_type, tp2->item->key.offset, + tp2->item->size, sizeof(EXTENT_DATA)); + return STATUS_INTERNAL_ERROR; + } + + ed = (EXTENT_DATA*)tp2->item->data; + + if (ed->encryption != BTRFS_ENCRYPTION_NONE) { + ERR("unknown encryption type %u\n", ed->encryption); + return STATUS_INTERNAL_ERROR; + } + + if (ed->encoding != BTRFS_ENCODING_NONE) { + ERR("unknown encoding type %u\n", ed->encoding); + return STATUS_INTERNAL_ERROR; + } + + if (ed->compression != BTRFS_COMPRESSION_NONE && ed->compression != BTRFS_COMPRESSION_ZLIB && ed->compression != BTRFS_COMPRESSION_LZO) { + ERR("unknown compression type %u\n", ed->compression); + return STATUS_INTERNAL_ERROR; + } + + if (ed->type == EXTENT_TYPE_REGULAR) { + if (tp2->item->size < offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2)) { + ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp2->item->key.obj_id, tp2->item->key.obj_type, tp2->item->key.offset, + tp2->item->size, offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2)); + return STATUS_INTERNAL_ERROR; + } + + ed2 = (EXTENT_DATA2*)ed->data; + } else if (ed->type == EXTENT_TYPE_INLINE) { + if (tp2->item->size < offsetof(EXTENT_DATA, data[0]) + ed->decoded_size) { + ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp2->item->key.obj_id, tp2->item->key.obj_type, tp2->item->key.offset, + tp2->item->size, offsetof(EXTENT_DATA, data[0]) + ed->decoded_size); + return STATUS_INTERNAL_ERROR; + } + } + + if ((ed->type == EXTENT_TYPE_INLINE || (ed->type == EXTENT_TYPE_REGULAR && ed2->size != 0)) && ed->decoded_size != 0) { + send_ext* se = ExAllocatePoolWithTag(PagedPool, offsetof(send_ext, data) + tp2->item->size, ALLOC_TAG); + + if (!se) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + se->offset = tp2->item->key.offset; + se->datalen 
= tp2->item->size; + RtlCopyMemory(&se->data, tp2->item->data, tp2->item->size); + InsertTailList(&context->lastinode.oldexts, &se->list_entry); + } + } + + return STATUS_SUCCESS; +} + +typedef struct { + UINT16 namelen; + char* name; + UINT16 value1len; + char* value1; + UINT16 value2len; + char* value2; + LIST_ENTRY list_entry; +} xattr_cmp; + +static NTSTATUS send_xattr(send_context* context, traverse_ptr* tp, traverse_ptr* tp2) { + if (tp && tp2 && tp->item->size == tp2->item->size && RtlCompareMemory(tp->item->data, tp2->item->data, tp->item->size) == tp->item->size) + return STATUS_SUCCESS; + + if (!IsListEmpty(&context->lastinode.refs) || !IsListEmpty(&context->lastinode.oldrefs)) { + NTSTATUS Status = flush_refs(context, tp, tp2); + if (!NT_SUCCESS(Status)) { + ERR("flush_refs returned %08x\n", Status); + return Status; + } + + if (context->send->cancelling) + return STATUS_SUCCESS; + } + + if (tp && tp->item->size < sizeof(DIR_ITEM)) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, + tp->item->size, sizeof(DIR_ITEM)); + return STATUS_INTERNAL_ERROR; + } + + if (tp2 && tp2->item->size < sizeof(DIR_ITEM)) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp2->item->key.obj_id, tp2->item->key.obj_type, tp2->item->key.offset, + tp2->item->size, sizeof(DIR_ITEM)); + return STATUS_INTERNAL_ERROR; + } + + if (tp && !tp2) { + ULONG len; + DIR_ITEM* di; + + len = tp->item->size; + di = (DIR_ITEM*)tp->item->data; + + do { + ULONG pos; + + if (len < sizeof(DIR_ITEM) || len < offsetof(DIR_ITEM, name[0]) + di->m + di->n) { + ERR("(%llx,%x,%llx) was truncated\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset); + return STATUS_INTERNAL_ERROR; + } + + pos = context->datalen; + send_command(context, BTRFS_SEND_CMD_SET_XATTR); + send_add_tlv(context, BTRFS_SEND_TLV_PATH, context->lastinode.path, context->lastinode.path ? 
(UINT16)strlen(context->lastinode.path) : 0); + send_add_tlv(context, BTRFS_SEND_TLV_XATTR_NAME, di->name, di->n); + send_add_tlv(context, BTRFS_SEND_TLV_XATTR_DATA, &di->name[di->n], di->m); + send_command_finish(context, pos); + + len -= (ULONG)offsetof(DIR_ITEM, name[0]) + di->m + di->n; + di = (DIR_ITEM*)&di->name[di->m + di->n]; + } while (len > 0); + } else if (!tp && tp2) { + ULONG len; + DIR_ITEM* di; + + len = tp2->item->size; + di = (DIR_ITEM*)tp2->item->data; + + do { + ULONG pos; + + if (len < sizeof(DIR_ITEM) || len < offsetof(DIR_ITEM, name[0]) + di->m + di->n) { + ERR("(%llx,%x,%llx) was truncated\n", tp2->item->key.obj_id, tp2->item->key.obj_type, tp2->item->key.offset); + return STATUS_INTERNAL_ERROR; + } + + pos = context->datalen; + send_command(context, BTRFS_SEND_CMD_REMOVE_XATTR); + send_add_tlv(context, BTRFS_SEND_TLV_PATH, context->lastinode.path, context->lastinode.path ? (UINT16)strlen(context->lastinode.path) : 0); + send_add_tlv(context, BTRFS_SEND_TLV_XATTR_NAME, di->name, di->n); + send_command_finish(context, pos); + + len -= (ULONG)offsetof(DIR_ITEM, name[0]) + di->m + di->n; + di = (DIR_ITEM*)&di->name[di->m + di->n]; + } while (len > 0); + } else { + ULONG len; + DIR_ITEM* di; + LIST_ENTRY xattrs; + + InitializeListHead(&xattrs); + + len = tp->item->size; + di = (DIR_ITEM*)tp->item->data; + + do { + xattr_cmp* xa; + + if (len < sizeof(DIR_ITEM) || len < offsetof(DIR_ITEM, name[0]) + di->m + di->n) { + ERR("(%llx,%x,%llx) was truncated\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset); + return STATUS_INTERNAL_ERROR; + } + + xa = ExAllocatePoolWithTag(PagedPool, sizeof(xattr_cmp), ALLOC_TAG); + if (!xa) { + ERR("out of memory\n"); + + while (!IsListEmpty(&xattrs)) { + ExFreePool(CONTAINING_RECORD(RemoveHeadList(&xattrs), xattr_cmp, list_entry)); + } + + return STATUS_INSUFFICIENT_RESOURCES; + } + + xa->namelen = di->n; + xa->name = di->name; + xa->value1len = di->m; + xa->value1 = di->name + di->n; + 
xa->value2len = 0; + xa->value2 = NULL; + + InsertTailList(&xattrs, &xa->list_entry); + + len -= (ULONG)offsetof(DIR_ITEM, name[0]) + di->m + di->n; + di = (DIR_ITEM*)&di->name[di->m + di->n]; + } while (len > 0); + + len = tp2->item->size; + di = (DIR_ITEM*)tp2->item->data; + + do { + xattr_cmp* xa; + LIST_ENTRY* le; + BOOL found = FALSE; + + if (len < sizeof(DIR_ITEM) || len < offsetof(DIR_ITEM, name[0]) + di->m + di->n) { + ERR("(%llx,%x,%llx) was truncated\n", tp2->item->key.obj_id, tp2->item->key.obj_type, tp2->item->key.offset); + return STATUS_INTERNAL_ERROR; + } + + le = xattrs.Flink; + while (le != &xattrs) { + xa = CONTAINING_RECORD(le, xattr_cmp, list_entry); + + if (xa->namelen == di->n && RtlCompareMemory(xa->name, di->name, di->n) == di->n) { + xa->value2len = di->m; + xa->value2 = di->name + di->n; + found = TRUE; + break; + } + + le = le->Flink; + } + + if (!found) { + xa = ExAllocatePoolWithTag(PagedPool, sizeof(xattr_cmp), ALLOC_TAG); + if (!xa) { + ERR("out of memory\n"); + + while (!IsListEmpty(&xattrs)) { + ExFreePool(CONTAINING_RECORD(RemoveHeadList(&xattrs), xattr_cmp, list_entry)); + } + + return STATUS_INSUFFICIENT_RESOURCES; + } + + xa->namelen = di->n; + xa->name = di->name; + xa->value1len = 0; + xa->value1 = NULL; + xa->value2len = di->m; + xa->value2 = di->name + di->n; + + InsertTailList(&xattrs, &xa->list_entry); + } + + len -= (ULONG)offsetof(DIR_ITEM, name[0]) + di->m + di->n; + di = (DIR_ITEM*)&di->name[di->m + di->n]; + } while (len > 0); + + while (!IsListEmpty(&xattrs)) { + xattr_cmp* xa = CONTAINING_RECORD(RemoveHeadList(&xattrs), xattr_cmp, list_entry); + + if (xa->value1len != xa->value2len || !xa->value1 || !xa->value2 || RtlCompareMemory(xa->value1, xa->value2, xa->value1len) != xa->value1len) { + ULONG pos; + + if (!xa->value1) { + pos = context->datalen; + send_command(context, BTRFS_SEND_CMD_REMOVE_XATTR); + send_add_tlv(context, BTRFS_SEND_TLV_PATH, context->lastinode.path, context->lastinode.path ? 
(UINT16)strlen(context->lastinode.path) : 0); + send_add_tlv(context, BTRFS_SEND_TLV_XATTR_NAME, xa->name, xa->namelen); + send_command_finish(context, pos); + } else { + pos = context->datalen; + send_command(context, BTRFS_SEND_CMD_SET_XATTR); + send_add_tlv(context, BTRFS_SEND_TLV_PATH, context->lastinode.path, context->lastinode.path ? (UINT16)strlen(context->lastinode.path) : 0); + send_add_tlv(context, BTRFS_SEND_TLV_XATTR_NAME, xa->name, xa->namelen); + send_add_tlv(context, BTRFS_SEND_TLV_XATTR_DATA, xa->value1, xa->value1len); + send_command_finish(context, pos); + } + } + + ExFreePool(xa); + } + } + + return STATUS_SUCCESS; +} + +_Function_class_(KSTART_ROUTINE) +#ifdef __REACTOS__ +static void NTAPI send_thread(void* ctx) { +#else +static void send_thread(void* ctx) { +#endif + send_context* context = (send_context*)ctx; + NTSTATUS Status; + KEY searchkey; + traverse_ptr tp, tp2; + + InterlockedIncrement(&context->root->send_ops); + + if (context->parent) + InterlockedIncrement(&context->parent->send_ops); + + if (context->clones) { + ULONG i; + + for (i = 0; i < context->num_clones; i++) { + InterlockedIncrement(&context->clones[i]->send_ops); + } + } + + ExAcquireResourceExclusiveLite(&context->Vcb->tree_lock, TRUE); + + flush_subvol_fcbs(context->root); + + if (context->parent) + flush_subvol_fcbs(context->parent); + + if (context->Vcb->need_write) + Status = do_write(context->Vcb, NULL); + else + Status = STATUS_SUCCESS; + + free_trees(context->Vcb); + + if (!NT_SUCCESS(Status)) { + ERR("do_write returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + ExConvertExclusiveToSharedLite(&context->Vcb->tree_lock); + + searchkey.obj_id = searchkey.offset = 0; + searchkey.obj_type = 0; + + Status = find_item(context->Vcb, context->root, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + if 
(context->parent) { + BOOL ended1 = FALSE, ended2 = FALSE; + Status = find_item(context->Vcb, context->parent, &tp2, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + do { + traverse_ptr next_tp; + + if (context->datalen > SEND_BUFFER_LENGTH) { + KEY key1 = tp.item->key, key2 = tp2.item->key; + + ExReleaseResourceLite(&context->Vcb->tree_lock); + + KeClearEvent(&context->send->cleared_event); + KeSetEvent(&context->buffer_event, 0, TRUE); + KeWaitForSingleObject(&context->send->cleared_event, Executive, KernelMode, FALSE, NULL); + + if (context->send->cancelling) + goto end; + + ExAcquireResourceSharedLite(&context->Vcb->tree_lock, TRUE); + + if (!ended1) { + Status = find_item(context->Vcb, context->root, &tp, &key1, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + if (keycmp(tp.item->key, key1)) { + ERR("readonly subvolume changed\n"); + ExReleaseResourceLite(&context->Vcb->tree_lock); + Status = STATUS_INTERNAL_ERROR; + goto end; + } + } + + if (!ended2) { + Status = find_item(context->Vcb, context->parent, &tp2, &key2, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + if (keycmp(tp2.item->key, key2)) { + ERR("readonly subvolume changed\n"); + ExReleaseResourceLite(&context->Vcb->tree_lock); + Status = STATUS_INTERNAL_ERROR; + goto end; + } + } + } + + while (!ended1 && !ended2 && tp.tree->header.address == tp2.tree->header.address) { + Status = skip_to_difference(context->Vcb, &tp, &tp2, &ended1, &ended2); + if (!NT_SUCCESS(Status)) { + ERR("skip_to_difference returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } + + if (!ended1 && !ended2 && !keycmp(tp.item->key, tp2.item->key)) { + 
BOOL no_next = FALSE, no_next2 = FALSE; + + TRACE("~ %llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); + + if (context->lastinode.inode != 0 && tp.item->key.obj_id > context->lastinode.inode) { + Status = finish_inode(context, ended1 ? NULL : &tp, ended2 ? NULL : &tp2); + if (!NT_SUCCESS(Status)) { + ERR("finish_inode returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + if (context->send->cancelling) { + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } + + if (tp.item->key.obj_type == TYPE_INODE_ITEM) { + if (tp.item->size == tp2.item->size && tp.item->size > 0 && RtlCompareMemory(tp.item->data, tp2.item->data, tp.item->size) == tp.item->size) { + UINT64 inode = tp.item->key.obj_id; + + while (TRUE) { + if (!find_next_item(context->Vcb, &tp, &next_tp, FALSE, NULL)) { + ended1 = TRUE; + break; + } + + tp = next_tp; + + if (tp.item->key.obj_id != inode) + break; + } + + while (TRUE) { + if (!find_next_item(context->Vcb, &tp2, &next_tp, FALSE, NULL)) { + ended2 = TRUE; + break; + } + + tp2 = next_tp; + + if (tp2.item->key.obj_id != inode) + break; + } + + no_next = TRUE; + } else if (tp.item->size > sizeof(UINT64) && tp2.item->size > sizeof(UINT64) && *(UINT64*)tp.item->data != *(UINT64*)tp2.item->data) { + UINT64 inode = tp.item->key.obj_id; + + Status = send_inode(context, NULL, &tp2); + if (!NT_SUCCESS(Status)) { + ERR("send_inode returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + while (TRUE) { + if (!find_next_item(context->Vcb, &tp2, &next_tp, FALSE, NULL)) { + ended2 = TRUE; + break; + } + + tp2 = next_tp; + + if (tp2.item->key.obj_id != inode) + break; + + if (tp2.item->key.obj_type == TYPE_INODE_REF) { + Status = send_inode_ref(context, &tp2, TRUE); + if (!NT_SUCCESS(Status)) { + ERR("send_inode_ref returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } else if 
(tp2.item->key.obj_type == TYPE_INODE_EXTREF) { + Status = send_inode_extref(context, &tp2, TRUE); + if (!NT_SUCCESS(Status)) { + ERR("send_inode_extref returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } + } + + Status = finish_inode(context, ended1 ? NULL : &tp, ended2 ? NULL : &tp2); + if (!NT_SUCCESS(Status)) { + ERR("finish_inode returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + if (context->send->cancelling) { + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + no_next2 = TRUE; + + Status = send_inode(context, &tp, NULL); + if (!NT_SUCCESS(Status)) { + ERR("send_inode returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } else { + Status = send_inode(context, &tp, &tp2); + if (!NT_SUCCESS(Status)) { + ERR("send_inode returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } + } else if (tp.item->key.obj_type == TYPE_INODE_REF) { + Status = send_inode_ref(context, &tp, FALSE); + if (!NT_SUCCESS(Status)) { + ERR("send_inode_ref returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + Status = send_inode_ref(context, &tp2, TRUE); + if (!NT_SUCCESS(Status)) { + ERR("send_inode_ref returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } else if (tp.item->key.obj_type == TYPE_INODE_EXTREF) { + Status = send_inode_extref(context, &tp, FALSE); + if (!NT_SUCCESS(Status)) { + ERR("send_inode_extref returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + Status = send_inode_extref(context, &tp2, TRUE); + if (!NT_SUCCESS(Status)) { + ERR("send_inode_extref returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } else if (tp.item->key.obj_type == TYPE_EXTENT_DATA) { + Status = send_extent_data(context, &tp, &tp2); + 
if (!NT_SUCCESS(Status)) { + ERR("send_extent_data returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + if (context->send->cancelling) { + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } else if (tp.item->key.obj_type == TYPE_XATTR_ITEM) { + Status = send_xattr(context, &tp, &tp2); + if (!NT_SUCCESS(Status)) { + ERR("send_xattr returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + if (context->send->cancelling) { + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } + + if (!no_next) { + if (find_next_item(context->Vcb, &tp, &next_tp, FALSE, NULL)) + tp = next_tp; + else + ended1 = TRUE; + + if (!no_next2) { + if (find_next_item(context->Vcb, &tp2, &next_tp, FALSE, NULL)) + tp2 = next_tp; + else + ended2 = TRUE; + } + } + } else if (ended2 || (!ended1 && !ended2 && keycmp(tp.item->key, tp2.item->key) == -1)) { + TRACE("A %llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); + + if (context->lastinode.inode != 0 && tp.item->key.obj_id > context->lastinode.inode) { + Status = finish_inode(context, ended1 ? NULL : &tp, ended2 ? 
NULL : &tp2); + if (!NT_SUCCESS(Status)) { + ERR("finish_inode returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + if (context->send->cancelling) { + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } + + if (tp.item->key.obj_type == TYPE_INODE_ITEM) { + Status = send_inode(context, &tp, NULL); + if (!NT_SUCCESS(Status)) { + ERR("send_inode returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } else if (tp.item->key.obj_type == TYPE_INODE_REF) { + Status = send_inode_ref(context, &tp, FALSE); + if (!NT_SUCCESS(Status)) { + ERR("send_inode_ref returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } else if (tp.item->key.obj_type == TYPE_INODE_EXTREF) { + Status = send_inode_extref(context, &tp, FALSE); + if (!NT_SUCCESS(Status)) { + ERR("send_inode_extref returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } else if (tp.item->key.obj_type == TYPE_EXTENT_DATA) { + Status = send_extent_data(context, &tp, NULL); + if (!NT_SUCCESS(Status)) { + ERR("send_extent_data returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + if (context->send->cancelling) { + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } else if (tp.item->key.obj_type == TYPE_XATTR_ITEM) { + Status = send_xattr(context, &tp, NULL); + if (!NT_SUCCESS(Status)) { + ERR("send_xattr returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + if (context->send->cancelling) { + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } + + if (find_next_item(context->Vcb, &tp, &next_tp, FALSE, NULL)) + tp = next_tp; + else + ended1 = TRUE; + } else if (ended1 || (!ended1 && !ended2 && keycmp(tp.item->key, tp2.item->key) == 1)) { + TRACE("B %llx,%x,%llx\n", tp2.item->key.obj_id, tp2.item->key.obj_type, 
tp2.item->key.offset); + + if (context->lastinode.inode != 0 && tp2.item->key.obj_id > context->lastinode.inode) { + Status = finish_inode(context, ended1 ? NULL : &tp, ended2 ? NULL : &tp2); + if (!NT_SUCCESS(Status)) { + ERR("finish_inode returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + if (context->send->cancelling) { + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } + + if (tp2.item->key.obj_type == TYPE_INODE_ITEM) { + Status = send_inode(context, NULL, &tp2); + if (!NT_SUCCESS(Status)) { + ERR("send_inode returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } else if (tp2.item->key.obj_type == TYPE_INODE_REF) { + Status = send_inode_ref(context, &tp2, TRUE); + if (!NT_SUCCESS(Status)) { + ERR("send_inode_ref returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } else if (tp2.item->key.obj_type == TYPE_INODE_EXTREF) { + Status = send_inode_extref(context, &tp2, TRUE); + if (!NT_SUCCESS(Status)) { + ERR("send_inode_extref returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } else if (tp2.item->key.obj_type == TYPE_EXTENT_DATA && !context->lastinode.deleting) { + Status = send_extent_data(context, NULL, &tp2); + if (!NT_SUCCESS(Status)) { + ERR("send_extent_data returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + if (context->send->cancelling) { + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } else if (tp2.item->key.obj_type == TYPE_XATTR_ITEM && !context->lastinode.deleting) { + Status = send_xattr(context, NULL, &tp2); + if (!NT_SUCCESS(Status)) { + ERR("send_xattr returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + if (context->send->cancelling) { + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } + + if 
(find_next_item(context->Vcb, &tp2, &next_tp, FALSE, NULL)) + tp2 = next_tp; + else + ended2 = TRUE; + } + } while (!ended1 || !ended2); + } else { + do { + traverse_ptr next_tp; + + if (context->datalen > SEND_BUFFER_LENGTH) { + KEY key = tp.item->key; + + ExReleaseResourceLite(&context->Vcb->tree_lock); + + KeClearEvent(&context->send->cleared_event); + KeSetEvent(&context->buffer_event, 0, TRUE); + KeWaitForSingleObject(&context->send->cleared_event, Executive, KernelMode, FALSE, NULL); + + if (context->send->cancelling) + goto end; + + ExAcquireResourceSharedLite(&context->Vcb->tree_lock, TRUE); + + Status = find_item(context->Vcb, context->root, &tp, &key, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + if (keycmp(tp.item->key, key)) { + ERR("readonly subvolume changed\n"); + ExReleaseResourceLite(&context->Vcb->tree_lock); + Status = STATUS_INTERNAL_ERROR; + goto end; + } + } + + if (context->lastinode.inode != 0 && tp.item->key.obj_id > context->lastinode.inode) { + Status = finish_inode(context, &tp, NULL); + if (!NT_SUCCESS(Status)) { + ERR("finish_inode returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + if (context->send->cancelling) { + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } + + if (tp.item->key.obj_type == TYPE_INODE_ITEM) { + Status = send_inode(context, &tp, NULL); + if (!NT_SUCCESS(Status)) { + ERR("send_inode returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } else if (tp.item->key.obj_type == TYPE_INODE_REF) { + Status = send_inode_ref(context, &tp, FALSE); + if (!NT_SUCCESS(Status)) { + ERR("send_inode_ref returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } else if (tp.item->key.obj_type == TYPE_INODE_EXTREF) { + Status = send_inode_extref(context, &tp, FALSE); + if 
(!NT_SUCCESS(Status)) { + ERR("send_inode_extref returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } else if (tp.item->key.obj_type == TYPE_EXTENT_DATA) { + Status = send_extent_data(context, &tp, NULL); + if (!NT_SUCCESS(Status)) { + ERR("send_extent_data returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + if (context->send->cancelling) { + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } else if (tp.item->key.obj_type == TYPE_XATTR_ITEM) { + Status = send_xattr(context, &tp, NULL); + if (!NT_SUCCESS(Status)) { + ERR("send_xattr returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + if (context->send->cancelling) { + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + } + + if (find_next_item(context->Vcb, &tp, &next_tp, FALSE, NULL)) + tp = next_tp; + else + break; + } while (TRUE); + } + + if (context->lastinode.inode != 0) { + Status = finish_inode(context, NULL, NULL); + if (!NT_SUCCESS(Status)) { + ERR("finish_inode returned %08x\n", Status); + ExReleaseResourceLite(&context->Vcb->tree_lock); + goto end; + } + + ExReleaseResourceLite(&context->Vcb->tree_lock); + + if (context->send->cancelling) + goto end; + } else + ExReleaseResourceLite(&context->Vcb->tree_lock); + + KeClearEvent(&context->send->cleared_event); + KeSetEvent(&context->buffer_event, 0, TRUE); + KeWaitForSingleObject(&context->send->cleared_event, Executive, KernelMode, FALSE, NULL); + + Status = STATUS_SUCCESS; + +end: + if (!NT_SUCCESS(Status)) { + KeSetEvent(&context->buffer_event, 0, FALSE); + + if (context->send->ccb) + context->send->ccb->send_status = Status; + } + + ExAcquireResourceExclusiveLite(&context->Vcb->send_load_lock, TRUE); + + while (!IsListEmpty(&context->orphans)) { + orphan* o = CONTAINING_RECORD(RemoveHeadList(&context->orphans), orphan, list_entry); + ExFreePool(o); + } + + while 
(!IsListEmpty(&context->dirs)) { + send_dir* sd = CONTAINING_RECORD(RemoveHeadList(&context->dirs), send_dir, list_entry); + + if (sd->name) + ExFreePool(sd->name); + + while (!IsListEmpty(&sd->deleted_children)) { + deleted_child* dc = CONTAINING_RECORD(RemoveHeadList(&sd->deleted_children), deleted_child, list_entry); + ExFreePool(dc); + } + + ExFreePool(sd); + } + + ZwClose(context->send->thread); + context->send->thread = NULL; + + if (context->send->ccb) + context->send->ccb->send = NULL; + + RemoveEntryList(&context->send->list_entry); + ExFreePool(context->send); + ExFreePool(context->data); + + InterlockedDecrement(&context->Vcb->running_sends); + InterlockedDecrement(&context->root->send_ops); + + if (context->parent) + InterlockedDecrement(&context->parent->send_ops); + + ExReleaseResourceLite(&context->Vcb->send_load_lock); + + if (context->clones) { + ULONG i; + + for (i = 0; i < context->num_clones; i++) { + InterlockedDecrement(&context->clones[i]->send_ops); + } + + ExFreePool(context->clones); + } + + ExFreePool(context); + + PsTerminateSystemThread(STATUS_SUCCESS); +} + +NTSTATUS send_subvol(device_extension* Vcb, void* data, ULONG datalen, PFILE_OBJECT FileObject, PIRP Irp) { + NTSTATUS Status; + fcb* fcb; + ccb* ccb; + root* parsubvol = NULL; + send_context* context; + send_info* send; + ULONG num_clones = 0; + root** clones = NULL; + + if (!FileObject || !FileObject->FsContext || !FileObject->FsContext2 || FileObject->FsContext == Vcb->volume_fcb) + return STATUS_INVALID_PARAMETER; + + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), Irp->RequestorMode)) + return STATUS_PRIVILEGE_NOT_HELD; + + fcb = FileObject->FsContext; + ccb = FileObject->FsContext2; + + if (fcb->inode != SUBVOL_ROOT_INODE || fcb == Vcb->root_fileref->fcb) + return STATUS_INVALID_PARAMETER; + + if (!Vcb->readonly && !(fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY)) + return STATUS_INVALID_PARAMETER; + + if (data) { + btrfs_send_subvol* bss 
= (btrfs_send_subvol*)data; + HANDLE parent; + +#if defined(_WIN64) + if (IoIs32bitProcess(Irp)) { + btrfs_send_subvol32* bss32 = (btrfs_send_subvol32*)data; + + if (datalen < offsetof(btrfs_send_subvol32, num_clones)) + return STATUS_INVALID_PARAMETER; + + parent = Handle32ToHandle(bss32->parent); + + if (datalen >= offsetof(btrfs_send_subvol32, clones[0])) + num_clones = bss32->num_clones; + + if (datalen < offsetof(btrfs_send_subvol32, clones[0]) + (num_clones * sizeof(UINT32))) + return STATUS_INVALID_PARAMETER; + } else { +#endif + if (datalen < offsetof(btrfs_send_subvol, num_clones)) + return STATUS_INVALID_PARAMETER; + + parent = bss->parent; + + if (datalen >= offsetof(btrfs_send_subvol, clones[0])) + num_clones = bss->num_clones; + + if (datalen < offsetof(btrfs_send_subvol, clones[0]) + (num_clones * sizeof(HANDLE))) + return STATUS_INVALID_PARAMETER; +#if defined(_WIN64) + } +#endif + + if (parent) { + PFILE_OBJECT fileobj; + struct _fcb* parfcb; + + Status = ObReferenceObjectByHandle(parent, 0, *IoFileObjectType, Irp->RequestorMode, (void**)&fileobj, NULL); + if (!NT_SUCCESS(Status)) { + ERR("ObReferenceObjectByHandle returned %08x\n", Status); + return Status; + } + + if (fileobj->DeviceObject != FileObject->DeviceObject) { + ObDereferenceObject(fileobj); + return STATUS_INVALID_PARAMETER; + } + + parfcb = fileobj->FsContext; + + if (!parfcb || parfcb == Vcb->root_fileref->fcb || parfcb == Vcb->volume_fcb || parfcb->inode != SUBVOL_ROOT_INODE) { + ObDereferenceObject(fileobj); + return STATUS_INVALID_PARAMETER; + } + + parsubvol = parfcb->subvol; + ObDereferenceObject(fileobj); + + if (!Vcb->readonly && !(parsubvol->root_item.flags & BTRFS_SUBVOL_READONLY)) + return STATUS_INVALID_PARAMETER; + + if (parsubvol == fcb->subvol) + return STATUS_INVALID_PARAMETER; + } + + if (num_clones > 0) { + ULONG i; + + clones = ExAllocatePoolWithTag(PagedPool, sizeof(root*) * num_clones, ALLOC_TAG); + if (!clones) { + ERR("out of memory\n"); + return 
STATUS_INSUFFICIENT_RESOURCES; + } + + for (i = 0; i < num_clones; i++) { + HANDLE h; + PFILE_OBJECT fileobj; + struct _fcb* clonefcb; + +#if defined(_WIN64) + if (IoIs32bitProcess(Irp)) { + btrfs_send_subvol32* bss32 = (btrfs_send_subvol32*)data; + + h = Handle32ToHandle(bss32->clones[i]); + } else +#endif + h = bss->clones[i]; + + Status = ObReferenceObjectByHandle(h, 0, *IoFileObjectType, Irp->RequestorMode, (void**)&fileobj, NULL); + if (!NT_SUCCESS(Status)) { + ERR("ObReferenceObjectByHandle returned %08x\n", Status); + ExFreePool(clones); + return Status; + } + + if (fileobj->DeviceObject != FileObject->DeviceObject) { + ObDereferenceObject(fileobj); + ExFreePool(clones); + return STATUS_INVALID_PARAMETER; + } + + clonefcb = fileobj->FsContext; + + if (!clonefcb || clonefcb == Vcb->root_fileref->fcb || clonefcb == Vcb->volume_fcb || clonefcb->inode != SUBVOL_ROOT_INODE) { + ObDereferenceObject(fileobj); + ExFreePool(clones); + return STATUS_INVALID_PARAMETER; + } + + clones[i] = clonefcb->subvol; + ObDereferenceObject(fileobj); + + if (!Vcb->readonly && !(clones[i]->root_item.flags & BTRFS_SUBVOL_READONLY)) { + ExFreePool(clones); + return STATUS_INVALID_PARAMETER; + } + } + } + } + + ExAcquireResourceExclusiveLite(&Vcb->send_load_lock, TRUE); + + if (ccb->send) { + WARN("send operation already running\n"); + ExReleaseResourceLite(&Vcb->send_load_lock); + return STATUS_DEVICE_NOT_READY; + } + + context = ExAllocatePoolWithTag(NonPagedPool, sizeof(send_context), ALLOC_TAG); + if (!context) { + ERR("out of memory\n"); + ExReleaseResourceLite(&Vcb->send_load_lock); + return STATUS_INSUFFICIENT_RESOURCES; + } + + context->Vcb = Vcb; + context->root = fcb->subvol; + context->parent = parsubvol; + InitializeListHead(&context->orphans); + InitializeListHead(&context->dirs); + InitializeListHead(&context->pending_rmdirs); + context->lastinode.inode = 0; + context->lastinode.path = NULL; + context->lastinode.sd = NULL; + context->root_dir = NULL; + context->num_clones 
= num_clones; + context->clones = clones; + InitializeListHead(&context->lastinode.refs); + InitializeListHead(&context->lastinode.oldrefs); + InitializeListHead(&context->lastinode.exts); + InitializeListHead(&context->lastinode.oldexts); + + context->data = ExAllocatePoolWithTag(PagedPool, SEND_BUFFER_LENGTH + (2 * MAX_SEND_WRITE), ALLOC_TAG); // give ourselves some wiggle room + if (!context->data) { + ExFreePool(context); + ExReleaseResourceLite(&Vcb->send_load_lock); + return STATUS_INSUFFICIENT_RESOURCES; + } + + context->datalen = 0; + + send_subvol_header(context, fcb->subvol, ccb->fileref); // FIXME - fileref needs some sort of lock here + + KeInitializeEvent(&context->buffer_event, NotificationEvent, FALSE); + + send = ExAllocatePoolWithTag(NonPagedPool, sizeof(send_info), ALLOC_TAG); + if (!send) { + ERR("out of memory\n"); + ExFreePool(context->data); + ExFreePool(context); + ExReleaseResourceLite(&Vcb->send_load_lock); + return STATUS_INSUFFICIENT_RESOURCES; + } + + KeInitializeEvent(&send->cleared_event, NotificationEvent, FALSE); + + send->context = context; + context->send = send; + + ccb->send = send; + send->ccb = ccb; + ccb->send_status = STATUS_SUCCESS; + + send->cancelling = FALSE; + + InterlockedIncrement(&Vcb->running_sends); + + Status = PsCreateSystemThread(&send->thread, 0, NULL, NULL, NULL, send_thread, context); + if (!NT_SUCCESS(Status)) { + ERR("PsCreateSystemThread returned %08x\n", Status); + ccb->send = NULL; + InterlockedDecrement(&Vcb->running_sends); + ExFreePool(send); + ExFreePool(context->data); + ExFreePool(context); + ExReleaseResourceLite(&Vcb->send_load_lock); + return Status; + } + + InsertTailList(&Vcb->send_ops, &send->list_entry); + ExReleaseResourceLite(&Vcb->send_load_lock); + + return STATUS_SUCCESS; +} + +NTSTATUS read_send_buffer(device_extension* Vcb, PFILE_OBJECT FileObject, void* data, ULONG datalen, ULONG_PTR* retlen, KPROCESSOR_MODE processor_mode) { + ccb* ccb; + send_context* context; + + ccb = FileObject ? 
FileObject->FsContext2 : NULL; + if (!ccb) + return STATUS_INVALID_PARAMETER; + + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) + return STATUS_PRIVILEGE_NOT_HELD; + + ExAcquireResourceExclusiveLite(&Vcb->send_load_lock, TRUE); + + if (!ccb->send) { + ExReleaseResourceLite(&Vcb->send_load_lock); + return !NT_SUCCESS(ccb->send_status) ? ccb->send_status : STATUS_END_OF_FILE; + } + + context = (send_context*)ccb->send->context; + + KeWaitForSingleObject(&context->buffer_event, Executive, KernelMode, FALSE, NULL); + + if (datalen == 0) { + ExReleaseResourceLite(&Vcb->send_load_lock); + return STATUS_SUCCESS; + } + + RtlCopyMemory(data, context->data, min(datalen, context->datalen)); + + if (datalen < context->datalen) { // not empty yet + *retlen = datalen; + RtlMoveMemory(context->data, &context->data[datalen], context->datalen - datalen); + context->datalen -= datalen; + ExReleaseResourceLite(&Vcb->send_load_lock); + } else { + *retlen = context->datalen; + context->datalen = 0; + ExReleaseResourceLite(&Vcb->send_load_lock); + + KeClearEvent(&context->buffer_event); + KeSetEvent(&ccb->send->cleared_event, 0, FALSE); + } + + return STATUS_SUCCESS; +} diff --git a/reactos/drivers/filesystems/btrfs/treefuncs.c b/reactos/drivers/filesystems/btrfs/treefuncs.c index 07919625db7..e2f6001e654 100644 --- a/reactos/drivers/filesystems/btrfs/treefuncs.c +++ b/reactos/drivers/filesystems/btrfs/treefuncs.c @@ -1,25 +1,23 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. 
- * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */ #include "btrfs_drv.h" -// #define DEBUG_TREE_LOCKS - -NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree** pt, tree* parent, PIRP Irp, const char* func, const char* file, unsigned int line) { +NTSTATUS load_tree(device_extension* Vcb, UINT64 addr, root* r, tree** pt, UINT64 generation, PIRP Irp) { UINT8* buf; NTSTATUS Status; tree_header* th; @@ -29,62 +27,55 @@ NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree** UINT8 h; BOOL inserted; LIST_ENTRY* le; - - TRACE("(%p, %llx)\n", Vcb, addr); - + buf = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG); if (!buf) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - - Status = read_data(Vcb, addr, Vcb->superblock.node_size, NULL, TRUE, buf, NULL, &c, Irp, FALSE); + + Status = read_data(Vcb, addr, Vcb->superblock.node_size, NULL, TRUE, buf, NULL, &c, Irp, generation, FALSE, NormalPagePriority); if (!NT_SUCCESS(Status)) { ERR("read_data returned 0x%08x\n", Status); ExFreePool(buf); return Status; } - + th = (tree_header*)buf; - + t = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG); if (!t) { ERR("out of memory\n"); ExFreePool(buf); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(&t->header, th, sizeof(tree_header)); -// t->address = addr; -// t->level = th->level; t->hash = calc_crc32c(0xffffffff, (UINT8*)&addr, sizeof(UINT64)); t->has_address = TRUE; t->Vcb = Vcb; t->parent = NULL; t->root = r; -// t->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG); t->paritem = NULL; t->size = 0; t->new_address = 0;
t->has_new_address = FALSE; t->updated_extents = FALSE; t->write = FALSE; - -// ExInitializeResourceLite(&t->nonpaged->load_tree_lock); - -// t->items = ExAllocatePoolWithTag(PagedPool, num_items * sizeof(tree_data), ALLOC_TAG); + t->uniqueness_determined = FALSE; + InitializeListHead(&t->itemlist); - + if (t->header.level == 0) { // leaf node leaf_node* ln = (leaf_node*)(buf + sizeof(tree_header)); unsigned int i; - + if ((t->header.num_items * sizeof(leaf_node)) + sizeof(tree_header) > Vcb->superblock.node_size) { ERR("tree at %llx has more items than expected (%x)\n", t->header.num_items); ExFreePool(buf); return STATUS_INSUFFICIENT_RESOURCES; } - + for (i = 0; i < t->header.num_items; i++) { td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); if (!td) { @@ -92,42 +83,41 @@ NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree** ExFreePool(buf); return STATUS_INSUFFICIENT_RESOURCES; } - + td->key = ln[i].key; -// TRACE("load_tree: leaf item %u (%x,%x,%x)\n", i, (UINT32)ln[i].key.obj_id, ln[i].key.obj_type, (UINT32)ln[i].key.offset); - - if (ln[i].size > 0) { - td->data = ExAllocatePoolWithTag(PagedPool, ln[i].size, ALLOC_TAG); - if (!td->data) { - ERR("out of memory\n"); - ExFreePool(buf); - return STATUS_INSUFFICIENT_RESOURCES; - } - - RtlCopyMemory(td->data, buf + sizeof(tree_header) + ln[i].offset, ln[i].size); - } else + + if (ln[i].size > 0) + td->data = buf + sizeof(tree_header) + ln[i].offset; + else td->data = NULL; - - td->size = ln[i].size; + + if (ln[i].size + sizeof(tree_header) + sizeof(leaf_node) > Vcb->superblock.node_size) { + ERR("overlarge item in tree %llx: %u > %u\n", addr, ln[i].size, Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node)); + ExFreePool(buf); + return STATUS_INTERNAL_ERROR; + } + + td->size = (UINT16)ln[i].size; td->ignore = FALSE; td->inserted = FALSE; - + InsertTailList(&t->itemlist, &td->list_entry); - + t->size += ln[i].size; } - + t->size += t->header.num_items * 
sizeof(leaf_node); + t->buf = buf; } else { internal_node* in = (internal_node*)(buf + sizeof(tree_header)); unsigned int i; - + if ((t->header.num_items * sizeof(internal_node)) + sizeof(tree_header) > Vcb->superblock.node_size) { ERR("tree at %llx has more items than expected (%x)\n", t->header.num_items); ExFreePool(buf); return STATUS_INSUFFICIENT_RESOURCES; } - + for (i = 0; i < t->header.num_items; i++) { td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); if (!td) { @@ -135,35 +125,32 @@ NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree** ExFreePool(buf); return STATUS_INSUFFICIENT_RESOURCES; } - + td->key = in[i].key; -// TRACE("load_tree: internal item %u (%x,%x,%x)\n", i, (UINT32)in[i].key.obj_id, in[i].key.obj_type, (UINT32)in[i].key.offset); - + td->treeholder.address = in[i].address; td->treeholder.generation = in[i].generation; td->treeholder.tree = NULL; -// td->treeholder.nonpaged->status = tree_holder_unloaded; td->ignore = FALSE; td->inserted = FALSE; - + InsertTailList(&t->itemlist, &td->list_entry); } - + t->size = t->header.num_items * sizeof(internal_node); + t->buf = NULL; + ExFreePool(buf); } - - ExFreePool(buf); - - InterlockedIncrement(&Vcb->open_trees); + InsertTailList(&Vcb->trees, &t->list_entry); - + h = t->hash >> 24; - + if (!Vcb->trees_ptrs[h]) { UINT8 h2 = h; - + le = Vcb->trees_hash.Flink; - + if (h2 > 0) { h2--; do { @@ -171,23 +158,23 @@ NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree** le = Vcb->trees_ptrs[h2]; break; } - + h2--; } while (h2 > 0); } } else le = Vcb->trees_ptrs[h]; - + inserted = FALSE; while (le != &Vcb->trees_hash) { tree* t2 = CONTAINING_RECORD(le, tree, list_entry_hash); - + if (t2->hash >= t->hash) { InsertHeadList(le->Blink, &t->list_entry_hash); inserted = TRUE; break; } - + le = le->Flink; } @@ -196,68 +183,48 @@ NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree** if (!Vcb->trees_ptrs[h] || 
t->list_entry_hash.Flink == Vcb->trees_ptrs[h]) Vcb->trees_ptrs[h] = &t->list_entry_hash; - + TRACE("returning %p\n", t); - + *pt = t; - + return STATUS_SUCCESS; } -static tree* free_tree2(tree* t, const char* func, const char* file, unsigned int line) { - LIST_ENTRY* le; - tree_data* td; +static tree* free_tree2(tree* t) { tree* par; root* r = t->root; - + par = t->parent; - -// if (par) ExAcquireResourceExclusiveLite(&par->nonpaged->load_tree_lock, TRUE); - + if (r && r->treeholder.tree != t) r = NULL; - -// if (r) { -// FsRtlEnterFileSystem(); -// ExAcquireResourceExclusiveLite(&r->nonpaged->load_tree_lock, TRUE); -// } - + if (par) { if (t->paritem) t->paritem->treeholder.tree = NULL; - -// ExReleaseResourceLite(&par->nonpaged->load_tree_lock); } - -// ExDeleteResourceLite(&t->nonpaged->load_tree_lock); - -// ExFreePool(t->nonpaged); - + while (!IsListEmpty(&t->itemlist)) { - le = RemoveHeadList(&t->itemlist); - td = CONTAINING_RECORD(le, tree_data, list_entry); - - if (t->header.level == 0 && td->data) + tree_data* td = CONTAINING_RECORD(RemoveHeadList(&t->itemlist), tree_data, list_entry); + + if (t->header.level == 0 && td->data && td->inserted) ExFreePool(td->data); - + ExFreeToPagedLookasideList(&t->Vcb->tree_data_lookaside, td); } - - InterlockedDecrement(&t->Vcb->open_trees); + RemoveEntryList(&t->list_entry); - - if (r) { + + if (r) r->treeholder.tree = NULL; -// ExReleaseResourceLite(&r->nonpaged->load_tree_lock); -// FsRtlExitFileSystem(); - } - + if (t->list_entry_hash.Flink) { UINT8 h = t->hash >> 24; if (t->Vcb->trees_ptrs[h] == &t->list_entry_hash) { if (t->list_entry_hash.Flink != &t->Vcb->trees_hash) { tree* t2 = CONTAINING_RECORD(t->list_entry_hash.Flink, tree, list_entry_hash); - + if ((t2->hash >> 24) == h) t->Vcb->trees_ptrs[h] = &t2->list_entry_hash; else @@ -265,141 +232,211 @@ static tree* free_tree2(tree* t, const char* func, const char* file, unsigned in } else t->Vcb->trees_ptrs[h] = NULL; } - + RemoveEntryList(&t->list_entry_hash); } 
- + + if (t->buf) + ExFreePool(t->buf); + ExFreePool(t); return NULL; } -NTSTATUS STDCALL _do_load_tree(device_extension* Vcb, tree_holder* th, root* r, tree* t, tree_data* td, BOOL* loaded, PIRP Irp, - const char* func, const char* file, unsigned int line) { -// KIRQL irql; -// tree_holder_nonpaged* thnp = th->nonpaged; +NTSTATUS do_load_tree(device_extension* Vcb, tree_holder* th, root* r, tree* t, tree_data* td, BOOL* loaded, PIRP Irp) { BOOL ret; - -// ExAcquireResourceExclusiveLite(&thnp->lock, TRUE); + ExAcquireResourceExclusiveLite(&r->nonpaged->load_tree_lock, TRUE); - -// KeAcquireSpinLock(&thnp->spin_lock, &irql); -// -// if (thnp->status == tree_header_loading) { -// KeReleaseSpinLock(&thnp->spin_lock, irql); -// -// // FIXME - wait for Event -// } else if (thnp->status == tree_header_unloaded || thnp->status == tree_header_unloading) { -// if (thnp->status == tree_header_unloading) { -// KeReleaseSpinLock(&thnp->spin_lock, irql); -// // FIXME - wait for Event -// } -// -// // FIXME - change status -// thnp->status = tree_header_loading; -// KeReleaseSpinLock(&thnp->spin_lock, irql); -// -// // FIXME - load -// // FIXME - change status -// // FIXME - trigger event -// } else if (thnp->status == tree_header_loaded) { -// _increase_tree_rc(th->tree, func, file, line); -// KeReleaseSpinLock(&thnp->spin_lock, irql); -// -// ret = FALSE; -// } if (!th->tree) { NTSTATUS Status; - - Status = _load_tree(Vcb, th->address, r, &th->tree, t, Irp, func, file, line); + tree* nt; + + Status = load_tree(Vcb, th->address, r, &nt, th->generation, Irp); if (!NT_SUCCESS(Status)) { ERR("load_tree returned %08x\n", Status); ExReleaseResourceLite(&r->nonpaged->load_tree_lock); return Status; } - - th->tree->parent = t; - + + nt->parent = t; + #ifdef DEBUG_PARANOID - if (t && t->header.level <= th->tree->header.level) int3; + if (t && t->header.level <= nt->header.level) int3; #endif - - th->tree->paritem = td; - + + nt->paritem = td; + + th->tree = nt; + ret = TRUE; } else ret 
= FALSE; - -// KeReleaseSpinLock(&thnp->spin_lock, irql); - -// ExReleaseResourceLite(&thnp->lock); + ExReleaseResourceLite(&r->nonpaged->load_tree_lock); - + *loaded = ret; - + return STATUS_SUCCESS; } -tree* STDCALL _free_tree(tree* t, const char* func, const char* file, unsigned int line) { +tree* free_tree(tree* t) { tree* ret; root* r = t->root; - + ExAcquireResourceExclusiveLite(&r->nonpaged->load_tree_lock, TRUE); - ret = free_tree2(t, func, file, line); + ret = free_tree2(t); ExReleaseResourceLite(&r->nonpaged->load_tree_lock); - + return ret; } static __inline tree_data* first_item(tree* t) { LIST_ENTRY* le = t->itemlist.Flink; - + if (le == &t->itemlist) return NULL; - + return CONTAINING_RECORD(le, tree_data, list_entry); } static __inline tree_data* prev_item(tree* t, tree_data* td) { LIST_ENTRY* le = td->list_entry.Blink; - + if (le == &t->itemlist) return NULL; - + return CONTAINING_RECORD(le, tree_data, list_entry); } static __inline tree_data* next_item(tree* t, tree_data* td) { LIST_ENTRY* le = td->list_entry.Flink; - + if (le == &t->itemlist) return NULL; - + return CONTAINING_RECORD(le, tree_data, list_entry); } -static NTSTATUS STDCALL find_item_in_tree(device_extension* Vcb, tree* t, traverse_ptr* tp, const KEY* searchkey, BOOL ignore, UINT8 level, PIRP Irp, - const char* func, const char* file, unsigned int line) { +static NTSTATUS next_item2(device_extension* Vcb, tree* t, tree_data* td, traverse_ptr* tp) { + tree_data* td2 = next_item(t, td); + tree* t2; + + if (td2) { + tp->tree = t; + tp->item = td2; + return STATUS_SUCCESS; + } + + t2 = t; + + do { + td2 = t2->paritem; + t2 = t2->parent; + } while (td2 && !next_item(t2, td2)); + + if (!td2) + return STATUS_NOT_FOUND; + + td2 = next_item(t2, td2); + + return find_item_to_level(Vcb, t2->root, tp, &td2->key, FALSE, t->header.level, NULL); +} + +NTSTATUS skip_to_difference(device_extension* Vcb, traverse_ptr* tp, traverse_ptr* tp2, BOOL* ended1, BOOL* ended2) { + NTSTATUS Status; + tree *t1, 
*t2; + tree_data *td1, *td2; + + t1 = tp->tree; + t2 = tp2->tree; + + do { + td1 = t1->paritem; + td2 = t2->paritem; + t1 = t1->parent; + t2 = t2->parent; + } while (t1 && t2 && t1->header.address == t2->header.address); + + while (TRUE) { + traverse_ptr tp3, tp4; + + Status = next_item2(Vcb, t1, td1, &tp3); + if (Status == STATUS_NOT_FOUND) + *ended1 = TRUE; + else if (!NT_SUCCESS(Status)) { + ERR("next_item2 returned %08x\n", Status); + return Status; + } + + Status = next_item2(Vcb, t2, td2, &tp4); + if (Status == STATUS_NOT_FOUND) + *ended2 = TRUE; + else if (!NT_SUCCESS(Status)) { + ERR("next_item2 returned %08x\n", Status); + return Status; + } + + if (*ended1 || *ended2) { + if (!*ended1) { + Status = find_item(Vcb, t1->root, tp, &tp3.item->key, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + } else if (!*ended2) { + Status = find_item(Vcb, t2->root, tp2, &tp4.item->key, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + } + + return STATUS_SUCCESS; + } + + if (tp3.tree->header.address != tp4.tree->header.address) { + Status = find_item(Vcb, t1->root, tp, &tp3.item->key, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + Status = find_item(Vcb, t2->root, tp2, &tp4.item->key, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + return STATUS_SUCCESS; + } + + t1 = tp3.tree; + td1 = tp3.item; + t2 = tp4.tree; + td2 = tp4.item; + } +} + +static NTSTATUS find_item_in_tree(device_extension* Vcb, tree* t, traverse_ptr* tp, const KEY* searchkey, BOOL ignore, UINT8 level, PIRP Irp) { int cmp; tree_data *td, *lasttd; KEY key2; - - TRACE("(%p, %p, %p, %p, %u)\n", Vcb, t, tp, searchkey, ignore); - + cmp = 1; td = first_item(t); lasttd = NULL; - + if (!td) return STATUS_NOT_FOUND; - + key2 = *searchkey; - + do { cmp = keycmp(key2, 
td->key); -// TRACE("(%u) comparing (%x,%x,%x) to (%x,%x,%x) - %i (ignore = %s)\n", t->header.level, (UINT32)searchkey->obj_id, searchkey->obj_type, (UINT32)searchkey->offset, (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset, cmp, td->ignore ? "TRUE" : "FALSE"); + if (cmp == 1) { lasttd = td; td = next_item(t, td); @@ -407,13 +444,13 @@ static NTSTATUS STDCALL find_item_in_tree(device_extension* Vcb, tree* t, traver if (t->header.level == 0 && cmp == 0 && !ignore && td && td->ignore) { tree_data* origtd = td; - + while (td && td->ignore) td = next_item(t, td); - + if (td) { cmp = keycmp(key2, td->key); - + if (cmp != 0) { td = origtd; cmp = 0; @@ -422,244 +459,229 @@ static NTSTATUS STDCALL find_item_in_tree(device_extension* Vcb, tree* t, traver td = origtd; } } while (td && cmp == 1); - + if ((cmp == -1 || !td) && lasttd) td = lasttd; - + if (t->header.level == 0) { if (td->ignore && !ignore) { traverse_ptr oldtp; - + oldtp.tree = t; oldtp.item = td; - - while (_find_prev_item(Vcb, &oldtp, tp, TRUE, Irp, func, file, line)) { + + while (find_prev_item(Vcb, &oldtp, tp, Irp)) { if (!tp->item->ignore) return STATUS_SUCCESS; - + oldtp = *tp; } - + // if no valid entries before where item should be, look afterwards instead - + oldtp.tree = t; oldtp.item = td; - - while (_find_next_item(Vcb, &oldtp, tp, TRUE, Irp, func, file, line)) { + + while (find_next_item(Vcb, &oldtp, tp, TRUE, Irp)) { if (!tp->item->ignore) return STATUS_SUCCESS; - + oldtp = *tp; } - + return STATUS_NOT_FOUND; } else { tp->tree = t; tp->item = td; } - + return STATUS_SUCCESS; } else { NTSTATUS Status; BOOL loaded; - + while (td && td->treeholder.tree && IsListEmpty(&td->treeholder.tree->itemlist)) { td = prev_item(t, td); } - + if (!td) return STATUS_NOT_FOUND; - + if (t->header.level <= level) { tp->tree = t; tp->item = td; return STATUS_SUCCESS; } - -// if (i > 0) -// TRACE("entering tree from (%x,%x,%x) to (%x,%x,%x) (%p)\n", (UINT32)t->items[i].key.obj_id, 
t->items[i].key.obj_type, (UINT32)t->items[i].key.offset, (UINT32)t->items[i+1].key.obj_id, t->items[i+1].key.obj_type, (UINT32)t->items[i+1].key.offset, t->items[i].tree); - - Status = _do_load_tree(Vcb, &td->treeholder, t->root, t, td, &loaded, Irp, func, file, line); - if (!NT_SUCCESS(Status)) { - ERR("do_load_tree returned %08x\n", Status); - return Status; + + if (!td->treeholder.tree) { + Status = do_load_tree(Vcb, &td->treeholder, t->root, t, td, &loaded, Irp); + if (!NT_SUCCESS(Status)) { + ERR("do_load_tree returned %08x\n", Status); + return Status; + } } - - Status = find_item_in_tree(Vcb, td->treeholder.tree, tp, searchkey, ignore, level, Irp, func, file, line); - + + Status = find_item_in_tree(Vcb, td->treeholder.tree, tp, searchkey, ignore, level, Irp); + return Status; } } -NTSTATUS STDCALL _find_item(device_extension* Vcb, root* r, traverse_ptr* tp, const KEY* searchkey, BOOL ignore, PIRP Irp, const char* func, const char* file, unsigned int line) { +NTSTATUS find_item(_In_ _Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _In_ root* r, _Out_ traverse_ptr* tp, + _In_ const KEY* searchkey, _In_ BOOL ignore, _In_opt_ PIRP Irp) { NTSTATUS Status; BOOL loaded; -// KIRQL irql; - - TRACE("(%p, %p, %p, %p)\n", Vcb, r, tp, searchkey); - + if (!r->treeholder.tree) { - Status = _do_load_tree(Vcb, &r->treeholder, r, NULL, NULL, &loaded, Irp, func, file, line); + Status = do_load_tree(Vcb, &r->treeholder, r, NULL, NULL, &loaded, Irp); if (!NT_SUCCESS(Status)) { ERR("do_load_tree returned %08x\n", Status); return Status; } } - Status = find_item_in_tree(Vcb, r->treeholder.tree, tp, searchkey, ignore, 0, Irp, func, file, line); + Status = find_item_in_tree(Vcb, r->treeholder.tree, tp, searchkey, ignore, 0, Irp); if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) { ERR("find_item_in_tree returned %08x\n", Status); } - -// #ifdef DEBUG_PARANOID -// if (b && !ignore && tp->item->ignore) { -// ERR("error - returning ignored item\n"); -// int3; -// } 
-// #endif - + return Status; } -NTSTATUS STDCALL _find_item_to_level(device_extension* Vcb, root* r, traverse_ptr* tp, const KEY* searchkey, BOOL ignore, UINT8 level, - PIRP Irp, const char* func, const char* file, unsigned int line) { +NTSTATUS find_item_to_level(device_extension* Vcb, root* r, traverse_ptr* tp, const KEY* searchkey, BOOL ignore, UINT8 level, PIRP Irp) { NTSTATUS Status; BOOL loaded; - - TRACE("(%p, %p, %p, %p)\n", Vcb, r, tp, searchkey); - + if (!r->treeholder.tree) { - Status = _do_load_tree(Vcb, &r->treeholder, r, NULL, NULL, &loaded, Irp, func, file, line); + Status = do_load_tree(Vcb, &r->treeholder, r, NULL, NULL, &loaded, Irp); if (!NT_SUCCESS(Status)) { ERR("do_load_tree returned %08x\n", Status); return Status; } } - Status = find_item_in_tree(Vcb, r->treeholder.tree, tp, searchkey, ignore, level, Irp, func, file, line); + Status = find_item_in_tree(Vcb, r->treeholder.tree, tp, searchkey, ignore, level, Irp); if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) { ERR("find_item_in_tree returned %08x\n", Status); } - + if (Status == STATUS_NOT_FOUND) { tp->tree = r->treeholder.tree; tp->item = NULL; } - + return Status; } -BOOL STDCALL _find_next_item(device_extension* Vcb, const traverse_ptr* tp, traverse_ptr* next_tp, BOOL ignore, PIRP Irp, - const char* func, const char* file, unsigned int line) { +BOOL find_next_item(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, const traverse_ptr* tp, traverse_ptr* next_tp, BOOL ignore, PIRP Irp) { tree* t; - tree_data *td, *next; + tree_data *td = NULL, *next; NTSTATUS Status; BOOL loaded; - + next = next_item(tp->tree, tp->item); - + if (!ignore) { while (next && next->ignore) next = next_item(tp->tree, next); } - + if (next) { next_tp->tree = tp->tree; next_tp->item = next; - + #ifdef DEBUG_PARANOID if (!ignore && next_tp->item->ignore) { ERR("error - returning ignored item\n"); int3; } #endif - + return TRUE; } - + if (!tp->tree->parent) return FALSE; - + t = tp->tree; do { if 
(t->parent) { td = next_item(t->parent, t->paritem); - + if (td) break; } - + t = t->parent; } while (t); - + if (!t) return FALSE; - - Status = _do_load_tree(Vcb, &td->treeholder, t->parent->root, t->parent, td, &loaded, Irp, func, file, line); + + Status = do_load_tree(Vcb, &td->treeholder, t->parent->root, t->parent, td, &loaded, Irp); if (!NT_SUCCESS(Status)) { ERR("do_load_tree returned %08x\n", Status); return FALSE; } - + t = td->treeholder.tree; - + while (t->header.level != 0) { tree_data* fi; - + fi = first_item(t); - - Status = _do_load_tree(Vcb, &fi->treeholder, t->parent->root, t, fi, &loaded, Irp, func, file, line); + + Status = do_load_tree(Vcb, &fi->treeholder, t->parent->root, t, fi, &loaded, Irp); if (!NT_SUCCESS(Status)) { ERR("do_load_tree returned %08x\n", Status); return FALSE; } - + t = fi->treeholder.tree; } - + next_tp->tree = t; next_tp->item = first_item(t); - + if (!ignore && next_tp->item->ignore) { traverse_ptr ntp2; BOOL b; - - while ((b = _find_next_item(Vcb, next_tp, &ntp2, TRUE, Irp, func, file, line))) { + + while ((b = find_next_item(Vcb, next_tp, &ntp2, TRUE, Irp))) { *next_tp = ntp2; - + if (!next_tp->item->ignore) break; } - + if (!b) return FALSE; } - + #ifdef DEBUG_PARANOID if (!ignore && next_tp->item->ignore) { ERR("error - returning ignored item\n"); int3; } #endif - + return TRUE; } static __inline tree_data* last_item(tree* t) { LIST_ENTRY* le = t->itemlist.Blink; - + if (le == &t->itemlist) return NULL; - + return CONTAINING_RECORD(le, tree_data, list_entry); } -BOOL STDCALL _find_prev_item(device_extension* Vcb, const traverse_ptr* tp, traverse_ptr* prev_tp, BOOL ignore, PIRP Irp, - const char* func, const char* file, unsigned int line) { +BOOL find_prev_item(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, const traverse_ptr* tp, traverse_ptr* prev_tp, PIRP Irp) { tree* t; tree_data* td; NTSTATUS Status; BOOL loaded; - + // FIXME - support ignore flag if (prev_item(tp->tree, tp->item)) { prev_tp->tree = 
tp->tree; @@ -667,148 +689,149 @@ BOOL STDCALL _find_prev_item(device_extension* Vcb, const traverse_ptr* tp, trav return TRUE; } - + if (!tp->tree->parent) return FALSE; - + t = tp->tree; while (t && (!t->parent || !prev_item(t->parent, t->paritem))) { t = t->parent; } - + if (!t) return FALSE; - + td = prev_item(t->parent, t->paritem); - - Status = _do_load_tree(Vcb, &td->treeholder, t->parent->root, t->parent, td, &loaded, Irp, func, file, line); + + Status = do_load_tree(Vcb, &td->treeholder, t->parent->root, t->parent, td, &loaded, Irp); if (!NT_SUCCESS(Status)) { ERR("do_load_tree returned %08x\n", Status); return FALSE; } - + t = td->treeholder.tree; - + while (t->header.level != 0) { tree_data* li; - + li = last_item(t); - - Status = _do_load_tree(Vcb, &li->treeholder, t->parent->root, t, li, &loaded, Irp, func, file, line); + + Status = do_load_tree(Vcb, &li->treeholder, t->parent->root, t, li, &loaded, Irp); if (!NT_SUCCESS(Status)) { ERR("do_load_tree returned %08x\n", Status); return FALSE; } - + t = li->treeholder.tree; } - + prev_tp->tree = t; prev_tp->item = last_item(t); - + return TRUE; } -// static void free_tree_holder(tree_holder* th) { -// root* r = th->tree->root; -// -// // ExAcquireResourceExclusiveLite(&th->nonpaged->lock, TRUE); -// ExAcquireResourceExclusiveLite(&r->nonpaged->load_tree_lock, TRUE); -// -// free_tree2(th->tree, funcname, __FILE__, __LINE__); -// -// // ExReleaseResourceLite(&th->nonpaged->lock); -// ExReleaseResourceLite(&r->nonpaged->load_tree_lock); -// } - void free_trees_root(device_extension* Vcb, root* r) { LIST_ENTRY* le; - UINT8 level; - + ULONG level; + for (level = 0; level <= 255; level++) { BOOL empty = TRUE; - + le = Vcb->trees.Flink; - + while (le != &Vcb->trees) { LIST_ENTRY* nextle = le->Flink; tree* t = CONTAINING_RECORD(le, tree, list_entry); - + if (t->root == r) { if (t->header.level == level) { BOOL top = !t->paritem; - + empty = FALSE; - - free_tree2(t, funcname, __FILE__, __LINE__); + + 
free_tree2(t); if (top && r->treeholder.tree == t) r->treeholder.tree = NULL; - + if (IsListEmpty(&Vcb->trees)) return; } else if (t->header.level > level) empty = FALSE; } - + le = nextle; } - + if (empty) break; } } -void STDCALL free_trees(device_extension* Vcb) { +void free_trees(device_extension* Vcb) { LIST_ENTRY* le; - UINT8 level; - + ULONG level; + for (level = 0; level <= 255; level++) { BOOL empty = TRUE; - + le = Vcb->trees.Flink; - + while (le != &Vcb->trees) { LIST_ENTRY* nextle = le->Flink; tree* t = CONTAINING_RECORD(le, tree, list_entry); root* r = t->root; - + if (t->header.level == level) { BOOL top = !t->paritem; - + empty = FALSE; - - free_tree2(t, funcname, __FILE__, __LINE__); + + free_tree2(t); if (top && r->treeholder.tree == t) r->treeholder.tree = NULL; - + if (IsListEmpty(&Vcb->trees)) return; } else if (t->header.level > level) empty = FALSE; - + le = nextle; } - + if (empty) break; } } -void add_rollback(device_extension* Vcb, LIST_ENTRY* rollback, enum rollback_type type, void* ptr) { +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(suppress: 28194) +#endif +void add_rollback(_In_ LIST_ENTRY* rollback, _In_ enum rollback_type type, _In_ __drv_aliasesMem void* ptr) { rollback_item* ri; - + ri = ExAllocatePoolWithTag(PagedPool, sizeof(rollback_item), ALLOC_TAG); if (!ri) { ERR("out of memory\n"); return; } - + ri->type = type; ri->ptr = ptr; InsertTailList(rollback, &ri->list_entry); } +#ifdef _MSC_VER +#pragma warning(pop) +#endif -BOOL STDCALL insert_tree_item(device_extension* Vcb, root* r, UINT64 obj_id, UINT8 obj_type, UINT64 offset, void* data, UINT32 size, traverse_ptr* ptp, PIRP Irp, LIST_ENTRY* rollback) { +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(suppress: 28194) +#endif +NTSTATUS insert_tree_item(_In_ _Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _In_ root* r, _In_ UINT64 obj_id, + _In_ UINT8 obj_type, _In_ UINT64 offset, _In_reads_bytes_opt_(size) _When_(return >= 0, 
__drv_aliasesMem) void* data, + _In_ UINT16 size, _Out_opt_ traverse_ptr* ptp, _In_opt_ PIRP Irp) { traverse_ptr tp; KEY searchkey; int cmp; @@ -818,79 +841,71 @@ BOOL STDCALL insert_tree_item(device_extension* Vcb, root* r, UINT64 obj_id, UIN LIST_ENTRY* le; KEY firstitem = {0xcccccccccccccccc,0xcc,0xcccccccccccccccc}; #endif - traverse_ptr* tp2; - BOOL success = FALSE; NTSTATUS Status; - - TRACE("(%p, %p, %llx, %x, %llx, %p, %x, %p, %p)\n", Vcb, r, obj_id, obj_type, offset, data, size, ptp, rollback); - -// #ifdef DEBUG_PARANOID -// if (!ExIsResourceAcquiredExclusiveLite(&Vcb->tree_lock)) { -// ERR("ERROR - tree_lock not held exclusively\n"); -// int3; -// } -// #endif - + + TRACE("(%p, %p, %llx, %x, %llx, %p, %x, %p)\n", Vcb, r, obj_id, obj_type, offset, data, size, ptp); + searchkey.obj_id = obj_id; searchkey.obj_type = obj_type; searchkey.offset = offset; - + Status = find_item(Vcb, r, &tp, &searchkey, TRUE, Irp); if (Status == STATUS_NOT_FOUND) { if (r) { if (!r->treeholder.tree) { BOOL loaded; - + Status = do_load_tree(Vcb, &r->treeholder, r, NULL, NULL, &loaded, Irp); - if (!NT_SUCCESS(Status)) { ERR("do_load_tree returned %08x\n", Status); - goto end; + return Status; } } - + if (r->treeholder.tree && r->treeholder.tree->header.num_items == 0) { tp.tree = r->treeholder.tree; tp.item = NULL; } else { ERR("error: unable to load tree for root %llx\n", r->id); - goto end; + return STATUS_INTERNAL_ERROR; } } else { ERR("error: find_item returned %08x\n", Status); - goto end; + return Status; } } else if (!NT_SUCCESS(Status)) { ERR("find_item returned %08x\n", Status); - goto end; + return Status; } - + TRACE("tp.item = %p\n", tp.item); - + if (tp.item) { TRACE("tp.item->key = %p\n", &tp.item->key); cmp = keycmp(searchkey, tp.item->key); - - if (cmp == 0 && !tp.item->ignore) { // FIXME - look for all items of the same key to make sure none are non-ignored + + if (cmp == 0 && !tp.item->ignore) { ERR("error: key (%llx,%x,%llx) already present\n", obj_id, obj_type, 
offset); +#ifdef DEBUG_PARANOID int3; - goto end; +#endif + return STATUS_INTERNAL_ERROR; } } else cmp = -1; - + td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); if (!td) { ERR("out of memory\n"); - goto end; + return STATUS_INSUFFICIENT_RESOURCES; } - + td->key = searchkey; td->size = size; td->data = data; td->ignore = FALSE; td->inserted = TRUE; - + #ifdef _DEBUG le = tp.tree->itemlist.Flink; while (le != &tp.tree->itemlist) { @@ -898,155 +913,101 @@ BOOL STDCALL insert_tree_item(device_extension* Vcb, root* r, UINT64 obj_id, UIN firstitem = td2->key; break; } - + TRACE("inserting %llx,%x,%llx into tree beginning %llx,%x,%llx (num_items %x)\n", obj_id, obj_type, offset, firstitem.obj_id, firstitem.obj_type, firstitem.offset, tp.tree->header.num_items); #endif - + if (cmp == -1) { // very first key in root InsertHeadList(&tp.tree->itemlist, &td->list_entry); paritem = tp.tree->paritem; while (paritem) { -// ERR("paritem = %llx,%x,%llx, tp.item->key = %llx,%x,%llx\n", paritem->key.obj_id, paritem->key.obj_type, paritem->key.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); if (!keycmp(paritem->key, tp.item->key)) { paritem->key = searchkey; } else break; - + paritem = paritem->treeholder.tree->paritem; } } else if (cmp == 0) InsertHeadList(tp.item->list_entry.Blink, &td->list_entry); // make sure non-deleted item is before deleted ones else InsertHeadList(&tp.item->list_entry, &td->list_entry); - + tp.tree->header.num_items++; tp.tree->size += size + sizeof(leaf_node); -// ERR("tree %p, num_items now %x\n", tp.tree, tp.tree->header.num_items); -// ERR("size now %x\n", tp.tree->size); - + if (!tp.tree->write) { tp.tree->write = TRUE; Vcb->need_write = TRUE; } - + if (ptp) *ptp = tp; - + t = tp.tree; while (t) { if (t->paritem && t->paritem->ignore) { t->paritem->ignore = FALSE; t->parent->header.num_items++; t->parent->size += sizeof(internal_node); - - // FIXME - do we need to add a rollback entry here? 
} t->header.generation = Vcb->superblock.generation; t = t->parent; } - - // FIXME - free this correctly - - tp2 = ExAllocateFromPagedLookasideList(&Vcb->traverse_ptr_lookaside); - if (!tp2) { - ERR("out of memory\n"); - goto end; - } - - tp2->tree = tp.tree; - tp2->item = td; - - add_rollback(Vcb, rollback, ROLLBACK_INSERT_ITEM, tp2); - - success = TRUE; - -end: - return success; -} -static __inline tree_data* first_valid_item(tree* t) { - LIST_ENTRY* le = t->itemlist.Flink; - - while (le != &t->itemlist) { - tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); - - if (!td->ignore) - return td; - - le = le->Flink; - } - - return NULL; + return STATUS_SUCCESS; } +#ifdef _MSC_VER +#pragma warning(pop) +#endif -void STDCALL delete_tree_item(device_extension* Vcb, traverse_ptr* tp, LIST_ENTRY* rollback) { +NTSTATUS delete_tree_item(_In_ _Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _Inout_ traverse_ptr* tp) { tree* t; UINT64 gen; - traverse_ptr* tp2; TRACE("deleting item %llx,%x,%llx (ignore = %s)\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, tp->item->ignore ? 
"TRUE" : "FALSE"); - -#ifdef DEBUG_PARANOID -// if (!ExIsResourceAcquiredExclusiveLite(&Vcb->tree_lock)) { -// ERR("ERROR - tree_lock not held exclusively\n"); -// int3; -// } +#ifdef DEBUG_PARANOID if (tp->item->ignore) { ERR("trying to delete already-deleted item %llx,%x,%llx\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset); int3; + return STATUS_INTERNAL_ERROR; } #endif tp->item->ignore = TRUE; - + if (!tp->tree->write) { tp->tree->write = TRUE; Vcb->need_write = TRUE; } - + tp->tree->header.num_items--; - + if (tp->tree->header.level == 0) tp->tree->size -= sizeof(leaf_node) + tp->item->size; else tp->tree->size -= sizeof(internal_node); - + gen = tp->tree->Vcb->superblock.generation; - + t = tp->tree; while (t) { t->header.generation = gen; t = t->parent; } - - tp2 = ExAllocateFromPagedLookasideList(&Vcb->traverse_ptr_lookaside); - if (!tp2) { - ERR("out of memory\n"); - return; - } - - tp2->tree = tp->tree; - tp2->item = tp->item; - add_rollback(Vcb, rollback, ROLLBACK_DELETE_ITEM, tp2); + return STATUS_SUCCESS; } -void clear_rollback(device_extension* Vcb, LIST_ENTRY* rollback) { - rollback_item* ri; - +void clear_rollback(LIST_ENTRY* rollback) { while (!IsListEmpty(rollback)) { LIST_ENTRY* le = RemoveHeadList(rollback); - ri = CONTAINING_RECORD(le, rollback_item, list_entry); - + rollback_item* ri = CONTAINING_RECORD(le, rollback_item, list_entry); + switch (ri->type) { - case ROLLBACK_INSERT_ITEM: - case ROLLBACK_DELETE_ITEM: - ExFreeToPagedLookasideList(&Vcb->traverse_ptr_lookaside, ri->ptr); - break; - case ROLLBACK_ADD_SPACE: case ROLLBACK_SUBTRACT_SPACE: case ROLLBACK_INSERT_EXTENT: @@ -1057,7 +1018,7 @@ void clear_rollback(device_extension* Vcb, LIST_ENTRY* rollback) { default: break; } - + ExFreePool(ri); } } @@ -1065,102 +1026,66 @@ void clear_rollback(device_extension* Vcb, LIST_ENTRY* rollback) { void do_rollback(device_extension* Vcb, LIST_ENTRY* rollback) { NTSTATUS Status; rollback_item* ri; - + while 
(!IsListEmpty(rollback)) { LIST_ENTRY* le = RemoveTailList(rollback); ri = CONTAINING_RECORD(le, rollback_item, list_entry); - + switch (ri->type) { - case ROLLBACK_INSERT_ITEM: - { - traverse_ptr* tp = ri->ptr; - - if (!tp->item->ignore) { - tp->item->ignore = TRUE; - tp->tree->header.num_items--; - - if (tp->tree->header.level == 0) - tp->tree->size -= sizeof(leaf_node) + tp->item->size; - else - tp->tree->size -= sizeof(internal_node); - } - - ExFreeToPagedLookasideList(&Vcb->traverse_ptr_lookaside, tp); - break; - } - - case ROLLBACK_DELETE_ITEM: - { - traverse_ptr* tp = ri->ptr; - - if (tp->item->ignore) { - tp->item->ignore = FALSE; - tp->tree->header.num_items++; - - if (tp->tree->header.level == 0) - tp->tree->size += sizeof(leaf_node) + tp->item->size; - else - tp->tree->size += sizeof(internal_node); - } - - ExFreeToPagedLookasideList(&Vcb->traverse_ptr_lookaside, tp); - break; - } - case ROLLBACK_INSERT_EXTENT: { rollback_extent* re = ri->ptr; - + re->ext->ignore = TRUE; - - if (re->ext->data->type == EXTENT_TYPE_REGULAR || re->ext->data->type == EXTENT_TYPE_PREALLOC) { - EXTENT_DATA2* ed2 = (EXTENT_DATA2*)re->ext->data->data; - + + if (re->ext->extent_data.type == EXTENT_TYPE_REGULAR || re->ext->extent_data.type == EXTENT_TYPE_PREALLOC) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)re->ext->extent_data.data; + if (ed2->size != 0) { chunk* c = get_chunk_from_address(Vcb, ed2->address); - + if (c) { Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, re->fcb->subvol->id, re->fcb->inode, re->ext->offset - ed2->offset, -1, re->fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, NULL); - + if (!NT_SUCCESS(Status)) ERR("update_changed_extent_ref returned %08x\n", Status); } - + re->fcb->inode_item.st_blocks -= ed2->num_bytes; } } - + ExFreePool(re); break; } - + case ROLLBACK_DELETE_EXTENT: { rollback_extent* re = ri->ptr; - + re->ext->ignore = FALSE; - - if (re->ext->data->type == EXTENT_TYPE_REGULAR || re->ext->data->type == EXTENT_TYPE_PREALLOC) 
{ - EXTENT_DATA2* ed2 = (EXTENT_DATA2*)re->ext->data->data; - + + if (re->ext->extent_data.type == EXTENT_TYPE_REGULAR || re->ext->extent_data.type == EXTENT_TYPE_PREALLOC) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)re->ext->extent_data.data; + if (ed2->size != 0) { chunk* c = get_chunk_from_address(Vcb, ed2->address); - + if (c) { Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, re->fcb->subvol->id, re->fcb->inode, re->ext->offset - ed2->offset, 1, re->fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, NULL); - + if (!NT_SUCCESS(Status)) ERR("update_changed_extent_ref returned %08x\n", Status); } - + re->fcb->inode_item.st_blocks += ed2->num_bytes; } } - + ExFreePool(re); break; } @@ -1169,75 +1094,85 @@ void do_rollback(device_extension* Vcb, LIST_ENTRY* rollback) { case ROLLBACK_SUBTRACT_SPACE: { rollback_space* rs = ri->ptr; - + if (rs->chunk) ExAcquireResourceExclusiveLite(&rs->chunk->lock, TRUE); - + if (ri->type == ROLLBACK_ADD_SPACE) - space_list_subtract2(Vcb, rs->list, rs->list_size, rs->address, rs->length, NULL); + space_list_subtract2(rs->list, rs->list_size, rs->address, rs->length, NULL, NULL); else - space_list_add2(Vcb, rs->list, rs->list_size, rs->address, rs->length, NULL); - + space_list_add2(rs->list, rs->list_size, rs->address, rs->length, NULL, NULL); + + if (rs->chunk) { + if (ri->type == ROLLBACK_ADD_SPACE) + rs->chunk->used += rs->length; + else + rs->chunk->used -= rs->length; + } + if (rs->chunk) { LIST_ENTRY* le2 = le->Blink; - + while (le2 != rollback) { LIST_ENTRY* le3 = le2->Blink; rollback_item* ri2 = CONTAINING_RECORD(le2, rollback_item, list_entry); - + if (ri2->type == ROLLBACK_ADD_SPACE || ri2->type == ROLLBACK_SUBTRACT_SPACE) { rollback_space* rs2 = ri2->ptr; - + if (rs2->chunk == rs->chunk) { - if (ri2->type == ROLLBACK_ADD_SPACE) - space_list_subtract2(Vcb, rs2->list, rs2->list_size, rs2->address, rs2->length, NULL); - else - space_list_add2(Vcb, rs2->list, rs2->list_size, rs2->address, rs2->length, NULL); - 
+ if (ri2->type == ROLLBACK_ADD_SPACE) { + space_list_subtract2(rs2->list, rs2->list_size, rs2->address, rs2->length, NULL, NULL); + rs->chunk->used += rs2->length; + } else { + space_list_add2(rs2->list, rs2->list_size, rs2->address, rs2->length, NULL, NULL); + rs->chunk->used -= rs2->length; + } + ExFreePool(rs2); RemoveEntryList(&ri2->list_entry); ExFreePool(ri2); } } - + le2 = le3; } - + ExReleaseResourceLite(&rs->chunk->lock); } - + ExFreePool(rs); - + break; } } - + ExFreePool(ri); } } static void find_tree_end(tree* t, KEY* tree_end, BOOL* no_end) { tree* p; - + p = t; do { tree_data* pi; - + if (!p->parent) { *no_end = TRUE; return; } - + pi = p->paritem; - + if (pi->list_entry.Flink != &p->parent->itemlist) { tree_data* td = CONTAINING_RECORD(pi->list_entry.Flink, tree_data, list_entry); - + *tree_end = td->key; *no_end = FALSE; return; } - + p = p->parent; } while (p); } @@ -1246,14 +1181,14 @@ void clear_batch_list(device_extension* Vcb, LIST_ENTRY* batchlist) { while (!IsListEmpty(batchlist)) { LIST_ENTRY* le = RemoveHeadList(batchlist); batch_root* br = CONTAINING_RECORD(le, batch_root, list_entry); - + while (!IsListEmpty(&br->items)) { LIST_ENTRY* le2 = RemoveHeadList(&br->items); batch_item* bi = CONTAINING_RECORD(le2, batch_item, list_entry); - + ExFreeToPagedLookasideList(&Vcb->batch_item_lookaside, bi); } - + ExFreePool(br); } } @@ -1263,54 +1198,55 @@ static void add_delete_inode_extref(device_extension* Vcb, batch_item* bi, LIST_ LIST_ENTRY* le; INODE_REF* delir = (INODE_REF*)bi->data; INODE_EXTREF* ier; - + TRACE("entry in INODE_REF not found, adding Batch_DeleteInodeExtRef entry\n"); - + bi2 = ExAllocateFromPagedLookasideList(&Vcb->batch_item_lookaside); if (!bi2) { ERR("out of memory\n"); return; } - + ier = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_EXTREF) - 1 + delir->n, ALLOC_TAG); if (!ier) { ERR("out of memory\n"); + ExFreePool(bi2); return; } - + ier->dir = bi->key.offset; ier->index = delir->index; ier->n = delir->n; 
RtlCopyMemory(ier->name, delir->name, delir->n); - + bi2->key.obj_id = bi->key.obj_id; bi2->key.obj_type = TYPE_INODE_EXTREF; bi2->key.offset = calc_crc32c((UINT32)bi->key.offset, (UINT8*)ier->name, ier->n); bi2->data = ier; bi2->datalen = sizeof(INODE_EXTREF) - 1 + ier->n; bi2->operation = Batch_DeleteInodeExtRef; - + le = bi->list_entry.Flink; while (le != listhead) { batch_item* bi3 = CONTAINING_RECORD(le, batch_item, list_entry); - + if (keycmp(bi3->key, bi2->key) != -1) { InsertHeadList(le->Blink, &bi2->list_entry); return; } - + le = le->Flink; } - + InsertTailList(listhead, &bi2->list_entry); } -static BOOL handle_batch_collision(device_extension* Vcb, batch_item* bi, tree* t, tree_data* td, tree_data* newtd, LIST_ENTRY* listhead, LIST_ENTRY* rollback) { +static NTSTATUS handle_batch_collision(device_extension* Vcb, batch_item* bi, tree* t, tree_data* td, tree_data* newtd, LIST_ENTRY* listhead, BOOL* ignore) { if (bi->operation == Batch_Delete || bi->operation == Batch_SetXattr || bi->operation == Batch_DirItem || bi->operation == Batch_InodeRef || bi->operation == Batch_InodeExtRef || bi->operation == Batch_DeleteDirItem || bi->operation == Batch_DeleteInodeRef || - bi->operation == Batch_DeleteInodeExtRef) { - UINT16 maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node); - + bi->operation == Batch_DeleteInodeExtRef || bi->operation == Batch_DeleteXattr) { + UINT16 maxlen = (UINT16)(Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node)); + switch (bi->operation) { case Batch_SetXattr: { if (td->size < sizeof(DIR_ITEM)) { @@ -1320,71 +1256,70 @@ static BOOL handle_batch_collision(device_extension* Vcb, batch_item* bi, tree* ULONG size = td->size; DIR_ITEM* newxa = (DIR_ITEM*)bi->data; DIR_ITEM* xa = (DIR_ITEM*)td->data; - + while (TRUE) { ULONG oldxasize; - + if (size < sizeof(DIR_ITEM) || size < sizeof(DIR_ITEM) - 1 + xa->m + xa->n) { ERR("(%llx,%x,%llx) was truncated\n", bi->key.obj_id, bi->key.obj_type, 
bi->key.offset); break; } - + oldxasize = sizeof(DIR_ITEM) - 1 + xa->m + xa->n; - + if (xa->n == newxa->n && RtlCompareMemory(newxa->name, xa->name, xa->n) == xa->n) { UINT64 pos; - + // replace - + if (td->size + bi->datalen - oldxasize > maxlen) ERR("DIR_ITEM would be over maximum size, truncating (%u + %u - %u > %u)\n", td->size, bi->datalen, oldxasize, maxlen); - + newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen - oldxasize, ALLOC_TAG); if (!newdata) { ERR("out of memory\n"); - return TRUE; + return STATUS_INSUFFICIENT_RESOURCES; } - + pos = (UINT8*)xa - td->data; - if (pos + oldxasize < td->size) { // copy after changed xattr - RtlCopyMemory(newdata + pos + bi->datalen, td->data + pos + oldxasize, td->size - pos - oldxasize); - } - + if (pos + oldxasize < td->size) // copy after changed xattr + RtlCopyMemory(newdata + pos + bi->datalen, td->data + pos + oldxasize, (ULONG)(td->size - pos - oldxasize)); + if (pos > 0) { // copy before changed xattr - RtlCopyMemory(newdata, td->data, pos); + RtlCopyMemory(newdata, td->data, (ULONG)pos); xa = (DIR_ITEM*)(newdata + pos); } else xa = (DIR_ITEM*)newdata; - + RtlCopyMemory(xa, bi->data, bi->datalen); - - bi->datalen = min(td->size + bi->datalen - oldxasize, maxlen); - + + bi->datalen = (UINT16)min(td->size + bi->datalen - oldxasize, maxlen); + ExFreePool(bi->data); bi->data = newdata; - + break; } - + if ((UINT8*)xa - (UINT8*)td->data + oldxasize >= size) { // not found, add to end of data - + if (td->size + bi->datalen > maxlen) ERR("DIR_ITEM would be over maximum size, truncating (%u + %u > %u)\n", td->size, bi->datalen, maxlen); - + newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen, ALLOC_TAG); if (!newdata) { ERR("out of memory\n"); - return TRUE; + return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(newdata, td->data, td->size); - + xa = (DIR_ITEM*)((UINT8*)newdata + td->size); RtlCopyMemory(xa, bi->data, bi->datalen); - + bi->datalen = min(bi->datalen + td->size, maxlen); 
- + ExFreePool(bi->data); bi->data = newdata; @@ -1397,167 +1332,168 @@ static BOOL handle_batch_collision(device_extension* Vcb, batch_item* bi, tree* } break; } - + case Batch_DirItem: { UINT8* newdata; - + if (td->size + bi->datalen > maxlen) { ERR("DIR_ITEM would be over maximum size (%u + %u > %u)\n", td->size, bi->datalen, maxlen); - return TRUE; + return STATUS_INTERNAL_ERROR; } - + newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen, ALLOC_TAG); if (!newdata) { ERR("out of memory\n"); - return TRUE; + return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(newdata, td->data, td->size); - + RtlCopyMemory(newdata + td->size, bi->data, bi->datalen); bi->datalen += td->size; - + ExFreePool(bi->data); bi->data = newdata; - + break; } - + case Batch_InodeRef: { UINT8* newdata; - + if (td->size + bi->datalen > maxlen) { if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF) { INODE_REF* ir = (INODE_REF*)bi->data; INODE_EXTREF* ier; - ULONG ierlen; + UINT16 ierlen; batch_item* bi2; LIST_ENTRY* le; BOOL inserted = FALSE; - + TRACE("INODE_REF would be too long, adding INODE_EXTREF instead\n"); - ierlen = sizeof(INODE_EXTREF) - 1 + ir->n; - + ierlen = (UINT16)(offsetof(INODE_EXTREF, name[0]) + ir->n); + ier = ExAllocatePoolWithTag(PagedPool, ierlen, ALLOC_TAG); if (!ier) { ERR("out of memory\n"); - return TRUE; + return STATUS_INSUFFICIENT_RESOURCES; } - + ier->dir = bi->key.offset; ier->index = ir->index; ier->n = ir->n; RtlCopyMemory(ier->name, ir->name, ier->n); - + bi2 = ExAllocateFromPagedLookasideList(&Vcb->batch_item_lookaside); if (!bi2) { ERR("out of memory\n"); ExFreePool(ier); - return TRUE; + return STATUS_INSUFFICIENT_RESOURCES; } - + bi2->key.obj_id = bi->key.obj_id; bi2->key.obj_type = TYPE_INODE_EXTREF; bi2->key.offset = calc_crc32c((UINT32)ier->dir, (UINT8*)ier->name, ier->n); bi2->data = ier; bi2->datalen = ierlen; bi2->operation = Batch_InodeExtRef; - + le = bi->list_entry.Flink; while (le != listhead) { batch_item* 
bi3 = CONTAINING_RECORD(le, batch_item, list_entry); - + if (keycmp(bi3->key, bi2->key) != -1) { InsertHeadList(le->Blink, &bi2->list_entry); inserted = TRUE; } - + le = le->Flink; } - + if (!inserted) InsertTailList(listhead, &bi2->list_entry); - - return TRUE; + + *ignore = TRUE; + return STATUS_SUCCESS; } else { ERR("INODE_REF would be over maximum size (%u + %u > %u)\n", td->size, bi->datalen, maxlen); - return TRUE; + return STATUS_INTERNAL_ERROR; } } - + newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen, ALLOC_TAG); if (!newdata) { ERR("out of memory\n"); - return TRUE; + return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(newdata, td->data, td->size); - + RtlCopyMemory(newdata + td->size, bi->data, bi->datalen); bi->datalen += td->size; - + ExFreePool(bi->data); bi->data = newdata; - + break; } - + case Batch_InodeExtRef: { UINT8* newdata; - + if (td->size + bi->datalen > maxlen) { ERR("INODE_EXTREF would be over maximum size (%u + %u > %u)\n", td->size, bi->datalen, maxlen); - return TRUE; + return STATUS_INTERNAL_ERROR; } - + newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen, ALLOC_TAG); if (!newdata) { ERR("out of memory\n"); - return TRUE; + return STATUS_INSUFFICIENT_RESOURCES; } - + RtlCopyMemory(newdata, td->data, td->size); - + RtlCopyMemory(newdata + td->size, bi->data, bi->datalen); bi->datalen += td->size; - + ExFreePool(bi->data); bi->data = newdata; - + break; } - + case Batch_DeleteDirItem: { if (td->size < sizeof(DIR_ITEM)) { - WARN("DIR_ITEM was %u bytes, expected at least %u\n", td->size, sizeof(DIR_ITEM)); - return TRUE; + ERR("DIR_ITEM was %u bytes, expected at least %u\n", td->size, sizeof(DIR_ITEM)); + return STATUS_INTERNAL_ERROR; } else { DIR_ITEM *di, *deldi; LONG len; - + deldi = (DIR_ITEM*)bi->data; di = (DIR_ITEM*)td->data; len = td->size; - + do { if (di->m == deldi->m && di->n == deldi->n && RtlCompareMemory(di->name, deldi->name, di->n + di->m) == di->n + di->m) { - ULONG newlen = td->size - 
(sizeof(DIR_ITEM) - sizeof(char) + di->n + di->m); - + UINT16 newlen = td->size - (sizeof(DIR_ITEM) - sizeof(char) + di->n + di->m); + if (newlen == 0) { TRACE("deleting DIR_ITEM\n"); } else { UINT8 *newdi = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *dioff; tree_data* td2; - + if (!newdi) { ERR("out of memory\n"); - return TRUE; + return STATUS_INSUFFICIENT_RESOURCES; } - + TRACE("modifying DIR_ITEM\n"); if ((UINT8*)di > td->data) { @@ -1566,83 +1502,84 @@ static BOOL handle_batch_collision(device_extension* Vcb, batch_item* bi, tree* } else { dioff = newdi; } - - if ((UINT8*)&di->name[di->n + di->m] - td->data < td->size) + + if ((UINT8*)&di->name[di->n + di->m] < td->data + td->size) RtlCopyMemory(dioff, &di->name[di->n + di->m], td->size - ((UINT8*)&di->name[di->n + di->m] - td->data)); - + td2 = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); if (!td2) { ERR("out of memory\n"); - return TRUE; + return STATUS_INSUFFICIENT_RESOURCES; } - + td2->key = bi->key; td2->size = newlen; td2->data = newdi; td2->ignore = FALSE; td2->inserted = TRUE; - + InsertHeadList(td->list_entry.Blink, &td2->list_entry); - + t->header.num_items++; t->size += newlen + sizeof(leaf_node); t->write = TRUE; } - + break; } - + len -= sizeof(DIR_ITEM) - sizeof(char) + di->n + di->m; di = (DIR_ITEM*)&di->name[di->n + di->m]; - + if (len == 0) { TRACE("could not find DIR_ITEM to delete\n"); - return TRUE; + *ignore = TRUE; + return STATUS_SUCCESS; } } while (len > 0); } break; } - + case Batch_DeleteInodeRef: { if (td->size < sizeof(INODE_REF)) { - WARN("INODE_REF was %u bytes, expected at least %u\n", td->size, sizeof(INODE_REF)); - return TRUE; + ERR("INODE_REF was %u bytes, expected at least %u\n", td->size, sizeof(INODE_REF)); + return STATUS_INTERNAL_ERROR; } else { INODE_REF *ir, *delir; ULONG len; BOOL changed = FALSE; - + delir = (INODE_REF*)bi->data; ir = (INODE_REF*)td->data; len = td->size; - + do { - ULONG itemlen; - - if (len < sizeof(INODE_REF) || len < 
sizeof(INODE_REF) - 1 + ir->n) { + UINT16 itemlen; + + if (len < sizeof(INODE_REF) || len < offsetof(INODE_REF, name[0]) + ir->n) { ERR("INODE_REF was truncated\n"); break; } - - itemlen = sizeof(INODE_REF) - sizeof(char) + ir->n; - + + itemlen = (UINT16)offsetof(INODE_REF, name[0]) + ir->n; + if (ir->n == delir->n && RtlCompareMemory(ir->name, delir->name, ir->n) == ir->n) { - ULONG newlen = td->size - itemlen; - + UINT16 newlen = td->size - itemlen; + changed = TRUE; - + if (newlen == 0) TRACE("deleting INODE_REF\n"); else { UINT8 *newir = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *iroff; tree_data* td2; - + if (!newir) { ERR("out of memory\n"); - return TRUE; + return STATUS_INSUFFICIENT_RESOURCES; } - + TRACE("modifying INODE_REF\n"); if ((UINT8*)ir > td->data) { @@ -1651,90 +1588,91 @@ static BOOL handle_batch_collision(device_extension* Vcb, batch_item* bi, tree* } else { iroff = newir; } - - if ((UINT8*)&ir->name[ir->n] - td->data < td->size) + + if ((UINT8*)&ir->name[ir->n] < td->data + td->size) RtlCopyMemory(iroff, &ir->name[ir->n], td->size - ((UINT8*)&ir->name[ir->n] - td->data)); - + td2 = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); if (!td2) { ERR("out of memory\n"); - return TRUE; + return STATUS_INSUFFICIENT_RESOURCES; } - + td2->key = bi->key; td2->size = newlen; td2->data = newir; td2->ignore = FALSE; td2->inserted = TRUE; - + InsertHeadList(td->list_entry.Blink, &td2->list_entry); - + t->header.num_items++; t->size += newlen + sizeof(leaf_node); t->write = TRUE; } - + break; } - + if (len > itemlen) { len -= itemlen; ir = (INODE_REF*)&ir->name[ir->n]; } else break; } while (len > 0); - + if (!changed) { if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF) { TRACE("entry in INODE_REF not found, adding Batch_DeleteInodeExtRef entry\n"); - + add_delete_inode_extref(Vcb, bi, listhead); - - return TRUE; + + *ignore = TRUE; + return STATUS_SUCCESS; } else WARN("entry not found in INODE_REF\n"); } } - + 
break; } - + case Batch_DeleteInodeExtRef: { if (td->size < sizeof(INODE_EXTREF)) { - WARN("INODE_EXTREF was %u bytes, expected at least %u\n", td->size, sizeof(INODE_EXTREF)); - return TRUE; + ERR("INODE_EXTREF was %u bytes, expected at least %u\n", td->size, sizeof(INODE_EXTREF)); + return STATUS_INTERNAL_ERROR; } else { INODE_EXTREF *ier, *delier; ULONG len; - + delier = (INODE_EXTREF*)bi->data; ier = (INODE_EXTREF*)td->data; len = td->size; - + do { - ULONG itemlen; - - if (len < sizeof(INODE_EXTREF) || len < sizeof(INODE_EXTREF) - 1 + ier->n) { + UINT16 itemlen; + + if (len < sizeof(INODE_EXTREF) || len < offsetof(INODE_EXTREF, name[0]) + ier->n) { ERR("INODE_REF was truncated\n"); break; } - - itemlen = sizeof(INODE_EXTREF) - sizeof(char) + ier->n; - + + itemlen = (UINT16)offsetof(INODE_EXTREF, name[0]) + ier->n; + if (ier->dir == delier->dir && ier->n == delier->n && RtlCompareMemory(ier->name, delier->name, ier->n) == ier->n) { - ULONG newlen = td->size - itemlen; - + UINT16 newlen = td->size - itemlen; + if (newlen == 0) TRACE("deleting INODE_EXTREF\n"); else { UINT8 *newier = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *ieroff; tree_data* td2; - + if (!newier) { ERR("out of memory\n"); - return TRUE; + return STATUS_INSUFFICIENT_RESOURCES; } - + TRACE("modifying INODE_EXTREF\n"); if ((UINT8*)ier > td->data) { @@ -1743,32 +1681,33 @@ static BOOL handle_batch_collision(device_extension* Vcb, batch_item* bi, tree* } else { ieroff = newier; } - - if ((UINT8*)&ier->name[ier->n] - td->data < td->size) + + if ((UINT8*)&ier->name[ier->n] < td->data + td->size) RtlCopyMemory(ieroff, &ier->name[ier->n], td->size - ((UINT8*)&ier->name[ier->n] - td->data)); - + td2 = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); if (!td2) { ERR("out of memory\n"); - return TRUE; + ExFreePool(newier); + return STATUS_INSUFFICIENT_RESOURCES; } - + td2->key = bi->key; td2->size = newlen; td2->data = newier; td2->ignore = FALSE; td2->inserted = TRUE; - + 
InsertHeadList(td->list_entry.Blink, &td2->list_entry); - + t->header.num_items++; t->size += newlen + sizeof(leaf_node); t->write = TRUE; } - + break; } - + if (len > itemlen) { len -= itemlen; ier = (INODE_EXTREF*)&ier->name[ier->n]; @@ -1778,98 +1717,157 @@ static BOOL handle_batch_collision(device_extension* Vcb, batch_item* bi, tree* } break; } - + + case Batch_DeleteXattr: { + if (td->size < sizeof(DIR_ITEM)) { + ERR("XATTR_ITEM was %u bytes, expected at least %u\n", td->size, sizeof(DIR_ITEM)); + return STATUS_INTERNAL_ERROR; + } else { + DIR_ITEM *di, *deldi; + LONG len; + + deldi = (DIR_ITEM*)bi->data; + di = (DIR_ITEM*)td->data; + len = td->size; + + do { + if (di->n == deldi->n && RtlCompareMemory(di->name, deldi->name, di->n) == di->n) { + UINT16 newlen = td->size - ((UINT16)offsetof(DIR_ITEM, name[0]) + di->n + di->m); + + if (newlen == 0) + TRACE("deleting XATTR_ITEM\n"); + else { + UINT8 *newdi = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *dioff; + tree_data* td2; + + if (!newdi) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + TRACE("modifying XATTR_ITEM\n"); + + if ((UINT8*)di > td->data) { + RtlCopyMemory(newdi, td->data, (UINT8*)di - td->data); + dioff = newdi + ((UINT8*)di - td->data); + } else + dioff = newdi; + + if ((UINT8*)&di->name[di->n + di->m] < td->data + td->size) + RtlCopyMemory(dioff, &di->name[di->n + di->m], td->size - ((UINT8*)&di->name[di->n + di->m] - td->data)); + + td2 = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); + if (!td2) { + ERR("out of memory\n"); + ExFreePool(newdi); + return STATUS_INSUFFICIENT_RESOURCES; + } + + td2->key = bi->key; + td2->size = newlen; + td2->data = newdi; + td2->ignore = FALSE; + td2->inserted = TRUE; + + InsertHeadList(td->list_entry.Blink, &td2->list_entry); + + t->header.num_items++; + t->size += newlen + sizeof(leaf_node); + t->write = TRUE; + } + + break; + } + + len -= sizeof(DIR_ITEM) - sizeof(char) + di->n + di->m; + di = 
(DIR_ITEM*)&di->name[di->n + di->m]; + + if (len == 0) { + TRACE("could not find DIR_ITEM to delete\n"); + *ignore = TRUE; + return STATUS_SUCCESS; + } + } while (len > 0); + } + break; + } + case Batch_Delete: break; - + default: ERR("unexpected batch operation type\n"); - int3; - break; + return STATUS_INTERNAL_ERROR; } - + // delete old item if (!td->ignore) { - traverse_ptr* tp2; - td->ignore = TRUE; - + t->header.num_items--; t->size -= sizeof(leaf_node) + td->size; t->write = TRUE; - - if (rollback) { - tp2 = ExAllocateFromPagedLookasideList(&Vcb->traverse_ptr_lookaside); - if (!tp2) { - ERR("out of memory\n"); - return FALSE; - } - - tp2->tree = t; - tp2->item = td; - - add_rollback(Vcb, rollback, ROLLBACK_DELETE_ITEM, tp2); - } } if (newtd) { newtd->data = bi->data; newtd->size = bi->datalen; - InsertHeadList(&td->list_entry, &newtd->list_entry); + InsertHeadList(td->list_entry.Blink, &newtd->list_entry); } } else { ERR("(%llx,%x,%llx) already exists\n", bi->key.obj_id, bi->key.obj_type, bi->key.offset); - int3; + return STATUS_INTERNAL_ERROR; } - - return FALSE; + + *ignore = FALSE; + return STATUS_SUCCESS; } -static void commit_batch_list_root(device_extension* Vcb, batch_root* br, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS commit_batch_list_root(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, batch_root* br, PIRP Irp) { LIST_ENTRY* le; NTSTATUS Status; - + TRACE("root: %llx\n", br->r->id); - + le = br->items.Flink; while (le != &br->items) { batch_item* bi = CONTAINING_RECORD(le, batch_item, list_entry); - LIST_ENTRY *le2, *listhead; - traverse_ptr tp, *tp2; + LIST_ENTRY *le2; + traverse_ptr tp; KEY tree_end; BOOL no_end; - tree_data* td; + tree_data *td, *listhead; int cmp; tree* t; BOOL ignore = FALSE; - + TRACE("(%llx,%x,%llx)\n", bi->key.obj_id, bi->key.obj_type, bi->key.offset); - - Status = find_item(Vcb, br->r, &tp, &bi->key, FALSE, Irp); + + Status = find_item(Vcb, br->r, &tp, &bi->key, TRUE, Irp); if 
(!NT_SUCCESS(Status)) { // FIXME - handle STATUS_NOT_FOUND ERR("find_item returned %08x\n", Status); - return; + return Status; } - + find_tree_end(tp.tree, &tree_end, &no_end); - + if (bi->operation == Batch_DeleteInode) { if (tp.item->key.obj_id == bi->key.obj_id) { BOOL ended = FALSE; - + td = tp.item; - + if (!tp.item->ignore) { tp.item->ignore = TRUE; tp.tree->header.num_items--; tp.tree->size -= tp.item->size + sizeof(leaf_node); tp.tree->write = TRUE; } - + le2 = tp.item->list_entry.Flink; while (le2 != &tp.tree->itemlist) { td = CONTAINING_RECORD(le2, tree_data, list_entry); - + if (td->key.obj_id == bi->key.obj_id) { if (!td->ignore) { td->ignore = TRUE; @@ -1881,24 +1879,24 @@ static void commit_batch_list_root(device_extension* Vcb, batch_root* br, PIRP I ended = TRUE; break; } - + le2 = le2->Flink; } - + while (!ended) { traverse_ptr next_tp; - + tp.item = td; - + if (!find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) break; - + tp = next_tp; - + le2 = &tp.item->list_entry; while (le2 != &tp.tree->itemlist) { td = CONTAINING_RECORD(le2, tree_data, list_entry); - + if (td->key.obj_id == bi->key.obj_id) { if (!td->ignore) { td->ignore = TRUE; @@ -1910,35 +1908,170 @@ static void commit_batch_list_root(device_extension* Vcb, batch_root* br, PIRP I ended = TRUE; break; } - + + le2 = le2->Flink; + } + } + } + } else if (bi->operation == Batch_DeleteExtentData) { + if (tp.item->key.obj_id < bi->key.obj_id || (tp.item->key.obj_id == bi->key.obj_id && tp.item->key.obj_type < bi->key.obj_type)) { + traverse_ptr tp2; + + if (find_next_item(Vcb, &tp, &tp2, FALSE, Irp)) { + if (tp2.item->key.obj_id == bi->key.obj_id && tp2.item->key.obj_type == bi->key.obj_type) { + tp = tp2; + find_tree_end(tp.tree, &tree_end, &no_end); + } + } + } + + if (tp.item->key.obj_id == bi->key.obj_id && tp.item->key.obj_type == bi->key.obj_type) { + BOOL ended = FALSE; + + td = tp.item; + + if (!tp.item->ignore) { + tp.item->ignore = TRUE; + tp.tree->header.num_items--; + tp.tree->size -= 
tp.item->size + sizeof(leaf_node); + tp.tree->write = TRUE; + } + + le2 = tp.item->list_entry.Flink; + while (le2 != &tp.tree->itemlist) { + td = CONTAINING_RECORD(le2, tree_data, list_entry); + + if (td->key.obj_id == bi->key.obj_id && td->key.obj_type == bi->key.obj_type) { + if (!td->ignore) { + td->ignore = TRUE; + tp.tree->header.num_items--; + tp.tree->size -= td->size + sizeof(leaf_node); + tp.tree->write = TRUE; + } + } else { + ended = TRUE; + break; + } + + le2 = le2->Flink; + } + + while (!ended) { + traverse_ptr next_tp; + + tp.item = td; + + if (!find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) + break; + + tp = next_tp; + + le2 = &tp.item->list_entry; + while (le2 != &tp.tree->itemlist) { + td = CONTAINING_RECORD(le2, tree_data, list_entry); + + if (td->key.obj_id == bi->key.obj_id && td->key.obj_type == bi->key.obj_type) { + if (!td->ignore) { + td->ignore = TRUE; + tp.tree->header.num_items--; + tp.tree->size -= td->size + sizeof(leaf_node); + tp.tree->write = TRUE; + } + } else { + ended = TRUE; + break; + } + + le2 = le2->Flink; + } + } + } + } else if (bi->operation == Batch_DeleteFreeSpace) { + if (tp.item->key.obj_id >= bi->key.obj_id && tp.item->key.obj_id < bi->key.obj_id + bi->key.offset) { + BOOL ended = FALSE; + + td = tp.item; + + if (!tp.item->ignore) { + tp.item->ignore = TRUE; + tp.tree->header.num_items--; + tp.tree->size -= tp.item->size + sizeof(leaf_node); + tp.tree->write = TRUE; + } + + le2 = tp.item->list_entry.Flink; + while (le2 != &tp.tree->itemlist) { + td = CONTAINING_RECORD(le2, tree_data, list_entry); + + if (td->key.obj_id >= bi->key.obj_id && td->key.obj_id < bi->key.obj_id + bi->key.offset) { + if (!td->ignore) { + td->ignore = TRUE; + tp.tree->header.num_items--; + tp.tree->size -= td->size + sizeof(leaf_node); + tp.tree->write = TRUE; + } + } else { + ended = TRUE; + break; + } + + le2 = le2->Flink; + } + + while (!ended) { + traverse_ptr next_tp; + + tp.item = td; + + if (!find_next_item(Vcb, &tp, &next_tp, FALSE, 
Irp)) + break; + + tp = next_tp; + + le2 = &tp.item->list_entry; + while (le2 != &tp.tree->itemlist) { + td = CONTAINING_RECORD(le2, tree_data, list_entry); + + if (td->key.obj_id >= bi->key.obj_id && td->key.obj_id < bi->key.obj_id + bi->key.offset) { + if (!td->ignore) { + td->ignore = TRUE; + tp.tree->header.num_items--; + tp.tree->size -= td->size + sizeof(leaf_node); + tp.tree->write = TRUE; + } + } else { + ended = TRUE; + break; + } + le2 = le2->Flink; } } } } else { - if (bi->operation == Batch_Delete || bi->operation == Batch_DeleteDirItem || - bi->operation == Batch_DeleteInodeRef || bi->operation == Batch_DeleteInodeExtRef) + if (bi->operation == Batch_Delete || bi->operation == Batch_DeleteDirItem || bi->operation == Batch_DeleteInodeRef || + bi->operation == Batch_DeleteInodeExtRef || bi->operation == Batch_DeleteXattr) td = NULL; else { td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); if (!td) { ERR("out of memory\n"); - return; + return STATUS_INSUFFICIENT_RESOURCES; } - + td->key = bi->key; td->size = bi->datalen; td->data = bi->data; td->ignore = FALSE; td->inserted = TRUE; } - + cmp = keycmp(bi->key, tp.item->key); - + if (cmp == -1) { // very first key in root if (td) { tree_data* paritem; - + InsertHeadList(&tp.tree->itemlist, &td->list_entry); paritem = tp.tree->paritem; @@ -1947,178 +2080,193 @@ static void commit_batch_list_root(device_extension* Vcb, batch_root* br, PIRP I paritem->key = bi->key; } else break; - + paritem = paritem->treeholder.tree->paritem; } } } else if (cmp == 0) { // item already exists - ignore = handle_batch_collision(Vcb, bi, tp.tree, tp.item, td, &br->items, rollback); + if (tp.item->ignore) { + if (td) + InsertHeadList(tp.item->list_entry.Blink, &td->list_entry); + } else { + Status = handle_batch_collision(Vcb, bi, tp.tree, tp.item, td, &br->items, &ignore); + if (!NT_SUCCESS(Status)) { + ERR("handle_batch_collision returned %08x\n", Status); + + if (td) + 
ExFreeToPagedLookasideList(&Vcb->tree_data_lookaside, td); + + return Status; + } + } } else if (td) { InsertHeadList(&tp.item->list_entry, &td->list_entry); } - + if (bi->operation == Batch_DeleteInodeRef && cmp != 0 && Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF) { add_delete_inode_extref(Vcb, bi, &br->items); } - + if (!ignore && td) { tp.tree->header.num_items++; tp.tree->size += bi->datalen + sizeof(leaf_node); tp.tree->write = TRUE; - - if (rollback) { - // FIXME - free this correctly - tp2 = ExAllocateFromPagedLookasideList(&Vcb->traverse_ptr_lookaside); - if (!tp2) { - ERR("out of memory\n"); - return; - } - - tp2->tree = tp.tree; - tp2->item = td; - add_rollback(Vcb, rollback, ROLLBACK_INSERT_ITEM, tp2); - } - - listhead = &td->list_entry; - } else { - listhead = &tp.item->list_entry; - - if (!td && tp.item->ignore && tp.item->list_entry.Blink != &tp.tree->itemlist) { - tree_data* prevtd = CONTAINING_RECORD(tp.item->list_entry.Blink, tree_data, list_entry); - - if (!prevtd->ignore && !keycmp(prevtd->key, tp.item->key)) - listhead = &prevtd->list_entry; - } + listhead = td; + } else + listhead = tp.item; + + while (listhead->list_entry.Blink != &tp.tree->itemlist) { + tree_data* prevtd = CONTAINING_RECORD(listhead->list_entry.Blink, tree_data, list_entry); + + if (!keycmp(prevtd->key, listhead->key)) + listhead = prevtd; + else + break; } - + le2 = le->Flink; while (le2 != &br->items) { batch_item* bi2 = CONTAINING_RECORD(le2, batch_item, list_entry); - - if (bi2->operation == Batch_DeleteInode) + + if (bi2->operation == Batch_DeleteInode || bi2->operation == Batch_DeleteExtentData || bi2->operation == Batch_DeleteFreeSpace) break; - + if (no_end || keycmp(bi2->key, tree_end) == -1) { LIST_ENTRY* le3; BOOL inserted = FALSE; - + ignore = FALSE; - - if (bi2->operation == Batch_Delete || bi2->operation == Batch_DeleteDirItem || - bi2->operation == Batch_DeleteInodeRef || bi2->operation == Batch_DeleteInodeExtRef) + + if (bi2->operation 
== Batch_Delete || bi2->operation == Batch_DeleteDirItem || bi2->operation == Batch_DeleteInodeRef || + bi2->operation == Batch_DeleteInodeExtRef || bi2->operation == Batch_DeleteXattr) td = NULL; else { td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); if (!td) { ERR("out of memory\n"); - return; + return STATUS_INSUFFICIENT_RESOURCES; } - + td->key = bi2->key; td->size = bi2->datalen; td->data = bi2->data; td->ignore = FALSE; td->inserted = TRUE; } - - le3 = listhead; + + le3 = &listhead->list_entry; while (le3 != &tp.tree->itemlist) { tree_data* td2 = CONTAINING_RECORD(le3, tree_data, list_entry); - - if (!td2->ignore) { - cmp = keycmp(bi2->key, td2->key); - if (cmp == 0) { - ignore = handle_batch_collision(Vcb, bi2, tp.tree, td2, td, &br->items, rollback); - inserted = TRUE; - break; - } else if (cmp == -1) { + cmp = keycmp(bi2->key, td2->key); + + if (cmp == 0) { + if (td2->ignore) { if (td) { InsertHeadList(le3->Blink, &td->list_entry); inserted = TRUE; } else if (bi2->operation == Batch_DeleteInodeRef && Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF) { add_delete_inode_extref(Vcb, bi2, &br->items); } - break; + } else { + Status = handle_batch_collision(Vcb, bi2, tp.tree, td2, td, &br->items, &ignore); + if (!NT_SUCCESS(Status)) { + ERR("handle_batch_collision returned %08x\n", Status); + + // as at the earlier call site, release the unused tree_data before failing + if (td) + ExFreeToPagedLookasideList(&Vcb->tree_data_lookaside, td); + + return Status; + } } + + inserted = TRUE; + break; + } else if (cmp == -1) { + if (td) { + InsertHeadList(le3->Blink, &td->list_entry); + inserted = TRUE; + } else if (bi2->operation == Batch_DeleteInodeRef && Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF) { + add_delete_inode_extref(Vcb, bi2, &br->items); + } + break; } - + le3 = le3->Flink; } - + if (td) { if (!inserted) InsertTailList(&tp.tree->itemlist, &td->list_entry); - + if (!ignore) { tp.tree->header.num_items++; tp.tree->size += bi2->datalen + sizeof(leaf_node); - - if (rollback) { - // FIXME - free this correctly - tp2 =
ExAllocateFromPagedLookasideList(&Vcb->traverse_ptr_lookaside); - if (!tp2) { - ERR("out of memory\n"); - return; - } - - tp2->tree = tp.tree; - tp2->item = td; - - add_rollback(Vcb, rollback, ROLLBACK_INSERT_ITEM, tp2); - } - - listhead = &td->list_entry; + + listhead = td; } } else if (!inserted && bi2->operation == Batch_DeleteInodeRef && Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF) { add_delete_inode_extref(Vcb, bi2, &br->items); } - + + while (listhead->list_entry.Blink != &tp.tree->itemlist) { + tree_data* prevtd = CONTAINING_RECORD(listhead->list_entry.Blink, tree_data, list_entry); + + if (!keycmp(prevtd->key, listhead->key)) + listhead = prevtd; + else + break; + } + le = le2; } else break; - + le2 = le2->Flink; } - + t = tp.tree; while (t) { if (t->paritem && t->paritem->ignore) { t->paritem->ignore = FALSE; t->parent->header.num_items++; t->parent->size += sizeof(internal_node); - - // FIXME - do we need to add a rollback entry here? } t->header.generation = Vcb->superblock.generation; t = t->parent; } } - + le = le->Flink; } - + // FIXME - remove as we are going along while (!IsListEmpty(&br->items)) { - LIST_ENTRY* le = RemoveHeadList(&br->items); - batch_item* bi = CONTAINING_RECORD(le, batch_item, list_entry); - - if ((bi->operation == Batch_DeleteDirItem || bi->operation == Batch_DeleteInodeRef || bi->operation == Batch_DeleteInodeExtRef) && bi->data) + batch_item* bi = CONTAINING_RECORD(RemoveHeadList(&br->items), batch_item, list_entry); + + if ((bi->operation == Batch_DeleteDirItem || bi->operation == Batch_DeleteInodeRef || + bi->operation == Batch_DeleteInodeExtRef || bi->operation == Batch_DeleteXattr) && bi->data) ExFreePool(bi->data); - + ExFreeToPagedLookasideList(&Vcb->batch_item_lookaside, bi); } + + return STATUS_SUCCESS; } -void commit_batch_list(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) { +NTSTATUS commit_batch_list(_Requires_exclusive_lock_held_(_Curr_->tree_lock) 
device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp) { + NTSTATUS Status; + while (!IsListEmpty(batchlist)) { LIST_ENTRY* le = RemoveHeadList(batchlist); batch_root* br2 = CONTAINING_RECORD(le, batch_root, list_entry); - - commit_batch_list_root(Vcb, br2, Irp, rollback); - + + Status = commit_batch_list_root(Vcb, br2, Irp); + if (!NT_SUCCESS(Status)) { + ERR("commit_batch_list_root returned %08x\n", Status); + return Status; + } + ExFreePool(br2); } + + return STATUS_SUCCESS; } diff --git a/reactos/drivers/filesystems/btrfs/volume.c b/reactos/drivers/filesystems/btrfs/volume.c new file mode 100755 index 00000000000..eff7f07c4cd --- /dev/null +++ b/reactos/drivers/filesystems/btrfs/volume.c @@ -0,0 +1,1267 @@ +/* Copyright (c) Mark Harmstone 2016-17 + * + * This file is part of WinBtrfs. + * + * WinBtrfs is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public Licence as published by + * the Free Software Foundation, either version 3 of the Licence, or + * (at your option) any later version. + * + * WinBtrfs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public Licence for more details. + * + * You should have received a copy of the GNU Lesser General Public Licence + * along with WinBtrfs. If not, see . 
*/ + +#include "btrfs_drv.h" +#include +#include +#include +#include +#include + +#define IOCTL_VOLUME_IS_DYNAMIC CTL_CODE(IOCTL_VOLUME_BASE, 18, METHOD_BUFFERED, FILE_ANY_ACCESS) +#define IOCTL_VOLUME_POST_ONLINE CTL_CODE(IOCTL_VOLUME_BASE, 25, METHOD_BUFFERED, FILE_READ_ACCESS | FILE_WRITE_ACCESS) + +extern PDRIVER_OBJECT drvobj; +extern PDEVICE_OBJECT master_devobj; +extern ERESOURCE pdo_list_lock; +extern LIST_ENTRY pdo_list; +extern UNICODE_STRING registry_path; + +NTSTATUS vol_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { + volume_device_extension* vde = DeviceObject->DeviceExtension; + + TRACE("(%p, %p)\n", DeviceObject, Irp); + + if (vde->removing) + return STATUS_DEVICE_NOT_READY; + + Irp->IoStatus.Information = FILE_OPENED; + InterlockedIncrement(&vde->open_count); + + return STATUS_SUCCESS; +} + +NTSTATUS vol_close(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { + volume_device_extension* vde = DeviceObject->DeviceExtension; + pdo_device_extension* pdode = vde->pdode; + + TRACE("(%p, %p)\n", DeviceObject, Irp); + + Irp->IoStatus.Information = 0; + + ExAcquireResourceSharedLite(&pdode->child_lock, TRUE); + + if (InterlockedDecrement(&vde->open_count) == 0 && vde->removing) { + NTSTATUS Status; + UNICODE_STRING mmdevpath; + PDEVICE_OBJECT mountmgr; + PFILE_OBJECT mountmgrfo; + PDEVICE_OBJECT pdo; + + RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME); + Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &mountmgrfo, &mountmgr); + if (!NT_SUCCESS(Status)) + ERR("IoGetDeviceObjectPointer returned %08x\n", Status); + else { + remove_drive_letter(mountmgr, &vde->name); + + ObDereferenceObject(mountmgrfo); + } + + if (vde->mounted_device) { + device_extension* Vcb = vde->mounted_device->DeviceExtension; + + Vcb->vde = NULL; + } + + if (vde->name.Buffer) + ExFreePool(vde->name.Buffer); + + ExReleaseResourceLite(&pdode->child_lock); + ExDeleteResourceLite(&pdode->child_lock); + IoDetachDevice(vde->pdo); + + pdo = vde->pdo; + 
IoDeleteDevice(vde->device); + + if (no_pnp) + IoDeleteDevice(pdo); + } else + ExReleaseResourceLite(&pdode->child_lock); + + return STATUS_SUCCESS; +} + +typedef struct { + IO_STATUS_BLOCK iosb; + KEVENT Event; +} vol_read_context; + +_Function_class_(IO_COMPLETION_ROUTINE) +#ifdef __REACTOS__ +static NTSTATUS NTAPI vol_read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#else +static NTSTATUS vol_read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#endif + vol_read_context* context = conptr; + + UNUSED(DeviceObject); + + context->iosb = Irp->IoStatus; + KeSetEvent(&context->Event, 0, FALSE); + + return STATUS_MORE_PROCESSING_REQUIRED; +} + +NTSTATUS vol_read(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { + volume_device_extension* vde = DeviceObject->DeviceExtension; + pdo_device_extension* pdode = vde->pdode; + volume_child* vc; + NTSTATUS Status; + PIRP Irp2; + vol_read_context context; + PIO_STACK_LOCATION IrpSp, IrpSp2; + + TRACE("(%p, %p)\n", DeviceObject, Irp); + + ExAcquireResourceSharedLite(&pdode->child_lock, TRUE); + + if (IsListEmpty(&pdode->children)) { + ExReleaseResourceLite(&pdode->child_lock); + Status = STATUS_INVALID_DEVICE_REQUEST; + goto end; + } + + vc = CONTAINING_RECORD(pdode->children.Flink, volume_child, list_entry); + + // We can't use IoSkipCurrentIrpStackLocation as the device isn't in our stack + + Irp2 = IoAllocateIrp(vc->devobj->StackSize, FALSE); + + if (!Irp2) { + ERR("IoAllocateIrp failed\n"); + ExReleaseResourceLite(&pdode->child_lock); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + IrpSp = IoGetCurrentIrpStackLocation(Irp); + IrpSp2 = IoGetNextIrpStackLocation(Irp2); + + IrpSp2->MajorFunction = IRP_MJ_READ; + + if (vc->devobj->Flags & DO_BUFFERED_IO) { + Irp2->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, IrpSp->Parameters.Read.Length, ALLOC_TAG); + if (!Irp2->AssociatedIrp.SystemBuffer) { + ERR("out of memory\n"); + 
ExReleaseResourceLite(&pdode->child_lock); + IoFreeIrp(Irp2); // release the IRP allocated above before bailing out + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + Irp2->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION; + + Irp2->UserBuffer = MmGetSystemAddressForMdlSafe(Irp->MdlAddress, NormalPagePriority); + } else if (vc->devobj->Flags & DO_DIRECT_IO) + Irp2->MdlAddress = Irp->MdlAddress; + else + Irp2->UserBuffer = MmGetSystemAddressForMdlSafe(Irp->MdlAddress, NormalPagePriority); + + IrpSp2->Parameters.Read.Length = IrpSp->Parameters.Read.Length; + IrpSp2->Parameters.Read.ByteOffset.QuadPart = IrpSp->Parameters.Read.ByteOffset.QuadPart; + + KeInitializeEvent(&context.Event, NotificationEvent, FALSE); + Irp2->UserIosb = &context.iosb; + + IoSetCompletionRoutine(Irp2, vol_read_completion, &context, TRUE, TRUE, TRUE); + + Status = IoCallDriver(vc->devobj, Irp2); + + if (Status == STATUS_PENDING) { + KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL); + Status = context.iosb.Status; + } + + // vol_read_completion returns STATUS_MORE_PROCESSING_REQUIRED, so the I/O manager never copies back or frees the bounce buffer and never frees Irp2; do all of that here + if (vc->devobj->Flags & DO_BUFFERED_IO) { + if (NT_SUCCESS(Status) && Irp2->UserBuffer) + RtlCopyMemory(Irp2->UserBuffer, Irp2->AssociatedIrp.SystemBuffer, context.iosb.Information); + + ExFreePool(Irp2->AssociatedIrp.SystemBuffer); + } + + IoFreeIrp(Irp2); + + ExReleaseResourceLite(&pdode->child_lock); + + Irp->IoStatus.Information = context.iosb.Information; + +end: + Irp->IoStatus.Status = Status; + IoCompleteRequest(Irp, IO_NO_INCREMENT); + + return Status; +} + +NTSTATUS vol_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { + volume_device_extension* vde = DeviceObject->DeviceExtension; + pdo_device_extension* pdode = vde->pdode; + volume_child* vc; + NTSTATUS Status; + PIRP Irp2; + vol_read_context context; + PIO_STACK_LOCATION IrpSp, IrpSp2; + + TRACE("(%p, %p)\n", DeviceObject, Irp); + + ExAcquireResourceSharedLite(&pdode->child_lock, TRUE); + + if (IsListEmpty(&pdode->children)) { + ExReleaseResourceLite(&pdode->child_lock); + Status = STATUS_INVALID_DEVICE_REQUEST; + goto end; + } + + vc = CONTAINING_RECORD(pdode->children.Flink, volume_child, list_entry); + + if (vc->list_entry.Flink != &pdode->children) { // more than one device + ExReleaseResourceLite(&pdode->child_lock); + Status = STATUS_ACCESS_DENIED; + goto end; + } + + // We
can't use IoSkipCurrentIrpStackLocation as the device isn't in our stack + + Irp2 = IoAllocateIrp(vc->devobj->StackSize, FALSE); + + if (!Irp2) { + ERR("IoAllocateIrp failed\n"); + ExReleaseResourceLite(&pdode->child_lock); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + IrpSp = IoGetCurrentIrpStackLocation(Irp); + IrpSp2 = IoGetNextIrpStackLocation(Irp2); + + IrpSp2->MajorFunction = IRP_MJ_WRITE; + + if (vc->devobj->Flags & DO_BUFFERED_IO) { + Irp2->AssociatedIrp.SystemBuffer = MmGetSystemAddressForMdlSafe(Irp->MdlAddress, NormalPagePriority); + + Irp2->Flags |= IRP_BUFFERED_IO; + + Irp2->UserBuffer = MmGetSystemAddressForMdlSafe(Irp->MdlAddress, NormalPagePriority); + } else if (vc->devobj->Flags & DO_DIRECT_IO) + Irp2->MdlAddress = Irp->MdlAddress; + else + Irp2->UserBuffer = MmGetSystemAddressForMdlSafe(Irp->MdlAddress, NormalPagePriority); + + IrpSp2->Parameters.Write.Length = IrpSp->Parameters.Write.Length; + IrpSp2->Parameters.Write.ByteOffset.QuadPart = IrpSp->Parameters.Write.ByteOffset.QuadPart; + + KeInitializeEvent(&context.Event, NotificationEvent, FALSE); + Irp2->UserIosb = &context.iosb; + + IoSetCompletionRoutine(Irp2, vol_read_completion, &context, TRUE, TRUE, TRUE); + + Status = IoCallDriver(vc->devobj, Irp2); + + if (Status == STATUS_PENDING) { + KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL); + Status = context.iosb.Status; + } + + // vol_read_completion returns STATUS_MORE_PROCESSING_REQUIRED, so Irp2 is still ours and must be freed here + IoFreeIrp(Irp2); + + ExReleaseResourceLite(&pdode->child_lock); + + Irp->IoStatus.Information = context.iosb.Information; + +end: + Irp->IoStatus.Status = Status; + IoCompleteRequest(Irp, IO_NO_INCREMENT); + + return Status; +} + +NTSTATUS vol_query_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { + TRACE("(%p, %p)\n", DeviceObject, Irp); + + return STATUS_INVALID_DEVICE_REQUEST; +} + +NTSTATUS vol_set_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { + TRACE("(%p, %p)\n", DeviceObject, Irp); + + return STATUS_INVALID_DEVICE_REQUEST; +} + +NTSTATUS vol_query_ea(IN PDEVICE_OBJECT
DeviceObject, IN PIRP Irp) { + TRACE("(%p, %p)\n", DeviceObject, Irp); + + return STATUS_INVALID_DEVICE_REQUEST; +} + +NTSTATUS vol_set_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { + TRACE("(%p, %p)\n", DeviceObject, Irp); + + return STATUS_INVALID_DEVICE_REQUEST; +} + +NTSTATUS vol_flush_buffers(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { + TRACE("(%p, %p)\n", DeviceObject, Irp); + + return STATUS_INVALID_DEVICE_REQUEST; +} + +NTSTATUS vol_query_volume_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { + TRACE("(%p, %p)\n", DeviceObject, Irp); + + return STATUS_INVALID_DEVICE_REQUEST; +} + +NTSTATUS vol_set_volume_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { + TRACE("(%p, %p)\n", DeviceObject, Irp); + + return STATUS_INVALID_DEVICE_REQUEST; +} + +NTSTATUS vol_cleanup(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { + TRACE("(%p, %p)\n", DeviceObject, Irp); + + Irp->IoStatus.Information = 0; + + return STATUS_SUCCESS; +} + +NTSTATUS vol_directory_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { + TRACE("(%p, %p)\n", DeviceObject, Irp); + + return STATUS_INVALID_DEVICE_REQUEST; +} + +NTSTATUS vol_file_system_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { + TRACE("(%p, %p)\n", DeviceObject, Irp); + + return STATUS_INVALID_DEVICE_REQUEST; +} + +NTSTATUS vol_lock_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { + TRACE("(%p, %p)\n", DeviceObject, Irp); + + return STATUS_INVALID_DEVICE_REQUEST; +} + +static NTSTATUS vol_query_device_name(volume_device_extension* vde, PIRP Irp) { + PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); + PMOUNTDEV_NAME name; + + if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_NAME)) { + Irp->IoStatus.Information = sizeof(MOUNTDEV_NAME); + return STATUS_BUFFER_TOO_SMALL; + } + + name = Irp->AssociatedIrp.SystemBuffer; + name->NameLength = vde->name.Length; + + if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < offsetof(MOUNTDEV_NAME, Name[0]) + 
name->NameLength) { + Irp->IoStatus.Information = sizeof(MOUNTDEV_NAME); + return STATUS_BUFFER_OVERFLOW; + } + + RtlCopyMemory(name->Name, vde->name.Buffer, vde->name.Length); + + Irp->IoStatus.Information = offsetof(MOUNTDEV_NAME, Name[0]) + name->NameLength; + + return STATUS_SUCCESS; +} + +static NTSTATUS vol_query_unique_id(volume_device_extension* vde, PIRP Irp) { + PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); + MOUNTDEV_UNIQUE_ID* mduid; + pdo_device_extension* pdode; + + if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_UNIQUE_ID)) { + Irp->IoStatus.Information = sizeof(MOUNTDEV_UNIQUE_ID); + return STATUS_BUFFER_TOO_SMALL; + } + + mduid = Irp->AssociatedIrp.SystemBuffer; + mduid->UniqueIdLength = sizeof(BTRFS_UUID); + + if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < offsetof(MOUNTDEV_UNIQUE_ID, UniqueId[0]) + mduid->UniqueIdLength) { + Irp->IoStatus.Information = sizeof(MOUNTDEV_UNIQUE_ID); + return STATUS_BUFFER_OVERFLOW; + } + + if (!vde->pdo) + return STATUS_INVALID_PARAMETER; + + pdode = vde->pdode; + + RtlCopyMemory(mduid->UniqueId, &pdode->uuid, sizeof(BTRFS_UUID)); + + Irp->IoStatus.Information = offsetof(MOUNTDEV_UNIQUE_ID, UniqueId[0]) + mduid->UniqueIdLength; + + return STATUS_SUCCESS; +} + +static NTSTATUS vol_is_dynamic(PIRP Irp) { + PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); + UINT8* buf; + + if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength == 0 || !Irp->AssociatedIrp.SystemBuffer) + return STATUS_INVALID_PARAMETER; + + buf = (UINT8*)Irp->AssociatedIrp.SystemBuffer; + + *buf = 1; + + Irp->IoStatus.Information = 1; + + return STATUS_SUCCESS; +} + +static NTSTATUS vol_check_verify(volume_device_extension* vde) { + pdo_device_extension* pdode = vde->pdode; + NTSTATUS Status; + LIST_ENTRY* le; + + ExAcquireResourceSharedLite(&pdode->child_lock, TRUE); + + le = pdode->children.Flink; + while (le != &pdode->children) { + volume_child* vc = CONTAINING_RECORD(le, 
volume_child, list_entry); + + Status = dev_ioctl(vc->devobj, IOCTL_STORAGE_CHECK_VERIFY, NULL, 0, NULL, 0, FALSE, NULL); + if (!NT_SUCCESS(Status)) + goto end; + + le = le->Flink; + } + + Status = STATUS_SUCCESS; + +end: + ExReleaseResourceLite(&pdode->child_lock); + + return Status; +} + +static NTSTATUS vol_get_disk_extents(volume_device_extension* vde, PIRP Irp) { + pdo_device_extension* pdode = vde->pdode; + PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); + LIST_ENTRY* le; + ULONG num_extents = 0, i, max_extents = 1; + NTSTATUS Status; + VOLUME_DISK_EXTENTS *ext, *ext3; + + if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(VOLUME_DISK_EXTENTS)) + return STATUS_BUFFER_TOO_SMALL; + + ExAcquireResourceSharedLite(&pdode->child_lock, TRUE); + + le = pdode->children.Flink; + while (le != &pdode->children) { + volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry); + VOLUME_DISK_EXTENTS ext2; + + Status = dev_ioctl(vc->devobj, IOCTL_VOLUME_GET_VOLUME_DISK_EXTENTS, NULL, 0, &ext2, sizeof(VOLUME_DISK_EXTENTS), FALSE, NULL); + if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) { + ERR("IOCTL_VOLUME_GET_VOLUME_DISK_EXTENTS returned %08x\n", Status); + goto end; + } + + num_extents += ext2.NumberOfDiskExtents; + + if (ext2.NumberOfDiskExtents > max_extents) + max_extents = ext2.NumberOfDiskExtents; + + le = le->Flink; + } + + ext = Irp->AssociatedIrp.SystemBuffer; + + if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < offsetof(VOLUME_DISK_EXTENTS, Extents[0]) + (num_extents * sizeof(DISK_EXTENT))) { + Irp->IoStatus.Information = offsetof(VOLUME_DISK_EXTENTS, Extents[0]); + ext->NumberOfDiskExtents = num_extents; + Status = STATUS_BUFFER_OVERFLOW; + goto end; + } + + ext3 = ExAllocatePoolWithTag(PagedPool, offsetof(VOLUME_DISK_EXTENTS, Extents[0]) + (max_extents * sizeof(DISK_EXTENT)), ALLOC_TAG); + if (!ext3) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + i = 0; + 
ext->NumberOfDiskExtents = 0; + + le = pdode->children.Flink; + while (le != &pdode->children) { + volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry); + + Status = dev_ioctl(vc->devobj, IOCTL_VOLUME_GET_VOLUME_DISK_EXTENTS, NULL, 0, ext3, + (ULONG)offsetof(VOLUME_DISK_EXTENTS, Extents[0]) + (max_extents * sizeof(DISK_EXTENT)), FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("IOCTL_VOLUME_GET_VOLUME_DISK_EXTENTS returned %08x\n", Status); + ExFreePool(ext3); + goto end; + } + + if (i + ext3->NumberOfDiskExtents > num_extents) { + Irp->IoStatus.Information = offsetof(VOLUME_DISK_EXTENTS, Extents[0]); + ext->NumberOfDiskExtents = i + ext3->NumberOfDiskExtents; + Status = STATUS_BUFFER_OVERFLOW; + ExFreePool(ext3); + goto end; + } + + RtlCopyMemory(&ext->Extents[i], ext3->Extents, sizeof(DISK_EXTENT) * ext3->NumberOfDiskExtents); + i += ext3->NumberOfDiskExtents; + + le = le->Flink; + } + + ExFreePool(ext3); + + Status = STATUS_SUCCESS; + + ext->NumberOfDiskExtents = i; + Irp->IoStatus.Information = offsetof(VOLUME_DISK_EXTENTS, Extents[0]) + (i * sizeof(DISK_EXTENT)); + +end: + ExReleaseResourceLite(&pdode->child_lock); + + return Status; +} + +static NTSTATUS vol_is_writable(volume_device_extension* vde) { + pdo_device_extension* pdode = vde->pdode; + NTSTATUS Status; + LIST_ENTRY* le; + BOOL writable = FALSE; + + ExAcquireResourceSharedLite(&pdode->child_lock, TRUE); + + le = pdode->children.Flink; + while (le != &pdode->children) { + volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry); + + Status = dev_ioctl(vc->devobj, IOCTL_DISK_IS_WRITABLE, NULL, 0, NULL, 0, TRUE, NULL); + + if (NT_SUCCESS(Status)) { + writable = TRUE; + break; + } else if (Status != STATUS_MEDIA_WRITE_PROTECTED) + goto end; + + le = le->Flink; + } + + Status = writable ? 
STATUS_SUCCESS : STATUS_MEDIA_WRITE_PROTECTED; + +end: + ExReleaseResourceLite(&pdode->child_lock); + + return Status; // report the aggregated result instead of unconditional success +} + +static NTSTATUS vol_get_length(volume_device_extension* vde, PIRP Irp) { + PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); + pdo_device_extension* pdode = vde->pdode; + GET_LENGTH_INFORMATION* gli; + LIST_ENTRY* le; + + if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(GET_LENGTH_INFORMATION)) + return STATUS_BUFFER_TOO_SMALL; + + gli = (GET_LENGTH_INFORMATION*)Irp->AssociatedIrp.SystemBuffer; + + gli->Length.QuadPart = 0; + + ExAcquireResourceSharedLite(&pdode->child_lock, TRUE); + + le = pdode->children.Flink; + while (le != &pdode->children) { + volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry); + + gli->Length.QuadPart += vc->size; + + le = le->Flink; + } + + ExReleaseResourceLite(&pdode->child_lock); + + Irp->IoStatus.Information = sizeof(GET_LENGTH_INFORMATION); + + return STATUS_SUCCESS; +} + +static NTSTATUS vol_get_drive_geometry(PDEVICE_OBJECT DeviceObject, PIRP Irp) { + volume_device_extension* vde = DeviceObject->DeviceExtension; + pdo_device_extension* pdode = vde->pdode; + PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); + DISK_GEOMETRY* geom; + UINT64 length; + LIST_ENTRY* le; + + if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(DISK_GEOMETRY)) + return STATUS_BUFFER_TOO_SMALL; + + length = 0; + + ExAcquireResourceSharedLite(&pdode->child_lock, TRUE); + + le = pdode->children.Flink; + while (le != &pdode->children) { + volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry); + + length += vc->size; + + le = le->Flink; + } + + ExReleaseResourceLite(&pdode->child_lock); + + geom = (DISK_GEOMETRY*)Irp->AssociatedIrp.SystemBuffer; + geom->BytesPerSector = DeviceObject->SectorSize == 0 ?
0x200 : DeviceObject->SectorSize; + geom->SectorsPerTrack = 0x3f; + geom->TracksPerCylinder = 0xff; + geom->Cylinders.QuadPart = length / (UInt32x32To64(geom->TracksPerCylinder, geom->SectorsPerTrack) * geom->BytesPerSector); + geom->MediaType = DeviceObject->Characteristics & FILE_REMOVABLE_MEDIA ? RemovableMedia : FixedMedia; + + Irp->IoStatus.Information = sizeof(DISK_GEOMETRY); + + return STATUS_SUCCESS; +} + +static NTSTATUS vol_get_gpt_attributes(PIRP Irp) { + PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); + VOLUME_GET_GPT_ATTRIBUTES_INFORMATION* vggai; + + if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(VOLUME_GET_GPT_ATTRIBUTES_INFORMATION)) + return STATUS_BUFFER_TOO_SMALL; + + vggai = (VOLUME_GET_GPT_ATTRIBUTES_INFORMATION*)Irp->AssociatedIrp.SystemBuffer; + + vggai->GptAttributes = 0; + + Irp->IoStatus.Information = sizeof(VOLUME_GET_GPT_ATTRIBUTES_INFORMATION); + + return STATUS_SUCCESS; +} + +static NTSTATUS vol_get_device_number(volume_device_extension* vde, PIRP Irp) { + pdo_device_extension* pdode = vde->pdode; + PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); + volume_child* vc; + STORAGE_DEVICE_NUMBER* sdn; + + // If only one device, return its disk number. This is needed for ejection to work. 
+ + if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(STORAGE_DEVICE_NUMBER)) + return STATUS_BUFFER_TOO_SMALL; + + ExAcquireResourceSharedLite(&pdode->child_lock, TRUE); + + if (IsListEmpty(&pdode->children) || pdode->num_children > 1) { + ExReleaseResourceLite(&pdode->child_lock); + return STATUS_INVALID_DEVICE_REQUEST; + } + + vc = CONTAINING_RECORD(pdode->children.Flink, volume_child, list_entry); + + if (vc->disk_num == 0xffffffff) { + ExReleaseResourceLite(&pdode->child_lock); + return STATUS_INVALID_DEVICE_REQUEST; + } + + sdn = (STORAGE_DEVICE_NUMBER*)Irp->AssociatedIrp.SystemBuffer; + + sdn->DeviceType = FILE_DEVICE_DISK; + sdn->DeviceNumber = vc->disk_num; + sdn->PartitionNumber = vc->part_num; + + ExReleaseResourceLite(&pdode->child_lock); + + Irp->IoStatus.Information = sizeof(STORAGE_DEVICE_NUMBER); + + return STATUS_SUCCESS; +} + +_Function_class_(IO_COMPLETION_ROUTINE) +#ifdef __REACTOS__ +static NTSTATUS NTAPI vol_ioctl_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#else +static NTSTATUS vol_ioctl_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#endif + KEVENT* event = conptr; + + UNUSED(DeviceObject); + UNUSED(Irp); + + KeSetEvent(event, 0, FALSE); + + return STATUS_MORE_PROCESSING_REQUIRED; +} + +static NTSTATUS vol_ioctl_passthrough(volume_device_extension* vde, PIRP Irp) { + NTSTATUS Status; + volume_child* vc; + PIRP Irp2; + PIO_STACK_LOCATION IrpSp, IrpSp2; + KEVENT Event; + pdo_device_extension* pdode = vde->pdode; + + TRACE("(%p, %p)\n", vde, Irp); + + ExAcquireResourceSharedLite(&pdode->child_lock, TRUE); + + if (IsListEmpty(&pdode->children)) { + ExReleaseResourceLite(&pdode->child_lock); + return STATUS_INVALID_DEVICE_REQUEST; + } + + vc = CONTAINING_RECORD(pdode->children.Flink, volume_child, list_entry); + + if (vc->list_entry.Flink != &pdode->children) { // more than one device + ExReleaseResourceLite(&pdode->child_lock); + return STATUS_INVALID_DEVICE_REQUEST; + } + + Irp2 = 
IoAllocateIrp(vc->devobj->StackSize, FALSE); + + if (!Irp2) { + ERR("IoAllocateIrp failed\n"); + ExReleaseResourceLite(&pdode->child_lock); + return STATUS_INSUFFICIENT_RESOURCES; + } + + IrpSp = IoGetCurrentIrpStackLocation(Irp); + IrpSp2 = IoGetNextIrpStackLocation(Irp2); + + IrpSp2->MajorFunction = IrpSp->MajorFunction; + IrpSp2->MinorFunction = IrpSp->MinorFunction; + + IrpSp2->Parameters.DeviceIoControl.OutputBufferLength = IrpSp->Parameters.DeviceIoControl.OutputBufferLength; + IrpSp2->Parameters.DeviceIoControl.InputBufferLength = IrpSp->Parameters.DeviceIoControl.InputBufferLength; + IrpSp2->Parameters.DeviceIoControl.IoControlCode = IrpSp->Parameters.DeviceIoControl.IoControlCode; + IrpSp2->Parameters.DeviceIoControl.Type3InputBuffer = IrpSp->Parameters.DeviceIoControl.Type3InputBuffer; + + Irp2->AssociatedIrp.SystemBuffer = Irp->AssociatedIrp.SystemBuffer; + Irp2->MdlAddress = Irp->MdlAddress; + Irp2->UserBuffer = Irp->UserBuffer; + Irp2->Flags = Irp->Flags; + + KeInitializeEvent(&Event, NotificationEvent, FALSE); + + IoSetCompletionRoutine(Irp2, vol_ioctl_completion, &Event, TRUE, TRUE, TRUE); + + Status = IoCallDriver(vc->devobj, Irp2); + + if (Status == STATUS_PENDING) { + KeWaitForSingleObject(&Event, Executive, KernelMode, FALSE, NULL); + Status = Irp2->IoStatus.Status; + } + + Irp->IoStatus.Status = Irp2->IoStatus.Status; + Irp->IoStatus.Information = Irp2->IoStatus.Information; + + ExReleaseResourceLite(&pdode->child_lock); + + return Status; +} + +NTSTATUS vol_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { + volume_device_extension* vde = DeviceObject->DeviceExtension; + PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); + + TRACE("(%p, %p)\n", DeviceObject, Irp); + + Irp->IoStatus.Information = 0; + + switch (IrpSp->Parameters.DeviceIoControl.IoControlCode) { + case IOCTL_MOUNTDEV_QUERY_DEVICE_NAME: + return vol_query_device_name(vde, Irp); + + case IOCTL_MOUNTDEV_QUERY_UNIQUE_ID: + return vol_query_unique_id(vde, Irp); 
+ + case IOCTL_STORAGE_GET_DEVICE_NUMBER: + return vol_get_device_number(vde, Irp); + + case IOCTL_MOUNTDEV_QUERY_SUGGESTED_LINK_NAME: + TRACE("unhandled control code IOCTL_MOUNTDEV_QUERY_SUGGESTED_LINK_NAME\n"); + break; + + case IOCTL_MOUNTDEV_QUERY_STABLE_GUID: + TRACE("unhandled control code IOCTL_MOUNTDEV_QUERY_STABLE_GUID\n"); + break; + + case IOCTL_MOUNTDEV_LINK_CREATED: + TRACE("unhandled control code IOCTL_MOUNTDEV_LINK_CREATED\n"); + break; + + case IOCTL_VOLUME_GET_GPT_ATTRIBUTES: + return vol_get_gpt_attributes(Irp); + + case IOCTL_VOLUME_IS_DYNAMIC: + return vol_is_dynamic(Irp); + + case IOCTL_VOLUME_ONLINE: + TRACE("unhandled control code IOCTL_VOLUME_ONLINE\n"); + break; + + case IOCTL_VOLUME_POST_ONLINE: + TRACE("unhandled control code IOCTL_VOLUME_POST_ONLINE\n"); + break; + + case IOCTL_DISK_GET_DRIVE_GEOMETRY: + return vol_get_drive_geometry(DeviceObject, Irp); + + case IOCTL_DISK_IS_WRITABLE: + return vol_is_writable(vde); + + case IOCTL_DISK_GET_LENGTH_INFO: + return vol_get_length(vde, Irp); + + case IOCTL_STORAGE_CHECK_VERIFY: + case IOCTL_DISK_CHECK_VERIFY: + return vol_check_verify(vde); + + case IOCTL_VOLUME_GET_VOLUME_DISK_EXTENTS: + return vol_get_disk_extents(vde, Irp); + + default: // pass ioctl through if only one child device + return vol_ioctl_passthrough(vde, Irp); + } + + return STATUS_INVALID_DEVICE_REQUEST; +} + +NTSTATUS vol_shutdown(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { + TRACE("(%p, %p)\n", DeviceObject, Irp); + + return STATUS_INVALID_DEVICE_REQUEST; +} + +NTSTATUS vol_query_security(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { + TRACE("(%p, %p)\n", DeviceObject, Irp); + + return STATUS_INVALID_DEVICE_REQUEST; +} + +NTSTATUS vol_set_security(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { + TRACE("(%p, %p)\n", DeviceObject, Irp); + + return STATUS_INVALID_DEVICE_REQUEST; +} + +NTSTATUS vol_power(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { + PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); + 
NTSTATUS Status; + + TRACE("(%p, %p)\n", DeviceObject, Irp); + + if (IrpSp->MinorFunction == IRP_MN_SET_POWER || IrpSp->MinorFunction == IRP_MN_QUERY_POWER) + Irp->IoStatus.Status = STATUS_SUCCESS; + + Status = Irp->IoStatus.Status; + PoStartNextPowerIrp(Irp); + + return Status; +} + +NTSTATUS mountmgr_add_drive_letter(PDEVICE_OBJECT mountmgr, PUNICODE_STRING devpath) { + NTSTATUS Status; + ULONG mmdltsize; + MOUNTMGR_DRIVE_LETTER_TARGET* mmdlt; + MOUNTMGR_DRIVE_LETTER_INFORMATION mmdli; + + mmdltsize = (ULONG)offsetof(MOUNTMGR_DRIVE_LETTER_TARGET, DeviceName[0]) + devpath->Length; + + mmdlt = ExAllocatePoolWithTag(NonPagedPool, mmdltsize, ALLOC_TAG); + if (!mmdlt) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + mmdlt->DeviceNameLength = devpath->Length; + RtlCopyMemory(&mmdlt->DeviceName, devpath->Buffer, devpath->Length); + TRACE("mmdlt = %.*S\n", mmdlt->DeviceNameLength / sizeof(WCHAR), mmdlt->DeviceName); + + Status = dev_ioctl(mountmgr, IOCTL_MOUNTMGR_NEXT_DRIVE_LETTER, mmdlt, mmdltsize, &mmdli, sizeof(MOUNTMGR_DRIVE_LETTER_INFORMATION), FALSE, NULL); + + if (!NT_SUCCESS(Status)) + ERR("IOCTL_MOUNTMGR_NEXT_DRIVE_LETTER returned %08x\n", Status); + else + TRACE("DriveLetterWasAssigned = %u, CurrentDriveLetter = %c\n", mmdli.DriveLetterWasAssigned, mmdli.CurrentDriveLetter); + + ExFreePool(mmdlt); + + return Status; +} + +_Function_class_(DRIVER_NOTIFICATION_CALLBACK_ROUTINE) +#ifdef __REACTOS__ +NTSTATUS NTAPI pnp_removal(PVOID NotificationStructure, PVOID Context) { +#else +NTSTATUS pnp_removal(PVOID NotificationStructure, PVOID Context) { +#endif + TARGET_DEVICE_REMOVAL_NOTIFICATION* tdrn = (TARGET_DEVICE_REMOVAL_NOTIFICATION*)NotificationStructure; + pdo_device_extension* pdode = (pdo_device_extension*)Context; + + if (RtlCompareMemory(&tdrn->Event, &GUID_TARGET_DEVICE_QUERY_REMOVE, sizeof(GUID)) == sizeof(GUID)) { + TRACE("GUID_TARGET_DEVICE_QUERY_REMOVE\n"); + + if (pdode->vde && pdode->vde->mounted_device) + return 
pnp_query_remove_device(pdode->vde->mounted_device, NULL); + } + + return STATUS_SUCCESS; +} + +static BOOL allow_degraded_mount(BTRFS_UUID* uuid) { + HANDLE h; + NTSTATUS Status; + OBJECT_ATTRIBUTES oa; + UNICODE_STRING path, adus; + UINT32 degraded = mount_allow_degraded; + ULONG i, j, kvfilen, retlen; + KEY_VALUE_FULL_INFORMATION* kvfi; + + path.Length = path.MaximumLength = registry_path.Length + (37 * sizeof(WCHAR)); + path.Buffer = ExAllocatePoolWithTag(PagedPool, path.Length, ALLOC_TAG); + + if (!path.Buffer) { + ERR("out of memory\n"); + return FALSE; + } + + RtlCopyMemory(path.Buffer, registry_path.Buffer, registry_path.Length); + i = registry_path.Length / sizeof(WCHAR); + + path.Buffer[i] = '\\'; + i++; + + for (j = 0; j < 16; j++) { + path.Buffer[i] = hex_digit((uuid->uuid[j] & 0xF0) >> 4); + path.Buffer[i+1] = hex_digit(uuid->uuid[j] & 0xF); + + i += 2; + + if (j == 3 || j == 5 || j == 7 || j == 9) { + path.Buffer[i] = '-'; + i++; + } + } + + InitializeObjectAttributes(&oa, &path, OBJ_CASE_INSENSITIVE | OBJ_KERNEL_HANDLE, NULL, NULL); + + kvfilen = (ULONG)offsetof(KEY_VALUE_FULL_INFORMATION, Name[0]) + (255 * sizeof(WCHAR)); + kvfi = ExAllocatePoolWithTag(PagedPool, kvfilen, ALLOC_TAG); + if (!kvfi) { + ERR("out of memory\n"); + ExFreePool(path.Buffer); + return FALSE; + } + + Status = ZwOpenKey(&h, KEY_QUERY_VALUE, &oa); + if (Status == STATUS_OBJECT_NAME_NOT_FOUND) + goto end; + else if (!NT_SUCCESS(Status)) { + ERR("ZwOpenKey returned %08x\n", Status); + goto end; + } + + adus.Buffer = L"AllowDegraded"; + adus.Length = adus.MaximumLength = (USHORT)(wcslen(adus.Buffer) * sizeof(WCHAR)); + + if (NT_SUCCESS(ZwQueryValueKey(h, &adus, KeyValueFullInformation, kvfi, kvfilen, &retlen))) { + if (kvfi->Type == REG_DWORD && kvfi->DataLength >= sizeof(UINT32)) { + UINT32* val = (UINT32*)((UINT8*)kvfi + kvfi->DataOffset); + + degraded = *val; + } + } + + ZwClose(h); + + ExFreePool(kvfi); + +end: + ExFreePool(path.Buffer); + + return degraded; +} + +void 
add_volume_device(superblock* sb, PDEVICE_OBJECT mountmgr, PUNICODE_STRING devpath, UINT64 length, ULONG disk_num, ULONG part_num) { + NTSTATUS Status; + LIST_ENTRY* le; + PDEVICE_OBJECT DeviceObject; + volume_child* vc; + PFILE_OBJECT FileObject; + UNICODE_STRING devpath2; + BOOL inserted = FALSE, new_pdo = FALSE; + pdo_device_extension* pdode = NULL; + PDEVICE_OBJECT pdo = NULL; + + if (devpath->Length == 0) + return; + + ExAcquireResourceExclusiveLite(&pdo_list_lock, TRUE); + + le = pdo_list.Flink; + while (le != &pdo_list) { + pdo_device_extension* pdode2 = CONTAINING_RECORD(le, pdo_device_extension, list_entry); + + if (RtlCompareMemory(&pdode2->uuid, &sb->uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { + pdode = pdode2; + break; + } + + le = le->Flink; + } + + Status = IoGetDeviceObjectPointer(devpath, FILE_READ_ATTRIBUTES, &FileObject, &DeviceObject); + if (!NT_SUCCESS(Status)) { + ERR("IoGetDeviceObjectPointer returned %08x\n", Status); + ExReleaseResourceLite(&pdo_list_lock); + return; + } + + if (!pdode) { + if (no_pnp) { + Status = IoReportDetectedDevice(drvobj, InterfaceTypeUndefined, 0xFFFFFFFF, 0xFFFFFFFF, NULL, NULL, 0, &pdo); + + if (!NT_SUCCESS(Status)) { + ERR("IoReportDetectedDevice returned %08x\n", Status); + ExReleaseResourceLite(&pdo_list_lock); + return; + } + + pdode = ExAllocatePoolWithTag(NonPagedPool, sizeof(pdo_device_extension), ALLOC_TAG); + + if (!pdode) { + ERR("out of memory\n"); + ExReleaseResourceLite(&pdo_list_lock); + return; + } + } else { + Status = IoCreateDevice(drvobj, sizeof(pdo_device_extension), NULL, FILE_DEVICE_DISK, + FILE_AUTOGENERATED_DEVICE_NAME | FILE_DEVICE_SECURE_OPEN, FALSE, &pdo); + if (!NT_SUCCESS(Status)) { + ERR("IoCreateDevice returned %08x\n", Status); + ExReleaseResourceLite(&pdo_list_lock); + goto fail; + } + + pdo->Flags |= DO_BUS_ENUMERATED_DEVICE; + + pdode = pdo->DeviceExtension; + } + + RtlZeroMemory(pdode, sizeof(pdo_device_extension)); + + pdode->type = VCB_TYPE_PDO; + pdode->pdo = pdo; + 
pdode->uuid = sb->uuid; + + ExInitializeResourceLite(&pdode->child_lock); + InitializeListHead(&pdode->children); + pdode->num_children = sb->num_devices; + pdode->children_loaded = 0; + + pdo->Flags &= ~DO_DEVICE_INITIALIZING; + pdo->SectorSize = (USHORT)sb->sector_size; + + ExAcquireResourceExclusiveLite(&pdode->child_lock, TRUE); + + new_pdo = TRUE; + } else { + ExAcquireResourceExclusiveLite(&pdode->child_lock, TRUE); + ExConvertExclusiveToSharedLite(&pdo_list_lock); + + le = pdode->children.Flink; + while (le != &pdode->children) { + volume_child* vc2 = CONTAINING_RECORD(le, volume_child, list_entry); + + if (RtlCompareMemory(&vc2->uuid, &sb->dev_item.device_uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { + // duplicate, ignore + ExReleaseResourceLite(&pdode->child_lock); + ExReleaseResourceLite(&pdo_list_lock); + goto fail; + } + + le = le->Flink; + } + } + + vc = ExAllocatePoolWithTag(PagedPool, sizeof(volume_child), ALLOC_TAG); + if (!vc) { + ERR("out of memory\n"); + + ExReleaseResourceLite(&pdode->child_lock); + ExReleaseResourceLite(&pdo_list_lock); + + goto fail; + } + + vc->uuid = sb->dev_item.device_uuid; + vc->devid = sb->dev_item.dev_id; + vc->generation = sb->generation; + vc->notification_entry = NULL; + + Status = IoRegisterPlugPlayNotification(EventCategoryTargetDeviceChange, 0, FileObject, + drvobj, pnp_removal, pdode, &vc->notification_entry); + if (!NT_SUCCESS(Status)) + WARN("IoRegisterPlugPlayNotification returned %08x\n", Status); + + vc->devobj = DeviceObject; + vc->fileobj = FileObject; + + devpath2 = *devpath; + + // The PNP path sometimes begins \\?\ and sometimes \??\. We need to remove this prefix + // so we can compare properly if the device is removed. + if (devpath->Length > 4 * sizeof(WCHAR) && devpath->Buffer[0] == '\\' && (devpath->Buffer[1] == '\\' || devpath->Buffer[1] == '?') && + devpath->Buffer[2] == '?' 
&& devpath->Buffer[3] == '\\') { + devpath2.Buffer = &devpath2.Buffer[3]; + devpath2.Length -= 3 * sizeof(WCHAR); + devpath2.MaximumLength -= 3 * sizeof(WCHAR); + } + + vc->pnp_name.Length = vc->pnp_name.MaximumLength = devpath2.Length; + vc->pnp_name.Buffer = ExAllocatePoolWithTag(PagedPool, devpath2.Length, ALLOC_TAG); + + if (vc->pnp_name.Buffer) + RtlCopyMemory(vc->pnp_name.Buffer, devpath2.Buffer, devpath2.Length); + else { + ERR("out of memory\n"); + vc->pnp_name.Length = vc->pnp_name.MaximumLength = 0; + } + + vc->size = length; + vc->seeding = sb->flags & BTRFS_SUPERBLOCK_FLAGS_SEEDING ? TRUE : FALSE; + vc->disk_num = disk_num; + vc->part_num = part_num; + vc->had_drive_letter = FALSE; + + le = pdode->children.Flink; + while (le != &pdode->children) { + volume_child* vc2 = CONTAINING_RECORD(le, volume_child, list_entry); + + if (vc2->generation < vc->generation) { + if (le == pdode->children.Flink) + pdode->num_children = sb->num_devices; + + InsertHeadList(vc2->list_entry.Blink, &vc->list_entry); + inserted = TRUE; + break; + } + + le = le->Flink; + } + + if (!inserted) + InsertTailList(&pdode->children, &vc->list_entry); + + pdode->children_loaded++; + + if (pdode->vde && pdode->vde->mounted_device) { + device_extension* Vcb = pdode->vde->mounted_device->DeviceExtension; + + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (!dev->devobj && RtlCompareMemory(&dev->devitem.device_uuid, &sb->dev_item.device_uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { + dev->devobj = DeviceObject; + dev->disk_num = disk_num; + dev->part_num = part_num; + init_device(Vcb, dev, FALSE); + break; + } + + le = le->Flink; + } + + ExReleaseResourceLite(&Vcb->tree_lock); + } + + if (DeviceObject->Characteristics & FILE_REMOVABLE_MEDIA) { + pdode->removable = TRUE; + + if (pdode->vde && pdode->vde->device) + pdode->vde->device->Characteristics |= 
FILE_REMOVABLE_MEDIA; + } + + if (pdode->num_children == pdode->children_loaded || (pdode->children_loaded == 1 && allow_degraded_mount(&sb->uuid))) { + if (pdode->num_children == 1) { + Status = remove_drive_letter(mountmgr, devpath); + if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) + WARN("remove_drive_letter returned %08x\n", Status); + + vc->had_drive_letter = NT_SUCCESS(Status); + } else { + le = pdode->children.Flink; + + while (le != &pdode->children) { + UNICODE_STRING name; + + vc = CONTAINING_RECORD(le, volume_child, list_entry); + + name.Length = name.MaximumLength = vc->pnp_name.Length + (3 * sizeof(WCHAR)); + name.Buffer = ExAllocatePoolWithTag(PagedPool, name.Length, ALLOC_TAG); + + if (!name.Buffer) { + ERR("out of memory\n"); + + ExReleaseResourceLite(&pdode->child_lock); + ExReleaseResourceLite(&pdo_list_lock); + + goto fail; + } + + RtlCopyMemory(name.Buffer, L"\\??", 3 * sizeof(WCHAR)); + RtlCopyMemory(&name.Buffer[3], vc->pnp_name.Buffer, vc->pnp_name.Length); + + Status = remove_drive_letter(mountmgr, &name); + + if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) + WARN("remove_drive_letter returned %08x\n", Status); + + ExFreePool(name.Buffer); + + vc->had_drive_letter = NT_SUCCESS(Status); + + le = le->Flink; + } + } + + if ((!new_pdo || !no_pnp) && pdode->vde) { + Status = IoSetDeviceInterfaceState(&pdode->vde->bus_name, TRUE); + if (!NT_SUCCESS(Status)) + WARN("IoSetDeviceInterfaceState returned %08x\n", Status); + } + } + + ExReleaseResourceLite(&pdode->child_lock); + + if (new_pdo) { + control_device_extension* cde = master_devobj->DeviceExtension; + + InsertTailList(&pdo_list, &pdode->list_entry); + + if (!no_pnp) + IoInvalidateDeviceRelations(cde->buspdo, BusRelations); + } + + ExReleaseResourceLite(&pdo_list_lock); + + if (new_pdo && no_pnp) + AddDevice(drvobj, pdo); + + return; + +fail: + ObDereferenceObject(FileObject); +} diff --git a/reactos/drivers/filesystems/btrfs/worker-thread.c 
b/reactos/drivers/filesystems/btrfs/worker-thread.c index 276e5071882..4344c6cf410 100644 --- a/reactos/drivers/filesystems/btrfs/worker-thread.c +++ b/reactos/drivers/filesystems/btrfs/worker-thread.c @@ -1,17 +1,17 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . */ @@ -31,60 +31,64 @@ void do_read_job(PIRP Irp) { PFILE_OBJECT FileObject = IrpSp->FileObject; fcb* fcb = FileObject->FsContext; BOOL fcb_lock = FALSE; - + Irp->IoStatus.Information = 0; - + if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) { ExAcquireResourceSharedLite(fcb->Header.Resource, TRUE); fcb_lock = TRUE; } - - Status = do_read(Irp, TRUE, &bytes_read); - + + _SEH2_TRY { + Status = do_read(Irp, TRUE, &bytes_read); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + if (fcb_lock) ExReleaseResourceLite(fcb->Header.Resource); + if (!NT_SUCCESS(Status)) + ERR("do_read returned %08x\n", Status); + Irp->IoStatus.Status = Status; - -// // fastfat doesn't do this, but the Wine ntdll file test seems to think we ought to -// if (Irp->UserIosb) -// *Irp->UserIosb = Irp->IoStatus; - - TRACE("Irp->IoStatus.Status = %08x\n", Irp->IoStatus.Status); - TRACE("Irp->IoStatus.Information = %lu\n", Irp->IoStatus.Information); - TRACE("returning %08x\n", Status); - + 
+ TRACE("read %lu bytes\n", Irp->IoStatus.Information); + IoCompleteRequest(Irp, IO_NO_INCREMENT); - - if (top_level) + + if (top_level) IoSetTopLevelIrp(NULL); + + TRACE("returning %08x\n", Status); } void do_write_job(device_extension* Vcb, PIRP Irp) { BOOL top_level = is_top_level(Irp); NTSTATUS Status; - + _SEH2_TRY { Status = write_file(Vcb, Irp, TRUE, TRUE); } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { Status = _SEH2_GetExceptionCode(); } _SEH2_END; - + if (!NT_SUCCESS(Status)) ERR("write_file returned %08x\n", Status); - + Irp->IoStatus.Status = Status; TRACE("wrote %u bytes\n", Irp->IoStatus.Information); - + IoCompleteRequest(Irp, IO_NO_INCREMENT); - - if (top_level) + + if (top_level) IoSetTopLevelIrp(NULL); - + TRACE("returning %08x\n", Status); } +_Function_class_(WORKER_THREAD_ROUTINE) #ifdef __REACTOS__ static void NTAPI do_job(void* context) { #else @@ -92,34 +96,34 @@ static void do_job(void* context) { #endif job_info* ji = context; PIO_STACK_LOCATION IrpSp = ji->Irp ? IoGetCurrentIrpStackLocation(ji->Irp) : NULL; - + if (IrpSp->MajorFunction == IRP_MJ_READ) { do_read_job(ji->Irp); } else if (IrpSp->MajorFunction == IRP_MJ_WRITE) { do_write_job(ji->Vcb, ji->Irp); } - + ExFreePool(ji); } BOOL add_thread_job(device_extension* Vcb, PIRP Irp) { job_info* ji; - + ji = ExAllocatePoolWithTag(NonPagedPool, sizeof(job_info), ALLOC_TAG); if (!ji) { ERR("out of memory\n"); return FALSE; } - + ji->Vcb = Vcb; ji->Irp = Irp; - + if (!Irp->MdlAddress) { PMDL Mdl; LOCK_OPERATION op; ULONG len; PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); - + if (IrpSp->MajorFunction == IRP_MJ_READ) { op = IoWriteAccess; len = IrpSp->Parameters.Read.Length; @@ -128,16 +132,18 @@ BOOL add_thread_job(device_extension* Vcb, PIRP Irp) { len = IrpSp->Parameters.Write.Length; } else { ERR("unexpected major function %u\n", IrpSp->MajorFunction); + ExFreePool(ji); return FALSE; } - + Mdl = IoAllocateMdl(Irp->UserBuffer, len, FALSE, FALSE, Irp); if (!Mdl) { ERR("out of 
memory\n"); + ExFreePool(ji); return FALSE; } - + _SEH2_TRY { MmProbeAndLockPages(Mdl, Irp->RequestorMode, op); } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { @@ -145,13 +151,14 @@ BOOL add_thread_job(device_extension* Vcb, PIRP Irp) { IoFreeMdl(Mdl); Irp->MdlAddress = NULL; + ExFreePool(ji); _SEH2_YIELD(return FALSE); } _SEH2_END; } - + ExInitializeWorkItem(&ji->item, do_job, ji); ExQueueWorkItem(&ji->item, DelayedWorkQueue); - + return TRUE; } diff --git a/reactos/drivers/filesystems/btrfs/write.c b/reactos/drivers/filesystems/btrfs/write.c index c4698e0b25d..95e6397424d 100644 --- a/reactos/drivers/filesystems/btrfs/write.c +++ b/reactos/drivers/filesystems/btrfs/write.c @@ -1,114 +1,114 @@ -/* Copyright (c) Mark Harmstone 2016 - * +/* Copyright (c) Mark Harmstone 2016-17 + * * This file is part of WinBtrfs. - * + * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. - * + * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. - * + * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . 
*/ #include "btrfs_drv.h" -// BOOL did_split; -BOOL chunk_test = FALSE; - typedef struct { UINT64 start; UINT64 end; UINT8* data; - UINT32 skip_start; - UINT32 skip_end; + PMDL mdl; + UINT64 irp_offset; } write_stripe; -typedef struct { - LONG stripes_left; - KEVENT event; -} read_stripe_master; +_Function_class_(IO_COMPLETION_ROUTINE) +#ifdef __REACTOS__ +static NTSTATUS NTAPI write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr); +#else +static NTSTATUS write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr); +#endif -typedef struct { - PIRP Irp; - PDEVICE_OBJECT devobj; - IO_STATUS_BLOCK iosb; - read_stripe_master* master; -} read_stripe; - -// static BOOL extent_item_is_shared(EXTENT_ITEM* ei, ULONG len); -static NTSTATUS STDCALL write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr); static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback); -extern tPsUpdateDiskCounters PsUpdateDiskCounters; -extern tCcCopyWriteEx CcCopyWriteEx; +extern tPsUpdateDiskCounters fPsUpdateDiskCounters; +extern tCcCopyWriteEx fCcCopyWriteEx; +extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters; extern BOOL diskacc; BOOL find_data_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address) { LIST_ENTRY* le; space* s; - + TRACE("(%p, %llx, %llx, %p)\n", Vcb, c->offset, length, address); - + + if (length > c->chunk_item->size - c->used) + return FALSE; + + if (!c->cache_loaded) { + NTSTATUS Status = load_cache_chunk(Vcb, c, NULL); + + if (!NT_SUCCESS(Status)) { + ERR("load_cache_chunk returned %08x\n", Status); + return FALSE; + } + } + if (IsListEmpty(&c->space_size)) return FALSE; - + le = c->space_size.Flink; while (le != &c->space_size) { s = CONTAINING_RECORD(le, space, list_entry_size); - + if (s->size == length) { *address = s->address; return TRUE; } else if (s->size < length) { if (le == c->space_size.Flink) return FALSE; - + s = CONTAINING_RECORD(le->Blink, space, 
list_entry_size); - + *address = s->address; return TRUE; } - + le = le->Flink; } - + s = CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size); - + if (s->size > length) { *address = s->address; return TRUE; } - + return FALSE; } chunk* get_chunk_from_address(device_extension* Vcb, UINT64 address) { LIST_ENTRY* le2; - chunk* c; - + ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE); - + le2 = Vcb->chunks.Flink; while (le2 != &Vcb->chunks) { - c = CONTAINING_RECORD(le2, chunk, list_entry); - -// TRACE("chunk: %llx, %llx\n", c->offset, c->chunk_item->size); - + chunk* c = CONTAINING_RECORD(le2, chunk, list_entry); + if (address >= c->offset && address < c->offset + c->chunk_item->size) { ExReleaseResourceLite(&Vcb->chunk_lock); return c; } - + le2 = le2->Flink; } - + ExReleaseResourceLite(&Vcb->chunk_lock); - + return NULL; } @@ -120,48 +120,48 @@ typedef struct { static UINT64 find_new_chunk_address(device_extension* Vcb, UINT64 size) { UINT64 lastaddr; LIST_ENTRY* le; - + lastaddr = 0xc00000; - + le = Vcb->chunks.Flink; while (le != &Vcb->chunks) { chunk* c = CONTAINING_RECORD(le, chunk, list_entry); - + if (c->offset >= lastaddr + size) return lastaddr; - + lastaddr = c->offset + c->chunk_item->size; - + le = le->Flink; } - + return lastaddr; } -static BOOL find_new_dup_stripes(device_extension* Vcb, stripe* stripes, UINT64 max_stripe_size) { +static BOOL find_new_dup_stripes(device_extension* Vcb, stripe* stripes, UINT64 max_stripe_size, BOOL full_size) { UINT64 devusage = 0xffffffffffffffff; space *devdh1 = NULL, *devdh2 = NULL; LIST_ENTRY* le; - device* dev2; - + device* dev2 = NULL; + le = Vcb->devices.Flink; - + while (le != &Vcb->devices) { device* dev = CONTAINING_RECORD(le, device, list_entry); - - if (!dev->readonly && !dev->reloc) { + + if (!dev->readonly && !dev->reloc && dev->devobj) { UINT64 usage = (dev->devitem.bytes_used * 4096) / dev->devitem.num_bytes; - + // favour devices which have been used the least if (usage < devusage) { if 
(!IsListEmpty(&dev->space)) { LIST_ENTRY* le2; space *dh1 = NULL, *dh2 = NULL; - + le2 = dev->space.Flink; while (le2 != &dev->space) { space* dh = CONTAINING_RECORD(le2, space, list_entry); - + if (dh->size >= max_stripe_size && (!dh1 || !dh2 || dh->size < dh1->size)) { dh2 = dh1; dh1 = dh; @@ -169,7 +169,7 @@ static BOOL find_new_dup_stripes(device_extension* Vcb, stripe* stripes, UINT64 le2 = le2->Flink; } - + if (dh1 && (dh2 || dh1->size >= 2 * max_stripe_size)) { dev2 = dev; devusage = usage; @@ -179,28 +179,31 @@ static BOOL find_new_dup_stripes(device_extension* Vcb, stripe* stripes, UINT64 } } } - + le = le->Flink; } - + if (!devdh1) { UINT64 size = 0; - + // Can't find hole of at least max_stripe_size; look for the largest one we can find - + + if (full_size) + return FALSE; + le = Vcb->devices.Flink; while (le != &Vcb->devices) { device* dev = CONTAINING_RECORD(le, device, list_entry); - + if (!dev->readonly && !dev->reloc) { if (!IsListEmpty(&dev->space)) { LIST_ENTRY* le2; space *dh1 = NULL, *dh2 = NULL; - + le2 = dev->space.Flink; while (le2 != &dev->space) { space* dh = CONTAINING_RECORD(le2, space, list_entry); - + if (!dh1 || !dh2 || dh->size < dh1->size) { dh2 = dh1; dh1 = dh; @@ -208,57 +211,57 @@ static BOOL find_new_dup_stripes(device_extension* Vcb, stripe* stripes, UINT64 le2 = le2->Flink; } - + if (dh1) { UINT64 devsize; - + if (dh2) devsize = max(dh1->size / 2, min(dh1->size, dh2->size)); else - devsize = min(dh1->size, dh2->size); - + devsize = dh1->size / 2; + if (devsize > size) { dev2 = dev; devdh1 = dh1; - + if (dh2 && min(dh1->size, dh2->size) > dh1->size / 2) devdh2 = dh2; else devdh2 = dh1; - + size = devsize; } } } } - + le = le->Flink; } - + if (!devdh1) return FALSE; } - + stripes[0].device = stripes[1].device = dev2; stripes[0].dh = devdh1; stripes[1].dh = devdh2; - + return TRUE; } -static BOOL find_new_stripe(device_extension* Vcb, stripe* stripes, UINT16 i, UINT64 max_stripe_size, UINT16 type) { +static BOOL 
find_new_stripe(device_extension* Vcb, stripe* stripes, UINT16 i, UINT64 max_stripe_size, BOOL allow_missing, BOOL full_size) { UINT64 k, devusage = 0xffffffffffffffff; space* devdh = NULL; LIST_ENTRY* le; device* dev2 = NULL; - + le = Vcb->devices.Flink; while (le != &Vcb->devices) { device* dev = CONTAINING_RECORD(le, device, list_entry); UINT64 usage; BOOL skip = FALSE; - - if (dev->readonly || dev->reloc) { + + if (dev->readonly || dev->reloc || (!dev->devobj && !allow_missing)) { le = le->Flink; continue; } @@ -272,19 +275,19 @@ static BOOL find_new_stripe(device_extension* Vcb, stripe* stripes, UINT16 i, UI } } } - + if (!skip) { usage = (dev->devitem.bytes_used * 4096) / dev->devitem.num_bytes; - + // favour devices which have been used the least if (usage < devusage) { if (!IsListEmpty(&dev->space)) { LIST_ENTRY* le2; - + le2 = dev->space.Flink; while (le2 != &dev->space) { space* dh = CONTAINING_RECORD(le2, space, list_entry); - + if ((dev2 != dev && dh->size >= max_stripe_size) || (dev2 == dev && dh->size >= max_stripe_size && dh->size < devdh->size) ) { @@ -298,19 +301,22 @@ static BOOL find_new_stripe(device_extension* Vcb, stripe* stripes, UINT16 i, UI } } } - + le = le->Flink; } - + if (!devdh) { // Can't find hole of at least max_stripe_size; look for the largest one we can find - + + if (full_size) + return FALSE; + le = Vcb->devices.Flink; while (le != &Vcb->devices) { device* dev = CONTAINING_RECORD(le, device, list_entry); BOOL skip = FALSE; - - if (dev->readonly || dev->reloc) { + + if (dev->readonly || dev->reloc || (!dev->devobj && !allow_missing)) { le = le->Flink; continue; } @@ -324,15 +330,15 @@ static BOOL find_new_stripe(device_extension* Vcb, stripe* stripes, UINT16 i, UI } } } - + if (!skip) { if (!IsListEmpty(&dev->space)) { LIST_ENTRY* le2; - + le2 = dev->space.Flink; while (le2 != &dev->space) { space* dh = CONTAINING_RECORD(le2, space, list_entry); - + if (!devdh || devdh->size < dh->size) { devdh = dh; dev2 = dev; @@ -342,44 
+348,42 @@ static BOOL find_new_stripe(device_extension* Vcb, stripe* stripes, UINT16 i, UI } } } - + le = le->Flink; } - + if (!devdh) return FALSE; } - + stripes[i].dh = devdh; stripes[i].device = dev2; return TRUE; } -chunk* alloc_chunk(device_extension* Vcb, UINT64 flags) { +NTSTATUS alloc_chunk(device_extension* Vcb, UINT64 flags, chunk** pc, BOOL full_size) { + NTSTATUS Status; UINT64 max_stripe_size, max_chunk_size, stripe_size, stripe_length, factor; - UINT64 total_size = 0, i, logaddr; - UINT16 type, num_stripes, sub_stripes, max_stripes, min_stripes; + UINT64 total_size = 0, logaddr; + UINT16 i, type, num_stripes, sub_stripes, max_stripes, min_stripes, allowed_missing; stripe* stripes = NULL; - ULONG cisize; + UINT16 cisize; CHUNK_ITEM_STRIPE* cis; chunk* c = NULL; space* s = NULL; - BOOL success = FALSE; LIST_ENTRY* le; - - ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); - + le = Vcb->devices.Flink; while (le != &Vcb->devices) { device* dev = CONTAINING_RECORD(le, device, list_entry); total_size += dev->devitem.num_bytes; - + le = le->Flink; } - + TRACE("total_size = %llx\n", total_size); - + // We purposely check for DATA first - mixed blocks have the same size // as DATA ones. 
if (flags & BLOCK_FLAG_DATA) { @@ -390,101 +394,133 @@ chunk* alloc_chunk(device_extension* Vcb, UINT64 flags) { max_stripe_size = 0x40000000; // 1 GB else max_stripe_size = 0x10000000; // 256 MB - + max_chunk_size = max_stripe_size; } else if (flags & BLOCK_FLAG_SYSTEM) { max_stripe_size = 0x2000000; // 32 MB max_chunk_size = 2 * max_stripe_size; + } else { + ERR("unknown chunk type\n"); + return STATUS_INTERNAL_ERROR; } - + max_chunk_size = min(max_chunk_size, total_size / 10); // cap at 10% - + TRACE("would allocate a new chunk of %llx bytes and stripe %llx\n", max_chunk_size, max_stripe_size); - + if (flags & BLOCK_FLAG_DUPLICATE) { min_stripes = 2; max_stripes = 2; sub_stripes = 0; type = BLOCK_FLAG_DUPLICATE; + allowed_missing = 0; } else if (flags & BLOCK_FLAG_RAID0) { min_stripes = 2; - max_stripes = Vcb->superblock.num_devices; + max_stripes = (UINT16)min(0xffff, Vcb->superblock.num_devices); sub_stripes = 0; type = BLOCK_FLAG_RAID0; + allowed_missing = 0; } else if (flags & BLOCK_FLAG_RAID1) { min_stripes = 2; max_stripes = 2; sub_stripes = 1; type = BLOCK_FLAG_RAID1; + allowed_missing = 1; } else if (flags & BLOCK_FLAG_RAID10) { min_stripes = 4; - max_stripes = Vcb->superblock.num_devices; + max_stripes = (UINT16)min(0xffff, Vcb->superblock.num_devices); sub_stripes = 2; type = BLOCK_FLAG_RAID10; + allowed_missing = 1; } else if (flags & BLOCK_FLAG_RAID5) { min_stripes = 3; - max_stripes = Vcb->superblock.num_devices; + max_stripes = (UINT16)min(0xffff, Vcb->superblock.num_devices); sub_stripes = 1; type = BLOCK_FLAG_RAID5; + allowed_missing = 1; } else if (flags & BLOCK_FLAG_RAID6) { min_stripes = 4; max_stripes = 257; sub_stripes = 1; type = BLOCK_FLAG_RAID6; + allowed_missing = 2; } else { // SINGLE min_stripes = 1; max_stripes = 1; sub_stripes = 1; type = 0; + allowed_missing = 0; } - + stripes = ExAllocatePoolWithTag(PagedPool, sizeof(stripe) * max_stripes, ALLOC_TAG); if (!stripes) { ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; 
goto end; } - + num_stripes = 0; - + if (type == BLOCK_FLAG_DUPLICATE) { - if (!find_new_dup_stripes(Vcb, stripes, max_stripe_size)) + if (!find_new_dup_stripes(Vcb, stripes, max_stripe_size, full_size)) { + Status = STATUS_DISK_FULL; goto end; + } else num_stripes = max_stripes; } else { for (i = 0; i < max_stripes; i++) { - if (!find_new_stripe(Vcb, stripes, i, max_stripe_size, type)) + if (!find_new_stripe(Vcb, stripes, i, max_stripe_size, FALSE, full_size)) break; else num_stripes++; } } - + + if (num_stripes < min_stripes && Vcb->options.allow_degraded && allowed_missing > 0) { + UINT16 added_missing = 0; + + for (i = num_stripes; i < max_stripes; i++) { + if (!find_new_stripe(Vcb, stripes, i, max_stripe_size, TRUE, full_size)) + break; + else { + added_missing++; + if (added_missing >= allowed_missing) + break; + } + } + + num_stripes += added_missing; + } + // for RAID10, round down to an even number of stripes if (type == BLOCK_FLAG_RAID10 && (num_stripes % sub_stripes) != 0) { num_stripes -= num_stripes % sub_stripes; } - + if (num_stripes < min_stripes) { WARN("found %u stripes, needed at least %u\n", num_stripes, min_stripes); + Status = STATUS_DISK_FULL; goto end; } - + c = ExAllocatePoolWithTag(NonPagedPool, sizeof(chunk), ALLOC_TAG); if (!c) { ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + cisize = sizeof(CHUNK_ITEM) + (num_stripes * sizeof(CHUNK_ITEM_STRIPE)); c->chunk_item = ExAllocatePoolWithTag(NonPagedPool, cisize, ALLOC_TAG); if (!c->chunk_item) { ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + stripe_length = 0x10000; // FIXME? 
BTRFS_STRIPE_LEN in kernel - + if (type == BLOCK_FLAG_DUPLICATE && stripes[1].dh == stripes[0].dh) stripe_size = min(stripes[0].dh->size / 2, max_stripe_size); else { @@ -494,7 +530,7 @@ chunk* alloc_chunk(device_extension* Vcb, UINT64 flags) { stripe_size = stripes[i].dh->size; } } - + if (type == 0 || type == BLOCK_FLAG_DUPLICATE || type == BLOCK_FLAG_RAID1) factor = 1; else if (type == BLOCK_FLAG_RAID0) @@ -505,138 +541,162 @@ chunk* alloc_chunk(device_extension* Vcb, UINT64 flags) { factor = num_stripes - 1; else if (type == BLOCK_FLAG_RAID6) factor = num_stripes - 2; - + if (stripe_size * factor > max_chunk_size) stripe_size = max_chunk_size / factor; - + if (stripe_size % stripe_length > 0) stripe_size -= stripe_size % stripe_length; - - if (stripe_size == 0) + + if (stripe_size == 0) { + Status = STATUS_INTERNAL_ERROR; goto end; - + } + c->chunk_item->size = stripe_size * factor; c->chunk_item->root_id = Vcb->extent_root->id; c->chunk_item->stripe_length = stripe_length; c->chunk_item->type = flags; - c->chunk_item->opt_io_alignment = c->chunk_item->stripe_length; - c->chunk_item->opt_io_width = c->chunk_item->stripe_length; + c->chunk_item->opt_io_alignment = (UINT32)c->chunk_item->stripe_length; + c->chunk_item->opt_io_width = (UINT32)c->chunk_item->stripe_length; c->chunk_item->sector_size = stripes[0].device->devitem.minimal_io_size; c->chunk_item->num_stripes = num_stripes; c->chunk_item->sub_stripes = sub_stripes; - + c->devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * num_stripes, ALLOC_TAG); if (!c->devices) { ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; for (i = 0; i < num_stripes; i++) { cis[i].dev_id = stripes[i].device->devitem.dev_id; - + if (type == BLOCK_FLAG_DUPLICATE && i == 1 && stripes[i].dh == stripes[0].dh) cis[i].offset = stripes[0].dh->address + stripe_size; else cis[i].offset = stripes[i].dh->address; - + cis[i].dev_uuid = 
stripes[i].device->devitem.device_uuid; - + c->devices[i] = stripes[i].device; } - + logaddr = find_new_chunk_address(Vcb, c->chunk_item->size); - + Vcb->superblock.chunk_root_generation = Vcb->superblock.generation; - + c->size = cisize; c->offset = logaddr; c->used = c->oldused = 0; - c->cache = NULL; + c->cache = c->old_cache = NULL; c->readonly = FALSE; c->reloc = FALSE; c->last_alloc_set = FALSE; - + c->last_stripe = 0; + c->cache_loaded = TRUE; + c->changed = FALSE; + c->space_changed = FALSE; + c->balance_num = 0; + InitializeListHead(&c->space); InitializeListHead(&c->space_size); InitializeListHead(&c->deleting); InitializeListHead(&c->changed_extents); - + InitializeListHead(&c->range_locks); - KeInitializeSpinLock(&c->range_locks_spinlock); + ExInitializeResourceLite(&c->range_locks_lock); KeInitializeEvent(&c->range_locks_event, NotificationEvent, FALSE); - + + InitializeListHead(&c->partial_stripes); + ExInitializeResourceLite(&c->partial_stripes_lock); + ExInitializeResourceLite(&c->lock); ExInitializeResourceLite(&c->changed_extents_lock); - + s = ExAllocatePoolWithTag(NonPagedPool, sizeof(space), ALLOC_TAG); if (!s) { ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + s->address = c->offset; s->size = c->chunk_item->size; InsertTailList(&c->space, &s->list_entry); InsertTailList(&c->space_size, &s->list_entry_size); - - protect_superblocks(Vcb, c); - + + protect_superblocks(c); + for (i = 0; i < num_stripes; i++) { stripes[i].device->devitem.bytes_used += stripe_size; - - space_list_subtract2(Vcb, &stripes[i].device->space, NULL, cis[i].offset, stripe_size, NULL); + + space_list_subtract2(&stripes[i].device->space, NULL, cis[i].offset, stripe_size, NULL, NULL); } - - success = TRUE; - + + Status = STATUS_SUCCESS; + if (flags & BLOCK_FLAG_RAID5 || flags & BLOCK_FLAG_RAID6) Vcb->superblock.incompat_flags |= BTRFS_INCOMPAT_FLAGS_RAID56; - + end: if (stripes) ExFreePool(stripes); - - if (!success) { - if (c && 
c->chunk_item) ExFreePool(c->chunk_item); - if (c) ExFreePool(c); + + if (!NT_SUCCESS(Status)) { + if (c) { + if (c->devices) + ExFreePool(c->devices); + + if (c->chunk_item) + ExFreePool(c->chunk_item); + + ExFreePool(c); + } + if (s) ExFreePool(s); } else { - LIST_ENTRY* le; BOOL done = FALSE; - + le = Vcb->chunks.Flink; while (le != &Vcb->chunks) { chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry); - + if (c2->offset > c->offset) { InsertHeadList(le->Blink, &c->list_entry); done = TRUE; break; } - + le = le->Flink; } - + if (!done) InsertTailList(&Vcb->chunks, &c->list_entry); - + c->created = TRUE; - InsertTailList(&Vcb->chunks_changed, &c->list_entry_changed); + c->changed = TRUE; + c->space_changed = TRUE; c->list_entry_balance.Flink = NULL; + + *pc = c; } - - ExReleaseResourceLite(&Vcb->chunk_lock); - return success ? c : NULL; + return Status; } -static NTSTATUS prepare_raid0_write(chunk* c, UINT64 address, void* data, UINT32 length, write_stripe* stripes) { +static NTSTATUS prepare_raid0_write(_Pre_satisfies_(_Curr_->chunk_item->num_stripes>0) _In_ chunk* c, _In_ UINT64 address, _In_reads_bytes_(length) void* data, + _In_ UINT32 length, _In_ write_stripe* stripes, _In_ PIRP Irp, _In_ UINT64 irp_offset, _In_ write_data_context* wtc) { UINT64 startoff, endoff; UINT16 startoffstripe, endoffstripe, stripenum; UINT64 pos, *stripeoff; UINT32 i; - + BOOL file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0); + PMDL master_mdl; + PFN_NUMBER* pfns; + stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes, ALLOC_TAG); if (!stripeoff) { ERR("out of memory\n"); @@ -645,899 +705,1188 @@ static NTSTATUS prepare_raid0_write(chunk* c, UINT64 address, void* data, UINT32 get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe); get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, 
&endoffstripe); - + + if (file_write) { + master_mdl = Irp->MdlAddress; + + pfns = (PFN_NUMBER*)(Irp->MdlAddress + 1); + pfns = &pfns[irp_offset >> PAGE_SHIFT]; + } else if (((ULONG_PTR)data % PAGE_SIZE) != 0) { + wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); + if (!wtc->scratch) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(wtc->scratch, data, length); + + master_mdl = IoAllocateMdl(wtc->scratch, length, FALSE, FALSE, NULL); + if (!master_mdl) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + MmBuildMdlForNonPagedPool(master_mdl); + + wtc->mdl = master_mdl; + + pfns = (PFN_NUMBER*)(master_mdl + 1); + } else { + NTSTATUS Status = STATUS_SUCCESS; + + master_mdl = IoAllocateMdl(data, length, FALSE, FALSE, NULL); + if (!master_mdl) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + _SEH2_TRY { + MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + + if (!NT_SUCCESS(Status)) { + ERR("MmProbeAndLockPages threw exception %08x\n", Status); + IoFreeMdl(master_mdl); + return Status; + } + + wtc->mdl = master_mdl; + + pfns = (PFN_NUMBER*)(master_mdl + 1); + } + for (i = 0; i < c->chunk_item->num_stripes; i++) { - if (startoffstripe > i) { + if (startoffstripe > i) stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; - } else if (startoffstripe == i) { + else if (startoffstripe == i) stripes[i].start = startoff; - } else { + else stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length); - } - - if (endoffstripe > i) { + + if (endoffstripe > i) stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; - } else if (endoffstripe == i) { + else if (endoffstripe == i) stripes[i].end = endoff + 1; - } else { + else stripes[i].end = endoff - (endoff % 
c->chunk_item->stripe_length); - } - + if (stripes[i].start != stripes[i].end) { - stripes[i].data = ExAllocatePoolWithTag(NonPagedPool, stripes[i].end - stripes[i].start, ALLOC_TAG); - - if (!stripes[i].data) { - ERR("out of memory\n"); + stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(stripes[i].end - stripes[i].start), FALSE, FALSE, NULL); + if (!stripes[i].mdl) { + ERR("IoAllocateMdl failed\n"); ExFreePool(stripeoff); return STATUS_INSUFFICIENT_RESOURCES; } } } - + pos = 0; RtlZeroMemory(stripeoff, sizeof(UINT64) * c->chunk_item->num_stripes); - + stripenum = startoffstripe; + while (pos < length) { + PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripenum].mdl + 1); + if (pos == 0) { - UINT32 writelen = min(stripes[stripenum].end - stripes[stripenum].start, - c->chunk_item->stripe_length - (stripes[stripenum].start % c->chunk_item->stripe_length)); - - RtlCopyMemory(stripes[stripenum].data, data, writelen); + UINT32 writelen = (UINT32)min(stripes[stripenum].end - stripes[stripenum].start, + c->chunk_item->stripe_length - (stripes[stripenum].start % c->chunk_item->stripe_length)); + + RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + stripeoff[stripenum] += writelen; pos += writelen; } else if (length - pos < c->chunk_item->stripe_length) { - RtlCopyMemory(stripes[stripenum].data + stripeoff[stripenum], (UINT8*)data + pos, length - pos); + RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)((length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); break; } else { - RtlCopyMemory(stripes[stripenum].data + stripeoff[stripenum], (UINT8*)data + pos, c->chunk_item->stripe_length); + RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); + stripeoff[stripenum] += c->chunk_item->stripe_length; pos += c->chunk_item->stripe_length; } - + stripenum = (stripenum + 1) % 
c->chunk_item->num_stripes; } ExFreePool(stripeoff); - + return STATUS_SUCCESS; } -static NTSTATUS prepare_raid10_write(chunk* c, UINT64 address, void* data, UINT32 length, write_stripe* stripes) { +static NTSTATUS prepare_raid10_write(_Pre_satisfies_(_Curr_->chunk_item->sub_stripes>0&&_Curr_->chunk_item->num_stripes>=_Curr_->chunk_item->sub_stripes) _In_ chunk* c, + _In_ UINT64 address, _In_reads_bytes_(length) void* data, _In_ UINT32 length, _In_ write_stripe* stripes, + _In_ PIRP Irp, _In_ UINT64 irp_offset, _In_ write_data_context* wtc) { UINT64 startoff, endoff; UINT16 startoffstripe, endoffstripe, stripenum; UINT64 pos, *stripeoff; UINT32 i; - - stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes, ALLOC_TAG); - if (!stripeoff) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } + BOOL file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0); + PMDL master_mdl; + PFN_NUMBER* pfns; get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &startoff, &startoffstripe); get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &endoff, &endoffstripe); + stripenum = startoffstripe; startoffstripe *= c->chunk_item->sub_stripes; endoffstripe *= c->chunk_item->sub_stripes; + if (file_write) { + master_mdl = Irp->MdlAddress; + + pfns = (PFN_NUMBER*)(Irp->MdlAddress + 1); + pfns = &pfns[irp_offset >> PAGE_SHIFT]; + } else if (((ULONG_PTR)data % PAGE_SIZE) != 0) { + wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); + if (!wtc->scratch) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(wtc->scratch, data, length); + + master_mdl = IoAllocateMdl(wtc->scratch, length, FALSE, FALSE, NULL); + if (!master_mdl) { + ERR("out of memory\n"); + return 
STATUS_INSUFFICIENT_RESOURCES; + } + + MmBuildMdlForNonPagedPool(master_mdl); + + wtc->mdl = master_mdl; + + pfns = (PFN_NUMBER*)(master_mdl + 1); + } else { + NTSTATUS Status = STATUS_SUCCESS; + + master_mdl = IoAllocateMdl(data, length, FALSE, FALSE, NULL); + if (!master_mdl) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + _SEH2_TRY { + MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + + if (!NT_SUCCESS(Status)) { + ERR("MmProbeAndLockPages threw exception %08x\n", Status); + IoFreeMdl(master_mdl); + return Status; + } + + wtc->mdl = master_mdl; + + pfns = (PFN_NUMBER*)(master_mdl + 1); + } + for (i = 0; i < c->chunk_item->num_stripes; i += c->chunk_item->sub_stripes) { UINT16 j; - - if (startoffstripe > i) { + + if (startoffstripe > i) stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; - } else if (startoffstripe == i) { + else if (startoffstripe == i) stripes[i].start = startoff; - } else { + else stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length); - } - - if (endoffstripe > i) { + + if (endoffstripe > i) stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; - } else if (endoffstripe == i) { + else if (endoffstripe == i) stripes[i].end = endoff + 1; - } else { + else stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length); + + stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(stripes[i].end - stripes[i].start), FALSE, FALSE, NULL); + if (!stripes[i].mdl) { + ERR("IoAllocateMdl failed\n"); + return STATUS_INSUFFICIENT_RESOURCES; } - - if (stripes[i].start != stripes[i].end) { - stripes[i].data = ExAllocatePoolWithTag(NonPagedPool, stripes[i].end - stripes[i].start, ALLOC_TAG); - - if (!stripes[i].data) { - ERR("out of memory\n"); - ExFreePool(stripeoff); - return STATUS_INSUFFICIENT_RESOURCES; - } - } - + 
for (j = 1; j < c->chunk_item->sub_stripes; j++) { stripes[i+j].start = stripes[i].start; stripes[i+j].end = stripes[i].end; stripes[i+j].data = stripes[i].data; + stripes[i+j].mdl = stripes[i].mdl; } } pos = 0; - RtlZeroMemory(stripeoff, sizeof(UINT64) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes); - stripenum = startoffstripe / c->chunk_item->sub_stripes; - while (pos < length) { - if (pos == 0) { - UINT32 writelen = min(stripes[stripenum * c->chunk_item->sub_stripes].end - stripes[stripenum * c->chunk_item->sub_stripes].start, - c->chunk_item->stripe_length - (stripes[stripenum * c->chunk_item->sub_stripes].start % c->chunk_item->stripe_length)); - - RtlCopyMemory(stripes[stripenum * c->chunk_item->sub_stripes].data, data, writelen); - stripeoff[stripenum] += writelen; - pos += writelen; - } else if (length - pos < c->chunk_item->stripe_length) { - RtlCopyMemory(stripes[stripenum * c->chunk_item->sub_stripes].data + stripeoff[stripenum], (UINT8*)data + pos, length - pos); + stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes, ALLOC_TAG); + if (!stripeoff) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(stripeoff, sizeof(UINT64) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes); + + while (pos < length) { + PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripenum * c->chunk_item->sub_stripes].mdl + 1); + + if (pos == 0) { + UINT32 writelen = (UINT32)min(stripes[stripenum * c->chunk_item->sub_stripes].end - stripes[stripenum * c->chunk_item->sub_stripes].start, + c->chunk_item->stripe_length - (stripes[stripenum * c->chunk_item->sub_stripes].start % c->chunk_item->stripe_length)); + + RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + + stripeoff[stripenum] += writelen; + pos += writelen; + } else if (length - pos < c->chunk_item->stripe_length) { + RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> 
PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)((length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); break; } else { - RtlCopyMemory(stripes[stripenum * c->chunk_item->sub_stripes].data + stripeoff[stripenum], (UINT8*)data + pos, c->chunk_item->stripe_length); + RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); + stripeoff[stripenum] += c->chunk_item->stripe_length; pos += c->chunk_item->stripe_length; } - + stripenum = (stripenum + 1) % (c->chunk_item->num_stripes / c->chunk_item->sub_stripes); } ExFreePool(stripeoff); - + return STATUS_SUCCESS; } -static NTSTATUS STDCALL read_stripe_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID ptr) { - read_stripe* stripe = ptr; - read_stripe_master* master = stripe->master; - ULONG stripes_left = InterlockedDecrement(&master->stripes_left); - - stripe->iosb = Irp->IoStatus; - - if (stripes_left == 0) - KeSetEvent(&master->event, 0, FALSE); - - return STATUS_MORE_PROCESSING_REQUIRED; -} +static NTSTATUS add_partial_stripe(device_extension* Vcb, chunk *c, UINT64 address, UINT32 length, void* data) { + NTSTATUS Status; + LIST_ENTRY* le; + partial_stripe* ps; + UINT64 stripe_addr; + UINT16 num_data_stripes; + ULONG bmplen; -static NTSTATUS make_read_irp(PIRP old_irp, read_stripe* stripe, UINT64 offset, void* data, UINT32 length) { - PIO_STACK_LOCATION IrpSp; - PIRP Irp; - - if (!old_irp) { - Irp = IoAllocateIrp(stripe->devobj->StackSize, FALSE); - - if (!Irp) { - ERR("IoAllocateIrp failed\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - } else { - Irp = IoMakeAssociatedIrp(old_irp, stripe->devobj->StackSize); - - if (!Irp) { - ERR("IoMakeAssociatedIrp failed\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } + num_data_stripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 
1 : 2); + stripe_addr = address - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length)); + + ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, TRUE); + + le = c->partial_stripes.Flink; + while (le != &c->partial_stripes) { + ps = CONTAINING_RECORD(le, partial_stripe, list_entry); + + if (ps->address == stripe_addr) { + // update existing entry + + RtlCopyMemory(ps->data + address - stripe_addr, data, length); + RtlClearBits(&ps->bmp, (ULONG)((address - stripe_addr) / Vcb->superblock.sector_size), length / Vcb->superblock.sector_size); + + // if now filled, flush + if (RtlAreBitsClear(&ps->bmp, 0, (ULONG)((num_data_stripes * c->chunk_item->stripe_length) / Vcb->superblock.sector_size))) { + Status = flush_partial_stripe(Vcb, c, ps); + if (!NT_SUCCESS(Status)) { + ERR("flush_partial_stripe returned %08x\n", Status); + goto end; + } + + RemoveEntryList(&ps->list_entry); + + if (ps->bmparr) + ExFreePool(ps->bmparr); + + ExFreePool(ps); + } + + Status = STATUS_SUCCESS; + goto end; + } else if (ps->address > stripe_addr) + break; + + le = le->Flink; } - - IrpSp = IoGetNextIrpStackLocation(Irp); - IrpSp->MajorFunction = IRP_MJ_READ; - - if (stripe->devobj->Flags & DO_BUFFERED_IO) { - FIXME("FIXME - buffered IO\n"); - IoFreeIrp(Irp); - return STATUS_INTERNAL_ERROR; - } else if (stripe->devobj->Flags & DO_DIRECT_IO) { - Irp->MdlAddress = IoAllocateMdl(data, length, FALSE, FALSE, NULL); - if (!Irp->MdlAddress) { - ERR("IoAllocateMdl failed\n"); - IoFreeIrp(Irp); - return STATUS_INSUFFICIENT_RESOURCES; - } - - MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoWriteAccess); - } else { - Irp->UserBuffer = data; - } - - IrpSp->Parameters.Read.Length = length; - IrpSp->Parameters.Read.ByteOffset.QuadPart = offset; - - Irp->UserIosb = &stripe->iosb; - - IoSetCompletionRoutine(Irp, read_stripe_completion, stripe, TRUE, TRUE, TRUE); - - stripe->Irp = Irp; - - return STATUS_SUCCESS; + + // add new entry + + ps = ExAllocatePoolWithTag(NonPagedPool, 
offsetof(partial_stripe, data[0]) + (ULONG)(num_data_stripes * c->chunk_item->stripe_length), ALLOC_TAG); + if (!ps) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + bmplen = (ULONG)sector_align(((num_data_stripes * c->chunk_item->stripe_length) / (8 * Vcb->superblock.sector_size) + 1), sizeof(ULONG)); + + ps->address = stripe_addr; + ps->bmparr = ExAllocatePoolWithTag(NonPagedPool, bmplen, ALLOC_TAG); + if (!ps->bmparr) { + ERR("out of memory\n"); + ExFreePool(ps); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlInitializeBitMap(&ps->bmp, ps->bmparr, (ULONG)((num_data_stripes * c->chunk_item->stripe_length) / Vcb->superblock.sector_size)); + RtlSetAllBits(&ps->bmp); + + RtlCopyMemory(ps->data + address - stripe_addr, data, length); + RtlClearBits(&ps->bmp, (ULONG)((address - stripe_addr) / Vcb->superblock.sector_size), length / Vcb->superblock.sector_size); + + InsertHeadList(le->Blink, &ps->list_entry); + + Status = STATUS_SUCCESS; + +end: + ExReleaseResourceLite(&c->partial_stripes_lock); + + return Status; } -static NTSTATUS prepare_raid5_write(PIRP Irp, chunk* c, UINT64 address, void* data, UINT32 length, write_stripe* stripes) { - UINT64 startoff, endoff; - UINT16 startoffstripe, endoffstripe, stripenum, parity, logstripe; - UINT64 start = 0xffffffffffffffff, end = 0; - UINT64 pos, stripepos; - UINT32 firststripesize, laststripesize; +typedef struct { + PMDL mdl; + PFN_NUMBER* pfns; +} log_stripe; + +static NTSTATUS prepare_raid5_write(device_extension* Vcb, chunk* c, UINT64 address, void* data, UINT32 length, write_stripe* stripes, PIRP Irp, + UINT64 irp_offset, ULONG priority, write_data_context* wtc) { + UINT64 startoff, endoff, parity_start, parity_end; + UINT16 startoffstripe, endoffstripe, parity, num_data_stripes = c->chunk_item->num_stripes - 1; + UINT64 pos, parity_pos, *stripeoff = NULL; UINT32 i; - UINT8* data2 = (UINT8*)data; - UINT32 num_reads; - BOOL same_stripe = FALSE, multiple_stripes; 
- - get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes - 1, &startoff, &startoffstripe); - get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes - 1, &endoff, &endoffstripe); - - for (i = 0; i < c->chunk_item->num_stripes - 1; i++) { - UINT64 ststart, stend; - - if (startoffstripe > i) { - ststart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; - } else if (startoffstripe == i) { - ststart = startoff; - } else { - ststart = startoff - (startoff % c->chunk_item->stripe_length); + BOOL file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0); + PMDL master_mdl; + NTSTATUS Status; + PFN_NUMBER *pfns, *parity_pfns; + log_stripe* log_stripes = NULL; + + if ((address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) { + UINT64 delta = (address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length); + + delta = min(irp_offset + length, delta); + Status = add_partial_stripe(Vcb, c, address + length - delta, (UINT32)delta, (UINT8*)data + irp_offset + length - delta); + if (!NT_SUCCESS(Status)) { + ERR("add_partial_stripe returned %08x\n", Status); + goto exit; } - if (endoffstripe > i) { - stend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; - } else if (endoffstripe == i) { - stend = endoff + 1; - } else { - stend = endoff - (endoff % c->chunk_item->stripe_length); + length -= (UINT32)delta; + } + + if (length > 0 && (address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) { + UINT64 delta = (num_data_stripes * c->chunk_item->stripe_length) - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length)); + + Status = add_partial_stripe(Vcb, c, address, (UINT32)delta, (UINT8*)data + irp_offset); + if (!NT_SUCCESS(Status)) { + ERR("add_partial_stripe returned %08x\n", Status); + goto exit; } - if (ststart 
!= stend) { - stripes[i].start = ststart; - stripes[i].end = stend; - - if (ststart < start) { - start = ststart; - firststripesize = c->chunk_item->stripe_length - (ststart % c->chunk_item->stripe_length); + address += delta; + length -= (UINT32)delta; + irp_offset += delta; + } + + if (length == 0) { + Status = STATUS_SUCCESS; + goto exit; + } + + get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, num_data_stripes, &startoff, &startoffstripe); + get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, num_data_stripes, &endoff, &endoffstripe); + + pos = 0; + while (pos < length) { + parity = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes; + + if (pos == 0) { + UINT16 stripe = (parity + startoffstripe + 1) % c->chunk_item->num_stripes; + ULONG skip, writelen; + + i = startoffstripe; + while (stripe != parity) { + if (i == startoffstripe) { + writelen = (ULONG)min(length, c->chunk_item->stripe_length - (startoff % c->chunk_item->stripe_length)); + + stripes[stripe].start = startoff; + stripes[stripe].end = startoff + writelen; + + pos += writelen; + + if (pos == length) + break; + } else { + writelen = (ULONG)min(length - pos, c->chunk_item->stripe_length); + + stripes[stripe].start = startoff - (startoff % c->chunk_item->stripe_length); + stripes[stripe].end = stripes[stripe].start + writelen; + + pos += writelen; + + if (pos == length) + break; + } + + i++; + stripe = (stripe + 1) % c->chunk_item->num_stripes; + } + + if (pos == length) + break; + + for (i = 0; i < startoffstripe; i++) { + stripe = (parity + i + 1) % c->chunk_item->num_stripes; + + stripes[stripe].start = stripes[stripe].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; + } + + stripes[parity].start = stripes[parity].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; + + if (length - pos > 
c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length) { + skip = (ULONG)(((length - pos) / (c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length)) - 1); + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + stripes[i].end += skip * c->chunk_item->num_stripes * c->chunk_item->stripe_length; + } + + pos += skip * num_data_stripes * c->chunk_item->num_stripes * c->chunk_item->stripe_length; + } + } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) { + for (i = 0; i < c->chunk_item->num_stripes; i++) { + stripes[i].end += c->chunk_item->stripe_length; } - if (stend > end) { - end = stend; - laststripesize = stend % c->chunk_item->stripe_length; - if (laststripesize == 0) - laststripesize = c->chunk_item->stripe_length; + pos += c->chunk_item->stripe_length * num_data_stripes; + } else { + UINT16 stripe = (parity + 1) % c->chunk_item->num_stripes; + + i = 0; + while (stripe != parity) { + if (endoffstripe == i) { + stripes[stripe].end = endoff + 1; + break; + } else if (endoffstripe > i) + stripes[stripe].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; + + i++; + stripe = (stripe + 1) % c->chunk_item->num_stripes; } + + break; } } - - if (start == end) { - ERR("error: start == end (%llx)\n", start); - return STATUS_INTERNAL_ERROR; + + parity_start = 0xffffffffffffffff; + parity_end = 0; + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (stripes[i].start != 0 || stripes[i].end != 0) { + parity_start = min(stripes[i].start, parity_start); + parity_end = max(stripes[i].end, parity_end); + } } - - if (startoffstripe == endoffstripe && start / c->chunk_item->stripe_length == end / c->chunk_item->stripe_length) { - firststripesize = end - start; - laststripesize = firststripesize; + + if (parity_end == parity_start) { + Status = STATUS_SUCCESS; + goto exit; } - for (i = 0; i < c->chunk_item->num_stripes; i++) { - stripes[i].data = 
ExAllocatePoolWithTag(NonPagedPool, end - start, ALLOC_TAG); - if (!stripes[i].data) { + parity = (((address - c->offset) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes; + stripes[parity].start = parity_start; + + parity = (((address - c->offset + length - 1) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes; + stripes[parity].end = parity_end; + + log_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(log_stripe) * num_data_stripes, ALLOC_TAG); + if (!log_stripes) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + RtlZeroMemory(log_stripes, sizeof(log_stripe) * num_data_stripes); + + for (i = 0; i < num_data_stripes; i++) { + log_stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(parity_end - parity_start), FALSE, FALSE, NULL); + if (!log_stripes[i].mdl) { ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - if (i < c->chunk_item->num_stripes - 1) { - if (stripes[i].start == 0 && stripes[i].end == 0) - stripes[i].start = stripes[i].end = start; + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; } + + log_stripes[i].mdl->MdlFlags |= MDL_PARTIAL; + log_stripes[i].pfns = (PFN_NUMBER*)(log_stripes[i].mdl + 1); } - - num_reads = 0; - multiple_stripes = (end - 1) / c->chunk_item->stripe_length != start / c->chunk_item->stripe_length; - - for (i = 0; i < c->chunk_item->num_stripes - 1; i++) { - if (stripes[i].start == stripes[i].end) { - num_reads++; - - if (multiple_stripes) - num_reads++; - } else { - if (stripes[i].start > start) - num_reads++; - - if (stripes[i].end < end) - num_reads++; - } + + wtc->parity1 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG); + if (!wtc->parity1) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; } - - if (num_reads > 0) { - UINT32 j; - read_stripe_master* master; - read_stripe* read_stripes; - 
CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; - NTSTATUS Status; - - master = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_stripe_master), ALLOC_TAG); - if (!master) { + + wtc->parity1_mdl = IoAllocateMdl(wtc->parity1, (ULONG)(parity_end - parity_start), FALSE, FALSE, NULL); + if (!wtc->parity1_mdl) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + MmBuildMdlForNonPagedPool(wtc->parity1_mdl); + + if (file_write) + master_mdl = Irp->MdlAddress; + else if (((ULONG_PTR)data % PAGE_SIZE) != 0) { + wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); + if (!wtc->scratch) { ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; } - - read_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_stripe) * num_reads, ALLOC_TAG); - if (!read_stripes) { + + RtlCopyMemory(wtc->scratch, (UINT8*)data + irp_offset, length); + + master_mdl = IoAllocateMdl(wtc->scratch, length, FALSE, FALSE, NULL); + if (!master_mdl) { ERR("out of memory\n"); - ExFreePool(master); - return STATUS_INSUFFICIENT_RESOURCES; - } - - parity = (((address - c->offset) / ((c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes; - stripenum = (parity + 1) % c->chunk_item->num_stripes; - - j = 0; - for (i = 0; i < c->chunk_item->num_stripes - 1; i++) { - if (stripes[i].start > start || stripes[i].start == stripes[i].end) { - ULONG readlen; - - read_stripes[j].Irp = NULL; - read_stripes[j].devobj = c->devices[stripenum]->devobj; - read_stripes[j].master = master; - - if (stripes[i].start != stripes[i].end) - readlen = stripes[i].start - start; - else - readlen = firststripesize; - - Status = make_read_irp(Irp, &read_stripes[j], start + cis[stripenum].offset, stripes[stripenum].data, readlen); - - if (!NT_SUCCESS(Status)) { - ERR("make_read_irp returned %08x\n", Status); - j++; - goto readend; - 
} - - stripes[stripenum].skip_start = readlen; - - j++; - if (j == num_reads) break; - } - - stripenum = (stripenum + 1) % c->chunk_item->num_stripes; - } - - if (j < num_reads) { - parity = (((address + length - 1 - c->offset) / ((c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes; - stripenum = (parity + 1) % c->chunk_item->num_stripes; - - for (i = 0; i < c->chunk_item->num_stripes - 1; i++) { - if ((stripes[i].start != stripes[i].end && stripes[i].end < end) || (stripes[i].start == stripes[i].end && multiple_stripes)) { - read_stripes[j].Irp = NULL; - read_stripes[j].devobj = c->devices[stripenum]->devobj; - read_stripes[j].master = master; - - if (stripes[i].start == stripes[i].end) { - Status = make_read_irp(Irp, &read_stripes[j], start + firststripesize + cis[stripenum].offset, &stripes[stripenum].data[firststripesize], laststripesize); - stripes[stripenum].skip_end = laststripesize; - } else { - Status = make_read_irp(Irp, &read_stripes[j], stripes[i].end + cis[stripenum].offset, &stripes[stripenum].data[stripes[i].end - start], end - stripes[i].end); - stripes[stripenum].skip_end = end - stripes[i].end; - } - - if (!NT_SUCCESS(Status)) { - ERR("make_read_irp returned %08x\n", Status); - j++; - goto readend; - } - - j++; - if (j == num_reads) break; - } - - stripenum = (stripenum + 1) % c->chunk_item->num_stripes; - } - } - - master->stripes_left = j; - KeInitializeEvent(&master->event, NotificationEvent, FALSE); - - for (i = 0; i < j; i++) { - Status = IoCallDriver(read_stripes[i].devobj, read_stripes[i].Irp); - if (!NT_SUCCESS(Status)) { - ERR("IoCallDriver returned %08x\n", Status); - goto readend; - } + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; } - - KeWaitForSingleObject(&master->event, Executive, KernelMode, FALSE, NULL); - - for (i = 0; i < j; i++) { - if (!NT_SUCCESS(read_stripes[i].iosb.Status)) { - Status = read_stripes[i].iosb.Status; - goto readend; - } + + 
MmBuildMdlForNonPagedPool(master_mdl); + + wtc->mdl = master_mdl; + } else { + master_mdl = IoAllocateMdl((UINT8*)data + irp_offset, length, FALSE, FALSE, NULL); + if (!master_mdl) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; } - + Status = STATUS_SUCCESS; -readend: - for (i = 0; i < j; i++) { - if (read_stripes[i].Irp) { - if (read_stripes[i].devobj->Flags & DO_DIRECT_IO) { - MmUnlockPages(read_stripes[i].Irp->MdlAddress); - IoFreeMdl(read_stripes[i].Irp->MdlAddress); - } - - IoFreeIrp(read_stripes[i].Irp); // FIXME - what if IoCallDriver fails and other Irps are still running? + _SEH2_TRY { + MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + + if (!NT_SUCCESS(Status)) { + ERR("MmProbeAndLockPages threw exception %08x\n", Status); + IoFreeMdl(master_mdl); + return Status; + } + + wtc->mdl = master_mdl; + } + + pfns = (PFN_NUMBER*)(master_mdl + 1); + parity_pfns = (PFN_NUMBER*)(wtc->parity1_mdl + 1); + + if (file_write) + pfns = &pfns[irp_offset >> PAGE_SHIFT]; + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (stripes[i].start != stripes[i].end) { + stripes[i].mdl = IoAllocateMdl((UINT8*)MmGetMdlVirtualAddress(master_mdl) + irp_offset, (ULONG)(stripes[i].end - stripes[i].start), FALSE, FALSE, NULL); + if (!stripes[i].mdl) { + ERR("IoAllocateMdl failed\n"); + return STATUS_INSUFFICIENT_RESOURCES; } } - - ExFreePool(read_stripes); - ExFreePool(master); - - if (!NT_SUCCESS(Status)) - return Status; } - + + stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes, ALLOC_TAG); + if (!stripeoff) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + RtlZeroMemory(stripeoff, sizeof(UINT64) * c->chunk_item->num_stripes); + pos = 0; - - parity = (((address - c->offset) / ((c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length)) + 
c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes; - stripepos = 0; - - if ((address - c->offset) % (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 1)) > 0) { - UINT16 firstdata; - BOOL first = TRUE; - - stripenum = (parity + 1) % c->chunk_item->num_stripes; - - for (logstripe = 0; logstripe < c->chunk_item->num_stripes - 1; logstripe++) { - ULONG copylen; - - if (pos >= length) - break; - - if (stripes[logstripe].start < start + firststripesize && stripes[logstripe].start != stripes[logstripe].end) { - copylen = min(start + firststripesize - stripes[logstripe].start, length - pos); - - if (!first && copylen < c->chunk_item->stripe_length) { - same_stripe = TRUE; + parity_pos = 0; + + while (pos < length) { + PFN_NUMBER* stripe_pfns; + + parity = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes; + + if (pos == 0) { + UINT16 stripe = (parity + startoffstripe + 1) % c->chunk_item->num_stripes; + UINT32 writelen = (UINT32)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, + c->chunk_item->stripe_length - (stripes[stripe].start % c->chunk_item->stripe_length))); + UINT32 maxwritelen = writelen; + + stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1); + + RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + + RtlCopyMemory(log_stripes[startoffstripe].pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + log_stripes[startoffstripe].pfns += writelen >> PAGE_SHIFT; + + stripeoff[stripe] = writelen; + pos += writelen; + + stripe = (stripe + 1) % c->chunk_item->num_stripes; + i = startoffstripe + 1; + + while (stripe != parity) { + stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1); + writelen = (UINT32)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length)); + + if (writelen == 0) break; - } - RtlCopyMemory(&stripes[stripenum].data[firststripesize - copylen], &data2[pos], 
copylen); - - pos += copylen; - first = FALSE; + if (writelen > maxwritelen) + maxwritelen = writelen; + + RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + + RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + log_stripes[i].pfns += writelen >> PAGE_SHIFT; + + stripeoff[stripe] = writelen; + pos += writelen; + + stripe = (stripe + 1) % c->chunk_item->num_stripes; + i++; } - - stripenum = (stripenum + 1) % c->chunk_item->num_stripes; - } - - firstdata = parity == 0 ? 1 : 0; - - RtlCopyMemory(stripes[parity].data, stripes[firstdata].data, firststripesize); - - for (i = firstdata + 1; i < c->chunk_item->num_stripes; i++) { - if (i != parity) - do_xor(&stripes[parity].data[0], &stripes[i].data[0], firststripesize); - } - - if (!same_stripe) { - stripepos = firststripesize; - parity = (parity + 1) % c->chunk_item->num_stripes; - } - } - - while (length >= pos + c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 1)) { - UINT16 firstdata; - - stripenum = (parity + 1) % c->chunk_item->num_stripes; - - for (i = 0; i < c->chunk_item->num_stripes - 1; i++) { - RtlCopyMemory(&stripes[stripenum].data[stripepos], &data2[pos], c->chunk_item->stripe_length); - - pos += c->chunk_item->stripe_length; - stripenum = (stripenum +1) % c->chunk_item->num_stripes; - } - - firstdata = parity == 0 ? 
1 : 0; - - RtlCopyMemory(&stripes[parity].data[stripepos], &stripes[firstdata].data[stripepos], c->chunk_item->stripe_length); - - for (i = firstdata + 1; i < c->chunk_item->num_stripes; i++) { - if (i != parity) - do_xor(&stripes[parity].data[stripepos], &stripes[i].data[stripepos], c->chunk_item->stripe_length); - } - - parity = (parity + 1) % c->chunk_item->num_stripes; - stripepos += c->chunk_item->stripe_length; - } - - if (pos < length) { - UINT16 firstdata; - - if (!same_stripe) { - stripenum = (parity + 1) % c->chunk_item->num_stripes; + + stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1); + + RtlCopyMemory(stripe_pfns, parity_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + stripeoff[parity] = maxwritelen; + parity_pos = maxwritelen; + } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) { + UINT16 stripe = (parity + 1) % c->chunk_item->num_stripes; + i = 0; - } else - i = logstripe; - - while (pos < length) { - ULONG copylen; - - copylen = min(stripes[i].end - start - stripepos, length - pos); - - RtlCopyMemory(&stripes[stripenum].data[stripepos], &data2[pos], copylen); - - pos += copylen; - stripenum = (stripenum + 1) % c->chunk_item->num_stripes; - i++; - } - - firstdata = parity == 0 ? 
1 : 0; - - RtlCopyMemory(&stripes[parity].data[stripepos], &stripes[firstdata].data[stripepos], laststripesize); - - for (i = firstdata + 1; i < c->chunk_item->num_stripes; i++) { - if (i != parity) - do_xor(&stripes[parity].data[stripepos], &stripes[i].data[stripepos], laststripesize); + while (stripe != parity) { + stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1); + + RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); + + RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); + log_stripes[i].pfns += c->chunk_item->stripe_length >> PAGE_SHIFT; + + stripeoff[stripe] += c->chunk_item->stripe_length; + pos += c->chunk_item->stripe_length; + + stripe = (stripe + 1) % c->chunk_item->num_stripes; + i++; + } + + stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1); + + RtlCopyMemory(&stripe_pfns[stripeoff[parity] >> PAGE_SHIFT], &parity_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); + stripeoff[parity] += c->chunk_item->stripe_length; + parity_pos += c->chunk_item->stripe_length; + } else { + UINT16 stripe = (parity + 1) % c->chunk_item->num_stripes; + UINT32 writelen, maxwritelen = 0; + + i = 0; + while (pos < length) { + stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1); + writelen = (UINT32)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length)); + + if (writelen == 0) + break; + + if (writelen > maxwritelen) + maxwritelen = writelen; + + RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + + RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + log_stripes[i].pfns += writelen >> PAGE_SHIFT; + + stripeoff[stripe] += writelen; + pos += 
writelen; + + stripe = (stripe + 1) % c->chunk_item->num_stripes; + i++; + } + + stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1); + + RtlCopyMemory(&stripe_pfns[stripeoff[parity] >> PAGE_SHIFT], &parity_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); } } - - for (i = 0; i < c->chunk_item->num_stripes; i++) { - stripes[i].start = start; - stripes[i].end = end; + + for (i = 0; i < num_data_stripes; i++) { + UINT8* ss = MmGetSystemAddressForMdlSafe(log_stripes[i].mdl, priority); + + if (i == 0) + RtlCopyMemory(wtc->parity1, ss, (UINT32)(parity_end - parity_start)); + else + do_xor(wtc->parity1, ss, (UINT32)(parity_end - parity_start)); } - - return STATUS_SUCCESS; + + Status = STATUS_SUCCESS; + +exit: + if (log_stripes) { + for (i = 0; i < num_data_stripes; i++) { + if (log_stripes[i].mdl) + IoFreeMdl(log_stripes[i].mdl); + } + + ExFreePool(log_stripes); + } + + if (stripeoff) + ExFreePool(stripeoff); + + return Status; } -static NTSTATUS prepare_raid6_write(PIRP Irp, chunk* c, UINT64 address, void* data, UINT32 length, write_stripe* stripes) { - UINT64 startoff, endoff; - UINT16 startoffstripe, endoffstripe, stripenum, parity1, parity2, logstripe; - UINT64 start = 0xffffffffffffffff, end = 0; - UINT64 pos, stripepos; - UINT32 firststripesize, laststripesize; +static NTSTATUS prepare_raid6_write(device_extension* Vcb, chunk* c, UINT64 address, void* data, UINT32 length, write_stripe* stripes, PIRP Irp, + UINT64 irp_offset, ULONG priority, write_data_context* wtc) { + UINT64 startoff, endoff, parity_start, parity_end; + UINT16 startoffstripe, endoffstripe, parity1, num_data_stripes = c->chunk_item->num_stripes - 2; + UINT64 pos, parity_pos, *stripeoff = NULL; UINT32 i; - UINT8* data2 = (UINT8*)data; - UINT32 num_reads; - BOOL same_stripe = FALSE, multiple_stripes; - - get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes - 2, &startoff, &startoffstripe); - get_raid0_offset(address + length - 
c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes - 2, &endoff, &endoffstripe); - - for (i = 0; i < c->chunk_item->num_stripes - 2; i++) { - UINT64 ststart, stend; - - if (startoffstripe > i) { - ststart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; - } else if (startoffstripe == i) { - ststart = startoff; - } else { - ststart = startoff - (startoff % c->chunk_item->stripe_length); + BOOL file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0); + PMDL master_mdl; + NTSTATUS Status; + PFN_NUMBER *pfns, *parity1_pfns, *parity2_pfns; + log_stripe* log_stripes = NULL; + + if ((address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) { + UINT64 delta = (address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length); + + delta = min(irp_offset + length, delta); + Status = add_partial_stripe(Vcb, c, address + length - delta, (UINT32)delta, (UINT8*)data + irp_offset + length - delta); + if (!NT_SUCCESS(Status)) { + ERR("add_partial_stripe returned %08x\n", Status); + goto exit; } - if (endoffstripe > i) { - stend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; - } else if (endoffstripe == i) { - stend = endoff + 1; - } else { - stend = endoff - (endoff % c->chunk_item->stripe_length); + length -= (UINT32)delta; + } + + if (length > 0 && (address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) { + UINT64 delta = (num_data_stripes * c->chunk_item->stripe_length) - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length)); + + Status = add_partial_stripe(Vcb, c, address, (UINT32)delta, (UINT8*)data + irp_offset); + if (!NT_SUCCESS(Status)) { + ERR("add_partial_stripe returned %08x\n", Status); + goto exit; } - if (ststart != stend) { - stripes[i].start = ststart; - stripes[i].end = stend; - - if (ststart < start) { - start = ststart; - firststripesize = 
c->chunk_item->stripe_length - (ststart % c->chunk_item->stripe_length); + address += delta; + length -= (UINT32)delta; + irp_offset += delta; + } + + if (length == 0) { + Status = STATUS_SUCCESS; + goto exit; + } + + get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, num_data_stripes, &startoff, &startoffstripe); + get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, num_data_stripes, &endoff, &endoffstripe); + + pos = 0; + while (pos < length) { + parity1 = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes; + + if (pos == 0) { + UINT16 stripe = (parity1 + startoffstripe + 2) % c->chunk_item->num_stripes; + UINT16 parity2 = (parity1 + 1) % c->chunk_item->num_stripes; + ULONG skip, writelen; + + i = startoffstripe; + while (stripe != parity1) { + if (i == startoffstripe) { + writelen = (ULONG)min(length, c->chunk_item->stripe_length - (startoff % c->chunk_item->stripe_length)); + + stripes[stripe].start = startoff; + stripes[stripe].end = startoff + writelen; + + pos += writelen; + + if (pos == length) + break; + } else { + writelen = (ULONG)min(length - pos, c->chunk_item->stripe_length); + + stripes[stripe].start = startoff - (startoff % c->chunk_item->stripe_length); + stripes[stripe].end = stripes[stripe].start + writelen; + + pos += writelen; + + if (pos == length) + break; + } + + i++; + stripe = (stripe + 1) % c->chunk_item->num_stripes; + } + + if (pos == length) + break; + + for (i = 0; i < startoffstripe; i++) { + stripe = (parity1 + i + 2) % c->chunk_item->num_stripes; + + stripes[stripe].start = stripes[stripe].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; + } + + stripes[parity1].start = stripes[parity1].end = stripes[parity2].start = stripes[parity2].end = + startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; + + if (length - pos > 
c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length) { + skip = (ULONG)(((length - pos) / (c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length)) - 1); + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + stripes[i].end += skip * c->chunk_item->num_stripes * c->chunk_item->stripe_length; + } + + pos += skip * num_data_stripes * c->chunk_item->num_stripes * c->chunk_item->stripe_length; + } + } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) { + for (i = 0; i < c->chunk_item->num_stripes; i++) { + stripes[i].end += c->chunk_item->stripe_length; } - if (stend > end) { - end = stend; - laststripesize = stend % c->chunk_item->stripe_length; - if (laststripesize == 0) - laststripesize = c->chunk_item->stripe_length; + pos += c->chunk_item->stripe_length * num_data_stripes; + } else { + UINT16 stripe = (parity1 + 2) % c->chunk_item->num_stripes; + + i = 0; + while (stripe != parity1) { + if (endoffstripe == i) { + stripes[stripe].end = endoff + 1; + break; + } else if (endoffstripe > i) + stripes[stripe].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; + + i++; + stripe = (stripe + 1) % c->chunk_item->num_stripes; } + + break; } } - - if (start == end) { - ERR("error: start == end (%llx)\n", start); - return STATUS_INTERNAL_ERROR; + + parity_start = 0xffffffffffffffff; + parity_end = 0; + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (stripes[i].start != 0 || stripes[i].end != 0) { + parity_start = min(stripes[i].start, parity_start); + parity_end = max(stripes[i].end, parity_end); + } } - - if (startoffstripe == endoffstripe && start / c->chunk_item->stripe_length == end / c->chunk_item->stripe_length) { - firststripesize = end - start; - laststripesize = firststripesize; + + if (parity_end == parity_start) { + Status = STATUS_SUCCESS; + goto exit; } - for (i = 0; i < c->chunk_item->num_stripes; i++) { - stripes[i].data = 
ExAllocatePoolWithTag(NonPagedPool, end - start, ALLOC_TAG); - if (!stripes[i].data) { + parity1 = (((address - c->offset) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes; + stripes[parity1].start = stripes[(parity1 + 1) % c->chunk_item->num_stripes].start = parity_start; + + parity1 = (((address - c->offset + length - 1) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes; + stripes[parity1].end = stripes[(parity1 + 1) % c->chunk_item->num_stripes].end = parity_end; + + log_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(log_stripe) * num_data_stripes, ALLOC_TAG); + if (!log_stripes) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + RtlZeroMemory(log_stripes, sizeof(log_stripe) * num_data_stripes); + + for (i = 0; i < num_data_stripes; i++) { + log_stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(parity_end - parity_start), FALSE, FALSE, NULL); + if (!log_stripes[i].mdl) { ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - if (i < c->chunk_item->num_stripes - 2) { - if (stripes[i].start == 0 && stripes[i].end == 0) - stripes[i].start = stripes[i].end = start; + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; } + + log_stripes[i].mdl->MdlFlags |= MDL_PARTIAL; + log_stripes[i].pfns = (PFN_NUMBER*)(log_stripes[i].mdl + 1); } - - num_reads = 0; - multiple_stripes = (end - 1) / c->chunk_item->stripe_length != start / c->chunk_item->stripe_length; - - for (i = 0; i < c->chunk_item->num_stripes - 2; i++) { - if (stripes[i].start == stripes[i].end) { - num_reads++; - - if (multiple_stripes) - num_reads++; - } else { - if (stripes[i].start > start) - num_reads++; - - if (stripes[i].end < end) - num_reads++; - } + + wtc->parity1 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG); + if (!wtc->parity1) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + 
goto exit; + } + + wtc->parity2 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG); + if (!wtc->parity2) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + wtc->parity1_mdl = IoAllocateMdl(wtc->parity1, (ULONG)(parity_end - parity_start), FALSE, FALSE, NULL); + if (!wtc->parity1_mdl) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + MmBuildMdlForNonPagedPool(wtc->parity1_mdl); + + wtc->parity2_mdl = IoAllocateMdl(wtc->parity2, (ULONG)(parity_end - parity_start), FALSE, FALSE, NULL); + if (!wtc->parity2_mdl) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; } - - if (num_reads > 0) { - UINT32 j; - read_stripe_master* master; - read_stripe* read_stripes; - CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; - NTSTATUS Status; - - master = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_stripe_master), ALLOC_TAG); - if (!master) { + + MmBuildMdlForNonPagedPool(wtc->parity2_mdl); + + if (file_write) + master_mdl = Irp->MdlAddress; + else if (((ULONG_PTR)data % PAGE_SIZE) != 0) { + wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); + if (!wtc->scratch) { ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; } - - read_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_stripe) * num_reads, ALLOC_TAG); - if (!read_stripes) { + + RtlCopyMemory(wtc->scratch, (UINT8*)data + irp_offset, length); + + master_mdl = IoAllocateMdl(wtc->scratch, length, FALSE, FALSE, NULL); + if (!master_mdl) { ERR("out of memory\n"); - ExFreePool(master); - return STATUS_INSUFFICIENT_RESOURCES; + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; } - - parity1 = (((address - c->offset) / ((c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes; - stripenum = (parity1 + 2) % 
c->chunk_item->num_stripes; - - j = 0; - for (i = 0; i < c->chunk_item->num_stripes - 2; i++) { - if (stripes[i].start > start || stripes[i].start == stripes[i].end) { - ULONG readlen; - - read_stripes[j].Irp = NULL; - read_stripes[j].devobj = c->devices[stripenum]->devobj; - read_stripes[j].master = master; - - if (stripes[i].start != stripes[i].end) - readlen = stripes[i].start - start; - else - readlen = firststripesize; - - Status = make_read_irp(Irp, &read_stripes[j], start + cis[stripenum].offset, stripes[stripenum].data, readlen); - - if (!NT_SUCCESS(Status)) { - ERR("make_read_irp returned %08x\n", Status); - j++; - goto readend; - } - - stripes[stripenum].skip_start = readlen; - - j++; - if (j == num_reads) break; - } - - stripenum = (stripenum + 1) % c->chunk_item->num_stripes; + + MmBuildMdlForNonPagedPool(master_mdl); + + wtc->mdl = master_mdl; + } else { + master_mdl = IoAllocateMdl((UINT8*)data + irp_offset, length, FALSE, FALSE, NULL); + if (!master_mdl) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; } - - if (j < num_reads) { - parity1 = (((address + length - 1 - c->offset) / ((c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes; - stripenum = (parity1 + 2) % c->chunk_item->num_stripes; - - for (i = 0; i < c->chunk_item->num_stripes - 2; i++) { - if ((stripes[i].start != stripes[i].end && stripes[i].end < end) || (stripes[i].start == stripes[i].end && multiple_stripes)) { - read_stripes[j].Irp = NULL; - read_stripes[j].devobj = c->devices[stripenum]->devobj; - read_stripes[j].master = master; - - if (stripes[i].start == stripes[i].end) { - Status = make_read_irp(Irp, &read_stripes[j], start + firststripesize + cis[stripenum].offset, &stripes[stripenum].data[firststripesize], laststripesize); - stripes[stripenum].skip_end = laststripesize; - } else { - Status = make_read_irp(Irp, &read_stripes[j], stripes[i].end + cis[stripenum].offset, 
&stripes[stripenum].data[stripes[i].end - start], end - stripes[i].end); - stripes[stripenum].skip_end = end - stripes[i].end; - } - - if (!NT_SUCCESS(Status)) { - ERR("make_read_irp returned %08x\n", Status); - j++; - goto readend; - } - - j++; - if (j == num_reads) break; - } - - stripenum = (stripenum + 1) % c->chunk_item->num_stripes; - } + + Status = STATUS_SUCCESS; + + _SEH2_TRY { + MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + + if (!NT_SUCCESS(Status)) { + ERR("MmProbeAndLockPages threw exception %08x\n", Status); + IoFreeMdl(master_mdl); + goto exit; } - - master->stripes_left = j; - KeInitializeEvent(&master->event, NotificationEvent, FALSE); - - for (i = 0; i < j; i++) { - Status = IoCallDriver(read_stripes[i].devobj, read_stripes[i].Irp); - if (!NT_SUCCESS(Status)) { - ERR("IoCallDriver returned %08x\n", Status); - goto readend; + + wtc->mdl = master_mdl; + } + + pfns = (PFN_NUMBER*)(master_mdl + 1); + parity1_pfns = (PFN_NUMBER*)(wtc->parity1_mdl + 1); + parity2_pfns = (PFN_NUMBER*)(wtc->parity2_mdl + 1); + + if (file_write) + pfns = &pfns[irp_offset >> PAGE_SHIFT]; + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (stripes[i].start != stripes[i].end) { + stripes[i].mdl = IoAllocateMdl((UINT8*)MmGetMdlVirtualAddress(master_mdl) + irp_offset, (ULONG)(stripes[i].end - stripes[i].start), FALSE, FALSE, NULL); + if (!stripes[i].mdl) { + ERR("IoAllocateMdl failed\n"); + return STATUS_INSUFFICIENT_RESOURCES; } } - - KeWaitForSingleObject(&master->event, Executive, KernelMode, FALSE, NULL); - - for (i = 0; i < j; i++) { - if (!NT_SUCCESS(read_stripes[i].iosb.Status)) { - Status = read_stripes[i].iosb.Status; - goto readend; + } + + stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes, ALLOC_TAG); + if (!stripeoff) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + 
RtlZeroMemory(stripeoff, sizeof(UINT64) * c->chunk_item->num_stripes); + + pos = 0; + parity_pos = 0; + + while (pos < length) { + PFN_NUMBER* stripe_pfns; + + parity1 = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes; + + if (pos == 0) { + UINT16 stripe = (parity1 + startoffstripe + 2) % c->chunk_item->num_stripes, parity2; + UINT32 writelen = (UINT32)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, + c->chunk_item->stripe_length - (stripes[stripe].start % c->chunk_item->stripe_length))); + UINT32 maxwritelen = writelen; + + stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1); + + RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + + RtlCopyMemory(log_stripes[startoffstripe].pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + log_stripes[startoffstripe].pfns += writelen >> PAGE_SHIFT; + + stripeoff[stripe] = writelen; + pos += writelen; + + stripe = (stripe + 1) % c->chunk_item->num_stripes; + i = startoffstripe + 1; + + while (stripe != parity1) { + stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1); + writelen = (UINT32)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length)); + + if (writelen == 0) + break; + + if (writelen > maxwritelen) + maxwritelen = writelen; + + RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + + RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + log_stripes[i].pfns += writelen >> PAGE_SHIFT; + + stripeoff[stripe] = writelen; + pos += writelen; + + stripe = (stripe + 1) % c->chunk_item->num_stripes; + i++; } - } - - Status = STATUS_SUCCESS; -readend: - for (i = 0; i < j; i++) { - if (read_stripes[i].Irp) { - if (read_stripes[i].devobj->Flags & DO_DIRECT_IO) { - MmUnlockPages(read_stripes[i].Irp->MdlAddress); - 
IoFreeMdl(read_stripes[i].Irp->MdlAddress); - } - - IoFreeIrp(read_stripes[i].Irp); // FIXME - what if IoCallDriver fails and other Irps are still running? + stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1); + RtlCopyMemory(stripe_pfns, parity1_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + stripeoff[parity1] = maxwritelen; + + parity2 = (parity1 + 1) % c->chunk_item->num_stripes; + + stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1); + RtlCopyMemory(stripe_pfns, parity2_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + stripeoff[parity2] = maxwritelen; + + parity_pos = maxwritelen; + } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) { + UINT16 stripe = (parity1 + 2) % c->chunk_item->num_stripes, parity2; + + i = 0; + while (stripe != parity1) { + stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1); + + RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); + + RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); + log_stripes[i].pfns += c->chunk_item->stripe_length >> PAGE_SHIFT; + + stripeoff[stripe] += c->chunk_item->stripe_length; + pos += c->chunk_item->stripe_length; + + stripe = (stripe + 1) % c->chunk_item->num_stripes; + i++; + } + + stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1); + RtlCopyMemory(&stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT], &parity1_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); + stripeoff[parity1] += c->chunk_item->stripe_length; + + parity2 = (parity1 + 1) % c->chunk_item->num_stripes; + + stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1); + RtlCopyMemory(&stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT], &parity2_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); + 
stripeoff[parity2] += c->chunk_item->stripe_length; + + parity_pos += c->chunk_item->stripe_length; + } else { + UINT16 stripe = (parity1 + 2) % c->chunk_item->num_stripes, parity2; + UINT32 writelen, maxwritelen = 0; + + i = 0; + while (pos < length) { + stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1); + writelen = (UINT32)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length)); + + if (writelen == 0) + break; + + if (writelen > maxwritelen) + maxwritelen = writelen; + + RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + + RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + log_stripes[i].pfns += writelen >> PAGE_SHIFT; + + stripeoff[stripe] += writelen; + pos += writelen; + + stripe = (stripe + 1) % c->chunk_item->num_stripes; + i++; } + + stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1); + RtlCopyMemory(&stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT], &parity1_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); + + parity2 = (parity1 + 1) % c->chunk_item->num_stripes; + + stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1); + RtlCopyMemory(&stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT], &parity2_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); } - - ExFreePool(read_stripes); - ExFreePool(master); - - if (!NT_SUCCESS(Status)) - return Status; } - - pos = 0; - - parity1 = (((address - c->offset) / ((c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes; - parity2 = (parity1 + 1) % c->chunk_item->num_stripes; - stripepos = 0; - - if ((address - c->offset) % (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 2)) > 0) { - BOOL first = TRUE; - - stripenum = (parity2 + 1) % c->chunk_item->num_stripes; - - for (logstripe = 0; logstripe 
< c->chunk_item->num_stripes - 2; logstripe++) { - ULONG copylen; - - if (pos >= length) - break; - - if (stripes[logstripe].start < start + firststripesize && stripes[logstripe].start != stripes[logstripe].end) { - copylen = min(start + firststripesize - stripes[logstripe].start, length - pos); - - if (!first && copylen < c->chunk_item->stripe_length) { - same_stripe = TRUE; - break; - } - RtlCopyMemory(&stripes[stripenum].data[firststripesize - copylen], &data2[pos], copylen); - - pos += copylen; - first = FALSE; - } - - stripenum = (stripenum + 1) % c->chunk_item->num_stripes; - } - - i = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1); - RtlCopyMemory(stripes[parity1].data, stripes[i].data, firststripesize); - RtlCopyMemory(stripes[parity2].data, stripes[i].data, firststripesize); - i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1); - - do { - do_xor(stripes[parity1].data, stripes[i].data, firststripesize); - - galois_double(stripes[parity2].data, firststripesize); - do_xor(stripes[parity2].data, stripes[i].data, firststripesize); - - i = i == 0 ? 
(c->chunk_item->num_stripes - 1) : (i - 1); - } while (i != parity2); - - if (!same_stripe) { - stripepos = firststripesize; - parity1 = parity2; - parity2 = (parity2 + 1) % c->chunk_item->num_stripes; + for (i = 0; i < num_data_stripes; i++) { + UINT8* ss = MmGetSystemAddressForMdlSafe(log_stripes[c->chunk_item->num_stripes - 3 - i].mdl, priority); + + if (i == 0) { + RtlCopyMemory(wtc->parity1, ss, (ULONG)(parity_end - parity_start)); + RtlCopyMemory(wtc->parity2, ss, (ULONG)(parity_end - parity_start)); + } else { + do_xor(wtc->parity1, ss, (UINT32)(parity_end - parity_start)); + + galois_double(wtc->parity2, (UINT32)(parity_end - parity_start)); + do_xor(wtc->parity2, ss, (UINT32)(parity_end - parity_start)); } } - - while (length >= pos + c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 2)) { - stripenum = (parity2 + 1) % c->chunk_item->num_stripes; - - for (i = 0; i < c->chunk_item->num_stripes - 2; i++) { - RtlCopyMemory(&stripes[stripenum].data[stripepos], &data2[pos], c->chunk_item->stripe_length); - - pos += c->chunk_item->stripe_length; - stripenum = (stripenum +1) % c->chunk_item->num_stripes; - } - - i = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1); - RtlCopyMemory(&stripes[parity1].data[stripepos], &stripes[i].data[stripepos], c->chunk_item->stripe_length); - RtlCopyMemory(&stripes[parity2].data[stripepos], &stripes[i].data[stripepos], c->chunk_item->stripe_length); - i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1); - - do { - do_xor(&stripes[parity1].data[stripepos], &stripes[i].data[stripepos], c->chunk_item->stripe_length); - - galois_double(&stripes[parity2].data[stripepos], c->chunk_item->stripe_length); - do_xor(&stripes[parity2].data[stripepos], &stripes[i].data[stripepos], c->chunk_item->stripe_length); - - i = i == 0 ? 
(c->chunk_item->num_stripes - 1) : (i - 1); - } while (i != parity2); - - parity1 = parity2; - parity2 = (parity2 + 1) % c->chunk_item->num_stripes; - stripepos += c->chunk_item->stripe_length; - } - - if (pos < length) { - if (!same_stripe) { - stripenum = (parity2 + 1) % c->chunk_item->num_stripes; - i = 0; - } else - i = logstripe; - - while (pos < length) { - ULONG copylen; - - copylen = min(stripes[i].end - start - stripepos, length - pos); - - RtlCopyMemory(&stripes[stripenum].data[stripepos], &data2[pos], copylen); - - pos += copylen; - stripenum = (stripenum + 1) % c->chunk_item->num_stripes; - i++; + + Status = STATUS_SUCCESS; + +exit: + if (log_stripes) { + for (i = 0; i < num_data_stripes; i++) { + if (log_stripes[i].mdl) + IoFreeMdl(log_stripes[i].mdl); } - - i = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1); - RtlCopyMemory(&stripes[parity1].data[stripepos], &stripes[i].data[stripepos], laststripesize); - RtlCopyMemory(&stripes[parity2].data[stripepos], &stripes[i].data[stripepos], laststripesize); - i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1); - - do { - do_xor(&stripes[parity1].data[stripepos], &stripes[i].data[stripepos], laststripesize); - - galois_double(&stripes[parity2].data[stripepos], laststripesize); - do_xor(&stripes[parity2].data[stripepos], &stripes[i].data[stripepos], laststripesize); - - i = i == 0 ? 
(c->chunk_item->num_stripes - 1) : (i - 1); - } while (i != parity2); - } - - for (i = 0; i < c->chunk_item->num_stripes; i++) { - stripes[i].start = start; - stripes[i].end = end; + + ExFreePool(log_stripes); } - - return STATUS_SUCCESS; + + if (stripeoff) + ExFreePool(stripeoff); + + return Status; } -NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, BOOL need_free, UINT32 length, write_data_context* wtc, PIRP Irp, chunk* c) { +NTSTATUS write_data(_In_ device_extension* Vcb, _In_ UINT64 address, _In_reads_bytes_(length) void* data, _In_ UINT32 length, _In_ write_data_context* wtc, + _In_opt_ PIRP Irp, _In_opt_ chunk* c, _In_ BOOL file_write, _In_ UINT64 irp_offset, _In_ ULONG priority) { NTSTATUS Status; UINT32 i; CHUNK_ITEM_STRIPE* cis; write_data_stripe* stripe; write_stripe* stripes = NULL; - BOOL need_free2; - + UINT64 total_writing = 0; + ULONG allowed_missing, missing; + TRACE("(%p, %llx, %p, %x)\n", Vcb, address, data, length); - + if (!c) { c = get_chunk_from_address(Vcb, address); if (!c) { @@ -1545,149 +1894,177 @@ NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, B return STATUS_INTERNAL_ERROR; } } - + stripes = ExAllocatePoolWithTag(PagedPool, sizeof(write_stripe) * c->chunk_item->num_stripes, ALLOC_TAG); if (!stripes) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + RtlZeroMemory(stripes, sizeof(write_stripe) * c->chunk_item->num_stripes); - + cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; - + if (c->chunk_item->type & BLOCK_FLAG_RAID0) { - Status = prepare_raid0_write(c, address, data, length, stripes); + Status = prepare_raid0_write(c, address, data, length, stripes, file_write ? 
Irp : NULL, irp_offset, wtc); if (!NT_SUCCESS(Status)) { ERR("prepare_raid0_write returned %08x\n", Status); - ExFreePool(stripes); - return Status; + goto prepare_failed; } - - if (need_free) - ExFreePool(data); - need_free2 = TRUE; + allowed_missing = 0; } else if (c->chunk_item->type & BLOCK_FLAG_RAID10) { - Status = prepare_raid10_write(c, address, data, length, stripes); + Status = prepare_raid10_write(c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, wtc); if (!NT_SUCCESS(Status)) { ERR("prepare_raid10_write returned %08x\n", Status); - ExFreePool(stripes); - return Status; + goto prepare_failed; } - - if (need_free) - ExFreePool(data); - need_free2 = TRUE; + allowed_missing = 1; } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) { - Status = prepare_raid5_write(Irp, c, address, data, length, stripes); + Status = prepare_raid5_write(Vcb, c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, priority, wtc); if (!NT_SUCCESS(Status)) { ERR("prepare_raid5_write returned %08x\n", Status); - ExFreePool(stripes); - return Status; + goto prepare_failed; } - - if (need_free) - ExFreePool(data); - need_free2 = TRUE; + allowed_missing = 1; } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) { - Status = prepare_raid6_write(Irp, c, address, data, length, stripes); + Status = prepare_raid6_write(Vcb, c, address, data, length, stripes, file_write ? 
Irp : NULL, irp_offset, priority, wtc); if (!NT_SUCCESS(Status)) { ERR("prepare_raid6_write returned %08x\n", Status); - ExFreePool(stripes); - return Status; + goto prepare_failed; } - - if (need_free) - ExFreePool(data); - need_free2 = TRUE; + allowed_missing = 2; } else { // write same data to every location - SINGLE, DUP, RAID1 for (i = 0; i < c->chunk_item->num_stripes; i++) { stripes[i].start = address - c->offset; stripes[i].end = stripes[i].start + length; stripes[i].data = data; + stripes[i].irp_offset = irp_offset; + + if (c->devices[i]->devobj) { + if (file_write) { + UINT8* va; + ULONG writelen = (ULONG)(stripes[i].end - stripes[i].start); + + va = (UINT8*)MmGetMdlVirtualAddress(Irp->MdlAddress) + stripes[i].irp_offset; + + stripes[i].mdl = IoAllocateMdl(va, writelen, FALSE, FALSE, NULL); + if (!stripes[i].mdl) { + ERR("IoAllocateMdl failed\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto prepare_failed; + } + + IoBuildPartialMdl(Irp->MdlAddress, stripes[i].mdl, va, writelen); + } else { + stripes[i].mdl = IoAllocateMdl(stripes[i].data, (ULONG)(stripes[i].end - stripes[i].start), FALSE, FALSE, NULL); + if (!stripes[i].mdl) { + ERR("IoAllocateMdl failed\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto prepare_failed; + } + + Status = STATUS_SUCCESS; + + _SEH2_TRY { + MmProbeAndLockPages(stripes[i].mdl, KernelMode, IoReadAccess); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + + if (!NT_SUCCESS(Status)) { + ERR("MmProbeAndLockPages threw exception %08x\n", Status); + IoFreeMdl(stripes[i].mdl); + stripes[i].mdl = NULL; + goto prepare_failed; + } + } + } } - need_free2 = need_free; + + allowed_missing = c->chunk_item->num_stripes - 1; + } + + missing = 0; + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (!c->devices[i]->devobj) + missing++; + } + + if (missing > allowed_missing) { + ERR("cannot write as %u missing devices (maximum %u)\n", missing, allowed_missing); + Status = 
STATUS_DEVICE_NOT_READY; + goto prepare_failed; } for (i = 0; i < c->chunk_item->num_stripes; i++) { PIO_STACK_LOCATION IrpSp; - - // FIXME - handle missing devices - + stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_stripe), ALLOC_TAG); if (!stripe) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - - if (stripes[i].start + stripes[i].skip_start == stripes[i].end - stripes[i].skip_end || stripes[i].start == stripes[i].end) { + + if (stripes[i].start == stripes[i].end || !c->devices[i]->devobj) { stripe->status = WriteDataStatus_Ignore; stripe->Irp = NULL; stripe->buf = stripes[i].data; - stripe->need_free = need_free2; + stripe->mdl = NULL; } else { stripe->context = (struct _write_data_context*)wtc; stripe->buf = stripes[i].data; - stripe->need_free = need_free2; stripe->device = c->devices[i]; RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK)); stripe->status = WriteDataStatus_Pending; - + stripe->mdl = stripes[i].mdl; + if (!Irp) { stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, FALSE); - + if (!stripe->Irp) { ERR("IoAllocateIrp failed\n"); - Status = STATUS_INTERNAL_ERROR; + Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } } else { stripe->Irp = IoMakeAssociatedIrp(Irp, stripe->device->devobj->StackSize); - + if (!stripe->Irp) { ERR("IoMakeAssociatedIrp failed\n"); - Status = STATUS_INTERNAL_ERROR; + Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } } - + IrpSp = IoGetNextIrpStackLocation(stripe->Irp); IrpSp->MajorFunction = IRP_MJ_WRITE; - + if (stripe->device->devobj->Flags & DO_BUFFERED_IO) { - stripe->Irp->AssociatedIrp.SystemBuffer = stripes[i].data + stripes[i].skip_start; + stripe->Irp->AssociatedIrp.SystemBuffer = MmGetSystemAddressForMdlSafe(stripes[i].mdl, priority); stripe->Irp->Flags = IRP_BUFFERED_IO; - } else if (stripe->device->devobj->Flags & DO_DIRECT_IO) { - stripe->Irp->MdlAddress = IoAllocateMdl(stripes[i].data + stripes[i].skip_start, - stripes[i].end - stripes[i].start - 
stripes[i].skip_start - stripes[i].skip_end, FALSE, FALSE, NULL); - if (!stripe->Irp->MdlAddress) { - ERR("IoAllocateMdl failed\n"); - Status = STATUS_INTERNAL_ERROR; - goto end; - } - - MmProbeAndLockPages(stripe->Irp->MdlAddress, KernelMode, IoWriteAccess); - } else { - stripe->Irp->UserBuffer = stripes[i].data + stripes[i].skip_start; - } - + } else if (stripe->device->devobj->Flags & DO_DIRECT_IO) + stripe->Irp->MdlAddress = stripe->mdl; + else + stripe->Irp->UserBuffer = MmGetSystemAddressForMdlSafe(stripes[i].mdl, priority); + #ifdef DEBUG_PARANOID - if (stripes[i].end < stripes[i].start + stripes[i].skip_start + stripes[i].skip_end) { - ERR("trying to write stripe with negative length (%llx < %llx + %x + %x)\n", - stripes[i].end, stripes[i].start, stripes[i].skip_start, stripes[i].skip_end); + if (stripes[i].end < stripes[i].start) { + ERR("trying to write stripe with negative length (%llx < %llx)\n", stripes[i].end, stripes[i].start); int3; } #endif - IrpSp->Parameters.Write.Length = stripes[i].end - stripes[i].start - stripes[i].skip_start - stripes[i].skip_end; - IrpSp->Parameters.Write.ByteOffset.QuadPart = stripes[i].start + cis[i].offset + stripes[i].skip_start; - + IrpSp->Parameters.Write.Length = (ULONG)(stripes[i].end - stripes[i].start); + IrpSp->Parameters.Write.ByteOffset.QuadPart = stripes[i].start + cis[i].offset; + + total_writing += IrpSp->Parameters.Write.Length; + stripe->Irp->UserIosb = &stripe->iosb; wtc->stripes_left++; @@ -1696,88 +2073,101 @@ NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, B InsertTailList(&wtc->stripes, &stripe->list_entry); } - + + if (diskacc) + fFsRtlUpdateDiskCounters(0, total_writing); + Status = STATUS_SUCCESS; - + end: if (stripes) ExFreePool(stripes); - - if (!NT_SUCCESS(Status)) { + + if (!NT_SUCCESS(Status)) free_write_data_stripes(wtc); - ExFreePool(wtc); + + return Status; + +prepare_failed: + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (stripes[i].mdl && (i == 0 
|| stripes[i].mdl != stripes[i-1].mdl)) { + if (stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED) + MmUnlockPages(stripes[i].mdl); + + IoFreeMdl(stripes[i].mdl); + } + } + + if (wtc->parity1_mdl) { + if (wtc->parity1_mdl->MdlFlags & MDL_PAGES_LOCKED) + MmUnlockPages(wtc->parity1_mdl); + + IoFreeMdl(wtc->parity1_mdl); + wtc->parity1_mdl = NULL; + } + + if (wtc->parity2_mdl) { + if (wtc->parity2_mdl->MdlFlags & MDL_PAGES_LOCKED) + MmUnlockPages(wtc->parity2_mdl); + + IoFreeMdl(wtc->parity2_mdl); + wtc->parity2_mdl = NULL; + } + + if (wtc->mdl) { + if (wtc->mdl->MdlFlags & MDL_PAGES_LOCKED) + MmUnlockPages(wtc->mdl); + + IoFreeMdl(wtc->mdl); + wtc->mdl = NULL; + } + + if (wtc->parity1) { + ExFreePool(wtc->parity1); + wtc->parity1 = NULL; + } + + if (wtc->parity2) { + ExFreePool(wtc->parity2); + wtc->parity2 = NULL; + } + + if (wtc->scratch) { + ExFreePool(wtc->scratch); + wtc->scratch = NULL; } - + + ExFreePool(stripes); return Status; } void get_raid56_lock_range(chunk* c, UINT64 address, UINT64 length, UINT64* lockaddr, UINT64* locklen) { UINT64 startoff, endoff; UINT16 startoffstripe, endoffstripe, datastripes; - UINT64 start = 0xffffffffffffffff, end = 0, logend; - UINT16 i; - + datastripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 
1 : 2); - + get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, datastripes, &startoff, &startoffstripe); get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, datastripes, &endoff, &endoffstripe); - for (i = 0; i < datastripes; i++) { - UINT64 ststart, stend; - - if (startoffstripe > i) { - ststart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; - } else if (startoffstripe == i) { - ststart = startoff; - } else { - ststart = startoff - (startoff % c->chunk_item->stripe_length); - } - - if (endoffstripe > i) { - stend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; - } else if (endoffstripe == i) { - stend = endoff + 1; - } else { - stend = endoff - (endoff % c->chunk_item->stripe_length); - } - - if (ststart != stend) { - if (ststart < start) - start = ststart; + startoff -= startoff % c->chunk_item->stripe_length; + endoff = sector_align(endoff, c->chunk_item->stripe_length); - if (stend > end) - end = stend; - } - } - - *lockaddr = c->offset + ((start / c->chunk_item->stripe_length) * c->chunk_item->stripe_length * datastripes) + - start % c->chunk_item->stripe_length; - - logend = c->offset + ((end / c->chunk_item->stripe_length) * c->chunk_item->stripe_length * datastripes); - logend += c->chunk_item->stripe_length * (datastripes - 1); - logend += end % c->chunk_item->stripe_length == 0 ? 
c->chunk_item->stripe_length : (end % c->chunk_item->stripe_length); - *locklen = logend - *lockaddr; + *lockaddr = c->offset + (startoff * datastripes); + *locklen = (endoff - startoff) * datastripes; } -NTSTATUS STDCALL write_data_complete(device_extension* Vcb, UINT64 address, void* data, UINT32 length, PIRP Irp, chunk* c) { - write_data_context* wtc; +NTSTATUS write_data_complete(device_extension* Vcb, UINT64 address, void* data, UINT32 length, PIRP Irp, chunk* c, BOOL file_write, UINT64 irp_offset, ULONG priority) { + write_data_context wtc; NTSTATUS Status; UINT64 lockaddr, locklen; -// #ifdef DEBUG_PARANOID -// UINT8* buf2; -// #endif - - wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context), ALLOC_TAG); - if (!wtc) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE); - InitializeListHead(&wtc->stripes); - wtc->tree = FALSE; - wtc->stripes_left = 0; - + KeInitializeEvent(&wtc.Event, NotificationEvent, FALSE); + InitializeListHead(&wtc.stripes); + wtc.stripes_left = 0; + wtc.parity1 = wtc.parity2 = wtc.scratch = NULL; + wtc.mdl = wtc.parity1_mdl = wtc.parity2_mdl = NULL; + if (!c) { c = get_chunk_from_address(Vcb, address); if (!c) { @@ -1785,104 +2175,110 @@ NTSTATUS STDCALL write_data_complete(device_extension* Vcb, UINT64 address, void return STATUS_INTERNAL_ERROR; } } - + if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) { get_raid56_lock_range(c, address, length, &lockaddr, &locklen); chunk_lock_range(Vcb, c, lockaddr, locklen); } - - Status = write_data(Vcb, address, data, FALSE, length, wtc, Irp, c); + + _SEH2_TRY { + Status = write_data(Vcb, address, data, length, &wtc, Irp, c, file_write, irp_offset, priority); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + if (!NT_SUCCESS(Status)) { ERR("write_data returned %08x\n", Status); - + if (c->chunk_item->type & 
BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) chunk_unlock_range(Vcb, c, lockaddr, locklen); - - free_write_data_stripes(wtc); - ExFreePool(wtc); + + free_write_data_stripes(&wtc); return Status; } - - if (wtc->stripes.Flink != &wtc->stripes) { + + if (wtc.stripes.Flink != &wtc.stripes) { // launch writes and wait - LIST_ENTRY* le = wtc->stripes.Flink; - while (le != &wtc->stripes) { + LIST_ENTRY* le = wtc.stripes.Flink; + BOOL no_wait = TRUE; + + while (le != &wtc.stripes) { write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry); - - if (stripe->status != WriteDataStatus_Ignore) + + if (stripe->status != WriteDataStatus_Ignore) { IoCallDriver(stripe->device->devobj, stripe->Irp); - + no_wait = FALSE; + } + le = le->Flink; } - - KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL); - - le = wtc->stripes.Flink; - while (le != &wtc->stripes) { + + if (!no_wait) + KeWaitForSingleObject(&wtc.Event, Executive, KernelMode, FALSE, NULL); + + le = wtc.stripes.Flink; + while (le != &wtc.stripes) { write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry); - + if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) { Status = stripe->iosb.Status; + + log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS); break; } - + le = le->Flink; } - - free_write_data_stripes(wtc); + + free_write_data_stripes(&wtc); } - + if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) chunk_unlock_range(Vcb, c, lockaddr, locklen); - ExFreePool(wtc); - -// #ifdef DEBUG_PARANOID -// buf2 = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); -// Status = read_data(Vcb, address, length, NULL, FALSE, buf2, NULL, Irp); -// -// if (!NT_SUCCESS(Status) || RtlCompareMemory(buf2, data, length) != length) -// int3; -// -// ExFreePool(buf2); -// #endif - - return STATUS_SUCCESS; + return Status; } -static NTSTATUS STDCALL 
write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +_Function_class_(IO_COMPLETION_ROUTINE) +#ifdef __REACTOS__ +static NTSTATUS NTAPI write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#else +static NTSTATUS write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { +#endif write_data_stripe* stripe = conptr; write_data_context* context = (write_data_context*)stripe->context; LIST_ENTRY* le; - + + UNUSED(DeviceObject); + // FIXME - we need a lock here - + if (stripe->status == WriteDataStatus_Cancelling) { stripe->status = WriteDataStatus_Cancelled; goto end; } - + stripe->iosb = Irp->IoStatus; - + if (NT_SUCCESS(Irp->IoStatus.Status)) { stripe->status = WriteDataStatus_Success; } else { le = context->stripes.Flink; - + stripe->status = WriteDataStatus_Error; - + while (le != &context->stripes) { write_data_stripe* s2 = CONTAINING_RECORD(le, write_data_stripe, list_entry); - + if (s2->status == WriteDataStatus_Pending) { s2->status = WriteDataStatus_Cancelling; IoCancelIrp(s2->Irp); } - + le = le->Flink; } } - + end: if (InterlockedDecrement(&context->stripes_left) == 0) KeSetEvent(&context->Event, 0, FALSE); @@ -1891,271 +2287,122 @@ end: } void free_write_data_stripes(write_data_context* wtc) { - LIST_ENTRY *le, *le2, *nextle; - + LIST_ENTRY* le; + PMDL last_mdl = NULL; + + if (wtc->parity1_mdl) { + if (wtc->parity1_mdl->MdlFlags & MDL_PAGES_LOCKED) + MmUnlockPages(wtc->parity1_mdl); + + IoFreeMdl(wtc->parity1_mdl); + } + + if (wtc->parity2_mdl) { + if (wtc->parity2_mdl->MdlFlags & MDL_PAGES_LOCKED) + MmUnlockPages(wtc->parity2_mdl); + + IoFreeMdl(wtc->parity2_mdl); + } + + if (wtc->mdl) { + if (wtc->mdl->MdlFlags & MDL_PAGES_LOCKED) + MmUnlockPages(wtc->mdl); + + IoFreeMdl(wtc->mdl); + } + + if (wtc->parity1) + ExFreePool(wtc->parity1); + + if (wtc->parity2) + ExFreePool(wtc->parity2); + + if (wtc->scratch) + ExFreePool(wtc->scratch); + le = wtc->stripes.Flink; while (le != &wtc->stripes) { 
write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry); - - if (stripe->Irp) { - if (stripe->device->devobj->Flags & DO_DIRECT_IO) { - MmUnlockPages(stripe->Irp->MdlAddress); - IoFreeMdl(stripe->Irp->MdlAddress); - } + + if (stripe->mdl && stripe->mdl != last_mdl) { + if (stripe->mdl->MdlFlags & MDL_PAGES_LOCKED) + MmUnlockPages(stripe->mdl); + + IoFreeMdl(stripe->mdl); } - + + last_mdl = stripe->mdl; + le = le->Flink; } - - le = wtc->stripes.Flink; - while (le != &wtc->stripes) { - write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry); - - nextle = le->Flink; - - if (stripe->buf && stripe->need_free) { - ExFreePool(stripe->buf); - - le2 = le->Flink; - while (le2 != &wtc->stripes) { - write_data_stripe* s2 = CONTAINING_RECORD(le2, write_data_stripe, list_entry); - - if (s2->buf == stripe->buf) - s2->buf = NULL; - - le2 = le2->Flink; - } - - } - + + while (!IsListEmpty(&wtc->stripes)) { + write_data_stripe* stripe = CONTAINING_RECORD(RemoveHeadList(&wtc->stripes), write_data_stripe, list_entry); + ExFreePool(stripe); - - le = nextle; } } -NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 end_data, PIRP Irp, LIST_ENTRY* rollback) { - NTSTATUS Status; - LIST_ENTRY* le; - - le = fcb->extents.Flink; +void add_extent(_In_ fcb* fcb, _In_ LIST_ENTRY* prevextle, _In_ __drv_aliasesMem extent* newext) { + LIST_ENTRY* le = prevextle->Flink; while (le != &fcb->extents) { - LIST_ENTRY* le2 = le->Flink; extent* ext = CONTAINING_RECORD(le, extent, list_entry); - EXTENT_DATA* ed = ext->data; - EXTENT_DATA2* ed2; - UINT64 len; - - if (!ext->ignore) { - if (ext->datalen < sizeof(EXTENT_DATA)) { - ERR("extent at %llx was %u bytes, expected at least %u\n", ext->offset, ext->datalen, sizeof(EXTENT_DATA)); - Status = STATUS_INTERNAL_ERROR; - goto end; - } - - if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { - if (ext->datalen < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { 
- ERR("extent at %llx was %u bytes, expected at least %u\n", ext->offset, ext->datalen, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)); - Status = STATUS_INTERNAL_ERROR; - goto end; - } - - ed2 = (EXTENT_DATA2*)ed->data; - } - - len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes; - - if (ext->offset < end_data && ext->offset + len > start_data) { - if (ed->type == EXTENT_TYPE_INLINE) { - if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all - remove_fcb_extent(fcb, ext, rollback); - - fcb->inode_item.st_blocks -= len; - fcb->inode_item_changed = TRUE; - } else if (start_data <= ext->offset && end_data < ext->offset + len) { // remove beginning - EXTENT_DATA* ned; - UINT64 size; - extent* newext; - - size = len - (end_data - ext->offset); - - ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG); - if (!ned) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - newext = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG); - if (!newext) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(ned); - goto end; - } - - ned->generation = Vcb->superblock.generation; - ned->decoded_size = size; - ned->compression = ed->compression; - ned->encryption = ed->encryption; - ned->encoding = ed->encoding; - ned->type = ed->type; - - RtlCopyMemory(&ned->data[0], &ed->data[end_data - ext->offset], size); - - newext->offset = end_data; - newext->data = ned; - newext->datalen = sizeof(EXTENT_DATA) - 1 + size; - newext->unique = ext->unique; - newext->ignore = FALSE; - newext->inserted = TRUE; - newext->csum = NULL; - InsertHeadList(&ext->list_entry, &newext->list_entry); - - remove_fcb_extent(fcb, ext, rollback); - - fcb->inode_item.st_blocks -= end_data - ext->offset; - fcb->inode_item_changed = TRUE; - } else if (start_data > ext->offset && end_data >= ext->offset + len) { // remove end - EXTENT_DATA* ned; - UINT64 size; - extent* 
newext; - - size = start_data - ext->offset; - - ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG); - if (!ned) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - newext = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG); - if (!newext) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(ned); - goto end; - } - - ned->generation = Vcb->superblock.generation; - ned->decoded_size = size; - ned->compression = ed->compression; - ned->encryption = ed->encryption; - ned->encoding = ed->encoding; - ned->type = ed->type; - - RtlCopyMemory(&ned->data[0], &ed->data[0], size); - - newext->offset = ext->offset; - newext->data = ned; - newext->datalen = sizeof(EXTENT_DATA) - 1 + size; - newext->unique = ext->unique; - newext->ignore = FALSE; - newext->inserted = TRUE; - newext->csum = NULL; - InsertHeadList(&ext->list_entry, &newext->list_entry); - - remove_fcb_extent(fcb, ext, rollback); - - fcb->inode_item.st_blocks -= ext->offset + len - start_data; - fcb->inode_item_changed = TRUE; - } else if (start_data > ext->offset && end_data < ext->offset + len) { // remove middle - EXTENT_DATA *ned1, *ned2; - UINT64 size; - extent *newext1, *newext2; - - size = start_data - ext->offset; - - ned1 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG); - if (!ned1) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - newext1 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG); - if (!newext1) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(ned1); - goto end; - } - - ned1->generation = Vcb->superblock.generation; - ned1->decoded_size = size; - ned1->compression = ed->compression; - ned1->encryption = ed->encryption; - ned1->encoding = ed->encoding; - ned1->type = ed->type; - - RtlCopyMemory(&ned1->data[0], &ed->data[0], size); - newext1->offset = ext->offset; - 
newext1->data = ned1; - newext1->datalen = sizeof(EXTENT_DATA) - 1 + size; - newext1->unique = ext->unique; - newext1->ignore = FALSE; - newext1->inserted = TRUE; - newext1->csum = NULL; - - size = ext->offset + len - end_data; - - ned2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG); - if (!ned2) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(ned1); - ExFreePool(newext1); - goto end; - } - - newext2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG); - if (!newext2) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(ned1); - ExFreePool(newext1); - ExFreePool(ned2); - goto end; - } - - ned2->generation = Vcb->superblock.generation; - ned2->decoded_size = size; - ned2->compression = ed->compression; - ned2->encryption = ed->encryption; - ned2->encoding = ed->encoding; - ned2->type = ed->type; - - RtlCopyMemory(&ned2->data[0], &ed->data[end_data - ext->offset], size); - - newext2->offset = end_data; - newext2->data = ned2; - newext2->datalen = sizeof(EXTENT_DATA) - 1 + size; - newext2->unique = ext->unique; - newext2->ignore = FALSE; - newext2->inserted = TRUE; - newext2->csum = NULL; - - InsertHeadList(&ext->list_entry, &newext1->list_entry); - InsertHeadList(&newext1->list_entry, &newext2->list_entry); - + if (ext->offset >= newext->offset) { + InsertHeadList(ext->list_entry.Blink, &newext->list_entry); + return; + } + + le = le->Flink; + } + + InsertTailList(&fcb->extents, &newext->list_entry); +} + +NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 end_data, PIRP Irp, LIST_ENTRY* rollback) { + NTSTATUS Status; + LIST_ENTRY* le; + + le = fcb->extents.Flink; + + while (le != &fcb->extents) { + LIST_ENTRY* le2 = le->Flink; + extent* ext = CONTAINING_RECORD(le, extent, list_entry); + EXTENT_DATA* ed = &ext->extent_data; + EXTENT_DATA2* ed2 = NULL; + UINT64 len; + + if (!ext->ignore) { + if (ed->type != EXTENT_TYPE_INLINE) + ed2 
= (EXTENT_DATA2*)ed->data; + + len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes; + + if (ext->offset < end_data && ext->offset + len > start_data) { + if (ed->type == EXTENT_TYPE_INLINE) { + if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all remove_fcb_extent(fcb, ext, rollback); - - fcb->inode_item.st_blocks -= end_data - start_data; + + fcb->inode_item.st_blocks -= len; fcb->inode_item_changed = TRUE; + } else { + ERR("trying to split inline extent\n"); +#ifdef DEBUG_PARANOID + int3; +#endif + return STATUS_INTERNAL_ERROR; } - } else if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { + } else if (ed->type != EXTENT_TYPE_INLINE) { if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all if (ed2->size != 0) { chunk* c; - + fcb->inode_item.st_blocks -= len; fcb->inode_item_changed = TRUE; - + c = get_chunk_from_address(Vcb, ed2->address); - + if (!c) { ERR("get_chunk_from_address(%llx) failed\n", ed2->address); } else { @@ -2167,172 +2414,147 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT } } } - + remove_fcb_extent(fcb, ext, rollback); } else if (start_data <= ext->offset && end_data < ext->offset + len) { // remove beginning - EXTENT_DATA* ned; EXTENT_DATA2* ned2; extent* newext; - + if (ed2->size != 0) { fcb->inode_item.st_blocks -= end_data - ext->offset; fcb->inode_item_changed = TRUE; } - - ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG); - if (!ned) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - newext = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG); + + newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG); if (!newext) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(ned); goto end; } - - ned2 = 
(EXTENT_DATA2*)&ned->data[0]; - - ned->generation = Vcb->superblock.generation; - ned->decoded_size = ed->decoded_size; - ned->compression = ed->compression; - ned->encryption = ed->encryption; - ned->encoding = ed->encoding; - ned->type = ed->type; + + ned2 = (EXTENT_DATA2*)newext->extent_data.data; + + newext->extent_data.generation = Vcb->superblock.generation; + newext->extent_data.decoded_size = ed->decoded_size; + newext->extent_data.compression = ed->compression; + newext->extent_data.encryption = ed->encryption; + newext->extent_data.encoding = ed->encoding; + newext->extent_data.type = ed->type; ned2->address = ed2->address; ned2->size = ed2->size; ned2->offset = ed2->offset + (end_data - ext->offset); ned2->num_bytes = ed2->num_bytes - (end_data - ext->offset); newext->offset = end_data; - newext->data = ned; newext->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2); newext->unique = ext->unique; newext->ignore = FALSE; newext->inserted = TRUE; - + if (ext->csum) { if (ed->compression == BTRFS_COMPRESSION_NONE) { - newext->csum = ExAllocatePoolWithTag(PagedPool, ned2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); + newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ned2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG); if (!newext->csum) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(ned); ExFreePool(newext); goto end; } - + RtlCopyMemory(newext->csum, &ext->csum[(end_data - ext->offset) / Vcb->superblock.sector_size], - ned2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size); + (ULONG)(ned2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size)); } else { - newext->csum = ExAllocatePoolWithTag(PagedPool, ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); + newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG); if (!newext->csum) { ERR("out of memory\n"); Status = 
STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(ned); ExFreePool(newext); goto end; } - - RtlCopyMemory(newext->csum, ext->csum, ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size); + + RtlCopyMemory(newext->csum, ext->csum, (ULONG)(ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size)); } } else newext->csum = NULL; - - InsertHeadList(&ext->list_entry, &newext->list_entry); - + + add_extent(fcb, &ext->list_entry, newext); + remove_fcb_extent(fcb, ext, rollback); } else if (start_data > ext->offset && end_data >= ext->offset + len) { // remove end - EXTENT_DATA* ned; EXTENT_DATA2* ned2; extent* newext; - + if (ed2->size != 0) { fcb->inode_item.st_blocks -= ext->offset + len - start_data; fcb->inode_item_changed = TRUE; } - - ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG); - if (!ned) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - newext = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG); + + newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG); if (!newext) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(ned); goto end; } - - ned2 = (EXTENT_DATA2*)&ned->data[0]; - - ned->generation = Vcb->superblock.generation; - ned->decoded_size = ed->decoded_size; - ned->compression = ed->compression; - ned->encryption = ed->encryption; - ned->encoding = ed->encoding; - ned->type = ed->type; + + ned2 = (EXTENT_DATA2*)newext->extent_data.data; + + newext->extent_data.generation = Vcb->superblock.generation; + newext->extent_data.decoded_size = ed->decoded_size; + newext->extent_data.compression = ed->compression; + newext->extent_data.encryption = ed->encryption; + newext->extent_data.encoding = ed->encoding; + newext->extent_data.type = ed->type; ned2->address = ed2->address; ned2->size = ed2->size; ned2->offset = ed2->offset; ned2->num_bytes = start_data - 
ext->offset; newext->offset = ext->offset; - newext->data = ned; newext->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2); newext->unique = ext->unique; newext->ignore = FALSE; newext->inserted = TRUE; - + if (ext->csum) { if (ed->compression == BTRFS_COMPRESSION_NONE) { - newext->csum = ExAllocatePoolWithTag(PagedPool, ned2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); + newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ned2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG); if (!newext->csum) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(ned); ExFreePool(newext); goto end; } - - RtlCopyMemory(newext->csum, ext->csum, ned2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size); + + RtlCopyMemory(newext->csum, ext->csum, (ULONG)(ned2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size)); } else { - newext->csum = ExAllocatePoolWithTag(PagedPool, ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); + newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG); if (!newext->csum) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(ned); ExFreePool(newext); goto end; } - - RtlCopyMemory(newext->csum, ext->csum, ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size); + + RtlCopyMemory(newext->csum, ext->csum, (ULONG)(ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size)); } } else newext->csum = NULL; - + InsertHeadList(&ext->list_entry, &newext->list_entry); - + remove_fcb_extent(fcb, ext, rollback); } else if (start_data > ext->offset && end_data < ext->offset + len) { // remove middle - EXTENT_DATA *neda, *nedb; EXTENT_DATA2 *neda2, *nedb2; extent *newext1, *newext2; - + if (ed2->size != 0) { chunk* c; - + fcb->inode_item.st_blocks -= end_data - start_data; fcb->inode_item_changed = TRUE; - + c = get_chunk_from_address(Vcb, ed2->address); - + if (!c) 
{ ERR("get_chunk_from_address(%llx) failed\n", ed2->address); } else { @@ -2344,144 +2566,115 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT } } } - - neda = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG); - if (!neda) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - newext1 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG); + + newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG); if (!newext1) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(neda); - goto end; - } - - nedb = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG); - if (!nedb) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(neda); - ExFreePool(newext1); goto end; } - - newext2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG); + + newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG); if (!newext2) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(neda); ExFreePool(newext1); - ExFreePool(nedb); goto end; } - - neda2 = (EXTENT_DATA2*)&neda->data[0]; - - neda->generation = Vcb->superblock.generation; - neda->decoded_size = ed->decoded_size; - neda->compression = ed->compression; - neda->encryption = ed->encryption; - neda->encoding = ed->encoding; - neda->type = ed->type; + + neda2 = (EXTENT_DATA2*)newext1->extent_data.data; + + newext1->extent_data.generation = Vcb->superblock.generation; + newext1->extent_data.decoded_size = ed->decoded_size; + newext1->extent_data.compression = ed->compression; + newext1->extent_data.encryption = ed->encryption; + newext1->extent_data.encoding = ed->encoding; + newext1->extent_data.type = ed->type; neda2->address = ed2->address; 
neda2->size = ed2->size; neda2->offset = ed2->offset; neda2->num_bytes = start_data - ext->offset; - nedb2 = (EXTENT_DATA2*)&nedb->data[0]; - - nedb->generation = Vcb->superblock.generation; - nedb->decoded_size = ed->decoded_size; - nedb->compression = ed->compression; - nedb->encryption = ed->encryption; - nedb->encoding = ed->encoding; - nedb->type = ed->type; + nedb2 = (EXTENT_DATA2*)newext2->extent_data.data; + + newext2->extent_data.generation = Vcb->superblock.generation; + newext2->extent_data.decoded_size = ed->decoded_size; + newext2->extent_data.compression = ed->compression; + newext2->extent_data.encryption = ed->encryption; + newext2->extent_data.encoding = ed->encoding; + newext2->extent_data.type = ed->type; nedb2->address = ed2->address; nedb2->size = ed2->size; nedb2->offset = ed2->offset + (end_data - ext->offset); nedb2->num_bytes = ext->offset + len - end_data; - + newext1->offset = ext->offset; - newext1->data = neda; newext1->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2); newext1->unique = ext->unique; newext1->ignore = FALSE; newext1->inserted = TRUE; - + newext2->offset = end_data; - newext2->data = nedb; newext2->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2); newext2->unique = ext->unique; newext2->ignore = FALSE; newext2->inserted = TRUE; - + if (ext->csum) { if (ed->compression == BTRFS_COMPRESSION_NONE) { - newext1->csum = ExAllocatePoolWithTag(PagedPool, neda2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); + newext1->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(neda2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG); if (!newext1->csum) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(neda); ExFreePool(newext1); - ExFreePool(nedb); ExFreePool(newext2); goto end; } - - newext2->csum = ExAllocatePoolWithTag(PagedPool, nedb2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); + + newext2->csum = 
ExAllocatePoolWithTag(PagedPool, (ULONG)(nedb2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG); if (!newext2->csum) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; ExFreePool(newext1->csum); - ExFreePool(neda); ExFreePool(newext1); - ExFreePool(nedb); ExFreePool(newext2); goto end; } - - RtlCopyMemory(newext1->csum, ext->csum, neda2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size); + + RtlCopyMemory(newext1->csum, ext->csum, (ULONG)(neda2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size)); RtlCopyMemory(newext2->csum, &ext->csum[(end_data - ext->offset) / Vcb->superblock.sector_size], - nedb2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size); + (ULONG)(nedb2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size)); } else { - newext1->csum = ExAllocatePoolWithTag(PagedPool, ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); + newext1->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG); if (!newext1->csum) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(neda); ExFreePool(newext1); - ExFreePool(nedb); ExFreePool(newext2); goto end; } - - newext2->csum = ExAllocatePoolWithTag(PagedPool, ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); - if (!newext1->csum) { + + newext2->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG); + if (!newext2->csum) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; ExFreePool(newext1->csum); - ExFreePool(neda); ExFreePool(newext1); - ExFreePool(nedb); ExFreePool(newext2); goto end; } - - RtlCopyMemory(newext1->csum, ext->csum, ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size); - RtlCopyMemory(newext2->csum, ext->csum, ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size); + + RtlCopyMemory(newext1->csum, ext->csum, (ULONG)(ed2->size * 
sizeof(UINT32) / Vcb->superblock.sector_size)); + RtlCopyMemory(newext2->csum, ext->csum, (ULONG)(ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size)); } } else { newext1->csum = NULL; newext2->csum = NULL; } - + InsertHeadList(&ext->list_entry, &newext1->list_entry); - InsertHeadList(&newext1->list_entry, &newext2->list_entry); - + add_extent(fcb, &newext1->list_entry, newext2); + remove_fcb_extent(fcb, ext, rollback); } } @@ -2490,212 +2683,212 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT le = le2; } - + Status = STATUS_SUCCESS; end: fcb->extents_changed = TRUE; mark_fcb_dirty(fcb); - + return Status; } -static void add_insert_extent_rollback(LIST_ENTRY* rollback, fcb* fcb, extent* ext) { +void add_insert_extent_rollback(LIST_ENTRY* rollback, fcb* fcb, extent* ext) { rollback_extent* re; - + re = ExAllocatePoolWithTag(NonPagedPool, sizeof(rollback_extent), ALLOC_TAG); if (!re) { ERR("out of memory\n"); return; } - + re->fcb = fcb; re->ext = ext; - - add_rollback(fcb->Vcb, rollback, ROLLBACK_INSERT_EXTENT, re); + + add_rollback(rollback, ROLLBACK_INSERT_EXTENT, re); } -static BOOL add_extent_to_fcb(fcb* fcb, UINT64 offset, EXTENT_DATA* ed, ULONG edsize, BOOL unique, UINT32* csum, LIST_ENTRY* rollback) { +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(suppress: 28194) +#endif +NTSTATUS add_extent_to_fcb(_In_ fcb* fcb, _In_ UINT64 offset, _In_reads_bytes_(edsize) EXTENT_DATA* ed, _In_ UINT16 edsize, + _In_ BOOL unique, _In_opt_ _When_(return >= 0, __drv_aliasesMem) UINT32* csum, _In_ LIST_ENTRY* rollback) { extent* ext; LIST_ENTRY* le; - - ext = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG); + + ext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + edsize, ALLOC_TAG); if (!ext) { ERR("out of memory\n"); - return FALSE; + return STATUS_INSUFFICIENT_RESOURCES; } - + ext->offset = offset; - ext->data = ed; ext->datalen = edsize; ext->unique = unique; ext->ignore = FALSE; ext->inserted 
= TRUE; ext->csum = csum; - + + RtlCopyMemory(&ext->extent_data, ed, edsize); + le = fcb->extents.Flink; while (le != &fcb->extents) { extent* oldext = CONTAINING_RECORD(le, extent, list_entry); - - if (!oldext->ignore) { - if (oldext->offset > offset) { - InsertHeadList(le->Blink, &ext->list_entry); - goto end; - } + + if (oldext->offset >= offset) { + InsertHeadList(le->Blink, &ext->list_entry); + goto end; } - + le = le->Flink; } - + InsertTailList(&fcb->extents, &ext->list_entry); - + end: add_insert_extent_rollback(rollback, fcb, ext); - return TRUE; + return STATUS_SUCCESS; } +#ifdef _MSC_VER +#pragma warning(pop) +#endif static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback) { if (!ext->ignore) { rollback_extent* re; - + ext->ignore = TRUE; - + re = ExAllocatePoolWithTag(NonPagedPool, sizeof(rollback_extent), ALLOC_TAG); if (!re) { ERR("out of memory\n"); return; } - + re->fcb = fcb; re->ext = ext; - - add_rollback(fcb->Vcb, rollback, ROLLBACK_DELETE_EXTENT, re); + + add_rollback(rollback, ROLLBACK_DELETE_EXTENT, re); } } -static NTSTATUS calc_csum(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32* csum) { +NTSTATUS calc_csum(_In_ device_extension* Vcb, _In_reads_bytes_(sectors*Vcb->superblock.sector_size) UINT8* data, + _In_ UINT32 sectors, _Out_writes_bytes_(sectors*sizeof(UINT32)) UINT32* csum) { NTSTATUS Status; calc_job* cj; - + // From experimenting, it seems that 40 sectors is roughly the crossover // point where offloading the crc32 calculation becomes worth it. 
- - if (sectors < 40) { + + if (sectors < 40 || KeQueryActiveProcessorCount(NULL) < 2) { ULONG j; - + for (j = 0; j < sectors; j++) { csum[j] = ~calc_crc32c(0xffffffff, data + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size); } - + return STATUS_SUCCESS; } - + Status = add_calc_job(Vcb, data, sectors, csum, &cj); if (!NT_SUCCESS(Status)) { ERR("add_calc_job returned %08x\n", Status); return Status; } - + KeWaitForSingleObject(&cj->event, Executive, KernelMode, FALSE, NULL); free_calc_job(cj); return STATUS_SUCCESS; } -BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start_data, UINT64 length, BOOL prealloc, void* data, - PIRP Irp, LIST_ENTRY* rollback, UINT8 compression, UINT64 decoded_size) { +_Requires_lock_held_(c->lock) +_When_(return != 0, _Releases_lock_(c->lock)) +BOOL insert_extent_chunk(_In_ device_extension* Vcb, _In_ fcb* fcb, _In_ chunk* c, _In_ UINT64 start_data, _In_ UINT64 length, _In_ BOOL prealloc, _In_opt_ void* data, + _In_opt_ PIRP Irp, _In_ LIST_ENTRY* rollback, _In_ UINT8 compression, _In_ UINT64 decoded_size, _In_ BOOL file_write, _In_ UINT64 irp_offset) { UINT64 address; NTSTATUS Status; EXTENT_DATA* ed; EXTENT_DATA2* ed2; - ULONG edsize = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2); + UINT16 edsize = (UINT16)(offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2)); UINT32* csum = NULL; -// #ifdef DEBUG_PARANOID -// traverse_ptr tp; -// KEY searchkey; -// #endif - + TRACE("(%p, (%llx, %llx), %llx, %llx, %llx, %u, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, c->offset, start_data, length, prealloc, data, rollback); - + if (!find_data_address_in_chunk(Vcb, c, length, &address)) return FALSE; - -// #ifdef DEBUG_PARANOID -// searchkey.obj_id = address; -// searchkey.obj_type = TYPE_EXTENT_ITEM; -// searchkey.offset = 0xffffffffffffffff; -// -// Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE); -// if (!NT_SUCCESS(Status)) { -// ERR("error - find_item returned %08x\n", Status); -// } 
else if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { -// ERR("address %llx already allocated\n", address); -// int3; -// } -// #endif - + // add extent data to inode ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG); if (!ed) { ERR("out of memory\n"); return FALSE; } - + ed->generation = Vcb->superblock.generation; ed->decoded_size = decoded_size; ed->compression = compression; ed->encryption = BTRFS_ENCRYPTION_NONE; ed->encoding = BTRFS_ENCODING_NONE; ed->type = prealloc ? EXTENT_TYPE_PREALLOC : EXTENT_TYPE_REGULAR; - + ed2 = (EXTENT_DATA2*)ed->data; ed2->address = address; ed2->size = length; ed2->offset = 0; ed2->num_bytes = decoded_size; - + if (!prealloc && data && !(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) { - ULONG sl = length / Vcb->superblock.sector_size; - + ULONG sl = (ULONG)(length / Vcb->superblock.sector_size); + csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(UINT32), ALLOC_TAG); if (!csum) { ERR("out of memory\n"); + ExFreePool(ed); return FALSE; } - + Status = calc_csum(Vcb, data, sl, csum); if (!NT_SUCCESS(Status)) { ERR("calc_csum returned %08x\n", Status); + ExFreePool(csum); + ExFreePool(ed); return FALSE; } } - - if (!add_extent_to_fcb(fcb, start_data, ed, edsize, TRUE, csum, rollback)) { - ERR("add_extent_to_fcb failed\n"); + + Status = add_extent_to_fcb(fcb, start_data, ed, edsize, TRUE, csum, rollback); + if (!NT_SUCCESS(Status)) { + ERR("add_extent_to_fcb returned %08x\n", Status); + if (csum) ExFreePool(csum); ExFreePool(ed); return FALSE; } - - increase_chunk_usage(c, length); - space_list_subtract(Vcb, c, FALSE, address, length, rollback); - + + ExFreePool(ed); + + c->used += length; + space_list_subtract(c, FALSE, address, length, rollback); + fcb->inode_item.st_blocks += decoded_size; - + fcb->extents_changed = TRUE; fcb->inode_item_changed = TRUE; mark_fcb_dirty(fcb); - + ExAcquireResourceExclusiveLite(&c->changed_extents_lock, TRUE); - + add_changed_extent_ref(c, 
address, length, fcb->subvol->id, fcb->inode, start_data, 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM); - + ExReleaseResourceLite(&c->changed_extents_lock); - + ExReleaseResourceLite(&c->lock); - + if (data) { - Status = write_data_complete(Vcb, address, data, length, Irp, NULL); + Status = write_data_complete(Vcb, address, data, (UINT32)length, Irp, NULL, file_write, irp_offset, + fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority); if (!NT_SUCCESS(Status)) ERR("write_data_complete returned %08x\n", Status); } @@ -2704,221 +2897,277 @@ BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start } static BOOL try_extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, - PIRP Irp, UINT64* written, LIST_ENTRY* rollback) { + PIRP Irp, UINT64* written, BOOL file_write, UINT64 irp_offset, LIST_ENTRY* rollback) { BOOL success = FALSE; EXTENT_DATA* ed; EXTENT_DATA2* ed2; chunk* c; LIST_ENTRY* le; - space* s; extent* ext = NULL; - + le = fcb->extents.Flink; - + while (le != &fcb->extents) { extent* nextext = CONTAINING_RECORD(le, extent, list_entry); - + if (!nextext->ignore) { if (nextext->offset == start_data) { ext = nextext; break; } else if (nextext->offset > start_data) break; - + ext = nextext; } - + le = le->Flink; } - + if (!ext) return FALSE; - ed = ext->data; - - if (ext->datalen < sizeof(EXTENT_DATA)) { - ERR("extent %llx was %u bytes, expected at least %u\n", ext->offset, ext->datalen, sizeof(EXTENT_DATA)); - return FALSE; - } - + ed = &ext->extent_data; + if (ed->type != EXTENT_TYPE_REGULAR && ed->type != EXTENT_TYPE_PREALLOC) { TRACE("not extending extent which is not regular or prealloc\n"); return FALSE; } - + ed2 = (EXTENT_DATA2*)ed->data; - - if (ext->datalen < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { - ERR("extent %llx was %u bytes, expected at least %u\n", ext->offset, ext->datalen, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)); - return FALSE; 
- } if (ext->offset + ed2->num_bytes != start_data) { TRACE("last EXTENT_DATA does not run up to start_data (%llx + %llx != %llx)\n", ext->offset, ed2->num_bytes, start_data); return FALSE; } - + c = get_chunk_from_address(Vcb, ed2->address); - + if (c->reloc || c->readonly || c->chunk_item->type != Vcb->data_flags) return FALSE; - + ExAcquireResourceExclusiveLite(&c->lock, TRUE); - + + if (length > c->chunk_item->size - c->used) { + ExReleaseResourceLite(&c->lock); + return FALSE; + } + + if (!c->cache_loaded) { + NTSTATUS Status = load_cache_chunk(Vcb, c, NULL); + + if (!NT_SUCCESS(Status)) { + ERR("load_cache_chunk returned %08x\n", Status); + ExReleaseResourceLite(&c->lock); + return FALSE; + } + } + le = c->space.Flink; while (le != &c->space) { - s = CONTAINING_RECORD(le, space, list_entry); - + space* s = CONTAINING_RECORD(le, space, list_entry); + if (s->address == ed2->address + ed2->size) { UINT64 newlen = min(min(s->size, length), MAX_EXTENT_SIZE); - - success = insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen); - + + success = insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset); + if (success) *written += newlen; - + else + ExReleaseResourceLite(&c->lock); + return success; } else if (s->address > ed2->address + ed2->size) break; - + le = le->Flink; } - + ExReleaseResourceLite(&c->lock); - + return FALSE; } +static NTSTATUS insert_chunk_fragmented(fcb* fcb, UINT64 start, UINT64 length, UINT8* data, BOOL prealloc, LIST_ENTRY* rollback) { + LIST_ENTRY* le; + UINT64 flags = fcb->Vcb->data_flags; + BOOL page_file = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE; + NTSTATUS Status; + chunk* c; + + ExAcquireResourceSharedLite(&fcb->Vcb->chunk_lock, TRUE); + + // first create as many chunks as we can + do { + Status = alloc_chunk(fcb->Vcb, flags, &c, FALSE); + } while (NT_SUCCESS(Status)); + + if (Status != 
STATUS_DISK_FULL) { + ERR("alloc_chunk returned %08x\n", Status); + ExReleaseResourceLite(&fcb->Vcb->chunk_lock); + return Status; + } + + le = fcb->Vcb->chunks.Flink; + while (le != &fcb->Vcb->chunks) { + c = CONTAINING_RECORD(le, chunk, list_entry); + + if (!c->readonly && !c->reloc) { + ExAcquireResourceExclusiveLite(&c->lock, TRUE); + + if (c->chunk_item->type == flags) { + while (!IsListEmpty(&c->space_size) && length > 0) { + space* s = CONTAINING_RECORD(c->space_size.Flink, space, list_entry_size); + UINT64 extlen = min(length, s->size); + + if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, prealloc && !page_file, data, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, FALSE, 0)) { + start += extlen; + length -= extlen; + if (data) data += extlen; + + ExAcquireResourceExclusiveLite(&c->lock, TRUE); + } + } + } + + ExReleaseResourceLite(&c->lock); + + if (length == 0) + break; + } + + le = le->Flink; + } + + ExReleaseResourceLite(&fcb->Vcb->chunk_lock); + + return length == 0 ? STATUS_SUCCESS : STATUS_DISK_FULL; +} + static NTSTATUS insert_prealloc_extent(fcb* fcb, UINT64 start, UINT64 length, LIST_ENTRY* rollback) { LIST_ENTRY* le; chunk* c; -#ifdef __REACTOS__ UINT64 flags; -#else - UINT64 flags, origlength = length; -#endif NTSTATUS Status; BOOL page_file = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE; - + flags = fcb->Vcb->data_flags; - - // FIXME - try and maximize contiguous ranges first. If we can't do that, - // allocate all the free space we find until it's enough. 
- + do { UINT64 extlen = min(MAX_EXTENT_SIZE, length); - + ExAcquireResourceSharedLite(&fcb->Vcb->chunk_lock, TRUE); - + le = fcb->Vcb->chunks.Flink; while (le != &fcb->Vcb->chunks) { c = CONTAINING_RECORD(le, chunk, list_entry); - + if (!c->readonly && !c->reloc) { ExAcquireResourceExclusiveLite(&c->lock, TRUE); - + if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) { - if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen)) { + if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, FALSE, 0)) { ExReleaseResourceLite(&fcb->Vcb->chunk_lock); goto cont; } } - + ExReleaseResourceLite(&c->lock); } - le = le->Flink; + le = le->Flink; + } + + ExReleaseResourceLite(&fcb->Vcb->chunk_lock); + + ExAcquireResourceExclusiveLite(&fcb->Vcb->chunk_lock, TRUE); + + Status = alloc_chunk(fcb->Vcb, flags, &c, FALSE); + + ExReleaseResourceLite(&fcb->Vcb->chunk_lock); + + if (!NT_SUCCESS(Status)) { + ERR("alloc_chunk returned %08x\n", Status); + goto end; + } + + ExAcquireResourceExclusiveLite(&c->lock, TRUE); + + if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) { + if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, FALSE, 0)) + goto cont; } - - ExReleaseResourceLite(&fcb->Vcb->chunk_lock); - - ExAcquireResourceExclusiveLite(&fcb->Vcb->chunk_lock, TRUE); - - if ((c = alloc_chunk(fcb->Vcb, flags))) { - ExReleaseResourceLite(&fcb->Vcb->chunk_lock); - - ExAcquireResourceExclusiveLite(&c->lock, TRUE); - - if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) { - if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen)) - goto cont; - } - - ExReleaseResourceLite(&c->lock); - } else - ExReleaseResourceLite(&fcb->Vcb->chunk_lock); - - 
WARN("couldn't find any data chunks with %llx bytes free\n", origlength); - Status = STATUS_DISK_FULL; + + ExReleaseResourceLite(&c->lock); + + Status = insert_chunk_fragmented(fcb, start, length, NULL, TRUE, rollback); + if (!NT_SUCCESS(Status)) + ERR("insert_chunk_fragmented returned %08x\n", Status); + goto end; - + cont: length -= extlen; start += extlen; } while (length > 0); - + Status = STATUS_SUCCESS; - + end: return Status; } -// static void print_tree(tree* t) { -// LIST_ENTRY* le = t->itemlist.Flink; -// while (le != &t->itemlist) { -// tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); -// ERR("%llx,%x,%llx (ignore = %s)\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->ignore ? "TRUE" : "FALSE"); -// le = le->Flink; -// } -// } - -NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, + PIRP Irp, BOOL file_write, UINT64 irp_offset, LIST_ENTRY* rollback) { + NTSTATUS Status; LIST_ENTRY* le; chunk* c; UINT64 flags, orig_length = length, written = 0; - + TRACE("(%p, (%llx, %llx), %llx, %llx, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data); - + if (start_data > 0) { - try_extend_data(Vcb, fcb, start_data, length, data, Irp, &written, rollback); - + try_extend_data(Vcb, fcb, start_data, length, data, Irp, &written, file_write, irp_offset, rollback); + if (written == length) return STATUS_SUCCESS; else if (written > 0) { start_data += written; + irp_offset += written; length -= written; data = &((UINT8*)data)[written]; } } - + flags = Vcb->data_flags; - + while (written < orig_length) { UINT64 newlen = min(length, MAX_EXTENT_SIZE); BOOL done = FALSE; - + // Rather than necessarily writing the whole extent at once, we deal with it in blocks of 128 MB. // First, see if we can write the extent part to an existing chunk. 
- + ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE); - + le = Vcb->chunks.Flink; while (le != &Vcb->chunks) { c = CONTAINING_RECORD(le, chunk, list_entry); - + if (!c->readonly && !c->reloc) { ExAcquireResourceExclusiveLite(&c->lock, TRUE); - + if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen && - insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen)) { + insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset)) { written += newlen; - + if (written == orig_length) { ExReleaseResourceLite(&Vcb->chunk_lock); return STATUS_SUCCESS; } else { done = TRUE; start_data += newlen; + irp_offset += newlen; length -= newlen; data = &((UINT8*)data)[newlen]; break; @@ -2929,60 +3178,125 @@ NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT6 le = le->Flink; } - - ExReleaseResourceLite(&fcb->Vcb->chunk_lock); - + + ExReleaseResourceLite(&Vcb->chunk_lock); + if (done) continue; - + // Otherwise, see if we can put it in a new chunk. 
- - ExAcquireResourceExclusiveLite(&fcb->Vcb->chunk_lock, TRUE); - - if ((c = alloc_chunk(Vcb, flags))) { - ExReleaseResourceLite(&Vcb->chunk_lock); - + + ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); + + Status = alloc_chunk(Vcb, flags, &c, FALSE); + + ExReleaseResourceLite(&Vcb->chunk_lock); + + if (!NT_SUCCESS(Status)) { + ERR("alloc_chunk returned %08x\n", Status); + return Status; + } + + if (c) { ExAcquireResourceExclusiveLite(&c->lock, TRUE); - + if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen && - insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen)) { + insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset)) { written += newlen; - + if (written == orig_length) return STATUS_SUCCESS; else { done = TRUE; start_data += newlen; + irp_offset += newlen; length -= newlen; data = &((UINT8*)data)[newlen]; } - } else + } else ExReleaseResourceLite(&c->lock); - } else - ExReleaseResourceLite(&Vcb->chunk_lock); - + } + if (!done) { - FIXME("FIXME - not enough room to write whole extent part, try to write bits and pieces\n"); // FIXME - break; + Status = insert_chunk_fragmented(fcb, start_data, length, data, FALSE, rollback); + if (!NT_SUCCESS(Status)) + ERR("insert_chunk_fragmented returned %08x\n", Status); + + return Status; } } - - WARN("couldn't find any data chunks with %llx bytes free\n", length); return STATUS_DISK_FULL; } NTSTATUS truncate_file(fcb* fcb, UINT64 end, PIRP Irp, LIST_ENTRY* rollback) { NTSTATUS Status; - + // FIXME - convert into inline extent if short enough - + + if (end > 0 && fcb_is_inline(fcb)) { + UINT8* buf; + BOOL make_inline = end <= fcb->Vcb->options.max_inline; + + buf = ExAllocatePoolWithTag(PagedPool, (ULONG)(make_inline ? 
(offsetof(EXTENT_DATA, data[0]) + end) : sector_align(end, fcb->Vcb->superblock.sector_size)), ALLOC_TAG); + if (!buf) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + Status = read_file(fcb, make_inline ? (buf + offsetof(EXTENT_DATA, data[0])) : buf, 0, end, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("read_file returned %08x\n", Status); + ExFreePool(buf); + return Status; + } + + Status = excise_extents(fcb->Vcb, fcb, 0, fcb->inode_item.st_size, Irp, rollback); + if (!NT_SUCCESS(Status)) { + ERR("excise_extents returned %08x\n", Status); + ExFreePool(buf); + return Status; + } + + if (!make_inline) { + RtlZeroMemory(buf + end, (ULONG)(sector_align(end, fcb->Vcb->superblock.sector_size) - end)); + + Status = do_write_file(fcb, 0, sector_align(end, fcb->Vcb->superblock.sector_size), buf, Irp, FALSE, 0, rollback); + if (!NT_SUCCESS(Status)) { + ERR("do_write_file returned %08x\n", Status); + ExFreePool(buf); + return Status; + } + } else { + EXTENT_DATA* ed = (EXTENT_DATA*)buf; + + ed->generation = fcb->Vcb->superblock.generation; + ed->decoded_size = end; + ed->compression = BTRFS_COMPRESSION_NONE; + ed->encryption = BTRFS_ENCRYPTION_NONE; + ed->encoding = BTRFS_ENCODING_NONE; + ed->type = EXTENT_TYPE_INLINE; + + Status = add_extent_to_fcb(fcb, 0, ed, (UINT16)(offsetof(EXTENT_DATA, data[0]) + end), FALSE, NULL, rollback); + if (!NT_SUCCESS(Status)) { + ERR("add_extent_to_fcb returned %08x\n", Status); + ExFreePool(buf); + return Status; + } + + fcb->inode_item.st_blocks += end; + } + + ExFreePool(buf); + return STATUS_SUCCESS; + } + Status = excise_extents(fcb->Vcb, fcb, sector_align(end, fcb->Vcb->superblock.sector_size), sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size), Irp, rollback); if (!NT_SUCCESS(Status)) { - ERR("error - excise_extents failed\n"); + ERR("excise_extents returned %08x\n", Status); return Status; } - + fcb->inode_item.st_size = end; fcb->inode_item_changed = TRUE; TRACE("setting st_size to 
%llx\n", end); @@ -2991,9 +3305,9 @@ NTSTATUS truncate_file(fcb* fcb, UINT64 end, PIRP Irp, LIST_ENTRY* rollback) { fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size; fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size; // FIXME - inform cache manager of this - + TRACE("fcb %p FileSize = %llx\n", fcb, fcb->Header.FileSize.QuadPart); - + return STATUS_SUCCESS; } @@ -3001,266 +3315,268 @@ NTSTATUS extend_file(fcb* fcb, file_ref* fileref, UINT64 end, BOOL prealloc, PIR UINT64 oldalloc, newalloc; BOOL cur_inline; NTSTATUS Status; - + TRACE("(%p, %p, %x, %u)\n", fcb, fileref, end, prealloc); - if (fcb->ads) - return stream_set_end_of_file_information(fcb->Vcb, end, fcb, fileref, NULL, FALSE, rollback); - else { + if (fcb->ads) { + if (end > 0xffff) + return STATUS_DISK_FULL; + + return stream_set_end_of_file_information(fcb->Vcb, (UINT16)end, fcb, fileref, FALSE); + } else { extent* ext = NULL; LIST_ENTRY* le; - + le = fcb->extents.Blink; while (le != &fcb->extents) { extent* ext2 = CONTAINING_RECORD(le, extent, list_entry); - + if (!ext2->ignore) { ext = ext2; break; } - + le = le->Blink; } - + oldalloc = 0; if (ext) { - EXTENT_DATA* ed = ext->data; + EXTENT_DATA* ed = &ext->extent_data; EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; - - if (ext->datalen < sizeof(EXTENT_DATA)) { - ERR("extent %llx was %u bytes, expected at least %u\n", ext->offset, ext->datalen, sizeof(EXTENT_DATA)); - return STATUS_INTERNAL_ERROR; - } - + oldalloc = ext->offset + (ed->type == EXTENT_TYPE_INLINE ? 
ed->decoded_size : ed2->num_bytes); cur_inline = ed->type == EXTENT_TYPE_INLINE; - + if (cur_inline && end > fcb->Vcb->options.max_inline) { UINT64 origlength, length; UINT8* data; - UINT64 offset = ext->offset; - + TRACE("giving inline file proper extents\n"); - + origlength = ed->decoded_size; - + cur_inline = FALSE; - + length = sector_align(origlength, fcb->Vcb->superblock.sector_size); - - data = ExAllocatePoolWithTag(PagedPool, length, ALLOC_TAG); + + data = ExAllocatePoolWithTag(PagedPool, (ULONG)length, ALLOC_TAG); if (!data) { ERR("could not allocate %llx bytes for data\n", length); return STATUS_INSUFFICIENT_RESOURCES; } - - if (length > origlength) - RtlZeroMemory(data + origlength, length - origlength); - - RtlCopyMemory(data, ed->data, origlength); - - fcb->inode_item.st_blocks -= origlength; - fcb->inode_item_changed = TRUE; - mark_fcb_dirty(fcb); - - remove_fcb_extent(fcb, ext, rollback); - - if (write_fcb_compressed(fcb)) { - Status = write_compressed(fcb, offset, offset + length, data, Irp, rollback); - if (!NT_SUCCESS(Status)) { - ERR("write_compressed returned %08x\n", Status); - ExFreePool(data); - return Status; - } - } else { - Status = insert_extent(fcb->Vcb, fcb, offset, length, data, Irp, rollback); - if (!NT_SUCCESS(Status)) { - ERR("insert_extent returned %08x\n", Status); - ExFreePool(data); - return Status; - } + + Status = read_file(fcb, data, 0, origlength, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("read_file returned %08x\n", Status); + ExFreePool(data); + return Status; + } + + RtlZeroMemory(data + origlength, (ULONG)(length - origlength)); + + Status = excise_extents(fcb->Vcb, fcb, 0, fcb->inode_item.st_size, Irp, rollback); + if (!NT_SUCCESS(Status)) { + ERR("excise_extents returned %08x\n", Status); + ExFreePool(data); + return Status; + } + + Status = do_write_file(fcb, 0, length, data, Irp, FALSE, 0, rollback); + if (!NT_SUCCESS(Status)) { + ERR("do_write_file returned %08x\n", Status); + ExFreePool(data); + return Status; 
} - + oldalloc = ext->offset + length; - + ExFreePool(data); } - + if (cur_inline) { - ULONG edsize; - + UINT16 edsize; + if (end > oldalloc) { - edsize = sizeof(EXTENT_DATA) - 1 + end - ext->offset; + edsize = (UINT16)(offsetof(EXTENT_DATA, data[0]) + end - ext->offset); ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG); - + if (!ed) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - - RtlZeroMemory(ed, edsize); - RtlCopyMemory(ed, ext->data, ext->datalen); - + + ed->generation = fcb->Vcb->superblock.generation; ed->decoded_size = end - ext->offset; - + ed->compression = BTRFS_COMPRESSION_NONE; + ed->encryption = BTRFS_ENCRYPTION_NONE; + ed->encoding = BTRFS_ENCODING_NONE; + ed->type = EXTENT_TYPE_INLINE; + + Status = read_file(fcb, ed->data, ext->offset, oldalloc, NULL, Irp); + if (!NT_SUCCESS(Status)) { + ERR("read_file returned %08x\n", Status); + ExFreePool(ed); + return Status; + } + + RtlZeroMemory(ed->data + oldalloc - ext->offset, (ULONG)(end - oldalloc)); + remove_fcb_extent(fcb, ext, rollback); - - if (!add_extent_to_fcb(fcb, ext->offset, ed, edsize, ext->unique, NULL, rollback)) { - ERR("add_extent_to_fcb failed\n"); + + Status = add_extent_to_fcb(fcb, ext->offset, ed, edsize, ext->unique, NULL, rollback); + if (!NT_SUCCESS(Status)) { + ERR("add_extent_to_fcb returned %08x\n", Status); ExFreePool(ed); - return STATUS_INTERNAL_ERROR; + return Status; } - + + ExFreePool(ed); + fcb->extents_changed = TRUE; mark_fcb_dirty(fcb); } - + TRACE("extending inline file (oldalloc = %llx, end = %llx)\n", oldalloc, end); - + fcb->inode_item.st_size = end; TRACE("setting st_size to %llx\n", end); - + fcb->inode_item.st_blocks = end; fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end; } else { newalloc = sector_align(end, fcb->Vcb->superblock.sector_size); - + if (newalloc > oldalloc) { if (prealloc) { // FIXME - try and extend previous extent first - + Status = 
insert_prealloc_extent(fcb, oldalloc, newalloc - oldalloc, rollback); - + if (!NT_SUCCESS(Status)) { ERR("insert_prealloc_extent returned %08x\n", Status); return Status; } } - + fcb->extents_changed = TRUE; } - + fcb->inode_item.st_size = end; fcb->inode_item_changed = TRUE; mark_fcb_dirty(fcb); - + TRACE("setting st_size to %llx\n", end); - + TRACE("newalloc = %llx\n", newalloc); - + fcb->Header.AllocationSize.QuadPart = newalloc; fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end; } } else { if (end > fcb->Vcb->options.max_inline) { newalloc = sector_align(end, fcb->Vcb->superblock.sector_size); - + if (prealloc) { Status = insert_prealloc_extent(fcb, 0, newalloc, rollback); - + if (!NT_SUCCESS(Status)) { ERR("insert_prealloc_extent returned %08x\n", Status); return Status; } } - + fcb->extents_changed = TRUE; fcb->inode_item_changed = TRUE; mark_fcb_dirty(fcb); - + fcb->inode_item.st_size = end; TRACE("setting st_size to %llx\n", end); - + TRACE("newalloc = %llx\n", newalloc); - + fcb->Header.AllocationSize.QuadPart = newalloc; fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end; } else { EXTENT_DATA* ed; - ULONG edsize; - - edsize = sizeof(EXTENT_DATA) - 1 + end; + UINT16 edsize; + + edsize = (UINT16)(offsetof(EXTENT_DATA, data[0]) + end); ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG); - + if (!ed) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - + ed->generation = fcb->Vcb->superblock.generation; ed->decoded_size = end; ed->compression = BTRFS_COMPRESSION_NONE; ed->encryption = BTRFS_ENCRYPTION_NONE; ed->encoding = BTRFS_ENCODING_NONE; ed->type = EXTENT_TYPE_INLINE; - - RtlZeroMemory(ed->data, end); - - if (!add_extent_to_fcb(fcb, 0, ed, edsize, FALSE, NULL, rollback)) { - ERR("add_extent_to_fcb failed\n"); + + RtlZeroMemory(ed->data, (ULONG)end); + + Status = add_extent_to_fcb(fcb, 0, ed, edsize, FALSE, NULL, rollback); + if (!NT_SUCCESS(Status)) { + ERR("add_extent_to_fcb returned 
%08x\n", Status); ExFreePool(ed); - return STATUS_INTERNAL_ERROR; + return Status; } - + + ExFreePool(ed); + fcb->extents_changed = TRUE; fcb->inode_item_changed = TRUE; mark_fcb_dirty(fcb); - + fcb->inode_item.st_size = end; TRACE("setting st_size to %llx\n", end); - + fcb->inode_item.st_blocks = end; fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end; } } } - + return STATUS_SUCCESS; } static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, UINT64 end_data, void* data, UINT64* written, - PIRP Irp, LIST_ENTRY* rollback) { - EXTENT_DATA* ed = ext->data; + PIRP Irp, BOOL file_write, UINT64 irp_offset, ULONG priority, LIST_ENTRY* rollback) { + EXTENT_DATA* ed = &ext->extent_data; EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; NTSTATUS Status; - chunk* c; - + chunk* c = NULL; + if (start_data <= ext->offset && end_data >= ext->offset + ed2->num_bytes) { // replace all - EXTENT_DATA* ned; extent* newext; - - ned = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG); - if (!ned) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - newext = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG); + + newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG); if (!newext) { ERR("out of memory\n"); - ExFreePool(ned); return STATUS_INSUFFICIENT_RESOURCES; } - - RtlCopyMemory(ned, ext->data, ext->datalen); - - ned->type = EXTENT_TYPE_REGULAR; - - Status = write_data_complete(fcb->Vcb, ed2->address + ed2->offset, (UINT8*)data + ext->offset - start_data, ed2->num_bytes, Irp, NULL); + + RtlCopyMemory(&newext->extent_data, &ext->extent_data, ext->datalen); + + newext->extent_data.type = EXTENT_TYPE_REGULAR; + + Status = write_data_complete(fcb->Vcb, ed2->address + ed2->offset, (UINT8*)data + ext->offset - start_data, (UINT32)ed2->num_bytes, Irp, + NULL, file_write, irp_offset + ext->offset - start_data, priority); if 
(!NT_SUCCESS(Status)) { ERR("write_data_complete returned %08x\n", Status); return Status; } - + if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) { - ULONG sl = ed2->num_bytes / fcb->Vcb->superblock.sector_size; + ULONG sl = (ULONG)(ed2->num_bytes / fcb->Vcb->superblock.sector_size); UINT32* csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(UINT32), ALLOC_TAG); - + if (!csum) { ERR("out of memory\n"); - ExFreePool(ned); ExFreePool(newext); return STATUS_INSUFFICIENT_RESOURCES; } @@ -3269,19 +3585,17 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, if (!NT_SUCCESS(Status)) { ERR("calc_csum returned %08x\n", Status); ExFreePool(csum); - ExFreePool(ned); ExFreePool(newext); return Status; } - + newext->csum = csum; } else newext->csum = NULL; - + *written = ed2->num_bytes; - + newext->offset = ext->offset; - newext->data = ned; newext->datalen = ext->datalen; newext->unique = ext->unique; newext->ignore = FALSE; @@ -3289,67 +3603,50 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, InsertHeadList(&ext->list_entry, &newext->list_entry); add_insert_extent_rollback(rollback, fcb, newext); - + remove_fcb_extent(fcb, ext, rollback); + + c = get_chunk_from_address(fcb->Vcb, ed2->address); } else if (start_data <= ext->offset && end_data < ext->offset + ed2->num_bytes) { // replace beginning - EXTENT_DATA *ned, *nedb; EXTENT_DATA2* ned2; extent *newext1, *newext2; - - ned = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG); - if (!ned) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - nedb = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG); - if (!nedb) { - ERR("out of memory\n"); - ExFreePool(ned); - return STATUS_INSUFFICIENT_RESOURCES; - } - - newext1 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG); + + newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG); if (!newext1) { ERR("out of memory\n"); 
- ExFreePool(ned); - ExFreePool(nedb); return STATUS_INSUFFICIENT_RESOURCES; } - - newext2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG); + + newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG); if (!newext2) { ERR("out of memory\n"); - ExFreePool(ned); - ExFreePool(nedb); ExFreePool(newext1); return STATUS_INSUFFICIENT_RESOURCES; } - - RtlCopyMemory(ned, ext->data, ext->datalen); - ned->type = EXTENT_TYPE_REGULAR; - ned2 = (EXTENT_DATA2*)ned->data; + + RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen); + newext1->extent_data.type = EXTENT_TYPE_REGULAR; + ned2 = (EXTENT_DATA2*)newext1->extent_data.data; ned2->num_bytes = end_data - ext->offset; - - RtlCopyMemory(nedb, ext->data, ext->datalen); - ned2 = (EXTENT_DATA2*)nedb->data; + + RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen); + ned2 = (EXTENT_DATA2*)newext2->extent_data.data; ned2->offset += end_data - ext->offset; ned2->num_bytes -= end_data - ext->offset; - - Status = write_data_complete(fcb->Vcb, ed2->address + ed2->offset, (UINT8*)data + ext->offset - start_data, end_data - ext->offset, Irp, NULL); + + Status = write_data_complete(fcb->Vcb, ed2->address + ed2->offset, (UINT8*)data + ext->offset - start_data, (UINT32)(end_data - ext->offset), + Irp, NULL, file_write, irp_offset + ext->offset - start_data, priority); if (!NT_SUCCESS(Status)) { ERR("write_data_complete returned %08x\n", Status); return Status; } - + if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) { - ULONG sl = (end_data - ext->offset) / fcb->Vcb->superblock.sector_size; + ULONG sl = (ULONG)((end_data - ext->offset) / fcb->Vcb->superblock.sector_size); UINT32* csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(UINT32), ALLOC_TAG); - + if (!csum) { ERR("out of memory\n"); - ExFreePool(ned); - ExFreePool(nedb); ExFreePool(newext1); ExFreePool(newext2); return STATUS_INSUFFICIENT_RESOURCES; @@ -3358,49 +3655,45 @@ static NTSTATUS 
do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, Status = calc_csum(fcb->Vcb, (UINT8*)data + ext->offset - start_data, sl, csum); if (!NT_SUCCESS(Status)) { ERR("calc_csum returned %08x\n", Status); - ExFreePool(ned); - ExFreePool(nedb); ExFreePool(newext1); ExFreePool(newext2); ExFreePool(csum); return Status; } - + newext1->csum = csum; } else newext1->csum = NULL; - + *written = end_data - ext->offset; - + newext1->offset = ext->offset; - newext1->data = ned; newext1->datalen = ext->datalen; newext1->unique = ext->unique; newext1->ignore = FALSE; newext1->inserted = TRUE; InsertHeadList(&ext->list_entry, &newext1->list_entry); - + add_insert_extent_rollback(rollback, fcb, newext1); - + newext2->offset = end_data; - newext2->data = nedb; newext2->datalen = ext->datalen; newext2->unique = ext->unique; newext2->ignore = FALSE; newext2->inserted = TRUE; newext2->csum = NULL; - InsertHeadList(&newext1->list_entry, &newext2->list_entry); - + add_extent(fcb, &newext1->list_entry, newext2); + add_insert_extent_rollback(rollback, fcb, newext2); - + c = get_chunk_from_address(fcb->Vcb, ed2->address); - + if (!c) ERR("get_chunk_from_address(%llx) failed\n", ed2->address); else { Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp); - + if (!NT_SUCCESS(Status)) { ERR("update_changed_extent_ref returned %08x\n", Status); return Status; @@ -3409,66 +3702,46 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, remove_fcb_extent(fcb, ext, rollback); } else if (start_data > ext->offset && end_data >= ext->offset + ed2->num_bytes) { // replace end - EXTENT_DATA *ned, *nedb; EXTENT_DATA2* ned2; extent *newext1, *newext2; - - ned = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG); - if (!ned) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - nedb = 
ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG); - if (!nedb) { - ERR("out of memory\n"); - ExFreePool(ned); - return STATUS_INSUFFICIENT_RESOURCES; - } - - newext1 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG); + + newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG); if (!newext1) { ERR("out of memory\n"); - ExFreePool(ned); - ExFreePool(nedb); return STATUS_INSUFFICIENT_RESOURCES; } - - newext2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG); + + newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG); if (!newext2) { ERR("out of memory\n"); - ExFreePool(ned); - ExFreePool(nedb); ExFreePool(newext1); return STATUS_INSUFFICIENT_RESOURCES; } - - RtlCopyMemory(ned, ext->data, ext->datalen); - - ned2 = (EXTENT_DATA2*)ned->data; + + RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen); + + ned2 = (EXTENT_DATA2*)newext1->extent_data.data; ned2->num_bytes = start_data - ext->offset; - - RtlCopyMemory(nedb, ext->data, ext->datalen); - - nedb->type = EXTENT_TYPE_REGULAR; - ned2 = (EXTENT_DATA2*)nedb->data; + + RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen); + + newext2->extent_data.type = EXTENT_TYPE_REGULAR; + ned2 = (EXTENT_DATA2*)newext2->extent_data.data; ned2->offset += start_data - ext->offset; ned2->num_bytes = ext->offset + ed2->num_bytes - start_data; - - Status = write_data_complete(fcb->Vcb, ed2->address + ned2->offset, data, ned2->num_bytes, Irp, NULL); + + Status = write_data_complete(fcb->Vcb, ed2->address + ned2->offset, data, (UINT32)ned2->num_bytes, Irp, NULL, file_write, irp_offset, priority); if (!NT_SUCCESS(Status)) { ERR("write_data_complete returned %08x\n", Status); return Status; } - + if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) { - ULONG sl = ned2->num_bytes / fcb->Vcb->superblock.sector_size; + ULONG sl = (ULONG)(ned2->num_bytes / fcb->Vcb->superblock.sector_size); 
UINT32* csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(UINT32), ALLOC_TAG); - + if (!csum) { ERR("out of memory\n"); - ExFreePool(ned); - ExFreePool(nedb); ExFreePool(newext1); ExFreePool(newext2); return STATUS_INSUFFICIENT_RESOURCES; @@ -3477,144 +3750,109 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, Status = calc_csum(fcb->Vcb, data, sl, csum); if (!NT_SUCCESS(Status)) { ERR("calc_csum returned %08x\n", Status); - ExFreePool(ned); - ExFreePool(nedb); ExFreePool(newext1); ExFreePool(newext2); ExFreePool(csum); return Status; } - + newext2->csum = csum; } else newext2->csum = NULL; - + *written = ned2->num_bytes; - + newext1->offset = ext->offset; - newext1->data = ned; newext1->datalen = ext->datalen; newext1->unique = ext->unique; newext1->ignore = FALSE; newext1->inserted = TRUE; newext1->csum = NULL; InsertHeadList(&ext->list_entry, &newext1->list_entry); - + add_insert_extent_rollback(rollback, fcb, newext1); - + newext2->offset = start_data; - newext2->data = nedb; newext2->datalen = ext->datalen; newext2->unique = ext->unique; newext2->ignore = FALSE; newext2->inserted = TRUE; - InsertHeadList(&newext1->list_entry, &newext2->list_entry); - + add_extent(fcb, &newext1->list_entry, newext2); + add_insert_extent_rollback(rollback, fcb, newext2); - + c = get_chunk_from_address(fcb->Vcb, ed2->address); - + if (!c) ERR("get_chunk_from_address(%llx) failed\n", ed2->address); else { Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp); - + if (!NT_SUCCESS(Status)) { ERR("update_changed_extent_ref returned %08x\n", Status); return Status; } } - remove_fcb_extent(fcb, ext, rollback); - } else if (start_data > ext->offset && end_data < ext->offset + ed2->num_bytes) { // replace middle - EXTENT_DATA *ned, *nedb, *nedc; - EXTENT_DATA2* ned2; - extent *newext1, *newext2, *newext3; - - ned = 
ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG); - if (!ned) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - nedb = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG); - if (!nedb) { - ERR("out of memory\n"); - ExFreePool(ned); - return STATUS_INSUFFICIENT_RESOURCES; - } - - nedc = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG); - if (!nedb) { - ERR("out of memory\n"); - ExFreePool(ned); - ExFreePool(nedb); - return STATUS_INSUFFICIENT_RESOURCES; - } - - newext1 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG); + remove_fcb_extent(fcb, ext, rollback); + } else if (start_data > ext->offset && end_data < ext->offset + ed2->num_bytes) { // replace middle + EXTENT_DATA2* ned2; + extent *newext1, *newext2, *newext3; + + newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG); if (!newext1) { ERR("out of memory\n"); - ExFreePool(ned); - ExFreePool(nedb); - ExFreePool(nedc); return STATUS_INSUFFICIENT_RESOURCES; } - - newext2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG); + + newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG); if (!newext2) { ERR("out of memory\n"); - ExFreePool(ned); - ExFreePool(nedb); - ExFreePool(nedc); ExFreePool(newext1); return STATUS_INSUFFICIENT_RESOURCES; } - - newext3 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG); - if (!newext2) { + + newext3 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG); + if (!newext3) { ERR("out of memory\n"); - ExFreePool(ned); - ExFreePool(nedb); - ExFreePool(nedc); ExFreePool(newext1); ExFreePool(newext2); return STATUS_INSUFFICIENT_RESOURCES; } - - RtlCopyMemory(ned, ext->data, ext->datalen); - RtlCopyMemory(nedb, ext->data, ext->datalen); - RtlCopyMemory(nedc, ext->data, ext->datalen); - - ned2 = (EXTENT_DATA2*)ned->data; + + RtlCopyMemory(&newext1->extent_data, &ext->extent_data, 
ext->datalen); + RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen); + RtlCopyMemory(&newext3->extent_data, &ext->extent_data, ext->datalen); + + ned2 = (EXTENT_DATA2*)newext1->extent_data.data; ned2->num_bytes = start_data - ext->offset; - - nedb->type = EXTENT_TYPE_REGULAR; - ned2 = (EXTENT_DATA2*)nedb->data; + + newext2->extent_data.type = EXTENT_TYPE_REGULAR; + ned2 = (EXTENT_DATA2*)newext2->extent_data.data; ned2->offset += start_data - ext->offset; ned2->num_bytes = end_data - start_data; - - ned2 = (EXTENT_DATA2*)nedc->data; + + ned2 = (EXTENT_DATA2*)newext3->extent_data.data; ned2->offset += end_data - ext->offset; ned2->num_bytes -= end_data - ext->offset; - - ned2 = (EXTENT_DATA2*)nedb->data; - Status = write_data_complete(fcb->Vcb, ed2->address + ned2->offset, data, end_data - start_data, Irp, NULL); + + ned2 = (EXTENT_DATA2*)newext2->extent_data.data; + Status = write_data_complete(fcb->Vcb, ed2->address + ned2->offset, data, (UINT32)(end_data - start_data), Irp, NULL, file_write, irp_offset, priority); if (!NT_SUCCESS(Status)) { ERR("write_data_complete returned %08x\n", Status); + ExFreePool(newext1); + ExFreePool(newext2); + ExFreePool(newext3); return Status; } - + if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) { - ULONG sl = (end_data - start_data) / fcb->Vcb->superblock.sector_size; + ULONG sl = (ULONG)((end_data - start_data) / fcb->Vcb->superblock.sector_size); UINT32* csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(UINT32), ALLOC_TAG); - + if (!csum) { ERR("out of memory\n"); - ExFreePool(ned); - ExFreePool(nedb); - ExFreePool(nedc); ExFreePool(newext1); ExFreePool(newext2); ExFreePool(newext3); @@ -3624,62 +3862,56 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, Status = calc_csum(fcb->Vcb, data, sl, csum); if (!NT_SUCCESS(Status)) { ERR("calc_csum returned %08x\n", Status); - ExFreePool(ned); - ExFreePool(nedb); - ExFreePool(nedc); ExFreePool(newext1); ExFreePool(newext2); 
ExFreePool(newext3); ExFreePool(csum); return Status; } - + newext2->csum = csum; } else newext2->csum = NULL; *written = end_data - start_data; - + newext1->offset = ext->offset; - newext1->data = ned; newext1->datalen = ext->datalen; newext1->unique = ext->unique; newext1->ignore = FALSE; newext1->inserted = TRUE; newext1->csum = NULL; InsertHeadList(&ext->list_entry, &newext1->list_entry); - + add_insert_extent_rollback(rollback, fcb, newext1); - + newext2->offset = start_data; - newext2->data = nedb; newext2->datalen = ext->datalen; newext2->unique = ext->unique; newext2->ignore = FALSE; newext2->inserted = TRUE; - InsertHeadList(&newext1->list_entry, &newext2->list_entry); - + add_extent(fcb, &newext1->list_entry, newext2); + add_insert_extent_rollback(rollback, fcb, newext2); - + newext3->offset = end_data; - newext3->data = nedc; newext3->datalen = ext->datalen; newext3->unique = ext->unique; newext3->ignore = FALSE; newext3->inserted = TRUE; newext3->csum = NULL; - InsertHeadList(&newext2->list_entry, &newext3->list_entry); - + add_extent(fcb, &newext2->list_entry, newext3); + add_insert_extent_rollback(rollback, fcb, newext3); - + c = get_chunk_from_address(fcb->Vcb, ed2->address); - + if (!c) ERR("get_chunk_from_address(%llx) failed\n", ed2->address); else { Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 2, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp); - + if (!NT_SUCCESS(Status)) { ERR("update_changed_extent_ref returned %08x\n", Status); return Status; @@ -3688,136 +3920,145 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, remove_fcb_extent(fcb, ext, rollback); } - + + if (c) + c->changed = TRUE; + return STATUS_SUCCESS; } -NTSTATUS do_write_file(fcb* fcb, UINT64 start, UINT64 end_data, void* data, PIRP Irp, LIST_ENTRY* rollback) { +NTSTATUS do_write_file(fcb* fcb, UINT64 start, UINT64 end_data, void* data, PIRP Irp, BOOL 
file_write, UINT32 irp_offset, LIST_ENTRY* rollback) { NTSTATUS Status; LIST_ENTRY *le, *le2; UINT64 written = 0, length = end_data - start; UINT64 last_cow_start; + ULONG priority = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority; #ifdef DEBUG_PARANOID UINT64 last_off; #endif - + last_cow_start = 0; - + le = fcb->extents.Flink; while (le != &fcb->extents) { extent* ext = CONTAINING_RECORD(le, extent, list_entry); - + le2 = le->Flink; - + if (!ext->ignore) { - EXTENT_DATA* ed = ext->data; + EXTENT_DATA* ed = &ext->extent_data; EXTENT_DATA2* ed2 = ed->type == EXTENT_TYPE_INLINE ? NULL : (EXTENT_DATA2*)ed->data; UINT64 len; - + len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes; - + if (ext->offset + len <= start) goto nextitem; - + if (ext->offset > start + written + length) break; - - if ((fcb->inode_item.flags & BTRFS_INODE_NODATACOW || ed->type == EXTENT_TYPE_PREALLOC) && ext->unique) { + + if ((fcb->inode_item.flags & BTRFS_INODE_NODATACOW || ed->type == EXTENT_TYPE_PREALLOC) && ext->unique && ed->compression == BTRFS_COMPRESSION_NONE) { if (max(last_cow_start, start + written) < ext->offset) { UINT64 start_write = max(last_cow_start, start + written); - + Status = excise_extents(fcb->Vcb, fcb, start_write, ext->offset, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("excise_extents returned %08x\n", Status); return Status; } - - Status = insert_extent(fcb->Vcb, fcb, start_write, ext->offset - start_write, data, Irp, rollback); + + Status = insert_extent(fcb->Vcb, fcb, start_write, ext->offset - start_write, (UINT8*)data + written, Irp, file_write, irp_offset + written, rollback); if (!NT_SUCCESS(Status)) { ERR("insert_extent returned %08x\n", Status); return Status; } - + written += ext->offset - start_write; length -= ext->offset - start_write; - + if (length == 0) break; } - + if (ed->type == EXTENT_TYPE_REGULAR) { UINT64 writeaddr = ed2->address + ed2->offset + start + written - ext->offset; UINT64 
write_len = min(len, length); - + chunk* c; + TRACE("doing non-COW write to %llx\n", writeaddr); - - Status = write_data_complete(fcb->Vcb, writeaddr, (UINT8*)data + written, write_len, Irp, NULL); + + Status = write_data_complete(fcb->Vcb, writeaddr, (UINT8*)data + written, (UINT32)write_len, Irp, NULL, file_write, irp_offset + written, priority); if (!NT_SUCCESS(Status)) { ERR("write_data_complete returned %08x\n", Status); return Status; } - + + c = get_chunk_from_address(fcb->Vcb, writeaddr); + if (c) + c->changed = TRUE; + // This shouldn't ever get called - nocow files should always also be nosum. if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) { - calc_csum(fcb->Vcb, (UINT8*)data + written, write_len / fcb->Vcb->superblock.sector_size, + calc_csum(fcb->Vcb, (UINT8*)data + written, (UINT32)(write_len / fcb->Vcb->superblock.sector_size), &ext->csum[(start + written - ext->offset) / fcb->Vcb->superblock.sector_size]); - + ext->inserted = TRUE; } - + written += write_len; length -= write_len; - + if (length == 0) break; } else if (ed->type == EXTENT_TYPE_PREALLOC) { UINT64 write_len; - + Status = do_write_file_prealloc(fcb, ext, start + written, end_data, (UINT8*)data + written, &write_len, - Irp, rollback); + Irp, file_write, irp_offset + written, priority, rollback); if (!NT_SUCCESS(Status)) { ERR("do_write_file_prealloc returned %08x\n", Status); return Status; } - + written += write_len; length -= write_len; - + if (length == 0) break; } - + last_cow_start = ext->offset + len; } } - + nextitem: le = le2; } - + if (length > 0) { UINT64 start_write = max(last_cow_start, start + written); - + Status = excise_extents(fcb->Vcb, fcb, start_write, end_data, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("excise_extents returned %08x\n", Status); return Status; } - - Status = insert_extent(fcb->Vcb, fcb, start_write, end_data - start_write, data, Irp, rollback); + + Status = insert_extent(fcb->Vcb, fcb, start_write, end_data - start_write, (UINT8*)data + 
written, Irp, file_write, irp_offset + written, rollback); if (!NT_SUCCESS(Status)) { ERR("insert_extent returned %08x\n", Status); return Status; } } - + #ifdef DEBUG_PARANOID last_off = 0xffffffffffffffff; - + le = fcb->extents.Flink; while (le != &fcb->extents) { extent* ext = CONTAINING_RECORD(le, extent, list_entry); - + if (!ext->ignore) { if (ext->offset == last_off) { ERR("offset %llx duplicated\n", ext->offset); @@ -3826,73 +4067,73 @@ nextitem: ERR("offsets out of order\n"); int3; } - + last_off = ext->offset; } - + le = le->Flink; } #endif - + fcb->extents_changed = TRUE; mark_fcb_dirty(fcb); - + return STATUS_SUCCESS; } NTSTATUS write_compressed(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, PIRP Irp, LIST_ENTRY* rollback) { NTSTATUS Status; UINT64 i; - + for (i = 0; i < sector_align(end_data - start_data, COMPRESSED_EXTENT_SIZE) / COMPRESSED_EXTENT_SIZE; i++) { UINT64 s2, e2; BOOL compressed; - + s2 = start_data + (i * COMPRESSED_EXTENT_SIZE); e2 = min(s2 + COMPRESSED_EXTENT_SIZE, end_data); - + Status = write_compressed_bit(fcb, s2, e2, (UINT8*)data + (i * COMPRESSED_EXTENT_SIZE), &compressed, Irp, rollback); - + if (!NT_SUCCESS(Status)) { ERR("write_compressed_bit returned %08x\n", Status); return Status; } - + // If the first 128 KB of a file is incompressible, we set the nocompress flag so we don't // bother with the rest of it. 
if (s2 == 0 && e2 == COMPRESSED_EXTENT_SIZE && !compressed && !fcb->Vcb->options.compress_force) { fcb->inode_item.flags |= BTRFS_INODE_NOCOMPRESS; fcb->inode_item_changed = TRUE; mark_fcb_dirty(fcb); - + // write subsequent data non-compressed if (e2 < end_data) { - Status = do_write_file(fcb, e2, end_data, (UINT8*)data + e2, Irp, rollback); - + Status = do_write_file(fcb, e2, end_data, (UINT8*)data + e2, Irp, FALSE, 0, rollback); + if (!NT_SUCCESS(Status)) { ERR("do_write_file returned %08x\n", Status); return Status; } } - + return STATUS_SUCCESS; } } - + return STATUS_SUCCESS; } -NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void* buf, ULONG* length, BOOL paging_io, BOOL no_cache, - BOOL wait, BOOL deferred_write, LIST_ENTRY* rollback) { +NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void* buf, ULONG* length, BOOLEAN paging_io, BOOLEAN no_cache, + BOOLEAN wait, BOOLEAN deferred_write, BOOLEAN write_irp, LIST_ENTRY* rollback) { PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); PFILE_OBJECT FileObject = IrpSp->FileObject; EXTENT_DATA* ed2; - UINT64 newlength, start_data, end_data; + UINT64 off64, newlength, start_data, end_data; UINT32 bufhead; BOOL make_inline; UINT8* data; INODE_ITEM* origii; - BOOL changed_length = FALSE/*, lazy_writer = FALSE, write_eof = FALSE*/; + BOOL changed_length = FALSE; NTSTATUS Status; LARGE_INTEGER time; BTRFS_TIME now; @@ -3901,44 +4142,44 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void file_ref* fileref; BOOL paging_lock = FALSE, fcb_lock = FALSE, tree_lock = FALSE, pagefile; ULONG filter = 0; - + TRACE("(%p, %p, %llx, %p, %x, %u, %u)\n", Vcb, FileObject, offset.QuadPart, buf, *length, paging_io, no_cache); - + if (*length == 0) { WARN("returning success for zero-length write\n"); return STATUS_SUCCESS; } - + if (!FileObject) { ERR("error - FileObject was NULL\n"); return STATUS_ACCESS_DENIED; } - + fcb = 
FileObject->FsContext; ccb = FileObject->FsContext2; fileref = ccb ? ccb->fileref : NULL; - + if (!fcb->ads && fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK) { WARN("tried to write to something other than a file or symlink (inode %llx, type %u, %p, %p)\n", fcb->inode, fcb->type, &fcb->type, fcb); return STATUS_INVALID_DEVICE_REQUEST; } - - if (offset.LowPart == FILE_WRITE_TO_END_OF_FILE && offset.HighPart == -1) { + + if (offset.LowPart == FILE_WRITE_TO_END_OF_FILE && offset.HighPart == -1) offset = fcb->Header.FileSize; -// write_eof = TRUE; - } - + + off64 = offset.QuadPart; + TRACE("fcb->Header.Flags = %x\n", fcb->Header.Flags); - + if (!no_cache && !CcCanIWrite(FileObject, *length, wait, deferred_write)) return STATUS_PENDING; - + if (!wait && no_cache) return STATUS_PENDING; - + if (no_cache && !paging_io && FileObject->SectionObjectPointer->DataSectionObject) { IO_STATUS_BLOCK iosb; - + ExAcquireResourceExclusiveLite(fcb->Header.PagingIoResource, TRUE); CcFlushCache(FileObject->SectionObjectPointer, &offset, *length, &iosb); @@ -3948,12 +4189,12 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void ERR("CcFlushCache returned %08x\n", iosb.Status); return iosb.Status; } - + paging_lock = TRUE; CcPurgeCacheSection(FileObject->SectionObjectPointer, &offset, *length, FALSE); } - + if (paging_io) { if (!ExAcquireResourceSharedLite(fcb->Header.PagingIoResource, wait)) { Status = STATUS_PENDING; @@ -3961,9 +4202,9 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void } else paging_lock = TRUE; } - + pagefile = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE && paging_io; - + if (!pagefile && !ExIsResourceAcquiredExclusiveLite(&Vcb->tree_lock)) { if (!ExAcquireResourceSharedLite(&Vcb->tree_lock, wait)) { Status = STATUS_PENDING; @@ -3971,7 +4212,7 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void } else tree_lock = TRUE; } - + if (no_cache) { if (pagefile) 
{ if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) { @@ -3987,46 +4228,46 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void fcb_lock = TRUE; } } - + newlength = fcb->ads ? fcb->adsdata.Length : fcb->inode_item.st_size; - + if (fcb->deleted) newlength = 0; - + TRACE("newlength = %llx\n", newlength); - -// if (KeGetCurrentThread() == fcb->lazy_writer_thread) { -// ERR("lazy writer on the TV\n"); -// lazy_writer = TRUE; -// } - - if (offset.QuadPart + *length > newlength) { + + if (off64 + *length > newlength) { if (paging_io) { - if (offset.QuadPart >= newlength) { - TRACE("paging IO tried to write beyond end of file (file size = %llx, offset = %llx, length = %x)\n", newlength, offset.QuadPart, *length); + if (off64 >= newlength) { + TRACE("paging IO tried to write beyond end of file (file size = %llx, offset = %llx, length = %x)\n", newlength, off64, *length); TRACE("filename %S\n", file_desc(FileObject)); TRACE("FileObject: AllocationSize = %llx, FileSize = %llx, ValidDataLength = %llx\n", fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart); Status = STATUS_SUCCESS; goto end; } - - *length = newlength - offset.QuadPart; + + *length = (ULONG)(newlength - off64); } else { - newlength = offset.QuadPart + *length; + newlength = off64 + *length; changed_length = TRUE; - + TRACE("extending length to %llx\n", newlength); } } - - make_inline = fcb->ads ? 
FALSE : newlength <= fcb->Vcb->options.max_inline; - + + if (fcb->ads) + make_inline = FALSE; + else if (fcb->type == BTRFS_TYPE_SYMLINK) + make_inline = newlength <= (Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node) - offsetof(EXTENT_DATA, data[0])); + else + make_inline = newlength <= fcb->Vcb->options.max_inline; + if (changed_length) { - if (newlength > fcb->Header.AllocationSize.QuadPart) { + if (newlength > (UINT64)fcb->Header.AllocationSize.QuadPart) { if (!tree_lock) { - // We need to acquire the tree lock if we don't have it already - - // we can't give an inline file proper extents at the same as we're + // We need to acquire the tree lock if we don't have it already - + // we can't give an inline file proper extents at the same time as we're // doing a flush. if (!ExAcquireResourceSharedLite(&Vcb->tree_lock, wait)) { Status = STATUS_PENDING; @@ -4034,7 +4275,7 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void } else tree_lock = TRUE; } - + Status = extend_file(fcb, fileref, newlength, FALSE, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("extend_file returned %08x\n", Status); @@ -4042,150 +4283,163 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void } } else if (!fcb->ads) fcb->inode_item.st_size = newlength; - + fcb->Header.FileSize.QuadPart = newlength; fcb->Header.ValidDataLength.QuadPart = newlength; - + TRACE("AllocationSize = %llx\n", fcb->Header.AllocationSize.QuadPart); TRACE("FileSize = %llx\n", fcb->Header.FileSize.QuadPart); TRACE("ValidDataLength = %llx\n", fcb->Header.ValidDataLength.QuadPart); } - + if (!no_cache) { - if (!FileObject->PrivateCacheMap || changed_length) { - CC_FILE_SIZES ccfs; - - ccfs.AllocationSize = fcb->Header.AllocationSize; - ccfs.FileSize = fcb->Header.FileSize; - ccfs.ValidDataLength = fcb->Header.ValidDataLength; - - if (!FileObject->PrivateCacheMap) - init_file_cache(FileObject, &ccfs); - - CcSetFileSizes(FileObject, &ccfs); - } - 
- if (IrpSp->MinorFunction & IRP_MN_MDL) { - CcPrepareMdlWrite(FileObject, &offset, *length, &Irp->MdlAddress, &Irp->IoStatus); + Status = STATUS_SUCCESS; - Status = Irp->IoStatus.Status; - goto end; - } else { - if (CcCopyWriteEx) { - TRACE("CcCopyWriteEx(%p, %llx, %x, %u, %p, %p)\n", FileObject, offset.QuadPart, *length, wait, buf, Irp->Tail.Overlay.Thread); - if (!CcCopyWriteEx(FileObject, &offset, *length, wait, buf, Irp->Tail.Overlay.Thread)) { - Status = STATUS_PENDING; - goto end; - } - TRACE("CcCopyWriteEx finished\n"); + _SEH2_TRY { + if (!FileObject->PrivateCacheMap || changed_length) { + CC_FILE_SIZES ccfs; + + ccfs.AllocationSize = fcb->Header.AllocationSize; + ccfs.FileSize = fcb->Header.FileSize; + ccfs.ValidDataLength = fcb->Header.ValidDataLength; + + if (!FileObject->PrivateCacheMap) + init_file_cache(FileObject, &ccfs); + + CcSetFileSizes(FileObject, &ccfs); + } + + if (IrpSp->MinorFunction & IRP_MN_MDL) { + CcPrepareMdlWrite(FileObject, &offset, *length, &Irp->MdlAddress, &Irp->IoStatus); + + Status = Irp->IoStatus.Status; + goto end; } else { - TRACE("CcCopyWrite(%p, %llx, %x, %u, %p)\n", FileObject, offset.QuadPart, *length, wait, buf); - if (!CcCopyWrite(FileObject, &offset, *length, wait, buf)) { - Status = STATUS_PENDING; - goto end; + if (fCcCopyWriteEx) { + TRACE("CcCopyWriteEx(%p, %llx, %x, %u, %p, %p)\n", FileObject, off64, *length, wait, buf, Irp->Tail.Overlay.Thread); + if (!fCcCopyWriteEx(FileObject, &offset, *length, wait, buf, Irp->Tail.Overlay.Thread)) { + Status = STATUS_PENDING; + goto end; + } + TRACE("CcCopyWriteEx finished\n"); + } else { + TRACE("CcCopyWrite(%p, %llx, %x, %u, %p)\n", FileObject, off64, *length, wait, buf); + if (!CcCopyWrite(FileObject, &offset, *length, wait, buf)) { + Status = STATUS_PENDING; + goto end; + } + TRACE("CcCopyWrite finished\n"); } - TRACE("CcCopyWrite finished\n"); } - } - - Status = STATUS_SUCCESS; + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } 
_SEH2_END; + goto end; } - + if (fcb->ads) { if (changed_length) { char* data2; - + if (newlength > fcb->adsmaxlen) { ERR("error - xattr too long (%llu > %u)\n", newlength, fcb->adsmaxlen); Status = STATUS_DISK_FULL; goto end; } - data2 = ExAllocatePoolWithTag(PagedPool, newlength, ALLOC_TAG); + data2 = ExAllocatePoolWithTag(PagedPool, (ULONG)newlength, ALLOC_TAG); if (!data2) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } - + if (fcb->adsdata.Buffer) { RtlCopyMemory(data2, fcb->adsdata.Buffer, fcb->adsdata.Length); ExFreePool(fcb->adsdata.Buffer); } - + if (newlength > fcb->adsdata.Length) - RtlZeroMemory(&data2[fcb->adsdata.Length], newlength - fcb->adsdata.Length); - - + RtlZeroMemory(&data2[fcb->adsdata.Length], (ULONG)(newlength - fcb->adsdata.Length)); + + fcb->adsdata.Buffer = data2; - fcb->adsdata.Length = fcb->adsdata.MaximumLength = newlength; - + fcb->adsdata.Length = fcb->adsdata.MaximumLength = (USHORT)newlength; + fcb->Header.AllocationSize.QuadPart = newlength; fcb->Header.FileSize.QuadPart = newlength; fcb->Header.ValidDataLength.QuadPart = newlength; } - + if (*length > 0) - RtlCopyMemory(&fcb->adsdata.Buffer[offset.QuadPart], buf, *length); - + RtlCopyMemory(&fcb->adsdata.Buffer[off64], buf, *length); + fcb->Header.ValidDataLength.QuadPart = newlength; - + mark_fcb_dirty(fcb); - + if (fileref) mark_fileref_dirty(fileref); } else { - BOOL compress = write_fcb_compressed(fcb); - + BOOL compress = write_fcb_compressed(fcb), no_buf = FALSE; + if (make_inline) { start_data = 0; end_data = sector_align(newlength, fcb->Vcb->superblock.sector_size); bufhead = sizeof(EXTENT_DATA) - 1; } else if (compress) { - start_data = offset.QuadPart & ~(UINT64)(COMPRESSED_EXTENT_SIZE - 1); - end_data = min(sector_align(offset.QuadPart + *length, COMPRESSED_EXTENT_SIZE), + start_data = off64 & ~(UINT64)(COMPRESSED_EXTENT_SIZE - 1); + end_data = min(sector_align(off64 + *length, COMPRESSED_EXTENT_SIZE), sector_align(newlength, 
fcb->Vcb->superblock.sector_size)); bufhead = 0; } else { - start_data = offset.QuadPart & ~(UINT64)(fcb->Vcb->superblock.sector_size - 1); - end_data = sector_align(offset.QuadPart + *length, fcb->Vcb->superblock.sector_size); + start_data = off64 & ~(UINT64)(fcb->Vcb->superblock.sector_size - 1); + end_data = sector_align(off64 + *length, fcb->Vcb->superblock.sector_size); bufhead = 0; } - + + if (fcb_is_inline(fcb)) + end_data = max(end_data, sector_align(fcb->inode_item.st_size, Vcb->superblock.sector_size)); + fcb->Header.ValidDataLength.QuadPart = newlength; TRACE("fcb %p FileSize = %llx\n", fcb, fcb->Header.FileSize.QuadPart); - - data = ExAllocatePoolWithTag(PagedPool, end_data - start_data + bufhead, ALLOC_TAG); - if (!data) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - RtlZeroMemory(data + bufhead, end_data - start_data); - - TRACE("start_data = %llx\n", start_data); - TRACE("end_data = %llx\n", end_data); - - if (offset.QuadPart > start_data || offset.QuadPart + *length < end_data) { - if (changed_length) { - if (fcb->inode_item.st_size > start_data) - Status = read_file(fcb, data + bufhead, start_data, fcb->inode_item.st_size - start_data, NULL, Irp, TRUE); - else - Status = STATUS_SUCCESS; - } else - Status = read_file(fcb, data + bufhead, start_data, end_data - start_data, NULL, Irp, TRUE); - - if (!NT_SUCCESS(Status)) { - ERR("read_file returned %08x\n", Status); - ExFreePool(data); + + if (!make_inline && !compress && off64 == start_data && off64 + *length == end_data) { + data = buf; + no_buf = TRUE; + } else { + data = ExAllocatePoolWithTag(PagedPool, (ULONG)(end_data - start_data + bufhead), ALLOC_TAG); + if (!data) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } + + RtlZeroMemory(data + bufhead, (ULONG)(end_data - start_data)); + + TRACE("start_data = %llx\n", start_data); + TRACE("end_data = %llx\n", end_data); + + if (off64 > start_data || off64 + *length < end_data) 
{ + if (changed_length) { + if (fcb->inode_item.st_size > start_data) + Status = read_file(fcb, data + bufhead, start_data, fcb->inode_item.st_size - start_data, NULL, Irp); + else + Status = STATUS_SUCCESS; + } else + Status = read_file(fcb, data + bufhead, start_data, end_data - start_data, NULL, Irp); + + if (!NT_SUCCESS(Status)) { + ERR("read_file returned %08x\n", Status); + ExFreePool(data); + goto end; + } + } + + RtlCopyMemory(data + bufhead + off64 - start_data, buf, *length); } - - RtlCopyMemory(data + bufhead + offset.QuadPart - start_data, buf, *length); - + if (make_inline) { Status = excise_extents(fcb->Vcb, fcb, start_data, end_data, Irp, rollback); if (!NT_SUCCESS(Status)) { @@ -4193,7 +4447,7 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void ExFreePool(data); goto end; } - + ed2 = (EXTENT_DATA*)data; ed2->generation = fcb->Vcb->superblock.generation; ed2->decoded_size = newlength; @@ -4201,59 +4455,75 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void ed2->encryption = BTRFS_ENCRYPTION_NONE; ed2->encoding = BTRFS_ENCODING_NONE; ed2->type = EXTENT_TYPE_INLINE; - - if (!add_extent_to_fcb(fcb, 0, ed2, sizeof(EXTENT_DATA) - 1 + newlength, FALSE, NULL, rollback)) { - ERR("add_extent_to_fcb failed\n"); + + Status = add_extent_to_fcb(fcb, 0, ed2, (UINT16)(offsetof(EXTENT_DATA, data[0]) + newlength), FALSE, NULL, rollback); + if (!NT_SUCCESS(Status)) { + ERR("add_extent_to_fcb returned %08x\n", Status); ExFreePool(data); - Status = STATUS_INTERNAL_ERROR; goto end; } - + fcb->inode_item.st_blocks += newlength; } else if (compress) { Status = write_compressed(fcb, start_data, end_data, data, Irp, rollback); - + if (!NT_SUCCESS(Status)) { ERR("write_compressed returned %08x\n", Status); ExFreePool(data); goto end; } - + ExFreePool(data); } else { - Status = do_write_file(fcb, start_data, end_data, data, Irp, rollback); - + if (write_irp && Irp->MdlAddress && no_buf) { + BOOL locked = 
Irp->MdlAddress->MdlFlags & MDL_PAGES_LOCKED; + + if (!locked) { + Status = STATUS_SUCCESS; + + _SEH2_TRY { + MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoReadAccess); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + + if (!NT_SUCCESS(Status)) { + ERR("MmProbeAndLockPages threw exception %08x\n", Status); + goto end; + } + } + + _SEH2_TRY { + Status = do_write_file(fcb, start_data, end_data, data, Irp, TRUE, 0, rollback); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + + if (!locked) + MmUnlockPages(Irp->MdlAddress); + } else { + _SEH2_TRY { + Status = do_write_file(fcb, start_data, end_data, data, Irp, FALSE, 0, rollback); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + } _SEH2_END; + } + if (!NT_SUCCESS(Status)) { ERR("do_write_file returned %08x\n", Status); - ExFreePool(data); + if (!no_buf) ExFreePool(data); goto end; } - - ExFreePool(data); + + if (!no_buf) + ExFreePool(data); } } - + + KeQuerySystemTime(&time); + win_time_to_unix(time, &now); + if (!pagefile) { - KeQuerySystemTime(&time); - win_time_to_unix(time, &now); - -// ERR("no_cache = %s, FileObject->PrivateCacheMap = %p\n", no_cache ? 
"TRUE" : "FALSE", FileObject->PrivateCacheMap); -// -// if (!no_cache) { -// if (!FileObject->PrivateCacheMap) { -// CC_FILE_SIZES ccfs; -// -// ccfs.AllocationSize = fcb->Header.AllocationSize; -// ccfs.FileSize = fcb->Header.FileSize; -// ccfs.ValidDataLength = fcb->Header.ValidDataLength; -// -// TRACE("calling CcInitializeCacheMap...\n"); -// CcInitializeCacheMap(FileObject, &ccfs, FALSE, cache_callbacks, fcb); -// -// changed_length = FALSE; -// } -// } - if (fcb->ads) { if (fileref && fileref->parent) origii = &fileref->parent->fcb->inode_item; @@ -4264,182 +4534,155 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void } } else origii = &fcb->inode_item; - + origii->transid = Vcb->superblock.generation; origii->sequence++; - + if (!ccb->user_set_change_time) origii->st_ctime = now; - + if (!fcb->ads) { if (changed_length) { TRACE("setting st_size to %llx\n", newlength); origii->st_size = newlength; filter |= FILE_NOTIFY_CHANGE_SIZE; } - - if (!ccb->user_set_write_time) { - origii->st_mtime = now; - filter |= FILE_NOTIFY_CHANGE_LAST_WRITE; - } - + fcb->inode_item_changed = TRUE; - } else + } else { fileref->parent->fcb->inode_item_changed = TRUE; - + + if (changed_length) + filter |= FILE_NOTIFY_CHANGE_SIZE; + } + + if (!ccb->user_set_write_time) { + origii->st_mtime = now; + filter |= FILE_NOTIFY_CHANGE_LAST_WRITE; + } + mark_fcb_dirty(fcb->ads ? 
fileref->parent->fcb : fcb); } - + if (changed_length) { CC_FILE_SIZES ccfs; - + ccfs.AllocationSize = fcb->Header.AllocationSize; ccfs.FileSize = fcb->Header.FileSize; ccfs.ValidDataLength = fcb->Header.ValidDataLength; - CcSetFileSizes(FileObject, &ccfs); + _SEH2_TRY { + CcSetFileSizes(FileObject, &ccfs); + } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { + Status = _SEH2_GetExceptionCode(); + goto end; + } _SEH2_END; } - - // FIXME - make sure this still called if STATUS_PENDING and async -// if (!no_cache) { -// if (!CcCopyWrite(FileObject, &offset, *length, TRUE, buf)) { -// ERR("CcCopyWrite failed.\n"); -// } -// } - + fcb->subvol->root_item.ctransid = Vcb->superblock.generation; fcb->subvol->root_item.ctime = now; - + Status = STATUS_SUCCESS; - + if (filter != 0) - send_notification_fcb(fcb->ads ? fileref->parent : fileref, filter, FILE_ACTION_MODIFIED); - + send_notification_fcb(fcb->ads ? fileref->parent : fileref, filter, FILE_ACTION_MODIFIED, fcb->ads && fileref->dc ? &fileref->dc->name : NULL); + end: if (NT_SUCCESS(Status) && FileObject->Flags & FO_SYNCHRONOUS_IO && !paging_io) { TRACE("CurrentByteOffset was: %llx\n", FileObject->CurrentByteOffset.QuadPart); FileObject->CurrentByteOffset.QuadPart = offset.QuadPart + (NT_SUCCESS(Status) ? *length : 0); TRACE("CurrentByteOffset now: %llx\n", FileObject->CurrentByteOffset.QuadPart); } - + if (fcb_lock) ExReleaseResourceLite(fcb->Header.Resource); - + if (tree_lock) ExReleaseResourceLite(&Vcb->tree_lock); - + if (paging_lock) ExReleaseResourceLite(fcb->Header.PagingIoResource); return Status; } -NTSTATUS write_file(device_extension* Vcb, PIRP Irp, BOOL wait, BOOL deferred_write) { +NTSTATUS write_file(device_extension* Vcb, PIRP Irp, BOOLEAN wait, BOOLEAN deferred_write) { PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); void* buf; NTSTATUS Status; LARGE_INTEGER offset = IrpSp->Parameters.Write.ByteOffset; PFILE_OBJECT FileObject = IrpSp->FileObject; fcb* fcb = FileObject ? 
FileObject->FsContext : NULL; -// BOOL locked = FALSE; -// LARGE_INTEGER freq, time1, time2; LIST_ENTRY rollback; - + InitializeListHead(&rollback); - -// time1 = KeQueryPerformanceCounter(&freq); - + TRACE("write\n"); - + Irp->IoStatus.Information = 0; - + TRACE("offset = %llx\n", offset.QuadPart); TRACE("length = %x\n", IrpSp->Parameters.Write.Length); - + if (!Irp->AssociatedIrp.SystemBuffer) { - buf = map_user_buffer(Irp); - + buf = map_user_buffer(Irp, fcb && fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority); + if (Irp->MdlAddress && !buf) { ERR("MmGetSystemAddressForMdlSafe returned NULL\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto exit; - } + } } else buf = Irp->AssociatedIrp.SystemBuffer; - + TRACE("buf = %p\n", buf); - -// if (Irp->Flags & IRP_NOCACHE) { -// if (!ExAcquireResourceSharedLite(&Vcb->tree_lock, wait)) { -// Status = STATUS_PENDING; -// goto exit; -// } -// locked = TRUE; -// } - + if (fcb && !(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForWriteAccess(&fcb->lock, Irp)) { WARN("tried to write to locked region\n"); Status = STATUS_FILE_LOCK_CONFLICT; goto exit; } - -// ERR("Irp->Flags = %x\n", Irp->Flags); + Status = write_file2(Vcb, Irp, offset, buf, &IrpSp->Parameters.Write.Length, Irp->Flags & IRP_PAGING_IO, Irp->Flags & IRP_NOCACHE, - wait, deferred_write, &rollback); - + wait, deferred_write, TRUE, &rollback); + if (Status == STATUS_PENDING) goto exit; else if (!NT_SUCCESS(Status)) { ERR("write_file2 returned %08x\n", Status); goto exit; } - -// if (locked) -// Status = consider_write(Vcb); if (NT_SUCCESS(Status)) { Irp->IoStatus.Information = IrpSp->Parameters.Write.Length; - -#ifdef DEBUG_PARANOID -// if (locked) -// check_extents_consistent(Vcb, FileObject->FsContext); // TESTING - -// check_extent_tree_consistent(Vcb); -#endif - + if (diskacc && Status != STATUS_PENDING && Irp->Flags & IRP_NOCACHE) { PETHREAD thread = NULL; - + if (Irp->Tail.Overlay.Thread && 
!IoIsSystemThread(Irp->Tail.Overlay.Thread)) thread = Irp->Tail.Overlay.Thread; else if (!IoIsSystemThread(PsGetCurrentThread())) thread = PsGetCurrentThread(); else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp) thread = PsGetCurrentThread(); - + if (thread) - PsUpdateDiskCounters(PsGetThreadProcess(thread), 0, IrpSp->Parameters.Write.Length, 0, 1, 0); + fPsUpdateDiskCounters(PsGetThreadProcess(thread), 0, IrpSp->Parameters.Write.Length, 0, 1, 0); } } - + exit: -// if (locked) { - if (NT_SUCCESS(Status)) - clear_rollback(Vcb, &rollback); - else - do_rollback(Vcb, &rollback); -// -// ExReleaseResourceLite(&Vcb->tree_lock); -// } - -// time2 = KeQueryPerformanceCounter(NULL); - -// ERR("time = %u (freq = %u)\n", (UINT32)(time2.QuadPart - time1.QuadPart), (UINT32)freq.QuadPart); - + if (NT_SUCCESS(Status)) + clear_rollback(&rollback); + else + do_rollback(Vcb, &rollback); + return Status; } -NTSTATUS STDCALL drv_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { +_Dispatch_type_(IRP_MJ_WRITE) +_Function_class_(DRIVER_DISPATCH) +NTSTATUS drv_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { NTSTATUS Status; BOOL top_level; PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); @@ -4447,66 +4690,67 @@ NTSTATUS STDCALL drv_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { PFILE_OBJECT FileObject = IrpSp->FileObject; fcb* fcb = FileObject ? FileObject->FsContext : NULL; ccb* ccb = FileObject ? FileObject->FsContext2 : NULL; - BOOL wait = FileObject ? IoIsOperationSynchronous(Irp) : TRUE; + BOOLEAN wait = FileObject ? 
IoIsOperationSynchronous(Irp) : TRUE; FsRtlEnterFileSystem(); top_level = is_top_level(Irp); - - if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) { - Status = part0_passthrough(DeviceObject, Irp); + + if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { + Status = vol_write(DeviceObject, Irp); goto exit; + } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { + Status = STATUS_INVALID_PARAMETER; + goto end; } - + if (!fcb) { ERR("fcb was NULL\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + if (!ccb) { ERR("ccb was NULL\n"); Status = STATUS_INVALID_PARAMETER; goto end; } - + if (Irp->RequestorMode == UserMode && !(ccb->access & (FILE_WRITE_DATA | FILE_APPEND_DATA))) { WARN("insufficient permissions\n"); Status = STATUS_ACCESS_DENIED; goto end; } - + if (fcb == Vcb->volume_fcb) { if (!Vcb->locked || Vcb->locked_fileobj != FileObject) { ERR("trying to write to volume when not locked, or locked with another FileObject\n"); Status = STATUS_ACCESS_DENIED; goto end; } - + TRACE("writing directly to volume\n"); - + IoSkipCurrentIrpStackLocation(Irp); - + Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp); goto exit; } - - if (fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY) { + + if (is_subvol_readonly(fcb->subvol, Irp)) { Status = STATUS_ACCESS_DENIED; goto end; } - + if (Vcb->readonly) { Status = STATUS_MEDIA_WRITE_PROTECTED; goto end; } - -// ERR("recursive = %s\n", Irp != IoGetTopLevelIrp() ? "TRUE" : "FALSE"); - + _SEH2_TRY { if (IrpSp->MinorFunction & IRP_MN_COMPLETE) { CcMdlWriteComplete(IrpSp->FileObject, &IrpSp->Parameters.Write.ByteOffset, Irp->MdlAddress); - + Irp->MdlAddress = NULL; Status = STATUS_SUCCESS; } else { @@ -4514,34 +4758,34 @@ NTSTATUS STDCALL drv_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { // deadlocks in CcCopyWrite. 
if (Irp->Flags & IRP_PAGING_IO) wait = TRUE; - + Status = write_file(Vcb, Irp, wait, FALSE); } } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { Status = _SEH2_GetExceptionCode(); } _SEH2_END; - + end: Irp->IoStatus.Status = Status; TRACE("wrote %u bytes\n", Irp->IoStatus.Information); - + if (Status != STATUS_PENDING) IoCompleteRequest(Irp, IO_NO_INCREMENT); else { IoMarkIrpPending(Irp); - + if (!add_thread_job(Vcb, Irp)) do_write_job(Vcb, Irp); } - + exit: - if (top_level) + if (top_level) IoSetTopLevelIrp(NULL); - - FsRtlExitFileSystem(); - + TRACE("returning %08x\n", Status); + FsRtlExitFileSystem(); + return Status; } diff --git a/reactos/media/doc/README.FSD b/reactos/media/doc/README.FSD index f73d2c17b28..c0edcb46d42 100644 --- a/reactos/media/doc/README.FSD +++ b/reactos/media/doc/README.FSD @@ -3,7 +3,7 @@ The following FSD are shared with: https://github.com/maharmstone/btrfs. -reactos/drivers/filesystems/btrfs # Synced to 0.8 +reactos/drivers/filesystems/btrfs # Synced to 1.0 reactos/dll/shellext/shellbtrfs # Synced to 0.7 reactos/sdk/lib/fslib/btrfslib # Synced to 0.8