From: Pierre Schweitzer Date: Sun, 1 Jan 2017 17:12:12 +0000 (+0000) Subject: [BTRFS] X-Git-Tag: ReactOS-0.4.4-FOSDEM2017~39 X-Git-Url: https://git.reactos.org/?p=reactos.git;a=commitdiff_plain;h=3049f1a5b47694ee680e51f777a918c1743ab000 [BTRFS] Sync btrfs to 0.8. CORE-12617 svn path=/trunk/; revision=73498 --- diff --git a/reactos/drivers/filesystems/btrfs/CMakeLists.txt b/reactos/drivers/filesystems/btrfs/CMakeLists.txt index 118429ecf1d..581bab99ae9 100644 --- a/reactos/drivers/filesystems/btrfs/CMakeLists.txt +++ b/reactos/drivers/filesystems/btrfs/CMakeLists.txt @@ -4,8 +4,10 @@ include_directories(${REACTOS_SOURCE_DIR}/sdk/include/reactos/drivers inc) list(APPEND SOURCE + balance.c btrfs.c cache.c + calcthread.c compress.c crc32c.c create.c @@ -33,7 +35,7 @@ add_library(btrfs SHARED ${SOURCE} btrfs.rc) add_definitions(-D__KERNEL__) set_module_type(btrfs kernelmodedriver) -target_link_libraries(btrfs ntoskrnl_vista zlib_solo ${PSEH_LIB}) +target_link_libraries(btrfs rtlver ntoskrnl_vista zlib_solo wdmguid ${PSEH_LIB}) add_importlibs(btrfs ntoskrnl hal) add_pch(btrfs btrfs_drv.h SOURCE) add_cd_file(TARGET btrfs DESTINATION reactos/system32/drivers NO_CAB FOR all) diff --git a/reactos/drivers/filesystems/btrfs/balance.c b/reactos/drivers/filesystems/btrfs/balance.c new file mode 100644 index 00000000000..63e8821c884 --- /dev/null +++ b/reactos/drivers/filesystems/btrfs/balance.c @@ -0,0 +1,3180 @@ +/* Copyright (c) Mark Harmstone 2016 + * + * This file is part of WinBtrfs. + * + * WinBtrfs is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public Licence as published by + * the Free Software Foundation, either version 3 of the Licence, or + * (at your option) any later version. + * + * WinBtrfs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public Licence for more details. + * + * You should have received a copy of the GNU Lesser General Public Licence + * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */ + +#include "btrfs_drv.h" +#include "btrfsioctl.h" + +typedef struct { + UINT64 address; + UINT64 new_address; + tree_header* data; + EXTENT_ITEM* ei; + tree* t; + BOOL system; + LIST_ENTRY refs; + LIST_ENTRY list_entry; +} metadata_reloc; + +typedef struct { + UINT8 type; + + union { + TREE_BLOCK_REF tbr; + SHARED_BLOCK_REF sbr; + }; + + metadata_reloc* parent; + BOOL top; + LIST_ENTRY list_entry; +} metadata_reloc_ref; + +typedef struct { + UINT64 address; + UINT64 size; + UINT64 new_address; + chunk* newchunk; + EXTENT_ITEM* ei; + LIST_ENTRY refs; + LIST_ENTRY list_entry; +} data_reloc; + +typedef struct { + UINT8 type; + + union { + EXTENT_DATA_REF edr; + SHARED_DATA_REF sdr; + }; + + metadata_reloc* parent; + LIST_ENTRY list_entry; +} data_reloc_ref; + +extern LIST_ENTRY volumes; +extern ERESOURCE volumes_lock; + +static NTSTATUS add_metadata_reloc(device_extension* Vcb, LIST_ENTRY* items, traverse_ptr* tp, BOOL skinny, metadata_reloc** mr2, chunk* c, LIST_ENTRY* rollback) { + metadata_reloc* mr; + EXTENT_ITEM* ei; + UINT16 len; + UINT64 inline_rc; + UINT8* ptr; + + mr = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc), ALLOC_TAG); + if (!mr) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + mr->address = tp->item->key.obj_id; + mr->data = NULL; + mr->ei = (EXTENT_ITEM*)tp->item->data; + mr->system = FALSE; + InitializeListHead(&mr->refs); + + delete_tree_item(Vcb, tp, rollback); + + if (!c) + c = get_chunk_from_address(Vcb, tp->item->key.obj_id); + + if (c) { + ExAcquireResourceExclusiveLite(&c->lock, TRUE); + + decrease_chunk_usage(c, Vcb->superblock.node_size); + + space_list_add(Vcb, c, TRUE, tp->item->key.obj_id, Vcb->superblock.node_size, rollback); + + ExReleaseResourceLite(&c->lock); + } + + ei = (EXTENT_ITEM*)tp->item->data; + 
inline_rc = 0; + + len = tp->item->size - sizeof(EXTENT_ITEM); + ptr = (UINT8*)tp->item->data + sizeof(EXTENT_ITEM); + if (!skinny) { + len -= sizeof(EXTENT_ITEM2); + ptr += sizeof(EXTENT_ITEM2); + } + + while (len > 0) { + UINT8 secttype = *ptr; + ULONG sectlen = secttype == TYPE_TREE_BLOCK_REF ? sizeof(TREE_BLOCK_REF) : (secttype == TYPE_SHARED_BLOCK_REF ? sizeof(SHARED_BLOCK_REF) : 0); + metadata_reloc_ref* ref; + + len--; + + if (sectlen > len) { + ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, len, sectlen); + return STATUS_INTERNAL_ERROR; + } + + if (sectlen == 0) { + ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, secttype); + return STATUS_INTERNAL_ERROR; + } + + ref = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG); + if (!ref) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + if (secttype == TYPE_TREE_BLOCK_REF) { + ref->type = TYPE_TREE_BLOCK_REF; + RtlCopyMemory(&ref->tbr, ptr + sizeof(UINT8), sizeof(TREE_BLOCK_REF)); + inline_rc++; + } else if (secttype == TYPE_SHARED_BLOCK_REF) { + ref->type = TYPE_SHARED_BLOCK_REF; + RtlCopyMemory(&ref->sbr, ptr + sizeof(UINT8), sizeof(SHARED_BLOCK_REF)); + inline_rc++; + } else { + ERR("unexpected tree type %x\n", secttype); + ExFreePool(ref); + return STATUS_INTERNAL_ERROR; + } + + ref->parent = NULL; + ref->top = FALSE; + InsertTailList(&mr->refs, &ref->list_entry); + + len -= sectlen; + ptr += sizeof(UINT8) + sectlen; + } + + if (inline_rc < ei->refcount) { // look for non-inline entries + traverse_ptr tp2 = *tp, next_tp; + + while (find_next_item(Vcb, &tp2, &next_tp, FALSE, NULL)) { + tp2 = next_tp; + + if (tp2.item->key.obj_id == tp->item->key.obj_id) { + if (tp2.item->key.obj_type == TYPE_TREE_BLOCK_REF && tp2.item->size >= sizeof(TREE_BLOCK_REF)) { + metadata_reloc_ref* ref = 
ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG); + if (!ref) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + ref->type = TYPE_TREE_BLOCK_REF; + RtlCopyMemory(&ref->tbr, tp2.item->data, sizeof(TREE_BLOCK_REF)); + ref->parent = NULL; + ref->top = FALSE; + InsertTailList(&mr->refs, &ref->list_entry); + + delete_tree_item(Vcb, &tp2, rollback); + } else if (tp2.item->key.obj_type == TYPE_SHARED_BLOCK_REF && tp2.item->size >= sizeof(SHARED_BLOCK_REF)) { + metadata_reloc_ref* ref = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG); + if (!ref) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + ref->type = TYPE_SHARED_BLOCK_REF; + RtlCopyMemory(&ref->sbr, tp2.item->data, sizeof(SHARED_BLOCK_REF)); + ref->parent = NULL; + ref->top = FALSE; + InsertTailList(&mr->refs, &ref->list_entry); + + delete_tree_item(Vcb, &tp2, rollback); + } + } else + break; + } + } + + InsertTailList(items, &mr->list_entry); + + if (mr2) + *mr2 = mr; + + return STATUS_SUCCESS; +} + +static NTSTATUS add_metadata_reloc_parent(device_extension* Vcb, LIST_ENTRY* items, UINT64 address, metadata_reloc** mr2, LIST_ENTRY* rollback) { + LIST_ENTRY* le; + KEY searchkey; + traverse_ptr tp; + BOOL skinny = FALSE; + NTSTATUS Status; + + le = items->Flink; + while (le != items) { + metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry); + + if (mr->address == address) { + *mr2 = mr; + return STATUS_SUCCESS; + } + + le = le->Flink; + } + + searchkey.obj_id = address; + searchkey.obj_type = TYPE_METADATA_ITEM; + searchkey.offset = 0xffffffffffffffff; + + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + if (tp.item->key.obj_id == address && tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) + skinny = TRUE; + else if (tp.item->key.obj_id == address 
&& tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset == Vcb->superblock.node_size && + tp.item->size >= sizeof(EXTENT_ITEM)) { + EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data; + + if (!(ei->flags & EXTENT_ITEM_TREE_BLOCK)) { + ERR("EXTENT_ITEM for %llx found, but tree flag not set\n", address); + return STATUS_INTERNAL_ERROR; + } + } else { + ERR("could not find valid EXTENT_ITEM for address %llx\n", address); + return STATUS_INTERNAL_ERROR; + } + + Status = add_metadata_reloc(Vcb, items, &tp, skinny, mr2, NULL, rollback); + if (!NT_SUCCESS(Status)) { + ERR("add_metadata_reloc returned %08x\n", Status); + return Status; + } + + return STATUS_SUCCESS; +} + +static NTSTATUS add_metadata_reloc_extent_item(device_extension* Vcb, metadata_reloc* mr, LIST_ENTRY* rollback) { + LIST_ENTRY* le; + UINT64 rc = 0; + UINT16 inline_len; + BOOL all_inline = TRUE; + metadata_reloc_ref* first_noninline = NULL; + EXTENT_ITEM* ei; + UINT8* ptr; + + inline_len = sizeof(EXTENT_ITEM); + if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) + inline_len += sizeof(EXTENT_ITEM2); + + le = mr->refs.Flink; + while (le != &mr->refs) { + metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry); + ULONG extlen = 0; + + rc++; + + if (ref->type == TYPE_TREE_BLOCK_REF) + extlen += sizeof(TREE_BLOCK_REF); + else if (ref->type == TYPE_SHARED_BLOCK_REF) + extlen += sizeof(SHARED_BLOCK_REF); + + if (all_inline) { + if (inline_len + 1 + extlen > Vcb->superblock.node_size / 4) { + all_inline = FALSE; + first_noninline = ref; + } else + inline_len += extlen + 1; + } + + le = le->Flink; + } + + ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG); + if (!ei) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + ei->refcount = rc; + ei->generation = mr->ei->generation; + ei->flags = mr->ei->flags; + ptr = (UINT8*)&ei[1]; + + if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) { + 
EXTENT_ITEM2* ei2 = (EXTENT_ITEM2*)ptr; + + ei2->firstitem = *(KEY*)&mr->data[1]; + ei2->level = mr->data->level; + + ptr += sizeof(EXTENT_ITEM2); + } + + le = mr->refs.Flink; + while (le != &mr->refs) { + metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry); + + if (ref == first_noninline) + break; + + *ptr = ref->type; + ptr++; + + if (ref->type == TYPE_TREE_BLOCK_REF) { + TREE_BLOCK_REF* tbr = (TREE_BLOCK_REF*)ptr; + + tbr->offset = ref->tbr.offset; + + ptr += sizeof(TREE_BLOCK_REF); + } else if (ref->type == TYPE_SHARED_BLOCK_REF) { + SHARED_BLOCK_REF* sbr = (SHARED_BLOCK_REF*)ptr; + + sbr->offset = ref->parent->new_address; + + ptr += sizeof(SHARED_BLOCK_REF); + } + + le = le->Flink; + } + + if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) { + if (!insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_METADATA_ITEM, mr->data->level, ei, inline_len, NULL, NULL, rollback)) { + ERR("insert_tree_item failed\n"); + return STATUS_INTERNAL_ERROR; + } + } else { + if (!insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, ei, inline_len, NULL, NULL, rollback)) { + ERR("insert_tree_item failed\n"); + return STATUS_INTERNAL_ERROR; + } + } + + if (!all_inline) { + le = &first_noninline->list_entry; + + while (le != &mr->refs) { + metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry); + + if (ref->type == TYPE_TREE_BLOCK_REF) { + TREE_BLOCK_REF* tbr; + + tbr = ExAllocatePoolWithTag(PagedPool, sizeof(TREE_BLOCK_REF), ALLOC_TAG); + if (!tbr) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + tbr->offset = ref->tbr.offset; + + if (!insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_TREE_BLOCK_REF, tbr->offset, tbr, sizeof(TREE_BLOCK_REF), NULL, NULL, rollback)) { + ERR("insert_tree_item failed\n"); + return STATUS_INTERNAL_ERROR; + } + } else if (ref->type == TYPE_SHARED_BLOCK_REF) { + SHARED_BLOCK_REF* 
sbr; + + sbr = ExAllocatePoolWithTag(PagedPool, sizeof(SHARED_BLOCK_REF), ALLOC_TAG); + if (!sbr) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + sbr->offset = ref->parent->new_address; + + if (!insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_SHARED_BLOCK_REF, sbr->offset, sbr, sizeof(SHARED_BLOCK_REF), NULL, NULL, rollback)) { + ERR("insert_tree_item failed\n"); + return STATUS_INTERNAL_ERROR; + } + } + + le = le->Flink; + } + } + + if (ei->flags & EXTENT_ITEM_SHARED_BACKREFS || mr->data->flags & HEADER_FLAG_SHARED_BACKREF || !(mr->data->flags & HEADER_FLAG_MIXED_BACKREF)) { + if (mr->data->level > 0) { + UINT16 i; + internal_node* in = (internal_node*)&mr->data[1]; + + for (i = 0; i < mr->data->num_items; i++) { + UINT64 sbrrc = find_extent_shared_tree_refcount(Vcb, in[i].address, mr->address, NULL); + + if (sbrrc > 0) { + NTSTATUS Status; + SHARED_BLOCK_REF sbr; + + sbr.offset = mr->new_address; + + Status = increase_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0, + NULL, rollback); + if (!NT_SUCCESS(Status)) { + ERR("increase_extent_refcount returned %08x\n", Status); + return Status; + } + + sbr.offset = mr->address; + + Status = decrease_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0, + sbr.offset, FALSE, NULL, rollback); + if (!NT_SUCCESS(Status)) { + ERR("decrease_extent_refcount returned %08x\n", Status); + return Status; + } + } + } + } else { + UINT16 i; + leaf_node* ln = (leaf_node*)&mr->data[1]; + + for (i = 0; i < mr->data->num_items; i++) { + if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { + EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)mr->data + sizeof(tree_header) + ln[i].offset); + + if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; + + if (ed2->size > 0) { // not sparse + 
UINT64 sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, mr->address, NULL); + + if (sdrrc > 0) { + NTSTATUS Status; + SHARED_DATA_REF sdr; + chunk* c; + + sdr.offset = mr->new_address; + sdr.count = sdrrc; + + Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, + NULL, rollback); + if (!NT_SUCCESS(Status)) { + ERR("increase_extent_refcount returned %08x\n", Status); + return Status; + } + + sdr.offset = mr->address; + + Status = decrease_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, + sdr.offset, FALSE, NULL, rollback); + if (!NT_SUCCESS(Status)) { + ERR("decrease_extent_refcount returned %08x\n", Status); + return Status; + } + + c = get_chunk_from_address(Vcb, ed2->address); + + if (c) { + // check changed_extents + + ExAcquireResourceExclusiveLite(&c->changed_extents_lock, TRUE); + + le = c->changed_extents.Flink; + + while (le != &c->changed_extents) { + changed_extent* ce = CONTAINING_RECORD(le, changed_extent, list_entry); + + if (ce->address == ed2->address) { + LIST_ENTRY* le2; + + le2 = ce->refs.Flink; + while (le2 != &ce->refs) { + changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); + + if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == mr->address) { + cer->sdr.offset = mr->new_address; + break; + } + + le2 = le2->Flink; + } + + le2 = ce->old_refs.Flink; + while (le2 != &ce->old_refs) { + changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); + + if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == mr->address) { + cer->sdr.offset = mr->new_address; + break; + } + + le2 = le2->Flink; + } + + break; + } + + le = le->Flink; + } + + ExReleaseResourceLite(&c->changed_extents_lock); + } + } + } + } + } + } + } + } + + return STATUS_SUCCESS; +} + +static NTSTATUS write_metadata_items(device_extension* Vcb, LIST_ENTRY* items, LIST_ENTRY* data_items, chunk* c, LIST_ENTRY* rollback) { + 
LIST_ENTRY tree_writes, *le; + NTSTATUS Status; + traverse_ptr tp; + UINT8 level, max_level = 0; + chunk* newchunk = NULL; + + InitializeListHead(&tree_writes); + + le = items->Flink; + while (le != items) { + metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry); + LIST_ENTRY* le2; + chunk* pc; + +// ERR("address %llx\n", mr->address); + + mr->data = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG); + if (!mr->data) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + Status = read_data(Vcb, mr->address, Vcb->superblock.node_size, NULL, TRUE, (UINT8*)mr->data, + c && mr->address >= c->offset && mr->address < c->offset + c->chunk_item->size ? c : NULL, &pc, NULL, FALSE); + if (!NT_SUCCESS(Status)) { + ERR("read_data returned %08x\n", Status); + return Status; + } + + if (pc->chunk_item->type & BLOCK_FLAG_SYSTEM) + mr->system = TRUE; + + if (data_items && mr->data->level == 0) { + LIST_ENTRY* le2 = data_items->Flink; + while (le2 != data_items) { + data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry); + leaf_node* ln = (leaf_node*)&mr->data[1]; + UINT16 i; + + for (i = 0; i < mr->data->num_items; i++) { + if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { + EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)mr->data + sizeof(tree_header) + ln[i].offset); + + if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; + + if (ed2->address == dr->address) + ed2->address = dr->new_address; + } + } + } + + le2 = le2->Flink; + } + } + + if (mr->data->level > max_level) + max_level = mr->data->level; + + le2 = mr->refs.Flink; + while (le2 != &mr->refs) { + metadata_reloc_ref* ref = CONTAINING_RECORD(le2, metadata_reloc_ref, list_entry); + + if (ref->type == TYPE_TREE_BLOCK_REF) { + KEY* firstitem; + root* r = NULL; + LIST_ENTRY* le3; + tree* t; + + firstitem = (KEY*)&mr->data[1]; + + le3 = 
Vcb->roots.Flink; + while (le3 != &Vcb->roots) { + root* r2 = CONTAINING_RECORD(le3, root, list_entry); + + if (r2->id == ref->tbr.offset) { + r = r2; + break; + } + + le3 = le3->Flink; + } + + if (!r) { + ERR("could not find subvol with id %llx\n", ref->tbr.offset); + return STATUS_INTERNAL_ERROR; + } + + Status = find_item_to_level(Vcb, r, &tp, firstitem, FALSE, mr->data->level + 1, NULL); + if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) { + ERR("find_item_to_level returned %08x\n", Status); + return Status; + } + + t = tp.tree; + while (t && t->header.level < mr->data->level + 1) { + t = t->parent; + } + + if (!t) + ref->top = TRUE; + else { + metadata_reloc* mr2; + + Status = add_metadata_reloc_parent(Vcb, items, t->header.address, &mr2, rollback); + if (!NT_SUCCESS(Status)) { + ERR("add_metadata_reloc_parent returned %08x\n", Status); + return Status; + } + + ref->parent = mr2; + } + } else if (ref->type == TYPE_SHARED_BLOCK_REF) { + metadata_reloc* mr2; + + Status = add_metadata_reloc_parent(Vcb, items, ref->sbr.offset, &mr2, rollback); + if (!NT_SUCCESS(Status)) { + ERR("add_metadata_reloc_parent returned %08x\n", Status); + return Status; + } + + ref->parent = mr2; + } + + le2 = le2->Flink; + } + + le = le->Flink; + } + + le = items->Flink; + while (le != items) { + metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry); + LIST_ENTRY* le2; + UINT32 hash; + + mr->t = NULL; + + hash = calc_crc32c(0xffffffff, (UINT8*)&mr->address, sizeof(UINT64)); + + le2 = Vcb->trees_ptrs[hash >> 24]; + + if (le2) { + while (le2 != &Vcb->trees_hash) { + tree* t = CONTAINING_RECORD(le2, tree, list_entry_hash); + + if (t->header.address == mr->address) { + mr->t = t; + break; + } else if (t->hash > hash) + break; + + le2 = le2->Flink; + } + } + + le = le->Flink; + } + + for (level = 0; level <= max_level; level++) { + le = items->Flink; + while (le != items) { + metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry); + + if 
(mr->data->level == level) { + BOOL done = FALSE; + LIST_ENTRY* le2; + tree_write* tw; + UINT64 flags; + tree* t3; + + if (mr->system) + flags = Vcb->system_flags; + else if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS) + flags = Vcb->data_flags; + else + flags = Vcb->metadata_flags; + + if (newchunk) { + ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE); + + if (newchunk->chunk_item->type == flags && find_metadata_address_in_chunk(Vcb, newchunk, &mr->new_address)) { + increase_chunk_usage(newchunk, Vcb->superblock.node_size); + space_list_subtract(Vcb, newchunk, FALSE, mr->new_address, Vcb->superblock.node_size, rollback); + done = TRUE; + } + + ExReleaseResourceLite(&newchunk->lock); + } + + if (!done) { + ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); + + le2 = Vcb->chunks.Flink; + while (le2 != &Vcb->chunks) { + chunk* c2 = CONTAINING_RECORD(le2, chunk, list_entry); + + if (!c2->readonly && !c2->reloc && c2 != newchunk && c2->chunk_item->type == flags) { + ExAcquireResourceExclusiveLite(&c2->lock, TRUE); + + if ((c2->chunk_item->size - c2->used) >= Vcb->superblock.node_size) { + if (find_metadata_address_in_chunk(Vcb, c2, &mr->new_address)) { + increase_chunk_usage(c2, Vcb->superblock.node_size); + space_list_subtract(Vcb, c2, FALSE, mr->new_address, Vcb->superblock.node_size, rollback); + ExReleaseResourceLite(&c2->lock); + newchunk = c2; + done = TRUE; + break; + } + } + + ExReleaseResourceLite(&c2->lock); + } + + le2 = le2->Flink; + } + + // allocate new chunk if necessary + if (!done) { + newchunk = alloc_chunk(Vcb, flags); + + if (!newchunk) { + ERR("could not allocate new chunk\n"); + ExReleaseResourceLite(&Vcb->chunk_lock); + Status = STATUS_DISK_FULL; + goto end; + } + + ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE); + + if (!find_metadata_address_in_chunk(Vcb, newchunk, &mr->new_address)) { + ExReleaseResourceLite(&newchunk->lock); + /* chunk_lock is still held on this path; release it before bailing out, as the alloc_chunk failure path above does */ + ExReleaseResourceLite(&Vcb->chunk_lock); + ERR("could not find address in new chunk\n"); + Status = STATUS_DISK_FULL; + 
goto end; + } else { + increase_chunk_usage(newchunk, Vcb->superblock.node_size); + space_list_subtract(Vcb, newchunk, FALSE, mr->new_address, Vcb->superblock.node_size, rollback); + } + + ExReleaseResourceLite(&newchunk->lock); + } + + ExReleaseResourceLite(&Vcb->chunk_lock); + } + + // update parents + le2 = mr->refs.Flink; + while (le2 != &mr->refs) { + metadata_reloc_ref* ref = CONTAINING_RECORD(le2, metadata_reloc_ref, list_entry); + + if (ref->parent) { + UINT16 i; + internal_node* in = (internal_node*)&ref->parent->data[1]; + + for (i = 0; i < ref->parent->data->num_items; i++) { + if (in[i].address == mr->address) { + in[i].address = mr->new_address; + break; + } + } + + if (ref->parent->t) { + LIST_ENTRY* le3; + + le3 = ref->parent->t->itemlist.Flink; + while (le3 != &ref->parent->t->itemlist) { + tree_data* td = CONTAINING_RECORD(le3, tree_data, list_entry); + + if (!td->inserted && td->treeholder.address == mr->address) + td->treeholder.address = mr->new_address; + + le3 = le3->Flink; + } + } + } else if (ref->top && ref->type == TYPE_TREE_BLOCK_REF) { + LIST_ENTRY* le3; + root* r = NULL; + + // alter ROOT_ITEM + + le3 = Vcb->roots.Flink; + while (le3 != &Vcb->roots) { + root* r2 = CONTAINING_RECORD(le3, root, list_entry); + + if (r2->id == ref->tbr.offset) { + r = r2; + break; + } + + le3 = le3->Flink; + } + + if (r) { + r->treeholder.address = mr->new_address; + + if (r == Vcb->root_root) + Vcb->superblock.root_tree_addr = mr->new_address; + else if (r == Vcb->chunk_root) + Vcb->superblock.chunk_tree_addr = mr->new_address; + else if (r->root_item.block_number == mr->address) { + KEY searchkey; + ROOT_ITEM* ri; + + r->root_item.block_number = mr->new_address; + + searchkey.obj_id = r->id; + searchkey.obj_type = TYPE_ROOT_ITEM; + searchkey.offset = 0xffffffffffffffff; + + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + goto end; + } + + if 
(tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) { + ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id); + Status = STATUS_INTERNAL_ERROR; + goto end; + } + + ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG); + if (!ri) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlCopyMemory(ri, &r->root_item, sizeof(ROOT_ITEM)); + + delete_tree_item(Vcb, &tp, rollback); + + if (!insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, NULL, rollback)) { + ERR("insert_tree_item failed\n"); + Status = STATUS_INTERNAL_ERROR; + goto end; + } + } + } + } + + le2 = le2->Flink; + } + + mr->data->address = mr->new_address; + + t3 = mr->t; + + while (t3) { + UINT8 h; + BOOL inserted; + tree* t4 = NULL; + + /* check if tree loaded more than once: the successor is looked up on the hash list, so the end-of-list test must use list_entry_hash too, otherwise the list head could be read as a tree */ + if (t3->list_entry_hash.Flink != &Vcb->trees_hash) { + tree* nt = CONTAINING_RECORD(t3->list_entry_hash.Flink, tree, list_entry_hash); + + if (nt->header.address == t3->header.address) + t4 = nt; + } + + t3->header.address = mr->new_address; + + h = t3->hash >> 24; + + if (Vcb->trees_ptrs[h] == &t3->list_entry_hash) { + if (t3->list_entry_hash.Flink == &Vcb->trees_hash) + Vcb->trees_ptrs[h] = NULL; + else { + tree* t2 = CONTAINING_RECORD(t3->list_entry_hash.Flink, tree, list_entry_hash); + + if (t2->hash >> 24 == h) + Vcb->trees_ptrs[h] = &t2->list_entry_hash; + else + Vcb->trees_ptrs[h] = NULL; + } + } + + RemoveEntryList(&t3->list_entry_hash); + + t3->hash = calc_crc32c(0xffffffff, (UINT8*)&t3->header.address, sizeof(UINT64)); + h = t3->hash >> 24; + + if (!Vcb->trees_ptrs[h]) { + UINT8 h2 = h; + + le2 = Vcb->trees_hash.Flink; + + /* scan buckets h-1 down to 0 for the nearest populated one; decrementing before the test keeps the UINT8 from wrapping to 255 and includes bucket 0 */ + while (h2 > 0) { + h2--; + + if (Vcb->trees_ptrs[h2]) { + le2 = Vcb->trees_ptrs[h2]; + break; + } + } + } else + le2 = Vcb->trees_ptrs[h]; + + inserted = FALSE; + while (le2 != &Vcb->trees_hash) { + 
tree* t2 = CONTAINING_RECORD(le2, tree, list_entry_hash); + + if (t2->hash >= t3->hash) { + InsertHeadList(le2->Blink, &t3->list_entry_hash); + inserted = TRUE; + break; + } + + le2 = le2->Flink; + } + + if (!inserted) + InsertTailList(&Vcb->trees_hash, &t3->list_entry_hash); + + if (!Vcb->trees_ptrs[h] || t3->list_entry_hash.Flink == Vcb->trees_ptrs[h]) + Vcb->trees_ptrs[h] = &t3->list_entry_hash; + + if (data_items && level == 0) { + le2 = data_items->Flink; + + while (le2 != data_items) { + data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry); + LIST_ENTRY* le3 = t3->itemlist.Flink; + + while (le3 != &t3->itemlist) { + tree_data* td = CONTAINING_RECORD(le3, tree_data, list_entry); + + if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { + EXTENT_DATA* ed = (EXTENT_DATA*)td->data; + + if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; + + if (ed2->address == dr->address) + ed2->address = dr->new_address; + } + } + + le3 = le3->Flink; + } + + le2 = le2->Flink; + } + } + + t3 = t4; + } + + *((UINT32*)mr->data) = ~calc_crc32c(0xffffffff, (UINT8*)&mr->data->fs_uuid, Vcb->superblock.node_size - sizeof(mr->data->csum)); + + tw = ExAllocatePoolWithTag(PagedPool, sizeof(tree_write), ALLOC_TAG); + if (!tw) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + tw->address = mr->new_address; + tw->length = Vcb->superblock.node_size; + tw->data = (UINT8*)mr->data; + tw->overlap = FALSE; + + if (IsListEmpty(&tree_writes)) + InsertTailList(&tree_writes, &tw->list_entry); + else { + BOOL inserted = FALSE; + + le2 = tree_writes.Flink; + while (le2 != &tree_writes) { + tree_write* tw2 = CONTAINING_RECORD(le2, tree_write, list_entry); + + if (tw2->address > tw->address) { + InsertHeadList(le2->Blink, &tw->list_entry); + inserted = TRUE; + break; + } + + le2 = le2->Flink; + } + + if (!inserted) + 
InsertTailList(&tree_writes, &tw->list_entry); + } + } + + le = le->Flink; + } + } + + le = items->Flink; + while (le != items) { + metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry); + + Status = add_metadata_reloc_extent_item(Vcb, mr, rollback); + if (!NT_SUCCESS(Status)) { + ERR("add_metadata_reloc_extent_item returned %08x\n", Status); + goto end; + } + + le = le->Flink; + } + + Status = do_tree_writes(Vcb, &tree_writes, NULL); + if (!NT_SUCCESS(Status)) { + ERR("do_tree_writes returned %08x\n", Status); + goto end; + } + + Status = STATUS_SUCCESS; + +end: + while (!IsListEmpty(&tree_writes)) { + tree_write* tw = CONTAINING_RECORD(RemoveHeadList(&tree_writes), tree_write, list_entry); + ExFreePool(tw); + } + + return Status; +} + +static NTSTATUS balance_metadata_chunk(device_extension* Vcb, chunk* c, BOOL* changed) { + KEY searchkey; + traverse_ptr tp; + NTSTATUS Status; + BOOL b; + LIST_ENTRY items, rollback; + UINT32 loaded = 0; + + TRACE("chunk %llx\n", c->offset); + + InitializeListHead(&rollback); + InitializeListHead(&items); + + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + + searchkey.obj_id = c->offset; + searchkey.obj_type = TYPE_METADATA_ITEM; + searchkey.offset = 0xffffffffffffffff; + + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + goto end; + } + + do { + traverse_ptr next_tp; + + if (tp.item->key.obj_id >= c->offset + c->chunk_item->size) + break; + + if (tp.item->key.obj_id >= c->offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) { + BOOL tree = FALSE, skinny = FALSE; + + if (tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) { + tree = TRUE; + skinny = TRUE; + } else if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset == Vcb->superblock.node_size && + tp.item->size >= sizeof(EXTENT_ITEM)) { + EXTENT_ITEM* ei = 
(EXTENT_ITEM*)tp.item->data; + + if (ei->flags & EXTENT_ITEM_TREE_BLOCK) + tree = TRUE; + } + + if (tree) { + Status = add_metadata_reloc(Vcb, &items, &tp, skinny, NULL, c, &rollback); + + if (!NT_SUCCESS(Status)) { + ERR("add_metadata_reloc returned %08x\n", Status); + goto end; + } + + loaded++; + + if (loaded >= 64) // only do 64 at a time + break; + } + } + + b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL); + + if (b) + tp = next_tp; + } while (b); + + if (IsListEmpty(&items)) { + *changed = FALSE; + Status = STATUS_SUCCESS; + goto end; + } else + *changed = TRUE; + + Status = write_metadata_items(Vcb, &items, NULL, c, &rollback); + if (!NT_SUCCESS(Status)) { + ERR("write_metadata_items returned %08x\n", Status); + goto end; + } + + Status = STATUS_SUCCESS; + + Vcb->need_write = TRUE; + +end: + if (NT_SUCCESS(Status)) + clear_rollback(Vcb, &rollback); + else + do_rollback(Vcb, &rollback); + + ExReleaseResourceLite(&Vcb->tree_lock); + + while (!IsListEmpty(&items)) { + metadata_reloc* mr = CONTAINING_RECORD(RemoveHeadList(&items), metadata_reloc, list_entry); + + while (!IsListEmpty(&mr->refs)) { + metadata_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&mr->refs), metadata_reloc_ref, list_entry); + + ExFreePool(ref); + } + + ExFreePool(mr); + } + + return Status; +} + +static NTSTATUS add_data_reloc(device_extension* Vcb, LIST_ENTRY* items, LIST_ENTRY* metadata_items, traverse_ptr* tp, chunk* c, LIST_ENTRY* rollback) { + data_reloc* dr; + EXTENT_ITEM* ei; + UINT16 len; + UINT64 inline_rc; + UINT8* ptr; + + dr = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc), ALLOC_TAG); + if (!dr) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + dr->address = tp->item->key.obj_id; + dr->size = tp->item->key.offset; + dr->ei = (EXTENT_ITEM*)tp->item->data; + InitializeListHead(&dr->refs); + + delete_tree_item(Vcb, tp, rollback); + + if (!c) + c = get_chunk_from_address(Vcb, tp->item->key.obj_id); + + if (c) { + 
ExAcquireResourceExclusiveLite(&c->lock, TRUE); + + decrease_chunk_usage(c, tp->item->key.offset); + + space_list_add(Vcb, c, TRUE, tp->item->key.obj_id, tp->item->key.offset, rollback); + + ExReleaseResourceLite(&c->lock); + } + + ei = (EXTENT_ITEM*)tp->item->data; + inline_rc = 0; + + len = tp->item->size - sizeof(EXTENT_ITEM); + ptr = (UINT8*)tp->item->data + sizeof(EXTENT_ITEM); + + while (len > 0) { + UINT8 secttype = *ptr; + ULONG sectlen = secttype == TYPE_EXTENT_DATA_REF ? sizeof(EXTENT_DATA_REF) : (secttype == TYPE_SHARED_DATA_REF ? sizeof(SHARED_DATA_REF) : 0); + data_reloc_ref* ref; + NTSTATUS Status; + metadata_reloc* mr; + + len--; + + if (sectlen > len) { + ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, len, sectlen); + return STATUS_INTERNAL_ERROR; + } + + if (sectlen == 0) { + ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, secttype); + return STATUS_INTERNAL_ERROR; + } + + ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG); + if (!ref) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + if (secttype == TYPE_EXTENT_DATA_REF) { + LIST_ENTRY* le; + KEY searchkey; + traverse_ptr tp3; + root* r = NULL; + + ref->type = TYPE_EXTENT_DATA_REF; + RtlCopyMemory(&ref->edr, ptr + sizeof(UINT8), sizeof(EXTENT_DATA_REF)); + inline_rc += ref->edr.count; + + le = Vcb->roots.Flink; + while (le != &Vcb->roots) { + root* r2 = CONTAINING_RECORD(le, root, list_entry); + + if (r2->id == ref->edr.root) { + r = r2; + break; + } + + le = le->Flink; + } + + if (!r) { + /* report the root id that was searched for, not the ref count */ + ERR("could not find subvol %llx\n", ref->edr.root); + ExFreePool(ref); + return STATUS_INTERNAL_ERROR; + } + + searchkey.obj_id = ref->edr.objid; + searchkey.obj_type = TYPE_EXTENT_DATA; + searchkey.offset = ref->edr.offset; + + Status = find_item(Vcb, r, &tp3, &searchkey, FALSE, NULL); + if 
(!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + ExFreePool(ref); + return Status; + } + + if (keycmp(tp3.item->key, searchkey)) { + ERR("could not find (%llx,%x,%llx) in root %llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, r->id); + ExFreePool(ref); + return STATUS_INTERNAL_ERROR; + } + + Status = add_metadata_reloc_parent(Vcb, metadata_items, tp3.tree->header.address, &mr, rollback); + if (!NT_SUCCESS(Status)) { + ERR("add_metadata_reloc_parent returned %08x\n", Status); + ExFreePool(ref); + return Status; + } + + ref->parent = mr; + } else if (secttype == TYPE_SHARED_DATA_REF) { + ref->type = TYPE_SHARED_DATA_REF; + RtlCopyMemory(&ref->sdr, ptr + sizeof(UINT8), sizeof(SHARED_DATA_REF)); + inline_rc += ref->sdr.count; + + Status = add_metadata_reloc_parent(Vcb, metadata_items, ref->sdr.offset, &mr, rollback); + if (!NT_SUCCESS(Status)) { + ERR("add_metadata_reloc_parent returned %08x\n", Status); + ExFreePool(ref); + return Status; + } + + ref->parent = mr; + } else { + ERR("unexpected tree type %x\n", secttype); + ExFreePool(ref); + return STATUS_INTERNAL_ERROR; + } + + InsertTailList(&dr->refs, &ref->list_entry); + + len -= sectlen; + ptr += sizeof(UINT8) + sectlen; + } + + if (inline_rc < ei->refcount) { // look for non-inline entries + traverse_ptr tp2 = *tp, next_tp; + + while (find_next_item(Vcb, &tp2, &next_tp, FALSE, NULL)) { + metadata_reloc* mr; + NTSTATUS Status; + + tp2 = next_tp; + + if (tp2.item->key.obj_id == tp->item->key.obj_id) { + if (tp2.item->key.obj_type == TYPE_EXTENT_DATA_REF && tp2.item->size >= sizeof(EXTENT_DATA_REF)) { + data_reloc_ref* ref; + LIST_ENTRY* le; + KEY searchkey; + traverse_ptr tp3; + root* r = NULL; + + ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG); + if (!ref) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + ref->type = TYPE_EXTENT_DATA_REF; + RtlCopyMemory(&ref->edr, tp2.item->data, sizeof(EXTENT_DATA_REF)); + + le = 
Vcb->roots.Flink; + while (le != &Vcb->roots) { + root* r2 = CONTAINING_RECORD(le, root, list_entry); + + if (r2->id == ref->edr.root) { + r = r2; + break; + } + + le = le->Flink; + } + + if (!r) { + ERR("could not find subvol %llx\n", ref->edr.count); + ExFreePool(ref); + return STATUS_INTERNAL_ERROR; + } + + searchkey.obj_id = ref->edr.objid; + searchkey.obj_type = TYPE_EXTENT_DATA; + searchkey.offset = ref->edr.offset; + + Status = find_item(Vcb, r, &tp3, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + ExFreePool(ref); + return Status; + } + + if (!keycmp(tp3.item->key, searchkey)) { + ERR("could not find (%llx,%x,%llx) in root %llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, r->id); + ExFreePool(ref); + return STATUS_INTERNAL_ERROR; + } + + Status = add_metadata_reloc_parent(Vcb, metadata_items, tp3.tree->header.address, &mr, rollback); + if (!NT_SUCCESS(Status)) { + ERR("add_metadata_reloc_parent returned %08x\n", Status); + ExFreePool(ref); + return Status; + } + + ref->parent = mr; + InsertTailList(&dr->refs, &ref->list_entry); + + delete_tree_item(Vcb, &tp2, rollback); + } else if (tp2.item->key.obj_type == TYPE_SHARED_DATA_REF && tp2.item->size >= sizeof(SHARED_DATA_REF)) { + data_reloc_ref* ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG); + if (!ref) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + ref->type = TYPE_SHARED_DATA_REF; + RtlCopyMemory(&ref->sdr, tp2.item->data, sizeof(SHARED_DATA_REF)); + + Status = add_metadata_reloc_parent(Vcb, metadata_items, ref->sdr.offset, &mr, rollback); + if (!NT_SUCCESS(Status)) { + ERR("add_metadata_reloc_parent returned %08x\n", Status); + ExFreePool(ref); + return Status; + } + + ref->parent = mr; + InsertTailList(&dr->refs, &ref->list_entry); + + delete_tree_item(Vcb, &tp2, rollback); + } + } else + break; + } + } + + InsertTailList(items, &dr->list_entry); + + return STATUS_SUCCESS; +} + 
+static NTSTATUS add_data_reloc_extent_item(device_extension* Vcb, data_reloc* dr, LIST_ENTRY* rollback) { + LIST_ENTRY* le; + UINT64 rc = 0; + UINT16 inline_len; + BOOL all_inline = TRUE; + data_reloc_ref* first_noninline = NULL; + EXTENT_ITEM* ei; + UINT8* ptr; + + inline_len = sizeof(EXTENT_ITEM); + + le = dr->refs.Flink; + while (le != &dr->refs) { + data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry); + ULONG extlen = 0; + + rc++; + + if (ref->type == TYPE_EXTENT_DATA_REF) + extlen += sizeof(EXTENT_DATA_REF); + else if (ref->type == TYPE_SHARED_DATA_REF) + extlen += sizeof(SHARED_DATA_REF); + + if (all_inline) { + if (inline_len + 1 + extlen > Vcb->superblock.node_size / 4) { + all_inline = FALSE; + first_noninline = ref; + } else + inline_len += extlen + 1; + } + + le = le->Flink; + } + + ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG); + if (!ei) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + ei->refcount = rc; + ei->generation = dr->ei->generation; + ei->flags = dr->ei->flags; + ptr = (UINT8*)&ei[1]; + + le = dr->refs.Flink; + while (le != &dr->refs) { + data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry); + + if (ref == first_noninline) + break; + + *ptr = ref->type; + ptr++; + + if (ref->type == TYPE_EXTENT_DATA_REF) { + EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)ptr; + + RtlCopyMemory(edr, &ref->edr, sizeof(EXTENT_DATA_REF)); + + ptr += sizeof(EXTENT_DATA_REF); + } else if (ref->type == TYPE_SHARED_DATA_REF) { + SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)ptr; + + sdr->offset = ref->parent->new_address; + sdr->count = ref->sdr.count; + + ptr += sizeof(SHARED_DATA_REF); + } + + le = le->Flink; + } + + if (!insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_EXTENT_ITEM, dr->size, ei, inline_len, NULL, NULL, rollback)) { + ERR("insert_tree_item failed\n"); + return STATUS_INTERNAL_ERROR; + } + + if (!all_inline) { + le = &first_noninline->list_entry; + + while (le != 
&dr->refs) { + data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry); + + if (ref->type == TYPE_EXTENT_DATA_REF) { + EXTENT_DATA_REF* edr; + UINT64 off; + + edr = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA_REF), ALLOC_TAG); + if (!edr) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(edr, &ref->edr, sizeof(EXTENT_DATA_REF)); + + off = get_extent_data_ref_hash2(ref->edr.root, ref->edr.objid, ref->edr.offset); + + if (!insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_EXTENT_DATA_REF, off, edr, sizeof(EXTENT_DATA_REF), NULL, NULL, rollback)) { + ERR("insert_tree_item failed\n"); + return STATUS_INTERNAL_ERROR; + } + } else if (ref->type == TYPE_SHARED_DATA_REF) { + SHARED_DATA_REF* sdr; + + sdr = ExAllocatePoolWithTag(PagedPool, sizeof(SHARED_DATA_REF), ALLOC_TAG); + if (!sdr) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + sdr->offset = ref->parent->new_address; + sdr->count = ref->sdr.count; + + if (!insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_SHARED_DATA_REF, sdr->offset, sdr, sizeof(SHARED_DATA_REF), NULL, NULL, rollback)) { + ERR("insert_tree_item failed\n"); + return STATUS_INTERNAL_ERROR; + } + } + + le = le->Flink; + } + } + + return STATUS_SUCCESS; +} + +static NTSTATUS balance_data_chunk(device_extension* Vcb, chunk* c, BOOL* changed) { + KEY searchkey; + traverse_ptr tp; + NTSTATUS Status; + BOOL b; + LIST_ENTRY items, metadata_items, rollback, *le; + UINT64 loaded = 0, num_loaded = 0; + chunk* newchunk = NULL; + UINT8* data = NULL; + + TRACE("chunk %llx\n", c->offset); + + InitializeListHead(&rollback); + InitializeListHead(&items); + InitializeListHead(&metadata_items); + + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + + searchkey.obj_id = c->offset; + searchkey.obj_type = TYPE_EXTENT_ITEM; + searchkey.offset = 0xffffffffffffffff; + + Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL); + if 
(!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + goto end; + } + + do { + traverse_ptr next_tp; + + if (tp.item->key.obj_id >= c->offset + c->chunk_item->size) + break; + + if (tp.item->key.obj_id >= c->offset && tp.item->key.obj_type == TYPE_EXTENT_ITEM) { + BOOL tree = FALSE; + + if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) { + EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data; + + if (ei->flags & EXTENT_ITEM_TREE_BLOCK) + tree = TRUE; + } + + if (!tree) { + Status = add_data_reloc(Vcb, &items, &metadata_items, &tp, c, &rollback); + + if (!NT_SUCCESS(Status)) { + ERR("add_data_reloc returned %08x\n", Status); + goto end; + } + + loaded += tp.item->key.offset; + num_loaded++; + + if (loaded >= 0x1000000 || num_loaded >= 100) // only do so much at a time, so we don't block too obnoxiously + break; + } + } + + b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL); + + if (b) + tp = next_tp; + } while (b); + + if (IsListEmpty(&items)) { + *changed = FALSE; + Status = STATUS_SUCCESS; + goto end; + } else + *changed = TRUE; + + data = ExAllocatePoolWithTag(PagedPool, 0x100000, ALLOC_TAG); + if (!data) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + le = items.Flink; + while (le != &items) { + data_reloc* dr = CONTAINING_RECORD(le, data_reloc, list_entry); + BOOL done = FALSE; + LIST_ENTRY* le2; + UINT32* csum; + UINT64 off; + + if (newchunk) { + ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE); + + if (find_data_address_in_chunk(Vcb, newchunk, dr->size, &dr->new_address)) { + increase_chunk_usage(newchunk, dr->size); + space_list_subtract(Vcb, newchunk, FALSE, dr->new_address, dr->size, &rollback); + done = TRUE; + } + + ExReleaseResourceLite(&newchunk->lock); + } + + if (!done) { + ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); + + le2 = Vcb->chunks.Flink; + while (le2 != &Vcb->chunks) { + chunk* c2 = CONTAINING_RECORD(le2, chunk, list_entry); + + if 
(!c2->readonly && !c2->reloc && c2 != newchunk && c2->chunk_item->type == Vcb->data_flags) { + ExAcquireResourceExclusiveLite(&c2->lock, TRUE); + + if ((c2->chunk_item->size - c2->used) >= dr->size) { + if (find_data_address_in_chunk(Vcb, c2, dr->size, &dr->new_address)) { + increase_chunk_usage(c2, dr->size); + space_list_subtract(Vcb, c2, FALSE, dr->new_address, dr->size, &rollback); + ExReleaseResourceLite(&c2->lock); + newchunk = c2; + done = TRUE; + break; + } + } + + ExReleaseResourceLite(&c2->lock); + } + + le2 = le2->Flink; + } + + // allocate new chunk if necessary + if (!done) { + newchunk = alloc_chunk(Vcb, Vcb->data_flags); + + if (!newchunk) { + ERR("could not allocate new chunk\n"); + ExReleaseResourceLite(&Vcb->chunk_lock); + Status = STATUS_DISK_FULL; + goto end; + } + + ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE); + + if (!find_data_address_in_chunk(Vcb, newchunk, dr->size, &dr->new_address)) { + ExReleaseResourceLite(&newchunk->lock); + ERR("could not find address in new chunk\n"); + Status = STATUS_DISK_FULL; + goto end; + } else { + increase_chunk_usage(newchunk, dr->size); + space_list_subtract(Vcb, newchunk, FALSE, dr->new_address, dr->size, &rollback); + } + + ExReleaseResourceLite(&newchunk->lock); + } + + ExReleaseResourceLite(&Vcb->chunk_lock); + } + + dr->newchunk = newchunk; + + csum = ExAllocatePoolWithTag(PagedPool, dr->size * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); + if (!csum) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + Status = load_csum(Vcb, csum, dr->address, dr->size / Vcb->superblock.sector_size, NULL); + + if (NT_SUCCESS(Status)) { + add_checksum_entry(Vcb, dr->new_address, dr->size / Vcb->superblock.sector_size, csum, NULL, &rollback); + add_checksum_entry(Vcb, dr->address, dr->size / Vcb->superblock.sector_size, NULL, NULL, &rollback); + } + + ExFreePool(csum); + + off = 0; + + while (off < dr->size) { + ULONG ds = min(dr->size - off, 0x100000); + + 
Status = read_data(Vcb, dr->address + off, ds, NULL, FALSE, data, c, NULL, NULL, FALSE); + if (!NT_SUCCESS(Status)) { + ERR("read_data returned %08x\n", Status); + goto end; + } + + Status = write_data_complete(Vcb, dr->new_address + off, data, ds, NULL, newchunk); + if (!NT_SUCCESS(Status)) { + ERR("write_data_complete returned %08x\n", Status); + goto end; + } + + off += ds; + } + + le = le->Flink; + } + + ExFreePool(data); + data = NULL; + + Status = write_metadata_items(Vcb, &metadata_items, &items, NULL, &rollback); + if (!NT_SUCCESS(Status)) { + ERR("write_metadata_items returned %08x\n", Status); + goto end; + } + + le = items.Flink; + while (le != &items) { + data_reloc* dr = CONTAINING_RECORD(le, data_reloc, list_entry); + + Status = add_data_reloc_extent_item(Vcb, dr, &rollback); + if (!NT_SUCCESS(Status)) { + ERR("add_data_reloc_extent_item returned %08x\n", Status); + goto end; + } + + le = le->Flink; + } + + le = c->changed_extents.Flink; + while (le != &c->changed_extents) { + LIST_ENTRY *le2, *le3; + changed_extent* ce = CONTAINING_RECORD(le, changed_extent, list_entry); + + le3 = le->Flink; + + le2 = items.Flink; + while (le2 != &items) { + data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry); + + if (ce->address == dr->address) { + ce->address = dr->new_address; + RemoveEntryList(&ce->list_entry); + InsertTailList(&dr->newchunk->changed_extents, &ce->list_entry); + break; + } + + le2 = le2->Flink; + } + + le = le3; + } + + // update open FCBs + // FIXME - speed this up + + ExAcquireResourceSharedLite(&Vcb->fcb_lock, TRUE); + + le = Vcb->all_fcbs.Flink; + while (le != &Vcb->all_fcbs) { + struct _fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_all); + LIST_ENTRY* le2; + + ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); + + le2 = fcb->extents.Flink; + while (le2 != &fcb->extents) { + extent* ext = CONTAINING_RECORD(le2, extent, list_entry); + + if (!ext->ignore) { + if (ext->data->type == EXTENT_TYPE_REGULAR || 
ext->data->type == EXTENT_TYPE_PREALLOC) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data; + + if (ed2->size > 0 && ed2->address >= c->offset && ed2->address < c->offset + c->chunk_item->size) { + LIST_ENTRY* le3 = items.Flink; + while (le3 != &items) { + data_reloc* dr = CONTAINING_RECORD(le3, data_reloc, list_entry); + + if (ed2->address == dr->address) { + ed2->address = dr->new_address; + break; + } + + le3 = le3->Flink; + } + } + } + } + + le2 = le2->Flink; + } + + ExReleaseResourceLite(fcb->Header.Resource); + + le = le->Flink; + } + + ExReleaseResourceLite(&Vcb->fcb_lock); + + Status = STATUS_SUCCESS; + + Vcb->need_write = TRUE; + +end: + if (NT_SUCCESS(Status)) + clear_rollback(Vcb, &rollback); + else + do_rollback(Vcb, &rollback); + + ExReleaseResourceLite(&Vcb->tree_lock); + + if (data) + ExFreePool(data); + + while (!IsListEmpty(&items)) { + data_reloc* dr = CONTAINING_RECORD(RemoveHeadList(&items), data_reloc, list_entry); + + while (!IsListEmpty(&dr->refs)) { + data_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&dr->refs), data_reloc_ref, list_entry); + + ExFreePool(ref); + } + + ExFreePool(dr); + } + + while (!IsListEmpty(&metadata_items)) { + metadata_reloc* mr = CONTAINING_RECORD(RemoveHeadList(&metadata_items), metadata_reloc, list_entry); + + while (!IsListEmpty(&mr->refs)) { + metadata_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&mr->refs), metadata_reloc_ref, list_entry); + + ExFreePool(ref); + } + + ExFreePool(mr); + } + + return Status; +} + +static __inline UINT64 get_chunk_dup_type(chunk* c) { + if (c->chunk_item->type & BLOCK_FLAG_RAID0) + return BLOCK_FLAG_RAID0; + else if (c->chunk_item->type & BLOCK_FLAG_RAID1) + return BLOCK_FLAG_RAID1; + else if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE) + return BLOCK_FLAG_DUPLICATE; + else if (c->chunk_item->type & BLOCK_FLAG_RAID10) + return BLOCK_FLAG_RAID10; + else if (c->chunk_item->type & BLOCK_FLAG_RAID5) + return BLOCK_FLAG_RAID5; + else if (c->chunk_item->type & 
BLOCK_FLAG_RAID6) + return BLOCK_FLAG_RAID6; + else + return BLOCK_FLAG_SINGLE; +} + +static BOOL should_balance_chunk(device_extension* Vcb, UINT8 sort, chunk* c) { + btrfs_balance_opts* opts; + + opts = &Vcb->balance.opts[sort]; + + if (!(opts->flags & BTRFS_BALANCE_OPTS_ENABLED)) + return FALSE; + + if (opts->flags & BTRFS_BALANCE_OPTS_PROFILES) { + UINT64 type = get_chunk_dup_type(c); + + if (!(type & opts->profiles)) + return FALSE; + } + + if (opts->flags & BTRFS_BALANCE_OPTS_DEVID) { + UINT16 i; + CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; + BOOL b = FALSE; + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (cis[i].dev_id == opts->devid) { + b = TRUE; + break; + } + } + + if (!b) + return FALSE; + } + + if (opts->flags & BTRFS_BALANCE_OPTS_DRANGE) { + UINT16 i, factor; + UINT64 physsize; + CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; + BOOL b = FALSE; + + if (c->chunk_item->type & BLOCK_FLAG_RAID0) + factor = c->chunk_item->num_stripes; + else if (c->chunk_item->type & BLOCK_FLAG_RAID10) + factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes; + else if (c->chunk_item->type & BLOCK_FLAG_RAID5) + factor = c->chunk_item->num_stripes - 1; + else if (c->chunk_item->type & BLOCK_FLAG_RAID6) + factor = c->chunk_item->num_stripes - 2; + else // SINGLE, DUPLICATE, RAID1 + factor = 1; + + physsize = c->chunk_item->size / factor; + + for (i = 0; i < c->chunk_item->num_stripes; i++) { + if (cis[i].offset >= opts->drange_start && cis[i].offset + physsize < opts->drange_end) { + b = TRUE; + break; + } + } + + if (!b) + return FALSE; + } + + if (opts->flags & BTRFS_BALANCE_OPTS_VRANGE) { + if (c->offset + c->chunk_item->size <= opts->vrange_start || c->offset > opts->vrange_end) + return FALSE; + } + + if (opts->flags & BTRFS_BALANCE_OPTS_STRIPES) { + if (c->chunk_item->num_stripes < opts->stripes_start || c->chunk_item->num_stripes < opts->stripes_end) + return FALSE; + } + + if (opts->flags & 
BTRFS_BALANCE_OPTS_USAGE) { + UINT64 usage = c->used * 100 / c->chunk_item->size; + + // usage == 0 should mean completely empty, not just that usage rounds to 0% + if (c->used > 0 && usage == 0) + usage = 1; + + if (usage < opts->usage_start || usage > opts->usage_end) + return FALSE; + } + + if (opts->flags & BTRFS_BALANCE_OPTS_CONVERT && opts->flags & BTRFS_BALANCE_OPTS_SOFT) { + UINT64 type = get_chunk_dup_type(c); + + if (type == opts->convert) + return FALSE; + } + + return TRUE; +} + +static void copy_balance_args(btrfs_balance_opts* opts, BALANCE_ARGS* args) { + if (opts->flags & BTRFS_BALANCE_OPTS_PROFILES) { + args->profiles = opts->profiles; + args->flags |= BALANCE_ARGS_FLAGS_PROFILES; + } + + if (opts->flags & BTRFS_BALANCE_OPTS_USAGE) { + if (args->usage_start == 0) { + args->flags |= BALANCE_ARGS_FLAGS_USAGE_RANGE; + args->usage_start = opts->usage_start; + args->usage_end = opts->usage_end; + } else { + args->flags |= BALANCE_ARGS_FLAGS_USAGE; + args->usage = opts->usage_end; + } + } + + if (opts->flags & BTRFS_BALANCE_OPTS_DEVID) { + args->devid = opts->devid; + args->flags |= BALANCE_ARGS_FLAGS_DEVID; + } + + if (opts->flags & BTRFS_BALANCE_OPTS_DRANGE) { + args->drange_start = opts->drange_start; + args->drange_end = opts->drange_end; + args->flags |= BALANCE_ARGS_FLAGS_DRANGE; + } + + if (opts->flags & BTRFS_BALANCE_OPTS_VRANGE) { + args->vrange_start = opts->vrange_start; + args->vrange_end = opts->vrange_end; + args->flags |= BALANCE_ARGS_FLAGS_VRANGE; + } + + if (opts->flags & BTRFS_BALANCE_OPTS_CONVERT) { + args->convert = opts->convert; + args->flags |= BALANCE_ARGS_FLAGS_CONVERT; + + if (opts->flags & BTRFS_BALANCE_OPTS_SOFT) + args->flags |= BALANCE_ARGS_FLAGS_SOFT; + } + + if (opts->flags & BTRFS_BALANCE_OPTS_LIMIT) { + if (args->limit_start == 0) { + args->flags |= BALANCE_ARGS_FLAGS_LIMIT_RANGE; + args->limit_start = opts->limit_start; + args->limit_end = opts->limit_end; + } else { + args->flags |= BALANCE_ARGS_FLAGS_LIMIT; + 
args->limit = opts->limit_end; + } + } + + if (opts->flags & BTRFS_BALANCE_OPTS_STRIPES) { + args->stripes_start = opts->stripes_start; + args->stripes_end = opts->stripes_end; + args->flags |= BALANCE_ARGS_FLAGS_STRIPES_RANGE; + } +} + +static NTSTATUS add_balance_item(device_extension* Vcb) { + LIST_ENTRY rollback; + KEY searchkey; + traverse_ptr tp; + NTSTATUS Status; + BALANCE_ITEM* bi; + + InitializeListHead(&rollback); + + searchkey.obj_id = BALANCE_ITEM_ID; + searchkey.obj_type = TYPE_TEMP_ITEM; + searchkey.offset = 0; + + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + goto end; + } + + if (!keycmp(tp.item->key, searchkey)) + delete_tree_item(Vcb, &tp, &rollback); + + bi = ExAllocatePoolWithTag(PagedPool, sizeof(BALANCE_ITEM), ALLOC_TAG); + if (!bi) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlZeroMemory(bi, sizeof(BALANCE_ITEM)); + + if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED) { + bi->flags |= BALANCE_FLAGS_DATA; + copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bi->data); + } + + if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) { + bi->flags |= BALANCE_FLAGS_METADATA; + copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bi->metadata); + } + + if (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED) { + bi->flags |= BALANCE_FLAGS_SYSTEM; + copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bi->system); + } + + if (!insert_tree_item(Vcb, Vcb->root_root, BALANCE_ITEM_ID, TYPE_TEMP_ITEM, 0, bi, sizeof(BALANCE_ITEM), NULL, NULL, &rollback)) { + ERR("insert_tree_item failed\n"); + Status = STATUS_INTERNAL_ERROR; + goto end; + } + + Status = STATUS_SUCCESS; + +end: + if (NT_SUCCESS(Status)) { + do_write(Vcb, NULL, &rollback); + free_trees(Vcb); + 
+ clear_rollback(Vcb, &rollback); + } else + do_rollback(Vcb, &rollback); + + ExReleaseResourceLite(&Vcb->tree_lock); + + return Status; +} + +static NTSTATUS remove_balance_item(device_extension* Vcb) { + LIST_ENTRY rollback; + KEY searchkey; + traverse_ptr tp; + NTSTATUS Status; + + InitializeListHead(&rollback); + + searchkey.obj_id = BALANCE_ITEM_ID; + searchkey.obj_type = TYPE_TEMP_ITEM; + searchkey.offset = 0; + + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + goto end; + } + + if (!keycmp(tp.item->key, searchkey)) { + delete_tree_item(Vcb, &tp, &rollback); + + do_write(Vcb, NULL, &rollback); + free_trees(Vcb); + } + + Status = STATUS_SUCCESS; + +end: + if (NT_SUCCESS(Status)) + clear_rollback(Vcb, &rollback); + else + do_rollback(Vcb, &rollback); + + ExReleaseResourceLite(&Vcb->tree_lock); + + return Status; +} + +static void load_balance_args(btrfs_balance_opts* opts, BALANCE_ARGS* args) { + opts->flags = BTRFS_BALANCE_OPTS_ENABLED; + + if (args->flags & BALANCE_ARGS_FLAGS_PROFILES) { + opts->flags |= BTRFS_BALANCE_OPTS_PROFILES; + opts->profiles = args->profiles; + } + + if (args->flags & BALANCE_ARGS_FLAGS_USAGE) { + opts->flags |= BTRFS_BALANCE_OPTS_USAGE; + + opts->usage_start = 0; + opts->usage_end = args->usage; + } else if (args->flags & BALANCE_ARGS_FLAGS_USAGE_RANGE) { + opts->flags |= BTRFS_BALANCE_OPTS_USAGE; + + opts->usage_start = args->usage_start; + opts->usage_end = args->usage_end; + } + + if (args->flags & BALANCE_ARGS_FLAGS_DEVID) { + opts->flags |= BTRFS_BALANCE_OPTS_DEVID; + opts->devid = args->devid; + } + + if (args->flags & BALANCE_ARGS_FLAGS_DRANGE) { + opts->flags |= BTRFS_BALANCE_OPTS_DRANGE; + opts->drange_start = args->drange_start; + opts->drange_end = args->drange_end; + } + + if (args->flags & BALANCE_ARGS_FLAGS_VRANGE) { + opts->flags |= BTRFS_BALANCE_OPTS_VRANGE; + 
opts->vrange_start = args->vrange_start; + opts->vrange_end = args->vrange_end; + } + + if (args->flags & BALANCE_ARGS_FLAGS_LIMIT) { + opts->flags |= BTRFS_BALANCE_OPTS_LIMIT; + + opts->limit_start = 0; + opts->limit_end = args->limit; + } else if (args->flags & BALANCE_ARGS_FLAGS_LIMIT_RANGE) { + opts->flags |= BTRFS_BALANCE_OPTS_LIMIT; + + opts->limit_start = args->limit_start; + opts->limit_end = args->limit_end; + } + + if (args->flags & BALANCE_ARGS_FLAGS_STRIPES_RANGE) { + opts->flags |= BTRFS_BALANCE_OPTS_STRIPES; + + opts->stripes_start = args->stripes_start; + opts->stripes_end = args->stripes_end; + } + + if (args->flags & BALANCE_ARGS_FLAGS_CONVERT) { + opts->flags |= BTRFS_BALANCE_OPTS_CONVERT; + opts->convert = args->convert; + + if (args->flags & BALANCE_ARGS_FLAGS_SOFT) + opts->flags |= BTRFS_BALANCE_OPTS_SOFT; + } +} + +static NTSTATUS remove_superblocks(device* dev) { + NTSTATUS Status; + superblock* sb; + int i = 0; + + sb = ExAllocatePoolWithTag(PagedPool, sizeof(superblock), ALLOC_TAG); + if (!sb) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(sb, sizeof(superblock)); + + while (superblock_addrs[i] > 0 && dev->length >= superblock_addrs[i] + sizeof(superblock)) { + Status = write_data_phys(dev->devobj, superblock_addrs[i], sb, sizeof(superblock)); + + if (!NT_SUCCESS(Status)) { + ExFreePool(sb); + return Status; + } + + i++; + } + + ExFreePool(sb); + + return STATUS_SUCCESS; +} + +static NTSTATUS replace_mount_dev(device_extension* Vcb, device* dev, PDEVICE_OBJECT mountmgr, BOOL part0) { + NTSTATUS Status; + MOUNTDEV_NAME mdn, *mdn2 = NULL, *mdn3 = NULL; + ULONG mdnsize, mmpsize; + MOUNTMGR_MOUNT_POINT* mmp = NULL; + MOUNTMGR_MOUNT_POINTS mmps, *mmps2 = NULL; + ULONG i; + UNICODE_STRING us; + + // get old device name + + Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &mdn, sizeof(MOUNTDEV_NAME), TRUE, NULL); + if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) { + 
ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); + return Status; + } + + mdnsize = offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength; + + mdn2 = ExAllocatePoolWithTag(PagedPool, mdnsize, ALLOC_TAG); + if (!mdn2) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, mdn2, mdnsize, TRUE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); + goto end; + } + + // get new device name + + Status = dev_ioctl(first_device(Vcb)->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &mdn, sizeof(MOUNTDEV_NAME), TRUE, NULL); + if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) { + ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); + goto end2; + } + + mdnsize = offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength; + + mdn3 = ExAllocatePoolWithTag(PagedPool, mdnsize, ALLOC_TAG); + if (!mdn3) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end2; + } + + Status = dev_ioctl(first_device(Vcb)->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, mdn3, mdnsize, TRUE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); + goto end2; + } + + // query and delete existing mount points + + mmpsize = sizeof(MOUNTMGR_MOUNT_POINT) + mdn2->NameLength; + + mmp = ExAllocatePoolWithTag(PagedPool, mmpsize, ALLOC_TAG); + if (!mmp) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end2; + } + + RtlZeroMemory(mmp, sizeof(MOUNTMGR_MOUNT_POINT)); + mmp->DeviceNameOffset = sizeof(MOUNTMGR_MOUNT_POINT); + mmp->DeviceNameLength = mdn2->NameLength; + RtlCopyMemory(&mmp[1], mdn2->Name, mdn2->NameLength); + + Status = dev_ioctl(mountmgr, IOCTL_MOUNTMGR_QUERY_POINTS, mmp, mmpsize, &mmps, mmpsize, TRUE, NULL); + if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) { + ERR("IOCTL_MOUNTMGR_QUERY_POINTS returned %08x\n", 
Status); + goto end2; + } + + mmps2 = ExAllocatePoolWithTag(PagedPool, mmps.Size, ALLOC_TAG); + if (!mmps2) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end2; + } + + Status = dev_ioctl(mountmgr, IOCTL_MOUNTMGR_DELETE_POINTS, mmp, mmpsize, mmps2, mmps.Size, TRUE, NULL); + if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) { + ERR("IOCTL_MOUNTMGR_DELETE_POINTS returned %08x\n", Status); + goto end2; + } + + // re-create mount points + + for (i = 0; i < mmps2->NumberOfMountPoints; i++) { + if (mmps2->MountPoints[i].SymbolicLinkNameOffset != 0) { + ULONG mcpilen; + MOUNTMGR_CREATE_POINT_INPUT* mcpi; + + mcpilen = sizeof(MOUNTMGR_CREATE_POINT_INPUT) + mmps2->MountPoints[i].SymbolicLinkNameLength + mdn3->NameLength; + + mcpi = ExAllocatePoolWithTag(PagedPool, mcpilen, ALLOC_TAG); + if (!mcpi) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end2; + } + + mcpi->SymbolicLinkNameOffset = sizeof(MOUNTMGR_CREATE_POINT_INPUT); + mcpi->SymbolicLinkNameLength = mmps2->MountPoints[i].SymbolicLinkNameLength; + mcpi->DeviceNameOffset = mcpi->SymbolicLinkNameOffset + mcpi->SymbolicLinkNameLength; + mcpi->DeviceNameLength = mdn3->NameLength; + + RtlCopyMemory((UINT8*)mcpi + mcpi->SymbolicLinkNameOffset, (UINT8*)mmps2 + mmps2->MountPoints[i].SymbolicLinkNameOffset, + mcpi->SymbolicLinkNameLength); + RtlCopyMemory((UINT8*)mcpi + mcpi->DeviceNameOffset, mdn3->Name, mdn3->NameLength); + + Status = dev_ioctl(mountmgr, IOCTL_MOUNTMGR_CREATE_POINT, mcpi, mcpilen, NULL, 0, TRUE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("IOCTL_MOUNTMGR_CREATE_POINT returned %08x\n", Status); + ExFreePool(mcpi); + goto end2; + } + + ExFreePool(mcpi); + } + } + + Status = STATUS_SUCCESS; + +end2: + // re-add old device back to mountmgr + + if (!part0) { + us.Buffer = mdn2->Name; + us.Length = us.MaximumLength = mdn2->NameLength; + + add_volume(mountmgr, &us); + } + +end: + if (mdn2) + ExFreePool(mdn2); + + if (mdn3) + ExFreePool(mdn3); + + 
if (mmp) + ExFreePool(mmp); + + if (mmps2) + ExFreePool(mmps2); + + return Status; +} + +static NTSTATUS finish_removing_device(device_extension* Vcb, device* dev) { + KEY searchkey; + traverse_ptr tp; + NTSTATUS Status; + LIST_ENTRY rollback, *le; + BOOL first_dev, part0 = FALSE; + + InitializeListHead(&rollback); + + if (Vcb->need_write) + do_write(Vcb, NULL, &rollback); + + free_trees(Vcb); + + clear_rollback(Vcb, &rollback); + + // remove entry in chunk tree + + searchkey.obj_id = 1; + searchkey.obj_type = TYPE_DEV_ITEM; + searchkey.offset = dev->devitem.dev_id; + + Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + if (!keycmp(searchkey, tp.item->key)) + delete_tree_item(Vcb, &tp, &rollback); + + // remove stats entry in device tree + + searchkey.obj_id = 0; + searchkey.obj_type = TYPE_DEV_STATS; + searchkey.offset = dev->devitem.dev_id; + + Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + if (!keycmp(searchkey, tp.item->key)) + delete_tree_item(Vcb, &tp, &rollback); + + // update superblock + + Vcb->superblock.num_devices--; + Vcb->superblock.total_bytes -= dev->devitem.num_bytes; + Vcb->devices_loaded--; + + first_dev = first_device(Vcb) == dev; + + RemoveEntryList(&dev->list_entry); + + // flush + + do_write(Vcb, NULL, &rollback); + + free_trees(Vcb); + + clear_rollback(Vcb, &rollback); + + if (!dev->readonly) { + Status = remove_superblocks(dev); + if (!NT_SUCCESS(Status)) + WARN("remove_superblocks returned %08x\n", Status); + } + + // remove entry in volume list + + ExAcquireResourceExclusiveLite(&volumes_lock, TRUE); + + le = volumes.Flink; + while (le != &volumes) { + volume* v = CONTAINING_RECORD(le, volume, list_entry); + + if (RtlCompareMemory(&Vcb->superblock.uuid, &v->fsuuid, sizeof(BTRFS_UUID)) == 
sizeof(BTRFS_UUID) && + RtlCompareMemory(&dev->devitem.device_uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { + PFILE_OBJECT FileObject; + PDEVICE_OBJECT mountmgr; + UNICODE_STRING mmdevpath; + + RemoveEntryList(&v->list_entry); + + // re-add entry to mountmgr + + if (!first_dev && v->part_num != 0) { + RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME); + Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &FileObject, &mountmgr); + if (!NT_SUCCESS(Status)) + ERR("IoGetDeviceObjectPointer returned %08x\n", Status); + else { + add_volume(mountmgr, &v->devpath); + ObDereferenceObject(FileObject); + } + } + + part0 = v->part_num == 0 ? TRUE : FALSE; + + if (v->devpath.Buffer) + ExFreePool(v->devpath.Buffer); + + ExFreePool(v); + break; + } + + le = le->Flink; + } + + ExReleaseResourceLite(&volumes_lock); + + if (first_dev) { + PDEVICE_OBJECT DeviceObject, olddev; + device* newfirstdev; + PFILE_OBJECT FileObject; + UNICODE_STRING mmdevpath; + PDEVICE_OBJECT mountmgr; + + DeviceObject = Vcb->Vpb->DeviceObject; + + olddev = DeviceObject->Vpb->RealDevice; + newfirstdev = first_device(Vcb); + + ObReferenceObject(newfirstdev->devobj); + DeviceObject->Vpb->RealDevice = newfirstdev->devobj; + ObDereferenceObject(olddev); + + RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME); + Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &FileObject, &mountmgr); + if (!NT_SUCCESS(Status)) + ERR("IoGetDeviceObjectPointer returned %08x\n", Status); + else { + Status = replace_mount_dev(Vcb, dev, mountmgr, part0); + if (!NT_SUCCESS(Status)) + ERR("replace_mount_dev returned %08x\n", Status); + + ObDereferenceObject(FileObject); + } + + } + + // free dev + + ObDereferenceObject(dev->devobj); + + while (!IsListEmpty(&dev->space)) { + LIST_ENTRY* le2 = RemoveHeadList(&dev->space); + space* s = CONTAINING_RECORD(le2, space, list_entry); + + ExFreePool(s); + } + + ExFreePool(dev); + + return STATUS_SUCCESS; +} + +#ifndef __REACTOS__ 
+static void balance_thread(void* context) { +#else +static void NTAPI balance_thread(void* context) { +#endif + device_extension* Vcb = (device_extension*)context; + LIST_ENTRY chunks; + LIST_ENTRY* le; + UINT64 num_chunks[3]; + NTSTATUS Status; + + Vcb->balance.stopping = FALSE; + Vcb->balance.cancelling = FALSE; + KeInitializeEvent(&Vcb->balance.finished, NotificationEvent, FALSE); + + if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_CONVERT) + Vcb->data_flags = BLOCK_FLAG_DATA | (Vcb->balance.opts[BALANCE_OPTS_DATA].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_DATA].convert); + + if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_CONVERT) + Vcb->metadata_flags = BLOCK_FLAG_METADATA | (Vcb->balance.opts[BALANCE_OPTS_METADATA].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_METADATA].convert); + + if (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_CONVERT) + Vcb->system_flags = BLOCK_FLAG_SYSTEM | (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_SYSTEM].convert); + + if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS) { + if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED) + RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &Vcb->balance.opts[BALANCE_OPTS_DATA], sizeof(btrfs_balance_opts)); + else if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) + RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_DATA], &Vcb->balance.opts[BALANCE_OPTS_METADATA], sizeof(btrfs_balance_opts)); + } + + // FIXME - what are we supposed to do with limit_start? 
+ + if (!Vcb->readonly) { + if (!Vcb->balance.removing) { + Status = add_balance_item(Vcb); + if (!NT_SUCCESS(Status)) { + ERR("add_balance_item returned %08x\n", Status); + goto end; + } + } else { + if (Vcb->need_write) { + LIST_ENTRY rollback; + + InitializeListHead(&rollback); + do_write(Vcb, NULL, &rollback); + free_trees(Vcb); + + clear_rollback(Vcb, &rollback); + } + } + } + + num_chunks[0] = num_chunks[1] = num_chunks[2] = 0; + Vcb->balance.total_chunks = 0; + + InitializeListHead(&chunks); + + KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL); + + if (Vcb->balance.stopping) + goto end; + + ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE); + + le = Vcb->chunks.Flink; + while (le != &Vcb->chunks) { + chunk* c = CONTAINING_RECORD(le, chunk, list_entry); + UINT8 sort; + + ExAcquireResourceExclusiveLite(&c->lock, TRUE); + + if (c->chunk_item->type & BLOCK_FLAG_DATA) + sort = BALANCE_OPTS_DATA; + else if (c->chunk_item->type & BLOCK_FLAG_METADATA) + sort = BALANCE_OPTS_METADATA; + else if (c->chunk_item->type & BLOCK_FLAG_SYSTEM) + sort = BALANCE_OPTS_SYSTEM; + else { + ERR("unexpected chunk type %llx\n", c->chunk_item->type); + ExReleaseResourceLite(&c->lock); + break; + } + + if ((!(Vcb->balance.opts[sort].flags & BTRFS_BALANCE_OPTS_LIMIT) || num_chunks[sort] < Vcb->balance.opts[sort].limit_end) && + should_balance_chunk(Vcb, sort, c)) { + c->reloc = TRUE; + + InsertTailList(&chunks, &c->list_entry_balance); + + num_chunks[sort]++; + Vcb->balance.total_chunks++; + } + + ExReleaseResourceLite(&c->lock); + + le = le->Flink; + } + + ExReleaseResourceLite(&Vcb->chunk_lock); + + Vcb->balance.chunks_left = Vcb->balance.total_chunks; + + // do data chunks before metadata + le = chunks.Flink; + while (le != &chunks) { + chunk* c = CONTAINING_RECORD(le, chunk, list_entry_balance); + LIST_ENTRY* le2 = le->Flink; + + if (c->chunk_item->type & BLOCK_FLAG_DATA) { + NTSTATUS Status; + BOOL changed; + + do { + changed = FALSE; + + 
FsRtlEnterFileSystem(); + + Status = balance_data_chunk(Vcb, c, &changed); + + FsRtlExitFileSystem(); + + if (!NT_SUCCESS(Status)) { + ERR("balance_data_chunk returned %08x\n", Status); + Vcb->balance.status = Status; + goto end; + } + + KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL); + + if (Vcb->balance.stopping) + break; + } while (changed); + + if (!c->list_entry_changed.Flink) + InsertTailList(&Vcb->chunks_changed, &c->list_entry_changed); + } + + if (Vcb->balance.stopping) { + while (le != &chunks) { + c = CONTAINING_RECORD(le, chunk, list_entry_balance); + c->reloc = FALSE; + + le = le->Flink; + } + goto end; + } + + if (c->chunk_item->type & BLOCK_FLAG_DATA && + (!(Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) || !(c->chunk_item->type & BLOCK_FLAG_METADATA))) { + RemoveEntryList(&c->list_entry_balance); + c->list_entry_balance.Flink = NULL; + + Vcb->balance.chunks_left--; + } + + le = le2; + } + + // do metadata chunks + while (!IsListEmpty(&chunks)) { + chunk* c; + NTSTATUS Status; + BOOL changed; + + le = RemoveHeadList(&chunks); + c = CONTAINING_RECORD(le, chunk, list_entry_balance); + + if (c->chunk_item->type & BLOCK_FLAG_METADATA || c->chunk_item->type & BLOCK_FLAG_SYSTEM) { + do { + FsRtlEnterFileSystem(); + + Status = balance_metadata_chunk(Vcb, c, &changed); + + FsRtlExitFileSystem(); + + if (!NT_SUCCESS(Status)) { + ERR("balance_metadata_chunk returned %08x\n", Status); + Vcb->balance.status = Status; + goto end; + } + + KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL); + + if (Vcb->balance.stopping) + break; + } while (changed); + + if (!c->list_entry_changed.Flink) + InsertTailList(&Vcb->chunks_changed, &c->list_entry_changed); + } + + if (Vcb->balance.stopping) { + while (le != &chunks) { + c = CONTAINING_RECORD(le, chunk, list_entry_balance); + c->reloc = FALSE; + + le = le->Flink; + c->list_entry_balance.Flink = NULL; + } + break; + } + + 
c->list_entry_balance.Flink = NULL; + + Vcb->balance.chunks_left--; + } + +end: + if (!Vcb->readonly) { + if (!Vcb->balance.removing) { + FsRtlEnterFileSystem(); + Status = remove_balance_item(Vcb); + FsRtlExitFileSystem(); + + if (!NT_SUCCESS(Status)) { + ERR("remove_balance_item returned %08x\n", Status); + goto end; + } + } else { + device* dev = NULL; + + FsRtlEnterFileSystem(); + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev2 = CONTAINING_RECORD(le, device, list_entry); + + if (dev2->devitem.dev_id == Vcb->balance.opts[0].devid) { + dev = dev2; + break; + } + + le = le->Flink; + } + + if (dev) { + if (Vcb->balance.chunks_left == 0) { + Status = finish_removing_device(Vcb, dev); + + if (!NT_SUCCESS(Status)) { + ERR("finish_removing_device returned %08x\n", Status); + dev->reloc = FALSE; + } + } else + dev->reloc = FALSE; + } + + ExReleaseResourceLite(&Vcb->tree_lock); + FsRtlExitFileSystem(); + } + } + + ZwClose(Vcb->balance.thread); + Vcb->balance.thread = NULL; + + KeSetEvent(&Vcb->balance.finished, 0, FALSE); +} + +NTSTATUS start_balance(device_extension* Vcb, void* data, ULONG length, KPROCESSOR_MODE processor_mode) { + NTSTATUS Status; + btrfs_start_balance* bsb = (btrfs_start_balance*)data; + UINT8 i; + + if (length < sizeof(btrfs_start_balance) || !data) + return STATUS_INVALID_PARAMETER; + + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) + return STATUS_PRIVILEGE_NOT_HELD; + + if (Vcb->balance.thread) { + WARN("balance already running\n"); + return STATUS_DEVICE_NOT_READY; + } + + if (Vcb->readonly) + return STATUS_MEDIA_WRITE_PROTECTED; + + if (!(bsb->opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED) && + !(bsb->opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) && + !(bsb->opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED)) + return STATUS_SUCCESS; + + for (i = 0; i < 3; i++) { + if 
(bsb->opts[i].flags & BTRFS_BALANCE_OPTS_ENABLED) { + if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_PROFILES) { + bsb->opts[i].profiles &= BLOCK_FLAG_RAID0 | BLOCK_FLAG_RAID1 | BLOCK_FLAG_DUPLICATE | BLOCK_FLAG_RAID10 | + BLOCK_FLAG_RAID5 | BLOCK_FLAG_RAID6 | BLOCK_FLAG_SINGLE; + + if (bsb->opts[i].profiles == 0) + return STATUS_INVALID_PARAMETER; + } + + if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_DEVID) { + if (bsb->opts[i].devid == 0) + return STATUS_INVALID_PARAMETER; + } + + if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_DRANGE) { + if (bsb->opts[i].drange_start > bsb->opts[i].drange_end) + return STATUS_INVALID_PARAMETER; + } + + if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_VRANGE) { + if (bsb->opts[i].vrange_start > bsb->opts[i].vrange_end) + return STATUS_INVALID_PARAMETER; + } + + if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_LIMIT) { + bsb->opts[i].limit_start = max(1, bsb->opts[i].limit_start); + bsb->opts[i].limit_end = max(1, bsb->opts[i].limit_end); + + if (bsb->opts[i].limit_start > bsb->opts[i].limit_end) + return STATUS_INVALID_PARAMETER; + } + + if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_STRIPES) { + bsb->opts[i].stripes_start = max(1, bsb->opts[i].stripes_start); + bsb->opts[i].stripes_end = max(1, bsb->opts[i].stripes_end); + + if (bsb->opts[i].stripes_start > bsb->opts[i].stripes_end) + return STATUS_INVALID_PARAMETER; + } + + if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_USAGE) { + bsb->opts[i].usage_start = min(100, bsb->opts[i].stripes_start); + bsb->opts[i].usage_end = min(100, bsb->opts[i].stripes_end); + + if (bsb->opts[i].stripes_start > bsb->opts[i].stripes_end) + return STATUS_INVALID_PARAMETER; + } + + if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT) { + if (bsb->opts[i].convert != BLOCK_FLAG_RAID0 && bsb->opts[i].convert != BLOCK_FLAG_RAID1 && + bsb->opts[i].convert != BLOCK_FLAG_DUPLICATE && bsb->opts[i].convert != BLOCK_FLAG_RAID10 && + bsb->opts[i].convert != BLOCK_FLAG_RAID5 && bsb->opts[i].convert != BLOCK_FLAG_RAID6 && + 
bsb->opts[i].convert != BLOCK_FLAG_SINGLE) + return STATUS_INVALID_PARAMETER; + } + } + } + + RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bsb->opts[BALANCE_OPTS_DATA], sizeof(btrfs_balance_opts)); + RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bsb->opts[BALANCE_OPTS_METADATA], sizeof(btrfs_balance_opts)); + RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bsb->opts[BALANCE_OPTS_SYSTEM], sizeof(btrfs_balance_opts)); + + Vcb->balance.paused = FALSE; + Vcb->balance.removing = FALSE; + Vcb->balance.status = STATUS_SUCCESS; + KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused); + + Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb); + if (!NT_SUCCESS(Status)) { + ERR("PsCreateSystemThread returned %08x\n", Status); + return Status; + } + + return STATUS_SUCCESS; +} + +NTSTATUS look_for_balance_item(device_extension* Vcb) { + LIST_ENTRY rollback; + KEY searchkey; + traverse_ptr tp; + NTSTATUS Status; + BALANCE_ITEM* bi; + int i; + + InitializeListHead(&rollback); + + searchkey.obj_id = BALANCE_ITEM_ID; + searchkey.obj_type = TYPE_TEMP_ITEM; + searchkey.offset = 0; + + Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + if (keycmp(tp.item->key, searchkey)) { + TRACE("no balance item found\n"); + return STATUS_NOT_FOUND; + } + + if (tp.item->size < sizeof(BALANCE_ITEM)) { + WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, + tp.item->size, sizeof(BALANCE_ITEM)); + return STATUS_INTERNAL_ERROR; + } + + bi = (BALANCE_ITEM*)tp.item->data; + + if (bi->flags & BALANCE_FLAGS_DATA) + load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bi->data); + + if (bi->flags & BALANCE_FLAGS_METADATA) + load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bi->metadata); + + if (bi->flags & 
BALANCE_FLAGS_SYSTEM) + load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bi->system); + + // do the heuristics that Linux driver does + + for (i = 0; i < 3; i++) { + if (Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_ENABLED) { + // if converting, don't redo chunks already done + + if (Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT) + Vcb->balance.opts[i].flags |= BTRFS_BALANCE_OPTS_SOFT; + + // don't balance chunks more than 90% filled - presumably these + // have already been done + + if (!(Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_USAGE) && + !(Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT) + ) { + Vcb->balance.opts[i].flags |= BTRFS_BALANCE_OPTS_USAGE; + Vcb->balance.opts[i].usage_start = 0; + Vcb->balance.opts[i].usage_end = 90; + } + } + } + + if (Vcb->readonly || Vcb->options.skip_balance) + Vcb->balance.paused = TRUE; + else + Vcb->balance.paused = FALSE; + + Vcb->balance.removing = FALSE; + Vcb->balance.status = STATUS_SUCCESS; + KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused); + + Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb); + if (!NT_SUCCESS(Status)) { + ERR("PsCreateSystemThread returned %08x\n", Status); + return Status; + } + + return STATUS_SUCCESS; +} + +NTSTATUS query_balance(device_extension* Vcb, void* data, ULONG length) { + btrfs_query_balance* bqb = (btrfs_query_balance*)data; + + if (length < sizeof(btrfs_query_balance) || !data) + return STATUS_INVALID_PARAMETER; + + if (!Vcb->balance.thread) { + bqb->status = BTRFS_BALANCE_STOPPED; + + if (!NT_SUCCESS(Vcb->balance.status)) { + bqb->status |= BTRFS_BALANCE_ERROR; + bqb->error = Vcb->balance.status; + } + + return STATUS_SUCCESS; + } + + bqb->status = Vcb->balance.paused ? 
BTRFS_BALANCE_PAUSED : BTRFS_BALANCE_RUNNING; + + if (Vcb->balance.removing) + bqb->status |= BTRFS_BALANCE_REMOVAL; + + if (!NT_SUCCESS(Vcb->balance.status)) + bqb->status |= BTRFS_BALANCE_ERROR; + + bqb->chunks_left = Vcb->balance.chunks_left; + bqb->total_chunks = Vcb->balance.total_chunks; + bqb->error = Vcb->balance.status; + RtlCopyMemory(&bqb->data_opts, &Vcb->balance.opts[BALANCE_OPTS_DATA], sizeof(btrfs_balance_opts)); + RtlCopyMemory(&bqb->metadata_opts, &Vcb->balance.opts[BALANCE_OPTS_METADATA], sizeof(btrfs_balance_opts)); + RtlCopyMemory(&bqb->system_opts, &Vcb->balance.opts[BALANCE_OPTS_SYSTEM], sizeof(btrfs_balance_opts)); + + return STATUS_SUCCESS; +} + +NTSTATUS pause_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) + return STATUS_PRIVILEGE_NOT_HELD; + + if (!Vcb->balance.thread) + return STATUS_DEVICE_NOT_READY; + + if (Vcb->balance.paused) + return STATUS_DEVICE_NOT_READY; + + Vcb->balance.paused = TRUE; + KeClearEvent(&Vcb->balance.event); + + return STATUS_SUCCESS; +} + +NTSTATUS resume_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) + return STATUS_PRIVILEGE_NOT_HELD; + + if (!Vcb->balance.thread) + return STATUS_DEVICE_NOT_READY; + + if (!Vcb->balance.paused) + return STATUS_DEVICE_NOT_READY; + + if (Vcb->readonly) + return STATUS_MEDIA_WRITE_PROTECTED; + + Vcb->balance.paused = FALSE; + KeSetEvent(&Vcb->balance.event, 0, FALSE); + + return STATUS_SUCCESS; +} + +NTSTATUS stop_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) + return STATUS_PRIVILEGE_NOT_HELD; + + if (!Vcb->balance.thread) + return STATUS_DEVICE_NOT_READY; + + Vcb->balance.paused = FALSE; + Vcb->balance.stopping = TRUE; + 
Vcb->balance.cancelling = TRUE; + Vcb->balance.status = STATUS_SUCCESS; + KeSetEvent(&Vcb->balance.event, 0, FALSE); + + return STATUS_SUCCESS; +} + +NTSTATUS remove_device(device_extension* Vcb, void* data, ULONG length, KPROCESSOR_MODE processor_mode) { + UINT64 devid; + LIST_ENTRY* le; + device* dev = NULL; + NTSTATUS Status; + int i; + UINT64 num_rw_devices; + + TRACE("(%p, %p, %x)\n", Vcb, data, length); + + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) + return STATUS_PRIVILEGE_NOT_HELD; + + if (length < sizeof(UINT64)) + return STATUS_INVALID_PARAMETER; + + devid = *(UINT64*)data; + + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + + if (Vcb->readonly) { + ExReleaseResourceLite(&Vcb->tree_lock); + return STATUS_MEDIA_WRITE_PROTECTED; + } + + num_rw_devices = 0; + + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev2 = CONTAINING_RECORD(le, device, list_entry); + + if (dev2->devitem.dev_id == devid) + dev = dev2; + + if (!dev2->readonly) + num_rw_devices++; + + le = le->Flink; + } + + if (!dev) { + ExReleaseResourceLite(&Vcb->tree_lock); + WARN("device %llx not found\n", devid); + return STATUS_NOT_FOUND; + } + + if (!dev->readonly) { + if (num_rw_devices == 1) { + ExReleaseResourceLite(&Vcb->tree_lock); + WARN("not removing last non-readonly device\n"); + return STATUS_INVALID_PARAMETER; + } + + if (num_rw_devices == 4 && + ((Vcb->data_flags & BLOCK_FLAG_RAID10 || Vcb->metadata_flags & BLOCK_FLAG_RAID10 || Vcb->system_flags & BLOCK_FLAG_RAID10) || + (Vcb->data_flags & BLOCK_FLAG_RAID6 || Vcb->metadata_flags & BLOCK_FLAG_RAID6 || Vcb->system_flags & BLOCK_FLAG_RAID6)) + ) { + ExReleaseResourceLite(&Vcb->tree_lock); + ERR("would not be enough devices to satisfy RAID requirement (RAID6/10)\n"); + return STATUS_CANNOT_DELETE; + } + + if (num_rw_devices == 3 && (Vcb->data_flags & BLOCK_FLAG_RAID5 || Vcb->metadata_flags & BLOCK_FLAG_RAID5 || Vcb->system_flags & BLOCK_FLAG_RAID5)) { + 
ExReleaseResourceLite(&Vcb->tree_lock); + ERR("would not be enough devices to satisfy RAID requirement (RAID5)\n"); + return STATUS_CANNOT_DELETE; + } + + if (num_rw_devices == 2 && + ((Vcb->data_flags & BLOCK_FLAG_RAID0 || Vcb->metadata_flags & BLOCK_FLAG_RAID0 || Vcb->system_flags & BLOCK_FLAG_RAID0) || + (Vcb->data_flags & BLOCK_FLAG_RAID1 || Vcb->metadata_flags & BLOCK_FLAG_RAID1 || Vcb->system_flags & BLOCK_FLAG_RAID1)) + ) { + ExReleaseResourceLite(&Vcb->tree_lock); + ERR("would not be enough devices to satisfy RAID requirement (RAID0/1)\n"); + return STATUS_CANNOT_DELETE; + } + } + + ExReleaseResourceLite(&Vcb->tree_lock); + + if (Vcb->balance.thread) { + WARN("balance already running\n"); + return STATUS_DEVICE_NOT_READY; + } + + dev->reloc = TRUE; + + RtlZeroMemory(Vcb->balance.opts, sizeof(btrfs_balance_opts) * 3); + + for (i = 0; i < 3; i++) { + Vcb->balance.opts[i].flags = BTRFS_BALANCE_OPTS_ENABLED | BTRFS_BALANCE_OPTS_DEVID; + Vcb->balance.opts[i].devid = devid; + } + + Vcb->balance.paused = FALSE; + Vcb->balance.removing = TRUE; + KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused); + + Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb); + if (!NT_SUCCESS(Status)) { + ERR("PsCreateSystemThread returned %08x\n", Status); + dev->reloc = FALSE; + return Status; + } + + return STATUS_SUCCESS; +} diff --git a/reactos/drivers/filesystems/btrfs/btrfs.c b/reactos/drivers/filesystems/btrfs/btrfs.c index ed8a2836f20..2a54cc3c4c1 100644 --- a/reactos/drivers/filesystems/btrfs/btrfs.c +++ b/reactos/drivers/filesystems/btrfs/btrfs.c @@ -52,6 +52,8 @@ BOOL have_sse42 = FALSE, have_sse2 = FALSE; UINT64 num_reads = 0; LIST_ENTRY uid_map_list; LIST_ENTRY volumes; +ERESOURCE volumes_lock; +LIST_ENTRY pnp_disks; LIST_ENTRY VcbList; ERESOURCE global_loading_lock; UINT32 debug_log_level = 0; @@ -63,8 +65,15 @@ UINT32 mount_flush_interval = 30; UINT32 mount_max_inline = 2048; UINT32 
mount_raid5_recalculation = 1; UINT32 mount_raid6_recalculation = 1; +UINT32 mount_skip_balance = 0; BOOL log_started = FALSE; UNICODE_STRING log_device, log_file, registry_path; +tPsUpdateDiskCounters PsUpdateDiskCounters; +tCcCopyReadEx CcCopyReadEx; +tCcCopyWriteEx CcCopyWriteEx; +tCcSetAdditionalCacheAttributesEx CcSetAdditionalCacheAttributesEx; +BOOL diskacc = FALSE; +void* notification_entry = NULL; #ifdef _DEBUG PFILE_OBJECT comfo = NULL; @@ -72,8 +81,6 @@ PDEVICE_OBJECT comdo = NULL; HANDLE log_handle = NULL; #endif -int __security_cookie = __LINE__; - static NTSTATUS STDCALL close_file(device_extension* Vcb, PFILE_OBJECT FileObject); typedef struct { @@ -263,6 +270,13 @@ static void STDCALL DriverUnload(PDRIVER_OBJECT DriverObject) { free_cache(); IoUnregisterFileSystem(DriverObject->DeviceObject); + + if (notification_entry) +#ifdef __REACTOS__ + IoUnregisterPlugPlayNotification(notification_entry); +#else + IoUnregisterPlugPlayNotificationEx(notification_entry); +#endif dosdevice_nameW.Buffer = dosdevice_name; dosdevice_nameW.Length = dosdevice_nameW.MaximumLength = (USHORT)wcslen(dosdevice_name) * sizeof(WCHAR); @@ -280,6 +294,7 @@ static void STDCALL DriverUnload(PDRIVER_OBJECT DriverObject) { } // FIXME - free volumes and their devpaths + // FIXME - free pnp_disks and their devpaths #ifdef _DEBUG if (comfo) @@ -291,6 +306,8 @@ static void STDCALL DriverUnload(PDRIVER_OBJECT DriverObject) { ExDeleteResourceLite(&global_loading_lock); + ExDeleteResourceLite(&volumes_lock); + if (log_device.Buffer) ExFreePool(log_device.Buffer); @@ -342,46 +359,18 @@ static BOOL STDCALL get_last_inode(device_extension* Vcb, root* r, PIRP Irp) { return TRUE; } -BOOL STDCALL get_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char* name, UINT32 crc32, UINT8** data, UINT16* datalen, PIRP Irp) { - KEY searchkey; - traverse_ptr tp; - DIR_ITEM* xa; - ULONG size, xasize; - NTSTATUS Status; - - TRACE("(%p, %llx, %llx, %s, %08x, %p, %p)\n", Vcb, subvol->id, inode, name, 
crc32, data, datalen); - - searchkey.obj_id = inode; - searchkey.obj_type = TYPE_XATTR_ITEM; - searchkey.offset = crc32; - - Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - return FALSE; - } - - if (keycmp(tp.item->key, searchkey)) { - TRACE("could not find item (%llx,%x,%llx)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); - return FALSE; - } - - if (tp.item->size < sizeof(DIR_ITEM)) { - ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM)); - return FALSE; - } - - xa = (DIR_ITEM*)tp.item->data; - size = tp.item->size; +BOOL extract_xattr(void* item, USHORT size, char* name, UINT8** data, UINT16* datalen) { + DIR_ITEM* xa = (DIR_ITEM*)item; + USHORT xasize; while (TRUE) { if (size < sizeof(DIR_ITEM) || size < (sizeof(DIR_ITEM) - 1 + xa->m + xa->n)) { - WARN("(%llx,%x,%llx) is truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); + WARN("DIR_ITEM is truncated\n"); return FALSE; } if (xa->n == strlen(name) && RtlCompareMemory(name, xa->name, xa->n) == xa->n) { - TRACE("found xattr %s in (%llx,%x,%llx)\n", name, searchkey.obj_id, searchkey.obj_type, searchkey.offset); + TRACE("found xattr %s\n", name); *datalen = xa->m; @@ -408,11 +397,41 @@ BOOL STDCALL get_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char* break; } - TRACE("xattr %s not found in (%llx,%x,%llx)\n", name, searchkey.obj_id, searchkey.obj_type, searchkey.offset); + TRACE("xattr %s not found\n", name); return FALSE; } +BOOL STDCALL get_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char* name, UINT32 crc32, UINT8** data, UINT16* datalen, PIRP Irp) { + KEY searchkey; + traverse_ptr tp; + NTSTATUS Status; + + TRACE("(%p, %llx, %llx, %s, %08x, %p, %p)\n", Vcb, subvol->id, inode, name, crc32, data, datalen); + + searchkey.obj_id = inode; + 
searchkey.obj_type = TYPE_XATTR_ITEM; + searchkey.offset = crc32; + + Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("error - find_item returned %08x\n", Status); + return FALSE; + } + + if (keycmp(tp.item->key, searchkey)) { + TRACE("could not find item (%llx,%x,%llx)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); + return FALSE; + } + + if (tp.item->size < sizeof(DIR_ITEM)) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM)); + return FALSE; + } + + return extract_xattr(tp.item->data, tp.item->size, name, data, datalen); +} + static NTSTATUS STDCALL drv_close(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { NTSTATUS Status; PIO_STACK_LOCATION IrpSp; @@ -603,7 +622,10 @@ static NTSTATUS STDCALL drv_query_volume_information(IN PDEVICE_OBJECT DeviceObj TRACE("FileFsDeviceInformation\n"); ffdi->DeviceType = FILE_DEVICE_DISK; - ffdi->Characteristics = Vcb->devices[0].devobj->Characteristics; + + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + ffdi->Characteristics = first_device(Vcb)->devobj->Characteristics; + ExReleaseResourceLite(&Vcb->tree_lock); if (Vcb->readonly) ffdi->Characteristics |= FILE_READ_ONLY_DEVICE; @@ -716,7 +738,7 @@ static NTSTATUS STDCALL drv_query_volume_information(IN PDEVICE_OBJECT DeviceObj Status = overflow ? 
STATUS_BUFFER_OVERFLOW : STATUS_SUCCESS; break; } - + #ifndef __REACTOS__ #ifdef _MSC_VER // not in mingw yet case FileFsSectorSizeInformation: @@ -947,9 +969,9 @@ NTSTATUS create_root(device_extension* Vcb, UINT64 id, root** rootptr, BOOL no_t t->new_address = 0; t->has_new_address = FALSE; t->updated_extents = FALSE; - t->flags = tp.tree->flags; InsertTailList(&Vcb->trees, &t->list_entry); + t->list_entry_hash.Flink = NULL; t->write = TRUE; Vcb->need_write = TRUE; @@ -1183,6 +1205,61 @@ void STDCALL tree_test(void* context) { } #endif +// static void test_calc_thread(device_extension* Vcb) { +// UINT8* data; +// ULONG sectors, max_sectors, i, j; +// calc_job* cj; +// LARGE_INTEGER* sertimes; +// LARGE_INTEGER* partimes; +// LARGE_INTEGER time1, time2; +// +// max_sectors = 256; +// +// sertimes = ExAllocatePoolWithTag(PagedPool, sizeof(LARGE_INTEGER) * max_sectors, ALLOC_TAG); +// partimes = ExAllocatePoolWithTag(PagedPool, sizeof(LARGE_INTEGER) * max_sectors, ALLOC_TAG); +// RtlZeroMemory(sertimes, sizeof(LARGE_INTEGER) * max_sectors); +// RtlZeroMemory(partimes, sizeof(LARGE_INTEGER) * max_sectors); +// +// for (sectors = 1; sectors <= max_sectors; sectors++) { +// data = ExAllocatePoolWithTag(PagedPool, sectors * Vcb->superblock.sector_size, ALLOC_TAG); +// RtlZeroMemory(data, sectors * Vcb->superblock.sector_size); +// +// for (j = 0; j < 100; j++) { +// time1 = KeQueryPerformanceCounter(NULL); +// +// for (i = 0; i < sectors; i++) { +// UINT32 tmp; +// +// tmp = ~calc_crc32c(0xffffffff, data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); +// } +// +// time2 = KeQueryPerformanceCounter(NULL); +// +// sertimes[sectors - 1].QuadPart += time2.QuadPart - time1.QuadPart; +// +// time1 = KeQueryPerformanceCounter(NULL); +// +// add_calc_job(Vcb, data, sectors, &cj); +// KeWaitForSingleObject(&cj->event, Executive, KernelMode, FALSE, NULL); +// +// time2 = KeQueryPerformanceCounter(NULL); +// +// partimes[sectors - 1].QuadPart += time2.QuadPart - 
time1.QuadPart; +// +// free_calc_job(cj); +// } +// +// ExFreePool(data); +// } +// +// for (sectors = 1; sectors <= max_sectors; sectors++) { +// ERR("%u sectors: serial %llu, parallel %llu\n", sectors, sertimes[sectors - 1].QuadPart, partimes[sectors - 1].QuadPart); +// } +// +// ExFreePool(partimes); +// ExFreePool(sertimes); +// } + static NTSTATUS STDCALL set_label(device_extension* Vcb, FILE_FS_LABEL_INFORMATION* ffli) { ULONG utf8len; NTSTATUS Status; @@ -1234,6 +1311,7 @@ static NTSTATUS STDCALL set_label(device_extension* Vcb, FILE_FS_LABEL_INFORMATI // test_creating_root(Vcb); // test_alloc_chunk(Vcb); // test_space_list(Vcb); +// test_calc_thread(Vcb); Vcb->need_write = TRUE; @@ -1312,245 +1390,6 @@ exit: return Status; } -NTSTATUS delete_dir_item(device_extension* Vcb, root* subvol, UINT64 parinode, UINT32 crc32, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback) { - KEY searchkey; - traverse_ptr tp; - NTSTATUS Status; - - searchkey.obj_id = parinode; - searchkey.obj_type = TYPE_DIR_ITEM; - searchkey.offset = crc32; - - Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - return Status; - } - - if (!keycmp(searchkey, tp.item->key)) { - if (tp.item->size < sizeof(DIR_ITEM)) { - WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM)); - } else { - DIR_ITEM* di; - LONG len; - - di = (DIR_ITEM*)tp.item->data; - len = tp.item->size; - - do { - if (di->n == utf8->Length && RtlCompareMemory(di->name, utf8->Buffer, di->n) == di->n) { - ULONG newlen = tp.item->size - (sizeof(DIR_ITEM) - sizeof(char) + di->n + di->m); - - delete_tree_item(Vcb, &tp, rollback); - - if (newlen == 0) { - TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - } else { - UINT8 *newdi = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *dioff; - - if 
(!newdi) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - TRACE("modifying (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - - if ((UINT8*)di > tp.item->data) { - RtlCopyMemory(newdi, tp.item->data, (UINT8*)di - tp.item->data); - dioff = newdi + ((UINT8*)di - tp.item->data); - } else { - dioff = newdi; - } - - if ((UINT8*)&di->name[di->n + di->m] - tp.item->data < tp.item->size) - RtlCopyMemory(dioff, &di->name[di->n + di->m], tp.item->size - ((UINT8*)&di->name[di->n + di->m] - tp.item->data)); - - insert_tree_item(Vcb, subvol, parinode, TYPE_DIR_ITEM, crc32, newdi, newlen, NULL, Irp, rollback); - } - - break; - } - - len -= sizeof(DIR_ITEM) - sizeof(char) + di->n + di->m; - di = (DIR_ITEM*)&di->name[di->n + di->m]; - } while (len > 0); - } - } else { - WARN("could not find DIR_ITEM for crc32 %08x\n", crc32); - } - - return STATUS_SUCCESS; -} - -NTSTATUS delete_inode_ref(device_extension* Vcb, root* subvol, UINT64 inode, UINT64 parinode, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback) { - KEY searchkey; - traverse_ptr tp; - BOOL changed = FALSE; - NTSTATUS Status; - - searchkey.obj_id = inode; - searchkey.obj_type = TYPE_INODE_REF; - searchkey.offset = parinode; - - Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - return Status; - } - - if (!keycmp(searchkey, tp.item->key)) { - if (tp.item->size < sizeof(INODE_REF)) { - WARN("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_REF)); - } else { - INODE_REF* ir; - ULONG len; - - ir = (INODE_REF*)tp.item->data; - len = tp.item->size; - - do { - ULONG itemlen; - - if (len < sizeof(INODE_REF) || len < sizeof(INODE_REF) - 1 + ir->n) { - ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - break; - } - - itemlen = 
sizeof(INODE_REF) - sizeof(char) + ir->n; - - if (ir->n == utf8->Length && RtlCompareMemory(ir->name, utf8->Buffer, ir->n) == ir->n) { - ULONG newlen = tp.item->size - itemlen; - - delete_tree_item(Vcb, &tp, rollback); - changed = TRUE; - - if (newlen == 0) { - TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - } else { - UINT8 *newir = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *iroff; - - if (!newir) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - TRACE("modifying (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - - if ((UINT8*)ir > tp.item->data) { - RtlCopyMemory(newir, tp.item->data, (UINT8*)ir - tp.item->data); - iroff = newir + ((UINT8*)ir - tp.item->data); - } else { - iroff = newir; - } - - if ((UINT8*)&ir->name[ir->n] - tp.item->data < tp.item->size) - RtlCopyMemory(iroff, &ir->name[ir->n], tp.item->size - ((UINT8*)&ir->name[ir->n] - tp.item->data)); - - insert_tree_item(Vcb, subvol, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newir, newlen, NULL, Irp, rollback); - } - - break; - } - - if (len > itemlen) { - len -= itemlen; - ir = (INODE_REF*)&ir->name[ir->n]; - } else - break; - } while (len > 0); - - if (!changed) { - WARN("found INODE_REF entry, but couldn't find filename\n"); - } - } - } else { - WARN("could not find INODE_REF entry for inode %llx in %llx\n", searchkey.obj_id, searchkey.offset); - } - - if (changed) - return STATUS_SUCCESS; - - if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF)) - return STATUS_INTERNAL_ERROR; - - searchkey.obj_id = inode; - searchkey.obj_type = TYPE_INODE_EXTREF; - searchkey.offset = calc_crc32c((UINT32)parinode, (UINT8*)utf8->Buffer, utf8->Length); - - Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - return Status; - } - - if (!keycmp(searchkey, 
tp.item->key)) { - if (tp.item->size < sizeof(INODE_EXTREF)) { - WARN("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_EXTREF)); - } else { - INODE_EXTREF* ier; - ULONG len; - - ier = (INODE_EXTREF*)tp.item->data; - len = tp.item->size; - - do { - ULONG itemlen; - - if (len < sizeof(INODE_EXTREF) || len < sizeof(INODE_EXTREF) - 1 + ier->n) { - ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - break; - } - - itemlen = sizeof(INODE_EXTREF) - sizeof(char) + ier->n; - - if (ier->dir == parinode && ier->n == utf8->Length && RtlCompareMemory(ier->name, utf8->Buffer, ier->n) == ier->n) { - ULONG newlen = tp.item->size - itemlen; - - delete_tree_item(Vcb, &tp, rollback); - changed = TRUE; - - if (newlen == 0) { - TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - } else { - UINT8 *newier = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *ieroff; - - if (!newier) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - TRACE("modifying (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - - if ((UINT8*)ier > tp.item->data) { - RtlCopyMemory(newier, tp.item->data, (UINT8*)ier - tp.item->data); - ieroff = newier + ((UINT8*)ier - tp.item->data); - } else { - ieroff = newier; - } - - if ((UINT8*)&ier->name[ier->n] - tp.item->data < tp.item->size) - RtlCopyMemory(ieroff, &ier->name[ier->n], tp.item->size - ((UINT8*)&ier->name[ier->n] - tp.item->data)); - - insert_tree_item(Vcb, subvol, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newier, newlen, NULL, Irp, rollback); - } - - break; - } - - if (len > itemlen) { - len -= itemlen; - ier = (INODE_EXTREF*)&ier->name[ier->n]; - } else - break; - } while (len > 0); - } - } else { - WARN("couldn't find INODE_EXTREF entry either (offset = %08x)\n", 
(UINT32)searchkey.offset); - } - - return changed ? STATUS_SUCCESS : STATUS_INTERNAL_ERROR; -} - static WCHAR* file_desc_fcb(fcb* fcb) { char s[60]; UNICODE_STRING us; @@ -1811,7 +1650,7 @@ void _free_fcb(fcb* fcb, const char* func, const char* file, unsigned int line) ExDeleteResourceLite(&fcb->nonpaged->resource); ExDeleteResourceLite(&fcb->nonpaged->paging_resource); - ExDeleteResourceLite(&fcb->nonpaged->index_lock); + ExDeleteResourceLite(&fcb->nonpaged->dir_children_lock); ExFreePool(fcb->nonpaged); if (fcb->sd) @@ -1836,19 +1675,13 @@ void _free_fcb(fcb* fcb, const char* func, const char* file, unsigned int line) LIST_ENTRY* le = RemoveHeadList(&fcb->extents); extent* ext = CONTAINING_RECORD(le, extent, list_entry); + if (ext->csum) + ExFreePool(ext->csum); + ExFreePool(ext->data); ExFreePool(ext); } - while (!IsListEmpty(&fcb->index_list)) { - LIST_ENTRY* le = RemoveHeadList(&fcb->index_list); - index_entry* ie = CONTAINING_RECORD(le, index_entry, list_entry); - - if (ie->utf8.Buffer) ExFreePool(ie->utf8.Buffer); - if (ie->filepart_uc.Buffer) ExFreePool(ie->filepart_uc.Buffer); - ExFreePool(ie); - } - while (!IsListEmpty(&fcb->hardlinks)) { LIST_ENTRY* le = RemoveHeadList(&fcb->hardlinks); hardlink* hl = CONTAINING_RECORD(le, hardlink, list_entry); @@ -1862,6 +1695,22 @@ void _free_fcb(fcb* fcb, const char* func, const char* file, unsigned int line) ExFreePool(hl); } + while (!IsListEmpty(&fcb->dir_children_index)) { + LIST_ENTRY* le = RemoveHeadList(&fcb->dir_children_index); + dir_child* dc = CONTAINING_RECORD(le, dir_child, list_entry_index); + + ExFreePool(dc->utf8.Buffer); + ExFreePool(dc->name.Buffer); + ExFreePool(dc->name_uc.Buffer); + ExFreePool(dc); + } + + if (fcb->hash_ptrs) + ExFreePool(fcb->hash_ptrs); + + if (fcb->hash_ptrs_uc) + ExFreePool(fcb->hash_ptrs_uc); + FsRtlUninitializeFileLock(&fcb->lock); ExFreePool(fcb); @@ -1932,6 +1781,9 @@ void _free_fileref(file_ref* fr, const char* func, const char* file, unsigned in if (fr->fcb->fileref == 
fr) fr->fcb->fileref = NULL; + if (fr->dc) + fr->dc->fileref = NULL; + if (fr->list_entry.Flink) RemoveEntryList(&fr->list_entry); @@ -2017,8 +1869,15 @@ void STDCALL uninit(device_extension* Vcb, BOOL flush) { RemoveEntryList(&Vcb->list_entry); + if (Vcb->balance.thread) { + Vcb->balance.paused = FALSE; + Vcb->balance.stopping = TRUE; + KeSetEvent(&Vcb->balance.event, 0, FALSE); + KeWaitForSingleObject(&Vcb->balance.finished, Executive, KernelMode, FALSE, NULL); + } + Status = registry_mark_volume_unmounted(&Vcb->superblock.uuid); - if (!NT_SUCCESS(Status)) + if (!NT_SUCCESS(Status) && Status != STATUS_TOO_LATE) WARN("registry_mark_volume_unmounted returned %08x\n", Status); if (flush) { @@ -2036,6 +1895,21 @@ void STDCALL uninit(device_extension* Vcb, BOOL flush) { ExReleaseResourceLite(&Vcb->tree_lock); } + for (i = 0; i < Vcb->calcthreads.num_threads; i++) { + Vcb->calcthreads.threads[i].quit = TRUE; + } + + KeSetEvent(&Vcb->calcthreads.event, 0, FALSE); + + for (i = 0; i < Vcb->calcthreads.num_threads; i++) { + KeWaitForSingleObject(&Vcb->calcthreads.threads[i].finished, Executive, KernelMode, FALSE, NULL); + + ZwClose(Vcb->calcthreads.threads[i].handle); + } + + ExDeleteResourceLite(&Vcb->calcthreads.lock); + ExFreePool(Vcb->calcthreads.threads); + time.QuadPart = 0; KeSetTimer(&Vcb->flush_thread_timer, time, NULL); // trigger the timer early KeWaitForSingleObject(&Vcb->flush_thread_finished, Executive, KernelMode, FALSE, NULL); @@ -2101,28 +1975,23 @@ void STDCALL uninit(device_extension* Vcb, BOOL flush) { // FIXME - free any open fcbs? 
- while (!IsListEmpty(&Vcb->sector_checksums)) { - LIST_ENTRY* le = RemoveHeadList(&Vcb->sector_checksums); - changed_sector* cs = (changed_sector*)le; + while (!IsListEmpty(&Vcb->devices)) { + LIST_ENTRY* le = RemoveHeadList(&Vcb->devices); + device* dev = CONTAINING_RECORD(le, device, list_entry); - ExFreePool(cs); - } - - for (i = 0; i < Vcb->superblock.num_devices; i++) { - while (!IsListEmpty(&Vcb->devices[i].space)) { - LIST_ENTRY* le = RemoveHeadList(&Vcb->devices[i].space); - space* s = CONTAINING_RECORD(le, space, list_entry); + while (!IsListEmpty(&dev->space)) { + LIST_ENTRY* le2 = RemoveHeadList(&dev->space); + space* s = CONTAINING_RECORD(le2, space, list_entry); ExFreePool(s); } + + ExFreePool(dev); } - ExFreePool(Vcb->devices); - ExDeleteResourceLite(&Vcb->fcb_lock); ExDeleteResourceLite(&Vcb->load_lock); ExDeleteResourceLite(&Vcb->tree_lock); - ExDeleteResourceLite(&Vcb->checksum_lock); ExDeleteResourceLite(&Vcb->chunk_lock); ExDeletePagedLookasideList(&Vcb->tree_data_lookaside); @@ -2162,12 +2031,13 @@ NTSTATUS delete_fileref(file_ref* fileref, PFILE_OBJECT FileObject, PIRP Irp, LI mark_fcb_dirty(fileref->fcb); + fileref->fcb->inode_item_changed = TRUE; + if (fileref->fcb->inode_item.st_nlink > 1) { fileref->fcb->inode_item.st_nlink--; fileref->fcb->inode_item.transid = fileref->fcb->Vcb->superblock.generation; fileref->fcb->inode_item.sequence++; fileref->fcb->inode_item.st_ctime = now; - fileref->fcb->inode_item_changed = TRUE; } else { fileref->fcb->deleted = TRUE; @@ -2234,6 +2104,22 @@ NTSTATUS delete_fileref(file_ref* fileref, PFILE_OBJECT FileObject, PIRP Irp, LI mark_fcb_dirty(fileref->fcb); } + // remove dir_child from parent + + if (fileref->dc) { + ExAcquireResourceExclusiveLite(&fileref->parent->fcb->nonpaged->dir_children_lock, TRUE); + RemoveEntryList(&fileref->dc->list_entry_index); + remove_dir_child_from_hash_lists(fileref->parent->fcb, fileref->dc); + ExReleaseResourceLite(&fileref->parent->fcb->nonpaged->dir_children_lock); + + 
ExFreePool(fileref->dc->utf8.Buffer); + ExFreePool(fileref->dc->name.Buffer); + ExFreePool(fileref->dc->name_uc.Buffer); + ExFreePool(fileref->dc); + + fileref->dc = NULL; + } + // update INODE_ITEM of parent TRACE("delete file %.*S\n", fileref->filepart.Length / sizeof(WCHAR), fileref->filepart.Buffer); @@ -2395,6 +2281,32 @@ exit2: return Status; } +BOOL get_file_attributes_from_xattr(char* val, UINT16 len, ULONG* atts) { + if (len > 2 && val[0] == '0' && val[1] == 'x') { + int i; + ULONG dosnum = 0; + + for (i = 2; i < len; i++) { + dosnum *= 0x10; + + if (val[i] >= '0' && val[i] <= '9') + dosnum |= val[i] - '0'; + else if (val[i] >= 'a' && val[i] <= 'f') + dosnum |= val[i] + 10 - 'a'; + else if (val[i] >= 'A' && val[i] <= 'F') + dosnum |= val[i] + 10 - 'A'; + } + + TRACE("DOSATTRIB: %08x\n", dosnum); + + *atts = dosnum; + + return TRUE; + } + + return FALSE; +} + ULONG STDCALL get_file_attributes(device_extension* Vcb, INODE_ITEM* ii, root* r, UINT64 inode, UINT8 type, BOOL dotfile, BOOL ignore_xa, PIRP Irp) { ULONG att; char* eaval; @@ -2403,33 +2315,24 @@ ULONG STDCALL get_file_attributes(device_extension* Vcb, INODE_ITEM* ii, root* r // ii can be NULL if (!ignore_xa && get_xattr(Vcb, r, inode, EA_DOSATTRIB, EA_DOSATTRIB_HASH, (UINT8**)&eaval, &ealen, Irp)) { - if (ealen > 2) { - if (eaval[0] == '0' && eaval[1] == 'x') { - int i; - ULONG dosnum = 0; - - for (i = 2; i < ealen; i++) { - dosnum *= 0x10; - - if (eaval[i] >= '0' && eaval[i] <= '9') - dosnum |= eaval[i] - '0'; - else if (eaval[i] >= 'a' && eaval[i] <= 'f') - dosnum |= eaval[i] + 10 - 'a'; - else if (eaval[i] >= 'A' && eaval[i] <= 'F') - dosnum |= eaval[i] + 10 - 'a'; - } - - TRACE("DOSATTRIB: %08x\n", dosnum); - - ExFreePool(eaval); - - if (type == BTRFS_TYPE_DIRECTORY) - dosnum |= FILE_ATTRIBUTE_DIRECTORY; - else if (type == BTRFS_TYPE_SYMLINK) - dosnum |= FILE_ATTRIBUTE_REPARSE_POINT; - - return dosnum; + ULONG dosnum = 0; + + if (get_file_attributes_from_xattr(eaval, ealen, &dosnum)) { + 
ExFreePool(eaval); + + if (type == BTRFS_TYPE_DIRECTORY) + dosnum |= FILE_ATTRIBUTE_DIRECTORY; + else if (type == BTRFS_TYPE_SYMLINK) + dosnum |= FILE_ATTRIBUTE_REPARSE_POINT; + + if (inode == SUBVOL_ROOT_INODE) { + if (r->root_item.flags & BTRFS_SUBVOL_READONLY) + dosnum |= FILE_ATTRIBUTE_READONLY; + else + dosnum &= ~FILE_ATTRIBUTE_READONLY; } + + return dosnum; } ExFreePool(eaval); @@ -2455,6 +2358,13 @@ ULONG STDCALL get_file_attributes(device_extension* Vcb, INODE_ITEM* ii, root* r att |= FILE_ATTRIBUTE_ARCHIVE; + if (inode == SUBVOL_ROOT_INODE) { + if (r->root_item.flags & BTRFS_SUBVOL_READONLY) + att |= FILE_ATTRIBUTE_READONLY; + else + att &= ~FILE_ATTRIBUTE_READONLY; + } + // FIXME - get READONLY from ii->st_mode // FIXME - return SYSTEM for block/char devices? @@ -2464,7 +2374,7 @@ ULONG STDCALL get_file_attributes(device_extension* Vcb, INODE_ITEM* ii, root* r return att; } -static NTSTATUS sync_read_phys(PDEVICE_OBJECT DeviceObject, LONGLONG StartingOffset, ULONG Length, PUCHAR Buffer, BOOL override) { +NTSTATUS sync_read_phys(PDEVICE_OBJECT DeviceObject, LONGLONG StartingOffset, ULONG Length, PUCHAR Buffer, BOOL override) { IO_STATUS_BLOCK* IoStatus; LARGE_INTEGER Offset; PIRP Irp; @@ -2568,7 +2478,7 @@ static NTSTATUS STDCALL read_superblock(device_extension* Vcb, PDEVICE_OBJECT de unsigned int i, to_read; UINT8 valid_superblocks; - to_read = sector_align(sizeof(superblock), device->SectorSize); + to_read = device->SectorSize == 0 ? 
sizeof(superblock) : sector_align(sizeof(superblock), device->SectorSize); sb = ExAllocatePoolWithTag(NonPagedPool, to_read, ALLOC_TAG); if (!sb) { @@ -2592,15 +2502,22 @@ static NTSTATUS STDCALL read_superblock(device_extension* Vcb, PDEVICE_OBJECT de return Status; } - TRACE("got superblock %u!\n", i); - - crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum)); - - if (crc32 != *((UINT32*)sb->checksum)) - WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)sb->checksum)); - else if (valid_superblocks == 0 || sb->generation > Vcb->superblock.generation) { - RtlCopyMemory(&Vcb->superblock, sb, sizeof(superblock)); - valid_superblocks++; + if (sb->magic != BTRFS_MAGIC) { + if (i == 0) { + TRACE("not a BTRFS volume\n"); + return STATUS_UNRECOGNIZED_VOLUME; + } + } else { + TRACE("got superblock %u!\n", i); + + crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum)); + + if (crc32 != *((UINT32*)sb->checksum)) + WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)sb->checksum)); + else if (valid_superblocks == 0 || sb->generation > Vcb->superblock.generation) { + RtlCopyMemory(&Vcb->superblock, sb, sizeof(superblock)); + valid_superblocks++; + } } i++; @@ -2912,20 +2829,46 @@ static NTSTATUS find_disk_holes(device_extension* Vcb, device* dev, PIRP Irp) { return STATUS_SUCCESS; } +static void add_device_to_list(device_extension* Vcb, device* dev) { + LIST_ENTRY* le; + + le = Vcb->devices.Flink; + + while (le != &Vcb->devices) { + device* dev2 = CONTAINING_RECORD(le, device, list_entry); + + if (dev2->devitem.dev_id > dev->devitem.dev_id) { + InsertHeadList(le->Blink, &dev->list_entry); + return; + } + + le = le->Flink; + } + + InsertTailList(&Vcb->devices, &dev->list_entry); +} + device* find_device_from_uuid(device_extension* Vcb, BTRFS_UUID* uuid) { - UINT64 i; + LIST_ENTRY* le; - for (i = 0; i < Vcb->devices_loaded; i++) { - TRACE("device %llx, uuid 
%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x\n", i, - Vcb->devices[i].devitem.device_uuid.uuid[0], Vcb->devices[i].devitem.device_uuid.uuid[1], Vcb->devices[i].devitem.device_uuid.uuid[2], Vcb->devices[i].devitem.device_uuid.uuid[3], Vcb->devices[i].devitem.device_uuid.uuid[4], Vcb->devices[i].devitem.device_uuid.uuid[5], Vcb->devices[i].devitem.device_uuid.uuid[6], Vcb->devices[i].devitem.device_uuid.uuid[7], - Vcb->devices[i].devitem.device_uuid.uuid[8], Vcb->devices[i].devitem.device_uuid.uuid[9], Vcb->devices[i].devitem.device_uuid.uuid[10], Vcb->devices[i].devitem.device_uuid.uuid[11], Vcb->devices[i].devitem.device_uuid.uuid[12], Vcb->devices[i].devitem.device_uuid.uuid[13], Vcb->devices[i].devitem.device_uuid.uuid[14], Vcb->devices[i].devitem.device_uuid.uuid[15]); + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + TRACE("device %llx, uuid %02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x\n", dev->devitem.dev_id, + dev->devitem.device_uuid.uuid[0], dev->devitem.device_uuid.uuid[1], dev->devitem.device_uuid.uuid[2], dev->devitem.device_uuid.uuid[3], dev->devitem.device_uuid.uuid[4], dev->devitem.device_uuid.uuid[5], dev->devitem.device_uuid.uuid[6], dev->devitem.device_uuid.uuid[7], + dev->devitem.device_uuid.uuid[8], dev->devitem.device_uuid.uuid[9], dev->devitem.device_uuid.uuid[10], dev->devitem.device_uuid.uuid[11], dev->devitem.device_uuid.uuid[12], dev->devitem.device_uuid.uuid[13], dev->devitem.device_uuid.uuid[14], dev->devitem.device_uuid.uuid[15]); - if (Vcb->devices[i].devobj && RtlCompareMemory(&Vcb->devices[i].devitem.device_uuid, uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { - TRACE("returning device %llx\n", i); - return &Vcb->devices[i]; + if (dev->devobj && RtlCompareMemory(&dev->devitem.device_uuid, uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { + TRACE("returning device %llx\n", dev->devitem.dev_id); + return dev; } + + 
le = le->Flink; } + ExAcquireResourceSharedLite(&volumes_lock, TRUE); + if (Vcb->devices_loaded < Vcb->superblock.num_devices && !IsListEmpty(&volumes)) { LIST_ENTRY* le = volumes.Flink; @@ -2938,9 +2881,11 @@ device* find_device_from_uuid(device_extension* Vcb, BTRFS_UUID* uuid) { NTSTATUS Status; PFILE_OBJECT FileObject; PDEVICE_OBJECT DeviceObject; + device* dev; Status = IoGetDeviceObjectPointer(&v->devpath, FILE_READ_ATTRIBUTES, &FileObject, &DeviceObject); if (!NT_SUCCESS(Status)) { + ExReleaseResourceLite(&volumes_lock); ERR("IoGetDeviceObjectPointer(%.*S) returned %08x\n", v->devpath.Length / sizeof(WCHAR), v->devpath.Buffer, Status); return NULL; } @@ -2950,20 +2895,38 @@ device* find_device_from_uuid(device_extension* Vcb, BTRFS_UUID* uuid) { ObReferenceObject(DeviceObject); ObDereferenceObject(FileObject); - Vcb->devices[Vcb->devices_loaded].devobj = DeviceObject; - Vcb->devices[Vcb->devices_loaded].devitem.device_uuid = *uuid; - Vcb->devices[Vcb->devices_loaded].seeding = v->seeding; - Vcb->devices[Vcb->devices_loaded].readonly = Vcb->devices[Vcb->devices_loaded].seeding; - Vcb->devices[Vcb->devices_loaded].removable = FALSE; + dev = ExAllocatePoolWithTag(NonPagedPool, sizeof(device), ALLOC_TAG); + if (!dev) { + ExReleaseResourceLite(&volumes_lock); + ERR("out of memory\n"); + ObDereferenceObject(DeviceObject); + return NULL; + } + + RtlZeroMemory(dev, sizeof(device)); + dev->devobj = DeviceObject; + dev->devitem.device_uuid = *uuid; + dev->devitem.dev_id = v->devnum; + dev->seeding = v->seeding; + dev->readonly = dev->seeding; + dev->reloc = FALSE; + dev->removable = FALSE; + dev->disk_num = v->disk_num; + dev->part_num = v->part_num; + add_device_to_list(Vcb, dev); Vcb->devices_loaded++; - return &Vcb->devices[Vcb->devices_loaded - 1]; + ExReleaseResourceLite(&volumes_lock); + + return dev; } le = le->Flink; } } + ExReleaseResourceLite(&volumes_lock); + WARN("could not find device with uuid 
%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x\n", uuid->uuid[0], uuid->uuid[1], uuid->uuid[2], uuid->uuid[3], uuid->uuid[4], uuid->uuid[5], uuid->uuid[6], uuid->uuid[7], uuid->uuid[8], uuid->uuid[9], uuid->uuid[10], uuid->uuid[11], uuid->uuid[12], uuid->uuid[13], uuid->uuid[14], uuid->uuid[15]); @@ -3005,9 +2968,8 @@ static ULONG get_device_change_count(PDEVICE_OBJECT devobj) { return cc; } -static void init_device(device_extension* Vcb, device* dev, BOOL get_length) { +void init_device(device_extension* Vcb, device* dev, BOOL get_length, BOOL get_nums) { NTSTATUS Status; - GET_LENGTH_INFORMATION gli; ULONG aptelen; ATA_PASS_THROUGH_EX* apte; IDENTIFY_DEVICE_DATA* idd; @@ -3016,18 +2978,37 @@ static void init_device(device_extension* Vcb, device* dev, BOOL get_length) { dev->change_count = dev->removable ? get_device_change_count(dev->devobj) : 0; if (get_length) { + GET_LENGTH_INFORMATION gli; + Status = dev_ioctl(dev->devobj, IOCTL_DISK_GET_LENGTH_INFO, NULL, 0, - &gli, sizeof(gli), TRUE, NULL); - if (!NT_SUCCESS(Status)) { - ERR("error reading length information: %08x\n", Status); - } + &gli, sizeof(GET_LENGTH_INFORMATION), TRUE, NULL); + + if (!NT_SUCCESS(Status)) + ERR("IOCTL_DISK_GET_LENGTH_INFO returned %08x\n", Status); dev->length = gli.Length.QuadPart; } + if (get_nums) { + STORAGE_DEVICE_NUMBER sdn; + + Status = dev_ioctl(dev->devobj, IOCTL_STORAGE_GET_DEVICE_NUMBER, NULL, 0, + &sdn, sizeof(STORAGE_DEVICE_NUMBER), TRUE, NULL); + + if (!NT_SUCCESS(Status)) { + WARN("IOCTL_STORAGE_GET_DEVICE_NUMBER returned %08x\n", Status); + dev->disk_num = 0; + dev->part_num = 0; + } else { + dev->disk_num = sdn.DeviceNumber; + dev->part_num = sdn.PartitionNumber; + } + } + dev->ssd = FALSE; dev->trim = FALSE; dev->readonly = dev->seeding; + dev->reloc = FALSE; if (!dev->readonly) { Status = dev_ioctl(dev->devobj, IOCTL_DISK_IS_WRITABLE, NULL, 0, @@ -3055,9 +3036,9 @@ static void init_device(device_extension* Vcb, device* dev, BOOL get_length) { 
Status = dev_ioctl(dev->devobj, IOCTL_ATA_PASS_THROUGH, apte, aptelen, apte, aptelen, TRUE, NULL); - if (!NT_SUCCESS(Status)) { - ERR("error calling ATA IDENTIFY DEVICE: %08x\n", Status); - } else { + if (!NT_SUCCESS(Status)) + TRACE("IOCTL_ATA_PASS_THROUGH returned %08x for IDENTIFY DEVICE\n", Status); + else { idd = (IDENTIFY_DEVICE_DATA*)((UINT8*)apte + sizeof(ATA_PASS_THROUGH_EX)); if (idd->NominalMediaRotationRate == 1) { @@ -3084,7 +3065,6 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) { KEY searchkey; BOOL b; chunk* c; - UINT64 i; NTSTATUS Status; searchkey.obj_id = 0; @@ -3092,6 +3072,8 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) { searchkey.offset = 0; Vcb->data_flags = 0; + Vcb->metadata_flags = 0; + Vcb->system_flags = 0; Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp); if (!NT_SUCCESS(Status)) { @@ -3107,21 +3089,29 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) { ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DEV_ITEM)); } else { DEV_ITEM* di = (DEV_ITEM*)tp.item->data; + LIST_ENTRY* le; BOOL done = FALSE; - for (i = 0; i < Vcb->devices_loaded; i++) { - if (Vcb->devices[i].devobj && RtlCompareMemory(&Vcb->devices[i].devitem.device_uuid, &di->device_uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { - RtlCopyMemory(&Vcb->devices[i].devitem, tp.item->data, min(tp.item->size, sizeof(DEV_ITEM))); + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (dev->devobj && RtlCompareMemory(&dev->devitem.device_uuid, &di->device_uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { + RtlCopyMemory(&dev->devitem, tp.item->data, min(tp.item->size, sizeof(DEV_ITEM))); - if (i > 0) - init_device(Vcb, &Vcb->devices[i], TRUE); + if (le != Vcb->devices.Flink) + init_device(Vcb, dev, TRUE, TRUE); done = 
TRUE; break; } + + le = le->Flink; } if (!done) { + ExAcquireResourceSharedLite(&volumes_lock, TRUE); + if (!IsListEmpty(&volumes) && Vcb->devices_loaded < Vcb->superblock.num_devices) { LIST_ENTRY* le = volumes.Flink; @@ -3131,9 +3121,11 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) { if (RtlCompareMemory(&di->device_uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { PFILE_OBJECT FileObject; PDEVICE_OBJECT DeviceObject; + device* dev; Status = IoGetDeviceObjectPointer(&v->devpath, FILE_READ_DATA | FILE_WRITE_DATA, &FileObject, &DeviceObject); if (!NT_SUCCESS(Status)) { + ExReleaseResourceLite(&volumes_lock); ERR("IoGetDeviceObjectPointer(%.*S) returned %08x\n", v->devpath.Length / sizeof(WCHAR), v->devpath.Buffer, Status); return Status; } @@ -3143,13 +3135,25 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) { ObReferenceObject(DeviceObject); ObDereferenceObject(FileObject); - Vcb->devices[Vcb->devices_loaded].devobj = DeviceObject; - RtlCopyMemory(&Vcb->devices[Vcb->devices_loaded].devitem, di, min(tp.item->size, sizeof(DEV_ITEM))); - init_device(Vcb, &Vcb->devices[i], FALSE); - - Vcb->devices[i].seeding = v->seeding; - - Vcb->devices[i].length = v->length; + dev = ExAllocatePoolWithTag(NonPagedPool, sizeof(device), ALLOC_TAG); + if (!dev) { + ExReleaseResourceLite(&volumes_lock); + ERR("out of memory\n"); + ObDereferenceObject(DeviceObject); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(dev, sizeof(device)); + + dev->devobj = DeviceObject; + RtlCopyMemory(&dev->devitem, di, min(tp.item->size, sizeof(DEV_ITEM))); + dev->seeding = v->seeding; + init_device(Vcb, dev, FALSE, FALSE); + + dev->length = v->length; + dev->disk_num = v->disk_num; + dev->part_num = v->part_num; + add_device_to_list(Vcb, dev); Vcb->devices_loaded++; done = TRUE; @@ -3166,6 +3170,8 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) { } } else ERR("unexpected device %llx found\n", 
tp.item->key.offset); + + ExReleaseResourceLite(&volumes_lock); } } } else if (tp.item->key.obj_type == TYPE_CHUNK_ITEM) { @@ -3185,6 +3191,7 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) { c->cache = NULL; c->created = FALSE; c->readonly = FALSE; + c->reloc = FALSE; c->chunk_item = ExAllocatePoolWithTag(NonPagedPool, tp.item->size, ALLOC_TAG); @@ -3199,8 +3206,15 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) { if (c->chunk_item->type & BLOCK_FLAG_DATA && c->chunk_item->type > Vcb->data_flags) Vcb->data_flags = c->chunk_item->type; + if (c->chunk_item->type & BLOCK_FLAG_METADATA && c->chunk_item->type > Vcb->metadata_flags) + Vcb->metadata_flags = c->chunk_item->type; + + if (c->chunk_item->type & BLOCK_FLAG_SYSTEM && c->chunk_item->type > Vcb->system_flags) + Vcb->system_flags = c->chunk_item->type; + if (c->chunk_item->num_stripes > 0) { CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; + UINT16 i; c->devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * c->chunk_item->num_stripes, ALLOC_TAG); @@ -3239,10 +3253,13 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) { InitializeListHead(&c->range_locks); KeInitializeSpinLock(&c->range_locks_spinlock); KeInitializeEvent(&c->range_locks_event, NotificationEvent, FALSE); + + c->last_alloc_set = FALSE; InsertTailList(&Vcb->chunks, &c->list_entry); c->list_entry_changed.Flink = NULL; + c->list_entry_balance.Flink = NULL; } } @@ -3257,6 +3274,17 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) { if (Vcb->data_flags == 0) Vcb->data_flags = BLOCK_FLAG_DATA | (Vcb->superblock.num_devices > 1 ? BLOCK_FLAG_RAID0 : 0); + if (Vcb->metadata_flags == 0) + Vcb->metadata_flags = BLOCK_FLAG_METADATA | (Vcb->superblock.num_devices > 1 ? BLOCK_FLAG_RAID1 : BLOCK_FLAG_DUPLICATE); + + if (Vcb->system_flags == 0) + Vcb->system_flags = BLOCK_FLAG_SYSTEM | (Vcb->superblock.num_devices > 1 ? 
BLOCK_FLAG_RAID1 : BLOCK_FLAG_DUPLICATE); + + if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS) { + Vcb->metadata_flags |= BLOCK_FLAG_DATA; + Vcb->data_flags = Vcb->metadata_flags; + } + return STATUS_SUCCESS; } @@ -3365,9 +3393,6 @@ static NTSTATUS STDCALL find_chunk_usage(device_extension* Vcb, PIRP Irp) { BLOCK_GROUP_ITEM* bgi; NTSTATUS Status; -// c00000,c0,800000 -// block_group_item size=7f0000 chunktreeid=100 flags=1 - searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM; while (le != &Vcb->chunks) { @@ -3394,27 +3419,23 @@ static NTSTATUS STDCALL find_chunk_usage(device_extension* Vcb, PIRP Irp) { Vcb->extent_root->id, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM)); } } - -// if (addr >= c->offset && (addr - c->offset) < c->chunk_item->size && c->chunk_item->num_stripes > 0) { -// cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; -// -// return (addr - c->offset) + cis->offset; -// } - // It doesn't make a great deal of sense to load the free space cache of a - // readonly seeding chunk, as we'll never write to it. But btrfs check will - // complain if we don't write a valid cache, so we have to do it anyway... + if (!Vcb->readonly) { + // It doesn't make a great deal of sense to load the free space cache of a + // readonly seeding chunk, as we'll never write to it. But btrfs check will + // complain if we don't write a valid cache, so we have to do it anyway... + + // FIXME - make sure we free occasionally after doing one of these, or we + // might use up a lot of memory with a big disk. - // FIXME - make sure we free occasionally after doing one of these, or we - // might use up a lot of memory with a big disk. 
- - Status = load_free_space_cache(Vcb, c, Irp); - if (!NT_SUCCESS(Status)) { - ERR("load_free_space_cache returned %08x\n", Status); - return Status; + Status = load_free_space_cache(Vcb, c, Irp); + if (!NT_SUCCESS(Status)) { + ERR("load_free_space_cache returned %08x\n", Status); + return Status; + } + + protect_superblocks(Vcb, c); } - - protect_superblocks(Vcb, c); le = le->Flink; } @@ -3608,81 +3629,137 @@ end: return NULL; } +void init_file_cache(PFILE_OBJECT FileObject, CC_FILE_SIZES* ccfs) { + TRACE("(%p, %p)\n", FileObject, ccfs); + + CcInitializeCacheMap(FileObject, ccfs, FALSE, cache_callbacks, FileObject); + + if (diskacc) + CcSetAdditionalCacheAttributesEx(FileObject, CC_ENABLE_DISK_IO_ACCOUNTING); + + CcSetReadAheadGranularity(FileObject, READ_AHEAD_GRANULARITY); +} + +static NTSTATUS create_calc_threads(PDEVICE_OBJECT DeviceObject) { + device_extension* Vcb = DeviceObject->DeviceExtension; + ULONG i; + + Vcb->calcthreads.num_threads = KeQueryActiveProcessorCount(NULL); + + Vcb->calcthreads.threads = ExAllocatePoolWithTag(NonPagedPool, sizeof(drv_calc_thread) * Vcb->calcthreads.num_threads, ALLOC_TAG); + if (!Vcb->calcthreads.threads) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + InitializeListHead(&Vcb->calcthreads.job_list); + ExInitializeResourceLite(&Vcb->calcthreads.lock); + KeInitializeEvent(&Vcb->calcthreads.event, NotificationEvent, FALSE); + + RtlZeroMemory(Vcb->calcthreads.threads, sizeof(drv_calc_thread) * Vcb->calcthreads.num_threads); + + for (i = 0; i < Vcb->calcthreads.num_threads; i++) { + NTSTATUS Status; + + Vcb->calcthreads.threads[i].DeviceObject = DeviceObject; + KeInitializeEvent(&Vcb->calcthreads.threads[i].finished, NotificationEvent, FALSE); + + Status = PsCreateSystemThread(&Vcb->calcthreads.threads[i].handle, 0, NULL, NULL, NULL, calc_thread, &Vcb->calcthreads.threads[i]); + if (!NT_SUCCESS(Status)) { + ULONG j; + + ERR("PsCreateSystemThread returned %08x\n", Status); + + for (j = 0; j < i; j++) 
{ + Vcb->calcthreads.threads[j].quit = TRUE; + } + + KeSetEvent(&Vcb->calcthreads.event, 0, FALSE); + + return Status; + } + } + + return STATUS_SUCCESS; +} + static BOOL raid_generations_okay(device_extension* Vcb) { - UINT64 i; + LIST_ENTRY* le2; // FIXME - if the difference between superblocks is small, we should try to recover - for (i = 0; i < Vcb->superblock.num_devices; i++) { - LIST_ENTRY* le = volumes.Flink; + le2 = Vcb->devices.Flink; + while (le2 != &Vcb->devices) { + LIST_ENTRY* le; + device* dev = CONTAINING_RECORD(le2, device, list_entry); + + ExAcquireResourceSharedLite(&volumes_lock, TRUE); + + le = volumes.Flink; + while (le != &volumes) { volume* v = CONTAINING_RECORD(le, volume, list_entry); if (RtlCompareMemory(&Vcb->superblock.uuid, &v->fsuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID) && - RtlCompareMemory(&Vcb->devices[i].devitem.device_uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID) + RtlCompareMemory(&dev->devitem.device_uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID) ) { if (v->gen1 != Vcb->superblock.generation - 1) { - WARN("device %llu had generation %llx, expected %llx\n", i, v->gen1, Vcb->superblock.generation - 1); + WARN("device %llu had generation %llx, expected %llx\n", dev->devitem.dev_id, v->gen1, Vcb->superblock.generation - 1); + ExReleaseResourceLite(&volumes_lock); return FALSE; } else break; } le = le->Flink; } + + ExReleaseResourceLite(&volumes_lock); + + le2 = le2->Flink; } return TRUE; } static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { - PIO_STACK_LOCATION Stack; + PIO_STACK_LOCATION IrpSp; PDEVICE_OBJECT NewDeviceObject = NULL; PDEVICE_OBJECT DeviceToMount; NTSTATUS Status; device_extension* Vcb = NULL; GET_LENGTH_INFORMATION gli; - UINT64 i; LIST_ENTRY *le, batchlist; KEY searchkey; traverse_ptr tp; fcb* root_fcb = NULL; ccb* root_ccb = NULL; BOOL init_lookaside = FALSE; + device* dev; - TRACE("mount_vol called\n"); + TRACE("(%p, %p)\n", DeviceObject, Irp); - if 
(DeviceObject != devobj) - { + if (DeviceObject != devobj) { Status = STATUS_INVALID_DEVICE_REQUEST; goto exit; } - Stack = IoGetCurrentIrpStackLocation(Irp); - DeviceToMount = Stack->Parameters.MountVolume.DeviceObject; + IrpSp = IoGetCurrentIrpStackLocation(Irp); + DeviceToMount = IrpSp->Parameters.MountVolume.DeviceObject; - Status = dev_ioctl(DeviceToMount, IOCTL_DISK_GET_LENGTH_INFO, NULL, 0, - &gli, sizeof(gli), TRUE, NULL); + Status = dev_ioctl(DeviceToMount, IOCTL_DISK_GET_LENGTH_INFO, NULL, 0, &gli, sizeof(gli), TRUE, NULL); if (!NT_SUCCESS(Status)) { ERR("error reading length information: %08x\n", Status); Status = STATUS_UNRECOGNIZED_VOLUME; goto exit; } - Status = IoCreateDevice(drvobj, - sizeof(device_extension), - NULL, - FILE_DEVICE_DISK_FILE_SYSTEM, - 0, - FALSE, - &NewDeviceObject); + Status = IoCreateDevice(drvobj, sizeof(device_extension), NULL, FILE_DEVICE_DISK_FILE_SYSTEM, 0, FALSE, &NewDeviceObject); if (!NT_SUCCESS(Status)) { ERR("IoCreateDevice returned %08x\n", Status); Status = STATUS_UNRECOGNIZED_VOLUME; goto exit; } -// TRACE("DEV_ITEM = %x, superblock = %x\n", sizeof(DEV_ITEM), sizeof(superblock)); - NewDeviceObject->Flags |= DO_DIRECT_IO; Vcb = (PVOID)NewDeviceObject->DeviceExtension; RtlZeroMemory(Vcb, sizeof(device_extension)); @@ -3693,39 +3770,13 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { Vcb->need_write = FALSE; ExInitializeResourceLite(&Vcb->fcb_lock); - ExInitializeResourceLite(&Vcb->DirResource); - ExInitializeResourceLite(&Vcb->checksum_lock); ExInitializeResourceLite(&Vcb->chunk_lock); - ExAcquireResourceExclusiveLite(&global_loading_lock, TRUE); - InsertTailList(&VcbList, &Vcb->list_entry); - ExReleaseResourceLite(&global_loading_lock); - ExInitializeResourceLite(&Vcb->load_lock); ExAcquireResourceExclusiveLite(&Vcb->load_lock, TRUE); -// Vcb->Identifier.Type = NTFS_TYPE_VCB; -// Vcb->Identifier.Size = sizeof(NTFS_TYPE_VCB); -// -// Status = NtfsGetVolumeData(DeviceToMount, -// Vcb); -// if 
(!NT_SUCCESS(Status)) -// goto ByeBye; - -// Vcb->device = DeviceToMount; DeviceToMount->Flags |= DO_DIRECT_IO; -// Status = dev_ioctl(DeviceToMount, IOCTL_DISK_GET_DRIVE_GEOMETRY, NULL, 0, -// &Vcb->geometry, sizeof(DISK_GEOMETRY), TRUE); -// if (!NT_SUCCESS(Status)) { -// ERR("error reading disk geometry: %08x\n", Status); -// goto exit; -// } else { -// TRACE("media type = %u, cylinders = %u, tracks per cylinder = %u, sectors per track = %u, bytes per sector = %u\n", -// Vcb->geometry.MediaType, Vcb->geometry.Cylinders, Vcb->geometry.TracksPerCylinder, -// Vcb->geometry.SectorsPerTrack, Vcb->geometry.BytesPerSector); -// } - TRACE("partition length = %llx\n", gli.Length.QuadPart); Status = read_superblock(Vcb, DeviceToMount, gli.Length.QuadPart); @@ -3734,14 +3785,6 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { goto exit; } - if (Vcb->superblock.magic != BTRFS_MAGIC) { - ERR("not a BTRFS volume\n"); - Status = STATUS_UNRECOGNIZED_VOLUME; - goto exit; - } else { - TRACE("btrfs magic found\n"); - } - Status = registry_load_volume_options(Vcb); if (!NT_SUCCESS(Status)) { ERR("registry_load_volume_options returned %08x\n", Status); @@ -3760,12 +3803,15 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { goto exit; } + ExAcquireResourceSharedLite(&volumes_lock, TRUE); + le = volumes.Flink; while (le != &volumes) { volume* v = CONTAINING_RECORD(le, volume, list_entry); if (RtlCompareMemory(&Vcb->superblock.uuid, &v->fsuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID) && v->devnum < Vcb->superblock.dev_item.dev_id) { // skipping over device in RAID which isn't the first one + ExReleaseResourceLite(&volumes_lock); Status = STATUS_UNRECOGNIZED_VOLUME; goto exit; } @@ -3773,6 +3819,8 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { le = le->Flink; } + ExReleaseResourceLite(&volumes_lock); + Vcb->readonly = FALSE; if (Vcb->superblock.compat_ro_flags & ~COMPAT_RO_SUPPORTED) { WARN("mounting 
read-only because of unsupported flags (%llx)\n", Vcb->superblock.compat_ro_flags & ~COMPAT_RO_SUPPORTED); @@ -3785,31 +3833,30 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { Vcb->superblock.generation++; Vcb->superblock.incompat_flags |= BTRFS_INCOMPAT_FLAGS_MIXED_BACKREF; - Vcb->devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device) * Vcb->superblock.num_devices, ALLOC_TAG); - if (!Vcb->devices) { + InitializeListHead(&Vcb->devices); + dev = ExAllocatePoolWithTag(NonPagedPool, sizeof(device), ALLOC_TAG); + if (!dev) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; goto exit; } - Vcb->devices[0].devobj = DeviceToMount; - RtlCopyMemory(&Vcb->devices[0].devitem, &Vcb->superblock.dev_item, sizeof(DEV_ITEM)); - - Vcb->devices[0].seeding = Vcb->superblock.flags & BTRFS_SUPERBLOCK_FLAGS_SEEDING ? TRUE : FALSE; + dev->devobj = DeviceToMount; + RtlCopyMemory(&dev->devitem, &Vcb->superblock.dev_item, sizeof(DEV_ITEM)); - init_device(Vcb, &Vcb->devices[0], FALSE); - Vcb->devices[0].length = gli.Length.QuadPart; + dev->seeding = Vcb->superblock.flags & BTRFS_SUPERBLOCK_FLAGS_SEEDING ? 
TRUE : FALSE; - if (Vcb->superblock.num_devices > 1) - RtlZeroMemory(&Vcb->devices[1], sizeof(DEV_ITEM) * (Vcb->superblock.num_devices - 1)); + init_device(Vcb, dev, FALSE, TRUE); + dev->length = gli.Length.QuadPart; + InsertTailList(&Vcb->devices, &dev->list_entry); Vcb->devices_loaded = 1; if (DeviceToMount->Flags & DO_SYSTEM_BOOT_PARTITION) Vcb->disallow_dismount = TRUE; TRACE("DeviceToMount = %p\n", DeviceToMount); - TRACE("Stack->Parameters.MountVolume.Vpb = %p\n", Stack->Parameters.MountVolume.Vpb); + TRACE("IrpSp->Parameters.MountVolume.Vpb = %p\n", IrpSp->Parameters.MountVolume.Vpb); NewDeviceObject->StackSize = DeviceToMount->StackSize + 1; NewDeviceObject->Flags &= ~DO_DEVICE_INITIALIZING; @@ -3837,10 +3884,10 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { InitializeListHead(&Vcb->chunks); InitializeListHead(&Vcb->chunks_changed); InitializeListHead(&Vcb->trees); + InitializeListHead(&Vcb->trees_hash); InitializeListHead(&Vcb->all_fcbs); InitializeListHead(&Vcb->dirty_fcbs); InitializeListHead(&Vcb->dirty_filerefs); - InitializeListHead(&Vcb->sector_checksums); KeInitializeSpinLock(&Vcb->dirty_fcbs_lock); KeInitializeSpinLock(&Vcb->dirty_filerefs_lock); @@ -3872,17 +3919,22 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { goto exit; } - if (Vcb->devices[0].readonly && !Vcb->readonly) { + if (dev->readonly && !Vcb->readonly) { Vcb->readonly = TRUE; - for (i = 0; i < Vcb->superblock.num_devices; i++) { - if (Vcb->devices[i].readonly && !Vcb->devices[i].seeding) + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev2 = CONTAINING_RECORD(le, device, list_entry); + + if (dev2->readonly && !dev2->seeding) break; - if (!Vcb->devices[i].readonly) { + if (!dev2->readonly) { Vcb->readonly = FALSE; break; } + + le = le->Flink; } if (Vcb->readonly) @@ -3898,7 +3950,7 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { goto exit; } } else { - if 
(Vcb->devices[0].readonly) { + if (dev->readonly) { WARN("setting volume to readonly as device is readonly\n"); Vcb->readonly = TRUE; } @@ -3918,12 +3970,10 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { goto exit; } - if (!Vcb->readonly) { - Status = find_chunk_usage(Vcb, Irp); - if (!NT_SUCCESS(Status)) { - ERR("find_chunk_usage returned %08x\n", Status); - goto exit; - } + Status = find_chunk_usage(Vcb, Irp); + if (!NT_SUCCESS(Status)) { + ERR("find_chunk_usage returned %08x\n", Status); + goto exit; } InitializeListHead(&batchlist); @@ -3975,6 +4025,12 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { goto exit; } + Status = load_dir_children(root_fcb, TRUE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("load_dir_children returned %08x\n", Status); + goto exit; + } + searchkey.obj_id = root_fcb->inode; searchkey.obj_type = TYPE_INODE_ITEM; searchkey.offset = 0xffffffffffffffff; @@ -3994,7 +4050,7 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { if (tp.item->size > 0) RtlCopyMemory(&root_fcb->inode_item, tp.item->data, min(sizeof(INODE_ITEM), tp.item->size)); - fcb_get_sd(root_fcb, NULL, Irp); + fcb_get_sd(root_fcb, NULL, TRUE, Irp); root_fcb->atts = get_file_attributes(Vcb, &root_fcb->inode_item, root_fcb->subvol, root_fcb->inode, root_fcb->type, FALSE, FALSE, Irp); @@ -4036,21 +4092,22 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { goto exit; } _SEH2_END; - for (i = 0; i < Vcb->superblock.num_devices; i++) { - Status = find_disk_holes(Vcb, &Vcb->devices[i], Irp); + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev2 = CONTAINING_RECORD(le, device, list_entry); + + Status = find_disk_holes(Vcb, dev2, Irp); if (!NT_SUCCESS(Status)) { ERR("find_disk_holes returned %08x\n", Status); goto exit; } + + le = le->Flink; } -// root_test(Vcb); - - KeInitializeSpinLock(&Vcb->FcbListLock); - - NewDeviceObject->Vpb = 
Stack->Parameters.MountVolume.Vpb; - Stack->Parameters.MountVolume.Vpb->DeviceObject = NewDeviceObject; - Stack->Parameters.MountVolume.Vpb->Flags |= VPB_MOUNTED; + NewDeviceObject->Vpb = IrpSp->Parameters.MountVolume.Vpb; + IrpSp->Parameters.MountVolume.Vpb->DeviceObject = NewDeviceObject; + IrpSp->Parameters.MountVolume.Vpb->Flags |= VPB_MOUNTED; NewDeviceObject->Vpb->VolumeLabelLength = 4; // FIXME NewDeviceObject->Vpb->VolumeLabel[0] = '?'; NewDeviceObject->Vpb->VolumeLabel[1] = 0; @@ -4065,10 +4122,20 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) { goto exit; } + Status = create_calc_threads(NewDeviceObject); + if (!NT_SUCCESS(Status)) { + ERR("create_calc_threads returned %08x\n", Status); + goto exit; + } + Status = registry_mark_volume_mounted(&Vcb->superblock.uuid); if (!NT_SUCCESS(Status)) WARN("registry_mark_volume_mounted returned %08x\n", Status); + Status = look_for_balance_item(Vcb); + if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) + WARN("look_for_balance_item returned %08x\n", Status); + Status = STATUS_SUCCESS; exit: @@ -4099,34 +4166,41 @@ exit: ExDeleteResourceLite(&Vcb->tree_lock); ExDeleteResourceLite(&Vcb->load_lock); ExDeleteResourceLite(&Vcb->fcb_lock); - ExDeleteResourceLite(&Vcb->DirResource); - ExDeleteResourceLite(&Vcb->checksum_lock); ExDeleteResourceLite(&Vcb->chunk_lock); - if (Vcb->devices) - ExFreePoolWithTag(Vcb->devices, ALLOC_TAG); - - RemoveEntryList(&Vcb->list_entry); + if (Vcb->devices.Flink) { + while (!IsListEmpty(&Vcb->devices)) { + LIST_ENTRY* le = RemoveHeadList(&Vcb->devices); + device* dev = CONTAINING_RECORD(le, device, list_entry); + + ExFreePool(dev); + } + } } if (NewDeviceObject) IoDeleteDevice(NewDeviceObject); - } else + } else { + ExAcquireResourceExclusiveLite(&global_loading_lock, TRUE); + InsertTailList(&VcbList, &Vcb->list_entry); + ExReleaseResourceLite(&global_loading_lock); + FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_MOUNT); + } TRACE("mount_vol done 
(status: %lx)\n", Status); return Status; } -static NTSTATUS verify_volume(PDEVICE_OBJECT device) { - device_extension* Vcb = device->DeviceExtension; +static NTSTATUS verify_volume(PDEVICE_OBJECT devobj) { + device_extension* Vcb = devobj->DeviceExtension; ULONG cc, to_read; IO_STATUS_BLOCK iosb; NTSTATUS Status; superblock* sb; UINT32 crc32; - UINT64 i; + LIST_ENTRY* le; if (Vcb->removing) return STATUS_WRONG_VOLUME; @@ -4138,7 +4212,7 @@ static NTSTATUS verify_volume(PDEVICE_OBJECT device) { return Status; } - to_read = sector_align(sizeof(superblock), device->SectorSize); + to_read = devobj->SectorSize == 0 ? sizeof(superblock) : sector_align(sizeof(superblock), devobj->SectorSize); sb = ExAllocatePoolWithTag(NonPagedPool, to_read, ALLOC_TAG); if (!sb) { @@ -4176,30 +4250,41 @@ static NTSTATUS verify_volume(PDEVICE_OBJECT device) { ExFreePool(sb); - for (i = 0; i < Vcb->superblock.num_devices; i++) { - if (Vcb->devices[i].removable) { + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (dev->removable) { NTSTATUS Status; ULONG cc; IO_STATUS_BLOCK iosb; - Status = dev_ioctl(Vcb->devices[i].devobj, IOCTL_STORAGE_CHECK_VERIFY, NULL, 0, &cc, sizeof(ULONG), TRUE, &iosb); + Status = dev_ioctl(dev->devobj, IOCTL_STORAGE_CHECK_VERIFY, NULL, 0, &cc, sizeof(ULONG), TRUE, &iosb); if (!NT_SUCCESS(Status)) { + ExReleaseResourceLite(&Vcb->tree_lock); ERR("dev_ioctl returned %08x\n", Status); return Status; } if (iosb.Information < sizeof(ULONG)) { + ExReleaseResourceLite(&Vcb->tree_lock); ERR("iosb.Information was too short\n"); return STATUS_INTERNAL_ERROR; } - Vcb->devices[i].change_count = cc; + dev->change_count = cc; } - Vcb->devices[i].devobj->Flags &= ~DO_VERIFY_VOLUME; + dev->devobj->Flags &= ~DO_VERIFY_VOLUME; + + le = le->Flink; } + ExReleaseResourceLite(&Vcb->tree_lock); + Vcb->Vpb->RealDevice->Flags &= ~DO_VERIFY_VOLUME; return 
STATUS_SUCCESS; @@ -4575,6 +4660,7 @@ NTSTATUS STDCALL DriverEntry(PDRIVER_OBJECT DriverObject, PUNICODE_STRING Regist PDEVICE_OBJECT DeviceObject; UNICODE_STRING device_nameW; UNICODE_STRING dosdevice_nameW; + control_device_extension* cde; InitializeListHead(&uid_map_list); @@ -4609,6 +4695,38 @@ NTSTATUS STDCALL DriverEntry(PDRIVER_OBJECT DriverObject, PUNICODE_STRING Regist #endif // TRACE("check CRC32C: %08x\n", calc_crc32c((UINT8*)"123456789", 9)); // should be e3069283 + + if (RtlIsNtDdiVersionAvailable(NTDDI_WIN8)) { + UNICODE_STRING name; + tPsIsDiskCountersEnabled PsIsDiskCountersEnabled; + + RtlInitUnicodeString(&name, L"PsIsDiskCountersEnabled"); + PsIsDiskCountersEnabled = (tPsIsDiskCountersEnabled)MmGetSystemRoutineAddress(&name); + + if (PsIsDiskCountersEnabled) { + diskacc = PsIsDiskCountersEnabled(); + + RtlInitUnicodeString(&name, L"PsUpdateDiskCounters"); + PsUpdateDiskCounters = (tPsUpdateDiskCounters)MmGetSystemRoutineAddress(&name); + + if (!PsUpdateDiskCounters) + diskacc = FALSE; + } + + RtlInitUnicodeString(&name, L"CcCopyReadEx"); + CcCopyReadEx = (tCcCopyReadEx)MmGetSystemRoutineAddress(&name); + + RtlInitUnicodeString(&name, L"CcCopyWriteEx"); + CcCopyWriteEx = (tCcCopyWriteEx)MmGetSystemRoutineAddress(&name); + + RtlInitUnicodeString(&name, L"CcSetAdditionalCacheAttributesEx"); + CcSetAdditionalCacheAttributesEx = (tCcSetAdditionalCacheAttributesEx)MmGetSystemRoutineAddress(&name); + } else { + PsUpdateDiskCounters = NULL; + CcCopyReadEx = NULL; + CcCopyWriteEx = NULL; + CcSetAdditionalCacheAttributesEx = NULL; + } drvobj = DriverObject; @@ -4642,13 +4760,17 @@ NTSTATUS STDCALL DriverEntry(PDRIVER_OBJECT DriverObject, PUNICODE_STRING Regist dosdevice_nameW.Buffer = dosdevice_name; dosdevice_nameW.Length = dosdevice_nameW.MaximumLength = (USHORT)wcslen(dosdevice_name) * sizeof(WCHAR); - Status = IoCreateDevice(DriverObject, 0, &device_nameW, FILE_DEVICE_DISK_FILE_SYSTEM, FILE_DEVICE_SECURE_OPEN, FALSE, &DeviceObject); + Status = 
IoCreateDevice(DriverObject, sizeof(control_device_extension), &device_nameW, FILE_DEVICE_DISK_FILE_SYSTEM, + FILE_DEVICE_SECURE_OPEN, FALSE, &DeviceObject); if (!NT_SUCCESS(Status)) { ERR("IoCreateDevice returned %08x\n", Status); return Status; } devobj = DeviceObject; + cde = (control_device_extension*)devobj->DeviceExtension; + + cde->type = VCB_TYPE_CONTROL; DeviceObject->Flags &= ~DO_DEVICE_INITIALIZING; @@ -4665,10 +4787,16 @@ NTSTATUS STDCALL DriverEntry(PDRIVER_OBJECT DriverObject, PUNICODE_STRING Regist } InitializeListHead(&volumes); - look_for_vols(DriverObject, &volumes); + InitializeListHead(&pnp_disks); InitializeListHead(&VcbList); ExInitializeResourceLite(&global_loading_lock); + ExInitializeResourceLite(&volumes_lock); + + Status = IoRegisterPlugPlayNotification(EventCategoryDeviceInterfaceChange, PNPNOTIFY_DEVICE_INTERFACE_INCLUDE_EXISTING_INTERFACES, + (PVOID)&GUID_DEVINTERFACE_DISK, DriverObject, pnp_notification, DriverObject, &notification_entry); + if (!NT_SUCCESS(Status)) + ERR("IoRegisterPlugPlayNotification returned %08x\n", Status); IoRegisterFileSystem(DeviceObject); diff --git a/reactos/drivers/filesystems/btrfs/btrfs.h b/reactos/drivers/filesystems/btrfs/btrfs.h index c5184cde398..f7a3d457c3b 100644 --- a/reactos/drivers/filesystems/btrfs/btrfs.h +++ b/reactos/drivers/filesystems/btrfs/btrfs.h @@ -37,6 +37,8 @@ static const UINT64 superblock_addrs[] = { 0x10000, 0x4000000, 0x4000000000, 0x4 #define TYPE_DEV_EXTENT 0xCC #define TYPE_DEV_ITEM 0xD8 #define TYPE_CHUNK_ITEM 0xE4 +#define TYPE_TEMP_ITEM 0xF8 +#define TYPE_DEV_STATS 0xF9 #define TYPE_SUBVOL_UUID 0xFB #define BTRFS_ROOT_ROOT 1 @@ -72,6 +74,7 @@ static const UINT64 superblock_addrs[] = { 0x10000, 0x4000000, 0x4000000000, 0x4 #define FREE_SPACE_CACHE_ID 0xFFFFFFFFFFFFFFF5 #define EXTENT_CSUM_ID 0xFFFFFFFFFFFFFFF6 +#define BALANCE_ITEM_ID 0xFFFFFFFFFFFFFFFC #define BTRFS_INODE_NODATASUM 0x001 #define BTRFS_INODE_NODATACOW 0x002 @@ -436,6 +439,62 @@ typedef struct { BTRFS_UUID 
chunktree_uuid; } DEV_EXTENT; +#define BALANCE_FLAGS_DATA 0x1 +#define BALANCE_FLAGS_SYSTEM 0x2 +#define BALANCE_FLAGS_METADATA 0x4 + +#define BALANCE_ARGS_FLAGS_PROFILES 0x001 +#define BALANCE_ARGS_FLAGS_USAGE 0x002 +#define BALANCE_ARGS_FLAGS_DEVID 0x004 +#define BALANCE_ARGS_FLAGS_DRANGE 0x008 +#define BALANCE_ARGS_FLAGS_VRANGE 0x010 +#define BALANCE_ARGS_FLAGS_LIMIT 0x020 +#define BALANCE_ARGS_FLAGS_LIMIT_RANGE 0x040 +#define BALANCE_ARGS_FLAGS_STRIPES_RANGE 0x080 +#define BALANCE_ARGS_FLAGS_CONVERT 0x100 +#define BALANCE_ARGS_FLAGS_SOFT 0x200 +#define BALANCE_ARGS_FLAGS_USAGE_RANGE 0x400 + +typedef struct { + UINT64 profiles; + + union { + UINT64 usage; + struct { + UINT32 usage_start; + UINT32 usage_end; + }; + }; + + UINT64 devid; + UINT64 drange_start; + UINT64 drange_end; + UINT64 vrange_start; + UINT64 vrange_end; + UINT64 convert; + UINT64 flags; + + union { + UINT64 limit; + struct { + UINT32 limit_start; + UINT32 limit_end; + }; + }; + + UINT32 stripes_start; + UINT32 stripes_end; + UINT8 reserved[48]; +} BALANCE_ARGS; + +typedef struct { + UINT64 flags; + BALANCE_ARGS data; + BALANCE_ARGS metadata; + BALANCE_ARGS system; + UINT8 reserved[32]; +} BALANCE_ITEM; + #pragma pack(pop) #endif diff --git a/reactos/drivers/filesystems/btrfs/btrfs.rc b/reactos/drivers/filesystems/btrfs/btrfs.rc index 467f2e8b07a..333e8b31ad2 100644 --- a/reactos/drivers/filesystems/btrfs/btrfs.rc +++ b/reactos/drivers/filesystems/btrfs/btrfs.rc @@ -70,12 +70,12 @@ BEGIN BLOCK "080904b0" BEGIN VALUE "FileDescription", "WinBtrfs" - VALUE "FileVersion", "0.7" + VALUE "FileVersion", "0.8" VALUE "InternalName", "btrfs" VALUE "LegalCopyright", "Copyright (c) Mark Harmstone 2016" VALUE "OriginalFilename", "btrfs.sys" VALUE "ProductName", "WinBtrfs" - VALUE "ProductVersion", "0.7" + VALUE "ProductVersion", "0.8" END END BLOCK "VarFileInfo" diff --git a/reactos/drivers/filesystems/btrfs/btrfs_drv.h b/reactos/drivers/filesystems/btrfs/btrfs_drv.h index 22ae32e6a89..b3aebf418f9 100644 --- 
a/reactos/drivers/filesystems/btrfs/btrfs_drv.h +++ b/reactos/drivers/filesystems/btrfs/btrfs_drv.h @@ -46,6 +46,7 @@ #include #include #include "btrfs.h" +#include "btrfsioctl.h" #ifdef _DEBUG // #define DEBUG_FCB_REFCOUNTS @@ -108,15 +109,23 @@ typedef struct { UINT64 gen1, gen2; BOOL seeding; BOOL processed; + DWORD disk_num; + DWORD part_num; LIST_ENTRY list_entry; } volume; +typedef struct { + UNICODE_STRING devpath; + ULONG disk_num; + LIST_ENTRY list_entry; +} pnp_disk; + typedef struct _fcb_nonpaged { FAST_MUTEX HeaderMutex; SECTION_OBJECT_POINTERS segment_object; ERESOURCE resource; ERESOURCE paging_resource; - ERESOURCE index_lock; + ERESOURCE dir_children_lock; } fcb_nonpaged; struct _root; @@ -127,21 +136,12 @@ typedef struct { ULONG datalen; BOOL unique; BOOL ignore; + BOOL inserted; + UINT32* csum; LIST_ENTRY list_entry; } extent; -typedef struct { - UINT32 hash; - KEY key; - UINT8 type; - UINT64 index; - ANSI_STRING utf8; - UNICODE_STRING filepart_uc; - - LIST_ENTRY list_entry; -} index_entry; - typedef struct { UINT64 parent; UINT64 index; @@ -152,6 +152,21 @@ typedef struct { struct _file_ref; +typedef struct { + KEY key; + UINT64 index; + UINT8 type; + ANSI_STRING utf8; + UINT32 hash; + UNICODE_STRING name; + UINT32 hash_uc; + UNICODE_STRING name_uc; + struct _file_ref* fileref; + LIST_ENTRY list_entry_index; + LIST_ENTRY list_entry_hash; + LIST_ENTRY list_entry_hash_uc; +} dir_child; + typedef struct _fcb { FSRTL_ADVANCED_FCB_HEADER Header; struct _fcb_nonpaged* nonpaged; @@ -177,8 +192,11 @@ typedef struct _fcb { struct _file_ref* fileref; BOOL inode_item_changed; - BOOL index_loaded; - LIST_ENTRY index_list; + LIST_ENTRY dir_children_index; + LIST_ENTRY dir_children_hash; + LIST_ENTRY dir_children_hash_uc; + LIST_ENTRY** hash_ptrs; + LIST_ENTRY** hash_ptrs_uc; BOOL dirty; BOOL sd_dirty; @@ -223,6 +241,7 @@ typedef struct _file_ref { LONG open_count; struct _file_ref* parent; WCHAR* debug_desc; + dir_child* dc; BOOL dirty; @@ -244,6 +263,8 @@ 
typedef struct _ccb { UNICODE_STRING query_string; BOOL has_wildcard; BOOL specific_file; + BOOL manage_volume_privilege; + BOOL allow_extended_dasd_io; ACCESS_MASK access; file_ref* fileref; UNICODE_STRING filename; @@ -309,6 +330,7 @@ typedef struct _tree { // UINT64 address; // UINT8 level; tree_header header; + UINT32 hash; BOOL has_address; UINT32 size; struct _device_extension* Vcb; @@ -318,10 +340,10 @@ typedef struct _tree { // tree_nonpaged* nonpaged; LIST_ENTRY itemlist; LIST_ENTRY list_entry; + LIST_ENTRY list_entry_hash; UINT64 new_address; BOOL has_new_address; BOOL updated_extents; - UINT64 flags; BOOL write; } tree; @@ -343,10 +365,15 @@ typedef struct _root { enum batch_operation { Batch_Insert, + Batch_Delete, Batch_SetXattr, Batch_DirItem, Batch_InodeRef, Batch_InodeExtRef, + Batch_DeleteInode, + Batch_DeleteDirItem, + Batch_DeleteInodeRef, + Batch_DeleteInodeExtRef, }; typedef struct { @@ -386,11 +413,15 @@ typedef struct { BOOL removable; BOOL seeding; BOOL readonly; + BOOL reloc; BOOL ssd; BOOL trim; ULONG change_count; UINT64 length; + ULONG disk_num; + ULONG part_num; LIST_ENTRY space; + LIST_ENTRY list_entry; } device; typedef struct { @@ -419,9 +450,13 @@ typedef struct { ERESOURCE changed_extents_lock; BOOL created; BOOL readonly; + BOOL reloc; + BOOL last_alloc_set; + UINT64 last_alloc; LIST_ENTRY list_entry; LIST_ENTRY list_entry_changed; + LIST_ENTRY list_entry_balance; } chunk; typedef struct { @@ -455,6 +490,31 @@ typedef struct { LIST_ENTRY list_entry; } sys_chunk; +typedef struct { + UINT8* data; + UINT32* csum; + UINT32 sectors; + LONG pos, done; + KEVENT event; + LONG refcount; + LIST_ENTRY list_entry; +} calc_job; + +typedef struct { + PDEVICE_OBJECT DeviceObject; + HANDLE handle; + KEVENT finished; + BOOL quit; +} drv_calc_thread; + +typedef struct { + ULONG num_threads; + LIST_ENTRY job_list; + ERESOURCE lock; + drv_calc_thread* threads; + KEVENT event; +} drv_calc_threads; + typedef struct { BOOL ignore; BOOL compress; @@ 
-467,10 +527,12 @@ typedef struct { UINT64 subvol_id; UINT32 raid5_recalculation; UINT32 raid6_recalculation; + BOOL skip_balance; } mount_options; #define VCB_TYPE_VOLUME 1 #define VCB_TYPE_PARTITION0 2 +#define VCB_TYPE_CONTROL 3 #ifdef DEBUG_STATS typedef struct { @@ -479,14 +541,40 @@ typedef struct { UINT64 read_total_time; UINT64 read_csum_time; UINT64 read_disk_time; + + UINT64 num_opens; + UINT64 open_total_time; + UINT64 num_overwrites; + UINT64 overwrite_total_time; + UINT64 num_creates; + UINT64 create_total_time; } debug_stats; #endif +#define BALANCE_OPTS_DATA 0 +#define BALANCE_OPTS_METADATA 1 +#define BALANCE_OPTS_SYSTEM 2 + +typedef struct { + HANDLE thread; + UINT64 total_chunks; + UINT64 chunks_left; + btrfs_balance_opts opts[3]; + BOOL paused; + BOOL stopping; + BOOL cancelling; + BOOL removing; + BOOL dev_readonly; + NTSTATUS status; + KEVENT event; + KEVENT finished; +} balance_info; + typedef struct _device_extension { UINT32 type; mount_options options; PVPB Vpb; - device* devices; + LIST_ENTRY devices; #ifdef DEBUG_STATS debug_stats stats; #endif @@ -497,14 +585,13 @@ typedef struct _device_extension { BOOL readonly; BOOL removing; BOOL locked; + BOOL lock_paused_balance; BOOL disallow_dismount; BOOL trim; PFILE_OBJECT locked_fileobj; fcb* volume_fcb; file_ref* root_fileref; LONG open_files; - ERESOURCE DirResource; - KSPIN_LOCK FcbListLock; ERESOURCE fcb_lock; ERESOURCE load_lock; ERESOURCE tree_lock; @@ -514,8 +601,9 @@ typedef struct _device_extension { BOOL need_write; // ERESOURCE LogToPhysLock; // UINT64 chunk_root_phys_addr; - UINT64 root_tree_phys_addr; UINT64 data_flags; + UINT64 metadata_flags; + UINT64 system_flags; // log_to_phys* log_to_phys; LIST_ENTRY roots; LIST_ENTRY drop_roots; @@ -531,17 +619,19 @@ typedef struct _device_extension { LIST_ENTRY chunks; LIST_ENTRY chunks_changed; LIST_ENTRY trees; + LIST_ENTRY trees_hash; + LIST_ENTRY* trees_ptrs[256]; LIST_ENTRY all_fcbs; LIST_ENTRY dirty_fcbs; KSPIN_LOCK dirty_fcbs_lock; 
LIST_ENTRY dirty_filerefs; KSPIN_LOCK dirty_filerefs_lock; - ERESOURCE checksum_lock; ERESOURCE chunk_lock; - LIST_ENTRY sector_checksums; HANDLE flush_thread_handle; KTIMER flush_thread_timer; KEVENT flush_thread_finished; + drv_calc_threads calcthreads; + balance_info balance; PFILE_OBJECT root_file; PAGED_LOOKASIDE_LIST tree_data_lookaside; PAGED_LOOKASIDE_LIST traverse_ptr_lookaside; @@ -558,24 +648,16 @@ typedef struct { UNICODE_STRING name; } part0_device_extension; +typedef struct { + UINT32 type; +} control_device_extension; + typedef struct { LIST_ENTRY listentry; PSID sid; UINT32 uid; } uid_map; -typedef struct { - LIST_ENTRY list_entry; - UINT64 key; -} ordered_list; - -typedef struct { - ordered_list ol; - ULONG length; - UINT32* checksums; - BOOL deleted; -} changed_sector; - enum write_data_status { WriteDataStatus_Pending, WriteDataStatus_Success, @@ -605,6 +687,14 @@ typedef struct _write_data_context { BOOL tree; } write_data_context; +typedef struct { + UINT64 address; + UINT32 length; + BOOL overlap; + UINT8* data; + LIST_ENTRY list_entry; +} tree_write; + // #pragma pack(pop) static __inline void* map_user_buffer(PIRP Irp) { @@ -626,27 +716,6 @@ static __inline void win_time_to_unix(LARGE_INTEGER t, BTRFS_TIME* out) { out->nanoseconds = (l % 10000000) * 100; } -static __inline void insert_into_ordered_list(LIST_ENTRY* list, ordered_list* ins) { - LIST_ENTRY* le = list->Flink; - ordered_list* ol; - - while (le != list) { - ol = (ordered_list*)le; - - if (ol->key > ins->key) { - le->Blink->Flink = &ins->list_entry; - ins->list_entry.Blink = le->Blink; - le->Blink = &ins->list_entry; - ins->list_entry.Flink = le; - return; - } - - le = le->Flink; - } - - InsertTailList(list, &ins->list_entry); -} - static __inline void get_raid0_offset(UINT64 off, UINT64 stripe_length, UINT16 num_stripes, UINT64* stripeoff, UINT16* stripe) { UINT64 initoff, startoff; @@ -682,12 +751,12 @@ static UINT64 __inline make_file_id(root* r, UINT64 inode) { // in btrfs.c 
device* find_device_from_uuid(device_extension* Vcb, BTRFS_UUID* uuid); UINT64 sector_align( UINT64 NumberToBeAligned, UINT64 Alignment ); +BOOL get_file_attributes_from_xattr(char* val, UINT16 len, ULONG* atts); ULONG STDCALL get_file_attributes(device_extension* Vcb, INODE_ITEM* ii, root* r, UINT64 inode, UINT8 type, BOOL dotfile, BOOL ignore_xa, PIRP Irp); +BOOL extract_xattr(void* item, USHORT size, char* name, UINT8** data, UINT16* datalen); BOOL STDCALL get_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char* name, UINT32 crc32, UINT8** data, UINT16* datalen, PIRP Irp); void _free_fcb(fcb* fcb, const char* func, const char* file, unsigned int line); void _free_fileref(file_ref* fr, const char* func, const char* file, unsigned int line); -NTSTATUS delete_dir_item(device_extension* Vcb, root* subvol, UINT64 parinode, UINT32 crc32, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback); -NTSTATUS delete_inode_ref(device_extension* Vcb, root* subvol, UINT64 inode, UINT64 parinode, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback); fcb* create_fcb(POOL_TYPE pool_type); file_ref* create_fileref(); void protect_superblocks(device_extension* Vcb, chunk* c); @@ -708,6 +777,9 @@ void mark_fileref_dirty(file_ref* fileref); NTSTATUS delete_fileref(file_ref* fileref, PFILE_OBJECT FileObject, PIRP Irp, LIST_ENTRY* rollback); void chunk_lock_range(device_extension* Vcb, chunk* c, UINT64 start, UINT64 length); void chunk_unlock_range(device_extension* Vcb, chunk* c, UINT64 start, UINT64 length); +void init_device(device_extension* Vcb, device* dev, BOOL get_length, BOOL get_nums); +void init_file_cache(PFILE_OBJECT FileObject, CC_FILE_SIZES* ccfs); +NTSTATUS sync_read_phys(PDEVICE_OBJECT DeviceObject, LONGLONG StartingOffset, ULONG Length, PUCHAR Buffer, BOOL override); #ifdef _MSC_VER #define funcname __FUNCTION__ @@ -729,6 +801,7 @@ extern UINT32 mount_flush_interval; extern UINT32 mount_max_inline; extern UINT32 mount_raid5_recalculation; extern UINT32 
mount_raid6_recalculation; +extern UINT32 mount_skip_balance; #ifdef _DEBUG @@ -815,6 +888,8 @@ typedef struct { // in treefuncs.c NTSTATUS STDCALL _find_item(device_extension* Vcb, root* r, traverse_ptr* tp, const KEY* searchkey, BOOL ignore, PIRP Irp, const char* func, const char* file, unsigned int line); +NTSTATUS STDCALL _find_item_to_level(device_extension* Vcb, root* r, traverse_ptr* tp, const KEY* searchkey, BOOL ignore, UINT8 level, + PIRP Irp, const char* func, const char* file, unsigned int line); BOOL STDCALL _find_next_item(device_extension* Vcb, const traverse_ptr* tp, traverse_ptr* next_tp, BOOL ignore, PIRP Irp, const char* func, const char* file, unsigned int line); BOOL STDCALL _find_prev_item(device_extension* Vcb, const traverse_ptr* tp, traverse_ptr* prev_tp, BOOL ignore, PIRP Irp, const char* func, const char* file, unsigned int line); void STDCALL free_trees(device_extension* Vcb); @@ -832,6 +907,7 @@ void commit_batch_list(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp, L void clear_batch_list(device_extension* Vcb, LIST_ENTRY* batchlist); #define find_item(Vcb, r, tp, searchkey, ignore, Irp) _find_item(Vcb, r, tp, searchkey, ignore, Irp, funcname, __FILE__, __LINE__) +#define find_item_to_level(Vcb, r, tp, searchkey, ignore, level, Irp) _find_item_to_level(Vcb, r, tp, searchkey, ignore, level, Irp, funcname, __FILE__, __LINE__) #define find_next_item(Vcb, tp, next_tp, ignore, Irp) _find_next_item(Vcb, tp, next_tp, ignore, Irp, funcname, __FILE__, __LINE__) #define find_prev_item(Vcb, tp, prev_tp, ignore, Irp) _find_prev_item(Vcb, tp, prev_tp, ignore, Irp, funcname, __FILE__, __LINE__) #define free_tree(t) _free_tree(t, funcname, __FILE__, __LINE__) @@ -839,7 +915,13 @@ void clear_batch_list(device_extension* Vcb, LIST_ENTRY* batchlist); #define do_load_tree(Vcb, th, r, t, td, loaded, Irp) _do_load_tree(Vcb, th, r, t, td, loaded, Irp, funcname, __FILE__, __LINE__) // in search.c -void STDCALL look_for_vols(PDRIVER_OBJECT 
DriverObject, LIST_ENTRY* volumes); +void remove_drive_letter(PDEVICE_OBJECT mountmgr, volume* v); +void add_volume(PDEVICE_OBJECT mountmgr, PUNICODE_STRING us); +#ifdef __REACTOS__ +NTSTATUS NTAPI pnp_notification(PVOID NotificationStructure, PVOID Context); +#else +NTSTATUS pnp_notification(PVOID NotificationStructure, PVOID Context); +#endif // in cache.c NTSTATUS STDCALL init_cache(); @@ -853,19 +935,18 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void NTSTATUS truncate_file(fcb* fcb, UINT64 end, PIRP Irp, LIST_ENTRY* rollback); NTSTATUS extend_file(fcb* fcb, file_ref* fileref, UINT64 end, BOOL prealloc, PIRP Irp, LIST_ENTRY* rollback); NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 end_data, PIRP Irp, LIST_ENTRY* rollback); -void commit_checksum_changes(device_extension* Vcb, LIST_ENTRY* changed_sector_list); chunk* get_chunk_from_address(device_extension* Vcb, UINT64 address); chunk* alloc_chunk(device_extension* Vcb, UINT64 flags); NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, BOOL need_free, UINT32 length, write_data_context* wtc, PIRP Irp, chunk* c); NTSTATUS STDCALL write_data_complete(device_extension* Vcb, UINT64 address, void* data, UINT32 length, PIRP Irp, chunk* c); void free_write_data_stripes(write_data_context* wtc); NTSTATUS STDCALL drv_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); -BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start_data, UINT64 length, BOOL prealloc, void* data, LIST_ENTRY* changed_sector_list, +BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start_data, UINT64 length, BOOL prealloc, void* data, PIRP Irp, LIST_ENTRY* rollback, UINT8 compression, UINT64 decoded_size); -NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback); -NTSTATUS do_write_file(fcb* fcb, 
UINT64 start_data, UINT64 end_data, void* data, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback); -NTSTATUS write_compressed(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback); -BOOL find_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address); +NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, PIRP Irp, LIST_ENTRY* rollback); +NTSTATUS do_write_file(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, PIRP Irp, LIST_ENTRY* rollback); +NTSTATUS write_compressed(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, PIRP Irp, LIST_ENTRY* rollback); +BOOL find_data_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address); void get_raid56_lock_range(chunk* c, UINT64 address, UINT64 length, UINT64* lockaddr, UINT64* locklen); // in dirctrl.c @@ -875,7 +956,8 @@ ULONG STDCALL get_reparse_tag(device_extension* Vcb, root* subvol, UINT64 inode, // in security.c NTSTATUS STDCALL drv_query_security(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); NTSTATUS STDCALL drv_set_security(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); -void fcb_get_sd(fcb* fcb, struct _fcb* parent, PIRP Irp); +BOOL get_sd_from_xattr(fcb* fcb, ULONG buflen); +void fcb_get_sd(fcb* fcb, struct _fcb* parent, BOOL look_for_xattr, PIRP Irp); // UINT32 STDCALL get_uid(); void add_user_mapping(WCHAR* sidstring, ULONG sidstringlength, UINT32 uid); UINT32 sid_to_uid(PSID sid); @@ -891,6 +973,8 @@ NTSTATUS fileref_get_filename(file_ref* fileref, PUNICODE_STRING fn, USHORT* nam NTSTATUS open_fileref_by_inode(device_extension* Vcb, root* subvol, UINT64 inode, file_ref** pfr, PIRP Irp); NTSTATUS STDCALL drv_query_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); NTSTATUS STDCALL drv_set_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +void insert_dir_child_into_hash_lists(fcb* fcb, dir_child* dc); +void 
remove_dir_child_from_hash_lists(fcb* fcb, dir_child* dc); // in reparse.c NTSTATUS get_reparse_point(PDEVICE_OBJECT DeviceObject, PFILE_OBJECT FileObject, void* buffer, DWORD buflen, ULONG_PTR* retlen); @@ -899,15 +983,16 @@ NTSTATUS delete_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp); // in create.c NTSTATUS STDCALL drv_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); -NTSTATUS STDCALL find_file_in_dir(device_extension* Vcb, PUNICODE_STRING filename, file_ref* fr, - root** subvol, UINT64* inode, UINT8* type, UINT64* index, PANSI_STRING utf8, BOOL case_sensitive, PIRP Irp); -NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnus, file_ref* related, BOOL parent, USHORT* unparsed, ULONG* fn_offset, +NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnus, file_ref* related, BOOL parent, USHORT* parsed, ULONG* fn_offset, POOL_TYPE pooltype, BOOL case_sensitive, PIRP Irp); NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type, PANSI_STRING utf8, fcb* parent, fcb** pfcb, POOL_TYPE pooltype, PIRP Irp); NTSTATUS open_fcb_stream(device_extension* Vcb, root* subvol, UINT64 inode, ANSI_STRING* xattr, UINT32 streamhash, fcb* parent, fcb** pfcb, PIRP Irp); void insert_fileref_child(file_ref* parent, file_ref* child, BOOL do_lock); NTSTATUS fcb_get_last_dir_index(fcb* fcb, UINT64* index, PIRP Irp); NTSTATUS verify_vcb(device_extension* Vcb, PIRP Irp); +NTSTATUS load_csum(device_extension* Vcb, UINT32* csum, UINT64 start, UINT64 length, PIRP Irp); +NTSTATUS load_dir_children(fcb* fcb, BOOL ignore_size, PIRP Irp); +NTSTATUS add_dir_child(fcb* fcb, UINT64 inode, BOOL subvol, UINT64 index, PANSI_STRING utf8, PUNICODE_STRING name, PUNICODE_STRING name_uc, UINT8 type, dir_child** pdc); // in fsctl.c NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL user); @@ -920,11 +1005,15 @@ NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENT void 
flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback); NTSTATUS STDCALL write_data_phys(PDEVICE_OBJECT device, UINT64 address, void* data, UINT32 length); BOOL is_tree_unique(device_extension* Vcb, tree* t, PIRP Irp); +NTSTATUS do_tree_writes(device_extension* Vcb, LIST_ENTRY* tree_writes, PIRP Irp); +void add_checksum_entry(device_extension* Vcb, UINT64 address, ULONG length, UINT32* csum, PIRP Irp, LIST_ENTRY* rollback); +BOOL find_metadata_address_in_chunk(device_extension* Vcb, chunk* c, UINT64* address); // in read.c NTSTATUS STDCALL drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp); -NTSTATUS STDCALL read_data(device_extension* Vcb, UINT64 addr, UINT32 length, UINT32* csum, BOOL is_tree, UINT8* buf, chunk* c, chunk** pc, PIRP Irp); -NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, ULONG* pbr, PIRP Irp); +NTSTATUS STDCALL read_data(device_extension* Vcb, UINT64 addr, UINT32 length, UINT32* csum, BOOL is_tree, UINT8* buf, chunk* c, chunk** pc, + PIRP Irp, BOOL check_nocsum_parity); +NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, ULONG* pbr, PIRP Irp, BOOL check_nocsum_parity); NTSTATUS do_read(PIRP Irp, BOOL wait, ULONG* bytes_read); // in pnp.c @@ -948,7 +1037,8 @@ void _space_list_subtract2(device_extension* Vcb, LIST_ENTRY* list, LIST_ENTRY* // in extent-tree.c NTSTATUS increase_extent_refcount_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 inode, UINT64 offset, UINT32 refcount, PIRP Irp, LIST_ENTRY* rollback); -NTSTATUS decrease_extent_refcount_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 inode, UINT64 offset, UINT32 refcount, PIRP Irp, LIST_ENTRY* rollback); +NTSTATUS decrease_extent_refcount_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 inode, UINT64 offset, + UINT32 refcount, BOOL superseded, PIRP Irp, LIST_ENTRY* rollback); NTSTATUS 
decrease_extent_refcount_tree(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT8 level, PIRP Irp, LIST_ENTRY* rollback); void decrease_chunk_usage(chunk* c, UINT64 delta); // NTSTATUS convert_old_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp, LIST_ENTRY* rollback); @@ -963,7 +1053,8 @@ void add_changed_extent_ref(chunk* c, UINT64 address, UINT64 size, UINT64 root, UINT64 find_extent_shared_tree_refcount(device_extension* Vcb, UINT64 address, UINT64 parent, PIRP Irp); UINT64 find_extent_shared_data_refcount(device_extension* Vcb, UINT64 address, UINT64 parent, PIRP Irp); NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem, - UINT8 level, UINT64 parent, PIRP Irp, LIST_ENTRY* rollback); + UINT8 level, UINT64 parent, BOOL superseded, PIRP Irp, LIST_ENTRY* rollback); +UINT64 get_extent_data_ref_hash2(UINT64 root, UINT64 objid, UINT64 offset); // in worker-thread.c void do_read_job(PIRP Irp); @@ -977,7 +1068,7 @@ NTSTATUS registry_load_volume_options(device_extension* Vcb); // in compress.c NTSTATUS decompress(UINT8 type, UINT8* inbuf, UINT64 inlen, UINT8* outbuf, UINT64 outlen); -NTSTATUS write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback); +NTSTATUS write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, PIRP Irp, LIST_ENTRY* rollback); // in galois.c void galois_double(UINT8* data, UINT32 len); @@ -989,6 +1080,24 @@ UINT8 gdiv(UINT8 a, UINT8 b); // in devctrl.c NTSTATUS STDCALL drv_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp); +// in calcthread.c +#ifdef __REACTOS__ +void NTAPI calc_thread(void* context); +#else +void calc_thread(void* context); +#endif +NTSTATUS add_calc_job(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32* csum, calc_job** pcj); +void free_calc_job(calc_job* 
cj); + +// in balance.c +NTSTATUS start_balance(device_extension* Vcb, void* data, ULONG length, KPROCESSOR_MODE processor_mode); +NTSTATUS query_balance(device_extension* Vcb, void* data, ULONG length); +NTSTATUS pause_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode); +NTSTATUS resume_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode); +NTSTATUS stop_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode); +NTSTATUS look_for_balance_item(device_extension* Vcb); +NTSTATUS remove_device(device_extension* Vcb, void* data, ULONG length, KPROCESSOR_MODE processor_mode); + #define fast_io_possible(fcb) (!FsRtlAreThereCurrentFileLocks(&fcb->lock) && !fcb->Vcb->readonly ? FastIoIsPossible : FastIoIsQuestionable) static __inline void print_open_trees(device_extension* Vcb) { @@ -1048,6 +1157,8 @@ static __inline void do_xor(UINT8* buf1, UINT8* buf2, UINT32 len) { } } +#define first_device(Vcb) CONTAINING_RECORD(Vcb->devices.Flink, device, list_entry) + #ifdef DEBUG_FCB_REFCOUNTS #ifdef DEBUG_LONG_MESSAGES #define increase_fileref_refcount(fileref) {\ @@ -1144,6 +1255,29 @@ static __inline void do_xor(UINT8* buf1, UINT8* buf2, UINT32 len) { #define called_from_lxss() FALSE #endif +typedef BOOLEAN (*tPsIsDiskCountersEnabled)(); + +typedef VOID (*tPsUpdateDiskCounters)(PEPROCESS Process, ULONG64 BytesRead, ULONG64 BytesWritten, + ULONG ReadOperationCount, ULONG WriteOperationCount, ULONG FlushOperationCount); + +typedef BOOLEAN (*tCcCopyWriteEx)(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG Length, BOOLEAN Wait, + PVOID Buffer, PETHREAD IoIssuerThread); + +typedef BOOLEAN (*tCcCopyReadEx)(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG Length, BOOLEAN Wait, + PVOID Buffer, PIO_STATUS_BLOCK IoStatus, PETHREAD IoIssuerThread); + +#define CC_ENABLE_DISK_IO_ACCOUNTING 0x00000010 + +typedef VOID (*tCcSetAdditionalCacheAttributesEx)(PFILE_OBJECT FileObject, ULONG Flags); + +#ifndef __REACTOS__ +#undef RtlIsNtDdiVersionAvailable 
+ +BOOLEAN RtlIsNtDdiVersionAvailable(ULONG Version); + +PEPROCESS PsGetThreadProcess(PETHREAD Thread); // not in mingw +#endif + #if defined(__REACTOS__) && (NTDDI_VERSION < NTDDI_WIN7) NTSTATUS WINAPI RtlUnicodeToUTF8N(CHAR *utf8_dest, ULONG utf8_bytes_max, ULONG *utf8_bytes_written, diff --git a/reactos/drivers/filesystems/btrfs/btrfsioctl.h b/reactos/drivers/filesystems/btrfs/btrfsioctl.h index bc35ad0ad14..c28aafe4386 100644 --- a/reactos/drivers/filesystems/btrfs/btrfsioctl.h +++ b/reactos/drivers/filesystems/btrfs/btrfsioctl.h @@ -8,6 +8,17 @@ #define FSCTL_BTRFS_CREATE_SNAPSHOT CTL_CODE(FILE_DEVICE_UNKNOWN, 0x82b, METHOD_IN_DIRECT, FILE_ANY_ACCESS) #define FSCTL_BTRFS_GET_INODE_INFO CTL_CODE(FILE_DEVICE_UNKNOWN, 0x82c, METHOD_OUT_DIRECT, FILE_ANY_ACCESS) #define FSCTL_BTRFS_SET_INODE_INFO CTL_CODE(FILE_DEVICE_UNKNOWN, 0x82d, METHOD_IN_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_GET_DEVICES CTL_CODE(FILE_DEVICE_UNKNOWN, 0x82e, METHOD_OUT_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_GET_USAGE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x82f, METHOD_OUT_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_START_BALANCE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x830, METHOD_IN_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_QUERY_BALANCE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x831, METHOD_OUT_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_PAUSE_BALANCE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x832, METHOD_IN_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_RESUME_BALANCE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x833, METHOD_IN_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_STOP_BALANCE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x834, METHOD_IN_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_ADD_DEVICE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x835, METHOD_IN_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_REMOVE_DEVICE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x836, METHOD_IN_DIRECT, FILE_ANY_ACCESS) +#define IOCTL_BTRFS_QUERY_FILESYSTEMS CTL_CODE(FILE_DEVICE_UNKNOWN, 0x837, METHOD_OUT_DIRECT, FILE_ANY_ACCESS) +#define FSCTL_BTRFS_GET_UUID 
CTL_CODE(FILE_DEVICE_UNKNOWN, 0x838, METHOD_OUT_DIRECT, FILE_ANY_ACCESS) typedef struct { UINT64 subvol; @@ -46,4 +57,92 @@ typedef struct { BOOL mode_changed; } btrfs_set_inode_info; +typedef struct { + UINT32 next_entry; + UINT64 dev_id; + UINT64 size; + BOOL readonly; + ULONG device_number; + ULONG partition_number; + USHORT namelen; + WCHAR name[1]; +} btrfs_device; + +typedef struct { + UINT64 dev_id; + UINT64 alloc; +} btrfs_usage_device; + +typedef struct { + UINT32 next_entry; + UINT64 type; + UINT64 size; + UINT64 used; + UINT64 num_devices; + btrfs_usage_device devices[1]; +} btrfs_usage; + +#define BTRFS_BALANCE_OPTS_ENABLED 0x001 +#define BTRFS_BALANCE_OPTS_PROFILES 0x002 +#define BTRFS_BALANCE_OPTS_DEVID 0x004 +#define BTRFS_BALANCE_OPTS_DRANGE 0x008 +#define BTRFS_BALANCE_OPTS_VRANGE 0x010 +#define BTRFS_BALANCE_OPTS_LIMIT 0x020 +#define BTRFS_BALANCE_OPTS_STRIPES 0x040 +#define BTRFS_BALANCE_OPTS_USAGE 0x080 +#define BTRFS_BALANCE_OPTS_CONVERT 0x100 +#define BTRFS_BALANCE_OPTS_SOFT 0x200 + +#define BLOCK_FLAG_SINGLE 0x1000000000000 // only used in balance + +typedef struct { + UINT64 flags; + UINT64 profiles; + UINT64 devid; + UINT64 drange_start; + UINT64 drange_end; + UINT64 vrange_start; + UINT64 vrange_end; + UINT64 limit_start; + UINT64 limit_end; + UINT16 stripes_start; + UINT16 stripes_end; + UINT8 usage_start; + UINT8 usage_end; + UINT64 convert; +} btrfs_balance_opts; + +#define BTRFS_BALANCE_STOPPED 0 +#define BTRFS_BALANCE_RUNNING 1 +#define BTRFS_BALANCE_PAUSED 2 +#define BTRFS_BALANCE_REMOVAL 4 +#define BTRFS_BALANCE_ERROR 8 + +typedef struct { + UINT32 status; + UINT64 chunks_left; + UINT64 total_chunks; + NTSTATUS error; + btrfs_balance_opts data_opts; + btrfs_balance_opts metadata_opts; + btrfs_balance_opts system_opts; +} btrfs_query_balance; + +typedef struct { + btrfs_balance_opts opts[3]; +} btrfs_start_balance; + +typedef struct { + UINT8 uuid[16]; + USHORT name_length; + WCHAR name[1]; +} btrfs_filesystem_device; + +typedef 
struct { + UINT32 next_entry; + UINT8 uuid[16]; + UINT32 num_devices; + btrfs_filesystem_device device; +} btrfs_filesystem; + #endif diff --git a/reactos/drivers/filesystems/btrfs/cache.c b/reactos/drivers/filesystems/btrfs/cache.c index 2a6ba4d054e..75d87dbc98c 100644 --- a/reactos/drivers/filesystems/btrfs/cache.c +++ b/reactos/drivers/filesystems/btrfs/cache.c @@ -28,9 +28,14 @@ static BOOLEAN STDCALL acquire_for_lazy_write(PVOID Context, BOOLEAN Wait) { // if (!fcb || FileObject->Flags & FO_CLEANUP_COMPLETE) // return FALSE; + + if (!ExAcquireResourceSharedLite(&fcb->Vcb->tree_lock, Wait)) + return FALSE; - if (!ExAcquireResourceSharedLite(fcb->Header.PagingIoResource, Wait)) + if (!ExAcquireResourceExclusiveLite(fcb->Header.Resource, Wait)) { + ExReleaseResourceLite(&fcb->Vcb->tree_lock); return FALSE; + } fcb->lazy_writer_thread = KeGetCurrentThread(); @@ -48,7 +53,9 @@ static void STDCALL release_from_lazy_write(PVOID Context) { fcb->lazy_writer_thread = NULL; - ExReleaseResourceLite(fcb->Header.PagingIoResource); + ExReleaseResourceLite(fcb->Header.Resource); + + ExReleaseResourceLite(&fcb->Vcb->tree_lock); } static BOOLEAN STDCALL acquire_for_read_ahead(PVOID Context, BOOLEAN Wait) { diff --git a/reactos/drivers/filesystems/btrfs/calcthread.c b/reactos/drivers/filesystems/btrfs/calcthread.c new file mode 100644 index 00000000000..e84107d064f --- /dev/null +++ b/reactos/drivers/filesystems/btrfs/calcthread.c @@ -0,0 +1,142 @@ +/* Copyright (c) Mark Harmstone 2016 + * + * This file is part of WinBtrfs. + * + * WinBtrfs is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public Licence as published by + * the Free Software Foundation, either version 3 of the Licence, or + * (at your option) any later version. + * + * WinBtrfs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public Licence for more details. + * + * You should have received a copy of the GNU Lesser General Public Licence + * along with WinBtrfs. If not, see . */ + +#include "btrfs_drv.h" + +#define SECTOR_BLOCK 16 + +NTSTATUS add_calc_job(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32* csum, calc_job** pcj) { + calc_job* cj; + + cj = ExAllocatePoolWithTag(NonPagedPool, sizeof(calc_job), ALLOC_TAG); + if (!cj) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + cj->data = data; + cj->sectors = sectors; + cj->csum = csum; + cj->pos = 0; + cj->done = 0; + cj->refcount = 1; + KeInitializeEvent(&cj->event, NotificationEvent, FALSE); + + ExAcquireResourceExclusiveLite(&Vcb->calcthreads.lock, TRUE); + InsertTailList(&Vcb->calcthreads.job_list, &cj->list_entry); + ExReleaseResourceLite(&Vcb->calcthreads.lock); + + KeSetEvent(&Vcb->calcthreads.event, 0, FALSE); + KeClearEvent(&Vcb->calcthreads.event); + + *pcj = cj; + + return STATUS_SUCCESS; +} + +void free_calc_job(calc_job* cj) { + LONG rc = InterlockedDecrement(&cj->refcount); + + if (rc == 0) + ExFreePool(cj); +} + +static BOOL do_calc(device_extension* Vcb, calc_job* cj) { + LONG pos, done; + UINT32* csum; + UINT8* data; + ULONG blocksize, i; + + pos = InterlockedIncrement(&cj->pos) - 1; + + if (pos * SECTOR_BLOCK >= cj->sectors) + return FALSE; + + csum = &cj->csum[pos * SECTOR_BLOCK]; + data = cj->data + (pos * SECTOR_BLOCK * Vcb->superblock.sector_size); + + blocksize = min(SECTOR_BLOCK, cj->sectors - (pos * SECTOR_BLOCK)); + for (i = 0; i < blocksize; i++) { + *csum = ~calc_crc32c(0xffffffff, data, Vcb->superblock.sector_size); + csum++; + data += Vcb->superblock.sector_size; + } + + done = InterlockedIncrement(&cj->done); + + if (done * SECTOR_BLOCK >= cj->sectors) { + ExAcquireResourceExclusiveLite(&Vcb->calcthreads.lock, TRUE); + RemoveEntryList(&cj->list_entry); + ExReleaseResourceLite(&Vcb->calcthreads.lock); + + KeSetEvent(&cj->event, 0, FALSE); + 
} + + return TRUE; +} + +#ifdef __REACTOS__ +void NTAPI calc_thread(void* context) { +#else +void calc_thread(void* context) { +#endif + drv_calc_thread* thread = context; + device_extension* Vcb = thread->DeviceObject->DeviceExtension; + + ObReferenceObject(thread->DeviceObject); + + while (TRUE) { + KeWaitForSingleObject(&Vcb->calcthreads.event, Executive, KernelMode, FALSE, NULL); + + FsRtlEnterFileSystem(); + + while (TRUE) { + calc_job* cj; + BOOL b; + + ExAcquireResourceExclusiveLite(&Vcb->calcthreads.lock, TRUE); + + if (IsListEmpty(&Vcb->calcthreads.job_list)) { + ExReleaseResourceLite(&Vcb->calcthreads.lock); + break; + } + + cj = CONTAINING_RECORD(Vcb->calcthreads.job_list.Flink, calc_job, list_entry); + cj->refcount++; + + ExReleaseResourceLite(&Vcb->calcthreads.lock); + + b = do_calc(Vcb, cj); + + free_calc_job(cj); + + if (!b) + break; + } + + FsRtlExitFileSystem(); + + if (thread->quit) + break; + } + + ObDereferenceObject(thread->DeviceObject); + + KeSetEvent(&thread->finished, 0, FALSE); + + PsTerminateSystemThread(STATUS_SUCCESS); +} diff --git a/reactos/drivers/filesystems/btrfs/compress.c b/reactos/drivers/filesystems/btrfs/compress.c index 67ad41b3c8b..e94211b8dab 100755 --- a/reactos/drivers/filesystems/btrfs/compress.c +++ b/reactos/drivers/filesystems/btrfs/compress.c @@ -357,7 +357,7 @@ NTSTATUS decompress(UINT8 type, UINT8* inbuf, UINT64 inlen, UINT8* outbuf, UINT6 } } -static NTSTATUS zlib_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS zlib_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, PIRP Irp, LIST_ENTRY* rollback) { NTSTATUS Status; UINT8 compression; UINT64 comp_length; @@ -444,11 +444,11 @@ static NTSTATUS zlib_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 en while (le != &fcb->Vcb->chunks) { c = CONTAINING_RECORD(le, chunk, list_entry); - if 
(!c->readonly) { + if (!c->readonly && !c->reloc) { ExAcquireResourceExclusiveLite(&c->lock, TRUE); if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) { - if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, changed_sector_list, Irp, rollback, compression, end_data - start_data)) { + if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, Irp, rollback, compression, end_data - start_data)) { ExReleaseResourceLite(&fcb->Vcb->chunk_lock); if (compression != BTRFS_COMPRESSION_NONE) @@ -474,7 +474,7 @@ static NTSTATUS zlib_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 en ExAcquireResourceExclusiveLite(&c->lock, TRUE); if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) { - if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, changed_sector_list, Irp, rollback, compression, end_data - start_data)) { + if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, Irp, rollback, compression, end_data - start_data)) { if (compression != BTRFS_COMPRESSION_NONE) ExFreePool(comp_data); @@ -732,7 +732,7 @@ static __inline UINT32 lzo_max_outlen(UINT32 inlen) { return inlen + (inlen / 16) + 64 + 3; // formula comes from LZO.FAQ } -static NTSTATUS lzo_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS lzo_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, PIRP Irp, LIST_ENTRY* rollback) { NTSTATUS Status; UINT8 compression; UINT64 comp_length; @@ -829,11 +829,11 @@ static NTSTATUS lzo_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end while (le != &fcb->Vcb->chunks) { c = CONTAINING_RECORD(le, chunk, list_entry); - if (!c->readonly) { + if (!c->readonly && !c->reloc) { 
ExAcquireResourceExclusiveLite(&c->lock, TRUE); if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) { - if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, changed_sector_list, Irp, rollback, compression, end_data - start_data)) { + if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, Irp, rollback, compression, end_data - start_data)) { ExReleaseResourceLite(&fcb->Vcb->chunk_lock); if (compression != BTRFS_COMPRESSION_NONE) @@ -859,7 +859,7 @@ static NTSTATUS lzo_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end ExAcquireResourceExclusiveLite(&c->lock, TRUE); if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) { - if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, changed_sector_list, Irp, rollback, compression, end_data - start_data)) { + if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, Irp, rollback, compression, end_data - start_data)) { if (compression != BTRFS_COMPRESSION_NONE) ExFreePool(comp_data); @@ -876,7 +876,7 @@ static NTSTATUS lzo_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end return STATUS_DISK_FULL; } -NTSTATUS write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback) { +NTSTATUS write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, PIRP Irp, LIST_ENTRY* rollback) { UINT8 type; if (fcb->Vcb->options.compress_type != 0) @@ -890,7 +890,7 @@ NTSTATUS write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void if (type == BTRFS_COMPRESSION_LZO) { fcb->Vcb->superblock.incompat_flags |= BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO; - return lzo_write_compressed_bit(fcb, start_data, end_data, data, compressed, changed_sector_list, Irp, rollback); + return 
lzo_write_compressed_bit(fcb, start_data, end_data, data, compressed, Irp, rollback); } else - return zlib_write_compressed_bit(fcb, start_data, end_data, data, compressed, changed_sector_list, Irp, rollback); + return zlib_write_compressed_bit(fcb, start_data, end_data, data, compressed, Irp, rollback); } diff --git a/reactos/drivers/filesystems/btrfs/create.c b/reactos/drivers/filesystems/btrfs/create.c index f389dfb7eaa..04dc8f240e6 100644 --- a/reactos/drivers/filesystems/btrfs/create.c +++ b/reactos/drivers/filesystems/btrfs/create.c @@ -27,651 +27,6 @@ extern PDEVICE_OBJECT devobj; static WCHAR datastring[] = L"::$DATA"; -static NTSTATUS find_file_dir_index(device_extension* Vcb, root* r, UINT64 inode, UINT64 parinode, PANSI_STRING utf8, UINT64* pindex, PIRP Irp) { - KEY searchkey; - traverse_ptr tp; - NTSTATUS Status; - UINT64 index; - - searchkey.obj_id = inode; - searchkey.obj_type = TYPE_INODE_REF; - searchkey.offset = parinode; - - Status = find_item(Vcb, r, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - return Status; - } - - if (!keycmp(tp.item->key, searchkey)) { - INODE_REF* ir; - ULONG len; - - index = 0; - - ir = (INODE_REF*)tp.item->data; - len = tp.item->size; - - do { - ULONG itemlen; - - if (len < sizeof(INODE_REF) || len < sizeof(INODE_REF) - 1 + ir->n) { - ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - break; - } - - itemlen = sizeof(INODE_REF) - sizeof(char) + ir->n; - - if (ir->n == utf8->Length && RtlCompareMemory(ir->name, utf8->Buffer, ir->n) == ir->n) { - index = ir->index; - break; - } - - if (len > itemlen) { - len -= itemlen; - ir = (INODE_REF*)&ir->name[ir->n]; - } else - break; - } while (len > 0); - - if (index == 0) - return STATUS_NOT_FOUND; - - *pindex = index; - - return STATUS_SUCCESS; - } else - return STATUS_NOT_FOUND; -} - -static NTSTATUS find_file_dir_index_extref(device_extension* Vcb, root* r, 
UINT64 inode, UINT64 parinode, PANSI_STRING utf8, UINT64* pindex, PIRP Irp) { - KEY searchkey; - traverse_ptr tp; - NTSTATUS Status; - UINT64 index; - - searchkey.obj_id = inode; - searchkey.obj_type = TYPE_INODE_EXTREF; - searchkey.offset = calc_crc32c((UINT32)parinode, (UINT8*)utf8->Buffer, utf8->Length); - - Status = find_item(Vcb, r, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - return Status; - } - - if (!keycmp(tp.item->key, searchkey)) { - INODE_EXTREF* ier; - ULONG len; - - index = 0; - - ier = (INODE_EXTREF*)tp.item->data; - len = tp.item->size; - - do { - ULONG itemlen; - - if (len < sizeof(INODE_EXTREF) || len < sizeof(INODE_EXTREF) - 1 + ier->n) { - ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - break; - } - - itemlen = sizeof(INODE_EXTREF) - sizeof(char) + ier->n; - - if (ier->n == utf8->Length && RtlCompareMemory(ier->name, utf8->Buffer, ier->n) == ier->n) { - index = ier->index; - break; - } - - if (len > itemlen) { - len -= itemlen; - ier = (INODE_EXTREF*)&ier->name[ier->n]; - } else - break; - } while (len > 0); - - if (index == 0) - return STATUS_NOT_FOUND; - - *pindex = index; - - return STATUS_SUCCESS; - } else - return STATUS_NOT_FOUND; -} - -static NTSTATUS find_subvol_dir_index(device_extension* Vcb, root* r, UINT64 subvolid, UINT64 parinode, PANSI_STRING utf8, UINT64* pindex, PIRP Irp) { - KEY searchkey; - traverse_ptr tp; - NTSTATUS Status; - ROOT_REF* rr; - - searchkey.obj_id = r->id; - searchkey.obj_type = TYPE_ROOT_REF; - searchkey.offset = subvolid; - - Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - return Status; - } - - if (keycmp(tp.item->key, searchkey)) { - ERR("couldn't find (%llx,%x,%llx) in root tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset); - return STATUS_INTERNAL_ERROR; - } - - 
if (tp.item->size < sizeof(ROOT_REF)) { - ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, - tp.item->size, sizeof(ROOT_REF)); - return STATUS_INTERNAL_ERROR; - } - - rr = (ROOT_REF*)tp.item->data; - - if (tp.item->size < sizeof(ROOT_REF) - 1 + rr->n) { - ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, - tp.item->size, sizeof(ROOT_REF) - 1 + rr->n); - return STATUS_INTERNAL_ERROR; - } - - if (rr->dir == parinode && rr->n == utf8->Length && RtlCompareMemory(utf8->Buffer, rr->name, rr->n) == rr->n) { - *pindex = rr->index; - return STATUS_SUCCESS; - } else - return STATUS_NOT_FOUND; -} - -static NTSTATUS load_index_list(fcb* fcb, PIRP Irp) { - KEY searchkey; - traverse_ptr tp, next_tp; - NTSTATUS Status; - BOOL b; - - searchkey.obj_id = fcb->inode; - searchkey.obj_type = TYPE_DIR_INDEX; - searchkey.offset = 2; - - Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - return Status; - } - - if (keycmp(tp.item->key, searchkey) == -1) { - if (find_next_item(fcb->Vcb, &tp, &next_tp, FALSE, Irp)) { - tp = next_tp; - - TRACE("moving on to %llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - } - } - - if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_DIR_INDEX) { - Status = STATUS_SUCCESS; - goto end; - } - - do { - DIR_ITEM* di; - - TRACE("key: %llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - di = (DIR_ITEM*)tp.item->data; - - if (tp.item->size < sizeof(DIR_ITEM) || tp.item->size < (sizeof(DIR_ITEM) - 1 + di->m + di->n)) { - WARN("(%llx,%x,%llx) is truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - } else { - index_entry* ie; - ULONG stringlen; - UNICODE_STRING us; - LIST_ENTRY* le; - BOOL inserted; - - ie = 
ExAllocatePoolWithTag(PagedPool, sizeof(index_entry), ALLOC_TAG); - if (!ie) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - ie->utf8.Length = ie->utf8.MaximumLength = di->n; - - if (di->n > 0) { - ie->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, ie->utf8.MaximumLength, ALLOC_TAG); - if (!ie->utf8.Buffer) { - ERR("out of memory\n"); - ExFreePool(ie); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - RtlCopyMemory(ie->utf8.Buffer, di->name, di->n); - } else - ie->utf8.Buffer = NULL; - - Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, di->name, di->n); - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); - if (ie->utf8.Buffer) ExFreePool(ie->utf8.Buffer); - ExFreePool(ie); - goto nextitem; - } - - if (stringlen == 0) { - ERR("UTF8 length was 0\n"); - if (ie->utf8.Buffer) ExFreePool(ie->utf8.Buffer); - ExFreePool(ie); - goto nextitem; - } - - us.Length = us.MaximumLength = stringlen; - us.Buffer = ExAllocatePoolWithTag(PagedPool, us.MaximumLength, ALLOC_TAG); - - if (!us.Buffer) { - ERR("out of memory\n"); - if (ie->utf8.Buffer) ExFreePool(ie->utf8.Buffer); - ExFreePool(ie); - return STATUS_INSUFFICIENT_RESOURCES; - } - - Status = RtlUTF8ToUnicodeN(us.Buffer, stringlen, &stringlen, di->name, di->n); - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); - ExFreePool(us.Buffer); - if (ie->utf8.Buffer) ExFreePool(ie->utf8.Buffer); - ExFreePool(ie); - goto nextitem; - } - - Status = RtlUpcaseUnicodeString(&ie->filepart_uc, &us, TRUE); - if (!NT_SUCCESS(Status)) { - ERR("RtlUpcaseUnicodeString returned %08x\n", Status); - ExFreePool(us.Buffer); - if (ie->utf8.Buffer) ExFreePool(ie->utf8.Buffer); - ExFreePool(ie); - goto nextitem; - } - - ie->key = di->key; - ie->type = di->type; - ie->index = tp.item->key.offset; - - ie->hash = calc_crc32c(0xfffffffe, (UINT8*)ie->filepart_uc.Buffer, (ULONG)ie->filepart_uc.Length); - inserted = FALSE; - - le = 
fcb->index_list.Flink; - while (le != &fcb->index_list) { - index_entry* ie2 = CONTAINING_RECORD(le, index_entry, list_entry); - - if (ie2->hash >= ie->hash) { - InsertHeadList(le->Blink, &ie->list_entry); - inserted = TRUE; - break; - } - - le = le->Flink; - } - - if (!inserted) - InsertTailList(&fcb->index_list, &ie->list_entry); - } - -nextitem: - b = find_next_item(fcb->Vcb, &tp, &next_tp, FALSE, Irp); - - if (b) { - tp = next_tp; - - b = tp.item->key.obj_id == fcb->inode && tp.item->key.obj_type == TYPE_DIR_INDEX; - } - } while (b); - - Status = STATUS_SUCCESS; - -end: - if (!NT_SUCCESS(Status)) { - while (!IsListEmpty(&fcb->index_list)) { - LIST_ENTRY* le = RemoveHeadList(&fcb->index_list); - index_entry* ie = CONTAINING_RECORD(le, index_entry, list_entry); - - if (ie->utf8.Buffer) ExFreePool(ie->utf8.Buffer); - if (ie->filepart_uc.Buffer) ExFreePool(ie->filepart_uc.Buffer); - ExFreePool(ie); - } - } else - mark_fcb_dirty(fcb); // It's not necessarily dirty, but this is an easy way of making sure - // the list remains in memory until the next flush. 
- - return Status; -} - -static NTSTATUS STDCALL find_file_in_dir_index(file_ref* fr, PUNICODE_STRING filename, root** subvol, UINT64* inode, UINT8* type, - UINT64* pindex, PANSI_STRING utf8, PIRP Irp) { - LIST_ENTRY* le; - NTSTATUS Status; - UNICODE_STRING us; - UINT32 hash; - - Status = RtlUpcaseUnicodeString(&us, filename, TRUE); - if (!NT_SUCCESS(Status)) { - ERR("RtlUpcaseUnicodeString returned %08x\n", Status); - return Status; - } - - hash = calc_crc32c(0xfffffffe, (UINT8*)us.Buffer, (ULONG)us.Length); - - ExAcquireResourceExclusiveLite(&fr->fcb->nonpaged->index_lock, TRUE); - - if (!fr->fcb->index_loaded) { - Status = load_index_list(fr->fcb, Irp); - if (!NT_SUCCESS(Status)) { - ERR("load_index_list returned %08x\n", Status); - goto end; - } - - fr->fcb->index_loaded = TRUE; - } - - ExConvertExclusiveToSharedLite(&fr->fcb->nonpaged->index_lock); - - le = fr->fcb->index_list.Flink; - while (le != &fr->fcb->index_list) { - index_entry* ie = CONTAINING_RECORD(le, index_entry, list_entry); - - if (ie->hash == hash && ie->filepart_uc.Length == us.Length && RtlCompareMemory(ie->filepart_uc.Buffer, us.Buffer, us.Length) == us.Length) { - LIST_ENTRY* le; - BOOL ignore_entry = FALSE; - - ExAcquireResourceSharedLite(&fr->nonpaged->children_lock, TRUE); - - le = fr->children.Flink; - while (le != &fr->children) { - file_ref* fr2 = CONTAINING_RECORD(le, file_ref, list_entry); - - if (fr2->index == ie->index) { - if (fr2->deleted || fr2->filepart_uc.Length != us.Length || - RtlCompareMemory(fr2->filepart_uc.Buffer, us.Buffer, us.Length) != us.Length) { - ignore_entry = TRUE; - break; - } - break; - } else if (fr2->index > ie->index) - break; - - le = le->Flink; - } - - ExReleaseResourceLite(&fr->nonpaged->children_lock); - - if (ignore_entry) - goto nextitem; - - if (ie->key.obj_type == TYPE_ROOT_ITEM) { - if (subvol) { - *subvol = NULL; - - le = fr->fcb->Vcb->roots.Flink; - while (le != &fr->fcb->Vcb->roots) { - root* r2 = CONTAINING_RECORD(le, root, list_entry); - - 
if (r2->id == ie->key.obj_id) { - *subvol = r2; - break; - } - - le = le->Flink; - } - } - - if (inode) - *inode = SUBVOL_ROOT_INODE; - - if (type) - *type = BTRFS_TYPE_DIRECTORY; - } else { - if (subvol) - *subvol = fr->fcb->subvol; - - if (inode) - *inode = ie->key.obj_id; - - if (type) - *type = ie->type; - } - - if (utf8) { - utf8->MaximumLength = utf8->Length = ie->utf8.Length; - utf8->Buffer = ExAllocatePoolWithTag(PagedPool, utf8->MaximumLength, ALLOC_TAG); - if (!utf8->Buffer) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - RtlCopyMemory(utf8->Buffer, ie->utf8.Buffer, ie->utf8.Length); - } - - if (pindex) - *pindex = ie->index; - - Status = STATUS_SUCCESS; - goto end; - } else if (ie->hash > hash) { - Status = STATUS_OBJECT_NAME_NOT_FOUND; - goto end; - } - -nextitem: - le = le->Flink; - } - - Status = STATUS_OBJECT_NAME_NOT_FOUND; - -end: - ExReleaseResourceLite(&fr->fcb->nonpaged->index_lock); - - ExFreePool(us.Buffer); - - return Status; -} - -static NTSTATUS STDCALL find_file_in_dir_with_crc32(device_extension* Vcb, PUNICODE_STRING filename, UINT32 crc32, file_ref* fr, - root** subvol, UINT64* inode, UINT8* type, UINT64* pindex, PANSI_STRING utf8, - BOOL case_sensitive, PIRP Irp) { - DIR_ITEM* di; - KEY searchkey; - traverse_ptr tp; - NTSTATUS Status; - ULONG stringlen; - - TRACE("(%p, %.*S, %08x, (%llx, %llx), %p, %p, %p)\n", Vcb, filename->Length / sizeof(WCHAR), filename->Buffer, crc32, - fr->fcb->subvol->id, fr->fcb->inode, subvol, inode, type); - - searchkey.obj_id = fr->fcb->inode; - searchkey.obj_type = TYPE_DIR_ITEM; - searchkey.offset = crc32; - - Status = find_item(Vcb, fr->fcb->subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - return Status; - } - - TRACE("found item %llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - - if (!keycmp(searchkey, tp.item->key)) { - UINT32 size = tp.item->size; - - // 
found by hash - - if (tp.item->size < sizeof(DIR_ITEM)) { - WARN("(%llx;%llx,%x,%llx) was %u bytes, expected at least %u\n", fr->fcb->subvol->id, tp.item->key.obj_id, tp.item->key.obj_type, - tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM)); - } else { - di = (DIR_ITEM*)tp.item->data; - - while (size > 0) { - if (size < sizeof(DIR_ITEM) || size < (sizeof(DIR_ITEM) - 1 + di->m + di->n)) { - WARN("(%llx,%x,%llx) is truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - break; - } - - size -= sizeof(DIR_ITEM) - sizeof(char); - size -= di->n; - size -= di->m; - - Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, di->name, di->n); - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); - } else { - WCHAR* utf16 = ExAllocatePoolWithTag(PagedPool, stringlen, ALLOC_TAG); - UNICODE_STRING us; - - if (!utf16) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - Status = RtlUTF8ToUnicodeN(utf16, stringlen, &stringlen, di->name, di->n); - - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); - } else { - ANSI_STRING nutf8; - - us.Buffer = utf16; - us.Length = us.MaximumLength = (USHORT)stringlen; - - if (FsRtlAreNamesEqual(filename, &us, !case_sensitive, NULL)) { - UINT64 index; - - if (di->key.obj_type == TYPE_ROOT_ITEM) { - LIST_ENTRY* le = Vcb->roots.Flink; - - if (subvol) { - *subvol = NULL; - - while (le != &Vcb->roots) { - root* r2 = CONTAINING_RECORD(le, root, list_entry); - - if (r2->id == di->key.obj_id) { - *subvol = r2; - break; - } - - le = le->Flink; - } - } - - if (inode) - *inode = SUBVOL_ROOT_INODE; - - if (type) - *type = BTRFS_TYPE_DIRECTORY; - } else { - if (subvol) - *subvol = fr->fcb->subvol; - - if (inode) - *inode = di->key.obj_id; - - if (type) - *type = di->type; - } - - if (utf8) { - utf8->MaximumLength = di->n; - utf8->Length = utf8->MaximumLength; - utf8->Buffer = ExAllocatePoolWithTag(PagedPool, utf8->MaximumLength, ALLOC_TAG); - if 
(!utf8->Buffer) { - ERR("out of memory\n"); - ExFreePool(utf16); - return STATUS_INSUFFICIENT_RESOURCES; - } - - RtlCopyMemory(utf8->Buffer, di->name, di->n); - } - - ExFreePool(utf16); - - index = 0; - - if (fr->fcb->subvol != Vcb->root_root) { - nutf8.Buffer = di->name; - nutf8.Length = nutf8.MaximumLength = di->n; - - if (di->key.obj_type == TYPE_ROOT_ITEM) { - Status = find_subvol_dir_index(Vcb, fr->fcb->subvol, di->key.obj_id, fr->fcb->inode, &nutf8, &index, Irp); - if (!NT_SUCCESS(Status)) { - ERR("find_subvol_dir_index returned %08x\n", Status); - return Status; - } - } else { - Status = find_file_dir_index(Vcb, fr->fcb->subvol, di->key.obj_id, fr->fcb->inode, &nutf8, &index, Irp); - if (!NT_SUCCESS(Status)) { - if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF) { - Status = find_file_dir_index_extref(Vcb, fr->fcb->subvol, di->key.obj_id, fr->fcb->inode, &nutf8, &index, Irp); - - if (!NT_SUCCESS(Status)) { - ERR("find_file_dir_index_extref returned %08x\n", Status); - return Status; - } - } else { - ERR("find_file_dir_index returned %08x\n", Status); - return Status; - } - } - } - } - - if (index != 0) { - LIST_ENTRY* le = fr->children.Flink; - - while (le != &fr->children) { - file_ref* fr2 = CONTAINING_RECORD(le, file_ref, list_entry); - - if (fr2->index == index) { - if (fr2->deleted || !FsRtlAreNamesEqual(&fr2->filepart, filename, !case_sensitive, NULL)) { - goto byindex; - } - break; - } else if (fr2->index > index) - break; - - le = le->Flink; - } - } - -// TRACE("found %.*S by hash at (%llx,%llx)\n", filename->Length / sizeof(WCHAR), filename->Buffer, (*subvol)->id, *inode); - - if (pindex) - *pindex = index; - - return STATUS_SUCCESS; - } - } - - ExFreePool(utf16); - } - - di = (DIR_ITEM*)&di->name[di->n + di->m]; - } - } - } - -byindex: - if (case_sensitive) - return STATUS_OBJECT_NAME_NOT_FOUND; - - Status = find_file_in_dir_index(fr, filename, subvol, inode, type, pindex, utf8, Irp); - if (!NT_SUCCESS(Status) && Status != 
STATUS_OBJECT_NAME_NOT_FOUND) { - ERR("find_file_in_dir_index returned %08x\n", Status); - return Status; - } - - return Status; -} - fcb* create_fcb(POOL_TYPE pool_type) { fcb* fcb; @@ -711,14 +66,17 @@ fcb* create_fcb(POOL_TYPE pool_type) { ExInitializeResourceLite(&fcb->nonpaged->resource); fcb->Header.Resource = &fcb->nonpaged->resource; - ExInitializeResourceLite(&fcb->nonpaged->index_lock); + ExInitializeResourceLite(&fcb->nonpaged->dir_children_lock); FsRtlInitializeFileLock(&fcb->lock, NULL, NULL); InitializeListHead(&fcb->extents); - InitializeListHead(&fcb->index_list); InitializeListHead(&fcb->hardlinks); + InitializeListHead(&fcb->dir_children_index); + InitializeListHead(&fcb->dir_children_hash); + InitializeListHead(&fcb->dir_children_hash_uc); + return fcb; } @@ -740,52 +98,148 @@ file_ref* create_fileref() { return NULL; } - fr->refcount = 1; - -#ifdef DEBUG_FCB_REFCOUNTS - WARN("fileref %p: refcount now 1\n", fr); -#endif - - InitializeListHead(&fr->children); - - ExInitializeResourceLite(&fr->nonpaged->children_lock); - - return fr; -} + fr->refcount = 1; + +#ifdef DEBUG_FCB_REFCOUNTS + WARN("fileref %p: refcount now 1\n", fr); +#endif + + InitializeListHead(&fr->children); + + ExInitializeResourceLite(&fr->nonpaged->children_lock); + + return fr; +} + +static NTSTATUS STDCALL find_file_in_dir(device_extension* Vcb, PUNICODE_STRING filename, fcb* fcb, + root** subvol, UINT64* inode, dir_child** pdc, BOOL case_sensitive, PIRP Irp) { + NTSTATUS Status; + UNICODE_STRING fnus; + UINT32 hash; + LIST_ENTRY* le; + UINT8 c; + + if (!case_sensitive) { + Status = RtlUpcaseUnicodeString(&fnus, filename, TRUE); + + if (!NT_SUCCESS(Status)) { + ERR("RtlUpcaseUnicodeString returned %08x\n", Status); + return Status; + } + } else + fnus = *filename; + + hash = calc_crc32c(0xffffffff, (UINT8*)fnus.Buffer, fnus.Length); + + c = hash >> 24; + + ExAcquireResourceSharedLite(&fcb->nonpaged->dir_children_lock, TRUE); + + if (case_sensitive) { + if (!fcb->hash_ptrs[c]) 
{ + Status = STATUS_OBJECT_NAME_NOT_FOUND; + goto end; + } + + le = fcb->hash_ptrs[c]; + while (le != &fcb->dir_children_hash) { + dir_child* dc = CONTAINING_RECORD(le, dir_child, list_entry_hash); + + if (dc->hash == hash) { + if (dc->name.Length == fnus.Length && RtlCompareMemory(dc->name.Buffer, fnus.Buffer, fnus.Length) == fnus.Length) { + if (dc->key.obj_type == TYPE_ROOT_ITEM) { + LIST_ENTRY* le2; + + *subvol = NULL; + + le2 = fcb->Vcb->roots.Flink; + while (le2 != &fcb->Vcb->roots) { + root* r2 = CONTAINING_RECORD(le2, root, list_entry); + + if (r2->id == dc->key.obj_id) { + *subvol = r2; + break; + } + + le2 = le2->Flink; + } + + *inode = SUBVOL_ROOT_INODE; + } else { + *subvol = fcb->subvol; + *inode = dc->key.obj_id; + } + + *pdc = dc; + + Status = STATUS_SUCCESS; + goto end; + } + } else if (dc->hash > hash) { + Status = STATUS_OBJECT_NAME_NOT_FOUND; + goto end; + } + + le = le->Flink; + } + } else { + if (!fcb->hash_ptrs_uc[c]) { + Status = STATUS_OBJECT_NAME_NOT_FOUND; + goto end; + } + + le = fcb->hash_ptrs_uc[c]; + while (le != &fcb->dir_children_hash_uc) { + dir_child* dc = CONTAINING_RECORD(le, dir_child, list_entry_hash_uc); + + if (dc->hash_uc == hash) { + if (dc->name_uc.Length == fnus.Length && RtlCompareMemory(dc->name_uc.Buffer, fnus.Buffer, fnus.Length) == fnus.Length) { + if (dc->key.obj_type == TYPE_ROOT_ITEM) { + LIST_ENTRY* le2; + + *subvol = NULL; + + le2 = fcb->Vcb->roots.Flink; + while (le2 != &fcb->Vcb->roots) { + root* r2 = CONTAINING_RECORD(le2, root, list_entry); + + if (r2->id == dc->key.obj_id) { + *subvol = r2; + break; + } + + le2 = le2->Flink; + } + + *inode = SUBVOL_ROOT_INODE; + } else { + *subvol = fcb->subvol; + *inode = dc->key.obj_id; + } + + *pdc = dc; + + Status = STATUS_SUCCESS; + goto end; + } + } else if (dc->hash_uc > hash) { + Status = STATUS_OBJECT_NAME_NOT_FOUND; + goto end; + } + + le = le->Flink; + } + } + + Status = STATUS_OBJECT_NAME_NOT_FOUND; -NTSTATUS STDCALL find_file_in_dir(device_extension* Vcb, 
PUNICODE_STRING filename, file_ref* fr, - root** subvol, UINT64* inode, UINT8* type, UINT64* index, PANSI_STRING utf8, - BOOL case_sensitive, PIRP Irp) { - char* fn; - UINT32 crc32; - ULONG utf8len; - NTSTATUS Status; - - Status = RtlUnicodeToUTF8N(NULL, 0, &utf8len, filename->Buffer, filename->Length); - if (!NT_SUCCESS(Status)) { - ERR("RtlUnicodeToUTF8N 1 returned %08x\n", Status); - return Status; - } - - fn = ExAllocatePoolWithTag(PagedPool, utf8len, ALLOC_TAG); - if (!fn) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - Status = RtlUnicodeToUTF8N(fn, utf8len, &utf8len, filename->Buffer, filename->Length); - if (!NT_SUCCESS(Status)) { - ExFreePool(fn); - ERR("RtlUnicodeToUTF8N 2 returned %08x\n", Status); - return Status; - } - - TRACE("%.*s\n", utf8len, fn); +end: + ExReleaseResourceLite(&fcb->nonpaged->dir_children_lock); - crc32 = calc_crc32c(0xfffffffe, (UINT8*)fn, (ULONG)utf8len); - TRACE("crc32c(%.*s) = %08x\n", utf8len, fn, crc32); + if (!case_sensitive) + ExFreePool(fnus.Buffer); - return find_file_in_dir_with_crc32(Vcb, filename, crc32, fr, subvol, inode, type, index, utf8, case_sensitive, Irp); + return Status; } static BOOL find_stream(device_extension* Vcb, fcb* fcb, PUNICODE_STRING stream, PUNICODE_STRING newstreamname, UINT32* hash, PANSI_STRING xattr, PIRP Irp) { @@ -1178,14 +632,195 @@ end: return deleted; } +NTSTATUS load_csum(device_extension* Vcb, UINT32* csum, UINT64 start, UINT64 length, PIRP Irp) { + NTSTATUS Status; + KEY searchkey; + traverse_ptr tp, next_tp; + UINT64 i, j; + BOOL b; + + searchkey.obj_id = EXTENT_CSUM_ID; + searchkey.obj_type = TYPE_EXTENT_CSUM; + searchkey.offset = start; + + Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("error - find_item returned %08x\n", Status); + return Status; + } + + i = 0; + do { + if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { + ULONG readlen; + + if (start < 
tp.item->key.offset) + j = 0; + else + j = ((start - tp.item->key.offset) / Vcb->superblock.sector_size) + i; + + if (j * sizeof(UINT32) > tp.item->size || tp.item->key.offset > start + (i * Vcb->superblock.sector_size)) { + ERR("checksum not found for %llx\n", start + (i * Vcb->superblock.sector_size)); + return STATUS_INTERNAL_ERROR; + } + + readlen = min((tp.item->size / sizeof(UINT32)) - j, length - i); + RtlCopyMemory(&csum[i], tp.item->data + (j * sizeof(UINT32)), readlen * sizeof(UINT32)); + i += readlen; + + if (i == length) + break; + } + + b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp); + + if (b) + tp = next_tp; + } while (b); + + if (i < length) { + ERR("could not read checksums: offset %llx, length %llx sectors\n", start, length); + return STATUS_INTERNAL_ERROR; + } + + return STATUS_SUCCESS; +} + +NTSTATUS load_dir_children(fcb* fcb, BOOL ignore_size, PIRP Irp) { + KEY searchkey; + traverse_ptr tp, next_tp; + NTSTATUS Status; + + fcb->hash_ptrs = ExAllocatePoolWithTag(PagedPool, sizeof(LIST_ENTRY*) * 256, ALLOC_TAG); + if (!fcb->hash_ptrs) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(fcb->hash_ptrs, sizeof(LIST_ENTRY*) * 256); + + fcb->hash_ptrs_uc = ExAllocatePoolWithTag(PagedPool, sizeof(LIST_ENTRY*) * 256, ALLOC_TAG); + if (!fcb->hash_ptrs_uc) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(fcb->hash_ptrs_uc, sizeof(LIST_ENTRY*) * 256); + + if (!ignore_size && fcb->inode_item.st_size == 0) + return STATUS_SUCCESS; + + searchkey.obj_id = fcb->inode; + searchkey.obj_type = TYPE_DIR_INDEX; + searchkey.offset = 2; + + Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return Status; + } + + if (keycmp(tp.item->key, searchkey) == -1) { + if (find_next_item(fcb->Vcb, &tp, &next_tp, FALSE, Irp)) { + tp = next_tp; + TRACE("moving on to %llx,%x,%llx\n", tp.item->key.obj_id, 
tp.item->key.obj_type, tp.item->key.offset); + } + } + + while (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { + DIR_ITEM* di = (DIR_ITEM*)tp.item->data; + dir_child* dc; + ULONG utf16len; + + if (tp.item->size < sizeof(DIR_ITEM)) { + WARN("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM)); + goto cont; + } + + if (di->n == 0) { + WARN("(%llx,%x,%llx): DIR_ITEM name length is zero\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); + goto cont; + } + + Status = RtlUTF8ToUnicodeN(NULL, 0, &utf16len, di->name, di->n); + if (!NT_SUCCESS(Status)) { + ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); + goto cont; + } + + dc = ExAllocatePoolWithTag(PagedPool, sizeof(dir_child), ALLOC_TAG); + if (!dc) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + dc->key = di->key; + dc->index = tp.item->key.offset; + dc->type = di->type; + dc->fileref = NULL; + + dc->utf8.MaximumLength = dc->utf8.Length = di->n; + dc->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, di->n, ALLOC_TAG); + if (!dc->utf8.Buffer) { + ERR("out of memory\n"); + ExFreePool(dc); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(dc->utf8.Buffer, di->name, di->n); + + dc->name.MaximumLength = dc->name.Length = utf16len; + dc->name.Buffer = ExAllocatePoolWithTag(PagedPool, dc->name.MaximumLength, ALLOC_TAG); + if (!dc->name.Buffer) { + ERR("out of memory\n"); + ExFreePool(dc->utf8.Buffer); + ExFreePool(dc); + return STATUS_INSUFFICIENT_RESOURCES; + } + + Status = RtlUTF8ToUnicodeN(dc->name.Buffer, utf16len, &utf16len, di->name, di->n); + if (!NT_SUCCESS(Status)) { + ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); + ExFreePool(dc->utf8.Buffer); + ExFreePool(dc->name.Buffer); + ExFreePool(dc); + goto cont; + } + + Status = RtlUpcaseUnicodeString(&dc->name_uc, &dc->name, TRUE); + if (!NT_SUCCESS(Status)) { + 
ERR("RtlUpcaseUnicodeString returned %08x\n", Status); + ExFreePool(dc->utf8.Buffer); + ExFreePool(dc->name.Buffer); + ExFreePool(dc); + goto cont; + } + + dc->hash = calc_crc32c(0xffffffff, (UINT8*)dc->name.Buffer, dc->name.Length); + dc->hash_uc = calc_crc32c(0xffffffff, (UINT8*)dc->name_uc.Buffer, dc->name_uc.Length); + + InsertTailList(&fcb->dir_children_index, &dc->list_entry_index); + + insert_dir_child_into_hash_lists(fcb, dc); + +cont: + if (find_next_item(fcb->Vcb, &tp, &next_tp, FALSE, Irp)) + tp = next_tp; + else + break; + } + + return STATUS_SUCCESS; +} + NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type, PANSI_STRING utf8, fcb* parent, fcb** pfcb, POOL_TYPE pooltype, PIRP Irp) { KEY searchkey; - traverse_ptr tp; + traverse_ptr tp, next_tp; NTSTATUS Status; fcb* fcb; - BOOL b; - UINT8* eadata; - UINT16 ealen; + BOOL atts_set = FALSE, sd_set = FALSE, no_data; + LIST_ENTRY* lastle = NULL; + EXTENT_DATA* ed = NULL; if (!IsListEmpty(&subvol->fcbs)) { LIST_ENTRY* le = subvol->fcbs.Flink; @@ -1193,17 +828,22 @@ NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type, while (le != &subvol->fcbs) { fcb = CONTAINING_RECORD(le, struct _fcb, list_entry); - if (fcb->inode == inode && !fcb->ads) { + if (fcb->inode == inode) { + if (!fcb->ads) { #ifdef DEBUG_FCB_REFCOUNTS - LONG rc = InterlockedIncrement(&fcb->refcount); - - WARN("fcb %p: refcount now %i (subvol %llx, inode %llx)\n", fcb, rc, fcb->subvol->id, fcb->inode); + LONG rc = InterlockedIncrement(&fcb->refcount); + + WARN("fcb %p: refcount now %i (subvol %llx, inode %llx)\n", fcb, rc, fcb->subvol->id, fcb->inode); #else - InterlockedIncrement(&fcb->refcount); + InterlockedIncrement(&fcb->refcount); #endif - *pfcb = fcb; - return STATUS_SUCCESS; + *pfcb = fcb; + return STATUS_SUCCESS; + } + } else if (fcb->inode > inode) { + lastle = le->Blink; + break; } le = le->Flink; @@ -1250,326 +890,356 @@ NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 
inode, UINT8 type, else if (fcb->inode_item.st_mode & __S_IFBLK) fcb->type = BTRFS_TYPE_BLOCKDEV; else if (fcb->inode_item.st_mode & __S_IFIFO) - fcb->type = BTRFS_TYPE_FIFO; - else if (fcb->inode_item.st_mode & __S_IFLNK) - fcb->type = BTRFS_TYPE_SYMLINK; - else if (fcb->inode_item.st_mode & __S_IFSOCK) - fcb->type = BTRFS_TYPE_SOCKET; - else - fcb->type = BTRFS_TYPE_FILE; - } - - fcb->atts = get_file_attributes(Vcb, &fcb->inode_item, fcb->subvol, fcb->inode, fcb->type, utf8 && utf8->Buffer[0] == '.', FALSE, Irp); - - fcb_get_sd(fcb, parent, Irp); - - if (fcb->type == BTRFS_TYPE_DIRECTORY && fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT) { - UINT8* xattrdata; - UINT16 xattrlen; - - if (get_xattr(Vcb, subvol, inode, EA_REPARSE, EA_REPARSE_HASH, &xattrdata, &xattrlen, Irp)) { - fcb->reparse_xattr.Buffer = (char*)xattrdata; - fcb->reparse_xattr.Length = fcb->reparse_xattr.MaximumLength = xattrlen; - } else { - fcb->atts &= ~FILE_ATTRIBUTE_REPARSE_POINT; - - if (!Vcb->readonly && !(subvol->root_item.flags & BTRFS_SUBVOL_READONLY)) { - fcb->atts_changed = TRUE; - mark_fcb_dirty(fcb); - } - } - } - - fcb->ealen = 0; - - if (get_xattr(Vcb, subvol, inode, EA_EA, EA_EA_HASH, &eadata, &ealen, Irp)) { - ULONG offset; - - Status = IoCheckEaBufferValidity((FILE_FULL_EA_INFORMATION*)eadata, ealen, &offset); - - if (!NT_SUCCESS(Status)) { - WARN("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, offset); - ExFreePool(eadata); - } else { - FILE_FULL_EA_INFORMATION* eainfo; - fcb->ea_xattr.Buffer = (char*)eadata; - fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = ealen; - - fcb->ealen = 4; - - // calculate ealen - eainfo = (FILE_FULL_EA_INFORMATION*)eadata; - do { - fcb->ealen += 5 + eainfo->EaNameLength + eainfo->EaValueLength; - - if (eainfo->NextEntryOffset == 0) - break; - - eainfo = (FILE_FULL_EA_INFORMATION*)(((UINT8*)eainfo) + eainfo->NextEntryOffset); - } while (TRUE); - } - } - - InsertTailList(&subvol->fcbs, &fcb->list_entry); - 
InsertTailList(&Vcb->all_fcbs, &fcb->list_entry_all); - - fcb->Header.IsFastIoPossible = fast_io_possible(fcb); - - if (fcb->inode_item.st_size == 0 || (fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK)) { - fcb->Header.AllocationSize.QuadPart = 0; - fcb->Header.FileSize.QuadPart = 0; - fcb->Header.ValidDataLength.QuadPart = 0; - } else { - EXTENT_DATA* ed = NULL; - traverse_ptr next_tp; - - searchkey.obj_id = fcb->inode; - searchkey.obj_type = TYPE_EXTENT_DATA; - searchkey.offset = 0; - - Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - free_fcb(fcb); - return Status; - } - - do { - if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { - extent* ext; - BOOL unique = FALSE; - - ed = (EXTENT_DATA*)tp.item->data; - - if (tp.item->size < sizeof(EXTENT_DATA)) { - ERR("(%llx,%x,%llx) was %llx bytes, expected at least %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, - tp.item->size, sizeof(EXTENT_DATA)); - - free_fcb(fcb); - return STATUS_INTERNAL_ERROR; - } - - if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { - EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ed->data[0]; - - if (tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { - ERR("(%llx,%x,%llx) was %llx bytes, expected at least %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, - tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)); - - free_fcb(fcb); - return STATUS_INTERNAL_ERROR; - } - - if (ed2->address == 0 && ed2->size == 0) // sparse - goto nextitem; - - if (ed2->size != 0 && is_tree_unique(Vcb, tp.tree, Irp)) - unique = is_extent_unique(Vcb, ed2->address, ed2->size, Irp); - } - - ext = ExAllocatePoolWithTag(pooltype, sizeof(extent), ALLOC_TAG); - if (!ext) { - ERR("out of memory\n"); - free_fcb(fcb); - return STATUS_INSUFFICIENT_RESOURCES; - } - - ext->data = 
ExAllocatePoolWithTag(pooltype, tp.item->size, ALLOC_TAG); - if (!ext->data) { - ERR("out of memory\n"); - ExFreePool(ext); - free_fcb(fcb); - return STATUS_INSUFFICIENT_RESOURCES; - } - - ext->offset = tp.item->key.offset; - RtlCopyMemory(ext->data, tp.item->data, tp.item->size); - ext->datalen = tp.item->size; - ext->unique = unique; - ext->ignore = FALSE; - - InsertTailList(&fcb->extents, &ext->list_entry); - } - -nextitem: - b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp); - - if (b) { - tp = next_tp; - - if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type)) - break; - } - } while (b); - - if (ed && ed->type == EXTENT_TYPE_INLINE) - fcb->Header.AllocationSize.QuadPart = fcb->inode_item.st_size; - else - fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size); - - fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size; - fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size; - } - - // FIXME - only do if st_nlink > 1? 
- - searchkey.obj_id = inode; - searchkey.obj_type = TYPE_INODE_REF; - searchkey.offset = 0; - - Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - free_fcb(fcb); - return Status; + fcb->type = BTRFS_TYPE_FIFO; + else if (fcb->inode_item.st_mode & __S_IFLNK) + fcb->type = BTRFS_TYPE_SYMLINK; + else if (fcb->inode_item.st_mode & __S_IFSOCK) + fcb->type = BTRFS_TYPE_SOCKET; + else + fcb->type = BTRFS_TYPE_FILE; } - do { - traverse_ptr next_tp; + no_data = fcb->inode_item.st_size == 0 || (fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK); + + while (find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) { + tp = next_tp; + + if (tp.item->key.obj_id > inode) + break; + + if ((no_data && tp.item->key.obj_type > TYPE_XATTR_ITEM) || tp.item->key.obj_type > TYPE_EXTENT_DATA) + break; - if (tp.item->key.obj_id == searchkey.obj_id) { - if (tp.item->key.obj_type == TYPE_INODE_REF) { - ULONG len; - INODE_REF* ir; + if (fcb->inode_item.st_nlink > 1 && tp.item->key.obj_type == TYPE_INODE_REF) { + ULONG len; + INODE_REF* ir; + + len = tp.item->size; + ir = (INODE_REF*)tp.item->data; + + while (len >= sizeof(INODE_REF) - 1) { + hardlink* hl; + ULONG stringlen; + + hl = ExAllocatePoolWithTag(pooltype, sizeof(hardlink), ALLOC_TAG); + if (!hl) { + ERR("out of memory\n"); + free_fcb(fcb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + hl->parent = tp.item->key.offset; + hl->index = ir->index; + + hl->utf8.Length = hl->utf8.MaximumLength = ir->n; + + if (hl->utf8.Length > 0) { + hl->utf8.Buffer = ExAllocatePoolWithTag(pooltype, hl->utf8.MaximumLength, ALLOC_TAG); + RtlCopyMemory(hl->utf8.Buffer, ir->name, ir->n); + } - len = tp.item->size; - ir = (INODE_REF*)tp.item->data; + Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, ir->name, ir->n); + if (!NT_SUCCESS(Status)) { + ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); + ExFreePool(hl); + free_fcb(fcb); + return Status; + 
} + + hl->name.Length = hl->name.MaximumLength = stringlen; - while (len >= sizeof(INODE_REF) - 1) { - hardlink* hl; - ULONG stringlen; + if (stringlen == 0) + hl->name.Buffer = NULL; + else { + hl->name.Buffer = ExAllocatePoolWithTag(pooltype, hl->name.MaximumLength, ALLOC_TAG); - hl = ExAllocatePoolWithTag(pooltype, sizeof(hardlink), ALLOC_TAG); - if (!hl) { + if (!hl->name.Buffer) { ERR("out of memory\n"); + ExFreePool(hl); free_fcb(fcb); return STATUS_INSUFFICIENT_RESOURCES; } - hl->parent = tp.item->key.offset; - hl->index = ir->index; - - hl->utf8.Length = hl->utf8.MaximumLength = ir->n; - - if (hl->utf8.Length > 0) { - hl->utf8.Buffer = ExAllocatePoolWithTag(pooltype, hl->utf8.MaximumLength, ALLOC_TAG); - RtlCopyMemory(hl->utf8.Buffer, ir->name, ir->n); - } - - Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, ir->name, ir->n); + Status = RtlUTF8ToUnicodeN(hl->name.Buffer, stringlen, &stringlen, ir->name, ir->n); if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); + ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); + ExFreePool(hl->name.Buffer); ExFreePool(hl); free_fcb(fcb); return Status; } - - hl->name.Length = hl->name.MaximumLength = stringlen; - - if (stringlen == 0) - hl->name.Buffer = NULL; - else { - hl->name.Buffer = ExAllocatePoolWithTag(pooltype, hl->name.MaximumLength, ALLOC_TAG); - - if (!hl->name.Buffer) { - ERR("out of memory\n"); - ExFreePool(hl); - free_fcb(fcb); - return STATUS_INSUFFICIENT_RESOURCES; - } - - Status = RtlUTF8ToUnicodeN(hl->name.Buffer, stringlen, &stringlen, ir->name, ir->n); - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); - ExFreePool(hl->name.Buffer); - ExFreePool(hl); - free_fcb(fcb); - return Status; - } - } - - InsertTailList(&fcb->hardlinks, &hl->list_entry); - - len -= sizeof(INODE_REF) - 1 + ir->n; - ir = (INODE_REF*)&ir->name[ir->n]; } - } else if (tp.item->key.obj_type == TYPE_INODE_EXTREF) { - ULONG len; - INODE_EXTREF* ier; - len = tp.item->size; - 
ier = (INODE_EXTREF*)tp.item->data; + InsertTailList(&fcb->hardlinks, &hl->list_entry); + + len -= sizeof(INODE_REF) - 1 + ir->n; + ir = (INODE_REF*)&ir->name[ir->n]; + } + } else if (fcb->inode_item.st_nlink > 1 && tp.item->key.obj_type == TYPE_INODE_EXTREF) { + ULONG len; + INODE_EXTREF* ier; + + len = tp.item->size; + ier = (INODE_EXTREF*)tp.item->data; + + while (len >= sizeof(INODE_EXTREF) - 1) { + hardlink* hl; + ULONG stringlen; + + hl = ExAllocatePoolWithTag(pooltype, sizeof(hardlink), ALLOC_TAG); + if (!hl) { + ERR("out of memory\n"); + free_fcb(fcb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + hl->parent = ier->dir; + hl->index = ier->index; + + hl->utf8.Length = hl->utf8.MaximumLength = ier->n; + + if (hl->utf8.Length > 0) { + hl->utf8.Buffer = ExAllocatePoolWithTag(pooltype, hl->utf8.MaximumLength, ALLOC_TAG); + RtlCopyMemory(hl->utf8.Buffer, ier->name, ier->n); + } + + Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, ier->name, ier->n); + if (!NT_SUCCESS(Status)) { + ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); + ExFreePool(hl); + free_fcb(fcb); + return Status; + } + + hl->name.Length = hl->name.MaximumLength = stringlen; - while (len >= sizeof(INODE_EXTREF) - 1) { - hardlink* hl; - ULONG stringlen; + if (stringlen == 0) + hl->name.Buffer = NULL; + else { + hl->name.Buffer = ExAllocatePoolWithTag(pooltype, hl->name.MaximumLength, ALLOC_TAG); - hl = ExAllocatePoolWithTag(pooltype, sizeof(hardlink), ALLOC_TAG); - if (!hl) { + if (!hl->name.Buffer) { ERR("out of memory\n"); + ExFreePool(hl); free_fcb(fcb); return STATUS_INSUFFICIENT_RESOURCES; } - hl->parent = ier->dir; - hl->index = ier->index; - - hl->utf8.Length = hl->utf8.MaximumLength = ier->n; - - if (hl->utf8.Length > 0) { - hl->utf8.Buffer = ExAllocatePoolWithTag(pooltype, hl->utf8.MaximumLength, ALLOC_TAG); - RtlCopyMemory(hl->utf8.Buffer, ier->name, ier->n); - } - - Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, ier->name, ier->n); + Status = RtlUTF8ToUnicodeN(hl->name.Buffer, 
stringlen, &stringlen, ier->name, ier->n); if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); + ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); + ExFreePool(hl->name.Buffer); ExFreePool(hl); free_fcb(fcb); return Status; } + } + + InsertTailList(&fcb->hardlinks, &hl->list_entry); + + len -= sizeof(INODE_EXTREF) - 1 + ier->n; + ier = (INODE_EXTREF*)&ier->name[ier->n]; + } + } else if (tp.item->key.obj_type == TYPE_XATTR_ITEM) { + if (tp.item->size < sizeof(DIR_ITEM)) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM)); + continue; + } + + if (tp.item->key.offset == EA_REPARSE_HASH) { + UINT8* xattrdata; + UINT16 xattrlen; + + if (extract_xattr(tp.item->data, tp.item->size, EA_REPARSE, &xattrdata, &xattrlen)) { + fcb->reparse_xattr.Buffer = (char*)xattrdata; + fcb->reparse_xattr.Length = fcb->reparse_xattr.MaximumLength = xattrlen; + } + } else if (tp.item->key.offset == EA_EA_HASH) { + UINT8* eadata; + UINT16 ealen; + + if (extract_xattr(tp.item->data, tp.item->size, EA_EA, &eadata, &ealen)) { + ULONG offset; - hl->name.Length = hl->name.MaximumLength = stringlen; + Status = IoCheckEaBufferValidity((FILE_FULL_EA_INFORMATION*)eadata, ealen, &offset); - if (stringlen == 0) - hl->name.Buffer = NULL; - else { - hl->name.Buffer = ExAllocatePoolWithTag(pooltype, hl->name.MaximumLength, ALLOC_TAG); + if (!NT_SUCCESS(Status)) { + WARN("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, offset); + ExFreePool(eadata); + } else { + FILE_FULL_EA_INFORMATION* eainfo; + fcb->ea_xattr.Buffer = (char*)eadata; + fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = ealen; - if (!hl->name.Buffer) { - ERR("out of memory\n"); - ExFreePool(hl); - free_fcb(fcb); - return STATUS_INSUFFICIENT_RESOURCES; - } + fcb->ealen = 4; - Status = RtlUTF8ToUnicodeN(hl->name.Buffer, stringlen, &stringlen, ier->name, ier->n); - if 
(!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); - ExFreePool(hl->name.Buffer); - ExFreePool(hl); - free_fcb(fcb); - return Status; - } + // calculate ealen + eainfo = (FILE_FULL_EA_INFORMATION*)eadata; + do { + fcb->ealen += 5 + eainfo->EaNameLength + eainfo->EaValueLength; + + if (eainfo->NextEntryOffset == 0) + break; + + eainfo = (FILE_FULL_EA_INFORMATION*)(((UINT8*)eainfo) + eainfo->NextEntryOffset); + } while (TRUE); + } + } + } else if (tp.item->key.offset == EA_DOSATTRIB_HASH) { + UINT8* xattrdata; + UINT16 xattrlen; + + if (extract_xattr(tp.item->data, tp.item->size, EA_DOSATTRIB, &xattrdata, &xattrlen)) { + if (get_file_attributes_from_xattr((char*)xattrdata, xattrlen, &fcb->atts)) { + atts_set = TRUE; + + if (fcb->type == BTRFS_TYPE_DIRECTORY) + fcb->atts |= FILE_ATTRIBUTE_DIRECTORY; + else if (fcb->type == BTRFS_TYPE_SYMLINK) + fcb->atts |= FILE_ATTRIBUTE_REPARSE_POINT; } - InsertTailList(&fcb->hardlinks, &hl->list_entry); - - len -= sizeof(INODE_EXTREF) - 1 + ier->n; - ier = (INODE_EXTREF*)&ier->name[ier->n]; + ExFreePool(xattrdata); + } + } else if (tp.item->key.offset == EA_NTACL_HASH) { + UINT16 buflen; + + if (extract_xattr(tp.item->data, tp.item->size, EA_NTACL, (UINT8**)&fcb->sd, &buflen)) { + if (get_sd_from_xattr(fcb, buflen)) { + sd_set = TRUE; + } else + ExFreePool(fcb->sd); + } + } + } else if (tp.item->key.obj_type == TYPE_EXTENT_DATA) { + extent* ext; + BOOL unique = FALSE; + + ed = (EXTENT_DATA*)tp.item->data; + + if (tp.item->size < sizeof(EXTENT_DATA)) { + ERR("(%llx,%x,%llx) was %llx bytes, expected at least %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, + tp.item->size, sizeof(EXTENT_DATA)); + + free_fcb(fcb); + return STATUS_INTERNAL_ERROR; + } + + if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ed->data[0]; + + if (tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { + ERR("(%llx,%x,%llx) was %llx 
bytes, expected at least %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, + tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)); + + free_fcb(fcb); + return STATUS_INTERNAL_ERROR; } + + if (ed2->address == 0 && ed2->size == 0) // sparse + continue; + + if (ed2->size != 0 && is_tree_unique(Vcb, tp.tree, Irp)) + unique = is_extent_unique(Vcb, ed2->address, ed2->size, Irp); + } + + ext = ExAllocatePoolWithTag(pooltype, sizeof(extent), ALLOC_TAG); + if (!ext) { + ERR("out of memory\n"); + free_fcb(fcb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + ext->data = ExAllocatePoolWithTag(pooltype, tp.item->size, ALLOC_TAG); + if (!ext->data) { + ERR("out of memory\n"); + ExFreePool(ext); + free_fcb(fcb); + return STATUS_INSUFFICIENT_RESOURCES; } + + ext->offset = tp.item->key.offset; + RtlCopyMemory(ext->data, tp.item->data, tp.item->size); + ext->datalen = tp.item->size; + ext->unique = unique; + ext->ignore = FALSE; + ext->inserted = FALSE; + + if (ed->type == EXTENT_TYPE_REGULAR && !(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ed->data[0]; + UINT64 len; + + len = (ed->compression == BTRFS_COMPRESSION_NONE ? ed2->num_bytes : ed2->size) / Vcb->superblock.sector_size; + + ext->csum = ExAllocatePoolWithTag(NonPagedPool, len * sizeof(UINT32), ALLOC_TAG); + if (!ext->csum) { + ERR("out of memory\n"); + ExFreePool(ext); + free_fcb(fcb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + Status = load_csum(Vcb, ext->csum, ed2->address + (ed->compression == BTRFS_COMPRESSION_NONE ? 
ed2->offset : 0), len, Irp); + + if (!NT_SUCCESS(Status)) { + ERR("load_csum returned %08x\n", Status); + ExFreePool(ext); + free_fcb(fcb); + return Status; + } + } else + ext->csum = NULL; + + InsertTailList(&fcb->extents, &ext->list_entry); + } + } + + if (fcb->type == BTRFS_TYPE_DIRECTORY) { + Status = load_dir_children(fcb, FALSE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("load_dir_children returned %08x\n", Status); + free_fcb(fcb); + return Status; } + } + + if (no_data) { + fcb->Header.AllocationSize.QuadPart = 0; + fcb->Header.FileSize.QuadPart = 0; + fcb->Header.ValidDataLength.QuadPart = 0; + } else { + if (ed && ed->type == EXTENT_TYPE_INLINE) + fcb->Header.AllocationSize.QuadPart = fcb->inode_item.st_size; + else + fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size); - b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp); + fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size; + fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size; + } + + if (!atts_set) + fcb->atts = get_file_attributes(Vcb, &fcb->inode_item, fcb->subvol, fcb->inode, fcb->type, utf8 && utf8->Buffer[0] == '.', TRUE, Irp); + + if (!sd_set) + fcb_get_sd(fcb, parent, FALSE, Irp); + + if (fcb->type == BTRFS_TYPE_DIRECTORY && fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT && fcb->reparse_xattr.Length == 0) { + fcb->atts &= ~FILE_ATTRIBUTE_REPARSE_POINT; - if (b) { - tp = next_tp; - - if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > TYPE_INODE_EXTREF)) - break; + if (!Vcb->readonly && !(subvol->root_item.flags & BTRFS_SUBVOL_READONLY)) { + fcb->atts_changed = TRUE; + mark_fcb_dirty(fcb); } - } while (b); + } + + if (lastle) + InsertHeadList(lastle, &fcb->list_entry); + else + InsertTailList(&subvol->fcbs, &fcb->list_entry); + + InsertTailList(&Vcb->all_fcbs, &fcb->list_entry_all); + + fcb->Header.IsFastIoPossible = fast_io_possible(fcb); *pfcb = fcb; return 
STATUS_SUCCESS; @@ -1583,6 +1253,7 @@ NTSTATUS open_fcb_stream(device_extension* Vcb, root* subvol, UINT64 inode, ANSI NTSTATUS Status; KEY searchkey; traverse_ptr tp; + LIST_ENTRY* lastle = NULL; if (!IsListEmpty(&subvol->fcbs)) { LIST_ENTRY* le = subvol->fcbs.Flink; @@ -1590,18 +1261,23 @@ NTSTATUS open_fcb_stream(device_extension* Vcb, root* subvol, UINT64 inode, ANSI while (le != &subvol->fcbs) { fcb = CONTAINING_RECORD(le, struct _fcb, list_entry); - if (fcb->inode == inode && fcb->ads && fcb->adsxattr.Length == xattr->Length && - RtlCompareMemory(fcb->adsxattr.Buffer, xattr->Buffer, fcb->adsxattr.Length) == fcb->adsxattr.Length) { + if (fcb->inode == inode) { + if (fcb->ads && fcb->adsxattr.Length == xattr->Length && + RtlCompareMemory(fcb->adsxattr.Buffer, xattr->Buffer, fcb->adsxattr.Length) == fcb->adsxattr.Length) { #ifdef DEBUG_FCB_REFCOUNTS - LONG rc = InterlockedIncrement(&fcb->refcount); - - WARN("fcb %p: refcount now %i (subvol %llx, inode %llx)\n", fcb, rc, fcb->subvol->id, fcb->inode); + LONG rc = InterlockedIncrement(&fcb->refcount); + + WARN("fcb %p: refcount now %i (subvol %llx, inode %llx)\n", fcb, rc, fcb->subvol->id, fcb->inode); #else - InterlockedIncrement(&fcb->refcount); + InterlockedIncrement(&fcb->refcount); #endif - *pfcb = fcb; - return STATUS_SUCCESS; + *pfcb = fcb; + return STATUS_SUCCESS; + } + } else if (fcb->inode > inode) { + lastle = le->Blink; + break; } le = le->Flink; @@ -1668,7 +1344,11 @@ NTSTATUS open_fcb_stream(device_extension* Vcb, root* subvol, UINT64 inode, ANSI TRACE("stream found: size = %x, hash = %08x\n", xattrlen, fcb->adshash); - InsertTailList(&fcb->subvol->fcbs, &fcb->list_entry); + if (lastle) + InsertHeadList(lastle, &fcb->list_entry); + else + InsertTailList(&fcb->subvol->fcbs, &fcb->list_entry); + InsertTailList(&Vcb->all_fcbs, &fcb->list_entry_all); *pfcb = fcb; @@ -1703,11 +1383,197 @@ void insert_fileref_child(file_ref* parent, file_ref* child, BOOL do_lock) { } } - if (do_lock) - 
ExReleaseResourceLite(&parent->nonpaged->children_lock); + if (do_lock) + ExReleaseResourceLite(&parent->nonpaged->children_lock); +} + +static NTSTATUS open_fileref_child(device_extension* Vcb, file_ref* sf, PUNICODE_STRING name, BOOL case_sensitive, BOOL lastpart, BOOL streampart, + POOL_TYPE pooltype, file_ref** psf2, PIRP Irp) { + NTSTATUS Status; + file_ref* sf2; + + if (streampart) { + UNICODE_STRING streamname; + ANSI_STRING xattr; + UINT32 streamhash; + + sf2 = search_fileref_children(sf, name, case_sensitive); + + if (sf2) { + if (sf2->deleted) { + TRACE("element in path has been deleted\n"); + free_fileref(sf2); + return lastpart ? STATUS_OBJECT_NAME_NOT_FOUND : STATUS_OBJECT_PATH_NOT_FOUND; + } + + *psf2 = sf2; + + return STATUS_SUCCESS; + } + + streamname.Buffer = NULL; + streamname.Length = streamname.MaximumLength = 0; + xattr.Buffer = NULL; + xattr.Length = xattr.MaximumLength = 0; + + // FIXME - check if already opened + + if (!find_stream(Vcb, sf->fcb, name, &streamname, &streamhash, &xattr, Irp)) { + TRACE("could not find stream %.*S\n", name->Length / sizeof(WCHAR), name->Buffer); + + return STATUS_OBJECT_NAME_NOT_FOUND; + } else { + fcb* fcb; + + if (streamhash == EA_DOSATTRIB_HASH && xattr.Length == strlen(EA_DOSATTRIB) && + RtlCompareMemory(xattr.Buffer, EA_DOSATTRIB, xattr.Length) == xattr.Length) { + WARN("not allowing user.DOSATTRIB to be opened as stream\n"); + + return STATUS_OBJECT_NAME_NOT_FOUND; + } + + Status = open_fcb_stream(Vcb, sf->fcb->subvol, sf->fcb->inode, &xattr, streamhash, sf->fcb, &fcb, Irp); + if (!NT_SUCCESS(Status)) { + ERR("open_fcb_stream returned %08x\n", Status); + return Status; + } + + sf2 = create_fileref(); + if (!sf2) { + ERR("out of memory\n"); + free_fcb(fcb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + sf2->fcb = fcb; + + if (streamname.Buffer) // case has changed + sf2->filepart = streamname; + else { + sf2->filepart.MaximumLength = sf2->filepart.Length = name->Length; + sf2->filepart.Buffer = 
ExAllocatePoolWithTag(PagedPool, sf2->filepart.MaximumLength, ALLOC_TAG); + if (!sf2->filepart.Buffer) { + ERR("out of memory\n"); + free_fileref(sf2); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(sf2->filepart.Buffer, name->Buffer, name->Length); + } + + Status = RtlUpcaseUnicodeString(&sf2->filepart_uc, &sf2->filepart, TRUE); + if (!NT_SUCCESS(Status)) { + ERR("RtlUpcaseUnicodeString returned %08x\n", Status); + free_fileref(sf2); + return Status; + } + + // FIXME - make sure all functions know that ADS FCBs won't have a valid SD or INODE_ITEM + + sf2->parent = (struct _file_ref*)sf; + insert_fileref_child(sf, sf2, TRUE); + + increase_fileref_refcount(sf); + } + } else { + root* subvol; + UINT64 inode; + dir_child* dc; + + Status = find_file_in_dir(Vcb, name, sf->fcb, &subvol, &inode, &dc, case_sensitive, Irp); + if (Status == STATUS_OBJECT_NAME_NOT_FOUND) { + TRACE("could not find %.*S\n", name->Length / sizeof(WCHAR), name->Buffer); + + return lastpart ? STATUS_OBJECT_NAME_NOT_FOUND : STATUS_OBJECT_PATH_NOT_FOUND; + } else if (!NT_SUCCESS(Status)) { + ERR("find_file_in_dir returned %08x\n", Status); + return Status; + } else { + fcb* fcb; + + if (dc->fileref) { + if (!lastpart && dc->type != BTRFS_TYPE_DIRECTORY) { + WARN("passed path including file as subdirectory\n"); + return STATUS_OBJECT_PATH_NOT_FOUND; + } + + InterlockedIncrement(&dc->fileref->refcount); + *psf2 = dc->fileref; + return STATUS_SUCCESS; + } + + Status = open_fcb(Vcb, subvol, inode, dc->type, &dc->utf8, sf->fcb, &fcb, pooltype, Irp); + if (!NT_SUCCESS(Status)) { + ERR("open_fcb returned %08x\n", Status); + return Status; + } + + if (dc->type != BTRFS_TYPE_DIRECTORY && !lastpart && !(fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT)) { + WARN("passed path including file as subdirectory\n"); + free_fcb(fcb); + return STATUS_OBJECT_PATH_NOT_FOUND; + } + + sf2 = create_fileref(); + if (!sf2) { + ERR("out of memory\n"); + free_fcb(fcb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + 
sf2->fcb = fcb; + + if (dc->type == BTRFS_TYPE_DIRECTORY) + fcb->fileref = sf2; + + sf2->index = dc->index; + sf2->dc = dc; + dc->fileref = sf2; + + sf2->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, dc->utf8.Length, ALLOC_TAG); + if (!sf2->utf8.Buffer) { + ERR("out of memory\n"); + free_fileref(sf2); + return STATUS_INSUFFICIENT_RESOURCES; + } + + sf2->filepart.Buffer = ExAllocatePoolWithTag(PagedPool, dc->name.Length, ALLOC_TAG); + if (!sf2->filepart.Buffer) { + ERR("out of memory\n"); + free_fileref(sf2); + return STATUS_INSUFFICIENT_RESOURCES; + } + + sf2->filepart_uc.Buffer = ExAllocatePoolWithTag(PagedPool, dc->name_uc.Length, ALLOC_TAG); + if (!sf2->filepart_uc.Buffer) { + ERR("out of memory\n"); + free_fileref(sf2); + return STATUS_INSUFFICIENT_RESOURCES; + } + + sf2->utf8.Length = sf2->utf8.MaximumLength = dc->utf8.Length; + RtlCopyMemory(sf2->utf8.Buffer, dc->utf8.Buffer, dc->utf8.Length); + + sf2->filepart.Length = sf2->filepart.MaximumLength = dc->name.Length; + RtlCopyMemory(sf2->filepart.Buffer, dc->name.Buffer, dc->name.Length); + + sf2->filepart_uc.Length = sf2->filepart_uc.MaximumLength = dc->name_uc.Length; + RtlCopyMemory(sf2->filepart_uc.Buffer, dc->name_uc.Buffer, dc->name_uc.Length); + + sf2->parent = (struct _file_ref*)sf; + + insert_fileref_child(sf, sf2, TRUE); + + increase_fileref_refcount(sf); + } + } + + *psf2 = sf2; + + return STATUS_SUCCESS; } -NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnus, file_ref* related, BOOL parent, USHORT* unparsed, ULONG* fn_offset, +NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnus, file_ref* related, BOOL parent, USHORT* parsed, ULONG* fn_offset, POOL_TYPE pooltype, BOOL case_sensitive, PIRP Irp) { UNICODE_STRING fnus2; file_ref *dir, *sf, *sf2; @@ -1716,7 +1582,7 @@ NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnu BOOL has_stream; NTSTATUS Status; - TRACE("(%p, %p, %p, %u, %p)\n", Vcb, pfr, related, parent, 
unparsed); + TRACE("(%p, %p, %p, %u, %p)\n", Vcb, pfr, related, parent, parsed); #ifdef DEBUG if (!ExIsResourceAcquiredExclusiveLite(&Vcb->fcb_lock) && !ExIsResourceAcquiredExclusiveLite(&Vcb->tree_lock)) { @@ -1755,7 +1621,7 @@ NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnu ULONG cc; IO_STATUS_BLOCK iosb; - Status = dev_ioctl(Vcb->devices[0].devobj, IOCTL_STORAGE_CHECK_VERIFY, NULL, 0, &cc, sizeof(ULONG), TRUE, &iosb); + Status = dev_ioctl(first_device(Vcb)->devobj, IOCTL_STORAGE_CHECK_VERIFY, NULL, 0, &cc, sizeof(ULONG), TRUE, &iosb); if (!NT_SUCCESS(Status)) return Status; @@ -1763,6 +1629,10 @@ NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnu increase_fileref_refcount(Vcb->root_fileref); *pfr = Vcb->root_fileref; + + if (fn_offset) + *fn_offset = 0; + return STATUS_SUCCESS; } @@ -1807,190 +1677,26 @@ NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnu if (num_parts == 0) { Status = STATUS_SUCCESS; *pfr = dir; + + if (fn_offset) + *fn_offset = 0; + goto end2; } for (i = 0; i < num_parts; i++) { BOOL lastpart = (i == num_parts-1) || (i == num_parts-2 && has_stream); - sf2 = search_fileref_children(sf, &parts[i], case_sensitive); - - if (sf2 && sf2->fcb->type != BTRFS_TYPE_DIRECTORY && !lastpart) { - WARN("passed path including file as subdirectory\n"); - free_fileref(sf2); + Status = open_fileref_child(Vcb, sf, &parts[i], case_sensitive, lastpart, has_stream && i == num_parts - 1, pooltype, &sf2, Irp); + if (!NT_SUCCESS(Status)) { + if (Status == STATUS_OBJECT_PATH_NOT_FOUND || Status == STATUS_OBJECT_NAME_NOT_FOUND) + TRACE("open_fileref_child returned %08x\n", Status); + else + ERR("open_fileref_child returned %08x\n", Status); - Status = STATUS_OBJECT_PATH_NOT_FOUND; - goto end; - } - - if (sf2 && sf2->deleted) { - TRACE("element in path has been deleted\n"); - free_fileref(sf2); - Status = lastpart ? 
STATUS_OBJECT_NAME_NOT_FOUND : STATUS_OBJECT_PATH_NOT_FOUND; goto end; } - if (!sf2) { - if (has_stream && i == num_parts - 1) { - UNICODE_STRING streamname; - ANSI_STRING xattr; - UINT32 streamhash; - - streamname.Buffer = NULL; - streamname.Length = streamname.MaximumLength = 0; - xattr.Buffer = NULL; - xattr.Length = xattr.MaximumLength = 0; - - // FIXME - check if already opened - - if (!find_stream(Vcb, sf->fcb, &parts[i], &streamname, &streamhash, &xattr, Irp)) { - TRACE("could not find stream %.*S\n", parts[i].Length / sizeof(WCHAR), parts[i].Buffer); - - Status = STATUS_OBJECT_NAME_NOT_FOUND; - goto end; - } else { - fcb* fcb; - - if (streamhash == EA_DOSATTRIB_HASH && xattr.Length == strlen(EA_DOSATTRIB) && - RtlCompareMemory(xattr.Buffer, EA_DOSATTRIB, xattr.Length) == xattr.Length) { - WARN("not allowing user.DOSATTRIB to be opened as stream\n"); - - Status = STATUS_OBJECT_NAME_NOT_FOUND; - goto end; - } - - Status = open_fcb_stream(Vcb, sf->fcb->subvol, sf->fcb->inode, &xattr, streamhash, sf->fcb, &fcb, Irp); - if (!NT_SUCCESS(Status)) { - ERR("open_fcb_stream returned %08x\n", Status); - goto end; - } - - sf2 = create_fileref(); - if (!sf2) { - ERR("out of memory\n"); - free_fcb(fcb); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - sf2->fcb = fcb; - - if (streamname.Buffer) // case has changed - sf2->filepart = streamname; - else { - sf2->filepart.MaximumLength = sf2->filepart.Length = parts[i].Length; - sf2->filepart.Buffer = ExAllocatePoolWithTag(PagedPool, sf2->filepart.MaximumLength, ALLOC_TAG); - if (!sf2->filepart.Buffer) { - ERR("out of memory\n"); - free_fileref(sf2); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - RtlCopyMemory(sf2->filepart.Buffer, parts[i].Buffer, parts[i].Length); - } - - Status = RtlUpcaseUnicodeString(&sf2->filepart_uc, &sf2->filepart, TRUE); - if (!NT_SUCCESS(Status)) { - ERR("RtlUpcaseUnicodeString returned %08x\n", Status); - free_fileref(sf2); - goto end; - } - - // FIXME - make sure all 
functions know that ADS FCBs won't have a valid SD or INODE_ITEM - - sf2->parent = (struct _file_ref*)sf; - insert_fileref_child(sf, sf2, TRUE); - - increase_fileref_refcount(sf); - } - } else { - root* subvol; - UINT64 inode, index; - UINT8 type; - ANSI_STRING utf8; - - Status = find_file_in_dir(Vcb, &parts[i], sf, &subvol, &inode, &type, &index, &utf8, case_sensitive, Irp); - if (Status == STATUS_OBJECT_NAME_NOT_FOUND) { - TRACE("could not find %.*S\n", parts[i].Length / sizeof(WCHAR), parts[i].Buffer); - - Status = lastpart ? STATUS_OBJECT_NAME_NOT_FOUND : STATUS_OBJECT_PATH_NOT_FOUND; - goto end; - } else if (!NT_SUCCESS(Status)) { - ERR("find_file_in_dir returned %08x\n", Status); - goto end; - } else { - fcb* fcb; - ULONG strlen; - - Status = open_fcb(Vcb, subvol, inode, type, &utf8, sf->fcb, &fcb, pooltype, Irp); - if (!NT_SUCCESS(Status)) { - ERR("open_fcb returned %08x\n", Status); - goto end; - } - - if (type != BTRFS_TYPE_DIRECTORY && !lastpart && !(fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT)) { - WARN("passed path including file as subdirectory\n"); - free_fcb(fcb); - Status = STATUS_OBJECT_PATH_NOT_FOUND; - goto end; - } - - sf2 = create_fileref(); - if (!sf2) { - ERR("out of memory\n"); - free_fcb(fcb); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - sf2->fcb = fcb; - - if (type == BTRFS_TYPE_DIRECTORY) - fcb->fileref = sf2; - - sf2->index = index; - sf2->utf8 = utf8; - - Status = RtlUTF8ToUnicodeN(NULL, 0, &strlen, utf8.Buffer, utf8.Length); - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); - free_fileref(sf2); - goto end; - } - - sf2->filepart.MaximumLength = sf2->filepart.Length = strlen; - sf2->filepart.Buffer = ExAllocatePoolWithTag(PagedPool, sf2->filepart.MaximumLength, ALLOC_TAG); - if (!sf2->filepart.Buffer) { - ERR("out of memory\n"); - free_fileref(sf2); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - Status = RtlUTF8ToUnicodeN(sf2->filepart.Buffer, strlen, &strlen, utf8.Buffer, 
utf8.Length); - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); - free_fileref(sf2); - goto end; - } - - Status = RtlUpcaseUnicodeString(&sf2->filepart_uc, &sf2->filepart, TRUE); - if (!NT_SUCCESS(Status)) { - ERR("RtlUpcaseUnicodeString returned %08x\n", Status); - free_fileref(sf2); - goto end; - } - - sf2->parent = (struct _file_ref*)sf; - - insert_fileref_child(sf, sf2, TRUE); - - increase_fileref_refcount(sf); - } - } - } - if (i == num_parts - 1) { if (fn_offset) *fn_offset = parts[has_stream ? (num_parts - 2) : (num_parts - 1)].Buffer - fnus->Buffer; @@ -2001,8 +1707,8 @@ NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnu if (sf2->fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT) { Status = STATUS_REPARSE; - if (unparsed) - *unparsed = fnus->Length - ((parts[i+1].Buffer - fnus->Buffer - 1) * sizeof(WCHAR)); + if (parsed) + *parsed = (parts[i+1].Buffer - fnus->Buffer - 1) * sizeof(WCHAR); break; } @@ -2072,6 +1778,71 @@ end: return Status; } +NTSTATUS add_dir_child(fcb* fcb, UINT64 inode, BOOL subvol, UINT64 index, PANSI_STRING utf8, PUNICODE_STRING name, PUNICODE_STRING name_uc, UINT8 type, dir_child** pdc) { + dir_child* dc; + + dc = ExAllocatePoolWithTag(PagedPool, sizeof(dir_child), ALLOC_TAG); + if (!dc) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + dc->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, utf8->Length, ALLOC_TAG); + if (!dc->utf8.Buffer) { + ERR("out of memory\n"); + ExFreePool(dc); + return STATUS_INSUFFICIENT_RESOURCES; + } + + dc->name.Buffer = ExAllocatePoolWithTag(PagedPool, name->Length, ALLOC_TAG); + if (!dc->name.Buffer) { + ERR("out of memory\n"); + ExFreePool(dc->utf8.Buffer); + ExFreePool(dc); + return STATUS_INSUFFICIENT_RESOURCES; + } + + dc->name_uc.Buffer = ExAllocatePoolWithTag(PagedPool, name_uc->Length, ALLOC_TAG); + if (!dc->name_uc.Buffer) { + ERR("out of memory\n"); + ExFreePool(dc->utf8.Buffer); + ExFreePool(dc->name.Buffer); + 
ExFreePool(dc); + return STATUS_INSUFFICIENT_RESOURCES; + } + + dc->key.obj_id = inode; + dc->key.obj_type = subvol ? TYPE_ROOT_ITEM : TYPE_INODE_ITEM; + dc->key.offset = 0; + dc->index = index; + dc->type = type; + dc->fileref = NULL; + + dc->utf8.Length = dc->utf8.MaximumLength = utf8->Length; + RtlCopyMemory(dc->utf8.Buffer, utf8->Buffer, utf8->Length); + + dc->name.Length = dc->name.MaximumLength = name->Length; + RtlCopyMemory(dc->name.Buffer, name->Buffer, name->Length); + + dc->name_uc.Length = dc->name_uc.MaximumLength = name_uc->Length; + RtlCopyMemory(dc->name_uc.Buffer, name_uc->Buffer, name_uc->Length); + + dc->hash = calc_crc32c(0xffffffff, (UINT8*)dc->name.Buffer, dc->name.Length); + dc->hash_uc = calc_crc32c(0xffffffff, (UINT8*)dc->name_uc.Buffer, dc->name_uc.Length); + + ExAcquireResourceExclusiveLite(&fcb->nonpaged->dir_children_lock, TRUE); + + InsertTailList(&fcb->dir_children_index, &dc->list_entry_index); + + insert_dir_child_into_hash_lists(fcb, dc); + + ExReleaseResourceLite(&fcb->nonpaged->dir_children_lock); + + *pdc = dc; + + return STATUS_SUCCESS; +} + static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_STRING fpus, file_ref* parfileref, ULONG options, FILE_FULL_EA_INFORMATION* ea, ULONG ealen, file_ref** pfr, LIST_ENTRY* rollback) { NTSTATUS Status; @@ -2086,7 +1857,7 @@ static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_S POOL_TYPE pool_type = IrpSp->Flags & SL_OPEN_PAGING_FILE ? 
NonPagedPool : PagedPool; ULONG defda; file_ref* fileref; - hardlink* hl; + dir_child* dc; #ifdef DEBUG_FCB_REFCOUNTS LONG rc; #endif @@ -2277,42 +2048,6 @@ static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_S fcb->ea_changed = TRUE; } - hl = ExAllocatePoolWithTag(pool_type, sizeof(hardlink), ALLOC_TAG); - if (!hl) { - ERR("out of memory\n"); - free_fcb(fcb); - return STATUS_INSUFFICIENT_RESOURCES; - } - - hl->parent = parfileref->fcb->inode; - hl->index = dirpos; - - hl->utf8.Length = hl->utf8.MaximumLength = utf8len; - hl->utf8.Buffer = ExAllocatePoolWithTag(pool_type, utf8len, ALLOC_TAG); - - if (!hl->utf8.Buffer) { - ERR("out of memory\n"); - ExFreePool(hl); - free_fcb(fcb); - return STATUS_INSUFFICIENT_RESOURCES; - } - RtlCopyMemory(hl->utf8.Buffer, utf8, utf8len); - - hl->name.Length = hl->name.MaximumLength = fpus->Length; - hl->name.Buffer = ExAllocatePoolWithTag(pool_type, fpus->Length, ALLOC_TAG); - - if (!hl->name.Buffer) { - ERR("out of memory\n"); - ExFreePool(hl->utf8.Buffer); - ExFreePool(hl); - free_fcb(fcb); - return STATUS_INSUFFICIENT_RESOURCES; - } - - RtlCopyMemory(hl->name.Buffer, fpus->Buffer, fpus->Length); - - InsertTailList(&fcb->hardlinks, &hl->list_entry); - fileref = create_fileref(); if (!fileref) { ERR("out of memory\n"); @@ -2348,7 +2083,7 @@ static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_S free_fileref(fileref); return Status; } - + if (Irp->Overlay.AllocationSize.QuadPart > 0 && !write_fcb_compressed(fcb)) { Status = extend_file(fcb, fileref, Irp->Overlay.AllocationSize.QuadPart, TRUE, NULL, rollback); @@ -2372,7 +2107,34 @@ static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_S insert_fileref_child(parfileref, fileref, TRUE); + Status = add_dir_child(fileref->parent->fcb, fcb->inode, FALSE, fileref->index, &fileref->utf8, &fileref->filepart, &fileref->filepart_uc, fcb->type, &dc); + if (!NT_SUCCESS(Status)) + WARN("add_dir_child returned %08x\n", 
Status); + + fileref->dc = dc; + dc->fileref = fileref; + increase_fileref_refcount(parfileref); + + if (fcb->type == BTRFS_TYPE_DIRECTORY) { + fcb->hash_ptrs = ExAllocatePoolWithTag(PagedPool, sizeof(LIST_ENTRY*) * 256, ALLOC_TAG); + if (!fcb->hash_ptrs) { + ERR("out of memory\n"); + free_fileref(fileref); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(fcb->hash_ptrs, sizeof(LIST_ENTRY*) * 256); + + fcb->hash_ptrs_uc = ExAllocatePoolWithTag(PagedPool, sizeof(LIST_ENTRY*) * 256, ALLOC_TAG); + if (!fcb->hash_ptrs_uc) { + ERR("out of memory\n"); + free_fileref(fileref); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(fcb->hash_ptrs_uc, sizeof(LIST_ENTRY*) * 256); + } InsertTailList(&fcb->subvol->fcbs, &fcb->list_entry); InsertTailList(&Vcb->all_fcbs, &fcb->list_entry_all); @@ -2574,7 +2336,7 @@ static NTSTATUS create_stream(device_extension* Vcb, file_ref** pfileref, file_r mark_fcb_dirty(fcb); mark_fileref_dirty(fileref); - InsertTailList(&fcb->subvol->fcbs, &fcb->list_entry); + InsertHeadList(&parfileref->fcb->list_entry, &fcb->list_entry); // insert in list after parent fcb InsertTailList(&Vcb->all_fcbs, &fcb->list_entry_all); KeQuerySystemTime(&time); @@ -2601,10 +2363,11 @@ static NTSTATUS create_stream(device_extension* Vcb, file_ref** pfileref, file_r return STATUS_SUCCESS; } -static NTSTATUS STDCALL file_create(PIRP Irp, device_extension* Vcb, PFILE_OBJECT FileObject, PUNICODE_STRING fnus, ULONG disposition, ULONG options, LIST_ENTRY* rollback) { +static NTSTATUS STDCALL file_create(PIRP Irp, device_extension* Vcb, PFILE_OBJECT FileObject, file_ref* related, BOOL loaded_related, + PUNICODE_STRING fnus, ULONG disposition, ULONG options, LIST_ENTRY* rollback) { NTSTATUS Status; // fcb *fcb, *parfcb = NULL; - file_ref *fileref, *parfileref = NULL, *related; + file_ref *fileref, *parfileref = NULL; ULONG i, j, fn_offset; // ULONG utf8len; ccb* ccb; @@ -2626,17 +2389,13 @@ static NTSTATUS STDCALL file_create(PIRP Irp, device_extension* 
Vcb, PFILE_OBJEC dsus.Length = dsus.MaximumLength = wcslen(datasuf) * sizeof(WCHAR); fpus.Buffer = NULL; - if (FileObject->RelatedFileObject && FileObject->RelatedFileObject->FsContext2) { - struct _ccb* relatedccb = FileObject->RelatedFileObject->FsContext2; + if (!loaded_related) { + Status = open_fileref(Vcb, &parfileref, fnus, related, TRUE, NULL, NULL, pool_type, IrpSp->Flags & SL_CASE_SENSITIVE, Irp); - related = relatedccb->fileref; + if (!NT_SUCCESS(Status)) + goto end; } else - related = NULL; - - Status = open_fileref(Vcb, &parfileref, &FileObject->FileName, related, TRUE, NULL, NULL, pool_type, IrpSp->Flags & SL_CASE_SENSITIVE, Irp); - - if (!NT_SUCCESS(Status)) - goto end; + parfileref = related; if (parfileref->fcb->type != BTRFS_TYPE_DIRECTORY && (fnus->Length < sizeof(WCHAR) || fnus->Buffer[0] != ':')) { Status = STATUS_OBJECT_PATH_NOT_FOUND; @@ -2811,7 +2570,7 @@ end: ExFreePool(fpus.Buffer); end2: - if (parfileref) + if (parfileref && !loaded_related) free_fileref(parfileref); return Status; @@ -2962,7 +2721,7 @@ static NTSTATUS get_reparse_block(fcb* fcb, UINT8** data) { return STATUS_INSUFFICIENT_RESOURCES; } - Status = read_file(fcb, *data, 0, size, &bytes_read, NULL); + Status = read_file(fcb, *data, 0, size, &bytes_read, NULL, TRUE); if (!NT_SUCCESS(Status)) { ERR("read_file_fcb returned %08x\n", Status); ExFreePool(*data); @@ -3068,14 +2827,22 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN ccb* ccb; device_extension* Vcb = DeviceObject->DeviceExtension; PIO_STACK_LOCATION Stack = IoGetCurrentIrpStackLocation(Irp); - USHORT unparsed; + USHORT parsed; ULONG fn_offset = 0; file_ref *related, *fileref; POOL_TYPE pool_type = Stack->Flags & SL_OPEN_PAGING_FILE ? 
NonPagedPool : PagedPool; ACCESS_MASK granted_access; + BOOL loaded_related = FALSE; + UNICODE_STRING fn; #ifdef DEBUG_FCB_REFCOUNTS LONG oc; #endif +#ifdef DEBUG_STATS + LARGE_INTEGER time1, time2; + UINT8 open_type = 0; + + time1 = KeQueryPerformanceCounter(NULL); +#endif Irp->IoStatus.Information = 0; @@ -3130,7 +2897,9 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN goto exit; } - TRACE("(%.*S)\n", FileObject->FileName.Length / sizeof(WCHAR), FileObject->FileName.Buffer); + fn = FileObject->FileName; + + TRACE("(%.*S)\n", fn.Length / sizeof(WCHAR), fn.Buffer); TRACE("FileObject = %p\n", FileObject); if (Vcb->readonly && (RequestedDisposition == FILE_SUPERSEDE || RequestedDisposition == FILE_CREATE || RequestedDisposition == FILE_OVERWRITE)) { @@ -3147,10 +2916,10 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); if (options & FILE_OPEN_BY_FILE_ID) { - if (FileObject->FileName.Length == sizeof(UINT64) && related && RequestedDisposition == FILE_OPEN) { + if (fn.Length == sizeof(UINT64) && related && RequestedDisposition == FILE_OPEN) { UINT64 inode; - RtlCopyMemory(&inode, FileObject->FileName.Buffer, sizeof(UINT64)); + RtlCopyMemory(&inode, fn.Buffer, sizeof(UINT64)); if (related->fcb == Vcb->root_fileref->fcb && inode == 0) inode = Vcb->root_fileref->fcb->inode; @@ -3168,13 +2937,44 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN goto exit; } } else { - if (related && FileObject->FileName.Length != 0 && FileObject->FileName.Buffer[0] == '\\') { + if (related && fn.Length != 0 && fn.Buffer[0] == '\\') { Status = STATUS_OBJECT_NAME_INVALID; goto exit; } - Status = open_fileref(Vcb, &fileref, &FileObject->FileName, related, Stack->Flags & SL_OPEN_TARGET_DIRECTORY, &unparsed, &fn_offset, - pool_type, Stack->Flags & SL_CASE_SENSITIVE, Irp); + if (!related && RequestedDisposition != FILE_OPEN && !(Stack->Flags 
& SL_OPEN_TARGET_DIRECTORY)) { + ULONG fnoff; + + Status = open_fileref(Vcb, &related, &fn, NULL, TRUE, &parsed, &fnoff, + pool_type, Stack->Flags & SL_CASE_SENSITIVE, Irp); + + if (Status == STATUS_OBJECT_NAME_NOT_FOUND) + Status = STATUS_OBJECT_PATH_NOT_FOUND; + else if (Status == STATUS_REPARSE) + fileref = related; + else if (NT_SUCCESS(Status)) { + fnoff *= sizeof(WCHAR); + fnoff += related->filepart.Length + sizeof(WCHAR); + + if (related->fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT) { + Status = STATUS_REPARSE; + fileref = related; + parsed = fnoff - sizeof(WCHAR); + } else { + fn.Buffer = &fn.Buffer[fnoff / sizeof(WCHAR)]; + fn.Length -= fnoff; + + Status = open_fileref(Vcb, &fileref, &fn, related, Stack->Flags & SL_OPEN_TARGET_DIRECTORY, &parsed, &fn_offset, + pool_type, Stack->Flags & SL_CASE_SENSITIVE, Irp); + + loaded_related = TRUE; + } + + } + } else { + Status = open_fileref(Vcb, &fileref, &fn, related, Stack->Flags & SL_OPEN_TARGET_DIRECTORY, &parsed, &fn_offset, + pool_type, Stack->Flags & SL_CASE_SENSITIVE, Irp); + } } if (Status == STATUS_REPARSE) { @@ -3194,7 +2994,7 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN Status = STATUS_REPARSE; RtlCopyMemory(&Irp->IoStatus.Information, data, sizeof(ULONG)); - data->Reserved = unparsed; + data->Reserved = FileObject->FileName.Length - parsed; Irp->Tail.Overlay.AuxiliaryBuffer = (void*)data; @@ -3232,6 +3032,11 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN file_ref* sf; if (RequestedDisposition == FILE_SUPERSEDE || RequestedDisposition == FILE_OVERWRITE || RequestedDisposition == FILE_OVERWRITE_IF) { + LARGE_INTEGER zero; + +#ifdef DEBUG_STATS + open_type = 1; +#endif if (fileref->fcb->type == BTRFS_TYPE_DIRECTORY || fileref->fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY) { Status = STATUS_ACCESS_DENIED; free_fileref(fileref); @@ -3243,6 +3048,13 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, 
LIST_EN free_fileref(fileref); goto exit; } + + zero.QuadPart = 0; + if (!MmCanFileBeTruncated(&fileref->fcb->nonpaged->segment_object, &zero)) { + Status = STATUS_USER_MAPPED_FILE; + free_fileref(fileref); + goto exit; + } } SeLockSubjectContext(&Stack->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext); @@ -3259,8 +3071,10 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN SeUnlockSubjectContext(&Stack->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext); + // We allow a subvolume root to be opened read-write even if its readonly flag is set, so it can be cleared if (fileref->fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY && granted_access & - (FILE_WRITE_DATA | FILE_APPEND_DATA | FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES | DELETE | WRITE_OWNER | WRITE_DAC)) { + (FILE_WRITE_DATA | FILE_APPEND_DATA | FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES | DELETE | WRITE_OWNER | WRITE_DAC) && + fileref->fcb->inode != SUBVOL_ROOT_INODE) { Status = STATUS_ACCESS_DENIED; free_fileref(fileref); goto exit; @@ -3324,7 +3138,7 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN Status = STATUS_REPARSE; Irp->IoStatus.Information = data->ReparseTag; - if (FileObject->FileName.Buffer[(FileObject->FileName.Length / sizeof(WCHAR)) - 1] == '\\') + if (fn.Buffer[(fn.Length / sizeof(WCHAR)) - 1] == '\\') data->Reserved = sizeof(WCHAR); Irp->Tail.Overlay.AuxiliaryBuffer = (void*)data; @@ -3543,8 +3357,8 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN FileObject->FsContext2 = ccb; if (fn_offset > 0) { - FileObject->FileName.Length -= fn_offset * sizeof(WCHAR); - RtlMoveMemory(&FileObject->FileName.Buffer[0], &FileObject->FileName.Buffer[fn_offset], FileObject->FileName.Length); + fn.Length -= fn_offset * sizeof(WCHAR); + RtlMoveMemory(&fn.Buffer[0], &fn.Buffer[fn_offset], fn.Length); } FileObject->SectionObjectPointer = 
&fileref->fcb->nonpaged->segment_object; @@ -3607,7 +3421,10 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN #endif InterlockedIncrement(&Vcb->open_files); } else { - Status = file_create(Irp, DeviceObject->DeviceExtension, FileObject, &FileObject->FileName, RequestedDisposition, options, rollback); +#ifdef DEBUG_STATS + open_type = 2; +#endif + Status = file_create(Irp, DeviceObject->DeviceExtension, FileObject, related, loaded_related, &fn, RequestedDisposition, options, rollback); Irp->IoStatus.Information = NT_SUCCESS(Status) ? FILE_CREATED : 0; } @@ -3618,6 +3435,9 @@ exit: ExReleaseResourceLite(&Vcb->fcb_lock); exit2: + if (loaded_related) + free_fileref(related); + if (NT_SUCCESS(Status)) { if (!FileObject->Vpb) FileObject->Vpb = DeviceObject->Vpb; @@ -3626,54 +3446,94 @@ exit2: TRACE("returning %08x\n", Status); } +#ifdef DEBUG_STATS + time2 = KeQueryPerformanceCounter(NULL); + + if (open_type == 0) { + Vcb->stats.open_total_time += time2.QuadPart - time1.QuadPart; + Vcb->stats.num_opens++; + } else if (open_type == 1) { + Vcb->stats.overwrite_total_time += time2.QuadPart - time1.QuadPart; + Vcb->stats.num_overwrites++; + } else if (open_type == 2) { + Vcb->stats.create_total_time += time2.QuadPart - time1.QuadPart; + Vcb->stats.num_creates++; + } +#endif + return Status; } NTSTATUS verify_vcb(device_extension* Vcb, PIRP Irp) { - UINT64 i; + NTSTATUS Status; + LIST_ENTRY* le; - for (i = 0; i < Vcb->devices_loaded; i++) { - if (Vcb->devices[i].removable) { - NTSTATUS Status; + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (dev->removable) { ULONG cc; IO_STATUS_BLOCK iosb; - Status = dev_ioctl(Vcb->devices[i].devobj, IOCTL_STORAGE_CHECK_VERIFY, NULL, 0, &cc, sizeof(ULONG), TRUE, &iosb); + Status = dev_ioctl(dev->devobj, IOCTL_STORAGE_CHECK_VERIFY, NULL, 0, &cc, sizeof(ULONG), TRUE, &iosb); if 
(!NT_SUCCESS(Status)) { ERR("dev_ioctl returned %08x\n", Status); - return Status; + goto end; } if (iosb.Information < sizeof(ULONG)) { ERR("iosb.Information was too short\n"); - return STATUS_INTERNAL_ERROR; + Status = STATUS_INTERNAL_ERROR; + goto end; } - if (cc != Vcb->devices[i].change_count) { - PDEVICE_OBJECT dev; + if (cc != dev->change_count) { + PDEVICE_OBJECT devobj; - Vcb->devices[i].devobj->Flags |= DO_VERIFY_VOLUME; + dev->devobj->Flags |= DO_VERIFY_VOLUME; - dev = IoGetDeviceToVerify(Irp->Tail.Overlay.Thread); + devobj = IoGetDeviceToVerify(Irp->Tail.Overlay.Thread); IoSetDeviceToVerify(Irp->Tail.Overlay.Thread, NULL); - if (!dev) { - dev = IoGetDeviceToVerify(PsGetCurrentThread()); + if (!devobj) { + devobj = IoGetDeviceToVerify(PsGetCurrentThread()); IoSetDeviceToVerify(PsGetCurrentThread(), NULL); } - dev = Vcb->Vpb ? Vcb->Vpb->RealDevice : NULL; + devobj = Vcb->Vpb ? Vcb->Vpb->RealDevice : NULL; - if (dev) - IoVerifyVolume(dev, FALSE); + if (devobj) + IoVerifyVolume(devobj, FALSE); - return STATUS_VERIFY_REQUIRED; + Status = STATUS_VERIFY_REQUIRED; + goto end; } } + + le = le->Flink; } - return STATUS_SUCCESS; + Status = STATUS_SUCCESS; + +end: + ExReleaseResourceLite(&Vcb->tree_lock); + + return Status; +} + +static BOOL has_manage_volume_privilege(ACCESS_STATE* access_state, KPROCESSOR_MODE processor_mode) { + PRIVILEGE_SET privset; + + privset.PrivilegeCount = 1; + privset.Control = PRIVILEGE_SET_ALL_NECESSARY; + privset.Privilege[0].Luid = RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE); + privset.Privilege[0].Attributes = 0; + + return SePrivilegeCheck(&privset, &access_state->SubjectSecurityContext, processor_mode) ? 
TRUE : FALSE; } NTSTATUS STDCALL drv_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { @@ -3760,18 +3620,16 @@ NTSTATUS STDCALL drv_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { #ifdef DEBUG_FCB_REFCOUNTS LONG rc, oc; #endif + ccb* ccb; TRACE("open operation for volume\n"); - if (RequestedDisposition != FILE_OPEN && - RequestedDisposition != FILE_OPEN_IF) - { + if (RequestedDisposition != FILE_OPEN && RequestedDisposition != FILE_OPEN_IF) { Status = STATUS_ACCESS_DENIED; goto exit; } - if (RequestedOptions & FILE_DIRECTORY_FILE) - { + if (RequestedOptions & FILE_DIRECTORY_FILE) { Status = STATUS_NOT_A_DIRECTORY; goto exit; } @@ -3780,6 +3638,23 @@ NTSTATUS STDCALL drv_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { Status = STATUS_ACCESS_DENIED; goto exit; } + + ccb = ExAllocatePoolWithTag(NonPagedPool, sizeof(*ccb), ALLOC_TAG); + if (!ccb) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + RtlZeroMemory(ccb, sizeof(*ccb)); + + ccb->NodeType = BTRFS_NODE_TYPE_CCB; + ccb->NodeSize = sizeof(ccb); + ccb->disposition = RequestedDisposition; + ccb->options = RequestedOptions; + ccb->access = IrpSp->Parameters.Create.SecurityContext->AccessState->PreviouslyGrantedAccess; + ccb->manage_volume_privilege = has_manage_volume_privilege(IrpSp->Parameters.Create.SecurityContext->AccessState, + IrpSp->Flags & SL_FORCE_ACCESS_CHECK ? 
UserMode : Irp->RequestorMode); #ifdef DEBUG_FCB_REFCOUNTS rc = InterlockedIncrement(&Vcb->volume_fcb->refcount); @@ -3788,6 +3663,7 @@ NTSTATUS STDCALL drv_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { InterlockedIncrement(&Vcb->volume_fcb->refcount); #endif IrpSp->FileObject->FsContext = Vcb->volume_fcb; + IrpSp->FileObject->FsContext2 = ccb; IrpSp->FileObject->SectionObjectPointer = &Vcb->volume_fcb->nonpaged->segment_object; @@ -3812,11 +3688,7 @@ NTSTATUS STDCALL drv_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { if (!skip_lock) ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); -// ExAcquireResourceExclusiveLite(&Vpb->DirResource, TRUE); - // Status = NtfsCreateFile(DeviceObject, - // Irp); Status = open_file(DeviceObject, Irp, &rollback); -// ExReleaseResourceLite(&Vpb->DirResource); if (!NT_SUCCESS(Status)) do_rollback(Vcb, &rollback); diff --git a/reactos/drivers/filesystems/btrfs/devctrl.c b/reactos/drivers/filesystems/btrfs/devctrl.c index 11422e4277f..10c02cb28b9 100644 --- a/reactos/drivers/filesystems/btrfs/devctrl.c +++ b/reactos/drivers/filesystems/btrfs/devctrl.c @@ -23,6 +23,9 @@ #include #include +extern LIST_ENTRY VcbList; +extern ERESOURCE global_loading_lock; + static NTSTATUS part0_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { NTSTATUS Status; part0_device_extension* p0de = DeviceObject->DeviceExtension; @@ -79,7 +82,7 @@ static NTSTATUS part0_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp name = Irp->AssociatedIrp.SystemBuffer; name->NameLength = p0de->name.Length; - if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_NAME) - 1 + name->NameLength) { + if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < offsetof(MOUNTDEV_NAME, Name[0]) + name->NameLength) { Status = STATUS_BUFFER_OVERFLOW; Irp->IoStatus.Status = Status; Irp->IoStatus.Information = sizeof(MOUNTDEV_NAME); @@ -91,7 +94,7 @@ static NTSTATUS part0_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp 
Status = STATUS_SUCCESS; Irp->IoStatus.Status = Status; - Irp->IoStatus.Information = sizeof(MOUNTDEV_NAME) - 1 + name->NameLength; + Irp->IoStatus.Information = offsetof(MOUNTDEV_NAME, Name[0]) + name->NameLength; IoCompleteRequest(Irp, IO_NO_INCREMENT); return Status; @@ -130,7 +133,7 @@ static NTSTATUS get_partition_info_ex(device_extension* Vcb, PIRP Irp) { TRACE("IOCTL_DISK_GET_PARTITION_INFO_EX\n"); - Status = dev_ioctl(Vcb->devices[0].devobj, IOCTL_DISK_GET_PARTITION_INFO_EX, NULL, 0, + Status = dev_ioctl(Vcb->Vpb->RealDevice, IOCTL_DISK_GET_PARTITION_INFO_EX, NULL, 0, Irp->UserBuffer, IrpSp->Parameters.DeviceIoControl.OutputBufferLength, TRUE, &Irp->IoStatus); if (!NT_SUCCESS(Status)) return Status; @@ -153,6 +156,132 @@ static NTSTATUS is_writable(device_extension* Vcb, PIRP Irp) { return Vcb->readonly ? STATUS_MEDIA_WRITE_PROTECTED : STATUS_SUCCESS; } +static NTSTATUS query_filesystems(void* data, ULONG length) { + NTSTATUS Status; + LIST_ENTRY *le, *le2; + btrfs_filesystem* bfs = NULL; + ULONG itemsize; + + ExAcquireResourceSharedLite(&global_loading_lock, TRUE); + + if (IsListEmpty(&VcbList)) { + if (length < sizeof(btrfs_filesystem)) { + Status = STATUS_BUFFER_OVERFLOW; + goto end; + } else { + RtlZeroMemory(data, sizeof(btrfs_filesystem)); + Status = STATUS_SUCCESS; + goto end; + } + } + + le = VcbList.Flink; + + while (le != &VcbList) { + device_extension* Vcb = CONTAINING_RECORD(le, device_extension, list_entry); + btrfs_filesystem_device* bfd; + + if (bfs) { + bfs->next_entry = itemsize; + bfs = (btrfs_filesystem*)((UINT8*)bfs + itemsize); + } else + bfs = data; + + if (length < offsetof(btrfs_filesystem, device)) { + Status = STATUS_BUFFER_OVERFLOW; + goto end; + } + + itemsize = offsetof(btrfs_filesystem, device); + length -= offsetof(btrfs_filesystem, device); + + bfs->next_entry = 0; + RtlCopyMemory(&bfs->uuid, &Vcb->superblock.uuid, sizeof(BTRFS_UUID)); + + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + + bfs->num_devices = 
Vcb->superblock.num_devices; + + bfd = NULL; + + le2 = Vcb->devices.Flink; + while (le2 != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le2, device, list_entry); + MOUNTDEV_NAME mdn; + + if (bfd) + bfd = (btrfs_filesystem_device*)((UINT8*)bfd + offsetof(btrfs_filesystem_device, name[0]) + bfd->name_length); + else + bfd = &bfs->device; + + if (length < offsetof(btrfs_filesystem_device, name[0])) { + ExReleaseResourceLite(&Vcb->tree_lock); + Status = STATUS_BUFFER_OVERFLOW; + goto end; + } + + itemsize += offsetof(btrfs_filesystem_device, name[0]); + length -= offsetof(btrfs_filesystem_device, name[0]); + + RtlCopyMemory(&bfd->uuid, &dev->devitem.device_uuid, sizeof(BTRFS_UUID)); + + Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &mdn, sizeof(MOUNTDEV_NAME), TRUE, NULL); + if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) { + ExReleaseResourceLite(&Vcb->tree_lock); + ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); + goto end; + } + + if (mdn.NameLength > length) { + ExReleaseResourceLite(&Vcb->tree_lock); + Status = STATUS_BUFFER_OVERFLOW; + goto end; + } + + Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &bfd->name_length, offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength, TRUE, NULL); + if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) { + ExReleaseResourceLite(&Vcb->tree_lock); + ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); + goto end; + } + + itemsize += bfd->name_length; + length -= bfd->name_length; + + le2 = le2->Flink; + } + + ExReleaseResourceLite(&Vcb->tree_lock); + + le = le->Flink; + } + + Status = STATUS_SUCCESS; + +end: + ExReleaseResourceLite(&global_loading_lock); + + return Status; +} + +static NTSTATUS control_ioctl(PIRP Irp) { + PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); + NTSTATUS Status; + + switch (IrpSp->Parameters.DeviceIoControl.IoControlCode) { + case IOCTL_BTRFS_QUERY_FILESYSTEMS: + Status = 
query_filesystems(map_user_buffer(Irp), IrpSp->Parameters.FileSystemControl.OutputBufferLength); + break; + + default: + TRACE("unhandled ioctl %x\n", IrpSp->Parameters.DeviceIoControl.IoControlCode); + Status = STATUS_NOT_IMPLEMENTED; + break; + } + + return Status; +} + NTSTATUS STDCALL drv_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { NTSTATUS Status; PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); @@ -165,9 +294,22 @@ NTSTATUS STDCALL drv_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) Irp->IoStatus.Information = 0; - if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) { - Status = part0_device_control(DeviceObject, Irp); - goto end2; + if (Vcb) { + if (Vcb->type == VCB_TYPE_PARTITION0) { + Status = part0_device_control(DeviceObject, Irp); + goto end2; + } else if (Vcb->type == VCB_TYPE_CONTROL) { + Status = control_ioctl(Irp); + goto end; + } + } else { + Status = STATUS_INVALID_PARAMETER; + goto end; + } + + if (!IrpSp->FileObject || IrpSp->FileObject->FsContext != Vcb->volume_fcb) { + Status = STATUS_INVALID_PARAMETER; + goto end; } switch (IrpSp->Parameters.DeviceIoControl.IoControlCode) { @@ -190,7 +332,7 @@ NTSTATUS STDCALL drv_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) IoSkipCurrentIrpStackLocation(Irp); - Status = IoCallDriver(Vcb->devices[0].devobj, Irp); + Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp); goto end2; diff --git a/reactos/drivers/filesystems/btrfs/dirctrl.c b/reactos/drivers/filesystems/btrfs/dirctrl.c index b70e68f741d..aa23219a611 100644 --- a/reactos/drivers/filesystems/btrfs/dirctrl.c +++ b/reactos/drivers/filesystems/btrfs/dirctrl.c @@ -25,9 +25,7 @@ enum DirEntryType { typedef struct { KEY key; - BOOL name_alloc; - char* name; - ULONG namelen; + UNICODE_STRING name; UINT8 type; enum DirEntryType dir_entry_type; } dir_entry; @@ -50,14 +48,11 @@ ULONG STDCALL get_reparse_tag(device_extension* Vcb, root* subvol, UINT64 inode, if (!(atts & FILE_ATTRIBUTE_REPARSE_POINT)) return 0; - 
ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); Status = open_fcb(Vcb, subvol, inode, type, NULL, NULL, &fcb, PagedPool, Irp); if (!NT_SUCCESS(Status)) { ERR("open_fcb returned %08x\n", Status); - ExReleaseResourceLite(&Vcb->fcb_lock); return 0; } - ExReleaseResourceLite(&Vcb->fcb_lock); ExAcquireResourceSharedLite(fcb->Header.Resource, TRUE); @@ -67,7 +62,7 @@ ULONG STDCALL get_reparse_tag(device_extension* Vcb, root* subvol, UINT64 inode, RtlCopyMemory(&tag, fcb->reparse_xattr.Buffer, sizeof(ULONG)); } else { - Status = read_file(fcb, (UINT8*)&tag, 0, sizeof(ULONG), &br, NULL); + Status = read_file(fcb, (UINT8*)&tag, 0, sizeof(ULONG), &br, NULL, TRUE); if (!NT_SUCCESS(Status)) { ERR("read_file returned %08x\n", Status); goto end; @@ -80,9 +75,7 @@ ULONG STDCALL get_reparse_tag(device_extension* Vcb, root* subvol, UINT64 inode, end: ExReleaseResourceLite(fcb->Header.Resource); - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); free_fcb(fcb); - ExReleaseResourceLite(&Vcb->fcb_lock); return tag; } @@ -130,7 +123,6 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L UINT64 inode; INODE_ITEM ii; NTSTATUS Status; - ULONG stringlen; ULONG atts, ealen; IrpSp = IoGetCurrentIrpStackLocation(Irp); @@ -169,7 +161,6 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L LIST_ENTRY* le; BOOL found = FALSE; - ExAcquireResourceSharedLite(&fcb->Vcb->fcb_lock, TRUE); if (!IsListEmpty(&r->fcbs)) { le = r->fcbs.Flink; while (le != &r->fcbs) { @@ -181,12 +172,12 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L ealen = fcb2->ealen; found = TRUE; break; - } + } else if (fcb2->inode > inode) + break; le = le->Flink; } } - ExReleaseResourceLite(&fcb->Vcb->fcb_lock); if (!found) { KEY searchkey; @@ -218,7 +209,7 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdBothDirectoryInformation || 
IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdFullDirectoryInformation) { - BOOL dotfile = de->namelen > 1 && de->name[0] == '.'; + BOOL dotfile = de->name.Length > sizeof(WCHAR) && de->name.Buffer[0] == '.'; atts = get_file_attributes(fcb->Vcb, &ii, r, inode, de->type, dotfile, FALSE, Irp); } @@ -257,21 +248,6 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L } } - // FICs which return the filename - if (IrpSp->Parameters.QueryDirectory.FileInformationClass == FileBothDirectoryInformation || - IrpSp->Parameters.QueryDirectory.FileInformationClass == FileDirectoryInformation || - IrpSp->Parameters.QueryDirectory.FileInformationClass == FileFullDirectoryInformation || - IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdBothDirectoryInformation || - IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdFullDirectoryInformation || - IrpSp->Parameters.QueryDirectory.FileInformationClass == FileNamesInformation) { - - Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, de->name, de->namelen); - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN returned %08x\n", Status); - return Status; - } - } - switch (IrpSp->Parameters.QueryDirectory.FileInformationClass) { case FileBothDirectoryInformation: { @@ -279,7 +255,7 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L TRACE("FileBothDirectoryInformation\n"); - needed = sizeof(FILE_BOTH_DIR_INFORMATION) - sizeof(WCHAR) + stringlen; + needed = sizeof(FILE_BOTH_DIR_INFORMATION) - sizeof(WCHAR) + de->name.Length; if (needed > *len) { TRACE("buffer overflow - %u > %u\n", needed, *len); @@ -295,17 +271,12 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L fbdi->EndOfFile.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_size; fbdi->AllocationSize.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 
0 : ii.st_blocks; fbdi->FileAttributes = atts; - fbdi->FileNameLength = stringlen; + fbdi->FileNameLength = de->name.Length; fbdi->EaSize = atts & FILE_ATTRIBUTE_REPARSE_POINT ? get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp) : ealen; fbdi->ShortNameLength = 0; // fibdi->ShortName[12]; - Status = RtlUTF8ToUnicodeN(fbdi->FileName, stringlen, &stringlen, de->name, de->namelen); - - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN returned %08x\n", Status); - return Status; - } + RtlCopyMemory(fbdi->FileName, de->name.Buffer, de->name.Length); *len -= needed; @@ -318,7 +289,7 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L TRACE("FileDirectoryInformation\n"); - needed = sizeof(FILE_DIRECTORY_INFORMATION) - sizeof(WCHAR) + stringlen; + needed = sizeof(FILE_DIRECTORY_INFORMATION) - sizeof(WCHAR) + de->name.Length; if (needed > *len) { TRACE("buffer overflow - %u > %u\n", needed, *len); @@ -334,14 +305,9 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L fdi->EndOfFile.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_size; fdi->AllocationSize.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 
0 : ii.st_blocks; fdi->FileAttributes = atts; - fdi->FileNameLength = stringlen; + fdi->FileNameLength = de->name.Length; - Status = RtlUTF8ToUnicodeN(fdi->FileName, stringlen, &stringlen, de->name, de->namelen); - - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN returned %08x\n", Status); - return Status; - } + RtlCopyMemory(fdi->FileName, de->name.Buffer, de->name.Length); *len -= needed; @@ -354,7 +320,7 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L TRACE("FileFullDirectoryInformation\n"); - needed = sizeof(FILE_FULL_DIR_INFORMATION) - sizeof(WCHAR) + stringlen; + needed = sizeof(FILE_FULL_DIR_INFORMATION) - sizeof(WCHAR) + de->name.Length; if (needed > *len) { TRACE("buffer overflow - %u > %u\n", needed, *len); @@ -370,15 +336,10 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L ffdi->EndOfFile.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_size; ffdi->AllocationSize.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_blocks; ffdi->FileAttributes = atts; - ffdi->FileNameLength = stringlen; + ffdi->FileNameLength = de->name.Length; ffdi->EaSize = atts & FILE_ATTRIBUTE_REPARSE_POINT ? 
get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp) : ealen; - Status = RtlUTF8ToUnicodeN(ffdi->FileName, stringlen, &stringlen, de->name, de->namelen); - - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN returned %08x\n", Status); - return Status; - } + RtlCopyMemory(ffdi->FileName, de->name.Buffer, de->name.Length); *len -= needed; @@ -391,7 +352,7 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L TRACE("FileIdBothDirectoryInformation\n"); - needed = sizeof(FILE_ID_BOTH_DIR_INFORMATION) - sizeof(WCHAR) + stringlen; + needed = sizeof(FILE_ID_BOTH_DIR_INFORMATION) - sizeof(WCHAR) + de->name.Length; if (needed > *len) { TRACE("buffer overflow - %u > %u\n", needed, *len); @@ -410,18 +371,13 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L fibdi->EndOfFile.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_size; fibdi->AllocationSize.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_blocks; fibdi->FileAttributes = atts; - fibdi->FileNameLength = stringlen; + fibdi->FileNameLength = de->name.Length; fibdi->EaSize = atts & FILE_ATTRIBUTE_REPARSE_POINT ? 
get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp) : ealen; fibdi->ShortNameLength = 0; // fibdi->ShortName[12]; fibdi->FileId.QuadPart = make_file_id(r, inode); - Status = RtlUTF8ToUnicodeN(fibdi->FileName, stringlen, &stringlen, de->name, de->namelen); - - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN returned %08x\n", Status); - return Status; - } + RtlCopyMemory(fibdi->FileName, de->name.Buffer, de->name.Length); *len -= needed; @@ -434,7 +390,7 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L TRACE("FileIdFullDirectoryInformation\n"); - needed = sizeof(FILE_ID_FULL_DIR_INFORMATION) - sizeof(WCHAR) + stringlen; + needed = sizeof(FILE_ID_FULL_DIR_INFORMATION) - sizeof(WCHAR) + de->name.Length; if (needed > *len) { TRACE("buffer overflow - %u > %u\n", needed, *len); @@ -453,16 +409,11 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L fifdi->EndOfFile.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_size; fifdi->AllocationSize.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_blocks; fifdi->FileAttributes = atts; - fifdi->FileNameLength = stringlen; + fifdi->FileNameLength = de->name.Length; fifdi->EaSize = atts & FILE_ATTRIBUTE_REPARSE_POINT ? 
get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp) : ealen; fifdi->FileId.QuadPart = make_file_id(r, inode); - Status = RtlUTF8ToUnicodeN(fifdi->FileName, stringlen, &stringlen, de->name, de->namelen); - - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN returned %08x\n", Status); - return Status; - } + RtlCopyMemory(fifdi->FileName, de->name.Buffer, de->name.Length); *len -= needed; @@ -475,7 +426,7 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L TRACE("FileNamesInformation\n"); - needed = sizeof(FILE_NAMES_INFORMATION) - sizeof(WCHAR) + stringlen; + needed = sizeof(FILE_NAMES_INFORMATION) - sizeof(WCHAR) + de->name.Length; if (needed > *len) { TRACE("buffer overflow - %u > %u\n", needed, *len); @@ -484,14 +435,9 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L fni->NextEntryOffset = 0; fni->FileIndex = 0; - fni->FileNameLength = stringlen; + fni->FileNameLength = de->name.Length; - Status = RtlUTF8ToUnicodeN(fni->FileName, stringlen, &stringlen, de->name, de->namelen); - - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN returned %08x\n", Status); - return Status; - } + RtlCopyMemory(fni->FileName, de->name.Buffer, de->name.Length); *len -= needed; @@ -518,14 +464,20 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L return STATUS_NO_MORE_FILES; } -static NTSTATUS STDCALL next_dir_entry(file_ref* fileref, UINT64* offset, dir_entry* de, PIRP Irp) { - KEY searchkey; - traverse_ptr tp, next_tp; - DIR_ITEM* di; - NTSTATUS Status; - file_ref* fr; +static NTSTATUS STDCALL next_dir_entry(file_ref* fileref, UINT64* offset, dir_entry* de, dir_child** pdc, PIRP Irp) { LIST_ENTRY* le; - char* name; + dir_child* dc; + + if (*pdc) { + dir_child* dc2 = *pdc; + + if (dc2->list_entry_index.Flink != &fileref->fcb->dir_children_index) + dc = CONTAINING_RECORD(dc2->list_entry_index.Flink, dir_child, list_entry_index); + else + dc = NULL; + + goto next; + } if 
(fileref->parent) { // don't return . and .. if root directory if (*offset == 0) { @@ -533,12 +485,12 @@ static NTSTATUS STDCALL next_dir_entry(file_ref* fileref, UINT64* offset, dir_en de->key.obj_type = TYPE_INODE_ITEM; de->key.offset = 0; de->dir_entry_type = DirEntryType_Self; - de->name = "."; - de->name_alloc = FALSE; - de->namelen = 1; + de->name.Buffer = L"."; + de->name.Length = de->name.MaximumLength = sizeof(WCHAR); de->type = BTRFS_TYPE_DIRECTORY; *offset = 1; + *pdc = NULL; return STATUS_SUCCESS; } else if (*offset == 1) { @@ -546,12 +498,12 @@ static NTSTATUS STDCALL next_dir_entry(file_ref* fileref, UINT64* offset, dir_en de->key.obj_type = TYPE_INODE_ITEM; de->key.offset = 0; de->dir_entry_type = DirEntryType_Parent; - de->name = ".."; - de->name_alloc = FALSE; - de->namelen = 2; + de->name.Buffer = L".."; + de->name.Length = de->name.MaximumLength = sizeof(WCHAR) * 2; de->type = BTRFS_TYPE_DIRECTORY; *offset = 2; + *pdc = NULL; return STATUS_SUCCESS; } @@ -560,168 +512,34 @@ static NTSTATUS STDCALL next_dir_entry(file_ref* fileref, UINT64* offset, dir_en if (*offset < 2) *offset = 2; - ExAcquireResourceSharedLite(&fileref->nonpaged->children_lock, TRUE); - - fr = NULL; - le = fileref->children.Flink; + dc = NULL; + le = fileref->fcb->dir_children_index.Flink; // skip entries before offset - while (le != &fileref->children) { - file_ref* fr2 = CONTAINING_RECORD(le, file_ref, list_entry); + while (le != &fileref->fcb->dir_children_index) { + dir_child* dc2 = CONTAINING_RECORD(le, dir_child, list_entry_index); - if (fr2->index >= *offset) { - fr = fr2; + if (dc2->index >= *offset) { + dc = dc2; break; } le = le->Flink; } - do { - if (fr && fr->index == *offset) { - if (!fr->deleted) { - if (fr->fcb->subvol == fileref->fcb->subvol) { - de->key.obj_id = fr->fcb->inode; - de->key.obj_type = TYPE_INODE_ITEM; - de->key.offset = 0; - } else { - de->key.obj_id = fr->fcb->subvol->id; - de->key.obj_type = TYPE_ROOT_ITEM; - de->key.offset = 0; - } - - name = 
fr->utf8.Buffer; - de->namelen = fr->utf8.Length; - de->type = fr->fcb->type; - de->dir_entry_type = DirEntryType_File; - - (*offset)++; - - Status = STATUS_SUCCESS; - goto end; - } else { - (*offset)++; - fr = fr->list_entry.Flink == &fileref->children ? NULL : CONTAINING_RECORD(fr->list_entry.Flink, file_ref, list_entry); - continue; - } - } - - searchkey.obj_id = fileref->fcb->inode; - searchkey.obj_type = TYPE_DIR_INDEX; - searchkey.offset = *offset; - - Status = find_item(fileref->fcb->Vcb, fileref->fcb->subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - goto end; - } - - if (keycmp(tp.item->key, searchkey) == -1) { - if (find_next_item(fileref->fcb->Vcb, &tp, &next_tp, FALSE, Irp)) - tp = next_tp; - } - - if (keycmp(tp.item->key, searchkey) != -1 && tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { - do { - if (fr) { - if (fr->index <= tp.item->key.offset && !fr->deleted) { - if (fr->fcb->subvol == fileref->fcb->subvol) { - de->key.obj_id = fr->fcb->inode; - de->key.obj_type = TYPE_INODE_ITEM; - de->key.offset = 0; - } else { - de->key.obj_id = fr->fcb->subvol->id; - de->key.obj_type = TYPE_ROOT_ITEM; - de->key.offset = 0; - } - - name = fr->utf8.Buffer; - de->namelen = fr->utf8.Length; - de->type = fr->fcb->type; - de->dir_entry_type = DirEntryType_File; - - *offset = fr->index + 1; - - Status = STATUS_SUCCESS; - goto end; - } - - if (fr->index == tp.item->key.offset && fr->deleted) - break; - - fr = fr->list_entry.Flink == &fileref->children ? NULL : CONTAINING_RECORD(fr->list_entry.Flink, file_ref, list_entry); - } - } while (fr && fr->index < tp.item->key.offset); - - if (fr && fr->index == tp.item->key.offset && fr->deleted) { - *offset = fr->index + 1; - fr = fr->list_entry.Flink == &fileref->children ? 
NULL : CONTAINING_RECORD(fr->list_entry.Flink, file_ref, list_entry); - continue; - } - - *offset = tp.item->key.offset + 1; - - di = (DIR_ITEM*)tp.item->data; - - if (tp.item->size < sizeof(DIR_ITEM) || tp.item->size < sizeof(DIR_ITEM) - 1 + di->m + di->n) { - ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM)); - Status = STATUS_INTERNAL_ERROR; - goto end; - } - - de->key = di->key; - name = di->name; - de->namelen = di->n; - de->type = di->type; - de->dir_entry_type = DirEntryType_File; - - Status = STATUS_SUCCESS; - goto end; - } else { - if (fr) { - if (fr->fcb->subvol == fileref->fcb->subvol) { - de->key.obj_id = fr->fcb->inode; - de->key.obj_type = TYPE_INODE_ITEM; - de->key.offset = 0; - } else { - de->key.obj_id = fr->fcb->subvol->id; - de->key.obj_type = TYPE_ROOT_ITEM; - de->key.offset = 0; - } - - name = fr->utf8.Buffer; - de->namelen = fr->utf8.Length; - de->type = fr->fcb->type; - de->dir_entry_type = DirEntryType_File; - - *offset = fr->index + 1; - - Status = STATUS_SUCCESS; - goto end; - } else { - Status = STATUS_NO_MORE_FILES; - goto end; - } - } - } while (TRUE); +next: + if (!dc) + return STATUS_NO_MORE_FILES; -end: - ExReleaseResourceLite(&fileref->nonpaged->children_lock); + de->key = dc->key; + de->name = dc->name; + de->type = dc->type; + de->dir_entry_type = DirEntryType_File; - if (NT_SUCCESS(Status)) { - de->name_alloc = TRUE; - - de->name = ExAllocatePoolWithTag(PagedPool, de->namelen, ALLOC_TAG); - if (!de->name) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - RtlCopyMemory(de->name, name, de->namelen); - } else - de->name_alloc = FALSE; + *offset = dc->index + 1; + *pdc = dc; - return Status; + return STATUS_SUCCESS; } static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { @@ -739,6 +557,7 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { 
dir_entry de; UINT64 newoffset; ANSI_STRING utf8; + dir_child* dc = NULL; TRACE("query directory\n"); @@ -767,6 +586,7 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { } ExAcquireResourceSharedLite(&fcb->Vcb->tree_lock, TRUE); + ExAcquireResourceSharedLite(&fcb->Vcb->fcb_lock, TRUE); TRACE("%S\n", file_desc(IrpSp->FileObject)); @@ -828,7 +648,7 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { if (!ccb->query_string.Buffer) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; + goto end2; } ccb->query_string.Length = ccb->query_string.MaximumLength = IrpSp->Parameters.QueryDirectory.FileName->Length; @@ -846,7 +666,7 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { if (specific_file) { Status = STATUS_NO_MORE_FILES; - goto end; + goto end2; } } } @@ -856,7 +676,10 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { } newoffset = ccb->query_dir_offset; - Status = next_dir_entry(fileref, &newoffset, &de, Irp); + + ExAcquireResourceSharedLite(&fileref->fcb->nonpaged->dir_children_lock, TRUE); + + Status = next_dir_entry(fileref, &newoffset, &de, &dc, Irp); if (!NT_SUCCESS(Status)) { if (Status == STATUS_NO_MORE_FILES && initial) @@ -878,11 +701,10 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { if (specific_file) { BOOL found = FALSE; - root* found_subvol; - UINT64 found_inode, found_index; - UINT8 found_type; UNICODE_STRING us; LIST_ENTRY* le; + UINT32 hash; + UINT8 c; us.Buffer = NULL; @@ -892,163 +714,89 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { ERR("RtlUpcaseUnicodeString returned %08x\n", Status); goto end; } - } + + hash = calc_crc32c(0xffffffff, (UINT8*)us.Buffer, us.Length); + } else + hash = calc_crc32c(0xffffffff, (UINT8*)ccb->query_string.Buffer, ccb->query_string.Length); - 
ExAcquireResourceSharedLite(&fileref->nonpaged->children_lock, TRUE); + c = hash >> 24; - le = fileref->children.Flink; - while (le != &fileref->children) { - file_ref* fr2 = CONTAINING_RECORD(le, file_ref, list_entry); - - if (!fr2->deleted) { - if (!ccb->case_sensitive && fr2->filepart_uc.Length == us.Length && - RtlCompareMemory(fr2->filepart_uc.Buffer, us.Buffer, us.Length) == us.Length) - found = TRUE; - else if (ccb->case_sensitive && fr2->filepart.Length == ccb->query_string.Length && - RtlCompareMemory(fr2->filepart.Buffer, ccb->query_string.Buffer, ccb->query_string.Length) == ccb->query_string.Length) - found = TRUE; - } - - if (found) { - if (fr2->fcb->subvol == fcb->subvol) { - de.key.obj_id = fr2->fcb->inode; - de.key.obj_type = TYPE_INODE_ITEM; - de.key.offset = 0; - } else { - de.key.obj_id = fr2->fcb->subvol->id; - de.key.obj_type = TYPE_ROOT_ITEM; - de.key.offset = 0; + if (ccb->case_sensitive) { + if (fileref->fcb->hash_ptrs[c]) { + le = fileref->fcb->hash_ptrs[c]; + while (le != &fileref->fcb->dir_children_hash) { + dir_child* dc2 = CONTAINING_RECORD(le, dir_child, list_entry_hash); + + if (dc2->hash == hash) { + if (dc2->name.Length == ccb->query_string.Length && RtlCompareMemory(dc2->name.Buffer, ccb->query_string.Buffer, ccb->query_string.Length) == ccb->query_string.Length) { + found = TRUE; + + de.key = dc2->key; + de.name = dc2->name; + de.type = dc2->type; + de.dir_entry_type = DirEntryType_File; + + break; + } + } else if (dc2->hash > hash) + break; + + le = le->Flink; } - - de.name = ExAllocatePoolWithTag(PagedPool, fr2->utf8.Length, ALLOC_TAG); - if (!de.name) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; + } + } else { + if (fileref->fcb->hash_ptrs_uc[c]) { + le = fileref->fcb->hash_ptrs_uc[c]; + while (le != &fileref->fcb->dir_children_hash_uc) { + dir_child* dc2 = CONTAINING_RECORD(le, dir_child, list_entry_hash_uc); + + if (dc2->hash_uc == hash) { + if (dc2->name_uc.Length == us.Length && 
RtlCompareMemory(dc2->name_uc.Buffer, us.Buffer, us.Length) == us.Length) { + found = TRUE; + + de.key = dc2->key; + de.name = dc2->name; + de.type = dc2->type; + de.dir_entry_type = DirEntryType_File; + + break; + } + } else if (dc2->hash_uc > hash) + break; + + le = le->Flink; } - - RtlCopyMemory(de.name, fr2->utf8.Buffer, fr2->utf8.Length); - - de.name_alloc = TRUE; - de.namelen = fr2->utf8.Length; - de.type = fr2->fcb->type; - de.dir_entry_type = DirEntryType_File; - break; } - - le = le->Flink; } - ExReleaseResourceLite(&fileref->nonpaged->children_lock); - if (us.Buffer) ExFreePool(us.Buffer); if (!found) { - Status = find_file_in_dir(fcb->Vcb, &ccb->query_string, fileref, &found_subvol, &found_inode, &found_type, &found_index, &utf8, FALSE, Irp); - - if (!NT_SUCCESS(Status)) { - Status = STATUS_NO_SUCH_FILE; - goto end; - } - - if (found_subvol == fcb->subvol) { - de.key.obj_id = found_inode; - de.key.obj_type = TYPE_INODE_ITEM; - de.key.offset = 0; - } else { - de.key.obj_id = found_subvol->id; - de.key.obj_type = TYPE_ROOT_ITEM; - de.key.offset = 0; - } - - de.name = utf8.Buffer; - de.name_alloc = FALSE; - de.namelen = utf8.Length; - de.type = found_type; - de.dir_entry_type = DirEntryType_File; - } - } else if (has_wildcard) { - WCHAR* uni_fn; - ULONG stringlen; - UNICODE_STRING di_uni_fn; - - Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, de.name, de.namelen); - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN returned %08x\n", Status); - goto end; - } - - uni_fn = ExAllocatePoolWithTag(PagedPool, stringlen, ALLOC_TAG); - if (!uni_fn) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - Status = RtlUTF8ToUnicodeN(uni_fn, stringlen, &stringlen, de.name, de.namelen); - - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN returned %08x\n", Status); + Status = STATUS_NO_SUCH_FILE; goto end; } - - di_uni_fn.Length = di_uni_fn.MaximumLength = stringlen; - di_uni_fn.Buffer = uni_fn; - - while 
(!FsRtlIsNameInExpression(&ccb->query_string, &di_uni_fn, !ccb->case_sensitive, NULL)) { - if (de.name_alloc) - ExFreePool(de.name); - + } else if (has_wildcard) { + while (!FsRtlIsNameInExpression(&ccb->query_string, &de.name, !ccb->case_sensitive, NULL)) { newoffset = ccb->query_dir_offset; - Status = next_dir_entry(fileref, &newoffset, &de, Irp); + Status = next_dir_entry(fileref, &newoffset, &de, &dc, Irp); - ExFreePool(uni_fn); - if (NT_SUCCESS(Status)) { + if (NT_SUCCESS(Status)) ccb->query_dir_offset = newoffset; - - Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, de.name, de.namelen); - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN returned %08x\n", Status); - goto end; - } - - uni_fn = ExAllocatePoolWithTag(PagedPool, stringlen, ALLOC_TAG); - if (!uni_fn) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - Status = RtlUTF8ToUnicodeN(uni_fn, stringlen, &stringlen, de.name, de.namelen); - - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN returned %08x\n", Status); - ExFreePool(uni_fn); - goto end; - } - - di_uni_fn.Length = di_uni_fn.MaximumLength = stringlen; - di_uni_fn.Buffer = uni_fn; - } else { + else { if (Status == STATUS_NO_MORE_FILES && initial) Status = STATUS_NO_SUCH_FILE; goto end; } } - - ExFreePool(uni_fn); } - TRACE("file(0) = %.*s\n", de.namelen, de.name); + TRACE("file(0) = %.*S\n", de.name.Length / sizeof(WCHAR), de.name.Buffer); TRACE("offset = %u\n", ccb->query_dir_offset - 1); Status = query_dir_item(fcb, fileref, buf, &length, Irp, &de, fcb->subvol); - - if (de.name_alloc) - ExFreePool(de.name); - + count = 0; if (NT_SUCCESS(Status) && !(IrpSp->Flags & SL_RETURN_SINGLE_ENTRY) && !specific_file) { lastitem = (UINT8*)buf; @@ -1073,48 +821,14 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { } if (length > 0) { - WCHAR* uni_fn = NULL; - UNICODE_STRING di_uni_fn; - newoffset = ccb->query_dir_offset; - Status = next_dir_entry(fileref, &newoffset, &de, Irp); + 
Status = next_dir_entry(fileref, &newoffset, &de, &dc, Irp); if (NT_SUCCESS(Status)) { - if (has_wildcard) { - ULONG stringlen; - - Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, de.name, de.namelen); - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN returned %08x\n", Status); - if (de.name_alloc) ExFreePool(de.name); - goto end; - } - - uni_fn = ExAllocatePoolWithTag(PagedPool, stringlen, ALLOC_TAG); - if (!uni_fn) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - if (de.name_alloc) ExFreePool(de.name); - goto end; - } - - Status = RtlUTF8ToUnicodeN(uni_fn, stringlen, &stringlen, de.name, de.namelen); - - if (!NT_SUCCESS(Status)) { - ERR("RtlUTF8ToUnicodeN returned %08x\n", Status); - ExFreePool(uni_fn); - if (de.name_alloc) ExFreePool(de.name); - goto end; - } - - di_uni_fn.Length = di_uni_fn.MaximumLength = stringlen; - di_uni_fn.Buffer = uni_fn; - } - - if (!has_wildcard || FsRtlIsNameInExpression(&ccb->query_string, &di_uni_fn, !ccb->case_sensitive, NULL)) { + if (!has_wildcard || FsRtlIsNameInExpression(&ccb->query_string, &de.name, !ccb->case_sensitive, NULL)) { curitem = (UINT8*)buf + IrpSp->Parameters.QueryDirectory.Length - length; count++; - TRACE("file(%u) %u = %.*s\n", count, curitem - (UINT8*)buf, de.namelen, de.name); + TRACE("file(%u) %u = %.*S\n", count, curitem - (UINT8*)buf, de.name.Length / sizeof(WCHAR), de.name.Buffer); TRACE("offset = %u\n", ccb->query_dir_offset - 1); status2 = query_dir_item(fcb, fileref, curitem, &length, Irp, &de, fcb->subvol); @@ -1126,21 +840,10 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { ccb->query_dir_offset = newoffset; lastitem = curitem; - } else { - if (uni_fn) ExFreePool(uni_fn); - if (de.name_alloc) ExFreePool(de.name); + } else break; - } } else ccb->query_dir_offset = newoffset; - - if (uni_fn) { - ExFreePool(uni_fn); - uni_fn = NULL; - } - - if (de.name_alloc) - ExFreePool(de.name); } else { if (Status == STATUS_NO_MORE_FILES) Status = 
STATUS_SUCCESS; @@ -1155,6 +858,10 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) { Irp->IoStatus.Information = IrpSp->Parameters.QueryDirectory.Length - length; end: + ExReleaseResourceLite(&fileref->fcb->nonpaged->dir_children_lock); + +end2: + ExReleaseResourceLite(&fcb->Vcb->fcb_lock); ExReleaseResourceLite(&fcb->Vcb->tree_lock); TRACE("returning %08x\n", Status); diff --git a/reactos/drivers/filesystems/btrfs/extent-tree.c b/reactos/drivers/filesystems/btrfs/extent-tree.c index 8df4e57e765..19b92328613 100644 --- a/reactos/drivers/filesystems/btrfs/extent-tree.c +++ b/reactos/drivers/filesystems/btrfs/extent-tree.c @@ -84,7 +84,7 @@ static __inline UINT64 get_extent_data_refcount(UINT8 type, void* data) { } } -static UINT64 get_extent_data_ref_hash2(UINT64 root, UINT64 objid, UINT64 offset) { +UINT64 get_extent_data_ref_hash2(UINT64 root, UINT64 objid, UINT64 offset) { UINT32 high_crc = 0xffffffff, low_crc = 0xffffffff; high_crc = calc_crc32c(high_crc, (UINT8*)&root, sizeof(UINT64)); @@ -474,7 +474,7 @@ NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 UINT8* ptr; eisize = sizeof(EXTENT_ITEM); - if (is_tree) eisize += sizeof(EXTENT_ITEM2); + if (is_tree && !(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) eisize += sizeof(EXTENT_ITEM2); eisize += sizeof(UINT8); eisize += datalen; @@ -590,7 +590,6 @@ NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 RtlCopyMemory(newei, tp.item->data, tp.item->size); - newei->generation = Vcb->superblock.generation; newei->refcount += rc; sectedr2 = (EXTENT_DATA_REF*)((UINT8*)newei + ((UINT8*)sectedr - tp.item->data)); @@ -635,7 +634,6 @@ NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 RtlCopyMemory(newei, tp.item->data, tp.item->size); - newei->generation = Vcb->superblock.generation; newei->refcount += rc; sectsdr2 = (SHARED_DATA_REF*)((UINT8*)newei + ((UINT8*)sectsdr - 
tp.item->data)); @@ -697,7 +695,6 @@ NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 newei = ExAllocatePoolWithTag(PagedPool, tp.item->size + sizeof(UINT8) + datalen, ALLOC_TAG); RtlCopyMemory(newei, tp.item->data, ptr - tp.item->data); - newei->generation = Vcb->superblock.generation; newei->refcount += get_extent_data_refcount(type, data); if (len > 0) @@ -733,9 +730,9 @@ NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 return Status; } - if (!keycmp(tp.item->key, searchkey)) { - if (tp.item->size < datalen) { - ERR("(%llx,%x,%llx) was %x bytes, expecting %x\n", tp2.item->key.obj_id, tp2.item->key.obj_type, tp2.item->key.offset, tp.item->size, datalen); + if (!keycmp(tp2.item->key, searchkey)) { + if (tp2.item->size < datalen) { + ERR("(%llx,%x,%llx) was %x bytes, expecting %x\n", tp2.item->key.obj_id, tp2.item->key.obj_type, tp2.item->key.offset, tp2.item->size, datalen); return STATUS_INTERNAL_ERROR; } @@ -770,7 +767,6 @@ NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 newei = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); RtlCopyMemory(newei, tp.item->data, tp.item->size); - newei->generation = Vcb->superblock.generation; newei->refcount += get_extent_data_refcount(type, data); delete_tree_item(Vcb, &tp, rollback); @@ -797,7 +793,6 @@ NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 newei = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); RtlCopyMemory(newei, tp.item->data, tp.item->size); - newei->generation = Vcb->superblock.generation; newei->refcount += get_extent_data_refcount(type, data); delete_tree_item(Vcb, &tp, rollback); @@ -828,7 +823,7 @@ void decrease_chunk_usage(chunk* c, UINT64 delta) { } NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem, - UINT8 level, UINT64 parent, PIRP Irp, LIST_ENTRY* rollback) { + UINT8 level, UINT64 
parent, BOOL superseded, PIRP Irp, LIST_ENTRY* rollback) { KEY searchkey; NTSTATUS Status; traverse_ptr tp, tp2; @@ -884,7 +879,7 @@ NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 return Status; } - return decrease_extent_refcount(Vcb, address, size, type, data, firstitem, level, parent, Irp, rollback); + return decrease_extent_refcount(Vcb, address, size, type, data, firstitem, level, parent, superseded, Irp, rollback); } } @@ -944,6 +939,10 @@ NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 if (sectedr->root == edr->root && sectedr->objid == edr->objid && sectedr->offset == edr->offset) { if (ei->refcount == edr->count) { delete_tree_item(Vcb, &tp, rollback); + + if (!superseded) + add_checksum_entry(Vcb, address, size / Vcb->superblock.sector_size, NULL, Irp, rollback); + return STATUS_SUCCESS; } @@ -976,7 +975,6 @@ NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 RtlCopyMemory((UINT8*)newei + (ptr - tp.item->data), ptr + sectlen + sizeof(UINT8), len - sectlen); } - newei->generation = Vcb->superblock.generation; newei->refcount -= rc; delete_tree_item(Vcb, &tp, rollback); @@ -995,14 +993,24 @@ NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 EXTENT_ITEM* newei; if (sectsdr->offset == sdr->offset) { - // We ignore sdr->count, and assume that we want to remove the whole bit - if (ei->refcount == sectsdr->count) { delete_tree_item(Vcb, &tp, rollback); + + if (!superseded) + add_checksum_entry(Vcb, address, size / Vcb->superblock.sector_size, NULL, Irp, rollback); + return STATUS_SUCCESS; } - neweilen = tp.item->size - sizeof(UINT8) - sectlen; + if (sectsdr->count < sdr->count) { + ERR("error - SHARED_DATA_REF has refcount %x, trying to reduce by %x\n", sectsdr->count, sdr->count); + return STATUS_INTERNAL_ERROR; + } + + if (sectsdr->count > sdr->count) // reduce section refcount + neweilen = tp.item->size; + else // remove section entirely 
+ neweilen = tp.item->size - sizeof(UINT8) - sectlen; newei = ExAllocatePoolWithTag(PagedPool, neweilen, ALLOC_TAG); if (!newei) { @@ -1010,12 +1018,19 @@ NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 return STATUS_INSUFFICIENT_RESOURCES; } - RtlCopyMemory(newei, ei, ptr - tp.item->data); - - if (len > sectlen) - RtlCopyMemory((UINT8*)newei + (ptr - tp.item->data), ptr + sectlen + sizeof(UINT8), len - sectlen); - - newei->generation = Vcb->superblock.generation; + if (sectsdr->count > sdr->count) { + SHARED_DATA_REF* newsdr = (SHARED_DATA_REF*)((UINT8*)newei + ((UINT8*)sectsdr - tp.item->data)); + + RtlCopyMemory(newei, ei, neweilen); + + newsdr->count -= rc; + } else { + RtlCopyMemory(newei, ei, ptr - tp.item->data); + + if (len > sectlen) + RtlCopyMemory((UINT8*)newei + (ptr - tp.item->data), ptr + sectlen + sizeof(UINT8), len - sectlen); + } + newei->refcount -= rc; delete_tree_item(Vcb, &tp, rollback); @@ -1052,7 +1067,6 @@ NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 if (len > sectlen) RtlCopyMemory((UINT8*)newei + (ptr - tp.item->data), ptr + sectlen + sizeof(UINT8), len - sectlen); - newei->generation = Vcb->superblock.generation; newei->refcount--; delete_tree_item(Vcb, &tp, rollback); @@ -1089,7 +1103,6 @@ NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 if (len > sectlen) RtlCopyMemory((UINT8*)newei + (ptr - tp.item->data), ptr + sectlen + sizeof(UINT8), len - sectlen); - newei->generation = Vcb->superblock.generation; newei->refcount--; delete_tree_item(Vcb, &tp, rollback); @@ -1146,6 +1159,10 @@ NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 if (ei->refcount == edr->count) { delete_tree_item(Vcb, &tp, rollback); delete_tree_item(Vcb, &tp2, rollback); + + if (!superseded) + add_checksum_entry(Vcb, address, size / Vcb->superblock.sector_size, NULL, Irp, rollback); + return STATUS_SUCCESS; } @@ -1182,7 +1199,6 @@ NTSTATUS 
decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 RtlCopyMemory(newei, tp.item->data, tp.item->size); - newei->generation = Vcb->superblock.generation; newei->refcount -= rc; delete_tree_item(Vcb, &tp, rollback); @@ -1203,16 +1219,41 @@ NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 EXTENT_ITEM* newei; if (sectsdr->offset == sdr->offset) { - // As above, we assume that we want to remove the whole shared data ref - - if (ei->refcount == sectsdr->count) { + if (ei->refcount == sdr->count) { delete_tree_item(Vcb, &tp, rollback); delete_tree_item(Vcb, &tp2, rollback); + + if (!superseded) + add_checksum_entry(Vcb, address, size / Vcb->superblock.sector_size, NULL, Irp, rollback); + return STATUS_SUCCESS; } + if (sectsdr->count < sdr->count) { + ERR("error - extent section has refcount %x, trying to reduce by %x\n", sectsdr->count, sdr->count); + return STATUS_INTERNAL_ERROR; + } + delete_tree_item(Vcb, &tp2, rollback); + if (sectsdr->count > sdr->count) { + SHARED_DATA_REF* newsdr = ExAllocatePoolWithTag(PagedPool, tp2.item->size, ALLOC_TAG); + + if (!newsdr) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(newsdr, sectsdr, tp2.item->size); + + newsdr->count -= sdr->count; + + if (!insert_tree_item(Vcb, Vcb->extent_root, tp2.item->key.obj_id, tp2.item->key.obj_type, tp2.item->key.offset, newsdr, tp2.item->size, NULL, Irp, rollback)) { + ERR("insert_tree_item failed\n"); + return STATUS_INTERNAL_ERROR; + } + } + newei = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG); if (!newei) { ERR("out of memory\n"); @@ -1221,7 +1262,6 @@ NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 RtlCopyMemory(newei, tp.item->data, tp.item->size); - newei->generation = Vcb->superblock.generation; newei->refcount -= rc; delete_tree_item(Vcb, &tp, rollback); @@ -1258,7 +1298,6 @@ NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 
RtlCopyMemory(newei, tp.item->data, tp.item->size); - newei->generation = Vcb->superblock.generation; newei->refcount -= rc; delete_tree_item(Vcb, &tp, rollback); @@ -1295,7 +1334,6 @@ NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 RtlCopyMemory(newei, tp.item->data, tp.item->size); - newei->generation = Vcb->superblock.generation; newei->refcount -= rc; delete_tree_item(Vcb, &tp, rollback); @@ -1317,6 +1355,10 @@ NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 if (ei->refcount == erv0->count) { delete_tree_item(Vcb, &tp, rollback); delete_tree_item(Vcb, &tp2, rollback); + + if (!superseded) + add_checksum_entry(Vcb, address, size / Vcb->superblock.sector_size, NULL, Irp, rollback); + return STATUS_SUCCESS; } @@ -1330,7 +1372,6 @@ NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 RtlCopyMemory(newei, tp.item->data, tp.item->size); - newei->generation = Vcb->superblock.generation; newei->refcount -= rc; delete_tree_item(Vcb, &tp, rollback); @@ -1348,7 +1389,7 @@ NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 } NTSTATUS decrease_extent_refcount_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 inode, - UINT64 offset, UINT32 refcount, PIRP Irp, LIST_ENTRY* rollback) { + UINT64 offset, UINT32 refcount, BOOL superseded, PIRP Irp, LIST_ENTRY* rollback) { EXTENT_DATA_REF edr; edr.root = root; @@ -1356,7 +1397,7 @@ NTSTATUS decrease_extent_refcount_data(device_extension* Vcb, UINT64 address, UI edr.offset = offset; edr.count = refcount; - return decrease_extent_refcount(Vcb, address, size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, 0, Irp, rollback); + return decrease_extent_refcount(Vcb, address, size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, 0, superseded, Irp, rollback); } NTSTATUS decrease_extent_refcount_tree(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, @@ -1365,7 +1406,7 @@ NTSTATUS 
decrease_extent_refcount_tree(device_extension* Vcb, UINT64 address, UI tbr.offset = root; - return decrease_extent_refcount(Vcb, address, size, TYPE_TREE_BLOCK_REF, &tbr, NULL/*FIXME*/, level, 0, Irp, rollback); + return decrease_extent_refcount(Vcb, address, size, TYPE_TREE_BLOCK_REF, &tbr, NULL/*FIXME*/, level, 0, FALSE, Irp, rollback); } static UINT64 find_extent_data_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, PIRP Irp) { diff --git a/reactos/drivers/filesystems/btrfs/fastio.c b/reactos/drivers/filesystems/btrfs/fastio.c index 2b0608d7a86..30b58384895 100644 --- a/reactos/drivers/filesystems/btrfs/fastio.c +++ b/reactos/drivers/filesystems/btrfs/fastio.c @@ -265,17 +265,26 @@ static NTSTATUS STDCALL fast_io_release_for_ccflush(PFILE_OBJECT FileObject, PDE return STATUS_SUCCESS; } -#ifdef DEBUG -static BOOLEAN STDCALL fast_io_read(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG Length, BOOLEAN Wait, ULONG LockKey, PVOID Buffer, PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject) { - TRACE("(%p, %p, %x, %x, %x, %p, %p, %p)\n", FileObject, FileOffset, Length, Wait, LockKey, Buffer, IoStatus, DeviceObject); +static BOOLEAN STDCALL fast_io_write(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG Length, BOOLEAN Wait, ULONG LockKey, PVOID Buffer, PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject) { + TRACE("(%p (%.*S), %llx, %x, %x, %x, %p, %p, %p)\n", FileObject, FileObject->FileName.Length / sizeof(WCHAR), FileObject->FileName.Buffer, + *FileOffset, Length, Wait, LockKey, Buffer, IoStatus, DeviceObject); - return FsRtlCopyRead(FileObject, FileOffset, Length, Wait, LockKey, Buffer, IoStatus, DeviceObject); + if (FsRtlCopyWrite(FileObject, FileOffset, Length, Wait, LockKey, Buffer, IoStatus, DeviceObject)) { + fcb* fcb = FileObject->FsContext; + + fcb->inode_item.st_size = fcb->Header.FileSize.QuadPart; + + return TRUE; + } + + return FALSE; } -static BOOLEAN STDCALL 
fast_io_write(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG Length, BOOLEAN Wait, ULONG LockKey, PVOID Buffer, PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject) { +#ifdef _DEBUG +static BOOLEAN STDCALL fast_io_read(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG Length, BOOLEAN Wait, ULONG LockKey, PVOID Buffer, PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject) { TRACE("(%p, %p, %x, %x, %x, %p, %p, %p)\n", FileObject, FileOffset, Length, Wait, LockKey, Buffer, IoStatus, DeviceObject); - return FsRtlCopyWrite(FileObject, FileOffset, Length, Wait, LockKey, Buffer, IoStatus, DeviceObject); + return FsRtlCopyRead(FileObject, FileOffset, Length, Wait, LockKey, Buffer, IoStatus, DeviceObject); } static BOOLEAN STDCALL fast_io_mdl_read(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG Length, ULONG LockKey, PMDL* MdlChain, PIO_STATUS_BLOCK IoStatus, PDEVICE_OBJECT DeviceObject) { @@ -284,7 +293,7 @@ static BOOLEAN STDCALL fast_io_mdl_read(PFILE_OBJECT FileObject, PLARGE_INTEGER return FsRtlMdlReadDev(FileObject, FileOffset, Length, LockKey, MdlChain, IoStatus, DeviceObject); } -static BOOLEAN STDCALL fast_io_mdl_read_complete(PFILE_OBJECT FileObject, PMDL* MdlChain, PDEVICE_OBJECT DeviceObject) { +static BOOLEAN STDCALL fast_io_mdl_read_complete(PFILE_OBJECT FileObject, PMDL MdlChain, PDEVICE_OBJECT DeviceObject) { TRACE("(%p, %p, %p)\n", FileObject, MdlChain, DeviceObject); return FsRtlMdlReadCompleteDev(FileObject, MdlChain, DeviceObject); @@ -296,7 +305,7 @@ static BOOLEAN STDCALL fast_io_prepare_mdl_write(PFILE_OBJECT FileObject, PLARGE return FsRtlPrepareMdlWriteDev(FileObject, FileOffset, Length, LockKey, MdlChain, IoStatus, DeviceObject); } -static BOOLEAN STDCALL fast_io_mdl_write_complete(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, PMDL* MdlChain, PDEVICE_OBJECT DeviceObject) { +static BOOLEAN STDCALL fast_io_mdl_write_complete(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, PMDL MdlChain, PDEVICE_OBJECT 
DeviceObject) { TRACE("(%p, %p, %p, %p)\n", FileObject, FileOffset, MdlChain, DeviceObject); return FsRtlMdlWriteCompleteDev(FileObject, FileOffset, MdlChain, DeviceObject); @@ -329,17 +338,16 @@ void __stdcall init_fast_io_dispatch(FAST_IO_DISPATCH** fiod) { FastIoDispatch.ReleaseForModWrite = fast_io_release_for_mod_write; FastIoDispatch.AcquireForCcFlush = fast_io_acquire_for_ccflush; FastIoDispatch.ReleaseForCcFlush = fast_io_release_for_ccflush; + FastIoDispatch.FastIoWrite = fast_io_write; -#ifdef DEBUG +#ifdef _DEBUG FastIoDispatch.FastIoRead = fast_io_read; - FastIoDispatch.FastIoWrite = fast_io_write; FastIoDispatch.MdlRead = fast_io_mdl_read; FastIoDispatch.MdlReadComplete = fast_io_mdl_read_complete; FastIoDispatch.PrepareMdlWrite = fast_io_prepare_mdl_write; FastIoDispatch.MdlWriteComplete = fast_io_mdl_write_complete; #else FastIoDispatch.FastIoRead = FsRtlCopyRead; - FastIoDispatch.FastIoWrite = FsRtlCopyWrite; FastIoDispatch.MdlRead = FsRtlMdlReadDev; FastIoDispatch.MdlReadComplete = FsRtlMdlReadCompleteDev; FastIoDispatch.PrepareMdlWrite = FsRtlPrepareMdlWriteDev; diff --git a/reactos/drivers/filesystems/btrfs/fileinfo.c b/reactos/drivers/filesystems/btrfs/fileinfo.c index e1e12721a11..0582d5aedf2 100644 --- a/reactos/drivers/filesystems/btrfs/fileinfo.c +++ b/reactos/drivers/filesystems/btrfs/fileinfo.c @@ -54,10 +54,11 @@ static NTSTATUS STDCALL set_basic_information(device_extension* Vcb, PIRP Irp, P goto end; } - // FIXME - what if FCB is volume or root? - // FIXME - what about subvol roots? 
- - // FIXME - link FILE_ATTRIBUTE_READONLY to st_mode + if (fcb->inode == SUBVOL_ROOT_INODE && fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY && + (fbi->FileAttributes == 0 || fbi->FileAttributes & FILE_ATTRIBUTE_READONLY)) { + Status = STATUS_ACCESS_DENIED; + goto end; + } if (fbi->CreationTime.QuadPart == -1) ccb->user_set_creation_time = TRUE; @@ -130,6 +131,13 @@ static NTSTATUS STDCALL set_basic_information(device_extension* Vcb, PIRP Irp, P fcb->subvol->root_item.ctransid = Vcb->superblock.generation; fcb->subvol->root_item.ctime = now; + if (fcb->inode == SUBVOL_ROOT_INODE) { + if (fbi->FileAttributes & FILE_ATTRIBUTE_READONLY) + fcb->subvol->root_item.flags |= BTRFS_SUBVOL_READONLY; + else + fcb->subvol->root_item.flags &= ~BTRFS_SUBVOL_READONLY; + } + inode_item_changed = TRUE; filter |= FILE_NOTIFY_CHANGE_ATTRIBUTES; @@ -270,11 +278,7 @@ static NTSTATUS duplicate_fcb(fcb* oldfcb, fcb** pfcb) { if (!fcb->adsxattr.Buffer) { ERR("out of memory\n"); - - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); free_fcb(fcb); - ExReleaseResourceLite(&Vcb->fcb_lock); - return STATUS_INSUFFICIENT_RESOURCES; } @@ -288,11 +292,7 @@ static NTSTATUS duplicate_fcb(fcb* oldfcb, fcb** pfcb) { if (!fcb->adsdata.Buffer) { ERR("out of memory\n"); - - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); free_fcb(fcb); - ExReleaseResourceLite(&Vcb->fcb_lock); - return STATUS_INSUFFICIENT_RESOURCES; } @@ -309,11 +309,7 @@ static NTSTATUS duplicate_fcb(fcb* oldfcb, fcb** pfcb) { fcb->sd = ExAllocatePoolWithTag(PagedPool, RtlLengthSecurityDescriptor(oldfcb->sd), ALLOC_TAG); if (!fcb->sd) { ERR("out of memory\n"); - - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); free_fcb(fcb); - ExReleaseResourceLite(&Vcb->fcb_lock); - return STATUS_INSUFFICIENT_RESOURCES; } @@ -331,11 +327,7 @@ static NTSTATUS duplicate_fcb(fcb* oldfcb, fcb** pfcb) { if (!ext2) { ERR("out of memory\n"); - - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); free_fcb(fcb); - 
ExReleaseResourceLite(&Vcb->fcb_lock); - return STATUS_INSUFFICIENT_RESOURCES; } @@ -347,11 +339,7 @@ static NTSTATUS duplicate_fcb(fcb* oldfcb, fcb** pfcb) { if (!ext2->data) { ERR("out of memory\n"); - - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); free_fcb(fcb); - ExReleaseResourceLite(&Vcb->fcb_lock); - return STATUS_INSUFFICIENT_RESOURCES; } @@ -361,6 +349,29 @@ static NTSTATUS duplicate_fcb(fcb* oldfcb, fcb** pfcb) { ext2->unique = FALSE; ext2->ignore = FALSE; + ext2->inserted = TRUE; + + if (ext->csum) { + ULONG len; + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data; + + if (ext->data->compression == BTRFS_COMPRESSION_NONE) + len = ed2->num_bytes; + else + len = ed2->size; + + len = len * sizeof(UINT32) / Vcb->superblock.sector_size; + + ext2->csum = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG); + if (!ext2->csum) { + ERR("out of memory\n"); + free_fcb(fcb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(ext2->csum, ext->csum, len); + } else + ext2->csum = NULL; InsertTailList(&fcb->extents, &ext2->list_entry); } @@ -376,11 +387,7 @@ static NTSTATUS duplicate_fcb(fcb* oldfcb, fcb** pfcb) { if (!hl2) { ERR("out of memory\n"); - - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); free_fcb(fcb); - ExReleaseResourceLite(&Vcb->fcb_lock); - return STATUS_INSUFFICIENT_RESOURCES; } @@ -393,11 +400,7 @@ static NTSTATUS duplicate_fcb(fcb* oldfcb, fcb** pfcb) { if (!hl2->name.Buffer) { ERR("out of memory\n"); ExFreePool(hl2); - - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); free_fcb(fcb); - ExReleaseResourceLite(&Vcb->fcb_lock); - return STATUS_INSUFFICIENT_RESOURCES; } @@ -410,11 +413,7 @@ static NTSTATUS duplicate_fcb(fcb* oldfcb, fcb** pfcb) { ERR("out of memory\n"); ExFreePool(hl2->name.Buffer); ExFreePool(hl2); - - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); free_fcb(fcb); - ExReleaseResourceLite(&Vcb->fcb_lock); - return STATUS_INSUFFICIENT_RESOURCES; } @@ -433,11 +432,7 @@ static NTSTATUS duplicate_fcb(fcb* 
oldfcb, fcb** pfcb) { fcb->reparse_xattr.Buffer = ExAllocatePoolWithTag(PagedPool, fcb->reparse_xattr.MaximumLength, ALLOC_TAG); if (!fcb->reparse_xattr.Buffer) { ERR("out of memory\n"); - - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); free_fcb(fcb); - ExReleaseResourceLite(&Vcb->fcb_lock); - return STATUS_INSUFFICIENT_RESOURCES; } @@ -450,11 +445,7 @@ static NTSTATUS duplicate_fcb(fcb* oldfcb, fcb** pfcb) { fcb->ea_xattr.Buffer = ExAllocatePoolWithTag(PagedPool, fcb->ea_xattr.MaximumLength, ALLOC_TAG); if (!fcb->ea_xattr.Buffer) { ERR("out of memory\n"); - - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); free_fcb(fcb); - ExReleaseResourceLite(&Vcb->fcb_lock); - return STATUS_INSUFFICIENT_RESOURCES; } @@ -585,10 +576,8 @@ static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) { RtlCopyMemory(xattr.Buffer, xa->name, xa->n); xattr.Buffer[xa->n] = 0; - ExAcquireResourceExclusiveLite(&me->fileref->fcb->Vcb->fcb_lock, TRUE); Status = open_fcb_stream(me->fileref->fcb->Vcb, me->fileref->fcb->subvol, me->fileref->fcb->inode, &xattr, tp.item->key.offset, me->fileref->fcb, &fcb, Irp); - ExReleaseResourceLite(&me->fileref->fcb->Vcb->fcb_lock); if (!NT_SUCCESS(Status)) { ERR("open_fcb_stream returned %08x\n", Status); @@ -599,11 +588,7 @@ static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) { fr = create_fileref(); if (!fr) { ERR("out of memory\n"); - - ExAcquireResourceExclusiveLite(&me->fileref->fcb->Vcb->fcb_lock, TRUE); free_fcb(fcb); - ExReleaseResourceLite(&me->fileref->fcb->Vcb->fcb_lock); - Status = STATUS_INSUFFICIENT_RESOURCES; goto end; } @@ -613,11 +598,7 @@ static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) { Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, &xa->name[xapreflen], xa->n - xapreflen); if (!NT_SUCCESS(Status)) { ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); - - ExAcquireResourceExclusiveLite(&me->fileref->fcb->Vcb->fcb_lock, TRUE); free_fileref(fr); - 
ExReleaseResourceLite(&me->fileref->fcb->Vcb->fcb_lock); - goto end; } @@ -625,22 +606,14 @@ static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) { if (!fr->filepart.Buffer) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; - - ExAcquireResourceExclusiveLite(&me->fileref->fcb->Vcb->fcb_lock, TRUE); free_fileref(fr); - ExReleaseResourceLite(&me->fileref->fcb->Vcb->fcb_lock); - goto end; } Status = RtlUTF8ToUnicodeN(fr->filepart.Buffer, stringlen, &stringlen, &xa->name[xapreflen], xa->n - xapreflen); if (!NT_SUCCESS(Status)) { ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); - - ExAcquireResourceExclusiveLite(&me->fileref->fcb->Vcb->fcb_lock, TRUE); free_fileref(fr); - ExReleaseResourceLite(&me->fileref->fcb->Vcb->fcb_lock); - goto end; } @@ -649,11 +622,7 @@ static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) { Status = RtlUpcaseUnicodeString(&fr->filepart_uc, &fr->filepart, TRUE); if (!NT_SUCCESS(Status)) { ERR("RtlUpcaseUnicodeString returned %08x\n", Status); - - ExAcquireResourceExclusiveLite(&me->fileref->fcb->Vcb->fcb_lock, TRUE); free_fileref(fr); - ExReleaseResourceLite(&me->fileref->fcb->Vcb->fcb_lock); - goto end; } @@ -666,11 +635,7 @@ static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) { if (!me2) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; - - ExAcquireResourceExclusiveLite(&me->fileref->fcb->Vcb->fcb_lock, TRUE); free_fileref(fr); - ExReleaseResourceLite(&me->fileref->fcb->Vcb->fcb_lock); - goto end; } @@ -762,6 +727,7 @@ static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) { ULONG stringlen; root* subvol; UINT64 inode; + dir_child* dc = NULL; utf8.Length = utf8.MaximumLength = di->n; utf8.Buffer = ExAllocatePoolWithTag(PagedPool, utf8.MaximumLength, ALLOC_TAG); @@ -802,9 +768,7 @@ static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) { inode = di->key.obj_id; } - ExAcquireResourceExclusiveLite(&me->fileref->fcb->Vcb->fcb_lock, TRUE); 
Status = open_fcb(me->fileref->fcb->Vcb, subvol, inode, di->type, &utf8, me->fileref->fcb, &fcb, PagedPool, Irp); - ExReleaseResourceLite(&me->fileref->fcb->Vcb->fcb_lock); if (!NT_SUCCESS(Status)) { ERR("open_fcb returned %08x\n", Status); @@ -817,11 +781,7 @@ static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; ExFreePool(utf8.Buffer); - - ExAcquireResourceExclusiveLite(&me->fileref->fcb->Vcb->fcb_lock, TRUE); free_fcb(fcb); - ExReleaseResourceLite(&me->fileref->fcb->Vcb->fcb_lock); - goto end; } @@ -831,11 +791,7 @@ static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) { Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, utf8.Buffer, utf8.Length); if (!NT_SUCCESS(Status)) { ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); - - ExAcquireResourceExclusiveLite(&me->fileref->fcb->Vcb->fcb_lock, TRUE); free_fileref(fr); - ExReleaseResourceLite(&me->fileref->fcb->Vcb->fcb_lock); - goto end; } @@ -843,11 +799,7 @@ static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) { if (!fr->filepart.Buffer) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; - - ExAcquireResourceExclusiveLite(&me->fileref->fcb->Vcb->fcb_lock, TRUE); free_fileref(fr); - ExReleaseResourceLite(&me->fileref->fcb->Vcb->fcb_lock); - goto end; } @@ -855,11 +807,7 @@ static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) { if (!NT_SUCCESS(Status)) { ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); - - ExAcquireResourceExclusiveLite(&me->fileref->fcb->Vcb->fcb_lock, TRUE); free_fileref(fr); - ExReleaseResourceLite(&me->fileref->fcb->Vcb->fcb_lock); - goto end; } @@ -869,11 +817,7 @@ static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) { if (!NT_SUCCESS(Status)) { ERR("RtlUpcaseUnicodeString returned %08x\n", Status); - - ExAcquireResourceExclusiveLite(&me->fileref->fcb->Vcb->fcb_lock, TRUE); free_fileref(fr); - 
ExReleaseResourceLite(&me->fileref->fcb->Vcb->fcb_lock); - goto end; } @@ -882,6 +826,14 @@ static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) { fr->index = tp.item->key.offset; increase_fileref_refcount(me->fileref); + Status = add_dir_child(me->fileref->fcb, di->key.obj_type == TYPE_ROOT_ITEM ? subvol->id : fr->fcb->inode, + di->key.obj_type == TYPE_ROOT_ITEM ? TRUE : FALSE, fr->index, &utf8, &fr->filepart, &fr->filepart_uc, BTRFS_TYPE_DIRECTORY, &dc); + if (!NT_SUCCESS(Status)) + WARN("add_dir_child returned %08x\n", Status); + + fr->dc = dc; + dc->fileref = fr; + insert_fileref_child(fr->parent, fr, FALSE); if (fr->fcb->type == BTRFS_TYPE_DIRECTORY) @@ -891,11 +843,7 @@ static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) { if (!me2) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; - - ExAcquireResourceExclusiveLite(&me->fileref->fcb->Vcb->fcb_lock, TRUE); free_fileref(fr); - ExReleaseResourceLite(&me->fileref->fcb->Vcb->fcb_lock); - goto end; } @@ -931,6 +879,44 @@ end: return Status; } +void remove_dir_child_from_hash_lists(fcb* fcb, dir_child* dc) { + UINT8 c; + + c = dc->hash >> 24; + + if (fcb->hash_ptrs[c] == &dc->list_entry_hash) { + if (dc->list_entry_hash.Flink == &fcb->dir_children_hash) + fcb->hash_ptrs[c] = NULL; + else { + dir_child* dc2 = CONTAINING_RECORD(dc->list_entry_hash.Flink, dir_child, list_entry_hash); + + if (dc2->hash >> 24 == c) + fcb->hash_ptrs[c] = &dc2->list_entry_hash; + else + fcb->hash_ptrs[c] = NULL; + } + } + + RemoveEntryList(&dc->list_entry_hash); + + c = dc->hash_uc >> 24; + + if (fcb->hash_ptrs_uc[c] == &dc->list_entry_hash_uc) { + if (dc->list_entry_hash_uc.Flink == &fcb->dir_children_hash_uc) + fcb->hash_ptrs_uc[c] = NULL; + else { + dir_child* dc2 = CONTAINING_RECORD(dc->list_entry_hash_uc.Flink, dir_child, list_entry_hash_uc); + + if (dc2->hash_uc >> 24 == c) + fcb->hash_ptrs_uc[c] = &dc2->list_entry_hash_uc; + else + fcb->hash_ptrs_uc[c] = NULL; + } + } + + 
RemoveEntryList(&dc->list_entry_hash_uc); +} + static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_STRING utf8, PUNICODE_STRING fnus, PIRP Irp, LIST_ENTRY* rollback) { NTSTATUS Status; LIST_ENTRY move_list, *le; @@ -988,6 +974,8 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_ if (me->fileref->fcb->inode != SUBVOL_ROOT_INODE) { if (!me->dummyfcb) { ULONG defda; + BOOL inserted = FALSE; + LIST_ENTRY* le; ExAcquireResourceExclusiveLite(me->fileref->fcb->Header.Resource, TRUE); @@ -1070,7 +1058,21 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_ InsertHeadList(&me->fileref->fcb->list_entry, &me->dummyfcb->list_entry); RemoveEntryList(&me->fileref->fcb->list_entry); - InsertTailList(&destdir->fcb->subvol->fcbs, &me->fileref->fcb->list_entry); + le = destdir->fcb->subvol->fcbs.Flink; + while (le != &destdir->fcb->subvol->fcbs) { + fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry); + + if (fcb->inode > me->fileref->fcb->inode) { + InsertHeadList(le->Blink, &me->fileref->fcb->list_entry); + inserted = TRUE; + break; + } + + le = le->Flink; + } + + if (!inserted) + InsertTailList(&destdir->fcb->subvol->fcbs, &me->fileref->fcb->list_entry); InsertTailList(&me->fileref->fcb->Vcb->all_fcbs, &me->dummyfcb->list_entry_all); @@ -1128,6 +1130,7 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_ le = move_list.Flink; while (le != &move_list) { hardlink* hl; + BOOL name_changed = FALSE; me = CONTAINING_RECORD(le, move_entry, list_entry); @@ -1150,7 +1153,7 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_ if (le == move_list.Flink) // first item me->fileref->filepart.Length = me->fileref->filepart.MaximumLength = fnus->Length; else - me->fileref->filepart.MaximumLength = me->fileref->filepart.MaximumLength; + me->fileref->filepart.MaximumLength = me->fileref->filepart.Length; me->fileref->filepart.Buffer = 
ExAllocatePoolWithTag(PagedPool, me->fileref->filepart.MaximumLength, ALLOC_TAG); @@ -1171,9 +1174,12 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_ me->dummyfileref->utf8 = me->fileref->utf8; me->dummyfileref->oldutf8 = me->fileref->oldutf8; - if (le == move_list.Flink) + if (le == move_list.Flink) { + if (me->fileref->utf8.Length != utf8->Length || RtlCompareMemory(me->fileref->utf8.Buffer, utf8->Buffer, utf8->Length) != utf8->Length) + name_changed = TRUE; + me->fileref->utf8.Length = me->fileref->utf8.MaximumLength = utf8->Length; - else + } else me->fileref->utf8.MaximumLength = me->fileref->utf8.Length; if (me->fileref->utf8.MaximumLength > 0) { @@ -1209,20 +1215,71 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_ if (!me->parent) { RemoveEntryList(&me->fileref->list_entry); - ExAcquireResourceExclusiveLite(&me->fileref->fcb->Vcb->fcb_lock, TRUE); free_fileref(me->fileref->parent); - ExReleaseResourceLite(&me->fileref->fcb->Vcb->fcb_lock); - - me->fileref->parent = destdir; increase_fileref_refcount(destdir); - Status = fcb_get_last_dir_index(me->fileref->parent->fcb, &me->fileref->index, Irp); + Status = fcb_get_last_dir_index(destdir->fcb, &me->fileref->index, Irp); if (!NT_SUCCESS(Status)) { ERR("fcb_get_last_dir_index returned %08x\n", Status); goto end; } + if (me->fileref->dc) { + // remove from old parent + ExAcquireResourceExclusiveLite(&me->fileref->parent->fcb->nonpaged->dir_children_lock, TRUE); + RemoveEntryList(&me->fileref->dc->list_entry_index); + remove_dir_child_from_hash_lists(me->fileref->parent->fcb, me->fileref->dc); + ExReleaseResourceLite(&me->fileref->parent->fcb->nonpaged->dir_children_lock); + + if (name_changed) { + ExFreePool(me->fileref->dc->utf8.Buffer); + ExFreePool(me->fileref->dc->name.Buffer); + ExFreePool(me->fileref->dc->name_uc.Buffer); + + me->fileref->dc->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, utf8->Length, ALLOC_TAG); + if 
(!me->fileref->dc->utf8.Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + me->fileref->dc->name.Buffer = ExAllocatePoolWithTag(PagedPool, me->fileref->filepart.Length, ALLOC_TAG); + if (!me->fileref->dc->name.Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + me->fileref->dc->name_uc.Buffer = ExAllocatePoolWithTag(PagedPool, me->fileref->filepart_uc.Length, ALLOC_TAG); + if (!me->fileref->dc->name_uc.Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + me->fileref->dc->utf8.Length = me->fileref->dc->utf8.MaximumLength = utf8->Length; + RtlCopyMemory(me->fileref->dc->utf8.Buffer, utf8->Buffer, utf8->Length); + + me->fileref->dc->name.Length = me->fileref->dc->name.MaximumLength = me->fileref->filepart.Length; + RtlCopyMemory(me->fileref->dc->name.Buffer, me->fileref->filepart.Buffer, me->fileref->filepart.Length); + + me->fileref->dc->name_uc.Length = me->fileref->dc->name_uc.MaximumLength = me->fileref->filepart_uc.Length; + RtlCopyMemory(me->fileref->dc->name_uc.Buffer, me->fileref->filepart_uc.Buffer, me->fileref->filepart_uc.Length); + + me->fileref->dc->hash = calc_crc32c(0xffffffff, (UINT8*)me->fileref->dc->name.Buffer, me->fileref->dc->name.Length); + me->fileref->dc->hash_uc = calc_crc32c(0xffffffff, (UINT8*)me->fileref->dc->name_uc.Buffer, me->fileref->dc->name_uc.Length); + } + + // add to new parent + ExAcquireResourceExclusiveLite(&destdir->fcb->nonpaged->dir_children_lock, TRUE); + InsertTailList(&destdir->fcb->dir_children_index, &me->fileref->dc->list_entry_index); + insert_dir_child_into_hash_lists(destdir->fcb, me->fileref->dc); + ExReleaseResourceLite(&destdir->fcb->nonpaged->dir_children_lock); + } + + me->fileref->parent = destdir; + insert_fileref_child(me->fileref->parent, me->fileref, TRUE); TRACE("me->fileref->parent->fcb->inode_item.st_size (inode %llx) was %llx\n", me->fileref->parent->fcb->inode, 
me->fileref->parent->fcb->inode_item.st_size); @@ -1247,41 +1304,43 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_ } } - hl = ExAllocatePoolWithTag(PagedPool, sizeof(hardlink), ALLOC_TAG); - if (!hl) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - hl->parent = me->fileref->parent->fcb->inode; - hl->index = me->fileref->index; - - hl->utf8.Length = hl->utf8.MaximumLength = me->fileref->utf8.Length; - hl->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, hl->utf8.MaximumLength, ALLOC_TAG); - if (!hl->utf8.Buffer) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(hl); - goto end; - } - - RtlCopyMemory(hl->utf8.Buffer, me->fileref->utf8.Buffer, me->fileref->utf8.Length); - - hl->name.Length = hl->name.MaximumLength = me->fileref->filepart.Length; - hl->name.Buffer = ExAllocatePoolWithTag(PagedPool, hl->name.MaximumLength, ALLOC_TAG); - if (!hl->name.Buffer) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(hl->utf8.Buffer); - ExFreePool(hl); - goto end; + if (me->fileref->fcb->inode_item.st_nlink > 1) { + hl = ExAllocatePoolWithTag(PagedPool, sizeof(hardlink), ALLOC_TAG); + if (!hl) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + hl->parent = me->fileref->parent->fcb->inode; + hl->index = me->fileref->index; + + hl->utf8.Length = hl->utf8.MaximumLength = me->fileref->utf8.Length; + hl->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, hl->utf8.MaximumLength, ALLOC_TAG); + if (!hl->utf8.Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + ExFreePool(hl); + goto end; + } + + RtlCopyMemory(hl->utf8.Buffer, me->fileref->utf8.Buffer, me->fileref->utf8.Length); + + hl->name.Length = hl->name.MaximumLength = me->fileref->filepart.Length; + hl->name.Buffer = ExAllocatePoolWithTag(PagedPool, hl->name.MaximumLength, ALLOC_TAG); + if (!hl->name.Buffer) { + ERR("out of 
memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + ExFreePool(hl->utf8.Buffer); + ExFreePool(hl); + goto end; + } + + RtlCopyMemory(hl->name.Buffer, me->fileref->filepart.Buffer, me->fileref->filepart.Length); + + InsertTailList(&me->fileref->fcb->hardlinks, &hl->list_entry); } - RtlCopyMemory(hl->name.Buffer, me->fileref->filepart.Buffer, me->fileref->filepart.Length); - - InsertTailList(&me->fileref->fcb->hardlinks, &hl->list_entry); - mark_fileref_dirty(me->fileref); le = le->Flink; @@ -1317,27 +1376,16 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_ end: while (!IsListEmpty(&move_list)) { - device_extension* Vcb; - le = RemoveHeadList(&move_list); me = CONTAINING_RECORD(le, move_entry, list_entry); - Vcb = me->fileref->fcb->Vcb; - if (me->dummyfcb) { - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + if (me->dummyfcb) free_fcb(me->dummyfcb); - ExReleaseResourceLite(&Vcb->fcb_lock); - } - if (me->dummyfileref) { - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + if (me->dummyfileref) free_fileref(me->dummyfileref); - ExReleaseResourceLite(&Vcb->fcb_lock); - } - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); free_fileref(me->fileref); - ExReleaseResourceLite(&Vcb->fcb_lock); ExFreePool(me); } @@ -1345,6 +1393,94 @@ end: return Status; } +void insert_dir_child_into_hash_lists(fcb* fcb, dir_child* dc) { + BOOL inserted; + LIST_ENTRY* le; + UINT8 c, d; + + c = dc->hash >> 24; + + inserted = FALSE; + + d = c; + do { + le = fcb->hash_ptrs[d]; + + if (d == 0) + break; + + d--; + } while (!le); + + if (!le) + le = fcb->dir_children_hash.Flink; + + while (le != &fcb->dir_children_hash) { + dir_child* dc2 = CONTAINING_RECORD(le, dir_child, list_entry_hash); + + if (dc2->hash > dc->hash) { + InsertHeadList(le->Blink, &dc->list_entry_hash); + inserted = TRUE; + break; + } + + le = le->Flink; + } + + if (!inserted) + InsertTailList(&fcb->dir_children_hash, &dc->list_entry_hash); + + if (!fcb->hash_ptrs[c]) + 
fcb->hash_ptrs[c] = &dc->list_entry_hash; + else { + dir_child* dc2 = CONTAINING_RECORD(fcb->hash_ptrs[c], dir_child, list_entry_hash); + + if (dc2->hash > dc->hash) + fcb->hash_ptrs[c] = &dc->list_entry_hash; + } + + c = dc->hash_uc >> 24; + + inserted = FALSE; + + d = c; + do { + le = fcb->hash_ptrs_uc[d]; + + if (d == 0) + break; + + d--; + } while (!le); + + if (!le) + le = fcb->dir_children_hash_uc.Flink; + + while (le != &fcb->dir_children_hash_uc) { + dir_child* dc2 = CONTAINING_RECORD(le, dir_child, list_entry_hash_uc); + + if (dc2->hash_uc > dc->hash_uc) { + InsertHeadList(le->Blink, &dc->list_entry_hash_uc); + inserted = TRUE; + break; + } + + le = le->Flink; + } + + if (!inserted) + InsertTailList(&fcb->dir_children_hash_uc, &dc->list_entry_hash_uc); + + if (!fcb->hash_ptrs_uc[c]) + fcb->hash_ptrs_uc[c] = &dc->list_entry_hash_uc; + else { + dir_child* dc2 = CONTAINING_RECORD(fcb->hash_ptrs_uc[c], dir_child, list_entry_hash_uc); + + if (dc2->hash_uc > dc->hash_uc) + fcb->hash_ptrs_uc[c] = &dc->list_entry_hash_uc; + } +} + static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp, PFILE_OBJECT FileObject, PFILE_OBJECT tfo) { PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); FILE_RENAME_INFORMATION* fri = Irp->AssociatedIrp.SystemBuffer; @@ -1396,6 +1532,7 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp, } ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); if (fcb->ads) { @@ -1435,9 +1572,7 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp, increase_fileref_refcount(related); } - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); Status = open_fileref(Vcb, &oldfileref, &fnus, related, FALSE, NULL, NULL, PagedPool, ccb->case_sensitive, Irp); - ExReleaseResourceLite(&Vcb->fcb_lock); if (NT_SUCCESS(Status)) { TRACE("destination file %S already 
exists\n", file_desc_fileref(oldfileref)); @@ -1460,17 +1595,13 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp, } if (fileref == oldfileref || oldfileref->deleted) { - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); free_fileref(oldfileref); - ExReleaseResourceLite(&Vcb->fcb_lock); oldfileref = NULL; } } if (!related) { - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); Status = open_fileref(Vcb, &related, &fnus, NULL, TRUE, NULL, NULL, PagedPool, ccb->case_sensitive, Irp); - ExReleaseResourceLite(&Vcb->fcb_lock); if (!NT_SUCCESS(Status)) { ERR("open_fileref returned %08x\n", Status); @@ -1567,6 +1698,56 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp, mark_fileref_dirty(fileref); + if (fileref->dc) { + ExAcquireResourceExclusiveLite(&fileref->parent->fcb->nonpaged->dir_children_lock, TRUE); + + ExFreePool(fileref->dc->utf8.Buffer); + ExFreePool(fileref->dc->name.Buffer); + ExFreePool(fileref->dc->name_uc.Buffer); + + fileref->dc->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, utf8.Length, ALLOC_TAG); + if (!fileref->dc->utf8.Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + ExReleaseResourceLite(&fileref->parent->fcb->nonpaged->dir_children_lock); + goto end; + } + + fileref->dc->name.Buffer = ExAllocatePoolWithTag(PagedPool, fileref->filepart.Length, ALLOC_TAG); + if (!fileref->dc->name.Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + ExReleaseResourceLite(&fileref->parent->fcb->nonpaged->dir_children_lock); + goto end; + } + + fileref->dc->name_uc.Buffer = ExAllocatePoolWithTag(PagedPool, fileref->filepart_uc.Length, ALLOC_TAG); + if (!fileref->dc->name_uc.Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + ExReleaseResourceLite(&fileref->parent->fcb->nonpaged->dir_children_lock); + goto end; + } + + fileref->dc->utf8.Length = fileref->dc->utf8.MaximumLength = utf8.Length; + 
RtlCopyMemory(fileref->dc->utf8.Buffer, utf8.Buffer, utf8.Length); + + fileref->dc->name.Length = fileref->dc->name.MaximumLength = fileref->filepart.Length; + RtlCopyMemory(fileref->dc->name.Buffer, fileref->filepart.Buffer, fileref->filepart.Length); + + fileref->dc->name_uc.Length = fileref->dc->name_uc.MaximumLength = fileref->filepart_uc.Length; + RtlCopyMemory(fileref->dc->name_uc.Buffer, fileref->filepart_uc.Buffer, fileref->filepart_uc.Length); + + remove_dir_child_from_hash_lists(fileref->parent->fcb, fileref->dc); + + fileref->dc->hash = calc_crc32c(0xffffffff, (UINT8*)fileref->dc->name.Buffer, fileref->dc->name.Length); + fileref->dc->hash_uc = calc_crc32c(0xffffffff, (UINT8*)fileref->dc->name_uc.Buffer, fileref->dc->name_uc.Length); + + insert_dir_child_into_hash_lists(fileref->parent->fcb, fileref->dc); + + ExReleaseResourceLite(&fileref->parent->fcb->nonpaged->dir_children_lock); + } + KeQuerySystemTime(&time); win_time_to_unix(time, &now); @@ -1622,6 +1803,7 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp, fr2->deleted = TRUE; fr2->created = fileref->created; fr2->parent = fileref->parent; + fr2->dc = NULL; if (fr2->fcb->type == BTRFS_TYPE_DIRECTORY) fr2->fcb->fileref = fr2; @@ -1665,45 +1847,102 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp, mark_fileref_dirty(fr2); mark_fileref_dirty(fileref); - // add new hardlink entry to fcb - - hl = ExAllocatePoolWithTag(PagedPool, sizeof(hardlink), ALLOC_TAG); - if (!hl) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - hl->parent = related->fcb->inode; - hl->index = index; - - hl->name.Length = hl->name.MaximumLength = fileref->filepart.Length; - hl->name.Buffer = ExAllocatePoolWithTag(PagedPool, hl->name.MaximumLength, ALLOC_TAG); - - if (!hl->name.Buffer) { - ERR("out of memory\n"); - ExFreePool(hl); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; + if (fileref->dc) { + // remove from old 
parent + ExAcquireResourceExclusiveLite(&fr2->parent->fcb->nonpaged->dir_children_lock, TRUE); + RemoveEntryList(&fileref->dc->list_entry_index); + remove_dir_child_from_hash_lists(fr2->parent->fcb, fileref->dc); + ExReleaseResourceLite(&fr2->parent->fcb->nonpaged->dir_children_lock); + + if (fileref->utf8.Length != fr2->utf8.Length || RtlCompareMemory(fileref->utf8.Buffer, fr2->utf8.Buffer, fr2->utf8.Length) != fr2->utf8.Length) { + // handle changed name + + ExFreePool(fileref->dc->utf8.Buffer); + ExFreePool(fileref->dc->name.Buffer); + ExFreePool(fileref->dc->name_uc.Buffer); + + fileref->dc->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, utf8.Length, ALLOC_TAG); + if (!fileref->dc->utf8.Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + fileref->dc->name.Buffer = ExAllocatePoolWithTag(PagedPool, fileref->filepart.Length, ALLOC_TAG); + if (!fileref->dc->name.Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + fileref->dc->name_uc.Buffer = ExAllocatePoolWithTag(PagedPool, fileref->filepart_uc.Length, ALLOC_TAG); + if (!fileref->dc->name_uc.Buffer) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + fileref->dc->utf8.Length = fileref->dc->utf8.MaximumLength = utf8.Length; + RtlCopyMemory(fileref->dc->utf8.Buffer, utf8.Buffer, utf8.Length); + + fileref->dc->name.Length = fileref->dc->name.MaximumLength = fileref->filepart.Length; + RtlCopyMemory(fileref->dc->name.Buffer, fileref->filepart.Buffer, fileref->filepart.Length); + + fileref->dc->name_uc.Length = fileref->dc->name_uc.MaximumLength = fileref->filepart_uc.Length; + RtlCopyMemory(fileref->dc->name_uc.Buffer, fileref->filepart_uc.Buffer, fileref->filepart_uc.Length); + + fileref->dc->hash = calc_crc32c(0xffffffff, (UINT8*)fileref->dc->name.Buffer, fileref->dc->name.Length); + fileref->dc->hash_uc = calc_crc32c(0xffffffff, (UINT8*)fileref->dc->name_uc.Buffer, 
fileref->dc->name_uc.Length); + } + + // add to new parent + ExAcquireResourceExclusiveLite(&related->fcb->nonpaged->dir_children_lock, TRUE); + InsertTailList(&related->fcb->dir_children_index, &fileref->dc->list_entry_index); + insert_dir_child_into_hash_lists(related->fcb, fileref->dc); + ExReleaseResourceLite(&related->fcb->nonpaged->dir_children_lock); } - RtlCopyMemory(hl->name.Buffer, fileref->filepart.Buffer, fileref->filepart.Length); - - hl->utf8.Length = hl->utf8.MaximumLength = fileref->utf8.Length; - hl->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, hl->utf8.MaximumLength, ALLOC_TAG); - - if (!hl->utf8.Buffer) { - ERR("out of memory\n"); - ExFreePool(hl->name.Buffer); - ExFreePool(hl); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; + if (fcb->inode_item.st_nlink > 1) { + // add new hardlink entry to fcb + + hl = ExAllocatePoolWithTag(PagedPool, sizeof(hardlink), ALLOC_TAG); + if (!hl) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + hl->parent = related->fcb->inode; + hl->index = index; + + hl->name.Length = hl->name.MaximumLength = fileref->filepart.Length; + hl->name.Buffer = ExAllocatePoolWithTag(PagedPool, hl->name.MaximumLength, ALLOC_TAG); + + if (!hl->name.Buffer) { + ERR("out of memory\n"); + ExFreePool(hl); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlCopyMemory(hl->name.Buffer, fileref->filepart.Buffer, fileref->filepart.Length); + + hl->utf8.Length = hl->utf8.MaximumLength = fileref->utf8.Length; + hl->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, hl->utf8.MaximumLength, ALLOC_TAG); + + if (!hl->utf8.Buffer) { + ERR("out of memory\n"); + ExFreePool(hl->name.Buffer); + ExFreePool(hl); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlCopyMemory(hl->utf8.Buffer, fileref->utf8.Buffer, fileref->utf8.Length); + + InsertTailList(&fcb->hardlinks, &hl->list_entry); } - - RtlCopyMemory(hl->utf8.Buffer, fileref->utf8.Buffer, fileref->utf8.Length); - - 
InsertTailList(&fcb->hardlinks, &hl->list_entry); - + // delete old hardlink entry from fcb le = fcb->hardlinks.Flink; @@ -1763,9 +2002,7 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp, fr2->parent->fcb->inode_item.st_ctime = now; fr2->parent->fcb->inode_item.st_mtime = now; - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); free_fileref(fr2); - ExReleaseResourceLite(&Vcb->fcb_lock); fr2->parent->fcb->inode_item_changed = TRUE; mark_fcb_dirty(fr2->parent->fcb); @@ -1776,25 +2013,16 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp, send_notification_fileref(fr2->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED); Status = STATUS_SUCCESS; - + end: - if (oldfileref) { - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + if (oldfileref) free_fileref(oldfileref); - ExReleaseResourceLite(&Vcb->fcb_lock); - } - if (!NT_SUCCESS(Status) && related) { - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + if (!NT_SUCCESS(Status) && related) free_fileref(related); - ExReleaseResourceLite(&Vcb->fcb_lock); - } - if (!NT_SUCCESS(Status) && fr2) { - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + if (!NT_SUCCESS(Status) && fr2) free_fileref(fr2); - ExReleaseResourceLite(&Vcb->fcb_lock); - } if (NT_SUCCESS(Status)) clear_rollback(Vcb, &rollback); @@ -1802,6 +2030,7 @@ end: do_rollback(Vcb, &rollback); ExReleaseResourceLite(fcb->Header.Resource); + ExReleaseResourceLite(&Vcb->fcb_lock); ExReleaseResourceLite(&Vcb->tree_lock); return Status; @@ -1895,6 +2124,7 @@ static NTSTATUS STDCALL set_end_of_file_information(device_extension* Vcb, PIRP LARGE_INTEGER time; CC_FILE_SIZES ccfs; LIST_ENTRY rollback; + BOOL set_size = FALSE; if (!fileref) { ERR("fileref is NULL\n"); @@ -1936,6 +2166,11 @@ static NTSTATUS STDCALL set_end_of_file_information(device_extension* Vcb, PIRP TRACE("truncating file to %llx bytes\n", feofi->EndOfFile.QuadPart); + if 
(!MmCanFileBeTruncated(&fcb->nonpaged->segment_object, &feofi->EndOfFile)) { + Status = STATUS_USER_MAPPED_FILE; + goto end; + } + Status = truncate_file(fcb, feofi->EndOfFile.QuadPart, Irp, &rollback); if (!NT_SUCCESS(Status)) { ERR("error - truncate_file failed\n"); @@ -1960,9 +2195,7 @@ static NTSTATUS STDCALL set_end_of_file_information(device_extension* Vcb, PIRP ccfs.AllocationSize = fcb->Header.AllocationSize; ccfs.FileSize = fcb->Header.FileSize; ccfs.ValidDataLength = fcb->Header.ValidDataLength; - - CcSetFileSizes(FileObject, &ccfs); - TRACE("setting FileSize for %S to %llx\n", file_desc(FileObject), ccfs.FileSize); + set_size = TRUE; if (!ccb->user_set_write_time) { KeQuerySystemTime(&time); @@ -1983,6 +2216,9 @@ end: ExReleaseResourceLite(fcb->Header.Resource); + if (set_size) + CcSetFileSizes(FileObject, &ccfs); + ExReleaseResourceLite(&Vcb->tree_lock); return Status; @@ -2028,6 +2264,7 @@ static NTSTATUS STDCALL set_link_information(device_extension* Vcb, PIRP Irp, PF hardlink* hl; ACCESS_MASK access; SECURITY_SUBJECT_CONTEXT subjcont; + dir_child* dc = NULL; InitializeListHead(&rollback); @@ -2072,6 +2309,7 @@ static NTSTATUS STDCALL set_link_information(device_extension* Vcb, PIRP Irp, PF } ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); if (fcb->type == BTRFS_TYPE_DIRECTORY) { @@ -2114,9 +2352,7 @@ static NTSTATUS STDCALL set_link_information(device_extension* Vcb, PIRP Irp, PF increase_fileref_refcount(related); } - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); Status = open_fileref(Vcb, &oldfileref, &fnus, related, FALSE, NULL, NULL, PagedPool, ccb->case_sensitive, Irp); - ExReleaseResourceLite(&Vcb->fcb_lock); if (NT_SUCCESS(Status)) { if (!oldfileref->deleted) { @@ -2140,17 +2376,13 @@ static NTSTATUS STDCALL set_link_information(device_extension* Vcb, PIRP Irp, PF goto end; } } else { - 
ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); free_fileref(oldfileref); - ExReleaseResourceLite(&Vcb->fcb_lock); oldfileref = NULL; } } if (!related) { - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); Status = open_fileref(Vcb, &related, &fnus, NULL, TRUE, NULL, NULL, PagedPool, ccb->case_sensitive, Irp); - ExReleaseResourceLite(&Vcb->fcb_lock); if (!NT_SUCCESS(Status)) { ERR("open_fileref returned %08x\n", Status); @@ -2228,6 +2460,53 @@ static NTSTATUS STDCALL set_link_information(device_extension* Vcb, PIRP Irp, PF insert_fileref_child(related, fr2, TRUE); + Status = add_dir_child(related->fcb, fcb->inode, FALSE, index, &utf8, &fr2->filepart, &fr2->filepart_uc, fcb->type, &dc); + if (!NT_SUCCESS(Status)) + WARN("add_dir_child returned %08x\n", Status); + + fr2->dc = dc; + dc->fileref = fr2; + + // add hardlink for existing fileref, if it's not there already + if (IsListEmpty(&fcb->hardlinks)) { + hl = ExAllocatePoolWithTag(PagedPool, sizeof(hardlink), ALLOC_TAG); + if (!hl) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + hl->parent = fileref->parent->fcb->inode; + hl->index = fileref->index; + + hl->name.Length = hl->name.MaximumLength = fileref->filepart.Length; + hl->name.Buffer = ExAllocatePoolWithTag(PagedPool, hl->name.MaximumLength, ALLOC_TAG); + + if (!hl->name.Buffer) { + ERR("out of memory\n"); + ExFreePool(hl); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlCopyMemory(hl->name.Buffer, fileref->filepart.Buffer, fileref->filepart.Length); + + hl->utf8.Length = hl->utf8.MaximumLength = fileref->utf8.Length; + hl->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, hl->utf8.MaximumLength, ALLOC_TAG); + + if (!hl->utf8.Buffer) { + ERR("out of memory\n"); + ExFreePool(hl->name.Buffer); + ExFreePool(hl); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlCopyMemory(hl->utf8.Buffer, fileref->utf8.Buffer, fileref->utf8.Length); + + InsertTailList(&fcb->hardlinks, 
&hl->list_entry); + } + hl = ExAllocatePoolWithTag(PagedPool, sizeof(hardlink), ALLOC_TAG); if (!hl) { ERR("out of memory\n"); @@ -2300,23 +2579,14 @@ static NTSTATUS STDCALL set_link_information(device_extension* Vcb, PIRP Irp, PF Status = STATUS_SUCCESS; end: - if (oldfileref) { - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + if (oldfileref) free_fileref(oldfileref); - ExReleaseResourceLite(&Vcb->fcb_lock); - } - if (!NT_SUCCESS(Status) && related) { - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + if (!NT_SUCCESS(Status) && related) free_fileref(related); - ExReleaseResourceLite(&Vcb->fcb_lock); - } - if (!NT_SUCCESS(Status) && fr2) { - ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + if (!NT_SUCCESS(Status) && fr2) free_fileref(fr2); - ExReleaseResourceLite(&Vcb->fcb_lock); - } if (NT_SUCCESS(Status)) clear_rollback(Vcb, &rollback); @@ -2324,6 +2594,7 @@ end: do_rollback(Vcb, &rollback); ExReleaseResourceLite(fcb->Header.Resource); + ExReleaseResourceLite(&Vcb->fcb_lock); ExReleaseResourceLite(&Vcb->tree_lock); return Status; @@ -2368,7 +2639,8 @@ NTSTATUS STDCALL drv_set_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp goto end; } - if (fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY && IrpSp->Parameters.SetFile.FileInformationClass != FilePositionInformation) { + if (fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY && IrpSp->Parameters.SetFile.FileInformationClass != FilePositionInformation && + (fcb->inode != SUBVOL_ROOT_INODE || IrpSp->Parameters.SetFile.FileInformationClass != FileBasicInformation)) { Status = STATUS_ACCESS_DENIED; goto end; } @@ -2656,7 +2928,7 @@ static NTSTATUS STDCALL fill_in_file_alignment_information(FILE_ALIGNMENT_INFORM *length -= sizeof(FILE_ALIGNMENT_INFORMATION); - fai->AlignmentRequirement = Vcb->devices[0].devobj->AlignmentRequirement; + fai->AlignmentRequirement = first_device(Vcb)->devobj->AlignmentRequirement; return STATUS_SUCCESS; } @@ -2683,6 +2955,10 @@ NTSTATUS 
fileref_get_filename(file_ref* fileref, PUNICODE_STRING fn, USHORT* nam fn->Length = fn->MaximumLength = sizeof(WCHAR); fn->Buffer[0] = '\\'; + + if (name_offset) + *name_offset = 0; + return STATUS_SUCCESS; } @@ -3423,6 +3699,7 @@ NTSTATUS open_fileref_by_inode(device_extension* Vcb, root* subvol, UINT64 inode fcb* fcb; hardlink* hl; file_ref *parfr, *fr; + dir_child* dc = NULL; Status = open_fcb(Vcb, subvol, inode, 0, NULL, NULL, &fcb, PagedPool, Irp); if (!NT_SUCCESS(Status)) { @@ -3436,6 +3713,140 @@ NTSTATUS open_fileref_by_inode(device_extension* Vcb, root* subvol, UINT64 inode return STATUS_SUCCESS; } + // find hardlink if fcb doesn't have any loaded + if (IsListEmpty(&fcb->hardlinks)) { + KEY searchkey; + traverse_ptr tp; + + searchkey.obj_id = fcb->inode; + searchkey.obj_type = TYPE_INODE_EXTREF; + searchkey.offset = 0xffffffffffffffff; + + Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + free_fcb(fcb); + return Status; + } + + if (tp.item->key.obj_id == fcb->inode) { + if (tp.item->key.obj_type == TYPE_INODE_REF) { + INODE_REF* ir; + ULONG stringlen; + + ir = (INODE_REF*)tp.item->data; + + hl = ExAllocatePoolWithTag(PagedPool, sizeof(hardlink), ALLOC_TAG); + if (!hl) { + ERR("out of memory\n"); + free_fcb(fcb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + hl->parent = tp.item->key.offset; + hl->index = ir->index; + + hl->utf8.Length = hl->utf8.MaximumLength = ir->n; + + if (hl->utf8.Length > 0) { + hl->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, hl->utf8.MaximumLength, ALLOC_TAG); + RtlCopyMemory(hl->utf8.Buffer, ir->name, ir->n); + } + + Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, ir->name, ir->n); + if (!NT_SUCCESS(Status)) { + ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); + ExFreePool(hl); + free_fcb(fcb); + return Status; + } + + hl->name.Length = hl->name.MaximumLength = stringlen; + + if (stringlen == 0) + hl->name.Buffer = NULL; + else { + 
hl->name.Buffer = ExAllocatePoolWithTag(PagedPool, hl->name.MaximumLength, ALLOC_TAG); + + if (!hl->name.Buffer) { + ERR("out of memory\n"); + ExFreePool(hl); + free_fcb(fcb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + Status = RtlUTF8ToUnicodeN(hl->name.Buffer, stringlen, &stringlen, ir->name, ir->n); + if (!NT_SUCCESS(Status)) { + ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); + ExFreePool(hl->name.Buffer); + ExFreePool(hl); + free_fcb(fcb); + return Status; + } + } + + InsertTailList(&fcb->hardlinks, &hl->list_entry); + } else if (tp.item->key.obj_type == TYPE_INODE_EXTREF) { + INODE_EXTREF* ier; + hardlink* hl; + ULONG stringlen; + + ier = (INODE_EXTREF*)tp.item->data; + + hl = ExAllocatePoolWithTag(PagedPool, sizeof(hardlink), ALLOC_TAG); + if (!hl) { + ERR("out of memory\n"); + free_fcb(fcb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + hl->parent = ier->dir; + hl->index = ier->index; + + hl->utf8.Length = hl->utf8.MaximumLength = ier->n; + + if (hl->utf8.Length > 0) { + hl->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, hl->utf8.MaximumLength, ALLOC_TAG); + RtlCopyMemory(hl->utf8.Buffer, ier->name, ier->n); + } + + Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, ier->name, ier->n); + if (!NT_SUCCESS(Status)) { + ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status); + ExFreePool(hl); + free_fcb(fcb); + return Status; + } + + hl->name.Length = hl->name.MaximumLength = stringlen; + + if (stringlen == 0) + hl->name.Buffer = NULL; + else { + hl->name.Buffer = ExAllocatePoolWithTag(PagedPool, hl->name.MaximumLength, ALLOC_TAG); + + if (!hl->name.Buffer) { + ERR("out of memory\n"); + ExFreePool(hl); + free_fcb(fcb); + return STATUS_INSUFFICIENT_RESOURCES; + } + + Status = RtlUTF8ToUnicodeN(hl->name.Buffer, stringlen, &stringlen, ier->name, ier->n); + if (!NT_SUCCESS(Status)) { + ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status); + ExFreePool(hl->name.Buffer); + ExFreePool(hl); + free_fcb(fcb); + return Status; + } + } + + InsertTailList(&fcb->hardlinks, 
&hl->list_entry); + } + } + } + if (IsListEmpty(&fcb->hardlinks)) { ERR("subvol %llx, inode %llx has no hardlinks\n", subvol->id, inode); free_fcb(fcb); @@ -3504,6 +3915,14 @@ NTSTATUS open_fileref_by_inode(device_extension* Vcb, root* subvol, UINT64 inode fr->parent = parfr; + Status = add_dir_child(parfr->fcb, fr->fcb->inode == SUBVOL_ROOT_INODE ? fr->fcb->subvol->id : fr->fcb->inode, fr->fcb->inode == SUBVOL_ROOT_INODE, + fr->index, &fr->utf8, &fr->filepart, &fr->filepart_uc, fr->fcb->type, &dc); + if (!NT_SUCCESS(Status)) + WARN("add_dir_child returned %08x\n", Status); + + fr->dc = dc; + dc->fileref = fr; + insert_fileref_child(parfr, fr, TRUE); *pfr = fr; @@ -3569,75 +3988,95 @@ static NTSTATUS STDCALL fill_in_hard_link_information(FILE_LINKS_INFORMATION* fl } else { ExAcquireResourceExclusiveLite(&fcb->Vcb->fcb_lock, TRUE); - le = fcb->hardlinks.Flink; - while (le != &fcb->hardlinks) { - hardlink* hl = CONTAINING_RECORD(le, hardlink, list_entry); - file_ref* parfr; + if (IsListEmpty(&fcb->hardlinks)) { + bytes_needed += sizeof(FILE_LINK_ENTRY_INFORMATION) + fileref->filepart.Length - sizeof(WCHAR); - TRACE("parent %llx, index %llx, name %.*S\n", hl->parent, hl->index, hl->name.Length / sizeof(WCHAR), hl->name.Buffer); + if (bytes_needed > *length) + overflow = TRUE; - Status = open_fileref_by_inode(fcb->Vcb, fcb->subvol, hl->parent, &parfr, Irp); - - if (!NT_SUCCESS(Status)) { - ERR("open_fileref_by_inode returned %08x\n", Status); - } else if (!parfr->deleted) { - LIST_ENTRY* le2; - BOOL found = FALSE, deleted = FALSE; - UNICODE_STRING* fn; + if (!overflow) { + feli = &fli->Entry; + + feli->NextEntryOffset = 0; + feli->ParentFileId = fileref->parent->fcb->inode; + feli->FileNameLength = fileref->filepart.Length / sizeof(WCHAR); + RtlCopyMemory(feli->FileName, fileref->filepart.Buffer, fileref->filepart.Length); + + fli->EntriesReturned++; + + len = bytes_needed; + } + } else { + le = fcb->hardlinks.Flink; + while (le != &fcb->hardlinks) { + hardlink* hl = 
CONTAINING_RECORD(le, hardlink, list_entry); + file_ref* parfr; - le2 = parfr->children.Flink; - while (le2 != &parfr->children) { - file_ref* fr2 = CONTAINING_RECORD(le2, file_ref, list_entry); + TRACE("parent %llx, index %llx, name %.*S\n", hl->parent, hl->index, hl->name.Length / sizeof(WCHAR), hl->name.Buffer); + + Status = open_fileref_by_inode(fcb->Vcb, fcb->subvol, hl->parent, &parfr, Irp); + + if (!NT_SUCCESS(Status)) { + ERR("open_fileref_by_inode returned %08x\n", Status); + } else if (!parfr->deleted) { + LIST_ENTRY* le2; + BOOL found = FALSE, deleted = FALSE; + UNICODE_STRING* fn; - if (fr2->index == hl->index) { - found = TRUE; - deleted = fr2->deleted; + le2 = parfr->children.Flink; + while (le2 != &parfr->children) { + file_ref* fr2 = CONTAINING_RECORD(le2, file_ref, list_entry); - if (!deleted) - fn = &fr2->filepart; + if (fr2->index == hl->index) { + found = TRUE; + deleted = fr2->deleted; + + if (!deleted) + fn = &fr2->filepart; + + break; + } - break; + le2 = le2->Flink; } - le2 = le2->Flink; - } - - if (!found) - fn = &hl->name; - - if (!deleted) { - TRACE("fn = %.*S (found = %u)\n", fn->Length / sizeof(WCHAR), fn->Buffer, found); - - if (feli) - bytes_needed = sector_align(bytes_needed, 8); + if (!found) + fn = &hl->name; - bytes_needed += sizeof(FILE_LINK_ENTRY_INFORMATION) + fn->Length - sizeof(WCHAR); - - if (bytes_needed > *length) - overflow = TRUE; - - if (!overflow) { - if (feli) { - feli->NextEntryOffset = sector_align(sizeof(FILE_LINK_ENTRY_INFORMATION) + ((feli->FileNameLength - 1) * sizeof(WCHAR)), 8); - feli = (FILE_LINK_ENTRY_INFORMATION*)((UINT8*)feli + feli->NextEntryOffset); - } else - feli = &fli->Entry; + if (!deleted) { + TRACE("fn = %.*S (found = %u)\n", fn->Length / sizeof(WCHAR), fn->Buffer, found); - feli->NextEntryOffset = 0; - feli->ParentFileId = parfr->fcb->inode; - feli->FileNameLength = fn->Length / sizeof(WCHAR); - RtlCopyMemory(feli->FileName, fn->Buffer, fn->Length); + if (feli) + bytes_needed = 
sector_align(bytes_needed, 8); - fli->EntriesReturned++; + bytes_needed += sizeof(FILE_LINK_ENTRY_INFORMATION) + fn->Length - sizeof(WCHAR); - len = bytes_needed; + if (bytes_needed > *length) + overflow = TRUE; + + if (!overflow) { + if (feli) { + feli->NextEntryOffset = sector_align(sizeof(FILE_LINK_ENTRY_INFORMATION) + ((feli->FileNameLength - 1) * sizeof(WCHAR)), 8); + feli = (FILE_LINK_ENTRY_INFORMATION*)((UINT8*)feli + feli->NextEntryOffset); + } else + feli = &fli->Entry; + + feli->NextEntryOffset = 0; + feli->ParentFileId = parfr->fcb->inode; + feli->FileNameLength = fn->Length / sizeof(WCHAR); + RtlCopyMemory(feli->FileName, fn->Buffer, fn->Length); + + fli->EntriesReturned++; + + len = bytes_needed; + } } + + free_fileref(parfr); } - free_fileref(parfr); + le = le->Flink; } - - le = le->Flink; } ExReleaseResourceLite(&fcb->Vcb->fcb_lock); @@ -4039,9 +4478,6 @@ NTSTATUS STDCALL drv_query_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { ccb* ccb; FILE_FULL_EA_INFORMATION* ffei; ULONG retlen = 0; -#ifdef __REACTOS__ - Status = STATUS_INTERNAL_ERROR; -#endif TRACE("(%p, %p)\n", DeviceObject, Irp); @@ -4091,6 +4527,8 @@ NTSTATUS STDCALL drv_query_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { ExAcquireResourceSharedLite(fcb->Header.Resource, TRUE); + Status = STATUS_SUCCESS; + if (fcb->ea_xattr.Length == 0) goto end2; @@ -4232,8 +4670,6 @@ NTSTATUS STDCALL drv_query_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { } while (TRUE); } - Status = STATUS_SUCCESS; - end2: ExReleaseResourceLite(fcb->Header.Resource); diff --git a/reactos/drivers/filesystems/btrfs/flushthread.c b/reactos/drivers/filesystems/btrfs/flushthread.c index 78cd92a25a0..69ba21e051f 100644 --- a/reactos/drivers/filesystems/btrfs/flushthread.c +++ b/reactos/drivers/filesystems/btrfs/flushthread.c @@ -38,16 +38,8 @@ typedef struct { TREE_BLOCK_REF tbr; } EXTENT_ITEM_SKINNY_METADATA; -typedef struct { - UINT64 address; - UINT32 length; - BOOL overlap; - UINT8* data; - LIST_ENTRY list_entry; 
-} tree_write; - static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp, LIST_ENTRY* rollback); - +static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback); static BOOL insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, root* r, UINT64 objid, UINT64 objtype, UINT64 offset, void* data, UINT16 datalen, enum batch_operation operation, PIRP Irp, LIST_ENTRY* rollback); @@ -326,6 +318,75 @@ static BOOL insert_tree_extent_skinny(device_extension* Vcb, UINT8 level, UINT64 return TRUE; } +BOOL find_metadata_address_in_chunk(device_extension* Vcb, chunk* c, UINT64* address) { + LIST_ENTRY* le; + space* s; + + TRACE("(%p, %llx, %p)\n", Vcb, c->offset, address); + + if (IsListEmpty(&c->space_size)) + return FALSE; + + if (!c->last_alloc_set) { + s = CONTAINING_RECORD(c->space.Blink, space, list_entry); + + c->last_alloc = s->address; + c->last_alloc_set = TRUE; + + if (s->size >= Vcb->superblock.node_size) { + *address = s->address; + c->last_alloc += Vcb->superblock.node_size; + return TRUE; + } + } + + le = c->space.Flink; + while (le != &c->space) { + s = CONTAINING_RECORD(le, space, list_entry); + + if (s->address <= c->last_alloc && s->address + s->size >= c->last_alloc + Vcb->superblock.node_size) { + *address = c->last_alloc; + c->last_alloc += Vcb->superblock.node_size; + return TRUE; + } + + le = le->Flink; + } + + le = c->space_size.Flink; + while (le != &c->space_size) { + s = CONTAINING_RECORD(le, space, list_entry_size); + + if (s->size == Vcb->superblock.node_size) { + *address = s->address; + c->last_alloc = s->address + Vcb->superblock.node_size; + return TRUE; + } else if (s->size < Vcb->superblock.node_size) { + if (le == c->space_size.Flink) + return FALSE; + + s = CONTAINING_RECORD(le->Blink, space, list_entry_size); + + *address = s->address; + c->last_alloc = s->address + Vcb->superblock.node_size; + + return TRUE; + } + + le = le->Flink; + } + + s = 
CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size); + + if (s->size > Vcb->superblock.node_size) { + *address = s->address; + c->last_alloc = s->address + Vcb->superblock.node_size; + return TRUE; + } + + return FALSE; +} + static BOOL insert_tree_extent(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64* new_address, PIRP Irp, LIST_ENTRY* rollback) { UINT64 address; EXTENT_ITEM_TREE2* eit2; @@ -333,7 +394,7 @@ static BOOL insert_tree_extent(device_extension* Vcb, UINT8 level, UINT64 root_i TRACE("(%p, %x, %llx, %p, %p, %p, %p)\n", Vcb, level, root_id, c, new_address, rollback); - if (!find_address_in_chunk(Vcb, c, Vcb->superblock.node_size, &address)) + if (!find_metadata_address_in_chunk(Vcb, c, &address)) return FALSE; if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) { @@ -389,16 +450,12 @@ static BOOL insert_tree_extent(device_extension* Vcb, UINT8 level, UINT64 root_i NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) { chunk *origchunk = NULL, *c; LIST_ENTRY* le; - UINT64 flags = t->flags, addr; + UINT64 flags, addr; - if (flags == 0) { - if (t->root->id == BTRFS_ROOT_CHUNK) - flags = BLOCK_FLAG_SYSTEM | BLOCK_FLAG_DUPLICATE; - else if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS) - flags = BLOCK_FLAG_DATA | BLOCK_FLAG_METADATA; - else - flags = BLOCK_FLAG_METADATA | BLOCK_FLAG_DUPLICATE; - } + if (t->root->id == BTRFS_ROOT_CHUNK) + flags = Vcb->system_flags; + else + flags = Vcb->metadata_flags; // TRACE("flags = %x\n", (UINT32)wt->flags); @@ -416,7 +473,8 @@ NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENT if (t->has_address) { origchunk = get_chunk_from_address(Vcb, t->header.address); - if (!origchunk->readonly && insert_tree_extent(Vcb, t->header.level, t->root->id, origchunk, &addr, Irp, rollback)) { + if (!origchunk->readonly && !origchunk->reloc && origchunk->chunk_item->type == flags && + 
insert_tree_extent(Vcb, t->header.level, t->root->id, origchunk, &addr, Irp, rollback)) { t->new_address = addr; t->has_new_address = TRUE; return STATUS_SUCCESS; @@ -429,7 +487,7 @@ NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENT while (le != &Vcb->chunks) { c = CONTAINING_RECORD(le, chunk, list_entry); - if (!c->readonly) { + if (!c->readonly && !c->reloc) { ExAcquireResourceExclusiveLite(&c->lock, TRUE); if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) { @@ -505,7 +563,7 @@ NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENT // } // } -static NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* t, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* t, UINT64 parent_root, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) { NTSTATUS Status; UINT64 rc, root; @@ -517,12 +575,12 @@ static NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* return STATUS_INTERNAL_ERROR; } - if (t->parent) - root = t->parent->header.tree_id; + if (!t || t->parent) + root = parent_root; else root = t->header.tree_id; - Status = decrease_extent_refcount_tree(Vcb, address, Vcb->superblock.node_size, root, t->header.level, Irp, rollback); + Status = decrease_extent_refcount_tree(Vcb, address, Vcb->superblock.node_size, root, level, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("decrease_extent_refcount_tree returned %08x\n", Status); return Status; @@ -620,11 +678,19 @@ end: return STATUS_SUCCESS; } -static BOOL shared_tree_is_unique(device_extension* Vcb, tree* t, PIRP Irp) { +static BOOL shared_tree_is_unique(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) { KEY searchkey; traverse_ptr tp; NTSTATUS Status; + if (!t->updated_extents && t->has_address) { + Status = update_tree_extents(Vcb, t, Irp, rollback); + if (!NT_SUCCESS(Status)) { + 
ERR("update_tree_extents returned %08x\n", Status); + return FALSE; + } + } + searchkey.obj_id = t->header.address; searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM; searchkey.offset = 0xffffffffffffffff; @@ -653,7 +719,7 @@ static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LI if (flags & EXTENT_ITEM_SHARED_BACKREFS || t->header.flags & HEADER_FLAG_SHARED_BACKREF || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) { TREE_BLOCK_REF tbr; - BOOL unique = rc > 1 ? FALSE : (t->parent ? shared_tree_is_unique(Vcb, t->parent, Irp) : FALSE); + BOOL unique = rc > 1 ? FALSE : (t->parent ? shared_tree_is_unique(Vcb, t->parent, Irp, rollback) : FALSE); if (t->header.level == 0) { LIST_ENTRY* le; @@ -721,18 +787,50 @@ static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LI SHARED_DATA_REF sdr; sdr.offset = t->header.address; - sdr.count = sdrrc; + sdr.count = 1; Status = decrease_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, - t->header.address, Irp, rollback); + t->header.address, ce->superseded, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("decrease_extent_refcount returned %08x\n", Status); return Status; } if (ce) { - ce->count--; - ce->old_count--; + LIST_ENTRY* le2; + + le2 = ce->refs.Flink; + while (le2 != &ce->refs) { + changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); + + if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) { + ce->count--; + cer->sdr.count--; + break; + } + + le2 = le2->Flink; + } + + le2 = ce->old_refs.Flink; + while (le2 != &ce->old_refs) { + changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry); + + if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) { + ce->old_count--; + + if (cer->sdr.count > 1) + cer->sdr.count--; + else { + RemoveEntryList(&cer->list_entry); + ExFreePool(cer); + 
} + + break; + } + + le2 = le2->Flink; + } } } } @@ -772,7 +870,7 @@ static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LI sbr.offset = t->header.address; Status = decrease_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0, - t->header.address, Irp, rollback); + t->header.address, FALSE, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("decrease_extent_refcount returned %08x\n", Status); return Status; @@ -796,7 +894,7 @@ static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LI sbr.offset = t->parent->header.address; Status = decrease_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0, - t->parent->header.address, Irp, rollback); + t->parent->header.address, FALSE, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("decrease_extent_refcount returned %08x\n", Status); return Status; @@ -821,7 +919,7 @@ static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LI t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF; } - Status = reduce_tree_extent(Vcb, t->header.address, t, Irp, rollback); + Status = reduce_tree_extent(Vcb, t->header.address, t, t->parent ? 
t->parent->header.tree_id : t->header.tree_id, t->header.level, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("reduce_tree_extent returned %08x\n", Status); @@ -1092,16 +1190,170 @@ static NTSTATUS update_root_root(device_extension* Vcb, PIRP Irp, LIST_ENTRY* ro return STATUS_SUCCESS; } +NTSTATUS do_tree_writes(device_extension* Vcb, LIST_ENTRY* tree_writes, PIRP Irp) { + chunk* c; + LIST_ENTRY* le; + tree_write* tw; + NTSTATUS Status; + write_data_context* wtc; + + wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context), ALLOC_TAG); + if (!wtc) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE); + InitializeListHead(&wtc->stripes); + wtc->tree = TRUE; + wtc->stripes_left = 0; + + // merge together runs + c = NULL; + le = tree_writes->Flink; + while (le != tree_writes) { + tw = CONTAINING_RECORD(le, tree_write, list_entry); + + if (!c || tw->address < c->offset || tw->address >= c->offset + c->chunk_item->size) + c = get_chunk_from_address(Vcb, tw->address); + else { + tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry); + + if (tw->address == tw2->address + tw2->length) { + UINT8* data = ExAllocatePoolWithTag(NonPagedPool, tw2->length + tw->length, ALLOC_TAG); + + if (!data) { + ERR("out of memory\n"); + ExFreePool(wtc); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(data, tw2->data, tw2->length); + RtlCopyMemory(&data[tw2->length], tw->data, tw->length); + + ExFreePool(tw2->data); + tw2->data = data; + tw2->length += tw->length; + + ExFreePool(tw->data); + RemoveEntryList(&tw->list_entry); + ExFreePool(tw); + + le = tw2->list_entry.Flink; + continue; + } + } + + le = le->Flink; + } + + // mark RAID5/6 overlaps so we can do them one by one + c = NULL; + le = tree_writes->Flink; + while (le != tree_writes) { + tw = CONTAINING_RECORD(le, tree_write, list_entry); + + if (!c || tw->address < c->offset || tw->address >= c->offset + 
c->chunk_item->size) + c = get_chunk_from_address(Vcb, tw->address); + else if (c->chunk_item->type & BLOCK_FLAG_RAID5) { + tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry); + UINT64 last_stripe, this_stripe; + + last_stripe = (tw2->address + tw2->length - 1 - c->offset) / (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 1)); + this_stripe = (tw->address - c->offset) / (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 1)); + + if (last_stripe == this_stripe) + tw->overlap = TRUE; + } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) { + tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry); + UINT64 last_stripe, this_stripe; + + last_stripe = (tw2->address + tw2->length - 1 - c->offset) / (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 2)); + this_stripe = (tw->address - c->offset) / (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 2)); + + if (last_stripe == this_stripe) + tw->overlap = TRUE; + } + + le = le->Flink; + } + + le = tree_writes->Flink; + while (le != tree_writes) { + tw = CONTAINING_RECORD(le, tree_write, list_entry); + + if (!tw->overlap) { + TRACE("address: %llx, size: %x, overlap = %u\n", tw->address, tw->length, tw->overlap); + + Status = write_data(Vcb, tw->address, tw->data, TRUE, tw->length, wtc, NULL, NULL); + if (!NT_SUCCESS(Status)) { + ERR("write_data returned %08x\n", Status); + ExFreePool(wtc); + return Status; + } + } + + le = le->Flink; + } + + if (wtc->stripes.Flink != &wtc->stripes) { + // launch writes and wait + le = wtc->stripes.Flink; + while (le != &wtc->stripes) { + write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry); + + if (stripe->status != WriteDataStatus_Ignore) + IoCallDriver(stripe->device->devobj, stripe->Irp); + + le = le->Flink; + } + + KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL); + + le = wtc->stripes.Flink; + while (le != &wtc->stripes) { + write_data_stripe* stripe = 
CONTAINING_RECORD(le, write_data_stripe, list_entry); + + if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) { + Status = stripe->iosb.Status; + break; + } + + le = le->Flink; + } + + free_write_data_stripes(wtc); + } + + le = tree_writes->Flink; + while (le != tree_writes) { + tw = CONTAINING_RECORD(le, tree_write, list_entry); + + if (tw->overlap) { + TRACE("address: %llx, size: %x, overlap = %u\n", tw->address, tw->length, tw->overlap); + + Status = write_data_complete(Vcb, tw->address, tw->data, tw->length, Irp, NULL); + if (!NT_SUCCESS(Status)) { + ERR("write_data_complete returned %08x\n", Status); + ExFreePool(wtc); + return Status; + } + } + + le = le->Flink; + } + + return STATUS_SUCCESS; +} + static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) { UINT8 level; UINT8 *data, *body; UINT32 crc32; NTSTATUS Status; LIST_ENTRY* le; - write_data_context* wtc; LIST_ENTRY tree_writes; tree_write* tw; - chunk* c; TRACE("(%p)\n", Vcb); @@ -1209,17 +1461,6 @@ static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) { TRACE("allocated tree extents\n"); - wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context), ALLOC_TAG); - if (!wtc) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE); - InitializeListHead(&wtc->stripes); - wtc->tree = TRUE; - wtc->stripes_left = 0; - le = Vcb->trees.Flink; while (le != &Vcb->trees) { tree* t = CONTAINING_RECORD(le, tree, list_entry); @@ -1385,149 +1626,20 @@ static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) { le = le->Flink; } - Status = STATUS_SUCCESS; + Status = do_tree_writes(Vcb, &tree_writes, Irp); + if (!NT_SUCCESS(Status)) { + ERR("do_tree_writes returned %08x\n", Status); + goto end; + } - // merge together runs - c = NULL; - le = tree_writes.Flink; - while (le != &tree_writes) { + Status = STATUS_SUCCESS; + +end: + while (!IsListEmpty(&tree_writes)) { + le = 
RemoveHeadList(&tree_writes); tw = CONTAINING_RECORD(le, tree_write, list_entry); - if (!c || tw->address < c->offset || tw->address >= c->offset + c->chunk_item->size) - c = get_chunk_from_address(Vcb, tw->address); - else { - tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry); - - if (tw->address == tw2->address + tw2->length) { - data = ExAllocatePoolWithTag(NonPagedPool, tw2->length + tw->length, ALLOC_TAG); - - if (!data) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - RtlCopyMemory(data, tw2->data, tw2->length); - RtlCopyMemory(&data[tw2->length], tw->data, tw->length); - - ExFreePool(tw2->data); - tw2->data = data; - tw2->length += tw->length; - - ExFreePool(tw->data); - RemoveEntryList(&tw->list_entry); - ExFreePool(tw); - - le = tw2->list_entry.Flink; - continue; - } - } - - le = le->Flink; - } - - // mark RAID5/6 overlaps so we can do them one by one - c = NULL; - le = tree_writes.Flink; - while (le != &tree_writes) { - tw = CONTAINING_RECORD(le, tree_write, list_entry); - - if (!c || tw->address < c->offset || tw->address >= c->offset + c->chunk_item->size) - c = get_chunk_from_address(Vcb, tw->address); - else if (c->chunk_item->type & BLOCK_FLAG_RAID5) { - tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry); - UINT64 last_stripe, this_stripe; - - last_stripe = (tw2->address + tw2->length - 1 - c->offset) / (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 1)); - this_stripe = (tw->address - c->offset) / (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 1)); - - if (last_stripe == this_stripe) - tw->overlap = TRUE; - } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) { - tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry); - UINT64 last_stripe, this_stripe; - - last_stripe = (tw2->address + tw2->length - 1 - c->offset) / (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 2)); - this_stripe = (tw->address - c->offset) / 
(c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 2)); - - if (last_stripe == this_stripe) - tw->overlap = TRUE; - } - - le = le->Flink; - } - - le = tree_writes.Flink; - while (le != &tree_writes) { - tw = CONTAINING_RECORD(le, tree_write, list_entry); - - if (!tw->overlap) { - TRACE("address: %llx, size: %x, overlap = %u\n", tw->address, tw->length, tw->overlap); - - Status = write_data(Vcb, tw->address, tw->data, TRUE, tw->length, wtc, NULL, NULL); - if (!NT_SUCCESS(Status)) { - ERR("write_data returned %08x\n", Status); - goto end; - } - } - - le = le->Flink; - } - - if (wtc->stripes.Flink != &wtc->stripes) { - // launch writes and wait - le = wtc->stripes.Flink; - while (le != &wtc->stripes) { - write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry); - - if (stripe->status != WriteDataStatus_Ignore) - IoCallDriver(stripe->device->devobj, stripe->Irp); - - le = le->Flink; - } - - KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL); - - le = wtc->stripes.Flink; - while (le != &wtc->stripes) { - write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry); - - if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) { - Status = stripe->iosb.Status; - break; - } - - le = le->Flink; - } - - free_write_data_stripes(wtc); - } - - le = tree_writes.Flink; - while (le != &tree_writes) { - tw = CONTAINING_RECORD(le, tree_write, list_entry); - - if (tw->overlap) { - TRACE("address: %llx, size: %x, overlap = %u\n", tw->address, tw->length, tw->overlap); - - Status = write_data_complete(Vcb, tw->address, tw->data, tw->length, Irp, NULL); - if (!NT_SUCCESS(Status)) { - ERR("write_data_complete returned %08x\n", Status); - goto end; - } - } - - le = le->Flink; - } - -end: - ExFreePool(wtc); - - while (!IsListEmpty(&tree_writes)) { - le = RemoveHeadList(&tree_writes); - tw = CONTAINING_RECORD(le, tree_write, list_entry); - - ExFreePool(tw); + ExFreePool(tw); } return Status; 
@@ -1671,14 +1783,19 @@ static NTSTATUS write_superblocks(device_extension* Vcb, PIRP Irp) { update_backup_superblock(Vcb, &Vcb->superblock.backup[BTRFS_NUM_BACKUP_ROOTS - 1], Irp); - for (i = 0; i < Vcb->superblock.num_devices; i++) { - if (Vcb->devices[i].devobj && !Vcb->devices[i].readonly) { - Status = write_superblock(Vcb, &Vcb->devices[i]); + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (dev->devobj && !dev->readonly) { + Status = write_superblock(Vcb, dev); if (!NT_SUCCESS(Status)) { ERR("write_superblock returned %08x\n", Status); return Status; } } + + le = le->Flink; } return STATUS_SUCCESS; @@ -1689,6 +1806,20 @@ static NTSTATUS flush_changed_extent(device_extension* Vcb, chunk* c, changed_ex NTSTATUS Status; UINT64 old_size; + if (ce->count == 0 && ce->old_count == 0) { + while (!IsListEmpty(&ce->refs)) { + changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->refs), changed_extent_ref, list_entry); + ExFreePool(cer); + } + + while (!IsListEmpty(&ce->old_refs)) { + changed_extent_ref* cer = CONTAINING_RECORD(RemoveHeadList(&ce->old_refs), changed_extent_ref, list_entry); + ExFreePool(cer); + } + + goto end; + } + le = ce->refs.Flink; while (le != &ce->refs) { changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry); @@ -1722,7 +1853,7 @@ static NTSTATUS flush_changed_extent(device_extension* Vcb, chunk* c, changed_ex } } else if (cer->edr.count < old_count) { Status = decrease_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset, - old_count - cer->edr.count, Irp, rollback); + old_count - cer->edr.count, ce->superseded, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("decrease_extent_refcount_data returned %08x\n", Status); @@ -1797,32 +1928,9 @@ static NTSTATUS flush_changed_extent(device_extension* Vcb, chunk* c, changed_ex WARN("old_refs not empty\n"); #endif +end: if (ce->count == 0 && 
!ce->superseded) { - if (!ce->no_csum) { - LIST_ENTRY changed_sector_list; - - changed_sector* sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG); - if (!sc) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - sc->ol.key = ce->address; - sc->checksums = NULL; - sc->length = ce->size / Vcb->superblock.sector_size; - - sc->deleted = TRUE; - - InitializeListHead(&changed_sector_list); - insert_into_ordered_list(&changed_sector_list, &sc->ol); - - ExAcquireResourceExclusiveLite(&Vcb->checksum_lock, TRUE); - commit_checksum_changes(Vcb, &changed_sector_list); - ExReleaseResourceLite(&Vcb->checksum_lock); - } - decrease_chunk_usage(c, ce->size); - space_list_add(Vcb, c, TRUE, ce->address, ce->size, rollback); } @@ -1832,197 +1940,187 @@ static NTSTATUS flush_changed_extent(device_extension* Vcb, chunk* c, changed_ex return STATUS_SUCCESS; } -static void update_checksum_tree(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) { - LIST_ENTRY* le = Vcb->sector_checksums.Flink; - changed_sector* cs; - traverse_ptr tp, next_tp; +void add_checksum_entry(device_extension* Vcb, UINT64 address, ULONG length, UINT32* csum, PIRP Irp, LIST_ENTRY* rollback) { KEY searchkey; + traverse_ptr tp, next_tp; UINT32* data; NTSTATUS Status; - - if (!Vcb->checksum_root) { - ERR("no checksum root\n"); - goto exit; - } - - while (le != &Vcb->sector_checksums) { - UINT64 startaddr, endaddr; - ULONG len; - UINT32* checksums; - RTL_BITMAP bmp; - ULONG* bmparr; - ULONG runlength, index; - - cs = (changed_sector*)le; - - searchkey.obj_id = EXTENT_CSUM_ID; - searchkey.obj_type = TYPE_EXTENT_CSUM; - searchkey.offset = cs->ol.key; - - // FIXME - create checksum_root if it doesn't exist at all - - Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp); - if (Status == STATUS_NOT_FOUND) { // tree is completely empty - if (!cs->deleted) { - checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * cs->length, ALLOC_TAG); + UINT64 
startaddr, endaddr; + ULONG len; + UINT32* checksums; + RTL_BITMAP bmp; + ULONG* bmparr; + ULONG runlength, index; + + searchkey.obj_id = EXTENT_CSUM_ID; + searchkey.obj_type = TYPE_EXTENT_CSUM; + searchkey.offset = address; + + // FIXME - create checksum_root if it doesn't exist at all + + Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp); + if (Status == STATUS_NOT_FOUND) { // tree is completely empty + if (csum) { // not deleted + ULONG length2 = length; + UINT64 off = address; + UINT32* data = csum; + + do { + ULONG il = min(length2, MAX_CSUM_SIZE / sizeof(UINT32)); + + checksums = ExAllocatePoolWithTag(PagedPool, il * sizeof(UINT32), ALLOC_TAG); if (!checksums) { ERR("out of memory\n"); - goto exit; + return; } - RtlCopyMemory(checksums, cs->checksums, sizeof(UINT32) * cs->length); + RtlCopyMemory(checksums, data, il * sizeof(UINT32)); - if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, cs->ol.key, checksums, sizeof(UINT32) * cs->length, NULL, Irp, rollback)) { + if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, checksums, + il * sizeof(UINT32), NULL, Irp, rollback)) { ERR("insert_tree_item failed\n"); ExFreePool(checksums); - goto exit; + return; } - } - } else if (!NT_SUCCESS(Status)) { - ERR("find_item returned %08x\n", Status); - goto exit; - } else { - UINT32 tplen; - - // FIXME - check entry is TYPE_EXTENT_CSUM? 
- - if (tp.item->key.offset < cs->ol.key && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= cs->ol.key) - startaddr = tp.item->key.offset; - else - startaddr = cs->ol.key; - - searchkey.obj_id = EXTENT_CSUM_ID; - searchkey.obj_type = TYPE_EXTENT_CSUM; - searchkey.offset = cs->ol.key + (cs->length * Vcb->superblock.sector_size); - - Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - goto exit; - } - - tplen = tp.item->size / sizeof(UINT32); - - if (tp.item->key.offset + (tplen * Vcb->superblock.sector_size) >= cs->ol.key + (cs->length * Vcb->superblock.sector_size)) - endaddr = tp.item->key.offset + (tplen * Vcb->superblock.sector_size); - else - endaddr = cs->ol.key + (cs->length * Vcb->superblock.sector_size); - - TRACE("cs starts at %llx (%x sectors)\n", cs->ol.key, cs->length); - TRACE("startaddr = %llx\n", startaddr); - TRACE("endaddr = %llx\n", endaddr); - - len = (endaddr - startaddr) / Vcb->superblock.sector_size; - - checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * len, ALLOC_TAG); - if (!checksums) { - ERR("out of memory\n"); - goto exit; - } - - bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG); - if (!bmparr) { - ERR("out of memory\n"); - ExFreePool(checksums); - goto exit; - } - RtlInitializeBitMap(&bmp, bmparr, len); - RtlSetAllBits(&bmp); - - searchkey.obj_id = EXTENT_CSUM_ID; - searchkey.obj_type = TYPE_EXTENT_CSUM; - searchkey.offset = cs->ol.key; - - Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - goto exit; - } - - // set bit = free space, cleared bit = allocated sector - - // ERR("start loop\n"); - while (tp.item->key.offset < endaddr) { - // ERR("%llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - if 
(tp.item->key.offset >= startaddr) { - if (tp.item->size > 0) { - RtlCopyMemory(&checksums[(tp.item->key.offset - startaddr) / Vcb->superblock.sector_size], tp.item->data, tp.item->size); - RtlClearBits(&bmp, (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size, tp.item->size / sizeof(UINT32)); - } - - delete_tree_item(Vcb, &tp, rollback); - } + length2 -= il; - if (find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) { - tp = next_tp; - } else - break; - } - // ERR("end loop\n"); - - if (cs->deleted) { - RtlSetBits(&bmp, (cs->ol.key - startaddr) / Vcb->superblock.sector_size, cs->length); - } else { - RtlCopyMemory(&checksums[(cs->ol.key - startaddr) / Vcb->superblock.sector_size], cs->checksums, cs->length * sizeof(UINT32)); - RtlClearBits(&bmp, (cs->ol.key - startaddr) / Vcb->superblock.sector_size, cs->length); - } - - runlength = RtlFindFirstRunClear(&bmp, &index); + if (length2 > 0) { + off += il * Vcb->superblock.sector_size; + data += il; + } + } while (length2 > 0); + } + } else if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return; + } else { + UINT32 tplen; + + // FIXME - check entry is TYPE_EXTENT_CSUM? 
+ + if (tp.item->key.offset < address && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= address) + startaddr = tp.item->key.offset; + else + startaddr = address; + + searchkey.obj_id = EXTENT_CSUM_ID; + searchkey.obj_type = TYPE_EXTENT_CSUM; + searchkey.offset = address + (length * Vcb->superblock.sector_size); + + Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return; + } + + tplen = tp.item->size / sizeof(UINT32); + + if (tp.item->key.offset + (tplen * Vcb->superblock.sector_size) >= address + (length * Vcb->superblock.sector_size)) + endaddr = tp.item->key.offset + (tplen * Vcb->superblock.sector_size); + else + endaddr = address + (length * Vcb->superblock.sector_size); + + TRACE("cs starts at %llx (%x sectors)\n", address, length); + TRACE("startaddr = %llx\n", startaddr); + TRACE("endaddr = %llx\n", endaddr); + + len = (endaddr - startaddr) / Vcb->superblock.sector_size; + + checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * len, ALLOC_TAG); + if (!checksums) { + ERR("out of memory\n"); + return; + } + + bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG); + if (!bmparr) { + ERR("out of memory\n"); + ExFreePool(checksums); + return; + } - while (runlength != 0) { - do { - ULONG rl; - UINT64 off; - - if (runlength * sizeof(UINT32) > MAX_CSUM_SIZE) - rl = MAX_CSUM_SIZE / sizeof(UINT32); - else - rl = runlength; - - data = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * rl, ALLOC_TAG); - if (!data) { - ERR("out of memory\n"); - ExFreePool(bmparr); - ExFreePool(checksums); - goto exit; - } - - RtlCopyMemory(data, &checksums[index], sizeof(UINT32) * rl); - - off = startaddr + UInt32x32To64(index, Vcb->superblock.sector_size); - - if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, data, sizeof(UINT32) * rl, NULL, Irp, rollback)) { - 
ERR("insert_tree_item failed\n"); - ExFreePool(data); - ExFreePool(bmparr); - ExFreePool(checksums); - goto exit; - } + RtlInitializeBitMap(&bmp, bmparr, len); + RtlSetAllBits(&bmp); + + searchkey.obj_id = EXTENT_CSUM_ID; + searchkey.obj_type = TYPE_EXTENT_CSUM; + searchkey.offset = address; + + Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("find_item returned %08x\n", Status); + return; + } + + // set bit = free space, cleared bit = allocated sector + + while (tp.item->key.offset < endaddr) { + if (tp.item->key.offset >= startaddr) { + if (tp.item->size > 0) { + ULONG itemlen = min((len - (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size) * sizeof(UINT32), tp.item->size); - runlength -= rl; - index += rl; - } while (runlength > 0); + RtlCopyMemory(&checksums[(tp.item->key.offset - startaddr) / Vcb->superblock.sector_size], tp.item->data, itemlen); + RtlClearBits(&bmp, (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size, itemlen / sizeof(UINT32)); + } - runlength = RtlFindNextForwardRunClear(&bmp, index, &index); + delete_tree_item(Vcb, &tp, rollback); } - ExFreePool(bmparr); - ExFreePool(checksums); + if (find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) { + tp = next_tp; + } else + break; } - le = le->Flink; - } - -exit: - while (!IsListEmpty(&Vcb->sector_checksums)) { - le = RemoveHeadList(&Vcb->sector_checksums); - cs = (changed_sector*)le; + if (!csum) { // deleted + RtlSetBits(&bmp, (address - startaddr) / Vcb->superblock.sector_size, length); + } else { + RtlCopyMemory(&checksums[(address - startaddr) / Vcb->superblock.sector_size], csum, length * sizeof(UINT32)); + RtlClearBits(&bmp, (address - startaddr) / Vcb->superblock.sector_size, length); + } - if (cs->checksums) - ExFreePool(cs->checksums); + runlength = RtlFindFirstRunClear(&bmp, &index); + + while (runlength != 0) { + do { + ULONG rl; + UINT64 off; + + if (runlength * sizeof(UINT32) > MAX_CSUM_SIZE) + rl = 
MAX_CSUM_SIZE / sizeof(UINT32); + else + rl = runlength; + + data = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * rl, ALLOC_TAG); + if (!data) { + ERR("out of memory\n"); + ExFreePool(bmparr); + ExFreePool(checksums); + return; + } + + RtlCopyMemory(data, &checksums[index], sizeof(UINT32) * rl); + + off = startaddr + UInt32x32To64(index, Vcb->superblock.sector_size); + + if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, data, sizeof(UINT32) * rl, NULL, Irp, rollback)) { + ERR("insert_tree_item failed\n"); + ExFreePool(data); + ExFreePool(bmparr); + ExFreePool(checksums); + return; + } + + runlength -= rl; + index += rl; + } while (runlength > 0); + + runlength = RtlFindNextForwardRunClear(&bmp, index, &index); + } - ExFreePool(cs); + ExFreePool(bmparr); + ExFreePool(checksums); } } @@ -2033,7 +2131,6 @@ static NTSTATUS update_chunk_usage(device_extension* Vcb, PIRP Irp, LIST_ENTRY* traverse_ptr tp; BLOCK_GROUP_ITEM* bgi; NTSTATUS Status; - BOOL flushed_extents = FALSE; TRACE("(%p)\n", Vcb); @@ -2056,8 +2153,6 @@ static NTSTATUS update_chunk_usage(device_extension* Vcb, PIRP Irp, LIST_ENTRY* goto end; } - flushed_extents = TRUE; - le2 = le3; } @@ -2136,14 +2231,6 @@ static NTSTATUS update_chunk_usage(device_extension* Vcb, PIRP Irp, LIST_ENTRY* le = le->Flink; } - if (flushed_extents) { - ExAcquireResourceExclusiveLite(&Vcb->checksum_lock, TRUE); - if (!IsListEmpty(&Vcb->sector_checksums)) { - update_checksum_tree(Vcb, Irp, rollback); - } - ExReleaseResourceLite(&Vcb->checksum_lock); - } - Status = STATUS_SUCCESS; end: @@ -2208,7 +2295,7 @@ static NTSTATUS STDCALL split_tree_at(device_extension* Vcb, tree* t, tree_data* nt->header.address = 0; nt->header.generation = Vcb->superblock.generation; nt->header.num_items = t->header.num_items - numitems; - nt->header.flags = HEADER_FLAG_MIXED_BACKREF; + nt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN; nt->has_address = FALSE; nt->Vcb = Vcb; @@ -2223,7 +2310,7 @@ 
static NTSTATUS STDCALL split_tree_at(device_extension* Vcb, tree* t, tree_data* nt->new_address = 0; nt->has_new_address = FALSE; nt->updated_extents = FALSE; - nt->flags = t->flags; + nt->list_entry_hash.Flink = NULL; InitializeListHead(&nt->itemlist); // ExInitializeResourceLite(&nt->nonpaged->load_tree_lock); @@ -2357,7 +2444,7 @@ static NTSTATUS STDCALL split_tree_at(device_extension* Vcb, tree* t, tree_data* pt->updated_extents = FALSE; // pt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG); pt->size = pt->header.num_items * sizeof(internal_node); - pt->flags = t->flags; + pt->list_entry_hash.Flink = NULL; InitializeListHead(&pt->itemlist); // ExInitializeResourceLite(&pt->nonpaged->load_tree_lock); @@ -2459,8 +2546,15 @@ static NTSTATUS STDCALL split_tree(device_extension* Vcb, tree* t) { else ds = sizeof(internal_node); - // FIXME - move back if previous item was deleted item with same key - if (size + ds > Vcb->superblock.node_size - sizeof(tree_header)) + if (numitems == 0 && ds > Vcb->superblock.node_size - sizeof(tree_header)) { + ERR("(%llx,%x,%llx) in tree %llx is too large (%x > %x)\n", + td->key.obj_id, td->key.obj_type, td->key.offset, t->root->id, + ds, Vcb->superblock.node_size - sizeof(tree_header)); + int3; + } + + // FIXME - move back if previous item was deleted item with same key + if (size + ds > Vcb->superblock.node_size - sizeof(tree_header)) return split_tree_at(Vcb, t, td, numitems, size); size += ds; @@ -2528,17 +2622,18 @@ BOOL is_tree_unique(device_extension* Vcb, tree* t, PIRP Irp) { return TRUE; } -static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, BOOL* done, PIRP Irp, LIST_ENTRY* rollback) { LIST_ENTRY* le; tree_data* nextparitem = NULL; NTSTATUS Status; tree *next_tree, *par; BOOL loaded; + *done = FALSE; + TRACE("trying to amalgamate tree in root %llx, level %x (size %u)\n", 
t->root->id, t->header.level, t->size); // FIXME - doesn't capture everything, as it doesn't ascend - // FIXME - write proper function and put it in treefuncs.c le = t->paritem->list_entry.Flink; while (le != &t->parent->itemlist) { tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); @@ -2554,8 +2649,6 @@ static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, PIRP Irp, LI if (!nextparitem) return STATUS_SUCCESS; - // FIXME - loop, and capture more than one tree if we can - TRACE("nextparitem: key = %llx,%x,%llx\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset); // nextparitem = t->paritem; @@ -2574,6 +2667,14 @@ static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, PIRP Irp, LI next_tree = nextparitem->treeholder.tree; + if (!next_tree->updated_extents && next_tree->has_address) { + Status = update_tree_extents(Vcb, next_tree, Irp, rollback); + if (!NT_SUCCESS(Status)) { + ERR("update_tree_extents returned %08x\n", Status); + return Status; + } + } + if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) { // merge two trees into one @@ -2597,35 +2698,34 @@ static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, PIRP Irp, LI } } + le = next_tree->itemlist.Flink; + while (le != &next_tree->itemlist) { + tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); + + td->inserted = TRUE; + + le = le->Flink; + } + t->itemlist.Blink->Flink = next_tree->itemlist.Flink; t->itemlist.Blink->Flink->Blink = t->itemlist.Blink; t->itemlist.Blink = next_tree->itemlist.Blink; t->itemlist.Blink->Flink = &t->itemlist; -// // TESTING -// le = t->itemlist.Flink; -// while (le != &t->itemlist) { -// tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); -// if (!td->ignore) { -// ERR("key: %llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset); -// } -// le = le->Flink; -// } - next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist; 
next_tree->header.num_items = 0; next_tree->size = 0; if (next_tree->has_new_address) { // delete associated EXTENT_ITEM - Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, Irp, rollback); + Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("reduce_tree_extent returned %08x\n", Status); return Status; } } else if (next_tree->has_address) { - Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, Irp, rollback); + Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("reduce_tree_extent returned %08x\n", Status); @@ -2652,6 +2752,8 @@ static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, PIRP Irp, LI next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size; free_tree(next_tree); + + *done = TRUE; } else { // rebalance by moving items from second tree into first ULONG avg_size = (t->size + next_tree->size) / 2; @@ -2676,6 +2778,7 @@ static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, PIRP Irp, LI if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) { RemoveEntryList(&td->list_entry); InsertTailList(&t->itemlist, &td->list_entry); + td->inserted = TRUE; if (next_tree->header.level > 0 && td->treeholder.tree) { td->treeholder.tree->parent = t; @@ -2698,31 +2801,32 @@ static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, PIRP Irp, LI le = next_tree->itemlist.Flink; } - if (changed) { - le = next_tree->itemlist.Flink; - while (le != &next_tree->itemlist) { - tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); - - if (!td->ignore) { - firstitem = td->key; - break; - } - - le = le->Flink; - } - - // ERR("firstitem = %llx,%x,%llx\n", firstitem.obj_id, firstitem.obj_type, firstitem.offset); - - // FIXME - once 
ascension is working, make this work with parent's parent, etc. - if (next_tree->paritem) - next_tree->paritem->key = firstitem; + le = next_tree->itemlist.Flink; + while (le != &next_tree->itemlist) { + tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); - par = next_tree; - while (par) { - par->write = TRUE; - par = par->parent; + if (!td->ignore) { + firstitem = td->key; + break; } + + le = le->Flink; + } + +// ERR("firstitem = %llx,%x,%llx\n", firstitem.obj_id, firstitem.obj_type, firstitem.offset); + + // FIXME - once ascension is working, make this work with parent's parent, etc. + if (next_tree->paritem) + next_tree->paritem->key = firstitem; + + par = next_tree; + while (par) { + par->write = TRUE; + par = par->parent; } + + if (changed) + *done = TRUE; } return STATUS_SUCCESS; @@ -2816,6 +2920,24 @@ static NTSTATUS update_extent_level(device_extension* Vcb, UINT64 address, tree* return STATUS_INTERNAL_ERROR; } +static NTSTATUS update_tree_extents_recursive(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) { + NTSTATUS Status; + + if (t->parent && !t->parent->updated_extents && t->parent->has_address) { + Status = update_tree_extents_recursive(Vcb, t->parent, Irp, rollback); + if (!NT_SUCCESS(Status)) + return Status; + } + + Status = update_tree_extents(Vcb, t, Irp, rollback); + if (!NT_SUCCESS(Status)) { + ERR("update_tree_extents returned %08x\n", Status); + return Status; + } + + return STATUS_SUCCESS; +} + static NTSTATUS STDCALL do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) { // LIST_ENTRY *le, *le2; // write_tree* wt; @@ -2848,29 +2970,23 @@ static NTSTATUS STDCALL do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* r empty = FALSE; if (t->header.num_items == 0) { + if (!t->updated_extents && t->has_address) { + Status = update_tree_extents(Vcb, t, Irp, rollback); + if (!NT_SUCCESS(Status)) { + ERR("update_tree_extents returned %08x\n", Status); + return Status; + } + } + if (t->parent) { - LIST_ENTRY* 
le2; - KEY firstitem = {0xcccccccccccccccc,0xcc,0xcccccccccccccccc}; -#ifdef __REACTOS__ - (void)firstitem; -#endif - done_deletions = TRUE; - le2 = t->itemlist.Flink; - while (le2 != &t->itemlist) { - tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry); - firstitem = td->key; - break; - } - - TRACE("deleting tree in root %llx (first item was %llx,%x,%llx)\n", - t->root->id, firstitem.obj_id, firstitem.obj_type, firstitem.offset); + TRACE("deleting tree in root %llx\n", t->root->id); t->root->root_item.bytes_used -= Vcb->superblock.node_size; if (t->has_new_address) { // delete associated EXTENT_ITEM - Status = reduce_tree_extent(Vcb, t->new_address, t, Irp, rollback); + Status = reduce_tree_extent(Vcb, t->new_address, t, t->parent->header.tree_id, t->header.level, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("reduce_tree_extent returned %08x\n", Status); @@ -2879,7 +2995,7 @@ static NTSTATUS STDCALL do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* r t->has_new_address = FALSE; } else if (t->has_address) { - Status = reduce_tree_extent(Vcb,t->header.address, t, Irp, rollback); + Status = reduce_tree_extent(Vcb,t->header.address, t, t->parent->header.tree_id, t->header.level, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("reduce_tree_extent returned %08x\n", Status); @@ -2914,6 +3030,15 @@ static NTSTATUS STDCALL do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* r } } else if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) { TRACE("splitting overlarge tree (%x > %x)\n", t->size, Vcb->superblock.node_size - sizeof(tree_header)); + + if (!t->updated_extents && t->has_address) { + Status = update_tree_extents_recursive(Vcb, t, Irp, rollback); + if (!NT_SUCCESS(Status)) { + ERR("update_tree_extents_recursive returned %08x\n", Status); + return Status; + } + } + Status = split_tree(Vcb, t); if (!NT_SUCCESS(Status)) { @@ -2945,11 +3070,15 @@ static NTSTATUS STDCALL do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* r t = 
CONTAINING_RECORD(le, tree, list_entry); if (t->write && t->header.level == level && t->header.num_items > 0 && t->parent && t->size < min_size && is_tree_unique(Vcb, t, Irp)) { - Status = try_tree_amalgamate(Vcb, t, Irp, rollback); - if (!NT_SUCCESS(Status)) { - ERR("try_tree_amalgamate returned %08x\n", Status); - return Status; - } + BOOL done; + + do { + Status = try_tree_amalgamate(Vcb, t, &done, Irp, rollback); + if (!NT_SUCCESS(Status)) { + ERR("try_tree_amalgamate returned %08x\n", Status); + return Status; + } + } while (done && t->size < min_size); } le = le->Flink; @@ -2983,7 +3112,7 @@ static NTSTATUS STDCALL do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* r TRACE("deleting top-level tree in root %llx with one item\n", t->root->id); if (t->has_new_address) { // delete associated EXTENT_ITEM - Status = reduce_tree_extent(Vcb, t->new_address, t, Irp, rollback); + Status = reduce_tree_extent(Vcb, t->new_address, t, t->header.tree_id, t->header.level, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("reduce_tree_extent returned %08x\n", Status); @@ -2992,7 +3121,7 @@ static NTSTATUS STDCALL do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* r t->has_new_address = FALSE; } else if (t->has_address) { - Status = reduce_tree_extent(Vcb,t->header.address, t, Irp, rollback); + Status = reduce_tree_extent(Vcb,t->header.address, t, t->header.tree_id, t->header.level, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("reduce_tree_extent returned %08x\n", Status); @@ -3037,41 +3166,47 @@ static NTSTATUS STDCALL do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* r return STATUS_SUCCESS; } -static NTSTATUS remove_root_extents(device_extension* Vcb, root* r, tree_holder* th, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS remove_root_extents(device_extension* Vcb, root* r, tree_holder* th, UINT8 level, tree* parent, PIRP Irp, LIST_ENTRY* rollback) { NTSTATUS Status; - if (level > 0) { - if (!th->tree) { - Status = load_tree(Vcb, th->address, r, 
&th->tree, NULL, NULL); - - if (!NT_SUCCESS(Status)) { - ERR("load_tree(%llx) returned %08x\n", th->address, Status); - return Status; - } + if (!th->tree) { + Status = load_tree(Vcb, th->address, r, &th->tree, NULL, NULL); + + if (!NT_SUCCESS(Status)) { + ERR("load_tree(%llx) returned %08x\n", th->address, Status); + return Status; } + } + + if (level > 0) { + LIST_ENTRY* le = th->tree->itemlist.Flink; - if (th->tree->header.level > 0) { - LIST_ENTRY* le = th->tree->itemlist.Flink; + while (le != &th->tree->itemlist) { + tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); - while (le != &th->tree->itemlist) { - tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry); + if (!td->ignore) { + Status = remove_root_extents(Vcb, r, &td->treeholder, th->tree->header.level - 1, th->tree, Irp, rollback); - if (!td->ignore) { - Status = remove_root_extents(Vcb, r, &td->treeholder, th->tree->header.level - 1, Irp, rollback); - - if (!NT_SUCCESS(Status)) { - ERR("remove_root_extents returned %08x\n", Status); - return Status; - } + if (!NT_SUCCESS(Status)) { + ERR("remove_root_extents returned %08x\n", Status); + return Status; } - - le = le->Flink; } + + le = le->Flink; + } + } + + if (th->tree && !th->tree->updated_extents && th->tree->has_address) { + Status = update_tree_extents(Vcb, th->tree, Irp, rollback); + if (!NT_SUCCESS(Status)) { + ERR("update_tree_extents returned %08x\n", Status); + return Status; } } if (!th->tree || th->tree->has_address) { - Status = reduce_tree_extent(Vcb, th->address, NULL, Irp, rollback); + Status = reduce_tree_extent(Vcb, th->address, NULL, parent ? 
parent->header.tree_id : r->id, level, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("reduce_tree_extent(%llx) returned %08x\n", th->address, Status); @@ -3087,7 +3222,7 @@ static NTSTATUS drop_root(device_extension* Vcb, root* r, PIRP Irp, LIST_ENTRY* KEY searchkey; traverse_ptr tp; - Status = remove_root_extents(Vcb, r, &r->treeholder, r->root_item.root_level, Irp, rollback); + Status = remove_root_extents(Vcb, r, &r->treeholder, r->root_item.root_level, NULL, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("remove_root_extents returned %08x\n", Status); return Status; @@ -3587,7 +3722,7 @@ static BOOL insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, while (le != &br->items) { batch_item* bi2 = CONTAINING_RECORD(le, batch_item, list_entry); - if (keycmp(bi2->key, bi->key) == -1) { + if (keycmp(bi2->key, bi->key) != 1) { InsertHeadList(&bi2->list_entry, &bi->list_entry); return TRUE; } @@ -3611,7 +3746,7 @@ typedef struct { LIST_ENTRY list_entry; } extent_range; -static void rationalize_extents(fcb* fcb, PIRP Irp) { +static void rationalize_extents(fcb* fcb, PIRP Irp, LIST_ENTRY* rollback) { LIST_ENTRY* le; LIST_ENTRY extent_ranges; extent_range* er; @@ -3739,26 +3874,8 @@ cont: le2 = le2->Flink; } - if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) { - LIST_ENTRY changed_sector_list; - - changed_sector* sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG); - if (!sc) { - ERR("out of memory\n"); - goto end; - } - - sc->ol.key = er->address; - sc->checksums = NULL; - sc->length = er->skip_start / fcb->Vcb->superblock.sector_size; - - sc->deleted = TRUE; - - InitializeListHead(&changed_sector_list); - insert_into_ordered_list(&changed_sector_list, &sc->ol); - - commit_checksum_changes(fcb->Vcb, &changed_sector_list); - } + if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) + add_checksum_entry(fcb->Vcb, er->address, er->skip_start / fcb->Vcb->superblock.sector_size, NULL, NULL, rollback); decrease_chunk_usage(er->chunk, 
er->skip_start); @@ -3797,26 +3914,8 @@ cont: le2 = le2->Flink; } - if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) { - LIST_ENTRY changed_sector_list; - - changed_sector* sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG); - if (!sc) { - ERR("out of memory\n"); - goto end; - } - - sc->ol.key = er->address + er->length - er->skip_end; - sc->checksums = NULL; - sc->length = er->skip_end / fcb->Vcb->superblock.sector_size; - - sc->deleted = TRUE; - - InitializeListHead(&changed_sector_list); - insert_into_ordered_list(&changed_sector_list, &sc->ol); - - commit_checksum_changes(fcb->Vcb, &changed_sector_list); - } + if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) + add_checksum_entry(fcb->Vcb, er->address + er->length - er->skip_end, er->skip_end / fcb->Vcb->superblock.sector_size, NULL, NULL, rollback); decrease_chunk_usage(er->chunk, er->skip_end); @@ -3929,19 +4028,6 @@ void flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY BOOL extents_changed; #endif -// ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE); - - while (!IsListEmpty(&fcb->index_list)) { - LIST_ENTRY* le = RemoveHeadList(&fcb->index_list); - index_entry* ie = CONTAINING_RECORD(le, index_entry, list_entry); - - if (ie->utf8.Buffer) ExFreePool(ie->utf8.Buffer); - if (ie->filepart_uc.Buffer) ExFreePool(ie->filepart_uc.Buffer); - ExFreePool(ie); - } - - fcb->index_loaded = FALSE; - if (fcb->ads) { if (fcb->deleted) delete_xattr(fcb->Vcb, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, Irp, rollback); @@ -3955,6 +4041,13 @@ void flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY goto end; } + if (fcb->deleted) { + if (!insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0xffffffffffffffff, NULL, 0, Batch_DeleteInode, Irp, rollback)) + ERR("insert_tree_item_batch failed\n"); + + goto end; + } + #ifdef DEBUG_PARANOID extents_changed = fcb->extents_changed; #endif @@ 
-3974,6 +4067,10 @@ void flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY if (ext->ignore) { RemoveEntryList(&ext->list_entry); + + if (ext->csum) + ExFreePool(ext->csum); + ExFreePool(ext->data); ExFreePool(ext); } @@ -3981,8 +4078,26 @@ void flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY le = le2; } + le = fcb->extents.Flink; + while (le != &fcb->extents) { + extent* ext = CONTAINING_RECORD(le, extent, list_entry); + + if (ext->inserted && ext->csum && ext->data->type == EXTENT_TYPE_REGULAR) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data; + + if (ed2->size > 0) { // not sparse + if (ext->data->compression == BTRFS_COMPRESSION_NONE) + add_checksum_entry(fcb->Vcb, ed2->address + ed2->offset, ed2->num_bytes / fcb->Vcb->superblock.sector_size, ext->csum, Irp, rollback); + else + add_checksum_entry(fcb->Vcb, ed2->address, ed2->size / fcb->Vcb->superblock.sector_size, ext->csum, Irp, rollback); + } + } + + le = le->Flink; + } + if (!IsListEmpty(&fcb->extents)) { - rationalize_extents(fcb, Irp); + rationalize_extents(fcb, Irp, rollback); // merge together adjacent EXTENT_DATAs pointing to same extent @@ -4002,10 +4117,32 @@ void flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY nextext->offset == ext->offset + ed2->num_bytes && ned2->offset == ed2->offset + ed2->num_bytes) { chunk* c; + if (ext->data->compression == BTRFS_COMPRESSION_NONE && ext->csum) { + ULONG len = (ed2->num_bytes + ned2->num_bytes) / fcb->Vcb->superblock.sector_size; + UINT32* csum; + + csum = ExAllocatePoolWithTag(NonPagedPool, len * sizeof(UINT32), ALLOC_TAG); + if (!csum) { + ERR("out of memory\n"); + goto end; + } + + RtlCopyMemory(csum, ext->csum, ed2->num_bytes * sizeof(UINT32) / fcb->Vcb->superblock.sector_size); + RtlCopyMemory(&csum[ed2->num_bytes / fcb->Vcb->superblock.sector_size], nextext->csum, + ned2->num_bytes * sizeof(UINT32) / fcb->Vcb->superblock.sector_size); + + ExFreePool(ext->csum); + 
ext->csum = csum; + } + ext->data->generation = fcb->Vcb->superblock.generation; ed2->num_bytes += ned2->num_bytes; RemoveEntryList(&nextext->list_entry); + + if (nextext->csum) + ExFreePool(nextext->csum); + ExFreePool(nextext->data); ExFreePool(nextext); @@ -4059,78 +4196,78 @@ void flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY } while (b); } - if (!fcb->deleted) { - // add new EXTENT_DATAs + // add new EXTENT_DATAs + + last_end = 0; + + le = fcb->extents.Flink; + while (le != &fcb->extents) { + extent* ext = CONTAINING_RECORD(le, extent, list_entry); + EXTENT_DATA* ed; - last_end = 0; + ext->inserted = FALSE; - le = fcb->extents.Flink; - while (le != &fcb->extents) { - extent* ext = CONTAINING_RECORD(le, extent, list_entry); - EXTENT_DATA* ed; - - if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && ext->offset > last_end) { - Status = insert_sparse_extent(fcb, last_end, ext->offset - last_end, Irp, rollback); - if (!NT_SUCCESS(Status)) { - ERR("insert_sparse_extent returned %08x\n", Status); - goto end; - } - } - - ed = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG); - if (!ed) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - RtlCopyMemory(ed, ext->data, ext->datalen); - - if (!insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, ext->offset, - ed, ext->datalen, Batch_Insert, Irp, rollback)) { - ERR("insert_tree_item_batch failed\n"); - Status = STATUS_INTERNAL_ERROR; + if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && ext->offset > last_end) { + Status = insert_sparse_extent(fcb, last_end, ext->offset - last_end, Irp, rollback); + if (!NT_SUCCESS(Status)) { + ERR("insert_sparse_extent returned %08x\n", Status); goto end; } + } - if (ext->datalen >= sizeof(EXTENT_DATA) && ed->type == EXTENT_TYPE_PREALLOC) - prealloc = TRUE; - - if (ext->datalen >= sizeof(EXTENT_DATA) && ed->type == 
EXTENT_TYPE_INLINE) - extents_inline = TRUE; - - if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES)) { - if (ed->type == EXTENT_TYPE_INLINE) - last_end = ext->offset + ed->decoded_size; - else { - EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; - - last_end = ext->offset + ed2->num_bytes; - } - } - - le = le->Flink; + ed = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG); + if (!ed) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; } - if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && !extents_inline && - sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) > last_end) { - Status = insert_sparse_extent(fcb, last_end, sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) - last_end, Irp, rollback); - if (!NT_SUCCESS(Status)) { - ERR("insert_sparse_extent returned %08x\n", Status); - goto end; - } + RtlCopyMemory(ed, ext->data, ext->datalen); + + if (!insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, ext->offset, + ed, ext->datalen, Batch_Insert, Irp, rollback)) { + ERR("insert_tree_item_batch failed\n"); + Status = STATUS_INTERNAL_ERROR; + goto end; } - // update prealloc flag in INODE_ITEM + if (ext->datalen >= sizeof(EXTENT_DATA) && ed->type == EXTENT_TYPE_PREALLOC) + prealloc = TRUE; - if (!prealloc) - fcb->inode_item.flags &= ~BTRFS_INODE_PREALLOC; - else - fcb->inode_item.flags |= BTRFS_INODE_PREALLOC; + if (ext->datalen >= sizeof(EXTENT_DATA) && ed->type == EXTENT_TYPE_INLINE) + extents_inline = TRUE; + + if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES)) { + if (ed->type == EXTENT_TYPE_INLINE) + last_end = ext->offset + ed->decoded_size; + else { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; + + last_end = ext->offset + ed2->num_bytes; + } + } - fcb->inode_item_changed = TRUE; + le = le->Flink; + } + + if (!(fcb->Vcb->superblock.incompat_flags & 
BTRFS_INCOMPAT_FLAGS_NO_HOLES) && !extents_inline && + sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) > last_end) { + Status = insert_sparse_extent(fcb, last_end, sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) - last_end, Irp, rollback); + if (!NT_SUCCESS(Status)) { + ERR("insert_sparse_extent returned %08x\n", Status); + goto end; + } } + // update prealloc flag in INODE_ITEM + + if (!prealloc) + fcb->inode_item.flags &= ~BTRFS_INODE_PREALLOC; + else + fcb->inode_item.flags |= BTRFS_INODE_PREALLOC; + + fcb->inode_item_changed = TRUE; + fcb->extents_changed = FALSE; } @@ -4208,37 +4345,6 @@ void flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY fcb->created = FALSE; - if (fcb->deleted) { - traverse_ptr tp2; - - // delete XATTR_ITEMs - - searchkey.obj_id = fcb->inode; - searchkey.obj_type = TYPE_XATTR_ITEM; - searchkey.offset = 0; - - Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - goto end; - } - - while (find_next_item(fcb->Vcb, &tp, &tp2, FALSE, Irp)) { - tp = tp2; - - if (tp.item->key.obj_id == fcb->inode) { - // FIXME - do metadata thing here too? 
- if (tp.item->key.obj_type == TYPE_XATTR_ITEM) { - delete_tree_item(fcb->Vcb, &tp, rollback); - TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - } - } else - break; - } - - goto end; - } - if (!cache && fcb->inode_item_changed) { ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG); if (!ii) { @@ -4327,9 +4433,6 @@ void flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY end: fcb->dirty = FALSE; - -// ExReleaseResourceLite(fcb->Header.Resource); - return; } static NTSTATUS drop_chunk(device_extension* Vcb, chunk* c, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) { @@ -4345,6 +4448,12 @@ static NTSTATUS drop_chunk(device_extension* Vcb, chunk* c, LIST_ENTRY* batchlis if (c->cache) { c->cache->deleted = TRUE; + Status = excise_extents(Vcb, c->cache, 0, c->cache->inode_item.st_size, Irp, rollback); + if (!NT_SUCCESS(Status)) { + ERR("excise_extents returned %08x\n", Status); + return Status; + } + flush_fcb(c->cache, TRUE, batchlist, Irp, rollback); free_fcb(c->cache); @@ -4487,14 +4596,38 @@ static NTSTATUS drop_chunk(device_extension* Vcb, chunk* c, LIST_ENTRY* batchlis RemoveEntryList(&c->list_entry); - if (c->list_entry_changed.Flink) - RemoveEntryList(&c->list_entry_changed); - - ExFreePool(c->chunk_item); - ExFreePool(c->devices); + // clear raid56 incompat flag if dropping last RAID5/6 chunk - while (!IsListEmpty(&c->space)) { - space* s = CONTAINING_RECORD(c->space.Flink, space, list_entry); + if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) { + LIST_ENTRY* le; + BOOL clear_flag = TRUE; + + le = Vcb->chunks.Flink; + while (le != &Vcb->chunks) { + chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry); + + if (c2->chunk_item->type & BLOCK_FLAG_RAID5 || c2->chunk_item->type & BLOCK_FLAG_RAID6) { + clear_flag = FALSE; + break; + } + + le = le->Flink; + } + + if (clear_flag) + Vcb->superblock.incompat_flags &= 
~BTRFS_INCOMPAT_FLAGS_RAID56; + } + + Vcb->superblock.bytes_used -= c->oldused; + + if (c->list_entry_changed.Flink) + RemoveEntryList(&c->list_entry_changed); + + ExFreePool(c->chunk_item); + ExFreePool(c->devices); + + while (!IsListEmpty(&c->space)) { + space* s = CONTAINING_RECORD(c->space.Flink, space, list_entry); RemoveEntryList(&s->list_entry); ExFreePool(s); @@ -4531,6 +4664,12 @@ static NTSTATUS update_chunks(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP ExAcquireResourceExclusiveLite(&c->lock, TRUE); + if (c->list_entry_balance.Flink) { + ExReleaseResourceLite(&c->lock); + le = le2; + continue; + } + used_minus_cache = c->used; // subtract self-hosted cache @@ -4833,192 +4972,6 @@ static NTSTATUS add_root_item_to_cache(device_extension* Vcb, UINT64 root, PIRP return STATUS_SUCCESS; } -static NTSTATUS add_dir_item(device_extension* Vcb, root* subvol, UINT64 inode, UINT32 crc32, DIR_ITEM* di, ULONG disize, PIRP Irp, LIST_ENTRY* rollback) { - KEY searchkey; - traverse_ptr tp; - UINT8* di2; - NTSTATUS Status; - - searchkey.obj_id = inode; - searchkey.obj_type = TYPE_DIR_ITEM; - searchkey.offset = crc32; - - Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - return Status; - } - - if (!keycmp(tp.item->key, searchkey)) { - ULONG maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node); - - if (tp.item->size + disize > maxlen) { - WARN("DIR_ITEM was longer than maxlen (%u + %u > %u)\n", tp.item->size, disize, maxlen); - return STATUS_INTERNAL_ERROR; - } - - di2 = ExAllocatePoolWithTag(PagedPool, tp.item->size + disize, ALLOC_TAG); - if (!di2) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - if (tp.item->size > 0) - RtlCopyMemory(di2, tp.item->data, tp.item->size); - - RtlCopyMemory(di2 + tp.item->size, di, disize); - - delete_tree_item(Vcb, &tp, rollback); - - insert_tree_item(Vcb, subvol, inode, TYPE_DIR_ITEM, crc32, 
di2, tp.item->size + disize, NULL, Irp, rollback); - - ExFreePool(di); - } else { - insert_tree_item(Vcb, subvol, inode, TYPE_DIR_ITEM, crc32, di, disize, NULL, Irp, rollback); - } - - return STATUS_SUCCESS; -} - -static NTSTATUS add_inode_extref(device_extension* Vcb, root* subvol, UINT64 inode, UINT64 parinode, UINT64 index, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback) { - KEY searchkey; - traverse_ptr tp; - INODE_EXTREF* ier; - NTSTATUS Status; - - searchkey.obj_id = inode; - searchkey.obj_type = TYPE_INODE_EXTREF; - searchkey.offset = calc_crc32c((UINT32)parinode, (UINT8*)utf8->Buffer, utf8->Length); - - Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - return Status; - } - - if (!keycmp(searchkey, tp.item->key)) { - ULONG iersize = tp.item->size + sizeof(INODE_EXTREF) - 1 + utf8->Length; - UINT8* ier2; - UINT32 maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node); - - if (iersize > maxlen) { - ERR("item would be too long (%u > %u)\n", iersize, maxlen); - return STATUS_INTERNAL_ERROR; - } - - ier2 = ExAllocatePoolWithTag(PagedPool, iersize, ALLOC_TAG); - if (!ier2) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - if (tp.item->size > 0) - RtlCopyMemory(ier2, tp.item->data, tp.item->size); - - ier = (INODE_EXTREF*)&ier2[tp.item->size]; - ier->dir = parinode; - ier->index = index; - ier->n = utf8->Length; - RtlCopyMemory(ier->name, utf8->Buffer, utf8->Length); - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, subvol, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ier2, iersize, NULL, Irp, rollback)) { - ERR("error - failed to insert item\n"); - return STATUS_INTERNAL_ERROR; - } - } else { - ier = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_EXTREF) - 1 + utf8->Length, ALLOC_TAG); - if (!ier) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - ier->dir = 
parinode; - ier->index = index; - ier->n = utf8->Length; - RtlCopyMemory(ier->name, utf8->Buffer, utf8->Length); - - if (!insert_tree_item(Vcb, subvol, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ier, sizeof(INODE_EXTREF) - 1 + utf8->Length, NULL, Irp, rollback)) { - ERR("error - failed to insert item\n"); - return STATUS_INTERNAL_ERROR; - } - } - - return STATUS_SUCCESS; -} - -static NTSTATUS add_inode_ref(device_extension* Vcb, root* subvol, UINT64 inode, UINT64 parinode, UINT64 index, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback) { - KEY searchkey; - traverse_ptr tp; - INODE_REF* ir; - NTSTATUS Status; - - searchkey.obj_id = inode; - searchkey.obj_type = TYPE_INODE_REF; - searchkey.offset = parinode; - - Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - return Status; - } - - if (!keycmp(searchkey, tp.item->key)) { - ULONG irsize = tp.item->size + sizeof(INODE_REF) - 1 + utf8->Length; - UINT8* ir2; - UINT32 maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node); - - if (irsize > maxlen) { - if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF) { - TRACE("INODE_REF too long, creating INODE_EXTREF\n"); - return add_inode_extref(Vcb, subvol, inode, parinode, index, utf8, Irp, rollback); - } else { - ERR("item would be too long (%u > %u)\n", irsize, maxlen); - return STATUS_INTERNAL_ERROR; - } - } - - ir2 = ExAllocatePoolWithTag(PagedPool, irsize, ALLOC_TAG); - if (!ir2) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - if (tp.item->size > 0) - RtlCopyMemory(ir2, tp.item->data, tp.item->size); - - ir = (INODE_REF*)&ir2[tp.item->size]; - ir->index = index; - ir->n = utf8->Length; - RtlCopyMemory(ir->name, utf8->Buffer, utf8->Length); - - delete_tree_item(Vcb, &tp, rollback); - - if (!insert_tree_item(Vcb, subvol, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ir2, irsize, NULL, Irp, 
rollback)) { - ERR("error - failed to insert item\n"); - return STATUS_INTERNAL_ERROR; - } - } else { - ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + utf8->Length, ALLOC_TAG); - if (!ir) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - ir->index = index; - ir->n = utf8->Length; - RtlCopyMemory(ir->name, utf8->Buffer, utf8->Length); - - if (!insert_tree_item(Vcb, subvol, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ir, sizeof(INODE_REF) - 1 + ir->n, NULL, Irp, rollback)) { - ERR("error - failed to insert item\n"); - return STATUS_INTERNAL_ERROR; - } - } - - return STATUS_SUCCESS; -} - static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) { NTSTATUS Status; @@ -5134,9 +5087,8 @@ static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp fileref->created = FALSE; } else if (fileref->deleted) { UINT32 crc32; - KEY searchkey; - traverse_ptr tp; ANSI_STRING* name; + DIR_ITEM* di; if (fileref->oldutf8.Buffer) name = &fileref->oldutf8; @@ -5147,26 +5099,49 @@ static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp TRACE("deleting %.*S\n", file_desc_fileref(fileref)); + di = ExAllocatePoolWithTag(PagedPool, sizeof(DIR_ITEM) - 1 + name->Length, ALLOC_TAG); + if (!di) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + di->m = 0; + di->n = name->Length; + RtlCopyMemory(di->name, name->Buffer, name->Length); + // delete DIR_ITEM (0x54) - Status = delete_dir_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, crc32, name, Irp, rollback); - if (!NT_SUCCESS(Status)) { - ERR("delete_dir_item returned %08x\n", Status); - return Status; + if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, + crc32, di, sizeof(DIR_ITEM) - 1 + name->Length, Batch_DeleteDirItem, Irp, rollback)) { + 
ERR("insert_tree_item_batch failed\n"); + return STATUS_INTERNAL_ERROR; } if (fileref->parent->fcb->subvol == fileref->fcb->subvol) { + INODE_REF* ir; + // delete INODE_REF (0xc) - Status = delete_inode_ref(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, fileref->parent->fcb->inode, name, Irp, rollback); - if (!NT_SUCCESS(Status)) { - ERR("delete_inode_ref returned %08x\n", Status); - return Status; + ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + name->Length, ALLOC_TAG); + if (!ir) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + ir->index = fileref->index; + ir->n = name->Length; + RtlCopyMemory(ir->name, name->Buffer, name->Length); + + if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, + fileref->parent->fcb->inode, ir, sizeof(INODE_REF) - 1 + name->Length, Batch_DeleteInodeRef, Irp, rollback)) { + ERR("insert_tree_item_batch failed\n"); + return STATUS_INTERNAL_ERROR; } } else { // subvolume Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, name, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("delete_root_ref returned %08x\n", Status); + return Status; } Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp, rollback); @@ -5178,20 +5153,10 @@ static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp // delete DIR_INDEX (0x60) - searchkey.obj_id = fileref->parent->fcb->inode; - searchkey.obj_type = TYPE_DIR_INDEX; - searchkey.offset = fileref->index; - - Status = find_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - Status = STATUS_INTERNAL_ERROR; - return Status; - } - - if (!keycmp(searchkey, tp.item->key)) { - delete_tree_item(fileref->fcb->Vcb, 
&tp, rollback); - TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); + if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, + fileref->index, NULL, 0, Batch_Delete, Irp, rollback)) { + ERR("insert_tree_item_batch failed\n"); + return STATUS_INTERNAL_ERROR; } if (fileref->oldutf8.Buffer) { @@ -5202,9 +5167,7 @@ static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp PANSI_STRING oldutf8 = fileref->oldutf8.Buffer ? &fileref->oldutf8 : &fileref->utf8; UINT32 crc32, oldcrc32; ULONG disize; - DIR_ITEM *di, *di2; - KEY searchkey; - traverse_ptr tp; + DIR_ITEM *olddi, *di, *di2; crc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->utf8.Buffer, fileref->utf8.Length); @@ -5212,15 +5175,25 @@ static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp oldcrc32 = crc32; else oldcrc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->oldutf8.Buffer, fileref->oldutf8.Length); - - // delete DIR_ITEM (0x54) - Status = delete_dir_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, oldcrc32, oldutf8, Irp, rollback); - if (!NT_SUCCESS(Status)) { - ERR("delete_dir_item returned %08x\n", Status); - return Status; + olddi = ExAllocatePoolWithTag(PagedPool, sizeof(DIR_ITEM) - 1 + oldutf8->Length, ALLOC_TAG); + if (!olddi) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; } + olddi->m = 0; + olddi->n = (UINT16)oldutf8->Length; + RtlCopyMemory(olddi->name, oldutf8->Buffer, oldutf8->Length); + + // delete DIR_ITEM (0x54) + + if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, + oldcrc32, olddi, sizeof(DIR_ITEM) - 1 + oldutf8->Length, Batch_DeleteDirItem, Irp, rollback)) { + ERR("insert_tree_item_batch failed\n"); + return STATUS_INTERNAL_ERROR; + } + // add DIR_ITEM (0x54) disize = sizeof(DIR_ITEM) - 
1 + fileref->utf8.Length; @@ -5255,27 +5228,49 @@ static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp RtlCopyMemory(di2, di, disize); - Status = add_dir_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, crc32, di, disize, Irp, rollback); - if (!NT_SUCCESS(Status)) { - ERR("add_dir_item returned %08x\n", Status); - return Status; + if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32, + di, disize, Batch_DirItem, Irp, rollback)) { + ERR("insert_tree_item_batch failed\n"); + return STATUS_INTERNAL_ERROR; } if (fileref->parent->fcb->subvol == fileref->fcb->subvol) { + INODE_REF *ir, *ir2; + // delete INODE_REF (0xc) - Status = delete_inode_ref(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, fileref->parent->fcb->inode, oldutf8, Irp, rollback); - if (!NT_SUCCESS(Status)) { - ERR("delete_inode_ref returned %08x\n", Status); - return Status; + ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + oldutf8->Length, ALLOC_TAG); + if (!ir) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + ir->index = fileref->index; + ir->n = oldutf8->Length; + RtlCopyMemory(ir->name, oldutf8->Buffer, ir->n); + + if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode, + ir, sizeof(INODE_REF) - 1 + ir->n, Batch_DeleteInodeRef, Irp, rollback)) { + ERR("insert_tree_item_batch failed\n"); + return STATUS_INTERNAL_ERROR; } // add INODE_REF (0xc) - Status = add_inode_ref(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, fileref->parent->fcb->inode, fileref->index, &fileref->utf8, Irp, rollback); - if (!NT_SUCCESS(Status)) { - ERR("add_inode_ref returned %08x\n", Status); - return Status; + ir2 = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->utf8.Length, ALLOC_TAG); + if 
(!ir2) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + ir2->index = fileref->index; + ir2->n = fileref->utf8.Length; + RtlCopyMemory(ir2->name, fileref->utf8.Buffer, ir2->n); + + if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode, + ir2, sizeof(INODE_REF) - 1 + ir2->n, Batch_InodeRef, Irp, rollback)) { + ERR("insert_tree_item_batch failed\n"); + return STATUS_INTERNAL_ERROR; } } else { // subvolume ULONG rrlen; @@ -5316,29 +5311,18 @@ static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp // delete DIR_INDEX (0x60) - searchkey.obj_id = fileref->parent->fcb->inode; - searchkey.obj_type = TYPE_DIR_INDEX; - searchkey.offset = fileref->index; - - Status = find_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - Status = STATUS_INTERNAL_ERROR; - return Status; + if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, + fileref->index, NULL, 0, Batch_Delete, Irp, rollback)) { + ERR("insert_tree_item_batch failed\n"); + return STATUS_INTERNAL_ERROR; } - if (!keycmp(searchkey, tp.item->key)) { - delete_tree_item(fileref->fcb->Vcb, &tp, rollback); - TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset); - } else - WARN("could not find (%llx,%x,%llx) in subvol %llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, fileref->fcb->subvol->id); - // add DIR_INDEX (0x60) - if (!insert_tree_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, fileref->index, di2, disize, NULL, Irp, rollback)) { - ERR("insert_tree_item failed\n"); - Status = STATUS_INTERNAL_ERROR; - return Status; + if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, 
fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, + fileref->index, di2, disize, Batch_Insert, Irp, rollback)) { + ERR("insert_tree_item_batch failed\n"); + return STATUS_INTERNAL_ERROR; } if (fileref->oldutf8.Buffer) { @@ -5411,7 +5395,10 @@ NTSTATUS STDCALL do_write(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) if (dirt->fcb->deleted && dirt->fcb->ads) { RemoveEntryList(le); + ExAcquireResourceExclusiveLite(dirt->fcb->Header.Resource, TRUE); flush_fcb(dirt->fcb, FALSE, &batchlist, Irp, rollback); + ExReleaseResourceLite(dirt->fcb->Header.Resource); + free_fcb(dirt->fcb); ExFreePool(dirt); @@ -5433,7 +5420,9 @@ NTSTATUS STDCALL do_write(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) if (dirt->fcb->subvol != Vcb->root_root || dirt->fcb->deleted) { RemoveEntryList(le); + ExAcquireResourceExclusiveLite(dirt->fcb->Header.Resource, TRUE); flush_fcb(dirt->fcb, FALSE, &batchlist, Irp, rollback); + ExReleaseResourceLite(dirt->fcb->Header.Resource); free_fcb(dirt->fcb); ExFreePool(dirt); @@ -5453,12 +5442,6 @@ NTSTATUS STDCALL do_write(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) ERR("flushed %llu fcbs in %llu (freq = %llu)\n", filerefs, time2.QuadPart - time1.QuadPart, freq.QuadPart); #endif - ExAcquireResourceExclusiveLite(&Vcb->checksum_lock, TRUE); - if (!IsListEmpty(&Vcb->sector_checksums)) { - update_checksum_tree(Vcb, Irp, rollback); - } - ExReleaseResourceLite(&Vcb->checksum_lock); - if (!IsListEmpty(&Vcb->drop_roots)) { Status = drop_roots(Vcb, Irp, rollback); @@ -5563,25 +5546,10 @@ NTSTATUS STDCALL do_write(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) goto end; } - Vcb->superblock.cache_generation = Vcb->superblock.generation; - - Status = write_superblocks(Vcb, Irp); - if (!NT_SUCCESS(Status)) { - ERR("write_superblocks returned %08x\n", Status); - goto end; - } - - clean_space_cache(Vcb); - - Vcb->superblock.generation++; - - Status = STATUS_SUCCESS; - +#ifdef DEBUG_PARANOID le = Vcb->trees.Flink; 
while (le != &Vcb->trees) { tree* t = CONTAINING_RECORD(le, tree, list_entry); - -#ifdef DEBUG_PARANOID KEY searchkey; traverse_ptr tp; @@ -5611,8 +5579,29 @@ NTSTATUS STDCALL do_write(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) int3; } } -#endif + le = le->Flink; + } +#endif + + Vcb->superblock.cache_generation = Vcb->superblock.generation; + + Status = write_superblocks(Vcb, Irp); + if (!NT_SUCCESS(Status)) { + ERR("write_superblocks returned %08x\n", Status); + goto end; + } + + clean_space_cache(Vcb); + + Vcb->superblock.generation++; + + Status = STATUS_SUCCESS; + + le = Vcb->trees.Flink; + while (le != &Vcb->trees) { + tree* t = CONTAINING_RECORD(le, tree, list_entry); + t->write = FALSE; le = le->Flink; @@ -5645,6 +5634,14 @@ static void print_stats(device_extension* Vcb) { ERR("disk time taken: %llu\n", Vcb->stats.read_disk_time); ERR("other time taken: %llu\n", Vcb->stats.read_total_time - Vcb->stats.read_csum_time - Vcb->stats.read_disk_time); + ERR("OPEN STATS:\n"); + ERR("number of opens: %llu\n", Vcb->stats.num_opens); + ERR("total time taken: %llu\n", Vcb->stats.open_total_time); + ERR("number of overwrites: %llu\n", Vcb->stats.num_overwrites); + ERR("total time taken: %llu\n", Vcb->stats.overwrite_total_time); + ERR("number of creates: %llu\n", Vcb->stats.num_creates); + ERR("total time taken: %llu\n", Vcb->stats.create_total_time); + RtlZeroMemory(&Vcb->stats, sizeof(debug_stats)); } #endif @@ -5692,8 +5689,9 @@ void STDCALL flush_thread(void* context) { if (!(devobj->Vpb->Flags & VPB_MOUNTED) || Vcb->removing) break; - - do_flush(Vcb); + + if (!Vcb->locked) + do_flush(Vcb); KeSetTimer(&Vcb->flush_thread_timer, due_time, NULL); } diff --git a/reactos/drivers/filesystems/btrfs/free-space.c b/reactos/drivers/filesystems/btrfs/free-space.c index e87408f3f58..9c631301717 100644 --- a/reactos/drivers/filesystems/btrfs/free-space.c +++ b/reactos/drivers/filesystems/btrfs/free-space.c @@ -88,10 +88,8 @@ NTSTATUS 
clear_free_space_cache(device_extension* Vcb, LIST_ENTRY* batchlist, PI Status = remove_free_space_inode(Vcb, fsi->key.obj_id, batchlist, Irp, &rollback); - if (!NT_SUCCESS(Status)) { + if (!NT_SUCCESS(Status)) ERR("remove_free_space_inode for (%llx,%x,%llx) returned %08x\n", fsi->key.obj_id, fsi->key.obj_type, fsi->key.offset, Status); - goto end; - } le = Vcb->chunks.Flink; while (le != &Vcb->chunks) { @@ -116,7 +114,6 @@ NTSTATUS clear_free_space_cache(device_extension* Vcb, LIST_ENTRY* batchlist, PI Status = STATUS_SUCCESS; -end: if (NT_SUCCESS(Status)) clear_rollback(Vcb, &rollback); else @@ -251,30 +248,44 @@ typedef struct { LIST_ENTRY list_entry; } superblock_stripe; -static void add_superblock_stripe(LIST_ENTRY* stripes, UINT64 off, UINT64 len) { +static NTSTATUS add_superblock_stripe(LIST_ENTRY* stripes, UINT64 off, UINT64 len) { UINT64 i; for (i = 0; i < len; i++) { LIST_ENTRY* le; superblock_stripe* ss; + BOOL ignore = FALSE; le = stripes->Flink; while (le != stripes) { ss = CONTAINING_RECORD(le, superblock_stripe, list_entry); - if (ss->stripe == off + i) - continue; + if (ss->stripe == off + i) { + ignore = TRUE; + break; + } le = le->Flink; } + if (ignore) + continue; + ss = ExAllocatePoolWithTag(PagedPool, sizeof(superblock_stripe), ALLOC_TAG); + if (!ss) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + ss->stripe = off + i; InsertTailList(stripes, &ss->list_entry); } + + return STATUS_SUCCESS; } -static UINT64 get_superblock_size(chunk* c) { +static NTSTATUS get_superblock_size(chunk* c, UINT64* size) { + NTSTATUS Status; CHUNK_ITEM* ci = c->chunk_item; CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; UINT64 off_start, off_end, space; @@ -296,7 +307,11 @@ static UINT64 get_superblock_size(chunk* c) { off_end = off_start + ci->stripe_length; - add_superblock_stripe(&stripes, off_start / ci->stripe_length, 1); + Status = add_superblock_stripe(&stripes, off_start / ci->stripe_length, 1); + if (!NT_SUCCESS(Status)) { + 
ERR("add_superblock_stripe returned %08x\n", Status); + goto end; + } } } } else if (ci->type & BLOCK_FLAG_RAID5) { @@ -310,7 +325,11 @@ static UINT64 get_superblock_size(chunk* c) { off_end = off_start + (ci->stripe_length * (ci->num_stripes - 1)); - add_superblock_stripe(&stripes, off_start / ci->stripe_length, (off_end - off_start) / ci->stripe_length); + Status = add_superblock_stripe(&stripes, off_start / ci->stripe_length, (off_end - off_start) / ci->stripe_length); + if (!NT_SUCCESS(Status)) { + ERR("add_superblock_stripe returned %08x\n", Status); + goto end; + } } } } else if (ci->type & BLOCK_FLAG_RAID6) { @@ -324,7 +343,11 @@ static UINT64 get_superblock_size(chunk* c) { off_end = off_start + (ci->stripe_length * (ci->num_stripes - 2)); - add_superblock_stripe(&stripes, off_start / ci->stripe_length, (off_end - off_start) / ci->stripe_length); + Status = add_superblock_stripe(&stripes, off_start / ci->stripe_length, (off_end - off_start) / ci->stripe_length); + if (!NT_SUCCESS(Status)) { + ERR("add_superblock_stripe returned %08x\n", Status); + goto end; + } } } } else { // SINGLE, DUPLICATE, RAID1 @@ -333,7 +356,11 @@ static UINT64 get_superblock_size(chunk* c) { off_start = ((superblock_addrs[i] - cis[j].offset) / c->chunk_item->stripe_length) * c->chunk_item->stripe_length; off_end = sector_align(superblock_addrs[i] - cis[j].offset + sizeof(superblock), c->chunk_item->stripe_length); - add_superblock_stripe(&stripes, off_start / ci->stripe_length, (off_end - off_start) / ci->stripe_length); + Status = add_superblock_stripe(&stripes, off_start / ci->stripe_length, (off_end - off_start) / ci->stripe_length); + if (!NT_SUCCESS(Status)) { + ERR("add_superblock_stripe returned %08x\n", Status); + goto end; + } } } } @@ -343,6 +370,9 @@ static UINT64 get_superblock_size(chunk* c) { space = 0; + Status = STATUS_SUCCESS; + +end: while (!IsListEmpty(&stripes)) { LIST_ENTRY* le = RemoveHeadList(&stripes); superblock_stripe* ss = CONTAINING_RECORD(le, 
superblock_stripe, list_entry); @@ -352,7 +382,10 @@ static UINT64 get_superblock_size(chunk* c) { ExFreePool(ss); } - return space * ci->stripe_length; + if (NT_SUCCESS(Status)) + *size = space * ci->stripe_length; + + return Status; } static NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, PIRP Irp) { @@ -368,7 +401,6 @@ static NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, PI LIST_ENTRY *le, rollback; // FIXME - does this break if Vcb->superblock.sector_size is not 4096? - // FIXME - remove INODE_ITEM etc. if cache invalid for whatever reason TRACE("(%p, %llx)\n", Vcb, c->offset); @@ -432,7 +464,7 @@ static NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, PI return STATUS_INSUFFICIENT_RESOURCES; } - Status = read_file(c->cache, data, 0, c->cache->inode_item.st_size, NULL, NULL); + Status = read_file(c->cache, data, 0, c->cache->inode_item.st_size, NULL, NULL, FALSE); if (!NT_SUCCESS(Status)) { ERR("read_file returned %08x\n", Status); ExFreePool(data); @@ -529,7 +561,13 @@ static NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, PI // do sanity check - superblock_size = get_superblock_size(c); + Status = get_superblock_size(c, &superblock_size); + if (!NT_SUCCESS(Status)) { + ERR("get_superblock_size returned %08x\n", Status); + ExFreePool(data); + return Status; + } + if (c->chunk_item->size - c->used != total_space + superblock_size) { WARN("invalidating cache for chunk %llx: space was %llx, expected %llx\n", c->offset, total_space + superblock_size, c->chunk_item->size - c->used); goto clearcache; @@ -569,6 +607,8 @@ clearcache: InitializeListHead(&rollback); + delete_tree_item(Vcb, &tp, &rollback); + Status = excise_extents(Vcb, c->cache, 0, c->cache->inode_item.st_size, Irp, &rollback); if (!NT_SUCCESS(Status)) { ERR("excise_extents returned %08x\n", Status); @@ -696,11 +736,11 @@ static NTSTATUS insert_cache_extent(fcb* fcb, UINT64 start, UINT64 length, LIST_ while (le 
!= &fcb->Vcb->chunks) { c = CONTAINING_RECORD(le, chunk, list_entry); - if (!c->readonly) { + if (!c->readonly && !c->reloc) { ExAcquireResourceExclusiveLite(&c->lock, TRUE); if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) { - if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, FALSE, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, length)) { + if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, FALSE, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, length)) { ExReleaseResourceLite(&fcb->Vcb->chunk_lock); return STATUS_SUCCESS; } @@ -722,7 +762,7 @@ static NTSTATUS insert_cache_extent(fcb* fcb, UINT64 start, UINT64 length, LIST_ ExAcquireResourceExclusiveLite(&c->lock, TRUE); if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) { - if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, FALSE, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, length)) + if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, FALSE, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, length)) return STATUS_SUCCESS; } @@ -740,6 +780,7 @@ static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, BOOL* chan NTSTATUS Status; UINT64 num_entries, new_cache_size, i; UINT32 num_sectors; + BOOL realloc_extents = FALSE; // FIXME - also do bitmaps // FIXME - make sure this works when sector_size is not 4096 @@ -788,6 +829,33 @@ static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, BOOL* chan TRACE("chunk %llx: cache_size = %llx, new_cache_size = %llx\n", c->offset, c->cache ? 
c->cache->inode_item.st_size : 0, new_cache_size); + if (c->cache) { + if (new_cache_size > c->cache->inode_item.st_size) + realloc_extents = TRUE; + else { + le = c->cache->extents.Flink; + + while (le != &c->cache->extents) { + extent* ext = CONTAINING_RECORD(le, extent, list_entry); + + if (!ext->ignore && (ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC)) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ext->data->data[0]; + + if (ed2->size != 0) { + chunk* c2 = get_chunk_from_address(Vcb, ed2->address); + + if (c2 && (c2->readonly || c2->reloc)) { + realloc_extents = TRUE; + break; + } + } + } + + le = le->Flink; + } + } + } + if (!c->cache) { FREE_SPACE_ITEM* fsi; KEY searchkey; @@ -869,19 +937,17 @@ static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, BOOL* chan } c->cache->extents_changed = TRUE; + InsertTailList(&Vcb->all_fcbs, &c->cache->list_entry_all); flush_fcb(c->cache, TRUE, batchlist, Irp, rollback); *changed = TRUE; - } else if (new_cache_size > c->cache->inode_item.st_size) { + } else if (realloc_extents) { KEY searchkey; traverse_ptr tp; - ERR("extending existing inode\n"); + TRACE("reallocating extents\n"); - // FIXME - try to extend existing extent first of all - // Or ditch all existing extents and replace with one new one? 
- // add free_space entry to tree cache searchkey.obj_id = FREE_SPACE_CACHE_ID; @@ -906,9 +972,38 @@ static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, BOOL* chan tp.tree->write = TRUE; + // remove existing extents + + if (c->cache->inode_item.st_size > 0) { + le = c->cache->extents.Flink; + + while (le != &c->cache->extents) { + extent* ext = CONTAINING_RECORD(le, extent, list_entry); + + if (!ext->ignore && (ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC)) { + EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ext->data->data[0]; + + if (ed2->size != 0) { + chunk* c2 = get_chunk_from_address(Vcb, ed2->address); /* lookup can return NULL */ + + if (c2 && !c2->list_entry_changed.Flink) + InsertTailList(&Vcb->chunks_changed, &c2->list_entry_changed); + } + } + + le = le->Flink; + } + + Status = excise_extents(Vcb, c->cache, 0, c->cache->inode_item.st_size, Irp, rollback); + if (!NT_SUCCESS(Status)) { + ERR("excise_extents returned %08x\n", Status); + return Status; + } + } + // add new extent - Status = insert_cache_extent(c->cache, c->cache->inode_item.st_size, new_cache_size - c->cache->inode_item.st_size, rollback); + Status = insert_cache_extent(c->cache, 0, new_cache_size, rollback); if (!NT_SUCCESS(Status)) { ERR("insert_cache_extent returned %08x\n", Status); return Status; @@ -1372,7 +1467,7 @@ static NTSTATUS update_chunk_cache(device_extension* Vcb, chunk* c, BTRFS_TIME* // write cache - Status = do_write_file(c->cache, 0, c->cache->inode_item.st_size, data, NULL, NULL, rollback); + Status = do_write_file(c->cache, 0, c->cache->inode_item.st_size, data, NULL, rollback); if (!NT_SUCCESS(Status)) { ERR("do_write_file returned %08x\n", Status); return Status; diff --git a/reactos/drivers/filesystems/btrfs/fsctl.c b/reactos/drivers/filesystems/btrfs/fsctl.c index 73c9f7d8e93..4de4e4e733f 100644 --- a/reactos/drivers/filesystems/btrfs/fsctl.c +++ b/reactos/drivers/filesystems/btrfs/fsctl.c @@ -32,6 +32,7 @@ extern LIST_ENTRY VcbList; extern ERESOURCE 
global_loading_lock; extern LIST_ENTRY volumes; +extern ERESOURCE volumes_lock; static NTSTATUS get_file_ids(PFILE_OBJECT FileObject, void* data, ULONG length) { btrfs_get_file_ids* bgfi; @@ -71,7 +72,7 @@ static void get_uuid(BTRFS_UUID* uuid) { } } -static NTSTATUS snapshot_tree_copy(device_extension* Vcb, UINT64 addr, root* subvol, UINT64 dupflags, UINT64* newaddr, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS snapshot_tree_copy(device_extension* Vcb, UINT64 addr, root* subvol, UINT64* newaddr, PIRP Irp, LIST_ENTRY* rollback) { UINT8* buf; NTSTATUS Status; write_data_context* wtc; @@ -93,7 +94,7 @@ static NTSTATUS snapshot_tree_copy(device_extension* Vcb, UINT64 addr, root* sub return STATUS_INSUFFICIENT_RESOURCES; } - Status = read_data(Vcb, addr, Vcb->superblock.node_size, NULL, TRUE, buf, NULL, NULL, Irp); + Status = read_data(Vcb, addr, Vcb->superblock.node_size, NULL, TRUE, buf, NULL, NULL, Irp, FALSE); if (!NT_SUCCESS(Status)) { ERR("read_data returned %08x\n", Status); goto end; @@ -102,7 +103,6 @@ static NTSTATUS snapshot_tree_copy(device_extension* Vcb, UINT64 addr, root* sub th = (tree_header*)buf; RtlZeroMemory(&t, sizeof(tree)); - t.flags = dupflags; t.root = subvol; t.header.level = th->level; t.header.tree_id = t.root->id; @@ -259,6 +259,7 @@ static NTSTATUS do_create_snapshot(device_extension* Vcb, PFILE_OBJECT parent, f ccb* ccb = parent->FsContext2; LIST_ENTRY* le; file_ref *fileref, *fr; + dir_child* dc = NULL; if (!ccb) { ERR("error - ccb was NULL\n"); @@ -354,7 +355,7 @@ static NTSTATUS do_create_snapshot(device_extension* Vcb, PFILE_OBJECT parent, f goto end; } - Status = snapshot_tree_copy(Vcb, subvol->root_item.block_number, r, tp.tree->flags, &address, Irp, &rollback); + Status = snapshot_tree_copy(Vcb, subvol->root_item.block_number, r, &address, Irp, &rollback); if (!NT_SUCCESS(Status)) { ERR("snapshot_tree_copy returned %08x\n", Status); goto end; @@ -468,6 +469,13 @@ static NTSTATUS do_create_snapshot(device_extension* Vcb, 
PFILE_OBJECT parent, f fr->parent = fileref; + Status = add_dir_child(fileref->fcb, r->id, TRUE, dirpos, utf8, &fr->filepart, &fr->filepart_uc, BTRFS_TYPE_DIRECTORY, &dc); + if (!NT_SUCCESS(Status)) + WARN("add_dir_child returned %08x\n", Status); + + fr->dc = dc; + if (dc) dc->fileref = fr; /* dc may still be NULL when add_dir_child failed */ + insert_fileref_child(fileref, fr, TRUE); increase_fileref_refcount(fileref); @@ -574,6 +582,9 @@ static NTSTATUS create_snapshot(device_extension* Vcb, PFILE_OBJECT FileObject, return STATUS_ACCESS_DENIED; } + if (fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY) + return STATUS_ACCESS_DENIED; + nameus.Buffer = bcs->name; nameus.Length = nameus.MaximumLength = bcs->namelen; @@ -664,8 +675,6 @@ static NTSTATUS create_snapshot(device_extension* Vcb, PFILE_OBJECT FileObject, struct _fcb* openfcb = CONTAINING_RECORD(le, struct _fcb, list_entry); LIST_ENTRY* le2; - ExAcquireResourceExclusiveLite(openfcb->Header.Resource, TRUE); - le2 = openfcb->extents.Flink; while (le2 != &openfcb->extents) { @@ -676,8 +685,6 @@ static NTSTATUS create_snapshot(device_extension* Vcb, PFILE_OBJECT FileObject, le2 = le2->Flink; } - ExReleaseResourceLite(openfcb->Header.Resource); - le = le->Flink; } } @@ -732,6 +739,7 @@ static NTSTATUS create_subvol(device_extension* Vcb, PFILE_OBJECT FileObject, WC BOOLEAN defaulted; UINT64* root_num; file_ref *fr = NULL, *fr2; + dir_child* dc = NULL; fcb = FileObject->FsContext; if (!fcb) { @@ -767,6 +775,9 @@ static NTSTATUS create_subvol(device_extension* Vcb, PFILE_OBJECT FileObject, WC return STATUS_ACCESS_DENIED; } + if (fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY) + return STATUS_ACCESS_DENIED; + nameus.Length = nameus.MaximumLength = length; nameus.Buffer = name; @@ -1029,6 +1040,37 @@ static NTSTATUS create_subvol(device_extension* Vcb, PFILE_OBJECT FileObject, WC fr->parent = fileref; + Status = add_dir_child(fileref->fcb, r->id, TRUE, dirpos, &utf8, &fr->filepart, &fr->filepart_uc, BTRFS_TYPE_DIRECTORY, &dc); + if (!NT_SUCCESS(Status)) +
WARN("add_dir_child returned %08x\n", Status); + + fr->dc = dc; + if (dc) dc->fileref = fr; /* dc may still be NULL when add_dir_child failed */ + + fr->fcb->hash_ptrs = ExAllocatePoolWithTag(PagedPool, sizeof(LIST_ENTRY*) * 256, ALLOC_TAG); + if (!fr->fcb->hash_ptrs) { + ERR("out of memory\n"); + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(fr); + ExReleaseResourceLite(&Vcb->fcb_lock); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlZeroMemory(fr->fcb->hash_ptrs, sizeof(LIST_ENTRY*) * 256); + + fr->fcb->hash_ptrs_uc = ExAllocatePoolWithTag(PagedPool, sizeof(LIST_ENTRY*) * 256, ALLOC_TAG); + if (!fr->fcb->hash_ptrs_uc) { /* test the buffer just allocated, not the FileObject's fcb */ + ERR("out of memory\n"); + ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE); + free_fileref(fr); + ExReleaseResourceLite(&Vcb->fcb_lock); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlZeroMemory(fr->fcb->hash_ptrs_uc, sizeof(LIST_ENTRY*) * 256); + insert_fileref_child(fileref, fr, TRUE); increase_fileref_refcount(fileref); @@ -1278,37 +1320,228 @@ end: return Status; } +static NTSTATUS get_devices(device_extension* Vcb, void* data, ULONG length) { + btrfs_device* dev = NULL; + NTSTATUS Status = STATUS_SUCCESS; /* defined result even if the device loop never runs */ + LIST_ENTRY* le; + + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev2 = CONTAINING_RECORD(le, device, list_entry); + ULONG structlen; + + if (length < sizeof(btrfs_device) - sizeof(WCHAR)) { + Status = STATUS_BUFFER_OVERFLOW; + goto end; + } + + if (!dev) + dev = data; + else { + dev->next_entry = sizeof(btrfs_device) - sizeof(WCHAR) + dev->namelen; + dev = (btrfs_device*)((UINT8*)dev + dev->next_entry); + } + + structlen = length - offsetof(btrfs_device, namelen); + + Status = dev_ioctl(dev2->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &dev->namelen, structlen, TRUE, NULL); + if (!NT_SUCCESS(Status)) + goto end; + + dev->next_entry = 0; + dev->dev_id = dev2->devitem.dev_id; + dev->size = dev2->length; + dev->readonly = (Vcb->readonly || dev2->readonly) ?
TRUE : FALSE; + dev->device_number = dev2->disk_num; + dev->partition_number = dev2->part_num; + + length -= sizeof(btrfs_device) - sizeof(WCHAR) + dev->namelen; + + le = le->Flink; + } + +end: + ExReleaseResourceLite(&Vcb->tree_lock); + + return Status; +} + +static NTSTATUS get_usage(device_extension* Vcb, void* data, ULONG length) { + btrfs_usage* usage = (btrfs_usage*)data; + btrfs_usage* lastbue = NULL; + NTSTATUS Status; + LIST_ENTRY* le; + + if (length < sizeof(btrfs_usage)) + return STATUS_BUFFER_OVERFLOW; + + length -= offsetof(btrfs_usage, devices); + + ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE); + + le = Vcb->chunks.Flink; + while (le != &Vcb->chunks) { + BOOL addnew = FALSE; + + chunk* c = CONTAINING_RECORD(le, chunk, list_entry); + + if (!lastbue) // first entry + addnew = TRUE; + else { + btrfs_usage* bue = usage; + + addnew = TRUE; + + while (TRUE) { + if (bue->type == c->chunk_item->type) { + addnew = FALSE; + break; + } + + if (bue->next_entry == 0) + break; + else + bue = (btrfs_usage*)((UINT8*)bue + bue->next_entry); + } + } + + if (addnew) { + btrfs_usage* bue; + LIST_ENTRY* le2; + UINT64 factor; + + if (!lastbue) { + bue = usage; + } else { + if (length < offsetof(btrfs_usage, devices)) { + Status = STATUS_BUFFER_OVERFLOW; + goto end; + } + + length -= offsetof(btrfs_usage, devices); + + lastbue->next_entry = offsetof(btrfs_usage, devices) + (lastbue->num_devices * sizeof(btrfs_usage_device)); + + bue = (btrfs_usage*)((UINT8*)lastbue + lastbue->next_entry); + } + + bue->next_entry = 0; + bue->type = c->chunk_item->type; + bue->size = 0; + bue->used = 0; + bue->num_devices = 0; + + if (c->chunk_item->type & BLOCK_FLAG_RAID0) + factor = c->chunk_item->num_stripes; + else if (c->chunk_item->type & BLOCK_FLAG_RAID10) + factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes; + else if (c->chunk_item->type & BLOCK_FLAG_RAID5) + factor = c->chunk_item->num_stripes - 1; + else if (c->chunk_item->type & BLOCK_FLAG_RAID6) + factor = 
c->chunk_item->num_stripes - 2; + else + factor = 1; + + le2 = le; + while (le2 != &Vcb->chunks) { + chunk* c2 = CONTAINING_RECORD(le2, chunk, list_entry); + + if (c2->chunk_item->type == c->chunk_item->type) { + UINT16 i; + CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c2->chunk_item[1]; + UINT64 stripesize; + + bue->size += c2->chunk_item->size; + bue->used += c2->used; + + stripesize = c2->chunk_item->size / factor; + + for (i = 0; i < c2->chunk_item->num_stripes; i++) { + UINT64 j; + BOOL found = FALSE; + + for (j = 0; j < bue->num_devices; j++) { + if (bue->devices[j].dev_id == cis[i].dev_id) { + bue->devices[j].alloc += stripesize; + found = TRUE; + break; + } + } + + if (!found) { + if (length < sizeof(btrfs_usage_device)) { + Status = STATUS_BUFFER_OVERFLOW; + goto end; + } + + length -= sizeof(btrfs_usage_device); + + bue->devices[bue->num_devices].dev_id = cis[i].dev_id; + bue->devices[bue->num_devices].alloc = stripesize; + bue->num_devices++; + } + } + } + + le2 = le2->Flink; + } + + lastbue = bue; + } + + le = le->Flink; + } + + Status = STATUS_SUCCESS; + +end: + ExReleaseResourceLite(&Vcb->chunk_lock); + + return Status; +} + static NTSTATUS is_volume_mounted(device_extension* Vcb, PIRP Irp) { - UINT64 i, num_devices; NTSTATUS Status; ULONG cc; IO_STATUS_BLOCK iosb; BOOL verify = FALSE; + LIST_ENTRY* le; - num_devices = Vcb->superblock.num_devices; - for (i = 0; i < num_devices; i++) { - if (Vcb->devices[i].devobj && Vcb->devices[i].removable) { - Status = dev_ioctl(Vcb->devices[i].devobj, IOCTL_STORAGE_CHECK_VERIFY, NULL, 0, &cc, sizeof(ULONG), FALSE, &iosb); + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (dev->devobj && dev->removable) { + Status = dev_ioctl(dev->devobj, IOCTL_STORAGE_CHECK_VERIFY, NULL, 0, &cc, sizeof(ULONG), FALSE, &iosb); if (iosb.Information != sizeof(ULONG)) cc = 0; - if (Status == 
STATUS_VERIFY_REQUIRED || (NT_SUCCESS(Status) && cc != Vcb->devices[i].change_count)) { - Vcb->devices[i].devobj->Flags |= DO_VERIFY_VOLUME; + if (Status == STATUS_VERIFY_REQUIRED || (NT_SUCCESS(Status) && cc != dev->change_count)) { + dev->devobj->Flags |= DO_VERIFY_VOLUME; verify = TRUE; } if (NT_SUCCESS(Status) && iosb.Information == sizeof(ULONG)) - Vcb->devices[i].change_count = cc; + dev->change_count = cc; if (!NT_SUCCESS(Status) || verify) { - IoSetHardErrorOrVerifyDevice(Irp, Vcb->devices[i].devobj); + IoSetHardErrorOrVerifyDevice(Irp, dev->devobj); + ExReleaseResourceLite(&Vcb->tree_lock); return verify ? STATUS_VERIFY_REQUIRED : Status; } } + + le = le->Flink; } + ExReleaseResourceLite(&Vcb->tree_lock); + return STATUS_SUCCESS; } @@ -1413,7 +1646,7 @@ end: return Status; } -static NTSTATUS zero_data(device_extension* Vcb, fcb* fcb, UINT64 start, UINT64 length, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback) { +static NTSTATUS zero_data(device_extension* Vcb, fcb* fcb, UINT64 start, UINT64 length, PIRP Irp, LIST_ENTRY* rollback) { NTSTATUS Status; BOOL compress = write_fcb_compressed(fcb); UINT64 start_data, end_data; @@ -1437,7 +1670,7 @@ static NTSTATUS zero_data(device_extension* Vcb, fcb* fcb, UINT64 start, UINT64 RtlZeroMemory(data, end_data - start_data); if (start > start_data || start + length < end_data) { - Status = read_file(fcb, data, start_data, end_data - start_data, NULL, Irp); + Status = read_file(fcb, data, start_data, end_data - start_data, NULL, Irp, TRUE); if (!NT_SUCCESS(Status)) { ERR("read_file returned %08x\n", Status); @@ -1449,7 +1682,7 @@ static NTSTATUS zero_data(device_extension* Vcb, fcb* fcb, UINT64 start, UINT64 RtlZeroMemory(data + start - start_data, length); if (compress) { - Status = write_compressed(fcb, start_data, end_data, data, changed_sector_list, Irp, rollback); + Status = write_compressed(fcb, start_data, end_data, data, Irp, rollback); ExFreePool(data); @@ -1458,7 +1691,7 @@ static NTSTATUS 
zero_data(device_extension* Vcb, fcb* fcb, UINT64 start, UINT64 return Status; } } else { - Status = do_write_file(fcb, start_data, end_data, data, changed_sector_list, Irp, rollback); + Status = do_write_file(fcb, start_data, end_data, data, Irp, rollback); ExFreePool(data); @@ -1477,12 +1710,11 @@ static NTSTATUS set_zero_data(device_extension* Vcb, PFILE_OBJECT FileObject, vo fcb* fcb; ccb* ccb; file_ref* fileref; - LIST_ENTRY rollback, changed_sector_list, *le; + LIST_ENTRY rollback, *le; LARGE_INTEGER time; BTRFS_TIME now; UINT64 start, end; extent* ext; - BOOL nocsum; IO_STATUS_BLOCK iosb; if (!data || length < sizeof(FILE_ZERO_DATA_INFORMATION)) @@ -1566,13 +1798,8 @@ static NTSTATUS set_zero_data(device_extension* Vcb, PFILE_OBJECT FileObject, vo goto end; } - nocsum = fcb->inode_item.flags & BTRFS_INODE_NODATASUM; - - if (!nocsum) - InitializeListHead(&changed_sector_list); - if (ext->datalen >= sizeof(EXTENT_DATA) && ext->data->type == EXTENT_TYPE_INLINE) { - Status = zero_data(Vcb, fcb, fzdi->FileOffset.QuadPart, fzdi->BeyondFinalZero.QuadPart - fzdi->FileOffset.QuadPart, nocsum ? NULL : &changed_sector_list, Irp, &rollback); + Status = zero_data(Vcb, fcb, fzdi->FileOffset.QuadPart, fzdi->BeyondFinalZero.QuadPart - fzdi->FileOffset.QuadPart, Irp, &rollback); if (!NT_SUCCESS(Status)) { ERR("zero_data returned %08x\n", Status); goto end; @@ -1586,14 +1813,14 @@ static NTSTATUS set_zero_data(device_extension* Vcb, PFILE_OBJECT FileObject, vo end = (fzdi->BeyondFinalZero.QuadPart / Vcb->superblock.sector_size) * Vcb->superblock.sector_size; if (end <= start) { - Status = zero_data(Vcb, fcb, fzdi->FileOffset.QuadPart, fzdi->BeyondFinalZero.QuadPart - fzdi->FileOffset.QuadPart, nocsum ? 
NULL : &changed_sector_list, Irp, &rollback); + Status = zero_data(Vcb, fcb, fzdi->FileOffset.QuadPart, fzdi->BeyondFinalZero.QuadPart - fzdi->FileOffset.QuadPart, Irp, &rollback); if (!NT_SUCCESS(Status)) { ERR("zero_data returned %08x\n", Status); goto end; } } else { if (start > fzdi->FileOffset.QuadPart) { - Status = zero_data(Vcb, fcb, fzdi->FileOffset.QuadPart, start - fzdi->FileOffset.QuadPart, nocsum ? NULL : &changed_sector_list, Irp, &rollback); + Status = zero_data(Vcb, fcb, fzdi->FileOffset.QuadPart, start - fzdi->FileOffset.QuadPart, Irp, &rollback); if (!NT_SUCCESS(Status)) { ERR("zero_data returned %08x\n", Status); goto end; @@ -1601,7 +1828,7 @@ static NTSTATUS set_zero_data(device_extension* Vcb, PFILE_OBJECT FileObject, vo } if (end < fzdi->BeyondFinalZero.QuadPart) { - Status = zero_data(Vcb, fcb, end, fzdi->BeyondFinalZero.QuadPart - end, nocsum ? NULL : &changed_sector_list, Irp, &rollback); + Status = zero_data(Vcb, fcb, end, fzdi->BeyondFinalZero.QuadPart - end, Irp, &rollback); if (!NT_SUCCESS(Status)) { ERR("zero_data returned %08x\n", Status); goto end; @@ -1643,12 +1870,6 @@ static NTSTATUS set_zero_data(device_extension* Vcb, PFILE_OBJECT FileObject, vo Status = STATUS_SUCCESS; - if (!nocsum) { - ExAcquireResourceExclusiveLite(&Vcb->checksum_lock, TRUE); - commit_checksum_changes(Vcb, &changed_sector_list); - ExReleaseResourceLite(&Vcb->checksum_lock); - } - end: if (!NT_SUCCESS(Status)) do_rollback(Vcb, &rollback); @@ -1814,6 +2035,7 @@ static NTSTATUS lock_volume(device_extension* Vcb, PIRP Irp) { NTSTATUS Status; LIST_ENTRY rollback; KIRQL irql; + BOOL lock_paused_balance = FALSE; TRACE("FSCTL_LOCK_VOLUME\n"); @@ -1832,12 +2054,18 @@ static NTSTATUS lock_volume(device_extension* Vcb, PIRP Irp) { goto end; } - Vcb->locked = TRUE; - ExReleaseResourceLite(&Vcb->fcb_lock); InitializeListHead(&rollback); + if (Vcb->balance.thread && KeReadStateEvent(&Vcb->balance.event)) { + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + 
KeClearEvent(&Vcb->balance.event); + ExReleaseResourceLite(&Vcb->tree_lock); + + lock_paused_balance = TRUE; + } + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); flush_fcb_caches(Vcb); @@ -1855,10 +2083,16 @@ static NTSTATUS lock_volume(device_extension* Vcb, PIRP Irp) { if (!(Vcb->Vpb->Flags & VPB_LOCKED)) { Vcb->Vpb->Flags |= VPB_LOCKED; + Vcb->locked = TRUE; Vcb->locked_fileobj = IrpSp->FileObject; + Vcb->lock_paused_balance = lock_paused_balance; } else { Status = STATUS_ACCESS_DENIED; IoReleaseVpbSpinLock(irql); + + if (lock_paused_balance) + KeSetEvent(&Vcb->balance.event, 0, FALSE); + goto end; } @@ -1883,6 +2117,9 @@ void do_unlock_volume(device_extension* Vcb) { Vcb->locked_fileobj = NULL; IoReleaseVpbSpinLock(irql); + + if (Vcb->lock_paused_balance) + KeSetEvent(&Vcb->balance.event, 0, FALSE); } static NTSTATUS unlock_volume(device_extension* Vcb, PIRP Irp) { @@ -2082,24 +2319,37 @@ static NTSTATUS get_compression(device_extension* Vcb, PIRP Irp) { } static void update_volumes(device_extension* Vcb) { - LIST_ENTRY* le = volumes.Flink; + LIST_ENTRY* le; + + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + ExAcquireResourceExclusiveLite(&volumes_lock, TRUE); + + le = volumes.Flink; while (le != &volumes) { volume* v = CONTAINING_RECORD(le, volume, list_entry); if (RtlCompareMemory(&Vcb->superblock.uuid, &v->fsuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { - UINT64 i; + LIST_ENTRY* le; - for (i = 0; i < Vcb->superblock.num_devices; i++) { - if (RtlCompareMemory(&Vcb->devices[i].devitem.device_uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (RtlCompareMemory(&dev->devitem.device_uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { v->gen1 = v->gen2 = Vcb->superblock.generation - 1; break; } + + le = le->Flink; } } le = le->Flink; } + + ExReleaseResourceLite(&volumes_lock); + 
ExReleaseResourceLite(&Vcb->tree_lock); } static NTSTATUS dismount_volume(device_extension* Vcb, PIRP Irp) { @@ -2148,6 +2398,456 @@ static NTSTATUS dismount_volume(device_extension* Vcb, PIRP Irp) { return STATUS_SUCCESS; } +static NTSTATUS is_device_part_of_mounted_btrfs_raid(PDEVICE_OBJECT devobj) { + NTSTATUS Status; + ULONG to_read; + superblock* sb; + UINT32 crc32; + BTRFS_UUID fsuuid, devuuid; + LIST_ENTRY* le; + + to_read = devobj->SectorSize == 0 ? sizeof(superblock) : sector_align(sizeof(superblock), devobj->SectorSize); + + sb = ExAllocatePoolWithTag(PagedPool, to_read, ALLOC_TAG); + if (!sb) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + Status = sync_read_phys(devobj, superblock_addrs[0], to_read, (UINT8*)sb, TRUE); + if (!NT_SUCCESS(Status)) { + ERR("sync_read_phys returned %08x\n", Status); + ExFreePool(sb); + return Status; + } + + if (sb->magic != BTRFS_MAGIC) { + TRACE("device is not Btrfs\n"); + ExFreePool(sb); + return STATUS_SUCCESS; + } + + crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum)); + + if (crc32 != *((UINT32*)sb->checksum)) { + TRACE("device has Btrfs magic, but invalid superblock checksum\n"); + ExFreePool(sb); + return STATUS_SUCCESS; + } + + fsuuid = sb->uuid; + devuuid = sb->dev_item.device_uuid; + + ExFreePool(sb); + + ExAcquireResourceSharedLite(&global_loading_lock, TRUE); + + le = VcbList.Flink; + + while (le != &VcbList) { + device_extension* Vcb = CONTAINING_RECORD(le, device_extension, list_entry); + + if (RtlCompareMemory(&Vcb->superblock.uuid, &fsuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { + LIST_ENTRY* le2; + + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + + if (Vcb->superblock.num_devices > 1) { + le2 = Vcb->devices.Flink; + while (le2 != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le2, device, list_entry); + + if (RtlCompareMemory(&dev->devitem.device_uuid, &devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) { + 
ExReleaseResourceLite(&Vcb->tree_lock); + ExReleaseResourceLite(&global_loading_lock); + return STATUS_DEVICE_NOT_READY; + } + + le2 = le2->Flink; + } + } + + ExReleaseResourceLite(&Vcb->tree_lock); + ExReleaseResourceLite(&global_loading_lock); + return STATUS_SUCCESS; + } + + le = le->Flink; + } + + ExReleaseResourceLite(&global_loading_lock); + + return STATUS_SUCCESS; +} + +static NTSTATUS add_device(device_extension* Vcb, PIRP Irp, void* data, ULONG length, KPROCESSOR_MODE processor_mode) { + PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); + NTSTATUS Status; + PFILE_OBJECT fileobj, mountmgrfo; + HANDLE h; + LIST_ENTRY rollback, *le; + GET_LENGTH_INFORMATION gli; + device* dev; + DEV_ITEM* di; + UINT64 dev_id; + UINT8* mb; + UINT64* stats; + MOUNTDEV_NAME mdn1, *mdn2; + UNICODE_STRING volname, mmdevpath; + volume* v; + PDEVICE_OBJECT mountmgr; + KEY searchkey; + traverse_ptr tp; + STORAGE_DEVICE_NUMBER sdn; + + volname.Buffer = NULL; + + if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) + return STATUS_PRIVILEGE_NOT_HELD; + + if (Vcb->readonly) // FIXME - handle adding R/W device to seeding device + return STATUS_MEDIA_WRITE_PROTECTED; + +#if defined(_WIN64) + if (IoIs32bitProcess(Irp)) { + if (IrpSp->Parameters.FileSystemControl.InputBufferLength != sizeof(UINT32)) + return STATUS_INVALID_PARAMETER; + + h = (HANDLE)LongToHandle((*(PUINT32)Irp->AssociatedIrp.SystemBuffer)); + } else { +#endif + if (IrpSp->Parameters.FileSystemControl.InputBufferLength != sizeof(HANDLE)) + return STATUS_INVALID_PARAMETER; + + h = *(PHANDLE)Irp->AssociatedIrp.SystemBuffer; +#if defined(_WIN64) + } +#endif + + Status = ObReferenceObjectByHandle(h, 0, *IoFileObjectType, Irp->RequestorMode, (void**)&fileobj, NULL); + + if (!NT_SUCCESS(Status)) { + ERR("ObReferenceObjectByHandle returned %08x\n", Status); + return Status; + } + + Status = is_device_part_of_mounted_btrfs_raid(fileobj->DeviceObject); + if (!NT_SUCCESS(Status)) 
{ + ERR("is_device_part_of_mounted_btrfs_raid returned %08x\n", Status); + ObDereferenceObject(fileobj); + return Status; + } + + Status = dev_ioctl(fileobj->DeviceObject, IOCTL_DISK_IS_WRITABLE, NULL, 0, NULL, 0, TRUE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("IOCTL_DISK_IS_WRITABLE returned %08x\n", Status); + ObDereferenceObject(fileobj); + return Status; + } + + Status = dev_ioctl(fileobj->DeviceObject, IOCTL_DISK_GET_LENGTH_INFO, NULL, 0, + &gli, sizeof(gli), TRUE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("error reading length information: %08x\n", Status); + ObDereferenceObject(fileobj); + return Status; + } + + if (gli.Length.QuadPart < 0x100000) { + ERR("device was not large enough to hold FS (%llx bytes, need at least 1 MB)\n", gli.Length.QuadPart); + ObDereferenceObject(fileobj); + return STATUS_INTERNAL_ERROR; + } + + Status = dev_ioctl(fileobj->DeviceObject, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, + &mdn1, sizeof(MOUNTDEV_NAME), TRUE, NULL); + if (Status == STATUS_BUFFER_OVERFLOW) { + mdn2 = ExAllocatePoolWithTag(PagedPool, offsetof(MOUNTDEV_NAME, Name[0]) + mdn1.NameLength, ALLOC_TAG); + if (!mdn2) { + ERR("out of memory\n"); + ObDereferenceObject(fileobj); + return STATUS_INTERNAL_ERROR; + } + + Status = dev_ioctl(fileobj->DeviceObject, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, + mdn2, offsetof(MOUNTDEV_NAME, Name[0]) + mdn1.NameLength, TRUE, NULL); + if (!NT_SUCCESS(Status)) { + ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); + ExFreePool(mdn2); /* the name buffer was allocated above and is not freed anywhere else on this path */ + ObDereferenceObject(fileobj); + return Status; + } + } else if (NT_SUCCESS(Status)) { + mdn2 = ExAllocatePoolWithTag(PagedPool, sizeof(MOUNTDEV_NAME), ALLOC_TAG); + if (!mdn2) { + ERR("out of memory\n"); + ObDereferenceObject(fileobj); + return STATUS_INTERNAL_ERROR; + } + + RtlCopyMemory(mdn2, &mdn1, sizeof(MOUNTDEV_NAME)); + } else { + ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status); + ObDereferenceObject(fileobj); + return Status; + } + + if (mdn2->NameLength == 0) { +
ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned zero-length name\n"); + ObDereferenceObject(fileobj); + ExFreePool(mdn2); + return STATUS_INTERNAL_ERROR; + } + + volname.Length = volname.MaximumLength = mdn2->NameLength; + volname.Buffer = ExAllocatePoolWithTag(PagedPool, volname.MaximumLength, ALLOC_TAG); + if (!volname.Buffer) { + ERR("out of memory\n"); + ObDereferenceObject(fileobj); + ExFreePool(mdn2); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(volname.Buffer, mdn2->Name, volname.Length); + ExFreePool(mdn2); + + InitializeListHead(&rollback); + + ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); + + if (Vcb->need_write) { + Status = do_write(Vcb, Irp, &rollback); + if (!NT_SUCCESS(Status)) { + ERR("do_write returned %08x\n", Status); + do_rollback(Vcb, &rollback); + goto end; + } + } + + free_trees(Vcb); + + clear_rollback(Vcb, &rollback); + + dev = ExAllocatePoolWithTag(NonPagedPool, sizeof(device), ALLOC_TAG); + if (!dev) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlZeroMemory(dev, sizeof(device)); + + dev->devobj = fileobj->DeviceObject; + dev->seeding = FALSE; + dev->length = gli.Length.QuadPart; + init_device(Vcb, dev, FALSE, TRUE); + + InitializeListHead(&dev->space); + + if (gli.Length.QuadPart > 0x100000) { // add disk hole - the first MB is marked as used + Status = add_space_entry(&dev->space, NULL, 0x100000, gli.Length.QuadPart - 0x100000); + if (!NT_SUCCESS(Status)) { + ERR("add_space_entry returned %08x\n", Status); + Status = STATUS_INTERNAL_ERROR; + goto end; + } + } + + dev_id = 0; + + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (dev->devitem.dev_id > dev_id) + dev_id = dev->devitem.dev_id; + + le = le->Flink; + } + + dev_id++; + + dev->devitem.dev_id = dev_id; + dev->devitem.num_bytes = gli.Length.QuadPart; + dev->devitem.bytes_used = 0; + dev->devitem.optimal_io_align = 
Vcb->superblock.sector_size; + dev->devitem.optimal_io_width = Vcb->superblock.sector_size; + dev->devitem.minimal_io_size = Vcb->superblock.sector_size; + dev->devitem.type = 0; + dev->devitem.generation = 0; + dev->devitem.start_offset = 0; + dev->devitem.dev_group = 0; + dev->devitem.seek_speed = 0; + dev->devitem.bandwidth = 0; + get_uuid(&dev->devitem.device_uuid); + dev->devitem.fs_uuid = Vcb->superblock.uuid; + + di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG); + if (!di) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; /* otherwise the earlier success status would be returned */ + goto end; + } + RtlCopyMemory(di, &dev->devitem, sizeof(DEV_ITEM)); + + if (!insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, di->dev_id, di, sizeof(DEV_ITEM), NULL, Irp, &rollback)) { + ERR("insert_tree_item failed\n"); + ExFreePool(di); + Status = STATUS_INTERNAL_ERROR; + goto end; + } + + // add stats entry to dev tree + stats = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * 5, ALLOC_TAG); + if (!stats) { + ERR("out of memory\n"); + Status = STATUS_INTERNAL_ERROR; + goto end; + } + + RtlZeroMemory(stats, sizeof(UINT64) * 5); + + searchkey.obj_id = 0; + searchkey.obj_type = TYPE_DEV_STATS; + searchkey.offset = di->dev_id; + + Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, Irp); + if (!NT_SUCCESS(Status)) { + ERR("error - find_item returned %08x\n", Status); + ExFreePool(stats); /* stats has not been handed to insert_tree_item yet */ + goto end; + } + if (!keycmp(tp.item->key, searchkey)) + delete_tree_item(Vcb, &tp, &rollback); + + if (!insert_tree_item(Vcb, Vcb->dev_root, 0, TYPE_DEV_STATS, di->dev_id, stats, sizeof(UINT64) * 5, NULL, Irp, &rollback)) { + ERR("insert_tree_item failed\n"); + ExFreePool(stats); + Status = STATUS_INTERNAL_ERROR; + goto end; + } + + // We clear the first megabyte of the device, so Windows doesn't identify it as another FS + mb = ExAllocatePoolWithTag(PagedPool, 0x100000, ALLOC_TAG); + if (!mb) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + RtlZeroMemory(mb, 0x100000); + + Status =
write_data_phys(fileobj->DeviceObject, 0, mb, 0x100000); + ExFreePool(mb); /* release the scratch buffer before checking Status so the failure path does not leak it */ + + if (!NT_SUCCESS(Status)) { + ERR("write_data_phys returned %08x\n", Status); + goto end; + } + + v = ExAllocatePoolWithTag(PagedPool, sizeof(volume), ALLOC_TAG); + if (!v) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + v->fsuuid = Vcb->superblock.uuid; + v->devuuid = dev->devitem.device_uuid; + v->devnum = dev_id; + v->devpath = volname; + v->length = gli.Length.QuadPart; + v->gen1 = v->gen2 = Vcb->superblock.generation; + v->seeding = FALSE; + v->processed = TRUE; + + ExAcquireResourceExclusiveLite(&volumes_lock, TRUE); + InsertTailList(&volumes, &v->list_entry); + ExReleaseResourceLite(&volumes_lock); + + volname.Buffer = NULL; + + Status = dev_ioctl(fileobj->DeviceObject, IOCTL_STORAGE_GET_DEVICE_NUMBER, NULL, 0, + &sdn, sizeof(STORAGE_DEVICE_NUMBER), TRUE, NULL); + if (!NT_SUCCESS(Status)) { + WARN("IOCTL_STORAGE_GET_DEVICE_NUMBER returned %08x\n", Status); + v->disk_num = 0; + v->part_num = 0; + } else { + v->disk_num = sdn.DeviceNumber; + v->part_num = sdn.PartitionNumber; + } + + RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME); + Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &mountmgrfo, &mountmgr); + if (!NT_SUCCESS(Status)) + ERR("IoGetDeviceObjectPointer returned %08x\n", Status); + else { + remove_drive_letter(mountmgr, v); + + ObDereferenceObject(mountmgrfo); + } + + Vcb->superblock.num_devices++; + Vcb->superblock.total_bytes += gli.Length.QuadPart; + Vcb->devices_loaded++; + InsertTailList(&Vcb->devices, &dev->list_entry); + + ObReferenceObject(fileobj->DeviceObject); + + do_write(Vcb, Irp, &rollback); + + free_trees(Vcb); + + clear_rollback(Vcb, &rollback); + + Status = STATUS_SUCCESS; + +end: + ExReleaseResourceLite(&Vcb->tree_lock); + ObDereferenceObject(fileobj); + + if (volname.Buffer) + ExFreePool(volname.Buffer); + + return Status; +} + +static NTSTATUS allow_extended_dasd_io(device_extension*
Vcb, PFILE_OBJECT FileObject) { + fcb* fcb; + ccb* ccb; + + TRACE("FSCTL_ALLOW_EXTENDED_DASD_IO\n"); + + if (!FileObject) + return STATUS_INVALID_PARAMETER; + + fcb = FileObject->FsContext; + ccb = FileObject->FsContext2; + + if (!fcb) + return STATUS_INVALID_PARAMETER; + + if (fcb != Vcb->volume_fcb) + return STATUS_INVALID_PARAMETER; + + if (!ccb) + return STATUS_INVALID_PARAMETER; + + ccb->allow_extended_dasd_io = TRUE; + + return STATUS_SUCCESS; +} + +static NTSTATUS query_uuid(device_extension* Vcb, void* data, ULONG length) { + if (length < sizeof(BTRFS_UUID)) + return STATUS_BUFFER_OVERFLOW; + + RtlCopyMemory(data, &Vcb->superblock.uuid, sizeof(BTRFS_UUID)); + + return STATUS_SUCCESS; +} + NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL user) { PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); NTSTATUS Status; @@ -2155,32 +2855,32 @@ NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL switch (type) { case FSCTL_REQUEST_OPLOCK_LEVEL_1: WARN("STUB: FSCTL_REQUEST_OPLOCK_LEVEL_1\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_REQUEST_OPLOCK_LEVEL_2: WARN("STUB: FSCTL_REQUEST_OPLOCK_LEVEL_2\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_REQUEST_BATCH_OPLOCK: WARN("STUB: FSCTL_REQUEST_BATCH_OPLOCK\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_OPLOCK_BREAK_ACKNOWLEDGE: WARN("STUB: FSCTL_OPLOCK_BREAK_ACKNOWLEDGE\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_OPBATCH_ACK_CLOSE_PENDING: WARN("STUB: FSCTL_OPBATCH_ACK_CLOSE_PENDING\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_OPLOCK_BREAK_NOTIFY: WARN("STUB: FSCTL_OPLOCK_BREAK_NOTIFY\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case 
FSCTL_LOCK_VOLUME: @@ -2201,17 +2901,17 @@ NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL case FSCTL_IS_PATHNAME_VALID: WARN("STUB: FSCTL_IS_PATHNAME_VALID\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_MARK_VOLUME_DIRTY: WARN("STUB: FSCTL_MARK_VOLUME_DIRTY\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_QUERY_RETRIEVAL_POINTERS: WARN("STUB: FSCTL_QUERY_RETRIEVAL_POINTERS\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_GET_COMPRESSION: @@ -2220,17 +2920,17 @@ NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL case FSCTL_SET_COMPRESSION: WARN("STUB: FSCTL_SET_COMPRESSION\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_SET_BOOTLOADER_ACCESSED: WARN("STUB: FSCTL_SET_BOOTLOADER_ACCESSED\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_OPLOCK_BREAK_ACK_NO_2: WARN("STUB: FSCTL_OPLOCK_BREAK_ACK_NO_2\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_INVALIDATE_VOLUMES: @@ -2239,12 +2939,12 @@ NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL case FSCTL_QUERY_FAT_BPB: WARN("STUB: FSCTL_QUERY_FAT_BPB\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_REQUEST_FILTER_OPLOCK: WARN("STUB: FSCTL_REQUEST_FILTER_OPLOCK\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_FILESYSTEM_GET_STATISTICS: @@ -2254,27 +2954,27 @@ NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL case FSCTL_GET_NTFS_VOLUME_DATA: WARN("STUB: FSCTL_GET_NTFS_VOLUME_DATA\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_GET_NTFS_FILE_RECORD: 
WARN("STUB: FSCTL_GET_NTFS_FILE_RECORD\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_GET_VOLUME_BITMAP: WARN("STUB: FSCTL_GET_VOLUME_BITMAP\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_GET_RETRIEVAL_POINTERS: WARN("STUB: FSCTL_GET_RETRIEVAL_POINTERS\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_MOVE_FILE: WARN("STUB: FSCTL_MOVE_FILE\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_IS_VOLUME_DIRTY: @@ -2282,18 +2982,17 @@ NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL break; case FSCTL_ALLOW_EXTENDED_DASD_IO: - WARN("STUB: FSCTL_ALLOW_EXTENDED_DASD_IO\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = allow_extended_dasd_io(DeviceObject->DeviceExtension, IrpSp->FileObject); break; case FSCTL_FIND_FILES_BY_SID: WARN("STUB: FSCTL_FIND_FILES_BY_SID\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_SET_OBJECT_ID: WARN("STUB: FSCTL_SET_OBJECT_ID\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_GET_OBJECT_ID: @@ -2303,7 +3002,7 @@ NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL case FSCTL_DELETE_OBJECT_ID: WARN("STUB: FSCTL_DELETE_OBJECT_ID\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_SET_REPARSE_POINT: @@ -2321,22 +3020,22 @@ NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL case FSCTL_ENUM_USN_DATA: WARN("STUB: FSCTL_ENUM_USN_DATA\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_SECURITY_ID_CHECK: WARN("STUB: FSCTL_SECURITY_ID_CHECK\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_READ_USN_JOURNAL: WARN("STUB: 
FSCTL_READ_USN_JOURNAL\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_SET_OBJECT_ID_EXTENDED: WARN("STUB: FSCTL_SET_OBJECT_ID_EXTENDED\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_CREATE_OR_GET_OBJECT_ID: @@ -2362,258 +3061,258 @@ NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL case FSCTL_ENABLE_UPGRADE: WARN("STUB: FSCTL_ENABLE_UPGRADE\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_SET_ENCRYPTION: WARN("STUB: FSCTL_SET_ENCRYPTION\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_ENCRYPTION_FSCTL_IO: WARN("STUB: FSCTL_ENCRYPTION_FSCTL_IO\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_WRITE_RAW_ENCRYPTED: WARN("STUB: FSCTL_WRITE_RAW_ENCRYPTED\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_READ_RAW_ENCRYPTED: WARN("STUB: FSCTL_READ_RAW_ENCRYPTED\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_CREATE_USN_JOURNAL: WARN("STUB: FSCTL_CREATE_USN_JOURNAL\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_READ_FILE_USN_DATA: WARN("STUB: FSCTL_READ_FILE_USN_DATA\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_WRITE_USN_CLOSE_RECORD: WARN("STUB: FSCTL_WRITE_USN_CLOSE_RECORD\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_EXTEND_VOLUME: WARN("STUB: FSCTL_EXTEND_VOLUME\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_QUERY_USN_JOURNAL: WARN("STUB: FSCTL_QUERY_USN_JOURNAL\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case 
FSCTL_DELETE_USN_JOURNAL: WARN("STUB: FSCTL_DELETE_USN_JOURNAL\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_MARK_HANDLE: WARN("STUB: FSCTL_MARK_HANDLE\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_SIS_COPYFILE: WARN("STUB: FSCTL_SIS_COPYFILE\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_SIS_LINK_FILES: WARN("STUB: FSCTL_SIS_LINK_FILES\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_RECALL_FILE: WARN("STUB: FSCTL_RECALL_FILE\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_READ_FROM_PLEX: WARN("STUB: FSCTL_READ_FROM_PLEX\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_FILE_PREFETCH: WARN("STUB: FSCTL_FILE_PREFETCH\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; #if WIN32_WINNT >= 0x0600 case FSCTL_MAKE_MEDIA_COMPATIBLE: WARN("STUB: FSCTL_MAKE_MEDIA_COMPATIBLE\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_SET_DEFECT_MANAGEMENT: WARN("STUB: FSCTL_SET_DEFECT_MANAGEMENT\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_QUERY_SPARING_INFO: WARN("STUB: FSCTL_QUERY_SPARING_INFO\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_QUERY_ON_DISK_VOLUME_INFO: WARN("STUB: FSCTL_QUERY_ON_DISK_VOLUME_INFO\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_SET_VOLUME_COMPRESSION_STATE: WARN("STUB: FSCTL_SET_VOLUME_COMPRESSION_STATE\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_TXFS_MODIFY_RM: WARN("STUB: FSCTL_TXFS_MODIFY_RM\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = 
STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_TXFS_QUERY_RM_INFORMATION: WARN("STUB: FSCTL_TXFS_QUERY_RM_INFORMATION\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_TXFS_ROLLFORWARD_REDO: WARN("STUB: FSCTL_TXFS_ROLLFORWARD_REDO\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_TXFS_ROLLFORWARD_UNDO: WARN("STUB: FSCTL_TXFS_ROLLFORWARD_UNDO\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_TXFS_START_RM: WARN("STUB: FSCTL_TXFS_START_RM\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_TXFS_SHUTDOWN_RM: WARN("STUB: FSCTL_TXFS_SHUTDOWN_RM\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_TXFS_READ_BACKUP_INFORMATION: WARN("STUB: FSCTL_TXFS_READ_BACKUP_INFORMATION\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_TXFS_WRITE_BACKUP_INFORMATION: WARN("STUB: FSCTL_TXFS_WRITE_BACKUP_INFORMATION\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_TXFS_CREATE_SECONDARY_RM: WARN("STUB: FSCTL_TXFS_CREATE_SECONDARY_RM\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_TXFS_GET_METADATA_INFO: WARN("STUB: FSCTL_TXFS_GET_METADATA_INFO\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_TXFS_GET_TRANSACTED_VERSION: WARN("STUB: FSCTL_TXFS_GET_TRANSACTED_VERSION\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_TXFS_SAVEPOINT_INFORMATION: WARN("STUB: FSCTL_TXFS_SAVEPOINT_INFORMATION\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_TXFS_CREATE_MINIVERSION: WARN("STUB: FSCTL_TXFS_CREATE_MINIVERSION\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status 
= STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_TXFS_TRANSACTION_ACTIVE: WARN("STUB: FSCTL_TXFS_TRANSACTION_ACTIVE\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_SET_ZERO_ON_DEALLOCATION: WARN("STUB: FSCTL_SET_ZERO_ON_DEALLOCATION\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_SET_REPAIR: WARN("STUB: FSCTL_SET_REPAIR\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_GET_REPAIR: WARN("STUB: FSCTL_GET_REPAIR\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_WAIT_FOR_REPAIR: WARN("STUB: FSCTL_WAIT_FOR_REPAIR\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_INITIATE_REPAIR: WARN("STUB: FSCTL_INITIATE_REPAIR\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_CSC_INTERNAL: WARN("STUB: FSCTL_CSC_INTERNAL\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_SHRINK_VOLUME: WARN("STUB: FSCTL_SHRINK_VOLUME\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_SET_SHORT_NAME_BEHAVIOR: WARN("STUB: FSCTL_SET_SHORT_NAME_BEHAVIOR\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_DFSR_SET_GHOST_HANDLE_STATE: WARN("STUB: FSCTL_DFSR_SET_GHOST_HANDLE_STATE\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_TXFS_LIST_TRANSACTION_LOCKED_FILES: WARN("STUB: FSCTL_TXFS_LIST_TRANSACTION_LOCKED_FILES\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_TXFS_LIST_TRANSACTIONS: WARN("STUB: FSCTL_TXFS_LIST_TRANSACTIONS\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_QUERY_PAGEFILE_ENCRYPTION: WARN("STUB: 
FSCTL_QUERY_PAGEFILE_ENCRYPTION\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_RESET_VOLUME_ALLOCATION_HINTS: WARN("STUB: FSCTL_RESET_VOLUME_ALLOCATION_HINTS\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_TXFS_READ_BACKUP_INFORMATION2: WARN("STUB: FSCTL_TXFS_READ_BACKUP_INFORMATION2\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; case FSCTL_CSV_CONTROL: WARN("STUB: FSCTL_CSV_CONTROL\n"); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; #endif case FSCTL_BTRFS_GET_FILE_IDS: @@ -2635,13 +3334,53 @@ NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL case FSCTL_BTRFS_SET_INODE_INFO: Status = set_inode_info(IrpSp->FileObject, map_user_buffer(Irp), IrpSp->Parameters.FileSystemControl.OutputBufferLength); break; + + case FSCTL_BTRFS_GET_DEVICES: + Status = get_devices(DeviceObject->DeviceExtension, map_user_buffer(Irp), IrpSp->Parameters.FileSystemControl.OutputBufferLength); + break; + + case FSCTL_BTRFS_GET_USAGE: + Status = get_usage(DeviceObject->DeviceExtension, map_user_buffer(Irp), IrpSp->Parameters.FileSystemControl.OutputBufferLength); + break; + + case FSCTL_BTRFS_START_BALANCE: + Status = start_balance(DeviceObject->DeviceExtension, Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.FileSystemControl.InputBufferLength, Irp->RequestorMode); + break; + + case FSCTL_BTRFS_QUERY_BALANCE: + Status = query_balance(DeviceObject->DeviceExtension, map_user_buffer(Irp), IrpSp->Parameters.FileSystemControl.OutputBufferLength); + break; + + case FSCTL_BTRFS_PAUSE_BALANCE: + Status = pause_balance(DeviceObject->DeviceExtension, Irp->RequestorMode); + break; + + case FSCTL_BTRFS_RESUME_BALANCE: + Status = resume_balance(DeviceObject->DeviceExtension, Irp->RequestorMode); + break; + + case FSCTL_BTRFS_STOP_BALANCE: + Status = stop_balance(DeviceObject->DeviceExtension, 
Irp->RequestorMode); + break; + + case FSCTL_BTRFS_ADD_DEVICE: + Status = add_device(DeviceObject->DeviceExtension, Irp, Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.FileSystemControl.InputBufferLength, Irp->RequestorMode); + break; + + case FSCTL_BTRFS_REMOVE_DEVICE: + Status = remove_device(DeviceObject->DeviceExtension, Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.FileSystemControl.InputBufferLength, Irp->RequestorMode); + break; + + case FSCTL_BTRFS_GET_UUID: + Status = query_uuid(DeviceObject->DeviceExtension, map_user_buffer(Irp), IrpSp->Parameters.FileSystemControl.OutputBufferLength); + break; default: TRACE("unknown control code %x (DeviceType = %x, Access = %x, Function = %x, Method = %x)\n", IrpSp->Parameters.FileSystemControl.FsControlCode, (IrpSp->Parameters.FileSystemControl.FsControlCode & 0xff0000) >> 16, (IrpSp->Parameters.FileSystemControl.FsControlCode & 0xc000) >> 14, (IrpSp->Parameters.FileSystemControl.FsControlCode & 0x3ffc) >> 2, IrpSp->Parameters.FileSystemControl.FsControlCode & 0x3); - Status = STATUS_NOT_IMPLEMENTED; + Status = STATUS_INVALID_DEVICE_REQUEST; break; } diff --git a/reactos/drivers/filesystems/btrfs/pnp.c b/reactos/drivers/filesystems/btrfs/pnp.c index a15091e2a08..6c67e089b45 100644 --- a/reactos/drivers/filesystems/btrfs/pnp.c +++ b/reactos/drivers/filesystems/btrfs/pnp.c @@ -24,6 +24,7 @@ typedef struct { PIRP Irp; IO_STATUS_BLOCK iosb; NTSTATUS Status; + device* dev; } pnp_stripe; typedef struct { @@ -51,6 +52,7 @@ static NTSTATUS send_disks_pnp_message(device_extension* Vcb, UCHAR minor) { pnp_context* context; UINT64 num_devices, i; NTSTATUS Status; + LIST_ENTRY* le; context = ExAllocatePoolWithTag(NonPagedPool, sizeof(pnp_context), ALLOC_TAG); if (!context) { @@ -58,6 +60,8 @@ static NTSTATUS send_disks_pnp_message(device_extension* Vcb, UCHAR minor) { return STATUS_INSUFFICIENT_RESOURCES; } + ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); + RtlZeroMemory(context, sizeof(pnp_context)); 
KeInitializeEvent(&context->Event, NotificationEvent, FALSE); @@ -67,18 +71,23 @@ static NTSTATUS send_disks_pnp_message(device_extension* Vcb, UCHAR minor) { if (!context->stripes) { ERR("out of memory\n"); ExFreePool(context); - return STATUS_INSUFFICIENT_RESOURCES; + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end2; } RtlZeroMemory(context->stripes, sizeof(pnp_stripe) * num_devices); - for (i = 0; i < num_devices; i++) { + i = 0; + le = Vcb->devices.Flink; + + while (le != &Vcb->devices) { PIO_STACK_LOCATION IrpSp; + device* dev = CONTAINING_RECORD(le, device, list_entry); - if (Vcb->devices[i].devobj) { + if (dev->devobj) { context->stripes[i].context = (struct pnp_context*)context; - context->stripes[i].Irp = IoAllocateIrp(Vcb->devices[i].devobj->StackSize, FALSE); + context->stripes[i].Irp = IoAllocateIrp(dev->devobj->StackSize, FALSE); if (!context->stripes[i].Irp) { UINT64 j; @@ -86,14 +95,15 @@ static NTSTATUS send_disks_pnp_message(device_extension* Vcb, UCHAR minor) { ERR("IoAllocateIrp failed\n"); for (j = 0; j < i; j++) { - if (Vcb->devices[j].devobj) { + if (context->stripes[j].dev->devobj) { IoFreeIrp(context->stripes[j].Irp); } } ExFreePool(context->stripes); ExFreePool(context); - return STATUS_INSUFFICIENT_RESOURCES; + Status = STATUS_INSUFFICIENT_RESOURCES; + goto end2; } IrpSp = IoGetNextIrpStackLocation(context->stripes[i].Irp); @@ -105,9 +115,12 @@ static NTSTATUS send_disks_pnp_message(device_extension* Vcb, UCHAR minor) { IoSetCompletionRoutine(context->stripes[i].Irp, pnp_completion, &context->stripes[i], TRUE, TRUE, TRUE); context->stripes[i].Irp->IoStatus.Status = STATUS_NOT_SUPPORTED; + context->stripes[i].dev = dev; context->left++; } + + le = le->Flink; } if (context->left == 0) { @@ -117,7 +130,7 @@ static NTSTATUS send_disks_pnp_message(device_extension* Vcb, UCHAR minor) { for (i = 0; i < num_devices; i++) { if (context->stripes[i].Irp) { - IoCallDriver(Vcb->devices[i].devobj, context->stripes[i].Irp); + 
IoCallDriver(context->stripes[i].dev->devobj, context->stripes[i].Irp); } } @@ -141,6 +154,9 @@ end: ExFreePool(context->stripes); ExFreePool(context); + +end2: + ExReleaseResourceLite(&Vcb->tree_lock); return Status; } @@ -296,7 +312,7 @@ NTSTATUS STDCALL drv_pnp(PDEVICE_OBJECT DeviceObject, PIRP Irp) { TRACE("passing minor function 0x%x on\n", IrpSp->MinorFunction); IoSkipCurrentIrpStackLocation(Irp); - Status = IoCallDriver(Vcb->devices[0].devobj, Irp); + Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp); goto end; } @@ -305,7 +321,7 @@ NTSTATUS STDCALL drv_pnp(PDEVICE_OBJECT DeviceObject, PIRP Irp) { // // IoSkipCurrentIrpStackLocation(Irp); // -// Status = IoCallDriver(Vcb->devices[0].devobj, Irp); +// Status = IoCallDriver(first_device(Vcb)->devobj, Irp); // // // IoCompleteRequest(Irp, IO_NO_INCREMENT); diff --git a/reactos/drivers/filesystems/btrfs/read.c b/reactos/drivers/filesystems/btrfs/read.c index 694eea53eba..f2739cdf939 100644 --- a/reactos/drivers/filesystems/btrfs/read.c +++ b/reactos/drivers/filesystems/btrfs/read.c @@ -52,10 +52,15 @@ typedef struct { UINT16 firstoff, startoffstripe, sectors_per_stripe, stripes_cancel; UINT32* csum; BOOL tree; + BOOL check_nocsum_parity; read_data_stripe* stripes; KSPIN_LOCK spin_lock; } read_data_context; +extern BOOL diskacc; +extern tPsUpdateDiskCounters PsUpdateDiskCounters; +extern tCcCopyReadEx CcCopyReadEx; + static NTSTATUS STDCALL read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { read_data_stripe* stripe = conptr; read_data_context* context = (read_data_context*)stripe->context; @@ -103,7 +108,7 @@ static NTSTATUS STDCALL read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP I } else if (context->type == BLOCK_FLAG_RAID5) { stripe->status = ReadDataStatus_Success; - if (stripes_left > 0 && stripes_left == context->stripes_cancel && (context->csum || context->tree)) { + if (stripes_left > 0 && stripes_left == context->stripes_cancel && (context->csum || context->tree || 
!context->check_nocsum_parity)) { for (i = 0; i < context->num_stripes; i++) { if (context->stripes[i].status == ReadDataStatus_Pending) { context->stripes[i].status = ReadDataStatus_Cancelling; @@ -115,7 +120,7 @@ static NTSTATUS STDCALL read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP I } else if (context->type == BLOCK_FLAG_RAID6) { stripe->status = ReadDataStatus_Success; - if (stripes_left > 0 && stripes_left == context->stripes_cancel && (context->csum || context->tree)) { + if (stripes_left > 0 && stripes_left == context->stripes_cancel && (context->csum || context->tree || !context->check_nocsum_parity)) { for (i = 0; i < context->num_stripes; i++) { if (context->stripes[i].status == ReadDataStatus_Pending) { context->stripes[i].status = ReadDataStatus_Cancelling; @@ -1105,1728 +1110,1825 @@ end: return STATUS_SUCCESS; } -NTSTATUS STDCALL read_data(device_extension* Vcb, UINT64 addr, UINT32 length, UINT32* csum, BOOL is_tree, UINT8* buf, chunk* c, chunk** pc, PIRP Irp) { - CHUNK_ITEM* ci; - CHUNK_ITEM_STRIPE* cis; - read_data_context* context; - UINT64 i, type, offset; +static NTSTATUS check_csum(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32* csum) { NTSTATUS Status; - device** devices; - UINT64 *stripestart = NULL, *stripeend = NULL; - UINT32 firststripesize; - UINT16 startoffstripe, allowed_missing, missing_devices = 0; -#ifdef DEBUG_STATS - LARGE_INTEGER time1, time2; -#endif + calc_job* cj; + UINT32* csum2; + + // From experimenting, it seems that 40 sectors is roughly the crossover + // point where offloading the crc32 calculation becomes worth it. 
+ + if (sectors < 40) { + ULONG j; + + for (j = 0; j < sectors; j++) { + UINT32 crc32 = ~calc_crc32c(0xffffffff, data + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + if (crc32 != csum[j]) { + return STATUS_CRC_ERROR; + } + } + + return STATUS_SUCCESS; + } + + csum2 = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sectors, ALLOC_TAG); + if (!csum2) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } - Status = verify_vcb(Vcb, Irp); + Status = add_calc_job(Vcb, data, sectors, csum2, &cj); if (!NT_SUCCESS(Status)) { - ERR("verify_vcb returned %08x\n", Status); + ERR("add_calc_job returned %08x\n", Status); return Status; } - if (Vcb->log_to_phys_loaded) { - if (!c) { - c = get_chunk_from_address(Vcb, addr); - - if (!c) { - ERR("get_chunk_from_address failed\n"); - return STATUS_INTERNAL_ERROR; + KeWaitForSingleObject(&cj->event, Executive, KernelMode, FALSE, NULL); + + if (RtlCompareMemory(csum2, csum, sectors * sizeof(UINT32)) != sectors * sizeof(UINT32)) { + free_calc_job(cj); + ExFreePool(csum2); + return STATUS_CRC_ERROR; + } + + free_calc_job(cj); + ExFreePool(csum2); + + return STATUS_SUCCESS; +} + +static NTSTATUS read_data_dup(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, PIRP Irp, read_data_context* context, + CHUNK_ITEM* ci, device** devices, UINT64 *stripestart, UINT64 *stripeend) { + UINT64 i; + BOOL checksum_error = FALSE; + UINT16 cancelled = 0; + NTSTATUS Status; + + for (i = 0; i < ci->num_stripes; i++) { + if (context->stripes[i].status == ReadDataStatus_Success) { + if (context->tree) { + tree_header* th = (tree_header*)context->stripes[i].buf; + UINT32 crc32; + + crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum)); + + if (th->address != context->address || crc32 != *((UINT32*)th->csum)) { + context->stripes[i].status = ReadDataStatus_CRCError; + checksum_error = TRUE; + } + } else if (context->csum) { +#ifdef DEBUG_STATS + LARGE_INTEGER time1, 
time2; + + time1 = KeQueryPerformanceCounter(NULL); +#endif + Status = check_csum(Vcb, context->stripes[i].buf, context->stripes[i].Irp->IoStatus.Information / context->sector_size, context->csum); + + if (Status == STATUS_CRC_ERROR) { + context->stripes[i].status = ReadDataStatus_CRCError; + checksum_error = TRUE; + break; + } else if (!NT_SUCCESS(Status)) { + ERR("check_csum returned %08x\n", Status); + return Status; + } +#ifdef DEBUG_STATS + time2 = KeQueryPerformanceCounter(NULL); + + Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; +#endif } + } else if (context->stripes[i].status == ReadDataStatus_Cancelled) { + cancelled++; } + } + + if (checksum_error) { + CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; - ci = c->chunk_item; - offset = c->offset; - devices = c->devices; - - if (pc) - *pc = c; - } else { - LIST_ENTRY* le = Vcb->sys_chunks.Flink; - - ci = NULL; + // FIXME - update dev stats - while (le != &Vcb->sys_chunks) { - sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry); + if (cancelled > 0) { +#ifdef DEBUG_STATS + LARGE_INTEGER time1, time2; +#endif + context->stripes_left = 0; - if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) { - CHUNK_ITEM* chunk_item = sc->data; - - if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) { - ci = chunk_item; - offset = sc->key.offset; - cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1]; + for (i = 0; i < ci->num_stripes; i++) { + if (context->stripes[i].status == ReadDataStatus_Cancelled) { + PIO_STACK_LOCATION IrpSp; - devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG); - if (!devices) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; + // re-run Irp that we cancelled + + if (context->stripes[i].Irp) { + if (devices[i]->devobj->Flags & DO_DIRECT_IO) { + MmUnlockPages(context->stripes[i].Irp->MdlAddress); + IoFreeMdl(context->stripes[i].Irp->MdlAddress); + } + 
IoFreeIrp(context->stripes[i].Irp); } - for (i = 0; i < ci->num_stripes; i++) { - devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid); + if (!Irp) { + context->stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE); + + if (!context->stripes[i].Irp) { + ERR("IoAllocateIrp failed\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + } else { + context->stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize); + + if (!context->stripes[i].Irp) { + ERR("IoMakeAssociatedIrp failed\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } } - break; + IrpSp = IoGetNextIrpStackLocation(context->stripes[i].Irp); + IrpSp->MajorFunction = IRP_MJ_READ; + + if (devices[i]->devobj->Flags & DO_BUFFERED_IO) { + FIXME("FIXME - buffered IO\n"); + } else if (devices[i]->devobj->Flags & DO_DIRECT_IO) { + context->stripes[i].Irp->MdlAddress = IoAllocateMdl(context->stripes[i].buf, stripeend[i] - stripestart[i], FALSE, FALSE, NULL); + if (!context->stripes[i].Irp->MdlAddress) { + ERR("IoAllocateMdl failed\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + MmProbeAndLockPages(context->stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess); + } else { + context->stripes[i].Irp->UserBuffer = context->stripes[i].buf; + } + + IrpSp->Parameters.Read.Length = stripeend[i] - stripestart[i]; + IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[i] + cis[i].offset; + + context->stripes[i].Irp->UserIosb = &context->stripes[i].iosb; + + IoSetCompletionRoutine(context->stripes[i].Irp, read_data_completion, &context->stripes[i], TRUE, TRUE, TRUE); + + context->stripes_left++; + context->stripes[i].status = ReadDataStatus_Pending; } } - le = le->Flink; + context->stripes_cancel = 0; + KeClearEvent(&context->Event); + +#ifdef DEBUG_STATS + if (!context->tree) + time1 = KeQueryPerformanceCounter(NULL); +#endif + + for (i = 0; i < ci->num_stripes; i++) { + if (context->stripes[i].status == ReadDataStatus_Pending) { + IoCallDriver(devices[i]->devobj, 
context->stripes[i].Irp); + } + } + + KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL); + +#ifdef DEBUG_STATS + if (!context->tree) { + time2 = KeQueryPerformanceCounter(NULL); + + Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart; + } +#endif + for (i = 0; i < ci->num_stripes; i++) { + if (context->stripes[i].status == ReadDataStatus_Success) { + if (context->tree) { + tree_header* th = (tree_header*)context->stripes[i].buf; + UINT32 crc32; + + crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum)); + + if (th->address != context->address || crc32 != *((UINT32*)th->csum)) + context->stripes[i].status = ReadDataStatus_CRCError; + } else if (context->csum) { + NTSTATUS Status; +#ifdef DEBUG_STATS + time1 = KeQueryPerformanceCounter(NULL); +#endif + Status = check_csum(Vcb, context->stripes[i].buf, context->stripes[i].Irp->IoStatus.Information / Vcb->superblock.sector_size, context->csum); + + if (Status == STATUS_CRC_ERROR) + context->stripes[i].status = ReadDataStatus_CRCError; + else if (!NT_SUCCESS(Status)) { + ERR("check_csum returned %08x\n", Status); + return Status; + } +#ifdef DEBUG_STATS + time2 = KeQueryPerformanceCounter(NULL); + + Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; +#endif + } + } + } } - if (!ci) { - ERR("could not find chunk for %llx in bootstrap\n", addr); - return STATUS_INTERNAL_ERROR; + for (i = 0; i < ci->num_stripes; i++) { + if (context->stripes[i].status == ReadDataStatus_Success) { + RtlCopyMemory(buf, context->stripes[i].buf, length); + goto raid1write; + } } - if (pc) - *pc = NULL; + if (context->tree || ci->num_stripes == 1) { // unable to recover from checksum error + ERR("unrecoverable checksum error at %llx\n", addr); + +#ifdef _DEBUG + if (context->tree) { + for (i = 0; i < ci->num_stripes; i++) { + if (context->stripes[i].status == ReadDataStatus_CRCError) { + tree_header* th = (tree_header*)context->stripes[i].buf; + UINT32 crc32 = 
~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum)); + + if (crc32 != *((UINT32*)th->csum)) { + WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum)); + return STATUS_CRC_ERROR; + } else if (addr != th->address) { + WARN("address of tree was %llx, not %llx as expected\n", th->address, addr); + return STATUS_CRC_ERROR; + } + } + } + } +#endif + return STATUS_CRC_ERROR; + } + + // checksum errors on both stripes - we need to check sector by sector + + for (i = 0; i < (stripeend[0] - stripestart[0]) / context->sector_size; i++) { + UINT16 j; + BOOL success = FALSE; +#ifdef DEBUG_STATS + LARGE_INTEGER time1, time2; + + time1 = KeQueryPerformanceCounter(NULL); +#endif + + for (j = 0; j < ci->num_stripes; j++) { + if (context->stripes[j].status == ReadDataStatus_CRCError) { + UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[j].buf + (i * context->sector_size), context->sector_size); + + if (crc32 == context->csum[i]) { + RtlCopyMemory(buf + (i * context->sector_size), context->stripes[j].buf + (i * context->sector_size), context->sector_size); + success = TRUE; + break; + } + } + } + +#ifdef DEBUG_STATS + time2 = KeQueryPerformanceCounter(NULL); + + Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; +#endif + if (!success) { + ERR("unrecoverable checksum error at %llx\n", addr + (i * context->sector_size)); + return STATUS_CRC_ERROR; + } + } + +raid1write: + // write good data over bad + + if (!Vcb->readonly) { + for (i = 0; i < ci->num_stripes; i++) { + if (context->stripes[i].status == ReadDataStatus_CRCError && devices[i] && !devices[i]->readonly) { + Status = write_data_phys(devices[i]->devobj, cis[i].offset + stripestart[i], buf, length); + + if (!NT_SUCCESS(Status)) + WARN("write_data_phys returned %08x\n", Status); + } + } + } + + return STATUS_SUCCESS; } - if (ci->type & BLOCK_FLAG_DUPLICATE) { - type = BLOCK_FLAG_DUPLICATE; - allowed_missing = 0; - } else if (ci->type & BLOCK_FLAG_RAID0) { - type = 
BLOCK_FLAG_RAID0; - allowed_missing = 0; - } else if (ci->type & BLOCK_FLAG_RAID1) { - type = BLOCK_FLAG_DUPLICATE; - allowed_missing = 1; - } else if (ci->type & BLOCK_FLAG_RAID10) { - type = BLOCK_FLAG_RAID10; - allowed_missing = 1; - } else if (ci->type & BLOCK_FLAG_RAID5) { - type = BLOCK_FLAG_RAID5; - allowed_missing = 1; - } else if (ci->type & BLOCK_FLAG_RAID6) { - type = BLOCK_FLAG_RAID6; - allowed_missing = 2; - } else { // SINGLE - type = BLOCK_FLAG_DUPLICATE; - allowed_missing = 0; + // check if any of the stripes succeeded + + for (i = 0; i < ci->num_stripes; i++) { + if (context->stripes[i].status == ReadDataStatus_Success) { + RtlCopyMemory(buf, context->stripes[i].buf, length); + return STATUS_SUCCESS; + } } - - cis = (CHUNK_ITEM_STRIPE*)&ci[1]; - - context = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_context), ALLOC_TAG); - if (!context) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; + + // failing that, return the first error we encountered + + for (i = 0; i < ci->num_stripes; i++) { + if (context->stripes[i].status == ReadDataStatus_Error) + return context->stripes[i].iosb.Status; } - RtlZeroMemory(context, sizeof(read_data_context)); - KeInitializeEvent(&context->Event, NotificationEvent, FALSE); + // if we somehow get here, return STATUS_INTERNAL_ERROR - context->stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG); - if (!context->stripes) { + return STATUS_INTERNAL_ERROR; +} + +static NTSTATUS read_data_raid0(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, + CHUNK_ITEM* ci, UINT64* stripestart, UINT64* stripeend, UINT16 startoffstripe) { + UINT64 i; + UINT32 pos, *stripeoff; + UINT8 stripe; + + for (i = 0; i < ci->num_stripes; i++) { + if (context->stripes[i].status == ReadDataStatus_Error) { + WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status); + return context->stripes[i].iosb.Status; + } + } + 
+ pos = 0; + stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG); + if (!stripeoff) { ERR("out of memory\n"); - ExFreePool(context); return STATUS_INSUFFICIENT_RESOURCES; } - RtlZeroMemory(context->stripes, sizeof(read_data_stripe) * ci->num_stripes); + RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes); - context->buflen = length; - context->num_stripes = ci->num_stripes; - context->stripes_left = context->num_stripes; - context->sector_size = Vcb->superblock.sector_size; - context->csum = csum; - context->tree = is_tree; - context->type = type; + stripe = startoffstripe; + while (pos < length) { + if (pos == 0) { + UINT32 readlen = min(stripeend[stripe] - stripestart[stripe], ci->stripe_length - (stripestart[stripe] % ci->stripe_length)); + + RtlCopyMemory(buf, context->stripes[stripe].buf, readlen); + stripeoff[stripe] += readlen; + pos += readlen; + } else if (length - pos < ci->stripe_length) { + RtlCopyMemory(buf + pos, &context->stripes[stripe].buf[stripeoff[stripe]], length - pos); + pos = length; + } else { + RtlCopyMemory(buf + pos, &context->stripes[stripe].buf[stripeoff[stripe]], ci->stripe_length); + stripeoff[stripe] += ci->stripe_length; + pos += ci->stripe_length; + } + + stripe = (stripe + 1) % ci->num_stripes; + } - stripestart = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT64) * ci->num_stripes, ALLOC_TAG); - if (!stripestart) { - ERR("out of memory\n"); - ExFreePool(context); - return STATUS_INSUFFICIENT_RESOURCES; + ExFreePool(stripeoff); + + // FIXME - handle the case where one of the stripes doesn't read everything, i.e. 
Irp->IoStatus.Information is short + + if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries + tree_header* th = (tree_header*)buf; + UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); + + if (crc32 != *((UINT32*)th->csum)) { + WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum)); + return STATUS_CRC_ERROR; + } else if (addr != th->address) { + WARN("address of tree was %llx, not %llx as expected\n", th->address, addr); + return STATUS_CRC_ERROR; + } + } else if (context->csum) { + NTSTATUS Status; +#ifdef DEBUG_STATS + LARGE_INTEGER time1, time2; + + time1 = KeQueryPerformanceCounter(NULL); +#endif + Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); + + if (Status == STATUS_CRC_ERROR) { + WARN("checksum error\n"); + return Status; + } else if (!NT_SUCCESS(Status)) { + ERR("check_csum returned %08x\n", Status); + return Status; + } +#ifdef DEBUG_STATS + time2 = KeQueryPerformanceCounter(NULL); + + Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; +#endif } - stripeend = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT64) * ci->num_stripes, ALLOC_TAG); - if (!stripeend) { + return STATUS_SUCCESS; +} + +static NTSTATUS read_data_raid10(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, PIRP Irp, read_data_context* context, + CHUNK_ITEM* ci, device** devices, UINT64* stripestart, UINT64* stripeend, UINT16 startoffstripe) { + UINT64 i; + NTSTATUS Status; + BOOL checksum_error = FALSE; + UINT32 pos, *stripeoff; + UINT8 stripe; + read_data_stripe** stripes; + + stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG); + if (!stripes) { ERR("out of memory\n"); - ExFreePool(stripestart); - ExFreePool(context); return STATUS_INSUFFICIENT_RESOURCES; } - if (type == BLOCK_FLAG_RAID0) { - UINT64 startoff, endoff; - UINT16 endoffstripe; + 
RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes); + + for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) { + UINT16 j; - get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe); - get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe); + for (j = 0; j < ci->sub_stripes; j++) { + if (context->stripes[i+j].status == ReadDataStatus_Success) { + stripes[i / ci->sub_stripes] = &context->stripes[i+j]; + break; + } + } - for (i = 0; i < ci->num_stripes; i++) { - if (startoffstripe > i) { - stripestart[i] = startoff - (startoff % ci->stripe_length) + ci->stripe_length; - } else if (startoffstripe == i) { - stripestart[i] = startoff; - } else { - stripestart[i] = startoff - (startoff % ci->stripe_length); + if (!stripes[i / ci->sub_stripes]) { + for (j = 0; j < ci->sub_stripes; j++) { + if (context->stripes[i+j].status == ReadDataStatus_Error) { + // both stripes must have errored if we get here + WARN("stripe %llu returned error %08x\n", i+j, context->stripes[i+j].iosb.Status); + ExFreePool(stripes); + return context->stripes[i].iosb.Status; + } } + } + } + + pos = 0; + stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG); + if (!stripeoff) { + ERR("out of memory\n"); + ExFreePool(stripes); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes); + + stripe = startoffstripe / ci->sub_stripes; + while (pos < length) { + if (pos == 0) { + UINT32 readlen = min(stripeend[stripe * ci->sub_stripes] - stripestart[stripe * ci->sub_stripes], ci->stripe_length - (stripestart[stripe * ci->sub_stripes] % ci->stripe_length)); - if (endoffstripe > i) { - stripeend[i] = endoff - (endoff % ci->stripe_length) + ci->stripe_length; - } else if (endoffstripe == i) { - stripeend[i] = endoff + 1; - } else { - stripeend[i] = endoff - (endoff 
% ci->stripe_length); - } + RtlCopyMemory(buf, stripes[stripe]->buf, readlen); + stripeoff[stripe] += readlen; + pos += readlen; + } else if (length - pos < ci->stripe_length) { + RtlCopyMemory(buf + pos, &stripes[stripe]->buf[stripeoff[stripe]], length - pos); + + pos = length; + } else { + RtlCopyMemory(buf + pos, &stripes[stripe]->buf[stripeoff[stripe]], ci->stripe_length); + stripeoff[stripe] += ci->stripe_length; + + pos += ci->stripe_length; } - } else if (type == BLOCK_FLAG_RAID10) { - UINT64 startoff, endoff; - UINT16 endoffstripe, j; - get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe); - get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe); + stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes); + } + + if (context->tree) { + tree_header* th = (tree_header*)buf; + UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); - if ((ci->num_stripes % ci->sub_stripes) != 0) { - ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes); - Status = STATUS_INTERNAL_ERROR; - goto exit; + if (crc32 != *((UINT32*)th->csum)) { + WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum)); + checksum_error = TRUE; + stripes[startoffstripe]->status = ReadDataStatus_CRCError; + } else if (addr != th->address) { + WARN("address of tree was %llx, not %llx as expected\n", th->address, addr); + checksum_error = TRUE; + stripes[startoffstripe]->status = ReadDataStatus_CRCError; } + } else if (context->csum) { + NTSTATUS Status; +#ifdef DEBUG_STATS + LARGE_INTEGER time1, time2; - context->firstoff = (startoff % ci->stripe_length) / Vcb->superblock.sector_size; - context->startoffstripe = startoffstripe; - context->sectors_per_stripe = ci->stripe_length / Vcb->superblock.sector_size; - - startoffstripe *= ci->sub_stripes; - 
endoffstripe *= ci->sub_stripes; + time1 = KeQueryPerformanceCounter(NULL); +#endif + Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); - for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) { - if (startoffstripe > i) { - stripestart[i] = startoff - (startoff % ci->stripe_length) + ci->stripe_length; - } else if (startoffstripe == i) { - stripestart[i] = startoff; - } else { - stripestart[i] = startoff - (startoff % ci->stripe_length); - } - - if (endoffstripe > i) { - stripeend[i] = endoff - (endoff % ci->stripe_length) + ci->stripe_length; - } else if (endoffstripe == i) { - stripeend[i] = endoff + 1; - } else { - stripeend[i] = endoff - (endoff % ci->stripe_length); - } - - for (j = 1; j < ci->sub_stripes; j++) { - stripestart[i+j] = stripestart[i]; - stripeend[i+j] = stripeend[i]; - } + if (Status == STATUS_CRC_ERROR) + checksum_error = TRUE; + else if (!NT_SUCCESS(Status)) { + ERR("check_csum returned %08x\n", Status); + return Status; } +#ifdef DEBUG_STATS + time2 = KeQueryPerformanceCounter(NULL); - context->stripes_cancel = 1; - } else if (type == BLOCK_FLAG_DUPLICATE) { - for (i = 0; i < ci->num_stripes; i++) { - stripestart[i] = addr - offset; - stripeend[i] = stripestart[i] + length; - } + Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; +#endif + } + + if (checksum_error) { + CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; +#ifdef DEBUG_STATS + LARGE_INTEGER time1, time2; +#endif - context->stripes_cancel = ci->num_stripes - 1; - } else if (type == BLOCK_FLAG_RAID5) { - UINT64 startoff, endoff; - UINT16 endoffstripe; - UINT64 start = 0xffffffffffffffff, end = 0; + // FIXME - update dev stats - get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe); - get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe); + WARN("checksum error\n"); - for (i = 0; i < ci->num_stripes - 1; i++) { - UINT64 ststart, stend; 
- - if (startoffstripe > i) { - ststart = startoff - (startoff % ci->stripe_length) + ci->stripe_length; - } else if (startoffstripe == i) { - ststart = startoff; - } else { - ststart = startoff - (startoff % ci->stripe_length); - } - - if (endoffstripe > i) { - stend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; - } else if (endoffstripe == i) { - stend = endoff + 1; - } else { - stend = endoff - (endoff % ci->stripe_length); - } + if (!context->tree) { + RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes); - if (ststart != stend) { - if (ststart < start) { - start = ststart; - firststripesize = ci->stripe_length - (ststart % ci->stripe_length); + // find out which stripe the error was on + pos = 0; + stripe = startoffstripe / ci->sub_stripes; + while (pos < length) { + if (pos == 0) { + UINT32 readlen = min(stripeend[stripe * ci->sub_stripes] - stripestart[stripe * ci->sub_stripes], ci->stripe_length - (stripestart[stripe * ci->sub_stripes] % ci->stripe_length)); + + stripeoff[stripe] += readlen; + pos += readlen; + + for (i = 0; i < readlen / Vcb->superblock.sector_size; i++) { + UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + if (crc32 != context->csum[i]) + stripes[stripe]->status = ReadDataStatus_CRCError; + } + } else if (length - pos < ci->stripe_length) { + for (i = 0; i < (length - pos) / Vcb->superblock.sector_size; i++) { + UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + pos + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + if (crc32 != context->csum[(pos / Vcb->superblock.sector_size) + i]) + stripes[stripe]->status = ReadDataStatus_CRCError; + } + + pos = length; + } else { + stripeoff[stripe] += ci->stripe_length; + + for (i = 0; i < ci->stripe_length / Vcb->superblock.sector_size; i++) { + UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + pos + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + if (crc32 != 
context->csum[(pos / Vcb->superblock.sector_size) + i]) + stripes[stripe]->status = ReadDataStatus_CRCError; + } + + pos += ci->stripe_length; } - if (stend > end) - end = stend; + stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes); } } + context->stripes_left = 0; + for (i = 0; i < ci->num_stripes; i++) { - stripestart[i] = start; - stripeend[i] = end; + if (context->stripes[i].status == ReadDataStatus_CRCError) { + UINT16 other_stripe = (i % 1) ? (i - 1) : (i + 1); + + if (context->stripes[other_stripe].status == ReadDataStatus_Cancelled) { + PIO_STACK_LOCATION IrpSp; + + // re-run Irp that we cancelled + + if (context->stripes[other_stripe].Irp) { + if (devices[other_stripe]->devobj->Flags & DO_DIRECT_IO) { + MmUnlockPages(context->stripes[other_stripe].Irp->MdlAddress); + IoFreeMdl(context->stripes[other_stripe].Irp->MdlAddress); + } + IoFreeIrp(context->stripes[other_stripe].Irp); + } + + if (!Irp) { + context->stripes[other_stripe].Irp = IoAllocateIrp(devices[other_stripe]->devobj->StackSize, FALSE); + + if (!context->stripes[other_stripe].Irp) { + ERR("IoAllocateIrp failed\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + } else { + context->stripes[other_stripe].Irp = IoMakeAssociatedIrp(Irp, devices[other_stripe]->devobj->StackSize); + + if (!context->stripes[other_stripe].Irp) { + ERR("IoMakeAssociatedIrp failed\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + } + + IrpSp = IoGetNextIrpStackLocation(context->stripes[other_stripe].Irp); + IrpSp->MajorFunction = IRP_MJ_READ; + + if (devices[other_stripe]->devobj->Flags & DO_BUFFERED_IO) { + FIXME("FIXME - buffered IO\n"); + } else if (devices[other_stripe]->devobj->Flags & DO_DIRECT_IO) { + context->stripes[other_stripe].Irp->MdlAddress = IoAllocateMdl(context->stripes[other_stripe].buf, stripeend[other_stripe] - stripestart[other_stripe], FALSE, FALSE, NULL); + if (!context->stripes[other_stripe].Irp->MdlAddress) { + ERR("IoAllocateMdl failed\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + 
MmProbeAndLockPages(context->stripes[other_stripe].Irp->MdlAddress, KernelMode, IoWriteAccess); + } else { + context->stripes[other_stripe].Irp->UserBuffer = context->stripes[other_stripe].buf; + } + + IrpSp->Parameters.Read.Length = stripeend[other_stripe] - stripestart[other_stripe]; + IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[other_stripe] + cis[other_stripe].offset; + + context->stripes[other_stripe].Irp->UserIosb = &context->stripes[other_stripe].iosb; + + IoSetCompletionRoutine(context->stripes[other_stripe].Irp, read_data_completion, &context->stripes[other_stripe], TRUE, TRUE, TRUE); + + context->stripes_left++; + context->stripes[other_stripe].status = ReadDataStatus_Pending; + } + } } - context->stripes_cancel = Vcb->options.raid5_recalculation; - } else if (type == BLOCK_FLAG_RAID6) { - UINT64 startoff, endoff; - UINT16 endoffstripe; - UINT64 start = 0xffffffffffffffff, end = 0; + if (context->stripes_left == 0) { + WARN("could not recover from checksum error\n"); + ExFreePool(stripes); + ExFreePool(stripeoff); + return STATUS_CRC_ERROR; + } - get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe); - get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe); + context->stripes_cancel = 0; + KeClearEvent(&context->Event); - for (i = 0; i < ci->num_stripes - 2; i++) { - UINT64 ststart, stend; - - if (startoffstripe > i) { - ststart = startoff - (startoff % ci->stripe_length) + ci->stripe_length; - } else if (startoffstripe == i) { - ststart = startoff; - } else { - ststart = startoff - (startoff % ci->stripe_length); - } - - if (endoffstripe > i) { - stend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; - } else if (endoffstripe == i) { - stend = endoff + 1; - } else { - stend = endoff - (endoff % ci->stripe_length); +#ifdef DEBUG_STATS + if (!context->tree) + time1 = KeQueryPerformanceCounter(NULL); +#endif + + for (i = 0; i < 
ci->num_stripes; i++) { + if (context->stripes[i].status == ReadDataStatus_Pending) { + IoCallDriver(devices[i]->devobj, context->stripes[i].Irp); } + } + + KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL); + +#ifdef DEBUG_STATS + if (!context->tree) { + time2 = KeQueryPerformanceCounter(NULL); - if (ststart != stend) { - if (ststart < start) { - start = ststart; - firststripesize = ci->stripe_length - (ststart % ci->stripe_length); + Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart; + } +#endif + + for (i = 0; i < ci->num_stripes; i++) { + if (context->stripes[i].status == ReadDataStatus_CRCError) { + UINT16 other_stripe = (i % 1) ? (i - 1) : (i + 1); + + if (context->stripes[other_stripe].status != ReadDataStatus_Success) { + WARN("could not recover from checksum error\n"); + ExFreePool(stripes); + ExFreePool(stripeoff); + return STATUS_CRC_ERROR; } + } + } + + RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes); + + pos = 0; + stripe = startoffstripe / ci->sub_stripes; + while (pos < length) { + if (pos == 0) { + UINT32 readlen = min(stripeend[stripe * ci->sub_stripes] - stripestart[stripe * ci->sub_stripes], ci->stripe_length - (stripestart[stripe * ci->sub_stripes] % ci->stripe_length)); - if (stend > end) - end = stend; + stripeoff[stripe] += readlen; + pos += readlen; + + if (context->csum && stripes[stripe]->status == ReadDataStatus_CRCError) { + for (i = 0; i < readlen / Vcb->superblock.sector_size; i++) { + UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + if (crc32 != context->csum[i]) { + UINT16 other_stripe = (stripe * ci->sub_stripes) + (context->stripes[stripe * ci->sub_stripes].status == ReadDataStatus_CRCError ? 
1 : 0); + UINT32 crc32b = ~calc_crc32c(0xffffffff, context->stripes[other_stripe].buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + if (crc32b == context->csum[i]) { + RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), context->stripes[other_stripe].buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + RtlCopyMemory(stripes[stripe]->buf + (i * Vcb->superblock.sector_size), context->stripes[other_stripe].buf + (i * Vcb->superblock.sector_size), + Vcb->superblock.sector_size); + stripes[stripe]->rewrite = TRUE; + } else { + WARN("could not recover from checksum error\n"); + ExFreePool(stripes); + ExFreePool(stripeoff); + return STATUS_CRC_ERROR; + } + } + } + } else if (context->tree) { + UINT16 other_stripe = (stripe * ci->sub_stripes) + (context->stripes[stripe * ci->sub_stripes].status == ReadDataStatus_CRCError ? 1 : 0); + tree_header* th = (tree_header*)buf; + UINT32 crc32; + + RtlCopyMemory(buf, context->stripes[other_stripe].buf, readlen); + + crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); + + if (addr != th->address || crc32 != *((UINT32*)th->csum)) { + WARN("could not recover from checksum error\n"); + ExFreePool(stripes); + ExFreePool(stripeoff); + return STATUS_CRC_ERROR; + } + + RtlCopyMemory(stripes[stripe]->buf, buf, readlen); + stripes[stripe]->rewrite = TRUE; + } + } else if (length - pos < ci->stripe_length) { + if (context->csum && stripes[stripe]->status == ReadDataStatus_CRCError) { + for (i = 0; i < (length - pos) / Vcb->superblock.sector_size; i++) { + UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + pos + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + if (crc32 != context->csum[(pos / Vcb->superblock.sector_size) + i]) { + UINT16 other_stripe = (stripe * ci->sub_stripes) + (context->stripes[stripe * ci->sub_stripes].status == ReadDataStatus_CRCError ? 
1 : 0); + UINT32 crc32b = ~calc_crc32c(0xffffffff, &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], + Vcb->superblock.sector_size); + + if (crc32b == context->csum[i]) { + RtlCopyMemory(buf + pos + (i * Vcb->superblock.sector_size), + &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], Vcb->superblock.sector_size); + RtlCopyMemory(&stripes[stripe]->buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], + &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], + Vcb->superblock.sector_size); + stripes[stripe]->rewrite = TRUE; + } else { + WARN("could not recover from checksum error\n"); + ExFreePool(stripes); + ExFreePool(stripeoff); + return STATUS_CRC_ERROR; + } + } + } + } + + pos = length; + } else { + if (context->csum && stripes[stripe]->status == ReadDataStatus_CRCError) { + for (i = 0; i < ci->stripe_length / Vcb->superblock.sector_size; i++) { + UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + pos + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + + if (crc32 != context->csum[(pos / Vcb->superblock.sector_size) + i]) { + UINT16 other_stripe = (stripe * ci->sub_stripes) + (context->stripes[stripe * ci->sub_stripes].status == ReadDataStatus_CRCError ? 
1 : 0); + UINT32 crc32b = ~calc_crc32c(0xffffffff, &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], + Vcb->superblock.sector_size); + + if (crc32b == context->csum[i]) { + RtlCopyMemory(buf + pos + (i * Vcb->superblock.sector_size), + &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], Vcb->superblock.sector_size); + RtlCopyMemory(&stripes[stripe]->buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], + &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], + Vcb->superblock.sector_size); + stripes[stripe]->rewrite = TRUE; + } else { + WARN("could not recover from checksum error\n"); + ExFreePool(stripes); + ExFreePool(stripeoff); + return STATUS_CRC_ERROR; + } + } + } + } + + stripeoff[stripe] += ci->stripe_length; + pos += ci->stripe_length; } + + stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes); } - for (i = 0; i < ci->num_stripes; i++) { - stripestart[i] = start; - stripeend[i] = end; - } + // write good data over bad - context->stripes_cancel = Vcb->options.raid6_recalculation; + if (!Vcb->readonly) { + for (i = 0; i < ci->num_stripes; i++) { + if (context->stripes[i].rewrite && devices[i] && !devices[i]->readonly) { + Status = write_data_phys(devices[i]->devobj, cis[i].offset + stripestart[i], context->stripes[i].buf, stripeend[i] - stripestart[i]); + + if (!NT_SUCCESS(Status)) + WARN("write_data_phys returned %08x\n", Status); + } + } + } } - KeInitializeSpinLock(&context->spin_lock); + ExFreePool(stripes); + ExFreePool(stripeoff); - context->address = addr; + // FIXME - handle the case where one of the stripes doesn't read everything, i.e. 
Irp->IoStatus.Information is short + + return STATUS_SUCCESS; +} + +static NTSTATUS read_data_raid5(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, PIRP Irp, read_data_context* context, CHUNK_ITEM* ci, + device** devices, UINT64* stripestart, UINT64* stripeend, UINT64 offset, UINT32 firststripesize, BOOL check_nocsum_parity) { + UINT32 pos, skip; + NTSTATUS Status; + int num_errors = 0; + UINT64 i, off, stripeoff, origoff; + BOOL needs_reconstruct = FALSE; + UINT64 reconstruct_stripe; + BOOL checksum_error = FALSE; for (i = 0; i < ci->num_stripes; i++) { - if (!devices[i] || stripestart[i] == stripeend[i]) { - context->stripes[i].status = ReadDataStatus_MissingDevice; - context->stripes[i].buf = NULL; - context->stripes_left--; - - if (!devices[i]) - missing_devices++; + if (context->stripes[i].status == ReadDataStatus_Error) { + num_errors++; + if (num_errors > 1) + break; } } - - if (missing_devices > allowed_missing) { - ERR("not enough devices to service request (%u missing)\n", missing_devices); - Status = STATUS_UNEXPECTED_IO_ERROR; - goto exit; + + if (num_errors > 1) { + for (i = 0; i < ci->num_stripes; i++) { + if (context->stripes[i].status == ReadDataStatus_Error) { + WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status); + return context->stripes[i].iosb.Status; + } + } } + off = addr - offset; + off -= off % ((ci->num_stripes - 1) * ci->stripe_length); + skip = addr - offset - off; + origoff = off; + for (i = 0; i < ci->num_stripes; i++) { - PIO_STACK_LOCATION IrpSp; + if (context->stripes[i].status == ReadDataStatus_Cancelled) { + if (needs_reconstruct) { + ERR("more than one stripe needs reconstruction\n"); + return STATUS_INTERNAL_ERROR; + } else { + needs_reconstruct = TRUE; + reconstruct_stripe = i; + } + } + } + + if (needs_reconstruct) { + TRACE("reconstructing stripe %u\n", reconstruct_stripe); - if (devices[i] && stripestart[i] != stripeend[i]) { - context->stripes[i].context = (struct 
read_data_context*)context; - context->stripes[i].buf = ExAllocatePoolWithTag(NonPagedPool, stripeend[i] - stripestart[i], ALLOC_TAG); + stripeoff = 0; + + raid5_reconstruct(off, skip, context, ci, &stripeoff, stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe], TRUE, firststripesize, reconstruct_stripe); + + while (stripeoff < stripeend[0] - stripestart[0]) { + off += (ci->num_stripes - 1) * ci->stripe_length; + raid5_reconstruct(off, 0, context, ci, &stripeoff, stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe], FALSE, 0, reconstruct_stripe); + } + + off = addr - offset; + off -= off % ((ci->num_stripes - 1) * ci->stripe_length); + } + + pos = 0; + stripeoff = 0; + raid5_decode(off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize); + + while (pos < length) { + off += (ci->num_stripes - 1) * ci->stripe_length; + raid5_decode(off, 0, context, ci, &stripeoff, buf, &pos, length, 0); + } + + if (context->tree) { + tree_header* th = (tree_header*)buf; + UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); + + if (addr != th->address || crc32 != *((UINT32*)th->csum)) + checksum_error = TRUE; + } else if (context->csum) { +#ifdef DEBUG_STATS + LARGE_INTEGER time1, time2; + + time1 = KeQueryPerformanceCounter(NULL); +#endif + Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); + + if (Status == STATUS_CRC_ERROR) { + WARN("checksum error\n"); + checksum_error = TRUE; + } else if (!NT_SUCCESS(Status)) { + ERR("check_csum returned %08x\n", Status); + return Status; + } + +#ifdef DEBUG_STATS + time2 = KeQueryPerformanceCounter(NULL); + + Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; +#endif + } + + if (checksum_error) { + CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; + + if (needs_reconstruct) { + PIO_STACK_LOCATION IrpSp; +#ifdef DEBUG_STATS + LARGE_INTEGER time1, time2; +#endif - if (!context->stripes[i].buf) { - ERR("out 
of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; - } + // re-run Irp that we cancelled - if (type == BLOCK_FLAG_RAID10) { - context->stripes[i].stripenum = i / ci->sub_stripes; + if (context->stripes[reconstruct_stripe].Irp) { + if (devices[reconstruct_stripe]->devobj->Flags & DO_DIRECT_IO) { + MmUnlockPages(context->stripes[reconstruct_stripe].Irp->MdlAddress); + IoFreeMdl(context->stripes[reconstruct_stripe].Irp->MdlAddress); + } + IoFreeIrp(context->stripes[reconstruct_stripe].Irp); } - + if (!Irp) { - context->stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE); + context->stripes[reconstruct_stripe].Irp = IoAllocateIrp(devices[reconstruct_stripe]->devobj->StackSize, FALSE); - if (!context->stripes[i].Irp) { + if (!context->stripes[reconstruct_stripe].Irp) { ERR("IoAllocateIrp failed\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; + return STATUS_INSUFFICIENT_RESOURCES; } } else { - context->stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize); + context->stripes[reconstruct_stripe].Irp = IoMakeAssociatedIrp(Irp, devices[reconstruct_stripe]->devobj->StackSize); - if (!context->stripes[i].Irp) { + if (!context->stripes[reconstruct_stripe].Irp) { ERR("IoMakeAssociatedIrp failed\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; + return STATUS_INSUFFICIENT_RESOURCES; } } - IrpSp = IoGetNextIrpStackLocation(context->stripes[i].Irp); + IrpSp = IoGetNextIrpStackLocation(context->stripes[reconstruct_stripe].Irp); IrpSp->MajorFunction = IRP_MJ_READ; - if (devices[i]->devobj->Flags & DO_BUFFERED_IO) { + if (devices[reconstruct_stripe]->devobj->Flags & DO_BUFFERED_IO) { FIXME("FIXME - buffered IO\n"); - } else if (devices[i]->devobj->Flags & DO_DIRECT_IO) { - context->stripes[i].Irp->MdlAddress = IoAllocateMdl(context->stripes[i].buf, stripeend[i] - stripestart[i], FALSE, FALSE, NULL); - if (!context->stripes[i].Irp->MdlAddress) { + } else if (devices[reconstruct_stripe]->devobj->Flags & 
DO_DIRECT_IO) { + context->stripes[reconstruct_stripe].Irp->MdlAddress = IoAllocateMdl(context->stripes[reconstruct_stripe].buf, + stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe], FALSE, FALSE, NULL); + if (!context->stripes[reconstruct_stripe].Irp->MdlAddress) { ERR("IoAllocateMdl failed\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; + return STATUS_INSUFFICIENT_RESOURCES; } - MmProbeAndLockPages(context->stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess); + MmProbeAndLockPages(context->stripes[reconstruct_stripe].Irp->MdlAddress, KernelMode, IoWriteAccess); } else { - context->stripes[i].Irp->UserBuffer = context->stripes[i].buf; + context->stripes[reconstruct_stripe].Irp->UserBuffer = context->stripes[reconstruct_stripe].buf; } - IrpSp->Parameters.Read.Length = stripeend[i] - stripestart[i]; - IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[i] + cis[i].offset; + IrpSp->Parameters.Read.Length = stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe]; + IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[reconstruct_stripe] + cis[reconstruct_stripe].offset; - context->stripes[i].Irp->UserIosb = &context->stripes[i].iosb; + context->stripes[reconstruct_stripe].Irp->UserIosb = &context->stripes[reconstruct_stripe].iosb; - IoSetCompletionRoutine(context->stripes[i].Irp, read_data_completion, &context->stripes[i], TRUE, TRUE, TRUE); + IoSetCompletionRoutine(context->stripes[reconstruct_stripe].Irp, read_data_completion, &context->stripes[reconstruct_stripe], TRUE, TRUE, TRUE); - context->stripes[i].status = ReadDataStatus_Pending; - } - } - + context->stripes[reconstruct_stripe].status = ReadDataStatus_Pending; + + context->stripes_left = 1; + KeClearEvent(&context->Event); + #ifdef DEBUG_STATS - if (!is_tree) - time1 = KeQueryPerformanceCounter(NULL); + if (!context->tree) + time1 = KeQueryPerformanceCounter(NULL); #endif - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status != 
ReadDataStatus_MissingDevice) { - IoCallDriver(devices[i]->devobj, context->stripes[i].Irp); - } - } - KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL); - + IoCallDriver(devices[reconstruct_stripe]->devobj, context->stripes[reconstruct_stripe].Irp); + + KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL); + #ifdef DEBUG_STATS - if (!is_tree) { - time2 = KeQueryPerformanceCounter(NULL); - - Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart; - } -#endif - - // check if any of the devices return a "user-induced" error - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context->stripes[i].iosb.Status)) { - if (Irp && context->stripes[i].iosb.Status == STATUS_VERIFY_REQUIRED) { - PDEVICE_OBJECT dev; - - dev = IoGetDeviceToVerify(Irp->Tail.Overlay.Thread); - IoSetDeviceToVerify(Irp->Tail.Overlay.Thread, NULL); - - if (!dev) { - dev = IoGetDeviceToVerify(PsGetCurrentThread()); - IoSetDeviceToVerify(PsGetCurrentThread(), NULL); - } - - dev = Vcb->Vpb ? 
Vcb->Vpb->RealDevice : NULL; + if (!context->tree) { + time2 = KeQueryPerformanceCounter(NULL); - if (dev) - IoVerifyVolume(dev, FALSE); + Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart; } -// IoSetHardErrorOrVerifyDevice(context->stripes[i].Irp, devices[i]->devobj); - - Status = context->stripes[i].iosb.Status; - goto exit; - } - } - - if (type == BLOCK_FLAG_RAID0) { - UINT32 pos, *stripeoff; - UINT8 stripe; - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Error) { - WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status); - Status = context->stripes[i].iosb.Status; - goto exit; +#endif + + if (context->stripes[reconstruct_stripe].status != ReadDataStatus_Success) { + ERR("unrecoverable checksum error\n"); + return STATUS_CRC_ERROR; } } - pos = 0; - stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG); - if (!stripeoff) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; + if (context->tree) { + off = origoff; + pos = 0; + stripeoff = 0; + if (!raid5_decode_with_checksum_metadata(addr, off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize, Vcb->superblock.node_size)) { + ERR("unrecoverable metadata checksum error\n"); + return STATUS_CRC_ERROR; + } + } else { + off = origoff; + pos = 0; + stripeoff = 0; + if (!raid5_decode_with_checksum(off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize, context->csum, Vcb->superblock.sector_size)) + return STATUS_CRC_ERROR; + + while (pos < length) { + off += (ci->num_stripes - 1) * ci->stripe_length; + if (!raid5_decode_with_checksum(off, 0, context, ci, &stripeoff, buf, &pos, length, 0, context->csum, Vcb->superblock.sector_size)) + return STATUS_CRC_ERROR; + } } - RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes); + // write good data over bad - stripe = startoffstripe; - while (pos < length) { - if (pos == 0) { - UINT32 readlen = 
min(stripeend[stripe] - stripestart[stripe], ci->stripe_length - (stripestart[stripe] % ci->stripe_length)); - - RtlCopyMemory(buf, context->stripes[stripe].buf, readlen); - stripeoff[stripe] += readlen; - pos += readlen; - } else if (length - pos < ci->stripe_length) { - RtlCopyMemory(buf + pos, &context->stripes[stripe].buf[stripeoff[stripe]], length - pos); - pos = length; - } else { - RtlCopyMemory(buf + pos, &context->stripes[stripe].buf[stripeoff[stripe]], ci->stripe_length); - stripeoff[stripe] += ci->stripe_length; - pos += ci->stripe_length; + if (!Vcb->readonly) { + for (i = 0; i < ci->num_stripes; i++) { + if (context->stripes[i].rewrite && devices[i] && !devices[i]->readonly) { + Status = write_data_phys(devices[i]->devobj, cis[i].offset + stripestart[i], context->stripes[i].buf, stripeend[i] - stripestart[i]); + + if (!NT_SUCCESS(Status)) + WARN("write_data_phys returned %08x\n", Status); + } } - - stripe = (stripe + 1) % ci->num_stripes; } + } + + if (check_nocsum_parity && !context->tree && !context->csum) { + UINT32* parity_buf; - ExFreePool(stripeoff); + // We are reading a nodatacsum extent. Even though there's no checksum, we + // can still identify errors by checking if the parity is consistent. - // FIXME - handle the case where one of the stripes doesn't read everything, i.e. 
Irp->IoStatus.Information is short + parity_buf = ExAllocatePoolWithTag(NonPagedPool, stripeend[0] - stripestart[0], ALLOC_TAG); - if (is_tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries - tree_header* th = (tree_header*)buf; - UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); - - if (addr != th->address || crc32 != *((UINT32*)th->csum)) { - WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum)); - Status = STATUS_CRC_ERROR; - goto exit; - } - } else if (csum) { -#ifdef DEBUG_STATS - time1 = KeQueryPerformanceCounter(NULL); -#endif - for (i = 0; i < length / Vcb->superblock.sector_size; i++) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - - if (crc32 != csum[i]) { - WARN("checksum error (%08x != %08x)\n", crc32, csum[i]); - Status = STATUS_CRC_ERROR; - goto exit; - } + if (!parity_buf) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlCopyMemory(parity_buf, context->stripes[0].buf, stripeend[0] - stripestart[0]); + + for (i = 0; i < ci->num_stripes; i++) { + do_xor((UINT8*)parity_buf, context->stripes[i].buf, stripeend[0] - stripestart[0]); + } + + for (i = 0; i < (stripeend[0] - stripestart[0]) / sizeof(UINT32); i++) { + if (parity_buf[i] != 0) { + ERR("parity error on nodatacsum inode\n"); + ExFreePool(parity_buf); + return STATUS_CRC_ERROR; } -#ifdef DEBUG_STATS - time2 = KeQueryPerformanceCounter(NULL); - - Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; -#endif } - Status = STATUS_SUCCESS; - } else if (type == BLOCK_FLAG_RAID10) { - BOOL checksum_error = FALSE; - UINT32 pos, *stripeoff; - UINT8 stripe; - read_data_stripe** stripes; + ExFreePool(parity_buf); + } + + return STATUS_SUCCESS; +} - stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG); - if (!stripes) { - ERR("out of memory\n"); - 
Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; +static NTSTATUS read_data_raid6(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, PIRP Irp, read_data_context* context, CHUNK_ITEM* ci, + device** devices, UINT64* stripestart, UINT64* stripeend, UINT64 offset, UINT32 firststripesize, BOOL check_nocsum_parity) { + NTSTATUS Status; + UINT32 pos, skip; + int num_errors = 0; + UINT64 i, off, stripeoff, origoff; + UINT8 needs_reconstruct = 0; + UINT16 missing1, missing2; + BOOL checksum_error = FALSE; + + for (i = 0; i < ci->num_stripes; i++) { + if (context->stripes[i].status == ReadDataStatus_Error) { + num_errors++; + if (num_errors > 2) + break; + } + } + + if (num_errors > 2) { + for (i = 0; i < ci->num_stripes; i++) { + if (context->stripes[i].status == ReadDataStatus_Error) { + WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status); + return context->stripes[i].iosb.Status; + } + } + } + + off = addr - offset; + off -= off % ((ci->num_stripes - 2) * ci->stripe_length); + skip = addr - offset - off; + origoff = off; + + for (i = 0; i < ci->num_stripes; i++) { + if (context->stripes[i].status == ReadDataStatus_Cancelled) { + if (needs_reconstruct == 2) { + ERR("more than two stripes need reconstruction\n"); + return STATUS_INTERNAL_ERROR; + } else if (needs_reconstruct == 1) { + needs_reconstruct++; + missing2 = i; + } else { + needs_reconstruct++; + missing1 = i; + } } + } + + if (needs_reconstruct > 0) { + stripeoff = 0; - RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes); + if (needs_reconstruct == 2) { + TRACE("reconstructing stripes %u and %u\n", missing1, missing2); - for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) { - UINT16 j; + raid6_reconstruct2(off, skip, context, ci, &stripeoff, stripeend[missing1] - stripestart[missing1], + TRUE, firststripesize, missing1, missing2); - for (j = 0; j < ci->sub_stripes; j++) { - if (context->stripes[i+j].status == ReadDataStatus_Success) { 
- stripes[i / ci->sub_stripes] = &context->stripes[i+j]; - break; - } + while (stripeoff < stripeend[0] - stripestart[0]) { + off += (ci->num_stripes - 2) * ci->stripe_length; + raid6_reconstruct2(off, 0, context, ci, &stripeoff, stripeend[missing1] - stripestart[missing1], + FALSE, 0, missing1, missing2); } + } else { + TRACE("reconstructing stripe %u\n", missing1); - if (!stripes[i / ci->sub_stripes]) { - for (j = 0; j < ci->sub_stripes; j++) { - if (context->stripes[i+j].status == ReadDataStatus_Error) { - // both stripes must have errored if we get here - WARN("stripe %llu returned error %08x\n", i+j, context->stripes[i+j].iosb.Status); - Status = context->stripes[i].iosb.Status; - ExFreePool(stripes); - goto exit; - } - } + raid6_reconstruct1(off, skip, context, ci, &stripeoff, stripeend[missing1] - stripestart[missing1], TRUE, firststripesize, missing1); + + while (stripeoff < stripeend[0] - stripestart[0]) { + off += (ci->num_stripes - 2) * ci->stripe_length; + raid6_reconstruct1(off, 0, context, ci, &stripeoff, stripeend[missing1] - stripestart[missing1], FALSE, 0, missing1); } } - pos = 0; - stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG); - if (!stripeoff) { + off = origoff; + } + + if (check_nocsum_parity && !context->tree && !context->csum) { + UINT8* scratch; + + scratch = ExAllocatePoolWithTag(NonPagedPool, ci->stripe_length, ALLOC_TAG); + if (!scratch) { ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - ExFreePool(stripes); - goto exit; + return STATUS_INSUFFICIENT_RESOURCES; } - RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes); + stripeoff = 0; + Status = check_raid6_nocsum_parity(off, skip, context, ci, &stripeoff, stripeend[0] - stripestart[0], TRUE, firststripesize, scratch); + if (!NT_SUCCESS(Status)) { + ERR("check_raid6_nocsum_parity returned %08x\n", Status); + ExFreePool(scratch); + return Status; + } + + while (stripeoff < 
stripeend[0] - stripestart[0]) { + off += (ci->num_stripes - 2) * ci->stripe_length; + Status = check_raid6_nocsum_parity(off, 0, context, ci, &stripeoff, stripeend[0] - stripestart[0], FALSE, 0, scratch); + + if (!NT_SUCCESS(Status)) { + ERR("check_raid6_nocsum_parity returned %08x\n", Status); + ExFreePool(scratch); + return Status; + } + } - stripe = startoffstripe / ci->sub_stripes; - while (pos < length) { - if (pos == 0) { - UINT32 readlen = min(stripeend[stripe * ci->sub_stripes] - stripestart[stripe * ci->sub_stripes], ci->stripe_length - (stripestart[stripe * ci->sub_stripes] % ci->stripe_length)); - - RtlCopyMemory(buf, stripes[stripe]->buf, readlen); - stripeoff[stripe] += readlen; - pos += readlen; - - if (context->csum) { + ExFreePool(scratch); + + off = origoff; + } + + pos = 0; + stripeoff = 0; + raid6_decode(off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize); + + while (pos < length) { + off += (ci->num_stripes - 2) * ci->stripe_length; + raid6_decode(off, 0, context, ci, &stripeoff, buf, &pos, length, 0); + } + + if (context->tree) { + tree_header* th = (tree_header*)buf; + UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); + + if (addr != th->address || crc32 != *((UINT32*)th->csum)) + checksum_error = TRUE; + } else if (context->csum) { #ifdef DEBUG_STATS - time1 = KeQueryPerformanceCounter(NULL); + LARGE_INTEGER time1, time2; + + time1 = KeQueryPerformanceCounter(NULL); #endif - for (i = 0; i < readlen / Vcb->superblock.sector_size; i++) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - - if (crc32 != csum[i]) { - checksum_error = TRUE; - stripes[stripe]->status = ReadDataStatus_CRCError; - } - } + Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); + + if (Status == STATUS_CRC_ERROR) { + WARN("checksum error\n"); + checksum_error = TRUE; + } else if 
(!NT_SUCCESS(Status)) { + ERR("check_csum returned %08x\n", Status); + return Status; + } #ifdef DEBUG_STATS - time2 = KeQueryPerformanceCounter(NULL); - - Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; + time2 = KeQueryPerformanceCounter(NULL); + + Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; #endif + } + + if (checksum_error) { + CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; + + for (i = 0; i < needs_reconstruct; i++) { + PIO_STACK_LOCATION IrpSp; + UINT16 reconstruct_stripe = i == 0 ? missing1 : missing2; + + // re-run Irps that we cancelled + + if (context->stripes[reconstruct_stripe].Irp) { + if (devices[reconstruct_stripe]->devobj->Flags & DO_DIRECT_IO) { + MmUnlockPages(context->stripes[reconstruct_stripe].Irp->MdlAddress); + IoFreeMdl(context->stripes[reconstruct_stripe].Irp->MdlAddress); } - } else if (length - pos < ci->stripe_length) { - RtlCopyMemory(buf + pos, &stripes[stripe]->buf[stripeoff[stripe]], length - pos); + IoFreeIrp(context->stripes[reconstruct_stripe].Irp); + } + + if (!Irp) { + context->stripes[reconstruct_stripe].Irp = IoAllocateIrp(devices[reconstruct_stripe]->devobj->StackSize, FALSE); - if (context->csum) { -#ifdef DEBUG_STATS - time1 = KeQueryPerformanceCounter(NULL); -#endif - for (i = 0; i < (length - pos) / Vcb->superblock.sector_size; i++) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + pos + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - - if (crc32 != csum[(pos / Vcb->superblock.sector_size) + i]) { - checksum_error = TRUE; - stripes[stripe]->status = ReadDataStatus_CRCError; - } - } -#ifdef DEBUG_STATS - time2 = KeQueryPerformanceCounter(NULL); - - Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; -#endif + if (!context->stripes[reconstruct_stripe].Irp) { + ERR("IoAllocateIrp failed\n"); + return STATUS_INSUFFICIENT_RESOURCES; } - - pos = length; } else { - RtlCopyMemory(buf + pos, &stripes[stripe]->buf[stripeoff[stripe]], ci->stripe_length); - 
stripeoff[stripe] += ci->stripe_length; + context->stripes[reconstruct_stripe].Irp = IoMakeAssociatedIrp(Irp, devices[reconstruct_stripe]->devobj->StackSize); - if (context->csum) { -#ifdef DEBUG_STATS - time1 = KeQueryPerformanceCounter(NULL); -#endif - for (i = 0; i < ci->stripe_length / Vcb->superblock.sector_size; i++) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + pos + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - - if (crc32 != csum[(pos / Vcb->superblock.sector_size) + i]) { - checksum_error = TRUE; - stripes[stripe]->status = ReadDataStatus_CRCError; - } - } -#ifdef DEBUG_STATS - time2 = KeQueryPerformanceCounter(NULL); - - Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; -#endif + if (!context->stripes[reconstruct_stripe].Irp) { + ERR("IoMakeAssociatedIrp failed\n"); + return STATUS_INSUFFICIENT_RESOURCES; } - - pos += ci->stripe_length; } - stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes); - } - - if (is_tree) { - tree_header* th = (tree_header*)buf; - UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); + IrpSp = IoGetNextIrpStackLocation(context->stripes[reconstruct_stripe].Irp); + IrpSp->MajorFunction = IRP_MJ_READ; - if (addr != th->address || crc32 != *((UINT32*)th->csum)) { - WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum)); - checksum_error = TRUE; - stripes[startoffstripe]->status = ReadDataStatus_CRCError; + if (devices[reconstruct_stripe]->devobj->Flags & DO_BUFFERED_IO) { + FIXME("FIXME - buffered IO\n"); + } else if (devices[reconstruct_stripe]->devobj->Flags & DO_DIRECT_IO) { + context->stripes[reconstruct_stripe].Irp->MdlAddress = IoAllocateMdl(context->stripes[reconstruct_stripe].buf, + stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe], FALSE, FALSE, NULL); + if (!context->stripes[reconstruct_stripe].Irp->MdlAddress) { + ERR("IoAllocateMdl failed\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + 
MmProbeAndLockPages(context->stripes[reconstruct_stripe].Irp->MdlAddress, KernelMode, IoWriteAccess); + } else { + context->stripes[reconstruct_stripe].Irp->UserBuffer = context->stripes[reconstruct_stripe].buf; } - } - - if (checksum_error) { - // FIXME - update dev stats - - WARN("checksum error\n"); + + IrpSp->Parameters.Read.Length = stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe]; + IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[reconstruct_stripe] + cis[reconstruct_stripe].offset; - context->stripes_left = 0; + context->stripes[reconstruct_stripe].Irp->UserIosb = &context->stripes[reconstruct_stripe].iosb; - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_CRCError) { - UINT16 other_stripe = (i % 1) ? (i - 1) : (i + 1); - - if (context->stripes[other_stripe].status == ReadDataStatus_Cancelled) { - PIO_STACK_LOCATION IrpSp; - - // re-run Irp that we cancelled - - if (context->stripes[other_stripe].Irp) { - if (devices[other_stripe]->devobj->Flags & DO_DIRECT_IO) { - MmUnlockPages(context->stripes[other_stripe].Irp->MdlAddress); - IoFreeMdl(context->stripes[other_stripe].Irp->MdlAddress); - } - IoFreeIrp(context->stripes[other_stripe].Irp); - } - - if (!Irp) { - context->stripes[other_stripe].Irp = IoAllocateIrp(devices[other_stripe]->devobj->StackSize, FALSE); - - if (!context->stripes[other_stripe].Irp) { - ERR("IoAllocateIrp failed\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; - } - } else { - context->stripes[other_stripe].Irp = IoMakeAssociatedIrp(Irp, devices[other_stripe]->devobj->StackSize); - - if (!context->stripes[other_stripe].Irp) { - ERR("IoMakeAssociatedIrp failed\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; - } - } - - IrpSp = IoGetNextIrpStackLocation(context->stripes[other_stripe].Irp); - IrpSp->MajorFunction = IRP_MJ_READ; - - if (devices[other_stripe]->devobj->Flags & DO_BUFFERED_IO) { - FIXME("FIXME - buffered IO\n"); - } else if 
(devices[other_stripe]->devobj->Flags & DO_DIRECT_IO) { - context->stripes[other_stripe].Irp->MdlAddress = IoAllocateMdl(context->stripes[other_stripe].buf, stripeend[other_stripe] - stripestart[other_stripe], FALSE, FALSE, NULL); - if (!context->stripes[other_stripe].Irp->MdlAddress) { - ERR("IoAllocateMdl failed\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; - } - - MmProbeAndLockPages(context->stripes[other_stripe].Irp->MdlAddress, KernelMode, IoWriteAccess); - } else { - context->stripes[other_stripe].Irp->UserBuffer = context->stripes[other_stripe].buf; - } + IoSetCompletionRoutine(context->stripes[reconstruct_stripe].Irp, read_data_completion, &context->stripes[reconstruct_stripe], TRUE, TRUE, TRUE); - IrpSp->Parameters.Read.Length = stripeend[other_stripe] - stripestart[other_stripe]; - IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[other_stripe] + cis[other_stripe].offset; - - context->stripes[other_stripe].Irp->UserIosb = &context->stripes[other_stripe].iosb; - - IoSetCompletionRoutine(context->stripes[other_stripe].Irp, read_data_completion, &context->stripes[other_stripe], TRUE, TRUE, TRUE); - - context->stripes_left++; - context->stripes[other_stripe].status = ReadDataStatus_Pending; - } - } - } - - if (context->stripes_left == 0) { - WARN("could not recover from checksum error\n"); - ExFreePool(stripes); - ExFreePool(stripeoff); - Status = STATUS_CRC_ERROR; - goto exit; - } + context->stripes[reconstruct_stripe].status = ReadDataStatus_Pending; + } - context->stripes_cancel = 0; + if (needs_reconstruct > 0) { +#ifdef DEBUG_STATS + LARGE_INTEGER time1, time2; +#endif + context->stripes_left = needs_reconstruct; KeClearEvent(&context->Event); #ifdef DEBUG_STATS - if (!is_tree) + if (!context->tree) time1 = KeQueryPerformanceCounter(NULL); #endif - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Pending) { - IoCallDriver(devices[i]->devobj, context->stripes[i].Irp); - } + + for (i = 0; i 
< needs_reconstruct; i++) { + UINT16 reconstruct_stripe = i == 0 ? missing1 : missing2; + + IoCallDriver(devices[reconstruct_stripe]->devobj, context->stripes[reconstruct_stripe].Irp); } KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL); #ifdef DEBUG_STATS - if (!is_tree) { + if (!context->tree) { time2 = KeQueryPerformanceCounter(NULL); Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart; } #endif - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_CRCError) { - UINT16 other_stripe = (i % 1) ? (i - 1) : (i + 1); - - if (context->stripes[other_stripe].status != ReadDataStatus_Success) { - WARN("could not recover from checksum error\n"); - ExFreePool(stripes); - ExFreePool(stripeoff); - Status = STATUS_CRC_ERROR; - goto exit; - } + for (i = 0; i < needs_reconstruct; i++) { + UINT16 reconstruct_stripe = i == 0 ? missing1 : missing2; + + if (context->stripes[reconstruct_stripe].status != ReadDataStatus_Success) { + ERR("unrecoverable checksum error\n"); + return STATUS_CRC_ERROR; } } - - RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes); + } + + off = origoff; + if (context->tree) { pos = 0; - stripe = startoffstripe / ci->sub_stripes; - while (pos < length) { - if (pos == 0) { - UINT32 readlen = min(stripeend[stripe * ci->sub_stripes] - stripestart[stripe * ci->sub_stripes], ci->stripe_length - (stripestart[stripe * ci->sub_stripes] % ci->stripe_length)); - - stripeoff[stripe] += readlen; - pos += readlen; - - if (context->csum && stripes[stripe]->status == ReadDataStatus_CRCError) { - for (i = 0; i < readlen / Vcb->superblock.sector_size; i++) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - - if (crc32 != csum[i]) { - UINT16 other_stripe = (stripe * ci->sub_stripes) + (context->stripes[stripe * ci->sub_stripes].status == ReadDataStatus_CRCError ? 
1 : 0); - UINT32 crc32b = ~calc_crc32c(0xffffffff, context->stripes[other_stripe].buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - - if (crc32b == csum[i]) { - RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), context->stripes[other_stripe].buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - RtlCopyMemory(stripes[stripe]->buf + (i * Vcb->superblock.sector_size), context->stripes[other_stripe].buf + (i * Vcb->superblock.sector_size), - Vcb->superblock.sector_size); - stripes[stripe]->rewrite = TRUE; - } else { - WARN("could not recover from checksum error\n"); - ExFreePool(stripes); - ExFreePool(stripeoff); - Status = STATUS_CRC_ERROR; - goto exit; - } - } - } - } else if (is_tree) { - UINT16 other_stripe = (stripe * ci->sub_stripes) + (context->stripes[stripe * ci->sub_stripes].status == ReadDataStatus_CRCError ? 1 : 0); - tree_header* th = (tree_header*)buf; - UINT32 crc32; - - RtlCopyMemory(buf, context->stripes[other_stripe].buf, readlen); - - crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); - - if (addr != th->address || crc32 != *((UINT32*)th->csum)) { - WARN("could not recover from checksum error\n"); - ExFreePool(stripes); - ExFreePool(stripeoff); - Status = STATUS_CRC_ERROR; - goto exit; - } - - RtlCopyMemory(stripes[stripe]->buf, buf, readlen); - stripes[stripe]->rewrite = TRUE; - } - } else if (length - pos < ci->stripe_length) { - if (context->csum && stripes[stripe]->status == ReadDataStatus_CRCError) { - for (i = 0; i < (length - pos) / Vcb->superblock.sector_size; i++) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + pos + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - - if (crc32 != csum[(pos / Vcb->superblock.sector_size) + i]) { - UINT16 other_stripe = (stripe * ci->sub_stripes) + (context->stripes[stripe * ci->sub_stripes].status == ReadDataStatus_CRCError ? 
1 : 0); - UINT32 crc32b = ~calc_crc32c(0xffffffff, &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], - Vcb->superblock.sector_size); - - if (crc32b == csum[i]) { - RtlCopyMemory(buf + pos + (i * Vcb->superblock.sector_size), - &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], Vcb->superblock.sector_size); - RtlCopyMemory(&stripes[stripe]->buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], - &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], - Vcb->superblock.sector_size); - stripes[stripe]->rewrite = TRUE; - } else { - WARN("could not recover from checksum error\n"); - ExFreePool(stripes); - ExFreePool(stripeoff); - Status = STATUS_CRC_ERROR; - goto exit; - } - } - } - } - - pos = length; - } else { - if (context->csum && stripes[stripe]->status == ReadDataStatus_CRCError) { - for (i = 0; i < ci->stripe_length / Vcb->superblock.sector_size; i++) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + pos + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - - if (crc32 != csum[(pos / Vcb->superblock.sector_size) + i]) { - UINT16 other_stripe = (stripe * ci->sub_stripes) + (context->stripes[stripe * ci->sub_stripes].status == ReadDataStatus_CRCError ? 
1 : 0); - UINT32 crc32b = ~calc_crc32c(0xffffffff, &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], - Vcb->superblock.sector_size); - - if (crc32b == csum[i]) { - RtlCopyMemory(buf + pos + (i * Vcb->superblock.sector_size), - &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], Vcb->superblock.sector_size); - RtlCopyMemory(&stripes[stripe]->buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], - &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], - Vcb->superblock.sector_size); - stripes[stripe]->rewrite = TRUE; - } else { - WARN("could not recover from checksum error\n"); - ExFreePool(stripes); - ExFreePool(stripeoff); - Status = STATUS_CRC_ERROR; - goto exit; - } - } - } - } - - stripeoff[stripe] += ci->stripe_length; - pos += ci->stripe_length; - } - - stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes); + stripeoff = 0; + if (!raid6_decode_with_checksum_metadata(addr, off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize, Vcb->superblock.node_size)) { + ERR("unrecoverable metadata checksum error\n"); + return STATUS_CRC_ERROR; } + } else { + pos = 0; + stripeoff = 0; + if (!raid6_decode_with_checksum(off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize, context->csum, Vcb->superblock.sector_size)) + return STATUS_CRC_ERROR; - // write good data over bad - - if (!Vcb->readonly) { - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].rewrite && devices[i] && !devices[i]->readonly) { - Status = write_data_phys(devices[i]->devobj, cis[i].offset + stripestart[i], context->stripes[i].buf, stripeend[i] - stripestart[i]); - - if (!NT_SUCCESS(Status)) - WARN("write_data_phys returned %08x\n", Status); - } - } + while (pos < length) { + off += (ci->num_stripes - 1) * ci->stripe_length; + if (!raid6_decode_with_checksum(off, 0, context, ci, &stripeoff, buf, &pos, length, 0, context->csum, 
Vcb->superblock.sector_size)) + return STATUS_CRC_ERROR; } } - - ExFreePool(stripes); - ExFreePool(stripeoff); - - // FIXME - handle the case where one of the stripes doesn't read everything, i.e. Irp->IoStatus.Information is short - - Status = STATUS_SUCCESS; - } else if (type == BLOCK_FLAG_DUPLICATE) { - BOOL checksum_error = FALSE; - UINT16 cancelled = 0; + } + + // write good data over bad + + if (!Vcb->readonly) { + CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Success) { - if (context->tree) { - tree_header* th = (tree_header*)context->stripes[i].buf; - UINT32 crc32; - - crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum)); - - if (th->address != context->address || crc32 != *((UINT32*)th->csum)) { - context->stripes[i].status = ReadDataStatus_CRCError; - checksum_error = TRUE; - } - } else if (context->csum) { - UINT32 j; - -#ifdef DEBUG_STATS - time1 = KeQueryPerformanceCounter(NULL); -#endif - - for (j = 0; j < context->stripes[i].Irp->IoStatus.Information / context->sector_size; j++) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[i].buf + (j * context->sector_size), context->sector_size); - - if (crc32 != context->csum[j]) { - context->stripes[i].status = ReadDataStatus_CRCError; - checksum_error = TRUE; - break; - } - } -#ifdef DEBUG_STATS - time2 = KeQueryPerformanceCounter(NULL); - - Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; -#endif - } - } else if (context->stripes[i].status == ReadDataStatus_Cancelled) { - cancelled++; + if (context->stripes[i].rewrite && devices[i] && !devices[i]->readonly) { + Status = write_data_phys(devices[i]->devobj, cis[i].offset + stripestart[i], context->stripes[i].buf, stripeend[i] - stripestart[i]); + + if (!NT_SUCCESS(Status)) + WARN("write_data_phys returned %08x\n", Status); } } - - if (checksum_error) { - // FIXME - update dev stats - - if (cancelled > 
0) { - context->stripes_left = 0; - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Cancelled) { - PIO_STACK_LOCATION IrpSp; - - // re-run Irp that we cancelled - - if (context->stripes[i].Irp) { - if (devices[i]->devobj->Flags & DO_DIRECT_IO) { - MmUnlockPages(context->stripes[i].Irp->MdlAddress); - IoFreeMdl(context->stripes[i].Irp->MdlAddress); - } - IoFreeIrp(context->stripes[i].Irp); - } - - if (!Irp) { - context->stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE); - - if (!context->stripes[i].Irp) { - ERR("IoAllocateIrp failed\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; - } - } else { - context->stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize); - - if (!context->stripes[i].Irp) { - ERR("IoMakeAssociatedIrp failed\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; - } - } - - IrpSp = IoGetNextIrpStackLocation(context->stripes[i].Irp); - IrpSp->MajorFunction = IRP_MJ_READ; - - if (devices[i]->devobj->Flags & DO_BUFFERED_IO) { - FIXME("FIXME - buffered IO\n"); - } else if (devices[i]->devobj->Flags & DO_DIRECT_IO) { - context->stripes[i].Irp->MdlAddress = IoAllocateMdl(context->stripes[i].buf, stripeend[i] - stripestart[i], FALSE, FALSE, NULL); - if (!context->stripes[i].Irp->MdlAddress) { - ERR("IoAllocateMdl failed\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; - } - - MmProbeAndLockPages(context->stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess); - } else { - context->stripes[i].Irp->UserBuffer = context->stripes[i].buf; - } - - IrpSp->Parameters.Read.Length = stripeend[i] - stripestart[i]; - IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[i] + cis[i].offset; - - context->stripes[i].Irp->UserIosb = &context->stripes[i].iosb; - - IoSetCompletionRoutine(context->stripes[i].Irp, read_data_completion, &context->stripes[i], TRUE, TRUE, TRUE); - - context->stripes_left++; - context->stripes[i].status = ReadDataStatus_Pending; - 
} - } - - context->stripes_cancel = 0; - KeClearEvent(&context->Event); - -#ifdef DEBUG_STATS - if (!is_tree) - time1 = KeQueryPerformanceCounter(NULL); -#endif + } + + return STATUS_SUCCESS; +} - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Pending) { - IoCallDriver(devices[i]->devobj, context->stripes[i].Irp); - } - } - - KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL); - -#ifdef DEBUG_STATS - if (!is_tree) { - time2 = KeQueryPerformanceCounter(NULL); - - Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart; - } -#endif - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Success) { - if (context->tree) { - tree_header* th = (tree_header*)context->stripes[i].buf; - UINT32 crc32; - - crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum)); - - if (th->address != context->address || crc32 != *((UINT32*)th->csum)) { - context->stripes[i].status = ReadDataStatus_CRCError; - checksum_error = TRUE; - } - } else if (context->csum) { - UINT32 j; - -#ifdef DEBUG_STATS - time1 = KeQueryPerformanceCounter(NULL); -#endif - for (j = 0; j < context->stripes[i].Irp->IoStatus.Information / context->sector_size; j++) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[i].buf + (j * context->sector_size), context->sector_size); - - if (crc32 != context->csum[j]) { - context->stripes[i].status = ReadDataStatus_CRCError; - checksum_error = TRUE; - break; - } - } -#ifdef DEBUG_STATS - time2 = KeQueryPerformanceCounter(NULL); - - Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; -#endif - } - } - } - } - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Success) { - RtlCopyMemory(buf, context->stripes[i].buf, length); - goto raid1write; - } - } - - if (context->tree || ci->num_stripes == 1) { // unable to recover from checksum error - ERR("unrecoverable checksum error 
at %llx\n", addr); - -#ifdef _DEBUG - if (context->tree) { - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_CRCError) { - tree_header* th = (tree_header*)context->stripes[i].buf; - UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum)); - - WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum)); - } - } - } -#endif - Status = STATUS_CRC_ERROR; - goto exit; - } - - // checksum errors on both stripes - we need to check sector by sector - - for (i = 0; i < (stripeend[0] - stripestart[0]) / context->sector_size; i++) { - UINT16 j; - BOOL success = FALSE; - -#ifdef DEBUG_STATS - time1 = KeQueryPerformanceCounter(NULL); -#endif - - for (j = 0; j < ci->num_stripes; j++) { - if (context->stripes[j].status == ReadDataStatus_CRCError) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[j].buf + (i * context->sector_size), context->sector_size); - - if (crc32 == context->csum[i]) { - RtlCopyMemory(buf + (i * context->sector_size), context->stripes[j].buf + (i * context->sector_size), context->sector_size); - success = TRUE; - break; - } - } - } - +NTSTATUS STDCALL read_data(device_extension* Vcb, UINT64 addr, UINT32 length, UINT32* csum, BOOL is_tree, UINT8* buf, chunk* c, chunk** pc, + PIRP Irp, BOOL check_nocsum_parity) { + CHUNK_ITEM* ci; + CHUNK_ITEM_STRIPE* cis; + read_data_context* context; + UINT64 i, type, offset; + NTSTATUS Status; + device** devices; + UINT64 *stripestart = NULL, *stripeend = NULL; + UINT32 firststripesize; + UINT16 startoffstripe, allowed_missing, missing_devices = 0; #ifdef DEBUG_STATS - time2 = KeQueryPerformanceCounter(NULL); - - Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; + LARGE_INTEGER time1, time2; #endif - if (!success) { - ERR("unrecoverable checksum error at %llx\n", addr + (i * context->sector_size)); - Status = STATUS_CRC_ERROR; - goto exit; - } - } - -raid1write: - // write good data over bad - - if 
(!Vcb->readonly) { - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_CRCError && devices[i] && !devices[i]->readonly) { - Status = write_data_phys(devices[i]->devobj, cis[i].offset + stripestart[i], buf, length); - - if (!NT_SUCCESS(Status)) - WARN("write_data_phys returned %08x\n", Status); - } - } - } - - Status = STATUS_SUCCESS; - goto exit; - } - - // check if any of the stripes succeeded - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Success) { - RtlCopyMemory(buf, context->stripes[i].buf, length); - Status = STATUS_SUCCESS; - goto exit; - } - } - - // failing that, return the first error we encountered - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Error) { - Status = context->stripes[i].iosb.Status; - goto exit; - } - } - - // if we somehow get here, return STATUS_INTERNAL_ERROR - - Status = STATUS_INTERNAL_ERROR; - } else if (type == BLOCK_FLAG_RAID5) { - UINT32 pos, skip; - int num_errors = 0; - UINT64 off, stripeoff, origoff; - BOOL needs_reconstruct = FALSE; - UINT64 reconstruct_stripe; - BOOL checksum_error = FALSE; - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Error) { - num_errors++; - if (num_errors > 1) - break; - } - } - - if (num_errors > 1) { - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Error) { - WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status); - Status = context->stripes[i].iosb.Status; - goto exit; - } - } - } - - off = addr - offset; - off -= off % ((ci->num_stripes - 1) * ci->stripe_length); - skip = addr - offset - off; - origoff = off; - - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Cancelled) { - if (needs_reconstruct) { - ERR("more than one stripe needs reconstruction\n"); - Status = STATUS_INTERNAL_ERROR; - goto exit; - } else { 
- needs_reconstruct = TRUE; - reconstruct_stripe = i; - } - } - } - - if (needs_reconstruct) { - TRACE("reconstructing stripe %u\n", reconstruct_stripe); - - stripeoff = 0; - - raid5_reconstruct(off, skip, context, ci, &stripeoff, stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe], TRUE, firststripesize, reconstruct_stripe); + + if (Vcb->log_to_phys_loaded) { + if (!c) { + c = get_chunk_from_address(Vcb, addr); - while (stripeoff < stripeend[0] - stripestart[0]) { - off += (ci->num_stripes - 1) * ci->stripe_length; - raid5_reconstruct(off, 0, context, ci, &stripeoff, stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe], FALSE, 0, reconstruct_stripe); + if (!c) { + ERR("get_chunk_from_address failed\n"); + return STATUS_INTERNAL_ERROR; } - - off = addr - offset; - off -= off % ((ci->num_stripes - 1) * ci->stripe_length); } - pos = 0; - stripeoff = 0; - raid5_decode(off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize); + ci = c->chunk_item; + offset = c->offset; + devices = c->devices; + + if (pc) + *pc = c; + } else { + LIST_ENTRY* le = Vcb->sys_chunks.Flink; - while (pos < length) { - off += (ci->num_stripes - 1) * ci->stripe_length; - raid5_decode(off, 0, context, ci, &stripeoff, buf, &pos, length, 0); - } + ci = NULL; - if (is_tree) { - tree_header* th = (tree_header*)buf; - UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); - - if (addr != th->address || crc32 != *((UINT32*)th->csum)) - checksum_error = TRUE; - } else if (csum) { -#ifdef DEBUG_STATS - time1 = KeQueryPerformanceCounter(NULL); -#endif - for (i = 0; i < length / Vcb->superblock.sector_size; i++) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - - if (crc32 != csum[i]) { - checksum_error = TRUE; - break; - } - } -#ifdef DEBUG_STATS - time2 = KeQueryPerformanceCounter(NULL); + while (le != &Vcb->sys_chunks) { + sys_chunk* sc = 
CONTAINING_RECORD(le, sys_chunk, list_entry); - Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; -#endif - } - - if (checksum_error) { - if (needs_reconstruct) { - PIO_STACK_LOCATION IrpSp; - - // re-run Irp that we cancelled - - if (context->stripes[reconstruct_stripe].Irp) { - if (devices[reconstruct_stripe]->devobj->Flags & DO_DIRECT_IO) { - MmUnlockPages(context->stripes[reconstruct_stripe].Irp->MdlAddress); - IoFreeMdl(context->stripes[reconstruct_stripe].Irp->MdlAddress); - } - IoFreeIrp(context->stripes[reconstruct_stripe].Irp); - } + if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) { + CHUNK_ITEM* chunk_item = sc->data; - if (!Irp) { - context->stripes[reconstruct_stripe].Irp = IoAllocateIrp(devices[reconstruct_stripe]->devobj->StackSize, FALSE); + if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) { + ci = chunk_item; + offset = sc->key.offset; + cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1]; - if (!context->stripes[reconstruct_stripe].Irp) { - ERR("IoAllocateIrp failed\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; + devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG); + if (!devices) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; } - } else { - context->stripes[reconstruct_stripe].Irp = IoMakeAssociatedIrp(Irp, devices[reconstruct_stripe]->devobj->StackSize); - if (!context->stripes[reconstruct_stripe].Irp) { - ERR("IoMakeAssociatedIrp failed\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; - } - } - - IrpSp = IoGetNextIrpStackLocation(context->stripes[reconstruct_stripe].Irp); - IrpSp->MajorFunction = IRP_MJ_READ; - - if (devices[reconstruct_stripe]->devobj->Flags & DO_BUFFERED_IO) { - FIXME("FIXME - buffered IO\n"); - } else if (devices[reconstruct_stripe]->devobj->Flags & DO_DIRECT_IO) { - context->stripes[reconstruct_stripe].Irp->MdlAddress = 
IoAllocateMdl(context->stripes[reconstruct_stripe].buf, - stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe], FALSE, FALSE, NULL); - if (!context->stripes[reconstruct_stripe].Irp->MdlAddress) { - ERR("IoAllocateMdl failed\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; + for (i = 0; i < ci->num_stripes; i++) { + devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid); } - MmProbeAndLockPages(context->stripes[reconstruct_stripe].Irp->MdlAddress, KernelMode, IoWriteAccess); - } else { - context->stripes[reconstruct_stripe].Irp->UserBuffer = context->stripes[reconstruct_stripe].buf; + break; } + } + + le = le->Flink; + } + + if (!ci) { + ERR("could not find chunk for %llx in bootstrap\n", addr); + return STATUS_INTERNAL_ERROR; + } + + if (pc) + *pc = NULL; + } + + if (ci->type & BLOCK_FLAG_DUPLICATE) { + type = BLOCK_FLAG_DUPLICATE; + allowed_missing = 0; + } else if (ci->type & BLOCK_FLAG_RAID0) { + type = BLOCK_FLAG_RAID0; + allowed_missing = 0; + } else if (ci->type & BLOCK_FLAG_RAID1) { + type = BLOCK_FLAG_DUPLICATE; + allowed_missing = 1; + } else if (ci->type & BLOCK_FLAG_RAID10) { + type = BLOCK_FLAG_RAID10; + allowed_missing = 1; + } else if (ci->type & BLOCK_FLAG_RAID5) { + type = BLOCK_FLAG_RAID5; + allowed_missing = 1; + } else if (ci->type & BLOCK_FLAG_RAID6) { + type = BLOCK_FLAG_RAID6; + allowed_missing = 2; + } else { // SINGLE + type = BLOCK_FLAG_DUPLICATE; + allowed_missing = 0; + } - IrpSp->Parameters.Read.Length = stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe]; - IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[reconstruct_stripe] + cis[reconstruct_stripe].offset; - - context->stripes[reconstruct_stripe].Irp->UserIosb = &context->stripes[reconstruct_stripe].iosb; - - IoSetCompletionRoutine(context->stripes[reconstruct_stripe].Irp, read_data_completion, &context->stripes[reconstruct_stripe], TRUE, TRUE, TRUE); + cis = (CHUNK_ITEM_STRIPE*)&ci[1]; - context->stripes[reconstruct_stripe].status = 
ReadDataStatus_Pending; - - context->stripes_left = 1; - KeClearEvent(&context->Event); - -#ifdef DEBUG_STATS - if (!is_tree) - time1 = KeQueryPerformanceCounter(NULL); -#endif + context = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_context), ALLOC_TAG); + if (!context) { + ERR("out of memory\n"); + return STATUS_INSUFFICIENT_RESOURCES; + } - IoCallDriver(devices[reconstruct_stripe]->devobj, context->stripes[reconstruct_stripe].Irp); - - KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL); - -#ifdef DEBUG_STATS - if (!is_tree) { - time2 = KeQueryPerformanceCounter(NULL); - - Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart; - } -#endif + RtlZeroMemory(context, sizeof(read_data_context)); + KeInitializeEvent(&context->Event, NotificationEvent, FALSE); - if (context->stripes[reconstruct_stripe].status != ReadDataStatus_Success) { - ERR("unrecoverable checksum error\n"); - Status = STATUS_CRC_ERROR; - goto exit; - } - } - - if (context->tree) { - off = origoff; - pos = 0; - stripeoff = 0; - if (!raid5_decode_with_checksum_metadata(addr, off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize, Vcb->superblock.node_size)) { - ERR("unrecoverable metadata checksum error\n"); - Status = STATUS_CRC_ERROR; - goto exit; - } + context->stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG); + if (!context->stripes) { + ERR("out of memory\n"); + ExFreePool(context); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(context->stripes, sizeof(read_data_stripe) * ci->num_stripes); + + context->buflen = length; + context->num_stripes = ci->num_stripes; + context->stripes_left = context->num_stripes; + context->sector_size = Vcb->superblock.sector_size; + context->csum = csum; + context->tree = is_tree; + context->type = type; + context->check_nocsum_parity = check_nocsum_parity; + + stripestart = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT64) * ci->num_stripes, 
ALLOC_TAG); + if (!stripestart) { + ERR("out of memory\n"); + ExFreePool(context); + return STATUS_INSUFFICIENT_RESOURCES; + } + + stripeend = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT64) * ci->num_stripes, ALLOC_TAG); + if (!stripeend) { + ERR("out of memory\n"); + ExFreePool(stripestart); + ExFreePool(context); + return STATUS_INSUFFICIENT_RESOURCES; + } + + if (type == BLOCK_FLAG_RAID0) { + UINT64 startoff, endoff; + UINT16 endoffstripe; + + get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe); + get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe); + + for (i = 0; i < ci->num_stripes; i++) { + if (startoffstripe > i) { + stripestart[i] = startoff - (startoff % ci->stripe_length) + ci->stripe_length; + } else if (startoffstripe == i) { + stripestart[i] = startoff; } else { - off = origoff; - pos = 0; - stripeoff = 0; - if (!raid5_decode_with_checksum(off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize, csum, Vcb->superblock.sector_size)) { - Status = STATUS_CRC_ERROR; - goto exit; - } - - while (pos < length) { - off += (ci->num_stripes - 1) * ci->stripe_length; - if (!raid5_decode_with_checksum(off, 0, context, ci, &stripeoff, buf, &pos, length, 0, csum, Vcb->superblock.sector_size)) { - Status = STATUS_CRC_ERROR; - goto exit; - } - } + stripestart[i] = startoff - (startoff % ci->stripe_length); } - // write good data over bad - - if (!Vcb->readonly) { - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].rewrite && devices[i] && !devices[i]->readonly) { - Status = write_data_phys(devices[i]->devobj, cis[i].offset + stripestart[i], context->stripes[i].buf, stripeend[i] - stripestart[i]); - - if (!NT_SUCCESS(Status)) - WARN("write_data_phys returned %08x\n", Status); - } - } + if (endoffstripe > i) { + stripeend[i] = endoff - (endoff % ci->stripe_length) + ci->stripe_length; + } else if (endoffstripe == i) { + stripeend[i] = 
endoff + 1; + } else { + stripeend[i] = endoff - (endoff % ci->stripe_length); } } + } else if (type == BLOCK_FLAG_RAID10) { + UINT64 startoff, endoff; + UINT16 endoffstripe, j; - if (!context->tree && !context->csum) { - UINT32* parity_buf; - - // We are reading a nodatacsum extent. Even though there's no checksum, we - // can still identify errors by checking if the parity is consistent. - - parity_buf = ExAllocatePoolWithTag(NonPagedPool, stripeend[0] - stripestart[0], ALLOC_TAG); - - if (!parity_buf) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; + get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe); + get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe); + + if ((ci->num_stripes % ci->sub_stripes) != 0) { + ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes); + Status = STATUS_INTERNAL_ERROR; + goto exit; + } + + context->firstoff = (startoff % ci->stripe_length) / Vcb->superblock.sector_size; + context->startoffstripe = startoffstripe; + context->sectors_per_stripe = ci->stripe_length / Vcb->superblock.sector_size; + + startoffstripe *= ci->sub_stripes; + endoffstripe *= ci->sub_stripes; + + for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) { + if (startoffstripe > i) { + stripestart[i] = startoff - (startoff % ci->stripe_length) + ci->stripe_length; + } else if (startoffstripe == i) { + stripestart[i] = startoff; + } else { + stripestart[i] = startoff - (startoff % ci->stripe_length); } - RtlCopyMemory(parity_buf, context->stripes[0].buf, stripeend[0] - stripestart[0]); - - for (i = 0; i < ci->num_stripes; i++) { - do_xor((UINT8*)parity_buf, context->stripes[i].buf, stripeend[0] - stripestart[0]); + if (endoffstripe > i) { + stripeend[i] = endoff - (endoff % ci->stripe_length) + ci->stripe_length; + } else if (endoffstripe == 
i) { + stripeend[i] = endoff + 1; + } else { + stripeend[i] = endoff - (endoff % ci->stripe_length); } - for (i = 0; i < (stripeend[0] - stripestart[0]) / sizeof(UINT32); i++) { - if (parity_buf[i] != 0) { - ERR("parity error on nodatacsum inode\n"); - ExFreePool(parity_buf); - Status = STATUS_CRC_ERROR; - goto exit; - } + for (j = 1; j < ci->sub_stripes; j++) { + stripestart[i+j] = stripestart[i]; + stripeend[i+j] = stripeend[i]; } - - ExFreePool(parity_buf); } - Status = STATUS_SUCCESS; - } else if (type == BLOCK_FLAG_RAID6) { - UINT32 pos, skip; - int num_errors = 0; - UINT64 off, stripeoff, origoff; - UINT8 needs_reconstruct = 0; - UINT16 missing1, missing2; - BOOL checksum_error = FALSE; - + context->stripes_cancel = 1; + } else if (type == BLOCK_FLAG_DUPLICATE) { for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Error) { - num_errors++; - if (num_errors > 2) - break; - } - } - - if (num_errors > 2) { - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Error) { - WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status); - Status = context->stripes[i].iosb.Status; - goto exit; - } - } + stripestart[i] = addr - offset; + stripeend[i] = stripestart[i] + length; } - off = addr - offset; - off -= off % ((ci->num_stripes - 2) * ci->stripe_length); - skip = addr - offset - off; - origoff = off; + context->stripes_cancel = ci->num_stripes - 1; + } else if (type == BLOCK_FLAG_RAID5) { + UINT64 startoff, endoff; + UINT16 endoffstripe; + UINT64 start = 0xffffffffffffffff, end = 0; - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].status == ReadDataStatus_Cancelled) { - if (needs_reconstruct == 2) { - ERR("more than two stripes need reconstruction\n"); - Status = STATUS_INTERNAL_ERROR; - goto exit; - } else if (needs_reconstruct == 1) { - needs_reconstruct++; - missing2 = i; - } else { - needs_reconstruct++; - missing1 = i; - } - } - } + 
get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe); + get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe); - if (needs_reconstruct > 0) { - stripeoff = 0; + for (i = 0; i < ci->num_stripes - 1; i++) { + UINT64 ststart, stend; - if (needs_reconstruct == 2) { - TRACE("reconstructing stripes %u and %u\n", missing1, missing2); + if (startoffstripe > i) { + ststart = startoff - (startoff % ci->stripe_length) + ci->stripe_length; + } else if (startoffstripe == i) { + ststart = startoff; + } else { + ststart = startoff - (startoff % ci->stripe_length); + } + + if (endoffstripe > i) { + stend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; + } else if (endoffstripe == i) { + stend = endoff + 1; + } else { + stend = endoff - (endoff % ci->stripe_length); + } - raid6_reconstruct2(off, skip, context, ci, &stripeoff, stripeend[missing1] - stripestart[missing1], - TRUE, firststripesize, missing1, missing2); - - while (stripeoff < stripeend[0] - stripestart[0]) { - off += (ci->num_stripes - 2) * ci->stripe_length; - raid6_reconstruct2(off, 0, context, ci, &stripeoff, stripeend[missing1] - stripestart[missing1], - FALSE, 0, missing1, missing2); + if (ststart != stend) { + if (ststart < start) { + start = ststart; + firststripesize = ci->stripe_length - (ststart % ci->stripe_length); } - } else { - TRACE("reconstructing stripe %u\n", missing1); - - raid6_reconstruct1(off, skip, context, ci, &stripeoff, stripeend[missing1] - stripestart[missing1], TRUE, firststripesize, missing1); - while (stripeoff < stripeend[0] - stripestart[0]) { - off += (ci->num_stripes - 2) * ci->stripe_length; - raid6_reconstruct1(off, 0, context, ci, &stripeoff, stripeend[missing1] - stripestart[missing1], FALSE, 0, missing1); - } + if (stend > end) + end = stend; } - - off = origoff; } - if (!context->tree && !context->csum) { - UINT8* scratch; + for (i = 0; i < ci->num_stripes; i++) { + 
stripestart[i] = start; + stripeend[i] = end; + } + + context->stripes_cancel = Vcb->options.raid5_recalculation; + } else if (type == BLOCK_FLAG_RAID6) { + UINT64 startoff, endoff; + UINT16 endoffstripe; + UINT64 start = 0xffffffffffffffff, end = 0; + + get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe); + get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe); + + for (i = 0; i < ci->num_stripes - 2; i++) { + UINT64 ststart, stend; - scratch = ExAllocatePoolWithTag(NonPagedPool, ci->stripe_length, ALLOC_TAG); - if (!scratch) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; + if (startoffstripe > i) { + ststart = startoff - (startoff % ci->stripe_length) + ci->stripe_length; + } else if (startoffstripe == i) { + ststart = startoff; + } else { + ststart = startoff - (startoff % ci->stripe_length); } - - stripeoff = 0; - Status = check_raid6_nocsum_parity(off, skip, context, ci, &stripeoff, stripeend[0] - stripestart[0], TRUE, firststripesize, scratch); - if (!NT_SUCCESS(Status)) { - ERR("check_raid6_nocsum_parity returned %08x\n", Status); - ExFreePool(scratch); - goto exit; + + if (endoffstripe > i) { + stend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; + } else if (endoffstripe == i) { + stend = endoff + 1; + } else { + stend = endoff - (endoff % ci->stripe_length); } - - while (stripeoff < stripeend[0] - stripestart[0]) { - off += (ci->num_stripes - 2) * ci->stripe_length; - Status = check_raid6_nocsum_parity(off, 0, context, ci, &stripeoff, stripeend[0] - stripestart[0], FALSE, 0, scratch); - - if (!NT_SUCCESS(Status)) { - ERR("check_raid6_nocsum_parity returned %08x\n", Status); - ExFreePool(scratch); - goto exit; + + if (ststart != stend) { + if (ststart < start) { + start = ststart; + firststripesize = ci->stripe_length - (ststart % ci->stripe_length); } + + if (stend > end) + end = stend; } - - 
ExFreePool(scratch); - - off = origoff; } - pos = 0; - stripeoff = 0; - raid6_decode(off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize); - - while (pos < length) { - off += (ci->num_stripes - 2) * ci->stripe_length; - raid6_decode(off, 0, context, ci, &stripeoff, buf, &pos, length, 0); + for (i = 0; i < ci->num_stripes; i++) { + stripestart[i] = start; + stripeend[i] = end; } - if (is_tree) { - tree_header* th = (tree_header*)buf; - UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); - - if (addr != th->address || crc32 != *((UINT32*)th->csum)) - checksum_error = TRUE; - } else if (csum) { -#ifdef DEBUG_STATS - time1 = KeQueryPerformanceCounter(NULL); -#endif - for (i = 0; i < length / Vcb->superblock.sector_size; i++) { - UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - - if (crc32 != csum[i]) { - checksum_error = TRUE; - break; - } - } -#ifdef DEBUG_STATS - time2 = KeQueryPerformanceCounter(NULL); + context->stripes_cancel = Vcb->options.raid6_recalculation; + } + + KeInitializeSpinLock(&context->spin_lock); + + context->address = addr; + + for (i = 0; i < ci->num_stripes; i++) { + if (!devices[i] || stripestart[i] == stripeend[i]) { + context->stripes[i].status = ReadDataStatus_MissingDevice; + context->stripes[i].buf = NULL; + context->stripes_left--; - Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; -#endif + if (!devices[i]) + missing_devices++; } - - if (checksum_error) { - for (i = 0; i < needs_reconstruct; i++) { - PIO_STACK_LOCATION IrpSp; - UINT16 reconstruct_stripe = i == 0 ? 
missing1 : missing2; - - // re-run Irps that we cancelled - - if (context->stripes[reconstruct_stripe].Irp) { - if (devices[reconstruct_stripe]->devobj->Flags & DO_DIRECT_IO) { - MmUnlockPages(context->stripes[reconstruct_stripe].Irp->MdlAddress); - IoFreeMdl(context->stripes[reconstruct_stripe].Irp->MdlAddress); - } - IoFreeIrp(context->stripes[reconstruct_stripe].Irp); - } - - if (!Irp) { - context->stripes[reconstruct_stripe].Irp = IoAllocateIrp(devices[reconstruct_stripe]->devobj->StackSize, FALSE); - - if (!context->stripes[reconstruct_stripe].Irp) { - ERR("IoAllocateIrp failed\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; - } - } else { - context->stripes[reconstruct_stripe].Irp = IoMakeAssociatedIrp(Irp, devices[reconstruct_stripe]->devobj->StackSize); - - if (!context->stripes[reconstruct_stripe].Irp) { - ERR("IoMakeAssociatedIrp failed\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; - } - } - - IrpSp = IoGetNextIrpStackLocation(context->stripes[reconstruct_stripe].Irp); - IrpSp->MajorFunction = IRP_MJ_READ; - - if (devices[reconstruct_stripe]->devobj->Flags & DO_BUFFERED_IO) { - FIXME("FIXME - buffered IO\n"); - } else if (devices[reconstruct_stripe]->devobj->Flags & DO_DIRECT_IO) { - context->stripes[reconstruct_stripe].Irp->MdlAddress = IoAllocateMdl(context->stripes[reconstruct_stripe].buf, - stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe], FALSE, FALSE, NULL); - if (!context->stripes[reconstruct_stripe].Irp->MdlAddress) { - ERR("IoAllocateMdl failed\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto exit; - } - - MmProbeAndLockPages(context->stripes[reconstruct_stripe].Irp->MdlAddress, KernelMode, IoWriteAccess); - } else { - context->stripes[reconstruct_stripe].Irp->UserBuffer = context->stripes[reconstruct_stripe].buf; - } - - IrpSp->Parameters.Read.Length = stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe]; - IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[reconstruct_stripe] + 
cis[reconstruct_stripe].offset; - - context->stripes[reconstruct_stripe].Irp->UserIosb = &context->stripes[reconstruct_stripe].iosb; - - IoSetCompletionRoutine(context->stripes[reconstruct_stripe].Irp, read_data_completion, &context->stripes[reconstruct_stripe], TRUE, TRUE, TRUE); - - context->stripes[reconstruct_stripe].status = ReadDataStatus_Pending; + } + + if (missing_devices > allowed_missing) { + ERR("not enough devices to service request (%u missing)\n", missing_devices); + Status = STATUS_UNEXPECTED_IO_ERROR; + goto exit; + } + + for (i = 0; i < ci->num_stripes; i++) { + PIO_STACK_LOCATION IrpSp; + + if (devices[i] && stripestart[i] != stripeend[i]) { + context->stripes[i].context = (struct read_data_context*)context; + context->stripes[i].buf = ExAllocatePoolWithTag(NonPagedPool, stripeend[i] - stripestart[i], ALLOC_TAG); + + if (!context->stripes[i].buf) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; } - - if (needs_reconstruct > 0) { - context->stripes_left = needs_reconstruct; - KeClearEvent(&context->Event); - -#ifdef DEBUG_STATS - if (!is_tree) - time1 = KeQueryPerformanceCounter(NULL); -#endif + + if (type == BLOCK_FLAG_RAID10) { + context->stripes[i].stripenum = i / ci->sub_stripes; + } + + if (!Irp) { + context->stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE); - for (i = 0; i < needs_reconstruct; i++) { - UINT16 reconstruct_stripe = i == 0 ? 
missing1 : missing2; - - IoCallDriver(devices[reconstruct_stripe]->devobj, context->stripes[reconstruct_stripe].Irp); + if (!context->stripes[i].Irp) { + ERR("IoAllocateIrp failed\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; } + } else { + context->stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize); - KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL); - -#ifdef DEBUG_STATS - if (!is_tree) { - time2 = KeQueryPerformanceCounter(NULL); - - Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart; - } -#endif - - for (i = 0; i < needs_reconstruct; i++) { - UINT16 reconstruct_stripe = i == 0 ? missing1 : missing2; - - if (context->stripes[reconstruct_stripe].status != ReadDataStatus_Success) { - ERR("unrecoverable checksum error\n"); - Status = STATUS_CRC_ERROR; - goto exit; - } + if (!context->stripes[i].Irp) { + ERR("IoMakeAssociatedIrp failed\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; } } - off = origoff; + IrpSp = IoGetNextIrpStackLocation(context->stripes[i].Irp); + IrpSp->MajorFunction = IRP_MJ_READ; - if (context->tree) { - pos = 0; - stripeoff = 0; - if (!raid6_decode_with_checksum_metadata(addr, off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize, Vcb->superblock.node_size)) { - ERR("unrecoverable metadata checksum error\n"); - Status = STATUS_CRC_ERROR; - goto exit; - } - } else { - pos = 0; - stripeoff = 0; - if (!raid6_decode_with_checksum(off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize, csum, Vcb->superblock.sector_size)) { - Status = STATUS_CRC_ERROR; + if (devices[i]->devobj->Flags & DO_BUFFERED_IO) { + FIXME("FIXME - buffered IO\n"); + } else if (devices[i]->devobj->Flags & DO_DIRECT_IO) { + context->stripes[i].Irp->MdlAddress = IoAllocateMdl(context->stripes[i].buf, stripeend[i] - stripestart[i], FALSE, FALSE, NULL); + if (!context->stripes[i].Irp->MdlAddress) { + ERR("IoAllocateMdl failed\n"); + Status = 
STATUS_INSUFFICIENT_RESOURCES; goto exit; } - while (pos < length) { - off += (ci->num_stripes - 1) * ci->stripe_length; - if (!raid6_decode_with_checksum(off, 0, context, ci, &stripeoff, buf, &pos, length, 0, csum, Vcb->superblock.sector_size)) { - Status = STATUS_CRC_ERROR; - goto exit; - } - } + MmProbeAndLockPages(context->stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess); + } else { + context->stripes[i].Irp->UserBuffer = context->stripes[i].buf; } + + IrpSp->Parameters.Read.Length = stripeend[i] - stripestart[i]; + IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[i] + cis[i].offset; + + context->stripes[i].Irp->UserIosb = &context->stripes[i].iosb; + + IoSetCompletionRoutine(context->stripes[i].Irp, read_data_completion, &context->stripes[i], TRUE, TRUE, TRUE); + + context->stripes[i].status = ReadDataStatus_Pending; } + } + +#ifdef DEBUG_STATS + if (!is_tree) + time1 = KeQueryPerformanceCounter(NULL); +#endif + + for (i = 0; i < ci->num_stripes; i++) { + if (context->stripes[i].status != ReadDataStatus_MissingDevice) { + IoCallDriver(devices[i]->devobj, context->stripes[i].Irp); + } + } + + KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL); + +#ifdef DEBUG_STATS + if (!is_tree) { + time2 = KeQueryPerformanceCounter(NULL); - // write good data over bad - - if (!Vcb->readonly) { - for (i = 0; i < ci->num_stripes; i++) { - if (context->stripes[i].rewrite && devices[i] && !devices[i]->readonly) { - Status = write_data_phys(devices[i]->devobj, cis[i].offset + stripestart[i], context->stripes[i].buf, stripeend[i] - stripestart[i]); - - if (!NT_SUCCESS(Status)) - WARN("write_data_phys returned %08x\n", Status); + Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart; + } +#endif + + // check if any of the devices return a "user-induced" error + + for (i = 0; i < ci->num_stripes; i++) { + if (context->stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context->stripes[i].iosb.Status)) { + if (Irp && 
context->stripes[i].iosb.Status == STATUS_VERIFY_REQUIRED) { + PDEVICE_OBJECT dev; + + dev = IoGetDeviceToVerify(Irp->Tail.Overlay.Thread); + IoSetDeviceToVerify(Irp->Tail.Overlay.Thread, NULL); + + if (!dev) { + dev = IoGetDeviceToVerify(PsGetCurrentThread()); + IoSetDeviceToVerify(PsGetCurrentThread(), NULL); } + + dev = Vcb->Vpb ? Vcb->Vpb->RealDevice : NULL; + + if (dev) + IoVerifyVolume(dev, FALSE); } +// IoSetHardErrorOrVerifyDevice(context->stripes[i].Irp, devices[i]->devobj); + + Status = context->stripes[i].iosb.Status; + goto exit; + } + } + + if (type == BLOCK_FLAG_RAID0) { + Status = read_data_raid0(Vcb, buf, addr, length, context, ci, stripestart, stripeend, startoffstripe); + if (!NT_SUCCESS(Status)) { + ERR("read_data_raid0 returned %08x\n", Status); + goto exit; + } + } else if (type == BLOCK_FLAG_RAID10) { + Status = read_data_raid10(Vcb, buf, addr, length, Irp, context, ci, devices, stripestart, stripeend, startoffstripe); + if (!NT_SUCCESS(Status)) { + ERR("read_data_raid10 returned %08x\n", Status); + goto exit; + } + } else if (type == BLOCK_FLAG_DUPLICATE) { + Status = read_data_dup(Vcb, buf, addr, length, Irp, context, ci, devices, stripestart, stripeend); + if (!NT_SUCCESS(Status)) { + ERR("read_data_dup returned %08x\n", Status); + goto exit; + } + } else if (type == BLOCK_FLAG_RAID5) { + Status = read_data_raid5(Vcb, buf, addr, length, Irp, context, ci, devices, stripestart, stripeend, offset, firststripesize, check_nocsum_parity); + if (!NT_SUCCESS(Status)) { + ERR("read_data_raid5 returned %08x\n", Status); + goto exit; + } + } else if (type == BLOCK_FLAG_RAID6) { + Status = read_data_raid6(Vcb, buf, addr, length, Irp, context, ci, devices, stripestart, stripeend, offset, firststripesize, check_nocsum_parity); + if (!NT_SUCCESS(Status)) { + ERR("read_data_raid6 returned %08x\n", Status); + goto exit; } - - Status = STATUS_SUCCESS; } exit: @@ -2888,155 +2990,7 @@ static NTSTATUS STDCALL read_stream(fcb* fcb, UINT8* data, UINT64 start, 
ULONG l return Status; } -static NTSTATUS load_csum_from_disk(device_extension* Vcb, UINT32* csum, UINT64 start, UINT64 length, PIRP Irp) { - NTSTATUS Status; - KEY searchkey; - traverse_ptr tp, next_tp; - UINT64 i, j; - BOOL b; - - searchkey.obj_id = EXTENT_CSUM_ID; - searchkey.obj_type = TYPE_EXTENT_CSUM; - searchkey.offset = start; - - Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp); - if (!NT_SUCCESS(Status)) { - ERR("error - find_item returned %08x\n", Status); - return Status; - } - - i = 0; - do { - if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { - ULONG readlen; - - if (start < tp.item->key.offset) - j = 0; - else - j = ((start - tp.item->key.offset) / Vcb->superblock.sector_size) + i; - - if (j * sizeof(UINT32) > tp.item->size || tp.item->key.offset > start + (i * Vcb->superblock.sector_size)) { - ERR("checksum not found for %llx\n", start + (i * Vcb->superblock.sector_size)); - return STATUS_INTERNAL_ERROR; - } - - readlen = min((tp.item->size / sizeof(UINT32)) - j, length - i); - RtlCopyMemory(&csum[i], tp.item->data + (j * sizeof(UINT32)), readlen * sizeof(UINT32)); - i += readlen; - - if (i == length) - break; - } - - b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp); - - if (b) - tp = next_tp; - } while (b); - - if (i < length) { - ERR("could not read checksums: offset %llx, length %llx sectors\n", start, length); - return STATUS_INTERNAL_ERROR; - } - - return STATUS_SUCCESS; -} - -static NTSTATUS load_csum(device_extension* Vcb, UINT64 start, UINT64 length, UINT32** pcsum, PIRP Irp) { - UINT32* csum = NULL; - NTSTATUS Status; - UINT64 end; - RTL_BITMAP bmp; - ULONG *bmpbuf = NULL, bmpbuflen, index, runlength; - LIST_ENTRY* le; - - if (length == 0) { - *pcsum = NULL; - return STATUS_SUCCESS; - } - - bmpbuflen = sector_align(length, sizeof(ULONG) * 8) / 8; - bmpbuf = ExAllocatePoolWithTag(PagedPool, bmpbuflen, ALLOC_TAG); - if (!bmpbuf) { - ERR("out of memory\n"); - Status = 
STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - RtlInitializeBitMap(&bmp, bmpbuf, length); - RtlClearAllBits(&bmp); - - csum = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * length, ALLOC_TAG); - if (!csum) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - goto end; - } - - ExAcquireResourceSharedLite(&Vcb->checksum_lock, TRUE); - - end = start + (length * Vcb->superblock.sector_size); - - le = Vcb->sector_checksums.Flink; - while (le != &Vcb->sector_checksums) { - changed_sector* cs = (changed_sector*)le; - UINT64 cs_end = cs->ol.key + (cs->length * Vcb->superblock.sector_size); - - if (cs->ol.key <= start && cs_end >= end) { // outer - if (cs->deleted) { - RtlClearAllBits(&bmp); - } else { - RtlSetAllBits(&bmp); - RtlCopyMemory(csum, &cs->checksums[(start - cs->ol.key) / Vcb->superblock.sector_size], sizeof(UINT32) * length); - } - } else if (cs->ol.key >= start && cs->ol.key <= end) { // right or inner - if (cs->deleted) { - RtlClearBits(&bmp, (cs->ol.key - start) / Vcb->superblock.sector_size, (min(end, cs_end) - cs->ol.key) / Vcb->superblock.sector_size); - } else { - RtlSetBits(&bmp, (cs->ol.key - start) / Vcb->superblock.sector_size, (min(end, cs_end) - cs->ol.key) / Vcb->superblock.sector_size); - RtlCopyMemory(&csum[(cs->ol.key - start) / Vcb->superblock.sector_size], cs->checksums, (min(end, cs_end) - cs->ol.key) * sizeof(UINT32) / Vcb->superblock.sector_size); - } - } else if (cs_end >= start && cs_end <= end) { // left - if (cs->deleted) { - RtlClearBits(&bmp, 0, (cs_end - start) / Vcb->superblock.sector_size); - } else { - RtlSetBits(&bmp, 0, (cs_end - start) / Vcb->superblock.sector_size); - RtlCopyMemory(csum, &cs->checksums[(start - cs->ol.key) / Vcb->superblock.sector_size], (cs_end - start) * sizeof(UINT32) / Vcb->superblock.sector_size); - } - } - - le = le->Flink; - } - - ExReleaseResourceLite(&Vcb->checksum_lock); - - runlength = RtlFindFirstRunClear(&bmp, &index); - - while (runlength != 0) { - Status = 
load_csum_from_disk(Vcb, &csum[index], start + (index * Vcb->superblock.sector_size), runlength, Irp); - if (!NT_SUCCESS(Status)) { - ERR("load_csum_from_disk returned %08x\n", Status); - goto end; - } - - runlength = RtlFindNextForwardRunClear(&bmp, index + runlength, &index); - } - - Status = STATUS_SUCCESS; - -end: - if (bmpbuf) - ExFreePool(bmpbuf); - - if (NT_SUCCESS(Status)) - *pcsum = csum; - else if (csum) - ExFreePool(csum); - - return Status; -} - -NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, ULONG* pbr, PIRP Irp) { +NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, ULONG* pbr, PIRP Irp, BOOL check_nocsum_parity) { NTSTATUS Status; EXTENT_DATA* ed; UINT64 bytes_read = 0; @@ -3128,7 +3082,7 @@ NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, U UINT32 to_read, read; UINT8* buf; BOOL buf_free; - UINT32 *csum, bumpoff = 0; + UINT32 bumpoff = 0; UINT64 addr, lockaddr, locklen; chunk* c; @@ -3164,20 +3118,6 @@ NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, U } } - if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) { - Status = load_csum(fcb->Vcb, addr, to_read / fcb->Vcb->superblock.sector_size, &csum, Irp); - - if (!NT_SUCCESS(Status)) { - ERR("load_csum returned %08x\n", Status); - - if (buf_free) - ExFreePool(buf); - - goto exit; - } - } else - csum = NULL; - c = get_chunk_from_address(fcb->Vcb, addr); if (!c) { @@ -3194,8 +3134,8 @@ NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, U chunk_lock_range(fcb->Vcb, c, lockaddr, locklen); } - - Status = read_data(fcb->Vcb, addr, to_read, csum, FALSE, buf, c, NULL, Irp); + Status = read_data(fcb->Vcb, addr, to_read, ext->csum ? 
&ext->csum[off / fcb->Vcb->superblock.sector_size] : NULL, FALSE, + buf, c, NULL, Irp, check_nocsum_parity); if (!NT_SUCCESS(Status)) { ERR("read_data returned %08x\n", Status); @@ -3244,9 +3184,6 @@ NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, U if (buf_free) ExFreePool(buf); - if (csum) - ExFreePool(csum); - bytes_read += read; length -= read; @@ -3380,25 +3317,34 @@ NTSTATUS do_read(PIRP Irp, BOOL wait, ULONG* bytes_read) { ccfs.FileSize = fcb->Header.FileSize; ccfs.ValidDataLength = fcb->Header.ValidDataLength; - TRACE("calling CcInitializeCacheMap (%llx, %llx, %llx)\n", - ccfs.AllocationSize.QuadPart, ccfs.FileSize.QuadPart, ccfs.ValidDataLength.QuadPart); - CcInitializeCacheMap(FileObject, &ccfs, FALSE, cache_callbacks, FileObject); - - CcSetReadAheadGranularity(FileObject, READ_AHEAD_GRANULARITY); + init_file_cache(FileObject, &ccfs); } if (IrpSp->MinorFunction & IRP_MN_MDL) { CcMdlRead(FileObject,&IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus); } else { - TRACE("CcCopyRead(%p, %llx, %x, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus); - TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength); - if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) { - TRACE("CcCopyRead could not wait\n"); - - IoMarkIrpPending(Irp); - return STATUS_PENDING; + if (CcCopyReadEx) { + TRACE("CcCopyReadEx(%p, %llx, %x, %u, %p, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, + length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread); + TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength); + if (!CcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) { + TRACE("CcCopyReadEx could not wait\n"); + + 
IoMarkIrpPending(Irp); + return STATUS_PENDING; + } + TRACE("CcCopyReadEx finished\n"); + } else { + TRACE("CcCopyRead(%p, %llx, %x, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus); + TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength); + if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) { + TRACE("CcCopyRead could not wait\n"); + + IoMarkIrpPending(Irp); + return STATUS_PENDING; + } + TRACE("CcCopyRead finished\n"); } - TRACE("CcCopyRead finished\n"); } } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { Status = _SEH2_GetExceptionCode(); @@ -3431,20 +3377,30 @@ NTSTATUS do_read(PIRP Irp, BOOL wait, ULONG* bytes_read) { } } - ExAcquireResourceSharedLite(&fcb->Vcb->tree_lock, TRUE); - if (fcb->ads) Status = read_stream(fcb, data, start, length, bytes_read); else - Status = read_file(fcb, data, start, length, bytes_read, Irp); - - ExReleaseResourceLite(&fcb->Vcb->tree_lock); + Status = read_file(fcb, data, start, length, bytes_read, Irp, TRUE); *bytes_read += addon; TRACE("read %u bytes\n", *bytes_read); Irp->IoStatus.Information = *bytes_read; + if (diskacc && Status != STATUS_PENDING) { + PETHREAD thread = NULL; + + if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread)) + thread = Irp->Tail.Overlay.Thread; + else if (!IoIsSystemThread(PsGetCurrentThread())) + thread = PsGetCurrentThread(); + else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp) + thread = PsGetCurrentThread(); + + if (thread) + PsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0); + } + return Status; } } @@ -3458,7 +3414,7 @@ NTSTATUS STDCALL drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) { BOOL top_level; fcb* fcb; ccb* ccb; - BOOL tree_lock = FALSE, fcb_lock = FALSE, pagefile; + BOOL fcb_lock = FALSE, wait; FsRtlEnterFileSystem(); @@ -3491,12 +3447,6 @@ NTSTATUS 
STDCALL drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) { goto exit; } - if (fcb == Vcb->volume_fcb) { - TRACE("not allowing read of volume FCB\n"); - Status = STATUS_INVALID_PARAMETER; - goto exit; - } - ccb = FileObject->FsContext2; if (!ccb) { @@ -3511,22 +3461,25 @@ NTSTATUS STDCALL drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) { goto exit; } - pagefile = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE && Irp->Flags & IRP_PAGING_IO; + if (fcb == Vcb->volume_fcb) { + TRACE("reading volume FCB\n"); + + IoSkipCurrentIrpStackLocation(Irp); - if (Irp->Flags & IRP_NOCACHE) { - if (!pagefile) { - if (!ExAcquireResourceSharedLite(&Vcb->tree_lock, IoIsOperationSynchronous(Irp))) { - Status = STATUS_PENDING; - IoMarkIrpPending(Irp); - goto exit; - } - - tree_lock = TRUE; - } + Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp); + + goto exit2; } + wait = IoIsOperationSynchronous(Irp); + + // Don't offload jobs when doing paging IO - otherwise this can lead to + // deadlocks in CcCopyRead. + if (Irp->Flags & IRP_PAGING_IO) + wait = TRUE; + if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) { - if (!ExAcquireResourceSharedLite(fcb->Header.Resource, IoIsOperationSynchronous(Irp))) { + if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) { Status = STATUS_PENDING; IoMarkIrpPending(Irp); goto exit; @@ -3535,14 +3488,11 @@ NTSTATUS STDCALL drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) { fcb_lock = TRUE; } - Status = do_read(Irp, IoIsOperationSynchronous(Irp), &bytes_read); + Status = do_read(Irp, wait, &bytes_read); exit: if (fcb_lock) ExReleaseResourceLite(fcb->Header.Resource); - - if (tree_lock) - ExReleaseResourceLite(&Vcb->tree_lock); Irp->IoStatus.Status = Status; diff --git a/reactos/drivers/filesystems/btrfs/registry.c b/reactos/drivers/filesystems/btrfs/registry.c index f02114144c5..402f30d9211 100644 --- a/reactos/drivers/filesystems/btrfs/registry.c +++ b/reactos/drivers/filesystems/btrfs/registry.c @@ -27,7 +27,7 @@ NTSTATUS 
registry_load_volume_options(device_extension* Vcb) { BTRFS_UUID* uuid = &Vcb->superblock.uuid; mount_options* options = &Vcb->options; UNICODE_STRING path, ignoreus, compressus, compressforceus, compresstypeus, readonlyus, zliblevelus, flushintervalus, - maxinlineus, subvolidus, raid5recalcus, raid6recalcus; + maxinlineus, subvolidus, raid5recalcus, raid6recalcus, skipbalanceus; OBJECT_ATTRIBUTES oa; NTSTATUS Status; ULONG i, j, kvfilen, index, retlen; @@ -43,6 +43,7 @@ NTSTATUS registry_load_volume_options(device_extension* Vcb) { options->max_inline = min(mount_max_inline, Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node) - sizeof(EXTENT_DATA) + 1); options->raid5_recalculation = mount_raid5_recalculation; options->raid6_recalculation = mount_raid6_recalculation; + options->skip_balance = mount_skip_balance; options->subvol_id = 0; path.Length = path.MaximumLength = registry_path.Length + (37 * sizeof(WCHAR)); @@ -103,6 +104,7 @@ NTSTATUS registry_load_volume_options(device_extension* Vcb) { RtlInitUnicodeString(&subvolidus, L"SubvolId"); RtlInitUnicodeString(&raid5recalcus, L"Raid5Recalculation"); RtlInitUnicodeString(&raid6recalcus, L"Raid6Recalculation"); + RtlInitUnicodeString(&skipbalanceus, L"SkipBalance"); do { Status = ZwEnumerateValueKey(h, index, KeyValueFullInformation, kvfi, kvfilen, &retlen); @@ -159,6 +161,10 @@ NTSTATUS registry_load_volume_options(device_extension* Vcb) { DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset); options->raid6_recalculation = *val; + } else if (FsRtlAreNamesEqual(&skipbalanceus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) { + DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset); + + options->skip_balance = *val; } } else if (Status != STATUS_NO_MORE_ENTRIES) { ERR("ZwEnumerateValueKey returned %08x\n", Status); @@ -655,6 +661,7 @@ void STDCALL read_registry(PUNICODE_STRING regpath) { get_registry_value(h, L"MaxInline", REG_DWORD, 
&mount_max_inline, sizeof(mount_max_inline)); get_registry_value(h, L"Raid5Recalculation", REG_DWORD, &mount_raid5_recalculation, sizeof(mount_raid5_recalculation)); get_registry_value(h, L"Raid6Recalculation", REG_DWORD, &mount_raid6_recalculation, sizeof(mount_raid6_recalculation)); + get_registry_value(h, L"SkipBalance", REG_DWORD, &mount_skip_balance, sizeof(mount_skip_balance)); if (mount_flush_interval == 0) mount_flush_interval = 1; diff --git a/reactos/drivers/filesystems/btrfs/reparse.c b/reactos/drivers/filesystems/btrfs/reparse.c index 53606046be1..b495d78a5fb 100644 --- a/reactos/drivers/filesystems/btrfs/reparse.c +++ b/reactos/drivers/filesystems/btrfs/reparse.c @@ -56,7 +56,7 @@ NTSTATUS get_reparse_point(PDEVICE_OBJECT DeviceObject, PFILE_OBJECT FileObject, } TRACE("data = %p, size = %x\n", data, fcb->inode_item.st_size); - Status = read_file(fcb, (UINT8*)data, 0, fcb->inode_item.st_size, NULL, NULL); + Status = read_file(fcb, (UINT8*)data, 0, fcb->inode_item.st_size, NULL, NULL, TRUE); if (!NT_SUCCESS(Status)) { ERR("read_file returned %08x\n", Status); @@ -119,7 +119,7 @@ NTSTATUS get_reparse_point(PDEVICE_OBJECT DeviceObject, PFILE_OBJECT FileObject, if (fcb->type == BTRFS_TYPE_FILE) { ULONG len; - Status = read_file(fcb, buffer, 0, buflen, &len, NULL); + Status = read_file(fcb, buffer, 0, buflen, &len, NULL, TRUE); if (!NT_SUCCESS(Status)) { ERR("read_file returned %08x\n", Status); @@ -177,6 +177,9 @@ static NTSTATUS set_symlink(PIRP Irp, file_ref* fileref, ccb* ccb, REPARSE_DATA_ fileref->fcb->inode_item.st_mode |= __S_IFLNK; + if (fileref->dc) + fileref->dc->type = fileref->fcb->type; + if (write) { Status = truncate_file(fileref->fcb, 0, Irp, rollback); if (!NT_SUCCESS(Status)) { @@ -485,6 +488,9 @@ NTSTATUS delete_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp) { fileref->fcb->atts &= ~FILE_ATTRIBUTE_REPARSE_POINT; + if (fileref->dc) + fileref->dc->type = fileref->fcb->type; + mark_fileref_dirty(fileref); 
fileref->fcb->inode_item_changed = TRUE; diff --git a/reactos/drivers/filesystems/btrfs/search.c b/reactos/drivers/filesystems/btrfs/search.c index f5d250668bb..b0cd875062e 100644 --- a/reactos/drivers/filesystems/btrfs/search.c +++ b/reactos/drivers/filesystems/btrfs/search.c @@ -30,47 +30,28 @@ #endif #include -#ifndef __REACTOS__ -typedef struct _OBJECT_DIRECTORY_INFORMATION { - UNICODE_STRING Name; - UNICODE_STRING TypeName; -} OBJECT_DIRECTORY_INFORMATION, *POBJECT_DIRECTORY_INFORMATION; -#endif - -#if !defined (_GNU_NTIFS_) || defined(__REACTOS__) -NTSTATUS WINAPI ZwQueryDirectoryObject(HANDLE DirectoryHandle, PVOID Buffer, ULONG Length, - BOOLEAN ReturnSingleEntry, BOOLEAN RestartScan, PULONG Context, - PULONG ReturnLength); -#endif - -VOID WINAPI IopNotifyPlugPlayNotification( - IN PDEVICE_OBJECT DeviceObject, - IN IO_NOTIFICATION_EVENT_CATEGORY EventCategory, - IN LPCGUID Event, - IN PVOID EventCategoryData1, - IN PVOID EventCategoryData2 -); - -static const WCHAR devpath[] = {'\\','D','e','v','i','c','e',0}; +extern LIST_ENTRY volumes; +extern ERESOURCE volumes_lock; +extern LIST_ENTRY pnp_disks; -static NTSTATUS create_part0(PDRIVER_OBJECT DriverObject, PDEVICE_OBJECT DeviceObject, PUNICODE_STRING pardir, PUNICODE_STRING nameus, - BTRFS_UUID* uuid) { +static NTSTATUS create_part0(PDRIVER_OBJECT DriverObject, PDEVICE_OBJECT DeviceObject, PUNICODE_STRING devpath, + PUNICODE_STRING nameus, BTRFS_UUID* uuid) { PDEVICE_OBJECT newdevobj; UNICODE_STRING name; NTSTATUS Status; part0_device_extension* p0de; - static const WCHAR btrfs_partition[] = L"\\BtrfsPartition"; + static const WCHAR part0_suffix[] = L"Btrfs"; - name.Length = name.MaximumLength = pardir->Length + (wcslen(btrfs_partition) * sizeof(WCHAR)); + name.Length = name.MaximumLength = devpath->Length + (wcslen(part0_suffix) * sizeof(WCHAR)); name.Buffer = ExAllocatePoolWithTag(PagedPool, name.Length, ALLOC_TAG); if (!name.Buffer) { ERR("out of memory\n"); return STATUS_INSUFFICIENT_RESOURCES; } - 
RtlCopyMemory(name.Buffer, pardir->Buffer, pardir->Length); - RtlCopyMemory(&name.Buffer[pardir->Length / sizeof(WCHAR)], btrfs_partition, wcslen(btrfs_partition) * sizeof(WCHAR)); + RtlCopyMemory(name.Buffer, devpath->Buffer, devpath->Length); + RtlCopyMemory(&name.Buffer[devpath->Length / sizeof(WCHAR)], part0_suffix, wcslen(part0_suffix) * sizeof(WCHAR)); Status = IoCreateDevice(DriverObject, sizeof(part0_device_extension), &name, FILE_DEVICE_DISK, FILE_DEVICE_SECURE_OPEN, FALSE, &newdevobj); if (!NT_SUCCESS(Status)) { @@ -101,6 +82,7 @@ static NTSTATUS create_part0(PDRIVER_OBJECT DriverObject, PDEVICE_OBJECT DeviceO ObReferenceObject(DeviceObject); newdevobj->StackSize = DeviceObject->StackSize + 1; + newdevobj->SectorSize = DeviceObject->SectorSize; newdevobj->Flags |= DO_DIRECT_IO; newdevobj->Flags &= ~DO_DEVICE_INITIALIZING; @@ -110,7 +92,7 @@ static NTSTATUS create_part0(PDRIVER_OBJECT DriverObject, PDEVICE_OBJECT DeviceO return STATUS_SUCCESS; } -static void STDCALL add_volume(PDEVICE_OBJECT mountmgr, PUNICODE_STRING us) { +void add_volume(PDEVICE_OBJECT mountmgr, PUNICODE_STRING us) { ULONG tnsize; MOUNTMGR_TARGET_NAME* tn; KEVENT Event; @@ -156,7 +138,7 @@ static void STDCALL add_volume(PDEVICE_OBJECT mountmgr, PUNICODE_STRING us) { ExFreePool(tn); - mmdltsize = sizeof(MOUNTMGR_DRIVE_LETTER_TARGET) - 1 + us->Length; + mmdltsize = offsetof(MOUNTMGR_DRIVE_LETTER_TARGET, DeviceName[0]) + us->Length; mmdlt = ExAllocatePoolWithTag(NonPagedPool, mmdltsize, ALLOC_TAG); if (!mmdlt) { @@ -191,7 +173,7 @@ static void STDCALL add_volume(PDEVICE_OBJECT mountmgr, PUNICODE_STRING us) { ExFreePool(mmdlt); } -static void STDCALL test_vol(PDRIVER_OBJECT DriverObject, PDEVICE_OBJECT mountmgr, PUNICODE_STRING pardir, PUNICODE_STRING us, BOOL part0, LIST_ENTRY* volumes) { +static void STDCALL test_vol(PDRIVER_OBJECT DriverObject, PDEVICE_OBJECT mountmgr, PUNICODE_STRING devpath, DWORD disk_num, DWORD part_num, LIST_ENTRY* volumes) { KEVENT Event; PIRP Irp; IO_STATUS_BLOCK 
IoStatusBlock; @@ -200,31 +182,15 @@ static void STDCALL test_vol(PDRIVER_OBJECT DriverObject, PDEVICE_OBJECT mountmg PDEVICE_OBJECT DeviceObject; LARGE_INTEGER Offset; ULONG toread; - UINT8* data; - UNICODE_STRING us2; - BOOL added_entry = FALSE; + UINT8* data = NULL; UINT32 sector_size; - TRACE("%.*S\n", us->Length / sizeof(WCHAR), us->Buffer); - - us2.Length = pardir->Length + sizeof(WCHAR) + us->Length; - us2.MaximumLength = us2.Length; - us2.Buffer = ExAllocatePoolWithTag(PagedPool, us2.Length, ALLOC_TAG); - if (!us2.Buffer) { - ERR("out of memory\n"); - return; - } - - RtlCopyMemory(us2.Buffer, pardir->Buffer, pardir->Length); - us2.Buffer[pardir->Length / sizeof(WCHAR)] = '\\'; - RtlCopyMemory(&us2.Buffer[(pardir->Length / sizeof(WCHAR))+1], us->Buffer, us->Length); - - TRACE("%.*S\n", us2.Length / sizeof(WCHAR), us2.Buffer); + TRACE("%.*S\n", devpath->Length / sizeof(WCHAR), devpath->Buffer); - Status = IoGetDeviceObjectPointer(&us2, FILE_READ_ATTRIBUTES, &FileObject, &DeviceObject); + Status = IoGetDeviceObjectPointer(devpath, FILE_READ_ATTRIBUTES, &FileObject, &DeviceObject); if (!NT_SUCCESS(Status)) { ERR("IoGetDeviceObjectPointer returned %08x\n", Status); - goto exit; + return; } sector_size = DeviceObject->SectorSize; @@ -238,20 +204,20 @@ static void STDCALL test_vol(PDRIVER_OBJECT DriverObject, PDEVICE_OBJECT mountmg if (!NT_SUCCESS(Status)) { ERR("%.*S had a sector size of 0, and IOCTL_DISK_GET_DRIVE_GEOMETRY returned %08x\n", - us2.Length / sizeof(WCHAR), us2.Buffer, Status); - goto exit; + devpath->Length / sizeof(WCHAR), devpath->Buffer, Status); + goto deref; } if (iosb.Information < sizeof(DISK_GEOMETRY)) { ERR("%.*S: IOCTL_DISK_GET_DRIVE_GEOMETRY returned %u bytes, expected %u\n", - us2.Length / sizeof(WCHAR), us2.Buffer, iosb.Information, sizeof(DISK_GEOMETRY)); + devpath->Length / sizeof(WCHAR), devpath->Buffer, iosb.Information, sizeof(DISK_GEOMETRY)); } sector_size = geometry.BytesPerSector; if (sector_size == 0) { - ERR("%.*S had a 
sector size of 0\n", us2.Length / sizeof(WCHAR), us2.Buffer); - goto exit; + ERR("%.*S had a sector size of 0\n", devpath->Length / sizeof(WCHAR), devpath->Buffer); + goto deref; } } @@ -295,33 +261,49 @@ static void STDCALL test_vol(PDRIVER_OBJECT DriverObject, PDEVICE_OBJECT mountmg &gli, sizeof(gli), TRUE, NULL); if (!NT_SUCCESS(Status)) { ERR("error reading length information: %08x\n", Status); + ExFreePool(v); goto deref; } - if (part0) { + if (part_num == 0) { UNICODE_STRING us3; - Status = create_part0(DriverObject, DeviceObject, pardir, &us3, &sb->dev_item.device_uuid); + Status = create_part0(DriverObject, DeviceObject, devpath, &us3, &sb->dev_item.device_uuid); if (!NT_SUCCESS(Status)) { ERR("create_part0 returned %08x\n", Status); + ExFreePool(v); + goto deref; + } + + v->devpath = us3; + } else { + v->devpath.Length = v->devpath.MaximumLength = devpath->Length; + v->devpath.Buffer = ExAllocatePoolWithTag(PagedPool, v->devpath.Length, ALLOC_TAG); + + if (!v->devpath.Buffer) { + ERR("out of memory\n"); + ExFreePool(v); goto deref; } - ExFreePool(us2.Buffer); - us2 = us3; + RtlCopyMemory(v->devpath.Buffer, devpath->Buffer, v->devpath.Length); } RtlCopyMemory(&v->fsuuid, &sb->uuid, sizeof(BTRFS_UUID)); RtlCopyMemory(&v->devuuid, &sb->dev_item.device_uuid, sizeof(BTRFS_UUID)); v->devnum = sb->dev_item.dev_id; - v->devpath = us2; v->processed = FALSE; v->length = gli.Length.QuadPart; v->gen1 = sb->generation; v->gen2 = 0; v->seeding = sb->flags & BTRFS_SUPERBLOCK_FLAGS_SEEDING ? 
TRUE : FALSE; + v->disk_num = disk_num; + v->part_num = part_num; + + ExAcquireResourceExclusiveLite(&volumes_lock, TRUE); InsertTailList(volumes, &v->list_entry); + ExReleaseResourceLite(&volumes_lock); i = 1; while (superblock_addrs[i] != 0 && superblock_addrs[i] + toread <= v->length) { @@ -368,120 +350,16 @@ static void STDCALL test_vol(PDRIVER_OBJECT DriverObject, PDEVICE_OBJECT mountmg v->devuuid.uuid[0], v->devuuid.uuid[1], v->devuuid.uuid[2], v->devuuid.uuid[3], v->devuuid.uuid[4], v->devuuid.uuid[5], v->devuuid.uuid[6], v->devuuid.uuid[7], v->devuuid.uuid[8], v->devuuid.uuid[9], v->devuuid.uuid[10], v->devuuid.uuid[11], v->devuuid.uuid[12], v->devuuid.uuid[13], v->devuuid.uuid[14], v->devuuid.uuid[15]); TRACE("device number %llx\n", v->devnum); - - added_entry = TRUE; } deref: - ExFreePool(data); - ObDereferenceObject(FileObject); - -exit: - if (!added_entry) - ExFreePool(us2.Buffer); -} - -static NTSTATUS look_in_harddisk_dir(PDRIVER_OBJECT DriverObject, PDEVICE_OBJECT mountmgr, PUNICODE_STRING name, LIST_ENTRY* volumes) { - UNICODE_STRING path; - OBJECT_ATTRIBUTES attr; - NTSTATUS Status; - HANDLE h; - OBJECT_DIRECTORY_INFORMATION* odi; - ULONG odisize, context; - BOOL restart, has_part0 = FALSE, has_parts = FALSE; - - static const WCHAR partition[] = L"Partition"; - static WCHAR partition0[] = L"Partition0"; - - path.Buffer = ExAllocatePoolWithTag(PagedPool, ((wcslen(devpath) + 1) * sizeof(WCHAR)) + name->Length, ALLOC_TAG); - if (!path.Buffer) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - RtlCopyMemory(path.Buffer, devpath, wcslen(devpath) * sizeof(WCHAR)); - path.Buffer[wcslen(devpath)] = '\\'; - RtlCopyMemory(&path.Buffer[wcslen(devpath) + 1], name->Buffer, name->Length); - path.Length = path.MaximumLength = ((wcslen(devpath) + 1) * sizeof(WCHAR)) + name->Length; - - attr.Length = sizeof(attr); - attr.RootDirectory = 0; - attr.Attributes = OBJ_CASE_INSENSITIVE; - attr.ObjectName = &path; - attr.SecurityDescriptor = NULL; 
- attr.SecurityQualityOfService = NULL; - - Status = ZwOpenDirectoryObject(&h, DIRECTORY_TRAVERSE, &attr); - - if (!NT_SUCCESS(Status)) { - ERR("ZwOpenDirectoryObject returned %08x\n", Status); - goto end; - } - - odisize = sizeof(OBJECT_DIRECTORY_INFORMATION) * 16; - odi = ExAllocatePoolWithTag(PagedPool, odisize, ALLOC_TAG); - if (!odi) { - ERR("out of memory\n"); - Status = STATUS_INSUFFICIENT_RESOURCES; - ZwClose(h); - goto end; - } + if (data) + ExFreePool(data); - restart = TRUE; - do { - Status = ZwQueryDirectoryObject(h, odi, odisize, FALSE, restart, &context, NULL/*&retlen*/); - restart = FALSE; - - if (!NT_SUCCESS(Status)) { - if (Status != STATUS_NO_MORE_ENTRIES) - ERR("ZwQueryDirectoryObject returned %08x\n", Status); - } else { - OBJECT_DIRECTORY_INFORMATION* odi2 = odi; - - while (odi2->Name.Buffer) { - TRACE("%.*S, %.*S\n", odi2->TypeName.Length / sizeof(WCHAR), odi2->TypeName.Buffer, odi2->Name.Length / sizeof(WCHAR), odi2->Name.Buffer); - - if (odi2->Name.Length > wcslen(partition) * sizeof(WCHAR) && - RtlCompareMemory(odi2->Name.Buffer, partition, wcslen(partition) * sizeof(WCHAR)) == wcslen(partition) * sizeof(WCHAR)) { - - if (odi2->Name.Length == (wcslen(partition) + 1) * sizeof(WCHAR) && odi2->Name.Buffer[(odi2->Name.Length / sizeof(WCHAR)) - 1] == '0') { - // Partition0 refers to the whole disk - has_part0 = TRUE; - } else { - has_parts = TRUE; - - test_vol(DriverObject, mountmgr, &path, &odi2->Name, FALSE, volumes); - } - } - - odi2 = &odi2[1]; - } - } - } while (NT_SUCCESS(Status)); - - // If disk had no partitions, test the whole disk - if (!has_parts && has_part0) { - UNICODE_STRING part0us; - - part0us.Buffer = partition0; - part0us.Length = part0us.MaximumLength = wcslen(partition0) * sizeof(WCHAR); - - test_vol(DriverObject, mountmgr, &path, &part0us, TRUE, volumes); - } - - ZwClose(h); - - ExFreePool(odi); - - Status = STATUS_SUCCESS; - -end: - ExFreePool(path.Buffer); - - return Status; + ObDereferenceObject(FileObject); } -static 
void remove_drive_letter(PDEVICE_OBJECT mountmgr, volume* v) { +void remove_drive_letter(PDEVICE_OBJECT mountmgr, volume* v) { NTSTATUS Status; KEVENT Event; PIRP Irp; @@ -566,79 +444,10 @@ static void remove_drive_letter(PDEVICE_OBJECT mountmgr, volume* v) { ExFreePool(mmp); } -void STDCALL look_for_vols(PDRIVER_OBJECT DriverObject, LIST_ENTRY* volumes) { - PFILE_OBJECT FileObject; - PDEVICE_OBJECT mountmgr; - OBJECT_ATTRIBUTES attr; - UNICODE_STRING mmdevpath, us; - HANDLE h; - OBJECT_DIRECTORY_INFORMATION* odi; - ULONG odisize; - ULONG context; - BOOL restart; - NTSTATUS Status; +static void refresh_mountmgr(PDEVICE_OBJECT mountmgr, LIST_ENTRY* volumes) { LIST_ENTRY* le; - static const WCHAR directory[] = L"Directory"; - static const WCHAR harddisk[] = L"Harddisk"; - - RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME); - Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &FileObject, &mountmgr); - if (!NT_SUCCESS(Status)) { - ERR("IoGetDeviceObjectPointer returned %08x\n", Status); - return; - } - - RtlInitUnicodeString(&us, devpath); - - attr.Length = sizeof(attr); - attr.RootDirectory = 0; - attr.Attributes = OBJ_CASE_INSENSITIVE; - attr.ObjectName = &us; - attr.SecurityDescriptor = NULL; - attr.SecurityQualityOfService = NULL; - - Status = ZwOpenDirectoryObject(&h, DIRECTORY_TRAVERSE, &attr); - - if (!NT_SUCCESS(Status)) { - ERR("ZwOpenDirectoryObject returned %08x\n", Status); - return; - } - - odisize = sizeof(OBJECT_DIRECTORY_INFORMATION) * 16; - odi = ExAllocatePoolWithTag(PagedPool, odisize, ALLOC_TAG); - if (!odi) { - ERR("out of memory\n"); - ZwClose(h); - return; - } - - restart = TRUE; - do { - Status = ZwQueryDirectoryObject(h, odi, odisize, FALSE, restart, &context, NULL/*&retlen*/); - restart = FALSE; - - if (!NT_SUCCESS(Status)) { - if (Status != STATUS_NO_MORE_ENTRIES) - ERR("ZwQueryDirectoryObject returned %08x\n", Status); - } else { - OBJECT_DIRECTORY_INFORMATION* odi2 = odi; - - while (odi2->Name.Buffer) { - if 
(odi2->TypeName.Length == wcslen(directory) * sizeof(WCHAR) && - RtlCompareMemory(odi2->TypeName.Buffer, directory, odi2->TypeName.Length) == odi2->TypeName.Length && - odi2->Name.Length > wcslen(harddisk) * sizeof(WCHAR) && - RtlCompareMemory(odi2->Name.Buffer, harddisk, wcslen(harddisk) * sizeof(WCHAR)) == wcslen(harddisk) * sizeof(WCHAR)) { - look_in_harddisk_dir(DriverObject, mountmgr, &odi2->Name, volumes); - } - - odi2 = &odi2[1]; - } - } - } while (NT_SUCCESS(Status)); - - ExFreePool(odi); - ZwClose(h); + ExAcquireResourceExclusiveLite(&volumes_lock, TRUE); le = volumes->Flink; while (le != volumes) { @@ -670,5 +479,215 @@ void STDCALL look_for_vols(PDRIVER_OBJECT DriverObject, LIST_ENTRY* volumes) { le = le->Flink; } + ExReleaseResourceLite(&volumes_lock); +} + +static void add_pnp_disk(ULONG disk_num, PUNICODE_STRING devpath) { + LIST_ENTRY* le; + pnp_disk* disk; + + le = pnp_disks.Flink; + while (le != &pnp_disks) { + disk = CONTAINING_RECORD(le, pnp_disk, list_entry); + + if (disk->devpath.Length == devpath->Length && + RtlCompareMemory(disk->devpath.Buffer, devpath->Buffer, devpath->Length) == devpath->Length) + return; + + le = le->Flink; + } + + disk = ExAllocatePoolWithTag(PagedPool, sizeof(pnp_disk), ALLOC_TAG); + if (!disk) { + ERR("out of memory\n"); + return; + } + + disk->devpath.Length = disk->devpath.MaximumLength = devpath->Length; + disk->devpath.Buffer = ExAllocatePoolWithTag(PagedPool, devpath->Length, ALLOC_TAG); + + if (!disk->devpath.Buffer) { + ERR("out of memory\n"); + ExFreePool(disk); + return; + } + + RtlCopyMemory(disk->devpath.Buffer, devpath->Buffer, devpath->Length); + + disk->disk_num = disk_num; + + InsertTailList(&pnp_disks, &disk->list_entry); +} + +static void disk_arrival(PDRIVER_OBJECT DriverObject, PUNICODE_STRING devpath) { + PFILE_OBJECT FileObject, FileObject2; + PDEVICE_OBJECT devobj, mountmgr; + NTSTATUS Status; + STORAGE_DEVICE_NUMBER sdn; + ULONG dlisize; + DRIVE_LAYOUT_INFORMATION_EX* dli; + IO_STATUS_BLOCK 
iosb; + int i, num_parts = 0; + UNICODE_STRING devname, num, bspus, mmdevpath; + WCHAR devnamew[255], numw[20]; + USHORT preflen; + + static WCHAR device_harddisk[] = L"\\Device\\Harddisk"; + static WCHAR bs_partition[] = L"\\Partition"; + + // FIXME - work with CD-ROMs and floppies(?) + + Status = IoGetDeviceObjectPointer(devpath, FILE_READ_ATTRIBUTES, &FileObject, &devobj); + if (!NT_SUCCESS(Status)) { + ERR("IoGetDeviceObjectPointer returned %08x\n", Status); + return; + } + + RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME); + Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &FileObject2, &mountmgr); + if (!NT_SUCCESS(Status)) { + ERR("IoGetDeviceObjectPointer returned %08x\n", Status); + ObDereferenceObject(FileObject); + return; + } + + Status = dev_ioctl(devobj, IOCTL_STORAGE_GET_DEVICE_NUMBER, NULL, 0, + &sdn, sizeof(STORAGE_DEVICE_NUMBER), TRUE, &iosb); + if (!NT_SUCCESS(Status)) { + ERR("IOCTL_STORAGE_GET_DEVICE_NUMBER returned %08x\n", Status); + goto end; + } + + ExAcquireResourceExclusiveLite(&volumes_lock, TRUE); + add_pnp_disk(sdn.DeviceNumber, devpath); + ExReleaseResourceLite(&volumes_lock); + + dlisize = 0; + + do { + dlisize += 1024; + dli = ExAllocatePoolWithTag(PagedPool, dlisize, ALLOC_TAG); + + Status = dev_ioctl(devobj, IOCTL_DISK_GET_DRIVE_LAYOUT_EX, NULL, 0, + dli, dlisize, TRUE, &iosb); + } while (Status == STATUS_BUFFER_TOO_SMALL); + + if (!NT_SUCCESS(Status)) { + ExFreePool(dli); + goto no_parts; + } + + wcscpy(devnamew, device_harddisk); + devname.Buffer = devnamew; + devname.MaximumLength = sizeof(devnamew); + devname.Length = wcslen(device_harddisk) * sizeof(WCHAR); + + num.Buffer = numw; + num.MaximumLength = sizeof(numw); + RtlIntegerToUnicodeString(sdn.DeviceNumber, 10, &num); + RtlAppendUnicodeStringToString(&devname, &num); + + bspus.Buffer = bs_partition; + bspus.Length = bspus.MaximumLength = wcslen(bs_partition) * sizeof(WCHAR); + RtlAppendUnicodeStringToString(&devname, &bspus); + + preflen = 
devname.Length; + + for (i = 0; i < dli->PartitionCount; i++) { + if (dli->PartitionEntry[i].PartitionLength.QuadPart != 0 && dli->PartitionEntry[i].PartitionNumber != 0) { + devname.Length = preflen; + RtlIntegerToUnicodeString(dli->PartitionEntry[i].PartitionNumber, 10, &num); + RtlAppendUnicodeStringToString(&devname, &num); + + test_vol(DriverObject, mountmgr, &devname, sdn.DeviceNumber, dli->PartitionEntry[i].PartitionNumber, &volumes); + + num_parts++; + } + } + + ExFreePool(dli); + +no_parts: + if (num_parts == 0) { + devname.Length = preflen; + devname.Buffer[devname.Length / sizeof(WCHAR)] = '0'; + devname.Length += sizeof(WCHAR); + + test_vol(DriverObject, mountmgr, &devname, sdn.DeviceNumber, 0, &volumes); + } + +end: + refresh_mountmgr(mountmgr, &volumes); + ObDereferenceObject(FileObject); + ObDereferenceObject(FileObject2); +} + +static void disk_removal(PDRIVER_OBJECT DriverObject, PUNICODE_STRING devpath) { + LIST_ENTRY* le; + pnp_disk* disk = NULL; + + // FIXME - remove Partition0Btrfs devices and unlink from mountmgr + // FIXME - emergency unmount of RAIDed volumes + + ExAcquireResourceExclusiveLite(&volumes_lock, TRUE); + + le = pnp_disks.Flink; + while (le != &pnp_disks) { + pnp_disk* disk2 = CONTAINING_RECORD(le, pnp_disk, list_entry); + + if (disk2->devpath.Length == devpath->Length && + RtlCompareMemory(disk2->devpath.Buffer, devpath->Buffer, devpath->Length) == devpath->Length) { + disk = disk2; + break; + } + + le = le->Flink; + } + + if (!disk) { + ExReleaseResourceLite(&volumes_lock); + return; + } + + le = volumes.Flink; + while (le != &volumes) { + volume* v = CONTAINING_RECORD(le, volume, list_entry); + LIST_ENTRY* le2 = le->Flink; + + if (v->disk_num == disk->disk_num) { + if (v->devpath.Buffer) + ExFreePool(v->devpath.Buffer); + + RemoveEntryList(&v->list_entry); + + ExFreePool(v); + } + + le = le2; + } + + ExReleaseResourceLite(&volumes_lock); + + ExFreePool(disk->devpath.Buffer); + + RemoveEntryList(&disk->list_entry); + + 
ExFreePool(disk); +} + +#ifdef __REACTOS__ +NTSTATUS NTAPI pnp_notification(PVOID NotificationStructure, PVOID Context) { +#else +NTSTATUS pnp_notification(PVOID NotificationStructure, PVOID Context) { +#endif + DEVICE_INTERFACE_CHANGE_NOTIFICATION* dicn = (DEVICE_INTERFACE_CHANGE_NOTIFICATION*)NotificationStructure; + PDRIVER_OBJECT DriverObject = (PDRIVER_OBJECT)Context; + + if (RtlCompareMemory(&dicn->Event, &GUID_DEVICE_INTERFACE_ARRIVAL, sizeof(GUID)) == sizeof(GUID)) + disk_arrival(DriverObject, dicn->SymbolicLinkName); + else if (RtlCompareMemory(&dicn->Event, &GUID_DEVICE_INTERFACE_REMOVAL, sizeof(GUID)) == sizeof(GUID)) + disk_removal(DriverObject, dicn->SymbolicLinkName); + + return STATUS_SUCCESS; } diff --git a/reactos/drivers/filesystems/btrfs/security.c b/reactos/drivers/filesystems/btrfs/security.c index 7ad75a35acb..57dd7d135c3 100644 --- a/reactos/drivers/filesystems/btrfs/security.c +++ b/reactos/drivers/filesystems/btrfs/security.c @@ -401,14 +401,10 @@ static ACL* load_default_acl() { // } // } -static BOOL get_sd_from_xattr(fcb* fcb, PIRP Irp) { - ULONG buflen; +BOOL get_sd_from_xattr(fcb* fcb, ULONG buflen) { NTSTATUS Status; PSID sid, usersid; - if (!get_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_NTACL, EA_NTACL_HASH, (UINT8**)&fcb->sd, (UINT16*)&buflen, Irp)) - return FALSE; - TRACE("using xattr " EA_NTACL " for security descriptor\n"); if (fcb->inode_item.st_uid != UID_NOBODY) { @@ -655,13 +651,16 @@ end: ExFreePool(groupsid); } -void fcb_get_sd(fcb* fcb, struct _fcb* parent, PIRP Irp) { +void fcb_get_sd(fcb* fcb, struct _fcb* parent, BOOL look_for_xattr, PIRP Irp) { NTSTATUS Status; PSID usersid = NULL, groupsid = NULL; SECURITY_SUBJECT_CONTEXT subjcont; + ULONG buflen; - if (get_sd_from_xattr(fcb, Irp)) - return; + if (look_for_xattr && get_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_NTACL, EA_NTACL_HASH, (UINT8**)&fcb->sd, (UINT16*)&buflen, Irp)) { + if (get_sd_from_xattr(fcb, buflen)) + return; + } if (!parent) { get_top_level_sd(fcb); 
diff --git a/reactos/drivers/filesystems/btrfs/treefuncs.c b/reactos/drivers/filesystems/btrfs/treefuncs.c index 1864a529b8d..07919625db7 100644 --- a/reactos/drivers/filesystems/btrfs/treefuncs.c +++ b/reactos/drivers/filesystems/btrfs/treefuncs.c @@ -26,6 +26,9 @@ NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree** tree* t; tree_data* td; chunk* c; + UINT8 h; + BOOL inserted; + LIST_ENTRY* le; TRACE("(%p, %llx)\n", Vcb, addr); @@ -35,7 +38,7 @@ NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree** return STATUS_INSUFFICIENT_RESOURCES; } - Status = read_data(Vcb, addr, Vcb->superblock.node_size, NULL, TRUE, buf, NULL, &c, Irp); + Status = read_data(Vcb, addr, Vcb->superblock.node_size, NULL, TRUE, buf, NULL, &c, Irp, FALSE); if (!NT_SUCCESS(Status)) { ERR("read_data returned 0x%08x\n", Status); ExFreePool(buf); @@ -54,6 +57,7 @@ NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree** RtlCopyMemory(&t->header, th, sizeof(tree_header)); // t->address = addr; // t->level = th->level; + t->hash = calc_crc32c(0xffffffff, (UINT8*)&addr, sizeof(UINT64)); t->has_address = TRUE; t->Vcb = Vcb; t->parent = NULL; @@ -66,11 +70,6 @@ NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree** t->updated_extents = FALSE; t->write = FALSE; - if (c) - t->flags = c->chunk_item->type; - else - t->flags = 0; - // ExInitializeResourceLite(&t->nonpaged->load_tree_lock); // t->items = ExAllocatePoolWithTag(PagedPool, num_items * sizeof(tree_data), ALLOC_TAG); @@ -158,6 +157,46 @@ NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree** InterlockedIncrement(&Vcb->open_trees); InsertTailList(&Vcb->trees, &t->list_entry); + h = t->hash >> 24; + + if (!Vcb->trees_ptrs[h]) { + UINT8 h2 = h; + + le = Vcb->trees_hash.Flink; + + if (h2 > 0) { + h2--; + do { + if (Vcb->trees_ptrs[h2]) { + le = Vcb->trees_ptrs[h2]; + break; + } + + h2--; + } while (h2 > 0); + } + } else + le = 
Vcb->trees_ptrs[h]; + + inserted = FALSE; + while (le != &Vcb->trees_hash) { + tree* t2 = CONTAINING_RECORD(le, tree, list_entry_hash); + + if (t2->hash >= t->hash) { + InsertHeadList(le->Blink, &t->list_entry_hash); + inserted = TRUE; + break; + } + + le = le->Flink; + } + + if (!inserted) + InsertTailList(&Vcb->trees_hash, &t->list_entry_hash); + + if (!Vcb->trees_ptrs[h] || t->list_entry_hash.Flink == Vcb->trees_ptrs[h]) + Vcb->trees_ptrs[h] = &t->list_entry_hash; + TRACE("returning %p\n", t); *pt = t; @@ -213,6 +252,23 @@ static tree* free_tree2(tree* t, const char* func, const char* file, unsigned in // FsRtlExitFileSystem(); } + if (t->list_entry_hash.Flink) { + UINT8 h = t->hash >> 24; + if (t->Vcb->trees_ptrs[h] == &t->list_entry_hash) { + if (t->list_entry_hash.Flink != &t->Vcb->trees_hash) { + tree* t2 = CONTAINING_RECORD(t->list_entry_hash.Flink, tree, list_entry_hash); + + if ((t2->hash >> 24) == h) + t->Vcb->trees_ptrs[h] = &t2->list_entry_hash; + else + t->Vcb->trees_ptrs[h] = NULL; + } else + t->Vcb->trees_ptrs[h] = NULL; + } + + RemoveEntryList(&t->list_entry_hash); + } + ExFreePool(t); return NULL; @@ -325,7 +381,7 @@ static __inline tree_data* next_item(tree* t, tree_data* td) { return CONTAINING_RECORD(le, tree_data, list_entry); } -static NTSTATUS STDCALL find_item_in_tree(device_extension* Vcb, tree* t, traverse_ptr* tp, const KEY* searchkey, BOOL ignore, PIRP Irp, +static NTSTATUS STDCALL find_item_in_tree(device_extension* Vcb, tree* t, traverse_ptr* tp, const KEY* searchkey, BOOL ignore, UINT8 level, PIRP Irp, const char* func, const char* file, unsigned int line) { int cmp; tree_data *td, *lasttd; @@ -414,6 +470,12 @@ static NTSTATUS STDCALL find_item_in_tree(device_extension* Vcb, tree* t, traver if (!td) return STATUS_NOT_FOUND; + if (t->header.level <= level) { + tp->tree = t; + tp->item = td; + return STATUS_SUCCESS; + } + // if (i > 0) // TRACE("entering tree from (%x,%x,%x) to (%x,%x,%x) (%p)\n", (UINT32)t->items[i].key.obj_id, 
t->items[i].key.obj_type, (UINT32)t->items[i].key.offset, (UINT32)t->items[i+1].key.obj_id, t->items[i+1].key.obj_type, (UINT32)t->items[i+1].key.offset, t->items[i].tree); @@ -423,7 +485,7 @@ static NTSTATUS STDCALL find_item_in_tree(device_extension* Vcb, tree* t, traver return Status; } - Status = find_item_in_tree(Vcb, td->treeholder.tree, tp, searchkey, ignore, Irp, func, file, line); + Status = find_item_in_tree(Vcb, td->treeholder.tree, tp, searchkey, ignore, level, Irp, func, file, line); return Status; } @@ -444,7 +506,7 @@ NTSTATUS STDCALL _find_item(device_extension* Vcb, root* r, traverse_ptr* tp, co } } - Status = find_item_in_tree(Vcb, r->treeholder.tree, tp, searchkey, ignore, Irp, func, file, line); + Status = find_item_in_tree(Vcb, r->treeholder.tree, tp, searchkey, ignore, 0, Irp, func, file, line); if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) { ERR("find_item_in_tree returned %08x\n", Status); } @@ -459,6 +521,34 @@ NTSTATUS STDCALL _find_item(device_extension* Vcb, root* r, traverse_ptr* tp, co return Status; } +NTSTATUS STDCALL _find_item_to_level(device_extension* Vcb, root* r, traverse_ptr* tp, const KEY* searchkey, BOOL ignore, UINT8 level, + PIRP Irp, const char* func, const char* file, unsigned int line) { + NTSTATUS Status; + BOOL loaded; + + TRACE("(%p, %p, %p, %p)\n", Vcb, r, tp, searchkey); + + if (!r->treeholder.tree) { + Status = _do_load_tree(Vcb, &r->treeholder, r, NULL, NULL, &loaded, Irp, func, file, line); + if (!NT_SUCCESS(Status)) { + ERR("do_load_tree returned %08x\n", Status); + return Status; + } + } + + Status = find_item_in_tree(Vcb, r->treeholder.tree, tp, searchkey, ignore, level, Irp, func, file, line); + if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) { + ERR("find_item_in_tree returned %08x\n", Status); + } + + if (Status == STATUS_NOT_FOUND) { + tp->tree = r->treeholder.tree; + tp->item = NULL; + } + + return Status; +} + BOOL STDCALL _find_next_item(device_extension* Vcb, const traverse_ptr* tp, 
traverse_ptr* next_tp, BOOL ignore, PIRP Irp, const char* func, const char* file, unsigned int line) { tree* t; @@ -1168,219 +1258,536 @@ void clear_batch_list(device_extension* Vcb, LIST_ENTRY* batchlist) { } } +static void add_delete_inode_extref(device_extension* Vcb, batch_item* bi, LIST_ENTRY* listhead) { + batch_item* bi2; + LIST_ENTRY* le; + INODE_REF* delir = (INODE_REF*)bi->data; + INODE_EXTREF* ier; + + TRACE("entry in INODE_REF not found, adding Batch_DeleteInodeExtRef entry\n"); + + bi2 = ExAllocateFromPagedLookasideList(&Vcb->batch_item_lookaside); + if (!bi2) { + ERR("out of memory\n"); + return; + } + + ier = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_EXTREF) - 1 + delir->n, ALLOC_TAG); + if (!ier) { + ERR("out of memory\n"); + return; + } + + ier->dir = bi->key.offset; + ier->index = delir->index; + ier->n = delir->n; + RtlCopyMemory(ier->name, delir->name, delir->n); + + bi2->key.obj_id = bi->key.obj_id; + bi2->key.obj_type = TYPE_INODE_EXTREF; + bi2->key.offset = calc_crc32c((UINT32)bi->key.offset, (UINT8*)ier->name, ier->n); + bi2->data = ier; + bi2->datalen = sizeof(INODE_EXTREF) - 1 + ier->n; + bi2->operation = Batch_DeleteInodeExtRef; + + le = bi->list_entry.Flink; + while (le != listhead) { + batch_item* bi3 = CONTAINING_RECORD(le, batch_item, list_entry); + + if (keycmp(bi3->key, bi2->key) != -1) { + InsertHeadList(le->Blink, &bi2->list_entry); + return; + } + + le = le->Flink; + } + + InsertTailList(listhead, &bi2->list_entry); +} + static BOOL handle_batch_collision(device_extension* Vcb, batch_item* bi, tree* t, tree_data* td, tree_data* newtd, LIST_ENTRY* listhead, LIST_ENTRY* rollback) { - if (bi->operation == Batch_SetXattr || bi->operation == Batch_DirItem || bi->operation == Batch_InodeRef || bi->operation == Batch_InodeExtRef) { + if (bi->operation == Batch_Delete || bi->operation == Batch_SetXattr || bi->operation == Batch_DirItem || bi->operation == Batch_InodeRef || + bi->operation == Batch_InodeExtRef || bi->operation == 
Batch_DeleteDirItem || bi->operation == Batch_DeleteInodeRef || + bi->operation == Batch_DeleteInodeExtRef) { UINT16 maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node); - if (bi->operation == Batch_SetXattr) { - if (td->size < sizeof(DIR_ITEM)) { - ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", bi->key.obj_id, bi->key.obj_type, bi->key.offset, td->size, sizeof(DIR_ITEM)); - } else { - UINT8* newdata; - ULONG size = td->size; - DIR_ITEM* newxa = (DIR_ITEM*)bi->data; - DIR_ITEM* xa = (DIR_ITEM*)td->data; - - while (TRUE) { - ULONG oldxasize; - - if (size < sizeof(DIR_ITEM) || size < sizeof(DIR_ITEM) - 1 + xa->m + xa->n) { - ERR("(%llx,%x,%llx) was truncated\n", bi->key.obj_id, bi->key.obj_type, bi->key.offset); - break; - } - - oldxasize = sizeof(DIR_ITEM) - 1 + xa->m + xa->n; + switch (bi->operation) { + case Batch_SetXattr: { + if (td->size < sizeof(DIR_ITEM)) { + ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", bi->key.obj_id, bi->key.obj_type, bi->key.offset, td->size, sizeof(DIR_ITEM)); + } else { + UINT8* newdata; + ULONG size = td->size; + DIR_ITEM* newxa = (DIR_ITEM*)bi->data; + DIR_ITEM* xa = (DIR_ITEM*)td->data; - if (xa->n == newxa->n && RtlCompareMemory(newxa->name, xa->name, xa->n) == xa->n) { - UINT64 pos; + while (TRUE) { + ULONG oldxasize; - // replace + if (size < sizeof(DIR_ITEM) || size < sizeof(DIR_ITEM) - 1 + xa->m + xa->n) { + ERR("(%llx,%x,%llx) was truncated\n", bi->key.obj_id, bi->key.obj_type, bi->key.offset); + break; + } - if (td->size + bi->datalen - oldxasize > maxlen) - ERR("DIR_ITEM would be over maximum size, truncating (%u + %u - %u > %u)\n", td->size, bi->datalen, oldxasize, maxlen); + oldxasize = sizeof(DIR_ITEM) - 1 + xa->m + xa->n; - newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen - oldxasize, ALLOC_TAG); - if (!newdata) { - ERR("out of memory\n"); - return TRUE; + if (xa->n == newxa->n && RtlCompareMemory(newxa->name, xa->name, xa->n) == xa->n) { + UINT64 pos; + + 
// replace + + if (td->size + bi->datalen - oldxasize > maxlen) + ERR("DIR_ITEM would be over maximum size, truncating (%u + %u - %u > %u)\n", td->size, bi->datalen, oldxasize, maxlen); + + newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen - oldxasize, ALLOC_TAG); + if (!newdata) { + ERR("out of memory\n"); + return TRUE; + } + + pos = (UINT8*)xa - td->data; + if (pos + oldxasize < td->size) { // copy after changed xattr + RtlCopyMemory(newdata + pos + bi->datalen, td->data + pos + oldxasize, td->size - pos - oldxasize); + } + + if (pos > 0) { // copy before changed xattr + RtlCopyMemory(newdata, td->data, pos); + xa = (DIR_ITEM*)(newdata + pos); + } else + xa = (DIR_ITEM*)newdata; + + RtlCopyMemory(xa, bi->data, bi->datalen); + + bi->datalen = min(td->size + bi->datalen - oldxasize, maxlen); + + ExFreePool(bi->data); + bi->data = newdata; + + break; } - pos = (UINT8*)xa - td->data; - if (pos + oldxasize < td->size) { // copy after changed xattr - RtlCopyMemory(newdata + pos + bi->datalen, td->data + pos + oldxasize, td->size - pos - oldxasize); + if ((UINT8*)xa - (UINT8*)td->data + oldxasize >= size) { + // not found, add to end of data + + if (td->size + bi->datalen > maxlen) + ERR("DIR_ITEM would be over maximum size, truncating (%u + %u > %u)\n", td->size, bi->datalen, maxlen); + + newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen, ALLOC_TAG); + if (!newdata) { + ERR("out of memory\n"); + return TRUE; + } + + RtlCopyMemory(newdata, td->data, td->size); + + xa = (DIR_ITEM*)((UINT8*)newdata + td->size); + RtlCopyMemory(xa, bi->data, bi->datalen); + + bi->datalen = min(bi->datalen + td->size, maxlen); + + ExFreePool(bi->data); + bi->data = newdata; + + break; + } else { + xa = (DIR_ITEM*)&xa->name[xa->m + xa->n]; + size -= oldxasize; } + } + } + break; + } + + case Batch_DirItem: { + UINT8* newdata; + + if (td->size + bi->datalen > maxlen) { + ERR("DIR_ITEM would be over maximum size (%u + %u > %u)\n", td->size, bi->datalen, 
maxlen); + return TRUE; + } + + newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen, ALLOC_TAG); + if (!newdata) { + ERR("out of memory\n"); + return TRUE; + } + + RtlCopyMemory(newdata, td->data, td->size); + + RtlCopyMemory(newdata + td->size, bi->data, bi->datalen); + + bi->datalen += td->size; + + ExFreePool(bi->data); + bi->data = newdata; + + break; + } + + case Batch_InodeRef: { + UINT8* newdata; + + if (td->size + bi->datalen > maxlen) { + if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF) { + INODE_REF* ir = (INODE_REF*)bi->data; + INODE_EXTREF* ier; + ULONG ierlen; + batch_item* bi2; + LIST_ENTRY* le; + BOOL inserted = FALSE; - if (pos > 0) { // copy before changed xattr - RtlCopyMemory(newdata, td->data, pos); - xa = (DIR_ITEM*)(newdata + pos); - } else - xa = (DIR_ITEM*)newdata; - - RtlCopyMemory(xa, bi->data, bi->datalen); - - bi->datalen = min(td->size + bi->datalen - oldxasize, maxlen); - - ExFreePool(bi->data); - bi->data = newdata; + TRACE("INODE_REF would be too long, adding INODE_EXTREF instead\n"); + + ierlen = sizeof(INODE_EXTREF) - 1 + ir->n; - break; - } - - if ((UINT8*)xa - (UINT8*)td->data + oldxasize >= size) { - // not found, add to end of data + ier = ExAllocatePoolWithTag(PagedPool, ierlen, ALLOC_TAG); + if (!ier) { + ERR("out of memory\n"); + return TRUE; + } - if (td->size + bi->datalen > maxlen) - ERR("DIR_ITEM would be over maximum size, truncating (%u + %u > %u)\n", td->size, bi->datalen, maxlen); + ier->dir = bi->key.offset; + ier->index = ir->index; + ier->n = ir->n; + RtlCopyMemory(ier->name, ir->name, ier->n); - newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen, ALLOC_TAG); - if (!newdata) { + bi2 = ExAllocateFromPagedLookasideList(&Vcb->batch_item_lookaside); + if (!bi2) { ERR("out of memory\n"); + ExFreePool(ier); return TRUE; } - RtlCopyMemory(newdata, td->data, td->size); + bi2->key.obj_id = bi->key.obj_id; + bi2->key.obj_type = TYPE_INODE_EXTREF; + bi2->key.offset = 
calc_crc32c((UINT32)ier->dir, (UINT8*)ier->name, ier->n); + bi2->data = ier; + bi2->datalen = ierlen; + bi2->operation = Batch_InodeExtRef; - xa = (DIR_ITEM*)((UINT8*)newdata + td->size); - RtlCopyMemory(xa, bi->data, bi->datalen); + le = bi->list_entry.Flink; + while (le != listhead) { + batch_item* bi3 = CONTAINING_RECORD(le, batch_item, list_entry); + + if (keycmp(bi3->key, bi2->key) != -1) { + InsertHeadList(le->Blink, &bi2->list_entry); + inserted = TRUE; + } + + le = le->Flink; + } - bi->datalen = min(bi->datalen + td->size, maxlen); + if (!inserted) + InsertTailList(listhead, &bi2->list_entry); - ExFreePool(bi->data); - bi->data = newdata; - - break; + return TRUE; } else { - xa = (DIR_ITEM*)&xa->name[xa->m + xa->n]; - size -= oldxasize; + ERR("INODE_REF would be over maximum size (%u + %u > %u)\n", td->size, bi->datalen, maxlen); + return TRUE; } } + + newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen, ALLOC_TAG); + if (!newdata) { + ERR("out of memory\n"); + return TRUE; + } + + RtlCopyMemory(newdata, td->data, td->size); + + RtlCopyMemory(newdata + td->size, bi->data, bi->datalen); + + bi->datalen += td->size; + + ExFreePool(bi->data); + bi->data = newdata; + + break; } - } else if (bi->operation == Batch_DirItem) { - UINT8* newdata; - - if (td->size + bi->datalen > maxlen) { - ERR("DIR_ITEM would be over maximum size (%u + %u > %u)\n", td->size, bi->datalen, maxlen); - return TRUE; - } - - newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen, ALLOC_TAG); - if (!newdata) { - ERR("out of memory\n"); - return TRUE; - } - - RtlCopyMemory(newdata, td->data, td->size); - - RtlCopyMemory(newdata + td->size, bi->data, bi->datalen); + + case Batch_InodeExtRef: { + UINT8* newdata; + + if (td->size + bi->datalen > maxlen) { + ERR("INODE_EXTREF would be over maximum size (%u + %u > %u)\n", td->size, bi->datalen, maxlen); + return TRUE; + } + + newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen, ALLOC_TAG); + if (!newdata) 
{ + ERR("out of memory\n"); + return TRUE; + } + + RtlCopyMemory(newdata, td->data, td->size); + + RtlCopyMemory(newdata + td->size, bi->data, bi->datalen); - bi->datalen += td->size; - - ExFreePool(bi->data); - bi->data = newdata; - } else if (bi->operation == Batch_InodeRef) { - UINT8* newdata; - - if (td->size + bi->datalen > maxlen) { - if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF) { - INODE_REF* ir = (INODE_REF*)bi->data; - INODE_EXTREF* ier; - ULONG ierlen; - batch_item* bi2; - LIST_ENTRY* le; - BOOL inserted = FALSE; + bi->datalen += td->size; + + ExFreePool(bi->data); + bi->data = newdata; + + break; + } + + case Batch_DeleteDirItem: { + if (td->size < sizeof(DIR_ITEM)) { + WARN("DIR_ITEM was %u bytes, expected at least %u\n", td->size, sizeof(DIR_ITEM)); + return TRUE; + } else { + DIR_ITEM *di, *deldi; + LONG len; - TRACE("INODE_REF would be too long, adding INODE_EXTREF instead\n"); + deldi = (DIR_ITEM*)bi->data; + di = (DIR_ITEM*)td->data; + len = td->size; + + do { + if (di->m == deldi->m && di->n == deldi->n && RtlCompareMemory(di->name, deldi->name, di->n + di->m) == di->n + di->m) { + ULONG newlen = td->size - (sizeof(DIR_ITEM) - sizeof(char) + di->n + di->m); + + if (newlen == 0) { + TRACE("deleting DIR_ITEM\n"); + } else { + UINT8 *newdi = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *dioff; + tree_data* td2; + + if (!newdi) { + ERR("out of memory\n"); + return TRUE; + } + + TRACE("modifying DIR_ITEM\n"); - ierlen = sizeof(INODE_EXTREF) - 1 + ir->n; + if ((UINT8*)di > td->data) { + RtlCopyMemory(newdi, td->data, (UINT8*)di - td->data); + dioff = newdi + ((UINT8*)di - td->data); + } else { + dioff = newdi; + } + + if ((UINT8*)&di->name[di->n + di->m] - td->data < td->size) + RtlCopyMemory(dioff, &di->name[di->n + di->m], td->size - ((UINT8*)&di->name[di->n + di->m] - td->data)); + + td2 = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); + if (!td2) { + ERR("out of memory\n"); + return TRUE; + } + + 
td2->key = bi->key; + td2->size = newlen; + td2->data = newdi; + td2->ignore = FALSE; + td2->inserted = TRUE; + + InsertHeadList(td->list_entry.Blink, &td2->list_entry); + + t->header.num_items++; + t->size += newlen + sizeof(leaf_node); + t->write = TRUE; + } + + break; + } + + len -= sizeof(DIR_ITEM) - sizeof(char) + di->n + di->m; + di = (DIR_ITEM*)&di->name[di->n + di->m]; + + if (len == 0) { + TRACE("could not find DIR_ITEM to delete\n"); + return TRUE; + } + } while (len > 0); + } + break; + } + + case Batch_DeleteInodeRef: { + if (td->size < sizeof(INODE_REF)) { + WARN("INODE_REF was %u bytes, expected at least %u\n", td->size, sizeof(INODE_REF)); + return TRUE; + } else { + INODE_REF *ir, *delir; + ULONG len; + BOOL changed = FALSE; - ier = ExAllocatePoolWithTag(PagedPool, ierlen, ALLOC_TAG); - if (!ier) { - ERR("out of memory\n"); - return TRUE; - } + delir = (INODE_REF*)bi->data; + ir = (INODE_REF*)td->data; + len = td->size; - ier->dir = bi->key.offset; - ier->index = ir->index; - ier->n = ir->n; - RtlCopyMemory(ier->name, ir->name, ier->n); + do { + ULONG itemlen; + + if (len < sizeof(INODE_REF) || len < sizeof(INODE_REF) - 1 + ir->n) { + ERR("INODE_REF was truncated\n"); + break; + } + + itemlen = sizeof(INODE_REF) - sizeof(char) + ir->n; + + if (ir->n == delir->n && RtlCompareMemory(ir->name, delir->name, ir->n) == ir->n) { + ULONG newlen = td->size - itemlen; + + changed = TRUE; + + if (newlen == 0) + TRACE("deleting INODE_REF\n"); + else { + UINT8 *newir = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *iroff; + tree_data* td2; + + if (!newir) { + ERR("out of memory\n"); + return TRUE; + } + + TRACE("modifying INODE_REF\n"); + + if ((UINT8*)ir > td->data) { + RtlCopyMemory(newir, td->data, (UINT8*)ir - td->data); + iroff = newir + ((UINT8*)ir - td->data); + } else { + iroff = newir; + } + + if ((UINT8*)&ir->name[ir->n] - td->data < td->size) + RtlCopyMemory(iroff, &ir->name[ir->n], td->size - ((UINT8*)&ir->name[ir->n] - td->data)); + + td2 = 
ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); + if (!td2) { + ERR("out of memory\n"); + return TRUE; + } + + td2->key = bi->key; + td2->size = newlen; + td2->data = newir; + td2->ignore = FALSE; + td2->inserted = TRUE; + + InsertHeadList(td->list_entry.Blink, &td2->list_entry); + + t->header.num_items++; + t->size += newlen + sizeof(leaf_node); + t->write = TRUE; + } + + break; + } + + if (len > itemlen) { + len -= itemlen; + ir = (INODE_REF*)&ir->name[ir->n]; + } else + break; + } while (len > 0); - bi2 = ExAllocateFromPagedLookasideList(&Vcb->batch_item_lookaside); - if (!bi2) { - ERR("out of memory\n"); - ExFreePool(ier); - return TRUE; + if (!changed) { + if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF) { + TRACE("entry in INODE_REF not found, adding Batch_DeleteInodeExtRef entry\n"); + + add_delete_inode_extref(Vcb, bi, listhead); + + return TRUE; + } else + WARN("entry not found in INODE_REF\n"); } + } + + break; + } + + case Batch_DeleteInodeExtRef: { + if (td->size < sizeof(INODE_EXTREF)) { + WARN("INODE_EXTREF was %u bytes, expected at least %u\n", td->size, sizeof(INODE_EXTREF)); + return TRUE; + } else { + INODE_EXTREF *ier, *delier; + ULONG len; - bi2->key.obj_id = bi->key.obj_id; - bi2->key.obj_type = TYPE_INODE_EXTREF; - bi2->key.offset = calc_crc32c((UINT32)ier->dir, (UINT8*)ier->name, ier->n); - bi2->data = ier; - bi2->datalen = ierlen; - bi2->operation = Batch_InodeExtRef; + delier = (INODE_EXTREF*)bi->data; + ier = (INODE_EXTREF*)td->data; + len = td->size; - le = bi->list_entry.Flink; - while (le != listhead) { - batch_item* bi3 = CONTAINING_RECORD(le, batch_item, list_entry); + do { + ULONG itemlen; - if (keycmp(bi3->key, bi2->key) != -1) { - InsertHeadList(le->Blink, &bi2->list_entry); - inserted = TRUE; + if (len < sizeof(INODE_EXTREF) || len < sizeof(INODE_EXTREF) - 1 + ier->n) { + ERR("INODE_REF was truncated\n"); + break; } - le = le->Flink; - } - - if (!inserted) - InsertTailList(listhead, 
&bi2->list_entry); - - return TRUE; - } else { - ERR("INODE_REF would be over maximum size (%u + %u > %u)\n", td->size, bi->datalen, maxlen); - return TRUE; - } - } - - newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen, ALLOC_TAG); - if (!newdata) { - ERR("out of memory\n"); - return TRUE; - } - - RtlCopyMemory(newdata, td->data, td->size); - - RtlCopyMemory(newdata + td->size, bi->data, bi->datalen); + itemlen = sizeof(INODE_EXTREF) - sizeof(char) + ier->n; + + if (ier->dir == delier->dir && ier->n == delier->n && RtlCompareMemory(ier->name, delier->name, ier->n) == ier->n) { + ULONG newlen = td->size - itemlen; + + if (newlen == 0) + TRACE("deleting INODE_EXTREF\n"); + else { + UINT8 *newier = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *ieroff; + tree_data* td2; + + if (!newier) { + ERR("out of memory\n"); + return TRUE; + } + + TRACE("modifying INODE_EXTREF\n"); - bi->datalen += td->size; - - ExFreePool(bi->data); - bi->data = newdata; - } else if (bi->operation == Batch_InodeExtRef) { - UINT8* newdata; - - if (td->size + bi->datalen > maxlen) { - ERR("INODE_EXTREF would be over maximum size (%u + %u > %u)\n", td->size, bi->datalen, maxlen); - return TRUE; - } - - newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen, ALLOC_TAG); - if (!newdata) { - ERR("out of memory\n"); - return TRUE; + if ((UINT8*)ier > td->data) { + RtlCopyMemory(newier, td->data, (UINT8*)ier - td->data); + ieroff = newier + ((UINT8*)ier - td->data); + } else { + ieroff = newier; + } + + if ((UINT8*)&ier->name[ier->n] - td->data < td->size) + RtlCopyMemory(ieroff, &ier->name[ier->n], td->size - ((UINT8*)&ier->name[ier->n] - td->data)); + + td2 = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); + if (!td2) { + ERR("out of memory\n"); + return TRUE; + } + + td2->key = bi->key; + td2->size = newlen; + td2->data = newier; + td2->ignore = FALSE; + td2->inserted = TRUE; + + InsertHeadList(td->list_entry.Blink, &td2->list_entry); + + 
t->header.num_items++; + t->size += newlen + sizeof(leaf_node); + t->write = TRUE; + } + + break; + } + + if (len > itemlen) { + len -= itemlen; + ier = (INODE_EXTREF*)&ier->name[ier->n]; + } else + break; + } while (len > 0); + } + break; } - RtlCopyMemory(newdata, td->data, td->size); - - RtlCopyMemory(newdata + td->size, bi->data, bi->datalen); - - bi->datalen += td->size; + case Batch_Delete: + break; - ExFreePool(bi->data); - bi->data = newdata; + default: + ERR("unexpected batch operation type\n"); + int3; + break; } - newtd->data = bi->data; - newtd->size = bi->datalen; - // delete old item if (!td->ignore) { traverse_ptr* tp2; @@ -1389,6 +1796,7 @@ static BOOL handle_batch_collision(device_extension* Vcb, batch_item* bi, tree* t->header.num_items--; t->size -= sizeof(leaf_node) + td->size; + t->write = TRUE; if (rollback) { tp2 = ExAllocateFromPagedLookasideList(&Vcb->traverse_ptr_lookaside); @@ -1403,8 +1811,12 @@ static BOOL handle_batch_collision(device_extension* Vcb, batch_item* bi, tree* add_rollback(Vcb, rollback, ROLLBACK_DELETE_ITEM, tp2); } } - - InsertHeadList(&td->list_entry, &newtd->list_entry); + + if (newtd) { + newtd->data = bi->data; + newtd->size = bi->datalen; + InsertHeadList(&td->list_entry, &newtd->list_entry); + } } else { ERR("(%llx,%x,%llx) already exists\n", bi->key.obj_id, bi->key.obj_type, bi->key.offset); int3; @@ -1441,149 +1853,248 @@ static void commit_batch_list_root(device_extension* Vcb, batch_root* br, PIRP I find_tree_end(tp.tree, &tree_end, &no_end); - td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); - if (!td) { - ERR("out of memory\n"); - return; - } - - td->key = bi->key; - td->size = bi->datalen; - td->data = bi->data; - td->ignore = FALSE; - td->inserted = TRUE; - - cmp = keycmp(bi->key, tp.item->key); - - if (cmp == -1) { // very first key in root - tree_data* paritem; - - InsertHeadList(&tp.tree->itemlist, &td->list_entry); - - paritem = tp.tree->paritem; - while (paritem) { - if 
(!keycmp(paritem->key, tp.item->key)) { - paritem->key = bi->key; - } else - break; + if (bi->operation == Batch_DeleteInode) { + if (tp.item->key.obj_id == bi->key.obj_id) { + BOOL ended = FALSE; - paritem = paritem->treeholder.tree->paritem; - } - } else if (cmp == 0) { // item already exists - ignore = handle_batch_collision(Vcb, bi, tp.tree, tp.item, td, &br->items, rollback); - } else { - InsertHeadList(&tp.item->list_entry, &td->list_entry); - } - - if (!ignore) { - tp.tree->header.num_items++; - tp.tree->size += bi->datalen + sizeof(leaf_node); - tp.tree->write = TRUE; - - if (rollback) { - // FIXME - free this correctly - tp2 = ExAllocateFromPagedLookasideList(&Vcb->traverse_ptr_lookaside); - if (!tp2) { - ERR("out of memory\n"); - return; - } + td = tp.item; - tp2->tree = tp.tree; - tp2->item = td; - - add_rollback(Vcb, rollback, ROLLBACK_INSERT_ITEM, tp2); - } - - listhead = &td->list_entry; - } else - listhead = &tp.item->list_entry; - - le2 = le->Flink; - while (le2 != &br->items) { - batch_item* bi2 = CONTAINING_RECORD(le2, batch_item, list_entry); - - if (no_end || keycmp(bi2->key, tree_end) == -1) { - LIST_ENTRY* le3; - BOOL inserted = FALSE; + if (!tp.item->ignore) { + tp.item->ignore = TRUE; + tp.tree->header.num_items--; + tp.tree->size -= tp.item->size + sizeof(leaf_node); + tp.tree->write = TRUE; + } - ignore = FALSE; + le2 = tp.item->list_entry.Flink; + while (le2 != &tp.tree->itemlist) { + td = CONTAINING_RECORD(le2, tree_data, list_entry); + + if (td->key.obj_id == bi->key.obj_id) { + if (!td->ignore) { + td->ignore = TRUE; + tp.tree->header.num_items--; + tp.tree->size -= td->size + sizeof(leaf_node); + tp.tree->write = TRUE; + } + } else { + ended = TRUE; + break; + } + + le2 = le2->Flink; + } + while (!ended) { + traverse_ptr next_tp; + + tp.item = td; + + if (!find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) + break; + + tp = next_tp; + + le2 = &tp.item->list_entry; + while (le2 != &tp.tree->itemlist) { + td = CONTAINING_RECORD(le2, 
tree_data, list_entry); + + if (td->key.obj_id == bi->key.obj_id) { + if (!td->ignore) { + td->ignore = TRUE; + tp.tree->header.num_items--; + tp.tree->size -= td->size + sizeof(leaf_node); + tp.tree->write = TRUE; + } + } else { + ended = TRUE; + break; + } + + le2 = le2->Flink; + } + } + } + } else { + if (bi->operation == Batch_Delete || bi->operation == Batch_DeleteDirItem || + bi->operation == Batch_DeleteInodeRef || bi->operation == Batch_DeleteInodeExtRef) + td = NULL; + else { td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); if (!td) { ERR("out of memory\n"); return; } - td->key = bi2->key; - td->size = bi2->datalen; - td->data = bi2->data; + td->key = bi->key; + td->size = bi->datalen; + td->data = bi->data; td->ignore = FALSE; td->inserted = TRUE; - - le3 = listhead; - while (le3 != &tp.tree->itemlist) { - tree_data* td2 = CONTAINING_RECORD(le3, tree_data, list_entry); + } + + cmp = keycmp(bi->key, tp.item->key); + + if (cmp == -1) { // very first key in root + if (td) { + tree_data* paritem; - if (!td2->ignore) { - cmp = keycmp(bi2->key, td2->key); + InsertHeadList(&tp.tree->itemlist, &td->list_entry); - if (cmp == 0) { - ignore = handle_batch_collision(Vcb, bi2, tp.tree, td2, td, &br->items, rollback); - inserted = TRUE; - break; - } else if (cmp == -1) { - InsertHeadList(le3->Blink, &td->list_entry); - inserted = TRUE; + paritem = tp.tree->paritem; + while (paritem) { + if (!keycmp(paritem->key, tp.item->key)) { + paritem->key = bi->key; + } else break; - } + + paritem = paritem->treeholder.tree->paritem; + } + } + } else if (cmp == 0) { // item already exists + ignore = handle_batch_collision(Vcb, bi, tp.tree, tp.item, td, &br->items, rollback); + } else if (td) { + InsertHeadList(&tp.item->list_entry, &td->list_entry); + } + + if (bi->operation == Batch_DeleteInodeRef && cmp != 0 && Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF) { + add_delete_inode_extref(Vcb, bi, &br->items); + } + + if (!ignore && td) { + 
tp.tree->header.num_items++; + tp.tree->size += bi->datalen + sizeof(leaf_node); + tp.tree->write = TRUE; + + if (rollback) { + // FIXME - free this correctly + tp2 = ExAllocateFromPagedLookasideList(&Vcb->traverse_ptr_lookaside); + if (!tp2) { + ERR("out of memory\n"); + return; } - le3 = le3->Flink; + tp2->tree = tp.tree; + tp2->item = td; + + add_rollback(Vcb, rollback, ROLLBACK_INSERT_ITEM, tp2); } - if (!inserted) - InsertTailList(&tp.tree->itemlist, &td->list_entry); + listhead = &td->list_entry; + } else { + listhead = &tp.item->list_entry; - if (!ignore) { - tp.tree->header.num_items++; - tp.tree->size += bi2->datalen + sizeof(leaf_node); + if (!td && tp.item->ignore && tp.item->list_entry.Blink != &tp.tree->itemlist) { + tree_data* prevtd = CONTAINING_RECORD(tp.item->list_entry.Blink, tree_data, list_entry); - if (rollback) { - // FIXME - free this correctly - tp2 = ExAllocateFromPagedLookasideList(&Vcb->traverse_ptr_lookaside); - if (!tp2) { + if (!prevtd->ignore && !keycmp(prevtd->key, tp.item->key)) + listhead = &prevtd->list_entry; + } + } + + le2 = le->Flink; + while (le2 != &br->items) { + batch_item* bi2 = CONTAINING_RECORD(le2, batch_item, list_entry); + + if (bi2->operation == Batch_DeleteInode) + break; + + if (no_end || keycmp(bi2->key, tree_end) == -1) { + LIST_ENTRY* le3; + BOOL inserted = FALSE; + + ignore = FALSE; + + if (bi2->operation == Batch_Delete || bi2->operation == Batch_DeleteDirItem || + bi2->operation == Batch_DeleteInodeRef || bi2->operation == Batch_DeleteInodeExtRef) + td = NULL; + else { + td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside); + if (!td) { ERR("out of memory\n"); return; } - tp2->tree = tp.tree; - tp2->item = td; + td->key = bi2->key; + td->size = bi2->datalen; + td->data = bi2->data; + td->ignore = FALSE; + td->inserted = TRUE; + } + + le3 = listhead; + while (le3 != &tp.tree->itemlist) { + tree_data* td2 = CONTAINING_RECORD(le3, tree_data, list_entry); + + if (!td2->ignore) { + cmp = 
keycmp(bi2->key, td2->key); + + if (cmp == 0) { + ignore = handle_batch_collision(Vcb, bi2, tp.tree, td2, td, &br->items, rollback); + inserted = TRUE; + break; + } else if (cmp == -1) { + if (td) { + InsertHeadList(le3->Blink, &td->list_entry); + inserted = TRUE; + } else if (bi2->operation == Batch_DeleteInodeRef && Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF) { + add_delete_inode_extref(Vcb, bi2, &br->items); + } + break; + } + } + + le3 = le3->Flink; + } + + if (td) { + if (!inserted) + InsertTailList(&tp.tree->itemlist, &td->list_entry); - add_rollback(Vcb, rollback, ROLLBACK_INSERT_ITEM, tp2); + if (!ignore) { + tp.tree->header.num_items++; + tp.tree->size += bi2->datalen + sizeof(leaf_node); + + if (rollback) { + // FIXME - free this correctly + tp2 = ExAllocateFromPagedLookasideList(&Vcb->traverse_ptr_lookaside); + if (!tp2) { + ERR("out of memory\n"); + return; + } + + tp2->tree = tp.tree; + tp2->item = td; + + add_rollback(Vcb, rollback, ROLLBACK_INSERT_ITEM, tp2); + } + + listhead = &td->list_entry; + } + } else if (!inserted && bi2->operation == Batch_DeleteInodeRef && Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF) { + add_delete_inode_extref(Vcb, bi2, &br->items); } - listhead = &td->list_entry; - } - - le = le2; - } else - break; - - le2 = le2->Flink; - } - - t = tp.tree; - while (t) { - if (t->paritem && t->paritem->ignore) { - t->paritem->ignore = FALSE; - t->parent->header.num_items++; - t->parent->size += sizeof(internal_node); + le = le2; + } else + break; - // FIXME - do we need to add a rollback entry here? + le2 = le2->Flink; } + + t = tp.tree; + while (t) { + if (t->paritem && t->paritem->ignore) { + t->paritem->ignore = FALSE; + t->parent->header.num_items++; + t->parent->size += sizeof(internal_node); + + // FIXME - do we need to add a rollback entry here? 
+ } - t->header.generation = Vcb->superblock.generation; - t = t->parent; + t->header.generation = Vcb->superblock.generation; + t = t->parent; + } } le = le->Flink; @@ -1594,6 +2105,9 @@ static void commit_batch_list_root(device_extension* Vcb, batch_root* br, PIRP I LIST_ENTRY* le = RemoveHeadList(&br->items); batch_item* bi = CONTAINING_RECORD(le, batch_item, list_entry); + if ((bi->operation == Batch_DeleteDirItem || bi->operation == Batch_DeleteInodeRef || bi->operation == Batch_DeleteInodeExtRef) && bi->data) + ExFreePool(bi->data); + ExFreeToPagedLookasideList(&Vcb->batch_item_lookaside, bi); } } diff --git a/reactos/drivers/filesystems/btrfs/write.c b/reactos/drivers/filesystems/btrfs/write.c index 9fc46ee4e64..c4698e0b25d 100644 --- a/reactos/drivers/filesystems/btrfs/write.c +++ b/reactos/drivers/filesystems/btrfs/write.c @@ -44,7 +44,11 @@ typedef struct { static NTSTATUS STDCALL write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr); static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback); -BOOL find_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address) { +extern tPsUpdateDiskCounters PsUpdateDiskCounters; +extern tCcCopyWriteEx CcCopyWriteEx; +extern BOOL diskacc; + +BOOL find_data_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address) { LIST_ENTRY* le; space* s; @@ -117,7 +121,7 @@ static UINT64 find_new_chunk_address(device_extension* Vcb, UINT64 size) { UINT64 lastaddr; LIST_ENTRY* le; - lastaddr = 0; + lastaddr = 0xc00000; le = Vcb->chunks.Flink; while (le != &Vcb->chunks) { @@ -135,33 +139,39 @@ static UINT64 find_new_chunk_address(device_extension* Vcb, UINT64 size) { } static BOOL find_new_dup_stripes(device_extension* Vcb, stripe* stripes, UINT64 max_stripe_size) { - UINT64 j, devnum, devusage = 0xffffffffffffffff; + UINT64 devusage = 0xffffffffffffffff; space *devdh1 = NULL, *devdh2 = NULL; + LIST_ENTRY* le; + device* dev2; - for (j = 0; j < 
Vcb->superblock.num_devices; j++) { - if (!Vcb->devices[j].readonly) { - UINT64 usage = (Vcb->devices[j].devitem.bytes_used * 4096) / Vcb->devices[j].devitem.num_bytes; + le = Vcb->devices.Flink; + + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (!dev->readonly && !dev->reloc) { + UINT64 usage = (dev->devitem.bytes_used * 4096) / dev->devitem.num_bytes; // favour devices which have been used the least if (usage < devusage) { - if (!IsListEmpty(&Vcb->devices[j].space)) { - LIST_ENTRY* le; + if (!IsListEmpty(&dev->space)) { + LIST_ENTRY* le2; space *dh1 = NULL, *dh2 = NULL; - le = Vcb->devices[j].space.Flink; - while (le != &Vcb->devices[j].space) { - space* dh = CONTAINING_RECORD(le, space, list_entry); + le2 = dev->space.Flink; + while (le2 != &dev->space) { + space* dh = CONTAINING_RECORD(le2, space, list_entry); - if (dh->size >= max_stripe_size && (!dh1 || dh->size < dh1->size)) { + if (dh->size >= max_stripe_size && (!dh1 || !dh2 || dh->size < dh1->size)) { dh2 = dh1; dh1 = dh; } - le = le->Flink; + le2 = le2->Flink; } if (dh1 && (dh2 || dh1->size >= 2 * max_stripe_size)) { - devnum = j; + dev2 = dev; devusage = usage; devdh1 = dh1; devdh2 = dh2 ? 
dh2 : dh1; @@ -169,34 +179,94 @@ static BOOL find_new_dup_stripes(device_extension* Vcb, stripe* stripes, UINT64 } } } + + le = le->Flink; } - if (!devdh1) - return FALSE; + if (!devdh1) { + UINT64 size = 0; + + // Can't find hole of at least max_stripe_size; look for the largest one we can find + + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + + if (!dev->readonly && !dev->reloc) { + if (!IsListEmpty(&dev->space)) { + LIST_ENTRY* le2; + space *dh1 = NULL, *dh2 = NULL; + + le2 = dev->space.Flink; + while (le2 != &dev->space) { + space* dh = CONTAINING_RECORD(le2, space, list_entry); + + if (!dh1 || !dh2 || dh->size < dh1->size) { + dh2 = dh1; + dh1 = dh; + } + + le2 = le2->Flink; + } + + if (dh1) { + UINT64 devsize; + + if (dh2) + devsize = max(dh1->size / 2, min(dh1->size, dh2->size)); + else + devsize = min(dh1->size, dh2->size); + + if (devsize > size) { + dev2 = dev; + devdh1 = dh1; + + if (dh2 && min(dh1->size, dh2->size) > dh1->size / 2) + devdh2 = dh2; + else + devdh2 = dh1; + + size = devsize; + } + } + } + } + + le = le->Flink; + } + + if (!devdh1) + return FALSE; + } - stripes[0].device = &Vcb->devices[devnum]; + stripes[0].device = stripes[1].device = dev2; stripes[0].dh = devdh1; - stripes[1].device = stripes[0].device; stripes[1].dh = devdh2; return TRUE; } static BOOL find_new_stripe(device_extension* Vcb, stripe* stripes, UINT16 i, UINT64 max_stripe_size, UINT16 type) { - UINT64 j, k, devnum = 0xffffffffffffffff, devusage = 0xffffffffffffffff; + UINT64 k, devusage = 0xffffffffffffffff; space* devdh = NULL; + LIST_ENTRY* le; + device* dev2 = NULL; - for (j = 0; j < Vcb->superblock.num_devices; j++) { + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); UINT64 usage; BOOL skip = FALSE; - if (Vcb->devices[j].readonly) + if (dev->readonly || dev->reloc) { + le = le->Flink; continue; + } // skip this device if it already 
has a stripe if (i > 0) { for (k = 0; k < i; k++) { - if (stripes[k].device == &Vcb->devices[j]) { + if (stripes[k].device == dev) { skip = TRUE; break; } @@ -204,37 +274,84 @@ static BOOL find_new_stripe(device_extension* Vcb, stripe* stripes, UINT16 i, UI } if (!skip) { - usage = (Vcb->devices[j].devitem.bytes_used * 4096) / Vcb->devices[j].devitem.num_bytes; + usage = (dev->devitem.bytes_used * 4096) / dev->devitem.num_bytes; // favour devices which have been used the least if (usage < devusage) { - if (!IsListEmpty(&Vcb->devices[j].space)) { - LIST_ENTRY* le; + if (!IsListEmpty(&dev->space)) { + LIST_ENTRY* le2; - le = Vcb->devices[j].space.Flink; - while (le != &Vcb->devices[j].space) { - space* dh = CONTAINING_RECORD(le, space, list_entry); + le2 = dev->space.Flink; + while (le2 != &dev->space) { + space* dh = CONTAINING_RECORD(le2, space, list_entry); - if ((devnum != j && dh->size >= max_stripe_size) || - (devnum == j && dh->size >= max_stripe_size && dh->size < devdh->size) + if ((dev2 != dev && dh->size >= max_stripe_size) || + (dev2 == dev && dh->size >= max_stripe_size && dh->size < devdh->size) ) { devdh = dh; - devnum = j; + dev2 = dev; devusage = usage; } - le = le->Flink; + le2 = le2->Flink; } } } } + + le = le->Flink; } - if (!devdh) - return FALSE; + if (!devdh) { + // Can't find hole of at least max_stripe_size; look for the largest one we can find + + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + BOOL skip = FALSE; + + if (dev->readonly || dev->reloc) { + le = le->Flink; + continue; + } + + // skip this device if it already has a stripe + if (i > 0) { + for (k = 0; k < i; k++) { + if (stripes[k].device == dev) { + skip = TRUE; + break; + } + } + } + + if (!skip) { + if (!IsListEmpty(&dev->space)) { + LIST_ENTRY* le2; + + le2 = dev->space.Flink; + while (le2 != &dev->space) { + space* dh = CONTAINING_RECORD(le2, space, list_entry); + + if (!devdh || devdh->size < dh->size) { 
+ devdh = dh; + dev2 = dev; + } + + le2 = le2->Flink; + } + } + } + + le = le->Flink; + } + + if (!devdh) + return FALSE; + } stripes[i].dh = devdh; - stripes[i].device = &Vcb->devices[devnum]; + stripes[i].device = dev2; return TRUE; } @@ -249,12 +366,18 @@ chunk* alloc_chunk(device_extension* Vcb, UINT64 flags) { chunk* c = NULL; space* s = NULL; BOOL success = FALSE; + LIST_ENTRY* le; ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE); - for (i = 0; i < Vcb->superblock.num_devices; i++) { - total_size += Vcb->devices[i].devitem.num_bytes; + le = Vcb->devices.Flink; + while (le != &Vcb->devices) { + device* dev = CONTAINING_RECORD(le, device, list_entry); + total_size += dev->devitem.num_bytes; + + le = le->Flink; } + TRACE("total_size = %llx\n", total_size); // We purposely check for DATA first - mixed blocks have the same size @@ -362,10 +485,14 @@ chunk* alloc_chunk(device_extension* Vcb, UINT64 flags) { stripe_length = 0x10000; // FIXME? BTRFS_STRIPE_LEN in kernel - stripe_size = max_stripe_size; - for (i = 0; i < num_stripes; i++) { - if (stripes[i].dh->size < stripe_size) - stripe_size = stripes[i].dh->size; + if (type == BLOCK_FLAG_DUPLICATE && stripes[1].dh == stripes[0].dh) + stripe_size = min(stripes[0].dh->size / 2, max_stripe_size); + else { + stripe_size = max_stripe_size; + for (i = 0; i < num_stripes; i++) { + if (stripes[i].dh->size < stripe_size) + stripe_size = stripes[i].dh->size; + } } if (type == 0 || type == BLOCK_FLAG_DUPLICATE || type == BLOCK_FLAG_RAID1) @@ -427,6 +554,9 @@ chunk* alloc_chunk(device_extension* Vcb, UINT64 flags) { c->used = c->oldused = 0; c->cache = NULL; c->readonly = FALSE; + c->reloc = FALSE; + c->last_alloc_set = FALSE; + InitializeListHead(&c->space); InitializeListHead(&c->space_size); InitializeListHead(&c->deleting); @@ -460,6 +590,9 @@ chunk* alloc_chunk(device_extension* Vcb, UINT64 flags) { success = TRUE; + if (flags & BLOCK_FLAG_RAID5 || flags & BLOCK_FLAG_RAID6) + Vcb->superblock.incompat_flags |= 
BTRFS_INCOMPAT_FLAGS_RAID56; + end: if (stripes) ExFreePool(stripes); @@ -490,6 +623,7 @@ end: c->created = TRUE; InsertTailList(&Vcb->chunks_changed, &c->list_entry_changed); + c->list_entry_balance.Flink = NULL; } ExReleaseResourceLite(&Vcb->chunk_lock); @@ -1875,6 +2009,8 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT newext->datalen = sizeof(EXTENT_DATA) - 1 + size; newext->unique = ext->unique; newext->ignore = FALSE; + newext->inserted = TRUE; + newext->csum = NULL; InsertHeadList(&ext->list_entry, &newext->list_entry); remove_fcb_extent(fcb, ext, rollback); @@ -1917,6 +2053,8 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT newext->datalen = sizeof(EXTENT_DATA) - 1 + size; newext->unique = ext->unique; newext->ignore = FALSE; + newext->inserted = TRUE; + newext->csum = NULL; InsertHeadList(&ext->list_entry, &newext->list_entry); remove_fcb_extent(fcb, ext, rollback); @@ -1959,6 +2097,8 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT newext1->datalen = sizeof(EXTENT_DATA) - 1 + size; newext1->unique = ext->unique; newext1->ignore = FALSE; + newext1->inserted = TRUE; + newext1->csum = NULL; size = ext->offset + len - end_data; @@ -1995,6 +2135,8 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT newext2->datalen = sizeof(EXTENT_DATA) - 1 + size; newext2->unique = ext->unique; newext2->ignore = FALSE; + newext2->inserted = TRUE; + newext2->csum = NULL; InsertHeadList(&ext->list_entry, &newext1->list_entry); InsertHeadList(&newext1->list_entry, &newext2->list_entry); @@ -2070,6 +2212,36 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT newext->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2); newext->unique = ext->unique; newext->ignore = FALSE; + newext->inserted = TRUE; + + if (ext->csum) { + if (ed->compression == BTRFS_COMPRESSION_NONE) { + newext->csum = 
ExAllocatePoolWithTag(PagedPool, ned2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); + if (!newext->csum) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + ExFreePool(ned); + ExFreePool(newext); + goto end; + } + + RtlCopyMemory(newext->csum, &ext->csum[(end_data - ext->offset) / Vcb->superblock.sector_size], + ned2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size); + } else { + newext->csum = ExAllocatePoolWithTag(PagedPool, ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); + if (!newext->csum) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + ExFreePool(ned); + ExFreePool(newext); + goto end; + } + + RtlCopyMemory(newext->csum, ext->csum, ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size); + } + } else + newext->csum = NULL; + InsertHeadList(&ext->list_entry, &newext->list_entry); remove_fcb_extent(fcb, ext, rollback); @@ -2116,6 +2288,35 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT newext->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2); newext->unique = ext->unique; newext->ignore = FALSE; + newext->inserted = TRUE; + + if (ext->csum) { + if (ed->compression == BTRFS_COMPRESSION_NONE) { + newext->csum = ExAllocatePoolWithTag(PagedPool, ned2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); + if (!newext->csum) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + ExFreePool(ned); + ExFreePool(newext); + goto end; + } + + RtlCopyMemory(newext->csum, ext->csum, ned2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size); + } else { + newext->csum = ExAllocatePoolWithTag(PagedPool, ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); + if (!newext->csum) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + ExFreePool(ned); + ExFreePool(newext); + goto end; + } + + RtlCopyMemory(newext->csum, ext->csum, ed2->size * sizeof(UINT32) / 
Vcb->superblock.sector_size); + } + } else + newext->csum = NULL; + InsertHeadList(&ext->list_entry, &newext->list_entry); remove_fcb_extent(fcb, ext, rollback); @@ -2169,7 +2370,7 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT } newext2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG); - if (!newext1) { + if (!newext2) { ERR("out of memory\n"); Status = STATUS_INSUFFICIENT_RESOURCES; ExFreePool(neda); @@ -2209,12 +2410,74 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT newext1->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2); newext1->unique = ext->unique; newext1->ignore = FALSE; + newext1->inserted = TRUE; newext2->offset = end_data; newext2->data = nedb; newext2->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2); newext2->unique = ext->unique; newext2->ignore = FALSE; + newext2->inserted = TRUE; + + if (ext->csum) { + if (ed->compression == BTRFS_COMPRESSION_NONE) { + newext1->csum = ExAllocatePoolWithTag(PagedPool, neda2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); + if (!newext1->csum) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + ExFreePool(neda); + ExFreePool(newext1); + ExFreePool(nedb); + ExFreePool(newext2); + goto end; + } + + newext2->csum = ExAllocatePoolWithTag(PagedPool, nedb2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); + if (!newext2->csum) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + ExFreePool(newext1->csum); + ExFreePool(neda); + ExFreePool(newext1); + ExFreePool(nedb); + ExFreePool(newext2); + goto end; + } + + RtlCopyMemory(newext1->csum, ext->csum, neda2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size); + RtlCopyMemory(newext2->csum, &ext->csum[(end_data - ext->offset) / Vcb->superblock.sector_size], + nedb2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size); + } else { + newext1->csum = ExAllocatePoolWithTag(PagedPool, 
ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); + if (!newext1->csum) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + ExFreePool(neda); + ExFreePool(newext1); + ExFreePool(nedb); + ExFreePool(newext2); + goto end; + } + + newext2->csum = ExAllocatePoolWithTag(PagedPool, ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); + if (!newext1->csum) { + ERR("out of memory\n"); + Status = STATUS_INSUFFICIENT_RESOURCES; + ExFreePool(newext1->csum); + ExFreePool(neda); + ExFreePool(newext1); + ExFreePool(nedb); + ExFreePool(newext2); + goto end; + } + + RtlCopyMemory(newext1->csum, ext->csum, ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size); + RtlCopyMemory(newext2->csum, ext->csum, ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size); + } + } else { + newext1->csum = NULL; + newext2->csum = NULL; + } InsertHeadList(&ext->list_entry, &newext1->list_entry); InsertHeadList(&newext1->list_entry, &newext2->list_entry); @@ -2237,45 +2500,6 @@ end: return Status; } -static NTSTATUS do_write_data(device_extension* Vcb, UINT64 address, void* data, UINT64 length, LIST_ENTRY* changed_sector_list, PIRP Irp) { - NTSTATUS Status; - changed_sector* sc; - int i; - - Status = write_data_complete(Vcb, address, data, length, Irp, NULL); - if (!NT_SUCCESS(Status)) { - ERR("write_data returned %08x\n", Status); - return Status; - } - - if (changed_sector_list) { - sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG); - if (!sc) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } - - sc->ol.key = address; - sc->length = length / Vcb->superblock.sector_size; - sc->deleted = FALSE; - - sc->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sc->length, ALLOC_TAG); - if (!sc->checksums) { - ERR("out of memory\n"); - ExFreePool(sc); - return STATUS_INSUFFICIENT_RESOURCES; - } - - for (i = 0; i < sc->length; i++) { - sc->checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + 
(i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); - } - - insert_into_ordered_list(changed_sector_list, &sc->ol); - } - - return STATUS_SUCCESS; -} - static void add_insert_extent_rollback(LIST_ENTRY* rollback, fcb* fcb, extent* ext) { rollback_extent* re; @@ -2291,7 +2515,7 @@ static void add_insert_extent_rollback(LIST_ENTRY* rollback, fcb* fcb, extent* e add_rollback(fcb->Vcb, rollback, ROLLBACK_INSERT_EXTENT, re); } -static BOOL add_extent_to_fcb(fcb* fcb, UINT64 offset, EXTENT_DATA* ed, ULONG edsize, BOOL unique, LIST_ENTRY* rollback) { +static BOOL add_extent_to_fcb(fcb* fcb, UINT64 offset, EXTENT_DATA* ed, ULONG edsize, BOOL unique, UINT32* csum, LIST_ENTRY* rollback) { extent* ext; LIST_ENTRY* le; @@ -2306,6 +2530,8 @@ static BOOL add_extent_to_fcb(fcb* fcb, UINT64 offset, EXTENT_DATA* ed, ULONG ed ext->datalen = edsize; ext->unique = unique; ext->ignore = FALSE; + ext->inserted = TRUE; + ext->csum = csum; le = fcb->extents.Flink; while (le != &fcb->extents) { @@ -2348,21 +2574,51 @@ static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback) { } } +static NTSTATUS calc_csum(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32* csum) { + NTSTATUS Status; + calc_job* cj; + + // From experimenting, it seems that 40 sectors is roughly the crossover + // point where offloading the crc32 calculation becomes worth it. 
+ + if (sectors < 40) { + ULONG j; + + for (j = 0; j < sectors; j++) { + csum[j] = ~calc_crc32c(0xffffffff, data + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size); + } + + return STATUS_SUCCESS; + } + + Status = add_calc_job(Vcb, data, sectors, csum, &cj); + if (!NT_SUCCESS(Status)) { + ERR("add_calc_job returned %08x\n", Status); + return Status; + } + + KeWaitForSingleObject(&cj->event, Executive, KernelMode, FALSE, NULL); + free_calc_job(cj); + + return STATUS_SUCCESS; +} + BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start_data, UINT64 length, BOOL prealloc, void* data, - LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback, UINT8 compression, UINT64 decoded_size) { + PIRP Irp, LIST_ENTRY* rollback, UINT8 compression, UINT64 decoded_size) { UINT64 address; NTSTATUS Status; EXTENT_DATA* ed; EXTENT_DATA2* ed2; ULONG edsize = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2); + UINT32* csum = NULL; // #ifdef DEBUG_PARANOID // traverse_ptr tp; // KEY searchkey; // #endif - TRACE("(%p, (%llx, %llx), %llx, %llx, %llx, %u, %p, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, c->offset, start_data, length, prealloc, data, changed_sector_list, rollback); + TRACE("(%p, (%llx, %llx), %llx, %llx, %llx, %u, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, c->offset, start_data, length, prealloc, data, rollback); - if (!find_address_in_chunk(Vcb, c, length, &address)) + if (!find_data_address_in_chunk(Vcb, c, length, &address)) return FALSE; // #ifdef DEBUG_PARANOID @@ -2399,7 +2655,23 @@ BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start ed2->offset = 0; ed2->num_bytes = decoded_size; - if (!add_extent_to_fcb(fcb, start_data, ed, edsize, TRUE, rollback)) { + if (!prealloc && data && !(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) { + ULONG sl = length / Vcb->superblock.sector_size; + + csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(UINT32), ALLOC_TAG); + if (!csum) { + ERR("out of 
memory\n"); + return FALSE; + } + + Status = calc_csum(Vcb, data, sl, csum); + if (!NT_SUCCESS(Status)) { + ERR("calc_csum returned %08x\n", Status); + return FALSE; + } + } + + if (!add_extent_to_fcb(fcb, start_data, ed, edsize, TRUE, csum, rollback)) { ERR("add_extent_to_fcb failed\n"); ExFreePool(ed); return FALSE; @@ -2423,16 +2695,16 @@ BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start ExReleaseResourceLite(&c->lock); if (data) { - Status = do_write_data(Vcb, address, data, length, changed_sector_list, Irp); + Status = write_data_complete(Vcb, address, data, length, Irp, NULL); if (!NT_SUCCESS(Status)) - ERR("do_write_data returned %08x\n", Status); + ERR("write_data_complete returned %08x\n", Status); } return TRUE; } static BOOL try_extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, - LIST_ENTRY* changed_sector_list, PIRP Irp, UINT64* written, LIST_ENTRY* rollback) { + PIRP Irp, UINT64* written, LIST_ENTRY* rollback) { BOOL success = FALSE; EXTENT_DATA* ed; EXTENT_DATA2* ed2; @@ -2488,6 +2760,9 @@ static BOOL try_extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data, c = get_chunk_from_address(Vcb, ed2->address); + if (c->reloc || c->readonly || c->chunk_item->type != Vcb->data_flags) + return FALSE; + ExAcquireResourceExclusiveLite(&c->lock, TRUE); le = c->space.Flink; @@ -2497,7 +2772,7 @@ static BOOL try_extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data, if (s->address == ed2->address + ed2->size) { UINT64 newlen = min(min(s->size, length), MAX_EXTENT_SIZE); - success = insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, changed_sector_list, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen); + success = insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen); if (success) *written += newlen; @@ -2539,11 +2814,11 @@ static NTSTATUS insert_prealloc_extent(fcb* fcb, UINT64 start, UINT64 length, LI 
while (le != &fcb->Vcb->chunks) { c = CONTAINING_RECORD(le, chunk, list_entry); - if (!c->readonly) { + if (!c->readonly && !c->reloc) { ExAcquireResourceExclusiveLite(&c->lock, TRUE); if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) { - if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen)) { + if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen)) { ExReleaseResourceLite(&fcb->Vcb->chunk_lock); goto cont; } @@ -2565,7 +2840,7 @@ static NTSTATUS insert_prealloc_extent(fcb* fcb, UINT64 start, UINT64 length, LI ExAcquireResourceExclusiveLite(&c->lock, TRUE); if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) { - if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen)) + if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen)) goto cont; } @@ -2597,15 +2872,15 @@ end: // } // } -NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback) { +NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, PIRP Irp, LIST_ENTRY* rollback) { LIST_ENTRY* le; chunk* c; UINT64 flags, orig_length = length, written = 0; - TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data, changed_sector_list); + TRACE("(%p, (%llx, %llx), %llx, %llx, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data); if (start_data > 0) { - try_extend_data(Vcb, fcb, start_data, length, data, changed_sector_list, Irp, &written, rollback); + try_extend_data(Vcb, fcb, start_data, length, data, Irp, &written, rollback); if (written == length) return 
STATUS_SUCCESS; @@ -2631,11 +2906,11 @@ NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT6 while (le != &Vcb->chunks) { c = CONTAINING_RECORD(le, chunk, list_entry); - if (!c->readonly) { + if (!c->readonly && !c->reloc) { ExAcquireResourceExclusiveLite(&c->lock, TRUE); if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen && - insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, changed_sector_list, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen)) { + insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen)) { written += newlen; if (written == orig_length) { @@ -2669,7 +2944,7 @@ NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT6 ExAcquireResourceExclusiveLite(&c->lock, TRUE); if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen && - insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, changed_sector_list, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen)) { + insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen)) { written += newlen; if (written == orig_length) @@ -2696,13 +2971,6 @@ NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT6 return STATUS_DISK_FULL; } -void commit_checksum_changes(device_extension* Vcb, LIST_ENTRY* changed_sector_list) { - while (!IsListEmpty(changed_sector_list)) { - LIST_ENTRY* le = RemoveHeadList(changed_sector_list); - InsertTailList(&Vcb->sector_checksums, le); - } -} - NTSTATUS truncate_file(fcb* fcb, UINT64 end, PIRP Irp, LIST_ENTRY* rollback) { NTSTATUS Status; @@ -2768,8 +3036,6 @@ NTSTATUS extend_file(fcb* fcb, file_ref* fileref, UINT64 end, BOOL prealloc, PIR cur_inline = ed->type == EXTENT_TYPE_INLINE; if (cur_inline && end > fcb->Vcb->options.max_inline) { - LIST_ENTRY changed_sector_list; - BOOL nocsum = fcb->inode_item.flags & BTRFS_INODE_NODATASUM; 
UINT64 origlength, length; UINT8* data; UINT64 offset = ext->offset; @@ -2780,9 +3046,6 @@ NTSTATUS extend_file(fcb* fcb, file_ref* fileref, UINT64 end, BOOL prealloc, PIR cur_inline = FALSE; - if (!nocsum) - InitializeListHead(&changed_sector_list); - length = sector_align(origlength, fcb->Vcb->superblock.sector_size); data = ExAllocatePoolWithTag(PagedPool, length, ALLOC_TAG); @@ -2803,14 +3066,14 @@ NTSTATUS extend_file(fcb* fcb, file_ref* fileref, UINT64 end, BOOL prealloc, PIR remove_fcb_extent(fcb, ext, rollback); if (write_fcb_compressed(fcb)) { - Status = write_compressed(fcb, offset, offset + length, data, nocsum ? NULL : &changed_sector_list, Irp, rollback); + Status = write_compressed(fcb, offset, offset + length, data, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("write_compressed returned %08x\n", Status); ExFreePool(data); return Status; } } else { - Status = insert_extent(fcb->Vcb, fcb, offset, length, data, nocsum ? NULL : &changed_sector_list, Irp, rollback); + Status = insert_extent(fcb->Vcb, fcb, offset, length, data, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("insert_extent returned %08x\n", Status); ExFreePool(data); @@ -2821,12 +3084,6 @@ NTSTATUS extend_file(fcb* fcb, file_ref* fileref, UINT64 end, BOOL prealloc, PIR oldalloc = ext->offset + length; ExFreePool(data); - - if (!nocsum) { - ExAcquireResourceExclusiveLite(&fcb->Vcb->checksum_lock, TRUE); - commit_checksum_changes(fcb->Vcb, &changed_sector_list); - ExReleaseResourceLite(&fcb->Vcb->checksum_lock); - } } if (cur_inline) { @@ -2848,7 +3105,7 @@ NTSTATUS extend_file(fcb* fcb, file_ref* fileref, UINT64 end, BOOL prealloc, PIR remove_fcb_extent(fcb, ext, rollback); - if (!add_extent_to_fcb(fcb, ext->offset, ed, edsize, ext->unique, rollback)) { + if (!add_extent_to_fcb(fcb, ext->offset, ed, edsize, ext->unique, NULL, rollback)) { ERR("add_extent_to_fcb failed\n"); ExFreePool(ed); return STATUS_INTERNAL_ERROR; @@ -2940,7 +3197,7 @@ NTSTATUS extend_file(fcb* fcb, file_ref* fileref, 
UINT64 end, BOOL prealloc, PIR RtlZeroMemory(ed->data, end); - if (!add_extent_to_fcb(fcb, 0, ed, edsize, FALSE, rollback)) { + if (!add_extent_to_fcb(fcb, 0, ed, edsize, FALSE, NULL, rollback)) { ERR("add_extent_to_fcb failed\n"); ExFreePool(ed); return STATUS_INTERNAL_ERROR; @@ -2964,7 +3221,7 @@ NTSTATUS extend_file(fcb* fcb, file_ref* fileref, UINT64 end, BOOL prealloc, PIR } static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, UINT64 end_data, void* data, UINT64* written, - LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback) { + PIRP Irp, LIST_ENTRY* rollback) { EXTENT_DATA* ed = ext->data; EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; NTSTATUS Status; @@ -2991,12 +3248,36 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, ned->type = EXTENT_TYPE_REGULAR; - Status = do_write_data(fcb->Vcb, ed2->address + ed2->offset, (UINT8*)data + ext->offset - start_data, ed2->num_bytes, changed_sector_list, Irp); + Status = write_data_complete(fcb->Vcb, ed2->address + ed2->offset, (UINT8*)data + ext->offset - start_data, ed2->num_bytes, Irp, NULL); if (!NT_SUCCESS(Status)) { - ERR("do_write_data returned %08x\n", Status); + ERR("write_data_complete returned %08x\n", Status); return Status; } + if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) { + ULONG sl = ed2->num_bytes / fcb->Vcb->superblock.sector_size; + UINT32* csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(UINT32), ALLOC_TAG); + + if (!csum) { + ERR("out of memory\n"); + ExFreePool(ned); + ExFreePool(newext); + return STATUS_INSUFFICIENT_RESOURCES; + } + + Status = calc_csum(fcb->Vcb, (UINT8*)data + ext->offset - start_data, sl, csum); + if (!NT_SUCCESS(Status)) { + ERR("calc_csum returned %08x\n", Status); + ExFreePool(csum); + ExFreePool(ned); + ExFreePool(newext); + return Status; + } + + newext->csum = csum; + } else + newext->csum = NULL; + *written = ed2->num_bytes; newext->offset = ext->offset; @@ -3004,6 +3285,7 @@ static 
NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, newext->datalen = ext->datalen; newext->unique = ext->unique; newext->ignore = FALSE; + newext->inserted = TRUE; InsertHeadList(&ext->list_entry, &newext->list_entry); add_insert_extent_rollback(rollback, fcb, newext); @@ -3054,12 +3336,40 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, ned2->offset += end_data - ext->offset; ned2->num_bytes -= end_data - ext->offset; - Status = do_write_data(fcb->Vcb, ed2->address + ed2->offset, (UINT8*)data + ext->offset - start_data, end_data - ext->offset, changed_sector_list, Irp); + Status = write_data_complete(fcb->Vcb, ed2->address + ed2->offset, (UINT8*)data + ext->offset - start_data, end_data - ext->offset, Irp, NULL); if (!NT_SUCCESS(Status)) { - ERR("do_write_data returned %08x\n", Status); + ERR("write_data_complete returned %08x\n", Status); return Status; } + if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) { + ULONG sl = (end_data - ext->offset) / fcb->Vcb->superblock.sector_size; + UINT32* csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(UINT32), ALLOC_TAG); + + if (!csum) { + ERR("out of memory\n"); + ExFreePool(ned); + ExFreePool(nedb); + ExFreePool(newext1); + ExFreePool(newext2); + return STATUS_INSUFFICIENT_RESOURCES; + } + + Status = calc_csum(fcb->Vcb, (UINT8*)data + ext->offset - start_data, sl, csum); + if (!NT_SUCCESS(Status)) { + ERR("calc_csum returned %08x\n", Status); + ExFreePool(ned); + ExFreePool(nedb); + ExFreePool(newext1); + ExFreePool(newext2); + ExFreePool(csum); + return Status; + } + + newext1->csum = csum; + } else + newext1->csum = NULL; + *written = end_data - ext->offset; newext1->offset = ext->offset; @@ -3067,6 +3377,7 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, newext1->datalen = ext->datalen; newext1->unique = ext->unique; newext1->ignore = FALSE; + newext1->inserted = TRUE; InsertHeadList(&ext->list_entry, 
&newext1->list_entry); add_insert_extent_rollback(rollback, fcb, newext1); @@ -3076,6 +3387,8 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, newext2->datalen = ext->datalen; newext2->unique = ext->unique; newext2->ignore = FALSE; + newext2->inserted = TRUE; + newext2->csum = NULL; InsertHeadList(&newext1->list_entry, &newext2->list_entry); add_insert_extent_rollback(rollback, fcb, newext2); @@ -3142,12 +3455,40 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, ned2->offset += start_data - ext->offset; ned2->num_bytes = ext->offset + ed2->num_bytes - start_data; - Status = do_write_data(fcb->Vcb, ed2->address + ned2->offset, data, ned2->num_bytes, changed_sector_list, Irp); + Status = write_data_complete(fcb->Vcb, ed2->address + ned2->offset, data, ned2->num_bytes, Irp, NULL); if (!NT_SUCCESS(Status)) { - ERR("do_write_data returned %08x\n", Status); + ERR("write_data_complete returned %08x\n", Status); return Status; } + if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) { + ULONG sl = ned2->num_bytes / fcb->Vcb->superblock.sector_size; + UINT32* csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(UINT32), ALLOC_TAG); + + if (!csum) { + ERR("out of memory\n"); + ExFreePool(ned); + ExFreePool(nedb); + ExFreePool(newext1); + ExFreePool(newext2); + return STATUS_INSUFFICIENT_RESOURCES; + } + + Status = calc_csum(fcb->Vcb, data, sl, csum); + if (!NT_SUCCESS(Status)) { + ERR("calc_csum returned %08x\n", Status); + ExFreePool(ned); + ExFreePool(nedb); + ExFreePool(newext1); + ExFreePool(newext2); + ExFreePool(csum); + return Status; + } + + newext2->csum = csum; + } else + newext2->csum = NULL; + *written = ned2->num_bytes; newext1->offset = ext->offset; @@ -3155,6 +3496,8 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, newext1->datalen = ext->datalen; newext1->unique = ext->unique; newext1->ignore = FALSE; + newext1->inserted = TRUE; + newext1->csum = NULL; 
InsertHeadList(&ext->list_entry, &newext1->list_entry); add_insert_extent_rollback(rollback, fcb, newext1); @@ -3164,6 +3507,7 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, newext2->datalen = ext->datalen; newext2->unique = ext->unique; newext2->ignore = FALSE; + newext2->inserted = TRUE; InsertHeadList(&newext1->list_entry, &newext2->list_entry); add_insert_extent_rollback(rollback, fcb, newext2); @@ -3256,12 +3600,44 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, ned2->num_bytes -= end_data - ext->offset; ned2 = (EXTENT_DATA2*)nedb->data; - Status = do_write_data(fcb->Vcb, ed2->address + ned2->offset, data, end_data - start_data, changed_sector_list, Irp); + Status = write_data_complete(fcb->Vcb, ed2->address + ned2->offset, data, end_data - start_data, Irp, NULL); if (!NT_SUCCESS(Status)) { - ERR("do_write_data returned %08x\n", Status); + ERR("write_data_complete returned %08x\n", Status); return Status; } + if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) { + ULONG sl = (end_data - start_data) / fcb->Vcb->superblock.sector_size; + UINT32* csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(UINT32), ALLOC_TAG); + + if (!csum) { + ERR("out of memory\n"); + ExFreePool(ned); + ExFreePool(nedb); + ExFreePool(nedc); + ExFreePool(newext1); + ExFreePool(newext2); + ExFreePool(newext3); + return STATUS_INSUFFICIENT_RESOURCES; + } + + Status = calc_csum(fcb->Vcb, data, sl, csum); + if (!NT_SUCCESS(Status)) { + ERR("calc_csum returned %08x\n", Status); + ExFreePool(ned); + ExFreePool(nedb); + ExFreePool(nedc); + ExFreePool(newext1); + ExFreePool(newext2); + ExFreePool(newext3); + ExFreePool(csum); + return Status; + } + + newext2->csum = csum; + } else + newext2->csum = NULL; + *written = end_data - start_data; newext1->offset = ext->offset; @@ -3269,6 +3645,8 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, newext1->datalen = ext->datalen; newext1->unique = 
ext->unique; newext1->ignore = FALSE; + newext1->inserted = TRUE; + newext1->csum = NULL; InsertHeadList(&ext->list_entry, &newext1->list_entry); add_insert_extent_rollback(rollback, fcb, newext1); @@ -3278,6 +3656,7 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, newext2->datalen = ext->datalen; newext2->unique = ext->unique; newext2->ignore = FALSE; + newext2->inserted = TRUE; InsertHeadList(&newext1->list_entry, &newext2->list_entry); add_insert_extent_rollback(rollback, fcb, newext2); @@ -3287,6 +3666,8 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, newext3->datalen = ext->datalen; newext3->unique = ext->unique; newext3->ignore = FALSE; + newext3->inserted = TRUE; + newext3->csum = NULL; InsertHeadList(&newext2->list_entry, &newext3->list_entry); add_insert_extent_rollback(rollback, fcb, newext3); @@ -3311,7 +3692,7 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, return STATUS_SUCCESS; } -NTSTATUS do_write_file(fcb* fcb, UINT64 start, UINT64 end_data, void* data, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback) { +NTSTATUS do_write_file(fcb* fcb, UINT64 start, UINT64 end_data, void* data, PIRP Irp, LIST_ENTRY* rollback) { NTSTATUS Status; LIST_ENTRY *le, *le2; UINT64 written = 0, length = end_data - start; @@ -3351,7 +3732,7 @@ NTSTATUS do_write_file(fcb* fcb, UINT64 start, UINT64 end_data, void* data, LIST return Status; } - Status = insert_extent(fcb->Vcb, fcb, start_write, ext->offset - start_write, data, changed_sector_list, Irp, rollback); + Status = insert_extent(fcb->Vcb, fcb, start_write, ext->offset - start_write, data, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("insert_extent returned %08x\n", Status); return Status; @@ -3376,32 +3757,12 @@ NTSTATUS do_write_file(fcb* fcb, UINT64 start, UINT64 end_data, void* data, LIST return Status; } - if (changed_sector_list) { - unsigned int i; - changed_sector* sc; - - sc = 
ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG); - if (!sc) { - ERR("out of memory\n"); - return STATUS_INSUFFICIENT_RESOURCES; - } + // This shouldn't ever get called - nocow files should always also be nosum. + if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) { + calc_csum(fcb->Vcb, (UINT8*)data + written, write_len / fcb->Vcb->superblock.sector_size, + &ext->csum[(start + written - ext->offset) / fcb->Vcb->superblock.sector_size]); - sc->ol.key = writeaddr; - sc->length = write_len / fcb->Vcb->superblock.sector_size; - sc->deleted = FALSE; - - sc->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sc->length, ALLOC_TAG); - if (!sc->checksums) { - ERR("out of memory\n"); - ExFreePool(sc); - return STATUS_INSUFFICIENT_RESOURCES; - } - - for (i = 0; i < sc->length; i++) { - sc->checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + written + (i * fcb->Vcb->superblock.sector_size), fcb->Vcb->superblock.sector_size); - } - - insert_into_ordered_list(changed_sector_list, &sc->ol); + ext->inserted = TRUE; } written += write_len; @@ -3413,7 +3774,7 @@ NTSTATUS do_write_file(fcb* fcb, UINT64 start, UINT64 end_data, void* data, LIST UINT64 write_len; Status = do_write_file_prealloc(fcb, ext, start + written, end_data, (UINT8*)data + written, &write_len, - changed_sector_list, Irp, rollback); + Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("do_write_file_prealloc returned %08x\n", Status); return Status; @@ -3443,7 +3804,7 @@ nextitem: return Status; } - Status = insert_extent(fcb->Vcb, fcb, start_write, end_data - start_write, data, changed_sector_list, Irp, rollback); + Status = insert_extent(fcb->Vcb, fcb, start_write, end_data - start_write, data, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("insert_extent returned %08x\n", Status); return Status; @@ -3479,7 +3840,7 @@ nextitem: return STATUS_SUCCESS; } -NTSTATUS write_compressed(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, LIST_ENTRY* changed_sector_list, PIRP Irp, 
LIST_ENTRY* rollback) { +NTSTATUS write_compressed(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, PIRP Irp, LIST_ENTRY* rollback) { NTSTATUS Status; UINT64 i; @@ -3490,7 +3851,7 @@ NTSTATUS write_compressed(fcb* fcb, UINT64 start_data, UINT64 end_data, void* da s2 = start_data + (i * COMPRESSED_EXTENT_SIZE); e2 = min(s2 + COMPRESSED_EXTENT_SIZE, end_data); - Status = write_compressed_bit(fcb, s2, e2, (UINT8*)data + (i * COMPRESSED_EXTENT_SIZE), &compressed, changed_sector_list, Irp, rollback); + Status = write_compressed_bit(fcb, s2, e2, (UINT8*)data + (i * COMPRESSED_EXTENT_SIZE), &compressed, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("write_compressed_bit returned %08x\n", Status); @@ -3506,7 +3867,7 @@ NTSTATUS write_compressed(fcb* fcb, UINT64 start_data, UINT64 end_data, void* da // write subsequent data non-compressed if (e2 < end_data) { - Status = do_write_file(fcb, e2, end_data, (UINT8*)data + e2, changed_sector_list, Irp, rollback); + Status = do_write_file(fcb, e2, end_data, (UINT8*)data + e2, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("do_write_file returned %08x\n", Status); @@ -3530,9 +3891,8 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void UINT32 bufhead; BOOL make_inline; UINT8* data; - LIST_ENTRY changed_sector_list; INODE_ITEM* origii; - BOOL changed_length = FALSE, nocsum/*, lazy_writer = FALSE, write_eof = FALSE*/; + BOOL changed_length = FALSE/*, lazy_writer = FALSE, write_eof = FALSE*/; NTSTATUS Status; LARGE_INTEGER time; BTRFS_TIME now; @@ -3628,8 +3988,6 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void } } - nocsum = fcb->ads ? TRUE : fcb->inode_item.flags & BTRFS_INODE_NODATASUM; - newlength = fcb->ads ? 
fcb->adsdata.Length : fcb->inode_item.st_size; if (fcb->deleted) @@ -3701,12 +4059,8 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void ccfs.FileSize = fcb->Header.FileSize; ccfs.ValidDataLength = fcb->Header.ValidDataLength; - if (!FileObject->PrivateCacheMap) { - TRACE("calling CcInitializeCacheMap...\n"); - CcInitializeCacheMap(FileObject, &ccfs, FALSE, cache_callbacks, FileObject); - - CcSetReadAheadGranularity(FileObject, READ_AHEAD_GRANULARITY); - } + if (!FileObject->PrivateCacheMap) + init_file_cache(FileObject, &ccfs); CcSetFileSizes(FileObject, &ccfs); } @@ -3717,12 +4071,21 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void Status = Irp->IoStatus.Status; goto end; } else { - TRACE("CcCopyWrite(%p, %llx, %x, %u, %p)\n", FileObject, offset.QuadPart, *length, wait, buf); - if (!CcCopyWrite(FileObject, &offset, *length, wait, buf)) { - Status = STATUS_PENDING; - goto end; + if (CcCopyWriteEx) { + TRACE("CcCopyWriteEx(%p, %llx, %x, %u, %p, %p)\n", FileObject, offset.QuadPart, *length, wait, buf, Irp->Tail.Overlay.Thread); + if (!CcCopyWriteEx(FileObject, &offset, *length, wait, buf, Irp->Tail.Overlay.Thread)) { + Status = STATUS_PENDING; + goto end; + } + TRACE("CcCopyWriteEx finished\n"); + } else { + TRACE("CcCopyWrite(%p, %llx, %x, %u, %p)\n", FileObject, offset.QuadPart, *length, wait, buf); + if (!CcCopyWrite(FileObject, &offset, *length, wait, buf)) { + Status = STATUS_PENDING; + goto end; + } + TRACE("CcCopyWrite finished\n"); } - TRACE("CcCopyWrite finished\n"); } Status = STATUS_SUCCESS; @@ -3808,11 +4171,11 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void if (offset.QuadPart > start_data || offset.QuadPart + *length < end_data) { if (changed_length) { if (fcb->inode_item.st_size > start_data) - Status = read_file(fcb, data + bufhead, start_data, fcb->inode_item.st_size - start_data, NULL, Irp); + Status = read_file(fcb, data + bufhead, start_data, 
fcb->inode_item.st_size - start_data, NULL, Irp, TRUE); else Status = STATUS_SUCCESS; } else - Status = read_file(fcb, data + bufhead, start_data, end_data - start_data, NULL, Irp); + Status = read_file(fcb, data + bufhead, start_data, end_data - start_data, NULL, Irp, TRUE); if (!NT_SUCCESS(Status)) { ERR("read_file returned %08x\n", Status); @@ -3823,9 +4186,6 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void RtlCopyMemory(data + bufhead + offset.QuadPart - start_data, buf, *length); - if (!nocsum) - InitializeListHead(&changed_sector_list); - if (make_inline) { Status = excise_extents(fcb->Vcb, fcb, start_data, end_data, Irp, rollback); if (!NT_SUCCESS(Status)) { @@ -3842,7 +4202,7 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void ed2->encoding = BTRFS_ENCODING_NONE; ed2->type = EXTENT_TYPE_INLINE; - if (!add_extent_to_fcb(fcb, 0, ed2, sizeof(EXTENT_DATA) - 1 + newlength, FALSE, rollback)) { + if (!add_extent_to_fcb(fcb, 0, ed2, sizeof(EXTENT_DATA) - 1 + newlength, FALSE, NULL, rollback)) { ERR("add_extent_to_fcb failed\n"); ExFreePool(data); Status = STATUS_INTERNAL_ERROR; @@ -3851,7 +4211,7 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void fcb->inode_item.st_blocks += newlength; } else if (compress) { - Status = write_compressed(fcb, start_data, end_data, data, nocsum ? NULL : &changed_sector_list, Irp, rollback); + Status = write_compressed(fcb, start_data, end_data, data, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("write_compressed returned %08x\n", Status); @@ -3861,7 +4221,7 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void ExFreePool(data); } else { - Status = do_write_file(fcb, start_data, end_data, data, nocsum ? 
NULL : &changed_sector_list, Irp, rollback); + Status = do_write_file(fcb, start_data, end_data, data, Irp, rollback); if (!NT_SUCCESS(Status)) { ERR("do_write_file returned %08x\n", Status); @@ -3930,12 +4290,6 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void mark_fcb_dirty(fcb->ads ? fileref->parent->fcb : fcb); } - if (!nocsum) { - ExAcquireResourceExclusiveLite(&Vcb->checksum_lock, TRUE); - commit_checksum_changes(Vcb, &changed_sector_list); - ExReleaseResourceLite(&Vcb->checksum_lock); - } - if (changed_length) { CC_FILE_SIZES ccfs; @@ -4052,6 +4406,20 @@ NTSTATUS write_file(device_extension* Vcb, PIRP Irp, BOOL wait, BOOL deferred_wr // check_extent_tree_consistent(Vcb); #endif + + if (diskacc && Status != STATUS_PENDING && Irp->Flags & IRP_NOCACHE) { + PETHREAD thread = NULL; + + if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread)) + thread = Irp->Tail.Overlay.Thread; + else if (!IoIsSystemThread(PsGetCurrentThread())) + thread = PsGetCurrentThread(); + else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp) + thread = PsGetCurrentThread(); + + if (thread) + PsUpdateDiskCounters(PsGetThreadProcess(thread), 0, IrpSp->Parameters.Write.Length, 0, 1, 0); + } } exit: @@ -4079,6 +4447,7 @@ NTSTATUS STDCALL drv_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { PFILE_OBJECT FileObject = IrpSp->FileObject; fcb* fcb = FileObject ? FileObject->FsContext : NULL; ccb* ccb = FileObject ? FileObject->FsContext2 : NULL; + BOOL wait = FileObject ? 
IoIsOperationSynchronous(Irp) : TRUE; FsRtlEnterFileSystem(); @@ -4101,22 +4470,37 @@ NTSTATUS STDCALL drv_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { goto end; } - if (fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY) { + if (Irp->RequestorMode == UserMode && !(ccb->access & (FILE_WRITE_DATA | FILE_APPEND_DATA))) { + WARN("insufficient permissions\n"); Status = STATUS_ACCESS_DENIED; goto end; } - if (Vcb->readonly) { - Status = STATUS_MEDIA_WRITE_PROTECTED; - goto end; + if (fcb == Vcb->volume_fcb) { + if (!Vcb->locked || Vcb->locked_fileobj != FileObject) { + ERR("trying to write to volume when not locked, or locked with another FileObject\n"); + Status = STATUS_ACCESS_DENIED; + goto end; + } + + TRACE("writing directly to volume\n"); + + IoSkipCurrentIrpStackLocation(Irp); + + Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp); + goto exit; } - if (Irp->RequestorMode == UserMode && !(ccb->access & (FILE_WRITE_DATA | FILE_APPEND_DATA))) { - WARN("insufficient permissions\n"); + if (fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY) { Status = STATUS_ACCESS_DENIED; goto end; } + if (Vcb->readonly) { + Status = STATUS_MEDIA_WRITE_PROTECTED; + goto end; + } + // ERR("recursive = %s\n", Irp != IoGetTopLevelIrp() ? "TRUE" : "FALSE"); _SEH2_TRY { @@ -4126,7 +4510,12 @@ NTSTATUS STDCALL drv_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { Irp->MdlAddress = NULL; Status = STATUS_SUCCESS; } else { - Status = write_file(Vcb, Irp, IoIsOperationSynchronous(Irp), FALSE); + // Don't offload jobs when doing paging IO - otherwise this can lead to + // deadlocks in CcCopyWrite. 
+ if (Irp->Flags & IRP_PAGING_IO) + wait = TRUE; + + Status = write_file(Vcb, Irp, wait, FALSE); } } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { Status = _SEH2_GetExceptionCode(); diff --git a/reactos/media/doc/README.FSD b/reactos/media/doc/README.FSD index cf9c6cabe7a..a351dff4821 100644 --- a/reactos/media/doc/README.FSD +++ b/reactos/media/doc/README.FSD @@ -3,7 +3,7 @@ The following FSD are shared with: https://github.com/maharmstone/btrfs. -reactos/drivers/filesystems/btrfs # Synced to 0.7 +reactos/drivers/filesystems/btrfs # Synced to 0.8 reactos/dll/shellext/shellbtrfs # Synced to 0.7 The following FSD are shared with: http://www.ext2fsd.com/