[BTRFS]
author Pierre Schweitzer <pierre@reactos.org>
Sun, 1 Jan 2017 17:12:12 +0000 (17:12 +0000)
committer Pierre Schweitzer <pierre@reactos.org>
Sun, 1 Jan 2017 17:12:12 +0000 (17:12 +0000)
Sync btrfs to 0.8.

CORE-12617

svn path=/trunk/; revision=73498

28 files changed:
reactos/drivers/filesystems/btrfs/CMakeLists.txt
reactos/drivers/filesystems/btrfs/balance.c [new file with mode: 0644]
reactos/drivers/filesystems/btrfs/btrfs.c
reactos/drivers/filesystems/btrfs/btrfs.h
reactos/drivers/filesystems/btrfs/btrfs.rc
reactos/drivers/filesystems/btrfs/btrfs_drv.h
reactos/drivers/filesystems/btrfs/btrfsioctl.h
reactos/drivers/filesystems/btrfs/cache.c
reactos/drivers/filesystems/btrfs/calcthread.c [new file with mode: 0644]
reactos/drivers/filesystems/btrfs/compress.c
reactos/drivers/filesystems/btrfs/create.c
reactos/drivers/filesystems/btrfs/devctrl.c
reactos/drivers/filesystems/btrfs/dirctrl.c
reactos/drivers/filesystems/btrfs/extent-tree.c
reactos/drivers/filesystems/btrfs/fastio.c
reactos/drivers/filesystems/btrfs/fileinfo.c
reactos/drivers/filesystems/btrfs/flushthread.c
reactos/drivers/filesystems/btrfs/free-space.c
reactos/drivers/filesystems/btrfs/fsctl.c
reactos/drivers/filesystems/btrfs/pnp.c
reactos/drivers/filesystems/btrfs/read.c
reactos/drivers/filesystems/btrfs/registry.c
reactos/drivers/filesystems/btrfs/reparse.c
reactos/drivers/filesystems/btrfs/search.c
reactos/drivers/filesystems/btrfs/security.c
reactos/drivers/filesystems/btrfs/treefuncs.c
reactos/drivers/filesystems/btrfs/write.c
reactos/media/doc/README.FSD

index 118429e..581bab9 100644 (file)
@@ -4,8 +4,10 @@ include_directories(${REACTOS_SOURCE_DIR}/sdk/include/reactos/drivers
                     inc)
 
 list(APPEND SOURCE
+    balance.c
     btrfs.c
     cache.c
+    calcthread.c
     compress.c
     crc32c.c
     create.c
@@ -33,7 +35,7 @@ add_library(btrfs SHARED ${SOURCE} btrfs.rc)
 
 add_definitions(-D__KERNEL__)
 set_module_type(btrfs kernelmodedriver)
-target_link_libraries(btrfs ntoskrnl_vista zlib_solo ${PSEH_LIB})
+target_link_libraries(btrfs rtlver ntoskrnl_vista zlib_solo wdmguid ${PSEH_LIB})
 add_importlibs(btrfs ntoskrnl hal)
 add_pch(btrfs btrfs_drv.h SOURCE)
 add_cd_file(TARGET btrfs DESTINATION reactos/system32/drivers NO_CAB FOR all)
diff --git a/reactos/drivers/filesystems/btrfs/balance.c b/reactos/drivers/filesystems/btrfs/balance.c
new file mode 100644 (file)
index 0000000..63e8821
--- /dev/null
@@ -0,0 +1,3180 @@
+/* Copyright (c) Mark Harmstone 2016
+ * 
+ * This file is part of WinBtrfs.
+ * 
+ * WinBtrfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public Licence as published by
+ * the Free Software Foundation, either version 3 of the Licence, or
+ * (at your option) any later version.
+ * 
+ * WinBtrfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public Licence for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public Licence
+ * along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include "btrfs_drv.h"
+#include "btrfsioctl.h"
+
+/* One tree block (metadata extent) queued for relocation. */
+typedef struct {
+    UINT64 address; // current address of the block, taken from the obj_id of its extent item key
+    UINT64 new_address; // address picked for the relocated copy of the block
+    tree_header* data; // node_size buffer holding the block's contents; NULL until it has been read
+    EXTENT_ITEM* ei; // points at the data of the block's original extent item
+    tree* t; // matching entry from Vcb's tree hash list, or NULL if none was found
+    BOOL system; // TRUE if the chunk the block was read from has BLOCK_FLAG_SYSTEM set
+    LIST_ENTRY refs; // list of metadata_reloc_ref, one per backreference to this block
+    LIST_ENTRY list_entry; // links this entry into the list of blocks being relocated
+} metadata_reloc;
+
+/* One backreference to a tree block that is being relocated. */
+typedef struct {
+    UINT8 type; // TYPE_TREE_BLOCK_REF or TYPE_SHARED_BLOCK_REF; selects the union member below
+    
+    union {
+        TREE_BLOCK_REF tbr; // valid when type == TYPE_TREE_BLOCK_REF; offset is matched against root ids
+        SHARED_BLOCK_REF sbr; // valid when type == TYPE_SHARED_BLOCK_REF; offset is the referring block's address
+    };
+    
+    metadata_reloc* parent; // relocation entry of the block that refers to this one, or NULL
+    BOOL top; // set to TRUE when no tree one level above this block could be found
+    LIST_ENTRY list_entry; // links this reference into metadata_reloc.refs
+} metadata_reloc_ref;
+
+/* One data extent queued for relocation.
+ * NOTE(review): only address and new_address are used in the part of the file
+ * reviewed here; the remaining field descriptions are assumptions to confirm. */
+typedef struct {
+    UINT64 address; // current address of the extent; EXTENT_DATA2 entries pointing here get rewritten
+    UINT64 size; // presumably the length of the extent in bytes - TODO confirm
+    UINT64 new_address; // address that EXTENT_DATA2 entries are redirected to
+    chunk* newchunk; // presumably the chunk containing new_address - TODO confirm
+    EXTENT_ITEM* ei; // presumably the extent's original extent item, as in metadata_reloc - TODO confirm
+    LIST_ENTRY refs; // presumably a list of data_reloc_ref - TODO confirm
+    LIST_ENTRY list_entry; // links this entry into the list of data extents being relocated
+} data_reloc;
+
+/* One backreference to a data extent that is being relocated.
+ * NOTE(review): this type is not used in the part of the file reviewed here;
+ * the field descriptions follow the parallel metadata_reloc_ref layout and
+ * should be confirmed against the code that fills them in. */
+typedef struct {
+    UINT8 type; // presumably TYPE_EXTENT_DATA_REF or TYPE_SHARED_DATA_REF, selecting the union member - TODO confirm
+    
+    union {
+        EXTENT_DATA_REF edr;
+        SHARED_DATA_REF sdr;
+    };
+    
+    metadata_reloc* parent; // presumably the relocation entry of the leaf holding the reference - TODO confirm
+    LIST_ENTRY list_entry; // presumably links this reference into data_reloc.refs - TODO confirm
+} data_reloc_ref;
+
+/* Defined in another translation unit of the driver.
+ * NOTE(review): presumably volumes_lock guards the volumes list - confirm at the definition. */
+extern LIST_ENTRY volumes;
+extern ERESOURCE volumes_lock;
+
+/* Queues the tree block described by the extent item at tp for relocation.
+ *
+ * The extent item is deleted from the tree, the block's space is handed back
+ * to its chunk, and every backreference to the block - inline ones stored
+ * after the EXTENT_ITEM as well as separate TREE_BLOCK_REF / SHARED_BLOCK_REF
+ * items that follow it - is copied into a new metadata_reloc, which is then
+ * appended to items.
+ *
+ * Vcb      - volume being balanced
+ * items    - list of metadata_reloc that the new entry is appended to
+ * tp       - traverse pointer at the block's EXTENT_ITEM / METADATA_ITEM
+ * skinny   - TRUE if the item has no EXTENT_ITEM2 after the EXTENT_ITEM
+ * mr2      - optional; receives the new entry on success
+ * c        - chunk containing the block, or NULL to look it up by address
+ * rollback - rollback list passed on to the tree and space-list helpers
+ *
+ * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES if an allocation
+ * fails, or STATUS_INTERNAL_ERROR if the item is malformed. On failure
+ * nothing is added to items, *mr2 is left untouched and everything allocated
+ * here is freed again; undoing the tree changes is left to the rollback list. */
+static NTSTATUS add_metadata_reloc(device_extension* Vcb, LIST_ENTRY* items, traverse_ptr* tp, BOOL skinny, metadata_reloc** mr2, chunk* c, LIST_ENTRY* rollback) {
+    metadata_reloc* mr;
+    EXTENT_ITEM* ei;
+    UINT16 len;
+    UINT64 inline_rc;
+    UINT8* ptr;
+    ULONG minlen;
+    NTSTATUS Status;
+
+    // len below is unsigned, so an item shorter than its fixed header would
+    // make it wrap round and send the parser past the end of the item
+    minlen = sizeof(EXTENT_ITEM);
+    if (!skinny)
+        minlen += sizeof(EXTENT_ITEM2);
+
+    if (tp->item->size < minlen) {
+        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, tp->item->size, minlen);
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    mr = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc), ALLOC_TAG);
+    if (!mr) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    mr->address = tp->item->key.obj_id;
+    mr->data = NULL;
+    mr->ei = (EXTENT_ITEM*)tp->item->data;
+    mr->system = FALSE;
+    InitializeListHead(&mr->refs);
+
+    delete_tree_item(Vcb, tp, rollback);
+
+    if (!c)
+        c = get_chunk_from_address(Vcb, tp->item->key.obj_id);
+
+    if (c) {
+        // give the block's space back to the chunk it currently lives in
+        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+
+        decrease_chunk_usage(c, Vcb->superblock.node_size);
+
+        space_list_add(Vcb, c, TRUE, tp->item->key.obj_id, Vcb->superblock.node_size, rollback);
+
+        ExReleaseResourceLite(&c->lock);
+    }
+
+    ei = (EXTENT_ITEM*)tp->item->data;
+    inline_rc = 0;
+
+    len = tp->item->size - sizeof(EXTENT_ITEM);
+    ptr = (UINT8*)tp->item->data + sizeof(EXTENT_ITEM);
+    if (!skinny) {
+        len -= sizeof(EXTENT_ITEM2);
+        ptr += sizeof(EXTENT_ITEM2);
+    }
+
+    // inline backreferences: a type byte followed by the matching structure
+    while (len > 0) {
+        UINT8 secttype = *ptr;
+        ULONG sectlen = secttype == TYPE_TREE_BLOCK_REF ? sizeof(TREE_BLOCK_REF) : (secttype == TYPE_SHARED_BLOCK_REF ? sizeof(SHARED_BLOCK_REF) : 0);
+        metadata_reloc_ref* ref;
+
+        len--;
+
+        if (sectlen > len) {
+            ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, len, sectlen);
+            Status = STATUS_INTERNAL_ERROR;
+            goto fail;
+        }
+
+        if (sectlen == 0) {
+            ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, secttype);
+            Status = STATUS_INTERNAL_ERROR;
+            goto fail;
+        }
+
+        ref = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG);
+        if (!ref) {
+            ERR("out of memory\n");
+            Status = STATUS_INSUFFICIENT_RESOURCES;
+            goto fail;
+        }
+
+        if (secttype == TYPE_TREE_BLOCK_REF) {
+            ref->type = TYPE_TREE_BLOCK_REF;
+            RtlCopyMemory(&ref->tbr, ptr + sizeof(UINT8), sizeof(TREE_BLOCK_REF));
+            inline_rc++;
+        } else if (secttype == TYPE_SHARED_BLOCK_REF) {
+            ref->type = TYPE_SHARED_BLOCK_REF;
+            RtlCopyMemory(&ref->sbr, ptr + sizeof(UINT8), sizeof(SHARED_BLOCK_REF));
+            inline_rc++;
+        } else {
+            ERR("unexpected tree type %x\n", secttype);
+            ExFreePool(ref); // not on mr->refs yet, so the cleanup below would miss it
+            Status = STATUS_INTERNAL_ERROR;
+            goto fail;
+        }
+
+        ref->parent = NULL;
+        ref->top = FALSE;
+        InsertTailList(&mr->refs, &ref->list_entry);
+
+        len -= sectlen;
+        ptr += sizeof(UINT8) + sectlen;
+    }
+
+    if (inline_rc < ei->refcount) { // look for non-inline entries
+        traverse_ptr tp2 = *tp, next_tp;
+
+        while (find_next_item(Vcb, &tp2, &next_tp, FALSE, NULL)) {
+            tp2 = next_tp;
+
+            if (tp2.item->key.obj_id == tp->item->key.obj_id) {
+                if (tp2.item->key.obj_type == TYPE_TREE_BLOCK_REF && tp2.item->size >= sizeof(TREE_BLOCK_REF)) {
+                    metadata_reloc_ref* ref = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG);
+                    if (!ref) {
+                        ERR("out of memory\n");
+                        Status = STATUS_INSUFFICIENT_RESOURCES;
+                        goto fail;
+                    }
+
+                    ref->type = TYPE_TREE_BLOCK_REF;
+                    RtlCopyMemory(&ref->tbr, tp2.item->data, sizeof(TREE_BLOCK_REF));
+                    ref->parent = NULL;
+                    ref->top = FALSE;
+                    InsertTailList(&mr->refs, &ref->list_entry);
+
+                    delete_tree_item(Vcb, &tp2, rollback);
+                } else if (tp2.item->key.obj_type == TYPE_SHARED_BLOCK_REF && tp2.item->size >= sizeof(SHARED_BLOCK_REF)) {
+                    metadata_reloc_ref* ref = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG);
+                    if (!ref) {
+                        ERR("out of memory\n");
+                        Status = STATUS_INSUFFICIENT_RESOURCES;
+                        goto fail;
+                    }
+
+                    ref->type = TYPE_SHARED_BLOCK_REF;
+                    RtlCopyMemory(&ref->sbr, tp2.item->data, sizeof(SHARED_BLOCK_REF));
+                    ref->parent = NULL;
+                    ref->top = FALSE;
+                    InsertTailList(&mr->refs, &ref->list_entry);
+
+                    delete_tree_item(Vcb, &tp2, rollback);
+                }
+            } else
+                break;
+        }
+    }
+
+    InsertTailList(items, &mr->list_entry);
+
+    if (mr2)
+        *mr2 = mr;
+
+    return STATUS_SUCCESS;
+
+fail:
+    // mr has not been linked into items yet, so nobody else would free it
+    // or the references collected on it so far
+    while (!IsListEmpty(&mr->refs)) {
+        metadata_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&mr->refs), metadata_reloc_ref, list_entry);
+
+        ExFreePool(ref);
+    }
+
+    ExFreePool(mr);
+
+    return Status;
+}
+
+/* Returns, through mr2, the relocation entry for the tree block at address.
+ *
+ * If items already holds an entry for that address it is reused. Otherwise
+ * the block's extent item is looked up in the extent tree, validated, and
+ * handed to add_metadata_reloc, which creates the entry and appends it to
+ * items.
+ *
+ * Returns STATUS_SUCCESS, the failure status of find_item or
+ * add_metadata_reloc, or STATUS_INTERNAL_ERROR if no usable extent item
+ * exists for the address. */
+static NTSTATUS add_metadata_reloc_parent(device_extension* Vcb, LIST_ENTRY* items, UINT64 address, metadata_reloc** mr2, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+    KEY searchkey;
+    traverse_ptr tp;
+    LIST_ENTRY* entry;
+    BOOL skinny;
+
+    // already queued?
+    for (entry = items->Flink; entry != items; entry = entry->Flink) {
+        metadata_reloc* existing = CONTAINING_RECORD(entry, metadata_reloc, list_entry);
+
+        if (existing->address == address) {
+            *mr2 = existing;
+            return STATUS_SUCCESS;
+        }
+    }
+
+    searchkey.obj_id = address;
+    searchkey.obj_type = TYPE_METADATA_ITEM;
+    searchkey.offset = 0xffffffffffffffff;
+
+    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
+    if (!NT_SUCCESS(Status)) {
+        ERR("find_item returned %08x\n", Status);
+        return Status;
+    }
+
+    // both accepted item kinds must belong to this address and be large
+    // enough to hold an EXTENT_ITEM
+    if (tp.item->key.obj_id != address || tp.item->size < sizeof(EXTENT_ITEM)) {
+        ERR("could not find valid EXTENT_ITEM for address %llx\n", address);
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    if (tp.item->key.obj_type == TYPE_METADATA_ITEM) {
+        skinny = TRUE;
+    } else if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset == Vcb->superblock.node_size) {
+        EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
+
+        if (!(ei->flags & EXTENT_ITEM_TREE_BLOCK)) {
+            ERR("EXTENT_ITEM for %llx found, but tree flag not set\n", address);
+            return STATUS_INTERNAL_ERROR;
+        }
+
+        skinny = FALSE;
+    } else {
+        ERR("could not find valid EXTENT_ITEM for address %llx\n", address);
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    Status = add_metadata_reloc(Vcb, items, &tp, skinny, mr2, NULL, rollback);
+    if (!NT_SUCCESS(Status)) {
+        ERR("add_metadata_reloc returned %08x\n", Status);
+        return Status;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+/* Writes the extent-tree records for a relocated tree block at
+ * mr->new_address.
+ *
+ * A new EXTENT_ITEM (METADATA_ITEM on skinny-metadata volumes) is built with
+ * as many backreferences stored inline as fit in a quarter of a node; the
+ * remaining ones are inserted as separate TREE_BLOCK_REF / SHARED_BLOCK_REF
+ * items. Shared block references are written with the new address of the
+ * referring block (ref->parent->new_address).
+ *
+ * If the block uses shared backrefs, the shared references that its children
+ * (or, for a leaf, the data extents it points to) hold against mr->address
+ * are then moved over to mr->new_address, including any matching entries in
+ * the owning chunk's changed_extents list.
+ *
+ * mr->data and mr->new_address must already be filled in.
+ *
+ * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES if an allocation
+ * fails, STATUS_INTERNAL_ERROR if insert_tree_item fails, or the failure
+ * status of increase_extent_refcount / decrease_extent_refcount. A buffer
+ * whose insert_tree_item call failed is freed here, since it was never handed
+ * over to the tree. */
+static NTSTATUS add_metadata_reloc_extent_item(device_extension* Vcb, metadata_reloc* mr, LIST_ENTRY* rollback) {
+    LIST_ENTRY* le;
+    UINT64 rc = 0;
+    UINT16 inline_len;
+    BOOL all_inline = TRUE;
+    metadata_reloc_ref* first_noninline = NULL;
+    EXTENT_ITEM* ei;
+    UINT8* ptr;
+
+    inline_len = sizeof(EXTENT_ITEM);
+    if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA))
+        inline_len += sizeof(EXTENT_ITEM2);
+
+    // count the references and work out how many of them fit inline
+    le = mr->refs.Flink;
+    while (le != &mr->refs) {
+        metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry);
+        ULONG extlen = 0;
+
+        rc++;
+
+        if (ref->type == TYPE_TREE_BLOCK_REF)
+            extlen += sizeof(TREE_BLOCK_REF);
+        else if (ref->type == TYPE_SHARED_BLOCK_REF)
+            extlen += sizeof(SHARED_BLOCK_REF);
+
+        if (all_inline) {
+            // + 1 for the type byte in front of each inline reference
+            if (inline_len + 1 + extlen > Vcb->superblock.node_size / 4) {
+                all_inline = FALSE;
+                first_noninline = ref;
+            } else
+                inline_len += extlen + 1;
+        }
+
+        le = le->Flink;
+    }
+
+    ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG);
+    if (!ei) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    ei->refcount = rc;
+    ei->generation = mr->ei->generation;
+    ei->flags = mr->ei->flags;
+    ptr = (UINT8*)&ei[1];
+
+    if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
+        EXTENT_ITEM2* ei2 = (EXTENT_ITEM2*)ptr;
+
+        // the first key of the block sits directly after its header
+        ei2->firstitem = *(KEY*)&mr->data[1];
+        ei2->level = mr->data->level;
+
+        ptr += sizeof(EXTENT_ITEM2);
+    }
+
+    // inline references, up to but not including first_noninline
+    le = mr->refs.Flink;
+    while (le != &mr->refs) {
+        metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry);
+
+        if (ref == first_noninline)
+            break;
+
+        *ptr = ref->type;
+        ptr++;
+
+        if (ref->type == TYPE_TREE_BLOCK_REF) {
+            TREE_BLOCK_REF* tbr = (TREE_BLOCK_REF*)ptr;
+
+            tbr->offset = ref->tbr.offset;
+
+            ptr += sizeof(TREE_BLOCK_REF);
+        } else if (ref->type == TYPE_SHARED_BLOCK_REF) {
+            SHARED_BLOCK_REF* sbr = (SHARED_BLOCK_REF*)ptr;
+
+            sbr->offset = ref->parent->new_address;
+
+            ptr += sizeof(SHARED_BLOCK_REF);
+        }
+
+        le = le->Flink;
+    }
+
+    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
+        if (!insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_METADATA_ITEM, mr->data->level, ei, inline_len, NULL, NULL, rollback)) {
+            ERR("insert_tree_item failed\n");
+            ExFreePool(ei);
+            return STATUS_INTERNAL_ERROR;
+        }
+    } else {
+        if (!insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, ei, inline_len, NULL, NULL, rollback)) {
+            ERR("insert_tree_item failed\n");
+            ExFreePool(ei);
+            return STATUS_INTERNAL_ERROR;
+        }
+    }
+
+    // references that did not fit inline get items of their own
+    if (!all_inline) {
+        le = &first_noninline->list_entry;
+
+        while (le != &mr->refs) {
+            metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry);
+
+            if (ref->type == TYPE_TREE_BLOCK_REF) {
+                TREE_BLOCK_REF* tbr;
+
+                tbr = ExAllocatePoolWithTag(PagedPool, sizeof(TREE_BLOCK_REF), ALLOC_TAG);
+                if (!tbr) {
+                    ERR("out of memory\n");
+                    return STATUS_INSUFFICIENT_RESOURCES;
+                }
+
+                tbr->offset = ref->tbr.offset;
+
+                if (!insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_TREE_BLOCK_REF, tbr->offset, tbr, sizeof(TREE_BLOCK_REF), NULL, NULL, rollback)) {
+                    ERR("insert_tree_item failed\n");
+                    ExFreePool(tbr);
+                    return STATUS_INTERNAL_ERROR;
+                }
+            } else if (ref->type == TYPE_SHARED_BLOCK_REF) {
+                SHARED_BLOCK_REF* sbr;
+
+                sbr = ExAllocatePoolWithTag(PagedPool, sizeof(SHARED_BLOCK_REF), ALLOC_TAG);
+                if (!sbr) {
+                    ERR("out of memory\n");
+                    return STATUS_INSUFFICIENT_RESOURCES;
+                }
+
+                sbr->offset = ref->parent->new_address;
+
+                if (!insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_SHARED_BLOCK_REF, sbr->offset, sbr, sizeof(SHARED_BLOCK_REF), NULL, NULL, rollback)) {
+                    ERR("insert_tree_item failed\n");
+                    ExFreePool(sbr);
+                    return STATUS_INTERNAL_ERROR;
+                }
+            }
+
+            le = le->Flink;
+        }
+    }
+
+    // with shared backrefs the block's children refer to it by address, so
+    // those references have to be moved from the old address to the new one
+    if (ei->flags & EXTENT_ITEM_SHARED_BACKREFS || mr->data->flags & HEADER_FLAG_SHARED_BACKREF || !(mr->data->flags & HEADER_FLAG_MIXED_BACKREF)) {
+        if (mr->data->level > 0) {
+            UINT16 i;
+            internal_node* in = (internal_node*)&mr->data[1];
+
+            for (i = 0; i < mr->data->num_items; i++) {
+                UINT64 sbrrc = find_extent_shared_tree_refcount(Vcb, in[i].address, mr->address, NULL);
+
+                if (sbrrc > 0) {
+                    NTSTATUS Status;
+                    SHARED_BLOCK_REF sbr;
+
+                    sbr.offset = mr->new_address;
+
+                    Status = increase_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
+                                                      NULL, rollback);
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("increase_extent_refcount returned %08x\n", Status);
+                        return Status;
+                    }
+
+                    sbr.offset = mr->address;
+
+                    Status = decrease_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
+                                                      sbr.offset, FALSE, NULL, rollback);
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("decrease_extent_refcount returned %08x\n", Status);
+                        return Status;
+                    }
+                }
+            }
+        } else {
+            UINT16 i;
+            leaf_node* ln = (leaf_node*)&mr->data[1];
+
+            for (i = 0; i < mr->data->num_items; i++) {
+                if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
+                    EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)mr->data + sizeof(tree_header) + ln[i].offset);
+
+                    if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
+                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
+
+                        if (ed2->size > 0) { // not sparse
+                            UINT64 sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, mr->address, NULL);
+
+                            if (sdrrc > 0) {
+                                NTSTATUS Status;
+                                SHARED_DATA_REF sdr;
+                                chunk* c;
+
+                                sdr.offset = mr->new_address;
+                                sdr.count = sdrrc;
+
+                                Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0,
+                                                                  NULL, rollback);
+                                if (!NT_SUCCESS(Status)) {
+                                    ERR("increase_extent_refcount returned %08x\n", Status);
+                                    return Status;
+                                }
+
+                                sdr.offset = mr->address;
+
+                                Status = decrease_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0,
+                                                                  sdr.offset, FALSE, NULL, rollback);
+                                if (!NT_SUCCESS(Status)) {
+                                    ERR("decrease_extent_refcount returned %08x\n", Status);
+                                    return Status;
+                                }
+
+                                c = get_chunk_from_address(Vcb, ed2->address);
+
+                                if (c) {
+                                    // check changed_extents
+
+                                    ExAcquireResourceExclusiveLite(&c->changed_extents_lock, TRUE);
+
+                                    le = c->changed_extents.Flink;
+
+                                    while (le != &c->changed_extents) {
+                                        changed_extent* ce = CONTAINING_RECORD(le, changed_extent, list_entry);
+
+                                        if (ce->address == ed2->address) {
+                                            LIST_ENTRY* le2;
+
+                                            // repoint the first matching shared data ref in each list
+                                            le2 = ce->refs.Flink;
+                                            while (le2 != &ce->refs) {
+                                                changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
+
+                                                if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == mr->address) {
+                                                    cer->sdr.offset = mr->new_address;
+                                                    break;
+                                                }
+
+                                                le2 = le2->Flink;
+                                            }
+
+                                            le2 = ce->old_refs.Flink;
+                                            while (le2 != &ce->old_refs) {
+                                                changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
+
+                                                if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == mr->address) {
+                                                    cer->sdr.offset = mr->new_address;
+                                                    break;
+                                                }
+
+                                                le2 = le2->Flink;
+                                            }
+
+                                            break;
+                                        }
+
+                                        le = le->Flink;
+                                    }
+
+                                    ExReleaseResourceLite(&c->changed_extents_lock);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    return STATUS_SUCCESS;
+}
+
+static NTSTATUS write_metadata_items(device_extension* Vcb, LIST_ENTRY* items, LIST_ENTRY* data_items, chunk* c, LIST_ENTRY* rollback) {
+    LIST_ENTRY tree_writes, *le;
+    NTSTATUS Status;
+    traverse_ptr tp;
+    UINT8 level, max_level = 0;
+    chunk* newchunk = NULL;
+    
+    InitializeListHead(&tree_writes);
+    
+    le = items->Flink;
+    while (le != items) {
+        metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry);
+        LIST_ENTRY* le2;
+        chunk* pc;
+        
+//         ERR("address %llx\n", mr->address);
+        
+        mr->data = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
+        if (!mr->data) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        Status = read_data(Vcb, mr->address, Vcb->superblock.node_size, NULL, TRUE, (UINT8*)mr->data,
+                           c && mr->address >= c->offset && mr->address < c->offset + c->chunk_item->size ? c : NULL, &pc, NULL, FALSE);
+        if (!NT_SUCCESS(Status)) {
+            ERR("read_data returned %08x\n", Status);
+            return Status;
+        }
+        
+        if (pc->chunk_item->type & BLOCK_FLAG_SYSTEM)
+            mr->system = TRUE;
+        
+        if (data_items && mr->data->level == 0) {
+            LIST_ENTRY* le2 = data_items->Flink;
+            while (le2 != data_items) {
+                data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry);
+                leaf_node* ln = (leaf_node*)&mr->data[1];
+                UINT16 i;
+                
+                for (i = 0; i < mr->data->num_items; i++) {
+                    if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
+                        EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)mr->data + sizeof(tree_header) + ln[i].offset);
+                        
+                        if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
+                            EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
+                            
+                            if (ed2->address == dr->address)
+                                ed2->address = dr->new_address;
+                        }
+                    }
+                }
+                
+                le2 = le2->Flink;
+            }
+        }
+        
+        if (mr->data->level > max_level)
+            max_level = mr->data->level;
+        
+        le2 = mr->refs.Flink;
+        while (le2 != &mr->refs) {
+            metadata_reloc_ref* ref = CONTAINING_RECORD(le2, metadata_reloc_ref, list_entry);
+            
+            if (ref->type == TYPE_TREE_BLOCK_REF) {
+                KEY* firstitem;
+                root* r = NULL;
+                LIST_ENTRY* le3;
+                tree* t;
+                
+                firstitem = (KEY*)&mr->data[1];
+                
+                le3 = Vcb->roots.Flink;
+                while (le3 != &Vcb->roots) {
+                    root* r2 = CONTAINING_RECORD(le3, root, list_entry);
+                    
+                    if (r2->id == ref->tbr.offset) {
+                        r = r2;
+                        break;
+                    }
+                    
+                    le3 = le3->Flink;
+                }
+                
+                if (!r) {
+                    ERR("could not find subvol with id %llx\n", ref->tbr.offset);
+                    return STATUS_INTERNAL_ERROR;
+                }
+                
+                Status = find_item_to_level(Vcb, r, &tp, firstitem, FALSE, mr->data->level + 1, NULL);
+                if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
+                    ERR("find_item_to_level returned %08x\n", Status);
+                    return Status;
+                }
+                
+                t = tp.tree;
+                while (t && t->header.level < mr->data->level + 1) {
+                    t = t->parent;
+                }
+                
+                if (!t)
+                    ref->top = TRUE;
+                else {
+                    metadata_reloc* mr2;
+                    
+                    Status = add_metadata_reloc_parent(Vcb, items, t->header.address, &mr2, rollback);
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("add_metadata_reloc_parent returned %08x\n", Status);
+                        return Status;
+                    }
+                    
+                    ref->parent = mr2;
+                }
+            } else if (ref->type == TYPE_SHARED_BLOCK_REF) {
+                metadata_reloc* mr2;
+                
+                Status = add_metadata_reloc_parent(Vcb, items, ref->sbr.offset, &mr2, rollback);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("add_metadata_reloc_parent returned %08x\n", Status);
+                    return Status;
+                }
+                
+                ref->parent = mr2;
+            }
+            
+            le2 = le2->Flink;
+        }
+        
+        le = le->Flink;
+    }
+    
+    le = items->Flink;
+    while (le != items) {
+        metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry);
+        LIST_ENTRY* le2;
+        UINT32 hash;
+        
+        mr->t = NULL;
+        
+        hash = calc_crc32c(0xffffffff, (UINT8*)&mr->address, sizeof(UINT64));
+        
+        le2 = Vcb->trees_ptrs[hash >> 24];
+        
+        if (le2) {
+            while (le2 != &Vcb->trees_hash) {
+                tree* t = CONTAINING_RECORD(le2, tree, list_entry_hash);
+                
+                if (t->header.address == mr->address) {
+                    mr->t = t;
+                    break;
+                } else if (t->hash > hash)
+                    break;
+                
+                le2 = le2->Flink;
+            }
+        }
+        
+        le = le->Flink;
+    }
+    
+    for (level = 0; level <= max_level; level++) {
+        le = items->Flink;
+        while (le != items) {
+            metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry);
+            
+            if (mr->data->level == level) {
+                BOOL done = FALSE;
+                LIST_ENTRY* le2;
+                tree_write* tw;
+                UINT64 flags;
+                tree* t3;
+                
+                if (mr->system)
+                    flags = Vcb->system_flags;
+                else if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS)
+                    flags = Vcb->data_flags;
+                else
+                    flags = Vcb->metadata_flags;
+                
+                if (newchunk) {
+                    ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE);
+                    
+                    if (newchunk->chunk_item->type == flags && find_metadata_address_in_chunk(Vcb, newchunk, &mr->new_address)) {
+                        increase_chunk_usage(newchunk, Vcb->superblock.node_size);
+                        space_list_subtract(Vcb, newchunk, FALSE, mr->new_address, Vcb->superblock.node_size, rollback);
+                        done = TRUE;
+                    }
+                    
+                    ExReleaseResourceLite(&newchunk->lock);
+                }
+                
+                if (!done) {
+                    ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
+    
+                    le2 = Vcb->chunks.Flink;
+                    while (le2 != &Vcb->chunks) {
+                        chunk* c2 = CONTAINING_RECORD(le2, chunk, list_entry);
+                        
+                        if (!c2->readonly && !c2->reloc && c2 != newchunk && c2->chunk_item->type == flags) {
+                            ExAcquireResourceExclusiveLite(&c2->lock, TRUE);
+                            
+                            if ((c2->chunk_item->size - c2->used) >= Vcb->superblock.node_size) {
+                                if (find_metadata_address_in_chunk(Vcb, c2, &mr->new_address)) {
+                                    increase_chunk_usage(c2, Vcb->superblock.node_size);
+                                    space_list_subtract(Vcb, c2, FALSE, mr->new_address, Vcb->superblock.node_size, rollback);
+                                    ExReleaseResourceLite(&c2->lock);
+                                    newchunk = c2;
+                                    done = TRUE;
+                                    break;
+                                }
+                            }
+                            
+                            ExReleaseResourceLite(&c2->lock);
+                        }
+
+                        le2 = le2->Flink;
+                    }
+                    
+                    // allocate new chunk if necessary
+                    if (!done) {
+                        newchunk = alloc_chunk(Vcb, flags);
+                        
+                        if (!newchunk) {
+                            ERR("could not allocate new chunk\n");
+                            ExReleaseResourceLite(&Vcb->chunk_lock);
+                            Status = STATUS_DISK_FULL;
+                            goto end;
+                        }
+                        
+                        ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE);
+                        
+                        if (!find_metadata_address_in_chunk(Vcb, newchunk, &mr->new_address)) {
+                            ExReleaseResourceLite(&newchunk->lock);
+                            ERR("could not find address in new chunk\n");
+                            Status = STATUS_DISK_FULL;
+                            goto end;
+                        } else {
+                            increase_chunk_usage(newchunk, Vcb->superblock.node_size);
+                            space_list_subtract(Vcb, newchunk, FALSE, mr->new_address, Vcb->superblock.node_size, rollback);
+                        }
+                        
+                        ExReleaseResourceLite(&newchunk->lock);
+                    }
+                    
+                    ExReleaseResourceLite(&Vcb->chunk_lock);
+                }
+                
+                // update parents
+                le2 = mr->refs.Flink;
+                while (le2 != &mr->refs) {
+                    metadata_reloc_ref* ref = CONTAINING_RECORD(le2, metadata_reloc_ref, list_entry);
+                    
+                    if (ref->parent) {
+                        UINT16 i;
+                        internal_node* in = (internal_node*)&ref->parent->data[1];
+                        
+                        for (i = 0; i < ref->parent->data->num_items; i++) {
+                            if (in[i].address == mr->address) {
+                                in[i].address = mr->new_address;
+                                break;
+                            }
+                        }
+                        
+                        if (ref->parent->t) {
+                            LIST_ENTRY* le3;
+                            
+                            le3 = ref->parent->t->itemlist.Flink;
+                            while (le3 != &ref->parent->t->itemlist) {
+                                tree_data* td = CONTAINING_RECORD(le3, tree_data, list_entry);
+                                
+                                if (!td->inserted && td->treeholder.address == mr->address)
+                                    td->treeholder.address = mr->new_address;
+                                
+                                le3 = le3->Flink;
+                            }
+                        }
+                    } else if (ref->top && ref->type == TYPE_TREE_BLOCK_REF) {
+                        LIST_ENTRY* le3;
+                        root* r = NULL;
+                        
+                        // alter ROOT_ITEM
+                        
+                        le3 = Vcb->roots.Flink;
+                        while (le3 != &Vcb->roots) {
+                            root* r2 = CONTAINING_RECORD(le3, root, list_entry);
+                            
+                            if (r2->id == ref->tbr.offset) {
+                                r = r2;
+                                break;
+                            }
+                            
+                            le3 = le3->Flink;
+                        }
+                        
+                        if (r) {
+                            r->treeholder.address = mr->new_address;
+                            
+                            if (r == Vcb->root_root)
+                                Vcb->superblock.root_tree_addr = mr->new_address;
+                            else if (r == Vcb->chunk_root)
+                                Vcb->superblock.chunk_tree_addr = mr->new_address;
+                            else if (r->root_item.block_number == mr->address) {
+                                KEY searchkey;
+                                ROOT_ITEM* ri;
+                                
+                                r->root_item.block_number = mr->new_address;
+                                
+                                searchkey.obj_id = r->id;
+                                searchkey.obj_type = TYPE_ROOT_ITEM;
+                                searchkey.offset = 0xffffffffffffffff;
+                                
+                                Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
+                                if (!NT_SUCCESS(Status)) {
+                                    ERR("find_item returned %08x\n", Status);
+                                    goto end;
+                                }
+                                
+                                if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+                                    ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
+                                    Status = STATUS_INTERNAL_ERROR;
+                                    goto end;
+                                }
+                                
+                                ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
+                                if (!ri) {
+                                    ERR("out of memory\n");
+                                    Status = STATUS_INSUFFICIENT_RESOURCES;
+                                    goto end;
+                                }
+                                
+                                RtlCopyMemory(ri, &r->root_item, sizeof(ROOT_ITEM));
+                                
+                                delete_tree_item(Vcb, &tp, rollback);
+                                
+                                if (!insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, NULL, rollback)) {
+                                    ERR("insert_tree_item failed\n");
+                                    Status = STATUS_INTERNAL_ERROR;
+                                    goto end;
+                                }
+                            }
+                        }
+                    }
+                    
+                    le2 = le2->Flink;
+                }
+                
+                mr->data->address = mr->new_address;
+                
+                t3 = mr->t;
+
+                while (t3) {
+                    UINT8 h;
+                    BOOL inserted;
+                    tree* t4 = NULL;
+                    
+                    // check if tree loaded more than once
+                    if (t3->list_entry.Flink != &Vcb->trees_hash) {
+                        tree* nt = CONTAINING_RECORD(t3->list_entry_hash.Flink, tree, list_entry_hash);
+                        
+                        if (nt->header.address == t3->header.address)
+                            t4 = nt;
+                    }
+                    
+                    t3->header.address = mr->new_address;
+                    
+                    h = t3->hash >> 24;
+                    
+                    if (Vcb->trees_ptrs[h] == &t3->list_entry_hash) {
+                        if (t3->list_entry_hash.Flink == &Vcb->trees_hash)
+                            Vcb->trees_ptrs[h] = NULL;
+                        else {
+                            tree* t2 = CONTAINING_RECORD(t3->list_entry_hash.Flink, tree, list_entry_hash);
+                            
+                            if (t2->hash >> 24 == h)
+                                Vcb->trees_ptrs[h] = &t2->list_entry_hash;
+                            else
+                                Vcb->trees_ptrs[h] = NULL;
+                        }
+                    }
+                        
+                    RemoveEntryList(&t3->list_entry_hash);
+                    
+                    t3->hash = calc_crc32c(0xffffffff, (UINT8*)&t3->header.address, sizeof(UINT64));
+                    h = t3->hash >> 24;
+                    
+                    if (!Vcb->trees_ptrs[h]) {
+                        UINT8 h2 = h;
+                        
+                        le2 = Vcb->trees_hash.Flink;
+                        
+                        if (h2 > 0) {
+                            h2--;
+                            do {
+                                if (Vcb->trees_ptrs[h2]) {
+                                    le2 = Vcb->trees_ptrs[h2];
+                                    break;
+                                }
+                                    
+                                h2--;
+                            } while (h2 > 0);
+                        }
+                    } else
+                        le2 = Vcb->trees_ptrs[h];
+                    
+                    inserted = FALSE;
+                    while (le2 != &Vcb->trees_hash) {
+                        tree* t2 = CONTAINING_RECORD(le2, tree, list_entry_hash);
+                        
+                        if (t2->hash >= t3->hash) {
+                            InsertHeadList(le2->Blink, &t3->list_entry_hash);
+                            inserted = TRUE;
+                            break;
+                        }
+                        
+                        le2 = le2->Flink;
+                    }
+
+                    if (!inserted)
+                        InsertTailList(&Vcb->trees_hash, &t3->list_entry_hash);
+
+                    if (!Vcb->trees_ptrs[h] || t3->list_entry_hash.Flink == Vcb->trees_ptrs[h])
+                        Vcb->trees_ptrs[h] = &t3->list_entry_hash;
+                    
+                    if (data_items && level == 0) {
+                        le2 = data_items->Flink;
+                        
+                        while (le2 != data_items) {
+                            data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry);
+                            LIST_ENTRY* le3 = t3->itemlist.Flink;
+                            
+                            while (le3 != &t3->itemlist) {
+                                tree_data* td = CONTAINING_RECORD(le3, tree_data, list_entry);
+                                
+                                if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
+                                    EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
+                                    
+                                    if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
+                                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
+                                        
+                                        if (ed2->address == dr->address)
+                                            ed2->address = dr->new_address;
+                                    }
+                                }
+                                
+                                le3 = le3->Flink;
+                            }
+                            
+                            le2 = le2->Flink;
+                        }
+                    }
+                    
+                    t3 = t4;
+                }
+
+                *((UINT32*)mr->data) = ~calc_crc32c(0xffffffff, (UINT8*)&mr->data->fs_uuid, Vcb->superblock.node_size - sizeof(mr->data->csum));
+                
+                tw = ExAllocatePoolWithTag(PagedPool, sizeof(tree_write), ALLOC_TAG);
+                if (!tw) {
+                    ERR("out of memory\n");
+                    Status = STATUS_INSUFFICIENT_RESOURCES;
+                    goto end;
+                }
+                
+                tw->address = mr->new_address;
+                tw->length = Vcb->superblock.node_size;
+                tw->data = (UINT8*)mr->data;
+                tw->overlap = FALSE;
+                
+                if (IsListEmpty(&tree_writes))
+                    InsertTailList(&tree_writes, &tw->list_entry);
+                else {
+                    BOOL inserted = FALSE;
+                    
+                    le2 = tree_writes.Flink;
+                    while (le2 != &tree_writes) {
+                        tree_write* tw2 = CONTAINING_RECORD(le2, tree_write, list_entry);
+                        
+                        if (tw2->address > tw->address) {
+                            InsertHeadList(le2->Blink, &tw->list_entry);
+                            inserted = TRUE;
+                            break;
+                        }
+                        
+                        le2 = le2->Flink;
+                    }
+                    
+                    if (!inserted)
+                        InsertTailList(&tree_writes, &tw->list_entry);
+                }
+            }
+            
+            le = le->Flink;
+        }
+    }
+    
+    le = items->Flink;
+    while (le != items) {
+        metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry);
+        
+        Status = add_metadata_reloc_extent_item(Vcb, mr, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("add_metadata_reloc_extent_item returned %08x\n", Status);
+            goto end;
+        }
+        
+        le = le->Flink;
+    }
+    
+    Status = do_tree_writes(Vcb, &tree_writes, NULL);
+    if (!NT_SUCCESS(Status)) {
+        ERR("do_tree_writes returned %08x\n", Status);
+        goto end;
+    }
+    
+    Status = STATUS_SUCCESS;
+    
+end:
+    while (!IsListEmpty(&tree_writes)) {
+        tree_write* tw = CONTAINING_RECORD(RemoveHeadList(&tree_writes), tree_write, list_entry);
+        ExFreePool(tw);
+    }
+    
+    return Status;
+}
+
+/*
+ * Performs one relocation pass over the tree blocks recorded in the extent
+ * tree for chunk c.
+ *
+ * Walks the extent tree from c->offset to the end of the chunk, queues every
+ * tree-block extent (skinny METADATA_ITEMs, or EXTENT_ITEMs of node size with
+ * EXTENT_ITEM_TREE_BLOCK set) via add_metadata_reloc, and stops after 64 of
+ * them. The queued blocks are then handed to write_metadata_items.
+ *
+ * Vcb->tree_lock is held exclusively for the whole pass. On success the
+ * rollback list is cleared, otherwise it is replayed with do_rollback.
+ *
+ * *changed is set to TRUE if at least one block was queued and to FALSE if
+ * none was found; it is not written when find_item or add_metadata_reloc
+ * fails.
+ */
+static NTSTATUS balance_metadata_chunk(device_extension* Vcb, chunk* c, BOOL* changed) {
+    NTSTATUS Status;
+    KEY searchkey;
+    traverse_ptr tp, next_tp;
+    LIST_ENTRY items, rollback;
+    UINT32 num_queued = 0;
+    BOOL more = TRUE;
+
+    TRACE("chunk %llx\n", c->offset);
+
+    InitializeListHead(&rollback);
+    InitializeListHead(&items);
+
+    ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
+
+    searchkey.obj_id = c->offset;
+    searchkey.obj_type = TYPE_METADATA_ITEM;
+    searchkey.offset = 0xffffffffffffffff;
+
+    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
+    if (!NT_SUCCESS(Status)) {
+        ERR("find_item returned %08x\n", Status);
+        goto end;
+    }
+
+    // stop at the first item past the end of the chunk, or when the tree runs out
+    while (more && tp.item->key.obj_id < c->offset + c->chunk_item->size) {
+        BOOL is_tree = FALSE, skinny = FALSE;
+
+        if (tp.item->key.obj_id >= c->offset && tp.item->size >= sizeof(EXTENT_ITEM)) {
+            if (tp.item->key.obj_type == TYPE_METADATA_ITEM) {
+                is_tree = TRUE;
+                skinny = TRUE;
+            } else if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset == Vcb->superblock.node_size) {
+                EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
+
+                if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
+                    is_tree = TRUE;
+            }
+        }
+
+        if (is_tree) {
+            Status = add_metadata_reloc(Vcb, &items, &tp, skinny, NULL, c, &rollback);
+
+            if (!NT_SUCCESS(Status)) {
+                ERR("add_metadata_reloc returned %08x\n", Status);
+                goto end;
+            }
+
+            num_queued++;
+
+            if (num_queued >= 64) // only do 64 at a time
+                break;
+        }
+
+        more = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
+
+        if (more)
+            tp = next_tp;
+    }
+
+    if (IsListEmpty(&items)) {
+        // nothing left to move out of this chunk
+        *changed = FALSE;
+        Status = STATUS_SUCCESS;
+        goto end;
+    }
+
+    *changed = TRUE;
+
+    Status = write_metadata_items(Vcb, &items, NULL, c, &rollback);
+    if (!NT_SUCCESS(Status)) {
+        ERR("write_metadata_items returned %08x\n", Status);
+        goto end;
+    }
+
+    Status = STATUS_SUCCESS;
+
+    Vcb->need_write = TRUE;
+
+end:
+    if (NT_SUCCESS(Status))
+        clear_rollback(Vcb, &rollback);
+    else
+        do_rollback(Vcb, &rollback);
+
+    ExReleaseResourceLite(&Vcb->tree_lock);
+
+    // release every queued relocation together with its list of references
+    while (!IsListEmpty(&items)) {
+        LIST_ENTRY* item_le = RemoveHeadList(&items);
+        metadata_reloc* mr = CONTAINING_RECORD(item_le, metadata_reloc, list_entry);
+
+        while (!IsListEmpty(&mr->refs)) {
+            LIST_ENTRY* ref_le = RemoveHeadList(&mr->refs);
+
+            ExFreePool(CONTAINING_RECORD(ref_le, metadata_reloc_ref, list_entry));
+        }
+
+        ExFreePool(mr);
+    }
+
+    return Status;
+}
+
+/*
+ * Resolves the leaf that holds the EXTENT_DATA item described by edr and
+ * registers that leaf through add_metadata_reloc_parent.
+ *
+ * The subvolume is looked up in Vcb->roots by edr->root, then the key
+ * (edr->objid, TYPE_EXTENT_DATA, edr->offset) is searched for in it; anything
+ * other than an exact key match is treated as an error. On success *pmr
+ * receives the metadata_reloc produced by add_metadata_reloc_parent.
+ *
+ * Failures are logged here, so callers only need to clean up.
+ */
+static NTSTATUS find_data_reloc_edr_parent(device_extension* Vcb, LIST_ENTRY* metadata_items, EXTENT_DATA_REF* edr,
+                                           metadata_reloc** pmr, LIST_ENTRY* rollback) {
+    LIST_ENTRY* le;
+    KEY searchkey;
+    traverse_ptr tp;
+    root* r = NULL;
+    NTSTATUS Status;
+
+    le = Vcb->roots.Flink;
+    while (le != &Vcb->roots) {
+        root* r2 = CONTAINING_RECORD(le, root, list_entry);
+
+        if (r2->id == edr->root) {
+            r = r2;
+            break;
+        }
+
+        le = le->Flink;
+    }
+
+    if (!r) {
+        ERR("could not find subvol %llx\n", edr->root);
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    searchkey.obj_id = edr->objid;
+    searchkey.obj_type = TYPE_EXTENT_DATA;
+    searchkey.offset = edr->offset;
+
+    Status = find_item(Vcb, r, &tp, &searchkey, FALSE, NULL);
+    if (!NT_SUCCESS(Status)) {
+        ERR("find_item returned %08x\n", Status);
+        return Status;
+    }
+
+    // keycmp is non-zero when the keys differ, i.e. the exact item is missing
+    if (keycmp(tp.item->key, searchkey)) {
+        ERR("could not find (%llx,%x,%llx) in root %llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, r->id);
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    Status = add_metadata_reloc_parent(Vcb, metadata_items, tp.tree->header.address, pmr, rollback);
+    if (!NT_SUCCESS(Status))
+        ERR("add_metadata_reloc_parent returned %08x\n", Status);
+
+    return Status;
+}
+
+/*
+ * Queues the data extent whose EXTENT_ITEM is at *tp for relocation.
+ *
+ * Vcb            - volume
+ * items          - list that receives the new data_reloc on success
+ * metadata_items - list passed to add_metadata_reloc_parent for every leaf
+ *                  that references the extent
+ * tp             - traverse pointer to the EXTENT_ITEM; the item is deleted
+ * c              - chunk containing the extent, or NULL to look it up with
+ *                  get_chunk_from_address
+ * rollback       - rollback list handed to every tree/space-list change
+ *
+ * The extent item is deleted, the chunk's usage is decreased and the range is
+ * given back to the chunk's space list. Every inline back-reference is then
+ * recorded as a data_reloc_ref; if the inline counts add up to less than the
+ * item's refcount, the following extent-tree items with the same object id
+ * are scanned for separate EXTENT_DATA_REF / SHARED_DATA_REF items, which are
+ * recorded and deleted as well.
+ *
+ * Returns STATUS_SUCCESS with the data_reloc appended to items. On failure
+ * the data_reloc and all refs collected so far are freed here; tree changes
+ * already made are only recorded in rollback.
+ * NOTE(review): presumably the caller replays rollback on failure, as
+ * balance_metadata_chunk does - confirm for balance_data_chunk.
+ */
+static NTSTATUS add_data_reloc(device_extension* Vcb, LIST_ENTRY* items, LIST_ENTRY* metadata_items, traverse_ptr* tp, chunk* c, LIST_ENTRY* rollback) {
+    data_reloc* dr;
+    EXTENT_ITEM* ei;
+    UINT16 len;
+    UINT64 inline_rc;
+    UINT8* ptr;
+    NTSTATUS Status;
+
+    // guard the "size - sizeof(EXTENT_ITEM)" computation below against underflow
+    if (tp->item->size < sizeof(EXTENT_ITEM)) {
+        ERR("(%llx,%x,%llx): item was %x bytes, expecting at least %x\n", tp->item->key.obj_id, tp->item->key.obj_type,
+            tp->item->key.offset, (ULONG)tp->item->size, (ULONG)sizeof(EXTENT_ITEM));
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    dr = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc), ALLOC_TAG);
+    if (!dr) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    dr->address = tp->item->key.obj_id;
+    dr->size = tp->item->key.offset;
+    dr->ei = (EXTENT_ITEM*)tp->item->data;
+    InitializeListHead(&dr->refs);
+
+    delete_tree_item(Vcb, tp, rollback);
+
+    if (!c)
+        c = get_chunk_from_address(Vcb, tp->item->key.obj_id);
+
+    if (c) {
+        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+
+        decrease_chunk_usage(c, tp->item->key.offset);
+
+        space_list_add(Vcb, c, TRUE, tp->item->key.obj_id, tp->item->key.offset, rollback);
+
+        ExReleaseResourceLite(&c->lock);
+    }
+
+    ei = (EXTENT_ITEM*)tp->item->data;
+    inline_rc = 0;
+
+    len = tp->item->size - sizeof(EXTENT_ITEM);
+    ptr = (UINT8*)tp->item->data + sizeof(EXTENT_ITEM);
+
+    // inline back-references: one type byte followed by the ref structure
+    while (len > 0) {
+        UINT8 secttype = *ptr;
+        ULONG sectlen = secttype == TYPE_EXTENT_DATA_REF ? sizeof(EXTENT_DATA_REF) : (secttype == TYPE_SHARED_DATA_REF ? sizeof(SHARED_DATA_REF) : 0);
+        data_reloc_ref* ref;
+        metadata_reloc* mr;
+
+        len--;
+
+        if (sectlen > len) {
+            ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, len, sectlen);
+            Status = STATUS_INTERNAL_ERROR;
+            goto fail;
+        }
+
+        if (sectlen == 0) {
+            ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, secttype);
+            Status = STATUS_INTERNAL_ERROR;
+            goto fail;
+        }
+
+        ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG);
+        if (!ref) {
+            ERR("out of memory\n");
+            Status = STATUS_INSUFFICIENT_RESOURCES;
+            goto fail;
+        }
+
+        if (secttype == TYPE_EXTENT_DATA_REF) {
+            ref->type = TYPE_EXTENT_DATA_REF;
+            RtlCopyMemory(&ref->edr, ptr + sizeof(UINT8), sizeof(EXTENT_DATA_REF));
+            inline_rc += ref->edr.count;
+
+            Status = find_data_reloc_edr_parent(Vcb, metadata_items, &ref->edr, &mr, rollback);
+        } else {
+            // sectlen != 0, so this can only be TYPE_SHARED_DATA_REF
+            ref->type = TYPE_SHARED_DATA_REF;
+            RtlCopyMemory(&ref->sdr, ptr + sizeof(UINT8), sizeof(SHARED_DATA_REF));
+            inline_rc += ref->sdr.count;
+
+            Status = add_metadata_reloc_parent(Vcb, metadata_items, ref->sdr.offset, &mr, rollback);
+            if (!NT_SUCCESS(Status))
+                ERR("add_metadata_reloc_parent returned %08x\n", Status);
+        }
+
+        if (!NT_SUCCESS(Status)) {
+            ExFreePool(ref);
+            goto fail;
+        }
+
+        ref->parent = mr;
+        InsertTailList(&dr->refs, &ref->list_entry);
+
+        len -= sectlen;
+        ptr += sizeof(UINT8) + sectlen;
+    }
+
+    if (inline_rc < ei->refcount) { // look for non-inline entries
+        traverse_ptr tp2 = *tp, next_tp;
+
+        while (find_next_item(Vcb, &tp2, &next_tp, FALSE, NULL)) {
+            data_reloc_ref* ref;
+            metadata_reloc* mr;
+
+            tp2 = next_tp;
+
+            // items belonging to this extent are contiguous in the extent tree
+            if (tp2.item->key.obj_id != tp->item->key.obj_id)
+                break;
+
+            if (tp2.item->key.obj_type == TYPE_EXTENT_DATA_REF && tp2.item->size >= sizeof(EXTENT_DATA_REF)) {
+                ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG);
+                if (!ref) {
+                    ERR("out of memory\n");
+                    Status = STATUS_INSUFFICIENT_RESOURCES;
+                    goto fail;
+                }
+
+                ref->type = TYPE_EXTENT_DATA_REF;
+                RtlCopyMemory(&ref->edr, tp2.item->data, sizeof(EXTENT_DATA_REF));
+
+                Status = find_data_reloc_edr_parent(Vcb, metadata_items, &ref->edr, &mr, rollback);
+            } else if (tp2.item->key.obj_type == TYPE_SHARED_DATA_REF && tp2.item->size >= sizeof(SHARED_DATA_REF)) {
+                ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG);
+                if (!ref) {
+                    ERR("out of memory\n");
+                    Status = STATUS_INSUFFICIENT_RESOURCES;
+                    goto fail;
+                }
+
+                ref->type = TYPE_SHARED_DATA_REF;
+                RtlCopyMemory(&ref->sdr, tp2.item->data, sizeof(SHARED_DATA_REF));
+
+                Status = add_metadata_reloc_parent(Vcb, metadata_items, ref->sdr.offset, &mr, rollback);
+                if (!NT_SUCCESS(Status))
+                    ERR("add_metadata_reloc_parent returned %08x\n", Status);
+            } else
+                continue; // some other item for the same address; leave it alone
+
+            if (!NT_SUCCESS(Status)) {
+                ExFreePool(ref);
+                goto fail;
+            }
+
+            ref->parent = mr;
+            InsertTailList(&dr->refs, &ref->list_entry);
+
+            delete_tree_item(Vcb, &tp2, rollback);
+        }
+    }
+
+    InsertTailList(items, &dr->list_entry);
+
+    return STATUS_SUCCESS;
+
+fail:
+    // dr has not been linked into items yet, so nobody else will free it
+    while (!IsListEmpty(&dr->refs)) {
+        data_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&dr->refs), data_reloc_ref, list_entry);
+
+        ExFreePool(ref);
+    }
+
+    ExFreePool(dr);
+
+    return Status;
+}
+
+/*
+ * Inserts the extent-tree items describing a relocated data extent at
+ * dr->new_address.
+ *
+ * A new EXTENT_ITEM is built from dr->ei's generation and flags, with as many
+ * of dr->refs stored inline as fit within a quarter of the node size. From
+ * the first ref that does not fit onwards, every ref is inserted as its own
+ * EXTENT_DATA_REF or SHARED_DATA_REF item instead. Shared refs are written
+ * with their parent's new address (ref->parent->new_address).
+ *
+ * The refcount stored in the new item is the sum of the individual ref
+ * counts, which is the quantity add_data_reloc compares ei->refcount against
+ * when deciding whether non-inline refs exist.
+ *
+ * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES on allocation failure
+ * or STATUS_INTERNAL_ERROR if insert_tree_item fails.
+ */
+static NTSTATUS add_data_reloc_extent_item(device_extension* Vcb, data_reloc* dr, LIST_ENTRY* rollback) {
+    LIST_ENTRY* le;
+    UINT64 rc = 0;
+    UINT16 inline_len;
+    BOOL all_inline = TRUE;
+    data_reloc_ref* first_noninline = NULL;
+    EXTENT_ITEM* ei;
+    UINT8* ptr;
+
+    inline_len = sizeof(EXTENT_ITEM);
+
+    // first pass: total the reference count and work out how much fits inline
+    le = dr->refs.Flink;
+    while (le != &dr->refs) {
+        data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry);
+        ULONG extlen = 0;
+
+        if (ref->type == TYPE_EXTENT_DATA_REF) {
+            extlen += sizeof(EXTENT_DATA_REF);
+            rc += ref->edr.count; // one entry may stand for several references
+        } else if (ref->type == TYPE_SHARED_DATA_REF) {
+            extlen += sizeof(SHARED_DATA_REF);
+            rc += ref->sdr.count;
+        }
+
+        if (all_inline) {
+            // each inline ref costs one type byte plus the structure itself
+            if (inline_len + 1 + extlen > Vcb->superblock.node_size / 4) {
+                all_inline = FALSE;
+                first_noninline = ref;
+            } else
+                inline_len += extlen + 1;
+        }
+
+        le = le->Flink;
+    }
+
+    ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG);
+    if (!ei) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    ei->refcount = rc;
+    ei->generation = dr->ei->generation;
+    ei->flags = dr->ei->flags;
+    ptr = (UINT8*)&ei[1];
+
+    // second pass: serialise the inline refs directly after the EXTENT_ITEM
+    le = dr->refs.Flink;
+    while (le != &dr->refs) {
+        data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry);
+
+        if (ref == first_noninline)
+            break;
+
+        *ptr = ref->type;
+        ptr++;
+
+        if (ref->type == TYPE_EXTENT_DATA_REF) {
+            EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)ptr;
+
+            RtlCopyMemory(edr, &ref->edr, sizeof(EXTENT_DATA_REF));
+
+            ptr += sizeof(EXTENT_DATA_REF);
+        } else if (ref->type == TYPE_SHARED_DATA_REF) {
+            SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)ptr;
+
+            sdr->offset = ref->parent->new_address;
+            sdr->count = ref->sdr.count;
+
+            ptr += sizeof(SHARED_DATA_REF);
+        }
+
+        le = le->Flink;
+    }
+
+    // NOTE(review): the buffers passed to insert_tree_item are not freed here
+    // when it fails - confirm whether insert_tree_item takes ownership then.
+    if (!insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_EXTENT_ITEM, dr->size, ei, inline_len, NULL, NULL, rollback)) {
+        ERR("insert_tree_item failed\n");
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    if (!all_inline) {
+        // everything from first_noninline onwards becomes a separate item
+        le = &first_noninline->list_entry;
+
+        while (le != &dr->refs) {
+            data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry);
+
+            if (ref->type == TYPE_EXTENT_DATA_REF) {
+                EXTENT_DATA_REF* edr;
+                UINT64 off;
+
+                edr = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA_REF), ALLOC_TAG);
+                if (!edr) {
+                    ERR("out of memory\n");
+                    return STATUS_INSUFFICIENT_RESOURCES;
+                }
+
+                RtlCopyMemory(edr, &ref->edr, sizeof(EXTENT_DATA_REF));
+
+                // the key offset of an EXTENT_DATA_REF item is a hash of its contents
+                off = get_extent_data_ref_hash2(ref->edr.root, ref->edr.objid, ref->edr.offset);
+
+                if (!insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_EXTENT_DATA_REF, off, edr, sizeof(EXTENT_DATA_REF), NULL, NULL, rollback)) {
+                    ERR("insert_tree_item failed\n");
+                    return STATUS_INTERNAL_ERROR;
+                }
+            } else if (ref->type == TYPE_SHARED_DATA_REF) {
+                SHARED_DATA_REF* sdr;
+
+                sdr = ExAllocatePoolWithTag(PagedPool, sizeof(SHARED_DATA_REF), ALLOC_TAG);
+                if (!sdr) {
+                    ERR("out of memory\n");
+                    return STATUS_INSUFFICIENT_RESOURCES;
+                }
+
+                sdr->offset = ref->parent->new_address;
+                sdr->count = ref->sdr.count;
+
+                if (!insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_SHARED_DATA_REF, sdr->offset, sdr, sizeof(SHARED_DATA_REF), NULL, NULL, rollback)) {
+                    ERR("insert_tree_item failed\n");
+                    return STATUS_INTERNAL_ERROR;
+                }
+            }
+
+            le = le->Flink;
+        }
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Relocates one batch of data extents out of chunk c.
+//
+// With Vcb->tree_lock held exclusively, the function:
+//   1. walks the extent tree for EXTENT_ITEMs whose address lies inside c and queues every
+//      one that is not flagged as a tree block via add_data_reloc, stopping once 0x1000000
+//      bytes (16 MiB) or 100 extents have been queued;
+//   2. picks a new address for each queued extent, first in the chunk used for the previous
+//      extent, then in any other writable data chunk, and finally in a freshly allocated chunk;
+//   3. moves the checksums and copies the data across in pieces of at most 0x100000 bytes;
+//   4. rewrites the referencing metadata and extent items, and repoints the chunk's
+//      changed_extents entries and the extents of all open FCBs at the new addresses.
+//
+// Vcb     - volume being balanced
+// c       - chunk whose data extents are to be moved away
+// changed - set to FALSE if no data extents were found in c, TRUE otherwise; left untouched
+//           if the initial tree walk fails
+//
+// Returns STATUS_SUCCESS on success. On failure every change recorded in the local rollback
+// list is undone with do_rollback and the failing status is returned.
+static NTSTATUS balance_data_chunk(device_extension* Vcb, chunk* c, BOOL* changed) {
+    KEY searchkey;
+    traverse_ptr tp;
+    NTSTATUS Status;
+    BOOL b;
+    LIST_ENTRY items, metadata_items, rollback, *le;
+    UINT64 loaded = 0, num_loaded = 0;
+    chunk* newchunk = NULL;
+    UINT8* data = NULL;
+
+    TRACE("chunk %llx\n", c->offset);
+
+    InitializeListHead(&rollback);
+    InitializeListHead(&items);
+    InitializeListHead(&metadata_items);
+
+    ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
+
+    searchkey.obj_id = c->offset;
+    searchkey.obj_type = TYPE_EXTENT_ITEM;
+    searchkey.offset = 0xffffffffffffffff;
+
+    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
+    if (!NT_SUCCESS(Status)) {
+        ERR("find_item returned %08x\n", Status);
+        goto end;
+    }
+
+    // gather the extents to relocate
+    do {
+        traverse_ptr next_tp;
+
+        // past the end of the chunk - nothing more to find
+        if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
+            break;
+
+        if (tp.item->key.obj_id >= c->offset && tp.item->key.obj_type == TYPE_EXTENT_ITEM) {
+            BOOL tree = FALSE;
+
+            if (tp.item->size >= sizeof(EXTENT_ITEM)) {
+                EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
+
+                if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
+                    tree = TRUE;
+            }
+
+            if (!tree) {
+                Status = add_data_reloc(Vcb, &items, &metadata_items, &tp, c, &rollback);
+
+                if (!NT_SUCCESS(Status)) {
+                    ERR("add_data_reloc returned %08x\n", Status);
+                    goto end;
+                }
+
+                loaded += tp.item->key.offset;
+                num_loaded++;
+
+                if (loaded >= 0x1000000 || num_loaded >= 100) // only do so much at a time, so we don't block too obnoxiously
+                    break;
+            }
+        }
+
+        b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
+
+        if (b)
+            tp = next_tp;
+    } while (b);
+
+    if (IsListEmpty(&items)) {
+        *changed = FALSE;
+        Status = STATUS_SUCCESS;
+        goto end;
+    } else
+        *changed = TRUE;
+
+    // bounce buffer used to copy extent data, 0x100000 bytes at a time
+    data = ExAllocatePoolWithTag(PagedPool, 0x100000, ALLOC_TAG);
+    if (!data) {
+        ERR("out of memory\n");
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+        goto end;
+    }
+
+    le = items.Flink;
+    while (le != &items) {
+        data_reloc* dr = CONTAINING_RECORD(le, data_reloc, list_entry);
+        BOOL done = FALSE;
+        LIST_ENTRY* le2;
+        UINT32* csum;
+        UINT64 off;
+
+        // first try the chunk that took the previous extent
+        if (newchunk) {
+            ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE);
+
+            if (find_data_address_in_chunk(Vcb, newchunk, dr->size, &dr->new_address)) {
+                increase_chunk_usage(newchunk, dr->size);
+                space_list_subtract(Vcb, newchunk, FALSE, dr->new_address, dr->size, &rollback);
+                done = TRUE;
+            }
+
+            ExReleaseResourceLite(&newchunk->lock);
+        }
+
+        if (!done) {
+            ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
+
+            // look for any other writable data chunk that is not itself being relocated
+            le2 = Vcb->chunks.Flink;
+            while (le2 != &Vcb->chunks) {
+                chunk* c2 = CONTAINING_RECORD(le2, chunk, list_entry);
+
+                if (!c2->readonly && !c2->reloc && c2 != newchunk && c2->chunk_item->type == Vcb->data_flags) {
+                    ExAcquireResourceExclusiveLite(&c2->lock, TRUE);
+
+                    if ((c2->chunk_item->size - c2->used) >= dr->size) {
+                        if (find_data_address_in_chunk(Vcb, c2, dr->size, &dr->new_address)) {
+                            increase_chunk_usage(c2, dr->size);
+                            space_list_subtract(Vcb, c2, FALSE, dr->new_address, dr->size, &rollback);
+                            ExReleaseResourceLite(&c2->lock);
+                            newchunk = c2;
+                            done = TRUE;
+                            break;
+                        }
+                    }
+
+                    ExReleaseResourceLite(&c2->lock);
+                }
+
+                le2 = le2->Flink;
+            }
+
+            // allocate new chunk if necessary
+            if (!done) {
+                newchunk = alloc_chunk(Vcb, Vcb->data_flags);
+
+                if (!newchunk) {
+                    ERR("could not allocate new chunk\n");
+                    ExReleaseResourceLite(&Vcb->chunk_lock);
+                    Status = STATUS_DISK_FULL;
+                    goto end;
+                }
+
+                ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE);
+
+                if (!find_data_address_in_chunk(Vcb, newchunk, dr->size, &dr->new_address)) {
+                    ExReleaseResourceLite(&newchunk->lock);
+                    // chunk_lock must be dropped here too, as the end label does not release it
+                    ExReleaseResourceLite(&Vcb->chunk_lock);
+                    ERR("could not find address in new chunk\n");
+                    Status = STATUS_DISK_FULL;
+                    goto end;
+                } else {
+                    increase_chunk_usage(newchunk, dr->size);
+                    space_list_subtract(Vcb, newchunk, FALSE, dr->new_address, dr->size, &rollback);
+                }
+
+                ExReleaseResourceLite(&newchunk->lock);
+            }
+
+            ExReleaseResourceLite(&Vcb->chunk_lock);
+        }
+
+        dr->newchunk = newchunk;
+
+        // one UINT32 checksum per sector of the extent
+        csum = ExAllocatePoolWithTag(PagedPool, dr->size * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG);
+        if (!csum) {
+            ERR("out of memory\n");
+            Status = STATUS_INSUFFICIENT_RESOURCES;
+            goto end;
+        }
+
+        Status = load_csum(Vcb, csum, dr->address, dr->size / Vcb->superblock.sector_size, NULL);
+
+        // a load_csum failure is not treated as fatal: the checksums are simply not moved
+        if (NT_SUCCESS(Status)) {
+            add_checksum_entry(Vcb, dr->new_address, dr->size / Vcb->superblock.sector_size, csum, NULL, &rollback);
+            add_checksum_entry(Vcb, dr->address, dr->size / Vcb->superblock.sector_size, NULL, NULL, &rollback);
+        }
+
+        ExFreePool(csum);
+
+        // copy the extent's data from the old address to the new one
+        off = 0;
+
+        while (off < dr->size) {
+            ULONG ds = min(dr->size - off, 0x100000);
+
+            Status = read_data(Vcb, dr->address + off, ds, NULL, FALSE, data, c, NULL, NULL, FALSE);
+            if (!NT_SUCCESS(Status)) {
+                ERR("read_data returned %08x\n", Status);
+                goto end;
+            }
+
+            Status = write_data_complete(Vcb, dr->new_address + off, data, ds, NULL, newchunk);
+            if (!NT_SUCCESS(Status)) {
+                ERR("write_data_complete returned %08x\n", Status);
+                goto end;
+            }
+
+            off += ds;
+        }
+
+        le = le->Flink;
+    }
+
+    ExFreePool(data);
+    data = NULL;
+
+    Status = write_metadata_items(Vcb, &metadata_items, &items, NULL, &rollback);
+    if (!NT_SUCCESS(Status)) {
+        ERR("write_metadata_items returned %08x\n", Status);
+        goto end;
+    }
+
+    le = items.Flink;
+    while (le != &items) {
+        data_reloc* dr = CONTAINING_RECORD(le, data_reloc, list_entry);
+
+        Status = add_data_reloc_extent_item(Vcb, dr, &rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("add_data_reloc_extent_item returned %08x\n", Status);
+            goto end;
+        }
+
+        le = le->Flink;
+    }
+
+    // move pending changed_extent records for relocated extents over to their new chunk
+    le = c->changed_extents.Flink;
+    while (le != &c->changed_extents) {
+        LIST_ENTRY *le2, *le3;
+        changed_extent* ce = CONTAINING_RECORD(le, changed_extent, list_entry);
+
+        // remember the successor now, as ce may be unlinked from this list below
+        le3 = le->Flink;
+
+        le2 = items.Flink;
+        while (le2 != &items) {
+            data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry);
+
+            if (ce->address == dr->address) {
+                ce->address = dr->new_address;
+                RemoveEntryList(&ce->list_entry);
+                InsertTailList(&dr->newchunk->changed_extents, &ce->list_entry);
+                break;
+            }
+
+            le2 = le2->Flink;
+        }
+
+        le = le3;
+    }
+
+    // update open FCBs
+    // FIXME - speed this up
+
+    ExAcquireResourceSharedLite(&Vcb->fcb_lock, TRUE);
+
+    le = Vcb->all_fcbs.Flink;
+    while (le != &Vcb->all_fcbs) {
+        struct _fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_all);
+        LIST_ENTRY* le2;
+
+        ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE);
+
+        le2 = fcb->extents.Flink;
+        while (le2 != &fcb->extents) {
+            extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
+
+            if (!ext->ignore) {
+                if (ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) {
+                    EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data;
+
+                    // only extents that live in the chunk being balanced can have moved
+                    if (ed2->size > 0 && ed2->address >= c->offset && ed2->address < c->offset + c->chunk_item->size) {
+                        LIST_ENTRY* le3 = items.Flink;
+                        while (le3 != &items) {
+                            data_reloc* dr = CONTAINING_RECORD(le3, data_reloc, list_entry);
+
+                            if (ed2->address == dr->address) {
+                                ed2->address = dr->new_address;
+                                break;
+                            }
+
+                            le3 = le3->Flink;
+                        }
+                    }
+                }
+            }
+
+            le2 = le2->Flink;
+        }
+
+        ExReleaseResourceLite(fcb->Header.Resource);
+
+        le = le->Flink;
+    }
+
+    ExReleaseResourceLite(&Vcb->fcb_lock);
+
+    Status = STATUS_SUCCESS;
+
+    Vcb->need_write = TRUE;
+
+end:
+    if (NT_SUCCESS(Status))
+        clear_rollback(Vcb, &rollback);
+    else
+        do_rollback(Vcb, &rollback);
+
+    ExReleaseResourceLite(&Vcb->tree_lock);
+
+    if (data)
+        ExFreePool(data);
+
+    // free the relocation bookkeeping, whether or not we succeeded
+    while (!IsListEmpty(&items)) {
+        data_reloc* dr = CONTAINING_RECORD(RemoveHeadList(&items), data_reloc, list_entry);
+
+        while (!IsListEmpty(&dr->refs)) {
+            data_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&dr->refs), data_reloc_ref, list_entry);
+
+            ExFreePool(ref);
+        }
+
+        ExFreePool(dr);
+    }
+
+    while (!IsListEmpty(&metadata_items)) {
+        metadata_reloc* mr = CONTAINING_RECORD(RemoveHeadList(&metadata_items), metadata_reloc, list_entry);
+
+        while (!IsListEmpty(&mr->refs)) {
+            metadata_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&mr->refs), metadata_reloc_ref, list_entry);
+
+            ExFreePool(ref);
+        }
+
+        ExFreePool(mr);
+    }
+
+    return Status;
+}
+
+// Returns the single BLOCK_FLAG_* redundancy profile bit set in the chunk's type field.
+// The bits are tested in the order listed in the table below and the first match wins;
+// if none of them is set, BLOCK_FLAG_SINGLE is returned.
+static __inline UINT64 get_chunk_dup_type(chunk* c) {
+    static const UINT64 profile_bits[] = {
+        BLOCK_FLAG_RAID0,
+        BLOCK_FLAG_RAID1,
+        BLOCK_FLAG_DUPLICATE,
+        BLOCK_FLAG_RAID10,
+        BLOCK_FLAG_RAID5,
+        BLOCK_FLAG_RAID6
+    };
+    unsigned int idx;
+
+    for (idx = 0; idx < sizeof(profile_bits) / sizeof(profile_bits[0]); idx++) {
+        if (c->chunk_item->type & profile_bits[idx])
+            return profile_bits[idx];
+    }
+
+    return BLOCK_FLAG_SINGLE;
+}
+
+// Decides whether chunk c passes the balance filters configured for one class of chunk.
+//
+// Vcb  - volume; the filters are read from Vcb->balance.opts[sort]
+// sort - index into Vcb->balance.opts selecting which option set to apply
+// c    - chunk under consideration
+//
+// Returns FALSE if the option set is not enabled or if any enabled filter (profiles, devid,
+// drange, vrange, stripes, usage, soft convert) rejects the chunk; TRUE otherwise.
+static BOOL should_balance_chunk(device_extension* Vcb, UINT8 sort, chunk* c) {
+    btrfs_balance_opts* opts;
+
+    opts = &Vcb->balance.opts[sort];
+
+    if (!(opts->flags & BTRFS_BALANCE_OPTS_ENABLED))
+        return FALSE;
+
+    // profiles: the chunk's redundancy profile must be one of those requested
+    if (opts->flags & BTRFS_BALANCE_OPTS_PROFILES) {
+        UINT64 type = get_chunk_dup_type(c);
+
+        if (!(type & opts->profiles))
+            return FALSE;
+    }
+
+    // devid: at least one stripe must be on the given device
+    if (opts->flags & BTRFS_BALANCE_OPTS_DEVID) {
+        UINT16 i;
+        CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
+        BOOL b = FALSE;
+
+        for (i = 0; i < c->chunk_item->num_stripes; i++) {
+            if (cis[i].dev_id == opts->devid) {
+                b = TRUE;
+                break;
+            }
+        }
+
+        if (!b)
+            return FALSE;
+    }
+
+    // drange: at least one stripe must lie within the given physical range
+    if (opts->flags & BTRFS_BALANCE_OPTS_DRANGE) {
+        UINT16 i, factor;
+        UINT64 physsize;
+        CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
+        BOOL b = FALSE;
+
+        // factor is what the chunk's logical size is divided by to get the per-stripe size
+        if (c->chunk_item->type & BLOCK_FLAG_RAID0)
+            factor = c->chunk_item->num_stripes;
+        else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
+            factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
+        else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
+            factor = c->chunk_item->num_stripes - 1;
+        else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
+            factor = c->chunk_item->num_stripes - 2;
+        else // SINGLE, DUPLICATE, RAID1
+            factor = 1;
+
+        physsize = c->chunk_item->size / factor;
+
+        for (i = 0; i < c->chunk_item->num_stripes; i++) {
+            if (cis[i].offset >= opts->drange_start && cis[i].offset + physsize < opts->drange_end) {
+                b = TRUE;
+                break;
+            }
+        }
+
+        if (!b)
+            return FALSE;
+    }
+
+    // vrange: the chunk's logical range must overlap the given range
+    if (opts->flags & BTRFS_BALANCE_OPTS_VRANGE) {
+        if (c->offset + c->chunk_item->size <= opts->vrange_start || c->offset > opts->vrange_end)
+            return FALSE;
+    }
+
+    // stripes: the stripe count must lie within [stripes_start, stripes_end]
+    if (opts->flags & BTRFS_BALANCE_OPTS_STRIPES) {
+        if (c->chunk_item->num_stripes < opts->stripes_start || c->chunk_item->num_stripes > opts->stripes_end)
+            return FALSE;
+    }
+
+    // usage: the percentage of the chunk in use must lie within [usage_start, usage_end]
+    if (opts->flags & BTRFS_BALANCE_OPTS_USAGE) {
+        UINT64 usage = c->used * 100 / c->chunk_item->size;
+
+        // usage == 0 should mean completely empty, not just that usage rounds to 0%
+        if (c->used > 0 && usage == 0)
+            usage = 1;
+
+        if (usage < opts->usage_start || usage > opts->usage_end)
+            return FALSE;
+    }
+
+    // soft convert: skip chunks that already have the target profile
+    if (opts->flags & BTRFS_BALANCE_OPTS_CONVERT && opts->flags & BTRFS_BALANCE_OPTS_SOFT) {
+        UINT64 type = get_chunk_dup_type(c);
+
+        if (type == opts->convert)
+            return FALSE;
+    }
+
+    return TRUE;
+}
+
+// Translates the in-memory balance options in opts into the BALANCE_ARGS layout in args.
+// An option group is copied only when its BTRFS_BALANCE_OPTS_* bit is set in opts->flags, and
+// the matching BALANCE_ARGS_FLAGS_* bit is then ORed into args->flags. Nothing in args is
+// cleared here.
+static void copy_balance_args(btrfs_balance_opts* opts, BALANCE_ARGS* args) {
+    if (opts->flags & BTRFS_BALANCE_OPTS_PROFILES) {
+        args->flags |= BALANCE_ARGS_FLAGS_PROFILES;
+        args->profiles = opts->profiles;
+    }
+
+    if (opts->flags & BTRFS_BALANCE_OPTS_USAGE) {
+        // NOTE(review): the choice between single value and range is made on args->usage_start,
+        // i.e. on the destination rather than on opts - confirm this is intended
+        if (args->usage_start != 0) {
+            args->usage = opts->usage_end;
+            args->flags |= BALANCE_ARGS_FLAGS_USAGE;
+        } else {
+            args->usage_start = opts->usage_start;
+            args->usage_end = opts->usage_end;
+            args->flags |= BALANCE_ARGS_FLAGS_USAGE_RANGE;
+        }
+    }
+
+    if (opts->flags & BTRFS_BALANCE_OPTS_DEVID) {
+        args->flags |= BALANCE_ARGS_FLAGS_DEVID;
+        args->devid = opts->devid;
+    }
+
+    if (opts->flags & BTRFS_BALANCE_OPTS_DRANGE) {
+        args->flags |= BALANCE_ARGS_FLAGS_DRANGE;
+        args->drange_start = opts->drange_start;
+        args->drange_end = opts->drange_end;
+    }
+
+    if (opts->flags & BTRFS_BALANCE_OPTS_VRANGE) {
+        args->flags |= BALANCE_ARGS_FLAGS_VRANGE;
+        args->vrange_start = opts->vrange_start;
+        args->vrange_end = opts->vrange_end;
+    }
+
+    if (opts->flags & BTRFS_BALANCE_OPTS_CONVERT) {
+        // the soft flag is only recorded together with a conversion target
+        if (opts->flags & BTRFS_BALANCE_OPTS_SOFT)
+            args->flags |= BALANCE_ARGS_FLAGS_CONVERT | BALANCE_ARGS_FLAGS_SOFT;
+        else
+            args->flags |= BALANCE_ARGS_FLAGS_CONVERT;
+
+        args->convert = opts->convert;
+    }
+
+    if (opts->flags & BTRFS_BALANCE_OPTS_LIMIT) {
+        // NOTE(review): as with usage, this tests args->limit_start rather than opts - confirm
+        if (args->limit_start != 0) {
+            args->limit = opts->limit_end;
+            args->flags |= BALANCE_ARGS_FLAGS_LIMIT;
+        } else {
+            args->limit_start = opts->limit_start;
+            args->limit_end = opts->limit_end;
+            args->flags |= BALANCE_ARGS_FLAGS_LIMIT_RANGE;
+        }
+    }
+
+    if (opts->flags & BTRFS_BALANCE_OPTS_STRIPES) {
+        args->flags |= BALANCE_ARGS_FLAGS_STRIPES_RANGE;
+        args->stripes_start = opts->stripes_start;
+        args->stripes_end = opts->stripes_end;
+    }
+}
+
+// Records the current balance options in the root tree, so that they are stored on disk.
+//
+// With Vcb->tree_lock held exclusively, any existing (BALANCE_ITEM_ID, TYPE_TEMP_ITEM, 0) item
+// is deleted and a new BALANCE_ITEM built from Vcb->balance.opts is inserted in its place.
+// On success the change is flushed with do_write; on failure it is undone with do_rollback.
+//
+// Returns STATUS_SUCCESS, the failing status from find_item, STATUS_INSUFFICIENT_RESOURCES
+// if the item cannot be allocated, or STATUS_INTERNAL_ERROR if insert_tree_item fails.
+static NTSTATUS add_balance_item(device_extension* Vcb) {
+    LIST_ENTRY rollback;
+    KEY searchkey;
+    traverse_ptr tp;
+    NTSTATUS Status;
+    BALANCE_ITEM* bi;
+
+    InitializeListHead(&rollback);
+
+    searchkey.obj_id = BALANCE_ITEM_ID;
+    searchkey.obj_type = TYPE_TEMP_ITEM;
+    searchkey.offset = 0;
+
+    ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
+
+    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
+    if (!NT_SUCCESS(Status)) {
+        ERR("find_item returned %08x\n", Status);
+        goto end;
+    }
+
+    // replace rather than duplicate an item left over from an earlier balance
+    if (!keycmp(tp.item->key, searchkey))
+        delete_tree_item(Vcb, &tp, &rollback);
+
+    bi = ExAllocatePoolWithTag(PagedPool, sizeof(BALANCE_ITEM), ALLOC_TAG);
+    if (!bi) {
+        ERR("out of memory\n");
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+        goto end;
+    }
+
+    // copy_balance_args only ORs flags in and fills selected fields, so start from zero
+    RtlZeroMemory(bi, sizeof(BALANCE_ITEM));
+
+    if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED) {
+        bi->flags |= BALANCE_FLAGS_DATA;
+        copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bi->data);
+    }
+
+    if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) {
+        bi->flags |= BALANCE_FLAGS_METADATA;
+        copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bi->metadata);
+    }
+
+    if (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED) {
+        bi->flags |= BALANCE_FLAGS_SYSTEM;
+        copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bi->system);
+    }
+
+    if (!insert_tree_item(Vcb, Vcb->root_root, BALANCE_ITEM_ID, TYPE_TEMP_ITEM, 0, bi, sizeof(BALANCE_ITEM), NULL, NULL, &rollback)) {
+        ERR("insert_tree_item failed\n");
+        // the buffer was not inserted into the tree, so it has to be freed here
+        ExFreePool(bi);
+        Status = STATUS_INTERNAL_ERROR;
+        goto end;
+    }
+
+    Status = STATUS_SUCCESS;
+
+end:
+    if (NT_SUCCESS(Status)) {
+        do_write(Vcb, NULL, &rollback);
+        free_trees(Vcb);
+
+        clear_rollback(Vcb, &rollback);
+    } else
+        do_rollback(Vcb, &rollback);
+
+    ExReleaseResourceLite(&Vcb->tree_lock);
+
+    return Status;
+}
+
+// Deletes the (BALANCE_ITEM_ID, TYPE_TEMP_ITEM, 0) item from the root tree, if there is one.
+//
+// Runs with Vcb->tree_lock held exclusively. When the item is found it is deleted and the
+// change is flushed straight away with do_write. Returns STATUS_SUCCESS whether or not the
+// item existed, or the failing status from find_item.
+static NTSTATUS remove_balance_item(device_extension* Vcb) {
+    NTSTATUS Status;
+    LIST_ENTRY rollback;
+    traverse_ptr tp;
+    KEY searchkey;
+
+    searchkey.obj_id = BALANCE_ITEM_ID;
+    searchkey.obj_type = TYPE_TEMP_ITEM;
+    searchkey.offset = 0;
+
+    InitializeListHead(&rollback);
+
+    ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
+
+    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
+
+    if (NT_SUCCESS(Status)) {
+        // only act on an exact key match
+        if (keycmp(tp.item->key, searchkey) == 0) {
+            delete_tree_item(Vcb, &tp, &rollback);
+
+            do_write(Vcb, NULL, &rollback);
+            free_trees(Vcb);
+        }
+
+        Status = STATUS_SUCCESS;
+
+        clear_rollback(Vcb, &rollback);
+    } else {
+        ERR("find_item returned %08x\n", Status);
+
+        do_rollback(Vcb, &rollback);
+    }
+
+    ExReleaseResourceLite(&Vcb->tree_lock);
+
+    return Status;
+}
+
+// Fills the in-memory balance options opts from the BALANCE_ARGS structure args.
+// opts->flags is reset to BTRFS_BALANCE_OPTS_ENABLED and then gains one BTRFS_BALANCE_OPTS_*
+// bit for each option group whose BALANCE_ARGS_FLAGS_* bit is set in args->flags. Fields of
+// opts belonging to groups that are not flagged are left as they were.
+static void load_balance_args(btrfs_balance_opts* opts, BALANCE_ARGS* args) {
+    opts->flags = BTRFS_BALANCE_OPTS_ENABLED;
+
+    if (args->flags & BALANCE_ARGS_FLAGS_PROFILES) {
+        opts->profiles = args->profiles;
+        opts->flags |= BTRFS_BALANCE_OPTS_PROFILES;
+    }
+
+    // a single usage value is stored as the range 0..usage, and wins over an explicit range
+    if (args->flags & (BALANCE_ARGS_FLAGS_USAGE | BALANCE_ARGS_FLAGS_USAGE_RANGE)) {
+        if (args->flags & BALANCE_ARGS_FLAGS_USAGE) {
+            opts->usage_start = 0;
+            opts->usage_end = args->usage;
+        } else {
+            opts->usage_start = args->usage_start;
+            opts->usage_end = args->usage_end;
+        }
+
+        opts->flags |= BTRFS_BALANCE_OPTS_USAGE;
+    }
+
+    if (args->flags & BALANCE_ARGS_FLAGS_DEVID) {
+        opts->devid = args->devid;
+        opts->flags |= BTRFS_BALANCE_OPTS_DEVID;
+    }
+
+    if (args->flags & BALANCE_ARGS_FLAGS_DRANGE) {
+        opts->drange_start = args->drange_start;
+        opts->drange_end = args->drange_end;
+        opts->flags |= BTRFS_BALANCE_OPTS_DRANGE;
+    }
+
+    if (args->flags & BALANCE_ARGS_FLAGS_VRANGE) {
+        opts->vrange_start = args->vrange_start;
+        opts->vrange_end = args->vrange_end;
+        opts->flags |= BTRFS_BALANCE_OPTS_VRANGE;
+    }
+
+    // a single limit value is stored as the range 0..limit, and wins over an explicit range
+    if (args->flags & (BALANCE_ARGS_FLAGS_LIMIT | BALANCE_ARGS_FLAGS_LIMIT_RANGE)) {
+        if (args->flags & BALANCE_ARGS_FLAGS_LIMIT) {
+            opts->limit_start = 0;
+            opts->limit_end = args->limit;
+        } else {
+            opts->limit_start = args->limit_start;
+            opts->limit_end = args->limit_end;
+        }
+
+        opts->flags |= BTRFS_BALANCE_OPTS_LIMIT;
+    }
+
+    if (args->flags & BALANCE_ARGS_FLAGS_STRIPES_RANGE) {
+        opts->stripes_start = args->stripes_start;
+        opts->stripes_end = args->stripes_end;
+        opts->flags |= BTRFS_BALANCE_OPTS_STRIPES;
+    }
+
+    if (args->flags & BALANCE_ARGS_FLAGS_CONVERT) {
+        opts->convert = args->convert;
+
+        // the soft flag is only honoured together with a conversion target
+        if (args->flags & BALANCE_ARGS_FLAGS_SOFT)
+            opts->flags |= BTRFS_BALANCE_OPTS_CONVERT | BTRFS_BALANCE_OPTS_SOFT;
+        else
+            opts->flags |= BTRFS_BALANCE_OPTS_CONVERT;
+    }
+}
+
+// Overwrites the superblocks on dev with zeroes.
+//
+// Walks superblock_addrs until an entry of 0 or an address at which a whole superblock no
+// longer fits within dev->length, writing a zero-filled superblock at each address with
+// write_data_phys. Stops at the first write that fails and returns its status; otherwise
+// returns STATUS_SUCCESS. Returns STATUS_INSUFFICIENT_RESOURCES if the zero buffer cannot
+// be allocated.
+static NTSTATUS remove_superblocks(device* dev) {
+    NTSTATUS Status = STATUS_SUCCESS;
+    superblock* zeroes;
+    int idx;
+
+    zeroes = ExAllocatePoolWithTag(PagedPool, sizeof(superblock), ALLOC_TAG);
+    if (!zeroes) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    RtlZeroMemory(zeroes, sizeof(superblock));
+
+    for (idx = 0; superblock_addrs[idx] > 0 && dev->length >= superblock_addrs[idx] + sizeof(superblock); idx++) {
+        Status = write_data_phys(dev->devobj, superblock_addrs[idx], zeroes, sizeof(superblock));
+
+        if (!NT_SUCCESS(Status))
+            break;
+    }
+
+    ExFreePool(zeroes);
+
+    // any successful status from the last write is reported as plain STATUS_SUCCESS
+    return NT_SUCCESS(Status) ? STATUS_SUCCESS : Status;
+}
+
+// Moves the mount manager's mount points from device dev to the volume's first device.
+//
+// The function queries the device names of dev and of first_device(Vcb), asks the mount
+// manager for the mount points registered against dev's name, deletes them, and re-creates
+// every one that has a symbolic link name against the new device's name. Once the old
+// name has been obtained, dev is re-registered with the mount manager via add_volume on the
+// way out (on success and on failure alike), unless part0 is TRUE.
+//
+// Vcb      - volume the device belonged to
+// dev      - device whose mount points are to be moved
+// mountmgr - device object of the mount manager
+// part0    - if TRUE, dev is not re-added to the mount manager afterwards
+//
+// Returns STATUS_SUCCESS, or the status of the first step that failed.
+static NTSTATUS replace_mount_dev(device_extension* Vcb, device* dev, PDEVICE_OBJECT mountmgr, BOOL part0) {
+    NTSTATUS Status;
+    MOUNTDEV_NAME mdn, *mdn2 = NULL, *mdn3 = NULL;
+    ULONG mdnsize, mmpsize;
+    MOUNTMGR_MOUNT_POINT* mmp = NULL;
+    MOUNTMGR_MOUNT_POINTS mmps, *mmps2 = NULL;
+    ULONG i;
+    UNICODE_STRING us;
+
+    // get old device name
+
+    // first call only fetches the name length, so STATUS_BUFFER_OVERFLOW is expected
+    Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &mdn, sizeof(MOUNTDEV_NAME), TRUE, NULL);
+    if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) {
+        ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status);
+        return Status;
+    }
+
+    mdnsize = offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength;
+
+    mdn2 = ExAllocatePoolWithTag(PagedPool, mdnsize, ALLOC_TAG);
+    if (!mdn2) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, mdn2, mdnsize, TRUE, NULL);
+    if (!NT_SUCCESS(Status)) {
+        ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status);
+        // mdn2 does not hold a valid name, so skip the re-add step at end2
+        goto end;
+    }
+
+    // get new device name
+
+    Status = dev_ioctl(first_device(Vcb)->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &mdn, sizeof(MOUNTDEV_NAME), TRUE, NULL);
+    if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) {
+        ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status);
+        goto end2;
+    }
+
+    mdnsize = offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength;
+
+    mdn3 = ExAllocatePoolWithTag(PagedPool, mdnsize, ALLOC_TAG);
+    if (!mdn3) {
+        ERR("out of memory\n");
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+        goto end2;
+    }
+
+    Status = dev_ioctl(first_device(Vcb)->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, mdn3, mdnsize, TRUE, NULL);
+    if (!NT_SUCCESS(Status)) {
+        ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status);
+        goto end2;
+    }
+
+    // query and delete existing mount points
+
+    mmpsize = sizeof(MOUNTMGR_MOUNT_POINT) + mdn2->NameLength;
+
+    mmp = ExAllocatePoolWithTag(PagedPool, mmpsize, ALLOC_TAG);
+    if (!mmp) {
+        ERR("out of memory\n");
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+        goto end2;
+    }
+
+    // select mount points by device name only; the name follows the fixed-size header
+    RtlZeroMemory(mmp, sizeof(MOUNTMGR_MOUNT_POINT));
+    mmp->DeviceNameOffset = sizeof(MOUNTMGR_MOUNT_POINT);
+    mmp->DeviceNameLength = mdn2->NameLength;
+    RtlCopyMemory(&mmp[1], mdn2->Name, mdn2->NameLength);
+
+    // The output buffer is the stack variable mmps, so its length must be that of the
+    // structure itself, not mmpsize. This call is only made to learn mmps.Size.
+    Status = dev_ioctl(mountmgr, IOCTL_MOUNTMGR_QUERY_POINTS, mmp, mmpsize, &mmps, sizeof(MOUNTMGR_MOUNT_POINTS), TRUE, NULL);
+    if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) {
+        ERR("IOCTL_MOUNTMGR_QUERY_POINTS returned %08x\n", Status);
+        goto end2;
+    }
+
+    mmps2 = ExAllocatePoolWithTag(PagedPool, mmps.Size, ALLOC_TAG);
+    if (!mmps2) {
+        ERR("out of memory\n");
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+        goto end2;
+    }
+
+    // mmps2 receives the list of mount points that were deleted
+    Status = dev_ioctl(mountmgr, IOCTL_MOUNTMGR_DELETE_POINTS, mmp, mmpsize, mmps2, mmps.Size, TRUE, NULL);
+    if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) {
+        ERR("IOCTL_MOUNTMGR_DELETE_POINTS returned %08x\n", Status);
+        goto end2;
+    }
+
+    // re-create mount points
+
+    for (i = 0; i < mmps2->NumberOfMountPoints; i++) {
+        if (mmps2->MountPoints[i].SymbolicLinkNameOffset != 0) {
+            ULONG mcpilen;
+            MOUNTMGR_CREATE_POINT_INPUT* mcpi;
+
+            // layout: header, then the symbolic link name, then the new device name
+            mcpilen = sizeof(MOUNTMGR_CREATE_POINT_INPUT) + mmps2->MountPoints[i].SymbolicLinkNameLength + mdn3->NameLength;
+
+            mcpi = ExAllocatePoolWithTag(PagedPool, mcpilen, ALLOC_TAG);
+            if (!mcpi) {
+                ERR("out of memory\n");
+                Status = STATUS_INSUFFICIENT_RESOURCES;
+                goto end2;
+            }
+
+            mcpi->SymbolicLinkNameOffset = sizeof(MOUNTMGR_CREATE_POINT_INPUT);
+            mcpi->SymbolicLinkNameLength = mmps2->MountPoints[i].SymbolicLinkNameLength;
+            mcpi->DeviceNameOffset = mcpi->SymbolicLinkNameOffset + mcpi->SymbolicLinkNameLength;
+            mcpi->DeviceNameLength = mdn3->NameLength;
+
+            RtlCopyMemory((UINT8*)mcpi + mcpi->SymbolicLinkNameOffset, (UINT8*)mmps2 + mmps2->MountPoints[i].SymbolicLinkNameOffset,
+                          mcpi->SymbolicLinkNameLength);
+            RtlCopyMemory((UINT8*)mcpi + mcpi->DeviceNameOffset, mdn3->Name, mdn3->NameLength);
+
+            Status = dev_ioctl(mountmgr, IOCTL_MOUNTMGR_CREATE_POINT, mcpi, mcpilen, NULL, 0, TRUE, NULL);
+            if (!NT_SUCCESS(Status)) {
+                ERR("IOCTL_MOUNTMGR_CREATE_POINT returned %08x\n", Status);
+                ExFreePool(mcpi);
+                goto end2;
+            }
+
+            ExFreePool(mcpi);
+        }
+    }
+
+    Status = STATUS_SUCCESS;
+
+end2:
+    // re-add old device back to mountmgr
+
+    if (!part0) {
+        us.Buffer = mdn2->Name;
+        us.Length = us.MaximumLength = mdn2->NameLength;
+
+        add_volume(mountmgr, &us);
+    }
+
+end:
+    if (mdn2)
+        ExFreePool(mdn2);
+
+    if (mdn3)
+        ExFreePool(mdn3);
+
+    if (mmp)
+        ExFreePool(mmp);
+
+    if (mmps2)
+        ExFreePool(mmps2);
+
+    return Status;
+}
+
+// Completes the removal of device dev from the volume.
+//
+// In order, the function:
+//   - flushes any pending changes;
+//   - deletes the device's DEV_ITEM from the chunk tree and its DEV_STATS item from the
+//     device tree;
+//   - adjusts the superblock's device count and total size, unlinks dev from the device
+//     list and flushes again;
+//   - zeroes the superblocks on dev, unless it is read-only;
+//   - removes the matching entry from the global volumes list, handing the device back to
+//     the mount manager where appropriate;
+//   - if dev was the volume's first device, makes the new first device the VPB's RealDevice
+//     and moves the mount points across with replace_mount_dev;
+//   - drops the reference on dev->devobj and frees dev together with its space list.
+//
+// dev must not be used by the caller after a successful return.
+//
+// Returns the failing status from find_item if either tree lookup fails (in which case dev
+// is left intact), otherwise STATUS_SUCCESS. Failures of the later steps are only logged.
+static NTSTATUS finish_removing_device(device_extension* Vcb, device* dev) {
+    KEY searchkey;
+    traverse_ptr tp;
+    NTSTATUS Status;
+    LIST_ENTRY rollback, *le;
+    BOOL first_dev, part0 = FALSE;
+
+    InitializeListHead(&rollback);
+
+    if (Vcb->need_write)
+        do_write(Vcb, NULL, &rollback);
+
+    free_trees(Vcb);
+
+    clear_rollback(Vcb, &rollback);
+
+    // remove entry in chunk tree
+
+    searchkey.obj_id = 1;
+    searchkey.obj_type = TYPE_DEV_ITEM;
+    searchkey.offset = dev->devitem.dev_id;
+
+    Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, NULL);
+    if (!NT_SUCCESS(Status)) {
+        ERR("find_item returned %08x\n", Status);
+        return Status;
+    }
+
+    if (!keycmp(searchkey, tp.item->key))
+        delete_tree_item(Vcb, &tp, &rollback);
+
+    // remove stats entry in device tree
+
+    searchkey.obj_id = 0;
+    searchkey.obj_type = TYPE_DEV_STATS;
+    searchkey.offset = dev->devitem.dev_id;
+
+    Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, NULL);
+    if (!NT_SUCCESS(Status)) {
+        ERR("find_item returned %08x\n", Status);
+        // undo the deletion of the DEV_ITEM above, as the device is not being removed after all
+        do_rollback(Vcb, &rollback);
+        return Status;
+    }
+
+    if (!keycmp(searchkey, tp.item->key))
+        delete_tree_item(Vcb, &tp, &rollback);
+
+    // update superblock
+
+    Vcb->superblock.num_devices--;
+    Vcb->superblock.total_bytes -= dev->devitem.num_bytes;
+    Vcb->devices_loaded--;
+
+    // has to be evaluated before dev is unlinked from the device list
+    first_dev = first_device(Vcb) == dev;
+
+    RemoveEntryList(&dev->list_entry);
+
+    // flush
+
+    do_write(Vcb, NULL, &rollback);
+
+    free_trees(Vcb);
+
+    clear_rollback(Vcb, &rollback);
+
+    if (!dev->readonly) {
+        Status = remove_superblocks(dev);
+        if (!NT_SUCCESS(Status))
+            WARN("remove_superblocks returned %08x\n", Status);
+    }
+
+    // remove entry in volume list
+
+    ExAcquireResourceExclusiveLite(&volumes_lock, TRUE);
+
+    le = volumes.Flink;
+    while (le != &volumes) {
+        volume* v = CONTAINING_RECORD(le, volume, list_entry);
+
+        // match on both filesystem UUID and device UUID
+        if (RtlCompareMemory(&Vcb->superblock.uuid, &v->fsuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID) &&
+            RtlCompareMemory(&dev->devitem.device_uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) {
+            PFILE_OBJECT FileObject;
+            PDEVICE_OBJECT mountmgr;
+            UNICODE_STRING mmdevpath;
+
+            RemoveEntryList(&v->list_entry);
+
+            // re-add entry to mountmgr
+
+            // when dev was the first device this is left to replace_mount_dev below
+            if (!first_dev && v->part_num != 0) {
+                RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME);
+                Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &FileObject, &mountmgr);
+                if (!NT_SUCCESS(Status))
+                    ERR("IoGetDeviceObjectPointer returned %08x\n", Status);
+                else {
+                    add_volume(mountmgr, &v->devpath);
+                    ObDereferenceObject(FileObject);
+                }
+            }
+
+            part0 = v->part_num == 0 ? TRUE : FALSE;
+
+            if (v->devpath.Buffer)
+                ExFreePool(v->devpath.Buffer);
+
+            ExFreePool(v);
+            break;
+        }
+
+        le = le->Flink;
+    }
+
+    ExReleaseResourceLite(&volumes_lock);
+
+    if (first_dev) {
+        PDEVICE_OBJECT DeviceObject, olddev;
+        device* newfirstdev;
+        PFILE_OBJECT FileObject;
+        UNICODE_STRING mmdevpath;
+        PDEVICE_OBJECT mountmgr;
+
+        DeviceObject = Vcb->Vpb->DeviceObject;
+
+        olddev = DeviceObject->Vpb->RealDevice;
+        newfirstdev = first_device(Vcb);
+
+        // take the reference on the new device before dropping the one on the old
+        ObReferenceObject(newfirstdev->devobj);
+        DeviceObject->Vpb->RealDevice = newfirstdev->devobj;
+        ObDereferenceObject(olddev);
+
+        RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME);
+        Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &FileObject, &mountmgr);
+        if (!NT_SUCCESS(Status))
+            ERR("IoGetDeviceObjectPointer returned %08x\n", Status);
+        else {
+            Status = replace_mount_dev(Vcb, dev, mountmgr, part0);
+            if (!NT_SUCCESS(Status))
+                ERR("replace_mount_dev returned %08x\n", Status);
+
+            ObDereferenceObject(FileObject);
+        }
+
+    }
+
+    // free dev
+
+    ObDereferenceObject(dev->devobj);
+
+    while (!IsListEmpty(&dev->space)) {
+        LIST_ENTRY* le2 = RemoveHeadList(&dev->space);
+        space* s = CONTAINING_RECORD(le2, space, list_entry);
+
+        ExFreePool(s);
+    }
+
+    ExFreePool(dev);
+
+    return STATUS_SUCCESS;
+}
+
+// Thread routine that carries out a balance or device-removal run on the
+// volume whose device_extension is passed as context.
+//
+// The routine:
+//  1. resets the stopping/cancelling flags and creates the "finished" event;
+//  2. applies any requested profile conversion to the volume's default
+//     data/metadata/system block flags;
+//  3. on writable volumes, either stores the balance item (normal balance)
+//     or flushes pending writes (device removal);
+//  4. queues every chunk accepted by should_balance_chunk, honouring the
+//     per-type chunk limit, and marks it with reloc = TRUE;
+//  5. relocates the queued data chunks, then the metadata and system chunks,
+//     waiting on balance.event between steps so that a pause takes effect;
+//  6. removes the balance item or completes the device removal, closes the
+//     thread handle and signals balance.finished.
+//
+// Errors from the relocation helpers are stored in Vcb->balance.status.
+#ifndef __REACTOS__
+static void balance_thread(void* context) {
+#else
+static void NTAPI balance_thread(void* context) {
+#endif
+    device_extension* Vcb = (device_extension*)context;
+    LIST_ENTRY chunks;
+    LIST_ENTRY* le;
+    UINT64 num_chunks[3];
+    NTSTATUS Status;
+    
+    Vcb->balance.stopping = FALSE;
+    Vcb->balance.cancelling = FALSE;
+    KeInitializeEvent(&Vcb->balance.finished, NotificationEvent, FALSE);
+    
+    // When converting, new allocations must use the target profile. "single"
+    // has no profile bit of its own, hence the mapping to 0.
+    if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_CONVERT)
+        Vcb->data_flags = BLOCK_FLAG_DATA | (Vcb->balance.opts[BALANCE_OPTS_DATA].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_DATA].convert);
+    
+    if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_CONVERT)
+        Vcb->metadata_flags = BLOCK_FLAG_METADATA | (Vcb->balance.opts[BALANCE_OPTS_METADATA].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_METADATA].convert);
+    
+    if (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_CONVERT)
+        Vcb->system_flags = BLOCK_FLAG_SYSTEM | (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_SYSTEM].convert);
+    
+    // With mixed block groups, use one set of options for both data and
+    // metadata: whichever of the two is enabled, data taking precedence.
+    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS) {
+        if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED)
+            RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &Vcb->balance.opts[BALANCE_OPTS_DATA], sizeof(btrfs_balance_opts));
+        else if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED)
+            RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_DATA], &Vcb->balance.opts[BALANCE_OPTS_METADATA], sizeof(btrfs_balance_opts));
+    }
+    
+    // FIXME - what are we supposed to do with limit_start?
+    
+    if (!Vcb->readonly) {
+        if (!Vcb->balance.removing) {
+            Status = add_balance_item(Vcb);
+            if (!NT_SUCCESS(Status)) {
+                ERR("add_balance_item returned %08x\n", Status);
+                goto end;
+            }
+        } else {
+            // device removal: no balance item is stored, but flush anything
+            // that is pending before relocation starts
+            if (Vcb->need_write) {
+                LIST_ENTRY rollback;
+                
+                InitializeListHead(&rollback);
+                do_write(Vcb, NULL, &rollback);
+                free_trees(Vcb);
+                
+                clear_rollback(Vcb, &rollback);
+            }
+        }
+    }
+    
+    num_chunks[0] = num_chunks[1] = num_chunks[2] = 0;
+    Vcb->balance.total_chunks = 0;
+    
+    InitializeListHead(&chunks);
+    
+    // blocks here while the balance is paused
+    KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL);
+    
+    if (Vcb->balance.stopping)
+        goto end;
+    
+    ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
+    
+    // build the list of chunks to relocate
+    le = Vcb->chunks.Flink;
+    while (le != &Vcb->chunks) {
+        chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
+        UINT8 sort;
+        
+        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+        
+        if (c->chunk_item->type & BLOCK_FLAG_DATA)
+            sort = BALANCE_OPTS_DATA;
+        else if (c->chunk_item->type & BLOCK_FLAG_METADATA)
+            sort = BALANCE_OPTS_METADATA;
+        else if (c->chunk_item->type & BLOCK_FLAG_SYSTEM)
+            sort = BALANCE_OPTS_SYSTEM;
+        else {
+            ERR("unexpected chunk type %llx\n", c->chunk_item->type);
+            ExReleaseResourceLite(&c->lock);
+            break;
+        }
+        
+        if ((!(Vcb->balance.opts[sort].flags & BTRFS_BALANCE_OPTS_LIMIT) || num_chunks[sort] < Vcb->balance.opts[sort].limit_end) &&
+            should_balance_chunk(Vcb, sort, c)) {
+            c->reloc = TRUE;
+            
+            InsertTailList(&chunks, &c->list_entry_balance);
+            
+            num_chunks[sort]++;
+            Vcb->balance.total_chunks++;
+        }
+        
+        ExReleaseResourceLite(&c->lock);
+        
+        le = le->Flink;
+    }
+    
+    ExReleaseResourceLite(&Vcb->chunk_lock);
+    
+    Vcb->balance.chunks_left = Vcb->balance.total_chunks;
+    
+    // NOTE(review): the error and stop paths below leave some queued chunks
+    // with reloc still TRUE and list_entry_balance still linked into the
+    // stack-local list - confirm whether a clean-up pass is needed at "end".
+    
+    // do data chunks before metadata
+    le = chunks.Flink;
+    while (le != &chunks) {
+        chunk* c = CONTAINING_RECORD(le, chunk, list_entry_balance);
+        LIST_ENTRY* le2 = le->Flink; // saved because c may be unlinked below
+        
+        if (c->chunk_item->type & BLOCK_FLAG_DATA) {
+            NTSTATUS Status;
+            BOOL changed;
+            
+            // keep going until a pass over the chunk moves nothing
+            do {
+                changed = FALSE;
+                
+                FsRtlEnterFileSystem();
+                
+                Status = balance_data_chunk(Vcb, c, &changed);
+                
+                FsRtlExitFileSystem();
+                
+                if (!NT_SUCCESS(Status)) {
+                    ERR("balance_data_chunk returned %08x\n", Status);
+                    Vcb->balance.status = Status;
+                    goto end;
+                }
+                
+                KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL);
+                
+                if (Vcb->balance.stopping)
+                    break;
+            } while (changed);
+        
+            if (!c->list_entry_changed.Flink)
+                InsertTailList(&Vcb->chunks_changed, &c->list_entry_changed);
+        }
+            
+        if (Vcb->balance.stopping) {
+            while (le != &chunks) {
+                c = CONTAINING_RECORD(le, chunk, list_entry_balance);
+                c->reloc = FALSE;
+                
+                le = le->Flink;
+            }
+            goto end;
+        }
+        
+        // A mixed data+metadata chunk stays queued for the metadata pass if
+        // metadata balancing is enabled; pure data chunks are finished now.
+        if (c->chunk_item->type & BLOCK_FLAG_DATA &&
+            (!(Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) || !(c->chunk_item->type & BLOCK_FLAG_METADATA))) {
+            RemoveEntryList(&c->list_entry_balance);
+            c->list_entry_balance.Flink = NULL;
+            
+            Vcb->balance.chunks_left--;
+        }
+        
+        le = le2;
+    }
+    
+    // do metadata chunks
+    while (!IsListEmpty(&chunks)) {
+        chunk* c;
+        NTSTATUS Status;
+        BOOL changed;
+        
+        le = RemoveHeadList(&chunks);
+        c = CONTAINING_RECORD(le, chunk, list_entry_balance);
+        
+        if (c->chunk_item->type & BLOCK_FLAG_METADATA || c->chunk_item->type & BLOCK_FLAG_SYSTEM) {
+            do {
+                // initialise as the data pass does, so the loop condition
+                // never reads an indeterminate value
+                changed = FALSE;
+                
+                FsRtlEnterFileSystem();
+                
+                Status = balance_metadata_chunk(Vcb, c, &changed);
+                
+                FsRtlExitFileSystem();
+                
+                if (!NT_SUCCESS(Status)) {
+                    ERR("balance_metadata_chunk returned %08x\n", Status);
+                    Vcb->balance.status = Status;
+                    goto end;
+                }
+                
+                KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL);
+                
+                if (Vcb->balance.stopping)
+                    break;
+            } while (changed);
+            
+            if (!c->list_entry_changed.Flink)
+                InsertTailList(&Vcb->chunks_changed, &c->list_entry_changed);
+        }
+        
+        if (Vcb->balance.stopping) {
+            // le was unlinked by RemoveHeadList but its Flink still leads to
+            // the remaining entries, so this walks the current chunk and
+            // everything still queued
+            while (le != &chunks) {
+                c = CONTAINING_RECORD(le, chunk, list_entry_balance);
+                c->reloc = FALSE;
+                
+                le = le->Flink;
+                c->list_entry_balance.Flink = NULL;
+            }
+            break;
+        }
+        
+        c->list_entry_balance.Flink = NULL;
+        
+        Vcb->balance.chunks_left--;
+    }
+    
+end:
+    if (!Vcb->readonly) {
+        if (!Vcb->balance.removing) {
+            FsRtlEnterFileSystem();
+            Status = remove_balance_item(Vcb);
+            FsRtlExitFileSystem();
+            
+            // Log and carry on: jumping back to "end" here would retry
+            // forever on a persistent failure, and the thread would never
+            // close its handle or signal balance.finished.
+            if (!NT_SUCCESS(Status))
+                ERR("remove_balance_item returned %08x\n", Status);
+        } else {
+            device* dev = NULL;
+            
+            FsRtlEnterFileSystem();
+            ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
+            
+            // find the device being removed; remove_device stores its id in
+            // every opts entry
+            le = Vcb->devices.Flink;
+            while (le != &Vcb->devices) {
+                device* dev2 = CONTAINING_RECORD(le, device, list_entry);
+                
+                if (dev2->devitem.dev_id == Vcb->balance.opts[0].devid) {
+                    dev = dev2;
+                    break;
+                }
+                
+                le = le->Flink;
+            }
+            
+            if (dev) {
+                // only detach the device if every chunk was moved off it
+                if (Vcb->balance.chunks_left == 0) {
+                    Status = finish_removing_device(Vcb, dev);
+                    
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("finish_removing_device returned %08x\n", Status);
+                        dev->reloc = FALSE;
+                    }
+                } else
+                    dev->reloc = FALSE;
+            }
+            
+            ExReleaseResourceLite(&Vcb->tree_lock);
+            FsRtlExitFileSystem();
+        }
+    }
+    
+    ZwClose(Vcb->balance.thread);
+    Vcb->balance.thread = NULL;
+    
+    KeSetEvent(&Vcb->balance.finished, 0, FALSE);
+}
+
+// Validates a user-supplied btrfs_start_balance request and starts the
+// balance thread.
+//
+// Vcb            - volume to balance
+// data, length   - request buffer and its size; must hold a whole
+//                  btrfs_start_balance. The buffer is normalised in place
+//                  (profiles masked, limit/stripes/usage ranges clamped).
+// processor_mode - mode of the requester, used for the privilege check
+//
+// Returns:
+//   STATUS_INVALID_PARAMETER     - buffer missing/too small, or a filter is
+//                                  malformed
+//   STATUS_PRIVILEGE_NOT_HELD    - caller lacks SE_MANAGE_VOLUME_PRIVILEGE
+//   STATUS_DEVICE_NOT_READY      - a balance thread already exists
+//   STATUS_MEDIA_WRITE_PROTECTED - volume is read-only
+//   STATUS_SUCCESS               - thread started, or none of the three
+//                                  option sets was enabled (nothing to do)
+//   otherwise the failure status of PsCreateSystemThread
+NTSTATUS start_balance(device_extension* Vcb, void* data, ULONG length, KPROCESSOR_MODE processor_mode) {
+    NTSTATUS Status;
+    btrfs_start_balance* bsb = (btrfs_start_balance*)data;
+    UINT8 i;
+    
+    if (length < sizeof(btrfs_start_balance) || !data)
+        return STATUS_INVALID_PARAMETER;
+    
+    if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
+        return STATUS_PRIVILEGE_NOT_HELD;
+    
+    if (Vcb->balance.thread) {
+        WARN("balance already running\n");
+        return STATUS_DEVICE_NOT_READY;
+    }
+    
+    if (Vcb->readonly)
+        return STATUS_MEDIA_WRITE_PROTECTED;
+    
+    if (!(bsb->opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED) &&
+        !(bsb->opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) &&
+        !(bsb->opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED))
+        return STATUS_SUCCESS;
+    
+    for (i = 0; i < 3; i++) {
+        if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_ENABLED) {
+            if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_PROFILES) {
+                // drop unknown bits; a filter with nothing left is invalid
+                bsb->opts[i].profiles &= BLOCK_FLAG_RAID0 | BLOCK_FLAG_RAID1 | BLOCK_FLAG_DUPLICATE | BLOCK_FLAG_RAID10 |
+                                         BLOCK_FLAG_RAID5 | BLOCK_FLAG_RAID6 | BLOCK_FLAG_SINGLE;
+
+                if (bsb->opts[i].profiles == 0)
+                    return STATUS_INVALID_PARAMETER;
+            }
+            
+            if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_DEVID) {
+                if (bsb->opts[i].devid == 0)
+                    return STATUS_INVALID_PARAMETER;
+            }
+            
+            if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_DRANGE) {
+                if (bsb->opts[i].drange_start > bsb->opts[i].drange_end)
+                    return STATUS_INVALID_PARAMETER;
+            }
+            
+            if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_VRANGE) {
+                if (bsb->opts[i].vrange_start > bsb->opts[i].vrange_end)
+                    return STATUS_INVALID_PARAMETER;
+            }
+            
+            if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_LIMIT) {
+                bsb->opts[i].limit_start = max(1, bsb->opts[i].limit_start);
+                bsb->opts[i].limit_end = max(1, bsb->opts[i].limit_end);
+                
+                if (bsb->opts[i].limit_start > bsb->opts[i].limit_end)
+                    return STATUS_INVALID_PARAMETER;
+            }
+            
+            if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_STRIPES) {
+                bsb->opts[i].stripes_start = max(1, bsb->opts[i].stripes_start);
+                bsb->opts[i].stripes_end = max(1, bsb->opts[i].stripes_end);
+                
+                if (bsb->opts[i].stripes_start > bsb->opts[i].stripes_end)
+                    return STATUS_INVALID_PARAMETER;
+            }
+            
+            if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_USAGE) {
+                // Usage is clamped to 100. This must read and check the
+                // usage_* fields themselves; using stripes_* here would
+                // overwrite the caller's usage range with the stripe range.
+                bsb->opts[i].usage_start = min(100, bsb->opts[i].usage_start);
+                bsb->opts[i].usage_end = min(100, bsb->opts[i].usage_end);
+                
+                if (bsb->opts[i].usage_start > bsb->opts[i].usage_end)
+                    return STATUS_INVALID_PARAMETER;
+            }
+            
+            if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT) {
+                // exactly one known profile must be named as the target
+                if (bsb->opts[i].convert != BLOCK_FLAG_RAID0 && bsb->opts[i].convert != BLOCK_FLAG_RAID1 &&
+                    bsb->opts[i].convert != BLOCK_FLAG_DUPLICATE && bsb->opts[i].convert != BLOCK_FLAG_RAID10 &&
+                    bsb->opts[i].convert != BLOCK_FLAG_RAID5 && bsb->opts[i].convert != BLOCK_FLAG_RAID6 &&
+                    bsb->opts[i].convert != BLOCK_FLAG_SINGLE)
+                    return STATUS_INVALID_PARAMETER;
+            }
+        }
+    }
+    
+    RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bsb->opts[BALANCE_OPTS_DATA], sizeof(btrfs_balance_opts));
+    RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bsb->opts[BALANCE_OPTS_METADATA], sizeof(btrfs_balance_opts));
+    RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bsb->opts[BALANCE_OPTS_SYSTEM], sizeof(btrfs_balance_opts));
+    
+    Vcb->balance.paused = FALSE;
+    Vcb->balance.removing = FALSE;
+    Vcb->balance.status = STATUS_SUCCESS;
+    // the event starts signalled because the balance is not paused
+    KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused);
+    
+    Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb);
+    if (!NT_SUCCESS(Status)) {
+        ERR("PsCreateSystemThread returned %08x\n", Status);
+        return Status;
+    }
+    
+    return STATUS_SUCCESS;
+}
+
+// Looks in the root tree for a stored balance item and, if one exists,
+// restarts the balance it describes.
+//
+// The options are loaded from the item and then adjusted with the same
+// heuristics the Linux driver uses on resume. The balance starts paused if
+// the volume is read-only or the skip_balance mount option is set.
+//
+// Returns STATUS_NOT_FOUND if there is no balance item,
+// STATUS_INTERNAL_ERROR if the item is too short, the failure status of
+// find_item or PsCreateSystemThread, or STATUS_SUCCESS once the balance
+// thread has been created.
+NTSTATUS look_for_balance_item(device_extension* Vcb) {
+    KEY searchkey;
+    traverse_ptr tp;
+    NTSTATUS Status;
+    BALANCE_ITEM* bi;
+    int i;
+    
+    searchkey.obj_id = BALANCE_ITEM_ID;
+    searchkey.obj_type = TYPE_TEMP_ITEM;
+    searchkey.offset = 0;
+    
+    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
+    if (!NT_SUCCESS(Status)) {
+        ERR("find_item returned %08x\n", Status);
+        return Status;
+    }
+    
+    // a non-zero keycmp means tp does not point at the key we asked for
+    if (keycmp(tp.item->key, searchkey)) {
+        TRACE("no balance item found\n");
+        return STATUS_NOT_FOUND;
+    }
+    
+    if (tp.item->size < sizeof(BALANCE_ITEM)) {
+        WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
+             tp.item->size, sizeof(BALANCE_ITEM));
+        return STATUS_INTERNAL_ERROR;
+    }
+    
+    bi = (BALANCE_ITEM*)tp.item->data;
+    
+    if (bi->flags & BALANCE_FLAGS_DATA)
+        load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bi->data);
+    
+    if (bi->flags & BALANCE_FLAGS_METADATA)
+        load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bi->metadata);
+    
+    if (bi->flags & BALANCE_FLAGS_SYSTEM)
+        load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bi->system);
+    
+    // do the heuristics that Linux driver does
+    
+    for (i = 0; i < 3; i++) {
+        if (Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_ENABLED) {
+            // if converting, don't redo chunks already done
+            
+            if (Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT)
+                Vcb->balance.opts[i].flags |= BTRFS_BALANCE_OPTS_SOFT;
+            
+            // don't balance chunks more than 90% filled - presumably these
+            // have already been done
+            
+            if (!(Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_USAGE) &&
+                !(Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT)
+            ) {
+                Vcb->balance.opts[i].flags |= BTRFS_BALANCE_OPTS_USAGE;
+                Vcb->balance.opts[i].usage_start = 0;
+                Vcb->balance.opts[i].usage_end = 90;
+            }
+        }
+    }
+    
+    if (Vcb->readonly || Vcb->options.skip_balance)
+        Vcb->balance.paused = TRUE;
+    else
+        Vcb->balance.paused = FALSE;
+    
+    Vcb->balance.removing = FALSE;
+    Vcb->balance.status = STATUS_SUCCESS;
+    // the event is left unsignalled when paused, so the thread blocks on it
+    KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused);
+    
+    Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb);
+    if (!NT_SUCCESS(Status)) {
+        ERR("PsCreateSystemThread returned %08x\n", Status);
+        return Status;
+    }
+    
+    return STATUS_SUCCESS;
+}
+
+// Reports the state of the volume's balance into a btrfs_query_balance
+// buffer.
+//
+// With no balance thread, only the status (and, after a failure, the error)
+// is written. With a thread present, the status flags, progress counters,
+// last error and all three option sets are written.
+//
+// Returns STATUS_INVALID_PARAMETER if the buffer is missing or too small,
+// otherwise STATUS_SUCCESS.
+NTSTATUS query_balance(device_extension* Vcb, void* data, ULONG length) {
+    btrfs_query_balance* out = (btrfs_query_balance*)data;
+
+    if (!data || length < sizeof(btrfs_query_balance))
+        return STATUS_INVALID_PARAMETER;
+
+    if (Vcb->balance.thread) {
+        // a balance thread exists: it is either paused or running
+        if (Vcb->balance.paused)
+            out->status = BTRFS_BALANCE_PAUSED;
+        else
+            out->status = BTRFS_BALANCE_RUNNING;
+
+        if (Vcb->balance.removing)
+            out->status |= BTRFS_BALANCE_REMOVAL;
+
+        if (!NT_SUCCESS(Vcb->balance.status))
+            out->status |= BTRFS_BALANCE_ERROR;
+
+        out->chunks_left = Vcb->balance.chunks_left;
+        out->total_chunks = Vcb->balance.total_chunks;
+        out->error = Vcb->balance.status;
+
+        RtlCopyMemory(&out->data_opts, &Vcb->balance.opts[BALANCE_OPTS_DATA], sizeof(btrfs_balance_opts));
+        RtlCopyMemory(&out->metadata_opts, &Vcb->balance.opts[BALANCE_OPTS_METADATA], sizeof(btrfs_balance_opts));
+        RtlCopyMemory(&out->system_opts, &Vcb->balance.opts[BALANCE_OPTS_SYSTEM], sizeof(btrfs_balance_opts));
+    } else {
+        // no thread: report stopped, plus the error that ended the last run
+        out->status = BTRFS_BALANCE_STOPPED;
+
+        if (!NT_SUCCESS(Vcb->balance.status)) {
+            out->status |= BTRFS_BALANCE_ERROR;
+            out->error = Vcb->balance.status;
+        }
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Pauses the balance: sets the paused flag and clears balance.event, which
+// the balance thread waits on between steps.
+//
+// Returns STATUS_PRIVILEGE_NOT_HELD without SE_MANAGE_VOLUME_PRIVILEGE,
+// STATUS_DEVICE_NOT_READY if there is no balance thread or it is already
+// paused, otherwise STATUS_SUCCESS.
+NTSTATUS pause_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
+    LUID manage_volume = RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE);
+
+    if (!SeSinglePrivilegeCheck(manage_volume, processor_mode))
+        return STATUS_PRIVILEGE_NOT_HELD;
+
+    // nothing to pause without a thread, or if it is paused already
+    if (!Vcb->balance.thread || Vcb->balance.paused)
+        return STATUS_DEVICE_NOT_READY;
+
+    Vcb->balance.paused = TRUE;
+    KeClearEvent(&Vcb->balance.event);
+
+    return STATUS_SUCCESS;
+}
+
+// Resumes a paused balance: clears the paused flag and signals
+// balance.event so the balance thread stops waiting.
+//
+// Returns STATUS_PRIVILEGE_NOT_HELD without SE_MANAGE_VOLUME_PRIVILEGE,
+// STATUS_DEVICE_NOT_READY if there is no balance thread or it is not
+// paused, STATUS_MEDIA_WRITE_PROTECTED on a read-only volume, otherwise
+// STATUS_SUCCESS.
+NTSTATUS resume_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
+    LUID manage_volume = RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE);
+
+    if (!SeSinglePrivilegeCheck(manage_volume, processor_mode))
+        return STATUS_PRIVILEGE_NOT_HELD;
+
+    // only a balance that exists and is currently paused can be resumed
+    if (!Vcb->balance.thread || !Vcb->balance.paused)
+        return STATUS_DEVICE_NOT_READY;
+
+    if (Vcb->readonly)
+        return STATUS_MEDIA_WRITE_PROTECTED;
+
+    Vcb->balance.paused = FALSE;
+    KeSetEvent(&Vcb->balance.event, 0, FALSE);
+
+    return STATUS_SUCCESS;
+}
+
+// Asks the balance thread to stop: raises the stopping and cancelling
+// flags, resets the recorded status, and signals balance.event so that a
+// paused thread wakes up and sees the request.
+//
+// Returns STATUS_PRIVILEGE_NOT_HELD without SE_MANAGE_VOLUME_PRIVILEGE,
+// STATUS_DEVICE_NOT_READY if there is no balance thread, otherwise
+// STATUS_SUCCESS. The function does not wait for the thread to finish.
+NTSTATUS stop_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
+    LUID manage_volume = RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE);
+
+    if (!SeSinglePrivilegeCheck(manage_volume, processor_mode)) {
+        return STATUS_PRIVILEGE_NOT_HELD;
+    }
+
+    if (!Vcb->balance.thread) {
+        return STATUS_DEVICE_NOT_READY;
+    }
+
+    // set the flags first, then wake the thread
+    Vcb->balance.paused = FALSE;
+    Vcb->balance.stopping = TRUE;
+    Vcb->balance.cancelling = TRUE;
+    Vcb->balance.status = STATUS_SUCCESS;
+
+    KeSetEvent(&Vcb->balance.event, 0, FALSE);
+
+    return STATUS_SUCCESS;
+}
+
+// Starts removing a device from the volume by launching the balance thread
+// in "removing" mode, restricted to chunks on that device.
+//
+// Vcb            - volume the device belongs to
+// data, length   - buffer holding the UINT64 id of the device to remove
+// processor_mode - mode of the requester, used for the privilege check
+//
+// Returns:
+//   STATUS_PRIVILEGE_NOT_HELD    - caller lacks SE_MANAGE_VOLUME_PRIVILEGE
+//   STATUS_INVALID_PARAMETER     - buffer missing/too small, or the device
+//                                  is the last writable one
+//   STATUS_MEDIA_WRITE_PROTECTED - volume is read-only
+//   STATUS_NOT_FOUND             - no device with that id
+//   STATUS_CANNOT_DELETE         - too few writable devices would remain
+//                                  for a RAID profile in use
+//   STATUS_DEVICE_NOT_READY      - a balance thread already exists
+//   STATUS_SUCCESS               - thread started
+//   otherwise the failure status of PsCreateSystemThread
+NTSTATUS remove_device(device_extension* Vcb, void* data, ULONG length, KPROCESSOR_MODE processor_mode) {
+    UINT64 devid;
+    LIST_ENTRY* le;
+    device* dev = NULL;
+    NTSTATUS Status;
+    int i;
+    UINT64 num_rw_devices;
+    
+    TRACE("(%p, %p, %x)\n", Vcb, data, length);
+    
+    if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
+        return STATUS_PRIVILEGE_NOT_HELD;
+    
+    // reject a missing buffer as well as a short one, as start_balance and
+    // query_balance do, since data is dereferenced just below
+    if (length < sizeof(UINT64) || !data)
+        return STATUS_INVALID_PARAMETER;
+    
+    devid = *(UINT64*)data;
+    
+    ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE);
+    
+    if (Vcb->readonly) {
+        ExReleaseResourceLite(&Vcb->tree_lock);
+        return STATUS_MEDIA_WRITE_PROTECTED;
+    }
+    
+    num_rw_devices = 0;
+    
+    // find the device and count the writable devices in the same pass
+    le = Vcb->devices.Flink;
+    while (le != &Vcb->devices) {
+        device* dev2 = CONTAINING_RECORD(le, device, list_entry);
+        
+        if (dev2->devitem.dev_id == devid)
+            dev = dev2;
+        
+        if (!dev2->readonly)
+            num_rw_devices++;
+        
+        le = le->Flink;
+    }
+    
+    if (!dev) {
+        ExReleaseResourceLite(&Vcb->tree_lock);
+        WARN("device %llx not found\n", devid);
+        return STATUS_NOT_FOUND;
+    }
+    
+    // the device-count checks only apply when removing a writable device
+    if (!dev->readonly) {
+        if (num_rw_devices == 1) {
+            ExReleaseResourceLite(&Vcb->tree_lock);
+            WARN("not removing last non-readonly device\n");
+            return STATUS_INVALID_PARAMETER;
+        }
+        
+        if (num_rw_devices == 4 &&
+            ((Vcb->data_flags & BLOCK_FLAG_RAID10 || Vcb->metadata_flags & BLOCK_FLAG_RAID10 || Vcb->system_flags & BLOCK_FLAG_RAID10) ||
+             (Vcb->data_flags & BLOCK_FLAG_RAID6 || Vcb->metadata_flags & BLOCK_FLAG_RAID6 || Vcb->system_flags & BLOCK_FLAG_RAID6))
+        ) {
+            ExReleaseResourceLite(&Vcb->tree_lock);
+            ERR("would not be enough devices to satisfy RAID requirement (RAID6/10)\n");
+            return STATUS_CANNOT_DELETE;
+        }
+        
+        if (num_rw_devices == 3 && (Vcb->data_flags & BLOCK_FLAG_RAID5 || Vcb->metadata_flags & BLOCK_FLAG_RAID5 || Vcb->system_flags & BLOCK_FLAG_RAID5)) {
+            ExReleaseResourceLite(&Vcb->tree_lock);
+            ERR("would not be enough devices to satisfy RAID requirement (RAID5)\n");
+            return STATUS_CANNOT_DELETE;
+        }
+        
+        if (num_rw_devices == 2 &&
+            ((Vcb->data_flags & BLOCK_FLAG_RAID0 || Vcb->metadata_flags & BLOCK_FLAG_RAID0 || Vcb->system_flags & BLOCK_FLAG_RAID0) ||
+             (Vcb->data_flags & BLOCK_FLAG_RAID1 || Vcb->metadata_flags & BLOCK_FLAG_RAID1 || Vcb->system_flags & BLOCK_FLAG_RAID1))
+        ) {
+            ExReleaseResourceLite(&Vcb->tree_lock);
+            ERR("would not be enough devices to satisfy RAID requirement (RAID0/1)\n");
+            return STATUS_CANNOT_DELETE;
+        }
+    }
+    
+    ExReleaseResourceLite(&Vcb->tree_lock);
+    
+    if (Vcb->balance.thread) {
+        WARN("balance already running\n");
+        return STATUS_DEVICE_NOT_READY;
+    }
+    
+    dev->reloc = TRUE;
+    
+    // balance all three chunk types, restricted to the device being removed
+    RtlZeroMemory(Vcb->balance.opts, sizeof(btrfs_balance_opts) * 3);
+    
+    for (i = 0; i < 3; i++) {
+        Vcb->balance.opts[i].flags = BTRFS_BALANCE_OPTS_ENABLED | BTRFS_BALANCE_OPTS_DEVID;
+        Vcb->balance.opts[i].devid = devid;
+    }
+    
+    Vcb->balance.paused = FALSE;
+    Vcb->balance.removing = TRUE;
+    // reset as the other start paths do, so that an error left over from an
+    // earlier run is not reported for this one
+    Vcb->balance.status = STATUS_SUCCESS;
+    KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused);
+    
+    Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb);
+    if (!NT_SUCCESS(Status)) {
+        ERR("PsCreateSystemThread returned %08x\n", Status);
+        dev->reloc = FALSE;
+        return Status;
+    }
+    
+    return STATUS_SUCCESS;
+}
index ed8a283..2a54cc3 100644 (file)
@@ -52,6 +52,8 @@ BOOL have_sse42 = FALSE, have_sse2 = FALSE;
 UINT64 num_reads = 0;
 LIST_ENTRY uid_map_list;
 LIST_ENTRY volumes;
+ERESOURCE volumes_lock;
+LIST_ENTRY pnp_disks;
 LIST_ENTRY VcbList;
 ERESOURCE global_loading_lock;
 UINT32 debug_log_level = 0;
@@ -63,8 +65,15 @@ UINT32 mount_flush_interval = 30;
 UINT32 mount_max_inline = 2048;
 UINT32 mount_raid5_recalculation = 1;
 UINT32 mount_raid6_recalculation = 1;
+UINT32 mount_skip_balance = 0;
 BOOL log_started = FALSE;
 UNICODE_STRING log_device, log_file, registry_path;
+tPsUpdateDiskCounters PsUpdateDiskCounters;
+tCcCopyReadEx CcCopyReadEx;
+tCcCopyWriteEx CcCopyWriteEx;
+tCcSetAdditionalCacheAttributesEx CcSetAdditionalCacheAttributesEx;
+BOOL diskacc = FALSE;
+void* notification_entry = NULL;
 
 #ifdef _DEBUG
 PFILE_OBJECT comfo = NULL;
@@ -72,8 +81,6 @@ PDEVICE_OBJECT comdo = NULL;
 HANDLE log_handle = NULL;
 #endif
 
-int __security_cookie = __LINE__;
-
 static NTSTATUS STDCALL close_file(device_extension* Vcb, PFILE_OBJECT FileObject);
 
 typedef struct {
@@ -263,6 +270,13 @@ static void STDCALL DriverUnload(PDRIVER_OBJECT DriverObject) {
     free_cache();
     
     IoUnregisterFileSystem(DriverObject->DeviceObject);
+    
+    if (notification_entry)
+#ifdef __REACTOS__
+        IoUnregisterPlugPlayNotification(notification_entry);
+#else
+        IoUnregisterPlugPlayNotificationEx(notification_entry);
+#endif
    
     dosdevice_nameW.Buffer = dosdevice_name;
     dosdevice_nameW.Length = dosdevice_nameW.MaximumLength = (USHORT)wcslen(dosdevice_name) * sizeof(WCHAR);
@@ -280,6 +294,7 @@ static void STDCALL DriverUnload(PDRIVER_OBJECT DriverObject) {
     }
     
     // FIXME - free volumes and their devpaths
+    // FIXME - free pnp_disks and their devpaths
     
 #ifdef _DEBUG
     if (comfo)
@@ -291,6 +306,8 @@ static void STDCALL DriverUnload(PDRIVER_OBJECT DriverObject) {
     
     ExDeleteResourceLite(&global_loading_lock);
     
+    ExDeleteResourceLite(&volumes_lock);
+    
     if (log_device.Buffer)
         ExFreePool(log_device.Buffer);
     
@@ -342,46 +359,18 @@ static BOOL STDCALL get_last_inode(device_extension* Vcb, root* r, PIRP Irp) {
     return TRUE;
 }
 
-BOOL STDCALL get_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char* name, UINT32 crc32, UINT8** data, UINT16* datalen, PIRP Irp) {
-    KEY searchkey;
-    traverse_ptr tp;
-    DIR_ITEM* xa;
-    ULONG size, xasize;
-    NTSTATUS Status;
-    
-    TRACE("(%p, %llx, %llx, %s, %08x, %p, %p)\n", Vcb, subvol->id, inode, name, crc32, data, datalen);
-    
-    searchkey.obj_id = inode;
-    searchkey.obj_type = TYPE_XATTR_ITEM;
-    searchkey.offset = crc32;
-    
-    Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return FALSE;
-    }
-    
-    if (keycmp(tp.item->key, searchkey)) {
-        TRACE("could not find item (%llx,%x,%llx)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
-        return FALSE;
-    }
-    
-    if (tp.item->size < sizeof(DIR_ITEM)) {
-        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM));
-        return FALSE;
-    }
-    
-    xa = (DIR_ITEM*)tp.item->data;
-    size = tp.item->size;
+BOOL extract_xattr(void* item, USHORT size, char* name, UINT8** data, UINT16* datalen) {
+    DIR_ITEM* xa = (DIR_ITEM*)item;
+    USHORT xasize;
     
     while (TRUE) {
         if (size < sizeof(DIR_ITEM) || size < (sizeof(DIR_ITEM) - 1 + xa->m + xa->n)) {
-            WARN("(%llx,%x,%llx) is truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+            WARN("DIR_ITEM is truncated\n");
             return FALSE;
         }
         
         if (xa->n == strlen(name) && RtlCompareMemory(name, xa->name, xa->n) == xa->n) {
-            TRACE("found xattr %s in (%llx,%x,%llx)\n", name, searchkey.obj_id, searchkey.obj_type, searchkey.offset);
+            TRACE("found xattr %s\n", name);
             
             *datalen = xa->m;
             
@@ -408,11 +397,41 @@ BOOL STDCALL get_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char*
             break;
     }
     
-    TRACE("xattr %s not found in (%llx,%x,%llx)\n", name, searchkey.obj_id, searchkey.obj_type, searchkey.offset);
+    TRACE("xattr %s not found\n", name);
     
     return FALSE;
 }
 
+// Looks up the XATTR_ITEM keyed by (inode, crc32) in subvol and passes its
+// contents to extract_xattr to pick out the entry called name.
+//
+// Returns FALSE if the lookup fails, no item has exactly that key, or the
+// item is smaller than a DIR_ITEM; otherwise returns whatever extract_xattr
+// returns, with *data and *datalen filled in by it.
+BOOL STDCALL get_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char* name, UINT32 crc32, UINT8** data, UINT16* datalen, PIRP Irp) {
+    NTSTATUS Status;
+    traverse_ptr tp;
+    KEY key;
+    
+    TRACE("(%p, %llx, %llx, %s, %08x, %p, %p)\n", Vcb, subvol->id, inode, name, crc32, data, datalen);
+    
+    key.obj_id = inode;
+    key.obj_type = TYPE_XATTR_ITEM;
+    key.offset = crc32;
+    
+    Status = find_item(Vcb, subvol, &tp, &key, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return FALSE;
+    }
+    
+    // find_item succeeded but landed on a different key
+    if (keycmp(tp.item->key, key)) {
+        TRACE("could not find item (%llx,%x,%llx)\n", key.obj_id, key.obj_type, key.offset);
+        return FALSE;
+    }
+    
+    if (tp.item->size >= sizeof(DIR_ITEM))
+        return extract_xattr(tp.item->data, tp.item->size, name, data, datalen);
+    
+    ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM));
+    return FALSE;
+}
+
 static NTSTATUS STDCALL drv_close(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
     NTSTATUS Status;
     PIO_STACK_LOCATION IrpSp;
@@ -603,7 +622,10 @@ static NTSTATUS STDCALL drv_query_volume_information(IN PDEVICE_OBJECT DeviceObj
             TRACE("FileFsDeviceInformation\n");
             
             ffdi->DeviceType = FILE_DEVICE_DISK;
-            ffdi->Characteristics = Vcb->devices[0].devobj->Characteristics;
+            
+            ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE);
+            ffdi->Characteristics = first_device(Vcb)->devobj->Characteristics;
+            ExReleaseResourceLite(&Vcb->tree_lock);
             
             if (Vcb->readonly)
                 ffdi->Characteristics |= FILE_READ_ONLY_DEVICE;
@@ -716,7 +738,7 @@ static NTSTATUS STDCALL drv_query_volume_information(IN PDEVICE_OBJECT DeviceObj
             Status = overflow ? STATUS_BUFFER_OVERFLOW : STATUS_SUCCESS;
             break;
         }
-
+        
 #ifndef __REACTOS__
 #ifdef _MSC_VER // not in mingw yet
         case FileFsSectorSizeInformation:
@@ -947,9 +969,9 @@ NTSTATUS create_root(device_extension* Vcb, UINT64 id, root** rootptr, BOOL no_t
         t->new_address = 0;
         t->has_new_address = FALSE;
         t->updated_extents = FALSE;
-        t->flags = tp.tree->flags;
         
         InsertTailList(&Vcb->trees, &t->list_entry);
+        t->list_entry_hash.Flink = NULL;
         
         t->write = TRUE;
         Vcb->need_write = TRUE;
@@ -1183,6 +1205,61 @@ void STDCALL tree_test(void* context) {
 }
 #endif
 
+// static void test_calc_thread(device_extension* Vcb) {
+//     UINT8* data;
+//     ULONG sectors, max_sectors, i, j;
+//     calc_job* cj;
+//     LARGE_INTEGER* sertimes;
+//     LARGE_INTEGER* partimes;
+//     LARGE_INTEGER time1, time2;
+//     
+//     max_sectors = 256;
+//     
+//     sertimes = ExAllocatePoolWithTag(PagedPool, sizeof(LARGE_INTEGER) * max_sectors, ALLOC_TAG);
+//     partimes = ExAllocatePoolWithTag(PagedPool, sizeof(LARGE_INTEGER) * max_sectors, ALLOC_TAG);
+//     RtlZeroMemory(sertimes, sizeof(LARGE_INTEGER) * max_sectors);
+//     RtlZeroMemory(partimes, sizeof(LARGE_INTEGER) * max_sectors);
+//     
+//     for (sectors = 1; sectors <= max_sectors; sectors++) {
+//         data = ExAllocatePoolWithTag(PagedPool, sectors * Vcb->superblock.sector_size, ALLOC_TAG);
+//         RtlZeroMemory(data, sectors * Vcb->superblock.sector_size);
+//         
+//         for (j = 0; j < 100; j++) {
+//             time1 = KeQueryPerformanceCounter(NULL);
+//             
+//             for (i = 0; i < sectors; i++) {
+//                 UINT32 tmp;
+//                 
+//                 tmp = ~calc_crc32c(0xffffffff, data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+//             }
+//             
+//             time2 = KeQueryPerformanceCounter(NULL);
+//             
+//             sertimes[sectors - 1].QuadPart += time2.QuadPart - time1.QuadPart;
+//             
+//             time1 = KeQueryPerformanceCounter(NULL);
+//             
+//             add_calc_job(Vcb, data, sectors, &cj);
+//             KeWaitForSingleObject(&cj->event, Executive, KernelMode, FALSE, NULL);
+//             
+//             time2 = KeQueryPerformanceCounter(NULL);
+//             
+//             partimes[sectors - 1].QuadPart += time2.QuadPart - time1.QuadPart;
+//             
+//             free_calc_job(cj);
+//         }
+//         
+//         ExFreePool(data);
+//     }
+//     
+//     for (sectors = 1; sectors <= max_sectors; sectors++) {
+//         ERR("%u sectors: serial %llu, parallel %llu\n", sectors, sertimes[sectors - 1].QuadPart, partimes[sectors - 1].QuadPart);
+//     }
+//     
+//     ExFreePool(partimes);
+//     ExFreePool(sertimes);
+// }
+
 static NTSTATUS STDCALL set_label(device_extension* Vcb, FILE_FS_LABEL_INFORMATION* ffli) {
     ULONG utf8len;
     NTSTATUS Status;
@@ -1234,6 +1311,7 @@ static NTSTATUS STDCALL set_label(device_extension* Vcb, FILE_FS_LABEL_INFORMATI
 //     test_creating_root(Vcb);
 //     test_alloc_chunk(Vcb);
 //     test_space_list(Vcb);
+//     test_calc_thread(Vcb);
     
     Vcb->need_write = TRUE;
     
@@ -1312,245 +1390,6 @@ exit:
     return Status;
 }
 
-NTSTATUS delete_dir_item(device_extension* Vcb, root* subvol, UINT64 parinode, UINT32 crc32, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback) {
-    KEY searchkey;
-    traverse_ptr tp;
-    NTSTATUS Status;
-    
-    searchkey.obj_id = parinode;
-    searchkey.obj_type = TYPE_DIR_ITEM;
-    searchkey.offset = crc32;
-    
-    Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
-    
-    if (!keycmp(searchkey, tp.item->key)) {
-        if (tp.item->size < sizeof(DIR_ITEM)) {
-            WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM));
-        } else {
-            DIR_ITEM* di;
-            LONG len;
-            
-            di = (DIR_ITEM*)tp.item->data;
-            len = tp.item->size;
-            
-            do {
-                if (di->n == utf8->Length && RtlCompareMemory(di->name, utf8->Buffer, di->n) == di->n) {
-                    ULONG newlen = tp.item->size - (sizeof(DIR_ITEM) - sizeof(char) + di->n + di->m);
-                    
-                    delete_tree_item(Vcb, &tp, rollback);
-                    
-                    if (newlen == 0) {
-                        TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-                    } else {
-                        UINT8 *newdi = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *dioff;
-                        
-                        if (!newdi) {
-                            ERR("out of memory\n");
-                            return STATUS_INSUFFICIENT_RESOURCES;
-                        }
-                        
-                        TRACE("modifying (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-
-                        if ((UINT8*)di > tp.item->data) {
-                            RtlCopyMemory(newdi, tp.item->data, (UINT8*)di - tp.item->data);
-                            dioff = newdi + ((UINT8*)di - tp.item->data);
-                        } else {
-                            dioff = newdi;
-                        }
-                        
-                        if ((UINT8*)&di->name[di->n + di->m] - tp.item->data < tp.item->size)
-                            RtlCopyMemory(dioff, &di->name[di->n + di->m], tp.item->size - ((UINT8*)&di->name[di->n + di->m] - tp.item->data));
-                        
-                        insert_tree_item(Vcb, subvol, parinode, TYPE_DIR_ITEM, crc32, newdi, newlen, NULL, Irp, rollback);
-                    }
-                    
-                    break;
-                }
-                
-                len -= sizeof(DIR_ITEM) - sizeof(char) + di->n + di->m;
-                di = (DIR_ITEM*)&di->name[di->n + di->m];
-            } while (len > 0);
-        }
-    } else {
-        WARN("could not find DIR_ITEM for crc32 %08x\n", crc32);
-    }
-    
-    return STATUS_SUCCESS;
-}
-
-NTSTATUS delete_inode_ref(device_extension* Vcb, root* subvol, UINT64 inode, UINT64 parinode, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback) {
-    KEY searchkey;
-    traverse_ptr tp;
-    BOOL changed = FALSE;
-    NTSTATUS Status;
-    
-    searchkey.obj_id = inode;
-    searchkey.obj_type = TYPE_INODE_REF;
-    searchkey.offset = parinode;
-    
-    Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
-    
-    if (!keycmp(searchkey, tp.item->key)) {
-        if (tp.item->size < sizeof(INODE_REF)) {
-            WARN("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_REF));
-        } else {
-            INODE_REF* ir;
-            ULONG len;
-            
-            ir = (INODE_REF*)tp.item->data;
-            len = tp.item->size;
-            
-            do {
-                ULONG itemlen;
-                
-                if (len < sizeof(INODE_REF) || len < sizeof(INODE_REF) - 1 + ir->n) {
-                    ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-                    break;
-                }
-                
-                itemlen = sizeof(INODE_REF) - sizeof(char) + ir->n;
-                
-                if (ir->n == utf8->Length && RtlCompareMemory(ir->name, utf8->Buffer, ir->n) == ir->n) {
-                    ULONG newlen = tp.item->size - itemlen;
-                    
-                    delete_tree_item(Vcb, &tp, rollback);
-                    changed = TRUE;
-                    
-                    if (newlen == 0) {
-                        TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-                    } else {
-                        UINT8 *newir = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *iroff;
-                        
-                        if (!newir) {
-                            ERR("out of memory\n");
-                            return STATUS_INSUFFICIENT_RESOURCES;
-                        }
-                        
-                        TRACE("modifying (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-
-                        if ((UINT8*)ir > tp.item->data) {
-                            RtlCopyMemory(newir, tp.item->data, (UINT8*)ir - tp.item->data);
-                            iroff = newir + ((UINT8*)ir - tp.item->data);
-                        } else {
-                            iroff = newir;
-                        }
-                        
-                        if ((UINT8*)&ir->name[ir->n] - tp.item->data < tp.item->size)
-                            RtlCopyMemory(iroff, &ir->name[ir->n], tp.item->size - ((UINT8*)&ir->name[ir->n] - tp.item->data));
-                        
-                        insert_tree_item(Vcb, subvol, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newir, newlen, NULL, Irp, rollback);
-                    }
-                    
-                    break;
-                }
-                
-                if (len > itemlen) {
-                    len -= itemlen;
-                    ir = (INODE_REF*)&ir->name[ir->n];
-                } else
-                    break;
-            } while (len > 0);
-            
-            if (!changed) {
-                WARN("found INODE_REF entry, but couldn't find filename\n");
-            }
-        }
-    } else {
-        WARN("could not find INODE_REF entry for inode %llx in %llx\n", searchkey.obj_id, searchkey.offset);
-    }
-    
-    if (changed)
-        return STATUS_SUCCESS;
-    
-    if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF))
-        return STATUS_INTERNAL_ERROR;
-    
-    searchkey.obj_id = inode;
-    searchkey.obj_type = TYPE_INODE_EXTREF;
-    searchkey.offset = calc_crc32c((UINT32)parinode, (UINT8*)utf8->Buffer, utf8->Length);
-    
-    Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
-    
-    if (!keycmp(searchkey, tp.item->key)) {
-        if (tp.item->size < sizeof(INODE_EXTREF)) {
-            WARN("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_EXTREF));
-        } else {
-            INODE_EXTREF* ier;
-            ULONG len;
-            
-            ier = (INODE_EXTREF*)tp.item->data;
-            len = tp.item->size;
-            
-            do {
-                ULONG itemlen;
-                
-                if (len < sizeof(INODE_EXTREF) || len < sizeof(INODE_EXTREF) - 1 + ier->n) {
-                    ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-                    break;
-                }
-                
-                itemlen = sizeof(INODE_EXTREF) - sizeof(char) + ier->n;
-                
-                if (ier->dir == parinode && ier->n == utf8->Length && RtlCompareMemory(ier->name, utf8->Buffer, ier->n) == ier->n) {
-                    ULONG newlen = tp.item->size - itemlen;
-                    
-                    delete_tree_item(Vcb, &tp, rollback);
-                    changed = TRUE;
-                    
-                    if (newlen == 0) {
-                        TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-                    } else {
-                        UINT8 *newier = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *ieroff;
-                        
-                        if (!newier) {
-                            ERR("out of memory\n");
-                            return STATUS_INSUFFICIENT_RESOURCES;
-                        }
-                        
-                        TRACE("modifying (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-
-                        if ((UINT8*)ier > tp.item->data) {
-                            RtlCopyMemory(newier, tp.item->data, (UINT8*)ier - tp.item->data);
-                            ieroff = newier + ((UINT8*)ier - tp.item->data);
-                        } else {
-                            ieroff = newier;
-                        }
-                        
-                        if ((UINT8*)&ier->name[ier->n] - tp.item->data < tp.item->size)
-                            RtlCopyMemory(ieroff, &ier->name[ier->n], tp.item->size - ((UINT8*)&ier->name[ier->n] - tp.item->data));
-                        
-                        insert_tree_item(Vcb, subvol, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newier, newlen, NULL, Irp, rollback);
-                    }
-                    
-                    break;
-                }
-                
-                if (len > itemlen) {
-                    len -= itemlen;
-                    ier = (INODE_EXTREF*)&ier->name[ier->n];
-                } else
-                    break;
-            } while (len > 0);
-        }
-    } else {
-        WARN("couldn't find INODE_EXTREF entry either (offset = %08x)\n", (UINT32)searchkey.offset);
-    }
-    
-    return changed ? STATUS_SUCCESS : STATUS_INTERNAL_ERROR;
-}
-
 static WCHAR* file_desc_fcb(fcb* fcb) {
     char s[60];
     UNICODE_STRING us;
@@ -1811,7 +1650,7 @@ void _free_fcb(fcb* fcb, const char* func, const char* file, unsigned int line)
    
     ExDeleteResourceLite(&fcb->nonpaged->resource);
     ExDeleteResourceLite(&fcb->nonpaged->paging_resource);
-    ExDeleteResourceLite(&fcb->nonpaged->index_lock);
+    ExDeleteResourceLite(&fcb->nonpaged->dir_children_lock);
     ExFreePool(fcb->nonpaged);
     
     if (fcb->sd)
@@ -1836,19 +1675,13 @@ void _free_fcb(fcb* fcb, const char* func, const char* file, unsigned int line)
         LIST_ENTRY* le = RemoveHeadList(&fcb->extents);
         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
         
+        if (ext->csum)
+            ExFreePool(ext->csum);
+        
         ExFreePool(ext->data);
         ExFreePool(ext);
     }
     
-    while (!IsListEmpty(&fcb->index_list)) {
-        LIST_ENTRY* le = RemoveHeadList(&fcb->index_list);
-        index_entry* ie = CONTAINING_RECORD(le, index_entry, list_entry);
-
-        if (ie->utf8.Buffer) ExFreePool(ie->utf8.Buffer);
-        if (ie->filepart_uc.Buffer) ExFreePool(ie->filepart_uc.Buffer);
-        ExFreePool(ie);
-    }
-    
     while (!IsListEmpty(&fcb->hardlinks)) {
         LIST_ENTRY* le = RemoveHeadList(&fcb->hardlinks);
         hardlink* hl = CONTAINING_RECORD(le, hardlink, list_entry);
@@ -1862,6 +1695,22 @@ void _free_fcb(fcb* fcb, const char* func, const char* file, unsigned int line)
         ExFreePool(hl);
     }
     
+    while (!IsListEmpty(&fcb->dir_children_index)) {
+        LIST_ENTRY* le = RemoveHeadList(&fcb->dir_children_index);
+        dir_child* dc = CONTAINING_RECORD(le, dir_child, list_entry_index);
+        
+        ExFreePool(dc->utf8.Buffer);
+        ExFreePool(dc->name.Buffer);
+        ExFreePool(dc->name_uc.Buffer);
+        ExFreePool(dc);
+    }
+    
+    if (fcb->hash_ptrs)
+        ExFreePool(fcb->hash_ptrs);
+    
+    if (fcb->hash_ptrs_uc)
+        ExFreePool(fcb->hash_ptrs_uc);
+    
     FsRtlUninitializeFileLock(&fcb->lock);
     
     ExFreePool(fcb);
@@ -1932,6 +1781,9 @@ void _free_fileref(file_ref* fr, const char* func, const char* file, unsigned in
     if (fr->fcb->fileref == fr)
         fr->fcb->fileref = NULL;
     
+    if (fr->dc)
+        fr->dc->fileref = NULL;
+
     if (fr->list_entry.Flink)
         RemoveEntryList(&fr->list_entry);
     
@@ -2017,8 +1869,15 @@ void STDCALL uninit(device_extension* Vcb, BOOL flush) {
     
     RemoveEntryList(&Vcb->list_entry);
     
+    if (Vcb->balance.thread) {
+        Vcb->balance.paused = FALSE;
+        Vcb->balance.stopping = TRUE;
+        KeSetEvent(&Vcb->balance.event, 0, FALSE);
+        KeWaitForSingleObject(&Vcb->balance.finished, Executive, KernelMode, FALSE, NULL);
+    }
+    
     Status = registry_mark_volume_unmounted(&Vcb->superblock.uuid);
-    if (!NT_SUCCESS(Status))
+    if (!NT_SUCCESS(Status) && Status != STATUS_TOO_LATE)
         WARN("registry_mark_volume_unmounted returned %08x\n", Status);
     
     if (flush) {
@@ -2036,6 +1895,21 @@ void STDCALL uninit(device_extension* Vcb, BOOL flush) {
         ExReleaseResourceLite(&Vcb->tree_lock);
     }
     
+    for (i = 0; i < Vcb->calcthreads.num_threads; i++) {
+        Vcb->calcthreads.threads[i].quit = TRUE;
+    }
+    
+    KeSetEvent(&Vcb->calcthreads.event, 0, FALSE);
+        
+    for (i = 0; i < Vcb->calcthreads.num_threads; i++) {
+        KeWaitForSingleObject(&Vcb->calcthreads.threads[i].finished, Executive, KernelMode, FALSE, NULL);
+        
+        ZwClose(Vcb->calcthreads.threads[i].handle);
+    }
+    
+    ExDeleteResourceLite(&Vcb->calcthreads.lock);
+    ExFreePool(Vcb->calcthreads.threads);
+    
     time.QuadPart = 0;
     KeSetTimer(&Vcb->flush_thread_timer, time, NULL); // trigger the timer early
     KeWaitForSingleObject(&Vcb->flush_thread_finished, Executive, KernelMode, FALSE, NULL);
@@ -2101,28 +1975,23 @@ void STDCALL uninit(device_extension* Vcb, BOOL flush) {
     
     // FIXME - free any open fcbs?
     
-    while (!IsListEmpty(&Vcb->sector_checksums)) {
-        LIST_ENTRY* le = RemoveHeadList(&Vcb->sector_checksums);
-        changed_sector* cs = (changed_sector*)le;
+    while (!IsListEmpty(&Vcb->devices)) {
+        LIST_ENTRY* le = RemoveHeadList(&Vcb->devices);
+        device* dev = CONTAINING_RECORD(le, device, list_entry);
         
-        ExFreePool(cs);
-    }
-    
-    for (i = 0; i < Vcb->superblock.num_devices; i++) {
-        while (!IsListEmpty(&Vcb->devices[i].space)) {
-            LIST_ENTRY* le = RemoveHeadList(&Vcb->devices[i].space);
-            space* s = CONTAINING_RECORD(le, space, list_entry);
+        while (!IsListEmpty(&dev->space)) {
+            LIST_ENTRY* le2 = RemoveHeadList(&dev->space);
+            space* s = CONTAINING_RECORD(le2, space, list_entry);
             
             ExFreePool(s);
         }
+        
+        ExFreePool(dev);
     }
     
-    ExFreePool(Vcb->devices);
-    
     ExDeleteResourceLite(&Vcb->fcb_lock);
     ExDeleteResourceLite(&Vcb->load_lock);
     ExDeleteResourceLite(&Vcb->tree_lock);
-    ExDeleteResourceLite(&Vcb->checksum_lock);
     ExDeleteResourceLite(&Vcb->chunk_lock);
     
     ExDeletePagedLookasideList(&Vcb->tree_data_lookaside);
@@ -2162,12 +2031,13 @@ NTSTATUS delete_fileref(file_ref* fileref, PFILE_OBJECT FileObject, PIRP Irp, LI
             
             mark_fcb_dirty(fileref->fcb);
             
+            fileref->fcb->inode_item_changed = TRUE;
+            
             if (fileref->fcb->inode_item.st_nlink > 1) {
                 fileref->fcb->inode_item.st_nlink--;
                 fileref->fcb->inode_item.transid = fileref->fcb->Vcb->superblock.generation;
                 fileref->fcb->inode_item.sequence++;
                 fileref->fcb->inode_item.st_ctime = now;
-                fileref->fcb->inode_item_changed = TRUE;
             } else {
                 fileref->fcb->deleted = TRUE;
             
@@ -2234,6 +2104,22 @@ NTSTATUS delete_fileref(file_ref* fileref, PFILE_OBJECT FileObject, PIRP Irp, LI
         mark_fcb_dirty(fileref->fcb);
     }
     
+    // remove dir_child from parent
+    
+    if (fileref->dc) {
+        ExAcquireResourceExclusiveLite(&fileref->parent->fcb->nonpaged->dir_children_lock, TRUE);
+        RemoveEntryList(&fileref->dc->list_entry_index);
+        remove_dir_child_from_hash_lists(fileref->parent->fcb, fileref->dc);
+        ExReleaseResourceLite(&fileref->parent->fcb->nonpaged->dir_children_lock);
+        
+        ExFreePool(fileref->dc->utf8.Buffer);
+        ExFreePool(fileref->dc->name.Buffer);
+        ExFreePool(fileref->dc->name_uc.Buffer);
+        ExFreePool(fileref->dc);
+        
+        fileref->dc = NULL;
+    }
+    
     // update INODE_ITEM of parent
     
     TRACE("delete file %.*S\n", fileref->filepart.Length / sizeof(WCHAR), fileref->filepart.Buffer);
@@ -2395,6 +2281,32 @@ exit2:
     return Status;
 }
 
+BOOL get_file_attributes_from_xattr(char* val, UINT16 len, ULONG* atts) {
+    // Parse an xattr value of the form "0x<hex digits>" (val holds len bytes; no
+    // terminator is read). On success store the number in *atts and return TRUE.
+    // Return FALSE, leaving *atts untouched, if len <= 2 or the "0x" prefix is absent.
+    if (len > 2 && val[0] == '0' && val[1] == 'x') {
+        int i;
+        ULONG dosnum = 0;
+
+        for (i = 2; i < len; i++) {
+            dosnum *= 0x10; // shift in a nibble; characters that are not hex digits add 0
+            
+            if (val[i] >= '0' && val[i] <= '9')
+                dosnum |= val[i] - '0';
+            else if (val[i] >= 'a' && val[i] <= 'f')
+                dosnum |= val[i] + 10 - 'a';
+            else if (val[i] >= 'A' && val[i] <= 'F')
+                dosnum |= val[i] + 10 - 'A'; // offset from 'A': using 'a' here goes negative and sets stray bits
+        }
+        
+        TRACE("DOSATTRIB: %08x\n", dosnum);
+        *atts = dosnum;
+        
+        return TRUE;
+    }
+    return FALSE;
+}
+
 ULONG STDCALL get_file_attributes(device_extension* Vcb, INODE_ITEM* ii, root* r, UINT64 inode, UINT8 type, BOOL dotfile, BOOL ignore_xa, PIRP Irp) {
     ULONG att;
     char* eaval;
@@ -2403,33 +2315,24 @@ ULONG STDCALL get_file_attributes(device_extension* Vcb, INODE_ITEM* ii, root* r
     // ii can be NULL
     
     if (!ignore_xa && get_xattr(Vcb, r, inode, EA_DOSATTRIB, EA_DOSATTRIB_HASH, (UINT8**)&eaval, &ealen, Irp)) {
-        if (ealen > 2) {
-            if (eaval[0] == '0' && eaval[1] == 'x') {
-                int i;
-                ULONG dosnum = 0;
-
-                for (i = 2; i < ealen; i++) {
-                    dosnum *= 0x10;
-                    
-                    if (eaval[i] >= '0' && eaval[i] <= '9')
-                        dosnum |= eaval[i] - '0';
-                    else if (eaval[i] >= 'a' && eaval[i] <= 'f')
-                        dosnum |= eaval[i] + 10 - 'a';
-                    else if (eaval[i] >= 'A' && eaval[i] <= 'F')
-                        dosnum |= eaval[i] + 10 - 'a';
-                }
-                
-                TRACE("DOSATTRIB: %08x\n", dosnum);
-
-                ExFreePool(eaval);
-                
-                if (type == BTRFS_TYPE_DIRECTORY)
-                    dosnum |= FILE_ATTRIBUTE_DIRECTORY;
-                else if (type == BTRFS_TYPE_SYMLINK)
-                    dosnum |= FILE_ATTRIBUTE_REPARSE_POINT;
-                
-                return dosnum;
+        ULONG dosnum = 0;
+        
+        if (get_file_attributes_from_xattr(eaval, ealen, &dosnum)) {
+            ExFreePool(eaval);
+            
+            if (type == BTRFS_TYPE_DIRECTORY)
+                dosnum |= FILE_ATTRIBUTE_DIRECTORY;
+            else if (type == BTRFS_TYPE_SYMLINK)
+                dosnum |= FILE_ATTRIBUTE_REPARSE_POINT;
+            
+            if (inode == SUBVOL_ROOT_INODE) {
+                if (r->root_item.flags & BTRFS_SUBVOL_READONLY)
+                    dosnum |= FILE_ATTRIBUTE_READONLY;
+                else
+                    dosnum &= ~FILE_ATTRIBUTE_READONLY;
             }
+            
+            return dosnum;
         }
         
         ExFreePool(eaval);
@@ -2455,6 +2358,13 @@ ULONG STDCALL get_file_attributes(device_extension* Vcb, INODE_ITEM* ii, root* r
     
     att |= FILE_ATTRIBUTE_ARCHIVE;
     
+    if (inode == SUBVOL_ROOT_INODE) {
+        if (r->root_item.flags & BTRFS_SUBVOL_READONLY)
+            att |= FILE_ATTRIBUTE_READONLY;
+        else
+            att &= ~FILE_ATTRIBUTE_READONLY;
+    }
+    
     // FIXME - get READONLY from ii->st_mode
     // FIXME - return SYSTEM for block/char devices?
     
@@ -2464,7 +2374,7 @@ ULONG STDCALL get_file_attributes(device_extension* Vcb, INODE_ITEM* ii, root* r
     return att;
 }
 
-static NTSTATUS sync_read_phys(PDEVICE_OBJECT DeviceObject, LONGLONG StartingOffset, ULONG Length, PUCHAR Buffer, BOOL override) {
+NTSTATUS sync_read_phys(PDEVICE_OBJECT DeviceObject, LONGLONG StartingOffset, ULONG Length, PUCHAR Buffer, BOOL override) {
     IO_STATUS_BLOCK* IoStatus;
     LARGE_INTEGER Offset;
     PIRP Irp;
@@ -2568,7 +2478,7 @@ static NTSTATUS STDCALL read_superblock(device_extension* Vcb, PDEVICE_OBJECT de
     unsigned int i, to_read;
     UINT8 valid_superblocks;
     
-    to_read = sector_align(sizeof(superblock), device->SectorSize);
+    to_read = device->SectorSize == 0 ? sizeof(superblock) : sector_align(sizeof(superblock), device->SectorSize);
     
     sb = ExAllocatePoolWithTag(NonPagedPool, to_read, ALLOC_TAG);
     if (!sb) {
@@ -2592,15 +2502,22 @@ static NTSTATUS STDCALL read_superblock(device_extension* Vcb, PDEVICE_OBJECT de
             return Status;
         }
         
-        TRACE("got superblock %u!\n", i);
-        
-        crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum));
-        
-        if (crc32 != *((UINT32*)sb->checksum))
-            WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)sb->checksum));
-        else if (valid_superblocks == 0 || sb->generation > Vcb->superblock.generation) {
-            RtlCopyMemory(&Vcb->superblock, sb, sizeof(superblock));
-            valid_superblocks++;
+        if (sb->magic != BTRFS_MAGIC) {
+            if (i == 0) {
+                TRACE("not a BTRFS volume\n");
+                return STATUS_UNRECOGNIZED_VOLUME;
+            }
+        } else {
+            TRACE("got superblock %u!\n", i);
+            
+            crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum));
+            
+            if (crc32 != *((UINT32*)sb->checksum))
+                WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)sb->checksum));
+            else if (valid_superblocks == 0 || sb->generation > Vcb->superblock.generation) {
+                RtlCopyMemory(&Vcb->superblock, sb, sizeof(superblock));
+                valid_superblocks++;
+            }
         }
         
         i++;
@@ -2912,20 +2829,46 @@ static NTSTATUS find_disk_holes(device_extension* Vcb, device* dev, PIRP Irp) {
     return STATUS_SUCCESS;
 }
 
+static void add_device_to_list(device_extension* Vcb, device* dev) {
+    // Links dev into Vcb->devices just before the first entry whose dev_id is
+    // greater than dev's, or at the tail if there is none.
+    LIST_ENTRY* head = &Vcb->devices;
+    LIST_ENTRY* pos;
+    
+    for (pos = head->Flink; pos != head; pos = pos->Flink) {
+        device* cur = CONTAINING_RECORD(pos, device, list_entry);
+        
+        if (cur->devitem.dev_id > dev->devitem.dev_id) {
+            // inserting after the predecessor of pos places dev directly before pos
+            InsertHeadList(pos->Blink, &dev->list_entry);
+            return;
+        }
+    }
+    
+    InsertTailList(head, &dev->list_entry);
+}
+
 device* find_device_from_uuid(device_extension* Vcb, BTRFS_UUID* uuid) {
-    UINT64 i;
+    LIST_ENTRY* le;
     
-    for (i = 0; i < Vcb->devices_loaded; i++) {
-        TRACE("device %llx, uuid %02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x\n", i,
-            Vcb->devices[i].devitem.device_uuid.uuid[0], Vcb->devices[i].devitem.device_uuid.uuid[1], Vcb->devices[i].devitem.device_uuid.uuid[2], Vcb->devices[i].devitem.device_uuid.uuid[3], Vcb->devices[i].devitem.device_uuid.uuid[4], Vcb->devices[i].devitem.device_uuid.uuid[5], Vcb->devices[i].devitem.device_uuid.uuid[6], Vcb->devices[i].devitem.device_uuid.uuid[7],
-            Vcb->devices[i].devitem.device_uuid.uuid[8], Vcb->devices[i].devitem.device_uuid.uuid[9], Vcb->devices[i].devitem.device_uuid.uuid[10], Vcb->devices[i].devitem.device_uuid.uuid[11], Vcb->devices[i].devitem.device_uuid.uuid[12], Vcb->devices[i].devitem.device_uuid.uuid[13], Vcb->devices[i].devitem.device_uuid.uuid[14], Vcb->devices[i].devitem.device_uuid.uuid[15]);
+    le = Vcb->devices.Flink;
+    while (le != &Vcb->devices) {
+        device* dev = CONTAINING_RECORD(le, device, list_entry);
+        
+        TRACE("device %llx, uuid %02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x\n", dev->devitem.dev_id,
+            dev->devitem.device_uuid.uuid[0], dev->devitem.device_uuid.uuid[1], dev->devitem.device_uuid.uuid[2], dev->devitem.device_uuid.uuid[3], dev->devitem.device_uuid.uuid[4], dev->devitem.device_uuid.uuid[5], dev->devitem.device_uuid.uuid[6], dev->devitem.device_uuid.uuid[7],
+            dev->devitem.device_uuid.uuid[8], dev->devitem.device_uuid.uuid[9], dev->devitem.device_uuid.uuid[10], dev->devitem.device_uuid.uuid[11], dev->devitem.device_uuid.uuid[12], dev->devitem.device_uuid.uuid[13], dev->devitem.device_uuid.uuid[14], dev->devitem.device_uuid.uuid[15]);
         
-        if (Vcb->devices[i].devobj && RtlCompareMemory(&Vcb->devices[i].devitem.device_uuid, uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) {
-            TRACE("returning device %llx\n", i);
-            return &Vcb->devices[i];
+        if (dev->devobj && RtlCompareMemory(&dev->devitem.device_uuid, uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) {
+            TRACE("returning device %llx\n", dev->devitem.dev_id);
+            return dev;
         }
+        
+        le = le->Flink;
     }
     
+    ExAcquireResourceSharedLite(&volumes_lock, TRUE);
+    
     if (Vcb->devices_loaded < Vcb->superblock.num_devices && !IsListEmpty(&volumes)) {
         LIST_ENTRY* le = volumes.Flink;
         
@@ -2938,9 +2881,11 @@ device* find_device_from_uuid(device_extension* Vcb, BTRFS_UUID* uuid) {
                 NTSTATUS Status;
                 PFILE_OBJECT FileObject;
                 PDEVICE_OBJECT DeviceObject;
+                device* dev;
                 
                 Status = IoGetDeviceObjectPointer(&v->devpath, FILE_READ_ATTRIBUTES, &FileObject, &DeviceObject);
                 if (!NT_SUCCESS(Status)) {
+                    ExReleaseResourceLite(&volumes_lock);
                     ERR("IoGetDeviceObjectPointer(%.*S) returned %08x\n", v->devpath.Length / sizeof(WCHAR), v->devpath.Buffer, Status);
                     return NULL;
                 }
@@ -2950,20 +2895,38 @@ device* find_device_from_uuid(device_extension* Vcb, BTRFS_UUID* uuid) {
                 ObReferenceObject(DeviceObject);
                 ObDereferenceObject(FileObject);
                 
-                Vcb->devices[Vcb->devices_loaded].devobj = DeviceObject;
-                Vcb->devices[Vcb->devices_loaded].devitem.device_uuid = *uuid;
-                Vcb->devices[Vcb->devices_loaded].seeding = v->seeding;
-                Vcb->devices[Vcb->devices_loaded].readonly = Vcb->devices[Vcb->devices_loaded].seeding;
-                Vcb->devices[Vcb->devices_loaded].removable = FALSE;
+                dev = ExAllocatePoolWithTag(NonPagedPool, sizeof(device), ALLOC_TAG);
+                if (!dev) {
+                    ExReleaseResourceLite(&volumes_lock);
+                    ERR("out of memory\n");
+                    ObDereferenceObject(DeviceObject);
+                    return NULL;
+                }
+                
+                RtlZeroMemory(dev, sizeof(device));
+                dev->devobj = DeviceObject;
+                dev->devitem.device_uuid = *uuid;
+                dev->devitem.dev_id = v->devnum;
+                dev->seeding = v->seeding;
+                dev->readonly = dev->seeding;
+                dev->reloc = FALSE;
+                dev->removable = FALSE;
+                dev->disk_num = v->disk_num;
+                dev->part_num = v->part_num;
+                add_device_to_list(Vcb, dev);
                 Vcb->devices_loaded++;
                 
-                return &Vcb->devices[Vcb->devices_loaded - 1];
+                ExReleaseResourceLite(&volumes_lock);
+                
+                return dev;
             }
             
             le = le->Flink;
         }
     }
     
+    ExReleaseResourceLite(&volumes_lock);
+    
     WARN("could not find device with uuid %02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x\n",
          uuid->uuid[0], uuid->uuid[1], uuid->uuid[2], uuid->uuid[3], uuid->uuid[4], uuid->uuid[5], uuid->uuid[6], uuid->uuid[7],
          uuid->uuid[8], uuid->uuid[9], uuid->uuid[10], uuid->uuid[11], uuid->uuid[12], uuid->uuid[13], uuid->uuid[14], uuid->uuid[15]);
@@ -3005,9 +2968,8 @@ static ULONG get_device_change_count(PDEVICE_OBJECT devobj) {
     return cc;
 }
 
-static void init_device(device_extension* Vcb, device* dev, BOOL get_length) {
+void init_device(device_extension* Vcb, device* dev, BOOL get_length, BOOL get_nums) {
     NTSTATUS Status;
-    GET_LENGTH_INFORMATION gli;
     ULONG aptelen;
     ATA_PASS_THROUGH_EX* apte;
     IDENTIFY_DEVICE_DATA* idd;
@@ -3016,18 +2978,37 @@ static void init_device(device_extension* Vcb, device* dev, BOOL get_length) {
     dev->change_count = dev->removable ? get_device_change_count(dev->devobj) : 0;
     
     if (get_length) {
+        GET_LENGTH_INFORMATION gli;
+        
         Status = dev_ioctl(dev->devobj, IOCTL_DISK_GET_LENGTH_INFO, NULL, 0,
-                        &gli, sizeof(gli), TRUE, NULL);
-        if (!NT_SUCCESS(Status)) {
-            ERR("error reading length information: %08x\n", Status);
-        }
+                           &gli, sizeof(GET_LENGTH_INFORMATION), TRUE, NULL);
+        
+        if (!NT_SUCCESS(Status))
+            ERR("IOCTL_DISK_GET_LENGTH_INFO returned %08x\n", Status);
         
         dev->length = gli.Length.QuadPart;
     }
     
+    if (get_nums) {
+        STORAGE_DEVICE_NUMBER sdn;
+        
+        Status = dev_ioctl(dev->devobj, IOCTL_STORAGE_GET_DEVICE_NUMBER, NULL, 0,
+                           &sdn, sizeof(STORAGE_DEVICE_NUMBER), TRUE, NULL);
+        
+        if (!NT_SUCCESS(Status)) {
+            WARN("IOCTL_STORAGE_GET_DEVICE_NUMBER returned %08x\n", Status);
+            dev->disk_num = 0;
+            dev->part_num = 0;
+        } else {
+            dev->disk_num = sdn.DeviceNumber;
+            dev->part_num = sdn.PartitionNumber;
+        }
+    }
+    
     dev->ssd = FALSE;
     dev->trim = FALSE;
     dev->readonly = dev->seeding;
+    dev->reloc = FALSE;
     
     if (!dev->readonly) {
         Status = dev_ioctl(dev->devobj, IOCTL_DISK_IS_WRITABLE, NULL, 0,
@@ -3055,9 +3036,9 @@ static void init_device(device_extension* Vcb, device* dev, BOOL get_length) {
     Status = dev_ioctl(dev->devobj, IOCTL_ATA_PASS_THROUGH, apte, aptelen,
                        apte, aptelen, TRUE, NULL);
     
-    if (!NT_SUCCESS(Status)) {
-        ERR("error calling ATA IDENTIFY DEVICE: %08x\n", Status);
-    } else {
+    if (!NT_SUCCESS(Status))
+        TRACE("IOCTL_ATA_PASS_THROUGH returned %08x for IDENTIFY DEVICE\n", Status);
+    else {
         idd = (IDENTIFY_DEVICE_DATA*)((UINT8*)apte + sizeof(ATA_PASS_THROUGH_EX));
         
         if (idd->NominalMediaRotationRate == 1) {
@@ -3084,7 +3065,6 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) {
     KEY searchkey;
     BOOL b;
     chunk* c;
-    UINT64 i;
     NTSTATUS Status;
 
     searchkey.obj_id = 0;
@@ -3092,6 +3072,8 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) {
     searchkey.offset = 0;
     
     Vcb->data_flags = 0;
+    Vcb->metadata_flags = 0;
+    Vcb->system_flags = 0;
     
     Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp);
     if (!NT_SUCCESS(Status)) {
@@ -3107,21 +3089,29 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) {
                 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DEV_ITEM));
             } else {
                 DEV_ITEM* di = (DEV_ITEM*)tp.item->data;
+                LIST_ENTRY* le;
                 BOOL done = FALSE;
                 
-                for (i = 0; i < Vcb->devices_loaded; i++) {
-                    if (Vcb->devices[i].devobj && RtlCompareMemory(&Vcb->devices[i].devitem.device_uuid, &di->device_uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) {
-                        RtlCopyMemory(&Vcb->devices[i].devitem, tp.item->data, min(tp.item->size, sizeof(DEV_ITEM)));
+                le = Vcb->devices.Flink;
+                while (le != &Vcb->devices) {
+                    device* dev = CONTAINING_RECORD(le, device, list_entry);
+                    
+                    if (dev->devobj && RtlCompareMemory(&dev->devitem.device_uuid, &di->device_uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) {
+                        RtlCopyMemory(&dev->devitem, tp.item->data, min(tp.item->size, sizeof(DEV_ITEM)));
                         
-                        if (i > 0)
-                            init_device(Vcb, &Vcb->devices[i], TRUE);
+                        if (le != Vcb->devices.Flink)
+                            init_device(Vcb, dev, TRUE, TRUE);
                         
                         done = TRUE;
                         break;
                     }
+
+                    le = le->Flink;
                 }
                 
                 if (!done) {
+                    ExAcquireResourceSharedLite(&volumes_lock, TRUE);
+                    
                     if (!IsListEmpty(&volumes) && Vcb->devices_loaded < Vcb->superblock.num_devices) {
                         LIST_ENTRY* le = volumes.Flink;
                         
@@ -3131,9 +3121,11 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) {
                             if (RtlCompareMemory(&di->device_uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) {
                                 PFILE_OBJECT FileObject;
                                 PDEVICE_OBJECT DeviceObject;
+                                device* dev;
                                 
                                 Status = IoGetDeviceObjectPointer(&v->devpath, FILE_READ_DATA | FILE_WRITE_DATA, &FileObject, &DeviceObject);
                                 if (!NT_SUCCESS(Status)) {
+                                    ExReleaseResourceLite(&volumes_lock);
                                     ERR("IoGetDeviceObjectPointer(%.*S) returned %08x\n", v->devpath.Length / sizeof(WCHAR), v->devpath.Buffer, Status);
                                     return Status;
                                 }
@@ -3143,13 +3135,25 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) {
                                 ObReferenceObject(DeviceObject);
                                 ObDereferenceObject(FileObject);
                                 
-                                Vcb->devices[Vcb->devices_loaded].devobj = DeviceObject;
-                                RtlCopyMemory(&Vcb->devices[Vcb->devices_loaded].devitem, di, min(tp.item->size, sizeof(DEV_ITEM)));
-                                init_device(Vcb, &Vcb->devices[i], FALSE);
-
-                                Vcb->devices[i].seeding = v->seeding;
-
-                                Vcb->devices[i].length = v->length;
+                                dev = ExAllocatePoolWithTag(NonPagedPool, sizeof(device), ALLOC_TAG);
+                                if (!dev) {
+                                    ExReleaseResourceLite(&volumes_lock);
+                                    ERR("out of memory\n");
+                                    ObDereferenceObject(DeviceObject);
+                                    return STATUS_INSUFFICIENT_RESOURCES;
+                                }
+                                
+                                RtlZeroMemory(dev, sizeof(device));
+                               
+                                dev->devobj = DeviceObject;
+                                RtlCopyMemory(&dev->devitem, di, min(tp.item->size, sizeof(DEV_ITEM)));
+                                dev->seeding = v->seeding;
+                                init_device(Vcb, dev, FALSE, FALSE);
+
+                                dev->length = v->length;
+                                dev->disk_num = v->disk_num;
+                                dev->part_num = v->part_num;
+                                add_device_to_list(Vcb, dev);
                                 Vcb->devices_loaded++;
 
                                 done = TRUE;
@@ -3166,6 +3170,8 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) {
                         }
                     } else
                         ERR("unexpected device %llx found\n", tp.item->key.offset);
+                    
+                    ExReleaseResourceLite(&volumes_lock);
                 }
             }
         } else if (tp.item->key.obj_type == TYPE_CHUNK_ITEM) {
@@ -3185,6 +3191,7 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) {
                 c->cache = NULL;
                 c->created = FALSE;
                 c->readonly = FALSE;
+                c->reloc = FALSE;
                 
                 c->chunk_item = ExAllocatePoolWithTag(NonPagedPool, tp.item->size, ALLOC_TAG);
                 
@@ -3199,8 +3206,15 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) {
                 if (c->chunk_item->type & BLOCK_FLAG_DATA && c->chunk_item->type > Vcb->data_flags)
                     Vcb->data_flags = c->chunk_item->type;
                 
+                if (c->chunk_item->type & BLOCK_FLAG_METADATA && c->chunk_item->type > Vcb->metadata_flags)
+                    Vcb->metadata_flags = c->chunk_item->type;
+                
+                if (c->chunk_item->type & BLOCK_FLAG_SYSTEM && c->chunk_item->type > Vcb->system_flags)
+                    Vcb->system_flags = c->chunk_item->type;
+                
                 if (c->chunk_item->num_stripes > 0) {
                     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
+                    UINT16 i;
                     
                     c->devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * c->chunk_item->num_stripes, ALLOC_TAG);
                     
@@ -3239,10 +3253,13 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) {
                 InitializeListHead(&c->range_locks);
                 KeInitializeSpinLock(&c->range_locks_spinlock);
                 KeInitializeEvent(&c->range_locks_event, NotificationEvent, FALSE);
+                
+                c->last_alloc_set = FALSE;
 
                 InsertTailList(&Vcb->chunks, &c->list_entry);
                 
                 c->list_entry_changed.Flink = NULL;
+                c->list_entry_balance.Flink = NULL;
             }
         }
     
@@ -3257,6 +3274,17 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) {
     if (Vcb->data_flags == 0)
         Vcb->data_flags = BLOCK_FLAG_DATA | (Vcb->superblock.num_devices > 1 ? BLOCK_FLAG_RAID0 : 0);
     
+    if (Vcb->metadata_flags == 0)
+        Vcb->metadata_flags = BLOCK_FLAG_METADATA | (Vcb->superblock.num_devices > 1 ? BLOCK_FLAG_RAID1 : BLOCK_FLAG_DUPLICATE);
+    
+    if (Vcb->system_flags == 0)
+        Vcb->system_flags = BLOCK_FLAG_SYSTEM | (Vcb->superblock.num_devices > 1 ? BLOCK_FLAG_RAID1 : BLOCK_FLAG_DUPLICATE);
+    
+    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS) {
+        Vcb->metadata_flags |= BLOCK_FLAG_DATA;
+        Vcb->data_flags = Vcb->metadata_flags;
+    }
+    
     return STATUS_SUCCESS;
 }
 
@@ -3365,9 +3393,6 @@ static NTSTATUS STDCALL find_chunk_usage(device_extension* Vcb, PIRP Irp) {
     BLOCK_GROUP_ITEM* bgi;
     NTSTATUS Status;
     
-// c00000,c0,800000
-// block_group_item size=7f0000 chunktreeid=100 flags=1
-    
     searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
     
     while (le != &Vcb->chunks) {
@@ -3394,27 +3419,23 @@ static NTSTATUS STDCALL find_chunk_usage(device_extension* Vcb, PIRP Irp) {
                     Vcb->extent_root->id, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
             }
         }
-            
-//         if (addr >= c->offset && (addr - c->offset) < c->chunk_item->size && c->chunk_item->num_stripes > 0) {
-//             cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
-// 
-//             return (addr - c->offset) + cis->offset;
-//         }
 
-        // It doesn't make a great deal of sense to load the free space cache of a
-        // readonly seeding chunk, as we'll never write to it. But btrfs check will
-        // complain if we don't write a valid cache, so we have to do it anyway...
+        if (!Vcb->readonly) {
+            // It doesn't make a great deal of sense to load the free space cache of a
+            // readonly seeding chunk, as we'll never write to it. But btrfs check will
+            // complain if we don't write a valid cache, so we have to do it anyway...
+                
+            // FIXME - make sure we free occasionally after doing one of these, or we
+            // might use up a lot of memory with a big disk.
             
-        // FIXME - make sure we free occasionally after doing one of these, or we
-        // might use up a lot of memory with a big disk.
-        
-        Status = load_free_space_cache(Vcb, c, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("load_free_space_cache returned %08x\n", Status);
-            return Status;
+            Status = load_free_space_cache(Vcb, c, Irp);
+            if (!NT_SUCCESS(Status)) {
+                ERR("load_free_space_cache returned %08x\n", Status);
+                return Status;
+            }
+            
+            protect_superblocks(Vcb, c);
         }
-        
-        protect_superblocks(Vcb, c);
 
         le = le->Flink;
     }
@@ -3608,81 +3629,137 @@ end:
     return NULL;
 }
 
+void init_file_cache(PFILE_OBJECT FileObject, CC_FILE_SIZES* ccfs) { // set up caching for FileObject using the sizes in ccfs
+    TRACE("(%p, %p)\n", FileObject, ccfs);
+    
+    CcInitializeCacheMap(FileObject, ccfs, FALSE, cache_callbacks, FileObject); // PinAccess is FALSE; FileObject itself is passed as the callback context
+    
+    if (diskacc) // diskacc is set elsewhere; NOTE(review): presumably true only when CcSetAdditionalCacheAttributesEx is available - confirm
+        CcSetAdditionalCacheAttributesEx(FileObject, CC_ENABLE_DISK_IO_ACCOUNTING);
+
+    CcSetReadAheadGranularity(FileObject, READ_AHEAD_GRANULARITY); // READ_AHEAD_GRANULARITY is defined outside this hunk
+}
+
+static NTSTATUS create_calc_threads(PDEVICE_OBJECT DeviceObject) { // starts one calc_thread per active processor for this volume; returns the first failing NTSTATUS
+    device_extension* Vcb = DeviceObject->DeviceExtension;
+    ULONG i;
+    
+    Vcb->calcthreads.num_threads = KeQueryActiveProcessorCount(NULL);
+    
+    Vcb->calcthreads.threads = ExAllocatePoolWithTag(NonPagedPool, sizeof(drv_calc_thread) * Vcb->calcthreads.num_threads, ALLOC_TAG);
+    if (!Vcb->calcthreads.threads) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    
+    InitializeListHead(&Vcb->calcthreads.job_list);
+    ExInitializeResourceLite(&Vcb->calcthreads.lock);
+    KeInitializeEvent(&Vcb->calcthreads.event, NotificationEvent, FALSE);
+    
+    RtlZeroMemory(Vcb->calcthreads.threads, sizeof(drv_calc_thread) * Vcb->calcthreads.num_threads); // leaves every quit flag FALSE and every handle NULL
+    
+    for (i = 0; i < Vcb->calcthreads.num_threads; i++) {
+        NTSTATUS Status;
+        
+        Vcb->calcthreads.threads[i].DeviceObject = DeviceObject;
+        KeInitializeEvent(&Vcb->calcthreads.threads[i].finished, NotificationEvent, FALSE);
+        
+        Status = PsCreateSystemThread(&Vcb->calcthreads.threads[i].handle, 0, NULL, NULL, NULL, calc_thread, &Vcb->calcthreads.threads[i]);
+        if (!NT_SUCCESS(Status)) {
+            ULONG j;
+            
+            ERR("PsCreateSystemThread returned %08x\n", Status);
+            // Threads 0..i-1 did start; flag each of those (index j, not the failed slot i) to quit.
+            for (j = 0; j < i; j++) {
+                Vcb->calcthreads.threads[j].quit = TRUE;
+            }
+            
+            KeSetEvent(&Vcb->calcthreads.event, 0, FALSE); // NOTE(review): presumably calc_thread waits on this event and then checks quit - confirm in calcthread.c
+            
+            return Status;
+        }
+    }
+    
+    return STATUS_SUCCESS;
+}
+
 static BOOL raid_generations_okay(device_extension* Vcb) {
-    UINT64 i;
+    LIST_ENTRY* le2; // cursor over this volume's loaded devices (Vcb->devices)
     
     // FIXME - if the difference between superblocks is small, we should try to recover
     
-    for (i = 0; i < Vcb->superblock.num_devices; i++) {
-        LIST_ENTRY* le = volumes.Flink;
+    le2 = Vcb->devices.Flink;
+    while (le2 != &Vcb->devices) {
+        LIST_ENTRY* le;
+        device* dev = CONTAINING_RECORD(le2, device, list_entry);
+        
+        ExAcquireResourceSharedLite(&volumes_lock, TRUE); // held while walking the global volumes list; released on both exits below
+        
+        le = volumes.Flink;
+        
         while (le != &volumes) {
             volume* v = CONTAINING_RECORD(le, volume, list_entry);
             
             if (RtlCompareMemory(&Vcb->superblock.uuid, &v->fsuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID) &&
-                RtlCompareMemory(&Vcb->devices[i].devitem.device_uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)
+                RtlCompareMemory(&dev->devitem.device_uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)
             ) {
                 if (v->gen1 != Vcb->superblock.generation - 1) {
-                    WARN("device %llu had generation %llx, expected %llx\n", i, v->gen1, Vcb->superblock.generation - 1);
+                    WARN("device %llu had generation %llx, expected %llx\n", dev->devitem.dev_id, v->gen1, Vcb->superblock.generation - 1);
+                    ExReleaseResourceLite(&volumes_lock); // drop the lock before the early FALSE return
                     return FALSE;
                 } else
                     break;
             }
             le = le->Flink;
         }
+        
+        ExReleaseResourceLite(&volumes_lock);
+        
+        le2 = le2->Flink; // a device with no matching volumes entry is not treated as a mismatch
     }
     
     return TRUE;
 }
 
 static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
-    PIO_STACK_LOCATION Stack;
+    PIO_STACK_LOCATION IrpSp;
     PDEVICE_OBJECT NewDeviceObject = NULL;
     PDEVICE_OBJECT DeviceToMount;
     NTSTATUS Status;
     device_extension* Vcb = NULL;
     GET_LENGTH_INFORMATION gli;
-    UINT64 i;
     LIST_ENTRY *le, batchlist;
     KEY searchkey;
     traverse_ptr tp;
     fcb* root_fcb = NULL;
     ccb* root_ccb = NULL;
     BOOL init_lookaside = FALSE;
+    device* dev;
     
-    TRACE("mount_vol called\n");
+    TRACE("(%p, %p)\n", DeviceObject, Irp);
     
-    if (DeviceObject != devobj)
-    {
+    if (DeviceObject != devobj) {
         Status = STATUS_INVALID_DEVICE_REQUEST;
         goto exit;
     }
 
-    Stack = IoGetCurrentIrpStackLocation(Irp);
-    DeviceToMount = Stack->Parameters.MountVolume.DeviceObject;
+    IrpSp = IoGetCurrentIrpStackLocation(Irp);
+    DeviceToMount = IrpSp->Parameters.MountVolume.DeviceObject;
 
-    Status = dev_ioctl(DeviceToMount, IOCTL_DISK_GET_LENGTH_INFO, NULL, 0,
-                       &gli, sizeof(gli), TRUE, NULL);
+    Status = dev_ioctl(DeviceToMount, IOCTL_DISK_GET_LENGTH_INFO, NULL, 0, &gli, sizeof(gli), TRUE, NULL);
     if (!NT_SUCCESS(Status)) {
         ERR("error reading length information: %08x\n", Status);
         Status = STATUS_UNRECOGNIZED_VOLUME;
         goto exit;
     }
 
-    Status = IoCreateDevice(drvobj,
-                            sizeof(device_extension),
-                            NULL,
-                            FILE_DEVICE_DISK_FILE_SYSTEM,
-                            0,
-                            FALSE,
-                            &NewDeviceObject);
+    Status = IoCreateDevice(drvobj, sizeof(device_extension), NULL, FILE_DEVICE_DISK_FILE_SYSTEM, 0, FALSE, &NewDeviceObject);
     if (!NT_SUCCESS(Status)) {
         ERR("IoCreateDevice returned %08x\n", Status);
         Status = STATUS_UNRECOGNIZED_VOLUME;
         goto exit;
     }
     
-//     TRACE("DEV_ITEM = %x, superblock = %x\n", sizeof(DEV_ITEM), sizeof(superblock));
-
     NewDeviceObject->Flags |= DO_DIRECT_IO;
     Vcb = (PVOID)NewDeviceObject->DeviceExtension;
     RtlZeroMemory(Vcb, sizeof(device_extension));
@@ -3693,39 +3770,13 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
     Vcb->need_write = FALSE;
 
     ExInitializeResourceLite(&Vcb->fcb_lock);
-    ExInitializeResourceLite(&Vcb->DirResource);
-    ExInitializeResourceLite(&Vcb->checksum_lock);
     ExInitializeResourceLite(&Vcb->chunk_lock);
 
-    ExAcquireResourceExclusiveLite(&global_loading_lock, TRUE);
-    InsertTailList(&VcbList, &Vcb->list_entry);
-    ExReleaseResourceLite(&global_loading_lock);
-
     ExInitializeResourceLite(&Vcb->load_lock);
     ExAcquireResourceExclusiveLite(&Vcb->load_lock, TRUE);
 
-//     Vcb->Identifier.Type = NTFS_TYPE_VCB;
-//     Vcb->Identifier.Size = sizeof(NTFS_TYPE_VCB);
-// 
-//     Status = NtfsGetVolumeData(DeviceToMount,
-//                                Vcb);
-//     if (!NT_SUCCESS(Status))
-//         goto ByeBye;
-    
-//     Vcb->device = DeviceToMount;
     DeviceToMount->Flags |= DO_DIRECT_IO;
     
-//     Status = dev_ioctl(DeviceToMount, IOCTL_DISK_GET_DRIVE_GEOMETRY, NULL, 0,
-//                        &Vcb->geometry, sizeof(DISK_GEOMETRY), TRUE);
-//     if (!NT_SUCCESS(Status)) {
-//         ERR("error reading disk geometry: %08x\n", Status);
-//         goto exit;
-//     } else {
-//         TRACE("media type = %u, cylinders = %u, tracks per cylinder = %u, sectors per track = %u, bytes per sector = %u\n",
-//                       Vcb->geometry.MediaType, Vcb->geometry.Cylinders, Vcb->geometry.TracksPerCylinder,
-//                       Vcb->geometry.SectorsPerTrack, Vcb->geometry.BytesPerSector);
-//     }
-    
     TRACE("partition length = %llx\n", gli.Length.QuadPart);
 
     Status = read_superblock(Vcb, DeviceToMount, gli.Length.QuadPart);
@@ -3734,14 +3785,6 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
         goto exit;
     }
 
-    if (Vcb->superblock.magic != BTRFS_MAGIC) {
-        ERR("not a BTRFS volume\n");
-        Status = STATUS_UNRECOGNIZED_VOLUME;
-        goto exit;
-    } else {
-        TRACE("btrfs magic found\n");
-    }
-    
     Status = registry_load_volume_options(Vcb);
     if (!NT_SUCCESS(Status)) {
         ERR("registry_load_volume_options returned %08x\n", Status);
@@ -3760,12 +3803,15 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
         goto exit;
     }
     
+    ExAcquireResourceSharedLite(&volumes_lock, TRUE);
+    
     le = volumes.Flink;
     while (le != &volumes) {
         volume* v = CONTAINING_RECORD(le, volume, list_entry);
         
         if (RtlCompareMemory(&Vcb->superblock.uuid, &v->fsuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID) && v->devnum < Vcb->superblock.dev_item.dev_id) {
             // skipping over device in RAID which isn't the first one
+            ExReleaseResourceLite(&volumes_lock);
             Status = STATUS_UNRECOGNIZED_VOLUME;
             goto exit;
         }
@@ -3773,6 +3819,8 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
         le = le->Flink;
     }
     
+    ExReleaseResourceLite(&volumes_lock);
+    
     Vcb->readonly = FALSE;
     if (Vcb->superblock.compat_ro_flags & ~COMPAT_RO_SUPPORTED) {
         WARN("mounting read-only because of unsupported flags (%llx)\n", Vcb->superblock.compat_ro_flags & ~COMPAT_RO_SUPPORTED);
@@ -3785,31 +3833,30 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
     Vcb->superblock.generation++;
     Vcb->superblock.incompat_flags |= BTRFS_INCOMPAT_FLAGS_MIXED_BACKREF;
     
-    Vcb->devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device) * Vcb->superblock.num_devices, ALLOC_TAG);
-    if (!Vcb->devices) {
+    InitializeListHead(&Vcb->devices);
+    dev = ExAllocatePoolWithTag(NonPagedPool, sizeof(device), ALLOC_TAG);
+    if (!dev) {
         ERR("out of memory\n");
         Status = STATUS_INSUFFICIENT_RESOURCES;
         goto exit;
     }
     
-    Vcb->devices[0].devobj = DeviceToMount;
-    RtlCopyMemory(&Vcb->devices[0].devitem, &Vcb->superblock.dev_item, sizeof(DEV_ITEM));
-    
-    Vcb->devices[0].seeding = Vcb->superblock.flags & BTRFS_SUPERBLOCK_FLAGS_SEEDING ? TRUE : FALSE;
+    dev->devobj = DeviceToMount;
+    RtlCopyMemory(&dev->devitem, &Vcb->superblock.dev_item, sizeof(DEV_ITEM));
     
-    init_device(Vcb, &Vcb->devices[0], FALSE);
-    Vcb->devices[0].length = gli.Length.QuadPart;
+    dev->seeding = Vcb->superblock.flags & BTRFS_SUPERBLOCK_FLAGS_SEEDING ? TRUE : FALSE;
     
-    if (Vcb->superblock.num_devices > 1)
-        RtlZeroMemory(&Vcb->devices[1], sizeof(DEV_ITEM) * (Vcb->superblock.num_devices - 1));
+    init_device(Vcb, dev, FALSE, TRUE);
+    dev->length = gli.Length.QuadPart;
     
+    InsertTailList(&Vcb->devices, &dev->list_entry);
     Vcb->devices_loaded = 1;
     
     if (DeviceToMount->Flags & DO_SYSTEM_BOOT_PARTITION)
         Vcb->disallow_dismount = TRUE;
     
     TRACE("DeviceToMount = %p\n", DeviceToMount);
-    TRACE("Stack->Parameters.MountVolume.Vpb = %p\n", Stack->Parameters.MountVolume.Vpb);
+    TRACE("IrpSp->Parameters.MountVolume.Vpb = %p\n", IrpSp->Parameters.MountVolume.Vpb);
 
     NewDeviceObject->StackSize = DeviceToMount->StackSize + 1;
     NewDeviceObject->Flags &= ~DO_DEVICE_INITIALIZING;
@@ -3837,10 +3884,10 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
     InitializeListHead(&Vcb->chunks);
     InitializeListHead(&Vcb->chunks_changed);
     InitializeListHead(&Vcb->trees);
+    InitializeListHead(&Vcb->trees_hash);
     InitializeListHead(&Vcb->all_fcbs);
     InitializeListHead(&Vcb->dirty_fcbs);
     InitializeListHead(&Vcb->dirty_filerefs);
-    InitializeListHead(&Vcb->sector_checksums);
     
     KeInitializeSpinLock(&Vcb->dirty_fcbs_lock);
     KeInitializeSpinLock(&Vcb->dirty_filerefs_lock);
@@ -3872,17 +3919,22 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
             goto exit;
         }
         
-        if (Vcb->devices[0].readonly && !Vcb->readonly) {
+        if (dev->readonly && !Vcb->readonly) {
             Vcb->readonly = TRUE;
             
-            for (i = 0; i < Vcb->superblock.num_devices; i++) {
-                if (Vcb->devices[i].readonly && !Vcb->devices[i].seeding)
+            le = Vcb->devices.Flink;
+            while (le != &Vcb->devices) {
+                device* dev2 = CONTAINING_RECORD(le, device, list_entry);
+                
+                if (dev2->readonly && !dev2->seeding)
                     break;
                 
-                if (!Vcb->devices[i].readonly) {
+                if (!dev2->readonly) {
                     Vcb->readonly = FALSE;
                     break;
                 }
+                
+                le = le->Flink;
             }
             
             if (Vcb->readonly)
@@ -3898,7 +3950,7 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
             goto exit;
         }
     } else {
-        if (Vcb->devices[0].readonly) {
+        if (dev->readonly) {
             WARN("setting volume to readonly as device is readonly\n");
             Vcb->readonly = TRUE;
         }
@@ -3918,12 +3970,10 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
         goto exit;
     }
     
-    if (!Vcb->readonly) {
-        Status = find_chunk_usage(Vcb, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("find_chunk_usage returned %08x\n", Status);
-            goto exit;
-        }
+    Status = find_chunk_usage(Vcb, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("find_chunk_usage returned %08x\n", Status);
+        goto exit;
     }
     
     InitializeListHead(&batchlist);
@@ -3975,6 +4025,12 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
         goto exit;
     }
     
+    Status = load_dir_children(root_fcb, TRUE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("load_dir_children returned %08x\n", Status);
+        goto exit;
+    }
+    
     searchkey.obj_id = root_fcb->inode;
     searchkey.obj_type = TYPE_INODE_ITEM;
     searchkey.offset = 0xffffffffffffffff;
@@ -3994,7 +4050,7 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
     if (tp.item->size > 0)
         RtlCopyMemory(&root_fcb->inode_item, tp.item->data, min(sizeof(INODE_ITEM), tp.item->size));
     
-    fcb_get_sd(root_fcb, NULL, Irp);
+    fcb_get_sd(root_fcb, NULL, TRUE, Irp);
     
     root_fcb->atts = get_file_attributes(Vcb, &root_fcb->inode_item, root_fcb->subvol, root_fcb->inode, root_fcb->type, FALSE, FALSE, Irp);
     
@@ -4036,21 +4092,22 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
         goto exit;
     } _SEH2_END;
     
-    for (i = 0; i < Vcb->superblock.num_devices; i++) {
-        Status = find_disk_holes(Vcb, &Vcb->devices[i], Irp);
+    le = Vcb->devices.Flink;
+    while (le != &Vcb->devices) {
+        device* dev2 = CONTAINING_RECORD(le, device, list_entry);
+        
+        Status = find_disk_holes(Vcb, dev2, Irp);
         if (!NT_SUCCESS(Status)) {
             ERR("find_disk_holes returned %08x\n", Status);
             goto exit;
         }
+        
+        le = le->Flink;
     }
     
-//     root_test(Vcb);
-    
-    KeInitializeSpinLock(&Vcb->FcbListLock);
-
-    NewDeviceObject->Vpb = Stack->Parameters.MountVolume.Vpb;
-    Stack->Parameters.MountVolume.Vpb->DeviceObject = NewDeviceObject;
-    Stack->Parameters.MountVolume.Vpb->Flags |= VPB_MOUNTED;
+    NewDeviceObject->Vpb = IrpSp->Parameters.MountVolume.Vpb;
+    IrpSp->Parameters.MountVolume.Vpb->DeviceObject = NewDeviceObject;
+    IrpSp->Parameters.MountVolume.Vpb->Flags |= VPB_MOUNTED;
     NewDeviceObject->Vpb->VolumeLabelLength = 4; // FIXME
     NewDeviceObject->Vpb->VolumeLabel[0] = '?';
     NewDeviceObject->Vpb->VolumeLabel[1] = 0;
@@ -4065,10 +4122,20 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
         goto exit;
     }
     
+    Status = create_calc_threads(NewDeviceObject);
+    if (!NT_SUCCESS(Status)) {
+        ERR("create_calc_threads returned %08x\n", Status);
+        goto exit;
+    }
+    
     Status = registry_mark_volume_mounted(&Vcb->superblock.uuid);
     if (!NT_SUCCESS(Status))
         WARN("registry_mark_volume_mounted returned %08x\n", Status);
     
+    Status = look_for_balance_item(Vcb);
+    if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND)
+        WARN("look_for_balance_item returned %08x\n", Status);
+    
     Status = STATUS_SUCCESS;
 
 exit:
@@ -4099,34 +4166,41 @@ exit:
             ExDeleteResourceLite(&Vcb->tree_lock);
             ExDeleteResourceLite(&Vcb->load_lock);
             ExDeleteResourceLite(&Vcb->fcb_lock);
-            ExDeleteResourceLite(&Vcb->DirResource);
-            ExDeleteResourceLite(&Vcb->checksum_lock);
             ExDeleteResourceLite(&Vcb->chunk_lock);
 
-            if (Vcb->devices)
-                ExFreePoolWithTag(Vcb->devices, ALLOC_TAG);
-
-            RemoveEntryList(&Vcb->list_entry);
+            if (Vcb->devices.Flink) {
+                while (!IsListEmpty(&Vcb->devices)) {
+                    LIST_ENTRY* le = RemoveHeadList(&Vcb->devices);
+                    device* dev = CONTAINING_RECORD(le, device, list_entry);
+                    
+                    ExFreePool(dev);
+                }
+            }
         }
 
         if (NewDeviceObject)
             IoDeleteDevice(NewDeviceObject);
-    } else
+    } else {
+        ExAcquireResourceExclusiveLite(&global_loading_lock, TRUE);
+        InsertTailList(&VcbList, &Vcb->list_entry);
+        ExReleaseResourceLite(&global_loading_lock);
+        
         FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_MOUNT);
+    }
 
     TRACE("mount_vol done (status: %lx)\n", Status);
 
     return Status;
 }
 
-static NTSTATUS verify_volume(PDEVICE_OBJECT device) {
-    device_extension* Vcb = device->DeviceExtension;
+static NTSTATUS verify_volume(PDEVICE_OBJECT devobj) {
+    device_extension* Vcb = devobj->DeviceExtension;
     ULONG cc, to_read;
     IO_STATUS_BLOCK iosb;
     NTSTATUS Status;
     superblock* sb;
     UINT32 crc32;
-    UINT64 i;
+    LIST_ENTRY* le;
     
     if (Vcb->removing)
         return STATUS_WRONG_VOLUME;
@@ -4138,7 +4212,7 @@ static NTSTATUS verify_volume(PDEVICE_OBJECT device) {
         return Status;
     }
     
-    to_read = sector_align(sizeof(superblock), device->SectorSize);
+    to_read = devobj->SectorSize == 0 ? sizeof(superblock) : sector_align(sizeof(superblock), devobj->SectorSize);
     
     sb = ExAllocatePoolWithTag(NonPagedPool, to_read, ALLOC_TAG);
     if (!sb) {
@@ -4176,30 +4250,41 @@ static NTSTATUS verify_volume(PDEVICE_OBJECT device) {
     
     ExFreePool(sb);
     
-    for (i = 0; i < Vcb->superblock.num_devices; i++) {
-        if (Vcb->devices[i].removable) {
+    ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE);
+    
+    le = Vcb->devices.Flink;
+    while (le != &Vcb->devices) {
+        device* dev = CONTAINING_RECORD(le, device, list_entry);
+        
+        if (dev->removable) {
             NTSTATUS Status;
             ULONG cc;
             IO_STATUS_BLOCK iosb;
             
-            Status = dev_ioctl(Vcb->devices[i].devobj, IOCTL_STORAGE_CHECK_VERIFY, NULL, 0, &cc, sizeof(ULONG), TRUE, &iosb);
+            Status = dev_ioctl(dev->devobj, IOCTL_STORAGE_CHECK_VERIFY, NULL, 0, &cc, sizeof(ULONG), TRUE, &iosb);
             
             if (!NT_SUCCESS(Status)) {
+                ExReleaseResourceLite(&Vcb->tree_lock);
                 ERR("dev_ioctl returned %08x\n", Status);
                 return Status;
             }
             
             if (iosb.Information < sizeof(ULONG)) {
+                ExReleaseResourceLite(&Vcb->tree_lock);
                 ERR("iosb.Information was too short\n");
                 return STATUS_INTERNAL_ERROR;
             }
             
-            Vcb->devices[i].change_count = cc;
+            dev->change_count = cc;
         }
         
-        Vcb->devices[i].devobj->Flags &= ~DO_VERIFY_VOLUME;
+        dev->devobj->Flags &= ~DO_VERIFY_VOLUME;
+        
+        le = le->Flink;
     }
     
+    ExReleaseResourceLite(&Vcb->tree_lock);
+    
     Vcb->Vpb->RealDevice->Flags &= ~DO_VERIFY_VOLUME;
     
     return STATUS_SUCCESS;
@@ -4575,6 +4660,7 @@ NTSTATUS STDCALL DriverEntry(PDRIVER_OBJECT DriverObject, PUNICODE_STRING Regist
     PDEVICE_OBJECT DeviceObject;
     UNICODE_STRING device_nameW;
     UNICODE_STRING dosdevice_nameW;
+    control_device_extension* cde;
     
     InitializeListHead(&uid_map_list);
     
@@ -4609,6 +4695,38 @@ NTSTATUS STDCALL DriverEntry(PDRIVER_OBJECT DriverObject, PUNICODE_STRING Regist
 #endif
    
 //    TRACE("check CRC32C: %08x\n", calc_crc32c((UINT8*)"123456789", 9)); // should be e3069283
+    
+    if (RtlIsNtDdiVersionAvailable(NTDDI_WIN8)) {
+        UNICODE_STRING name;
+        tPsIsDiskCountersEnabled PsIsDiskCountersEnabled;
+        
+        RtlInitUnicodeString(&name, L"PsIsDiskCountersEnabled");
+        PsIsDiskCountersEnabled = (tPsIsDiskCountersEnabled)MmGetSystemRoutineAddress(&name);
+        
+        if (PsIsDiskCountersEnabled) {
+            diskacc = PsIsDiskCountersEnabled();
+            
+            RtlInitUnicodeString(&name, L"PsUpdateDiskCounters");
+            PsUpdateDiskCounters = (tPsUpdateDiskCounters)MmGetSystemRoutineAddress(&name);
+            
+            if (!PsUpdateDiskCounters)
+                diskacc = FALSE;
+        }
+        
+        RtlInitUnicodeString(&name, L"CcCopyReadEx");
+        CcCopyReadEx = (tCcCopyReadEx)MmGetSystemRoutineAddress(&name);
+        
+        RtlInitUnicodeString(&name, L"CcCopyWriteEx");
+        CcCopyWriteEx = (tCcCopyWriteEx)MmGetSystemRoutineAddress(&name);
+        
+        RtlInitUnicodeString(&name, L"CcSetAdditionalCacheAttributesEx");
+        CcSetAdditionalCacheAttributesEx = (tCcSetAdditionalCacheAttributesEx)MmGetSystemRoutineAddress(&name);
+    } else {
+        PsUpdateDiskCounters = NULL;
+        CcCopyReadEx = NULL;
+        CcCopyWriteEx = NULL;
+        CcSetAdditionalCacheAttributesEx = NULL;
+    }
    
     drvobj = DriverObject;
 
@@ -4642,13 +4760,17 @@ NTSTATUS STDCALL DriverEntry(PDRIVER_OBJECT DriverObject, PUNICODE_STRING Regist
     dosdevice_nameW.Buffer = dosdevice_name;
     dosdevice_nameW.Length = dosdevice_nameW.MaximumLength = (USHORT)wcslen(dosdevice_name) * sizeof(WCHAR);
 
-    Status = IoCreateDevice(DriverObject, 0, &device_nameW, FILE_DEVICE_DISK_FILE_SYSTEM, FILE_DEVICE_SECURE_OPEN, FALSE, &DeviceObject);
+    Status = IoCreateDevice(DriverObject, sizeof(control_device_extension), &device_nameW, FILE_DEVICE_DISK_FILE_SYSTEM,
+                            FILE_DEVICE_SECURE_OPEN, FALSE, &DeviceObject);
     if (!NT_SUCCESS(Status)) {
         ERR("IoCreateDevice returned %08x\n", Status);
         return Status;
     }
     
     devobj = DeviceObject;
+    cde = (control_device_extension*)devobj->DeviceExtension;
+    
+    cde->type = VCB_TYPE_CONTROL;
     
     DeviceObject->Flags &= ~DO_DEVICE_INITIALIZING;
 
@@ -4665,10 +4787,16 @@ NTSTATUS STDCALL DriverEntry(PDRIVER_OBJECT DriverObject, PUNICODE_STRING Regist
     }
 
     InitializeListHead(&volumes);
-    look_for_vols(DriverObject, &volumes);
+    InitializeListHead(&pnp_disks);
     
     InitializeListHead(&VcbList);
     ExInitializeResourceLite(&global_loading_lock);
+    ExInitializeResourceLite(&volumes_lock);
+    
+    Status = IoRegisterPlugPlayNotification(EventCategoryDeviceInterfaceChange, PNPNOTIFY_DEVICE_INTERFACE_INCLUDE_EXISTING_INTERFACES,
+                                            (PVOID)&GUID_DEVINTERFACE_DISK, DriverObject, pnp_notification, DriverObject, &notification_entry);
+    if (!NT_SUCCESS(Status))
+        ERR("IoRegisterPlugPlayNotification returned %08x\n", Status);
     
     IoRegisterFileSystem(DeviceObject);
 
index c5184cd..f7a3d45 100644 (file)
@@ -37,6 +37,8 @@ static const UINT64 superblock_addrs[] = { 0x10000, 0x4000000, 0x4000000000, 0x4
 #define TYPE_DEV_EXTENT        0xCC
 #define TYPE_DEV_ITEM          0xD8
 #define TYPE_CHUNK_ITEM        0xE4
+#define TYPE_TEMP_ITEM         0xF8
+#define TYPE_DEV_STATS         0xF9
 #define TYPE_SUBVOL_UUID       0xFB
 
 #define BTRFS_ROOT_ROOT         1
@@ -72,6 +74,7 @@ static const UINT64 superblock_addrs[] = { 0x10000, 0x4000000, 0x4000000000, 0x4
 
 #define FREE_SPACE_CACHE_ID     0xFFFFFFFFFFFFFFF5
 #define EXTENT_CSUM_ID          0xFFFFFFFFFFFFFFF6
+#define BALANCE_ITEM_ID         0xFFFFFFFFFFFFFFFC
 
 #define BTRFS_INODE_NODATASUM   0x001
 #define BTRFS_INODE_NODATACOW   0x002
@@ -436,6 +439,62 @@ typedef struct {
     BTRFS_UUID chunktree_uuid;
 } DEV_EXTENT;
 
+#define BALANCE_FLAGS_DATA          0x1
+#define BALANCE_FLAGS_SYSTEM        0x2
+#define BALANCE_FLAGS_METADATA      0x4
+
+#define BALANCE_ARGS_FLAGS_PROFILES         0x001
+#define BALANCE_ARGS_FLAGS_USAGE            0x002
+#define BALANCE_ARGS_FLAGS_DEVID            0x004
+#define BALANCE_ARGS_FLAGS_DRANGE           0x008
+#define BALANCE_ARGS_FLAGS_VRANGE           0x010
+#define BALANCE_ARGS_FLAGS_LIMIT            0x020
+#define BALANCE_ARGS_FLAGS_LIMIT_RANGE      0x040
+#define BALANCE_ARGS_FLAGS_STRIPES_RANGE    0x080
+#define BALANCE_ARGS_FLAGS_CONVERT          0x100
+#define BALANCE_ARGS_FLAGS_SOFT             0x200
+#define BALANCE_ARGS_FLAGS_USAGE_RANGE      0x400
+
+typedef struct { // Argument/filter set for a balance; BALANCE_ITEM embeds three of these (data, metadata, system).
+    UINT64 profiles; // NOTE(review): presumably consulted when BALANCE_ARGS_FLAGS_PROFILES is set in flags - confirm
+
+    union { // usage shares its 8 bytes with the usage_start/usage_end pair
+            UINT64 usage;
+            struct {
+                    UINT32 usage_start;
+                    UINT32 usage_end;
+            };
+    };
+
+    UINT64 devid;
+    UINT64 drange_start;
+    UINT64 drange_end;
+    UINT64 vrange_start;
+    UINT64 vrange_end;
+    UINT64 convert;
+    UINT64 flags; // NOTE(review): presumably a mask of the BALANCE_ARGS_FLAGS_* values defined above - confirm
+
+    union { // limit shares its 8 bytes with the limit_start/limit_end pair
+            UINT64 limit;
+            struct {
+                    UINT32 limit_start;
+                    UINT32 limit_end;
+            };
+    };
+
+    UINT32 stripes_start;
+    UINT32 stripes_end;
+    UINT8 reserved[48];
+} BALANCE_ARGS;
+
+typedef struct { // One BALANCE_ARGS per type (data, metadata, system) plus a flags word and reserved padding.
+    UINT64 flags; // NOTE(review): presumably a mask of BALANCE_FLAGS_DATA / _SYSTEM / _METADATA - confirm
+    BALANCE_ARGS data;
+    BALANCE_ARGS metadata;
+    BALANCE_ARGS system;
+    UINT8 reserved[32];
+} BALANCE_ITEM;
+
 #pragma pack(pop)
 
 #endif
index 467f2e8..333e8b3 100644 (file)
@@ -70,12 +70,12 @@ BEGIN
         BLOCK "080904b0"
         BEGIN
             VALUE "FileDescription", "WinBtrfs"
-            VALUE "FileVersion", "0.7"
+            VALUE "FileVersion", "0.8"
             VALUE "InternalName", "btrfs"
             VALUE "LegalCopyright", "Copyright (c) Mark Harmstone 2016"
             VALUE "OriginalFilename", "btrfs.sys"
             VALUE "ProductName", "WinBtrfs"
-            VALUE "ProductVersion", "0.7"
+            VALUE "ProductVersion", "0.8"
         END
     END
     BLOCK "VarFileInfo"
index 22ae32e..b3aebf4 100644 (file)
@@ -46,6 +46,7 @@
 #include <stddef.h>
 #include <emmintrin.h>
 #include "btrfs.h"
+#include "btrfsioctl.h"
 
 #ifdef _DEBUG
 // #define DEBUG_FCB_REFCOUNTS
@@ -108,15 +109,23 @@ typedef struct {
     UINT64 gen1, gen2;
     BOOL seeding;
     BOOL processed;
+    DWORD disk_num;
+    DWORD part_num;
     LIST_ENTRY list_entry;
 } volume;
 
+typedef struct { // NOTE(review): presumably one node of the pnp_disks list initialised in DriverEntry - confirm
+    UNICODE_STRING devpath;
+    ULONG disk_num;
+    LIST_ENTRY list_entry;
+} pnp_disk;
+
 typedef struct _fcb_nonpaged { // Mutex, section pointers and resources reached from fcb through its nonpaged pointer.
     FAST_MUTEX HeaderMutex;
     SECTION_OBJECT_POINTERS segment_object;
     ERESOURCE resource;
     ERESOURCE paging_resource;
-    ERESOURCE index_lock;
+    ERESOURCE dir_children_lock; // takes the place of index_lock; NOTE(review): presumably guards the fcb's dir_children_* lists - confirm
 } fcb_nonpaged;
 
 struct _root;
@@ -127,21 +136,12 @@ typedef struct {
     ULONG datalen;
     BOOL unique;
     BOOL ignore;
+    BOOL inserted;
+    UINT32* csum;
     
     LIST_ENTRY list_entry;
 } extent;
 
-typedef struct {
-    UINT32 hash;
-    KEY key;
-    UINT8 type;
-    UINT64 index;
-    ANSI_STRING utf8;
-    UNICODE_STRING filepart_uc;
-
-    LIST_ENTRY list_entry;
-} index_entry;
-
 typedef struct {
     UINT64 parent;
     UINT64 index;
@@ -152,6 +152,21 @@ typedef struct {
 
 struct _file_ref;
 
+typedef struct { // NOTE(review): presumably one cached directory entry, created by add_dir_child (its parameters mirror index/utf8/name/name_uc/type) - confirm
+    KEY key;
+    UINT64 index;
+    UINT8 type;
+    ANSI_STRING utf8;
+    UINT32 hash;
+    UNICODE_STRING name;
+    UINT32 hash_uc; // NOTE(review): "_uc" presumably means upper-cased, for case-insensitive lookup - confirm
+    UNICODE_STRING name_uc;
+    struct _file_ref* fileref; // file_ref has a matching dir_child* dc member
+    LIST_ENTRY list_entry_index; // NOTE(review): presumably links into fcb.dir_children_index - confirm
+    LIST_ENTRY list_entry_hash; // NOTE(review): presumably links into fcb.dir_children_hash - confirm
+    LIST_ENTRY list_entry_hash_uc; // NOTE(review): presumably links into fcb.dir_children_hash_uc - confirm
+} dir_child;
+
 typedef struct _fcb {
     FSRTL_ADVANCED_FCB_HEADER Header;
     struct _fcb_nonpaged* nonpaged;
@@ -177,8 +192,11 @@ typedef struct _fcb {
     struct _file_ref* fileref;
     BOOL inode_item_changed;
     
-    BOOL index_loaded;
-    LIST_ENTRY index_list;
+    LIST_ENTRY dir_children_index;
+    LIST_ENTRY dir_children_hash;
+    LIST_ENTRY dir_children_hash_uc;
+    LIST_ENTRY** hash_ptrs;
+    LIST_ENTRY** hash_ptrs_uc;
     
     BOOL dirty;
     BOOL sd_dirty;
@@ -223,6 +241,7 @@ typedef struct _file_ref {
     LONG open_count;
     struct _file_ref* parent;
     WCHAR* debug_desc;
+    dir_child* dc;
     
     BOOL dirty;
     
@@ -244,6 +263,8 @@ typedef struct _ccb {
     UNICODE_STRING query_string;
     BOOL has_wildcard;
     BOOL specific_file;
+    BOOL manage_volume_privilege;
+    BOOL allow_extended_dasd_io;
     ACCESS_MASK access;
     file_ref* fileref;
     UNICODE_STRING filename;
@@ -309,6 +330,7 @@ typedef struct _tree {
 //     UINT64 address;
 //     UINT8 level;
     tree_header header;
+    UINT32 hash;
     BOOL has_address;
     UINT32 size;
     struct _device_extension* Vcb;
@@ -318,10 +340,10 @@ typedef struct _tree {
 //     tree_nonpaged* nonpaged;
     LIST_ENTRY itemlist;
     LIST_ENTRY list_entry;
+    LIST_ENTRY list_entry_hash;
     UINT64 new_address;
     BOOL has_new_address;
     BOOL updated_extents;
-    UINT64 flags;
     BOOL write;
 } tree;
 
@@ -343,10 +365,15 @@ typedef struct _root {
 
 enum batch_operation { // NOTE(review): presumably the operation kinds carried by entries that commit_batch_list processes - confirm
     Batch_Insert,
+    Batch_Delete,
     Batch_SetXattr,
     Batch_DirItem,
     Batch_InodeRef,
     Batch_InodeExtRef,
+    Batch_DeleteInode,
+    Batch_DeleteDirItem,
+    Batch_DeleteInodeRef,
+    Batch_DeleteInodeExtRef,
 };
 
 typedef struct {
@@ -386,11 +413,15 @@ typedef struct {
     BOOL removable;
     BOOL seeding;
     BOOL readonly;
+    BOOL reloc;
     BOOL ssd;
     BOOL trim;
     ULONG change_count;
     UINT64 length;
+    ULONG disk_num;
+    ULONG part_num;
     LIST_ENTRY space;
+    LIST_ENTRY list_entry;
 } device;
 
 typedef struct {
@@ -419,9 +450,13 @@ typedef struct {
     ERESOURCE changed_extents_lock;
     BOOL created;
     BOOL readonly;
+    BOOL reloc;
+    BOOL last_alloc_set;
+    UINT64 last_alloc;
     
     LIST_ENTRY list_entry;
     LIST_ENTRY list_entry_changed;
+    LIST_ENTRY list_entry_balance;
 } chunk;
 
 typedef struct {
@@ -455,6 +490,31 @@ typedef struct {
     LIST_ENTRY list_entry;
 } sys_chunk;
 
+typedef struct { // Job record handed back through add_calc_job's pcj argument; data/sectors/csum match that function's parameters.
+    UINT8* data;
+    UINT32* csum;
+    UINT32 sectors;
+    LONG pos, done; // NOTE(review): presumably progress counters shared between worker threads - confirm
+    KEVENT event;
+    LONG refcount;
+    LIST_ENTRY list_entry; // NOTE(review): presumably links into drv_calc_threads.job_list - confirm
+} calc_job;
+
+typedef struct { // Per-thread record; drv_calc_threads.threads is a pointer to this type.
+    PDEVICE_OBJECT DeviceObject;
+    HANDLE handle;
+    KEVENT finished;
+    BOOL quit;
+} drv_calc_thread;
+
+typedef struct { // Embedded in device_extension as its calcthreads member.
+    ULONG num_threads; // NOTE(review): presumably the number of elements behind threads - confirm
+    LIST_ENTRY job_list;
+    ERESOURCE lock;
+    drv_calc_thread* threads;
+    KEVENT event;
+} drv_calc_threads;
+
 typedef struct {
     BOOL ignore;
     BOOL compress;
@@ -467,10 +527,12 @@ typedef struct {
     UINT64 subvol_id;
     UINT32 raid5_recalculation;
     UINT32 raid6_recalculation;
+    BOOL skip_balance;
 } mount_options;
 
 #define VCB_TYPE_VOLUME     1
 #define VCB_TYPE_PARTITION0 2
+#define VCB_TYPE_CONTROL    3
 
 #ifdef DEBUG_STATS
 typedef struct {
@@ -479,14 +541,40 @@ typedef struct {
     UINT64 read_total_time;
     UINT64 read_csum_time;
     UINT64 read_disk_time;
+    
+    UINT64 num_opens;
+    UINT64 open_total_time;
+    UINT64 num_overwrites;
+    UINT64 overwrite_total_time;
+    UINT64 num_creates;
+    UINT64 create_total_time;
 } debug_stats;
 #endif
 
+#define BALANCE_OPTS_DATA       0
+#define BALANCE_OPTS_METADATA   1
+#define BALANCE_OPTS_SYSTEM     2
+
+typedef struct { // Balance state; embedded in device_extension as its balance member.
+    HANDLE thread;
+    UINT64 total_chunks;
+    UINT64 chunks_left;
+    btrfs_balance_opts opts[3]; // NOTE(review): presumably indexed by BALANCE_OPTS_DATA / _METADATA / _SYSTEM (0..2) - confirm
+    BOOL paused;
+    BOOL stopping;
+    BOOL cancelling;
+    BOOL removing;
+    BOOL dev_readonly;
+    NTSTATUS status;
+    KEVENT event;
+    KEVENT finished;
+} balance_info;
+
 typedef struct _device_extension {
     UINT32 type;
     mount_options options;
     PVPB Vpb;
-    device* devices;
+    LIST_ENTRY devices;
 #ifdef DEBUG_STATS
     debug_stats stats;
 #endif
@@ -497,14 +585,13 @@ typedef struct _device_extension {
     BOOL readonly;
     BOOL removing;
     BOOL locked;
+    BOOL lock_paused_balance;
     BOOL disallow_dismount;
     BOOL trim;
     PFILE_OBJECT locked_fileobj;
     fcb* volume_fcb;
     file_ref* root_fileref;
     LONG open_files;
-    ERESOURCE DirResource;
-    KSPIN_LOCK FcbListLock;
     ERESOURCE fcb_lock;
     ERESOURCE load_lock;
     ERESOURCE tree_lock;
@@ -514,8 +601,9 @@ typedef struct _device_extension {
     BOOL need_write;
 //     ERESOURCE LogToPhysLock;
 //     UINT64 chunk_root_phys_addr;
-    UINT64 root_tree_phys_addr;
     UINT64 data_flags;
+    UINT64 metadata_flags;
+    UINT64 system_flags;
 //     log_to_phys* log_to_phys;
     LIST_ENTRY roots;
     LIST_ENTRY drop_roots;
@@ -531,17 +619,19 @@ typedef struct _device_extension {
     LIST_ENTRY chunks;
     LIST_ENTRY chunks_changed;
     LIST_ENTRY trees;
+    LIST_ENTRY trees_hash;
+    LIST_ENTRY* trees_ptrs[256];
     LIST_ENTRY all_fcbs;
     LIST_ENTRY dirty_fcbs;
     KSPIN_LOCK dirty_fcbs_lock;
     LIST_ENTRY dirty_filerefs;
     KSPIN_LOCK dirty_filerefs_lock;
-    ERESOURCE checksum_lock;
     ERESOURCE chunk_lock;
-    LIST_ENTRY sector_checksums;
     HANDLE flush_thread_handle;
     KTIMER flush_thread_timer;
     KEVENT flush_thread_finished;
+    drv_calc_threads calcthreads;
+    balance_info balance;
     PFILE_OBJECT root_file;
     PAGED_LOOKASIDE_LIST tree_data_lookaside;
     PAGED_LOOKASIDE_LIST traverse_ptr_lookaside;
@@ -558,24 +648,16 @@ typedef struct {
     UNICODE_STRING name;
 } part0_device_extension;
 
+typedef struct { // Extension of the control device: DriverEntry passes its size to IoCreateDevice and sets type to VCB_TYPE_CONTROL.
+    UINT32 type; // NOTE(review): device_extension also starts with UINT32 type; presumably this lets code tell extension kinds apart - confirm
+} control_device_extension;
+
 typedef struct { // NOTE(review): presumably one SID-to-uid mapping kept on uid_map_list (see add_user_mapping / sid_to_uid) - confirm
     LIST_ENTRY listentry;
     PSID sid;
     UINT32 uid;
 } uid_map;
 
-typedef struct {
-    LIST_ENTRY list_entry;
-    UINT64 key;
-} ordered_list;
-
-typedef struct {
-    ordered_list ol;
-    ULONG length;
-    UINT32* checksums;
-    BOOL deleted;
-} changed_sector;
-
 enum write_data_status {
     WriteDataStatus_Pending,
     WriteDataStatus_Success,
@@ -605,6 +687,14 @@ typedef struct _write_data_context {
     BOOL tree;
 } write_data_context;
 
+typedef struct { // NOTE(review): presumably one node of the tree_writes list passed to do_tree_writes - confirm
+    UINT64 address;
+    UINT32 length;
+    BOOL overlap;
+    UINT8* data;
+    LIST_ENTRY list_entry;
+} tree_write;
+
 // #pragma pack(pop)
 
 static __inline void* map_user_buffer(PIRP Irp) {
@@ -626,27 +716,6 @@ static __inline void win_time_to_unix(LARGE_INTEGER t, BTRFS_TIME* out) {
     out->nanoseconds = (l % 10000000) * 100;
 }
 
-static __inline void insert_into_ordered_list(LIST_ENTRY* list, ordered_list* ins) {
-    LIST_ENTRY* le = list->Flink;
-    ordered_list* ol;
-    
-    while (le != list) {
-        ol = (ordered_list*)le;
-        
-        if (ol->key > ins->key) {
-            le->Blink->Flink = &ins->list_entry;
-            ins->list_entry.Blink = le->Blink;
-            le->Blink = &ins->list_entry;
-            ins->list_entry.Flink = le;
-            return;
-        }
-        
-        le = le->Flink;
-    }
-    
-    InsertTailList(list, &ins->list_entry);
-}
-
 static __inline void get_raid0_offset(UINT64 off, UINT64 stripe_length, UINT16 num_stripes, UINT64* stripeoff, UINT16* stripe) {
     UINT64 initoff, startoff;
     
@@ -682,12 +751,12 @@ static UINT64 __inline make_file_id(root* r, UINT64 inode) {
 // in btrfs.c
 device* find_device_from_uuid(device_extension* Vcb, BTRFS_UUID* uuid);
 UINT64 sector_align( UINT64 NumberToBeAligned, UINT64 Alignment );
+BOOL get_file_attributes_from_xattr(char* val, UINT16 len, ULONG* atts);
 ULONG STDCALL get_file_attributes(device_extension* Vcb, INODE_ITEM* ii, root* r, UINT64 inode, UINT8 type, BOOL dotfile, BOOL ignore_xa, PIRP Irp);
+BOOL extract_xattr(void* item, USHORT size, char* name, UINT8** data, UINT16* datalen);
 BOOL STDCALL get_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char* name, UINT32 crc32, UINT8** data, UINT16* datalen, PIRP Irp);
 void _free_fcb(fcb* fcb, const char* func, const char* file, unsigned int line);
 void _free_fileref(file_ref* fr, const char* func, const char* file, unsigned int line);
-NTSTATUS delete_dir_item(device_extension* Vcb, root* subvol, UINT64 parinode, UINT32 crc32, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback);
-NTSTATUS delete_inode_ref(device_extension* Vcb, root* subvol, UINT64 inode, UINT64 parinode, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback);
 fcb* create_fcb(POOL_TYPE pool_type);
 file_ref* create_fileref();
 void protect_superblocks(device_extension* Vcb, chunk* c);
@@ -708,6 +777,9 @@ void mark_fileref_dirty(file_ref* fileref);
 NTSTATUS delete_fileref(file_ref* fileref, PFILE_OBJECT FileObject, PIRP Irp, LIST_ENTRY* rollback);
 void chunk_lock_range(device_extension* Vcb, chunk* c, UINT64 start, UINT64 length);
 void chunk_unlock_range(device_extension* Vcb, chunk* c, UINT64 start, UINT64 length);
+void init_device(device_extension* Vcb, device* dev, BOOL get_length, BOOL get_nums);
+void init_file_cache(PFILE_OBJECT FileObject, CC_FILE_SIZES* ccfs);
+NTSTATUS sync_read_phys(PDEVICE_OBJECT DeviceObject, LONGLONG StartingOffset, ULONG Length, PUCHAR Buffer, BOOL override);
 
 #ifdef _MSC_VER
 #define funcname __FUNCTION__
@@ -729,6 +801,7 @@ extern UINT32 mount_flush_interval;
 extern UINT32 mount_max_inline;
 extern UINT32 mount_raid5_recalculation;
 extern UINT32 mount_raid6_recalculation;
+extern UINT32 mount_skip_balance;
 
 #ifdef _DEBUG
 
@@ -815,6 +888,8 @@ typedef struct {
 
 // in treefuncs.c
 NTSTATUS STDCALL _find_item(device_extension* Vcb, root* r, traverse_ptr* tp, const KEY* searchkey, BOOL ignore, PIRP Irp, const char* func, const char* file, unsigned int line);
+NTSTATUS STDCALL _find_item_to_level(device_extension* Vcb, root* r, traverse_ptr* tp, const KEY* searchkey, BOOL ignore, UINT8 level,
+                                     PIRP Irp, const char* func, const char* file, unsigned int line);
 BOOL STDCALL _find_next_item(device_extension* Vcb, const traverse_ptr* tp, traverse_ptr* next_tp, BOOL ignore, PIRP Irp, const char* func, const char* file, unsigned int line);
 BOOL STDCALL _find_prev_item(device_extension* Vcb, const traverse_ptr* tp, traverse_ptr* prev_tp, BOOL ignore, PIRP Irp, const char* func, const char* file, unsigned int line);
 void STDCALL free_trees(device_extension* Vcb);
@@ -832,6 +907,7 @@ void commit_batch_list(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp, L
 void clear_batch_list(device_extension* Vcb, LIST_ENTRY* batchlist);
 
 #define find_item(Vcb, r, tp, searchkey, ignore, Irp) _find_item(Vcb, r, tp, searchkey, ignore, Irp, funcname, __FILE__, __LINE__)
+#define find_item_to_level(Vcb, r, tp, searchkey, ignore, level, Irp) _find_item_to_level(Vcb, r, tp, searchkey, ignore, level, Irp, funcname, __FILE__, __LINE__)
 #define find_next_item(Vcb, tp, next_tp, ignore, Irp) _find_next_item(Vcb, tp, next_tp, ignore, Irp, funcname, __FILE__, __LINE__)
 #define find_prev_item(Vcb, tp, prev_tp, ignore, Irp) _find_prev_item(Vcb, tp, prev_tp, ignore, Irp, funcname, __FILE__, __LINE__)
 #define free_tree(t) _free_tree(t, funcname, __FILE__, __LINE__)
@@ -839,7 +915,13 @@ void clear_batch_list(device_extension* Vcb, LIST_ENTRY* batchlist);
 #define do_load_tree(Vcb, th, r, t, td, loaded, Irp) _do_load_tree(Vcb, th, r, t, td, loaded, Irp, funcname, __FILE__, __LINE__)  
 
 // in search.c
-void STDCALL look_for_vols(PDRIVER_OBJECT DriverObject, LIST_ENTRY* volumes);
+void remove_drive_letter(PDEVICE_OBJECT mountmgr, volume* v);
+void add_volume(PDEVICE_OBJECT mountmgr, PUNICODE_STRING us);
+#ifdef __REACTOS__
+NTSTATUS NTAPI pnp_notification(PVOID NotificationStructure, PVOID Context);
+#else
+NTSTATUS pnp_notification(PVOID NotificationStructure, PVOID Context);
+#endif
 
 // in cache.c
 NTSTATUS STDCALL init_cache();
@@ -853,19 +935,18 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void
 NTSTATUS truncate_file(fcb* fcb, UINT64 end, PIRP Irp, LIST_ENTRY* rollback);
 NTSTATUS extend_file(fcb* fcb, file_ref* fileref, UINT64 end, BOOL prealloc, PIRP Irp, LIST_ENTRY* rollback);
 NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 end_data, PIRP Irp, LIST_ENTRY* rollback);
-void commit_checksum_changes(device_extension* Vcb, LIST_ENTRY* changed_sector_list);
 chunk* get_chunk_from_address(device_extension* Vcb, UINT64 address);
 chunk* alloc_chunk(device_extension* Vcb, UINT64 flags);
 NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, BOOL need_free, UINT32 length, write_data_context* wtc, PIRP Irp, chunk* c);
 NTSTATUS STDCALL write_data_complete(device_extension* Vcb, UINT64 address, void* data, UINT32 length, PIRP Irp, chunk* c);
 void free_write_data_stripes(write_data_context* wtc);
 NTSTATUS STDCALL drv_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
-BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start_data, UINT64 length, BOOL prealloc, void* data, LIST_ENTRY* changed_sector_list,
+BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start_data, UINT64 length, BOOL prealloc, void* data,
                          PIRP Irp, LIST_ENTRY* rollback, UINT8 compression, UINT64 decoded_size);
-NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback);
-NTSTATUS do_write_file(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback);
-NTSTATUS write_compressed(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback);
-BOOL find_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address);
+NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, PIRP Irp, LIST_ENTRY* rollback);
+NTSTATUS do_write_file(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, PIRP Irp, LIST_ENTRY* rollback);
+NTSTATUS write_compressed(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, PIRP Irp, LIST_ENTRY* rollback);
+BOOL find_data_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address);
 void get_raid56_lock_range(chunk* c, UINT64 address, UINT64 length, UINT64* lockaddr, UINT64* locklen);
 
 // in dirctrl.c
@@ -875,7 +956,8 @@ ULONG STDCALL get_reparse_tag(device_extension* Vcb, root* subvol, UINT64 inode,
 // in security.c
 NTSTATUS STDCALL drv_query_security(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
 NTSTATUS STDCALL drv_set_security(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
-void fcb_get_sd(fcb* fcb, struct _fcb* parent, PIRP Irp);
+BOOL get_sd_from_xattr(fcb* fcb, ULONG buflen);
+void fcb_get_sd(fcb* fcb, struct _fcb* parent, BOOL look_for_xattr, PIRP Irp);
 // UINT32 STDCALL get_uid();
 void add_user_mapping(WCHAR* sidstring, ULONG sidstringlength, UINT32 uid);
 UINT32 sid_to_uid(PSID sid);
@@ -891,6 +973,8 @@ NTSTATUS fileref_get_filename(file_ref* fileref, PUNICODE_STRING fn, USHORT* nam
 NTSTATUS open_fileref_by_inode(device_extension* Vcb, root* subvol, UINT64 inode, file_ref** pfr, PIRP Irp);
 NTSTATUS STDCALL drv_query_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
 NTSTATUS STDCALL drv_set_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
+void insert_dir_child_into_hash_lists(fcb* fcb, dir_child* dc);
+void remove_dir_child_from_hash_lists(fcb* fcb, dir_child* dc);
 
 // in reparse.c
 NTSTATUS get_reparse_point(PDEVICE_OBJECT DeviceObject, PFILE_OBJECT FileObject, void* buffer, DWORD buflen, ULONG_PTR* retlen);
@@ -899,15 +983,16 @@ NTSTATUS delete_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp);
 
 // in create.c
 NTSTATUS STDCALL drv_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
-NTSTATUS STDCALL find_file_in_dir(device_extension* Vcb, PUNICODE_STRING filename, file_ref* fr,
-                                  root** subvol, UINT64* inode, UINT8* type, UINT64* index, PANSI_STRING utf8, BOOL case_sensitive, PIRP Irp);
-NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnus, file_ref* related, BOOL parent, USHORT* unparsed, ULONG* fn_offset,
+NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnus, file_ref* related, BOOL parent, USHORT* parsed, ULONG* fn_offset,
                       POOL_TYPE pooltype, BOOL case_sensitive, PIRP Irp);
 NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type, PANSI_STRING utf8, fcb* parent, fcb** pfcb, POOL_TYPE pooltype, PIRP Irp);
 NTSTATUS open_fcb_stream(device_extension* Vcb, root* subvol, UINT64 inode, ANSI_STRING* xattr, UINT32 streamhash, fcb* parent, fcb** pfcb, PIRP Irp);
 void insert_fileref_child(file_ref* parent, file_ref* child, BOOL do_lock);
 NTSTATUS fcb_get_last_dir_index(fcb* fcb, UINT64* index, PIRP Irp);
 NTSTATUS verify_vcb(device_extension* Vcb, PIRP Irp);
+NTSTATUS load_csum(device_extension* Vcb, UINT32* csum, UINT64 start, UINT64 length, PIRP Irp);
+NTSTATUS load_dir_children(fcb* fcb, BOOL ignore_size, PIRP Irp);
+NTSTATUS add_dir_child(fcb* fcb, UINT64 inode, BOOL subvol, UINT64 index, PANSI_STRING utf8, PUNICODE_STRING name, PUNICODE_STRING name_uc, UINT8 type, dir_child** pdc);
 
 // in fsctl.c
 NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL user);
@@ -920,11 +1005,15 @@ NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENT
 void flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback);
 NTSTATUS STDCALL write_data_phys(PDEVICE_OBJECT device, UINT64 address, void* data, UINT32 length);
 BOOL is_tree_unique(device_extension* Vcb, tree* t, PIRP Irp);
+NTSTATUS do_tree_writes(device_extension* Vcb, LIST_ENTRY* tree_writes, PIRP Irp);
+void add_checksum_entry(device_extension* Vcb, UINT64 address, ULONG length, UINT32* csum, PIRP Irp, LIST_ENTRY* rollback);
+BOOL find_metadata_address_in_chunk(device_extension* Vcb, chunk* c, UINT64* address);
 
 // in read.c
 NTSTATUS STDCALL drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp);
-NTSTATUS STDCALL read_data(device_extension* Vcb, UINT64 addr, UINT32 length, UINT32* csum, BOOL is_tree, UINT8* buf, chunk* c, chunk** pc, PIRP Irp);
-NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, ULONG* pbr, PIRP Irp);
+NTSTATUS STDCALL read_data(device_extension* Vcb, UINT64 addr, UINT32 length, UINT32* csum, BOOL is_tree, UINT8* buf, chunk* c, chunk** pc,
+                           PIRP Irp, BOOL check_nocsum_parity);
+NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, ULONG* pbr, PIRP Irp, BOOL check_nocsum_parity);
 NTSTATUS do_read(PIRP Irp, BOOL wait, ULONG* bytes_read);
 
 // in pnp.c
@@ -948,7 +1037,8 @@ void _space_list_subtract2(device_extension* Vcb, LIST_ENTRY* list, LIST_ENTRY*
 
 // in extent-tree.c
 NTSTATUS increase_extent_refcount_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 inode, UINT64 offset, UINT32 refcount, PIRP Irp, LIST_ENTRY* rollback);
-NTSTATUS decrease_extent_refcount_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 inode, UINT64 offset, UINT32 refcount, PIRP Irp, LIST_ENTRY* rollback);
+NTSTATUS decrease_extent_refcount_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 inode, UINT64 offset,
+                                       UINT32 refcount, BOOL superseded, PIRP Irp, LIST_ENTRY* rollback);
 NTSTATUS decrease_extent_refcount_tree(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT8 level, PIRP Irp, LIST_ENTRY* rollback);
 void decrease_chunk_usage(chunk* c, UINT64 delta);
 // NTSTATUS convert_old_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp, LIST_ENTRY* rollback);
@@ -963,7 +1053,8 @@ void add_changed_extent_ref(chunk* c, UINT64 address, UINT64 size, UINT64 root,
 UINT64 find_extent_shared_tree_refcount(device_extension* Vcb, UINT64 address, UINT64 parent, PIRP Irp);
 UINT64 find_extent_shared_data_refcount(device_extension* Vcb, UINT64 address, UINT64 parent, PIRP Irp);
 NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem,
-                                  UINT8 level, UINT64 parent, PIRP Irp, LIST_ENTRY* rollback);
+                                  UINT8 level, UINT64 parent, BOOL superseded, PIRP Irp, LIST_ENTRY* rollback);
+UINT64 get_extent_data_ref_hash2(UINT64 root, UINT64 objid, UINT64 offset);
 
 // in worker-thread.c
 void do_read_job(PIRP Irp);
@@ -977,7 +1068,7 @@ NTSTATUS registry_load_volume_options(device_extension* Vcb);
 
 // in compress.c
 NTSTATUS decompress(UINT8 type, UINT8* inbuf, UINT64 inlen, UINT8* outbuf, UINT64 outlen);
-NTSTATUS write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback);
+NTSTATUS write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, PIRP Irp, LIST_ENTRY* rollback);
 
 // in galois.c
 void galois_double(UINT8* data, UINT32 len);
@@ -989,6 +1080,24 @@ UINT8 gdiv(UINT8 a, UINT8 b);
 // in devctrl.c
 NTSTATUS STDCALL drv_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
 
+// in calcthread.c
+#ifdef __REACTOS__
+void NTAPI calc_thread(void* context);
+#else
+void calc_thread(void* context);
+#endif
+NTSTATUS add_calc_job(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32* csum, calc_job** pcj);
+void free_calc_job(calc_job* cj);
+
+// in balance.c
+NTSTATUS start_balance(device_extension* Vcb, void* data, ULONG length, KPROCESSOR_MODE processor_mode);
+NTSTATUS query_balance(device_extension* Vcb, void* data, ULONG length);
+NTSTATUS pause_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode);
+NTSTATUS resume_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode);
+NTSTATUS stop_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode);
+NTSTATUS look_for_balance_item(device_extension* Vcb);
+NTSTATUS remove_device(device_extension* Vcb, void* data, ULONG length, KPROCESSOR_MODE processor_mode);
+
 #define fast_io_possible(fcb) (!FsRtlAreThereCurrentFileLocks(&fcb->lock) && !fcb->Vcb->readonly ? FastIoIsPossible : FastIoIsQuestionable)
 
 static __inline void print_open_trees(device_extension* Vcb) {
@@ -1048,6 +1157,8 @@ static __inline void do_xor(UINT8* buf1, UINT8* buf2, UINT32 len) {
     }
 }
 
+#define first_device(Vcb) CONTAINING_RECORD(Vcb->devices.Flink, device, list_entry)
+
 #ifdef DEBUG_FCB_REFCOUNTS
 #ifdef DEBUG_LONG_MESSAGES
 #define increase_fileref_refcount(fileref) {\
@@ -1144,6 +1255,29 @@ static __inline void do_xor(UINT8* buf1, UINT8* buf2, UINT32 len) {
 #define called_from_lxss() FALSE
 #endif
 
+typedef BOOLEAN (*tPsIsDiskCountersEnabled)();
+
+typedef VOID (*tPsUpdateDiskCounters)(PEPROCESS Process, ULONG64 BytesRead, ULONG64 BytesWritten,
+                                      ULONG ReadOperationCount, ULONG WriteOperationCount, ULONG FlushOperationCount);
+
+typedef BOOLEAN (*tCcCopyWriteEx)(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG Length, BOOLEAN Wait,
+                                  PVOID Buffer, PETHREAD IoIssuerThread);
+
+typedef BOOLEAN (*tCcCopyReadEx)(PFILE_OBJECT FileObject, PLARGE_INTEGER FileOffset, ULONG Length, BOOLEAN Wait,
+                                 PVOID Buffer, PIO_STATUS_BLOCK IoStatus, PETHREAD IoIssuerThread);
+
+#define CC_ENABLE_DISK_IO_ACCOUNTING 0x00000010
+
+typedef VOID (*tCcSetAdditionalCacheAttributesEx)(PFILE_OBJECT FileObject, ULONG Flags);
+
+#ifndef __REACTOS__
+#undef RtlIsNtDdiVersionAvailable
+
+BOOLEAN RtlIsNtDdiVersionAvailable(ULONG Version);
+
+PEPROCESS PsGetThreadProcess(PETHREAD Thread); // not in mingw
+#endif
+
 #if defined(__REACTOS__) && (NTDDI_VERSION < NTDDI_WIN7)
 NTSTATUS WINAPI RtlUnicodeToUTF8N(CHAR *utf8_dest, ULONG utf8_bytes_max,
                                   ULONG *utf8_bytes_written,
index bc35ad0..c28aafe 100644 (file)
@@ -8,6 +8,17 @@
 #define FSCTL_BTRFS_CREATE_SNAPSHOT CTL_CODE(FILE_DEVICE_UNKNOWN, 0x82b, METHOD_IN_DIRECT, FILE_ANY_ACCESS)
 #define FSCTL_BTRFS_GET_INODE_INFO CTL_CODE(FILE_DEVICE_UNKNOWN, 0x82c, METHOD_OUT_DIRECT, FILE_ANY_ACCESS)
 #define FSCTL_BTRFS_SET_INODE_INFO CTL_CODE(FILE_DEVICE_UNKNOWN, 0x82d, METHOD_IN_DIRECT, FILE_ANY_ACCESS)
+#define FSCTL_BTRFS_GET_DEVICES CTL_CODE(FILE_DEVICE_UNKNOWN, 0x82e, METHOD_OUT_DIRECT, FILE_ANY_ACCESS)
+#define FSCTL_BTRFS_GET_USAGE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x82f, METHOD_OUT_DIRECT, FILE_ANY_ACCESS)
+#define FSCTL_BTRFS_START_BALANCE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x830, METHOD_IN_DIRECT, FILE_ANY_ACCESS)
+#define FSCTL_BTRFS_QUERY_BALANCE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x831, METHOD_OUT_DIRECT, FILE_ANY_ACCESS)
+#define FSCTL_BTRFS_PAUSE_BALANCE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x832, METHOD_IN_DIRECT, FILE_ANY_ACCESS)
+#define FSCTL_BTRFS_RESUME_BALANCE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x833, METHOD_IN_DIRECT, FILE_ANY_ACCESS)
+#define FSCTL_BTRFS_STOP_BALANCE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x834, METHOD_IN_DIRECT, FILE_ANY_ACCESS)
+#define FSCTL_BTRFS_ADD_DEVICE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x835, METHOD_IN_DIRECT, FILE_ANY_ACCESS)
+#define FSCTL_BTRFS_REMOVE_DEVICE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x836, METHOD_IN_DIRECT, FILE_ANY_ACCESS)
+#define IOCTL_BTRFS_QUERY_FILESYSTEMS CTL_CODE(FILE_DEVICE_UNKNOWN, 0x837, METHOD_OUT_DIRECT, FILE_ANY_ACCESS)
+#define FSCTL_BTRFS_GET_UUID CTL_CODE(FILE_DEVICE_UNKNOWN, 0x838, METHOD_OUT_DIRECT, FILE_ANY_ACCESS)
 
 typedef struct {
     UINT64 subvol;
@@ -46,4 +57,92 @@ typedef struct {
     BOOL mode_changed;
 } btrfs_set_inode_info;
 
+// One device entry. NOTE(review): presumably the record layout produced for
+// FSCTL_BTRFS_GET_DEVICES - confirm against the handler.
+typedef struct {
+    UINT32 next_entry; // presumably offset to the next record - TODO confirm units and terminator
+    UINT64 dev_id;
+    UINT64 size;
+    BOOL readonly;
+    ULONG device_number;
+    ULONG partition_number;
+    USHORT namelen; // length of name; whether in bytes or WCHARs is not visible here - TODO confirm
+    WCHAR name[1]; // one element declared; presumably a variable-length tail - TODO confirm
+} btrfs_device;
+
+// Per-device element of btrfs_usage.devices: a device id paired with an
+// allocation figure. NOTE(review): alloc is presumably in bytes - confirm.
+typedef struct {
+    UINT64 dev_id;
+    UINT64 alloc;
+} btrfs_usage_device;
+
+// Usage entry carrying a type, size/used totals and a list of per-device
+// figures. NOTE(review): presumably the record layout produced for
+// FSCTL_BTRFS_GET_USAGE - confirm against the handler.
+typedef struct {
+    UINT32 next_entry; // presumably offset to the next record - TODO confirm units and terminator
+    UINT64 type;
+    UINT64 size;
+    UINT64 used;
+    UINT64 num_devices; // presumably the number of valid elements in devices - TODO confirm
+    btrfs_usage_device devices[1]; // one element declared; presumably a variable-length tail - TODO confirm
+} btrfs_usage;
+
+#define BTRFS_BALANCE_OPTS_ENABLED      0x001
+#define BTRFS_BALANCE_OPTS_PROFILES     0x002
+#define BTRFS_BALANCE_OPTS_DEVID        0x004
+#define BTRFS_BALANCE_OPTS_DRANGE       0x008
+#define BTRFS_BALANCE_OPTS_VRANGE       0x010
+#define BTRFS_BALANCE_OPTS_LIMIT        0x020
+#define BTRFS_BALANCE_OPTS_STRIPES      0x040
+#define BTRFS_BALANCE_OPTS_USAGE        0x080
+#define BTRFS_BALANCE_OPTS_CONVERT      0x100
+#define BTRFS_BALANCE_OPTS_SOFT         0x200
+
+#define BLOCK_FLAG_SINGLE 0x1000000000000 // only used in balance
+
+// Option set for one balance pass. The field names mirror the
+// BTRFS_BALANCE_OPTS_* bits defined above (PROFILES, DEVID, DRANGE, VRANGE,
+// LIMIT, STRIPES, USAGE, CONVERT).
+// NOTE(review): presumably each field is only meaningful when its matching
+// bit is set in flags - confirm against balance.c.
+typedef struct {
+    UINT64 flags; // presumably a combination of BTRFS_BALANCE_OPTS_* - TODO confirm
+    UINT64 profiles;
+    UINT64 devid;
+    UINT64 drange_start;
+    UINT64 drange_end;
+    UINT64 vrange_start;
+    UINT64 vrange_end;
+    UINT64 limit_start;
+    UINT64 limit_end;
+    UINT16 stripes_start;
+    UINT16 stripes_end;
+    UINT8 usage_start;
+    UINT8 usage_end;
+    UINT64 convert;
+} btrfs_balance_opts;
+
+#define BTRFS_BALANCE_STOPPED   0
+#define BTRFS_BALANCE_RUNNING   1
+#define BTRFS_BALANCE_PAUSED    2
+#define BTRFS_BALANCE_REMOVAL   4
+#define BTRFS_BALANCE_ERROR     8
+
+// Balance status report: a status word, chunk counters, an NTSTATUS and one
+// btrfs_balance_opts each for data, metadata and system.
+// NOTE(review): presumably the output of FSCTL_BTRFS_QUERY_BALANCE - confirm.
+typedef struct {
+    UINT32 status; // presumably built from the BTRFS_BALANCE_* values above - TODO confirm
+    UINT64 chunks_left;
+    UINT64 total_chunks;
+    NTSTATUS error;
+    btrfs_balance_opts data_opts;
+    btrfs_balance_opts metadata_opts;
+    btrfs_balance_opts system_opts;
+} btrfs_query_balance;
+
+// Three balance option sets. NOTE(review): presumably the input of
+// FSCTL_BTRFS_START_BALANCE, indexed data/metadata/system to match
+// btrfs_query_balance - the index order is not visible here, confirm.
+typedef struct {
+    btrfs_balance_opts opts[3];
+} btrfs_start_balance;
+
+// A device within a btrfs_filesystem record: a 16-byte UUID followed by a name.
+typedef struct {
+    UINT8 uuid[16];
+    USHORT name_length; // length of name; whether in bytes or WCHARs is not visible here - TODO confirm
+    WCHAR name[1]; // one element declared; presumably a variable-length tail - TODO confirm
+} btrfs_filesystem_device;
+
+// A filesystem record: 16-byte UUID, device count and the first device entry.
+// NOTE(review): presumably the record layout produced for
+// IOCTL_BTRFS_QUERY_FILESYSTEMS - confirm against the handler.
+typedef struct {
+    UINT32 next_entry; // presumably offset to the next record - TODO confirm units and terminator
+    UINT8 uuid[16];
+    UINT32 num_devices;
+    btrfs_filesystem_device device; // single entry declared; how further devices follow is not visible here
+} btrfs_filesystem;
+
 #endif
index 2a6ba4d..75d87db 100644 (file)
@@ -28,9 +28,14 @@ static BOOLEAN STDCALL acquire_for_lazy_write(PVOID Context, BOOLEAN Wait) {
     
 //     if (!fcb || FileObject->Flags & FO_CLEANUP_COMPLETE)
 //         return FALSE;
+    
+    if (!ExAcquireResourceSharedLite(&fcb->Vcb->tree_lock, Wait))
+        return FALSE;
 
-    if (!ExAcquireResourceSharedLite(fcb->Header.PagingIoResource, Wait))
+    if (!ExAcquireResourceExclusiveLite(fcb->Header.Resource, Wait)) {
+        ExReleaseResourceLite(&fcb->Vcb->tree_lock);
         return FALSE;
+    }
     
     fcb->lazy_writer_thread = KeGetCurrentThread();
     
@@ -48,7 +53,9 @@ static void STDCALL release_from_lazy_write(PVOID Context) {
     
     fcb->lazy_writer_thread = NULL;
     
-    ExReleaseResourceLite(fcb->Header.PagingIoResource);
+    ExReleaseResourceLite(fcb->Header.Resource);
+    
+    ExReleaseResourceLite(&fcb->Vcb->tree_lock);
 }
 
 static BOOLEAN STDCALL acquire_for_read_ahead(PVOID Context, BOOLEAN Wait) {
diff --git a/reactos/drivers/filesystems/btrfs/calcthread.c b/reactos/drivers/filesystems/btrfs/calcthread.c
new file mode 100644 (file)
index 0000000..e84107d
--- /dev/null
@@ -0,0 +1,142 @@
+/* Copyright (c) Mark Harmstone 2016
+ * 
+ * This file is part of WinBtrfs.
+ * 
+ * WinBtrfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public Licence as published by
+ * the Free Software Foundation, either version 3 of the Licence, or
+ * (at your option) any later version.
+ * 
+ * WinBtrfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public Licence for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public Licence
+ * along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include "btrfs_drv.h"
+
+#define SECTOR_BLOCK 16
+
+/*
+ * Queues a checksum job covering `sectors` sectors of `data`, whose results
+ * are written to `csum`, and signals the event the calc threads wait on.
+ *
+ * On success the new job is handed back through *pcj holding one reference,
+ * which is dropped with free_calc_job(). Returns
+ * STATUS_INSUFFICIENT_RESOURCES if the job cannot be allocated.
+ */
+NTSTATUS add_calc_job(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32* csum, calc_job** pcj) {
+    calc_job* job = ExAllocatePoolWithTag(NonPagedPool, sizeof(calc_job), ALLOC_TAG);
+
+    if (job == NULL) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    /* Fill in every field before the job becomes visible on the shared list. */
+    KeInitializeEvent(&job->event, NotificationEvent, FALSE);
+    job->refcount = 1;
+    job->pos = 0;
+    job->done = 0;
+    job->data = data;
+    job->csum = csum;
+    job->sectors = sectors;
+
+    ExAcquireResourceExclusiveLite(&Vcb->calcthreads.lock, TRUE);
+    InsertTailList(&Vcb->calcthreads.job_list, &job->list_entry);
+    ExReleaseResourceLite(&Vcb->calcthreads.lock);
+
+    /* Pulse the shared event: set it, then reset it straight away. */
+    KeSetEvent(&Vcb->calcthreads.event, 0, FALSE);
+    KeClearEvent(&Vcb->calcthreads.event);
+
+    *pcj = job;
+
+    return STATUS_SUCCESS;
+}
+
+/*
+ * Drops one reference on `cj`; the job is handed back to the pool as soon as
+ * the count reaches zero.
+ */
+void free_calc_job(calc_job* cj) {
+    if (InterlockedDecrement(&cj->refcount) == 0)
+        ExFreePool(cj);
+}
+
+/*
+ * Claims the next group of up to SECTOR_BLOCK sectors of job `cj` and writes
+ * one checksum per sector into cj->csum.
+ *
+ * Returns FALSE without doing any work if every group has already been
+ * claimed, TRUE after processing one group. The call whose completion count
+ * covers the whole job unlinks the job from the list and signals cj->event.
+ */
+static BOOL do_calc(device_extension* Vcb, calc_job* cj) {
+    LONG pos, done;
+    UINT32* csum;
+    UINT8* data;
+    ULONG blocksize, i;
+    
+    // Atomically reserve a group index; the value before the increment is ours.
+    pos = InterlockedIncrement(&cj->pos) - 1;
+    
+    // Nothing left to claim.
+    if (pos * SECTOR_BLOCK >= cj->sectors)
+        return FALSE;
+
+    csum = &cj->csum[pos * SECTOR_BLOCK];
+    data = cj->data + (pos * SECTOR_BLOCK * Vcb->superblock.sector_size);
+    
+    // The final group may hold fewer than SECTOR_BLOCK sectors.
+    blocksize = min(SECTOR_BLOCK, cj->sectors - (pos * SECTOR_BLOCK));
+    for (i = 0; i < blocksize; i++) {
+        // Seed of all ones, result bit-inverted.
+        *csum = ~calc_crc32c(0xffffffff, data, Vcb->superblock.sector_size);
+        csum++;
+        data += Vcb->superblock.sector_size;
+    }
+    
+    // Count this group as finished.
+    done = InterlockedIncrement(&cj->done);
+    
+    // All groups finished: take the job off the list, then wake whoever waits on it.
+    if (done * SECTOR_BLOCK >= cj->sectors) {
+        ExAcquireResourceExclusiveLite(&Vcb->calcthreads.lock, TRUE);
+        RemoveEntryList(&cj->list_entry);
+        ExReleaseResourceLite(&Vcb->calcthreads.lock);
+        
+        KeSetEvent(&cj->event, 0, FALSE);
+    }
+    
+    return TRUE;
+}
+
+/*
+ * Entry point of a checksum worker thread; `context` is the thread's
+ * drv_calc_thread.
+ *
+ * The thread keeps a reference on its device object for its whole lifetime.
+ * Each time the shared calcthreads event fires it works on the job at the
+ * head of the job list, one do_calc() call at a time, until the list is empty
+ * or do_calc() reports nothing left to claim. When thread->quit is set it
+ * leaves the loop, signals thread->finished and terminates.
+ */
+#ifdef __REACTOS__
+void NTAPI calc_thread(void* context) {
+#else
+void calc_thread(void* context) {
+#endif
+    drv_calc_thread* thread = context;
+    device_extension* Vcb = thread->DeviceObject->DeviceExtension;
+
+    ObReferenceObject(thread->DeviceObject);
+
+    while (TRUE) {
+        KeWaitForSingleObject(&Vcb->calcthreads.event, Executive, KernelMode, FALSE, NULL);
+
+        FsRtlEnterFileSystem();
+
+        while (TRUE) {
+            calc_job* cj;
+            BOOL b;
+
+            ExAcquireResourceExclusiveLite(&Vcb->calcthreads.lock, TRUE);
+
+            if (IsListEmpty(&Vcb->calcthreads.job_list)) {
+                ExReleaseResourceLite(&Vcb->calcthreads.lock);
+                break;
+            }
+
+            cj = CONTAINING_RECORD(Vcb->calcthreads.job_list.Flink, calc_job, list_entry);
+
+            // Take our reference atomically: free_calc_job() decrements the
+            // counter with InterlockedDecrement and without holding
+            // calcthreads.lock, so a plain ++ here could race with another
+            // thread's release and lose an update.
+            InterlockedIncrement(&cj->refcount);
+
+            ExReleaseResourceLite(&Vcb->calcthreads.lock);
+
+            b = do_calc(Vcb, cj);
+
+            free_calc_job(cj);
+
+            // Head job has no unclaimed work left; go back to waiting.
+            if (!b)
+                break;
+        }
+
+        FsRtlExitFileSystem();
+
+        if (thread->quit)
+            break;
+    }
+
+    ObDereferenceObject(thread->DeviceObject);
+
+    KeSetEvent(&thread->finished, 0, FALSE);
+
+    PsTerminateSystemThread(STATUS_SUCCESS);
+}
index 67ad41b..e94211b 100755 (executable)
@@ -357,7 +357,7 @@ NTSTATUS decompress(UINT8 type, UINT8* inbuf, UINT64 inlen, UINT8* outbuf, UINT6
     }
 }
 
-static NTSTATUS zlib_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback) {
+static NTSTATUS zlib_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, PIRP Irp, LIST_ENTRY* rollback) {
     NTSTATUS Status;
     UINT8 compression;
     UINT64 comp_length;
@@ -444,11 +444,11 @@ static NTSTATUS zlib_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 en
     while (le != &fcb->Vcb->chunks) {
         c = CONTAINING_RECORD(le, chunk, list_entry);
         
-        if (!c->readonly) {
+        if (!c->readonly && !c->reloc) {
             ExAcquireResourceExclusiveLite(&c->lock, TRUE);
             
             if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) {
-                if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, changed_sector_list, Irp, rollback, compression, end_data - start_data)) {
+                if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, Irp, rollback, compression, end_data - start_data)) {
                     ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
                     
                     if (compression != BTRFS_COMPRESSION_NONE)
@@ -474,7 +474,7 @@ static NTSTATUS zlib_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 en
         ExAcquireResourceExclusiveLite(&c->lock, TRUE);
         
         if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) {
-            if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, changed_sector_list, Irp, rollback, compression, end_data - start_data)) {
+            if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, Irp, rollback, compression, end_data - start_data)) {
                 if (compression != BTRFS_COMPRESSION_NONE)
                     ExFreePool(comp_data);
                 
@@ -732,7 +732,7 @@ static __inline UINT32 lzo_max_outlen(UINT32 inlen) {
     return inlen + (inlen / 16) + 64 + 3; // formula comes from LZO.FAQ
 }
 
-static NTSTATUS lzo_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback) {
+static NTSTATUS lzo_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, PIRP Irp, LIST_ENTRY* rollback) {
     NTSTATUS Status;
     UINT8 compression;
     UINT64 comp_length;
@@ -829,11 +829,11 @@ static NTSTATUS lzo_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end
     while (le != &fcb->Vcb->chunks) {
         c = CONTAINING_RECORD(le, chunk, list_entry);
         
-        if (!c->readonly) {
+        if (!c->readonly && !c->reloc) {
             ExAcquireResourceExclusiveLite(&c->lock, TRUE);
             
             if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) {
-                if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, changed_sector_list, Irp, rollback, compression, end_data - start_data)) {
+                if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, Irp, rollback, compression, end_data - start_data)) {
                     ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
                     
                     if (compression != BTRFS_COMPRESSION_NONE)
@@ -859,7 +859,7 @@ static NTSTATUS lzo_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end
         ExAcquireResourceExclusiveLite(&c->lock, TRUE);
         
         if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) {
-            if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, changed_sector_list, Irp, rollback, compression, end_data - start_data)) {
+            if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, Irp, rollback, compression, end_data - start_data)) {
                 if (compression != BTRFS_COMPRESSION_NONE)
                     ExFreePool(comp_data);
                 
@@ -876,7 +876,7 @@ static NTSTATUS lzo_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end
     return STATUS_DISK_FULL;
 }
 
-NTSTATUS write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback) {
+NTSTATUS write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, PIRP Irp, LIST_ENTRY* rollback) {
     UINT8 type;
 
     if (fcb->Vcb->options.compress_type != 0)
@@ -890,7 +890,7 @@ NTSTATUS write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void
     
     if (type == BTRFS_COMPRESSION_LZO) {
         fcb->Vcb->superblock.incompat_flags |= BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO;
-        return lzo_write_compressed_bit(fcb, start_data, end_data, data, compressed, changed_sector_list, Irp, rollback);
+        return lzo_write_compressed_bit(fcb, start_data, end_data, data, compressed, Irp, rollback);
     } else
-        return zlib_write_compressed_bit(fcb, start_data, end_data, data, compressed, changed_sector_list, Irp, rollback);
+        return zlib_write_compressed_bit(fcb, start_data, end_data, data, compressed, Irp, rollback);
 }
index f389dfb..04dc8f2 100644 (file)
@@ -27,651 +27,6 @@ extern PDEVICE_OBJECT devobj;
 
 static WCHAR datastring[] = L"::$DATA";
 
-static NTSTATUS find_file_dir_index(device_extension* Vcb, root* r, UINT64 inode, UINT64 parinode, PANSI_STRING utf8, UINT64* pindex, PIRP Irp) {
-    KEY searchkey;
-    traverse_ptr tp;
-    NTSTATUS Status;
-    UINT64 index;
-    
-    searchkey.obj_id = inode;
-    searchkey.obj_type = TYPE_INODE_REF;
-    searchkey.offset = parinode;
-    
-    Status = find_item(Vcb, r, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
-    
-    if (!keycmp(tp.item->key, searchkey)) {
-        INODE_REF* ir;
-        ULONG len;
-        
-        index = 0;
-        
-        ir = (INODE_REF*)tp.item->data;
-        len = tp.item->size;
-        
-        do {
-            ULONG itemlen;
-            
-            if (len < sizeof(INODE_REF) || len < sizeof(INODE_REF) - 1 + ir->n) {
-                ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-                break;
-            }
-            
-            itemlen = sizeof(INODE_REF) - sizeof(char) + ir->n;
-            
-            if (ir->n == utf8->Length && RtlCompareMemory(ir->name, utf8->Buffer, ir->n) == ir->n) {
-                index = ir->index;
-                break;
-            }
-            
-            if (len > itemlen) {
-                len -= itemlen;
-                ir = (INODE_REF*)&ir->name[ir->n];
-            } else
-                break;
-        } while (len > 0);
-        
-        if (index == 0)
-            return STATUS_NOT_FOUND;
-        
-        *pindex = index;
-        
-        return STATUS_SUCCESS;
-    } else
-        return STATUS_NOT_FOUND;
-}
-
-static NTSTATUS find_file_dir_index_extref(device_extension* Vcb, root* r, UINT64 inode, UINT64 parinode, PANSI_STRING utf8, UINT64* pindex, PIRP Irp) {
-    KEY searchkey;
-    traverse_ptr tp;
-    NTSTATUS Status;
-    UINT64 index;
-    
-    searchkey.obj_id = inode;
-    searchkey.obj_type = TYPE_INODE_EXTREF;
-    searchkey.offset = calc_crc32c((UINT32)parinode, (UINT8*)utf8->Buffer, utf8->Length);
-    
-    Status = find_item(Vcb, r, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
-    
-    if (!keycmp(tp.item->key, searchkey)) {
-        INODE_EXTREF* ier;
-        ULONG len;
-        
-        index = 0;
-        
-        ier = (INODE_EXTREF*)tp.item->data;
-        len = tp.item->size;
-        
-        do {
-            ULONG itemlen;
-            
-            if (len < sizeof(INODE_EXTREF) || len < sizeof(INODE_EXTREF) - 1 + ier->n) {
-                ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-                break;
-            }
-            
-            itemlen = sizeof(INODE_EXTREF) - sizeof(char) + ier->n;
-            
-            if (ier->n == utf8->Length && RtlCompareMemory(ier->name, utf8->Buffer, ier->n) == ier->n) {
-                index = ier->index;
-                break;
-            }
-            
-            if (len > itemlen) {
-                len -= itemlen;
-                ier = (INODE_EXTREF*)&ier->name[ier->n];
-            } else
-                break;
-        } while (len > 0);
-        
-        if (index == 0)
-            return STATUS_NOT_FOUND;
-        
-        *pindex = index;
-        
-        return STATUS_SUCCESS;
-    } else
-        return STATUS_NOT_FOUND;
-}
-
-static NTSTATUS find_subvol_dir_index(device_extension* Vcb, root* r, UINT64 subvolid, UINT64 parinode, PANSI_STRING utf8, UINT64* pindex, PIRP Irp) {
-    KEY searchkey;
-    traverse_ptr tp;
-    NTSTATUS Status;
-    ROOT_REF* rr;
-    
-    searchkey.obj_id = r->id;
-    searchkey.obj_type = TYPE_ROOT_REF;
-    searchkey.offset = subvolid;
-    
-    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
-    
-    if (keycmp(tp.item->key, searchkey)) {
-        ERR("couldn't find (%llx,%x,%llx) in root tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
-        return STATUS_INTERNAL_ERROR;
-    }
-    
-    if (tp.item->size < sizeof(ROOT_REF)) {
-        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
-            tp.item->size, sizeof(ROOT_REF));
-        return STATUS_INTERNAL_ERROR;
-    }
-    
-    rr = (ROOT_REF*)tp.item->data;
-    
-    if (tp.item->size < sizeof(ROOT_REF) - 1 + rr->n) {
-        ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
-            tp.item->size, sizeof(ROOT_REF) - 1 + rr->n);
-        return STATUS_INTERNAL_ERROR;
-    }
-    
-    if (rr->dir == parinode && rr->n == utf8->Length && RtlCompareMemory(utf8->Buffer, rr->name, rr->n) == rr->n) {
-        *pindex = rr->index;
-        return STATUS_SUCCESS;
-    } else
-        return STATUS_NOT_FOUND;
-}
-
-static NTSTATUS load_index_list(fcb* fcb, PIRP Irp) {
-    KEY searchkey;
-    traverse_ptr tp, next_tp;
-    NTSTATUS Status;
-    BOOL b;
-    
-    searchkey.obj_id = fcb->inode;
-    searchkey.obj_type = TYPE_DIR_INDEX;
-    searchkey.offset = 2;
-    
-    Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
-
-    if (keycmp(tp.item->key, searchkey) == -1) {
-        if (find_next_item(fcb->Vcb, &tp, &next_tp, FALSE, Irp)) {
-            tp = next_tp;
-            
-            TRACE("moving on to %llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-        }
-    }
-    
-    if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_DIR_INDEX) {
-        Status = STATUS_SUCCESS;
-        goto end;
-    }
-    
-    do {
-        DIR_ITEM* di;
-        
-        TRACE("key: %llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-        di = (DIR_ITEM*)tp.item->data;
-        
-        if (tp.item->size < sizeof(DIR_ITEM) || tp.item->size < (sizeof(DIR_ITEM) - 1 + di->m + di->n)) {
-            WARN("(%llx,%x,%llx) is truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-        } else {
-            index_entry* ie;
-            ULONG stringlen;
-            UNICODE_STRING us;
-            LIST_ENTRY* le;
-            BOOL inserted;
-            
-            ie = ExAllocatePoolWithTag(PagedPool, sizeof(index_entry), ALLOC_TAG);
-            if (!ie) {
-                ERR("out of memory\n");
-                Status = STATUS_INSUFFICIENT_RESOURCES;
-                goto end;
-            }
-            
-            ie->utf8.Length = ie->utf8.MaximumLength = di->n;
-            
-            if (di->n > 0) {
-                ie->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, ie->utf8.MaximumLength, ALLOC_TAG);
-                if (!ie->utf8.Buffer) {
-                    ERR("out of memory\n");
-                    ExFreePool(ie);
-                    Status = STATUS_INSUFFICIENT_RESOURCES;
-                    goto end;
-                }
-                
-                RtlCopyMemory(ie->utf8.Buffer, di->name, di->n);
-            } else
-                ie->utf8.Buffer = NULL;
-            
-            Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, di->name, di->n);
-            if (!NT_SUCCESS(Status)) {
-                ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status);
-                if (ie->utf8.Buffer) ExFreePool(ie->utf8.Buffer);
-                ExFreePool(ie);
-                goto nextitem;
-            }
-            
-            if (stringlen == 0) {
-                ERR("UTF8 length was 0\n");
-                if (ie->utf8.Buffer) ExFreePool(ie->utf8.Buffer);
-                ExFreePool(ie);
-                goto nextitem;
-            }
-            
-            us.Length = us.MaximumLength = stringlen;
-            us.Buffer = ExAllocatePoolWithTag(PagedPool, us.MaximumLength, ALLOC_TAG);
-            
-            if (!us.Buffer) {
-                ERR("out of memory\n");
-                if (ie->utf8.Buffer) ExFreePool(ie->utf8.Buffer);
-                ExFreePool(ie);
-                return STATUS_INSUFFICIENT_RESOURCES;
-            }
-            
-            Status = RtlUTF8ToUnicodeN(us.Buffer, stringlen, &stringlen, di->name, di->n);
-            if (!NT_SUCCESS(Status)) {
-                ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status);
-                ExFreePool(us.Buffer);
-                if (ie->utf8.Buffer) ExFreePool(ie->utf8.Buffer);
-                ExFreePool(ie);
-                goto nextitem;
-            }
-            
-            Status = RtlUpcaseUnicodeString(&ie->filepart_uc, &us, TRUE);
-            if (!NT_SUCCESS(Status)) {
-                ERR("RtlUpcaseUnicodeString returned %08x\n", Status);
-                ExFreePool(us.Buffer);
-                if (ie->utf8.Buffer) ExFreePool(ie->utf8.Buffer);
-                ExFreePool(ie);
-                goto nextitem;
-            }
-            
-            ie->key = di->key;
-            ie->type = di->type;
-            ie->index = tp.item->key.offset;
-            
-            ie->hash = calc_crc32c(0xfffffffe, (UINT8*)ie->filepart_uc.Buffer, (ULONG)ie->filepart_uc.Length);
-            inserted = FALSE;
-            
-            le = fcb->index_list.Flink;
-            while (le != &fcb->index_list) {
-                index_entry* ie2 = CONTAINING_RECORD(le, index_entry, list_entry);
-                
-                if (ie2->hash >= ie->hash) {
-                    InsertHeadList(le->Blink, &ie->list_entry);
-                    inserted = TRUE;
-                    break;
-                }
-                
-                le = le->Flink;
-            }
-            
-            if (!inserted)
-                InsertTailList(&fcb->index_list, &ie->list_entry);
-        }
-        
-nextitem:
-        b = find_next_item(fcb->Vcb, &tp, &next_tp, FALSE, Irp);
-         
-        if (b) {
-            tp = next_tp;
-            
-            b = tp.item->key.obj_id == fcb->inode && tp.item->key.obj_type == TYPE_DIR_INDEX;
-        }
-    } while (b);
-    
-    Status = STATUS_SUCCESS;
-    
-end:
-    if (!NT_SUCCESS(Status)) {
-        while (!IsListEmpty(&fcb->index_list)) {
-            LIST_ENTRY* le = RemoveHeadList(&fcb->index_list);
-            index_entry* ie = CONTAINING_RECORD(le, index_entry, list_entry);
-
-            if (ie->utf8.Buffer) ExFreePool(ie->utf8.Buffer);
-            if (ie->filepart_uc.Buffer) ExFreePool(ie->filepart_uc.Buffer);
-            ExFreePool(ie);
-        }
-    } else
-        mark_fcb_dirty(fcb); // It's not necessarily dirty, but this is an easy way of making sure
-                             // the list remains in memory until the next flush.
-    
-    return Status;
-}
-
-static NTSTATUS STDCALL find_file_in_dir_index(file_ref* fr, PUNICODE_STRING filename, root** subvol, UINT64* inode, UINT8* type,
-                                               UINT64* pindex, PANSI_STRING utf8, PIRP Irp) {
-    LIST_ENTRY* le;
-    NTSTATUS Status;
-    UNICODE_STRING us;
-    UINT32 hash;
-        
-    Status = RtlUpcaseUnicodeString(&us, filename, TRUE);
-    if (!NT_SUCCESS(Status)) {
-        ERR("RtlUpcaseUnicodeString returned %08x\n", Status);
-        return Status;
-    }
-    
-    hash = calc_crc32c(0xfffffffe, (UINT8*)us.Buffer, (ULONG)us.Length);
-    
-    ExAcquireResourceExclusiveLite(&fr->fcb->nonpaged->index_lock, TRUE);
-    
-    if (!fr->fcb->index_loaded) {
-        Status = load_index_list(fr->fcb, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("load_index_list returned %08x\n", Status);
-            goto end;
-        }
-        
-        fr->fcb->index_loaded = TRUE;
-    }
-    
-    ExConvertExclusiveToSharedLite(&fr->fcb->nonpaged->index_lock);
-    
-    le = fr->fcb->index_list.Flink;
-    while (le != &fr->fcb->index_list) {
-        index_entry* ie = CONTAINING_RECORD(le, index_entry, list_entry);
-        
-        if (ie->hash == hash && ie->filepart_uc.Length == us.Length && RtlCompareMemory(ie->filepart_uc.Buffer, us.Buffer, us.Length) == us.Length) {
-            LIST_ENTRY* le;
-            BOOL ignore_entry = FALSE;
-            
-            ExAcquireResourceSharedLite(&fr->nonpaged->children_lock, TRUE);
-
-            le = fr->children.Flink;
-            while (le != &fr->children) {
-                file_ref* fr2 = CONTAINING_RECORD(le, file_ref, list_entry);
-                
-                if (fr2->index == ie->index) {
-                    if (fr2->deleted || fr2->filepart_uc.Length != us.Length ||
-                        RtlCompareMemory(fr2->filepart_uc.Buffer, us.Buffer, us.Length) != us.Length) {
-                        ignore_entry = TRUE;
-                        break;
-                    }
-                    break;
-                } else if (fr2->index > ie->index)
-                    break;
-                
-                le = le->Flink;
-            }
-            
-            ExReleaseResourceLite(&fr->nonpaged->children_lock);
-            
-            if (ignore_entry)
-                goto nextitem;
-            
-            if (ie->key.obj_type == TYPE_ROOT_ITEM) {
-                if (subvol) {
-                    *subvol = NULL;
-                    
-                    le = fr->fcb->Vcb->roots.Flink;
-                    while (le != &fr->fcb->Vcb->roots) {
-                        root* r2 = CONTAINING_RECORD(le, root, list_entry);
-                        
-                        if (r2->id == ie->key.obj_id) {
-                            *subvol = r2;
-                            break;
-                        }
-                        
-                        le = le->Flink;
-                    }
-                }
-                
-                if (inode)
-                    *inode = SUBVOL_ROOT_INODE;
-                
-                if (type)
-                    *type = BTRFS_TYPE_DIRECTORY;
-            } else {
-                if (subvol)
-                    *subvol = fr->fcb->subvol;
-                
-                if (inode)
-                    *inode = ie->key.obj_id;
-                
-                if (type)
-                    *type = ie->type;
-            }
-            
-            if (utf8) {
-                utf8->MaximumLength = utf8->Length = ie->utf8.Length;
-                utf8->Buffer = ExAllocatePoolWithTag(PagedPool, utf8->MaximumLength, ALLOC_TAG);
-                if (!utf8->Buffer) {
-                    ERR("out of memory\n");
-                    Status = STATUS_INSUFFICIENT_RESOURCES;
-                    goto end;
-                }
-                
-                RtlCopyMemory(utf8->Buffer, ie->utf8.Buffer, ie->utf8.Length);
-            }
-            
-            if (pindex)
-                *pindex = ie->index;
-            
-            Status = STATUS_SUCCESS;
-            goto end;
-        } else if (ie->hash > hash) {
-            Status = STATUS_OBJECT_NAME_NOT_FOUND;
-            goto end;
-        }
-        
-nextitem:
-        le = le->Flink;
-    }
-    
-    Status = STATUS_OBJECT_NAME_NOT_FOUND;
-    
-end:
-    ExReleaseResourceLite(&fr->fcb->nonpaged->index_lock);
-    
-    ExFreePool(us.Buffer);
-    
-    return Status;
-}
-
-static NTSTATUS STDCALL find_file_in_dir_with_crc32(device_extension* Vcb, PUNICODE_STRING filename, UINT32 crc32, file_ref* fr,
-                                                    root** subvol, UINT64* inode, UINT8* type, UINT64* pindex, PANSI_STRING utf8,
-                                                    BOOL case_sensitive, PIRP Irp) {
-    DIR_ITEM* di;
-    KEY searchkey;
-    traverse_ptr tp;
-    NTSTATUS Status;
-    ULONG stringlen;
-    
-    TRACE("(%p, %.*S, %08x, (%llx, %llx), %p, %p, %p)\n", Vcb, filename->Length / sizeof(WCHAR), filename->Buffer, crc32,
-                                                          fr->fcb->subvol->id, fr->fcb->inode, subvol, inode, type);
-    
-    searchkey.obj_id = fr->fcb->inode;
-    searchkey.obj_type = TYPE_DIR_ITEM;
-    searchkey.offset = crc32;
-    
-    Status = find_item(Vcb, fr->fcb->subvol, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
-    
-    TRACE("found item %llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-    
-    if (!keycmp(searchkey, tp.item->key)) {
-        UINT32 size = tp.item->size;
-        
-        // found by hash
-        
-        if (tp.item->size < sizeof(DIR_ITEM)) {
-            WARN("(%llx;%llx,%x,%llx) was %u bytes, expected at least %u\n", fr->fcb->subvol->id, tp.item->key.obj_id, tp.item->key.obj_type,
-                                                                             tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM));
-        } else {
-            di = (DIR_ITEM*)tp.item->data;
-            
-            while (size > 0) {
-                if (size < sizeof(DIR_ITEM) || size < (sizeof(DIR_ITEM) - 1 + di->m + di->n)) {
-                    WARN("(%llx,%x,%llx) is truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-                    break;
-                }
-                
-                size -= sizeof(DIR_ITEM) - sizeof(char);
-                size -= di->n;
-                size -= di->m;
-                
-                Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, di->name, di->n);
-                if (!NT_SUCCESS(Status)) {
-                    ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status);
-                } else {
-                    WCHAR* utf16 = ExAllocatePoolWithTag(PagedPool, stringlen, ALLOC_TAG);
-                    UNICODE_STRING us;
-                    
-                    if (!utf16) {
-                        ERR("out of memory\n");
-                        return STATUS_INSUFFICIENT_RESOURCES;
-                    }
-                    
-                    Status = RtlUTF8ToUnicodeN(utf16, stringlen, &stringlen, di->name, di->n);
-
-                    if (!NT_SUCCESS(Status)) {
-                        ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status);
-                    } else {
-                        ANSI_STRING nutf8;
-                        
-                        us.Buffer = utf16;
-                        us.Length = us.MaximumLength = (USHORT)stringlen;
-                        
-                        if (FsRtlAreNamesEqual(filename, &us, !case_sensitive, NULL)) {
-                            UINT64 index;
-                            
-                            if (di->key.obj_type == TYPE_ROOT_ITEM) {
-                                LIST_ENTRY* le = Vcb->roots.Flink;
-                                
-                                if (subvol) {
-                                    *subvol = NULL;
-                                    
-                                    while (le != &Vcb->roots) {
-                                        root* r2 = CONTAINING_RECORD(le, root, list_entry);
-                                        
-                                        if (r2->id == di->key.obj_id) {
-                                            *subvol = r2;
-                                            break;
-                                        }
-                                        
-                                        le = le->Flink;
-                                    }
-                                }
-
-                                if (inode)
-                                    *inode = SUBVOL_ROOT_INODE;
-                                
-                                if (type)
-                                    *type = BTRFS_TYPE_DIRECTORY;
-                            } else {
-                                if (subvol)
-                                    *subvol = fr->fcb->subvol;
-                                
-                                if (inode)
-                                    *inode = di->key.obj_id;
-                                
-                                if (type)
-                                    *type = di->type;
-                            }
-                            
-                            if (utf8) {
-                                utf8->MaximumLength = di->n;
-                                utf8->Length = utf8->MaximumLength;
-                                utf8->Buffer = ExAllocatePoolWithTag(PagedPool, utf8->MaximumLength, ALLOC_TAG);
-                                if (!utf8->Buffer) {
-                                    ERR("out of memory\n");
-                                    ExFreePool(utf16);
-                                    return STATUS_INSUFFICIENT_RESOURCES;
-                                }
-                                
-                                RtlCopyMemory(utf8->Buffer, di->name, di->n);
-                            }
-                            
-                            ExFreePool(utf16);
-                            
-                            index = 0;
-                                
-                            if (fr->fcb->subvol != Vcb->root_root) {
-                                nutf8.Buffer = di->name;
-                                nutf8.Length = nutf8.MaximumLength = di->n;
-                                
-                                if (di->key.obj_type == TYPE_ROOT_ITEM) {
-                                    Status = find_subvol_dir_index(Vcb, fr->fcb->subvol, di->key.obj_id, fr->fcb->inode, &nutf8, &index, Irp);
-                                    if (!NT_SUCCESS(Status)) {
-                                        ERR("find_subvol_dir_index returned %08x\n", Status);
-                                        return Status;
-                                    }
-                                } else {
-                                    Status = find_file_dir_index(Vcb, fr->fcb->subvol, di->key.obj_id, fr->fcb->inode, &nutf8, &index, Irp);
-                                    if (!NT_SUCCESS(Status)) {
-                                        if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF) {
-                                            Status = find_file_dir_index_extref(Vcb, fr->fcb->subvol, di->key.obj_id, fr->fcb->inode, &nutf8, &index, Irp);
-                                            
-                                            if (!NT_SUCCESS(Status)) {
-                                                ERR("find_file_dir_index_extref returned %08x\n", Status);
-                                                return Status;
-                                            }
-                                        } else {
-                                            ERR("find_file_dir_index returned %08x\n", Status);
-                                            return Status;
-                                        }
-                                    }
-                                }
-                            }
-                            
-                            if (index != 0) {
-                                LIST_ENTRY* le = fr->children.Flink;
-                                
-                                while (le != &fr->children) {
-                                    file_ref* fr2 = CONTAINING_RECORD(le, file_ref, list_entry);
-                                    
-                                    if (fr2->index == index) {
-                                        if (fr2->deleted || !FsRtlAreNamesEqual(&fr2->filepart, filename, !case_sensitive, NULL)) {
-                                            goto byindex;
-                                        }
-                                        break;
-                                    } else if (fr2->index > index)
-                                        break;
-                                    
-                                    le = le->Flink;
-                                }
-                            }
-                            
-//                             TRACE("found %.*S by hash at (%llx,%llx)\n", filename->Length / sizeof(WCHAR), filename->Buffer, (*subvol)->id, *inode);
-
-                            if (pindex)
-                                *pindex = index;
-                            
-                            return STATUS_SUCCESS;
-                        }
-                    }
-                    
-                    ExFreePool(utf16);
-                }
-                
-                di = (DIR_ITEM*)&di->name[di->n + di->m];
-            }
-        }
-    }
-    
-byindex:
-    if (case_sensitive)
-        return STATUS_OBJECT_NAME_NOT_FOUND;
-    
-    Status = find_file_in_dir_index(fr, filename, subvol, inode, type, pindex, utf8, Irp);
-    if (!NT_SUCCESS(Status) && Status != STATUS_OBJECT_NAME_NOT_FOUND) {
-        ERR("find_file_in_dir_index returned %08x\n", Status);
-        return Status;
-    }
-    
-    return Status;
-}
-
 fcb* create_fcb(POOL_TYPE pool_type) {
     fcb* fcb;
     
@@ -711,14 +66,17 @@ fcb* create_fcb(POOL_TYPE pool_type) {
     ExInitializeResourceLite(&fcb->nonpaged->resource);
     fcb->Header.Resource = &fcb->nonpaged->resource;
     
-    ExInitializeResourceLite(&fcb->nonpaged->index_lock);
+    ExInitializeResourceLite(&fcb->nonpaged->dir_children_lock);
     
     FsRtlInitializeFileLock(&fcb->lock, NULL, NULL);
     
     InitializeListHead(&fcb->extents);
-    InitializeListHead(&fcb->index_list);
     InitializeListHead(&fcb->hardlinks);
     
+    InitializeListHead(&fcb->dir_children_index);
+    InitializeListHead(&fcb->dir_children_hash);
+    InitializeListHead(&fcb->dir_children_hash_uc);
+    
     return fcb;
 }
 
@@ -740,52 +98,148 @@ file_ref* create_fileref() {
         return NULL;
     }
     
-    fr->refcount = 1;
-    
-#ifdef DEBUG_FCB_REFCOUNTS
-    WARN("fileref %p: refcount now 1\n", fr);
-#endif
-    
-    InitializeListHead(&fr->children);
-    
-    ExInitializeResourceLite(&fr->nonpaged->children_lock);
-    
-    return fr;
-}
+    fr->refcount = 1;
+    
+#ifdef DEBUG_FCB_REFCOUNTS
+    WARN("fileref %p: refcount now 1\n", fr);
+#endif
+    
+    InitializeListHead(&fr->children);
+    
+    ExInitializeResourceLite(&fr->nonpaged->children_lock);
+    
+    return fr;
+}
+
+static NTSTATUS STDCALL find_file_in_dir(device_extension* Vcb, PUNICODE_STRING filename, fcb* fcb, // Resolves filename against fcb's in-memory dir_child hash lists; this body performs no tree search.
+                                         root** subvol, UINT64* inode, dir_child** pdc, BOOL case_sensitive, PIRP Irp) { // On a match writes *subvol, *inode, *pdc and returns STATUS_SUCCESS; otherwise STATUS_OBJECT_NAME_NOT_FOUND or the RtlUpcaseUnicodeString failure status. Vcb and Irp are not referenced in this body.
+    NTSTATUS Status;
+    UNICODE_STRING fnus;
+    UINT32 hash;
+    LIST_ENTRY* le;
+    UINT8 c;
+    
+    if (!case_sensitive) {
+        Status = RtlUpcaseUnicodeString(&fnus, filename, TRUE); // TRUE: the call allocates fnus.Buffer; it is freed after the "end" label
+        
+        if (!NT_SUCCESS(Status)) {
+            ERR("RtlUpcaseUnicodeString returned %08x\n", Status);
+            return Status;
+        }
+    } else
+        fnus = *filename; // shallow copy: fnus borrows the caller's buffer and is not freed here
+    
+    hash = calc_crc32c(0xffffffff, (UINT8*)fnus.Buffer, fnus.Length); // hashed over the raw UTF-16 bytes of the (possibly upcased) name
+    
+    c = hash >> 24; // top byte of the hash indexes hash_ptrs / hash_ptrs_uc
+    
+    ExAcquireResourceSharedLite(&fcb->nonpaged->dir_children_lock, TRUE); // shared lock held for the whole list walk; released at "end"
+    
+    if (case_sensitive) {
+        if (!fcb->hash_ptrs[c]) { // no list position recorded for this top byte
+            Status = STATUS_OBJECT_NAME_NOT_FOUND;
+            goto end;
+        }
+        
+        le = fcb->hash_ptrs[c]; // start the walk at the recorded position rather than at the list head
+        while (le != &fcb->dir_children_hash) {
+            dir_child* dc = CONTAINING_RECORD(le, dir_child, list_entry_hash);
+            
+            if (dc->hash == hash) {
+                if (dc->name.Length == fnus.Length && RtlCompareMemory(dc->name.Buffer, fnus.Buffer, fnus.Length) == fnus.Length) { // equal hash is not enough: confirm length and bytes
+                    if (dc->key.obj_type == TYPE_ROOT_ITEM) {
+                        LIST_ENTRY* le2;
+                        
+                        *subvol = NULL; // stays NULL if no root in Vcb->roots has this id; STATUS_SUCCESS is still returned
+                        
+                        le2 = fcb->Vcb->roots.Flink;
+                        while (le2 != &fcb->Vcb->roots) {
+                            root* r2 = CONTAINING_RECORD(le2, root, list_entry);
+                            
+                            if (r2->id == dc->key.obj_id) {
+                                *subvol = r2;
+                                break;
+                            }
+                            
+                            le2 = le2->Flink;
+                        }
+                        
+                        *inode = SUBVOL_ROOT_INODE;
+                    } else {
+                        *subvol = fcb->subvol;
+                        *inode = dc->key.obj_id;
+                    }
+                    
+                    *pdc = dc;
+
+                    Status = STATUS_SUCCESS;
+                    goto end;
+                }
+            } else if (dc->hash > hash) { // presumably the list is kept in ascending hash order (maintained elsewhere - TODO confirm), so no later entry can match
+                Status = STATUS_OBJECT_NAME_NOT_FOUND;
+                goto end;
+            }
+            
+            le = le->Flink;
+        }
+    } else { // case-insensitive: the same walk over the upcased list, using hash_uc and name_uc
+        if (!fcb->hash_ptrs_uc[c]) {
+            Status = STATUS_OBJECT_NAME_NOT_FOUND;
+            goto end;
+        }
+        
+        le = fcb->hash_ptrs_uc[c];
+        while (le != &fcb->dir_children_hash_uc) {
+            dir_child* dc = CONTAINING_RECORD(le, dir_child, list_entry_hash_uc);
+            
+            if (dc->hash_uc == hash) {
+                if (dc->name_uc.Length == fnus.Length && RtlCompareMemory(dc->name_uc.Buffer, fnus.Buffer, fnus.Length) == fnus.Length) {
+                    if (dc->key.obj_type == TYPE_ROOT_ITEM) {
+                        LIST_ENTRY* le2;
+                        
+                        *subvol = NULL; // stays NULL if no root in Vcb->roots has this id; STATUS_SUCCESS is still returned
+                        
+                        le2 = fcb->Vcb->roots.Flink;
+                        while (le2 != &fcb->Vcb->roots) {
+                            root* r2 = CONTAINING_RECORD(le2, root, list_entry);
+                            
+                            if (r2->id == dc->key.obj_id) {
+                                *subvol = r2;
+                                break;
+                            }
+                            
+                            le2 = le2->Flink;
+                        }
+                        
+                        *inode = SUBVOL_ROOT_INODE;
+                    } else {
+                        *subvol = fcb->subvol;
+                        *inode = dc->key.obj_id;
+                    }
+                    
+                    *pdc = dc;
+                    
+                    Status = STATUS_SUCCESS;
+                    goto end;
+                }
+            } else if (dc->hash_uc > hash) {
+                Status = STATUS_OBJECT_NAME_NOT_FOUND;
+                goto end;
+            }
+            
+            le = le->Flink;
+        }
+    }
+    
+    Status = STATUS_OBJECT_NAME_NOT_FOUND; // reached the end of the list without a match
 
-NTSTATUS STDCALL find_file_in_dir(device_extension* Vcb, PUNICODE_STRING filename, file_ref* fr,
-                                  root** subvol, UINT64* inode, UINT8* type, UINT64* index, PANSI_STRING utf8,
-                                  BOOL case_sensitive, PIRP Irp) {
-    char* fn;
-    UINT32 crc32;
-    ULONG utf8len;
-    NTSTATUS Status;
-    
-    Status = RtlUnicodeToUTF8N(NULL, 0, &utf8len, filename->Buffer, filename->Length);
-    if (!NT_SUCCESS(Status)) {
-        ERR("RtlUnicodeToUTF8N 1 returned %08x\n", Status);
-        return Status;
-    }
-    
-    fn = ExAllocatePoolWithTag(PagedPool, utf8len, ALLOC_TAG);
-    if (!fn) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-    
-    Status = RtlUnicodeToUTF8N(fn, utf8len, &utf8len, filename->Buffer, filename->Length);
-    if (!NT_SUCCESS(Status)) {
-        ExFreePool(fn);
-        ERR("RtlUnicodeToUTF8N 2 returned %08x\n", Status);
-        return Status;
-    }
-    
-    TRACE("%.*s\n", utf8len, fn);
+end:
+    ExReleaseResourceLite(&fcb->nonpaged->dir_children_lock);
     
-    crc32 = calc_crc32c(0xfffffffe, (UINT8*)fn, (ULONG)utf8len);
-    TRACE("crc32c(%.*s) = %08x\n", utf8len, fn, crc32);
+    if (!case_sensitive)
+        ExFreePool(fnus.Buffer); // only the upcased copy was allocated by this function
     
-    return find_file_in_dir_with_crc32(Vcb, filename, crc32, fr, subvol, inode, type, index, utf8, case_sensitive, Irp);
+    return Status;
 }
 
 static BOOL find_stream(device_extension* Vcb, fcb* fcb, PUNICODE_STRING stream, PUNICODE_STRING newstreamname, UINT32* hash, PANSI_STRING xattr, PIRP Irp) {
@@ -1178,14 +632,195 @@ end:
     return deleted;
 }
 
+NTSTATUS load_csum(device_extension* Vcb, UINT32* csum, UINT64 start, UINT64 length, PIRP Irp) { // Copies `length` per-sector checksums, beginning at offset `start`, from Vcb->checksum_root into csum[]. Returns STATUS_SUCCESS, the find_item failure status, or STATUS_INTERNAL_ERROR if a checksum is missing.
+    NTSTATUS Status;
+    KEY searchkey;
+    traverse_ptr tp, next_tp;
+    UINT64 i, j;
+    BOOL b;
+    
+    searchkey.obj_id = EXTENT_CSUM_ID;
+    searchkey.obj_type = TYPE_EXTENT_CSUM;
+    searchkey.offset = start;
+    
+    Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return Status;
+    }
+    
+    i = 0; // i = number of checksums copied into csum so far
+    do {
+        if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { // items of any other type are stepped over
+            ULONG readlen;
+            
+            if (start < tp.item->key.offset)
+                j = 0; // item begins after `start`: read from its first entry (rejected below if it begins past the next wanted sector)
+            else
+                j = ((start - tp.item->key.offset) / Vcb->superblock.sector_size) + i; // index within this item of the checksum for offset start + i * sector_size
+            
+            if (j * sizeof(UINT32) > tp.item->size || tp.item->key.offset > start + (i * Vcb->superblock.sector_size)) { // entry j lies beyond the item's data, or the item begins past the next wanted sector
+                ERR("checksum not found for %llx\n", start + (i * Vcb->superblock.sector_size));
+                return STATUS_INTERNAL_ERROR;
+            }
+            
+            readlen = min((tp.item->size / sizeof(UINT32)) - j, length - i); // entries available from j onwards, capped at what is still needed
+            RtlCopyMemory(&csum[i], tp.item->data + (j * sizeof(UINT32)), readlen * sizeof(UINT32));
+            i += readlen;
+            
+            if (i == length)
+                break;
+        }
+        
+        b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp); // step to the following tree item; the loop ends when there is none
+        
+        if (b)
+            tp = next_tp;
+    } while (b);
+    
+    if (i < length) { // ran out of items before every requested checksum was copied
+        ERR("could not read checksums: offset %llx, length %llx sectors\n", start, length);
+        return STATUS_INTERNAL_ERROR;
+    }
+    
+    return STATUS_SUCCESS;
+}
+
+// Builds fcb's in-memory directory cache from the TYPE_DIR_INDEX items of its inode.
+//
+// Allocates and zeroes the 256-entry hash_ptrs and hash_ptrs_uc tables, then creates one
+// dir_child per usable DIR_INDEX item (searching from index 2), appends it to
+// dir_children_index and hands it to insert_dir_child_into_hash_lists.
+//
+// ignore_size: when FALSE, an inode whose st_size is 0 is taken to have no entries and
+//              the tree is not searched.
+// Irp:         passed through to find_item / find_next_item.
+//
+// Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES if an allocation fails, or the
+// status of a failed find_item. Items that are malformed, or whose name cannot be
+// converted or upcased, are logged and skipped.
+NTSTATUS load_dir_children(fcb* fcb, BOOL ignore_size, PIRP Irp) {
+    KEY searchkey;
+    traverse_ptr tp, next_tp;
+    NTSTATUS Status;
+    
+    fcb->hash_ptrs = ExAllocatePoolWithTag(PagedPool, sizeof(LIST_ENTRY*) * 256, ALLOC_TAG);
+    if (!fcb->hash_ptrs) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    
+    RtlZeroMemory(fcb->hash_ptrs, sizeof(LIST_ENTRY*) * 256);
+    
+    fcb->hash_ptrs_uc = ExAllocatePoolWithTag(PagedPool, sizeof(LIST_ENTRY*) * 256, ALLOC_TAG);
+    if (!fcb->hash_ptrs_uc) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    
+    RtlZeroMemory(fcb->hash_ptrs_uc, sizeof(LIST_ENTRY*) * 256);
+    
+    if (!ignore_size && fcb->inode_item.st_size == 0)
+        return STATUS_SUCCESS;
+    
+    searchkey.obj_id = fcb->inode;
+    searchkey.obj_type = TYPE_DIR_INDEX;
+    searchkey.offset = 2;
+    
+    Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("find_item returned %08x\n", Status);
+        return Status;
+    }
+    
+    // find_item may land on the item just before the search key; step forward once if so
+    if (keycmp(tp.item->key, searchkey) == -1) {
+        if (find_next_item(fcb->Vcb, &tp, &next_tp, FALSE, Irp)) {
+            tp = next_tp;
+            TRACE("moving on to %llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+        }
+    }
+    
+    while (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
+        DIR_ITEM* di = (DIR_ITEM*)tp.item->data;
+        dir_child* dc;
+        ULONG utf16len;
+        
+        if (tp.item->size < sizeof(DIR_ITEM)) {
+            WARN("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM));
+            goto cont;
+        }
+        
+        if (di->n == 0) {
+            WARN("(%llx,%x,%llx): DIR_ITEM name length is zero\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+            goto cont;
+        }
+        
+        // The n name bytes and m data bytes follow the fixed header, which already counts
+        // name[0]. Without this check a truncated item makes di->name read past the item.
+        if (tp.item->size < sizeof(DIR_ITEM) - 1 + di->m + di->n) {
+            WARN("(%llx,%x,%llx) is truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+            goto cont;
+        }
+        
+        Status = RtlUTF8ToUnicodeN(NULL, 0, &utf16len, di->name, di->n);
+        if (!NT_SUCCESS(Status)) {
+            ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status);
+            goto cont;
+        }
+        
+        // UNICODE_STRING lengths are USHORT. Storing a larger utf16len would truncate the
+        // allocation size while the conversion below still writes utf16len bytes.
+        if (utf16len > 0xffff) {
+            WARN("(%llx,%x,%llx): name is %u bytes as UTF-16, too long for a UNICODE_STRING\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, utf16len);
+            goto cont;
+        }
+
+        dc = ExAllocatePoolWithTag(PagedPool, sizeof(dir_child), ALLOC_TAG);
+        if (!dc) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        dc->key = di->key;
+        dc->index = tp.item->key.offset;
+        dc->type = di->type;
+        dc->fileref = NULL;
+        
+        // keep the original UTF-8 bytes alongside the UTF-16 and upcased forms
+        dc->utf8.MaximumLength = dc->utf8.Length = di->n;
+        dc->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, di->n, ALLOC_TAG);
+        if (!dc->utf8.Buffer) {
+            ERR("out of memory\n");
+            ExFreePool(dc);
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        RtlCopyMemory(dc->utf8.Buffer, di->name, di->n);
+        
+        dc->name.MaximumLength = dc->name.Length = (USHORT)utf16len;
+        dc->name.Buffer = ExAllocatePoolWithTag(PagedPool, dc->name.MaximumLength, ALLOC_TAG);
+        if (!dc->name.Buffer) {
+            ERR("out of memory\n");
+            ExFreePool(dc->utf8.Buffer);
+            ExFreePool(dc);
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        Status = RtlUTF8ToUnicodeN(dc->name.Buffer, utf16len, &utf16len, di->name, di->n);
+        if (!NT_SUCCESS(Status)) {
+            ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status);
+            ExFreePool(dc->utf8.Buffer);
+            ExFreePool(dc->name.Buffer);
+            ExFreePool(dc);
+            goto cont;
+        }
+        
+        // TRUE: RtlUpcaseUnicodeString allocates dc->name_uc.Buffer itself
+        Status = RtlUpcaseUnicodeString(&dc->name_uc, &dc->name, TRUE);
+        if (!NT_SUCCESS(Status)) {
+            ERR("RtlUpcaseUnicodeString returned %08x\n", Status);
+            ExFreePool(dc->utf8.Buffer);
+            ExFreePool(dc->name.Buffer);
+            ExFreePool(dc);
+            goto cont;
+        }
+        
+        // both hashes are taken over the raw UTF-16 bytes, matching the lookup side
+        dc->hash = calc_crc32c(0xffffffff, (UINT8*)dc->name.Buffer, dc->name.Length);
+        dc->hash_uc = calc_crc32c(0xffffffff, (UINT8*)dc->name_uc.Buffer, dc->name_uc.Length);
+        
+        InsertTailList(&fcb->dir_children_index, &dc->list_entry_index);
+        
+        insert_dir_child_into_hash_lists(fcb, dc);
+        
+cont:
+        if (find_next_item(fcb->Vcb, &tp, &next_tp, FALSE, Irp))
+            tp = next_tp;
+        else
+            break;
+    }
+    
+    return STATUS_SUCCESS;
+}
+
 NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type, PANSI_STRING utf8, fcb* parent, fcb** pfcb, POOL_TYPE pooltype, PIRP Irp) {
     KEY searchkey;
-    traverse_ptr tp;
+    traverse_ptr tp, next_tp;
     NTSTATUS Status;
     fcb* fcb;
-    BOOL b;
-    UINT8* eadata;
-    UINT16 ealen;
+    BOOL atts_set = FALSE, sd_set = FALSE, no_data;
+    LIST_ENTRY* lastle = NULL;
+    EXTENT_DATA* ed = NULL;
     
     if (!IsListEmpty(&subvol->fcbs)) {
         LIST_ENTRY* le = subvol->fcbs.Flink;
@@ -1193,17 +828,22 @@ NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type,
         while (le != &subvol->fcbs) {
             fcb = CONTAINING_RECORD(le, struct _fcb, list_entry);
             
-            if (fcb->inode == inode && !fcb->ads) {
+            if (fcb->inode == inode) {
+                if (!fcb->ads) {
 #ifdef DEBUG_FCB_REFCOUNTS
-                LONG rc = InterlockedIncrement(&fcb->refcount);
-                
-                WARN("fcb %p: refcount now %i (subvol %llx, inode %llx)\n", fcb, rc, fcb->subvol->id, fcb->inode);
+                    LONG rc = InterlockedIncrement(&fcb->refcount);
+
+                    WARN("fcb %p: refcount now %i (subvol %llx, inode %llx)\n", fcb, rc, fcb->subvol->id, fcb->inode);
 #else
-                InterlockedIncrement(&fcb->refcount);
+                    InterlockedIncrement(&fcb->refcount);
 #endif
 
-                *pfcb = fcb;
-                return STATUS_SUCCESS;
+                    *pfcb = fcb;
+                    return STATUS_SUCCESS;
+                }
+            } else if (fcb->inode > inode) {
+                lastle = le->Blink;
+                break;
             }
             
             le = le->Flink;
@@ -1250,326 +890,356 @@ NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type,
         else if (fcb->inode_item.st_mode & __S_IFBLK)
             fcb->type = BTRFS_TYPE_BLOCKDEV;
         else if (fcb->inode_item.st_mode & __S_IFIFO)
-            fcb->type = BTRFS_TYPE_FIFO;
-        else if (fcb->inode_item.st_mode & __S_IFLNK)
-            fcb->type = BTRFS_TYPE_SYMLINK;
-        else if (fcb->inode_item.st_mode & __S_IFSOCK)
-            fcb->type = BTRFS_TYPE_SOCKET;
-        else
-            fcb->type = BTRFS_TYPE_FILE;
-    }
-    
-    fcb->atts = get_file_attributes(Vcb, &fcb->inode_item, fcb->subvol, fcb->inode, fcb->type, utf8 && utf8->Buffer[0] == '.', FALSE, Irp);
-    
-    fcb_get_sd(fcb, parent, Irp);
-    
-    if (fcb->type == BTRFS_TYPE_DIRECTORY && fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT) {
-        UINT8* xattrdata;
-        UINT16 xattrlen;
-        
-        if (get_xattr(Vcb, subvol, inode, EA_REPARSE, EA_REPARSE_HASH, &xattrdata, &xattrlen, Irp)) {
-            fcb->reparse_xattr.Buffer = (char*)xattrdata;
-            fcb->reparse_xattr.Length = fcb->reparse_xattr.MaximumLength = xattrlen;
-        } else {
-            fcb->atts &= ~FILE_ATTRIBUTE_REPARSE_POINT;
-            
-            if (!Vcb->readonly && !(subvol->root_item.flags & BTRFS_SUBVOL_READONLY)) {
-                fcb->atts_changed = TRUE;
-                mark_fcb_dirty(fcb);
-            }
-        }
-    }
-    
-    fcb->ealen = 0;
-    
-    if (get_xattr(Vcb, subvol, inode, EA_EA, EA_EA_HASH, &eadata, &ealen, Irp)) {
-        ULONG offset;
-        
-        Status = IoCheckEaBufferValidity((FILE_FULL_EA_INFORMATION*)eadata, ealen, &offset);
-        
-        if (!NT_SUCCESS(Status)) {
-            WARN("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, offset);
-            ExFreePool(eadata);
-        } else {
-            FILE_FULL_EA_INFORMATION* eainfo;
-            fcb->ea_xattr.Buffer = (char*)eadata;
-            fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = ealen;
-            
-            fcb->ealen = 4;
-            
-            // calculate ealen
-            eainfo = (FILE_FULL_EA_INFORMATION*)eadata;
-            do {
-                fcb->ealen += 5 + eainfo->EaNameLength + eainfo->EaValueLength;
-                
-                if (eainfo->NextEntryOffset == 0)
-                    break;
-                
-                eainfo = (FILE_FULL_EA_INFORMATION*)(((UINT8*)eainfo) + eainfo->NextEntryOffset);
-            } while (TRUE);
-        }
-    }
-    
-    InsertTailList(&subvol->fcbs, &fcb->list_entry);
-    InsertTailList(&Vcb->all_fcbs, &fcb->list_entry_all);
-    
-    fcb->Header.IsFastIoPossible = fast_io_possible(fcb);
-    
-    if (fcb->inode_item.st_size == 0 || (fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK)) {
-        fcb->Header.AllocationSize.QuadPart = 0;
-        fcb->Header.FileSize.QuadPart = 0;
-        fcb->Header.ValidDataLength.QuadPart = 0;
-    } else {
-        EXTENT_DATA* ed = NULL;
-        traverse_ptr next_tp;
-        
-        searchkey.obj_id = fcb->inode;
-        searchkey.obj_type = TYPE_EXTENT_DATA;
-        searchkey.offset = 0;
-        
-        Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("error - find_item returned %08x\n", Status);
-            free_fcb(fcb);
-            return Status;
-        }
-        
-        do {
-            if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
-                extent* ext;
-                BOOL unique = FALSE;
-                
-                ed = (EXTENT_DATA*)tp.item->data;
-                
-                if (tp.item->size < sizeof(EXTENT_DATA)) {
-                    ERR("(%llx,%x,%llx) was %llx bytes, expected at least %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
-                        tp.item->size, sizeof(EXTENT_DATA));
-                    
-                    free_fcb(fcb);
-                    return STATUS_INTERNAL_ERROR;
-                }
-                
-                if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
-                    EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ed->data[0];
-                    
-                    if (tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
-                        ERR("(%llx,%x,%llx) was %llx bytes, expected at least %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
-                            tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
-                    
-                        free_fcb(fcb);
-                        return STATUS_INTERNAL_ERROR;
-                    }
-                    
-                    if (ed2->address == 0 && ed2->size == 0) // sparse
-                        goto nextitem;
-                    
-                    if (ed2->size != 0 && is_tree_unique(Vcb, tp.tree, Irp))
-                        unique = is_extent_unique(Vcb, ed2->address, ed2->size, Irp);
-                }
-                
-                ext = ExAllocatePoolWithTag(pooltype, sizeof(extent), ALLOC_TAG);
-                if (!ext) {
-                    ERR("out of memory\n");
-                    free_fcb(fcb);
-                    return STATUS_INSUFFICIENT_RESOURCES;
-                }
-                
-                ext->data = ExAllocatePoolWithTag(pooltype, tp.item->size, ALLOC_TAG);
-                if (!ext->data) {
-                    ERR("out of memory\n");
-                    ExFreePool(ext);
-                    free_fcb(fcb);
-                    return STATUS_INSUFFICIENT_RESOURCES;
-                }
-                
-                ext->offset = tp.item->key.offset;
-                RtlCopyMemory(ext->data, tp.item->data, tp.item->size);
-                ext->datalen = tp.item->size;
-                ext->unique = unique;
-                ext->ignore = FALSE;
-                
-                InsertTailList(&fcb->extents, &ext->list_entry);
-            }
-            
-nextitem:
-            b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp);
-         
-            if (b) {
-                tp = next_tp;
-                
-                if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type))
-                    break;
-            }
-        } while (b);
-        
-        if (ed && ed->type == EXTENT_TYPE_INLINE)
-            fcb->Header.AllocationSize.QuadPart = fcb->inode_item.st_size;
-        else
-            fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size);
-        
-        fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size;
-        fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size;
-    }
-    
-    // FIXME - only do if st_nlink > 1?
-    
-    searchkey.obj_id = inode;
-    searchkey.obj_type = TYPE_INODE_REF;
-    searchkey.offset = 0;
-    
-    Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        free_fcb(fcb);
-        return Status;
+            fcb->type = BTRFS_TYPE_FIFO;
+        else if (fcb->inode_item.st_mode & __S_IFLNK)
+            fcb->type = BTRFS_TYPE_SYMLINK;
+        else if (fcb->inode_item.st_mode & __S_IFSOCK)
+            fcb->type = BTRFS_TYPE_SOCKET;
+        else
+            fcb->type = BTRFS_TYPE_FILE;
     }
     
-    do {
-        traverse_ptr next_tp;
+    no_data = fcb->inode_item.st_size == 0 || (fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK);
+    
+    while (find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) {
+        tp = next_tp;
+        
+      &n